% Conference paper from the DASFAA 2024 proceedings (LNCS series).
% Cleaned export: names in "Last, First" form, acronyms brace-protected,
% bare DOI, ASCII quotes in the abstract. Citation key left unchanged.
@inproceedings{00df87289ee44b93a2cb1c24d7859614,
  author    = {Bae, Wan D. and Alkobaisi, Shayma and Bhuvaji, Sartaj and Bankar, Siddheshwari},
  title     = {{SDGnE}: A Synthetic Data Generation and Evaluation System for Rare Event Prediction},
  booktitle = {Database Systems for Advanced Applications -- 29th International Conference, {DASFAA} 2024, Proceedings},
  editor    = {Onizuka, Makoto and Lee, Jae-Gil and Tong, Yongxin and Xiao, Chuan and Ishikawa, Yoshiharu and Lu, Kejing and Amer-Yahia, Sihem and Jagadish, H. V.},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2024},
  pages     = {508--512},
  doi       = {10.1007/978-981-97-5575-2_49},
  isbn      = {9789819755745},
  language  = {English},
  keywords  = {autoencoder, class imbalance, classification, generative adversarial network, SMOTE, synthetic data generation},
  abstract  = {Class imbalance in datasets creates a significant challenge for building efficient classifiers and results in poor prediction of rare events. This problem is more critical in applications where the size of the dataset is often small such as individual-based health risk prediction modeling and engineering problems heavily based on simulations. While several techniques have been proposed in this field, their performance with small size datasets requires improvement for practical use of the machine learning algorithms. This paper presents a system framework called ``Synthetic Data Generation and Evaluation (SDGnE)'' for the class imbalance problem by generating synthetic data using various techniques, analyzing data quality, and comparing the performance of the implemented techniques. We demonstrate the proposed system using a web-based user interface that includes methods for data generation, statistical analysis, and visual evaluation. The proposed system can help users have better understanding and insight of the generated data when using different techniques and can be straightforwardly extended to include new data generation techniques and evaluation tools.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2024.; 29th International Conference on Database Systems for Advanced Applications, DASFAA 2024 ; Conference date: 02-07-2024 Through 05-07-2024},
}