% Conference paper record (institutional repository export).
% Changes from the exported form:
%   - a citation key was added (the export had none, so the entry could not be cited);
%   - the venue moved from the non-standard "conference" field to "booktitle",
%     which the inproceedings type requires;
%   - the page range uses "--";
%   - the trailing period was dropped from the title (styles add punctuation);
%   - repository workflow remarks moved from "note" (printed by standard styles)
%     to the ignored "internal-note" field; "note" keeps only the IEEE set statement;
%   - "keyword" was renamed to "keywords".
@inproceedings{arifeen2022topology,
  author            = {Arifeen, Murshedul and Petrovski, Andrei},
  title             = {Topology for preserving feature correlation in tabular synthetic data},
  booktitle         = {15th International Conference on Security of Information and Networks 2022 ({SINCONF} 2022)},
  pages             = {61--66},
  publisher         = {Institute of Electrical and Electronics Engineers (IEEE)},
  year              = {2022},
  doi               = {10.1109/SIN56466.2022.9970505},
  isbn              = {9781665454650},
  url               = {https://rgu-repository.worktribe.com/output/1853567},
  keywords          = {Synthetic data, Correlation, GAN, Topology, Persistent homology},
  abstract          = {Tabular synthetic data generating models based on Generative Adversarial Network (GAN) show significant contributions to enhancing the performance of deep learning models by providing a sufficient amount of training data. However, the existing GAN-based models cannot preserve the feature correlations in synthetic data during the data synthesis process. Therefore, the synthetic data become unrealistic and creates a problem for certain applications like correlation-based feature weighting. In this short theoretical paper, we showed a promising approach based on the topology of datasets to preserve correlation in synthetic data. We formulated our hypothesis for preserving correlation in synthetic data and used persistent homology to show that the topological spaces of the original and synthetic data have dissimilarity in topological features, especially in 0th and 1st Homology groups. Finally, we concluded that minimizing the difference in topological features can make the synthetic data space locally homeomorphic to the original data space, and the synthetic data may preserve the feature correlation under homeomorphism conditions.},
  note              = {{\copyright} 2022 IEEE. Personal use of this material is permitted. Permission from IEEE must be obtained for all other uses, in any current or future media, including reprinting/republishing this material for advertising or promotional purposes, creating new collective works, for resale or redistribution to servers or lists, or reuse of any copyrighted component of this work in other works.},
  publicationstatus = {Published},
  internal-note     = {INFO COMPLETE (Info via contact 6/1/2023 LM) PERMISSION GRANTED (version = AAM; embargo = none; licence = Pub's own; POLICY = https://conferences.ieeeauthorcenter.ieee.org/author-ethics/guidelines-and-policies/post-publication-policies/ 9/1/2023 LM) DOCUMENT READY (AAM rec'd from contact 6/1/2023 LM) ADDITIONAL INFO - Contact: Dipto Arifeen; Andrei Petrovski Set Statement: see the note field},
}