% Journal article: Kumar and Vengataasalam, Journal of Computer Science 9(11), 2013.
% "article_type" is a non-standard export field and is ignored by standard styles.
@article{10.3844/jcssp.2013.1514.1518,
  author       = {Kumar, A. Venkatesh and Vengataasalam, S.},
  title        = {Cluster Based Duplicate Detection},
  journal      = {Journal of Computer Science},
  volume       = {9},
  number       = {11},
  pages        = {1514--1518},
  year         = {2013},
  month        = sep,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2013.1514.1518},
  url          = {https://thescipub.com/abstract/jcssp.2013.1514.1518},
  article_type = {journal},
  abstract     = {We propose a clustering technique for entropy based text dis-similarity calculation of de-duplication system. Improve the quality of grouping; in this study we propose a Multi-Level Group Detection (MLGD) algorithm which produces a most accurate group with most closely related object using Alternative Decision Tree (ADT) technique. Our propose a two new algorithm; first one is Multi-Level Group Detection (MLGD) formation using Alternative Decision Tree (AD Tree), which will split the bunch of record into self-sized cluster to reduce the volume of data for text comparisons. Second one is calculating the dis-similarity percentage using entropy and Information Gain (IG). We show experimentally our proposed technique achieves higher average accuracy than existing traditional de-duplication system. Further, our technique not required any manual tuning for clustering formations as well as dis-similarity calculation for any kind of business data. In this study, we have presented a new efficient method is introduced for clustering formation using ADTree algorithm for duplicate deduction. The new method offers more accuracy dis-similarity measure for each cluster data without manual intervention at the time of duplicate deduction.}
}