% Journal article; the citation key is the article's DOI.
% `article_type` is not a standard field and is ignored by standard styles.
@article{10.3844/jcssp.2022.1179.1188,
  author       = {{Nurhadi} and Kadir, Rabiah Abdul and Surin, Ely Salwana Mat},
  title        = {Complex {SQL-NoSQL} Query Translation for Data Lake Management},
  journal      = {Journal of Computer Science},
  volume       = {18},
  number       = {12},
  pages        = {1179--1188},
  year         = {2022},
  month        = dec,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2022.1179.1188},
  url          = {https://thescipub.com/abstract/jcssp.2022.1179.1188},
  article_type = {journal},
  abstract     = {A data lake refers to an extremely large data resource or repository. Data lakes store large amounts of data and use advanced analytics to pair data from multiple sources with different types of structured, semi-structured, and unstructured information. NoSQL databases such as Mongodb, Redis, Neo4j, and Cassandra are nontabular and they store data differently rather than use relational tables. NoSQL databases come in many forms, mostly documents, key values, wide columns, and graphs based on their data model. NoSQL gives less complicated scalability and higher overall performance as compared with traditional relational databases. NoSQL databases can store different types of data, but they cannot fully support Automation, Consistency, Isolation, and endurance (ACID) features, i.e., trigger functions in multi-transaction management, because a NoSQL database uses a non-relational database system. Thus, an interpreter is necessary for SQL-to-NoSQL queries. We used ANTLR (ANother Tool for Language Recognition), which has five main stages: The input SQL query, the tokenizer, the parser, the parser tree, and lastly the generation of the query results in NoSQL. The tool gave users to write a flexible multi-pass language parser that is expected to solve problems in querying complex ACID functions and other problems in complex queries in NoSQL databases. In the measurement, analysis, and evaluation of the translation results through the comparison of each NoSQL criterion against a Relational Database Management System (MySQL), the scores obtained were as follows. The performance criterion achieved the highest score (98.40\%) by the MongoDB database, followed by scalability (97.40\%) and accuracy (97.00\%). The criterion with the lowest score was complexity (91.65\%).},
}