% Journal article: Pieroni, Cabroni, Fallucchi and Scarpato (2021),
% Journal of Computer Science 17(9), pp. 762--775.
% `article_type` is a non-standard field carried over from the original record;
% standard styles ignore unknown fields.
@article{10.3844/jcssp.2021.762.775,
  author       = {Pieroni, Alessandra and Cabroni, Alessandro and Fallucchi, Francesca and Scarpato, Noemi},
  title        = {Predictive Modeling Applied to Structured Clinical Data Extracted from Electronic Health Records: An Architectural Hypothesis and A First Experiment},
  journal      = {Journal of Computer Science},
  volume       = {17},
  number       = {9},
  pages        = {762--775},
  year         = {2021},
  month        = sep,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2021.762.775},
  url          = {https://thescipub.com/abstract/jcssp.2021.762.775},
  article_type = {journal},
  abstract     = {Predictive analysis is one of current important issues in the healthcare context. A lot of patients' input data can be obtained from their Electronic Health Records. In our research, we propose a general architecture named Health Prediction Architecture. Initially, we consider that data refer to strongly structured health datasets (no free text). Our objectives are related to exploring some problems in the prediction context for healthcare. In particular, we consider dataset heterogeneity, accuracy together explain ability, dataset for benchmarking. After a presentation of Electronic Health Record and some useful related standards, we propose our architecture based on two principal modules. First module produces features extraction and it implements a Convolutional Neural Network or alternatively a Multi-Layer Perceptron. Second module produces predictions and it implements alternatively one from Graph Convolutional Network, Simplified Graph Transduction Game, Nearest Nodes and Classes Graph. We define the datasets randomly so to have the possibility to manage data sufficiently heterogeneous and useful for a benchmarking, without any privacy problem too. In this study, we experiment a first instantiation of the architecture, based on Multi-Layer Perceptron as first module and Simplified Graph Transduction Game as second module, considering health data related to type 2 diabetes risk, generated according to a healthcare rule. We try the architecture by slightly increasing both cardinalities of datasets and extracted features. As first results of our research, in this study we produce training and testing randomized datasets and we obtain a testing accuracy behavior generally better than using only Multi-Layer Perceptron (best accuracy with 200 labelled elements). Our architecture aims to evolve to be used as a general solution in healthcare predictions context. We are also interested in studying our solution in future works from the explain ability point of view, with particular interest in explaining the results in terms of input attributes.},
}