@comment{Journal article record exported from the publisher site. The citation key is the article DOI and is kept unchanged so existing citations still resolve. article_type is a non-standard field that standard styles ignore.}
@article{10.3844/jcssp.2026.9.24,
  author       = {Fedoseev, Vladimir Igorevich and Konev, Anton Aleksandrovich and Repyuk, Natalia Sergeevna},
  title        = {Speech Corpora for Different Languages: A Systematic Review},
  journal      = {Journal of Computer Science},
  volume       = {22},
  number       = {1},
  pages        = {9--24},
  year         = {2026},
  month        = feb,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2026.9.24},
  url          = {https://thescipub.com/abstract/jcssp.2026.9.24},
  abstract     = {The study of speech signals relies on carefully curated audio recordings, which are compiled and stored within specialized speech corpora. This article provides a comprehensive overview of such corpora across multiple languages, with particular focus on Russian, English, and Arabic. It notes that Russian and Arabic are represented by fewer corpora compared to the more extensive resources available for English. The discussion includes an examination of typical speech corpus structures, a description of standard parameters for characterizing corpora, and an outline of common metrics used to describe the speech signal itself.},
  article_type = {journal},
}