% Journal article (Journal of Computer Science, vol. 15, no. 7, 2019) presenting
% the shu-torjoma English-Bangla phrase-based statistical MT system.
% The title and abstract contain UTF-8 characters, so process this entry with a
% Unicode-aware toolchain. article_type is a non-standard field that comes from
% the publisher export; standard styles ignore it.
@article{10.3844/jcssp.2019.1022.1039,
  author       = {Al Mumin, Mohammad Abdullah and Seddiqui, Md Hanif and Iqbal, Muhammed Zafar and Islam, Mohammed Jahirul},
  title        = {{shu-torjoma}: An {English}↔{Bangla} Statistical Machine Translation System},
  journal      = {Journal of Computer Science},
  volume       = {15},
  number       = {7},
  pages        = {1022--1039},
  year         = {2019},
  month        = jul,
  publisher    = {Science Publications},
  doi          = {10.3844/jcssp.2019.1022.1039},
  url          = {https://thescipub.com/abstract/jcssp.2019.1022.1039},
  article_type = {journal},
  abstract     = {An efficient and publicly open machine translation system is in dire need to get the maximum benefits of Information and Communication Technology through removing the language barrier in this era of globalization. In this study, we present a Phrase-Based Statistical Machine Translation (PBMT) system between English and Bangla languages in both directions. To the best of our knowledge, the system is trained on the largest dataset of more than three million tokens each side in English↔Bangla translation task. In the system, we perform data preprocessing and use optimized parameters to produce efficient system output. We analyze our system output from several viewpoints: overall results, comparisons with the available systems, sentence type and length effect, and behaviour of two challenging linguistic properties–  prepositional phrase and noun inflection. Our analysis provides useful insights that translating into morphologically richer language is harder than translating from them and this is mainly due to the difficulties of translating noun inflections. Comparisons with the available systems show that our system outperforms the other systems significantly and gain 10.84 BLEU, 2.18 NIST and 19.02 TER points over the next best system. The analysis of the sentence type and length effect shows that simple sentences are easier to translate and the sentences longer than 15 words are harder to translate for English↔Bangla translation task.
To foster the English↔Bangla machine translation research, we have developed development and test datasets, which are representative in sentence length and balanced in genre to be used as a benchmark and are made publicly available.},
}