@comment{Journal article comparing neural and phrase-based statistical machine
translation for the English-Bangla pair. The citation key is the article DOI.
The article_type field is not a standard BibTeX field; it is kept as exported
and is ignored by bibliography styles.}
@article{10.3844/jcssp.2019.1627.1637,
  author       = {Al Mumin, Mohammad Abdullah and Seddiqui, Md Hanif and Iqbal, Muhammed Zafar and Islam, Mohammed Jahirul},
  title        = {Neural Machine Translation for Low-resource {English}-{Bangla}},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {15},
  number       = {11},
  pages        = {1627--1637},
  year         = {2019},
  month        = nov,
  doi          = {10.3844/jcssp.2019.1627.1637},
  url          = {https://thescipub.com/abstract/jcssp.2019.1627.1637},
  article_type = {journal},
  abstract     = {Neural machine translation has recently been able to gain state-of-the-art translation quality for many language pairs. However, neural machine translation has been less tested for English-Bangla language pair, two linguistically distant and widely spoken languages. In this paper, we apply neural machine translation to the task of English-Bangla translation in both directions and compare it against a standard phrase-based statistical machine translation system. We obtain up to +0.30 and +4.95 BLEU improvement over phrase-based statistical machine translation for English-to-Bangla and Bangla-to-English respectively. Due to low-resource and morphological richness of Bangla, English-Bangla translation task produces a large number of rare words. We apply subword segmentation with byte pair encoding to handle this rare words issue. We obtain up to +0.69 and +0.30 BLEU improvement over baseline neural machine translation for English-to-Bangla and Bangla-to-English respectively. We further investigate our system output for several challenging linguistic properties like subject-verb agreement, noun inflection, long distance reordering and rare words translation. We observe that neural machine translation with and without subword segmentation significantly outperform the phrase-based statistical machine translation system, thus establishing itself as the state-of-the-art technology for low-resource English-Bangla machine translation.},
}