% Journal article on neural machine translation for the English-Bangla pair.
% The citation key is identical to the DOI; it is kept unchanged so that
% existing \cite commands continue to resolve.
% `article_type` is not a standard BibTeX field; standard styles ignore it.
@article{10.3844/jcssp.2019.1627.1637,
  author       = {Al Mumin, Mohammad Abdullah and Seddiqui, Md Hanif and Iqbal, Muhammed Zafar and Islam, Mohammed Jahirul},
  title        = {Neural Machine Translation for Low-resource {English-Bangla}},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {15},
  number       = {11},
  pages        = {1627--1637},
  year         = {2019},
  month        = nov,
  doi          = {10.3844/jcssp.2019.1627.1637},
  url          = {https://thescipub.com/abstract/jcssp.2019.1627.1637},
  article_type = {journal},
  abstract     = {Neural machine translation has recently been able to gain state-of-the-art translation quality for many language pairs. However, neural machine translation has been less tested for English-Bangla language pair, two linguistically distant and widely spoken languages. In this paper, we apply neural machine translation to the task of English-Bangla translation in both directions and compare it against a standard phrase-based statistical machine translation system. We obtain up to +0.30 and +4.95 BLEU improvement over phrase-based statistical machine translation for English-to-Bangla and Bangla-to-English respectively. Due to low-resource and morphological richness of Bangla, English-Bangla translation task produces a large number of rare words. We apply subword segmentation with byte pair encoding to handle this rare words issue. We obtain up to +0.69 and +0.30 BLEU improvement over baseline neural machine translation for English-to-Bangla and Bangla-to-English respectively. We further investigate our system output for several challenging linguistic properties like subject-verb agreement, noun inflection, long distance reordering and rare words translation. We observe that neural machine translation with and without subword segmentation significantly outperform the phrase-based statistical machine translation system, thus establishing itself as the state-of-the-art technology for low-resource English-Bangla machine translation.},
}