% Journal article comparing stochastic and rule-based POS tagging on Malay online text.
% The citation key is the article's DOI. The abstract is kept as published, with only
% the quotation marks written as LaTeX quotes so the entry stays 7-bit clean.
@article{10.3844/ajassp.2017.843.851,
  author       = {Anbananthen, Kalaiarasi Sonai Muthu and Krishnan, Jaya Kumar and Sayeed, Mohd. Shohel and Muniapan, Praviny},
  title        = {Comparison of Stochastic and Rule-Based {POS} Tagging on {Malay} Online Text},
  journal      = {American Journal of Applied Sciences},
  volume       = {14},
  year         = {2017},
  month        = apr,
  pages        = {843--851},
  publisher    = {Science Publications},
  doi          = {10.3844/ajassp.2017.843.851},
  url          = {https://thescipub.com/abstract/ajassp.2017.843.851},
  article_type = {journal},
  abstract     = {Extensive development of web 2.0 has led to production of gigantic amount of user generated data. These data consist of many useful information. Manual analyzing these data and classifying sentiment in them, is an exhausting task, thus opinion mining method is needed. Opinion mining approach uses natural language processing where Part-of-Speech (POS) Tagging is a crucial part. The performance of any NLP system depends on the accuracy of a POS tagger. Two main issues that affect the accuracy of POS tagger are unknown words and ambiguity. Although research on POS tagging has been back dated few decades ago, yet they have been mostly focused on English. Research on Malay language is still in the early stage. Also, online Malay Text differs from proper Malay text, in the sense of structure and also grammar. Online users tend use a lot of abbreviations and short forms in their text. Besides this, the ``BahasaRojak'' phenomena complicate tagging process even further. Thus taking all these into consideration, in this study, we will review stochastic and rule-based POS tagging methodologies to deal with ambiguous and unknown words on online Malay text.},
}