% Canhasi (2018), Journal of Computer Science 14(5): simhash near-duplicate detection in OpenCL on CPUs, GPUs and FPGAs.
@article{10.3844/jcssp.2018.699.704,
  author       = {Canhasi, Ercan},
  title        = {Evaluating the Efficiency of {CPUs}, {GPUs} and {FPGAs} on a Near-Duplicate Document Detection Via {OpenCL}},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {14},
  number       = {5},
  pages        = {699--704},
  year         = {2018},
  month        = apr,
  doi          = {10.3844/jcssp.2018.699.704},
  url          = {https://thescipub.com/abstract/jcssp.2018.699.704},
  article_type = {journal},
  abstract     = {Discovering identical or near-identical items is urgently important in many applications such as Web crawling since it drastically reduces the text processing costs. Simhash is a widely used technique, able to attribute a bit-string identity to a text, such that similar texts have similar identities. In this study, a real-time solution for a simhash calculation in OpenCL is presented. We also show how it can be utilized by multi-CPUs, GPUs and FPGAs. As a result we indicate that the bottom line computation realized on the FPGA through OpenCL provides significant power advantages.},
}