% Conference paper: HiSAT (IntelliSys 2022 proceedings, Lecture Notes in Networks and Systems, published 2023).
@inproceedings{96a3e6bfb0dc45189b2edf1cb8753b0b,
  title = "{HiSAT}: Hierarchical Framework for Sentiment Analysis on {Twitter} Data",
  abstract = "Social media websites such as Twitter have become so indispensable today that people use them almost on a daily basis for sharing their emotions, opinions, suggestions and thoughts. Motivated by such behavioral tendencies, the purpose of this study is to define an approach to automatically classify the tweets on Twitter data into two main classes, namely, hate speech and non-hate speech. This provides a valuable source of information in analyzing and understanding target audiences and spotting marketing trends. We thus propose HiSAT, a Hierarchical framework for Sentiment Analysis on Twitter data. Sentiments/opinions in tweets are highly unstructured and do not have a proper defined sequence. They constitute a heterogeneous data from many sources having different formats, and express either positive or negative, or neutral sentiment. Hence, in HiSAT we conduct Natural Language Processing encompassing tokenization, stemming and lemmatization techniques that convert text to tokens; as well as Bag-of-Words (BoW) and Term Frequency-Inverse Document Frequency (TF-IDF) techniques that convert text sentences into numeric vectors. These are then fed as inputs to Machine learning algorithms within the HiSAT framework; more specifically, Random Forest, Logistic Regression and Na{\"i}ve Bayes are used as text-binary classifiers to detect hate speech and non-hate speech from the tweets. Results of experiments performed with the HiSAT framework show that Random Forest outperforms the others with a better prediction in estimating the correct labels (with accuracy above the 95\% range). We present the HiSAT approach, its implementation and experiments, along with related work and ongoing research.",
  keywords = "Bayesian models, Knowledge discovery, Logistic Regression, NLP, Opinion mining, Random Forest, Social media, Text mining",
  author = "Kommu, Amrutha and Patel, Snehal and Derosa, Sebastian and Wang, Jiayin and Varde, Aparna S.",
  note = "Funding Information: acknowledge a grant from the US National Science Foundation NSF MRI: Acquisition of a High-Performance GPU Cluster for Research and Education. Award Number 2018575. Dr. Aparna Varde is a visiting researcher at Max Planck Institute for Informatics, Saarbrucken, Germany, in the research group of Dr. Gerhard Weikum, during the academic year 2021--2022, including a sabbatical visit. The authors acknowledge the CSAM Dean{\textquoteright}s Office Travel Grant from Montclair State University to support attending this conference. The authors would like to make the disclaimer that the opinions expressed, analyzed and presented in this work are obtained from knowledge discovery by mining the concerned data only. These do not reflect the personal or professional views of the authors. Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG. Conference date: 01-09-2022 Through 02-09-2022",
  year = "2023",
  doi = "10.1007/978-3-031-16072-1_28",
  language = "English",
  isbn = "9783031160714",
  series = "Lecture Notes in Networks and Systems",
  publisher = "Springer Science and Business Media Deutschland GmbH",
  pages = "376--392",
  editor = "Arai, Kohei",
  booktitle = "Intelligent Systems and Applications -- Proceedings of the 2022 Intelligent Systems Conference {IntelliSys} Volume 1",
}