% Chapter in a collected Springer volume (series: Studies in Big Data).
% Typed as incollection so that `title` is rendered as the chapter title and
% `booktitle` as the containing volume under classic BibTeX styles.
% NOTE(review): `booktitle` currently repeats the series name; this looks like
% an export artifact. Replace it with the actual volume title and add the
% series `volume` number and `editor` once confirmed against the DOI record.
% The publisher copyright statement is kept in the non-printing `copyright`
% field so it does not appear in every rendered bibliography item.
@incollection{abce6848e1ec49b0ae3729cf4f131e90,
  author    = {Mansoori, Ahmed and Tahat, Khalaf and Tahat, Dina Naser and Habes, Mohammad and Salloum, Said A.},
  title     = {Optimizing News Categorization with Machine Learning: A Comprehensive Study Using {Naive Bayes} ({MultinomialNB}) Classifier},
  booktitle = {Studies in Big Data},
  series    = {Studies in Big Data},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2025},
  pages     = {169--178},
  doi       = {10.1007/978-3-031-70855-8_15},
  language  = {English},
  keywords  = {Automated news systems, Data preprocessing, Machine learning, MultinomialNB, Naive Bayes, Natural language processing, News categorization, Text classification, TF-IDF, UCI machine learning repository},
  abstract  = {The rapid growth of online news content necessitates efficient automated categorization systems to manage and organize vast amounts of information. This study addresses the gap in effective news article classification by utilizing a Naive Bayes (MultinomialNB) classifier. We leverage the ``News Aggregator'' dataset from the ``UCI Machine Learning Repository'', consisting of over 400,000 news articles categorized into business, science and technology, entertainment, and health. Our preprocessing steps include handling missing values, text normalization, and ``term frequency-inverse document frequency (TF-IDF)'' vectorization. The trained Naive Bayes model achieved an overall accuracy of 89.6\%, with high precision and recall particularly in the {\textquoteleft}Entertainment{\textquoteright} category. Charts like the confusion matrix, ROC curve, and learning curve offer a detailed assessment of how well the model performs. These results highlight the Naive Bayes classifier{\textquoteright}s effectiveness in news categorization and suggest potential areas for further improvement, particularly in distinguishing {\textquoteleft}Science and Technology{\textquoteright} and {\textquoteleft}Health{\textquoteright} articles. This study demonstrates the practical application of machine learning in organizing news content, with implications for enhancing automated news categorization systems.},
  copyright = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
}