# News-Summarizer / model.py
# (HuggingFace Space file — uploaded by Shoaib-33, "Upload 20 files", commit fbb3f82)
import spacy
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
nlp = spacy.load("en_core_web_sm")
def preprocesser(text_array):
    """Clean a sequence of texts for vectorization.

    For each text, drop stop words and punctuation and keep the lemma of
    every remaining token, then re-join the lemmas with single spaces.

    Args:
        text_array: iterable of raw text strings.

    Returns:
        list[str]: one space-joined lemma string per input text.
    """
    cleaned = []
    for raw_text in text_array:
        doc = nlp(raw_text)
        lemmas = [
            token.lemma_
            for token in doc
            if not token.is_stop and not token.is_punct
        ]
        cleaned.append(" ".join(lemmas))
    return cleaned
import joblib
# NOTE(review): despite the original "Save the trained model" comment, nothing
# is saved here — this only loads the previously trained classifier back from
# disk. Presumably a sklearn pipeline whose first step vectorizes raw text,
# since pd() passes plain strings to .predict() — TODO confirm.
loaded_model = joblib.load('models/Logisticmainmodel.pkl')
# Numeric class label -> human-readable category name. Hoisted to module
# level so the mapping is built once instead of on every pd() call.
_LABEL_TO_CATEGORY = {
    0: 'BUSINESS-MONEY',
    1: 'EMPOWERED VOICES',
    2: 'ENVIRONMENT',
    3: 'GENERAL',
    4: 'LIFESTYLE AND WELLNESS',
    5: 'MISC',
    6: 'PARENTING AND EDUCATION',
    7: 'POLITICS',
    8: 'SCIENCE AND TECH',
    9: 'SPORTS AND ENTERTAINMENT',
    10: 'TRAVEL-TOURISM & ART-CULTURE',
    11: 'U.S. NEWS',
    12: 'WORLDNEWS'
}


def pd(text):
    """Predict the news category of a single text.

    NOTE(review): the name `pd` shadows the conventional pandas alias;
    kept as-is because callers depend on it.

    Args:
        text: raw text string to classify.

    Returns:
        str: the predicted category name.
    """
    # The loaded pipeline accepts raw strings, so wrap the single input
    # in a one-element list. Original code looped over this list with a
    # variable that shadowed the `text` parameter; fixed by indexing
    # the single prediction directly.
    predicted_label = loaded_model.predict([text])[0]
    predicted_category = _LABEL_TO_CATEGORY[predicted_label]
    print(f"Text: {text}\nPredicted Category: {predicted_category}\n")
    return predicted_category


pd("election")