import spacy
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the small English spaCy model used for tokenisation and lemmatisation
nlp = spacy.load("en_core_web_sm")
# Define a helper that removes stop words and punctuation and lemmatises the remaining tokens
def preprocesser(text_array):
    preprocessed_texts = []
    for text in text_array:
        doc = nlp(text)
        words_lst = []
        for token in doc:
            if not token.is_stop and not token.is_punct:
                words_lst.append(token.lemma_)
        preprocessed_text = " ".join(words_lst)
        preprocessed_texts.append(preprocessed_text)
    return preprocessed_texts
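
# ---------------------------------------------------------------------------
# For reference: a hypothetical sketch of how the saved pipeline could have
# been produced with the imports above (FunctionTransformer + CountVectorizer
# + LogisticRegression; MultinomialNB was presumably an alternative that was
# tried). This is an assumption, not the actual training code, and the
# function is never called here, so nothing is retrained or overwritten.
# ---------------------------------------------------------------------------
def train_and_save_pipeline(train_texts, train_labels,
                            model_path='models/Logisticmainmodel.pkl'):
    pipeline = make_pipeline(
        FunctionTransformer(preprocesser),   # spaCy-based cleaning and lemmatisation
        CountVectorizer(),                   # bag-of-words features
        LogisticRegression(max_iter=1000)    # illustrative max_iter; tune as needed
    )
    pipeline.fit(train_texts, train_labels)
    joblib.dump(pipeline, model_path)        # save the fitted pipeline to disk
    return pipeline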
# Load the trained pipeline back from disk. It is assumed to bundle the
# preprocesser step, the vectoriser, and the classifier, which is why
# preprocesser must be defined before unpickling.
loaded_model = joblib.load('models/Logisticmainmodel.pkl')
# Predict and print the category for a single piece of text
def pd(text):
    # Map the model's numeric labels back to human-readable category names
    label_to_category = {
        0: 'BUSINESS-MONEY',
        1: 'EMPOWERED VOICES',
        2: 'ENVIRONMENT',
        3: 'GENERAL',
        4: 'LIFESTYLE AND WELLNESS',
        5: 'MISC',
        6: 'PARENTING AND EDUCATION',
        7: 'POLITICS',
        8: 'SCIENCE AND TECH',
        9: 'SPORTS AND ENTERTAINMENT',
        10: 'TRAVEL-TOURISM & ART-CULTURE',
        11: 'U.S. NEWS',
        12: 'WORLDNEWS'
    }
    new_texts = [text]
    predicted_labels = loaded_model.predict(new_texts)
    # Convert the predicted numerical labels to category names using the mapping
    predicted_categories = [label_to_category[label] for label in predicted_labels]
    # Print and return the predicted category
    for text, predicted_category in zip(new_texts, predicted_categories):
        print(f"Text: {text}\nPredicted Category: {predicted_category}\n")
    return predicted_category
pd("election") | |