import spacy
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Load the small English spaCy model used for tokenisation and lemmatisation
nlp = spacy.load("en_core_web_sm")
# Define a helper that removes stop words and punctuation and lemmatises the remaining tokens
def preprocesser(text_array):
    preprocessed_texts = []
    for text in text_array:
        doc = nlp(text)
        words_lst = []
        for token in doc:
            if not token.is_stop and not token.is_punct:
                words_lst.append(token.lemma_)
        preprocessed_text = " ".join(words_lst)
        preprocessed_texts.append(preprocessed_text)
    return preprocessed_texts
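
# ---------------------------------------------------------------------------
# For reference: a hypothetical sketch of how the saved pipeline could have
# been produced with the imports above (FunctionTransformer + CountVectorizer
# + LogisticRegression; MultinomialNB was presumably an alternative that was
# tried). This is an assumption, not the actual training code, and the
# function is never called here, so nothing is retrained or overwritten.
# ---------------------------------------------------------------------------
def train_and_save_pipeline(train_texts, train_labels,
                            model_path='models/Logisticmainmodel.pkl'):
    pipeline = make_pipeline(
        FunctionTransformer(preprocesser),   # spaCy-based cleaning and lemmatisation
        CountVectorizer(),                   # bag-of-words features
        LogisticRegression(max_iter=1000)    # illustrative max_iter; tune as needed
    )
    pipeline.fit(train_texts, train_labels)
    joblib.dump(pipeline, model_path)        # save the fitted pipeline to disk
    return pipeline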
# Load the trained pipeline back from disk. It is assumed to bundle the
# preprocesser step, the vectoriser, and the classifier, which is why
# preprocesser must be defined before unpickling.
loaded_model = joblib.load('models/Logisticmainmodel.pkl')
# Predict and print the category for a single piece of text
def pd(text):
    # Map the model's numeric labels back to human-readable category names
    label_to_category = {
        0: 'BUSINESS-MONEY',
        1: 'EMPOWERED VOICES',
        2: 'ENVIRONMENT',
        3: 'GENERAL',
        4: 'LIFESTYLE AND WELLNESS',
        5: 'MISC',
        6: 'PARENTING AND EDUCATION',
        7: 'POLITICS',
        8: 'SCIENCE AND TECH',
        9: 'SPORTS AND ENTERTAINMENT',
        10: 'TRAVEL-TOURISM & ART-CULTURE',
        11: 'U.S. NEWS',
        12: 'WORLDNEWS'
    }
    new_texts = [text]
    predicted_labels = loaded_model.predict(new_texts)
    # Convert the predicted numerical labels to category names using the mapping
    predicted_categories = [label_to_category[label] for label in predicted_labels]
    # Print and return the predicted category
    for text, predicted_category in zip(new_texts, predicted_categories):
        print(f"Text: {text}\nPredicted Category: {predicted_category}\n")
    return predicted_category
pd("election") | |