import spacy
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Load the small English spaCy pipeline once at import time; `preprocesser`
# below uses it for tokenization, stop-word/punctuation flags and lemmas.
nlp = spacy.load("en_core_web_sm")

# Define a function that filters out stop words and punctuation and extracts the lemmas from the texts.
def preprocesser(text_array):
    """Normalize every text in ``text_array`` for the classifier.

    Each text is run through the module-level spaCy pipeline ``nlp``; tokens
    flagged as stop words or punctuation are dropped and the lemmas of the
    remaining tokens are joined with single spaces.

    Args:
        text_array: Iterable of text strings to process.

    Returns:
        A list with one cleaned string per input text, in input order.
    """

    def _lemmatize(raw_text):
        # Keep only the lemmas of tokens that are neither stop words nor punctuation.
        kept_lemmas = [
            tok.lemma_
            for tok in nlp(raw_text)
            if not (tok.is_stop or tok.is_punct)
        ]
        return " ".join(kept_lemmas)

    return [_lemmatize(raw_text) for raw_text in text_array]


import joblib

# Load the previously trained model from disk at import time; the path is
# relative to the current working directory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
# NOTE(review): presumably the saved pipeline references `preprocesser` defined
# above, so that function must remain available under the same name -- confirm.
loaded_model = joblib.load('models/Logisticmainmodel.pkl')


# Maps the classifier's integer class labels to human-readable category names.
# Built once at import time instead of on every call to `pd`.
_LABEL_TO_CATEGORY = {
    0: 'BUSINESS-MONEY',
    1: 'EMPOWERED VOICES',
    2: 'ENVIRONMENT',
    3: 'GENERAL',
    4: 'LIFESTYLE AND WELLNESS',
    5: 'MISC',
    6: 'PARENTING AND EDUCATION',
    7: 'POLITICS',
    8: 'SCIENCE AND TECH',
    9: 'SPORTS AND ENTERTAINMENT',
    10: 'TRAVEL-TOURISM & ART-CULTURE',
    11: 'U.S. NEWS',
    12: 'WORLDNEWS',
}


def pd(text):
    """Predict, print and return the category of a single text.

    The function name is kept for backward compatibility with existing
    callers, even though ``pd`` is also the conventional alias for pandas.

    Args:
        text: The text to classify; passed to ``loaded_model`` unchanged.

    Returns:
        The category name that the predicted label maps to.

    Raises:
        KeyError: If the model predicts a label that is not in the
            label-to-category mapping.
    """
    # The model is called with a one-element batch, so exactly one label is
    # expected back (assumes `loaded_model.predict` follows the scikit-learn
    # convention of returning one label per input sample).
    predicted_label = loaded_model.predict([text])[0]

    # Convert the predicted numerical label to its category name.
    predicted_category = _LABEL_TO_CATEGORY[predicted_label]

    # Report the prediction on stdout as well as returning it.
    print(f"Text: {text}\nPredicted Category: {predicted_category}\n")
    return predicted_category


# Example invocation: runs whenever this module is executed or imported and
# prints the predicted category for the sample input.
pd("election")