---
license: mit
language:
- en
metrics:
- accuracy
library_name: sklearn
pipeline_tag: text-classification
tags:
- code
---

## Model Training

The sentiment analysis model is trained with a Support Vector Machine (SVM) classifier using a linear kernel. The cleaned text is transformed into a bag-of-words representation with `CountVectorizer`. The trained classifier is saved as `Sentiment_classifier_model.joblib`, and the fitted vectorizer is saved as `vectorizer_model.joblib`.
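
The training script itself is not included in this card; the sketch below is a minimal reconstruction of the recipe described above. The dataset file and its `text`/`label` column names are assumptions, not part of the released model.

```python
import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Hypothetical labeled dataset; adjust the path and column names to your data.
df = pd.read_csv("sentiment_data.csv")
texts = df["text"].str.lower()   # same lowercasing applied at inference time
labels = df["label"]

X_train, X_test, y_train, y_test = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Bag-of-words features + linear-kernel SVM, as described above.
vectorizer = CountVectorizer()
X_train_bow = vectorizer.fit_transform(X_train)
X_test_bow = vectorizer.transform(X_test)

classifier = SVC(kernel="linear")
classifier.fit(X_train_bow, y_train)
print("Accuracy:", accuracy_score(y_test, classifier.predict(X_test_bow)))

# Persist the artifacts under the filenames mentioned above.
joblib.dump(classifier, "Sentiment_classifier_model.joblib")
joblib.dump(vectorizer, "vectorizer_model.joblib")
```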
## Usage

```python
from huggingface_hub import hf_hub_download
import joblib
from sklearn.preprocessing import LabelEncoder

# Download the trained classifier from the Hub and load it with joblib.
model = joblib.load(
    hf_hub_download("DineshKumar1329/Sentiment_Analysis", "sklearn_model.joblib")
)

# Load the fitted vectorizer used during training.
tfidf_vectorizer = joblib.load('/content/vectorizer_model.joblib')  # Replace with your path


def clean_text(text):
    """Converts the input text to lowercase, matching the training-time preprocessing."""
    return text.lower()


def predict_sentiment(user_input):
    """Predicts sentiment for a given user input."""
    cleaned_text = clean_text(user_input)
    input_matrix = tfidf_vectorizer.transform([cleaned_text])
    prediction = model.predict(input_matrix)[0]

    # model.classes_ is normally a plain array of labels; decode only if a
    # LabelEncoder was stored in its place.
    if isinstance(model.classes_, LabelEncoder):
        prediction = model.classes_.inverse_transform([prediction])[0]

    return prediction


user_input = input("Enter a sentence: ")
predicted_sentiment = predict_sentiment(user_input)
print(f"Predicted Sentiment: {predicted_sentiment}")
```
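
If the fitted vectorizer is also published in the model repository, it can be fetched from the Hub in the same way instead of pointing at a local path. The filename below comes from the training notes above; treat its presence in the repository as an assumption.

```python
from huggingface_hub import hf_hub_download
import joblib

# Assumes vectorizer_model.joblib has been uploaded alongside the classifier.
tfidf_vectorizer = joblib.load(
    hf_hub_download("DineshKumar1329/Sentiment_Analysis", "vectorizer_model.joblib")
)
```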
Alternatively, the model can be called through the `transformers` API. Note that this path only works if a `transformers`-compatible checkpoint is available in the repository:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


def load_model_and_tokenizer(model_name="DineshKumar1329/Sentiment_Analysis"):
    """Loads the sentiment analysis model and tokenizer from the Hugging Face Hub."""
    # Replace with the desired model name if using a different model.
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer


def clean_text(text):
    """Converts the input text to lowercase for case-insensitive processing."""
    return text.lower()


def predict_sentiment(user_input, model, tokenizer):
    """Predicts sentiment for a given user input."""
    cleaned_text = clean_text(user_input)
    encoded_text = tokenizer(cleaned_text, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**encoded_text)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()

    # Map the predicted class index to a readable label when the config provides one.
    if model.config.id2label:
        prediction = model.config.id2label.get(prediction, prediction)

    return prediction


if __name__ == "__main__":
    model, tokenizer = load_model_and_tokenizer()
    user_input = input("Enter a sentence: ")
    predicted_sentiment = predict_sentiment(user_input, model, tokenizer)
    print(f"Predicted Sentiment: {predicted_sentiment}")
```