File size: 2,834 Bytes
89c40fe 5b7a180 89c40fe fbff6ba d4b6683 eef508e d4b6683 212e319 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
---
license: mit
language:
- en
metrics:
- accuracy
library_name: sklearn
pipeline_tag: text-classification
tags:
- code
---
## Model Training
The sentiment analysis model is trained using a Support Vector Machine (SVM) classifier with a linear kernel. The cleaned text data is transformed into a bag-of-words representation using the CountVectorizer. The trained model is saved as `Sentiment_classifier_model.joblib`, and the fitted vectorizer is saved as `vectorizer_model.joblib`.
# Usage
from huggingface_hub import hf_hub_download
import joblib
from sklearn.preprocessing import LabelEncoder

# Download the serialized sklearn classifier from the Hub and deserialize it.
# NOTE(review): the filename here is "sklearn_model.joblib" while the prose
# above says "Sentiment_classifier_model.joblib" — confirm which artifact
# actually exists in the repo.
model = joblib.load(
    hf_hub_download("DineshKumar1329/Sentiment_Analysis", "sklearn_model.joblib")
)
# The vectorizer is loaded from a local path, not the Hub.
tfidf_vectorizer = joblib.load('/content/vectorizer_model.joblib') # Replace with your path
def clean_text(text):
    """Normalize *text* to lowercase for case-insensitive matching."""
    lowered = text.lower()
    return lowered
def predict_sentiment(user_input):
    """Predict the sentiment label for a raw user string.

    Relies on the module-level ``tfidf_vectorizer`` and ``model`` objects
    loaded above.

    Args:
        user_input: Raw text entered by the user.

    Returns:
        The predicted class label from ``model.predict``.
    """
    cleaned_text = clean_text(user_input)
    input_matrix = tfidf_vectorizer.transform([cleaned_text])
    prediction = model.predict(input_matrix)[0]
    # FIX: the original tested ``isinstance(model.classes_, LabelEncoder)``,
    # but sklearn estimators expose ``classes_`` as a numpy array, never a
    # LabelEncoder, so that branch was dead code; it has been removed.
    return prediction
# Interactive demo: read one sentence from stdin and print its predicted label.
user_input = input("Enter a sentence: ")
predicted_sentiment = predict_sentiment(user_input)
print(f"Predicted Sentiment: {predicted_sentiment}")
import joblib
import torch  # required: predict_sentiment below calls torch.no_grad / torch.argmax
from sklearn.preprocessing import LabelEncoder
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def load_model_and_tokenizer(model_name="DineshKumar1329/Sentiment_Analysis"):
    """Fetch a sequence-classification model and its tokenizer from the Hub.

    Args:
        model_name: Hub repository id; pass a different id to load another model.

    Returns:
        A ``(model, tokenizer)`` pair ready for inference.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return model, tokenizer
def clean_text(text):
    """Lowercase *text* so downstream processing is case-insensitive."""
    return str.lower(text)
def predict_sentiment(user_input, model, tokenizer):
    """Predict the sentiment label for a raw user string.

    Args:
        user_input: Raw text entered by the user.
        model: A ``transformers`` sequence-classification model.
        tokenizer: The tokenizer matching ``model``.

    Returns:
        The human-readable label from ``model.config.id2label`` when that
        mapping exists, otherwise the raw argmax class index (an int).
    """
    cleaned_text = clean_text(user_input)
    encoded_text = tokenizer(cleaned_text, return_tensors="pt")
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        outputs = model(**encoded_text)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=-1).item()
    # BUG FIX: the original read ``model.config.label_list`` and tested it with
    # isinstance(..., LabelEncoder). Transformers configs have no ``label_list``
    # attribute (that access raises AttributeError), and labels are never stored
    # as a LabelEncoder. The standard index-to-name mapping is ``id2label``.
    id2label = getattr(model.config, "id2label", None)
    if id2label:
        prediction = id2label.get(prediction, prediction)
    return prediction
if __name__ == "__main__":
    # Interactive demo: load the model + tokenizer once, then classify a
    # single sentence read from stdin.
    model, tokenizer = load_model_and_tokenizer()
    user_input = input("Enter a sentence: ")
    predicted_sentiment = predict_sentiment(user_input, model, tokenizer)
    print(f"Predicted Sentiment: {predicted_sentiment}")
|