import os
import re
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from flair.data import Sentence
from flair.models import TextClassifier
from flair.nn import Classifier
from wordcloud import WordCloud


# Load the Flair "sentiment" model once, at import time, so that every call to
# analyze_sentiment_flair reuses the same instance instead of reloading it.
classifier = Classifier.load("sentiment")


def clean_text_for_flair(text):
    """Normalize one raw text cell before it is passed to the classifier.

    URLs (``http...`` / ``www...``) and ``<...>`` markup are removed, then every
    run of whitespace is collapsed to a single space and the ends are trimmed.

    Args:
        text: The raw cell value. Missing values (``None`` / ``NaN``) are
            accepted; non-string scalars such as numbers are converted with
            ``str`` first.

    Returns:
        The cleaned string, or ``""`` when ``text`` is missing.
    """
    if pd.isnull(text):
        return ""

    # Object columns may hold numbers or other scalars; ``re.sub`` only accepts
    # strings, so coerce here rather than raising TypeError mid-run.
    text = str(text)

    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"<.*?>", "", text)
    return re.sub(r"\s+", " ", text).strip()


def _render_wordcloud(text, colormap, title, prefix):
    """Render ``text`` as a word-cloud PNG; return its path, or None if there are no words."""
    if not text.strip():
        return None

    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white', colormap=colormap
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words survive its tokenisation
        # (e.g. the text consists only of stop words); treat that as "no cloud".
        return None

    # A unique temp file per call keeps concurrent requests from overwriting
    # each other's images.
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=".png")
    os.close(fd)

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(cloud, interpolation='bilinear')
        ax.axis("off")
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return path


def generate_wordclouds(df):
    """Build word-cloud images for the POSITIVE and NEGATIVE rows of ``df``.

    Args:
        df: DataFrame expected to contain a ``"sentiment"`` column and a
            ``"clean_text"`` column.

    Returns:
        A ``(positive_path, negative_path)`` tuple of PNG file paths. An entry
        is ``None`` when that sentiment has no usable text, and both entries
        are ``None`` when either required column is missing.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    positive_text = " ".join(
        df.loc[df["sentiment"] == "POSITIVE", "clean_text"].astype(str)
    )
    negative_text = " ".join(
        df.loc[df["sentiment"] == "NEGATIVE", "clean_text"].astype(str)
    )

    pos_path = _render_wordcloud(
        positive_text, 'Greens', "Positive Word Cloud", "positive_wordcloud_"
    )
    neg_path = _render_wordcloud(
        negative_text, 'Reds', "Negative Word Cloud", "negative_wordcloud_"
    )
    return pos_path, neg_path


def _reserve_temp_path(prefix, suffix):
    """Create an empty, uniquely named temp file and return its path."""
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
    # Close our handle right away so the file can be reopened by name on every
    # platform (Windows refuses a second open while the first is held).
    os.close(fd)
    return path


def _save_and_close(fig, prefix):
    """Write ``fig`` to a fresh temp PNG, close the figure, and return the path."""
    path = _reserve_temp_path(prefix, ".png")
    try:
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)
    return path


def _predict_sentiments(texts):
    """Classify ``texts``; return parallel lists of labels and confidence scores."""
    # Empty strings are not sent to the model: they are expected to come back
    # without a label, which would break ``labels[0]`` below.
    sentences = [Sentence(text) if text else None for text in texts]
    to_score = [sentence for sentence in sentences if sentence is not None]
    if to_score:
        # One batched call instead of one model invocation per row.
        classifier.predict(to_score, mini_batch_size=32)

    labels = []
    scores = []
    for sentence in sentences:
        if sentence is not None and sentence.labels:
            top = sentence.labels[0]
            labels.append(top.value)
            scores.append(top.score)
        else:
            # Nothing to classify (empty text) or the model returned no label.
            labels.append("UNKNOWN")
            scores.append(float("nan"))
    return labels, scores


def analyze_sentiment_flair(file, text_column):
    """Run sentiment analysis over one column of an uploaded CSV file.

    Args:
        file: The uploaded file: either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.
        text_column: Name of the column whose text is classified.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. When the file cannot be read, the column
        is missing, or the file has no rows, ``status`` describes the problem
        and the other five entries are ``None``. Rows whose cleaned text is
        empty get the label ``"UNKNOWN"`` and a NaN confidence.
    """
    if file is None:
        return "Error loading file: no file was uploaded.", None, None, None, None, None

    try:
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return f"Error loading file: {e}", None, None, None, None, None

    if text_column not in df.columns:
        return "Selected text column not found.", None, None, None, None, None

    if df.empty:
        return "The uploaded file contains no rows to analyze.", None, None, None, None, None

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)
    df["sentiment"], df["confidence"] = _predict_sentiments(df["clean_text"])

    # Every output goes to its own temp file so that concurrent requests do
    # not overwrite each other's results.
    csv_path = _reserve_temp_path("sentiment_results_", ".csv")
    df.to_csv(csv_path, index=False)

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", palette="pastel", ax=ax)
    ax.set_title("Sentiment Distribution")
    sentiment_plot_path = _save_and_close(fig, "sentiment_flair_plot_")

    fig, ax = plt.subplots(figsize=(6, 4))
    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue", ax=ax)
    ax.set_title("Confidence Score Distribution")
    confidence_plot_path = _save_and_close(fig, "confidence_flair_plot_")

    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return f"Sentiment analysis completed on {len(df)} rows.", csv_path, sentiment_plot_path, confidence_plot_path, pos_wc_path, neg_wc_path


# Gradio UI: upload a CSV, pick a text column, run the analysis, and show the
# resulting CSV, plots and word clouds.
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Return a dropdown update listing the object-dtype columns of ``file``.

        ``file`` is the uploaded CSV (an object exposing ``.name`` or a path
        string), or ``None`` when the upload was cleared. The first text
        column is pre-selected; when the file is missing, unreadable, or has
        no text columns the dropdown is emptied.
        """
        default_label = "Select Text Column"

        if file is None:
            return gr.update(choices=[], value=None, label=default_label)

        try:
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            # Unreadable upload: empty the dropdown instead of surfacing a
            # traceback in the UI. KeyboardInterrupt/SystemExit still propagate.
            return gr.update(choices=[], value=None, label=default_label)

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            # Clear any stale selection left over from a previous upload.
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")

        # Restore the normal label in case an earlier upload set the warning.
        return gr.update(choices=text_cols, value=text_cols[0], label=default_label)

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")

    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    # The order of ``outputs`` must match the tuple returned by
    # analyze_sentiment_flair.
    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg],
    )

# NOTE(review): this starts the server (with a share link requested) as soon
# as the module is executed or imported; kept as-is to preserve that behavior.
app.launch(share=True, debug=True)