pvaluedotone's picture
Update app.py
ed5c736 verified
import pandas as pd
import re
import tempfile
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from flair.models import TextClassifier
from flair.data import Sentence
from flair.nn import Classifier
# Load the FLAIR "sentiment" model once at import time; analyze_sentiment_flair
# reuses this single shared instance for every prediction.
classifier = Classifier.load("sentiment")
# Minimal text cleaning
_URL_PATTERN = re.compile(r"http\S+|www\S+")
_HTML_TAG_PATTERN = re.compile(r"<.*?>")
_WHITESPACE_PATTERN = re.compile(r"\s+")


def clean_text_for_flair(text):
    """Return *text* with URLs and HTML tags removed and whitespace collapsed.

    Args:
        text: A cell value from the text column. Missing values (``None``,
            ``NaN``, ``pd.NA``) yield ``""``. Other non-string scalars such as
            numbers are converted with ``str()`` before cleaning.

    Returns:
        The cleaned string, possibly empty.
    """
    if not isinstance(text, str):
        if pd.isnull(text):
            return ""
        # Object-dtype columns can mix numbers with strings; re.sub would
        # raise TypeError on them, so coerce to text first.
        text = str(text)
    text = _URL_PATTERN.sub("", text)  # remove URLs
    text = _HTML_TAG_PATTERN.sub("", text)  # remove HTML
    return _WHITESPACE_PATTERN.sub(" ", text).strip()  # normalize whitespace
# Generate word clouds
def _render_wordcloud(text, colormap, title, out_path):
    """Draw *text* as a word cloud PNG at *out_path*.

    Returns *out_path* on success, or ``None`` when there is nothing to draw.
    """
    if not text.strip():
        return None
    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white', colormap=colormap
        ).generate(text)
    except ValueError:
        # WordCloud rejects input that leaves it with no words to plot
        # (for example text made only of stopwords); treat that as "no cloud".
        return None

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(cloud, interpolation='bilinear')
        plt.axis("off")
        plt.title(title)
        plt.tight_layout()
        plt.savefig(out_path)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)
    return out_path


def generate_wordclouds(df):
    """Create word cloud images for POSITIVE and NEGATIVE rows of *df*.

    Args:
        df: DataFrame expected to contain ``"sentiment"`` and ``"clean_text"``
            columns.

    Returns:
        ``(positive_path, negative_path)``. Both are ``None`` when a required
        column is missing; either one is ``None`` when that sentiment has no
        text to plot. Images are written to fixed file names in the current
        working directory and are overwritten on every call.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    def joined_text(label):
        rows = df[df["sentiment"] == label]["clean_text"].astype(str)
        return " ".join(rows)

    pos_path = _render_wordcloud(
        joined_text("POSITIVE"), "Greens", "Positive Word Cloud", "positive_wordcloud.png"
    )
    neg_path = _render_wordcloud(
        joined_text("NEGATIVE"), "Reds", "Negative Word Cloud", "negative_wordcloud.png"
    )
    return pos_path, neg_path
# Main analysis function
def analyze_sentiment_flair(file, text_column):
    """Run FLAIR sentiment analysis over one text column of an uploaded CSV.

    Args:
        file: The uploaded CSV, given either as an object exposing the path in
            ``.name`` or as a plain path string. ``None`` means no upload.
        text_column: Name of the column holding the text to classify.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. On failure only ``status`` is set and the
        rest are ``None``. Plot and word cloud entries are also ``None`` when
        there was nothing to draw.
    """
    no_outputs = (None, None, None, None, None)

    if file is None:
        return ("Error loading file: no file was uploaded.",) + no_outputs
    # Accept both file-like objects carrying the path in .name and bare paths.
    csv_source = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_source)
    except Exception as e:
        return (f"Error loading file: {e}",) + no_outputs

    if text_column not in df.columns:
        return ("Selected text column not found.",) + no_outputs

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

    sentiments = []
    scores = []
    for text in df["clean_text"]:
        labels = []
        if text:
            sentence = Sentence(text)
            classifier.predict(sentence)
            labels = sentence.labels
        if labels:
            sentiments.append(labels[0].value)
            scores.append(labels[0].score)
        else:
            # Empty text (e.g. a missing cell) or a sentence that received no
            # label: record it as missing instead of indexing an empty list.
            sentiments.append(None)
            scores.append(float("nan"))
    df["sentiment"] = sentiments
    df["confidence"] = scores

    # Save results through the open handle; re-opening the temp file by name
    # while it is still open fails on Windows.
    with tempfile.NamedTemporaryFile(
        mode='w', delete=False, suffix=".csv", newline="", encoding="utf-8"
    ) as tmp:
        df.to_csv(tmp, index=False)
        csv_path = tmp.name

    scored = df.dropna(subset=["sentiment"])
    skipped = len(df) - len(scored)
    status = f"Sentiment analysis completed on {len(df)} rows."
    if skipped:
        status += f" {skipped} rows had no classifiable text and were left unlabeled."

    if scored.empty:
        # Nothing was classified, so there is nothing to plot.
        return (status, csv_path, None, None, None, None)

    # Sentiment count plot
    sentiment_plot_path = "sentiment_flair_plot.png"
    fig = plt.figure(figsize=(6, 4))
    try:
        sns.countplot(data=scored, x="sentiment", palette="pastel")
        plt.title("Sentiment Distribution")
        plt.tight_layout()
        plt.savefig(sentiment_plot_path)
    finally:
        plt.close(fig)

    # Confidence score distribution
    confidence_plot_path = "confidence_flair_plot.png"
    fig = plt.figure(figsize=(6, 4))
    try:
        sns.histplot(scored["confidence"], bins=30, kde=True, color="lightblue")
        plt.title("Confidence Score Distribution")
        plt.tight_layout()
        plt.savefig(confidence_plot_path)
    finally:
        plt.close(fig)

    # Word clouds
    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return (
        status,
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )
# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Return a dropdown update listing the object-dtype columns of *file*.

        The first such column is pre-selected. The dropdown is emptied when no
        file is present, the file cannot be read, or it has no object-dtype
        columns.
        """
        if file is None:
            # Upload was cleared: empty the dropdown and restore its label.
            return gr.update(choices=[], value=None, label="Select Text Column")
        try:
            # Accept both objects carrying the path in .name and bare paths.
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            return gr.update(choices=[], value=None)

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            # Clear any selection left over from a previously uploaded file.
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
        # Restore the normal label in case an earlier upload set the warning.
        return gr.update(choices=text_cols, value=text_cols[0], label="Select Text Column")

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")
    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg],
    )

app.launch(share=True, debug=True)