# pvaluedotone's picture
# Update app.py
# fd0f5ba verified
import nltk
nltk.download('punkt')
import textblob.download_corpora
textblob.download_corpora.download_all()
import pandas as pd
import re
from textblob import TextBlob
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import tempfile
from wordcloud import WordCloud
# Text cleaning function
def clean_text(text):
    """Normalise one raw text value before sentiment scoring.

    Steps, in order: remove URLs, remove ``@mentions`` and ``#`` signs (the
    hashtag word itself is kept), remove every character that is not an
    ASCII letter, digit or whitespace, lower-case, and trim both ends.
    Inner whitespace is not collapsed.

    Args:
        text: Any cell value; missing values (``None``/``NaN``) are accepted
            and non-strings are converted with ``str``.

    Returns:
        The cleaned string, or ``""`` for a missing value.
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    # The pattern used to be r"\@w+|\#", which matches a literal "@w..."
    # rather than "@" followed by word characters, so "@user" survived as
    # "user" once the punctuation pass stripped the "@".
    text = re.sub(r"@\w+|#", '', text)
    text = re.sub(r"[^A-Za-z0-9\s]+", '', text)
    return text.lower().strip()
# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Translate a polarity score into a sentiment label.

    The positive threshold is checked first, so it wins if both conditions
    hold.

    Args:
        polarity: Score to classify.
        pos_thresh: Scores at or above this value are "Positive".
        neg_thresh: Scores at or below this value are "Negative".

    Returns:
        "Positive", "Negative", or "Neutral" for anything in between.
    """
    if polarity >= pos_thresh:
        return "Positive"
    return "Negative" if polarity <= neg_thresh else "Neutral"
# Generate word cloud
def generate_wordcloud(text_series, title):
    """Draw a word cloud for *text_series* on a new 6x4 inch pyplot figure.

    Missing entries are dropped and the remaining strings are joined with
    single spaces. If the joined text is blank, the figure carries a centred
    "No data for <title>" message instead of a cloud.

    NOTE: a second ``generate_wordcloud`` is defined further down this file.
    Because that one is bound last, module-level callers resolve the name to
    it, not to this version.

    Args:
        text_series: pandas Series of strings.
        title: Figure title; also used in the placeholder message.

    Returns:
        The matplotlib figure.
    """
    joined = " ".join(text_series.dropna())
    has_words = bool(joined.strip())

    # Build the cloud before opening the figure, as the two steps are ordered
    # in the non-empty case.
    cloud = None
    if has_words:
        cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(joined)

    canvas = plt.figure(figsize=(6, 4))
    if has_words:
        plt.imshow(cloud, interpolation="bilinear")
    else:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')

    # Shared finishing touches for both variants.
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return canvas
# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score one CSV column with TextBlob and assemble the UI outputs.

    Adds ``clean_text``, ``polarity``, ``subjectivity`` and ``sentiment``
    columns to the loaded data, plots the label counts and the polarity
    histogram, builds one word cloud per label, and writes the full table to
    ``TextBlob_sentiment_results.csv`` in the working directory.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        text_column: Name of the column that holds the text to analyse.
        pos_thresh: Polarity at or above which a row is labelled "Positive".
        neg_thresh: Polarity at or below which a row is labelled "Negative".

    Returns:
        An 8-tuple: ``(status message, preview DataFrame of the first 10
        rows, label-count figure, polarity-histogram figure, results CSV
        path, positive word cloud, negative word cloud, neutral word
        cloud)``. When the CSV cannot be read or the column is missing, only
        the status message is set and the other seven items are ``None``.
    """
    # Every return path must have the same arity as the success tuple; the
    # failure paths used to return only five items.
    no_results = (None,) * 7

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return (f"❌ Error reading CSV file: {e}",) + no_results

    if text_column not in df.columns:
        return ("⚠️ Selected column not found in the uploaded file.",) + no_results

    df["clean_text"] = df[text_column].apply(clean_text)

    # Parse each text once and read both scores from the same result instead
    # of building a second TextBlob per row.
    scores = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = [score.polarity for score in scores]
    df["subjectivity"] = [score.subjectivity for score in scores]
    df["sentiment"] = df["polarity"].apply(lambda p: get_sentiment_label(p, pos_thresh, neg_thresh))

    # Plot sentiment distribution
    fig1 = plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False)
    plt.title("Sentiment Label Distribution")
    plt.tight_layout()

    # Plot polarity distribution
    fig2 = plt.figure(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue")
    plt.title("Polarity Score Distribution")
    plt.tight_layout()

    # Preview table
    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

    # Word Clouds per sentiment
    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

    # pyplot keeps every figure registered until it is closed, so a
    # long-running app would accumulate five figures per request. Closing
    # only deregisters them; the Figure objects returned below stay usable.
    for figure in (fig1, fig2, pos_wc, neg_wc, neu_wc):
        plt.close(figure)

    # Save full results
    output_file_path = "TextBlob_sentiment_results.csv"
    df.to_csv(output_file_path, index=False)

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc,
    )
# Dropdown update function
def get_text_columns(file):
    """Build the dropdown update for a newly selected CSV file.

    Args:
        file: Uploaded CSV (anything ``pandas.read_csv`` accepts), or
            ``None`` when there is no file to inspect.

    Returns:
        A ``gr.update`` for the column dropdown. With at least one
        object-dtype column, those columns become the choices and the first
        is selected. Otherwise the dropdown is emptied, with a warning label
        when the file is unreadable or has no such column.
    """
    default_label = "Select Text Column"

    # With no file there is nothing to read; just reset the dropdown.
    if file is None:
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(file)
    except Exception:
        # UI boundary: an unreadable upload should empty the dropdown rather
        # than raise out of the event handler.
        return gr.update(choices=[], value=None, label="⚠️ Could not read CSV file!")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")

    # Restore the normal label explicitly; otherwise a warning label set by
    # an earlier upload would stay on the dropdown.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)
# Word cloud function
def generate_wordcloud(text_series, title):
    """Draw a word cloud for *text_series* on a new 6x4 inch pyplot figure.

    This definition rebinds ``generate_wordcloud`` over the earlier one in
    this file, so it is the version callers actually get. It therefore has to
    cope with a sentiment class that has no usable words: in that case the
    figure shows a centred "No data for <title>" message instead of raising.

    Args:
        text_series: pandas Series of strings; missing entries are dropped.
        title: Figure title; also used in the placeholder message.

    Returns:
        The matplotlib figure.
    """
    text = " ".join(text_series.dropna())

    wc = None
    if text.strip():
        try:
            wc = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words remain to plot
            # (for example text made up only of stop words).
            wc = None

    fig = plt.figure(figsize=(6, 4))
    if wc is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig
# Gradio Interface
with gr.Blocks() as app:
    # Heading emoji restored from a mis-encoded byte sequence.
    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    # File upload and column picker.
    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Repopulate the column dropdown whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    # Classification thresholds passed to analyze_sentiment.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    # Output components.
    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")
    csv_download = gr.File(label="Download Full Results")

    # The outputs list is positional: it must stay in the same order as the
    # tuple returned by analyze_sentiment.
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud
        ]
    )

app.launch(share=True, debug=True)