# Page-header residue captured with this file (not part of the program):
# "Spaces: Sleeping"
# Standard library
import re
import tempfile

# Third-party
import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import textblob.download_corpora
from textblob import TextBlob
from wordcloud import WordCloud

# Start-up side effects: fetch the NLTK "punkt" tokenizer data and the
# TextBlob corpora. Both calls run at import time, before any UI is built.
nltk.download('punkt')
textblob.download_corpora.download_all()
# Text cleaning function
def clean_text(text):
    """Normalise one raw cell value into plain lowercase text.

    Steps, in order: missing values become ``""``; the value is coerced to
    ``str``; URLs, @mentions and ``#`` signs are removed; every remaining
    character that is not an ASCII letter, digit or whitespace is dropped;
    the result is lowercased and stripped of surrounding whitespace.

    Interior whitespace is left untouched, so removing a token can leave a
    double space behind.

    Args:
        text: Any scalar cell value (string, number, ``None``/NaN).

    Returns:
        The cleaned string (possibly empty).
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    # "http\S+" already covers "https..." links; "www\S+" catches bare hosts.
    text = re.sub(r"http\S+|www\S+", "", text)
    # Remove whole @mentions (the "@" plus the handle) and bare "#" signs;
    # the hashtag word itself is kept.
    text = re.sub(r"@\w+|#", "", text)
    text = re.sub(r"[^A-Za-z0-9\s]+", "", text)
    return text.lower().strip()
# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Map a polarity score onto a sentiment label.

    The positive threshold is tested first, so it wins when both
    comparisons would hold.

    Args:
        polarity: Polarity score to classify.
        pos_thresh: Scores at or above this value are "Positive".
        neg_thresh: Scores at or below this value are "Negative".

    Returns:
        "Positive", "Negative", or "Neutral" when neither comparison holds.
    """
    if polarity >= pos_thresh:
        return "Positive"
    if polarity <= neg_thresh:
        return "Negative"
    return "Neutral"
# Generate word cloud
def generate_wordcloud(text_series, title):
    """Draw a word cloud for a series of texts, or a placeholder if empty.

    NOTE: a second function with this same name is defined further down in
    this file; being later in the module, that one replaces this definition
    once the module has finished loading.

    Args:
        text_series: pandas Series of strings; missing entries are dropped.
        title: Figure title, also used in the placeholder message.

    Returns:
        A 6x4 inch matplotlib figure.
    """
    joined = " ".join(text_series.dropna())

    # Build the cloud before any figure exists so that a failure in
    # WordCloud does not leave an empty figure behind.
    cloud = None
    if joined.strip():
        cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(joined)

    fig, ax = plt.subplots(figsize=(6, 4))
    if cloud is None:
        ax.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    ax.set_title(title)
    fig.tight_layout()
    return fig
# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score every row of an uploaded CSV and build all UI outputs.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.
        text_column: Name of the column holding the text to analyse.
        pos_thresh: Polarity at or above which a row is "Positive".
        neg_thresh: Polarity at or below which a row is "Negative".

    Returns:
        An 8-tuple, one item per output component wired to the button:
        ``(status message, preview DataFrame, label-count figure,
        polarity histogram figure, results CSV path, positive word cloud,
        negative word cloud, neutral word cloud)``.
        On failure every item after the status message is ``None``.

    Side effects:
        Writes the full scored table to ``TextBlob_sentiment_results.csv``
        in the current working directory.
    """
    # The click handler declares eight outputs, so failure paths must also
    # return eight values (status + seven placeholders).
    # NOTE(review): the status emoji were reconstructed from mis-decoded
    # characters in the captured source; confirm against the deployed app.
    empty_outputs = (None,) * 7

    try:
        df = pd.read_csv(file)
    except Exception as e:  # UI boundary: report any read failure to the user
        return (f"❌ Error reading CSV file: {e}", *empty_outputs)

    if text_column not in df.columns:
        return ("⚠️ Selected column not found in the uploaded file.", *empty_outputs)

    df["clean_text"] = df[text_column].apply(clean_text)

    # Run TextBlob once per row and read both scores from the same result.
    scores = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = pd.Series([s.polarity for s in scores], index=df.index, dtype=float)
    df["subjectivity"] = pd.Series([s.subjectivity for s in scores], index=df.index, dtype=float)
    df["sentiment"] = df["polarity"].apply(lambda p: get_sentiment_label(p, pos_thresh, neg_thresh))

    # Plot sentiment distribution
    fig1 = plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False)
    plt.title("Sentiment Label Distribution")
    plt.tight_layout()

    # Plot polarity distribution
    fig2 = plt.figure(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue")
    plt.title("Polarity Score Distribution")
    plt.tight_layout()

    # Preview table
    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

    # Word Clouds per sentiment
    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

    # Save full results
    output_file_path = "TextBlob_sentiment_results.csv"
    df.to_csv(output_file_path, index=False)

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc,
    )
# Dropdown update function
def get_text_columns(file):
    """Refresh the column dropdown after the uploaded file changes.

    Args:
        file: The uploaded CSV (anything ``pd.read_csv`` accepts), or
            ``None`` when the upload has been cleared.

    Returns:
        A ``gr.update`` for the dropdown: the object-dtype columns as
        choices with the first one selected, or an empty choice list when
        there is no file, the file cannot be read, or it has no text
        columns.
    """
    default_label = "Select Text Column"

    # Clearing the upload fires the change event with no file.
    if file is None:
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(file)
    except Exception:  # UI boundary: never let a bad upload crash the callback
        return gr.update(choices=[], value=None, label="⚠️ Could not read the uploaded CSV file.")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")

    # Restore the normal label in case an earlier upload replaced it with a
    # warning.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)
# Word cloud function
def generate_wordcloud(text_series, title):
    """Draw a word cloud for a series of texts, or a placeholder figure.

    This is the last definition of ``generate_wordcloud`` in the module and
    therefore the one callers actually get, so it must cope with a
    sentiment class that has no rows or no usable words.

    Args:
        text_series: pandas Series of strings; missing entries are dropped.
        title: Figure title, also used in the placeholder message.

    Returns:
        A 6x4 inch matplotlib figure showing either the word cloud or a
        centred "No data for <title>" message.
    """
    text = " ".join(text_series.dropna())

    wc = None
    if text.strip():
        try:
            wc = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words survive its own
            # filtering (e.g. stopwords only); fall back to the placeholder.
            wc = None

    fig = plt.figure(figsize=(6, 4))
    if wc is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig
# Gradio Interface
# Layout: heading and citation, an upload + column-picker row, a threshold
# row, the run button, then one output component per value returned by
# analyze_sentiment (status, preview table, two charts, three word clouds,
# results file).
# NOTE(review): the captured source had lost its indentation, so which
# statements sit inside each gr.Row() is inferred from statement order;
# confirm against the deployed layout.
with gr.Blocks() as app:
    # NOTE(review): heading emoji reconstructed from a mis-decoded character.
    gr.Markdown("## 📊 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Repopulate the column choices whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")
    csv_download = gr.File(label="Download Full Results")

    # The outputs list order must match the tuple returned by
    # analyze_sentiment.
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud,
        ],
    )

app.launch(share=True, debug=True)