"""Gradio app that scores a CSV text column with TextBlob sentiment analysis.

The user uploads a CSV, picks a text column and two polarity thresholds; the
app labels every row Positive / Negative / Neutral, plots the distributions,
draws one word cloud per label and offers the full result as a CSV download.
"""

import re
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import textblob.download_corpora
from textblob import TextBlob
from wordcloud import WordCloud

# Download the tokenizer data and corpora at start-up, before any analysis.
nltk.download('punkt')
textblob.download_corpora.download_all()

# Patterns used by clean_text, compiled once at import time.
_URL_RE = re.compile(r"http\S+|www\S+|https\S+")
_MENTION_OR_HASH_RE = re.compile(r"@\w+|#")
_NON_ALNUM_RE = re.compile(r"[^A-Za-z0-9\s]+")

# Number of components wired to the "Run Sentiment Analysis" button.
_N_OUTPUTS = 8

_DEFAULT_COLUMN_LABEL = "Select Text Column"


# Text cleaning function
def clean_text(text):
    """Return a lower-cased, stripped copy of *text* without URLs, mentions or symbols.

    Null values (as judged by ``pd.isnull``) become the empty string. URLs,
    ``@mention`` tokens and ``#`` characters are removed first, then every
    character that is not an ASCII letter, digit or whitespace.
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    text = _URL_RE.sub('', text)
    # The whole "@handle" token is dropped; for hashtags only the "#" goes.
    text = _MENTION_OR_HASH_RE.sub('', text)
    text = _NON_ALNUM_RE.sub('', text)
    return text.lower().strip()


# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Map a polarity score to "Positive", "Negative" or "Neutral".

    A score at or above *pos_thresh* is positive; otherwise a score at or
    below *neg_thresh* is negative; everything else is neutral.
    """
    if polarity >= pos_thresh:
        return "Positive"
    if polarity <= neg_thresh:
        return "Negative"
    return "Neutral"


def _finalize_figure(fig):
    """Tighten the layout of *fig* and unregister it from pyplot.

    Closing removes the figure from pyplot's global registry so figures do
    not accumulate across requests; the Figure object itself is returned and
    can still be rendered by the caller.
    """
    fig.tight_layout()
    plt.close(fig)
    return fig


def _placeholder_figure(title):
    """Return a blank figure that says there is no data for *title*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    ax.axis("off")
    ax.set_title(title)
    return _finalize_figure(fig)


# Generate word cloud
def generate_wordcloud(text_series, title):
    """Return a matplotlib figure with a word cloud built from *text_series*.

    Null entries are dropped and the rest joined with spaces. When there is
    no text left, or WordCloud finds no usable word in it, a placeholder
    figure reading "No data for <title>" is returned instead.
    """
    text = " ".join(text_series.dropna().astype(str))
    if not text.strip():
        return _placeholder_figure(title)

    try:
        wc = WordCloud(
            width=600, height=400, background_color="white", colormap="tab10"
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no word survives its own filtering.
        return _placeholder_figure(title)

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.imshow(wc, interpolation="bilinear")
    ax.axis("off")
    ax.set_title(title)
    return _finalize_figure(fig)


def _status_only(message):
    """Return an output tuple carrying only *message*; every other slot is None."""
    return (message,) + (None,) * (_N_OUTPUTS - 1)


# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Run TextBlob sentiment analysis on one column of an uploaded CSV.

    Args:
        file: Path or file object accepted by ``pd.read_csv``.
        text_column: Name of the column holding the text to analyse.
        pos_thresh: Polarity at or above which a row is "Positive".
        neg_thresh: Polarity at or below which a row is "Negative".

    Returns:
        An 8-tuple matching the button's outputs: status message, preview
        DataFrame (first 10 rows), label-count figure, polarity histogram
        figure, path of the full-results CSV, and the positive, negative and
        neutral word-cloud figures. On failure only the status message is
        set and the remaining seven entries are None.
    """
    try:
        df = pd.read_csv(file)
    except Exception as e:  # UI boundary: report the problem instead of crashing.
        return _status_only(f"❌ Error reading CSV file: {e}")

    if text_column not in df.columns:
        return _status_only("⚠️ Selected column not found in the uploaded file.")

    df["clean_text"] = df[text_column].apply(clean_text)

    # Score each row once and reuse the result for both measures.
    scores = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = pd.Series(
        [s.polarity for s in scores], index=df.index, dtype=float
    )
    df["subjectivity"] = pd.Series(
        [s.subjectivity for s in scores], index=df.index, dtype=float
    )
    df["sentiment"] = df["polarity"].apply(
        lambda p: get_sentiment_label(p, pos_thresh, neg_thresh)
    )

    # Plot sentiment distribution
    fig1, ax1 = plt.subplots(figsize=(6, 4))
    sns.countplot(
        data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False, ax=ax1
    )
    ax1.set_title("Sentiment Label Distribution")
    _finalize_figure(fig1)

    # Plot polarity distribution
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue", ax=ax2)
    ax2.set_title("Polarity Score Distribution")
    _finalize_figure(fig2)

    # Preview table
    preview_df = df[
        [text_column, "clean_text", "polarity", "subjectivity", "sentiment"]
    ].head(10)

    # Word Clouds per sentiment
    pos_wc = generate_wordcloud(
        df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud"
    )
    neg_wc = generate_wordcloud(
        df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud"
    )
    neu_wc = generate_wordcloud(
        df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud"
    )

    # Save full results to a unique temp file so concurrent runs do not
    # overwrite each other's download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="TextBlob_sentiment_results_",
        suffix=".csv",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as tmp:
        df.to_csv(tmp, index=False)
        output_file_path = tmp.name

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc,
    )


# Dropdown update function
def get_text_columns(file):
    """Return a dropdown update listing the object-dtype columns of *file*.

    The first such column is pre-selected. When no file is given the dropdown
    is reset; when the file cannot be read or has no object-dtype column the
    choices are emptied and the label carries a warning.
    """
    if file is None:
        # The upload was cleared: reset the dropdown.
        return gr.update(choices=[], value=None, label=_DEFAULT_COLUMN_LABEL)

    try:
        df = pd.read_csv(file)
    except Exception:  # UI boundary: show a warning instead of crashing.
        return gr.update(
            choices=[], value=None, label="⚠️ Could not read the uploaded file!"
        )

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
    # Restore the normal label in case an earlier upload replaced it with a warning.
    return gr.update(
        choices=text_columns, value=text_columns[0], label=_DEFAULT_COLUMN_LABEL
    )


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Refresh the column choices whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")
    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")
    csv_download = gr.File(label="Download Full Results")

    # The output order must match the tuple returned by analyze_sentiment.
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud,
        ],
    )

if __name__ == "__main__":
    app.launch(share=True, debug=True)