"""Gradio app: FLAIR sentiment analysis of a CSV text column.

The app lets a user upload a CSV, pick a text column, classifies every row
with the FLAIR "sentiment" model, and returns an annotated CSV, two summary
plots, and one word cloud per sentiment.
"""

import os
import re
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from flair.data import Sentence
from flair.models import TextClassifier  # noqa: F401  (kept: file-level import)
from flair.nn import Classifier
from wordcloud import WordCloud

DROPDOWN_LABEL = "Select Text Column"
# Label assigned to rows whose cleaned text is empty or got no prediction.
UNKNOWN_LABEL = "UNKNOWN"
# Number of sentences handed to the classifier per predict() call.
PREDICT_BATCH_SIZE = 32

_URL_RE = re.compile(r"http\S+|www\S+")
_HTML_TAG_RE = re.compile(r"<.*?>")
_WHITESPACE_RE = re.compile(r"\s+")

# Load FLAIR sentiment model
classifier = Classifier.load("sentiment")


def clean_text_for_flair(text):
    """Return *text* with URLs and HTML tags removed and whitespace collapsed.

    Null values (None / NaN) become the empty string. Non-string values are
    converted with ``str()`` first so numeric cells do not break the regexes.
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    text = _URL_RE.sub("", text)  # remove URLs
    text = _HTML_TAG_RE.sub("", text)  # remove HTML
    return _WHITESPACE_RE.sub(" ", text).strip()  # normalize whitespace


def _new_temp_path(suffix):
    """Create an empty, uniquely named temp file and return its path.

    Unique names keep concurrent requests from overwriting each other's
    output; the handle is closed immediately so the path can be reopened
    for writing on every platform.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _save_figure(fig, suffix=".png"):
    """Save *fig* to a fresh temp file, close it, and return the file path."""
    path = _new_temp_path(suffix)
    try:
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)
    return path


def _render_wordcloud(text, colormap, title):
    """Render a word cloud image for *text*; return its path or None.

    None is returned when *text* contains no usable words, because
    ``WordCloud.generate`` raises ``ValueError`` in that case.
    """
    if not text.strip():
        return None
    try:
        cloud = WordCloud(
            width=800,
            height=400,
            background_color="white",
            colormap=colormap,
        ).generate(text)
    except ValueError:
        # Raised when every token was filtered out (e.g. only stopwords).
        return None

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    ax.set_title(title)
    return _save_figure(fig)


# Generate word clouds
def generate_wordclouds(df):
    """Build positive and negative word clouds from an analysed DataFrame.

    Args:
        df: DataFrame with "sentiment" and "clean_text" columns.

    Returns:
        ``(positive_path, negative_path)``. Both are None when a required
        column is missing; either one is None when that sentiment has no
        text to draw.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    def joined_text(label):
        rows = df.loc[df["sentiment"] == label, "clean_text"]
        return " ".join(rows.astype(str))

    pos_path = _render_wordcloud(
        joined_text("POSITIVE"), "Greens", "Positive Word Cloud"
    )
    neg_path = _render_wordcloud(
        joined_text("NEGATIVE"), "Reds", "Negative Word Cloud"
    )
    return pos_path, neg_path


def _predict_sentiments(texts):
    """Classify *texts* and return parallel ``(labels, scores)`` lists.

    Empty strings are not sent to the model; they, and any sentence the
    model leaves unlabelled, get ``UNKNOWN_LABEL`` and a NaN score.
    """
    labels = [UNKNOWN_LABEL] * len(texts)
    scores = [float("nan")] * len(texts)

    indexed = [(i, text) for i, text in enumerate(texts) if text]
    # Work in fixed-size chunks so only one batch of Sentence objects is
    # alive at a time, while still letting the model run batched inference.
    for start in range(0, len(indexed), PREDICT_BATCH_SIZE):
        chunk = indexed[start:start + PREDICT_BATCH_SIZE]
        sentences = [Sentence(text) for _, text in chunk]
        classifier.predict(sentences, mini_batch_size=PREDICT_BATCH_SIZE)
        for (i, _), sentence in zip(chunk, sentences):
            if sentence.labels:
                top = sentence.labels[0]
                labels[i] = top.value
                scores[i] = top.score
    return labels, scores


# Main analysis function
def analyze_sentiment_flair(file, text_column):
    """Run sentiment analysis on one column of an uploaded CSV.

    Args:
        file: Upload from ``gr.File`` (an object with ``.name``) or a path.
        text_column: Name of the column holding the text to classify.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. On failure only ``status`` is set and
        the remaining items are None.
    """
    failure = (None,) * 5

    if file is None:
        return ("Please upload a CSV file first.", *failure)
    try:
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:  # UI boundary: report any load error to the user
        return (f"Error loading file: {e}", *failure)

    if text_column not in df.columns:
        return ("Selected text column not found.", *failure)
    if df.empty:
        return ("The uploaded file contains no rows.", *failure)

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)
    sentiments, scores = _predict_sentiments(df["clean_text"].tolist())
    df["sentiment"] = sentiments
    df["confidence"] = scores

    # Save results
    csv_path = _new_temp_path(".csv")
    df.to_csv(csv_path, index=False)

    # Sentiment count plot
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", palette="pastel", ax=ax)
    ax.set_title("Sentiment Distribution")
    sentiment_plot_path = _save_figure(fig)

    # Confidence score distribution (rows without a prediction have NaN)
    fig, ax = plt.subplots(figsize=(6, 4))
    confidences = df["confidence"].dropna()
    if not confidences.empty:
        sns.histplot(confidences, bins=30, kde=True, color="lightblue", ax=ax)
    ax.set_title("Confidence Score Distribution")
    confidence_plot_path = _save_figure(fig)

    # Word clouds
    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return (
        f"Sentiment analysis completed on {len(df)} rows.",
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )


def get_text_columns(file):
    """Return a dropdown update listing the text columns of the upload.

    The dropdown is emptied when no file is present or the file cannot be
    read, and shows a warning label when the CSV has no object-dtype columns.
    """
    if file is None:
        # Upload was cleared: reset the dropdown to its initial state.
        return gr.update(choices=[], value=None, label=DROPDOWN_LABEL)
    try:
        df = pd.read_csv(getattr(file, "name", file))
    except Exception:  # UI boundary: an unreadable file just empties the list
        return gr.update(choices=[], value=None)

    text_cols = df.select_dtypes(include="object").columns.tolist()
    if not text_cols:
        return gr.update(
            choices=[], value=None, label="⚠️ No text columns found"
        )
    # Restore the normal label in case a previous upload showed the warning.
    return gr.update(
        choices=text_cols, value=text_cols[0], label=DROPDOWN_LABEL
    )


# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown(
        "Upload a CSV file with text data. This tool uses "
        "[FLAIR](https://github.com/flairNLP/flair) for sentiment "
        "classification (POSITIVE / NEGATIVE), shows confidence scores, "
        "and generates word clouds for each sentiment."
    )

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(
            label=DROPDOWN_LABEL, choices=[], interactive=True
        )

    file_input.change(
        get_text_columns, inputs=file_input, outputs=col_dropdown
    )

    analyze_btn = gr.Button("Run Sentiment Analysis")
    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[
            output_text,
            file_output,
            sentiment_plot,
            confidence_plot,
            wordcloud_pos,
            wordcloud_neg,
        ],
    )

app.launch(share=True, debug=True)