Spaces:

pvaluedotone
/

flair_sentiment_analysis

Sleeping

App Files Files Community

pvaluedotone committed on May 22

Commit

ed5c736

verified ·

1 Parent(s): ed3f812

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -144

app.py CHANGED Viewed

@@ -1,144 +1,145 @@
-import pandas as pd
-import re
-import tempfile
-import gradio as gr
-import matplotlib.pyplot as plt
-import seaborn as sns
-from wordcloud import WordCloud
-from flair.models import TextClassifier
-from flair.data import Sentence
-# Load FLAIR sentiment model
-classifier = TextClassifier.load("sentiment")
-# Minimal text cleaning
-def clean_text_for_flair(text):
-    if pd.isnull(text):
-        return ""
-    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
-    text = re.sub(r"<.*?>", "", text)  # remove HTML
-    text = re.sub(r"\s+", " ", text).strip()  # normalize whitespace
-    return text
-# Generate word clouds
-def generate_wordclouds(df):
-    if "sentiment" not in df.columns or "clean_text" not in df.columns:
-        return None, None
-    positive_text = " ".join(df[df["sentiment"] == "POSITIVE"]["clean_text"].astype(str))
-    negative_text = " ".join(df[df["sentiment"] == "NEGATIVE"]["clean_text"].astype(str))
-    pos_wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='Greens').generate(positive_text)
-    neg_wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='Reds').generate(negative_text)
-    pos_path = "positive_wordcloud.png"
-    plt.figure(figsize=(10, 5))
-    plt.imshow(pos_wordcloud, interpolation='bilinear')
-    plt.axis("off")
-    plt.title("Positive Word Cloud")
-    plt.tight_layout()
-    plt.savefig(pos_path)
-    plt.close()
-    neg_path = "negative_wordcloud.png"
-    plt.figure(figsize=(10, 5))
-    plt.imshow(neg_wordcloud, interpolation='bilinear')
-    plt.axis("off")
-    plt.title("Negative Word Cloud")
-    plt.tight_layout()
-    plt.savefig(neg_path)
-    plt.close()
-    return pos_path, neg_path
-# Main analysis function
-def analyze_sentiment_flair(file, text_column):
-    try:
-        df = pd.read_csv(file.name)
-    except Exception as e:
-        return f"Error loading file: {e}", None, None, None, None, None
-    if text_column not in df.columns:
-        return "Selected text column not found.", None, None, None, None, None
-    df["clean_text"] = df[text_column].apply(clean_text_for_flair)
-    sentiments = []
-    scores = []
-    for text in df["clean_text"]:
-        sentence = Sentence(text)
-        classifier.predict(sentence)
-        label = sentence.labels[0].value
-        score = sentence.labels[0].score
-        sentiments.append(label)
-        scores.append(score)
-    df["sentiment"] = sentiments
-    df["confidence"] = scores
-    # Save results
-    with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv") as tmp:
-        df.to_csv(tmp.name, index=False)
-        csv_path = tmp.name
-    # Sentiment count plot
-    plt.figure(figsize=(6, 4))
-    sns.countplot(data=df, x="sentiment", palette="pastel")
-    plt.title("Sentiment Distribution")
-    plt.tight_layout()
-    sentiment_plot_path = "sentiment_flair_plot.png"
-    plt.savefig(sentiment_plot_path)
-    plt.close()
-    # Confidence score distribution
-    plt.figure(figsize=(6, 4))
-    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
-    plt.title("Confidence Score Distribution")
-    plt.tight_layout()
-    confidence_plot_path = "confidence_flair_plot.png"
-    plt.savefig(confidence_plot_path)
-    plt.close()
-    # Word clouds
-    pos_wc_path, neg_wc_path = generate_wordclouds(df)
-    return f"Sentiment analysis completed on {len(df)} rows.", csv_path, sentiment_plot_path, confidence_plot_path, pos_wc_path, neg_wc_path
-# Gradio interface
-with gr.Blocks() as app:
-    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
-    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")
-    with gr.Row():
-        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
-        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)
-    def get_text_columns(file):
-        try:
-            df = pd.read_csv(file.name)
-            text_cols = df.select_dtypes(include='object').columns.tolist()
-            if not text_cols:
-                return gr.update(choices=[], label="⚠️ No text columns found")
-            return gr.update(choices=text_cols, value=text_cols[0])
-        except:
-            return gr.update(choices=[], value=None)
-    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)
-    analyze_btn = gr.Button("Run Sentiment Analysis")
-    output_text = gr.Textbox(label="Status")
-    file_output = gr.File(label="Download Results CSV")
-    sentiment_plot = gr.Image(label="Sentiment Distribution")
-    confidence_plot = gr.Image(label="Confidence Score Distribution")
-    wordcloud_pos = gr.Image(label="Positive Word Cloud")
-    wordcloud_neg = gr.Image(label="Negative Word Cloud")
-    analyze_btn.click(
-        analyze_sentiment_flair,
-        inputs=[file_input, col_dropdown],
-        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
-    )
-app.launch(share=True, debug=True)

+import pandas as pd
+import re
+import tempfile
+import gradio as gr
+import matplotlib.pyplot as plt
+import seaborn as sns
+from wordcloud import WordCloud
+from flair.models import TextClassifier
+from flair.data import Sentence
+from flair.nn import Classifier
+# Load FLAIR sentiment model
+# Loaded once at import time; analyze_sentiment_flair() reuses this module-level instance.
+classifier = Classifier.load("sentiment")
+# Minimal text cleaning
+def clean_text_for_flair(text):
+    """Strip URLs, HTML tags and redundant whitespace from one cell value.
+
+    Args:
+        text: Raw cell value. May be a string, a missing value (None/NaN),
+            or a non-string scalar such as a number.
+
+    Returns:
+        The cleaned string, or "" when the value is missing.
+    """
+    if pd.isnull(text):
+        return ""
+    # Mixed-type columns can contain non-string scalars; re.sub() only
+    # accepts strings, so coerce before applying the patterns.
+    if not isinstance(text, str):
+        text = str(text)
+    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
+    text = re.sub(r"<.*?>", "", text)  # remove HTML tags
+    return re.sub(r"\s+", " ", text).strip()  # normalize whitespace
+# Generate word clouds
+def _save_wordcloud(text, colormap, title, path):
+    """Render *text* as a word cloud PNG at *path*.
+
+    Returns *path* on success, or None when the text contains no words to
+    draw (nothing is written in that case).
+    """
+    if not text.strip():
+        return None
+    try:
+        cloud = WordCloud(width=800, height=400, background_color='white', colormap=colormap).generate(text)
+    except ValueError:
+        # WordCloud raises ValueError when no words remain to plot
+        # (for example, the text consists only of stopwords).
+        return None
+    fig = plt.figure(figsize=(10, 5))
+    try:
+        plt.imshow(cloud, interpolation='bilinear')
+        plt.axis("off")
+        plt.title(title)
+        plt.tight_layout()
+        plt.savefig(path)
+    finally:
+        # Always release the figure, even if saving fails.
+        plt.close(fig)
+    return path
+def generate_wordclouds(df):
+    """Create word cloud images for the POSITIVE and NEGATIVE rows of *df*.
+
+    Args:
+        df: DataFrame with a "sentiment" column and a "clean_text" column.
+
+    Returns:
+        ``(positive_path, negative_path)``. Either entry is None when the
+        required columns are missing or that sentiment has no words to plot.
+    """
+    if "sentiment" not in df.columns or "clean_text" not in df.columns:
+        return None, None
+    clean_text = df["clean_text"].astype(str)
+    positive_text = " ".join(clean_text[df["sentiment"] == "POSITIVE"])
+    negative_text = " ".join(clean_text[df["sentiment"] == "NEGATIVE"])
+    pos_path = _save_wordcloud(positive_text, 'Greens', "Positive Word Cloud", "positive_wordcloud.png")
+    neg_path = _save_wordcloud(negative_text, 'Reds', "Negative Word Cloud", "negative_wordcloud.png")
+    return pos_path, neg_path
+# Main analysis function
+def analyze_sentiment_flair(file, text_column):
+    """Classify each row of an uploaded CSV with FLAIR and build the report files.
+
+    Args:
+        file: Uploaded file from the Gradio File component (an object whose
+            ``.name`` is the path, or a plain path string).
+        text_column: Name of the column that holds the text to classify.
+
+    Returns:
+        A 6-tuple ``(status message, results CSV path, sentiment plot path,
+        confidence plot path, positive word cloud path, negative word cloud
+        path)``. On failure only the status message is set and the other
+        five items are None.
+    """
+    if file is None:
+        return "Error loading file: no file uploaded.", None, None, None, None, None
+    try:
+        df = pd.read_csv(getattr(file, "name", file))
+    except Exception as e:
+        return f"Error loading file: {e}", None, None, None, None, None
+    if text_column not in df.columns:
+        return "Selected text column not found.", None, None, None, None, None
+    df["clean_text"] = df[text_column].apply(clean_text_for_flair)
+    # Rows whose cleaned text is empty get no Sentence: the classifier leaves
+    # empty sentences unlabelled, which would make labels[0] fail below.
+    sentences = [Sentence(text) if text.strip() else None for text in df["clean_text"]]
+    to_score = [sentence for sentence in sentences if sentence is not None]
+    if to_score:
+        # One batched call instead of one model invocation per row.
+        classifier.predict(to_score)
+    sentiments = []
+    scores = []
+    for sentence in sentences:
+        labels = sentence.labels if sentence is not None else []
+        if labels:
+            sentiments.append(labels[0].value)
+            scores.append(labels[0].score)
+        else:
+            # No prediction available for this row (empty text).
+            sentiments.append("UNKNOWN")
+            scores.append(float("nan"))
+    df["sentiment"] = sentiments
+    df["confidence"] = scores
+    # Save results; write through the open handle rather than reopening the
+    # temporary file by name while it is still held open.
+    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".csv", newline="", encoding="utf-8") as tmp:
+        df.to_csv(tmp, index=False)
+        csv_path = tmp.name
+    # Sentiment count plot
+    plt.figure(figsize=(6, 4))
+    sns.countplot(data=df, x="sentiment", palette="pastel")
+    plt.title("Sentiment Distribution")
+    plt.tight_layout()
+    sentiment_plot_path = "sentiment_flair_plot.png"
+    plt.savefig(sentiment_plot_path)
+    plt.close()
+    # Confidence score distribution
+    plt.figure(figsize=(6, 4))
+    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
+    plt.title("Confidence Score Distribution")
+    plt.tight_layout()
+    confidence_plot_path = "confidence_flair_plot.png"
+    plt.savefig(confidence_plot_path)
+    plt.close()
+    # Word clouds
+    pos_wc_path, neg_wc_path = generate_wordclouds(df)
+    return f"Sentiment analysis completed on {len(df)} rows.", csv_path, sentiment_plot_path, confidence_plot_path, pos_wc_path, neg_wc_path
+# Gradio interface
+with gr.Blocks() as app:
+    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
+    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")
+    with gr.Row():
+        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
+        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)
+    def get_text_columns(file):
+        """Return a dropdown update listing the object-dtype columns of the uploaded CSV.
+
+        The label is set on every path so that a warning shown for one
+        upload does not persist after a later, valid upload.
+        """
+        default_label = "Select Text Column"
+        if file is None:
+            # The change event also fires when the upload is cleared.
+            return gr.update(choices=[], value=None, label=default_label)
+        try:
+            df = pd.read_csv(getattr(file, "name", file))
+        except Exception:
+            # Unreadable or malformed CSV: empty the dropdown and say why.
+            return gr.update(choices=[], value=None, label="⚠️ Could not read the CSV file")
+        text_cols = df.select_dtypes(include='object').columns.tolist()
+        if not text_cols:
+            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
+        return gr.update(choices=text_cols, value=text_cols[0], label=default_label)
+    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)
+    analyze_btn = gr.Button("Run Sentiment Analysis")
+    output_text = gr.Textbox(label="Status")
+    file_output = gr.File(label="Download Results CSV")
+    sentiment_plot = gr.Image(label="Sentiment Distribution")
+    confidence_plot = gr.Image(label="Confidence Score Distribution")
+    wordcloud_pos = gr.Image(label="Positive Word Cloud")
+    wordcloud_neg = gr.Image(label="Negative Word Cloud")
+    # Output order must match the 6-tuple returned by analyze_sentiment_flair.
+    analyze_btn.click(
+        analyze_sentiment_flair,
+        inputs=[file_input, col_dropdown],
+        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
+    )
+app.launch(share=True, debug=True)