pvaluedotone committed on
Commit
ed5c736
·
verified ·
1 Parent(s): ed3f812

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -144
app.py CHANGED
@@ -1,144 +1,145 @@
1
- import pandas as pd
2
- import re
3
- import tempfile
4
- import gradio as gr
5
- import matplotlib.pyplot as plt
6
- import seaborn as sns
7
- from wordcloud import WordCloud
8
- from flair.models import TextClassifier
9
- from flair.data import Sentence
10
-
11
- # Load FLAIR sentiment model
12
- classifier = TextClassifier.load("sentiment")
13
-
14
- # Minimal text cleaning
15
- def clean_text_for_flair(text):
16
- if pd.isnull(text):
17
- return ""
18
- text = re.sub(r"http\S+|www\S+", "", text) # remove URLs
19
- text = re.sub(r"<.*?>", "", text) # remove HTML
20
- text = re.sub(r"\s+", " ", text).strip() # normalize whitespace
21
- return text
22
-
23
- # Generate word clouds
24
- def generate_wordclouds(df):
25
- if "sentiment" not in df.columns or "clean_text" not in df.columns:
26
- return None, None
27
-
28
- positive_text = " ".join(df[df["sentiment"] == "POSITIVE"]["clean_text"].astype(str))
29
- negative_text = " ".join(df[df["sentiment"] == "NEGATIVE"]["clean_text"].astype(str))
30
-
31
- pos_wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='Greens').generate(positive_text)
32
- neg_wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='Reds').generate(negative_text)
33
-
34
- pos_path = "positive_wordcloud.png"
35
- plt.figure(figsize=(10, 5))
36
- plt.imshow(pos_wordcloud, interpolation='bilinear')
37
- plt.axis("off")
38
- plt.title("Positive Word Cloud")
39
- plt.tight_layout()
40
- plt.savefig(pos_path)
41
- plt.close()
42
-
43
- neg_path = "negative_wordcloud.png"
44
- plt.figure(figsize=(10, 5))
45
- plt.imshow(neg_wordcloud, interpolation='bilinear')
46
- plt.axis("off")
47
- plt.title("Negative Word Cloud")
48
- plt.tight_layout()
49
- plt.savefig(neg_path)
50
- plt.close()
51
-
52
- return pos_path, neg_path
53
-
54
- # Main analysis function
55
- def analyze_sentiment_flair(file, text_column):
56
- try:
57
- df = pd.read_csv(file.name)
58
- except Exception as e:
59
- return f"Error loading file: {e}", None, None, None, None, None
60
-
61
- if text_column not in df.columns:
62
- return "Selected text column not found.", None, None, None, None, None
63
-
64
- df["clean_text"] = df[text_column].apply(clean_text_for_flair)
65
-
66
- sentiments = []
67
- scores = []
68
-
69
- for text in df["clean_text"]:
70
- sentence = Sentence(text)
71
- classifier.predict(sentence)
72
- label = sentence.labels[0].value
73
- score = sentence.labels[0].score
74
- sentiments.append(label)
75
- scores.append(score)
76
-
77
- df["sentiment"] = sentiments
78
- df["confidence"] = scores
79
-
80
- # Save results
81
- with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv") as tmp:
82
- df.to_csv(tmp.name, index=False)
83
- csv_path = tmp.name
84
-
85
- # Sentiment count plot
86
- plt.figure(figsize=(6, 4))
87
- sns.countplot(data=df, x="sentiment", palette="pastel")
88
- plt.title("Sentiment Distribution")
89
- plt.tight_layout()
90
- sentiment_plot_path = "sentiment_flair_plot.png"
91
- plt.savefig(sentiment_plot_path)
92
- plt.close()
93
-
94
- # Confidence score distribution
95
- plt.figure(figsize=(6, 4))
96
- sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
97
- plt.title("Confidence Score Distribution")
98
- plt.tight_layout()
99
- confidence_plot_path = "confidence_flair_plot.png"
100
- plt.savefig(confidence_plot_path)
101
- plt.close()
102
-
103
- # Word clouds
104
- pos_wc_path, neg_wc_path = generate_wordclouds(df)
105
-
106
- return f"Sentiment analysis completed on {len(df)} rows.", csv_path, sentiment_plot_path, confidence_plot_path, pos_wc_path, neg_wc_path
107
-
108
- # Gradio interface
109
- with gr.Blocks() as app:
110
- gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
111
- gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")
112
-
113
- with gr.Row():
114
- file_input = gr.File(label="Upload CSV", file_types=[".csv"])
115
- col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)
116
-
117
- def get_text_columns(file):
118
- try:
119
- df = pd.read_csv(file.name)
120
- text_cols = df.select_dtypes(include='object').columns.tolist()
121
- if not text_cols:
122
- return gr.update(choices=[], label="⚠️ No text columns found")
123
- return gr.update(choices=text_cols, value=text_cols[0])
124
- except:
125
- return gr.update(choices=[], value=None)
126
-
127
- file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)
128
-
129
- analyze_btn = gr.Button("Run Sentiment Analysis")
130
-
131
- output_text = gr.Textbox(label="Status")
132
- file_output = gr.File(label="Download Results CSV")
133
- sentiment_plot = gr.Image(label="Sentiment Distribution")
134
- confidence_plot = gr.Image(label="Confidence Score Distribution")
135
- wordcloud_pos = gr.Image(label="Positive Word Cloud")
136
- wordcloud_neg = gr.Image(label="Negative Word Cloud")
137
-
138
- analyze_btn.click(
139
- analyze_sentiment_flair,
140
- inputs=[file_input, col_dropdown],
141
- outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
142
- )
143
-
144
- app.launch(share=True, debug=True)
 
 
1
+ import pandas as pd
2
+ import re
3
+ import tempfile
4
+ import gradio as gr
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ from wordcloud import WordCloud
8
+ from flair.models import TextClassifier
9
+ from flair.data import Sentence
10
+ from flair.nn import Classifier
11
+
12
+ # Load FLAIR sentiment model
13
+ classifier = Classifier.load("sentiment")
14
+
15
+ # Minimal text cleaning
16
def clean_text_for_flair(text):
    """Return *text* with URLs, HTML tags and redundant whitespace removed.

    Args:
        text: Raw cell value taken from the uploaded CSV. Missing values
            (``None``, ``NaN``, ``pd.NA``) yield ``""``; any other
            non-string value is converted with ``str()`` before cleaning so
            that mixed-type columns do not make ``re.sub`` raise ``TypeError``.

    Returns:
        The cleaned string (possibly empty).
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        # Only scalar values can be "missing"; pd.isnull on a list-like
        # returns an array whose truth value is ambiguous.
        if pd.api.types.is_scalar(text) and pd.isnull(text):
            return ""
        text = str(text)
    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
    text = re.sub(r"<.*?>", "", text)  # remove HTML tags
    return re.sub(r"\s+", " ", text).strip()  # normalize whitespace
23
+
24
+ # Generate word clouds
25
def _save_wordcloud(text, title, colormap, path):
    """Render *text* as a word-cloud PNG at *path*.

    Returns *path*, or ``None`` when *text* contains no usable words.
    """
    if not text.strip():
        return None
    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white', colormap=colormap
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left to plot
        # (for instance when the text consists solely of stop words).
        return None

    plt.figure(figsize=(10, 5))
    try:
        plt.imshow(cloud, interpolation='bilinear')
        plt.axis("off")
        plt.title(title)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close()
    return path


def generate_wordclouds(df):
    """Create one word-cloud image per sentiment class.

    Args:
        df: DataFrame with a ``"sentiment"`` column (``"POSITIVE"`` /
            ``"NEGATIVE"`` labels) and a ``"clean_text"`` column.

    Returns:
        ``(positive_path, negative_path)``. An element is ``None`` when the
        corresponding class has no text to draw; both are ``None`` when a
        required column is missing.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    positive_text = " ".join(df.loc[df["sentiment"] == "POSITIVE", "clean_text"].astype(str))
    negative_text = " ".join(df.loc[df["sentiment"] == "NEGATIVE", "clean_text"].astype(str))

    pos_path = _save_wordcloud(
        positive_text, "Positive Word Cloud", 'Greens', "positive_wordcloud.png"
    )
    neg_path = _save_wordcloud(
        negative_text, "Negative Word Cloud", 'Reds', "negative_wordcloud.png"
    )
    return pos_path, neg_path
54
+
55
+ # Main analysis function
56
def analyze_sentiment_flair(file, text_column):
    """Classify every row of an uploaded CSV with FLAIR and build the outputs.

    Args:
        file: The uploaded file as delivered by the Gradio file input: either
            an object exposing the path as ``.name`` or a plain path string.
        text_column: Name of the CSV column that holds the text to classify.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. When loading fails or the column is
        missing, only ``status`` is set and the other five items are ``None``.
    """
    no_outputs = (None, None, None, None, None)

    if file is None:
        return ("Error loading file: no file was uploaded.",) + no_outputs
    try:
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return (f"Error loading file: {e}",) + no_outputs

    if text_column not in df.columns:
        return ("Selected text column not found.",) + no_outputs

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

    sentiments = []
    scores = []
    for text in df["clean_text"]:
        labels = []
        if text:
            sentence = Sentence(text)
            classifier.predict(sentence)
            labels = sentence.labels
        if labels:
            sentiments.append(labels[0].value)
            scores.append(labels[0].score)
        else:
            # Empty rows (or rows the model left unlabeled) would otherwise
            # raise IndexError on labels[0]; record them as missing instead.
            sentiments.append(None)
            scores.append(float("nan"))

    df["sentiment"] = sentiments
    df["confidence"] = scores

    # Save results. Write through the open handle rather than reopening the
    # file by name, which fails on platforms that lock open temp files.
    with tempfile.NamedTemporaryFile(
        mode='w', newline='', encoding='utf-8', delete=False, suffix=".csv"
    ) as tmp:
        df.to_csv(tmp, index=False)
        csv_path = tmp.name

    # Sentiment count plot
    plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", palette="pastel")
    plt.title("Sentiment Distribution")
    plt.tight_layout()
    sentiment_plot_path = "sentiment_flair_plot.png"
    plt.savefig(sentiment_plot_path)
    plt.close()

    # Confidence score distribution
    plt.figure(figsize=(6, 4))
    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
    plt.title("Confidence Score Distribution")
    plt.tight_layout()
    confidence_plot_path = "confidence_flair_plot.png"
    plt.savefig(confidence_plot_path)
    plt.close()

    # Word clouds
    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return (
        f"Sentiment analysis completed on {len(df)} rows.",
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )
108
+
109
+ # Gradio interface
110
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Return a dropdown update listing the object-dtype columns of *file*.

        An absent or unreadable file clears the dropdown instead of raising.
        """
        if file is None:
            # The file input was cleared; nothing to offer.
            return gr.update(choices=[], value=None)
        try:
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            # Best effort: an unreadable upload just empties the dropdown.
            # (A bare ``except:`` would also swallow KeyboardInterrupt/SystemExit.)
            return gr.update(choices=[], value=None)

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
        # Restore the regular label in case an earlier upload replaced it with the warning.
        return gr.update(choices=text_cols, value=text_cols[0], label="Select Text Column")

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")

    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
    )

app.launch(share=True, debug=True)