pvaluedotone committed on
Commit
afbce0d
·
verified ·
1 Parent(s): 9b7dc3c

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +34 -12
  2. app.py +102 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
- ---
2
- title: VADER Sentiment Analysis
3
- emoji: 👀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: gradio
7
- sdk_version: 5.30.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: VADER Sentiment Analysis
3
+ emoji: 👀
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.30.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+ # VADER Sentiment Analyzer - Hugging Face Space
12
+
13
+ This is a simple web app built with [Gradio](https://gradio.app) for performing sentiment analysis using VADER.
14
+
15
+ ## Features
16
+
17
+ - Upload a CSV file.
18
+ - Select the column that contains text data.
19
+ - Automatically clean the text and compute VADER sentiment scores.
20
+ - Assign sentiment labels: Positive, Neutral, or Negative.
21
+ - Download the processed CSV file.
22
+
23
+ ## How to Use
24
+
25
+ 1. Upload your CSV file (e.g., `reviews.csv`).
26
+ 2. Click "Load Columns" to populate the dropdown with available columns.
27
+ 3. Select the column that contains the text (e.g., `ReviewBody`).
28
+ 4. Click "Run Sentiment Analysis".
29
+ 5. Download the processed file with the sentiment labels.
30
+
31
+ ## Output File
32
+
33
+ The processed file will be saved with the filename:
34
+ `VADER_sentiment_results.csv`
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import re
3
+ import nltk
4
+ import gradio as gr
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+ from nltk.sentiment import SentimentIntensityAnalyzer
8
+
9
# Fetch the VADER lexicon only when it is not already installed, so a restart
# with the resource present does not trigger another download attempt.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon", quiet=True)

# Shared analyzer instance used by analyze_sentiment().
sia = SentimentIntensityAnalyzer()
11
+
12
def clean_text(text):
    """Normalise one raw text cell before it is scored.

    Non-string input (e.g. NaN from an empty CSV cell) yields an empty
    string. For strings the following are removed, in order: anything
    starting with ``http`` up to the next whitespace, ``@mentions`` and
    ``#hashtags``, and every character that is neither a word character nor
    whitespace. The result is lower-cased and stripped of surrounding
    whitespace.

    NOTE(review): VADER treats punctuation and capitalisation as intensity
    cues, so removing them here probably dampens scores - confirm intended.
    """
    if isinstance(text, str):
        cleaned = text
        # Order matters: URLs must go before punctuation is stripped.
        for pattern in (r"http\S+", r"@\w+|#\w+", r"[^\w\s]"):
            cleaned = re.sub(pattern, "", cleaned)
        return cleaned.lower().strip()
    return ""
20
+
21
def get_sentiment_label(score, pos_thresh, neg_thresh):
    """Translate a compound score into a sentiment label.

    Returns "Positive" when ``score`` is at or above ``pos_thresh``,
    "Negative" when it is at or below ``neg_thresh``, and "Neutral"
    otherwise. The positive test runs first, so it wins if both match.
    """
    if score >= pos_thresh:
        return "Positive"
    if score <= neg_thresh:
        return "Negative"
    return "Neutral"
28
+
29
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score one CSV column with VADER and write a labelled CSV plus two plots.

    Args:
        file: The uploaded CSV, either a path string or an object exposing
            the path as ``.name``. ``None`` means nothing was uploaded.
        text_column: Name of the column holding the text to score.
        pos_thresh: Compound score at or above which a row is "Positive".
        neg_thresh: Compound score at or below which a row is "Negative".

    Returns:
        A 4-tuple ``(status, csv_path, sentiment_plot_path,
        compound_plot_path)``. On any error the status carries the message
        and the three paths are ``None``.
    """
    # Function-scope imports keep this block self-contained (stdlib only).
    import os
    import tempfile

    if file is None:
        return "Error: Please upload a CSV file first.", None, None, None

    try:
        # Accept both a plain path and an object carrying the path in .name.
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return f"Error reading CSV file: {e}", None, None, None

    if text_column not in df.columns:
        return "Error: Selected column not found in the uploaded file.", None, None, None

    df["clean_text"] = df[text_column].apply(clean_text)
    df["compound"] = df["clean_text"].apply(lambda x: sia.polarity_scores(x)["compound"])
    df["sentiment"] = df["compound"].apply(
        lambda score: get_sentiment_label(score, pos_thresh, neg_thresh)
    )

    # Each run writes into its own temporary directory so that concurrent
    # requests cannot overwrite one another's CSV or plot files. The file
    # names themselves are unchanged.
    out_dir = tempfile.mkdtemp(prefix="vader_")

    output_file = os.path.join(out_dir, "VADER_sentiment_results.csv")
    df.to_csv(output_file, index=False)

    # Plot 1: sentiment distribution. An explicit Figure/Axes pair avoids
    # relying on pyplot's implicit "current figure".
    sentiment_fig = os.path.join(out_dir, "sentiment_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        # seaborn >= 0.13 deprecates `palette` without `hue`; assigning the
        # x variable to hue with legend=False is the documented replacement.
        sns.countplot(
            data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False, ax=ax
        )
        ax.set_title("Sentiment Distribution")
        fig.tight_layout()
        fig.savefig(sentiment_fig)
    finally:
        plt.close(fig)

    # Plot 2: compound score histogram.
    compound_fig = os.path.join(out_dir, "compound_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.histplot(df["compound"], bins=30, kde=True, color="purple", ax=ax)
        ax.set_title("Compound Score Distribution")
        ax.set_xlabel("Compound Score")
        fig.tight_layout()
        fig.savefig(compound_fig)
    finally:
        plt.close(fig)

    return (
        f"Sentiment analysis complete. Processed {len(df)} rows.",
        output_file,
        sentiment_fig,
        compound_fig,
    )
66
+
67
def get_columns(file):
    """Build a Dropdown update listing the column names of the uploaded CSV.

    Args:
        file: The uploaded CSV, either a path string or an object exposing
            the path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A ``gr.update`` payload that replaces the dropdown's ``choices`` with
        the CSV's column names and clears its current value. The choices are
        empty when no file is given or the file cannot be read.
    """
    if file is None:
        return gr.update(choices=[], value=None)

    try:
        header = pd.read_csv(getattr(file, "name", file), nrows=1)
    except Exception:
        # Best effort: an unreadable file simply yields an empty dropdown.
        return gr.update(choices=[], value=None)

    # Returning a bare list would set the dropdown's *value*; the choices
    # have to be replaced through an explicit update.
    return gr.update(choices=list(header.columns), value=None)
73
+
74
# Gradio UI: upload a CSV, pick the text column, tune thresholds, run analysis.
with gr.Blocks() as demo:
    gr.Markdown("## VADER Sentiment Analyzer with Custom Thresholds and Visualizations")
    gr.Markdown("Upload a CSV, choose a text column, adjust thresholds, and view/download results.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    # Populates the column dropdown from the uploaded file's header.
    load_columns_btn = gr.Button("Load Columns")
    load_columns_btn.click(fn=get_columns, inputs=file_input, outputs=col_dropdown)

    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.05, step=0.01, label="Negative Threshold")

    analyze_btn = gr.Button("Run Sentiment Analysis")

    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Processed CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    compound_plot = gr.Image(label="Compound Score Distribution")

    # Output order must match the 4-tuple returned by analyze_sentiment.
    analyze_btn.click(
        fn=analyze_sentiment,
        inputs=[file_input, col_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[output_text, file_output, sentiment_plot, compound_plot],
    )

# share=True is not supported on Hugging Face Spaces (the Space is already
# publicly served), so the app is launched with default settings. The guard
# keeps an import of this module from starting a server.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ nltk
4
+ matplotlib
5
+ seaborn