Spaces:

pvaluedotone
/

VADER_sentiment_analysis

Sleeping

App Files Files Community

VADER_sentiment_analysis / app.py

pvaluedotone

Update app.py

42148b2 verified about 1 month ago

raw

history blame contribute delete

4.15 kB

	import pandas as pd
	import re
	import nltk
	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	from nltk.sentiment import SentimentIntensityAnalyzer

	# Fetch the "vader_lexicon" resource through NLTK's downloader when the
	# module is imported; this runs before the analyzer below is constructed.
	nltk.download("vader_lexicon")
	# Single module-level analyzer instance; analyze_sentiment calls
	# sia.polarity_scores() on every cleaned row.
	sia = SentimentIntensityAnalyzer()

	def clean_text(text):
	    """Normalise one raw text cell before it is scored.

	    Non-string values (for example NaN from an empty CSV cell) become an
	    empty string. For strings the steps are, in order: drop anything that
	    starts with ``http`` up to the next whitespace, drop ``@mentions`` and
	    ``#hashtags``, drop every character that is neither a word character
	    nor whitespace, then lower-case and strip the ends.

	    Args:
	        text: The cell value; any type is accepted.

	    Returns:
	        The cleaned string, or ``""`` when ``text`` is not a ``str``.
	    """
	    if not isinstance(text, str):
	        return ""
	    text = re.sub(r"http\S+", "", text)
	    # The "|" must be unescaped to act as alternation. Written as "\|" it
	    # only matched the literal sequence "@name|#tag", so mentions and
	    # hashtags survived and their words were scored.
	    text = re.sub(r"@\w+|#\w+", "", text)
	    # NOTE(review): this also strips "!" and emoticons, and the next line
	    # lower-cases everything; presumably VADER would otherwise use those
	    # cues as intensity signals - confirm this normalisation is intended.
	    text = re.sub(r"[^\w\s]", "", text)
	    return text.lower().strip()

	def get_sentiment_label(score, pos_thresh, neg_thresh):
	    """Map a compound score onto a sentiment label.

	    The positive test runs first: a score at or above ``pos_thresh`` is
	    "Positive". Otherwise a score at or below ``neg_thresh`` is "Negative",
	    and everything else is "Neutral".
	    """
	    if score >= pos_thresh:
	        return "Positive"
	    return "Negative" if score <= neg_thresh else "Neutral"

	def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
	    """Score one CSV column with VADER and build the result artefacts.

	    Args:
	        file: The value of the upload input. Presumably an object exposing
	            the saved path as ``.name``; a plain path string is accepted
	            too. ``None`` (nothing uploaded) yields a status message.
	        text_column: Name of the column in the CSV to analyse.
	        pos_thresh: Compound scores at or above this are "Positive".
	        neg_thresh: Compound scores at or below this are "Negative".

	    Returns:
	        A 5-tuple ``(status, preview, csv_path, sentiment_png,
	        compound_png)``. On any failure only ``status`` is set and the
	        other four items are ``None``. ``preview`` is the first 10 rows of
	        the chosen column together with ``compound`` and ``sentiment``.
	    """
	    # Local imports keep this block self-contained.
	    import os
	    import tempfile

	    if file is None:
	        return "Please upload a CSV file first.", None, None, None, None

	    try:
	        df = pd.read_csv(getattr(file, "name", file))
	    except Exception as e:
	        return f"Error reading CSV file: {e}", None, None, None, None

	    if text_column not in df.columns:
	        return "Selected column not found.", None, None, None, None

	    df["clean_text"] = df[text_column].apply(clean_text)
	    df["compound"] = df["clean_text"].apply(lambda x: sia.polarity_scores(x)["compound"])
	    df["sentiment"] = df["compound"].apply(lambda score: get_sentiment_label(score, pos_thresh, neg_thresh))

	    # Write every artefact into a directory unique to this call. Fixed
	    # names in the working directory let overlapping requests overwrite
	    # each other's CSV and images. The file names themselves are unchanged.
	    out_dir = tempfile.mkdtemp(prefix="vader_")

	    # Save CSV
	    output_file = os.path.join(out_dir, "VADER_sentiment_results.csv")
	    df.to_csv(output_file, index=False)

	    # Plot 1: Sentiment distribution
	    # Explicit figure/axes objects avoid relying on pyplot's shared
	    # "current figure" state; the finally clause closes the figure even
	    # if drawing or saving fails.
	    sentiment_fig = os.path.join(out_dir, "sentiment_dist.png")
	    fig, ax = plt.subplots(figsize=(6, 4))
	    try:
	        sns.countplot(data=df, x="sentiment", palette="Set2", ax=ax)
	        ax.set_title("Sentiment Distribution")
	        fig.tight_layout()
	        fig.savefig(sentiment_fig)
	    finally:
	        plt.close(fig)

	    # Plot 2: Compound score histogram
	    compound_fig = os.path.join(out_dir, "compound_dist.png")
	    fig, ax = plt.subplots(figsize=(6, 4))
	    try:
	        sns.histplot(df["compound"], bins=30, kde=True, color="skyblue", ax=ax)
	        ax.set_title("Compound score distribution")
	        ax.set_xlabel("Compound score")
	        fig.tight_layout()
	        fig.savefig(compound_fig)
	    finally:
	        plt.close(fig)

	    # Sample preview
	    preview = df[[text_column, "compound", "sentiment"]].head(10)

	    return f"Sentiment analysis complete. Processed {len(df)} rows.", preview, output_file, sentiment_fig, compound_fig

	def get_text_columns(file):
	    """Build the dropdown update listing the CSV's text-like columns.

	    Args:
	        file: The value of the upload input. Presumably an object exposing
	            the saved path as ``.name``; a plain path string is accepted
	            too. ``None`` means the upload was cleared.

	    Returns:
	        A ``gr.update(...)`` for the column dropdown: the object-dtype
	        columns with the first one preselected, or an empty choice list
	        with a warning label when none are found or the file cannot be
	        read.
	    """
	    # Same text as the dropdown's initial label, so a warning left by an
	    # earlier failure is cleared once a good file arrives.
	    default_label = "Select Text Column"

	    # A cleared upload is not an error: just reset the dropdown.
	    if file is None:
	        return gr.update(choices=[], value=None, label=default_label)

	    try:
	        # Sample several rows rather than one: with a single row, a text
	        # column whose first cell is blank is inferred as numeric and
	        # would be left out of the choices.
	        df = pd.read_csv(getattr(file, "name", file), nrows=50)
	        text_columns = df.select_dtypes(include='object').columns.tolist()
	    except Exception:
	        return gr.update(choices=[], value=None, label="⚠️ Error reading file")

	    if not text_columns:
	        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
	    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

	# Gradio UI definition followed by the launch call. Components are
	# created in the order written below.
	# NOTE(review): indentation was lost in this copy of the file, so the
	# nesting of the `with` blocks cannot be read off the text - confirm
	# against the original before editing.
	with gr.Blocks() as app:
	gr.Markdown("## Sentiment analysis with VADER")
	gr.Markdown("Upload a CSV, choose a text column, adjust sentiment thresholds, and run analysis.")
	gr.Markdown("Citation: Mat Roni, S. (2025). Sentiment analysis with VADER on Gradio (version 1.0) [software]. https://huggingface.co/spaces/pvaluedotone/VADER_sentiment_analysis")

	# CSV upload widget and the column picker, which starts with no choices.
	with gr.Row():
	file_input = gr.File(label="Upload CSV", file_types=[".csv"])
	column_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

	# Refill the dropdown through get_text_columns whenever the upload changes.
	file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

	# Threshold sliders fed to analyze_sentiment: positive ranges over
	# [0.0, 1.0] (initially 0.05), negative over [-1.0, 0.0] (initially -0.05).
	with gr.Row():
	pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01, label="Positive Threshold")
	neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.05, step=0.01, label="Negative Threshold")

	analyze_button = gr.Button("Run Sentiment Analysis")

	# Output widgets, in the same order as the 5-tuple analyze_sentiment returns.
	status_box = gr.Textbox(label="Status")
	data_output = gr.Dataframe(label="Sample Output (Top 10)")
	file_output = gr.File(label="Download Full Results")
	sentiment_plot = gr.Image(label="Sentiment Label Distribution")
	compound_plot = gr.Image(label="Compound Score Distribution")

	# Wire the button: four inputs go to analyze_sentiment, five outputs come back.
	analyze_button.click(
	fn=analyze_sentiment,
	inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
	outputs=[status_box, data_output, file_output, sentiment_plot, compound_plot]
	)

	# Start the app with debug and share both enabled.
	# NOTE(review): share=True is presumably unnecessary when the app is
	# hosted on a Space - verify against the Gradio launch() documentation.
	app.launch(debug=True, share=True)