Spaces:

pvaluedotone
/

textblob-sentiment-app

Sleeping

App Files Files Community

textblob-sentiment-app / app.py

pvaluedotone

Update app.py

fd0f5ba verified about 1 month ago

raw

history blame contribute delete

5.97 kB

	# Import-time setup: these download calls run every time the module is
	# loaded, before the remaining imports below are executed.
	import nltk
	# Fetch the NLTK resource named 'punkt'.
	# NOTE(review): presumably the tokenizer data TextBlob relies on - confirm.
	nltk.download('punkt')

	import textblob.download_corpora
	# Run TextBlob's own corpus downloader.
	# NOTE(review): assumed to be skipped quickly when data is already present - verify.
	textblob.download_corpora.download_all()

	import pandas as pd
	import re
	from textblob import TextBlob
	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	import tempfile
	from wordcloud import WordCloud


	# Text cleaning function
	def clean_text(text):
	    """Normalize one raw text value before sentiment scoring.

	    The value is stripped of URLs, ``@mentions`` and ``#`` signs, then every
	    remaining character that is not an ASCII letter, digit or whitespace is
	    removed. The result is lower-cased and trimmed at both ends. Interior
	    whitespace is left as-is, so a removed token can leave a double space.

	    Args:
	        text: Any scalar value. Missing values (as reported by
	            ``pd.isnull``) produce an empty string; everything else is
	            converted with ``str()`` first.

	    Returns:
	        The cleaned, lower-cased string.
	    """
	    if pd.isnull(text):
	        return ""
	    text = str(text)
	    # The '|' must stay unescaped: an escaped bar matches a literal pipe
	    # character, which turns the pattern into one long literal sequence
	    # that never matches an ordinary URL.
	    text = re.sub(r"\bhttps?://\S+|\bwww\.\S+", "", text)
	    # '\w+' (word characters) rather than a literal 'w': drop whole
	    # @mentions, and drop the '#' sign while keeping the hashtag word.
	    text = re.sub(r"@\w+|#", "", text)
	    text = re.sub(r"[^A-Za-z0-9\s]+", "", text)
	    return text.lower().strip()

	# Sentiment classification using thresholds
	def get_sentiment_label(polarity, pos_thresh, neg_thresh):
	    """Map a polarity score onto a sentiment label.

	    Args:
	        polarity: Score to classify.
	        pos_thresh: Scores at or above this value are "Positive".
	        neg_thresh: Scores at or below this value are "Negative".

	    Returns:
	        "Positive", "Negative" or "Neutral". The positive check is made
	        first, so it wins when both thresholds are satisfied.
	    """
	    if polarity >= pos_thresh:
	        return "Positive"
	    return "Negative" if polarity <= neg_thresh else "Neutral"

	# Generate word cloud
	def generate_wordcloud(text_series, title):
	    """Draw a word cloud figure for a series of texts.

	    Args:
	        text_series: Series of strings; missing entries are dropped and the
	            rest are joined with single spaces.
	        title: Figure title, also used in the placeholder message.

	    Returns:
	        A 6x4 matplotlib figure showing the word cloud, or a centred
	        "No data for <title>" message when the joined text is blank.
	    """
	    corpus = " ".join(text_series.dropna())

	    # Build the cloud (if any) before opening the figure, as the figure
	    # itself is the same size in both cases.
	    cloud = None
	    if corpus.strip():
	        cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(corpus)

	    fig = plt.figure(figsize=(6, 4))
	    if cloud is None:
	        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
	    else:
	        plt.imshow(cloud, interpolation="bilinear")

	    # Shared finishing steps for both the placeholder and the real cloud.
	    plt.axis("off")
	    plt.title(title)
	    plt.tight_layout()
	    return fig


	# Main processing function
	def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
	    """Score one CSV column with TextBlob and build every UI output.

	    Args:
	        file: CSV source handed to ``pd.read_csv``.
	        text_column: Name of the column holding the text to analyse.
	        pos_thresh: Polarity at or above this value is labelled "Positive".
	        neg_thresh: Polarity at or below this value is labelled "Negative".

	    Returns:
	        An 8-tuple, always in this order: status message, preview DataFrame
	        (first 10 rows), sentiment-count figure, polarity-histogram figure,
	        path of the full-results CSV, positive word cloud, negative word
	        cloud, neutral word cloud. When the file cannot be read or the
	        column is missing, everything after the status message is ``None``.
	    """
	    # The click handler is wired to eight output components, so the failure
	    # paths must return eight values as well (status + seven placeholders).
	    no_results = (None,) * 7

	    try:
	        df = pd.read_csv(file)
	    except Exception as e:  # UI boundary: report any read/parse failure to the user
	        return (f"❌ Error reading CSV file: {e}", *no_results)

	    if text_column not in df.columns:
	        return ("⚠️ Selected column not found in the uploaded file.", *no_results)

	    df["clean_text"] = df[text_column].apply(clean_text)

	    # Run TextBlob once per row and read both scores from the same result
	    # instead of analysing every text twice.
	    sentiments = [TextBlob(text).sentiment for text in df["clean_text"]]
	    df["polarity"] = pd.Series([s.polarity for s in sentiments], index=df.index, dtype=float)
	    df["subjectivity"] = pd.Series([s.subjectivity for s in sentiments], index=df.index, dtype=float)
	    df["sentiment"] = df["polarity"].apply(lambda p: get_sentiment_label(p, pos_thresh, neg_thresh))

	    # Plot sentiment distribution
	    fig1 = plt.figure(figsize=(6, 4))
	    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False)
	    plt.title("Sentiment Label Distribution")
	    plt.tight_layout()

	    # Plot polarity distribution
	    fig2 = plt.figure(figsize=(6, 4))
	    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue")
	    plt.title("Polarity Score Distribution")
	    plt.tight_layout()

	    # Preview table
	    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

	    # Word clouds per sentiment
	    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
	    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
	    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

	    # Save full results to a unique temporary file: a fixed filename in the
	    # working directory would be overwritten by any other request running
	    # at the same time. delete=False keeps the file available for download.
	    with tempfile.NamedTemporaryFile(
	        mode="w",
	        prefix="TextBlob_sentiment_results_",
	        suffix=".csv",
	        delete=False,
	        newline="",
	        encoding="utf-8",
	    ) as results_file:
	        df.to_csv(results_file, index=False)
	    output_file_path = results_file.name

	    return (
	        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
	        preview_df,
	        fig1,
	        fig2,
	        output_file_path,
	        pos_wc,
	        neg_wc,
	        neu_wc,
	    )


	# Dropdown update function
	def get_text_columns(file):
	    """Build the dropdown update listing the text columns of a CSV.

	    Args:
	        file: CSV source handed to ``pd.read_csv``, or ``None`` when no
	            upload is present (for example after the input is cleared).

	    Returns:
	        A ``gr.update`` for the column dropdown. On success the choices are
	        the object-dtype columns with the first one selected. When there is
	        no file, the file cannot be read, or it has no object-dtype
	        columns, the choices are emptied and the selection is cleared.
	    """
	    default_label = "Select Text Column"

	    if file is None:
	        # Nothing to read: empty the dropdown instead of letting
	        # pd.read_csv raise on a missing input.
	        return gr.update(choices=[], value=None, label=default_label)

	    try:
	        df = pd.read_csv(file)
	    except Exception as e:  # UI boundary: surface the failure in the dropdown label
	        return gr.update(choices=[], value=None, label=f"⚠️ Could not read CSV file: {e}")

	    text_columns = df.select_dtypes(include='object').columns.tolist()
	    if not text_columns:
	        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
	    # Restore the normal label so a warning from an earlier upload does not
	    # linger after a usable file is provided.
	    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

	# Word cloud function
	def generate_wordcloud(text_series, title):
	    """Draw a word cloud figure for a series of texts.

	    Args:
	        text_series: Series of strings; missing entries are dropped and the
	            rest are joined with single spaces.
	        title: Figure title, also used in the placeholder message.

	    Returns:
	        A 6x4 matplotlib figure showing the word cloud. When there is
	        nothing to draw (blank text, or text from which WordCloud cannot
	        extract any word) the figure shows a centred
	        "No data for <title>" message instead.
	    """
	    text = " ".join(text_series.dropna())

	    cloud = None
	    if text.strip():
	        try:
	            cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
	        except ValueError:
	            # NOTE(review): WordCloud is expected to raise ValueError when
	            # no usable words remain (e.g. only stop words) - fall back to
	            # the placeholder rather than failing the whole analysis.
	            cloud = None

	    fig = plt.figure(figsize=(6, 4))
	    if cloud is None:
	        # A sentiment class with no rows is normal; show a message for it.
	        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
	    else:
	        plt.imshow(cloud, interpolation="bilinear")
	    plt.axis("off")
	    plt.title(title)
	    plt.tight_layout()
	    return fig


	# Gradio Interface
	with gr.Blocks() as app:
	    # Page header. Components render in the order they are created, so
	    # the statement order below is the page layout.
	    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
	    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
	    gr.Markdown("Citation: Mat Roni, S. (2025). Sentiment analysis with TextBlob on Gradio (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

	    # Row 1: CSV upload next to the column picker.
	    with gr.Row():
	        file_input = gr.File(label="Upload CSV File")
	        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

	    # Refresh the column choices whenever the uploaded file changes.
	    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

	    # Row 2: classification thresholds.
	    with gr.Row():
	        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
	        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

	    analyze_button = gr.Button("Run Sentiment Analysis")

	    # Result widgets.
	    status_box = gr.Textbox(label="Status", interactive=False)
	    data_output = gr.Dataframe(label="Sample results")
	    plot1 = gr.Plot(label="Sentiment Label Distribution")
	    plot2 = gr.Plot(label="Polarity Distribution")
	    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
	    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
	    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")

	    csv_download = gr.File(label="Download Full Results")

	    # The outputs list must follow the order of the tuple returned by
	    # analyze_sentiment, which is not the on-page order of the widgets.
	    analysis_inputs = [file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider]
	    analysis_outputs = [
	        status_box,
	        data_output,
	        plot1,
	        plot2,
	        csv_download,
	        pos_wordcloud,
	        neg_wordcloud,
	        neu_wordcloud,
	    ]
	    analyze_button.click(fn=analyze_sentiment, inputs=analysis_inputs, outputs=analysis_outputs)


	# Start the web server as soon as the module is executed.
	app.launch(share=True, debug=True)