Spaces:

pvaluedotone
/

flair_sentiment_analysis

Sleeping

App Files Files Community

flair_sentiment_analysis / app.py

pvaluedotone

Update app.py

ed5c736 verified about 1 month ago

raw

history blame contribute delete

5.17 kB

	import pandas as pd
	import re
	import tempfile
	import gradio as gr
	import matplotlib.pyplot as plt
	import seaborn as sns
	from wordcloud import WordCloud
	from flair.models import TextClassifier
	from flair.data import Sentence
	from flair.nn import Classifier

	# Load FLAIR sentiment model.
	# This runs once at import time; the resulting module-level `classifier`
	# is the object analyze_sentiment_flair() calls `.predict()` on.
	# NOTE(review): loading "sentiment" presumably fetches the pretrained
	# model on first use, which would make start-up slow — confirm.
	classifier = Classifier.load("sentiment")

	# Minimal text cleaning
	def clean_text_for_flair(text):
	    """Return *text* with URLs, HTML tags and redundant whitespace removed.

	    Args:
	        text: Raw cell value. Missing values (``None`` / ``NaN``) yield
	            ``""``; any other non-string value is converted with ``str()``
	            so numeric cells do not crash the regex calls.

	    Returns:
	        The cleaned string with surrounding whitespace stripped.
	    """
	    if pd.isnull(text):
	        return ""
	    text = str(text)
	    # The alternation must be a bare "|". The previous pattern used "\|",
	    # which matches a literal pipe character, so plain "http..." and
	    # "www..." URLs were never removed.
	    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
	    text = re.sub(r"<.*?>", "", text)  # remove HTML tags
	    text = re.sub(r"\s+", " ", text).strip()  # normalize whitespace
	    return text

	# Generate word clouds
	def _save_wordcloud(text, colormap, title, out_path):
	    """Render *text* as a word-cloud PNG and return the file path.

	    Returns ``None`` instead of a path when *text* contains no usable
	    words, because ``WordCloud.generate`` raises ``ValueError`` in that
	    case and a missing image is preferable to failing the whole analysis.
	    """
	    if not text.strip():
	        return None
	    try:
	        cloud = WordCloud(
	            width=800,
	            height=400,
	            background_color='white',
	            colormap=colormap,
	        ).generate(text)
	    except ValueError:
	        # Raised when no words are left to plot (e.g. only stop words).
	        return None

	    fig = plt.figure(figsize=(10, 5))
	    try:
	        plt.imshow(cloud, interpolation='bilinear')
	        plt.axis("off")
	        plt.title(title)
	        plt.tight_layout()
	        plt.savefig(out_path)
	    finally:
	        # Always release the figure so a failed save does not leak it.
	        plt.close(fig)
	    return out_path


	def generate_wordclouds(df):
	    """Create word-cloud images for the positive and negative rows of *df*.

	    Args:
	        df: DataFrame expected to contain a ``"sentiment"`` column (rows
	            labelled ``"POSITIVE"`` / ``"NEGATIVE"`` are used) and a
	            ``"clean_text"`` column holding the text to visualise.

	    Returns:
	        ``(positive_path, negative_path)``. Each element is the path of the
	        PNG written for that sentiment, or ``None`` when the required
	        columns are missing or that sentiment has no text to plot.
	    """
	    if "sentiment" not in df.columns or "clean_text" not in df.columns:
	        return None, None

	    def joined_text(label):
	        rows = df.loc[df["sentiment"] == label, "clean_text"]
	        return " ".join(rows.astype(str))

	    pos_path = _save_wordcloud(
	        joined_text("POSITIVE"), "Greens", "Positive Word Cloud", "positive_wordcloud.png"
	    )
	    neg_path = _save_wordcloud(
	        joined_text("NEGATIVE"), "Reds", "Negative Word Cloud", "negative_wordcloud.png"
	    )
	    return pos_path, neg_path

	# Main analysis function
	def _predict_sentiments(texts, batch_size=32):
	    """Classify *texts* with the module-level FLAIR ``classifier``.

	    Texts are processed in chunks of *batch_size* so the model runs on
	    batches instead of one call per row, while only one chunk of
	    ``Sentence`` objects is held at a time.

	    Returns:
	        ``(labels, scores)`` lists aligned with *texts*. Entries for texts
	        that are empty, or for which the model produced no label, are
	        ``None`` and ``NaN`` respectively.
	    """
	    labels = [None] * len(texts)
	    scores = [float("nan")] * len(texts)
	    for start in range(0, len(texts), batch_size):
	        batch = [
	            (index, Sentence(text))
	            for index, text in enumerate(texts[start:start + batch_size], start)
	            if text
	        ]
	        if not batch:
	            continue
	        classifier.predict(
	            [sentence for _, sentence in batch], mini_batch_size=batch_size
	        )
	        for index, sentence in batch:
	            # Guard against sentences the model left unlabelled; indexing
	            # labels[0] unconditionally raised IndexError for such rows.
	            if sentence.labels:
	                top = sentence.labels[0]
	                labels[index] = top.value
	                scores[index] = top.score
	    return labels, scores


	def analyze_sentiment_flair(file, text_column):
	    """Run FLAIR sentiment analysis over one column of an uploaded CSV.

	    Args:
	        file: The uploaded file, either an object exposing the path as
	            ``.name`` or a plain path string. ``None`` means nothing was
	            uploaded.
	        text_column: Name of the column in the CSV that holds the text.

	    Returns:
	        A 6-tuple ``(status, csv_path, sentiment_plot_path,
	        confidence_plot_path, positive_wordcloud_path,
	        negative_wordcloud_path)``. On failure only ``status`` is set and
	        the remaining five elements are ``None``.
	    """
	    no_outputs = (None,) * 5

	    if file is None:
	        return ("Error loading file: no file was uploaded.", *no_outputs)
	    try:
	        df = pd.read_csv(getattr(file, "name", file))
	    except Exception as e:
	        return (f"Error loading file: {e}", *no_outputs)

	    if text_column not in df.columns:
	        return ("Selected text column not found.", *no_outputs)

	    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

	    sentiments, scores = _predict_sentiments(df["clean_text"].tolist())
	    df["sentiment"] = sentiments
	    df["confidence"] = scores

	    # Save results. Writing through the open handle (rather than re-opening
	    # tmp.name) avoids having the same file open twice.
	    with tempfile.NamedTemporaryFile(
	        mode='w', delete=False, suffix=".csv", newline="", encoding="utf-8"
	    ) as tmp:
	        df.to_csv(tmp, index=False)
	        csv_path = tmp.name

	    # Sentiment count plot
	    plt.figure(figsize=(6, 4))
	    sns.countplot(data=df, x="sentiment", palette="pastel")
	    plt.title("Sentiment Distribution")
	    plt.tight_layout()
	    sentiment_plot_path = "sentiment_flair_plot.png"
	    plt.savefig(sentiment_plot_path)
	    plt.close()

	    # Confidence score distribution
	    plt.figure(figsize=(6, 4))
	    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
	    plt.title("Confidence Score Distribution")
	    plt.tight_layout()
	    confidence_plot_path = "confidence_flair_plot.png"
	    plt.savefig(confidence_plot_path)
	    plt.close()

	    # Word clouds
	    pos_wc_path, neg_wc_path = generate_wordclouds(df)

	    status = f"Sentiment analysis completed on {len(df)} rows."
	    unlabelled = sum(label is None for label in sentiments)
	    if unlabelled:
	        status += f" {unlabelled} rows had no usable text and were left unlabelled."

	    return (
	        status,
	        csv_path,
	        sentiment_plot_path,
	        confidence_plot_path,
	        pos_wc_path,
	        neg_wc_path,
	    )

	# Gradio interface
	with gr.Blocks() as app:
	    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
	    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

	    with gr.Row():
	        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
	        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

	    def get_text_columns(file):
	        """Return a dropdown update listing the text columns of *file*.

	        *file* is the uploaded CSV (an object exposing ``.name`` or a path
	        string), or ``None`` when the upload was cleared. Every branch sets
	        the label explicitly so a warning from a previous upload does not
	        linger once a usable file is provided.
	        """
	        default_label = "Select Text Column"
	        if file is None:
	            return gr.update(choices=[], value=None, label=default_label)
	        try:
	            df = pd.read_csv(getattr(file, "name", file))
	        except Exception:
	            # Unreadable upload: clear the dropdown instead of crashing the UI.
	            return gr.update(choices=[], value=None, label="⚠️ Could not read CSV file")
	        text_cols = df.select_dtypes(include='object').columns.tolist()
	        if not text_cols:
	            # Also clear the value so a stale column name is not submitted.
	            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
	        return gr.update(choices=text_cols, value=text_cols[0], label=default_label)

	    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

	    analyze_btn = gr.Button("Run Sentiment Analysis")

	    output_text = gr.Textbox(label="Status")
	    file_output = gr.File(label="Download Results CSV")
	    sentiment_plot = gr.Image(label="Sentiment Distribution")
	    confidence_plot = gr.Image(label="Confidence Score Distribution")
	    wordcloud_pos = gr.Image(label="Positive Word Cloud")
	    wordcloud_neg = gr.Image(label="Negative Word Cloud")

	    # Output order must match the 6-tuple returned by analyze_sentiment_flair.
	    analyze_btn.click(
	        analyze_sentiment_flair,
	        inputs=[file_input, col_dropdown],
	        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg],
	    )

	# Start the Gradio app. This is a module-level call, so it executes
	# whenever this file is run or imported.
	# NOTE(review): share=True / debug=True look like development settings —
	# confirm they are wanted in the deployed Space.
	app.launch(share=True, debug=True)