import nltk
nltk.download('punkt')

import textblob.download_corpora
textblob.download_corpora.download_all()

import pandas as pd
import re
from textblob import TextBlob
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import tempfile
from wordcloud import WordCloud


# Text cleaning function
def clean_text(text):
    """Normalise a raw text value before sentiment scoring.

    Processing steps, in order:
      1. Missing values (as judged by ``pd.isnull``) become an empty string.
      2. URLs (tokens starting with ``http`` or ``www``) are removed.
      3. ``@mentions`` are removed entirely; ``#`` is stripped but the
         hashtag word itself is kept.
      4. Every remaining character that is not an ASCII letter, digit or
         whitespace is dropped.
      5. The result is lower-cased and stripped of surrounding whitespace.

    Args:
        text: Any scalar value; it is converted with ``str()`` before cleaning.

    Returns:
        The cleaned string (possibly empty).
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    # "http\S+" already covers "https...", so no separate alternative is needed.
    text = re.sub(r"http\S+|www\S+", "", text)
    # The mention pattern must be "@\w+" (an "@" followed by word characters).
    # The previous pattern "\@w+" only matched "@" followed by literal "w"s,
    # which left the user name of every mention in the text.
    text = re.sub(r"@\w+|#", "", text)
    text = re.sub(r"[^A-Za-z0-9\s]+", "", text)
    return text.lower().strip()

# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Map a polarity score to a sentiment label.

    The positive check runs first, so it wins if both thresholds would match.

    Args:
        polarity: Polarity score to classify.
        pos_thresh: Scores at or above this value are "Positive".
        neg_thresh: Scores at or below this value are "Negative".

    Returns:
        "Positive", "Negative" or "Neutral".
    """
    if polarity >= pos_thresh:
        return "Positive"
    if polarity <= neg_thresh:
        return "Negative"
    # Anything strictly between the two thresholds (or not comparable, e.g. NaN).
    return "Neutral"

# Generate word cloud
def generate_wordcloud(text_series, title):
    """Build a matplotlib figure showing a word cloud for a series of texts.

    NOTE: a function with the same name is defined again further down this
    file; at runtime the later definition is the one bound to the name.

    Args:
        text_series: pandas Series of text values; missing entries are ignored.
        title: Title drawn above the figure (also used in the placeholder text).

    Returns:
        A matplotlib Figure. When there is nothing to draw, the figure contains
        a centred "No data for <title>" message instead of a word cloud.
    """
    text = " ".join(text_series.dropna().astype(str))

    cloud = None
    if text.strip():
        try:
            cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no usable words remain after its
            # own tokenising/stop-word filtering (e.g. text made only of stop
            # words). Treat that the same as empty input.
            cloud = None

    fig = plt.figure(figsize=(6, 4))
    if cloud is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig


# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Run TextBlob sentiment analysis on one column of an uploaded CSV.

    Args:
        file: CSV source handed to ``pd.read_csv`` (whatever the upload
            component provides).
        text_column: Name of the column holding the text to analyse.
        pos_thresh: Polarity at or above this value is labelled "Positive".
        neg_thresh: Polarity at or below this value is labelled "Negative".

    Returns:
        An 8-tuple, one item per output component wired to the button:
        (status message, preview DataFrame of the first 10 rows,
        sentiment-label count figure, polarity histogram figure,
        path of the full-results CSV, positive word cloud figure,
        negative word cloud figure, neutral word cloud figure).
        On failure the status message describes the problem and the other
        seven items are ``None``.
    """
    # The click handler expects exactly eight return values; error paths must
    # pad with None or the UI raises instead of showing the message.
    no_results = (None,) * 7

    try:
        df = pd.read_csv(file)
    except Exception as e:  # UI boundary: report any read failure to the user
        return (f"❌ Error reading CSV file: {e}",) + no_results

    if text_column not in df.columns:
        return ("⚠️ Selected column not found in the uploaded file.",) + no_results

    df["clean_text"] = df[text_column].apply(clean_text)

    # Score each text once and reuse the result for both metrics.
    sentiments = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = pd.Series(
        [s.polarity for s in sentiments], index=df.index, dtype=float
    )
    df["subjectivity"] = pd.Series(
        [s.subjectivity for s in sentiments], index=df.index, dtype=float
    )
    df["sentiment"] = df["polarity"].apply(lambda p: get_sentiment_label(p, pos_thresh, neg_thresh))

    # Plot sentiment distribution
    fig1 = plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False)
    plt.title("Sentiment Label Distribution")
    plt.tight_layout()

    # Plot polarity distribution
    fig2 = plt.figure(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue")
    plt.title("Polarity Score Distribution")
    plt.tight_layout()

    # Preview table
    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

    # Word Clouds per sentiment
    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

    # Save full results to a unique temporary file so that concurrent sessions
    # do not overwrite each other's download (a fixed file name would be shared).
    with tempfile.NamedTemporaryFile(
        prefix="TextBlob_sentiment_results_", suffix=".csv", delete=False
    ) as tmp:
        output_file_path = tmp.name
    df.to_csv(output_file_path, index=False)

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc
    )


# Dropdown update function
def get_text_columns(file):
    """Populate the column dropdown from the uploaded CSV.

    Args:
        file: CSV source handed to ``pd.read_csv``; may be ``None`` (for
            example when no file is present in the upload component).

    Returns:
        A ``gr.update`` for the dropdown: the object-dtype column names as
        choices with the first one selected, or an empty dropdown (with a
        warning label where applicable) when no usable column is available.
    """
    default_label = "Select Text Column"

    if file is None:
        # Nothing uploaded: empty the dropdown instead of failing in read_csv.
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(file)
    except Exception:  # UI boundary: surface the failure in the dropdown label
        return gr.update(choices=[], value=None, label="⚠️ Could not read CSV file!")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
    # Restore the normal label in case an earlier upload replaced it with a warning.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

# Word cloud function 
def generate_wordcloud(text_series, title):
    """Render a word cloud for the given texts as a matplotlib figure.

    This definition comes after an earlier function of the same name in this
    file, so it is the one in effect when the analysis runs. It therefore has
    to cope with empty input itself: a sentiment class with no rows would
    otherwise make ``WordCloud.generate`` raise and abort the whole analysis.

    Args:
        text_series: pandas Series of text values; missing entries are ignored.
        title: Title drawn above the figure (also used in the placeholder text).

    Returns:
        A matplotlib Figure holding either the word cloud or, when there are
        no words to draw, a centred "No data for <title>" message.
    """
    text = " ".join(text_series.dropna().astype(str))

    wc = None
    if text.strip():
        try:
            wc = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # Raised by WordCloud when no usable words are left after its own
            # filtering (e.g. only stop words); fall back to the placeholder.
            wc = None

    fig = plt.figure(figsize=(6, 4))
    if wc is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig


# Gradio Interface
with gr.Blocks() as app:
    # Page header: title, short usage hint and citation.
    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    # Inputs: the CSV upload and the dropdown for choosing the text column.
    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Refresh the dropdown choices whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    # Classification thresholds passed to analyze_sentiment as pos_thresh / neg_thresh.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    # Output components, filled from the values analyze_sentiment returns.
    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")

    csv_download = gr.File(label="Download Full Results")

    # The order of `outputs` must match the order of the values returned by
    # analyze_sentiment: status, preview table, two plots, CSV path, then the
    # positive / negative / neutral word clouds (eight items in total).
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud
        ]
    )


# Start the app with the share and debug options enabled.
app.launch(share=True, debug=True)