import io
import os
import re
import tempfile

import chardet
import docx
import fitz  # PyMuPDF
import gradio as gr
import markdown2
import nltk
import torch
import whisper
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize
from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM

nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)  # required by newer NLTK releases

# Device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model configuration: each direction maps to a LocaleNLP checkpoint and the
# Marian-style target-language tag prepended to every source sentence.
MODELS = {
    "english_wolof": {
        "model_name": "LocaleNLP/localenlp-eng-wol-0.03",
        "target_tag": ">>wol<<"
    },
    "wolof_english": {
        "model_name": "LocaleNLP/localenlp-wol-eng-0.03",
        "target_tag": ">>eng<<"
    },
    "english_hausa": {
        "model_name": "LocaleNLP/localenlp-eng-hau-0.01",
        "target_tag": ">>hau<<"
    },
    "hausa_english": {
        "model_name": "LocaleNLP/localenlp-hau-eng-0.01",
        "target_tag": ">>eng<<"
    }
}

# Globals: models are loaded lazily and cached, so switching language pairs
# only triggers a reload when the pair actually changes.
translator = None
current_model = None
whisper_model = None
HF_TOKEN = os.getenv("HF_TOKEN")


def load_translation_model(input_lang, output_lang):
    """Load (or reuse) the translation pipeline for the requested language pair."""
    global translator, current_model
    model_key = f"{input_lang.lower()}_{output_lang.lower()}"
    if model_key not in MODELS:
        raise ValueError(f"Translation from {input_lang} to {output_lang} is not supported")
    if current_model != model_key or translator is None:
        model_config = MODELS[model_key]
        model = AutoModelForSeq2SeqLM.from_pretrained(
            model_config["model_name"], token=HF_TOKEN
        ).to(device)
        tokenizer = MarianTokenizer.from_pretrained(model_config["model_name"], token=HF_TOKEN)
        translator = {
            "pipeline": pipeline(
                "translation",
                model=model,
                tokenizer=tokenizer,
                device=0 if device.type == 'cuda' else -1,
            ),
            "target_tag": model_config["target_tag"],
        }
        current_model = model_key
    return translator


def load_whisper_model():
    """Load the Whisper ASR model once and cache it."""
    global whisper_model
    if whisper_model is None:
        whisper_model = whisper.load_model("base")
    return whisper_model


def transcribe_audio(audio_file):
    """Transcribe an audio file (path or file-like object) with Whisper."""
    model = load_whisper_model()
    if isinstance(audio_file, str):
        audio_path = audio_file
    else:
        # Whisper needs a path on disk, so spool file-like input to a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            tmp.write(audio_file.read())
            audio_path = tmp.name
    result = model.transcribe(audio_path)
    if not isinstance(audio_file, str):
        os.remove(audio_path)
    return result["text"]


def extract_text_from_file(uploaded_file):
    """Extract plain text from PDF, DOCX, HTML, Markdown, SRT, or plain-text files."""
    if isinstance(uploaded_file, str):
        file_path = uploaded_file
        file_type = file_path.split('.')[-1].lower()
        with open(file_path, "rb") as f:
            content = f.read()
    else:
        file_type = uploaded_file.name.split('.')[-1].lower()
        content = uploaded_file.read()

    if file_type == "pdf":
        with fitz.open(stream=content, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    elif file_type == "docx":
        # Parse from the bytes already read; re-reading the upload object
        # would fail because its cursor is at EOF after the read() above.
        doc = docx.Document(io.BytesIO(content))
        return "\n".join(para.text for para in doc.paragraphs)
    else:
        # Text-based formats: sniff the encoding, falling back to UTF-8.
        encoding = chardet.detect(content)['encoding']
        content = content.decode(encoding or "utf-8", errors='ignore')
        if file_type in ("html", "htm"):
            soup = BeautifulSoup(content, "html.parser")
            return soup.get_text()
        elif file_type == "md":
            html = markdown2.markdown(content)
            soup = BeautifulSoup(html, "html.parser")
            return soup.get_text()
        elif file_type == "srt":
            # Strip subtitle indices and timestamp lines, keeping only the dialogue.
            return re.sub(r"\d+\n\d{2}:\d{2}:\d{2},\d{3} --> .*?\n", "", content)
        elif file_type in ("txt", "text"):
            return content
        else:
            raise ValueError("Unsupported file type")


def translate(text, input_lang, output_lang):
    """Translate text paragraph by paragraph, sentence by sentence."""
    translator = load_translation_model(input_lang, output_lang)
    lang_tag = translator["target_tag"]
    translation_pipeline = translator["pipeline"]

    paragraphs = text.split("\n")
    translated_output = []
    with torch.no_grad():
        for para in paragraphs:
            if not para.strip():
                translated_output.append("")
                continue
            # NLTK's sentence tokenizer handles abbreviations and keeps
            # terminal punctuation, unlike a naive split on ". ".
            sentences = [s.strip() for s in sent_tokenize(para) if s.strip()]
            formatted = [f"{lang_tag} {s}" for s in sentences]
            results = translation_pipeline(
                formatted,
                max_length=5000,
                num_beams=5,
                early_stopping=True,
                no_repeat_ngram_size=3,
                repetition_penalty=1.5,
                length_penalty=1.2,
            )
            # Uppercase only the first character; str.capitalize() would
            # lowercase the rest of the sentence.
            translated_sentences = [
                r['translation_text'][:1].upper() + r['translation_text'][1:]
                for r in results
            ]
            translated_output.append(' '.join(translated_sentences))
    return "\n".join(translated_output)


def process_input(input_mode, text, audio_file, file_obj, input_lang):
    """Resolve the raw input (typed text, audio, or document) to plain text."""
    input_text = ""
    if input_mode == "Text":
        input_text = text
    elif input_mode == "Audio":
        if audio_file is not None:
            input_text = transcribe_audio(audio_file)
    elif input_mode == "File":
        if file_obj is not None:
            input_text = extract_text_from_file(file_obj)
    return input_text


def translate_and_return(text, input_lang, output_lang):
    if not text.strip():
        return "No input text to translate."
    return translate(text, input_lang, output_lang)


def update_input_lang_dropdown(input_mode):
    # Whisper transcribes to English here, so lock the input language for audio.
    if input_mode == "Audio":
        return gr.update(value="English", interactive=False)
    return gr.update(interactive=True)


# Gradio UI components
with gr.Blocks() as demo:
    gr.Markdown("## LocaleNLP Translator")
    gr.Markdown("Translate between English, Wolof, and Hausa using LocaleNLP models.")

    with gr.Row():
        input_mode = gr.Radio(choices=["Text", "Audio", "File"], label="Select input mode", value="Text")
    with gr.Row():
        input_lang = gr.Dropdown(choices=["English", "Wolof", "Hausa"], label="Input Language", value="English")
        output_lang = gr.Dropdown(choices=["English", "Wolof", "Hausa"], label="Output Language", value="Hausa")

    input_text = gr.Textbox(label="Enter text", lines=10, visible=True)
    audio_input = gr.Audio(label="Upload audio (.wav, .mp3, .m4a)", type="filepath", visible=False)
    file_input = gr.File(
        file_types=['.pdf', '.docx', '.html', '.htm', '.md', '.srt', '.txt'],
        label="Upload document",
        visible=False,
    )
    extracted_text = gr.Textbox(label="Extracted / Transcribed Text", lines=10, interactive=False)
    translate_button = gr.Button("Translate")
    output_text = gr.Textbox(label="Translated Text", lines=10, interactive=False)

    def update_visibility(mode):
        return {
            input_text: gr.update(visible=(mode == "Text")),
            audio_input: gr.update(visible=(mode == "Audio")),
            file_input: gr.update(visible=(mode == "File")),
            extracted_text: gr.update(value="", visible=True),
            output_text: gr.update(value=""),
        }

    input_mode.change(
        fn=update_visibility,
        inputs=input_mode,
        outputs=[input_text, audio_input, file_input, extracted_text, output_text],
    )
    input_mode.change(fn=update_input_lang_dropdown, inputs=input_mode, outputs=input_lang)

    def handle_process(mode, text, audio, file_obj, in_lang):
        try:
            extracted = process_input(mode, text, audio, file_obj, in_lang)
            return extracted, ""
        except Exception as e:
            return "", f"Error: {str(e)}"

    def handle_translate(text, in_lang, out_lang):
        return translate_and_return(text, in_lang, out_lang)

    # Chain the two steps with .then() so translation runs only after the
    # extracted text has been written back; two independent click listeners
    # on the same button would race and translate the stale textbox value.
    translate_button.click(
        fn=handle_process,
        inputs=[input_mode, input_text, audio_input, file_input, input_lang],
        outputs=[extracted_text, output_text],
    ).then(
        fn=handle_translate,
        inputs=[extracted_text, input_lang, output_lang],
        outputs=output_text,
    )

demo.launch()
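
# --- Example (not executed; demo.launch() blocks above) ----------------------
# A minimal sketch of calling the translation path directly, without the UI.
# It assumes HF_TOKEN is set and grants access to the LocaleNLP checkpoints;
# the sample sentence is purely illustrative.
#
#   result = translate("Good morning. How is your family?", "English", "Hausa")
#   print(result)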