# app.py
import os
import requests
import wikipedia
import gradio as gr
import torch
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor
from typing import List

from transformers import (
    SeamlessM4TTokenizer,
    SeamlessM4TProcessor,
    SeamlessM4TForTextToText,
    pipeline as hf_pipeline,
)

# ── 1) Model setup ────────────────────────────────────────────────────────────
MODEL = "facebook/hf-seamless-m4t-medium"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = SeamlessM4TTokenizer.from_pretrained(MODEL, use_fast=False)
processor = SeamlessM4TProcessor.from_pretrained(MODEL, tokenizer=tokenizer)
m4t_model = SeamlessM4TForTextToText.from_pretrained(MODEL).to(device)
if device == "cuda":
    m4t_model = m4t_model.half()  # FP16 for faster inference on GPU
m4t_model.eval()


def translate_m4t(text: str, src_iso3: str, tgt_iso3: str, auto_detect=False) -> str:
    # With auto_detect, pass src_lang=None so the tokenizer keeps its default source language.
    src = None if auto_detect else src_iso3
    inputs = processor(text=text, src_lang=src, return_tensors="pt").to(device)
    tokens = m4t_model.generate(**inputs, tgt_lang=tgt_iso3)
    return processor.decode(tokens[0].tolist(), skip_special_tokens=True)


def translate_m4t_batch(
    texts: List[str], src_iso3: str, tgt_iso3: str, auto_detect=False
) -> List[str]:
    src = None if auto_detect else src_iso3
    inputs = processor(
        text=texts, src_lang=src, return_tensors="pt", padding=True
    ).to(device)
    tokens = m4t_model.generate(
        **inputs,
        tgt_lang=tgt_iso3,
        max_new_tokens=60,  # keep outputs short: these are names and two-sentence summaries
        num_beams=1,        # greedy decoding for speed
    )
    return processor.batch_decode(tokens, skip_special_tokens=True)


# ── 2) NER pipeline (aggregation_strategy replaces the deprecated grouped_entities) ─
ner = hf_pipeline(
    "ner",
    model="dslim/bert-base-NER-uncased",
    aggregation_strategy="simple",
)

# ── 3) Caching helpers ──────────────────────────────────────────────────────
@lru_cache(maxsize=256)
def geocode_cache(place: str):
    r = requests.get(
        "https://nominatim.openstreetmap.org/search",
        params={"q": place, "format": "json", "limit": 1},
        headers={"User-Agent": "iVoiceContext/1.0"},  # Nominatim requires a User-Agent
        timeout=10,
    ).json()
    if not r:
        return None
    return {"lat": float(r[0]["lat"]), "lon": float(r[0]["lon"])}


@lru_cache(maxsize=256)
def fetch_osm_cache(lat: float, lon: float, osm_filter: str, limit: int = 5):
    # Overpass QL: named nodes/ways matching the filter within 1 km of the point.
    payload = f"""
    [out:json][timeout:25];
    (
      node{osm_filter}(around:1000,{lat},{lon});
      way{osm_filter}(around:1000,{lat},{lon});
    );
    out center {limit};
    """
    resp = requests.post(
        "https://overpass-api.de/api/interpreter",
        data={"data": payload},
        timeout=30,
    )
    elems = resp.json().get("elements", [])
    return [
        {"name": e["tags"]["name"]}
        for e in elems
        if e.get("tags", {}).get("name")
    ]


@lru_cache(maxsize=256)
def wiki_summary_cache(name: str) -> str:
    try:
        return wikipedia.summary(name, sentences=2)
    except Exception:  # disambiguation errors, missing pages, network failures
        return "No summary available."
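
# Quick smoke test for the helpers above (a sketch: outputs are illustrative,
# and the first call triggers the model download). Uncomment to try locally:
#
#   print(translate_m4t("Hello, world!", "eng", "fra"))
#   geo = geocode_cache("Paris")  # e.g. {"lat": 48.85..., "lon": 2.35...}
#   if geo:
#       print(fetch_osm_cache(geo["lat"], geo["lon"], '["tourism"="attraction"]'))
#   print(wiki_summary_cache("Eiffel Tower"))
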
# ── 4) Per-entity worker ────────────────────────────────────────────────────
def process_entity(ent) -> dict:
    w = ent["word"]
    lbl = ent["entity_group"]

    if lbl == "LOC":
        geo = geocode_cache(w)
        if not geo:
            return {
                "text": w,
                "label": lbl,
                "type": "location",
                "error": "could not geocode",
            }
        restaurants = fetch_osm_cache(geo["lat"], geo["lon"], '["amenity"="restaurant"]')
        attractions = fetch_osm_cache(geo["lat"], geo["lon"], '["tourism"="attraction"]')
        return {
            "text": w,
            "label": lbl,
            "type": "location",
            "geo": geo,
            "restaurants": restaurants,
            "attractions": attractions,
        }

    # PERSON / ORG / MISC → Wikipedia summary
    summary = wiki_summary_cache(w)
    return {"text": w, "label": lbl, "type": "wiki", "summary": summary}


# ── 5) Main function ──────────────────────────────────────────────────────────
def get_context(
    text: str,
    source_lang: str,
    output_lang: str,
    auto_detect: bool,
):
    # a) Ensure English text for the English NER model
    if auto_detect or source_lang != "eng":
        en = translate_m4t(text, source_lang, "eng", auto_detect=auto_detect)
    else:
        en = text

    # b) Run NER and de-duplicate entities by surface form
    ner_out = ner(en)
    seen = set()
    unique_ents = []
    for ent in ner_out:
        w = ent["word"]
        if w in seen:
            continue
        seen.add(w)
        unique_ents.append(ent)

    # c) Fetch geodata / Wikipedia summaries in parallel (all I/O-bound)
    entities = []
    with ThreadPoolExecutor(max_workers=8) as exe:
        futures = [exe.submit(process_entity, ent) for ent in unique_ents]
        for fut in futures:
            entities.append(fut.result())

    # d) Batch-translate the English fields into the requested output language
    if output_lang != "eng":
        to_translate = []
        translations_info = []  # (kind, entity_index[, item_index]) per string
        for i, e in enumerate(entities):
            if e["type"] == "wiki":
                translations_info.append(("summary", i))
                to_translate.append(e["summary"])
            elif e["type"] == "location":
                # .get(): geocode failures carry an "error" key and no POI lists
                for j, r in enumerate(e.get("restaurants", [])):
                    translations_info.append(("restaurant", i, j))
                    to_translate.append(r["name"])
                for j, a in enumerate(e.get("attractions", [])):
                    translations_info.append(("attraction", i, j))
                    to_translate.append(a["name"])

        if to_translate:  # skip the batch call when there is nothing to translate
            translated = translate_m4t_batch(to_translate, "eng", output_lang)
            for txt, info in zip(translated, translations_info):
                kind = info[0]
                if kind == "summary":
                    _, ei = info
                    entities[ei]["summary"] = txt
                elif kind == "restaurant":
                    _, ei, ri = info
                    entities[ei]["restaurants"][ri]["name"] = txt
                elif kind == "attraction":
                    _, ei, ai = info
                    entities[ei]["attractions"][ai]["name"] = txt

    return {"entities": entities}


# ── 6) Gradio interface ───────────────────────────────────────────────────────
iface = gr.Interface(
    fn=get_context,
    inputs=[
        gr.Textbox(lines=3, placeholder="Enter text…"),
        gr.Textbox(label="Source Language (ISO 639-3)"),
        gr.Textbox(label="Target Language (ISO 639-3)"),
        gr.Checkbox(label="Auto-detect source language"),
    ],
    outputs="json",
    title="iVoice Context-Aware",
    description="Returns only the detected entities and their related info.",
).queue()  # ← removed unsupported kwargs

if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
        share=True,
    )
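
# Example client call (a sketch, not part of the app): with the server running,
# the endpoint can be exercised programmatically via gradio_client; "/predict"
# is Gradio's default api_name for a single-function Interface.
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860")
#   result = client.predict(
#       "Marie Curie moved to Paris.",  # text
#       "eng",                          # source language (ISO 639-3)
#       "fra",                          # target language (ISO 639-3)
#       False,                          # auto-detect source language
#       api_name="/predict",
#   )
#   print(result)  # → {"entities": [...]}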