import spacy
import gradio as gr
from transformers import pipeline, AutoTokenizer
from pysentimiento.preprocessing import preprocess_tweet

# spaCy English pipeline; `ner` below uses it to split the tweet into tokens.
nlp = spacy.load("en_core_web_sm")

# Tokenizer comes from the Twitter RoBERTa base checkpoint, while the
# token-classification weights come from the veganuary NER checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "cardiffnlp/twitter-roberta-base",
    add_prefix_space=True,
    model_max_length=512,
)

# aggregation_strategy="first" makes the pipeline return grouped entities
# (dicts carrying "entity_group" and "word") rather than per-token labels.
pl = pipeline(
    task="ner",
    model="Recognai/veganuary_ner",
    tokenizer=tokenizer,
    aggregation_strategy="first",
)

def ner(text: str) -> str:
    """Return the food mentions detected in a tweet, one per line.

    The tweet is normalized with ``preprocess_tweet``, tokenized with the
    spaCy pipeline, re-joined with single spaces and passed to the NER
    pipeline. Only entities whose group is ``"FOOD"`` are kept.

    Args:
        text: Raw tweet text. Empty, whitespace-only or ``None`` input is
            tolerated.

    Returns:
        The detected mentions, stripped of surrounding whitespace and joined
        with newlines; an empty string when there is nothing to analyse or
        no food entity is found.
    """
    # Nothing to analyse: skip preprocessing and both models entirely. This
    # also keeps a cleared/None textbox value from reaching the preprocessing.
    if not text or not text.strip():
        return ""

    text = preprocess_tweet(text)
    # Feed the model spaCy tokens separated by single spaces.
    doc = nlp(text)
    text = " ".join(token.text for token in doc)
    predictions = pl(text)
    mentions = [
        pred["word"].strip()
        for pred in predictions
        if pred["entity_group"] == "FOOD"
    ]
    return "\n".join(mentions)

# Input widget: a single text box for the tweet.
tweet_box = gr.inputs.Textbox(
    placeholder="copy&paste your veganuary tweet here ...",
    label="Tweet",
)

# Output widget: the newline-separated mentions returned by `ner`.
mentions_box = gr.outputs.Textbox(
    label="List of detected food mentions in the tweet",
)

# One example row per entry; each row holds the value for the single input.
example_tweets = [
    ["Fruit is delicious 😋 AND healthy 🥗! Brighten up your plate & palate with fresh watermelon, Greek yoghurt & berries, smashed avocado or lime added to water.  A piece of #fruit a day keeps the doctor away! #Veganuary2022"]
]

iface = gr.Interface(
    fn=ner,
    inputs=tweet_box,
    outputs=mentions_box,
    examples=example_tweets,
    allow_flagging=False,
    title="Veganuary NER",
    description="Extract food entities from veganuary tweets 😋",
)

# Start the web app as soon as the module runs; share=False is passed so no
# public share link is requested.
iface.launch(share=False)