"""Gradio demo that lists the food mentions detected in a veganuary tweet.

The tweet is normalised with pysentimiento's ``preprocess_tweet``, re-tokenised
with spaCy, and passed to the ``Recognai/veganuary_ner`` token-classification
pipeline. Only entities whose group is ``FOOD`` are shown.
"""

import spacy
import gradio as gr
from transformers import pipeline, AutoTokenizer
from pysentimiento.preprocessing import preprocess_tweet

# Models are loaded once at import time so every request reuses them.
nlp = spacy.load("en_core_web_sm")
tokenizer = AutoTokenizer.from_pretrained(
    "cardiffnlp/twitter-roberta-base",
    add_prefix_space=True,
    model_max_length=512,
)
pl = pipeline(
    "ner",
    tokenizer=tokenizer,
    model="Recognai/veganuary_ner",
    aggregation_strategy="first",
)


def ner(text: str) -> str:
    """Return the food mentions found in *text*, one per line.

    Args:
        text: Raw tweet text as typed or pasted by the user.

    Returns:
        The stripped ``word`` of every prediction whose ``entity_group`` is
        ``"FOOD"``, joined with newlines, in the order the pipeline returned
        them. An empty string when the input is blank or nothing is detected.
    """
    # Nothing to analyse: skip the preprocessing and model calls entirely.
    if not text or not text.strip():
        return ""

    cleaned = preprocess_tweet(text)

    # Feed the model spaCy tokens separated by single spaces.
    # NOTE(review): presumably this mirrors the tokenisation used when the
    # model was trained — confirm against the training setup.
    spaced = " ".join(token.text for token in nlp(cleaned))

    foods = []
    for prediction in pl(spaced):
        if prediction["entity_group"] == "FOOD":
            foods.append(prediction["word"].strip())
    return "\n".join(foods)


# NOTE(review): ``gr.inputs`` / ``gr.outputs`` and a boolean ``allow_flagging``
# look like the legacy Gradio API; they are kept as-is because the installed
# Gradio version is not visible here — confirm before upgrading Gradio.
iface = gr.Interface(
    ner,
    gr.inputs.Textbox(
        placeholder="copy&paste your veganuary tweet here ...",
        label="Tweet",
    ),
    gr.outputs.Textbox(label="List of detected food mentions in the tweet"),
    examples=[
        [
            "Fruit is delicious 😋 AND healthy 🥗! Brighten up your plate & palate "
            "with fresh watermelon, Greek yoghurt & berries, smashed avocado or "
            "lime added to water. A piece of #fruit a day keeps the doctor away! "
            "#Veganuary2022"
        ]
    ],
    allow_flagging=False,
    title="Veganuary NER",
    description="Extract food entities from veganuary tweets 😋",
)

# Launched at module level, as in the original script; no public share link.
iface.launch(share=False)