|
import streamlit as st |
|
from transformers import pipeline, AutoModelForMaskedLM, AutoTokenizer |
|
|
|
|
|
# CLTK is an optional dependency. `nlp_lat` stays None when the package (or
# anything it imports while building the Latin pipeline) cannot be imported,
# and the rest of the script checks for None before running the analysis.
nlp_lat = None
try:
    from cltk import NLP

    nlp_lat = NLP(language="lat")
except ImportError:
    # Deliberate fallback, not a swallowed error: the UI reports the missing
    # package later, where the morphological analysis would be displayed.
    pass
|
|
|
|
|
# Page header.
st.title("Completamento di parole in testi Latino Antico con Analisi Morfologica")

# Sample sentences the user can copy into the input box; each one contains a
# single "[MASK]" placeholder marking the word to be predicted.
st.write("Esempi di testo:")
for example_sentence in (
    "Asdrubal, frater Annibalis, qui secundo Punico bello [MASK] ingentibus copiis ab Hispania veniens ...",
    "hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt ...",
    "Lorem ipsum dolor sit amet, [MASK] adipiscing elit.",
    "Populus Romanus cum Macedonibus [MASK] ter gessit",
):
    st.write(example_sentence)

# Free-text input, pre-filled with one of the sample sentences.
default_sentence = "Lorem ipsum dolor sit amet, [MASK] adipiscing elit."
input_text = st.text_input("Testo:", value=default_sentence)
|
|
|
|
|
|
|
@st.cache_resource
def _load_fill_mask(model_name):
    """Load the tokenizer, masked-LM model and fill-mask pipeline once.

    Streamlit re-executes the whole script on every user interaction, so
    without caching the model weights would be reloaded on each rerun.
    ``st.cache_resource`` keeps the loaded objects alive across reruns.

    Args:
        model_name: Hugging Face Hub identifier of the masked language model.

    Returns:
        A ``(tokenizer, model, fill_mask_pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
    return tokenizer, model, fill_mask


# The three module-level names are kept so the rest of the script is unaffected.
tokenizer_roberta, model_roberta, fill_mask_roberta = _load_fill_mask(
    "Cicciokr/Roberta-Base-Latin-Uncased"
)
|
|
|
if input_text:
    # The UI advertises "[MASK]" as the placeholder, while the pipeline is
    # called with "<mask>", so translate before predicting.
    input_text_roberta = input_text.replace("[MASK]", "<mask>")

    if input_text_roberta.count("<mask>") != 1:
        # The fill-mask pipeline raises when the input has no mask token and
        # returns a nested list (one list per mask) when it has several; the
        # rendering below only handles the single-mask result, so report the
        # problem to the user instead of crashing.
        st.warning("Il testo deve contenere esattamente un segnaposto [MASK].")
    else:
        predictions_roberta = fill_mask_roberta(input_text_roberta)

        # One entry per candidate word, followed by the completed sentence.
        st.subheader("Risultati delle previsioni:")
        for rank, pred_roberta in enumerate(predictions_roberta, start=1):
            st.write(
                f"**Previsione {rank}:** "
                f"Parola: {pred_roberta['token_str']} | "
                f"Probabilità: {pred_roberta['score']:.4f}"
            )
            st.write(f"Sequence: {pred_roberta['sequence']}")
            st.write("---")

        if nlp_lat is not None:
            st.subheader("Analisi Morfologica CLTK delle frasi predette")

            for rank, pred_roberta in enumerate(predictions_roberta, start=1):
                # The predicted token string may carry surrounding whitespace;
                # strip it so the substituted sentence has no doubled spaces.
                predicted_word = pred_roberta["token_str"].strip()
                predicted_text = input_text_roberta.replace("<mask>", predicted_word)

                doc = nlp_lat(predicted_text)

                st.write(f"**Analisi frase {rank}:** {predicted_text}")
                # Iterate the annotated Word objects: `doc.tokens` only holds
                # the plain token strings, which have no lemma/upos/features.
                for word in doc.words:
                    st.write(
                        f"- **Token**: {word.string}\n"
                        f" - Lemma: {word.lemma}\n"
                        f" - UPOS: {word.upos}\n"
                        f" - Caratteristiche morfologiche: {word.features}\n"
                    )
                st.write("---")
        else:
            st.write(
                "**CLTK non installato o non importato correttamente.** "
                "Esegui `pip install cltk` e riavvia l'app."
            )