Spaces:
Running
Running
File size: 2,653 Bytes
24830f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
import torch
@st.cache_resource
def _load_translation_pipeline(model_name):
    """Load and cache a MarianMT tokenizer/model pair for *model_name*.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps a single loaded copy per model name across
    reruns so the weights are not re-instantiated each time.
    """
    return (
        MarianTokenizer.from_pretrained(model_name),
        MarianMTModel.from_pretrained(model_name),
    )


# English -> French translation assets.
model_name_fr = 'Helsinki-NLP/opus-mt-en-fr'
tokenizer_fr, model_fr = _load_translation_pipeline(model_name_fr)

# English -> Hindi translation assets.
model_name_hi = 'Helsinki-NLP/opus-mt-en-hi'
tokenizer_hi, model_hi = _load_translation_pipeline(model_name_hi)
def translate_en_to_fr(text):
    """Return the French translation of the English string ``text``.

    Uses the module-level ``tokenizer_fr``/``model_fr`` pair; generation
    runs under ``torch.no_grad()`` because inference needs no gradients.
    """
    batch = tokenizer_fr(text, return_tensors='pt')
    with torch.no_grad():
        generated_ids = model_fr.generate(**batch)
    first_sequence = generated_ids[0]
    return tokenizer_fr.decode(first_sequence, skip_special_tokens=True)
def translate_en_to_hi(text):
    """Return the Hindi translation of the English string ``text``.

    Mirrors ``translate_en_to_fr`` but uses the module-level
    ``tokenizer_hi``/``model_hi`` pair.
    """
    batch = tokenizer_hi(text, return_tensors='pt')
    with torch.no_grad():
        generated_ids = model_hi.generate(**batch)
    first_sequence = generated_ids[0]
    return tokenizer_hi.decode(first_sequence, skip_special_tokens=True)
def main():
    """Streamlit UI: translate a 10-letter English word to French and Hindi.

    Reads a single text input; when it is exactly 10 alphabetic characters,
    shows both translations, otherwise shows a validation message.
    """
    st.title("Simultaneous Translation: English to French and Hindi")
    st.write("Enter a 10-letter English word to see translations:")
    text = st.text_input("English Word", "")
    # Strip surrounding whitespace so stray spaces don't fail validation.
    word = text.strip()
    # The UI asks for a 10-LETTER word, so require alphabetic characters —
    # a bare len() check would accept e.g. "12345!@#$%".
    if word and len(word) == 10 and word.isalpha():
        st.write("English to French:", translate_en_to_fr(word))
        st.write("English to Hindi:", translate_en_to_hi(word))
    elif word:
        st.write("Please enter exactly 10 letters.")


if __name__ == "__main__":
    main()
# Tiny parallel evaluation corpus: each record maps an English source word
# ("en") to its reference French ("fr") and Hindi ("hi") translations.
dataset = [
    {"en": "translate", "fr": "traduire", "hi": "अनुवाद"},
    {"en": "education", "fr": "éducation", "hi": "शिक्षा"},
    # Add more examples
]
from sklearn.metrics import accuracy_score
def evaluate_translation_model(model, tokenizer, test_data, target_lang):
    """Return the exact-match accuracy of *model* on *test_data*.

    Parameters:
        model: a HuggingFace seq2seq model exposing ``generate`` —
            presumably a MarianMTModel (TODO confirm against callers).
        tokenizer: the tokenizer matching *model*.
        test_data: list of dicts with an ``"en"`` source key and a
            *target_lang* reference key.
        target_lang: key of the reference translation (``"fr"`` or ``"hi"``).

    Returns the fraction of examples whose generated translation equals the
    reference exactly after whitespace stripping, or 0.0 for empty input.

    NOTE(review): exact-match accuracy is a very strict metric for machine
    translation; a BLEU/chrF score would be more informative.
    """
    # Guard the empty case explicitly instead of dividing by zero /
    # relying on sklearn's behavior for zero-length inputs.
    if not test_data:
        return 0.0
    matches = 0
    for example in test_data:
        inputs = tokenizer(example["en"], return_tensors='pt')
        with torch.no_grad():  # inference only — no gradients needed
            generated = model.generate(**inputs)
        prediction = tokenizer.decode(generated[0], skip_special_tokens=True)
        if prediction.strip() == example[target_lang].strip():
            matches += 1
    # Mean of exact matches — identical value to
    # sklearn.metrics.accuracy_score on the stripped string lists, without
    # pulling in scikit-learn for a simple average.
    return matches / len(test_data)
def evaluate_models():
    """Print exact-match accuracy for both translation directions.

    Evaluates the module-level French and Hindi models against the
    module-level ``dataset`` and prints each score as a percentage.
    """
    # Evaluate English to French
    accuracy_fr = evaluate_translation_model(model_fr, tokenizer_fr, dataset, "fr")
    print(f"Accuracy for English to French translation: {accuracy_fr*100:.2f}%")
    # Evaluate English to Hindi
    accuracy_hi = evaluate_translation_model(model_hi, tokenizer_hi, dataset, "hi")
    print(f"Accuracy for English to Hindi translation: {accuracy_hi*100:.2f}%")


# NOTE(review): runs at import time; the stray trailing "|" scrape artifact
# after this call (a syntax error) has been removed.
evaluate_models()