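# Streamlit demo: translate an English word into French and Hindi with pretrained
# MarianMT models, and run a small exact-match evaluation on a toy dataset.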
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
import torch

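# Load the pretrained English->French MarianMT model and tokenizer from the
# Hugging Face Hub. In a Streamlit app these loads could be wrapped in
# st.cache_resource so they happen once per session instead of on every rerun.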
model_name_fr = 'Helsinki-NLP/opus-mt-en-fr'
tokenizer_fr = MarianTokenizer.from_pretrained(model_name_fr)
model_fr = MarianMTModel.from_pretrained(model_name_fr)

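# Load the pretrained English->Hindi MarianMT model and tokenizer.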
model_name_hi = 'Helsinki-NLP/opus-mt-en-hi'
tokenizer_hi = MarianTokenizer.from_pretrained(model_name_hi)
model_hi = MarianMTModel.from_pretrained(model_name_hi)

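# Translate a single English string to French; generation runs under
# torch.no_grad() since no gradients are needed for inference.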
def translate_en_to_fr(text):
    inputs = tokenizer_fr(text, return_tensors='pt')
    with torch.no_grad():
        translated = model_fr.generate(**inputs)
    return tokenizer_fr.decode(translated[0], skip_special_tokens=True)

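# Translate a single English string to Hindi.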
def translate_en_to_hi(text):
    inputs = tokenizer_hi(text, return_tensors='pt')
    with torch.no_grad():
        translated = model_hi.generate(**inputs)
    return tokenizer_hi.decode(translated[0], skip_special_tokens=True)

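# Streamlit UI: read one word from a text box and show both translations.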
def main():
    st.title("Simultaneous Translation: English to French and Hindi")
    st.write("Enter a 10-letter English word to see translations:")

    text = st.text_input("English Word", "")
    if text and text.isalpha() and len(text) == 10:
        st.write("English to French:", translate_en_to_fr(text))
        st.write("English to Hindi:", translate_en_to_hi(text))
    elif text:
        st.write("Please enter a single English word of exactly 10 letters.")

if __name__ == "__main__":
    main()

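# Tiny reference set of English words with expected French and Hindi
# translations, used by the evaluation code below.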
dataset = [
    {"en": "translate", "fr": "traduire", "hi": "अनुवाद"},
    {"en": "education", "fr": "éducation", "hi": "शिक्षा"},
    # Add more examples
]

from sklearn.metrics import accuracy_score

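# Compare model output against the reference translation for each example.
# accuracy_score on strings amounts to exact-match accuracy, which is a very
# strict metric for translation; BLEU or chrF would be more forgiving.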
def evaluate_translation_model(model, tokenizer, test_data, target_lang):
    predictions = []
    ground_truth = []

    for data in test_data:
        input_text = data["en"]
        true_translation = data[target_lang]
        
        inputs = tokenizer(input_text, return_tensors='pt')
        with torch.no_grad():
            translated = model.generate(**inputs)
        predicted_translation = tokenizer.decode(translated[0], skip_special_tokens=True)
        
        predictions.append(predicted_translation.strip())
        ground_truth.append(true_translation.strip())
    
    return accuracy_score(ground_truth, predictions)

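# Report exact-match accuracy for both translation directions.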
def evaluate_models():
    # Evaluate English to French
    accuracy_fr = evaluate_translation_model(model_fr, tokenizer_fr, dataset, "fr")
    print(f"Accuracy for English to French translation: {accuracy_fr*100:.2f}%")

    # Evaluate English to Hindi
    accuracy_hi = evaluate_translation_model(model_hi, tokenizer_hi, dataset, "hi")
    print(f"Accuracy for English to Hindi translation: {accuracy_hi*100:.2f}%")

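# Note: this call runs every time the script executes (including every
# Streamlit rerun) and prints to the console rather than to the app page.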
evaluate_models()