BryanBradfo committed on
Commit
1d3f3e3
·
1 Parent(s): bbae07d

back to t5

Browse files
Files changed (2) hide show
  1. app.py +62 -132
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,169 +1,99 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
 
4
- # Initialiser les pipelines de traduction pour chaque paire de langues
5
- # Utiliser des modèles Helsinki-NLP qui sont spécifiquement entraînés pour la traduction
6
- translation_models = {
7
- "English-French": pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr"),
8
- "French-English": pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en"),
9
- "English-Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-en-es"),
10
- "Spanish-English": pipeline("translation", model="Helsinki-NLP/opus-mt-es-en"),
11
- "English-Japanese": pipeline("translation", model="Helsinki-NLP/opus-mt-en-jap"),
12
- "Japanese-English": pipeline("translation", model="Helsinki-NLP/opus-mt-jap-en"),
13
- "English-Chinese": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
14
- "Chinese-English": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
15
- "French-Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-fr-es"),
16
- "Spanish-French": pipeline("translation", model="Helsinki-NLP/opus-mt-es-fr"),
17
- "French-Japanese": pipeline("translation", model="Helsinki-NLP/opus-mt-fr-jap"),
18
- "Japanese-French": pipeline("translation", model="Helsinki-NLP/opus-mt-jap-fr"),
19
- "French-Chinese": pipeline("translation", model="Helsinki-NLP/opus-mt-fr-zh"),
20
- "Chinese-French": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-fr"),
21
- "Spanish-Japanese": pipeline("translation", model="Helsinki-NLP/opus-mt-es-jap"),
22
- "Japanese-Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-jap-es"),
23
- "Spanish-Chinese": pipeline("translation", model="Helsinki-NLP/opus-mt-es-zh"),
24
- "Chinese-Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-es"),
25
- "Japanese-Chinese": pipeline("translation", model="Helsinki-NLP/opus-mt-jap-zh"),
26
- "Chinese-Japanese": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-jap")
27
- }
28
 
29
- # Mappages des noms des langues
30
- language_names = {
31
- "English": "English",
32
- "French": "French",
33
- "Japanese": "Japanese",
34
- "Spanish": "Spanish",
35
- "Chinese": "Chinese" # Renommé de "Mandarin" à "Chinese" pour correspondre aux noms des modèles
 
36
  }
37
 
 
38
  def translate(text, source_lang, target_lang):
39
- """Traduire le texte en utilisant le modèle approprié"""
40
- if not text:
41
- return ""
42
-
43
  if source_lang == target_lang:
44
  return text
45
 
46
- # Construire la clé du modèle
47
- model_key = f"{source_lang}-{target_lang}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- # Vérifier si ce modèle existe
50
- if model_key in translation_models:
51
- try:
52
- result = translation_models[model_key](text)
53
- return result[0]["translation_text"]
54
- except Exception as e:
55
- return f"Erreur de traduction: {str(e)}"
56
- else:
57
- return f"La traduction de {source_lang} vers {target_lang} n'est pas prise en charge."
58
 
59
- # Exemples de textes dans différentes langues avec des étiquettes descriptives
60
  examples = {
61
- "English": {
62
- "Basic greeting": "Hello, how are you today?",
63
- "Travel plans": "I would like to visit Japan someday.",
64
- "Food preference": "I like potatoes and vegetables.",
65
- "Weather": "It's a beautiful sunny day today.",
66
- "Question": "Where is the nearest train station?"
67
- },
68
- "French": {
69
- "Salutation": "Bonjour, comment allez-vous aujourd'hui?",
70
- "Voyage": "J'aimerais visiter le Japon un jour.",
71
- "Nourriture": "J'aime les pommes de terre et les légumes.",
72
- "Météo": "Il fait beau et ensoleillé aujourd'hui.",
73
- "Question": "Où est la gare la plus proche?"
74
- },
75
- "Japanese": {
76
- "挨拶": "こんにちは、今日はお元気ですか?",
77
- "旅行": "いつか日本を訪れたいです。",
78
- "食べ物": "ジャガイモと野菜が好きです。",
79
- "天気": "今日は晴れていて美しい一日です。",
80
- "質問": "最寄りの駅はどこですか?"
81
- },
82
- "Spanish": {
83
- "Saludo": "Hola, ¿cómo estás hoy?",
84
- "Viaje": "Me gustaría visitar Japón algún día.",
85
- "Comida": "Me gustan las patatas y las verduras.",
86
- "Clima": "Hoy es un hermoso día soleado.",
87
- "Pregunta": "¿Dónde está la estación de tren más cercana?"
88
- },
89
- "Chinese": {
90
- "问候": "你好,今天好吗?",
91
- "旅行": "我希望有一天能去日本。",
92
- "食物": "我喜欢土豆和蔬菜。",
93
- "天气": "今天是个阳光明媚的美好日子。",
94
- "问题": "最近的火车站在哪里?"
95
- }
96
  }
97
 
98
- # Créer l'interface Gradio
99
  with gr.Blocks() as demo:
100
- gr.Markdown("# Traduction Multilingue")
101
 
102
  with gr.Row():
103
  source_lang = gr.Dropdown(
104
- choices=list(language_names.keys()),
105
- label="Langue source",
106
  value="English"
107
  )
108
  target_lang = gr.Dropdown(
109
- choices=list(language_names.keys()),
110
- label="Langue cible",
111
  value="French"
112
  )
113
 
114
  with gr.Row():
115
  with gr.Column():
116
- source_text = gr.Textbox(label="Texte source", lines=5)
117
- translate_btn = gr.Button(value="Traduire")
118
-
119
  with gr.Column():
120
- target_text = gr.Textbox(label="Résultat de la traduction", lines=5)
121
 
122
- # Ajouter une section d'exemples
123
- with gr.Accordion("Exemples", open=True):
124
- example_dropdown = gr.Dropdown(
125
- choices=list(examples["English"].keys()),
126
- label="Sélectionner un exemple",
127
- interactive=True,
128
- value=list(examples["English"].keys())[0] if examples["English"] else None
129
- )
130
- load_example_btn = gr.Button(value="Charger l'exemple")
131
-
132
- # Mettre à jour le menu déroulant d'exemples lorsque la langue source change
133
- def update_examples_dropdown(lang):
134
- return gr.Dropdown.update(
135
- choices=list(examples.get(lang, {}).keys()),
136
- value=list(examples.get(lang, {}).keys())[0] if examples.get(lang, {}) else None
137
- )
138
 
139
- source_lang.change(
140
- update_examples_dropdown,
141
- inputs=[source_lang],
142
- outputs=[example_dropdown]
143
- )
144
-
145
- # Charger l'exemple sélectionné dans le texte source
146
- def load_selected_example(example_label, lang):
147
- return examples.get(lang, {}).get(example_label, "")
148
 
149
- load_example_btn.click(
150
- load_selected_example,
151
- inputs=[example_dropdown, source_lang],
152
- outputs=[source_text]
153
- )
154
 
155
- # Configurer la fonction de traduction
156
  translate_btn.click(
157
- translate,
158
- inputs=[source_text, source_lang, target_lang],
159
- outputs=target_text
160
  )
161
 
162
- # Traduire également lorsque la touche Entrée est appuyée dans la zone de texte source
163
- source_text.submit(
164
- translate,
165
- inputs=[source_text, source_lang, target_lang],
166
- outputs=target_text
167
  )
168
 
169
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
3
 
4
+ # Initialize T5 model and tokenizer
5
+ tokenizer = T5Tokenizer.from_pretrained("t5-base")
6
+ model = T5ForConditionalGeneration.from_pretrained("t5-base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
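+ # Languages offered in the dropdowns, mapped to two-letter codes (NOTE(review): stock t5-base documents only English→German/French/Romanian translation prefixes — confirm the other languages actually work)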
9
+ languages = ["English", "French", "Japanese", "Spanish", "Chinese"]
10
+ language_codes = {
11
+ "English": "en",
12
+ "French": "fr",
13
+ "Japanese": "ja",
14
+ "Spanish": "es",
15
+ "Chinese": "zh" # Using Chinese instead of Mandarin for T5 compatibility
16
  }
17
 
18
+ # Translation function using T5
19
  def translate(text, source_lang, target_lang):
 
 
 
 
20
  if source_lang == target_lang:
21
  return text
22
 
23
+ source_code = language_codes[source_lang]
24
+ target_code = language_codes[target_lang]
25
+
26
+ # Format the input as expected by T5
27
+ task_prefix = f"translate {source_code} to {target_code}: "
28
+ input_text = task_prefix + text
29
+
30
+ # Tokenize and generate translation
31
+ inputs = tokenizer(input_text, return_tensors="pt", padding=True)
32
+
33
+ output_sequences = model.generate(
34
+ input_ids=inputs["input_ids"],
35
+ attention_mask=inputs["attention_mask"],
36
+ max_length=512,
37
+ do_sample=False
38
+ )
39
 
40
+ # Decode the output
41
+ translation = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
42
+ return translation
 
 
 
 
 
 
43
 
44
+ # Example texts for each language
45
  examples = {
46
+ "English": ["I went to the supermarket yesterday.", "The weather is beautiful today."],
47
+ "French": ["Je suis allé au supermarché hier.", "Le temps est magnifique aujourd'hui."],
48
+ "Japanese": ["昨日スーパーマーケットに行きました。", "今日の天気は素晴らしいです。"],
49
+ "Spanish": ["Fui al supermercado ayer.", "El clima está hermoso hoy."],
50
+ "Chinese": ["我昨天去了超市。", "今天天气很好。"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
 
53
+ # Create Gradio interface
54
  with gr.Blocks() as demo:
55
+ gr.Markdown("# Multilingual Translation App (T5-base)")
56
 
57
  with gr.Row():
58
  source_lang = gr.Dropdown(
59
+ languages,
60
+ label="Source Language",
61
  value="English"
62
  )
63
  target_lang = gr.Dropdown(
64
+ languages,
65
+ label="Target Language",
66
  value="French"
67
  )
68
 
69
  with gr.Row():
70
  with gr.Column():
71
+ input_text = gr.Textbox(label="Source Text", placeholder="Enter text to translate...")
72
+ translate_btn = gr.Button(value="Translate")
 
73
  with gr.Column():
74
+ output_text = gr.Textbox(label="Translated Text")
75
 
76
+ # Dynamic examples based on source language
77
+ example_component = gr.Examples(examples=examples["English"], inputs=input_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ # Update examples when source language changes
80
+ def update_examples(lang):
81
+ return gr.Examples(examples=examples.get(lang, []), inputs=input_text)
 
 
 
 
 
 
82
 
83
+ source_lang.change(update_examples, inputs=source_lang, outputs=example_component)
 
 
 
 
84
 
85
+ # Translation function connections
86
  translate_btn.click(
87
+ translate,
88
+ inputs=[input_text, source_lang, target_lang],
89
+ outputs=output_text
90
  )
91
 
92
+ # Also translate when Enter key is pressed
93
+ input_text.submit(
94
+ translate,
95
+ inputs=[input_text, source_lang, target_lang],
96
+ outputs=output_text
97
  )
98
 
99
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio>=3.50.2
2
  transformers>=4.35.0
3
  torch>=2.0.0
4
- sentencepiece>=0.1.99
5
- sacremoses>=0.0.53
 
1
  gradio>=3.50.2
2
  transformers>=4.35.0
3
  torch>=2.0.0
4
+ sentencepiece>=0.1.99