Spaces:
Running
on
Zero
Running
on
Zero
Add translation to English sample with automatic source language detection
Browse files- app.py +13 -2
- requirements.txt +1 -0
- translation.py +130 -0
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from image_classification import create_image_classification_tab
|
|
| 8 |
from image_to_text import create_image_to_text_tab
|
| 9 |
from text_to_image import create_text_to_image_tab
|
| 10 |
from text_to_speech import create_text_to_speech_tab
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
class App:
|
|
@@ -25,7 +26,8 @@ class App:
|
|
| 25 |
image_classification_model: str,
|
| 26 |
text_to_speech_model: str,
|
| 27 |
audio_transcription_model: str,
|
| 28 |
-
chat_model: str
|
|
|
|
| 29 |
):
|
| 30 |
"""Initialize the App with an InferenceClient instance and model IDs.
|
| 31 |
|
|
@@ -38,6 +40,8 @@ class App:
|
|
| 38 |
text_to_speech_model: Model ID for text-to-speech.
|
| 39 |
audio_transcription_model: Model ID for automatic speech recognition.
|
| 40 |
chat_model: Model ID for chatbot.
|
|
|
|
|
|
|
| 41 |
"""
|
| 42 |
self.client = client
|
| 43 |
self.text_to_image_model = text_to_image_model
|
|
@@ -46,6 +50,7 @@ class App:
|
|
| 46 |
self.text_to_speech_model = text_to_speech_model
|
| 47 |
self.audio_transcription_model = audio_transcription_model
|
| 48 |
self.chat_model = chat_model
|
|
|
|
| 49 |
|
| 50 |
def run(self):
|
| 51 |
"""Launch the Gradio application with all building block tabs.
|
|
@@ -70,6 +75,11 @@ class App:
|
|
| 70 |
create_asr_tab(self.client, self.audio_transcription_model)
|
| 71 |
with gr.Tab("Chat"):
|
| 72 |
create_chatbot_tab(self.chat_model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
demo.launch()
|
| 75 |
|
|
@@ -83,6 +93,7 @@ if __name__ == "__main__":
|
|
| 83 |
image_classification_model=getenv("IMAGE_CLASSIFICATION_MODEL"),
|
| 84 |
text_to_speech_model=getenv("TEXT_TO_SPEECH_MODEL"),
|
| 85 |
audio_transcription_model=getenv("AUDIO_TRANSCRIPTION_MODEL"),
|
| 86 |
-
chat_model=getenv("CHAT_MODEL")
|
|
|
|
| 87 |
)
|
| 88 |
app.run()
|
|
|
|
| 8 |
from image_to_text import create_image_to_text_tab
|
| 9 |
from text_to_image import create_text_to_image_tab
|
| 10 |
from text_to_speech import create_text_to_speech_tab
|
| 11 |
+
from translation import create_translation_tab
|
| 12 |
|
| 13 |
|
| 14 |
class App:
|
|
|
|
| 26 |
image_classification_model: str,
|
| 27 |
text_to_speech_model: str,
|
| 28 |
audio_transcription_model: str,
|
| 29 |
+
chat_model: str,
|
| 30 |
+
fallback_translation_model: str
|
| 31 |
):
|
| 32 |
"""Initialize the App with an InferenceClient instance and model IDs.
|
| 33 |
|
|
|
|
| 40 |
text_to_speech_model: Model ID for text-to-speech.
|
| 41 |
audio_transcription_model: Model ID for automatic speech recognition.
|
| 42 |
chat_model: Model ID for chatbot.
|
| 43 |
+
fallback_translation_model: Fallback translation model ID for languages
|
| 44 |
+
without specific translation models.
|
| 45 |
"""
|
| 46 |
self.client = client
|
| 47 |
self.text_to_image_model = text_to_image_model
|
|
|
|
| 50 |
self.text_to_speech_model = text_to_speech_model
|
| 51 |
self.audio_transcription_model = audio_transcription_model
|
| 52 |
self.chat_model = chat_model
|
| 53 |
+
self.fallback_translation_model = fallback_translation_model
|
| 54 |
|
| 55 |
def run(self):
|
| 56 |
"""Launch the Gradio application with all building block tabs.
|
|
|
|
| 75 |
create_asr_tab(self.client, self.audio_transcription_model)
|
| 76 |
with gr.Tab("Chat"):
|
| 77 |
create_chatbot_tab(self.chat_model)
|
| 78 |
+
with gr.Tab("Translation to English"):
|
| 79 |
+
create_translation_tab(
|
| 80 |
+
self.client,
|
| 81 |
+
self.fallback_translation_model
|
| 82 |
+
)
|
| 83 |
|
| 84 |
demo.launch()
|
| 85 |
|
|
|
|
| 93 |
image_classification_model=getenv("IMAGE_CLASSIFICATION_MODEL"),
|
| 94 |
text_to_speech_model=getenv("TEXT_TO_SPEECH_MODEL"),
|
| 95 |
audio_transcription_model=getenv("AUDIO_TRANSCRIPTION_MODEL"),
|
| 96 |
+
chat_model=getenv("CHAT_MODEL"),
|
| 97 |
+
fallback_translation_model=getenv("FALLBACK_TRANSLATION_MODEL")
|
| 98 |
)
|
| 99 |
app.run()
|
requirements.txt
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
gradio>=5.49.1
|
| 2 |
huggingface-hub>=0.34.0,<1.0
|
| 3 |
inflect>=7.0.0
|
|
|
|
| 4 |
librosa>=0.10.0
|
| 5 |
numpy>=1.24.0
|
| 6 |
pandas>=2.0.0
|
|
|
|
| 1 |
gradio>=5.49.1
|
| 2 |
huggingface-hub>=0.34.0,<1.0
|
| 3 |
inflect>=7.0.0
|
| 4 |
+
langdetect>=1.0.9
|
| 5 |
librosa>=0.10.0
|
| 6 |
numpy>=1.24.0
|
| 7 |
pandas>=2.0.0
|
translation.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import partial
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from huggingface_hub import InferenceClient
|
| 4 |
+
from langdetect import detect, LangDetectException
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Language code mapping to Helsinki-NLP translation models.
# Keys are ISO 639-1 codes as produced by langdetect; values are Hugging Face
# model IDs for <source-language>-to-English OPUS-MT models.
# If a specific language pair model doesn't exist, we'll use the multilingual model
# (the fallback model passed in by the caller — see get_translation_model()).
# NOTE(review): assumes every listed opus-mt-*-en model exists on the Hub and is
# servable via the Inference API — TODO confirm for the less common pairs.
LANGUAGE_TO_MODEL_MAP = {
    "fr": "Helsinki-NLP/opus-mt-fr-en",
    "de": "Helsinki-NLP/opus-mt-de-en",
    "es": "Helsinki-NLP/opus-mt-es-en",
    "it": "Helsinki-NLP/opus-mt-it-en",
    "pt": "Helsinki-NLP/opus-mt-pt-en",
    "ru": "Helsinki-NLP/opus-mt-ru-en",
    "zh": "Helsinki-NLP/opus-mt-zh-en",
    "ja": "Helsinki-NLP/opus-mt-ja-en",
    "ko": "Helsinki-NLP/opus-mt-ko-en",
    "ar": "Helsinki-NLP/opus-mt-ar-en",
    "nl": "Helsinki-NLP/opus-mt-nl-en",
    "pl": "Helsinki-NLP/opus-mt-pl-en",
    "tr": "Helsinki-NLP/opus-mt-tr-en",
    "vi": "Helsinki-NLP/opus-mt-vi-en",
    "hi": "Helsinki-NLP/opus-mt-hi-en",
    "cs": "Helsinki-NLP/opus-mt-cs-en",
    "sv": "Helsinki-NLP/opus-mt-sv-en",
    "fi": "Helsinki-NLP/opus-mt-fi-en",
    "uk": "Helsinki-NLP/opus-mt-uk-en",
    "ro": "Helsinki-NLP/opus-mt-ro-en",
    "th": "Helsinki-NLP/opus-mt-th-en",
}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def detect_language(text: str) -> str:
    """Detect the language of the input text using the langdetect library.

    Uses the langdetect library, which is a Python port of Google's
    language-detection library. It supports over 55 languages and is known for
    high accuracy, especially for languages with unique character sets like
    Korean, Japanese, and Chinese.

    Args:
        text: Input text to detect the language of.

    Returns:
        ISO 639-1 language code (e.g., "en", "fr", "de", "ko", "ja") of the
        detected language. If detection fails (e.g., the text is too short or
        contains no letters), "en" is returned so downstream translation logic
        treats the text as already English and passes it through unchanged.
    """
    # Original docstring claimed this raises LangDetectException; it never
    # does — the exception is caught below and mapped to the "en" fallback.
    try:
        return detect(text)
    except LangDetectException:
        # Detection fails on very short or non-linguistic input; default to
        # English so translate_to_english() returns the text as-is.
        return "en"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_translation_model(language_code: str, fallback_model: str) -> str:
|
| 59 |
+
"""Get the appropriate translation model for a given language code.
|
| 60 |
+
|
| 61 |
+
Args:
|
| 62 |
+
language_code: ISO 639-1 language code (e.g., "fr", "de", "en").
|
| 63 |
+
fallback_model: Fallback model to use if no specific model is available.
|
| 64 |
+
|
| 65 |
+
Returns:
|
| 66 |
+
Model ID for translation, or fallback model if language not in mapping.
|
| 67 |
+
"""
|
| 68 |
+
if language_code == "en":
|
| 69 |
+
return None # Already in English
|
| 70 |
+
return LANGUAGE_TO_MODEL_MAP.get(language_code, fallback_model)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def translate_to_english(
    client: InferenceClient,
    fallback_translation_model: str,
    text: str
) -> str:
    """Translate text to English using automatic language detection.

    First detects the source language with the langdetect library, then picks
    a matching translation model and asks the Inference API to translate the
    text into English.

    Args:
        client: Hugging Face InferenceClient instance for API calls.
        fallback_translation_model: Fallback translation model to use if no
            language-specific model is available.
        text: Input text to translate to English.

    Returns:
        The English translation of ``text``, or ``text`` unchanged when it is
        already detected as English.
    """
    source_language = detect_language(text)

    # English input (including the detection-failure fallback) is passed
    # through untouched — no API call is made.
    if source_language == "en":
        return text

    # Pick a language-specific opus-mt model, falling back to the
    # caller-supplied multilingual model for unmapped languages.
    model_id = get_translation_model(source_language, fallback_translation_model)

    output = client.translation(text, model=model_id)
    return output.translation_text
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def create_translation_tab(
    client: InferenceClient,
    fallback_translation_model: str
):
    """Create the translation to English tab in the Gradio interface.

    Lays out the UI for translation with automatic language detection: an
    input textbox, a translate button, and a read-only output textbox, and
    wires the button to translate_to_english().

    Args:
        client: Hugging Face InferenceClient instance for API calls.
        fallback_translation_model: Fallback translation model to use if no
            language-specific model is available.
    """
    gr.Markdown("Translate text to English. The source language will be automatically detected.")

    source_box = gr.Textbox(label="Input Text", lines=5)
    translate_btn = gr.Button("Translate")
    result_box = gr.Textbox(label="Translated Text", lines=5, interactive=False)

    # Bind client/model up front so the click handler only receives the
    # textbox value from Gradio.
    handler = partial(translate_to_english, client, fallback_translation_model)
    translate_btn.click(fn=handler, inputs=source_box, outputs=result_box)
|