Spaces:

bilalfaye
/

wolof_english_translation

Sleeping

App Files Files Community

bilalfaye committed on Jan 16

Commit

fc00984

verified ·

1 Parent(s): 065ebce

Create app.py

Browse files

Files changed (1) hide show

app.py +126 -0

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import gradio as gr
+import torch
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
+from datasets import load_dataset
+import soundfile as sf
+import numpy as np
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Modèle 1 : Traduction texte Wolof -> texte anglais
+model_name = "bilalfaye/nllb-200-distilled-600M-en-wo"
+translator = pipeline("translation", model=model_name, device=device)
+# Modèle 2 : Transcription audio Wolof -> texte Wolof
+pipe_wolof = pipeline(
+    task="automatic-speech-recognition",
+    model="bilalfaye/wav2vec2-large-mms-1b-wolof",
+    processor="bilalfaye/wav2vec2-large-mms-1b-wolof",
+    device=device
+)
+# Modèle 3 : Texte anglais -> audio anglais
+synthesiser_english = pipeline("text-to-speech", "microsoft/speecht5_tts")
+embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+speaker_embedding_english = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
+speaker_embedding_wolof = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+# Modèle 4 : Transcription audio anglais -> texte anglais
+processor_whisper = WhisperProcessor.from_pretrained("openai/whisper-small")
+model_whisper = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+model_whisper.to(device)
+# Modèle 5 : Texte anglais -> audio Wolof
+synthesiser_wolof = pipeline("text-to-speech", "bilalfaye/speecht5_tts-wolof")
+# Fonction 1 : Transcription audio Wolof -> texte Wolof
+def transcribe_audio_wolof(audio):
+    result = pipe_wolof(audio)
+    return result['text']
+# Fonction 2 : Traduction texte Wolof -> texte anglais
+def translate_wolof_to_english(wolof_text):
+    translated = translator(wolof_text, src_lang="wol_Latn", tgt_lang="eng_Latn")
+    return translated[0]['translation_text']
+def translate_english_to_wolof(wolof_text):
+    translated = translator(wolof_text, src_lang="eng_Latn", tgt_lang="wol_Latn")
+    return translated[0]['translation_text']
+# Fonction 3 : Texte anglais -> audio anglais ou Wolof
+def text_to_speech(text, language, voice_type):
+    if language == "english":
+        synthesiser = synthesiser_english
+    else:
+        synthesiser = synthesiser_wolof
+    embedding = speaker_embedding_english if voice_type == "Male" else speaker_embedding_wolof
+    speech = synthesiser(text, forward_params={"speaker_embeddings": embedding})
+    return speech["sampling_rate"], speech["audio"]
+# Fonction 4 : Transcription audio anglais -> texte anglais
+def transcribe_audio_english(audio):
+    audio_data, samplerate = sf.read(audio)
+    audio_data = audio_data.astype(np.float32)
+    input_features = processor_whisper(audio_data, sampling_rate=16000, return_tensors="pt").input_features.to(device)
+    predicted_ids = model_whisper.generate(input_features)
+    transcription = processor_whisper.batch_decode(predicted_ids, skip_special_tokens=True)
+    return transcription[0]
+# Fonction 5 : Traitement audio Wolof vers anglais
+def process_audio_wolof(audio, voice_type):
+    wolof_text = transcribe_audio_wolof(audio)
+    english_text = translate_wolof_to_english(wolof_text)
+    audio_english = text_to_speech(english_text, "english", voice_type)
+    return wolof_text, english_text, audio_english
+# Fonction 6 : Traitement audio anglais vers Wolof
+def process_audio_english(audio, voice_type):
+    english_text = transcribe_audio_english(audio)
+    wolof_text = translate_english_to_wolof(english_text)
+    audio_wolof = text_to_speech(wolof_text, "wolof", voice_type)
+    return english_text, wolof_text, audio_wolof
+# Updated Gradio Interface
+iface = gr.TabbedInterface(
+    [
+        gr.Interface(
+            fn=process_audio_wolof,
+            inputs=[
+                gr.Audio(sources=["upload", "microphone"], type="filepath", label="Provide Audio in Wolof"),
+                gr.Radio(["Male", "Female"], label="Select Voice Type")
+            ],
+            outputs=[
+                gr.Textbox(label="Texte Wolof"),
+                gr.Textbox(label="Texte traduit en Anglais"),
+                gr.Audio(label="Audio en Anglais")
+            ],
+            title="Wolof vers Anglais",
+            description="You can upload an audio file or record using a microphone to process Wolof audio."
+        ),
+        gr.Interface(
+            fn=process_audio_english,
+            inputs=[
+                gr.Audio(sources=["upload", "microphone"], type="filepath", label="Provide Audio in English"),
+                gr.Radio(["Male", "Female"], label="Select Voice Type")
+            ],
+            outputs=[
+                gr.Textbox(label="Texte Anglais"),
+                gr.Textbox(label="Texte traduit en Wolof"),
+                gr.Audio(label="Audio en Wolof")
+            ],
+            title="Anglais vers Wolof",
+            description="You can upload an audio file or record using a microphone to process English audio."
+        )
+    ],
+    tab_names=["Wolof vers Anglais", "Anglais vers Wolof"]
+)
+iface.launch(debug=True, share=True)