Create app.py
app.py (ADDED)
import gradio as gr
import torch
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset
import soundfile as sf
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"

# Model 1: Wolof text -> English text translation
model_name = "bilalfaye/nllb-200-distilled-600M-en-wo"
translator = pipeline("translation", model=model_name, device=device)

# Model 2: Wolof audio -> Wolof text transcription
pipe_wolof = pipeline(
    task="automatic-speech-recognition",
    model="bilalfaye/wav2vec2-large-mms-1b-wolof",
    processor="bilalfaye/wav2vec2-large-mms-1b-wolof",
    device=device
)

# Model 3: English text -> English audio
synthesiser_english = pipeline("text-to-speech", "microsoft/speecht5_tts")
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Speaker x-vectors: index 0 is used as the "Male" voice, index 7306 as the "Female" voice.
speaker_embedding_english = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
speaker_embedding_wolof = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# Model 4: English audio -> English text transcription
processor_whisper = WhisperProcessor.from_pretrained("openai/whisper-small")
model_whisper = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
model_whisper.to(device)

# Model 5: Wolof text -> Wolof audio
synthesiser_wolof = pipeline("text-to-speech", "bilalfaye/speecht5_tts-wolof")


# Function 1: Wolof audio -> Wolof text
def transcribe_audio_wolof(audio):
    result = pipe_wolof(audio)
    return result["text"]

# Function 2: translation in both directions
def translate_wolof_to_english(wolof_text):
    translated = translator(wolof_text, src_lang="wol_Latn", tgt_lang="eng_Latn")
    return translated[0]["translation_text"]

def translate_english_to_wolof(english_text):
    translated = translator(english_text, src_lang="eng_Latn", tgt_lang="wol_Latn")
    return translated[0]["translation_text"]

# Function 3: English or Wolof text -> audio
def text_to_speech(text, language, voice_type):
    synthesiser = synthesiser_english if language == "english" else synthesiser_wolof
    embedding = speaker_embedding_english if voice_type == "Male" else speaker_embedding_wolof
    speech = synthesiser(text, forward_params={"speaker_embeddings": embedding})
    return speech["sampling_rate"], speech["audio"]

# Function 4: English audio -> English text
def transcribe_audio_english(audio):
    audio_data, samplerate = sf.read(audio)
    audio_data = audio_data.astype(np.float32)
    if audio_data.ndim > 1:  # downmix stereo recordings to mono
        audio_data = audio_data.mean(axis=1)
    # Whisper expects 16 kHz input; the recording is assumed to already be at that rate.
    input_features = processor_whisper(audio_data, sampling_rate=16000, return_tensors="pt").input_features.to(device)
    predicted_ids = model_whisper.generate(input_features)
    transcription = processor_whisper.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]

# Function 5: Wolof audio -> English text and audio
def process_audio_wolof(audio, voice_type):
    wolof_text = transcribe_audio_wolof(audio)
    english_text = translate_wolof_to_english(wolof_text)
    audio_english = text_to_speech(english_text, "english", voice_type)
    return wolof_text, english_text, audio_english

# Function 6: English audio -> Wolof text and audio
def process_audio_english(audio, voice_type):
    english_text = transcribe_audio_english(audio)
    wolof_text = translate_english_to_wolof(english_text)
    audio_wolof = text_to_speech(wolof_text, "wolof", voice_type)
    return english_text, wolof_text, audio_wolof

# Gradio interface
iface = gr.TabbedInterface(
    [
        gr.Interface(
            fn=process_audio_wolof,
            inputs=[
                gr.Audio(sources=["upload", "microphone"], type="filepath", label="Provide Audio in Wolof"),
                gr.Radio(["Male", "Female"], label="Select Voice Type")
            ],
            outputs=[
                gr.Textbox(label="Wolof Text"),
                gr.Textbox(label="English Translation"),
                gr.Audio(label="English Audio")
            ],
            title="Wolof to English",
            description="You can upload an audio file or record using a microphone to process Wolof audio."
        ),
        gr.Interface(
            fn=process_audio_english,
            inputs=[
                gr.Audio(sources=["upload", "microphone"], type="filepath", label="Provide Audio in English"),
                gr.Radio(["Male", "Female"], label="Select Voice Type")
            ],
            outputs=[
                gr.Textbox(label="English Text"),
                gr.Textbox(label="Wolof Translation"),
                gr.Audio(label="Wolof Audio")
            ],
            title="English to Wolof",
            description="You can upload an audio file or record using a microphone to process English audio."
        )
    ],
    tab_names=["Wolof to English", "English to Wolof"]
)

iface.launch(debug=True, share=True)
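Note: app.py on its own will not start on a Space unless its dependencies are also declared. A minimal requirements.txt sketch (not part of this commit, added here as an assumption) listing only the modules imported above, with versions left unpinned; model-specific extras such as sentencepiece for the NLLB tokenizer may also be needed:

gradio
torch
transformers
datasets
soundfile
numpy

With the dependencies declared, the Space runs app.py directly; the share=True flag mainly matters for local runs, since a hosted Space already serves the app publicly.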