import gradio as gr
import speech_recognition as sr
from ttsmms import TTS
from deep_translator import GoogleTranslator

# Initialize the TTS model for Ewe and Twi languages
ewe = TTS("data/ewe")
twi = TTS("data/aka")

# Create a list of supported languages and their corresponding TTS models
langs = [{"lang": 'ewe', "tts": ewe}, {"lang": 'twi', "tts": twi}]


# Function to convert speech to text using Google's speech recognition API
def speech_to_text(audio_file):
    """Transcribe an audio file with Google's speech recognition API.

    Args:
        audio_file: Audio source handed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood or the recognition service could not be reached.
    """
    r = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = r.record(source)

    # Only the remote recognition call is expected to raise these errors.
    try:
        return r.recognize_google(audio)
    except sr.UnknownValueError:
        return None
    except sr.RequestError:
        print("Speech recognition service unavailable.")
        return None


# Function to convert text to speech
def text_to_speech(text, lang):
    """Translate ``text`` into ``lang`` and synthesize it to a WAV file.

    Args:
        text: Text to translate; the source language is auto-detected by
            the translator.
        lang: Target language; must match a ``"lang"`` entry in ``langs``.

    Returns:
        A ``(wav_path, translated)`` tuple: the path of the generated WAV
        file and the translated text that was synthesized.

    Raises:
        ValueError: If ``lang`` is not one of the supported languages.
    """
    # Find the selected language in the list of supported languages
    selected_lang = next(
        (lang_item for lang_item in langs if lang_item["lang"] == lang),
        None,
    )
    if selected_lang is None:
        raise ValueError(f"Language '{lang}' is not supported.")
    selected_tts = selected_lang["tts"]

    # Translate the text to the selected language using Google Translator
    translated = GoogleTranslator(source='auto', target=lang).translate(text)

    # NOTE: every call writes to the same file name, so a later call
    # overwrites the audio produced by an earlier one.
    wav_path = "output.wav"

    # Generate speech synthesis and save it as a WAV file
    selected_tts.synthesis(translated, wav_path=wav_path)
    return wav_path, translated


# Function to handle the speech to text app
def speech_to_text_app(audio_file):
    """Transcribe ``audio_file``, returning a fallback message on failure."""
    text = speech_to_text(audio_file)
    return text if text else "Unable to transcribe audio."
# Function to handle the text to speech output
def text_to_speech_output(text, lang):
    """Run ``text_to_speech`` and return its ``(wav_path, translated)`` pair."""
    wav_path, translated = text_to_speech(text, lang)
    return wav_path, translated


# Function to handle the speech to text and text to speech app
def speech_to_text_and_tts_app(lang_input, audio_file, text_input):
    """Gradio callback: translate recorded or typed input and speak it.

    A recording takes precedence over the textbox: when ``audio_file`` is
    given it is transcribed first, otherwise ``text_input`` is used.

    Args:
        lang_input: Target language chosen in the dropdown.
        audio_file: Recorded audio, or a falsy value when nothing was
            recorded.
        text_input: Text typed by the user; used only without a recording.

    Returns:
        A ``(text, wav_path)`` tuple for the text and audio outputs. When
        there is nothing to synthesize, ``text`` is an explanatory message
        and ``wav_path`` is ``None``.
    """
    if audio_file:
        print("Converting audio to text:", audio_file)
        text = speech_to_text(audio_file)
        # speech_to_text returns None when recognition fails; do not pass
        # that on to the translator.
        if not text:
            return "Unable to transcribe audio.", None
    else:
        text = text_input
        # Nothing recorded and nothing typed: there is no input to process.
        if not text or not text.strip():
            return "Please record audio or enter some text.", None

    wav_path, translates = text_to_speech_output(text, lang_input)
    return translates, wav_path


# Define the Gradio interface inputs and outputs
# NOTE(review): the gr.inputs / gr.outputs namespaces are the legacy Gradio
# component API; confirm the installed Gradio version still provides them.
audio_input = gr.inputs.Audio(source="microphone", type="filepath", label="Record Audio")
text_input = gr.inputs.Textbox(label="Enter your text here")
lang_input = gr.inputs.Dropdown(choices=[item["lang"] for item in langs], label="Language")
output_text = gr.outputs.Textbox(label="Transcription")
output_audio = gr.outputs.Audio(label="Text-to-Speech Audio", type='filepath')

# Create the Gradio interface
interface = gr.Interface(
    fn=speech_to_text_and_tts_app,
    inputs=[lang_input, audio_input, text_input],
    outputs=[output_text, output_audio],
    title="English to Twi - Ewe Speech Generator(MMS TTS)",
    description="Translate English to Twi and Ewe Language(from Ghana)",
)

# Launch the interface
interface.launch()