# -*- coding: utf-8 -*-
import os
import tempfile
import zipfile

import gradio as gr
from faster_whisper import WhisperModel
from huggingface_hub import hf_hub_download
from israwave import IsrawaveTTS

# Download model files from the HF repo YoniAfek/israwaveTTS
# (if the repo is published as a dataset repo, pass repo_type="dataset" to hf_hub_download)
espeak_zip_path = hf_hub_download(repo_id="YoniAfek/israwaveTTS", filename="espeak-ng-data.zip")
israwave_path = hf_hub_download(repo_id="YoniAfek/israwaveTTS", filename="israwave.onnx")
nakdimon_path = hf_hub_download(repo_id="YoniAfek/israwaveTTS", filename="nakdimon.onnx")

# Extract espeak-ng-data (phonemization data used by the TTS front end)
espeak_dir = os.path.join(tempfile.gettempdir(), "espeak-ng-data")
os.makedirs(espeak_dir, exist_ok=True)
with zipfile.ZipFile(espeak_zip_path, "r") as zip_ref:
    zip_ref.extractall(espeak_dir)

# Load the Hebrew Whisper model (CTranslate2 weights via faster-whisper)
whisper_model = WhisperModel("ivrit-ai/whisper-large-v3-turbo-ct2")

# Load the Israwave TTS engine (israwave.onnx for synthesis, nakdimon.onnx for diacritization)
tts = IsrawaveTTS(
    model_path=israwave_path,
    speaker_model_path=nakdimon_path,
    espeak_data_path=espeak_dir,
)


# Transcribe the recording, then speak the transcript back with Israwave
def process_audio(audio_path):
    segments, _ = whisper_model.transcribe(audio_path, language="he")
    text = " ".join(seg.text.strip() for seg in segments)
    tts_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
    tts.tts_to_file(text, tts_path)
    return text, tts_path


# Gradio interface (UI strings are in Hebrew, since the app targets Hebrew speakers)
demo = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="🎙️ הקלט את עצמך"),  # "Record yourself"
    outputs=[
        gr.Text(label="תמלול"),  # "Transcript"
        gr.Audio(label="חזרה בקול עברי"),  # "Played back in a Hebrew voice"
    ],
    title="תמלול ודיבור עם Israwave",  # "Transcription and speech with Israwave"
    # "The system transcribes what was said and plays it back in a Hebrew voice.
    #  The files are downloaded from Hugging Face Datasets."
    description="המערכת מתמללת את מה שנאמר ומשמיעה אותו חזרה בקול עברי. הקבצים יורדים מ-Hugging Face Datasets",
)

demo.launch()