Yoni committed on
Commit
05c9154
·
1 Parent(s): 3f8595e

no message

Browse files
Files changed (2) hide show
  1. app.py +8 -18
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,21 +1,11 @@
1
  # -*- coding: utf-8 -*-
2
- import os
3
- import zipfile
4
  import gradio as gr
5
  from faster_whisper import WhisperModel
 
6
  import tempfile
 
 
7
  from huggingface_hub import hf_hub_download
8
- import subprocess
9
- import sys
10
-
11
- # Unzip and install cleaned israwave
12
- if not os.path.exists("israwave-main"):
13
- with zipfile.ZipFile("israwave-clean.zip", "r") as zip_ref:
14
- zip_ref.extractall(".")
15
- # subprocess.check_call([sys.executable, "-m", "pip", "install", "./israwave-clean"])
16
-
17
- # Only after install, import israwave
18
- from israwave import IsrawaveTTS
19
 
20
  # Download model files from HF dataset (YoniAfek/israwaveTTS)
21
  espeak_zip_path = hf_hub_download(repo_id="YoniAfek/israwaveTTS", filename="espeak-ng-data.zip")
@@ -28,17 +18,17 @@ os.makedirs(espeak_dir, exist_ok=True)
28
  with zipfile.ZipFile(espeak_zip_path, "r") as zip_ref:
29
  zip_ref.extractall(espeak_dir)
30
 
31
- # Load whisper
32
  whisper_model = WhisperModel("ivrit-ai/whisper-large-v3-turbo-ct2")
33
 
34
- # Load israwave TTS
35
  tts = IsrawaveTTS(
36
  model_path=israwave_path,
37
  speaker_model_path=nakdimon_path,
38
  espeak_data_path=espeak_dir
39
  )
40
 
41
- # Transcribe + speak
42
  def process_audio(audio_path):
43
  segments, _ = whisper_model.transcribe(audio_path, language="he")
44
  text = " ".join([seg.text for seg in segments])
@@ -46,11 +36,11 @@ def process_audio(audio_path):
46
  tts.tts_to_file(text, tts_path)
47
  return text, tts_path
48
 
49
- # Interface
50
  demo = gr.Interface(
51
  fn=process_audio,
52
  inputs=gr.Audio(type="filepath", label="🎙️ הקלט את עצמך"),
53
- outputs=[gr.Text(label="תמלול"), gr.Audio(label="השמעה בקול עברי")],
54
  title="תמלול ודיבור עם Israwave",
55
  description="המערכת מתמללת את מה שנאמר ומשמיעה אותו חזרה בקול עברי. הקבצים יורדים מ-Hugging Face Datasets"
56
  )
 
1
  # -*- coding: utf-8 -*-
 
 
2
  import gradio as gr
3
  from faster_whisper import WhisperModel
4
+ from israwave import IsrawaveTTS
5
  import tempfile
6
+ import os
7
+ import zipfile
8
  from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Download model files from HF dataset (YoniAfek/israwaveTTS)
11
  espeak_zip_path = hf_hub_download(repo_id="YoniAfek/israwaveTTS", filename="espeak-ng-data.zip")
 
18
  with zipfile.ZipFile(espeak_zip_path, "r") as zip_ref:
19
  zip_ref.extractall(espeak_dir)
20
 
21
+ # Load Whisper model
22
  whisper_model = WhisperModel("ivrit-ai/whisper-large-v3-turbo-ct2")
23
 
24
+ # Load Israwave TTS
25
  tts = IsrawaveTTS(
26
  model_path=israwave_path,
27
  speaker_model_path=nakdimon_path,
28
  espeak_data_path=espeak_dir
29
  )
30
 
31
+ # Transcribe + TTS
32
  def process_audio(audio_path):
33
  segments, _ = whisper_model.transcribe(audio_path, language="he")
34
  text = " ".join([seg.text for seg in segments])
 
36
  tts.tts_to_file(text, tts_path)
37
  return text, tts_path
38
 
39
+ # Gradio interface
40
  demo = gr.Interface(
41
  fn=process_audio,
42
  inputs=gr.Audio(type="filepath", label="🎙️ הקלט את עצמך"),
43
+ outputs=[gr.Text(label="תמלול"), gr.Audio(label="חזרה בקול עברי")],
44
  title="תמלול ודיבור עם Israwave",
45
  description="המערכת מתמללת את מה שנאמר ומשמיעה אותו חזרה בקול עברי. הקבצים יורדים מ-Hugging Face Datasets"
46
  )
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio
2
  faster-whisper
3
  huggingface_hub
 
 
 
1
  gradio
2
  faster-whisper
3
  huggingface_hub
4
+ numpy
5
+ onnxruntime