naishwarya committed on
Commit
e551362
·
1 Parent(s): 860db05

Temporarily disable TTS due to version issues in HF

Browse files
Files changed (1) hide show
  1. app_merlin_ai_coach.py +11 -10
app_merlin_ai_coach.py CHANGED
@@ -20,8 +20,8 @@ import torch
20
  import numpy as np
21
  import soundfile as sf
22
  import whisper
23
- from TTS.api import TTS
24
- from TTS.utils.manage import ModelManager # <-- Add this import
25
 
26
  # Load environment variables from .env if present
27
  load_dotenv()
@@ -592,7 +592,7 @@ def build_merlin_graph():
592
 
593
  # --- Load models (smallest variants for speed) ---
594
  whisper_model = whisper.load_model("base")
595
- tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
596
 
597
  def transcribe_audio(audio):
598
  """
@@ -611,12 +611,13 @@ def synthesize_speech(text):
611
  Synthesize speech from text using Coqui TTS.
612
  Returns a (sample_rate, numpy array) tuple.
613
  """
614
- if not text:
615
- return None
616
- wav = tts_model.tts(text)
617
- # Ensure output is a numpy array
618
- wav_np = np.array(wav, dtype=np.float32)
619
- return (22050, wav_np)
 
620
 
621
  def get_task_dropdown_choices():
622
  """
@@ -754,7 +755,7 @@ with gr.Blocks(title="🧙 Merlin AI Coach") as demo:
754
  checklist_str = show_checklist()
755
  chat_history = chat_history + [[user_message, assistant_display]]
756
  # Synthesize assistant reply to audio only if TTS is enabled
757
- audio_reply = synthesize_speech(assistant_display) if tts_enabled else None
758
  # Always keep conversation group visible
759
  return chat_history, notes_str, checklist_str, "", tasks_str, state_plan_val, gr.update(visible=False), audio_reply, gr.update(visible=True)
760
 
 
20
  import numpy as np
21
  import soundfile as sf
22
  import whisper
23
+ # from TTS.api import TTS
24
+ # from TTS.utils.manage import ModelManager # <-- Add this import
25
 
26
  # Load environment variables from .env if present
27
  load_dotenv()
 
592
 
593
  # --- Load models (smallest variants for speed) ---
594
  whisper_model = whisper.load_model("base")
595
+ #tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
596
 
597
  def transcribe_audio(audio):
598
  """
 
611
  Synthesize speech from text using Coqui TTS.
612
  Returns a (sample_rate, numpy array) tuple.
613
  """
614
+ return None
615
+ # if not text:
616
+ # return None
617
+ # wav = tts_model.tts(text)
618
+ # # Ensure output is a numpy array
619
+ # wav_np = np.array(wav, dtype=np.float32)
620
+ # return (22050, wav_np)
621
 
622
  def get_task_dropdown_choices():
623
  """
 
755
  checklist_str = show_checklist()
756
  chat_history = chat_history + [[user_message, assistant_display]]
757
  # Synthesize assistant reply to audio only if TTS is enabled
758
+ audio_reply = synthesize_speech(assistant_display) if False else None
759
  # Always keep conversation group visible
760
  return chat_history, notes_str, checklist_str, "", tasks_str, state_plan_val, gr.update(visible=False), audio_reply, gr.update(visible=True)
761