naishwarya committed on
Commit
e551362
·
1 Parent(s): 860db05

Temporarily disable TTS due to version issues in HF

Browse files
Files changed (1) hide show
  1. app_merlin_ai_coach.py +11 -10
app_merlin_ai_coach.py CHANGED
@@ -20,8 +20,8 @@ import torch
20
  import numpy as np
21
  import soundfile as sf
22
  import whisper
23
- from TTS.api import TTS
24
- from TTS.utils.manage import ModelManager # <-- Add this import
25
 
26
  # Load environment variables from .env if present
27
  load_dotenv()
@@ -592,7 +592,7 @@ def build_merlin_graph():
592
 
593
  # --- Load models (smallest variants for speed) ---
594
  whisper_model = whisper.load_model("base")
595
- tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
596
 
597
  def transcribe_audio(audio):
598
  """
@@ -611,12 +611,13 @@ def synthesize_speech(text):
611
  Synthesize speech from text using Coqui TTS.
612
  Returns a (sample_rate, numpy array) tuple.
613
  """
614
- if not text:
615
- return None
616
- wav = tts_model.tts(text)
617
- # Ensure output is a numpy array
618
- wav_np = np.array(wav, dtype=np.float32)
619
- return (22050, wav_np)
 
620
 
621
  def get_task_dropdown_choices():
622
  """
@@ -754,7 +755,7 @@ with gr.Blocks(title="🧙 Merlin AI Coach") as demo:
754
  checklist_str = show_checklist()
755
  chat_history = chat_history + [[user_message, assistant_display]]
756
  # Synthesize assistant reply to audio only if TTS is enabled
757
- audio_reply = synthesize_speech(assistant_display) if tts_enabled else None
758
  # Always keep conversation group visible
759
  return chat_history, notes_str, checklist_str, "", tasks_str, state_plan_val, gr.update(visible=False), audio_reply, gr.update(visible=True)
760
 
 
20
  import numpy as np
21
  import soundfile as sf
22
  import whisper
23
+ # from TTS.api import TTS
24
+ # from TTS.utils.manage import ModelManager # <-- Add this import
25
 
26
  # Load environment variables from .env if present
27
  load_dotenv()
 
592
 
593
  # --- Load models (smallest variants for speed) ---
594
  whisper_model = whisper.load_model("base")
595
+ #tts_model = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
596
 
597
  def transcribe_audio(audio):
598
  """
 
611
  Synthesize speech from text using Coqui TTS.
612
  Returns a (sample_rate, numpy array) tuple.
613
  """
614
+ return None
615
+ # if not text:
616
+ # return None
617
+ # wav = tts_model.tts(text)
618
+ # # Ensure output is a numpy array
619
+ # wav_np = np.array(wav, dtype=np.float32)
620
+ # return (22050, wav_np)
621
 
622
  def get_task_dropdown_choices():
623
  """
 
755
  checklist_str = show_checklist()
756
  chat_history = chat_history + [[user_message, assistant_display]]
757
  # Synthesize assistant reply to audio only if TTS is enabled
758
+ audio_reply = synthesize_speech(assistant_display) if False else None
759
  # Always keep conversation group visible
760
  return chat_history, notes_str, checklist_str, "", tasks_str, state_plan_val, gr.update(visible=False), audio_reply, gr.update(visible=True)
761