Chatterbox

Running

App Files Community

neo7team committed on Jun 9

Commit

74d7062

verified ·

1 Parent(s): bf4bbc3

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -9

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ from chatterbox.src.chatterbox.tts import ChatterboxTTS
 import gradio as gr
 import spaces
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🚀 Running on device: {DEVICE}")
 # --- Global Model Initialization ---
@@ -18,8 +19,9 @@ def get_or_load_model():
     if MODEL is None:
         print("Model not loaded, initializing...")
         try:
             MODEL = ChatterboxTTS.from_pretrained(DEVICE)
-            if hasattr(MODEL, 'to') and str(MODEL.device) != DEVICE:
                 MODEL.to(DEVICE)
             print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
         except Exception as e:
@@ -36,13 +38,11 @@ except Exception as e:
 def set_seed(seed: int):
     """Sets the random seed for reproducibility across torch, numpy, and random."""
     torch.manual_seed(seed)
-    if DEVICE == "cuda":
-        torch.cuda.manual_seed(seed)
-        torch.cuda.manual_seed_all(seed)
     random.seed(seed)
     np.random.seed(seed)
-@spaces.GPU
 def generate_tts_audio(
     text_input: str,
     audio_prompt_path_input: str,
@@ -74,6 +74,7 @@ def generate_tts_audio(
         set_seed(int(seed_num_input))
     print(f"Generating audio for text: '{text_input[:50]}...'")
     wav = current_model.generate(
         text_input[:300],  # Truncate text to max chars
         audio_prompt_path=audio_prompt_path_input,
@@ -82,12 +83,12 @@ def generate_tts_audio(
         cfg_weight=cfgw_input,
     )
     print("Audio generation complete.")
-    return (current_model.sr, wav.squeeze(0).numpy())
 with gr.Blocks() as demo:
     gr.Markdown(
         """
-        # Chatterbox TTS Demo
         Generate high-quality speech from text with reference audio styling.
         """
     )
@@ -133,4 +134,4 @@ with gr.Blocks() as demo:
         outputs=[audio_output],
     )
-demo.launch()

 import gradio as gr
 import spaces
+# Force the device to CPU
+DEVICE = "cpu"
 print(f"🚀 Running on device: {DEVICE}")
 # --- Global Model Initialization ---
     if MODEL is None:
         print("Model not loaded, initializing...")
         try:
+            # Load the model directly to the specified DEVICE (CPU)
             MODEL = ChatterboxTTS.from_pretrained(DEVICE)
+            if hasattr(MODEL, 'to') and str(getattr(MODEL, 'device', 'cpu')) != DEVICE:
                 MODEL.to(DEVICE)
             print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
         except Exception as e:
 def set_seed(seed: int):
     """Sets the random seed for reproducibility across torch, numpy, and random."""
     torch.manual_seed(seed)
+    # No need for CUDA-specific seed setting
     random.seed(seed)
     np.random.seed(seed)
+# Removed @spaces.GPU decorator as we are targeting CPU
 def generate_tts_audio(
     text_input: str,
     audio_prompt_path_input: str,
         set_seed(int(seed_num_input))
     print(f"Generating audio for text: '{text_input[:50]}...'")
+    # Generate the waveform on the CPU
     wav = current_model.generate(
         text_input[:300],  # Truncate text to max chars
         audio_prompt_path=audio_prompt_path_input,
         cfg_weight=cfgw_input,
     )
     print("Audio generation complete.")
+    return (current_model.sr, wav.squeeze(0).cpu().numpy()) # Ensure tensor is on CPU before converting to numpy
 with gr.Blocks() as demo:
     gr.Markdown(
         """
+        # Chatterbox TTS Demo (CPU Version)
         Generate high-quality speech from text with reference audio styling.
         """
     )
         outputs=[audio_output],
     )
+demo.launch()