Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,8 @@ from chatterbox.src.chatterbox.tts import ChatterboxTTS
|
|
5 |
import gradio as gr
|
6 |
import spaces
|
7 |
|
8 |
-
|
|
|
9 |
print(f"🚀 Running on device: {DEVICE}")
|
10 |
|
11 |
# --- Global Model Initialization ---
|
@@ -18,8 +19,9 @@ def get_or_load_model():
|
|
18 |
if MODEL is None:
|
19 |
print("Model not loaded, initializing...")
|
20 |
try:
|
|
|
21 |
MODEL = ChatterboxTTS.from_pretrained(DEVICE)
|
22 |
-
if hasattr(MODEL, 'to') and str(MODEL.device) != DEVICE:
|
23 |
MODEL.to(DEVICE)
|
24 |
print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
|
25 |
except Exception as e:
|
@@ -36,13 +38,11 @@ except Exception as e:
|
|
36 |
def set_seed(seed: int):
|
37 |
"""Sets the random seed for reproducibility across torch, numpy, and random."""
|
38 |
torch.manual_seed(seed)
|
39 |
-
|
40 |
-
torch.cuda.manual_seed(seed)
|
41 |
-
torch.cuda.manual_seed_all(seed)
|
42 |
random.seed(seed)
|
43 |
np.random.seed(seed)
|
44 |
|
45 |
-
@spaces.GPU
|
46 |
def generate_tts_audio(
|
47 |
text_input: str,
|
48 |
audio_prompt_path_input: str,
|
@@ -74,6 +74,7 @@ def generate_tts_audio(
|
|
74 |
set_seed(int(seed_num_input))
|
75 |
|
76 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
|
|
77 |
wav = current_model.generate(
|
78 |
text_input[:300], # Truncate text to max chars
|
79 |
audio_prompt_path=audio_prompt_path_input,
|
@@ -82,12 +83,12 @@ def generate_tts_audio(
|
|
82 |
cfg_weight=cfgw_input,
|
83 |
)
|
84 |
print("Audio generation complete.")
|
85 |
-
return (current_model.sr, wav.squeeze(0).numpy())
|
86 |
|
87 |
with gr.Blocks() as demo:
|
88 |
gr.Markdown(
|
89 |
"""
|
90 |
-
# Chatterbox TTS Demo
|
91 |
Generate high-quality speech from text with reference audio styling.
|
92 |
"""
|
93 |
)
|
@@ -133,4 +134,4 @@ with gr.Blocks() as demo:
|
|
133 |
outputs=[audio_output],
|
134 |
)
|
135 |
|
136 |
-
demo.launch()
|
|
|
5 |
import gradio as gr
|
6 |
import spaces
|
7 |
|
8 |
+
# Force the device to CPU
|
9 |
+
DEVICE = "cpu"
|
10 |
print(f"🚀 Running on device: {DEVICE}")
|
11 |
|
12 |
# --- Global Model Initialization ---
|
|
|
19 |
if MODEL is None:
|
20 |
print("Model not loaded, initializing...")
|
21 |
try:
|
22 |
+
# Load the model directly to the specified DEVICE (CPU)
|
23 |
MODEL = ChatterboxTTS.from_pretrained(DEVICE)
|
24 |
+
if hasattr(MODEL, 'to') and str(getattr(MODEL, 'device', 'cpu')) != DEVICE:
|
25 |
MODEL.to(DEVICE)
|
26 |
print(f"Model loaded successfully. Internal device: {getattr(MODEL, 'device', 'N/A')}")
|
27 |
except Exception as e:
|
|
|
38 |
def set_seed(seed: int):
|
39 |
"""Sets the random seed for reproducibility across torch, numpy, and random."""
|
40 |
torch.manual_seed(seed)
|
41 |
+
# No need for CUDA-specific seed setting
|
|
|
|
|
42 |
random.seed(seed)
|
43 |
np.random.seed(seed)
|
44 |
|
45 |
+
# Removed @spaces.GPU decorator as we are targeting CPU
|
46 |
def generate_tts_audio(
|
47 |
text_input: str,
|
48 |
audio_prompt_path_input: str,
|
|
|
74 |
set_seed(int(seed_num_input))
|
75 |
|
76 |
print(f"Generating audio for text: '{text_input[:50]}...'")
|
77 |
+
# Generate the waveform on the CPU
|
78 |
wav = current_model.generate(
|
79 |
text_input[:300], # Truncate text to max chars
|
80 |
audio_prompt_path=audio_prompt_path_input,
|
|
|
83 |
cfg_weight=cfgw_input,
|
84 |
)
|
85 |
print("Audio generation complete.")
|
86 |
+
return (current_model.sr, wav.squeeze(0).cpu().numpy()) # Ensure tensor is on CPU before converting to numpy
|
87 |
|
88 |
with gr.Blocks() as demo:
|
89 |
gr.Markdown(
|
90 |
"""
|
91 |
+
# Chatterbox TTS Demo (CPU Version)
|
92 |
Generate high-quality speech from text with reference audio styling.
|
93 |
"""
|
94 |
)
|
|
|
134 |
outputs=[audio_output],
|
135 |
)
|
136 |
|
137 |
+
demo.launch()
|