Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -167,19 +167,34 @@ def tts_interface(text_input, speaker_audio):
|
|
167 |
print("Sentiment analyzer not loaded.")
|
168 |
|
169 |
try:
|
170 |
-
print("Attempting to generate audio...")
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
text=text_input,
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
)
|
|
|
|
|
|
|
178 |
print(f"Audio generated and saved to: {audio_output_path}")
|
179 |
return audio_output_path
|
180 |
except Exception as e:
|
181 |
-
print(f"Error during TTS: {e}")
|
182 |
-
return f"Error during TTS: {e}"
|
183 |
|
184 |
iface = gr.Interface(
|
185 |
fn=tts_interface,
|
|
|
167 |
print("Sentiment analyzer not loaded.")
|
168 |
|
169 |
try:
|
170 |
+
print("Attempting to generate audio using model.inference...")
|
171 |
+
# Extract speaker embedding
|
172 |
+
try:
|
173 |
+
(
|
174 |
+
gpt_cond_latent,
|
175 |
+
speaker_embedding,
|
176 |
+
) = model.get_conditioning_latents(audio_path=speaker_audio, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
|
177 |
+
print("Speaker embedding extracted successfully.")
|
178 |
+
except Exception as e:
|
179 |
+
print("Speaker encoding error:", str(e))
|
180 |
+
return f"Error during speaker encoding: {e}"
|
181 |
+
|
182 |
+
# Perform inference
|
183 |
+
out = model.inference(
|
184 |
text=text_input,
|
185 |
+
language=language,
|
186 |
+
gpt_cond_latent=gpt_cond_latent,
|
187 |
+
speaker_embedding=speaker_embedding,
|
188 |
+
emotion=emotion # You might need to adjust how emotion is used, Xtts might not directly take an 'emotion' parameter like this.
|
189 |
)
|
190 |
+
|
191 |
+
# Save the generated audio
|
192 |
+
torchaudio.save(audio_output_path, torch.tensor(out["wav"]).unsqueeze(0).cpu(), 24000)
|
193 |
print(f"Audio generated and saved to: {audio_output_path}")
|
194 |
return audio_output_path
|
195 |
except Exception as e:
|
196 |
+
print(f"Error during TTS inference: {e}")
|
197 |
+
return f"Error during TTS inference: {e}"
|
198 |
|
199 |
iface = gr.Interface(
|
200 |
fn=tts_interface,
|