moustafa1-1 committed on
Commit
7f7afee
·
verified ·
1 Parent(s): fa36699

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -167,19 +167,34 @@ def tts_interface(text_input, speaker_audio):
167
  print("Sentiment analyzer not loaded.")
168
 
169
  try:
170
- print("Attempting to generate audio...")
171
- model.tts_to_file(
 
 
 
 
 
 
 
 
 
 
 
 
172
  text=text_input,
173
- file_path=audio_output_path,
174
- emotion=emotion,
175
- speaker_wav=speaker_audio,
176
- language=language
177
  )
 
 
 
178
  print(f"Audio generated and saved to: {audio_output_path}")
179
  return audio_output_path
180
  except Exception as e:
181
- print(f"Error during TTS: {e}")
182
- return f"Error during TTS: {e}"
183
 
184
  iface = gr.Interface(
185
  fn=tts_interface,
 
167
  print("Sentiment analyzer not loaded.")
168
 
169
  try:
170
+ print("Attempting to generate audio using model.inference...")
171
+ # Extract speaker embedding
172
+ try:
173
+ (
174
+ gpt_cond_latent,
175
+ speaker_embedding,
176
+ ) = model.get_conditioning_latents(audio_path=speaker_audio, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
177
+ print("Speaker embedding extracted successfully.")
178
+ except Exception as e:
179
+ print("Speaker encoding error:", str(e))
180
+ return f"Error during speaker encoding: {e}"
181
+
182
+ # Perform inference
183
+ out = model.inference(
184
  text=text_input,
185
+ language=language,
186
+ gpt_cond_latent=gpt_cond_latent,
187
+ speaker_embedding=speaker_embedding,
188
+ emotion=emotion # You might need to adjust how emotion is used, Xtts might not directly take an 'emotion' parameter like this.
189
  )
190
+
191
+ # Save the generated audio
192
+ torchaudio.save(audio_output_path, torch.tensor(out["wav"]).unsqueeze(0).cpu(), 24000)
193
  print(f"Audio generated and saved to: {audio_output_path}")
194
  return audio_output_path
195
  except Exception as e:
196
+ print(f"Error during TTS inference: {e}")
197
+ return f"Error during TTS inference: {e}"
198
 
199
  iface = gr.Interface(
200
  fn=tts_interface,