moustafa1-1 committed on
Commit
c06ea85
·
verified ·
1 Parent(s): 7f7afee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -132,10 +132,13 @@ def arabic_sentiment_analysis(text):
132
  print(f"Error during Arabic sentiment analysis: {e}")
133
  return "neutral"
134
 
 
 
 
135
  def tts_interface(text_input, speaker_audio):
136
  print("--- tts_interface function called ---")
137
  print(f"Text Input: {text_input}")
138
- print(f"Speaker Audio Path: {speaker_audio}")
139
 
140
  if model is None:
141
  print("Error: TTS model failed to load.")
@@ -166,14 +169,23 @@ def tts_interface(text_input, speaker_audio):
166
  else:
167
  print("Sentiment analyzer not loaded.")
168
 
 
169
  try:
170
  print("Attempting to generate audio using model.inference...")
171
- # Extract speaker embedding
 
 
 
 
 
 
 
 
172
  try:
173
  (
174
  gpt_cond_latent,
175
  speaker_embedding,
176
- ) = model.get_conditioning_latents(audio_path=speaker_audio, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
177
  print("Speaker embedding extracted successfully.")
178
  except Exception as e:
179
  print("Speaker encoding error:", str(e))
@@ -185,7 +197,7 @@ def tts_interface(text_input, speaker_audio):
185
  language=language,
186
  gpt_cond_latent=gpt_cond_latent,
187
  speaker_embedding=speaker_embedding,
188
- emotion=emotion # You might need to adjust how emotion is used, Xtts might not directly take an 'emotion' parameter like this.
189
  )
190
 
191
  # Save the generated audio
@@ -195,6 +207,13 @@ def tts_interface(text_input, speaker_audio):
195
  except Exception as e:
196
  print(f"Error during TTS inference: {e}")
197
  return f"Error during TTS inference: {e}"
 
 
 
 
 
 
 
198
 
199
  iface = gr.Interface(
200
  fn=tts_interface,
 
132
  print(f"Error during Arabic sentiment analysis: {e}")
133
  return "neutral"
134
 
135
+ import tempfile
136
+ import numpy as np
137
+
138
  def tts_interface(text_input, speaker_audio):
139
  print("--- tts_interface function called ---")
140
  print(f"Text Input: {text_input}")
141
+ print(f"Speaker Audio: {speaker_audio}") # Log the tuple
142
 
143
  if model is None:
144
  print("Error: TTS model failed to load.")
 
169
  else:
170
  print("Sentiment analyzer not loaded.")
171
 
172
+ temp_audio_file = None
173
  try:
174
  print("Attempting to generate audio using model.inference...")
175
+ # Save the uploaded audio to a temporary file
176
+ sampling_rate, audio_data = speaker_audio
177
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
178
+ temp_audio_path = tmp_file.name
179
+ write(temp_audio_path, sampling_rate, audio_data)
180
+ print(f"Temporary audio file saved to: {temp_audio_path}")
181
+ temp_audio_file = temp_audio_path
182
+
183
+ # Extract speaker embedding using the temporary file path
184
  try:
185
  (
186
  gpt_cond_latent,
187
  speaker_embedding,
188
+ ) = model.get_conditioning_latents(audio_path=temp_audio_file, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
189
  print("Speaker embedding extracted successfully.")
190
  except Exception as e:
191
  print("Speaker encoding error:", str(e))
 
197
  language=language,
198
  gpt_cond_latent=gpt_cond_latent,
199
  speaker_embedding=speaker_embedding,
200
+ # emotion=emotion # Emotion handling might need further investigation
201
  )
202
 
203
  # Save the generated audio
 
207
  except Exception as e:
208
  print(f"Error during TTS inference: {e}")
209
  return f"Error during TTS inference: {e}"
210
+ finally:
211
+ # Clean up the temporary audio file
212
+ if temp_audio_file:
213
+ os.remove(temp_audio_file)
214
+ print(f"Temporary audio file removed: {temp_audio_file}")
215
+
216
+ from scipy.io.wavfile import write # Ensure this import is present
217
 
218
  iface = gr.Interface(
219
  fn=tts_interface,