Update app.py
app.py CHANGED
@@ -132,10 +132,13 @@ def arabic_sentiment_analysis(text):
         print(f"Error during Arabic sentiment analysis: {e}")
         return "neutral"
 
+import tempfile
+import numpy as np
+
 def tts_interface(text_input, speaker_audio):
     print("--- tts_interface function called ---")
     print(f"Text Input: {text_input}")
-    print(f"Speaker Audio
+    print(f"Speaker Audio: {speaker_audio}") # Log the tuple
 
     if model is None:
         print("Error: TTS model failed to load.")
@@ -166,14 +169,23 @@ def tts_interface(text_input, speaker_audio):
     else:
         print("Sentiment analyzer not loaded.")
 
+    temp_audio_file = None
     try:
         print("Attempting to generate audio using model.inference...")
-        #
+        # Save the uploaded audio to a temporary file
+        sampling_rate, audio_data = speaker_audio
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            temp_audio_path = tmp_file.name
+        write(temp_audio_path, sampling_rate, audio_data)
+        print(f"Temporary audio file saved to: {temp_audio_path}")
+        temp_audio_file = temp_audio_path
+
+        # Extract speaker embedding using the temporary file path
         try:
             (
                 gpt_cond_latent,
                 speaker_embedding,
-            ) = model.get_conditioning_latents(audio_path=
+            ) = model.get_conditioning_latents(audio_path=temp_audio_file, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
             print("Speaker embedding extracted successfully.")
         except Exception as e:
             print("Speaker encoding error:", str(e))
@@ -185,7 +197,7 @@ def tts_interface(text_input, speaker_audio):
             language=language,
             gpt_cond_latent=gpt_cond_latent,
             speaker_embedding=speaker_embedding,
-            emotion=emotion #
+            # emotion=emotion # Emotion handling might need further investigation
         )
 
         # Save the generated audio
@@ -195,6 +207,13 @@ def tts_interface(text_input, speaker_audio):
     except Exception as e:
         print(f"Error during TTS inference: {e}")
         return f"Error during TTS inference: {e}"
+    finally:
+        # Clean up the temporary audio file
+        if temp_audio_file:
+            os.remove(temp_audio_file)
+            print(f"Temporary audio file removed: {temp_audio_file}")
+
+from scipy.io.wavfile import write # Ensure this import is present
 
 iface = gr.Interface(
     fn=tts_interface,