moustafa1-1 committed on
Commit
c06ea85
·
verified ·
1 Parent(s): 7f7afee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -132,10 +132,13 @@ def arabic_sentiment_analysis(text):
132
  print(f"Error during Arabic sentiment analysis: {e}")
133
  return "neutral"
134
 
 
 
 
135
  def tts_interface(text_input, speaker_audio):
136
  print("--- tts_interface function called ---")
137
  print(f"Text Input: {text_input}")
138
- print(f"Speaker Audio Path: {speaker_audio}")
139
 
140
  if model is None:
141
  print("Error: TTS model failed to load.")
@@ -166,14 +169,23 @@ def tts_interface(text_input, speaker_audio):
166
  else:
167
  print("Sentiment analyzer not loaded.")
168
 
 
169
  try:
170
  print("Attempting to generate audio using model.inference...")
171
- # Extract speaker embedding
 
 
 
 
 
 
 
 
172
  try:
173
  (
174
  gpt_cond_latent,
175
  speaker_embedding,
176
- ) = model.get_conditioning_latents(audio_path=speaker_audio, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
177
  print("Speaker embedding extracted successfully.")
178
  except Exception as e:
179
  print("Speaker encoding error:", str(e))
@@ -185,7 +197,7 @@ def tts_interface(text_input, speaker_audio):
185
  language=language,
186
  gpt_cond_latent=gpt_cond_latent,
187
  speaker_embedding=speaker_embedding,
188
- emotion=emotion # You might need to adjust how emotion is used, Xtts might not directly take an 'emotion' parameter like this.
189
  )
190
 
191
  # Save the generated audio
@@ -195,6 +207,13 @@ def tts_interface(text_input, speaker_audio):
195
  except Exception as e:
196
  print(f"Error during TTS inference: {e}")
197
  return f"Error during TTS inference: {e}"
 
 
 
 
 
 
 
198
 
199
  iface = gr.Interface(
200
  fn=tts_interface,
 
132
  print(f"Error during Arabic sentiment analysis: {e}")
133
  return "neutral"
134
 
135
+ import tempfile
136
+ import numpy as np
137
+
138
  def tts_interface(text_input, speaker_audio):
139
  print("--- tts_interface function called ---")
140
  print(f"Text Input: {text_input}")
141
+ print(f"Speaker Audio: {speaker_audio}") # Log the tuple
142
 
143
  if model is None:
144
  print("Error: TTS model failed to load.")
 
169
  else:
170
  print("Sentiment analyzer not loaded.")
171
 
172
+ temp_audio_file = None
173
  try:
174
  print("Attempting to generate audio using model.inference...")
175
+ # Save the uploaded audio to a temporary file
176
+ sampling_rate, audio_data = speaker_audio
177
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
178
+ temp_audio_path = tmp_file.name
179
+ write(temp_audio_path, sampling_rate, audio_data)
180
+ print(f"Temporary audio file saved to: {temp_audio_path}")
181
+ temp_audio_file = temp_audio_path
182
+
183
+ # Extract speaker embedding using the temporary file path
184
  try:
185
  (
186
  gpt_cond_latent,
187
  speaker_embedding,
188
+ ) = model.get_conditioning_latents(audio_path=temp_audio_file, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
189
  print("Speaker embedding extracted successfully.")
190
  except Exception as e:
191
  print("Speaker encoding error:", str(e))
 
197
  language=language,
198
  gpt_cond_latent=gpt_cond_latent,
199
  speaker_embedding=speaker_embedding,
200
+ # emotion=emotion # Emotion handling might need further investigation
201
  )
202
 
203
  # Save the generated audio
 
207
  except Exception as e:
208
  print(f"Error during TTS inference: {e}")
209
  return f"Error during TTS inference: {e}"
210
+ finally:
211
+ # Clean up the temporary audio file
212
+ if temp_audio_file:
213
+ os.remove(temp_audio_file)
214
+ print(f"Temporary audio file removed: {temp_audio_file}")
215
+
216
+ from scipy.io.wavfile import write # Ensure this import is present
217
 
218
  iface = gr.Interface(
219
  fn=tts_interface,