Spaces:
Runtime error
Runtime error
backtrack to wav
Browse files
app.py
CHANGED
|
@@ -146,33 +146,9 @@ def generate_podcast(topic: str):
|
|
| 146 |
t0 = time.time()
|
| 147 |
ref_s = pipeline_voice[len(ps) - 1]
|
| 148 |
audio_numpy = kmodel(ps, ref_s, speed).numpy()
|
| 149 |
-
|
| 150 |
-
# Convert numpy array to MP3
|
| 151 |
-
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
|
| 152 |
-
sf.write(temp_wav.name, audio_numpy, sr)
|
| 153 |
-
temp_wav_path = temp_wav.name
|
| 154 |
-
|
| 155 |
-
# Use pydub to convert WAV to MP3
|
| 156 |
-
audio_segment = AudioSegment.from_wav(temp_wav_path)
|
| 157 |
-
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_mp3:
|
| 158 |
-
audio_segment.export(temp_mp3.name, format="mp3")
|
| 159 |
-
temp_mp3_path = temp_mp3.name
|
| 160 |
-
|
| 161 |
-
# Read the MP3 data
|
| 162 |
-
with open(temp_mp3_path, 'rb') as mp3_file:
|
| 163 |
-
mp3_data = mp3_file.read()
|
| 164 |
-
|
| 165 |
-
# Clean up temporary files
|
| 166 |
-
os.unlink(temp_wav_path)
|
| 167 |
-
os.unlink(temp_mp3_path)
|
| 168 |
-
|
| 169 |
-
# Yield MP3 data instead of numpy array
|
| 170 |
-
yield (sr, mp3_data)
|
| 171 |
-
|
| 172 |
t1 = time.time()
|
| 173 |
-
print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds.")
|
| 174 |
-
|
| 175 |
-
return temp_mp3_path # Return the path to the MP3 file
|
| 176 |
|
| 177 |
EXAMPLES = [
|
| 178 |
["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
|
|
@@ -190,7 +166,13 @@ Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Llama-3.3-
|
|
| 190 |
placeholder="You can leave this blank for a general discussion.",
|
| 191 |
),
|
| 192 |
],
|
| 193 |
-
outputs=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
theme=gr.themes.Soft(),
|
| 195 |
submit_btn="Generate podcast 🎙️",
|
| 196 |
)
|
|
|
|
| 146 |
t0 = time.time()
|
| 147 |
ref_s = pipeline_voice[len(ps) - 1]
|
| 148 |
audio_numpy = kmodel(ps, ref_s, speed).numpy()
|
| 149 |
+
yield (sr, audio_numpy)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
t1 = time.time()
|
| 151 |
+
print(f"PROCESSED '{utterance}' in {int(t1-t0)} seconds. {audio_numpy.shape}")
|
|
|
|
|
|
|
| 152 |
|
| 153 |
EXAMPLES = [
|
| 154 |
["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
|
|
|
|
| 166 |
placeholder="You can leave this blank for a general discussion.",
|
| 167 |
),
|
| 168 |
],
|
| 169 |
+
outputs=[
|
| 170 |
+
gr.Audio(
|
| 171 |
+
label="Listen to your podcast! 🔊",
|
| 172 |
+
format="wav",
|
| 173 |
+
streaming=True,
|
| 174 |
+
),
|
| 175 |
+
],
|
| 176 |
theme=gr.themes.Soft(),
|
| 177 |
submit_btn="Generate podcast 🎙️",
|
| 178 |
)
|