Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ import numpy as np
|
|
13 |
import soundfile as sf
|
14 |
import torchaudio
|
15 |
from cached_path import cached_path
|
|
|
|
|
16 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
17 |
|
18 |
try:
|
@@ -534,7 +536,68 @@ with gr.Blocks() as app_multistyle:
|
|
534 |
inputs=[gen_text_input_multistyle, regular_name] + speech_type_names,
|
535 |
outputs=generate_multistyle_btn,
|
536 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
537 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
539 |
with gr.Blocks() as app_chat:
|
540 |
gr.Markdown(
|
@@ -852,8 +915,8 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
|
|
852 |
)
|
853 |
|
854 |
gr.TabbedInterface(
|
855 |
-
[app_tts, app_multistyle, app_chat, app_credits],
|
856 |
-
["Basic-TTS", "Multi-Speech", "Voice-Chat", "Credits"],
|
857 |
)
|
858 |
|
859 |
|
|
|
13 |
import soundfile as sf
|
14 |
import torchaudio
|
15 |
from cached_path import cached_path
|
16 |
+
from pydub import AudioSegment, silence
|
17 |
+
import re
|
18 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
19 |
|
20 |
try:
|
|
|
536 |
inputs=[gen_text_input_multistyle, regular_name] + speech_type_names,
|
537 |
outputs=generate_multistyle_btn,
|
538 |
)
|
539 |
+
with gr.Blocks() as app_podcast:
    # "Podcast" tab: two named speakers, each with a reference clip and its
    # transcript, plus a script in which every turn is introduced by
    # "<speaker name>:".
    gr.Markdown("# Podcast Generation")
    speaker1_name = gr.Textbox(label="Speaker 1 Name")
    ref_audio_input1 = gr.Audio(label="Reference Audio (Speaker 1)", type="filepath")
    ref_text_input1 = gr.Textbox(label="Reference Text (Speaker 1)", lines=2)

    speaker2_name = gr.Textbox(label="Speaker 2 Name")
    ref_audio_input2 = gr.Audio(label="Reference Audio (Speaker 2)", type="filepath")
    ref_text_input2 = gr.Textbox(label="Reference Text (Speaker 2)", lines=2)

    script_input = gr.Textbox(
        label="Podcast Script",
        lines=10,
        placeholder="Enter script with speaker names...",
    )

    podcast_model_choice = gr.Radio(
        choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS"
    )
    podcast_remove_silence = gr.Checkbox(label="Remove Silences", value=True)
    generate_podcast_btn = gr.Button("Generate Podcast", variant="primary")
    podcast_output = gr.Audio(label="Generated Podcast")

    def generate_podcast(
        script,
        speaker1,
        ref_audio1,
        ref_text1,
        speaker2,
        ref_audio2,
        ref_text2,
        model,
        remove_silence,
    ):
        """Synthesize a two-speaker script turn by turn and join the audio.

        The script is split on every occurrence of ``"<speaker1>:"`` or
        ``"<speaker2>:"``; the text that follows a marker (up to the next
        marker) is spoken with that speaker's reference audio/text. Anything
        before the first marker is ignored.

        Args:
            script: Full script text containing the ``"<name>:"`` markers.
            speaker1: Name of the first speaker as written in the script.
            ref_audio1: Reference audio passed to ``infer`` for speaker 1.
            ref_text1: Reference transcript passed to ``infer`` for speaker 1.
            speaker2: Name of the second speaker as written in the script.
            ref_audio2: Reference audio passed to ``infer`` for speaker 2.
            ref_text2: Reference transcript passed to ``infer`` for speaker 2.
            model: TTS model choice, forwarded to ``infer`` unchanged.
            remove_silence: Forwarded to ``infer`` unchanged.

        Returns:
            ``(target_sample_rate, samples)`` with every turn concatenated in
            script order, or ``None`` when the script is empty, a speaker
            name is blank, or no non-empty turn was found.
        """
        # Tolerate stray whitespace (or an untouched textbox) in the names.
        speaker1 = (speaker1 or "").strip()
        speaker2 = (speaker2 or "").strip()
        if not script or not speaker1 or not speaker2:
            return None

        # A single capture group makes re.split keep the matched name, giving
        # [preamble, name, text, name, text, ...]. The colon sits outside the
        # group so the captured name compares directly against speaker1.
        turn_marker = re.compile(f"({re.escape(speaker1)}|{re.escape(speaker2)}):")
        pieces = turn_marker.split(script)[1:]

        generated_audio_segments = []
        for speaker, text in zip(pieces[0::2], pieces[1::2]):
            text = text.strip()
            if not text:
                # A marker immediately followed by another marker: nothing to say.
                continue

            if speaker == speaker1:
                ref_audio, ref_text = ref_audio1, ref_text1
            else:
                ref_audio, ref_text = ref_audio2, ref_text2

            audio_result, _, _ = infer(
                ref_audio,
                ref_text,
                text,
                model,
                remove_silence,
                cross_fade_duration=0.15,
                nfe_step=32,
                speed=1.0,
            )
            # audio_result is a (sample_rate, samples) pair; only the samples
            # are kept and the module-level target_sample_rate is reported.
            _, audio_data = audio_result
            generated_audio_segments.append(audio_data)

        if not generated_audio_segments:
            # np.concatenate raises ValueError on an empty list.
            return None

        final_audio = np.concatenate(generated_audio_segments)
        return (target_sample_rate, final_audio)

    # The order of `inputs` must match generate_podcast's positional parameters.
    generate_podcast_btn.click(
        generate_podcast,
        inputs=[
            script_input,
            speaker1_name,
            ref_audio_input1,
            ref_text_input1,
            speaker2_name,
            ref_audio_input2,
            ref_text_input2,
            podcast_model_choice,
            podcast_remove_silence,
        ],
        outputs=podcast_output,
    )
|
601 |
|
602 |
with gr.Blocks() as app_chat:
|
603 |
gr.Markdown(
|
|
|
915 |
)
|
916 |
|
917 |
# Top-level tab bar: each Blocks app in `interface_list` is paired by position
# with its label in `tab_names`, so the two lists must stay the same length
# and in the same order.
gr.TabbedInterface(
    interface_list=[app_tts, app_multistyle, app_podcast, app_chat, app_credits],
    tab_names=["Basic-TTS", "Multi-Speech", "Podcast", "Voice-Chat", "Credits"],
)
|
921 |
|
922 |
|