eBlessings committed on
Commit
e10864f
·
verified ·
1 Parent(s): a3013f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -2
app.py CHANGED
@@ -13,6 +13,8 @@ import numpy as np
13
  import soundfile as sf
14
  import torchaudio
15
  from cached_path import cached_path
 
 
16
  from transformers import AutoModelForCausalLM, AutoTokenizer
17
 
18
  try:
@@ -534,7 +536,68 @@ with gr.Blocks() as app_multistyle:
534
  inputs=[gen_text_input_multistyle, regular_name] + speech_type_names,
535
  outputs=generate_multistyle_btn,
536
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
 
539
  with gr.Blocks() as app_chat:
540
  gr.Markdown(
@@ -852,8 +915,8 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
852
  )
853
 
854
  gr.TabbedInterface(
855
- [app_tts, app_multistyle, app_chat, app_credits],
856
- ["Basic-TTS", "Multi-Speech", "Voice-Chat", "Credits"],
857
  )
858
 
859
 
 
13
  import soundfile as sf
14
  import torchaudio
15
  from cached_path import cached_path
16
+ from pydub import AudioSegment, silence
17
+ import re
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
 
20
  try:
 
536
  inputs=[gen_text_input_multistyle, regular_name] + speech_type_names,
537
  outputs=generate_multistyle_btn,
538
  )
539
with gr.Blocks() as app_podcast:
    # Two-speaker podcast tab: each speaker supplies a reference voice clip
    # plus its transcript; a script tagged with "<Name>:" markers is then
    # synthesized turn by turn and concatenated into one audio track.
    gr.Markdown("# Podcast Generation")
    speaker1_name = gr.Textbox(label="Speaker 1 Name")
    ref_audio_input1 = gr.Audio(label="Reference Audio (Speaker 1)", type="filepath")
    ref_text_input1 = gr.Textbox(label="Reference Text (Speaker 1)", lines=2)

    speaker2_name = gr.Textbox(label="Speaker 2 Name")
    ref_audio_input2 = gr.Audio(label="Reference Audio (Speaker 2)", type="filepath")
    ref_text_input2 = gr.Textbox(label="Reference Text (Speaker 2)", lines=2)

    script_input = gr.Textbox(label="Podcast Script", lines=10,
                              placeholder="Enter script with speaker names...")

    podcast_model_choice = gr.Radio(
        choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS"
    )
    podcast_remove_silence = gr.Checkbox(label="Remove Silences", value=True)
    generate_podcast_btn = gr.Button("Generate Podcast", variant="primary")
    podcast_output = gr.Audio(label="Generated Podcast")

    def generate_podcast(script, speaker1, ref_audio1, ref_text1,
                         speaker2, ref_audio2, ref_text2, model, remove_silence):
        """Synthesize a two-speaker podcast from a "Name: line" script.

        Splits *script* on "<speaker1>:" / "<speaker2>:" markers, runs each
        turn through ``infer`` with the matching reference voice, and returns
        ``(sample_rate, audio_ndarray)`` for the ``gr.Audio`` output.
        """
        # BUG FIX: the original pattern f"({...}:|({...}:)" had an unbalanced
        # "(" and made re.split raise re.error ("missing ), unterminated
        # subpattern") on every call.  A single capturing group keeps the
        # matched "Name:" tags in the split result.
        pattern = f"({re.escape(speaker1)}:|{re.escape(speaker2)}:)"
        # [1:] drops any preamble text before the first speaker tag.
        speaker_blocks = re.split(pattern, script)[1:]
        generated_audio_segments = []

        # Tokens come back as [tag, text, tag, text, ...]; stop at len-1 so a
        # trailing tag with no following text cannot raise IndexError.
        for i in range(0, len(speaker_blocks) - 1, 2):
            speaker = speaker_blocks[i].strip(":").strip()
            text = speaker_blocks[i + 1].strip()
            if not text:
                continue  # skip empty turns rather than synthesizing nothing

            # Any tag that is not speaker1 falls back to speaker2's voice.
            ref_audio = ref_audio1 if speaker == speaker1 else ref_audio2
            ref_text = ref_text1 if speaker == speaker1 else ref_text2

            audio_result, _, _ = infer(
                ref_audio,
                ref_text,
                text,
                model,
                remove_silence,
                cross_fade_duration=0.15,
                nfe_step=32,
                speed=1.0,
            )
            sr, audio_data = audio_result
            generated_audio_segments.append(audio_data)

        if not generated_audio_segments:
            # np.concatenate([]) raises an opaque ValueError; fail clearly.
            raise gr.Error("No speaker turns found in the script.")

        final_audio = np.concatenate(generated_audio_segments)
        # NOTE(review): assumes every infer() segment is already at the
        # module-level target_sample_rate — confirm infer resamples its output.
        return (target_sample_rate, final_audio)

    generate_podcast_btn.click(
        generate_podcast,
        inputs=[
            script_input,
            speaker1_name,
            ref_audio_input1,
            ref_text_input1,
            speaker2_name,
            ref_audio_input2,
            ref_text_input2,
            podcast_model_choice,
            podcast_remove_silence,
        ],
        outputs=podcast_output,
    )
601
 
602
  with gr.Blocks() as app_chat:
603
  gr.Markdown(
 
915
  )
916
 
917
# Top-level tab layout; the Podcast tab sits between Multi-Speech and Voice-Chat.
gr.TabbedInterface(
    [app_tts, app_multistyle, app_podcast, app_chat, app_credits],
    ["Basic-TTS", "Multi-Speech", "Podcast", "Voice-Chat", "Credits"],
)
921
 
922