eBlessings committed on
Commit
4feee2b
·
verified ·
1 Parent(s): cd90725

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py CHANGED
@@ -308,6 +308,97 @@ with gr.Blocks() as app_podcast:
308
  ],
309
  outputs=podcast_output
310
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
  # ========== REST OF ORIGINAL CODE (UNCHANGED BELOW) ========== #
313
  # [Main app configuration, other tabs (TTS, Multistyle, Chat, Credits)]
 
308
  ],
309
  outputs=podcast_output
310
  )
311
+ with gr.Blocks() as app_credits:
312
+ gr.Markdown("""
313
+ # Credits
314
+
315
+ * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
316
+ * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
317
+ * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
318
+ """)
319
+
320
+ with gr.Blocks() as app_tts:
321
+ gr.Markdown("# Batched TTS")
322
+ ref_audio_input = gr.Audio(label="Reference Audio", type="filepath")
323
+ gen_text_input = gr.Textbox(label="Text to Generate", lines=10)
324
+ generate_btn = gr.Button("Synthesize", variant="primary")
325
+ with gr.Accordion("Advanced Settings", open=False):
326
+ ref_text_input = gr.Textbox(
327
+ label="Reference Text",
328
+ info="Leave blank to automatically transcribe the reference audio. If you enter text it will override automatic transcription.",
329
+ lines=2,
330
+ )
331
+ remove_silence = gr.Checkbox(
332
+ label="Remove Silences",
333
+ info="The model tends to produce silences, especially on longer audio. We can manually remove silences if needed. Note that this is an experimental feature and may produce strange results. This will also increase generation time.",
334
+ value=False,
335
+ )
336
+ speed_slider = gr.Slider(
337
+ label="Speed",
338
+ minimum=0.3,
339
+ maximum=2.0,
340
+ value=1.0,
341
+ step=0.1,
342
+ info="Adjust the speed of the audio.",
343
+ )
344
+ nfe_slider = gr.Slider(
345
+ label="NFE Steps",
346
+ minimum=4,
347
+ maximum=64,
348
+ value=32,
349
+ step=2,
350
+ info="Set the number of denoising steps.",
351
+ )
352
+ cross_fade_duration_slider = gr.Slider(
353
+ label="Cross-Fade Duration (s)",
354
+ minimum=0.0,
355
+ maximum=1.0,
356
+ value=0.15,
357
+ step=0.01,
358
+ info="Set the duration of the cross-fade between audio clips.",
359
+ )
360
+
361
+ audio_output = gr.Audio(label="Synthesized Audio")
362
+ spectrogram_output = gr.Image(label="Spectrogram")
363
+
364
+ @gpu_decorator
365
+ def basic_tts(
366
+ ref_audio_input,
367
+ ref_text_input,
368
+ gen_text_input,
369
+ remove_silence,
370
+ cross_fade_duration_slider,
371
+ nfe_slider,
372
+ speed_slider,
373
+ ):
374
+ audio_out, spectrogram_path, ref_text_out = infer(
375
+ ref_audio_input,
376
+ ref_text_input,
377
+ gen_text_input,
378
+ tts_model_choice,
379
+ remove_silence,
380
+ cross_fade_duration=cross_fade_duration_slider,
381
+ nfe_step=nfe_slider,
382
+ speed=speed_slider,
383
+ )
384
+ return audio_out, spectrogram_path, ref_text_out
385
+
386
+ generate_btn.click(
387
+ basic_tts,
388
+ inputs=[
389
+ ref_audio_input,
390
+ ref_text_input,
391
+ gen_text_input,
392
+ remove_silence,
393
+ cross_fade_duration_slider,
394
+ nfe_slider,
395
+ speed_slider,
396
+ ],
397
+ outputs=[audio_output, spectrogram_output, ref_text_input],
398
+ )
399
+ with gr.Blocks() as app_multistyle:
400
+ gr.Markdown("# Multiple Speech-Type Generation")
401
+ # ... [Keep original multistyle interface unchanged] ...
402
 
403
  # ========== REST OF ORIGINAL CODE (UNCHANGED BELOW) ========== #
404
  # [Main app configuration, other tabs (TTS, Multistyle, Chat, Credits)]