Spaces:

mrfakename
/

E2-F5-TTS

Running on Zero

mrfakename committed on Jul 26

Commit

b7a138c

verified ·

1 Parent(s): 95ed3d1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,12 +34,13 @@ DEFAULT_TTS_MODEL_CFG = [
 # Load vocoder and models on module load
 vocoder = load_vocoder()
-F5TTS_ema_model = load_model(
     DiT,
     json.loads(DEFAULT_TTS_MODEL_CFG[2]),
     str(cached_path(DEFAULT_TTS_MODEL_CFG[0]))
 )
-E2TTS_ema_model = load_model(
     UNetT,
     dict(dim=1024, depth=24, heads=16, ff_mult=4, text_mask_padding=False, pe_attn_head=1),
     str(cached_path("hf://SWivid/E2-TTS/E2TTS_Base/model_1200000.safetensors"))
@@ -72,11 +73,6 @@ with gr.Blocks() as app:
     audio_output = gr.Audio(label="Synthesized Audio")
     spectrogram_output = gr.Image(label="Spectrogram")
-    model_cache = {
-        DEFAULT_TTS_MODEL: F5TTS_ema_model,
-        "E2-TTS": E2TTS_ema_model
-    }
     @gpu_decorator
     def infer(
         ref_audio_orig,
@@ -119,7 +115,7 @@ with gr.Blocks() as app:
                 pre_custom_path = model[1]
             ema_model = custom_ema_model
         else:
-            ema_model = model_cache.get(model, F5TTS_ema_model)
         final_wave, final_sample_rate, combined_spectrogram = infer_process(
             ref_audio,

 # Load vocoder and models on module load
 vocoder = load_vocoder()
+model_cache = {}
+model_cache[DEFAULT_TTS_MODEL] = load_model(
     DiT,
     json.loads(DEFAULT_TTS_MODEL_CFG[2]),
     str(cached_path(DEFAULT_TTS_MODEL_CFG[0]))
 )
+model_cache["E2-TTS"] = load_model(
     UNetT,
     dict(dim=1024, depth=24, heads=16, ff_mult=4, text_mask_padding=False, pe_attn_head=1),
     str(cached_path("hf://SWivid/E2-TTS/E2TTS_Base/model_1200000.safetensors"))
     audio_output = gr.Audio(label="Synthesized Audio")
     spectrogram_output = gr.Image(label="Spectrogram")
     @gpu_decorator
     def infer(
         ref_audio_orig,
                 pre_custom_path = model[1]
             ema_model = custom_ema_model
         else:
+            ema_model = model_cache.get(model, model_cache[DEFAULT_TTS_MODEL])
         final_wave, final_sample_rate, combined_spectrogram = infer_process(
             ref_audio,