import spaces
import gradio as gr
import torch
import os
import tempfile
import torchaudio
from huggingface_hub import hf_hub_download
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
# Accept the Coqui terms of service
os.environ["COQUI_TOS_AGREED"] = "1"
# Set the device to CPU
device = "cpu"
# Download the fine-tuned model files from the Hugging Face Hub
model_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="model.pth")
config_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="config.json")
vocab_path = hf_hub_download(repo_id="RedSparkie/danielmula", filename="vocab.json")
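# Note: hf_hub_download caches each file locally (by default under
# ~/.cache/huggingface/hub) and returns the path to the cached copy, so a
# restart of the Space does not re-download unchanged files.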
# Clear the GPU cache (a no-op on CPU; kept in case GPU is used in the future)
def clear_gpu_cache():
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
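# Usage note: empty_cache() only returns memory held by PyTorch's caching
# allocator to the device; tensors that are still referenced (such as a
# loaded model) are not freed by it.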
# Load the XTTS model
XTTS_MODEL = None

def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
    global XTTS_MODEL
    clear_gpu_cache()
    if not xtts_checkpoint or not xtts_config or not xtts_vocab:
        return "You need to run the previous steps or manually set the `XTTS checkpoint path`, `XTTS config path`, and `XTTS vocab path` fields!"
    # Model configuration
    config = XttsConfig()
    config.load_json(xtts_config)
    # Initialize the model from the config
    XTTS_MODEL = Xtts.init_from_config(config)
    print("Loading XTTS model!")
    # Load the model checkpoint
    XTTS_MODEL.load_checkpoint(config, checkpoint_path=xtts_checkpoint, vocab_path=xtts_vocab, use_deepspeed=False, weights_only=True)
    print("Model Loaded!")
# Run TTS with the loaded model
def run_tts(lang, tts_text, speaker_audio_file):
    if XTTS_MODEL is None or not speaker_audio_file:
        return "You need to run the previous step to load the model!", None
    # Use inference_mode to cut autograd overhead during synthesis
    with torch.inference_mode():
        gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
            audio_path=speaker_audio_file,
            gpt_cond_len=XTTS_MODEL.config.gpt_cond_len,
            max_ref_length=XTTS_MODEL.config.max_ref_len,
            sound_norm_refs=XTTS_MODEL.config.sound_norm_refs,
        )
        out = XTTS_MODEL.inference(
            text=tts_text,
            language=lang,
            gpt_cond_latent=gpt_cond_latent,
            speaker_embedding=speaker_embedding,
            temperature=XTTS_MODEL.config.temperature,
            length_penalty=XTTS_MODEL.config.length_penalty,
            repetition_penalty=XTTS_MODEL.config.repetition_penalty,
            top_k=XTTS_MODEL.config.top_k,
            top_p=XTTS_MODEL.config.top_p,
        )
    # Save the generated audio to a temporary file
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        out["wav"] = torch.tensor(out["wav"]).unsqueeze(0)
        out_path = fp.name
        torchaudio.save(out_path, out["wav"], 24000)
    print("Speech generated!")
    return out_path, speaker_audio_file
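# Example direct call (a sketch; "reference.wav" is a hypothetical clip of the
# target voice, ideally a few seconds of clean speech):
# wav_path, ref = run_tts(lang="es", tts_text="Hola, esto es una prueba.", speaker_audio_file="reference.wav")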
# Gradio handler: (re)load the model, then synthesize with the uploaded reference voice
@spaces.GPU
def generate(text, audio):
    load_model(model_path, config_path, vocab_path)
    out_path, speaker_audio_file = run_tts(lang='es', tts_text=text, speaker_audio_file=audio)
    return out_path
# Set up the Gradio interface
demo = gr.Interface(
    fn=generate,
    inputs=[gr.Textbox(label='Frase a generar'), gr.Audio(type='filepath', label='Voz de referencia')],
    outputs=gr.Audio(type='filepath')
)
# Launch the interface
demo.launch()
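# For local debugging, launch() also accepts flags such as debug=True to print
# tracebacks to the console; on Spaces the plain demo.launch() above is enough.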