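# Page-header residue captured with this file (apparently not part of app.py itself):
# author avatar "HugoZeballos's picture"; commit message "Update app.py";
# commit 96b52c7 (verified).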
import gradio as gr
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
import librosa
import torch
import os
from huggingface_hub import login
# Read the Hugging Face token from the environment and log in with it.
# The app refuses to start without a token: a missing HF_TOKEN raises ValueError.
token = os.getenv("HF_TOKEN")
if not token:
    raise ValueError("El token de Hugging Face no está configurado en las variables de entorno.")
login(token=token)
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model repository on Hugging Face; adjust it to the name of your own model.
repo_name = "HugoZeballos/rapa_nui_asr_2"

# Load the processor and the model, then move the model to the selected device.
processor = Speech2TextProcessor.from_pretrained(repo_name)
model = Speech2TextForConditionalGeneration.from_pretrained(repo_name)
model = model.to(device)
# NOTE: `source` must be set to a valid configuration or left out; it is left out here.
# type="filepath" hands the handler a path, which transcribe() passes to librosa.load.
inputs = gr.Audio(type="filepath")
outputs = gr.Textbox(label="Transcripción")
def transcribe(audio_path):
    """Transcribe an audio file with the loaded Speech2Text model.

    Args:
        audio_path: Path to the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "no audio provided".

    Returns:
        The decoded transcription as a string, or an empty string when no
        audio path was given.
    """
    # No audio was supplied: return early instead of failing inside librosa.
    if not audio_path:
        return ""

    # Load the audio resampled to 16 kHz.
    audio, sr = librosa.load(audio_path, sr=16000)

    # Move the features to the same device as the model (the module-level
    # `device`), so the app also works on CPU-only hosts instead of
    # unconditionally requiring CUDA.
    features = processor(
        audio, sampling_rate=sr, return_tensors="pt", padding="longest"
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        predicted_ids = model.generate(
            features["input_features"],
            attention_mask=features["attention_mask"],
        )

    # A single audio clip was processed, so take the first decoded entry.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Build the Gradio interface: audio in, transcription text out.
_interface_kwargs = {
    "fn": transcribe,
    "inputs": inputs,
    "outputs": outputs,
    "title": "ASR Demo",
}
interface = gr.Interface(**_interface_kwargs)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()