import gradio as gr
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
import librosa
import torch
import os
from huggingface_hub import login

# Read the token from the environment variables
token = os.getenv("HF_TOKEN")
if token:
    login(token=token)
else:
    raise ValueError("The Hugging Face token is not set in the environment variables.")

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model
repo_name = "HugoZeballos/rapa_nui_asr_2"  # Adjust to your model's name on Hugging Face
processor = Speech2TextProcessor.from_pretrained(repo_name)
model = Speech2TextForConditionalGeneration.from_pretrained(repo_name).to(device)
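
# Optional safeguard (an assumption, not required by the checkpoint): eval mode
# disables dropout so inference is deterministic.
model.eval()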

# The `source` argument was removed (not valid in current Gradio);
# `type="filepath"` passes the uploaded or recorded file's path to the callback
inputs = gr.Audio(type="filepath")
outputs = gr.Textbox(label="Transcription")

def transcribe(audio_path):
    # Load the audio and resample it to the 16 kHz rate the model expects
    audio, sr = librosa.load(audio_path, sr=16000)
    # Use `device` instead of hard-coding "cuda" so the app also runs on CPU
    inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding="longest").to(device)
    with torch.no_grad():
        predicted_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
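
# Minimal local sanity check (a sketch; "sample.wav" is a hypothetical file
# name, and librosa resamples any input to 16 kHz on load):
# print(transcribe("sample.wav"))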

# Create the interface
interface = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    title="ASR Demo"
)

# Run the app
if __name__ == "__main__":
    interface.launch()