HugoZeballos committed on
Commit
2db5857
·
verified ·
1 Parent(s): 2a56af8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -27
app.py CHANGED
@@ -1,28 +1,28 @@
1
- import gradio as gr
2
- from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
3
- import librosa
4
- import torch
5
-
6
- # Cargar el modelo
7
- repo_name = "HugoZeballos/rapa_nui_asr" # Ajusta al nombre de tu modelo en Hugging Face
8
- processor = Speech2TextProcessor.from_pretrained(repo_name)
9
- model = Speech2TextForConditionalGeneration.from_pretrained(repo_name).to("cuda")
10
-
11
- def transcribe(audio_path):
12
- audio, sr = librosa.load(audio_path, sr=16000)
13
- inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding="longest").to("cuda")
14
-
15
- with torch.no_grad():
16
- predicted_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
17
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
18
- return transcription
19
-
20
- # Crear interfaz Gradio
21
- interface = gr.Interface(
22
- fn=transcribe,
23
- inputs=gr.Audio(source="upload", type="filepath"),
24
- outputs="text",
25
- title="Transcriptor ASR Rapa Nui"
26
- )
27
-
28
  interface.launch()
 
1
+ import gradio as gr
2
+ from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
3
+ import librosa
4
+ import torch
5
+
6
+ # Cargar el modelo
7
+ repo_name = "HugoZeballos/rapa_nui_asr_2" # Ajusta al nombre de tu modelo en Hugging Face
8
+ processor = Speech2TextProcessor.from_pretrained(repo_name)
9
+ model = Speech2TextForConditionalGeneration.from_pretrained(repo_name).to("cuda")
10
+
11
+ def transcribe(audio_path):
12
+ audio, sr = librosa.load(audio_path, sr=16000)
13
+ inputs = processor(audio, sampling_rate=sr, return_tensors="pt", padding="longest").to("cuda")
14
+
15
+ with torch.no_grad():
16
+ predicted_ids = model.generate(inputs["input_features"], attention_mask=inputs["attention_mask"])
17
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
18
+ return transcription
19
+
20
+ # Crear interfaz Gradio
21
+ interface = gr.Interface(
22
+ fn=transcribe,
23
+ inputs=gr.Audio(source="upload", type="filepath"),
24
+ outputs="text",
25
+ title="Transcriptor ASR Rapa Nui"
26
+ )
27
+
28
  interface.launch()