File size: 2,039 Bytes
25ca65d
d2d0553
6e17754
a9f1ad5
6e17754
 
 
 
25ca65d
39a9210
6e17754
7fb76d7
6e17754
d2d0553
 
 
6e17754
d2d0553
 
39a9210
d2d0553
a9f1ad5
d2d0553
 
7fb76d7
25ca65d
d2d0553
 
7fb76d7
0ed15d8
7fb76d7
d2d0553
7fb76d7
ee7da9b
 
d2d0553
cd7d7b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
import requests
import os
import time

# Endpoint URL and bearer token for the ASR service, read from the environment.
# os.getenv returns None when a variable is unset; transcribe_audio checks for
# that and reports the missing configuration instead of sending a request.
ASR_API_URL = os.getenv('ASR_API_URL')
AUTH_TOKEN = os.getenv('AUTH_TOKEN')

def transcribe_audio(file_path):
    """Upload an audio file to the ASR service and return its transcription.

    Args:
        file_path: Path to the audio file on disk. May be ``None`` or empty
            when the caller has no audio to send.

    Returns:
        A two-item tuple. On success it is ``(transcription, time_string)``,
        where ``time_string`` is the server-reported ``time`` field or, when
        the server omits it, the locally measured round-trip time. On any
        failure it is ``(error_message, None)``.
    """
    # Guard first: without a path there is nothing to open or upload.
    if not file_path:
        return "Error: No audio file provided.", None

    if not ASR_API_URL or not AUTH_TOKEN:
        return "Error: Missing ASR_API_URL or AUTH_TOKEN.", None

    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {AUTH_TOKEN}',
    }

    start_time = time.perf_counter()
    try:
        # The context manager closes the upload handle even if the POST raises.
        with open(file_path, 'rb') as audio_file:
            files = {
                # Send only the base name so the local directory layout is not
                # exposed to the remote service.
                # NOTE(review): the MIME type is fixed to audio/mpeg regardless
                # of the actual file format -- confirm the service ignores it.
                'file': (os.path.basename(file_path), audio_file, 'audio/mpeg'),
            }
            # (connect, read) timeouts so a stalled server cannot block forever.
            response = requests.post(
                ASR_API_URL,
                headers=headers,
                files=files,
                timeout=(10, 120),
            )
    # RequestException derives from OSError, so it must be caught first.
    except requests.RequestException as exc:
        return f"Error: Request to ASR service failed: {exc}", None
    except OSError as exc:
        return f"Error: Could not read audio file: {exc}", None
    elapsed = time.perf_counter() - start_time  # round trip, in seconds

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}", None

    # Parse the body once; a 200 with a non-JSON or non-object body is an error.
    try:
        payload = response.json()
    except ValueError:
        return "Error: ASR service returned a non-JSON response.", None
    if not isinstance(payload, dict):
        return "Error: ASR service returned an unexpected response.", None

    transcription = payload.get("transcription", "No transcription returned.")
    server_time = payload.get("time")
    if server_time is None:
        # Fall back to the locally measured time when the server reports none.
        inference_time_str = f"{elapsed:.2f} seconds (round trip)"
    else:
        inference_time_str = f"{server_time} seconds"
    return transcription, inference_time_str


# Set up the Gradio interface
def launch_interface():
    """Build the Gradio UI around ``transcribe_audio`` and start serving it."""
    # Single audio input, handed to the callback as a path on disk.
    audio_input = gr.Audio(type="filepath", label="Upload Audio (Max 30s)")

    # Two text outputs, matching the (transcription, time) tuple returned
    # by transcribe_audio.
    result_boxes = [
        gr.Textbox(label="📝 Transcription"),
        gr.Textbox(label="⏱️ Inference Time (s)"),
    ]

    # Markdown blurb shown under the title.
    blurb = (
        "The **Gooya Persian ASR** model is crazy fast and incredibly [powerful]"
        "(https://huggingface.co/spaces/navidved/open_persian_asr_leaderboard) "
        "when it comes to Persian speech recognition!\n\n"
        "🎤 Just drop a short Persian audio clip (max 30s), and boom — "
        "you’ll get a top-notch transcription instantly. 🚀🔥"
    )

    demo = gr.Interface(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=result_boxes,
        title="🗣️ Gooya v1.4 – Persian ASR",
        description=blurb,
        theme="default",
        allow_flagging="never",
    )
    demo.launch()

# Start the web UI as soon as this module is executed (or imported).
launch_interface()