Integrated Sarvam speech-to-text API
Browse files
- app.py +7 -7
- pages/scoreboard.py +6 -4
app.py
CHANGED
@@ -92,10 +92,11 @@ class ResultWriter:
|
|
92 |
'path',
|
93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
95 |
-
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
|
|
96 |
]
|
97 |
|
98 |
-
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
99 |
|
100 |
if not fs.exists(save_path):
|
101 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
@@ -216,7 +217,6 @@ def call_function(model_name):
|
|
216 |
"audio_b64":True
|
217 |
}}
|
218 |
elif st.session_state.current_audio_type == "uploaded":
|
219 |
-
# For uploaded files, use the processed audio data
|
220 |
array = st.session_state.audio['data']
|
221 |
sr = st.session_state.audio['sample_rate']
|
222 |
if sr != 22050:
|
@@ -247,7 +247,7 @@ def call_function(model_name):
|
|
247 |
return transcript
|
248 |
|
249 |
def transcribe_audio():
|
250 |
-
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
|
251 |
|
252 |
if st.session_state.model_1_selection == "Random":
|
253 |
model1_name = random.choice(models_list)
|
@@ -375,7 +375,7 @@ def on_option_none_click():
|
|
375 |
|
376 |
def on_click_transcribe():
|
377 |
if st.session_state.has_audio:
|
378 |
-
with st.spinner("Transcribing audio... this may take
|
379 |
option_1_text, option_2_text = transcribe_audio(
|
380 |
)
|
381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
@@ -516,7 +516,7 @@ def main():
|
|
516 |
st.markdown("### Model Selection")
|
517 |
col_model1, col_model2 = st.columns(2)
|
518 |
|
519 |
-
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure"]
|
520 |
|
521 |
with col_model1:
|
522 |
st.selectbox(
|
@@ -582,7 +582,7 @@ def main():
|
|
582 |
* Model names are revealed after the vote is cast.
|
583 |
* Currently Hindi and English are supported, and
|
584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
585 |
-
* It may take up to 30 seconds for speech recognition in some cases.
|
586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
587 |
""".strip()
|
588 |
|
|
|
92 |
'path',
|
93 |
'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
|
94 |
'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
|
95 |
+
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration',
|
96 |
+
'sarvam_score','sarvam_appearance','sarvam_duration',
|
97 |
]
|
98 |
|
99 |
+
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
100 |
|
101 |
if not fs.exists(save_path):
|
102 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
|
|
217 |
"audio_b64":True
|
218 |
}}
|
219 |
elif st.session_state.current_audio_type == "uploaded":
|
|
|
220 |
array = st.session_state.audio['data']
|
221 |
sr = st.session_state.audio['sample_rate']
|
222 |
if sr != 22050:
|
|
|
247 |
return transcript
|
248 |
|
249 |
def transcribe_audio():
|
250 |
+
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure",'sarvam']
|
251 |
|
252 |
if st.session_state.model_1_selection == "Random":
|
253 |
model1_name = random.choice(models_list)
|
|
|
375 |
|
376 |
def on_click_transcribe():
|
377 |
if st.session_state.has_audio:
|
378 |
+
with st.spinner("Transcribing audio... this may take some time"):
|
379 |
option_1_text, option_2_text = transcribe_audio(
|
380 |
)
|
381 |
st.session_state.option_1 = option_1_text if option_1_text else "* inaudible *"
|
|
|
516 |
st.markdown("### Model Selection")
|
517 |
col_model1, col_model2 = st.columns(2)
|
518 |
|
519 |
+
models_list = ["Random", "Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime", "azure","sarvam"]
|
520 |
|
521 |
with col_model1:
|
522 |
st.selectbox(
|
|
|
582 |
* Model names are revealed after the vote is cast.
|
583 |
* Currently Hindi and English are supported, and
|
584 |
the results for Hindi will be in Hinglish (Hindi in Latin script)
|
585 |
+
* It may take up to 30-60 seconds for speech recognition in some cases.
|
586 |
* Uploaded audio files must be .wav, .mp3, or .flac format and under 30 seconds duration.
|
587 |
""".strip()
|
588 |
|
pages/scoreboard.py
CHANGED
@@ -30,13 +30,14 @@ def get_model_abbreviation(model_name):
|
|
30 |
'deepgram': 'Deepgram',
|
31 |
'Ori Swift': 'Ori Swift',
|
32 |
'Ori Prime': 'Ori Prime',
|
33 |
-
'azure' : 'Azure'
|
|
|
34 |
}
|
35 |
return abbrev_map.get(model_name, model_name)
|
36 |
|
37 |
|
38 |
def calculate_metrics(df):
|
39 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
40 |
metrics = {}
|
41 |
|
42 |
for model in models:
|
@@ -100,7 +101,7 @@ def create_appearance_chart(metrics):
|
|
100 |
return fig
|
101 |
|
102 |
def create_head_to_head_matrix(df):
|
103 |
-
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
104 |
matrix = np.zeros((len(models), len(models)))
|
105 |
|
106 |
for i, model1 in enumerate(models):
|
@@ -213,7 +214,8 @@ def dashboard():
|
|
213 |
"Ori Apex": "The top-performing model, fast and stable.",
|
214 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
215 |
"Deepgram" : "Deepgram Nova-2 API",
|
216 |
-
"Azure" : "Azure Speech Services API"
|
|
|
217 |
}
|
218 |
|
219 |
st.header('Model Descriptions')
|
|
|
30 |
'deepgram': 'Deepgram',
|
31 |
'Ori Swift': 'Ori Swift',
|
32 |
'Ori Prime': 'Ori Prime',
|
33 |
+
'azure' : 'Azure',
|
34 |
+
'sarvam':'Sarvam'
|
35 |
}
|
36 |
return abbrev_map.get(model_name, model_name)
|
37 |
|
38 |
|
39 |
def calculate_metrics(df):
|
40 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
41 |
metrics = {}
|
42 |
|
43 |
for model in models:
|
|
|
101 |
return fig
|
102 |
|
103 |
def create_head_to_head_matrix(df):
|
104 |
+
models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure','sarvam']
|
105 |
matrix = np.zeros((len(models), len(models)))
|
106 |
|
107 |
for i, model1 in enumerate(models):
|
|
|
214 |
"Ori Apex": "The top-performing model, fast and stable.",
|
215 |
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
216 |
"Deepgram" : "Deepgram Nova-2 API",
|
217 |
+
"Azure" : "Azure Speech Services API",
|
218 |
+
"Sarvam": "Sarvam AI saarika:v2 API"
|
219 |
}
|
220 |
|
221 |
st.header('Model Descriptions')
|