Commit
·
3e3e17d
1
Parent(s):
3843f4e
add overall statistics
Browse files
app.py
CHANGED
@@ -64,6 +64,46 @@ target_dtype = np.int16
|
|
64 |
max_range = np.iinfo(target_dtype).max
|
65 |
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def get_visualisation(idx, model="large-v2", round_dp=2, ngram_degree=5):
|
68 |
idx -= 1
|
69 |
audio = dataset[idx]["audio"]
|
@@ -141,6 +181,18 @@ if __name__ == "__main__":
|
|
141 |
"relative to the target transcriptions. Insertions are displayed in <span style='background-color:Lightgreen'>green</span>, and "
|
142 |
"deletions in <span style='background-color:#FFCCCB'><s>red</s></span>."
|
143 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
slider = gr.Slider(
|
145 |
minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
|
146 |
)
|
|
|
64 |
max_range = np.iinfo(target_dtype).max
|
65 |
|
66 |
|
67 |
+
def get_statistics(model="large-v2", round_dp=2, ngram_degree=5):
    """Compute corpus-level statistics for one model's predictions.

    The selected model's normalised predictions are scored against the
    module-level normalised targets (``norm_target``) via ``process_words``.

    Args:
        model: Which predictions to score, ``"large-v2"`` or ``"large-32-2"``.
        round_dp: Number of decimal places both percentages are rounded to.
        ngram_degree: Size ``n`` of the n-grams used for the repetition count.

    Returns:
        A tuple ``(wer_percentage, ier_percentage, repeated_ngrams)``:

        * ``wer_percentage`` -- ``100 * wer_output.wer``, rounded.
        * ``ier_percentage`` -- ``100 * wer_output.insertions`` divided by the
          summed lengths of ``wer_output.references``, rounded.
        * ``repeated_ngrams`` -- number of n-gram occurrences in the
          predictions minus the number of distinct n-grams.

    Raises:
        ValueError: If ``model`` is not one of the two supported names.
    """
    # Validate the model name up front so a bad argument fails before any work.
    if model == "large-v2":
        text2 = norm_pred_v2
    elif model == "large-32-2":
        text2 = norm_pred_32_2
    else:
        raise ValueError(
            f"Got unknown model {model}, should be one of `'large-v2'` or `'large-32-2'`."
        )
    text1 = norm_target

    wer_output = process_words(text1, text2, wer_default, wer_default)
    wer_percentage = round(100 * wer_output.wer, round_dp)
    num_reference_words = sum(len(ref) for ref in wer_output.references)
    ier_percentage = round(
        100 * wer_output.insertions / num_reference_words, round_dp
    )

    # All predictions are joined into one word stream before n-grams are
    # taken, so an n-gram may span the boundary between two samples.
    all_ngrams = list(ngrams(" ".join(text2).split(), ngram_degree))

    # A set gives the distinct count in linear time; the previous list-based
    # membership test was quadratic in the number of n-grams. ``tuple`` keeps
    # this working even if ``ngrams`` yields unhashable sequences.
    distinct_ngrams = {tuple(ngram) for ngram in all_ngrams}
    repeated_ngrams = len(all_ngrams) - len(distinct_ngrams)

    return wer_percentage, ier_percentage, repeated_ngrams
|
94 |
+
|
95 |
+
|
96 |
+
def get_overall_table():
    """Build the rows of the overall-statistics table.

    Returns:
        A list with one row per model, in the order ``"large-v2"`` then
        ``"large-32-2"``. Each row is a list holding the model name followed
        by the values returned by ``get_statistics`` for that model.
    """
    rows = []
    for model_name in ("large-v2", "large-32-2"):
        stats = get_statistics(model=model_name)
        # Prefix the statistics with the model name so it fills the first column.
        rows.append([model_name, *stats])
    return rows
|
105 |
+
|
106 |
+
|
107 |
def get_visualisation(idx, model="large-v2", round_dp=2, ngram_degree=5):
|
108 |
idx -= 1
|
109 |
audio = dataset[idx]["audio"]
|
|
|
181 |
"relative to the target transcriptions. Insertions are displayed in <span style='background-color:Lightgreen'>green</span>, and "
|
182 |
"deletions in <span style='background-color:#FFCCCB'><s>red</s></span>."
|
183 |
)
|
184 |
+
gr.Markdown("**Overall statistics:**")
|
185 |
+
table = gr.Dataframe(
|
186 |
+
value=get_overall_table(),
|
187 |
+
headers=[
|
188 |
+
"Model",
|
189 |
+
"Word Error Rate (WER)",
|
190 |
+
"Insertion Error Rate (IER)",
|
191 |
+
"Repeated 5-grams",
|
192 |
+
],
|
193 |
+
row_count=2,
|
194 |
+
)
|
195 |
+
gr.Markdown("**Per-sample statistics:**")
|
196 |
slider = gr.Slider(
|
197 |
minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
|
198 |
)
|