# Spaces: Sleeping  (page-status text captured along with the source; not part of the program)
import html

import gradio as gr
from jiwer import wer, process_words
def make_string(words):
    """Join a sequence of word strings into one space-separated string.

    Args:
        words: Iterable of strings (e.g. a slice of a tokenised sentence).

    Returns:
        The words joined by single spaces; an empty iterable gives "".
    """
    return " ".join(words)
# Function to highlight errors | |
def highlight_errors(ground_truth, hypothesis):
    """Render the word-level differences between two transcripts as HTML.

    The two texts are aligned with ``jiwer.process_words``; each alignment
    chunk is then emitted according to its type:

    * ``equal``      - reference words, unstyled
    * ``insert``     - hypothesis words in green
    * ``substitute`` - hypothesis words in purple, followed by the reference
      words in purple with a strike-through
    * ``delete``     - reference words in red with a strike-through

    All words are HTML-escaped before being placed in the markup, so text
    such as ``<b>`` typed into either input is displayed literally instead of
    being interpreted by the HTML output component.

    Args:
        ground_truth: Reference transcript.
        hypothesis: Transcript to compare against the reference.

    Returns:
        A 5-tuple ``(html, wer, substitutions, insertions, deletions)`` where
        ``html`` is the colour legend followed by the highlighted transcript
        and the remaining values are read from the ``process_words`` result.

    NOTE(review): any exception raised by ``process_words`` propagates to the
    caller unchanged; confirm how it treats an empty ground truth.
    """

    def styled(words, style):
        # Escape after joining: the joined text is user input, the style is ours.
        return f'<span style="{style}">{html.escape(make_string(words))}</span>'

    processed = process_words(ground_truth, hypothesis)
    highlighted_text = []

    # One (alignment, reference words, hypothesis words) triple per sentence pair.
    for alignment, ref, hyp in zip(processed.alignments, processed.references, processed.hypotheses):
        for chunk in alignment:
            ref_words = ref[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_words = hyp[chunk.hyp_start_idx:chunk.hyp_end_idx]

            if chunk.type == 'equal':
                # Matching words are shown without highlighting.
                highlighted_text.extend(html.escape(word) for word in ref_words)
            elif chunk.type == 'insert':
                # Words present only in the hypothesis: green.
                highlighted_text.append(styled(hyp_words, "color:green;"))
            elif chunk.type == 'substitute':
                # Hypothesis wording first, then the struck-through ground truth it replaced.
                highlighted_text.append(styled(hyp_words, "color:purple;"))
                highlighted_text.append(styled(ref_words, "color:purple; text-decoration:line-through;"))
            elif chunk.type == 'delete':
                # Words missing from the hypothesis: red with strike-through.
                highlighted_text.append(styled(ref_words, "color:red; text-decoration:line-through;"))

    highlighted_text_str = ' '.join(highlighted_text)

    # Color legend shown above the highlighted transcript.
    legend_html = """
    <div style="margin-top: 10px;">
    <strong>Legend</strong><br>
    <span style="color:green;">Insertion</span>: Green<br>
    <span style="color:purple;">Substitution</span>: Purple<br>
    <span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>
    </div>
    """

    # Combine legend and highlighted output into a single HTML fragment.
    combined_output = f"{legend_html}<br>{highlighted_text_str}"
    return combined_output, processed.wer, processed.substitutions, processed.insertions, processed.deletions
# Gradio Interface
# The two text inputs are passed positionally to
# highlight_errors(ground_truth, hypothesis); the five output components are
# listed in the same order as the values in its return tuple.
interface = gr.Interface(
    fn=highlight_errors,
    inputs=["text", "text"],
    outputs=[
        gr.HTML(label="Highlighted Transcript"),
        gr.Number(label="Word Error Rate"),
        gr.Number(label="Substitutions"),
        gr.Number(label="Insertions"),
        gr.Number(label="Deletions"),
    ],
    title="WER Analysis",
)
interface.launch()