wer-analysis / app.py
janaab's picture
Simplified highlighting
e4806e6 verified
raw
history blame contribute delete
2.63 kB
import html

import gradio as gr
from jiwer import wer, process_words
def make_string(words):
    """Join an iterable of word strings into one space-separated string.

    Args:
        words: Iterable of strings.

    Returns:
        The words joined by single spaces; an empty string when ``words``
        is empty.
    """
    return " ".join(words)
# Static colour legend placed above the highlighted transcript.
_LEGEND_HTML = """
<div style="margin-top: 10px;">
<strong>Legend</strong><br>
<span style="color:green;">Insertion</span>: Green<br>
<span style="color:purple;">Substitution</span>: Purple<br>
<span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>
</div>
"""


def _styled_span(words, style):
    """Wrap the space-joined ``words`` in a ``<span>`` carrying inline ``style``."""
    # The words come straight from user input, so escape them before they are
    # embedded in markup; otherwise "<", ">", "&" or quotes in a transcript
    # would be interpreted as HTML instead of being displayed.
    return f'<span style="{style}">{html.escape(make_string(words))}</span>'


def highlight_errors(ground_truth, hypothesis):
    """Render the word-level differences between two transcripts as HTML.

    The inputs are aligned with ``jiwer.process_words`` and every alignment
    chunk is rendered according to its type:

    * ``equal``      - reference words, unstyled
    * ``insert``     - hypothesis words in green
    * ``substitute`` - hypothesis words in purple, followed by the reference
      words in purple with strikethrough
    * ``delete``     - reference words in red with strikethrough

    Chunks of any other type are ignored. All words are HTML-escaped before
    being embedded in the output.

    Args:
        ground_truth: Reference transcript, passed to ``process_words`` as the
            reference.
        hypothesis: Transcript to evaluate, passed to ``process_words`` as the
            hypothesis.

    Returns:
        A 5-tuple ``(combined_output, wer, substitutions, insertions,
        deletions)``: the legend followed by the highlighted transcript, and
        the ``wer``, ``substitutions``, ``insertions`` and ``deletions``
        attributes of the ``process_words`` result.
    """
    processed = process_words(ground_truth, hypothesis)

    highlighted_text = []
    # Alignments, references and hypotheses are parallel sequences; walk them
    # in lockstep so chunk indices are applied to the matching word lists.
    for alignment, ref, hyp in zip(
        processed.alignments, processed.references, processed.hypotheses
    ):
        for chunk in alignment:
            ref_words = ref[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_words = hyp[chunk.hyp_start_idx:chunk.hyp_end_idx]

            if chunk.type == 'equal':
                # Matching words are emitted one by one without highlighting.
                highlighted_text.extend(html.escape(word) for word in ref_words)
            elif chunk.type == 'insert':
                highlighted_text.append(_styled_span(hyp_words, "color:green;"))
            elif chunk.type == 'substitute':
                # Show what was recognised first, then the struck-through
                # ground truth it replaced.
                highlighted_text.append(_styled_span(hyp_words, "color:purple;"))
                highlighted_text.append(
                    _styled_span(
                        ref_words, "color:purple; text-decoration:line-through;"
                    )
                )
            elif chunk.type == 'delete':
                highlighted_text.append(
                    _styled_span(
                        ref_words, "color:red; text-decoration:line-through;"
                    )
                )

    highlighted_text_str = ' '.join(highlighted_text)

    # Combine legend and highlighted output into a single HTML fragment.
    combined_output = f"{_LEGEND_HTML}<br>{highlighted_text_str}"
    return (
        combined_output,
        processed.wer,
        processed.substitutions,
        processed.insertions,
        processed.deletions,
    )
# Gradio UI: two "text" inputs are passed to highlight_errors, whose five
# return values fill one HTML panel followed by four numeric fields.
interface = gr.Interface(
    title="WER Analysis",
    fn=highlight_errors,
    inputs=["text"] * 2,
    outputs=[
        gr.HTML(label="Highlighted Transcript"),
        # The numeric outputs differ only by label; order matches the
        # function's return tuple.
        *(
            gr.Number(label=metric_label)
            for metric_label in (
                "Word Error Rate",
                "Substitutions",
                "Insertions",
                "Deletions",
            )
        ),
    ],
)
interface.launch()