Spaces:

adalat-ai
/

wer-analysis

Sleeping

App Files Files Community

janaab committed on Oct 20, 2024

Commit

e4806e6

verified ·

1 Parent(s): d7b8130

Simplified highlighting

Browse files

Files changed (1) hide show

app.py +23 -41

app.py CHANGED Viewed

@@ -1,70 +1,52 @@
 import gradio as gr
-from jiwer import wer, compute_measures
 # Function to highlight errors
 def highlight_errors(ground_truth, hypothesis):
     """Build an HTML view of the word-level errors in ``hypothesis``.

     The two texts are aligned with jiwer's ``compute_measures`` and every
     alignment chunk is rendered word by word:

     * equal: the word is emitted unchanged
     * insert: the hypothesis word in green
     * substitute: the hypothesis word in purple, followed by the
       ground-truth word it replaced in red with a strikethrough
     * delete: the ground-truth word in red with a strikethrough

     Args:
         ground_truth: Reference text.
         hypothesis: Text to compare against the reference.

     Returns:
         A 5-tuple ``(html, wer, substitutions, insertions, deletions)``.
         ``html`` is the highlighted text followed by a colour legend, the
         error rate comes from ``wer`` and the three counts from the
         ``compute_measures`` result.
     """
     # Local import keeps the function self-contained.
     import html

     def span(style, word):
         return f'<span style="{style}">{word}</span>'

     inserted = 'color:green;'
     substituted = 'color:purple;'
     removed = 'color:red; text-decoration:line-through;'

     measures = compute_measures(ground_truth, hypothesis)

     # The words are user input that ends up inside HTML, so escape them once
     # up front; otherwise "<", ">" or "&" would be interpreted as markup.
     # NOTE(review): assumes the chunk indices reported by jiwer line up with
     # str.split() on the raw inputs - TODO confirm for unusual whitespace.
     gt_words = [html.escape(word) for word in ground_truth.split()]
     hyp_words = [html.escape(word) for word in hypothesis.split()]

     highlighted_hyp = []
     gt_index = 0
     hyp_index = 0

     # Process each alignment operation in measures
     for alignment in measures['ops']:
         for chunk in alignment:
             # A chunk may cover several consecutive words, so always work
             # on the full index range instead of a single word.
             ref_chunk = gt_words[chunk.ref_start_idx:chunk.ref_end_idx]
             hyp_chunk = hyp_words[chunk.hyp_start_idx:chunk.hyp_end_idx]

             if chunk.type == 'equal':
                 # Add equal words without highlighting
                 highlighted_hyp.extend(ref_chunk)
             elif chunk.type == 'insert':
                 # Highlight inserted words in green
                 highlighted_hyp.extend(span(inserted, word) for word in hyp_chunk)
             elif chunk.type == 'substitute':
                 # Hypothesis word in purple, then the ground-truth word it
                 # replaced in red with a strikethrough
                 for hyp_word, gt_word in zip(hyp_chunk, ref_chunk):
                     highlighted_hyp.append(span(substituted, hyp_word))
                     highlighted_hyp.append(span(removed, gt_word))
             elif chunk.type == 'delete':
                 # Highlight deleted words in red with strikethrough
                 highlighted_hyp.extend(span(removed, word) for word in ref_chunk)

             # Remember how far both texts have been consumed.
             gt_index = chunk.ref_end_idx
             hyp_index = chunk.hyp_end_idx

     # Safety net: any hypothesis words the alignment did not cover are
     # shown as insertions ...
     highlighted_hyp.extend(span(inserted, word) for word in hyp_words[hyp_index:])
     # ... and any uncovered ground-truth words as deletions.
     highlighted_hyp.extend(span(removed, word) for word in gt_words[gt_index:])

     highlighted_hyp_str = ' '.join(highlighted_hyp)
     error_rate = wer(ground_truth, hypothesis)

     # Color Legend HTML
     legend_html = """
     <div style="margin-top: 10px;">
         <strong>Legend:</strong><br>
         <span style="color:green;">Insertion</span>: Words in green<br>
         <span style="color:purple;">Substitution</span>: Words in purple<br>
         <span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
     </div>
     """

     # Combine highlighted output and legend
     combined_output = f"{highlighted_hyp_str}<br>{legend_html}"
     return combined_output, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']
 # Gradio Interface
 interface = gr.Interface(

 import gradio as gr
+from jiwer import wer, process_words
def make_string(words):
    """Join an iterable of words into one space-separated string."""
    separator = " "
    return separator.join(words)
 # Function to highlight errors
 def highlight_errors(ground_truth, hypothesis):
     """Render the word-level differences between two texts as HTML.

     The texts are aligned with jiwer's ``process_words``. Words that match
     are emitted unchanged; every other alignment chunk is wrapped in a
     styled ``<span>``:

     * insert: the hypothesis words in green
     * substitute: the hypothesis words in purple, followed by the replaced
       ground-truth words in purple with a strikethrough
     * delete: the ground-truth words in red with a strikethrough

     Args:
         ground_truth: Reference text.
         hypothesis: Text to compare against the reference.

     Returns:
         A 5-tuple ``(html, wer, substitutions, insertions, deletions)``.
         ``html`` is the colour legend followed by the highlighted text;
         the other four values are read from the ``process_words`` result.
     """
     # Local import keeps the function self-contained.
     import html

     def styled(style, words):
         # The words are user input that ends up inside HTML, so escape them;
         # otherwise "<", ">" or "&" would be interpreted as markup.
         return f'<span style="{style}">{html.escape(make_string(words))}</span>'

     highlighted_text = []
     processed = process_words(ground_truth, hypothesis)

     # Process each alignment operation together with the word lists it indexes
     for alignment, ref, hyp in zip(processed.alignments, processed.references, processed.hypotheses):
         for chunk in alignment:
             ref_words = ref[chunk.ref_start_idx:chunk.ref_end_idx]
             hyp_words = hyp[chunk.hyp_start_idx:chunk.hyp_end_idx]

             if chunk.type == 'equal':
                 # Add equal words without highlighting (still escaped)
                 highlighted_text.extend(html.escape(word) for word in ref_words)
             elif chunk.type == 'insert':
                 # Highlight inserted words in green
                 highlighted_text.append(styled('color:green;', hyp_words))
             elif chunk.type == 'substitute':
                 # Highlight substitutions in purple: the hypothesis words
                 # first, then the ground truth they replaced, struck through
                 highlighted_text.append(styled('color:purple;', hyp_words))
                 highlighted_text.append(styled('color:purple; text-decoration:line-through;', ref_words))
             elif chunk.type == 'delete':
                 # Highlight deleted words in red with strikethrough
                 highlighted_text.append(styled('color:red; text-decoration:line-through;', ref_words))

     highlighted_text_str = ' '.join(highlighted_text)

     # Color Legend HTML
     legend_html = """
     <div style="margin-top: 10px;">
         <strong>Legend</strong><br>
         <span style="color:green;">Insertion</span>: Green<br>
         <span style="color:purple;">Substitution</span>: Purple<br>
         <span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>
     </div>
     """

     # Combine legend and highlighted output
     combined_output = f"{legend_html}<br>{highlighted_text_str}"
     return combined_output, processed.wer, processed.substitutions, processed.insertions, processed.deletions
 # Gradio Interface
 interface = gr.Interface(