janaab committed on
Commit
e4806e6
·
verified ·
1 Parent(s): d7b8130

Simplified highlighting

Browse files
Files changed (1) hide show
  1. app.py +23 -41
app.py CHANGED
@@ -1,70 +1,52 @@
1
  import gradio as gr
2
- from jiwer import wer, compute_measures
 
 
 
3
 
4
  # Function to highlight errors
5
  def highlight_errors(ground_truth, hypothesis):
6
- measures = compute_measures(ground_truth, hypothesis)
7
 
8
- highlighted_hyp = []
9
-
10
- # Split the ground truth and hypothesis into words
11
- gt_words = ground_truth.split()
12
- hyp_words = hypothesis.split()
13
 
14
- gt_index = 0
15
- hyp_index = 0
16
 
17
  # Process each alignment operation in measures
18
- for alignment in measures['ops']:
19
  for chunk in alignment:
20
  if chunk.type == 'equal':
21
  # Add equal words without highlighting
22
- highlighted_hyp.extend(gt_words[chunk.ref_start_idx:chunk.ref_end_idx])
23
- gt_index = chunk.ref_end_idx
24
- hyp_index = chunk.hyp_end_idx
25
  elif chunk.type == 'insert':
26
  # Highlight inserted words in green
27
- highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
28
- hyp_index += 1
29
  elif chunk.type == 'substitute':
30
- # Highlight substitutions: hypothesis in purple, ground truth in red
31
- highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>') # Hypothesis word
32
- highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>') # Ground truth word
33
- gt_index += 1
34
- hyp_index += 1
35
  elif chunk.type == 'delete':
36
  # Highlight deleted words in red with strikethrough
37
- highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
38
- gt_index += 1
39
-
40
- # Handle any remaining words in hypothesis as insertions
41
- while hyp_index < len(hyp_words):
42
- highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
43
- hyp_index += 1
44
 
45
- # Handle any remaining words in ground truth that were not matched
46
- while gt_index < len(gt_words):
47
- highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
48
- gt_index += 1
49
-
50
- highlighted_hyp_str = ' '.join(highlighted_hyp)
51
 
52
- error_rate = wer(ground_truth, hypothesis)
53
-
54
  # Color Legend HTML
55
  legend_html = """
56
  <div style="margin-top: 10px;">
57
- <strong>Legend:</strong><br>
58
- <span style="color:green;">Insertion</span>: Words in green<br>
59
- <span style="color:purple;">Substitution</span>: Words in purple<br>
60
- <span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
61
  </div>
62
  """
63
 
64
  # Combine highlighted output and legend
65
- combined_output = f"{highlighted_hyp_str}<br>{legend_html}"
66
 
67
- return combined_output, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']
68
 
69
  # Gradio Interface
70
  interface = gr.Interface(
 
1
  import gradio as gr
2
+ from jiwer import wer, process_words
3
+
4
+ def make_string(words):
5
+ return " ".join(words)
6
 
7
  # Function to highlight errors
8
  def highlight_errors(ground_truth, hypothesis):
 
9
 
10
+ highlighted_text = []
 
 
 
 
11
 
12
+ processed = process_words(ground_truth, hypothesis)
 
13
 
14
  # Process each alignment operation in measures
15
+ for alignment, ref, hyp in zip(processed.alignments, processed.references, processed.hypotheses):
16
  for chunk in alignment:
17
  if chunk.type == 'equal':
18
  # Add equal words without highlighting
19
+ highlighted_text.extend(ref[chunk.ref_start_idx:chunk.ref_end_idx])
20
+
 
21
  elif chunk.type == 'insert':
22
  # Highlight inserted words in green
23
+ highlighted_text.append(f'<span style="color:green;">{make_string(hyp[chunk.hyp_start_idx:chunk.hyp_end_idx])}</span>')
24
+
25
  elif chunk.type == 'substitute':
26
+ # Highlight substitutions in purple: the ground truth is struck through
27
+ highlighted_text.append(f'<span style="color:purple;">{make_string(hyp[chunk.hyp_start_idx:chunk.hyp_end_idx])}</span>') # Hypothesis word
28
+ highlighted_text.append(f'<span style="color:purple; text-decoration:line-through;">{make_string(ref[chunk.ref_start_idx:chunk.ref_end_idx])}</span>') # Ground truth word
29
+
 
30
  elif chunk.type == 'delete':
31
  # Highlight deleted words in red with strikethrough
32
+ highlighted_text.append(f'<span style="color:red; text-decoration:line-through;">{make_string(ref[chunk.ref_start_idx:chunk.ref_end_idx])}</span>')
 
 
 
 
 
 
33
 
34
+ highlighted_text_str = ' '.join(highlighted_text)
 
 
 
 
 
35
 
 
 
36
  # Color Legend HTML
37
  legend_html = """
38
  <div style="margin-top: 10px;">
39
+ <strong>Legend</strong><br>
40
+ <span style="color:green;">Insertion</span>: Green<br>
41
+ <span style="color:purple;">Substitution</span>: Purple<br>
42
+ <span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>
43
  </div>
44
  """
45
 
46
  # Combine highlighted output and legend
47
+ combined_output = f"{legend_html}<br>{highlighted_text_str}"
48
 
49
+ return combined_output, processed.wer, processed.substitutions, processed.insertions, processed.deletions
50
 
51
  # Gradio Interface
52
  interface = gr.Interface(