Spaces:

adalat-ai
/

wer-analysis

Sleeping

App Files Files Community

wer-analysis / app.py

janaab

Simplified highlighting

e4806e6 verified 7 months ago

raw

history blame contribute delete

2.63 kB

	import gradio as gr
	from jiwer import wer, process_words

	def make_string(words) -> str:
	    """Join *words* into a single space-separated string.

	    Args:
	        words: Iterable of strings (e.g. a slice of a tokenised sentence).

	    Returns:
	        The words joined by single spaces; an empty string for no words.
	    """
	    return " ".join(words)

	# Function to highlight errors
	def highlight_errors(ground_truth, hypothesis):
	    """Align *hypothesis* against *ground_truth* and render the diff as HTML.

	    The two texts are aligned with ``process_words``; every alignment chunk
	    is then rendered as follows:

	    * equal      -> reference words, unstyled
	    * insert     -> hypothesis words in green
	    * substitute -> hypothesis words in purple, followed by the reference
	                    words in purple with strikethrough
	    * delete     -> reference words in red with strikethrough

	    All words are HTML-escaped before being placed in the markup, because
	    both texts are user-supplied and the result is rendered as HTML.

	    Args:
	        ground_truth: Reference transcript text.
	        hypothesis: Transcript text to evaluate against the reference.

	    Returns:
	        A 5-tuple ``(html, wer, substitutions, insertions, deletions)``
	        where ``html`` is the colour legend followed by the highlighted
	        transcript and the remaining items are read from the
	        ``process_words`` result.
	    """
	    # Local import so this function does not rely on a new file-level import.
	    from html import escape

	    def styled(style, words):
	        # Wrap the escaped, space-joined words in a span with inline CSS.
	        return f'<span style="{style}">{escape(make_string(words))}</span>'

	    # NOTE(review): jiwer appears to raise ValueError when the reference is
	    # empty -- confirm; it is deliberately not caught here.
	    processed = process_words(ground_truth, hypothesis)

	    highlighted_text = []

	    # One alignment (list of chunks) per reference/hypothesis sentence pair.
	    for alignment, ref, hyp in zip(processed.alignments, processed.references, processed.hypotheses):
	        for chunk in alignment:
	            ref_words = ref[chunk.ref_start_idx:chunk.ref_end_idx]
	            hyp_words = hyp[chunk.hyp_start_idx:chunk.hyp_end_idx]

	            if chunk.type == 'equal':
	                # Matching words are shown as-is (escaped, no highlighting).
	                highlighted_text.extend(escape(word) for word in ref_words)

	            elif chunk.type == 'insert':
	                # Words present only in the hypothesis: green.
	                highlighted_text.append(styled('color:green;', hyp_words))

	            elif chunk.type == 'substitute':
	                # Hypothesis wording first, then the struck-through ground truth.
	                highlighted_text.append(styled('color:purple;', hyp_words))
	                highlighted_text.append(styled('color:purple; text-decoration:line-through;', ref_words))

	            elif chunk.type == 'delete':
	                # Words missing from the hypothesis: red with strikethrough.
	                highlighted_text.append(styled('color:red; text-decoration:line-through;', ref_words))

	    highlighted_text_str = ' '.join(highlighted_text)

	    # Color legend HTML. The literal's lines are intentionally left at the
	    # file's base indentation so the emitted markup is unchanged.
	    legend_html = """
	<div style="margin-top: 10px;">
	<strong>Legend</strong><br>
	<span style="color:green;">Insertion</span>: Green<br>
	<span style="color:purple;">Substitution</span>: Purple<br>
	<span style="color:red; text-decoration:line-through;">Deletion</span>: Red<br>
	</div>
	"""

	    # Legend first, then the highlighted transcript.
	    combined_output = f"{legend_html}<br>{highlighted_text_str}"

	    return combined_output, processed.wer, processed.substitutions, processed.insertions, processed.deletions

	# Gradio UI wiring: two free-text inputs feed highlight_errors; the output
	# components are listed in the same order as that function's return tuple
	# (HTML first, then the four numeric metrics).
	interface = gr.Interface(
	    fn=highlight_errors,
	    inputs=["text", "text"],
	    outputs=[
	        gr.HTML(label="Highlighted Transcript"),
	        *(
	            gr.Number(label=metric_label)
	            for metric_label in (
	                "Word Error Rate",
	                "Substitutions",
	                "Insertions",
	                "Deletions",
	            )
	        ),
	    ],
	    title="WER Analysis",
	)

	# Launch the app when this file is executed.
	interface.launch()