|
import json |
|
import random |
|
|
|
import gradio as gr |
|
from difflib import SequenceMatcher |
|
|
|
# Path of the JSON Lines file holding the examples shown by the app.
file_path = "dataset.jsonl"

# Only records whose "similarity_ratio" is strictly above this value are kept.
similarity_threshold = 0.85

# Position (in the filtered example list) of the example displayed on first load.
current_index = 0

# Markdown introduction rendered at the top of the page.
# The author's first name is spelled to match the linked profile ("louiswhunt").
description_text = """
This Space is inspired by [Louis Hunt's](https://www.linkedin.com/posts/louiswhunt_see-below-for-6882-pages-of-mmlu-and-gsm8k-activity-7281011488692047872-fWCE?utm_source=share&utm_medium=member_desktop) post.
He highlights how current top performing models from major vendors are contaminated with benchmark data that is supposed to be used to assess their performance.

This space aims to partially reproduce this work. I chose to look at the contamination of **Qwen/Qwen2.5-14B** by **GSM8K** dataset.
"""
|
|
|
|
|
def find_similar_chunks(original, output):
    """Split ``output`` into consecutive chunks flagged as shared with ``original`` or not.

    The two strings are aligned with ``difflib.SequenceMatcher``; every
    matching block becomes a highlighted chunk and the text between matching
    blocks becomes an unhighlighted chunk.

    Args:
        original: Reference text the output is compared against.
        output: Text to split into chunks.

    Returns:
        A list of ``(text, label)`` tuples that, concatenated in order,
        reproduce ``output`` exactly. ``label`` is ``1`` for text that also
        appears in ``original`` and ``None`` otherwise. Empty chunks are
        never emitted, so an empty ``output`` yields an empty list.
    """
    matcher = SequenceMatcher(None, original, output)
    highlighted_sequence = []
    left = 0  # Offset in ``output`` up to which text has already been emitted.

    for _, j, n in matcher.get_matching_blocks():
        if n == 0:
            # get_matching_blocks() always ends with a zero-length sentinel
            # block; emitting it would add an empty highlighted chunk.
            continue
        if left < j:
            highlighted_sequence.append((output[left:j], None))
        highlighted_sequence.append((output[j:j + n], 1))
        left = j + n

    # Whatever follows the last real match is unmatched text.
    if left < len(output):
        highlighted_sequence.append((output[left:], None))

    return highlighted_sequence
|
|
|
# Load the dataset once at start-up, keeping only the records whose similarity
# ratio is strictly above the threshold. The file is read as UTF-8 explicitly
# so the result does not depend on the platform's default encoding.
with open(file_path, "r", encoding="utf-8") as file:
    # Parse each line a single time; blank lines (e.g. a trailing newline at
    # the end of the file) are skipped instead of crashing json.loads.
    records = (json.loads(line) for line in file if line.strip())
    examples = [
        record
        for record in records
        if record["similarity_ratio"] > similarity_threshold
    ]
|
|
|
def next_example():
    """Pick a random example and return the values for the UI components.

    Returns:
        A list ``[prompt, original, highlighted_output, similarity_ratio,
        seed]`` where ``highlighted_output`` is the result of
        ``find_similar_chunks`` applied to the example's original text and
        model output.
    """
    picked = random.choice(examples)
    chunks = find_similar_chunks(picked["original"], picked["output"])

    row = [picked["prompt"], picked["original"], chunks]
    row.append(picked["similarity_ratio"])
    row.append(picked["seed"])
    return row
|
|
|
# Page layout: description, prompt, original/output side by side, metadata,
# and a button that swaps in another random example.
with gr.Blocks() as demo:
    # Example displayed on first load, before the button is pressed.
    first_example = examples[current_index]

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(description_text)
        with gr.Column(scale=1):
            # Intentionally empty column (presumably a spacer so the
            # description only takes half of the row).
            pass

    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=first_example["prompt"],
    )

    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=first_example["original"],
            )
        with gr.Column(scale=4):
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(
                    first_example["original"],
                    first_example["output"],
                ),
            )

    with gr.Row():
        with gr.Column(scale=1):
            similarity = gr.Textbox(
                label="Similarity ratio",
                interactive=False,
                value=first_example["similarity_ratio"],
            )
        with gr.Column(scale=1):
            seed = gr.Textbox(
                label="Seed",
                interactive=False,
                value=first_example["seed"],
            )

    # Label typo fixed ("Anoter" -> "Another").
    next_btn = gr.Button("Another example")

    # The handler's return list is mapped onto these components in order.
    next_btn.click(
        fn=next_example,
        outputs=[prompt, original, output, similarity, seed],
    )

demo.launch()