Spaces:

jerpint
/

advent24-llm

Running

App Files Files Community

advent24-llm / app.py

jerpint

gradio app to visualize results

ec9edcd about 2 months ago

raw

history blame

2.1 kB

	import gradio as gr
	import pandas as pd
	import json

	from evaluate import get_solution_code


	# For now, only evaluate first 9 days.
	# .copy() makes the filtered frame independent of the raw CSV frame: new
	# columns are assigned to `df` later in this script, and assigning to a
	# filtered slice is what triggers pandas' SettingWithCopyWarning.
	df = pd.read_csv("results.csv")
	df = df[df.day < 10].copy()

	# Expected answers, looked up later as solutions[str(day)][0] and [1].
	# JSON is UTF-8 by specification, so do not depend on the platform's
	# default text encoding when reading it.
	with open("solutions.json", encoding="utf-8") as f:
	    solutions = json.load(f)

	def score_submissions(row, solutions_by_day=None):
	    """Score one results row against the expected answers for its day.

	    Args:
	        row: Mapping-like row (e.g. a DataFrame row) with a ``"result"``
	            entry holding the submission's output and a ``"day"`` entry.
	        solutions_by_day: Optional mapping from ``str(day)`` to a pair of
	            expected answers ``[part_1, part_2]``. Defaults to the
	            module-level ``solutions``.

	    Returns:
	        ``[part_1_ok, part_2_ok]``; each flag is True when the matching
	        expected answer occurs as a substring of the result text.

	    Raises:
	        KeyError: If there is no entry for ``str(row["day"])``.
	    """
	    if solutions_by_day is None:
	        solutions_by_day = solutions

	    result = row["result"]
	    expected = solutions_by_day[str(row["day"])]

	    # An empty CSV cell is read back as NaN (a float) and `x in nan` raises
	    # TypeError; a run that produced no text output cannot have earned a star.
	    if not isinstance(result, str):
	        return [False, False]

	    # str() keeps the substring test valid if an answer is stored as a JSON
	    # number rather than a string; for string answers it is a no-op.
	    return [str(expected[0]) in result, str(expected[1]) in result]


	# Score every row; each entry of "scores" is the [part_1, part_2] flag pair.
	df["scores"] = df.apply(score_submissions, axis=1)

	# Look up the code for each (day, model) pair via get_solution_code.
	df["code"] = df.apply(lambda row: get_solution_code(day=row["day"], model=row["model"]), axis=1)
	# Markdown fence markers must sit on their own lines: without the newlines
	# the first line of code is absorbed into the "python" info string and the
	# closing fence is not recognised when the code lacks a trailing newline.
	# NOTE(review): harmless (one blank line) if get_solution_code already
	# returns newline-padded text - confirm against its implementation.
	df["code_md"] = df["code"].apply(lambda code: "```python\n" + code + "\n```")

	# Split the flag pair into one boolean column per puzzle part.
	df["part_1"] = df["scores"].apply(lambda flags: flags[0])
	df["part_2"] = df["scores"].apply(lambda flags: flags[1])


	def _star_row(model_name):
	    """Build the star-count row for a single model from the scored frame."""
	    rows = df.loc[df["model"] == model_name]
	    silver = rows["part_1"].sum()
	    gold = rows["part_2"].sum()
	    return {
	        "Model": model_name,
	        "Silver Stars ⭐️": silver,
	        "Gold Stars ⭐️": gold,
	        "Total Stars ⭐️": silver + gold,
	    }


	# One entry per distinct model, in order of first appearance; the model
	# name is both the dict key (hence the frame index) and the "Model" column.
	star_summary = {name: _star_row(name) for name in df["model"].unique()}

	star_df = pd.DataFrame.from_dict(star_summary, orient="index")

	# Build the two-tab UI: aggregate star counts and a per-day breakdown.
	with gr.Blocks() as demo:
	    md = gr.Markdown("Hello!")

	    with gr.Tab("Stars"):
	        gr_star_df = gr.DataFrame(star_df)

	    with gr.Tab("Daily"):
	        # Parse the info to something more readable.
	        # .copy() gives an independent frame; adding columns to the bare
	        # column selection would write to a slice of `df` and trigger
	        # pandas' SettingWithCopyWarning.
	        df_daily = df[["model", "day", "part_1", "part_2", "total_time"]].copy()
	        df_daily["Part 1"] = df_daily["part_1"].apply(lambda x: "⭐️" if x else "❌")
	        df_daily["Part 2"] = df_daily["part_2"].apply(lambda x: "⭐️" if x else "❌")
	        # Keep at most the first six characters of the runtime's string form.
	        df_daily["Runtime (s)"] = df_daily["total_time"].apply(lambda x: str(x)[0:6])
	        df_daily = df_daily[["model", "day", "Part 1", "Part 2", "Runtime (s)"]]

	        gr_df_daily = gr.DataFrame(df_daily.sort_values(by="day"))

	    # with gr.Tab("Code"):
	    #     gr_code_df = gr.DataFrame(df[["model", "day", "code_md", "result"]], datatype=["str", "str", "markdown", "str"])

	demo.launch()