"""Gradio dashboard of LLM (and human) results on Advent of Code 2024.

Reads one run per row from ``results.csv`` (columns used here: ``model``,
``day``, ``result``, ``total_time``) and the expected answers from
``solutions.json`` (keyed by the day as a string, each value holding the
part 1 and part 2 answers at indices 0 and 1). Each run is scored, and the
results are shown in four tabs: star totals, per-day scores, raw outputs,
and the code behind each run.
"""

import gradio as gr
import pandas as pd
import json
from evaluate import get_solution_code

df = pd.read_csv("results.csv")

with open("solutions.json", encoding="utf-8") as f:
    solutions = json.load(f)


def score_submissions(row):
    """Score one results row against the expected answers for its day.

    A part counts as solved when its expected answer occurs as a substring
    of the run's output.

    Args:
        row: A row of the results table; ``row["result"]`` and
            ``row["day"]`` are read.

    Returns:
        A two-item list ``[part_1_solved, part_2_solved]`` of booleans.

    Raises:
        KeyError: If ``solutions`` has no entry for the row's day.
    """
    result = row["result"]
    # pandas loads an empty CSV cell as NaN (a float), which does not support
    # the ``in`` operator; treat a missing output as empty text instead.
    output = "" if pd.isna(result) else str(result)
    solution = solutions[str(row["day"])]
    return [solution[0] in output, solution[1] in output]


df["scores"] = df.apply(score_submissions, axis=1)
df["code"] = df.apply(
    lambda row: get_solution_code(day=row["day"], model=row["model"]),
    axis=1,
)
# Wrap the code in a fenced block so the "Code" tab renders it as markdown.
df["code_md"] = df["code"].apply(lambda code: "```python\n" + code + "\n```")
# Display value only: the first six characters of the textual runtime.
df["Runtime (s)"] = df["total_time"].apply(lambda t: str(t)[0:6])
df["part_1"] = df["scores"].apply(lambda scores: scores[0])
df["part_2"] = df["scores"].apply(lambda scores: scores[1])

# Per-model star counts; summing a boolean column counts its True entries.
star_summary = {}
for model in df.model.unique():
    df_model = df[df.model == model]
    silver_stars = df_model.part_1.sum()
    gold_stars = df_model.part_2.sum()
    total_stars = silver_stars + gold_stars

    star_summary[model] = {
        "Model": model,
        "Total Stars ⭐️": total_stars,
        "Part 1 ⭐️": silver_stars,
        "Part 2 ⭐️": gold_stars,
    }


def score_to_string(s):
    """Return a star for a truthy score and a cross for a falsy one."""
    return "⭐️" if s else "❌"


with gr.Blocks() as demo:
    md = gr.Markdown(
        "Performance of LLMs (and my own) on AoC 2024. \n"
        "For more info, read this [blog post](https://www.jerpint.io/blog/advent-of-code-llms/)."
    )

    with gr.Tab("Stars"):
        star_df = pd.DataFrame.from_dict(star_summary, orient="index")
        # Two tables: overall ranking first, then the per-part breakdown.
        gr_total_stars_df = gr.DataFrame(
            star_df[["Model", "Total Stars ⭐️"]].sort_values(
                by="Total Stars ⭐️", ascending=False
            )
        )
        gr_star_df = gr.DataFrame(
            star_df[["Model", "Part 1 ⭐️", "Part 2 ⭐️"]].sort_values(
                by="Part 1 ⭐️", ascending=False
            )
        )

    with gr.Tab("Daily"):
        # Work on an explicit copy so adding the display columns neither
        # touches ``df`` nor triggers pandas' SettingWithCopyWarning.
        df_daily = df[["model", "day", "part_1", "part_2", "Runtime (s)"]].copy()
        df_daily["Part 1"] = df_daily["part_1"].apply(score_to_string)
        df_daily["Part 2"] = df_daily["part_2"].apply(score_to_string)
        df_daily = df_daily[["model", "day", "Part 1", "Part 2", "Runtime (s)"]]
        gr_df_daily = gr.DataFrame(df_daily.sort_values(by="day"))

    with gr.Tab("Outputs"):
        outputs_df = df[["model", "day", "Runtime (s)", "result"]].rename(
            columns={"result": "Output"}
        )
        # One datatype entry per displayed column (four columns).
        gr_outputs_df = gr.DataFrame(
            outputs_df, datatype=["str", "str", "str", "str"]
        )

    with gr.Tab("Code"):
        code_df = df[["model", "day", "code_md"]].rename(columns={"code_md": "Code"})
        gr_code_df = gr.DataFrame(code_df, datatype=["str", "str", "markdown"])

demo.launch()