Spaces:
Running
Running
File size: 3,352 Bytes
ec9edcd 84520b3 ec9edcd 5fa3b50 ec9edcd 84520b3 ec9edcd 84520b3 ec9edcd ceb41f3 ec9edcd 84520b3 ec9edcd fa7fc18 ec9edcd 5fa3b50 ec9edcd 84520b3 ec9edcd 84520b3 5fa3b50 ec9edcd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import gradio as gr
import pandas as pd
import json
from evaluate import get_solution_code
# Load the benchmark run records and the reference answers once, at import.
# NOTE(review): an earlier comment here said "only evaluate first 9 days", but
# nothing in this file filters by day -- presumably that limit is reflected in
# the contents of results.csv. Confirm before relying on it.
df = pd.read_csv("results.csv")

# Pin the encoding: JSON text is UTF-8, while open() would otherwise fall back
# to the host's locale encoding.
with open("solutions.json", encoding="utf-8") as f:
    solutions = json.load(f)
def score_submissions(row, solutions_by_day=None):
    """Score one results row against the expected answers for its day.

    A part counts as solved when its expected answer occurs anywhere in the
    row's "result" text (substring match).

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing "day" and
            "result".
        solutions_by_day: Optional mapping from the day (as a string) to a
            sequence whose first two items are the part 1 and part 2 answers.
            Defaults to the module-level ``solutions``.

    Returns:
        A two-item list ``[part_1_solved, part_2_solved]`` of booleans.

    Raises:
        KeyError: If there is no entry for the row's day.
    """
    import math

    if solutions_by_day is None:
        solutions_by_day = solutions

    expected = solutions_by_day[str(row["day"])]
    result = row["result"]

    # An empty CSV cell is read back as NaN (a float); ``in`` on it would raise
    # TypeError. A run with no output cannot have solved anything.
    if result is None or (isinstance(result, float) and math.isnan(result)):
        return [False, False]

    # Compare as text on both sides so a numeric result or numeric stored
    # answer cannot raise TypeError either.
    text = result if isinstance(result, str) else str(result)
    return [str(expected[0]) in text, str(expected[1]) in text]
# Grade every run; "scores" holds the [part 1, part 2] pair returned by
# score_submissions for that row.
df["scores"] = df.apply(score_submissions, axis=1)

# Attach whatever get_solution_code returns for each (day, model) pair, plus a
# copy wrapped in a ```python fence for markdown rendering.
df["code"] = df.apply(
    lambda row: get_solution_code(day=row["day"], model=row["model"]),
    axis=1,
)
df["code_md"] = df["code"].apply(lambda source: "```python\n" + source + "\n```")

# Display form of the runtime: the first six characters of its string form.
df["Runtime (s)"] = df["total_time"].apply(lambda seconds: str(seconds)[:6])

# Unpack the score pair into one column per puzzle part.
df["part_1"] = df["scores"].apply(lambda pair: pair[0])
df["part_2"] = df["scores"].apply(lambda pair: pair[1])
# Per-model star counts, keyed by model name. Each value is a row for the
# "Stars" tables: the model, its total, and the part 1 / part 2 counts.
star_summary = {}
for model in df["model"].unique():
    model_rows = df.loc[df["model"] == model]

    # Summing the per-part score columns counts the solved puzzles.
    part_1_stars = model_rows["part_1"].sum()
    part_2_stars = model_rows["part_2"].sum()

    star_summary[model] = {
        "Model": model,
        "Total Stars ⭐️": part_1_stars + part_2_stars,
        "Part 1 ⭐️": part_1_stars,
        "Part 2 ⭐️": part_2_stars,
    }
def score_to_string(s):
    """Render a score for display: a star when truthy, a cross otherwise."""
    if s:
        return "⭐️"
    return "❌"
# Build the dashboard: an intro line followed by one tab per view of the data.
with gr.Blocks() as demo:
    md = gr.Markdown("Performance of LLMs (and my own) on AoC 2024. For more info, read this [blog post](https://www.jerpint.io/blog/advent-of-code-llms/).")

    with gr.Tab("Stars"):
        star_df = pd.DataFrame.from_dict(star_summary, orient="index")
        # Two leaderboards: overall totals first, then the per-part breakdown.
        # Each table gets its own name so neither handle is overwritten.
        gr_total_stars_df = gr.DataFrame(
            star_df[["Model", "Total Stars ⭐️"]].sort_values(by="Total Stars ⭐️", ascending=False)
        )
        gr_star_df = gr.DataFrame(
            star_df[["Model", "Part 1 ⭐️", "Part 2 ⭐️"]].sort_values(by="Part 1 ⭐️", ascending=False)
        )

    with gr.Tab("Daily"):
        # Parse the info to something more readable.
        # Work on an explicit copy: adding columns to a slice of `df` triggers
        # pandas' SettingWithCopyWarning.
        df_daily = df[["model", "day", "part_1", "part_2", "Runtime (s)"]].copy()
        df_daily["Part 1"] = df_daily["part_1"].apply(score_to_string)
        df_daily["Part 2"] = df_daily["part_2"].apply(score_to_string)
        df_daily = df_daily[["model", "day", "Part 1", "Part 2", "Runtime (s)"]]
        gr_df_daily = gr.DataFrame(df_daily.sort_values(by="day"))

    with gr.Tab("Outputs"):
        outputs_df = df[["model", "day", "Runtime (s)", "result"]].rename(columns={"result": "Output"})
        # One datatype entry per displayed column (four columns, four entries).
        gr_outputs_df = gr.DataFrame(outputs_df, datatype=["str", "str", "str", "str"])

    with gr.Tab("Code"):
        code_df = df[["model", "day", "code_md"]].rename(columns={"code_md": "Code"})
        # The last column is rendered as markdown so the fenced code displays
        # as a code block.
        gr_code_df = gr.DataFrame(code_df, datatype=["str", "str", "markdown"])

demo.launch()