Spaces:
Running
Running
clean up
Browse files- evaluate.py +19 -13
evaluate.py
CHANGED
@@ -2,22 +2,23 @@ import os
|
|
2 |
import json
|
3 |
import subprocess
|
4 |
import pandas as pd
|
5 |
-
# from sklearn.manifold import TSNE
|
6 |
|
7 |
from generate import get_solution_file_path, all_models
|
8 |
-
from openai import OpenAI
|
9 |
import time
|
10 |
|
11 |
import os
|
12 |
import subprocess
|
13 |
|
14 |
|
|
|
|
|
15 |
|
16 |
-
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
21 |
|
22 |
# cd to the day directory
|
23 |
os.chdir(f"day{day:02d}")
|
@@ -31,7 +32,6 @@ def evaluate_submission(day: int, model: str):
|
|
31 |
print(f"Evaluating {file_path} for day {day} with model {model}")
|
32 |
|
33 |
# run the solution, and capture the output
|
34 |
-
timeout = 60 * 5
|
35 |
start_time = time.time()
|
36 |
try:
|
37 |
result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
|
@@ -60,14 +60,17 @@ def get_solution_code(day: int, model: str) -> str:
|
|
60 |
return file.read()
|
61 |
|
62 |
|
63 |
-
def extract_solutions(df, output_file = "solutions.json"):
|
64 |
-
|
|
|
|
|
|
|
65 |
solutions = {}
|
66 |
for day in range(1, 26):
|
67 |
-
sub_df = df[(df.model == "jerpint") & (df.day == day)]
|
68 |
-
|
69 |
|
|
|
70 |
day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
|
|
|
71 |
if len(day_solution) == 0:
|
72 |
part1 = "N/A"
|
73 |
part2 = "N/A"
|
@@ -125,8 +128,11 @@ def evaluate_submissions(all_models, results_file = "results.csv", skip = True):
|
|
125 |
|
126 |
|
127 |
if __name__ == "__main__":
|
|
|
128 |
all_models["human"] = ["jerpint"]
|
|
|
|
|
129 |
df = evaluate_submissions(all_models, results_file="results.csv")
|
130 |
|
131 |
-
#
|
132 |
-
solutions = extract_solutions(df, output_file="solutions.json")
|
|
|
2 |
import json
|
3 |
import subprocess
|
4 |
import pandas as pd
|
|
|
5 |
|
6 |
from generate import get_solution_file_path, all_models
|
|
|
7 |
import time
|
8 |
|
9 |
import os
|
10 |
import subprocess
|
11 |
|
12 |
|
13 |
+
def evaluate_submission(day: int, model: str, timeout = 60 * 5):
|
14 |
+
"""Evaluates the python code of a submission for the given day and model.
|
15 |
|
16 |
+
Returns the result captured from stdout and the total time taken.
|
17 |
|
18 |
+
Does not score the actual submission (e.g. reward a star); this comes later.
|
19 |
+
The timeout (in seconds) halts the program after that amount of time, in case an infinite loop arises.
|
20 |
+
If errors are produced, they are also returned.
|
21 |
+
"""
|
22 |
|
23 |
# cd to the day directory
|
24 |
os.chdir(f"day{day:02d}")
|
|
|
32 |
print(f"Evaluating {file_path} for day {day} with model {model}")
|
33 |
|
34 |
# run the solution, and capture the output
|
|
|
35 |
start_time = time.time()
|
36 |
try:
|
37 |
result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
|
|
|
60 |
return file.read()
|
61 |
|
62 |
|
63 |
+
def extract_solutions(df, model: str, output_file = "solutions.json") -> dict:
|
64 |
+
"""This will get all solutions produced by the model, and use those as 'ground truth', which can be used to score other models.
|
65 |
+
|
66 |
+
Results are saved in .json format.
|
67 |
+
"""
|
68 |
solutions = {}
|
69 |
for day in range(1, 26):
|
|
|
|
|
70 |
|
71 |
+
sub_df = df[(df.model == model) & (df.day == day)]
|
72 |
day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
|
73 |
+
|
74 |
if len(day_solution) == 0:
|
75 |
part1 = "N/A"
|
76 |
part2 = "N/A"
|
|
|
128 |
|
129 |
|
130 |
if __name__ == "__main__":
|
131 |
+
# Add my submissions to the list of available models, for convenience
|
132 |
all_models["human"] = ["jerpint"]
|
133 |
+
|
134 |
+
# Collects all outputs from running the python code
|
135 |
df = evaluate_submissions(all_models, results_file="results.csv")
|
136 |
|
137 |
+
# Extracts solutions
|
138 |
+
solutions = extract_solutions(df, output_file="solutions.json", model = "jerpint")
|