jerpint committed on
Commit
5ea436e
·
1 Parent(s): 97a885e
Files changed (1) hide show
  1. evaluate.py +19 -13
evaluate.py CHANGED
@@ -2,22 +2,23 @@ import os
2
  import json
3
  import subprocess
4
  import pandas as pd
5
- # from sklearn.manifold import TSNE
6
 
7
  from generate import get_solution_file_path, all_models
8
- from openai import OpenAI
9
  import time
10
 
11
  import os
12
  import subprocess
13
 
14
 
 
 
15
 
16
- client = OpenAI()
17
 
18
-
19
- def evaluate_submission(day: int, model: str):
20
- """Evaluates the submission for the given day and model. Returns the result captured from stdout and the total time taken."""
 
21
 
22
  # cd to the day directory
23
  os.chdir(f"day{day:02d}")
@@ -31,7 +32,6 @@ def evaluate_submission(day: int, model: str):
31
  print(f"Evaluating {file_path} for day {day} with model {model}")
32
 
33
  # run the solution, and capture the output
34
- timeout = 60 * 5
35
  start_time = time.time()
36
  try:
37
  result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
@@ -60,14 +60,17 @@ def get_solution_code(day: int, model: str) -> str:
60
  return file.read()
61
 
62
 
63
- def extract_solutions(df, output_file = "solutions.json"):
64
- # TODO: better way of getting this?
 
 
 
65
  solutions = {}
66
  for day in range(1, 26):
67
- sub_df = df[(df.model == "jerpint") & (df.day == day)]
68
-
69
 
 
70
  day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
 
71
  if len(day_solution) == 0:
72
  part1 = "N/A"
73
  part2 = "N/A"
@@ -125,8 +128,11 @@ def evaluate_submissions(all_models, results_file = "results.csv", skip = True):
125
 
126
 
127
  if __name__ == "__main__":
 
128
  all_models["human"] = ["jerpint"]
 
 
129
  df = evaluate_submissions(all_models, results_file="results.csv")
130
 
131
- # Run once to save results
132
- solutions = extract_solutions(df, output_file="solutions.json")
 
2
  import json
3
  import subprocess
4
  import pandas as pd
 
5
 
6
  from generate import get_solution_file_path, all_models
 
7
  import time
8
 
9
  import os
10
  import subprocess
11
 
12
 
13
+ def evaluate_submission(day: int, model: str, timeout = 60 * 5):
14
+ """Evaluates the python code of a submission for the given day and model.
15
 
16
+ Returns the result captured from stdout and the total time taken.
17
 
18
+ Does not score the actual submission (e.g. reward a star), this comes later.
19
+ Timeout (seconds) is used to halt the program after that amount of time, in case infinite loops arise.
20
+ If errors are produced, they are also returned.
21
+ """
22
 
23
  # cd to the day directory
24
  os.chdir(f"day{day:02d}")
 
32
  print(f"Evaluating {file_path} for day {day} with model {model}")
33
 
34
  # run the solution, and capture the output
 
35
  start_time = time.time()
36
  try:
37
  result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
 
60
  return file.read()
61
 
62
 
63
+ def extract_solutions(df, model: str, output_file = "solutions.json") -> dict:
64
+ """This will get all solutions produced by the model, and use those as 'ground truth', which can be used to score other models.
65
+
66
+ Results saved in a .json format
67
+ """
68
  solutions = {}
69
  for day in range(1, 26):
 
 
70
 
71
+ sub_df = df[(df.model == model) & (df.day == day)]
72
  day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
73
+
74
  if len(day_solution) == 0:
75
  part1 = "N/A"
76
  part2 = "N/A"
 
128
 
129
 
130
  if __name__ == "__main__":
131
+ # Add my submissions to the list of available models, for convenience
132
  all_models["human"] = ["jerpint"]
133
+
134
+ # Collects all outputs from running the python code
135
  df = evaluate_submissions(all_models, results_file="results.csv")
136
 
137
+ # Extracts solutions
138
+ solutions = extract_solutions(df, output_file="solutions.json", model = "jerpint")