Spaces:

jerpint
/

advent24-llm

Running

App Files Files Community

jerpint committed on Dec 29, 2024

Commit

5ea436e

1 Parent(s): 97a885e

clean up

Browse files

Files changed (1) hide show

evaluate.py +19 -13

evaluate.py CHANGED Viewed

@@ -2,22 +2,23 @@ import os
 import json
 import subprocess
 import pandas as pd
-# from sklearn.manifold import TSNE
 from generate import get_solution_file_path, all_models
-from openai import OpenAI
 import time
 import os
 import subprocess
-client = OpenAI()
-def evaluate_submission(day: int, model: str):
-    """Evaluates the submission for the given day and model. Returns the result captured from stdout and the total time taken."""
     # cd to the day directory
     os.chdir(f"day{day:02d}")
@@ -31,7 +32,6 @@ def evaluate_submission(day: int, model: str):
         print(f"Evaluating {file_path} for day {day} with model {model}")
     # run the solution, and capture the output
-    timeout = 60 * 5
     start_time = time.time()
     try:
         result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
@@ -60,14 +60,17 @@ def get_solution_code(day: int, model: str) -> str:
         return file.read()
-def extract_solutions(df, output_file = "solutions.json"):
-    # TODO: better way of getting this?
     solutions = {}
     for day in range(1, 26):
-        sub_df = df[(df.model == "jerpint") & (df.day == day)]
         day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
         if len(day_solution) == 0:
             part1 = "N/A"
             part2 = "N/A"
@@ -125,8 +128,11 @@ def evaluate_submissions(all_models, results_file = "results.csv", skip = True):
 if __name__ == "__main__":
     all_models["human"] = ["jerpint"]
     df = evaluate_submissions(all_models, results_file="results.csv")
-    # Run once to save results
-    solutions = extract_solutions(df, output_file="solutions.json")

 import json
 import subprocess
 import pandas as pd
 from generate import get_solution_file_path, all_models
 import time
 import os
 import subprocess
+def evaluate_submission(day: int, model: str, timeout = 60 * 5):
+    """Evaluates the python code of a submission for the given day and model.
+    Returns the result captured from stdout and the total time taken.
+    Does not score the actual submission (e.g. reward a star), this comes later.
+    Timeout (seconds) is used to halt the program after that amount of time, in case infinite loops arise.
+    If errors are produced, they are also returned.
+    """
     # cd to the day directory
     os.chdir(f"day{day:02d}")
         print(f"Evaluating {file_path} for day {day} with model {model}")
     # run the solution, and capture the output
     start_time = time.time()
     try:
         result = subprocess.run(["python", file_path], capture_output=True, text=True, timeout=timeout)
         return file.read()
+def extract_solutions(df, model: str, output_file = "solutions.json") -> dict:
+    """This will get all solutions produced by the model, and use those as 'ground truth', which can be used to score other models.
+    Results saved in a .json format
+    """
     solutions = {}
     for day in range(1, 26):
+        sub_df = df[(df.model == model) & (df.day == day)]
         day_solution = sub_df.result.to_list()[0].strip("\n").split("\n")
         if len(day_solution) == 0:
             part1 = "N/A"
             part2 = "N/A"
 if __name__ == "__main__":
+    # Add my submissions to the list of available models, for convenience
     all_models["human"] = ["jerpint"]
+    # Collects all outputs from running the python code
     df = evaluate_submissions(all_models, results_file="results.csv")
+    # Extracts solutions
+    solutions = extract_solutions(df, output_file="solutions.json", model = "jerpint")