Spaces:

albertvillanova
/

open-llm-leaderboard-results-compare-json

Sleeping

App Files Files Community

albertvillanova HF Staff committed on Oct 4, 2024

Commit

f8be00e

verified ·

1 Parent(s): b09a5e0

Use latest result per model

Browse files

Files changed (1) hide show

app.py +49 -31

app.py CHANGED Viewed

@@ -10,41 +10,59 @@ RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
 fs = HfFileSystem()
-def fetch_results():
-    files = fs.glob(f"{RESULTS_DATASET_ID}/**/**/*.json")
-    results = [file[len(RESULTS_DATASET_ID) +1:] for file in files]
-    return results
 def load_result(result_path) -> dict:
-    with fs.open(f"{RESULTS_DATASET_ID}/{result_path}", "r") as f:
         data = json.load(f)
     return data
-if __name__ == "__main__":
-    results = fetch_results()
-    with gr.Blocks() as demo:
-        gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
-        gr.HTML("<h3 style='text-align: center;'>Select 2 results to load and compare</h3>")
-        with gr.Row():
-            with gr.Column():
-                result_path_1 = gr.Dropdown(choices=results, label="Results")
-                load_btn_1 = gr.Button("Load")
-                result_1 = gr.JSON(label="Result")
-                load_btn_1.click(
-                    fn=load_result,
-                    inputs=result_path_1,
-                    outputs=result_1,
-                )
-            with gr.Column():
-                result_path_2 = gr.Dropdown(choices=results, label="Results")
-                load_btn_2 = gr.Button("Load")
-                result_2 = gr.JSON(label="Result")
-                load_btn_2.click(
-                    fn=load_result,
-                    inputs=result_path_2,
-                    outputs=result_2,
-                )
-    demo.launch()

 fs = HfFileSystem()
+def fetch_result_paths():
+    """Return the paths of all JSON files matched under the results dataset.
+
+    Globs ``{RESULTS_DATASET_ID}/**/**/*.json`` through the module-level
+    ``fs`` filesystem and returns the glob result unchanged (the
+    ``RESULTS_DATASET_ID`` prefix is not stripped here).
+    """
+    paths = fs.glob(f"{RESULTS_DATASET_ID}/**/**/*.json")
+    return paths
+def filter_latest_result_path_per_model(paths):
+    """Pick one result path per model: the greatest one in string order.
+
+    Each path is grouped under a "model ID", which here is the path with its
+    first ``len(RESULTS_DATASET_ID) + 1`` characters removed and its final
+    ``/<file>`` component dropped. For every group the value kept is
+    ``max()`` of its paths, i.e. the lexicographically largest string.
+
+    NOTE(review): this is only the *latest* result if the file names sort
+    chronologically (e.g. embed a sortable timestamp) -- confirm against the
+    dataset layout.
+
+    Returns a dict mapping model ID to the selected full path.
+    """
+    from collections import defaultdict
+    d = defaultdict(list)
+    for path in paths:
+        # The slice drops the leading characters without checking them; if the
+        # remainder contains no "/", the two-name unpacking raises ValueError.
+        model_id, _ = path[len(RESULTS_DATASET_ID) +1:].rsplit("/", 1)
+        d[model_id].append(path)
+    # Inside the comprehension, `paths` is the per-model list from `d`; it
+    # shadows the function parameter only within the comprehension.
+    return {model_id: max(paths) for model_id, paths in d.items()}
+def get_result_path_from_model(model_id, result_path_per_model):
+    """Return the result path stored for ``model_id`` in ``result_path_per_model``.
+
+    This is a plain subscript lookup: an unknown ``model_id`` is not handled
+    here and propagates whatever the mapping raises (``KeyError`` for a dict).
+    """
+    return result_path_per_model[model_id]
 def load_result(result_path) -> dict:
+    """Open ``result_path`` through ``fs`` in text mode and parse it as JSON.
+
+    ``result_path`` is passed to ``fs.open`` as-is; no dataset prefix is
+    prepended here, so callers must supply the full path.
+    """
+    with fs.open(result_path, "r") as f:
         data = json.load(f)
     return data
+def render_result(model_id):
+    """Return the parsed JSON result selected for ``model_id``.
+
+    Resolves the path through the module-level ``latest_result_path_per_model``
+    mapping and loads it with ``load_result``. Registered as the ``click``
+    handler of both "Load" buttons in the UI below.
+
+    NOTE(review): when nothing is selected in the dropdown the value passed in
+    is presumably ``None``, which would fail the mapping lookup -- confirm.
+    """
+    result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
+    return load_result(result_path)
+# if __name__ == "__main__":
+# The guard above is commented out, so everything below runs at module level.
+# The mapping is built once here; render_result reads it as a global.
+latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())
+with gr.Blocks() as demo:
+    gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
+    gr.HTML("<h3 style='text-align: center;'>Select 2 results to load and compare</h3>")
+    with gr.Row():
+        # Two columns with the same widgets (dropdown, button, JSON view),
+        # each wired to render_result independently of the other.
+        with gr.Column():
+            # NOTE(review): the choices are model IDs (the mapping's keys),
+            # while the label still reads "Results".
+            model_id_1 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
+            load_btn_1 = gr.Button("Load")
+            result_1 = gr.JSON(label="Result")
+            load_btn_1.click(
+                fn=render_result,
+                inputs=model_id_1,
+                outputs=result_1,
+            )
+        with gr.Column():
+            model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
+            load_btn_2 = gr.Button("Load")
+            result_2 = gr.JSON(label="Result")
+            load_btn_2.click(
+                fn=render_result,
+                inputs=model_id_2,
+                outputs=result_2,
+            )
+demo.launch()