only 0-shot
src/leaderboard/read_evals.py CHANGED

@@ -38,6 +38,7 @@ class EvalResult:
             data = json.load(fp)

         config = data.get("config")
+        n_shot = data.get("n-shot")

         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
@@ -72,7 +73,7 @@ class EvalResult:
             task = task.value

             # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == 0])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
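The patched filter reads a top-level "n-shot" mapping from the results JSON and keeps a benchmark's score only when its recorded shot count is 0; the -1 default means benchmarks with no recorded shot count are dropped as well. Below is a minimal sketch of that behaviour against a hypothetical results payload (the benchmark and metric names are made up for illustration, not taken from the actual eval files):

import numpy as np

# Hypothetical results payload; real files are produced by the evaluation harness.
data = {
    "config": {"model_dtype": "torch.float16"},
    "n-shot": {"arc_challenge": 0, "hellaswag": 10},
    "results": {
        "arc_challenge": {"acc_norm": 0.41},
        "hellaswag": {"acc_norm": 0.78},
    },
}

n_shot = data.get("n-shot")

# Mirrors the updated list comprehension: only the benchmark recorded as
# 0-shot ("arc_challenge") contributes; "hellaswag" (10-shot) is skipped.
metric, benchmark = "acc_norm", "arc_challenge"
accs = np.array([
    v.get(metric, None)
    for k, v in data["results"].items()
    if benchmark == k and n_shot.get(k, -1) == 0
])
print(accs)  # -> [0.41]

Like the patched code, the sketch assumes the results file actually contains an "n-shot" mapping; if data.get("n-shot") returned None, the call to n_shot.get(k, -1) would raise an error.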