only 0-shot
src/leaderboard/read_evals.py CHANGED

@@ -38,6 +38,7 @@ class EvalResult:
             data = json.load(fp)

         config = data.get("config")
+        n_shot = data.get("n-shot")

         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
@@ -72,7 +73,7 @@ class EvalResult:
             task = task.value

             # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == 0])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
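The patched filter reads a top-level "n-shot" mapping from the results JSON and keeps a benchmark's score only when its recorded shot count is 0; the -1 default means benchmarks with no recorded shot count are dropped as well. Below is a minimal sketch of that behaviour against a hypothetical results payload (the benchmark and metric names are made up for illustration, not taken from the actual eval files):

import numpy as np

# Hypothetical results payload; real files are produced by the evaluation harness.
data = {
    "config": {"model_dtype": "torch.float16"},
    "n-shot": {"arc_challenge": 0, "hellaswag": 10},
    "results": {
        "arc_challenge": {"acc_norm": 0.41},
        "hellaswag": {"acc_norm": 0.78},
    },
}

n_shot = data.get("n-shot")

# Mirrors the updated list comprehension: only the benchmark recorded as
# 0-shot ("arc_challenge") contributes; "hellaswag" (10-shot) is skipped.
metric, benchmark = "acc_norm", "arc_challenge"
accs = np.array([
    v.get(metric, None)
    for k, v in data["results"].items()
    if benchmark == k and n_shot.get(k, -1) == 0
])
print(accs)  # -> [0.41]

Like the patched code, the sketch assumes the results file actually contains an "n-shot" mapping; if data.get("n-shot") returned None, the call to n_shot.get(k, -1) would raise an error.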