Lucas Georges Gabriel Charpentier Pacheco
committed on
Commit
·
0bb05a6
1
Parent(s):
0dffe6b
[Bugfix] Fix error in Text Average calculations
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -203,7 +203,7 @@ class EvalResult:
|
|
203 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
204 |
eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
|
205 |
vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
|
206 |
-
text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG", "Reading", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "
|
207 |
num_text_tasks = len(text_benchmarks) // 2
|
208 |
text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
|
209 |
if self.still_on_hub:
|
|
|
203 |
"""Converts the Eval Result to a dict compatible with our dataframe display"""
|
204 |
eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
|
205 |
vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
|
206 |
+
text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG Adjective Nominalization", "WUG Past Tense", "COMPS", "Reading", "AoA", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "wug_adj", "wug_past", "comps", "reading", "aoa", "glue")
|
207 |
num_text_tasks = len(text_benchmarks) // 2
|
208 |
text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
|
209 |
if self.still_on_hub:
|