Spaces:

BabyLM-community
/

babylm-leaderboard-2025-all-tasks

Running

Lucas Georges Gabriel Charpentier Pacheco committed 2 days ago

Commit

0bb05a6

1 Parent(s): 0dffe6b

[Bugfix] Fix error in Text Average calculations

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -203,7 +203,7 @@ class EvalResult:
         """Converts the Eval Result to a dict compatible with our dataframe display"""
         eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
         vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
-        text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG", "Reading", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "wug", "reading", "glue")
         num_text_tasks = len(text_benchmarks) // 2
         text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
         if self.still_on_hub:

         """Converts the Eval Result to a dict compatible with our dataframe display"""
         eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
         vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
+        text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG Adjective Nominalization", "WUG Past Tense", "COMPS", "Reading", "AoA", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "wug_adj", "wug_past", "comps", "reading", "aoa", "glue")
         num_text_tasks = len(text_benchmarks) // 2
         text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
         if self.still_on_hub: