Lucas Georges Gabriel Charpentier Pacheco committed on
Commit
0bb05a6
·
1 Parent(s): 0dffe6b

[Bugfix] Fix error in Text Average calculations

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +1 -1
src/leaderboard/read_evals.py CHANGED
@@ -203,7 +203,7 @@ class EvalResult:
203
  """Converts the Eval Result to a dict compatible with our dataframe display"""
204
  eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
205
  vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
206
- text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG", "Reading", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "wug", "reading", "glue")
207
  num_text_tasks = len(text_benchmarks) // 2
208
  text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
209
  if self.still_on_hub:
 
203
  """Converts the Eval Result to a dict compatible with our dataframe display"""
204
  eval_column = AutoEvalColumnMultimodal if self.track.lower() == "multimodal" else AutoEvalColumn
205
  vision_tasks = ("VQA", "Winoground", "DevBench", "vqa", "winoground", "devbench")
206
+ text_benchmarks = ("BLiMP", "BLiMP Supplement", "EWoK", "Entity Tracking", "WUG Adjective Nominalization", "WUG Past Tense", "COMPS", "Reading", "AoA", "(Super)GLUE", "blimp", "blimp_supplement", "ewok", "entity_tracking", "wug_adj", "wug_past", "comps", "reading", "aoa", "glue")
207
  num_text_tasks = len(text_benchmarks) // 2
208
  text_average = sum([v for k, v in self.results.items() if v is not None and k in text_benchmarks]) / num_text_tasks
209
  if self.still_on_hub: