geoalgo committed on
Commit
0548301
·
1 Parent(s): eef3091

change tab order

Browse files
Files changed (1) hide show
  1. main.py +44 -36
main.py CHANGED
@@ -55,6 +55,7 @@ df_mah_pivot.reset_index(drop=False, inplace=True)
55
 
56
  df_eval = pd.read_csv("multilingual_results.csv")
57
 
 
58
  def map_task_to_group(task: str) -> str | None:
59
  if task == "xcopa":
60
  return "XCOPA"
@@ -70,6 +71,7 @@ def map_task_to_group(task: str) -> str | None:
70
  return "Global MMLU"
71
  return None
72
 
 
73
  df_eval["group"] = df_eval.task.apply(map_task_to_group)
74
  df_eval_grouped = df_eval[df_eval["group"].notna()].copy()
75
  df_eval_grouped["Model"] = df_eval_grouped.model_name.apply(lambda s: s.split("/")[-1])
@@ -88,12 +90,14 @@ group_nshot = (
88
  .to_dict()
89
  )
90
 
 
91
  def display_name(group: str) -> str:
92
  label = group_nshot.get(group, "unknown")
93
  if label == "mixed" or label == "unknown" or label == "unknown":
94
  return f"{group} [mixed]" if label == "mixed" else f"{group} [unknown]"
95
  return f"{group} [{label}]"
96
 
 
97
  # Build a renamed version for display, preserving Model and Average columns
98
  display_columns_map = {
99
  col: display_name(col)
@@ -133,6 +137,46 @@ with gr.Blocks() as demo:
133
  ),
134
  )
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  with gr.Tab("Instruction-tuning ๐ŸŽฏ๓ ง๓ ข๓ ฅ๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ"):
137
  gr.Markdown(
138
  """
@@ -195,42 +239,6 @@ with gr.Blocks() as demo:
195
  ),
196
  )
197
 
198
- with gr.Tab("Multilingual evaluations ๐ŸŒ"):
199
- gr.Markdown(
200
- """
201
- Aggregated multilingual performance by task group (mean across languages when applicable).
202
- """
203
- )
204
- # Order columns: Model, groups..., Average
205
- raw_group_columns = [
206
- col
207
- for col in [
208
- "INCLUDE",
209
- "Belebele",
210
- "Global MMLU",
211
- "XCOPA",
212
- "XStoryCloze",
213
- "XWinograd",
214
- ]
215
- if col in df_multilingual_pivot.columns
216
- ]
217
- display_group_columns = [display_columns_map[col] for col in raw_group_columns]
218
- ordered_columns = ["Model", *display_group_columns, "Average โฌ†๏ธ"]
219
- df_multilingual_display = df_multilingual_display_all.loc[:, ordered_columns]
220
- Leaderboard(
221
- value=df_multilingual_display.round(2),
222
- select_columns=SelectColumns(
223
- default_selection=list(df_multilingual_display.columns),
224
- cant_deselect=["Model"],
225
- label="Select Columns to Display:",
226
- ),
227
- search_columns=SearchColumns(
228
- primary_column="Model",
229
- label="Filter a model",
230
- secondary_columns=[],
231
- ),
232
- )
233
-
234
 
235
  if __name__ == "__main__":
236
  demo.launch()
 
55
 
56
  df_eval = pd.read_csv("multilingual_results.csv")
57
 
58
+
59
  def map_task_to_group(task: str) -> str | None:
60
  if task == "xcopa":
61
  return "XCOPA"
 
71
  return "Global MMLU"
72
  return None
73
 
74
+
75
  df_eval["group"] = df_eval.task.apply(map_task_to_group)
76
  df_eval_grouped = df_eval[df_eval["group"].notna()].copy()
77
  df_eval_grouped["Model"] = df_eval_grouped.model_name.apply(lambda s: s.split("/")[-1])
 
90
  .to_dict()
91
  )
92
 
93
+
94
  def display_name(group: str) -> str:
95
  label = group_nshot.get(group, "unknown")
96
  if label == "mixed" or label == "unknown" or label == "unknown":
97
  return f"{group} [mixed]" if label == "mixed" else f"{group} [unknown]"
98
  return f"{group} [{label}]"
99
 
100
+
101
  # Build a renamed version for display, preserving Model and Average columns
102
  display_columns_map = {
103
  col: display_name(col)
 
137
  ),
138
  )
139
 
140
+ with gr.Tab("Multilingual evaluations ๐ŸŒ"):
141
+ gr.Markdown(
142
+ """
143
+ Aggregated multilingual performance by task group (mean across languages when applicable).
144
+ """
145
+ )
146
+ # Order columns: Model, groups..., Average
147
+ raw_group_columns = [
148
+ col
149
+ for col in [
150
+ "INCLUDE",
151
+ "Belebele",
152
+ "Global MMLU",
153
+ "XCOPA",
154
+ "XStoryCloze",
155
+ "XWinograd",
156
+ ]
157
+ if col in df_multilingual_pivot.columns
158
+ ]
159
+ display_group_columns = [
160
+ display_columns_map[col] for col in raw_group_columns
161
+ ]
162
+ ordered_columns = ["Model", *display_group_columns, "Average โฌ†๏ธ"]
163
+ df_multilingual_display = df_multilingual_display_all.loc[
164
+ :, ordered_columns
165
+ ]
166
+ Leaderboard(
167
+ value=df_multilingual_display.round(2),
168
+ select_columns=SelectColumns(
169
+ default_selection=list(df_multilingual_display.columns),
170
+ cant_deselect=["Model"],
171
+ label="Select Columns to Display:",
172
+ ),
173
+ search_columns=SearchColumns(
174
+ primary_column="Model",
175
+ label="Filter a model",
176
+ secondary_columns=[],
177
+ ),
178
+ )
179
+
180
  with gr.Tab("Instruction-tuning ๐ŸŽฏ๓ ง๓ ข๓ ฅ๐Ÿด๓ ง๓ ข๓ ฅ๓ ฎ๓ ง๓ ฟ"):
181
  gr.Markdown(
182
  """
 
239
  ),
240
  )
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  if __name__ == "__main__":
244
  demo.launch()