timurcarstensen committed on
Commit
eef3091
·
unverified ·
1 Parent(s): 3288843

feat: multilingual tab and data

Browse files
Files changed (2) hide show
  1. main.py +85 -0
  2. multilingual_results.csv +338 -0
main.py CHANGED
@@ -53,6 +53,55 @@ df_mah_pivot.sort_values(by="Average ⬆️", ascending=False, inplace=True)
53
  df_mah_pivot.index.rename("Model", inplace=True)
54
  df_mah_pivot.reset_index(drop=False, inplace=True)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  cols = [
57
  #'Llama-3.1-8B',
58
  "Llama-3.1-Tulu-3-8B-SFT",
@@ -146,6 +195,42 @@ with gr.Blocks() as demo:
146
  ),
147
  )
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  if __name__ == "__main__":
151
  demo.launch()
 
53
  df_mah_pivot.index.rename("Model", inplace=True)
54
  df_mah_pivot.reset_index(drop=False, inplace=True)
55
 
56
+ df_eval = pd.read_csv("multilingual_results.csv")  # raw results table; the CSV header is model_name,task,n_shot,performance
57
+
58
+ def map_task_to_group(task: str) -> str | None:
59
+ if task == "xcopa":
60
+ return "XCOPA"
61
+ if task == "xstorycloze":
62
+ return "XStoryCloze"
63
+ if task == "xwinograd":
64
+ return "XWinograd"
65
+ if task.startswith("include_base_44_"):
66
+ return "INCLUDE"
67
+ if task.startswith("belebele_"):
68
+ return "Belebele"
69
+ if task.startswith("global_mmlu_full_"):
70
+ return "Global MMLU"
71
+ return None
72
+
73
+ df_eval["group"] = df_eval.task.apply(map_task_to_group)  # tag every row with its benchmark group (None when the task is not recognised)
74
+ df_eval_grouped = df_eval[df_eval["group"].notna()].copy()  # keep only rows that matched a group; copy so new columns can be added
75
+ df_eval_grouped["Model"] = df_eval_grouped.model_name.apply(lambda s: s.split("/")[-1])  # short model name: the part after the last "/"
76
+ df_multilingual_pivot = df_eval_grouped.pivot_table(  # one row per Model, one column per group
77
+ index="Model", columns="group", values="performance", aggfunc="mean"  # each cell is the mean performance over that group's tasks
78
+ )
79
+ df_multilingual_pivot["Average ⬆️"] = df_multilingual_pivot.mean(axis=1)  # row-wise mean over the group columns (Model is still the index here)
80
+ df_multilingual_pivot.sort_values(by="Average ⬆️", ascending=False, inplace=True)  # best average first
81
+ df_multilingual_pivot.index.rename("Model", inplace=True)
82
+ df_multilingual_pivot.reset_index(drop=False, inplace=True)  # turn the Model index into a regular column
83
+
84
+ # Determine display names for groups including n_shot when unique
85
+ group_nshot = (  # dict: group name -> n_shot label for that group
86
+ df_eval_grouped.groupby("group")["n_shot"]
87
+ .agg(lambda s: s.iloc[0] if s.nunique() == 1 else "mixed")  # the shared n_shot value when all rows agree, otherwise the string "mixed"
88
+ .to_dict()
89
+ )
90
+
91
def display_name(group: str) -> str:
    """Return the column header for *group*, suffixed with its n-shot label.

    The label is read from the module-level ``group_nshot`` mapping and is
    either the group's n_shot value, the string ``"mixed"``, or ``"unknown"``
    when the group has no entry in the mapping.

    Args:
        group: Group name as used for the pivot columns.

    Returns:
        A header of the form ``"<group> [<label>]"``.
    """
    label = group_nshot.get(group, "unknown")
    # Every kind of label (shot count, "mixed", "unknown") is rendered the
    # same way, so no per-label branching is needed.
    return f"{group} [{label}]"
96
+
97
+ # Build a renamed version for display, preserving Model and Average columns
98
+ display_columns_map = {  # raw group column name -> header produced by display_name
99
+ col: display_name(col)
100
+ for col in df_multilingual_pivot.columns
101
+ if col not in ["Model", "Average ⬆️"]  # these two columns keep their names
102
+ }
103
+ df_multilingual_display_all = df_multilingual_pivot.rename(columns=display_columns_map)  # renamed frame for display; rename is not in place, so the pivot keeps its raw names
104
+
105
  cols = [
106
  #'Llama-3.1-8B',
107
  "Llama-3.1-Tulu-3-8B-SFT",
 
195
  ),
196
  )
197
 
198
+ with gr.Tab("Multilingual evaluations 🌍"):
199
+ gr.Markdown(
200
+ """
201
+ Aggregated multilingual performance by task group (mean across languages when applicable).
202
+ """
203
+ )
204
+ # Order columns: Model, groups..., Average
205
+ raw_group_columns = [
206
+ col
207
+ for col in [
208
+ "INCLUDE",
209
+ "Belebele",
210
+ "Global MMLU",
211
+ "XCOPA",
212
+ "XStoryCloze",
213
+ "XWinograd",
214
+ ]
215
+ if col in df_multilingual_pivot.columns
216
+ ]
217
+ display_group_columns = [display_columns_map[col] for col in raw_group_columns]
218
+ ordered_columns = ["Model", *display_group_columns, "Average ⬆️"]
219
+ df_multilingual_display = df_multilingual_display_all.loc[:, ordered_columns]
220
+ Leaderboard(
221
+ value=df_multilingual_display.round(2),
222
+ select_columns=SelectColumns(
223
+ default_selection=list(df_multilingual_display.columns),
224
+ cant_deselect=["Model"],
225
+ label="Select Columns to Display:",
226
+ ),
227
+ search_columns=SearchColumns(
228
+ primary_column="Model",
229
+ label="Filter a model",
230
+ secondary_columns=[],
231
+ ),
232
+ )
233
+
234
 
235
  if __name__ == "__main__":
236
  demo.launch()
multilingual_results.csv ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,task,n_shot,performance
2
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_italian,0,0.2937956204379562
3
+ HuggingFaceTB/SmolLM3-3B,belebele_swe_Latn,5,0.69
4
+ HuggingFaceTB/SmolLM3-3B,include_base_44_hungarian,0,0.2818181818181818
5
+ Qwen/Qwen3-1.7B,belebele_est_Latn,5,0.56
6
+ HuggingFaceTB/SmolLM2-1.7B,belebele_dan_Latn,5,0.3466666666666667
7
+ HuggingFaceTB/SmolLM3-3B,include_base_44_georgian,0,0.276
8
+ HuggingFaceTB/SmolLM3-3B,belebele_por_Latn,5,0.7944444444444444
9
+ google/gemma-3-4b-it,global_mmlu_full_pt,5,0.527204101979775
10
+ HuggingFaceTB/SmolLM3-3B,include_base_44_estonian,0,0.29464285714285715
11
+ google/gemma-3-4b-it,belebele_slv_Latn,5,0.7533333333333333
12
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_pl,5,0.31825950719270757
13
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_it,5,0.35878080045577554
14
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_bulgarian,0,0.28909090909090907
15
+ HuggingFaceTB/SmolLM3-3B,belebele_hrv_Latn,5,0.5722222222222222
16
+ google/gemma-3-4b-it,include_base_44_serbian,0,0.5672727272727273
17
+ HuggingFaceTB/SmolLM3-3B,belebele_mlt_Latn,5,0.3566666666666667
18
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_spanish,0,0.3181818181818182
19
+ Qwen/Qwen3-1.7B,belebele_fra_Latn,5,0.7922222222222223
20
+ google/gemma-3-4b-it,include_base_44_estonian,0,0.47767857142857145
21
+ google/gemma-3-4b-it,include_base_44_dutch,0,0.5517241379310345
22
+ HuggingFaceTB/SmolLM3-3B,include_base_44_dutch,0,0.47005444646098005
23
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_es,5,0.3245976356644353
24
+ Qwen/Qwen3-1.7B,include_base_44_lithuanian,0,0.37265917602996257
25
+ Qwen/Qwen3-1.7B,include_base_44_polish,0,0.4142335766423358
26
+ Qwen/Qwen3-1.7B,global_mmlu_full_cs,5,0.4690927218344965
27
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_deu_Latn,5,0.33444444444444443
28
+ HuggingFaceTB/SmolLM2-1.7B,belebele_ita_Latn,5,0.4033333333333333
29
+ HuggingFaceTB/SmolLM3-3B,belebele_ell_Grek,5,0.7633333333333333
30
+ HuggingFaceTB/SmolLM3-3B,belebele_dan_Latn,5,0.6522222222222223
31
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_lithuanian,0,0.27153558052434457
32
+ Qwen/Qwen3-1.7B,belebele_eng_Latn,5,0.8255555555555556
33
+ Qwen/Qwen3-1.7B,belebele_hun_Latn,5,0.6733333333333333
34
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_ro,5,0.451502634952286
35
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_serbian,0,0.28363636363636363
36
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_belarusian,0,0.26
37
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_german,0,0.35251798561151076
38
+ HuggingFaceTB/SmolLM2-1.7B,belebele_nld_Latn,5,0.41333333333333333
39
+ google/gemma-3-4b-it,global_mmlu_full_sr,5,0.4661016949152542
40
+ google/gemma-3-4b-it,global_mmlu_full_ro,5,0.5182310212220481
41
+ google/gemma-3-4b-it,include_base_44_north macedonian,0,0.6678765880217786
42
+ google/gemma-3-4b-it,include_base_44_hungarian,0,0.41454545454545455
43
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_hrv_Latn,5,0.27
44
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_serbian,0,0.22545454545454546
45
+ HuggingFaceTB/SmolLM2-1.7B,belebele_hrv_Latn,5,0.33666666666666667
46
+ Qwen/Qwen3-1.7B,belebele_ita_Latn,5,0.7511111111111111
47
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_basque,0,0.278
48
+ google/gemma-3-4b-it,include_base_44_portuguese,0,0.49364791288566245
49
+ Qwen/Qwen3-1.7B,include_base_44_hungarian,0,0.37636363636363634
50
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_pt,5,0.3721692066657171
51
+ google/gemma-3-4b-it,global_mmlu_full_tr,5,0.4814129041447087
52
+ google/gemma-3-4b-it,xcopa,0,0.6247272727272727
53
+ HuggingFaceTB/SmolLM3-3B,include_base_44_greek,0,0.41304347826086957
54
+ Qwen/Qwen3-1.7B,include_base_44_basque,0,0.316
55
+ HuggingFaceTB/SmolLM3-3B,belebele_ces_Latn,5,0.6222222222222222
56
+ Qwen/Qwen3-1.7B,global_mmlu_full_sv,5,0.4794188861985472
57
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_pol_Latn,5,0.28555555555555556
58
+ HuggingFaceTB/SmolLM2-1.7B,belebele_pol_Latn,5,0.3288888888888889
59
+ Qwen/Qwen3-1.7B,belebele_deu_Latn,5,0.74
60
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_ru,5,0.3166215638797892
61
+ HuggingFaceTB/SmolLM2-1.7B,belebele_eng_Latn,5,0.5633333333333334
62
+ Qwen/Qwen3-1.7B,belebele_lit_Latn,5,0.6266666666666667
63
+ HuggingFaceTB/SmolLM2-1.7B,belebele_deu_Latn,5,0.3933333333333333
64
+ google/gemma-3-4b-it,belebele_ces_Latn,5,0.7733333333333333
65
+ google/gemma-3-4b-it,include_base_44_croatian,0,0.6236363636363637
66
+ HuggingFaceTB/SmolLM2-1.7B,belebele_est_Latn,5,0.2922222222222222
67
+ HuggingFaceTB/SmolLM3-3B,belebele_slk_Latn,5,0.5855555555555556
68
+ HuggingFaceTB/SmolLM3-3B,include_base_44_north macedonian,0,0.5081669691470054
69
+ HuggingFaceTB/SmolLM3-3B,include_base_44_armenian,0,0.2581818181818182
70
+ HuggingFaceTB/SmolLM2-1.7B,belebele_fin_Latn,5,0.32
71
+ HuggingFaceTB/SmolLM3-3B,belebele_deu_Latn,5,0.8155555555555556
72
+ Qwen/Qwen3-1.7B,belebele_ell_Grek,5,0.6688888888888889
73
+ HuggingFaceTB/SmolLM2-1.7B,belebele_slv_Latn,5,0.3233333333333333
74
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_ro,5,0.28051559606893606
75
+ google/gemma-3-4b-it,global_mmlu_full_de,5,0.5190856003418316
76
+ Qwen/Qwen3-1.7B,include_base_44_north macedonian,0,0.5353901996370236
77
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_fr,5,0.32068081469876086
78
+ Qwen/Qwen3-1.7B,xcopa,0,0.5750909090909091
79
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_bul_Cyrl,5,0.3211111111111111
80
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_lt,5,0.29155390969947304
81
+ HuggingFaceTB/SmolLM2-1.7B,belebele_lvs_Latn,5,0.33
82
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_el,5,0.2901296111665005
83
+ google/gemma-3-4b-it,include_base_44_albanian,0,0.5753176043557169
84
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_dutch,0,0.33030852994555354
85
+ HuggingFaceTB/SmolLM2-1.7B,belebele_lit_Latn,5,0.32
86
+ HuggingFaceTB/SmolLM2-1.7B,belebele_bul_Cyrl,5,0.3244444444444444
87
+ HuggingFaceTB/SmolLM3-3B,include_base_44_belarusian,0,0.24909090909090909
88
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_uk,5,0.4272183449651047
89
+ google/gemma-3-4b-it,include_base_44_bulgarian,0,0.6127272727272727
90
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_azerbaijani,0,0.26094890510948904
91
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_croatian,0,0.25272727272727274
92
+ HuggingFaceTB/SmolLM2-1.7B,belebele_spa_Latn,5,0.45666666666666667
93
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_russian,0,0.28442028985507245
94
+ HuggingFaceTB/SmolLM3-3B,include_base_44_albanian,0,0.3666061705989111
95
+ Qwen/Qwen3-1.7B,global_mmlu_full_tr,5,0.44281441390115367
96
+ HuggingFaceTB/SmolLM3-3B,belebele_nld_Latn,5,0.6777777777777778
97
+ Qwen/Qwen3-1.7B,include_base_44_azerbaijani,0,0.39233576642335766
98
+ google/gemma-3-4b-it,belebele_fin_Latn,5,0.7744444444444445
99
+ Qwen/Qwen3-1.7B,global_mmlu_full_sr,5,0.43355647343683235
100
+ Qwen/Qwen3-1.7B,global_mmlu_full_en,5,0.6010539809143997
101
+ google/gemma-3-4b-it,belebele_hrv_Latn,5,0.7711111111111111
102
+ google/gemma-3-4b-it,belebele_bul_Cyrl,5,0.7744444444444445
103
+ HuggingFaceTB/SmolLM3-3B,include_base_44_basque,0,0.318
104
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_greek,0,0.25181159420289856
105
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_el,5,0.480985614584817
106
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_polish,0,0.25364963503649635
107
+ HuggingFaceTB/SmolLM3-3B,include_base_44_polish,0,0.34124087591240876
108
+ Qwen/Qwen3-1.7B,global_mmlu_full_ru,5,0.5029198119925936
109
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_tr,5,0.3821392963965247
110
+ Qwen/Qwen3-1.7B,include_base_44_ukrainian,0,0.49272727272727274
111
+ HuggingFaceTB/SmolLM3-3B,include_base_44_lithuanian,0,0.3389513108614232
112
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_slv_Latn,5,0.27666666666666667
113
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_ukrainian,0,0.36
114
+ Qwen/Qwen3-1.7B,include_base_44_georgian,0,0.314
115
+ Qwen/Qwen3-1.7B,include_base_44_estonian,0,0.3705357142857143
116
+ Qwen/Qwen3-1.7B,belebele_nld_Latn,5,0.7166666666666667
117
+ HuggingFaceTB/SmolLM2-1.7B,belebele_ces_Latn,5,0.33
118
+ google/gemma-3-4b-it,belebele_mlt_Latn,5,0.65
119
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_uk,5,0.2947585813986612
120
+ HuggingFaceTB/SmolLM3-3B,belebele_bul_Cyrl,5,0.6366666666666667
121
+ Qwen/Qwen3-1.7B,belebele_bul_Cyrl,5,0.6966666666666667
122
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_he,5,0.24896738356359494
123
+ HuggingFaceTB/SmolLM3-3B,xstorycloze,0,0.6174718729318333
124
+ HuggingFaceTB/SmolLM3-3B,xcopa,0,0.5889090909090909
125
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_turkish,0,0.23905109489051096
126
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_dan_Latn,5,0.30666666666666664
127
+ Qwen/Qwen3-1.7B,include_base_44_spanish,0,0.5472727272727272
128
+ HuggingFaceTB/SmolLM2-1.7B,belebele_mlt_Latn,5,0.3
129
+ google/gemma-3-4b-it,include_base_44_azerbaijani,0,0.40693430656934304
130
+ google/gemma-3-4b-it,belebele_dan_Latn,5,0.7888888888888889
131
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_mlt_Latn,5,0.32222222222222224
132
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xwinograd,0,0.6983591818386155
133
+ google/gemma-3-4b-it,include_base_44_russian,0,0.4782608695652174
134
+ google/gemma-3-4b-it,global_mmlu_full_cs,5,0.5044865403788634
135
+ google/gemma-3-4b-it,global_mmlu_full_it,5,0.5321179319185301
136
+ Qwen/Qwen3-1.7B,belebele_por_Latn,5,0.7644444444444445
137
+ Qwen/Qwen3-1.7B,include_base_44_greek,0,0.3713768115942029
138
+ google/gemma-3-4b-it,include_base_44_french,0,0.548926014319809
139
+ google/gemma-3-4b-it,belebele_lit_Latn,5,0.7366666666666667
140
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_finnish,0,0.24682395644283123
141
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_fr,5,0.5495655889474433
142
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ron_Latn,5,0.31222222222222223
143
+ Qwen/Qwen3-1.7B,global_mmlu_full_pt,5,0.5347528842045293
144
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_it,5,0.30366044722973934
145
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_azerbaijani,0,0.2791970802919708
146
+ google/gemma-3-4b-it,belebele_spa_Latn,5,0.7722222222222223
147
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_russian,0,0.26992753623188404
148
+ Qwen/Qwen3-1.7B,global_mmlu_full_nl,5,0.4950149551345962
149
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_hungarian,0,0.24363636363636362
150
+ Qwen/Qwen3-1.7B,belebele_swe_Latn,5,0.73
151
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_georgian,0,0.252
152
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_turkish,0,0.2208029197080292
153
+ google/gemma-3-4b-it,global_mmlu_full_sv,5,0.5211508332146418
154
+ HuggingFaceTB/SmolLM3-3B,belebele_ita_Latn,5,0.7711111111111111
155
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_north macedonian,0,0.2631578947368421
156
+ Qwen/Qwen3-1.7B,belebele_mlt_Latn,5,0.43444444444444447
157
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_nld_Latn,5,0.3288888888888889
158
+ Qwen/Qwen3-1.7B,belebele_pol_Latn,5,0.7244444444444444
159
+ HuggingFaceTB/SmolLM2-1.7B,belebele_por_Latn,5,0.4633333333333333
160
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_pl,5,0.25986326734083465
161
+ Qwen/Qwen3-1.7B,belebele_fin_Latn,5,0.6155555555555555
162
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_nl,5,0.4615439396097422
163
+ HuggingFaceTB/SmolLM3-3B,belebele_slv_Latn,5,0.49666666666666665
164
+ google/gemma-3-4b-it,include_base_44_belarusian,0,0.2872727272727273
165
+ HuggingFaceTB/SmolLM2-1.7B,belebele_slk_Latn,5,0.32222222222222224
166
+ Qwen/Qwen3-1.7B,include_base_44_armenian,0,0.3054545454545455
167
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_swe_Latn,5,0.33444444444444443
168
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_es,5,0.551345962113659
169
+ HuggingFaceTB/SmolLM3-3B,include_base_44_russian,0,0.4492753623188406
170
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_nl,5,0.3525851018373451
171
+ google/gemma-3-4b-it,include_base_44_turkish,0,0.5237226277372263
172
+ google/gemma-3-4b-it,belebele_eng_Latn,5,0.8555555555555555
173
+ google/gemma-3-4b-it,belebele_swe_Latn,5,0.7955555555555556
174
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_cs,5,0.31932773109243695
175
+ HuggingFaceTB/SmolLM3-3B,include_base_44_turkish,0,0.3795620437956204
176
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_greek,0,0.2554347826086957
177
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_albanian,0,0.2613430127041742
178
+ Qwen/Qwen3-1.7B,include_base_44_portuguese,0,0.4791288566243194
179
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_lt,5,0.3356359492949722
180
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_slk_Latn,5,0.3233333333333333
181
+ HuggingFaceTB/SmolLM3-3B,include_base_44_portuguese,0,0.5045372050816697
182
+ Qwen/Qwen3-1.7B,include_base_44_french,0,0.48448687350835323
183
+ Qwen/Qwen3-1.7B,belebele_slk_Latn,5,0.6788888888888889
184
+ google/gemma-3-4b-it,include_base_44_spanish,0,0.5690909090909091
185
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_nl,5,0.2975359635379576
186
+ Qwen/Qwen3-1.7B,global_mmlu_full_it,5,0.5266343825665859
187
+ google/gemma-3-4b-it,include_base_44_basque,0,0.356
188
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_de,5,0.5394530693633386
189
+ Qwen/Qwen3-1.7B,belebele_ces_Latn,5,0.71
190
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_lit_Latn,5,0.30333333333333334
191
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_french,0,0.3317422434367542
192
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_georgian,0,0.258
193
+ google/gemma-3-4b-it,include_base_44_ukrainian,0,0.5836363636363636
194
+ HuggingFaceTB/SmolLM3-3B,belebele_hun_Latn,5,0.48
195
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_german,0,0.3381294964028777
196
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_sv,5,0.2956843754450933
197
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_fr,5,0.3785785500640934
198
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_belarusian,0,0.2672727272727273
199
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xstorycloze,0,0.515071295349257
200
+ HuggingFaceTB/SmolLM3-3B,include_base_44_ukrainian,0,0.46545454545454545
201
+ HuggingFaceTB/SmolLM3-3B,belebele_spa_Latn,5,0.7955555555555556
202
+ HuggingFaceTB/SmolLM3-3B,include_base_44_german,0,0.43884892086330934
203
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_north macedonian,0,0.2395644283121597
204
+ google/gemma-3-4b-it,belebele_slk_Latn,5,0.7811111111111111
205
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_finnish,0,0.279491833030853
206
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_pl,5,0.4150405925081897
207
+ google/gemma-3-4b-it,include_base_44_polish,0,0.4635036496350365
208
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xcopa,0,0.518
209
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_bulgarian,0,0.27090909090909093
210
+ Qwen/Qwen3-1.7B,include_base_44_russian,0,0.4746376811594203
211
+ Qwen/Qwen3-1.7B,belebele_ron_Latn,5,0.72
212
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_lt,5,0.2622133599202393
213
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_es,5,0.38199686654322745
214
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_pt,5,0.543156245549067
215
+ google/gemma-3-4b-it,belebele_nld_Latn,5,0.7766666666666666
216
+ google/gemma-3-4b-it,belebele_ita_Latn,5,0.7866666666666666
217
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_cs,5,0.2676969092721834
218
+ Qwen/Qwen3-1.7B,include_base_44_bulgarian,0,0.48
219
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_spa_Latn,5,0.37444444444444447
220
+ google/gemma-3-4b-it,global_mmlu_full_ru,5,0.5084745762711864
221
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_he,5,0.28165503489531407
222
+ google/gemma-3-4b-it,global_mmlu_full_en,5,0.5827517447657029
223
+ google/gemma-3-4b-it,global_mmlu_full_fr,5,0.5274889616863695
224
+ HuggingFaceTB/SmolLM2-1.7B,belebele_hun_Latn,5,0.3011111111111111
225
+ google/gemma-3-4b-it,belebele_ell_Grek,5,0.7922222222222223
226
+ Qwen/Qwen3-1.7B,include_base_44_german,0,0.45323741007194246
227
+ google/gemma-3-4b-it,global_mmlu_full_el,5,0.48511608033043724
228
+ google/gemma-3-4b-it,global_mmlu_full_pl,5,0.5032046716991881
229
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_cs,5,0.4178179746474861
230
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ces_Latn,5,0.30333333333333334
231
+ Qwen/Qwen3-1.7B,xstorycloze,0,0.5671740569159497
232
+ Qwen/Qwen3-1.7B,include_base_44_finnish,0,0.3720508166969147
233
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_croatian,0,0.27454545454545454
234
+ google/gemma-3-4b-it,belebele_deu_Latn,5,0.8066666666666666
235
+ Qwen/Qwen3-1.7B,global_mmlu_full_uk,5,0.45748468879077053
236
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_polish,0,0.26094890510948904
237
+ Qwen/Qwen3-1.7B,xwinograd,0,0.7145425938413127
238
+ google/gemma-3-4b-it,xwinograd,0,0.7772533153517645
239
+ Qwen/Qwen3-1.7B,include_base_44_dutch,0,0.484573502722323
240
+ Qwen/Qwen3-1.7B,include_base_44_serbian,0,0.44363636363636366
241
+ Qwen/Qwen3-1.7B,include_base_44_belarusian,0,0.3
242
+ Qwen/Qwen3-1.7B,global_mmlu_full_he,5,0.4016521862982481
243
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_estonian,0,0.24553571428571427
244
+ HuggingFaceTB/SmolLM3-3B,include_base_44_italian,0,0.6441605839416058
245
+ Qwen/Qwen3-1.7B,global_mmlu_full_es,5,0.5344680244979347
246
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_sr,5,0.3767981768978778
247
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_sr,5,0.25523429710867396
248
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_en,5,0.498860561173622
249
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_fra_Latn,5,0.3488888888888889
250
+ HuggingFaceTB/SmolLM3-3B,belebele_fin_Latn,5,0.46555555555555556
251
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_eng_Latn,5,0.5866666666666667
252
+ HuggingFaceTB/SmolLM3-3B,include_base_44_finnish,0,0.3393829401088929
253
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_he,5,0.3240991311778949
254
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_fin_Latn,5,0.3
255
+ HuggingFaceTB/SmolLM3-3B,belebele_ron_Latn,5,0.6511111111111111
256
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_portuguese,0,0.30671506352087113
257
+ google/gemma-3-4b-it,xstorycloze,0,0.668732326574815
258
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_hun_Latn,5,0.23666666666666666
259
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_por_Latn,5,0.36
260
+ HuggingFaceTB/SmolLM3-3B,belebele_fra_Latn,5,0.8055555555555556
261
+ google/gemma-3-4b-it,include_base_44_georgian,0,0.526
262
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_spanish,0,0.3327272727272727
263
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_tr,5,0.31035465033471016
264
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_dutch,0,0.2250453720508167
265
+ google/gemma-3-4b-it,belebele_hun_Latn,5,0.7322222222222222
266
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_french,0,0.3054892601431981
267
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ell_Grek,5,0.28444444444444444
268
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_sv,5,0.436476285429426
269
+ HuggingFaceTB/SmolLM2-1.7B,belebele_ell_Grek,5,0.31444444444444447
270
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_hungarian,0,0.27090909090909093
271
+ Qwen/Qwen3-1.7B,belebele_spa_Latn,5,0.7844444444444445
272
+ Qwen/Qwen3-1.7B,include_base_44_albanian,0,0.455535390199637
273
+ HuggingFaceTB/SmolLM3-3B,belebele_eng_Latn,5,0.8488888888888889
274
+ google/gemma-3-4b-it,belebele_est_Latn,5,0.6988888888888889
275
+ HuggingFaceTB/SmolLM3-3B,include_base_44_azerbaijani,0,0.31204379562043794
276
+ google/gemma-3-4b-it,belebele_fra_Latn,5,0.82
277
+ google/gemma-3-4b-it,include_base_44_armenian,0,0.3472727272727273
278
+ google/gemma-3-4b-it,belebele_por_Latn,5,0.7866666666666666
279
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_lithuanian,0,0.26779026217228463
280
+ Qwen/Qwen3-1.7B,belebele_hrv_Latn,5,0.6966666666666667
281
+ Qwen/Qwen3-1.7B,include_base_44_italian,0,0.5894160583941606
282
+ google/gemma-3-4b-it,belebele_ron_Latn,5,0.78
283
+ HuggingFaceTB/SmolLM3-3B,xwinograd,0,0.7988311980220274
284
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_ru,5,0.508759435977781
285
+ HuggingFaceTB/SmolLM3-3B,belebele_pol_Latn,5,0.6066666666666667
286
+ HuggingFaceTB/SmolLM3-3B,belebele_lit_Latn,5,0.4388888888888889
287
+ Qwen/Qwen3-1.7B,belebele_lvs_Latn,5,0.6677777777777778
288
+ HuggingFaceTB/SmolLM2-1.7B,xstorycloze,0,0.5408218518741351
289
+ Qwen/Qwen3-1.7B,include_base_44_croatian,0,0.5127272727272727
290
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_armenian,0,0.2581818181818182
291
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_ru,5,0.258082894174619
292
+ Qwen/Qwen3-1.7B,include_base_44_turkish,0,0.42700729927007297
293
+ HuggingFaceTB/SmolLM3-3B,include_base_44_croatian,0,0.44
294
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_tr,5,0.25872382851445663
295
+ Qwen/Qwen3-1.7B,global_mmlu_full_pl,5,0.47393533684660305
296
+ HuggingFaceTB/SmolLM3-3B,belebele_est_Latn,5,0.37
297
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_est_Latn,5,0.27444444444444444
298
+ google/gemma-3-4b-it,belebele_pol_Latn,5,0.7666666666666667
299
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_sv,5,0.3335707164221621
300
+ Qwen/Qwen3-1.7B,global_mmlu_full_el,5,0.4171058253809999
301
+ HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_sr,5,0.2920524141860134
302
+ Qwen/Qwen3-1.7B,global_mmlu_full_de,5,0.5170203674690215
303
+ google/gemma-3-4b-it,global_mmlu_full_he,5,0.4596211365902293
304
+ google/gemma-3-4b-it,include_base_44_greek,0,0.4891304347826087
305
+ google/gemma-3-4b-it,include_base_44_finnish,0,0.4355716878402904
306
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_estonian,0,0.19196428571428573
307
+ google/gemma-3-4b-it,include_base_44_italian,0,0.6131386861313869
308
+ Qwen/Qwen3-1.7B,belebele_dan_Latn,5,0.7044444444444444
309
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_el,5,0.2511038313630537
310
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_en,5,0.48454636091724823
311
+ google/gemma-3-4b-it,global_mmlu_full_uk,5,0.4910269192422732
312
+ Qwen/Qwen3-1.7B,global_mmlu_full_ro,5,0.4859706594502208
313
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_pt,5,0.3166927788064378
314
+ HuggingFaceTB/SmolLM3-3B,include_base_44_french,0,0.5823389021479713
315
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_portuguese,0,0.279491833030853
316
+ google/gemma-3-4b-it,global_mmlu_full_nl,5,0.5225751317476143
317
+ HuggingFaceTB/SmolLM3-3B,include_base_44_spanish,0,0.5836363636363636
318
+ HuggingFaceTB/SmolLM3-3B,belebele_lvs_Latn,5,0.41555555555555557
319
+ HuggingFaceTB/SmolLM2-1.7B,xcopa,0,0.5305454545454545
320
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_en,5,0.6007691212078051
321
+ Qwen/Qwen3-1.7B,global_mmlu_full_fr,5,0.5274177467597209
322
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_ukrainian,0,0.30727272727272725
323
+ google/gemma-3-4b-it,belebele_lvs_Latn,5,0.7455555555555555
324
+ HuggingFaceTB/SmolLM3-3B,include_base_44_bulgarian,0,0.45636363636363636
325
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ita_Latn,5,0.30333333333333334
326
+ HuggingFaceTB/SmolLM2-1.7B,xwinograd,0,0.7289278489548213
327
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_de,5,0.30957128614157525
328
+ google/gemma-3-4b-it,include_base_44_lithuanian,0,0.5056179775280899
329
+ HuggingFaceTB/SmolLM3-3B,global_mmlu_full_it,5,0.5514883919669563
330
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_lvs_Latn,5,0.3011111111111111
331
+ HuggingFaceTB/SmolLM2-1.7B,belebele_ron_Latn,5,0.3711111111111111
332
+ open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_albanian,0,0.2831215970961887
333
+ Qwen/Qwen3-1.7B,belebele_slv_Latn,5,0.6611111111111111
334
+ google/gemma-3-4b-it,global_mmlu_full_lt,5,0.4684517874946589
335
+ google/gemma-3-4b-it,include_base_44_german,0,0.381294964028777
336
+ Qwen/Qwen3-1.7B,global_mmlu_full_lt,5,0.4099843327161373
337
+ HuggingFaceTB/SmolLM3-3B,include_base_44_serbian,0,0.40545454545454546
338
+ HuggingFaceTB/SmolLM2-1.7B,include_base_44_italian,0,0.29927007299270075