Spaces:
Running
Running
feat: multilingual tab and data
Browse files- main.py +85 -0
- multilingual_results.csv +338 -0
main.py
CHANGED
|
@@ -53,6 +53,55 @@ df_mah_pivot.sort_values(by="Average ⬆️", ascending=False, inplace=True)
|
|
| 53 |
df_mah_pivot.index.rename("Model", inplace=True)
|
| 54 |
df_mah_pivot.reset_index(drop=False, inplace=True)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
cols = [
|
| 57 |
#'Llama-3.1-8B',
|
| 58 |
"Llama-3.1-Tulu-3-8B-SFT",
|
|
@@ -146,6 +195,42 @@ with gr.Blocks() as demo:
|
|
| 146 |
),
|
| 147 |
)
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
if __name__ == "__main__":
|
| 151 |
demo.launch()
|
|
|
|
| 53 |
df_mah_pivot.index.rename("Model", inplace=True)
|
| 54 |
df_mah_pivot.reset_index(drop=False, inplace=True)
|
| 55 |
|
| 56 |
+
# Load the per-task multilingual results. The code below reads the
# model_name, task, n_shot and performance columns of this table.
df_eval = pd.read_csv("multilingual_results.csv")
|
| 57 |
+
|
| 58 |
+
def map_task_to_group(task: str) -> str | None:
|
| 59 |
+
if task == "xcopa":
|
| 60 |
+
return "XCOPA"
|
| 61 |
+
if task == "xstorycloze":
|
| 62 |
+
return "XStoryCloze"
|
| 63 |
+
if task == "xwinograd":
|
| 64 |
+
return "XWinograd"
|
| 65 |
+
if task.startswith("include_base_44_"):
|
| 66 |
+
return "INCLUDE"
|
| 67 |
+
if task.startswith("belebele_"):
|
| 68 |
+
return "Belebele"
|
| 69 |
+
if task.startswith("global_mmlu_full_"):
|
| 70 |
+
return "Global MMLU"
|
| 71 |
+
return None
|
| 72 |
+
|
| 73 |
+
# Tag every row with its leaderboard group, then keep only grouped tasks.
df_eval["group"] = df_eval["task"].apply(map_task_to_group)
df_eval_grouped = df_eval.loc[df_eval["group"].notna()].copy()
# Display the last path segment of model_name (the part after the final "/").
df_eval_grouped["Model"] = df_eval_grouped["model_name"].apply(
    lambda full_name: full_name.split("/")[-1]
)
# One row per model, one column per group; each cell is the mean performance
# over that group's rows for the model.
df_multilingual_pivot = df_eval_grouped.pivot_table(
    index="Model",
    columns="group",
    values="performance",
    aggfunc="mean",
)
# Row-wise mean over the group columns.
df_multilingual_pivot["Average ⬆️"] = df_multilingual_pivot.mean(axis=1)
# Best average first, then expose the model name as a regular column.
df_multilingual_pivot = (
    df_multilingual_pivot
    .sort_values(by="Average ⬆️", ascending=False)
    .rename_axis(index="Model")
    .reset_index(drop=False)
)
|
| 83 |
+
|
| 84 |
+
# Label each group with its n_shot value when all of its rows share one
# value; otherwise label it "mixed".
_nshot_by_group = df_eval_grouped.groupby("group")["n_shot"]
group_nshot = _nshot_by_group.agg(
    lambda shots: "mixed" if shots.nunique() != 1 else shots.iloc[0]
).to_dict()
|
| 90 |
+
|
| 91 |
+
def display_name(group: str) -> str:
    """Return the column header for *group*, suffixed with its n-shot label.

    Args:
        group: Raw group name, looked up in the module-level ``group_nshot``
            mapping.

    Returns:
        ``"<group> [<label>]"``, where the label is the mapping's value for
        the group, or ``"unknown"`` when the group is absent from the mapping.
    """
    # Every label, including the "mixed" and "unknown" sentinels, renders the
    # same way, so no per-label branching is needed.
    label = group_nshot.get(group, "unknown")
    return f"{group} [{label}]"
|
| 96 |
+
|
| 97 |
+
# Map each raw group column to its display header. "Model" and "Average ⬆️"
# are left out of the mapping so they keep their names.
_passthrough_columns = {"Model", "Average ⬆️"}
display_columns_map = {
    raw_name: display_name(raw_name)
    for raw_name in df_multilingual_pivot.columns
    if raw_name not in _passthrough_columns
}
df_multilingual_display_all = df_multilingual_pivot.rename(
    columns=display_columns_map
)
|
| 104 |
+
|
| 105 |
cols = [
|
| 106 |
#'Llama-3.1-8B',
|
| 107 |
"Llama-3.1-Tulu-3-8B-SFT",
|
|
|
|
| 195 |
),
|
| 196 |
)
|
| 197 |
|
| 198 |
+
with gr.Tab("Multilingual evaluations 🌍"):
|
| 199 |
+
gr.Markdown(
|
| 200 |
+
"""
|
| 201 |
+
Aggregated multilingual performance by task group (mean across languages when applicable).
|
| 202 |
+
"""
|
| 203 |
+
)
|
| 204 |
+
# Order columns: Model, groups..., Average
|
| 205 |
+
raw_group_columns = [
|
| 206 |
+
col
|
| 207 |
+
for col in [
|
| 208 |
+
"INCLUDE",
|
| 209 |
+
"Belebele",
|
| 210 |
+
"Global MMLU",
|
| 211 |
+
"XCOPA",
|
| 212 |
+
"XStoryCloze",
|
| 213 |
+
"XWinograd",
|
| 214 |
+
]
|
| 215 |
+
if col in df_multilingual_pivot.columns
|
| 216 |
+
]
|
| 217 |
+
display_group_columns = [display_columns_map[col] for col in raw_group_columns]
|
| 218 |
+
ordered_columns = ["Model", *display_group_columns, "Average ⬆️"]
|
| 219 |
+
df_multilingual_display = df_multilingual_display_all.loc[:, ordered_columns]
|
| 220 |
+
Leaderboard(
|
| 221 |
+
value=df_multilingual_display.round(2),
|
| 222 |
+
select_columns=SelectColumns(
|
| 223 |
+
default_selection=list(df_multilingual_display.columns),
|
| 224 |
+
cant_deselect=["Model"],
|
| 225 |
+
label="Select Columns to Display:",
|
| 226 |
+
),
|
| 227 |
+
search_columns=SearchColumns(
|
| 228 |
+
primary_column="Model",
|
| 229 |
+
label="Filter a model",
|
| 230 |
+
secondary_columns=[],
|
| 231 |
+
),
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
|
| 235 |
if __name__ == "__main__":
|
| 236 |
demo.launch()
|
multilingual_results.csv
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name,task,n_shot,performance
|
| 2 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_italian,0,0.2937956204379562
|
| 3 |
+
HuggingFaceTB/SmolLM3-3B,belebele_swe_Latn,5,0.69
|
| 4 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_hungarian,0,0.2818181818181818
|
| 5 |
+
Qwen/Qwen3-1.7B,belebele_est_Latn,5,0.56
|
| 6 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_dan_Latn,5,0.3466666666666667
|
| 7 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_georgian,0,0.276
|
| 8 |
+
HuggingFaceTB/SmolLM3-3B,belebele_por_Latn,5,0.7944444444444444
|
| 9 |
+
google/gemma-3-4b-it,global_mmlu_full_pt,5,0.527204101979775
|
| 10 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_estonian,0,0.29464285714285715
|
| 11 |
+
google/gemma-3-4b-it,belebele_slv_Latn,5,0.7533333333333333
|
| 12 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_pl,5,0.31825950719270757
|
| 13 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_it,5,0.35878080045577554
|
| 14 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_bulgarian,0,0.28909090909090907
|
| 15 |
+
HuggingFaceTB/SmolLM3-3B,belebele_hrv_Latn,5,0.5722222222222222
|
| 16 |
+
google/gemma-3-4b-it,include_base_44_serbian,0,0.5672727272727273
|
| 17 |
+
HuggingFaceTB/SmolLM3-3B,belebele_mlt_Latn,5,0.3566666666666667
|
| 18 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_spanish,0,0.3181818181818182
|
| 19 |
+
Qwen/Qwen3-1.7B,belebele_fra_Latn,5,0.7922222222222223
|
| 20 |
+
google/gemma-3-4b-it,include_base_44_estonian,0,0.47767857142857145
|
| 21 |
+
google/gemma-3-4b-it,include_base_44_dutch,0,0.5517241379310345
|
| 22 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_dutch,0,0.47005444646098005
|
| 23 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_es,5,0.3245976356644353
|
| 24 |
+
Qwen/Qwen3-1.7B,include_base_44_lithuanian,0,0.37265917602996257
|
| 25 |
+
Qwen/Qwen3-1.7B,include_base_44_polish,0,0.4142335766423358
|
| 26 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_cs,5,0.4690927218344965
|
| 27 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_deu_Latn,5,0.33444444444444443
|
| 28 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_ita_Latn,5,0.4033333333333333
|
| 29 |
+
HuggingFaceTB/SmolLM3-3B,belebele_ell_Grek,5,0.7633333333333333
|
| 30 |
+
HuggingFaceTB/SmolLM3-3B,belebele_dan_Latn,5,0.6522222222222223
|
| 31 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_lithuanian,0,0.27153558052434457
|
| 32 |
+
Qwen/Qwen3-1.7B,belebele_eng_Latn,5,0.8255555555555556
|
| 33 |
+
Qwen/Qwen3-1.7B,belebele_hun_Latn,5,0.6733333333333333
|
| 34 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_ro,5,0.451502634952286
|
| 35 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_serbian,0,0.28363636363636363
|
| 36 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_belarusian,0,0.26
|
| 37 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_german,0,0.35251798561151076
|
| 38 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_nld_Latn,5,0.41333333333333333
|
| 39 |
+
google/gemma-3-4b-it,global_mmlu_full_sr,5,0.4661016949152542
|
| 40 |
+
google/gemma-3-4b-it,global_mmlu_full_ro,5,0.5182310212220481
|
| 41 |
+
google/gemma-3-4b-it,include_base_44_north macedonian,0,0.6678765880217786
|
| 42 |
+
google/gemma-3-4b-it,include_base_44_hungarian,0,0.41454545454545455
|
| 43 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_hrv_Latn,5,0.27
|
| 44 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_serbian,0,0.22545454545454546
|
| 45 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_hrv_Latn,5,0.33666666666666667
|
| 46 |
+
Qwen/Qwen3-1.7B,belebele_ita_Latn,5,0.7511111111111111
|
| 47 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_basque,0,0.278
|
| 48 |
+
google/gemma-3-4b-it,include_base_44_portuguese,0,0.49364791288566245
|
| 49 |
+
Qwen/Qwen3-1.7B,include_base_44_hungarian,0,0.37636363636363634
|
| 50 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_pt,5,0.3721692066657171
|
| 51 |
+
google/gemma-3-4b-it,global_mmlu_full_tr,5,0.4814129041447087
|
| 52 |
+
google/gemma-3-4b-it,xcopa,0,0.6247272727272727
|
| 53 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_greek,0,0.41304347826086957
|
| 54 |
+
Qwen/Qwen3-1.7B,include_base_44_basque,0,0.316
|
| 55 |
+
HuggingFaceTB/SmolLM3-3B,belebele_ces_Latn,5,0.6222222222222222
|
| 56 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_sv,5,0.4794188861985472
|
| 57 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_pol_Latn,5,0.28555555555555556
|
| 58 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_pol_Latn,5,0.3288888888888889
|
| 59 |
+
Qwen/Qwen3-1.7B,belebele_deu_Latn,5,0.74
|
| 60 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_ru,5,0.3166215638797892
|
| 61 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_eng_Latn,5,0.5633333333333334
|
| 62 |
+
Qwen/Qwen3-1.7B,belebele_lit_Latn,5,0.6266666666666667
|
| 63 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_deu_Latn,5,0.3933333333333333
|
| 64 |
+
google/gemma-3-4b-it,belebele_ces_Latn,5,0.7733333333333333
|
| 65 |
+
google/gemma-3-4b-it,include_base_44_croatian,0,0.6236363636363637
|
| 66 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_est_Latn,5,0.2922222222222222
|
| 67 |
+
HuggingFaceTB/SmolLM3-3B,belebele_slk_Latn,5,0.5855555555555556
|
| 68 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_north macedonian,0,0.5081669691470054
|
| 69 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_armenian,0,0.2581818181818182
|
| 70 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_fin_Latn,5,0.32
|
| 71 |
+
HuggingFaceTB/SmolLM3-3B,belebele_deu_Latn,5,0.8155555555555556
|
| 72 |
+
Qwen/Qwen3-1.7B,belebele_ell_Grek,5,0.6688888888888889
|
| 73 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_slv_Latn,5,0.3233333333333333
|
| 74 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_ro,5,0.28051559606893606
|
| 75 |
+
google/gemma-3-4b-it,global_mmlu_full_de,5,0.5190856003418316
|
| 76 |
+
Qwen/Qwen3-1.7B,include_base_44_north macedonian,0,0.5353901996370236
|
| 77 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_fr,5,0.32068081469876086
|
| 78 |
+
Qwen/Qwen3-1.7B,xcopa,0,0.5750909090909091
|
| 79 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_bul_Cyrl,5,0.3211111111111111
|
| 80 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_lt,5,0.29155390969947304
|
| 81 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_lvs_Latn,5,0.33
|
| 82 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_el,5,0.2901296111665005
|
| 83 |
+
google/gemma-3-4b-it,include_base_44_albanian,0,0.5753176043557169
|
| 84 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_dutch,0,0.33030852994555354
|
| 85 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_lit_Latn,5,0.32
|
| 86 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_bul_Cyrl,5,0.3244444444444444
|
| 87 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_belarusian,0,0.24909090909090909
|
| 88 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_uk,5,0.4272183449651047
|
| 89 |
+
google/gemma-3-4b-it,include_base_44_bulgarian,0,0.6127272727272727
|
| 90 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_azerbaijani,0,0.26094890510948904
|
| 91 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_croatian,0,0.25272727272727274
|
| 92 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_spa_Latn,5,0.45666666666666667
|
| 93 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_russian,0,0.28442028985507245
|
| 94 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_albanian,0,0.3666061705989111
|
| 95 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_tr,5,0.44281441390115367
|
| 96 |
+
HuggingFaceTB/SmolLM3-3B,belebele_nld_Latn,5,0.6777777777777778
|
| 97 |
+
Qwen/Qwen3-1.7B,include_base_44_azerbaijani,0,0.39233576642335766
|
| 98 |
+
google/gemma-3-4b-it,belebele_fin_Latn,5,0.7744444444444445
|
| 99 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_sr,5,0.43355647343683235
|
| 100 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_en,5,0.6010539809143997
|
| 101 |
+
google/gemma-3-4b-it,belebele_hrv_Latn,5,0.7711111111111111
|
| 102 |
+
google/gemma-3-4b-it,belebele_bul_Cyrl,5,0.7744444444444445
|
| 103 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_basque,0,0.318
|
| 104 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_greek,0,0.25181159420289856
|
| 105 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_el,5,0.480985614584817
|
| 106 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_polish,0,0.25364963503649635
|
| 107 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_polish,0,0.34124087591240876
|
| 108 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_ru,5,0.5029198119925936
|
| 109 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_tr,5,0.3821392963965247
|
| 110 |
+
Qwen/Qwen3-1.7B,include_base_44_ukrainian,0,0.49272727272727274
|
| 111 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_lithuanian,0,0.3389513108614232
|
| 112 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_slv_Latn,5,0.27666666666666667
|
| 113 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_ukrainian,0,0.36
|
| 114 |
+
Qwen/Qwen3-1.7B,include_base_44_georgian,0,0.314
|
| 115 |
+
Qwen/Qwen3-1.7B,include_base_44_estonian,0,0.3705357142857143
|
| 116 |
+
Qwen/Qwen3-1.7B,belebele_nld_Latn,5,0.7166666666666667
|
| 117 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_ces_Latn,5,0.33
|
| 118 |
+
google/gemma-3-4b-it,belebele_mlt_Latn,5,0.65
|
| 119 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_uk,5,0.2947585813986612
|
| 120 |
+
HuggingFaceTB/SmolLM3-3B,belebele_bul_Cyrl,5,0.6366666666666667
|
| 121 |
+
Qwen/Qwen3-1.7B,belebele_bul_Cyrl,5,0.6966666666666667
|
| 122 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_he,5,0.24896738356359494
|
| 123 |
+
HuggingFaceTB/SmolLM3-3B,xstorycloze,0,0.6174718729318333
|
| 124 |
+
HuggingFaceTB/SmolLM3-3B,xcopa,0,0.5889090909090909
|
| 125 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_turkish,0,0.23905109489051096
|
| 126 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_dan_Latn,5,0.30666666666666664
|
| 127 |
+
Qwen/Qwen3-1.7B,include_base_44_spanish,0,0.5472727272727272
|
| 128 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_mlt_Latn,5,0.3
|
| 129 |
+
google/gemma-3-4b-it,include_base_44_azerbaijani,0,0.40693430656934304
|
| 130 |
+
google/gemma-3-4b-it,belebele_dan_Latn,5,0.7888888888888889
|
| 131 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_mlt_Latn,5,0.32222222222222224
|
| 132 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xwinograd,0,0.6983591818386155
|
| 133 |
+
google/gemma-3-4b-it,include_base_44_russian,0,0.4782608695652174
|
| 134 |
+
google/gemma-3-4b-it,global_mmlu_full_cs,5,0.5044865403788634
|
| 135 |
+
google/gemma-3-4b-it,global_mmlu_full_it,5,0.5321179319185301
|
| 136 |
+
Qwen/Qwen3-1.7B,belebele_por_Latn,5,0.7644444444444445
|
| 137 |
+
Qwen/Qwen3-1.7B,include_base_44_greek,0,0.3713768115942029
|
| 138 |
+
google/gemma-3-4b-it,include_base_44_french,0,0.548926014319809
|
| 139 |
+
google/gemma-3-4b-it,belebele_lit_Latn,5,0.7366666666666667
|
| 140 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_finnish,0,0.24682395644283123
|
| 141 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_fr,5,0.5495655889474433
|
| 142 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ron_Latn,5,0.31222222222222223
|
| 143 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_pt,5,0.5347528842045293
|
| 144 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_it,5,0.30366044722973934
|
| 145 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_azerbaijani,0,0.2791970802919708
|
| 146 |
+
google/gemma-3-4b-it,belebele_spa_Latn,5,0.7722222222222223
|
| 147 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_russian,0,0.26992753623188404
|
| 148 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_nl,5,0.4950149551345962
|
| 149 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_hungarian,0,0.24363636363636362
|
| 150 |
+
Qwen/Qwen3-1.7B,belebele_swe_Latn,5,0.73
|
| 151 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_georgian,0,0.252
|
| 152 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_turkish,0,0.2208029197080292
|
| 153 |
+
google/gemma-3-4b-it,global_mmlu_full_sv,5,0.5211508332146418
|
| 154 |
+
HuggingFaceTB/SmolLM3-3B,belebele_ita_Latn,5,0.7711111111111111
|
| 155 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_north macedonian,0,0.2631578947368421
|
| 156 |
+
Qwen/Qwen3-1.7B,belebele_mlt_Latn,5,0.43444444444444447
|
| 157 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_nld_Latn,5,0.3288888888888889
|
| 158 |
+
Qwen/Qwen3-1.7B,belebele_pol_Latn,5,0.7244444444444444
|
| 159 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_por_Latn,5,0.4633333333333333
|
| 160 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_pl,5,0.25986326734083465
|
| 161 |
+
Qwen/Qwen3-1.7B,belebele_fin_Latn,5,0.6155555555555555
|
| 162 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_nl,5,0.4615439396097422
|
| 163 |
+
HuggingFaceTB/SmolLM3-3B,belebele_slv_Latn,5,0.49666666666666665
|
| 164 |
+
google/gemma-3-4b-it,include_base_44_belarusian,0,0.2872727272727273
|
| 165 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_slk_Latn,5,0.32222222222222224
|
| 166 |
+
Qwen/Qwen3-1.7B,include_base_44_armenian,0,0.3054545454545455
|
| 167 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_swe_Latn,5,0.33444444444444443
|
| 168 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_es,5,0.551345962113659
|
| 169 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_russian,0,0.4492753623188406
|
| 170 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_nl,5,0.3525851018373451
|
| 171 |
+
google/gemma-3-4b-it,include_base_44_turkish,0,0.5237226277372263
|
| 172 |
+
google/gemma-3-4b-it,belebele_eng_Latn,5,0.8555555555555555
|
| 173 |
+
google/gemma-3-4b-it,belebele_swe_Latn,5,0.7955555555555556
|
| 174 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_cs,5,0.31932773109243695
|
| 175 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_turkish,0,0.3795620437956204
|
| 176 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_greek,0,0.2554347826086957
|
| 177 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_albanian,0,0.2613430127041742
|
| 178 |
+
Qwen/Qwen3-1.7B,include_base_44_portuguese,0,0.4791288566243194
|
| 179 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_lt,5,0.3356359492949722
|
| 180 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_slk_Latn,5,0.3233333333333333
|
| 181 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_portuguese,0,0.5045372050816697
|
| 182 |
+
Qwen/Qwen3-1.7B,include_base_44_french,0,0.48448687350835323
|
| 183 |
+
Qwen/Qwen3-1.7B,belebele_slk_Latn,5,0.6788888888888889
|
| 184 |
+
google/gemma-3-4b-it,include_base_44_spanish,0,0.5690909090909091
|
| 185 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_nl,5,0.2975359635379576
|
| 186 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_it,5,0.5266343825665859
|
| 187 |
+
google/gemma-3-4b-it,include_base_44_basque,0,0.356
|
| 188 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_de,5,0.5394530693633386
|
| 189 |
+
Qwen/Qwen3-1.7B,belebele_ces_Latn,5,0.71
|
| 190 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_lit_Latn,5,0.30333333333333334
|
| 191 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_french,0,0.3317422434367542
|
| 192 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_georgian,0,0.258
|
| 193 |
+
google/gemma-3-4b-it,include_base_44_ukrainian,0,0.5836363636363636
|
| 194 |
+
HuggingFaceTB/SmolLM3-3B,belebele_hun_Latn,5,0.48
|
| 195 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_german,0,0.3381294964028777
|
| 196 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_sv,5,0.2956843754450933
|
| 197 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_fr,5,0.3785785500640934
|
| 198 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_belarusian,0,0.2672727272727273
|
| 199 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xstorycloze,0,0.515071295349257
|
| 200 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_ukrainian,0,0.46545454545454545
|
| 201 |
+
HuggingFaceTB/SmolLM3-3B,belebele_spa_Latn,5,0.7955555555555556
|
| 202 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_german,0,0.43884892086330934
|
| 203 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_north macedonian,0,0.2395644283121597
|
| 204 |
+
google/gemma-3-4b-it,belebele_slk_Latn,5,0.7811111111111111
|
| 205 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_finnish,0,0.279491833030853
|
| 206 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_pl,5,0.4150405925081897
|
| 207 |
+
google/gemma-3-4b-it,include_base_44_polish,0,0.4635036496350365
|
| 208 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,xcopa,0,0.518
|
| 209 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_bulgarian,0,0.27090909090909093
|
| 210 |
+
Qwen/Qwen3-1.7B,include_base_44_russian,0,0.4746376811594203
|
| 211 |
+
Qwen/Qwen3-1.7B,belebele_ron_Latn,5,0.72
|
| 212 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_lt,5,0.2622133599202393
|
| 213 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_es,5,0.38199686654322745
|
| 214 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_pt,5,0.543156245549067
|
| 215 |
+
google/gemma-3-4b-it,belebele_nld_Latn,5,0.7766666666666666
|
| 216 |
+
google/gemma-3-4b-it,belebele_ita_Latn,5,0.7866666666666666
|
| 217 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_cs,5,0.2676969092721834
|
| 218 |
+
Qwen/Qwen3-1.7B,include_base_44_bulgarian,0,0.48
|
| 219 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_spa_Latn,5,0.37444444444444447
|
| 220 |
+
google/gemma-3-4b-it,global_mmlu_full_ru,5,0.5084745762711864
|
| 221 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_he,5,0.28165503489531407
|
| 222 |
+
google/gemma-3-4b-it,global_mmlu_full_en,5,0.5827517447657029
|
| 223 |
+
google/gemma-3-4b-it,global_mmlu_full_fr,5,0.5274889616863695
|
| 224 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_hun_Latn,5,0.3011111111111111
|
| 225 |
+
google/gemma-3-4b-it,belebele_ell_Grek,5,0.7922222222222223
|
| 226 |
+
Qwen/Qwen3-1.7B,include_base_44_german,0,0.45323741007194246
|
| 227 |
+
google/gemma-3-4b-it,global_mmlu_full_el,5,0.48511608033043724
|
| 228 |
+
google/gemma-3-4b-it,global_mmlu_full_pl,5,0.5032046716991881
|
| 229 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_cs,5,0.4178179746474861
|
| 230 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ces_Latn,5,0.30333333333333334
|
| 231 |
+
Qwen/Qwen3-1.7B,xstorycloze,0,0.5671740569159497
|
| 232 |
+
Qwen/Qwen3-1.7B,include_base_44_finnish,0,0.3720508166969147
|
| 233 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_croatian,0,0.27454545454545454
|
| 234 |
+
google/gemma-3-4b-it,belebele_deu_Latn,5,0.8066666666666666
|
| 235 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_uk,5,0.45748468879077053
|
| 236 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_polish,0,0.26094890510948904
|
| 237 |
+
Qwen/Qwen3-1.7B,xwinograd,0,0.7145425938413127
|
| 238 |
+
google/gemma-3-4b-it,xwinograd,0,0.7772533153517645
|
| 239 |
+
Qwen/Qwen3-1.7B,include_base_44_dutch,0,0.484573502722323
|
| 240 |
+
Qwen/Qwen3-1.7B,include_base_44_serbian,0,0.44363636363636366
|
| 241 |
+
Qwen/Qwen3-1.7B,include_base_44_belarusian,0,0.3
|
| 242 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_he,5,0.4016521862982481
|
| 243 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_estonian,0,0.24553571428571427
|
| 244 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_italian,0,0.6441605839416058
|
| 245 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_es,5,0.5344680244979347
|
| 246 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_sr,5,0.3767981768978778
|
| 247 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_sr,5,0.25523429710867396
|
| 248 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_en,5,0.498860561173622
|
| 249 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_fra_Latn,5,0.3488888888888889
|
| 250 |
+
HuggingFaceTB/SmolLM3-3B,belebele_fin_Latn,5,0.46555555555555556
|
| 251 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_eng_Latn,5,0.5866666666666667
|
| 252 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_finnish,0,0.3393829401088929
|
| 253 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_he,5,0.3240991311778949
|
| 254 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_fin_Latn,5,0.3
|
| 255 |
+
HuggingFaceTB/SmolLM3-3B,belebele_ron_Latn,5,0.6511111111111111
|
| 256 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_portuguese,0,0.30671506352087113
|
| 257 |
+
google/gemma-3-4b-it,xstorycloze,0,0.668732326574815
|
| 258 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_hun_Latn,5,0.23666666666666666
|
| 259 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_por_Latn,5,0.36
|
| 260 |
+
HuggingFaceTB/SmolLM3-3B,belebele_fra_Latn,5,0.8055555555555556
|
| 261 |
+
google/gemma-3-4b-it,include_base_44_georgian,0,0.526
|
| 262 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_spanish,0,0.3327272727272727
|
| 263 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_tr,5,0.31035465033471016
|
| 264 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_dutch,0,0.2250453720508167
|
| 265 |
+
google/gemma-3-4b-it,belebele_hun_Latn,5,0.7322222222222222
|
| 266 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_french,0,0.3054892601431981
|
| 267 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ell_Grek,5,0.28444444444444444
|
| 268 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_sv,5,0.436476285429426
|
| 269 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_ell_Grek,5,0.31444444444444447
|
| 270 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_hungarian,0,0.27090909090909093
|
| 271 |
+
Qwen/Qwen3-1.7B,belebele_spa_Latn,5,0.7844444444444445
|
| 272 |
+
Qwen/Qwen3-1.7B,include_base_44_albanian,0,0.455535390199637
|
| 273 |
+
HuggingFaceTB/SmolLM3-3B,belebele_eng_Latn,5,0.8488888888888889
|
| 274 |
+
google/gemma-3-4b-it,belebele_est_Latn,5,0.6988888888888889
|
| 275 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_azerbaijani,0,0.31204379562043794
|
| 276 |
+
google/gemma-3-4b-it,belebele_fra_Latn,5,0.82
|
| 277 |
+
google/gemma-3-4b-it,include_base_44_armenian,0,0.3472727272727273
|
| 278 |
+
google/gemma-3-4b-it,belebele_por_Latn,5,0.7866666666666666
|
| 279 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_lithuanian,0,0.26779026217228463
|
| 280 |
+
Qwen/Qwen3-1.7B,belebele_hrv_Latn,5,0.6966666666666667
|
| 281 |
+
Qwen/Qwen3-1.7B,include_base_44_italian,0,0.5894160583941606
|
| 282 |
+
google/gemma-3-4b-it,belebele_ron_Latn,5,0.78
|
| 283 |
+
HuggingFaceTB/SmolLM3-3B,xwinograd,0,0.7988311980220274
|
| 284 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_ru,5,0.508759435977781
|
| 285 |
+
HuggingFaceTB/SmolLM3-3B,belebele_pol_Latn,5,0.6066666666666667
|
| 286 |
+
HuggingFaceTB/SmolLM3-3B,belebele_lit_Latn,5,0.4388888888888889
|
| 287 |
+
Qwen/Qwen3-1.7B,belebele_lvs_Latn,5,0.6677777777777778
|
| 288 |
+
HuggingFaceTB/SmolLM2-1.7B,xstorycloze,0,0.5408218518741351
|
| 289 |
+
Qwen/Qwen3-1.7B,include_base_44_croatian,0,0.5127272727272727
|
| 290 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_armenian,0,0.2581818181818182
|
| 291 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_ru,5,0.258082894174619
|
| 292 |
+
Qwen/Qwen3-1.7B,include_base_44_turkish,0,0.42700729927007297
|
| 293 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_croatian,0,0.44
|
| 294 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_tr,5,0.25872382851445663
|
| 295 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_pl,5,0.47393533684660305
|
| 296 |
+
HuggingFaceTB/SmolLM3-3B,belebele_est_Latn,5,0.37
|
| 297 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_est_Latn,5,0.27444444444444444
|
| 298 |
+
google/gemma-3-4b-it,belebele_pol_Latn,5,0.7666666666666667
|
| 299 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_sv,5,0.3335707164221621
|
| 300 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_el,5,0.4171058253809999
|
| 301 |
+
HuggingFaceTB/SmolLM2-1.7B,global_mmlu_full_sr,5,0.2920524141860134
|
| 302 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_de,5,0.5170203674690215
|
| 303 |
+
google/gemma-3-4b-it,global_mmlu_full_he,5,0.4596211365902293
|
| 304 |
+
google/gemma-3-4b-it,include_base_44_greek,0,0.4891304347826087
|
| 305 |
+
google/gemma-3-4b-it,include_base_44_finnish,0,0.4355716878402904
|
| 306 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_estonian,0,0.19196428571428573
|
| 307 |
+
google/gemma-3-4b-it,include_base_44_italian,0,0.6131386861313869
|
| 308 |
+
Qwen/Qwen3-1.7B,belebele_dan_Latn,5,0.7044444444444444
|
| 309 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_el,5,0.2511038313630537
|
| 310 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_en,5,0.48454636091724823
|
| 311 |
+
google/gemma-3-4b-it,global_mmlu_full_uk,5,0.4910269192422732
|
| 312 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_ro,5,0.4859706594502208
|
| 313 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_pt,5,0.3166927788064378
|
| 314 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_french,0,0.5823389021479713
|
| 315 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_portuguese,0,0.279491833030853
|
| 316 |
+
google/gemma-3-4b-it,global_mmlu_full_nl,5,0.5225751317476143
|
| 317 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_spanish,0,0.5836363636363636
|
| 318 |
+
HuggingFaceTB/SmolLM3-3B,belebele_lvs_Latn,5,0.41555555555555557
|
| 319 |
+
HuggingFaceTB/SmolLM2-1.7B,xcopa,0,0.5305454545454545
|
| 320 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_en,5,0.6007691212078051
|
| 321 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_fr,5,0.5274177467597209
|
| 322 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_ukrainian,0,0.30727272727272725
|
| 323 |
+
google/gemma-3-4b-it,belebele_lvs_Latn,5,0.7455555555555555
|
| 324 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_bulgarian,0,0.45636363636363636
|
| 325 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_ita_Latn,5,0.30333333333333334
|
| 326 |
+
HuggingFaceTB/SmolLM2-1.7B,xwinograd,0,0.7289278489548213
|
| 327 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,global_mmlu_full_de,5,0.30957128614157525
|
| 328 |
+
google/gemma-3-4b-it,include_base_44_lithuanian,0,0.5056179775280899
|
| 329 |
+
HuggingFaceTB/SmolLM3-3B,global_mmlu_full_it,5,0.5514883919669563
|
| 330 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,belebele_lvs_Latn,5,0.3011111111111111
|
| 331 |
+
HuggingFaceTB/SmolLM2-1.7B,belebele_ron_Latn,5,0.3711111111111111
|
| 332 |
+
open-sci/open-sci-ref-v0.01-1.7b-nemotron-cc-hq-1T-4096,include_base_44_albanian,0,0.2831215970961887
|
| 333 |
+
Qwen/Qwen3-1.7B,belebele_slv_Latn,5,0.6611111111111111
|
| 334 |
+
google/gemma-3-4b-it,global_mmlu_full_lt,5,0.4684517874946589
|
| 335 |
+
google/gemma-3-4b-it,include_base_44_german,0,0.381294964028777
|
| 336 |
+
Qwen/Qwen3-1.7B,global_mmlu_full_lt,5,0.4099843327161373
|
| 337 |
+
HuggingFaceTB/SmolLM3-3B,include_base_44_serbian,0,0.40545454545454546
|
| 338 |
+
HuggingFaceTB/SmolLM2-1.7B,include_base_44_italian,0,0.29927007299270075
|