Spaces:
Running
Running
Fix missing PLU Results
Browse files- results/zero-shot/aya-23-8b.json +30 -24
- results/zero-shot/aya-expanse-8b.json +30 -24
- results/zero-shot/llama-3-8b-instruct.json +30 -24
- results/zero-shot/llama-3-8b.json +30 -24
- results/zero-shot/llama-3.1-8b-instruct.json +30 -24
- results/zero-shot/llama-3.1-8b.json +30 -24
- results/zero-shot/llama-3.2-1b.json +30 -24
- results/zero-shot/llama-3.2-3b-instruct.json +30 -24
- results/zero-shot/llama-3.2-3b.json +30 -24
results/zero-shot/aya-23-8b.json
CHANGED
|
@@ -83,30 +83,6 @@
|
|
| 83 |
"exact_match": 0.2062780269058296,
|
| 84 |
"f1": 0.4653972244152745
|
| 85 |
},
|
| 86 |
-
{
|
| 87 |
-
"name": "turkish_plu_goal_inference",
|
| 88 |
-
"task": "multiple_choice",
|
| 89 |
-
"acc": 0.3918757467144564,
|
| 90 |
-
"acc_norm": 0.3859020310633214
|
| 91 |
-
},
|
| 92 |
-
{
|
| 93 |
-
"name": "turkish_plu_next_event_prediction",
|
| 94 |
-
"task": "multiple_choice",
|
| 95 |
-
"acc": 0.4687022900763359,
|
| 96 |
-
"acc_norm": 0.5374045801526718
|
| 97 |
-
},
|
| 98 |
-
{
|
| 99 |
-
"name": "turkish_plu_step_inference",
|
| 100 |
-
"task": "multiple_choice",
|
| 101 |
-
"acc": 0.33986928104575165,
|
| 102 |
-
"acc_norm": 0.45098039215686275
|
| 103 |
-
},
|
| 104 |
-
{
|
| 105 |
-
"name": "turkish_plu_step_ordering",
|
| 106 |
-
"task": "multiple_choice",
|
| 107 |
-
"acc": 0.6180215475024485,
|
| 108 |
-
"acc_norm": 0.6180215475024485
|
| 109 |
-
},
|
| 110 |
{
|
| 111 |
"name": "xcopa_tr",
|
| 112 |
"task": "multiple_choice",
|
|
@@ -187,6 +163,36 @@
|
|
| 187 |
"task": "multiple_choice",
|
| 188 |
"acc": 0.5857142857142857,
|
| 189 |
"acc_norm": 0.5857142857142857
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
}
|
| 191 |
]
|
| 192 |
}
|
|
|
|
| 83 |
"exact_match": 0.2062780269058296,
|
| 84 |
"f1": 0.4653972244152745
|
| 85 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
{
|
| 87 |
"name": "xcopa_tr",
|
| 88 |
"task": "multiple_choice",
|
|
|
|
| 163 |
"task": "multiple_choice",
|
| 164 |
"acc": 0.5857142857142857,
|
| 165 |
"acc_norm": 0.5857142857142857
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"name": "turkish_plu",
|
| 169 |
+
"task": "multiple_choice",
|
| 170 |
+
"acc": 0.4928,
|
| 171 |
+
"acc_norm": 0.40416
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"name": "turkish_plu_goal_inference",
|
| 175 |
+
"task": "multiple_choice",
|
| 176 |
+
"acc": 0.42054958183990443,
|
| 177 |
+
"acc_norm": 0.2724014336917563
|
| 178 |
+
},
|
| 179 |
+
{
|
| 180 |
+
"name": "turkish_plu_next_event_prediction",
|
| 181 |
+
"task": "multiple_choice",
|
| 182 |
+
"acc": 0.48854961832061067,
|
| 183 |
+
"acc_norm": 0.2732824427480916
|
| 184 |
+
},
|
| 185 |
+
{
|
| 186 |
+
"name": "turkish_plu_step_inference",
|
| 187 |
+
"task": "multiple_choice",
|
| 188 |
+
"acc": 0.37254901960784315,
|
| 189 |
+
"acc_norm": 0.35294117647058826
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"name": "turkish_plu_step_ordering",
|
| 193 |
+
"task": "multiple_choice",
|
| 194 |
+
"acc": 0.6268364348677767,
|
| 195 |
+
"acc_norm": 0.6268364348677767
|
| 196 |
}
|
| 197 |
]
|
| 198 |
}
|
results/zero-shot/aya-expanse-8b.json
CHANGED
|
@@ -93,30 +93,6 @@
|
|
| 93 |
"exact_match": 0.13452914798206278,
|
| 94 |
"f1": 0.435087842533856
|
| 95 |
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_goal_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.4062126642771804,
|
| 100 |
-
"acc_norm": 0.3930704898446834
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_next_event_prediction",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.4900763358778626,
|
| 106 |
-
"acc_norm": 0.5465648854961832
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"name": "turkish_plu_step_inference",
|
| 110 |
-
"task": "multiple_choice",
|
| 111 |
-
"acc": 0.3464052287581699,
|
| 112 |
-
"acc_norm": 0.4395424836601307
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"name": "turkish_plu_step_ordering",
|
| 116 |
-
"task": "multiple_choice",
|
| 117 |
-
"acc": 0.5935357492654261,
|
| 118 |
-
"acc_norm": 0.5935357492654261
|
| 119 |
-
},
|
| 120 |
{
|
| 121 |
"name": "wiki_lingua_tr",
|
| 122 |
"task": "summarization",
|
|
@@ -185,6 +161,36 @@
|
|
| 185 |
"task": "multiple_choice",
|
| 186 |
"acc": 0.5428571428571428,
|
| 187 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
]
|
| 190 |
}
|
|
|
|
| 93 |
"exact_match": 0.13452914798206278,
|
| 94 |
"f1": 0.435087842533856
|
| 95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
{
|
| 97 |
"name": "wiki_lingua_tr",
|
| 98 |
"task": "summarization",
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.5428571428571428,
|
| 163 |
"acc_norm": 0.5428571428571428
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkish_plu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.50208,
|
| 169 |
+
"acc_norm": 0.40704
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkish_plu_goal_inference",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.43010752688172044,
|
| 175 |
+
"acc_norm": 0.27956989247311825
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkish_plu_next_event_prediction",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.5114503816793893,
|
| 181 |
+
"acc_norm": 0.27938931297709924
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "turkish_plu_step_inference",
|
| 185 |
+
"task": "multiple_choice",
|
| 186 |
+
"acc": 0.4035947712418301,
|
| 187 |
+
"acc_norm": 0.37254901960784315
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "turkish_plu_step_ordering",
|
| 191 |
+
"task": "multiple_choice",
|
| 192 |
+
"acc": 0.614103819784525,
|
| 193 |
+
"acc_norm": 0.614103819784525
|
| 194 |
}
|
| 195 |
]
|
| 196 |
}
|
results/zero-shot/llama-3-8b-instruct.json
CHANGED
|
@@ -82,30 +82,6 @@
|
|
| 82 |
"exact_match": 0.1289237668161435,
|
| 83 |
"f1": 0.4134057883004977
|
| 84 |
},
|
| 85 |
-
{
|
| 86 |
-
"name": "turkish_plu_goal_inference",
|
| 87 |
-
"task": "multiple_choice",
|
| 88 |
-
"acc": 0.38829151732377537,
|
| 89 |
-
"acc_norm": 0.43130227001194743
|
| 90 |
-
},
|
| 91 |
-
{
|
| 92 |
-
"name": "turkish_plu_next_event_prediction",
|
| 93 |
-
"task": "multiple_choice",
|
| 94 |
-
"acc": 0.4549618320610687,
|
| 95 |
-
"acc_norm": 0.517557251908397
|
| 96 |
-
},
|
| 97 |
-
{
|
| 98 |
-
"name": "turkish_plu_step_inference",
|
| 99 |
-
"task": "multiple_choice",
|
| 100 |
-
"acc": 0.3137254901960784,
|
| 101 |
-
"acc_norm": 0.44281045751633985
|
| 102 |
-
},
|
| 103 |
-
{
|
| 104 |
-
"name": "turkish_plu_step_ordering",
|
| 105 |
-
"task": "multiple_choice",
|
| 106 |
-
"acc": 0.6160626836434868,
|
| 107 |
-
"acc_norm": 0.6160626836434868
|
| 108 |
-
},
|
| 109 |
{
|
| 110 |
"name": "xcopa_tr",
|
| 111 |
"task": "multiple_choice",
|
|
@@ -186,6 +162,36 @@
|
|
| 186 |
"task": "multiple_choice",
|
| 187 |
"acc": 0.6142857142857143,
|
| 188 |
"acc_norm": 0.6142857142857143
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
}
|
| 190 |
]
|
| 191 |
}
|
|
|
|
| 82 |
"exact_match": 0.1289237668161435,
|
| 83 |
"f1": 0.4134057883004977
|
| 84 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
{
|
| 86 |
"name": "xcopa_tr",
|
| 87 |
"task": "multiple_choice",
|
|
|
|
| 162 |
"task": "multiple_choice",
|
| 163 |
"acc": 0.6142857142857143,
|
| 164 |
"acc_norm": 0.6142857142857143
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"name": "turkish_plu",
|
| 168 |
+
"task": "multiple_choice",
|
| 169 |
+
"acc": 0.47136,
|
| 170 |
+
"acc_norm": 0.4032
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"name": "turkish_plu_goal_inference",
|
| 174 |
+
"task": "multiple_choice",
|
| 175 |
+
"acc": 0.3763440860215054,
|
| 176 |
+
"acc_norm": 0.26642771804062126
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"name": "turkish_plu_next_event_prediction",
|
| 180 |
+
"task": "multiple_choice",
|
| 181 |
+
"acc": 0.46564885496183206,
|
| 182 |
+
"acc_norm": 0.2595419847328244
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"name": "turkish_plu_step_inference",
|
| 186 |
+
"task": "multiple_choice",
|
| 187 |
+
"acc": 0.3349673202614379,
|
| 188 |
+
"acc_norm": 0.35784313725490197
|
| 189 |
+
},
|
| 190 |
+
{
|
| 191 |
+
"name": "turkish_plu_step_ordering",
|
| 192 |
+
"task": "multiple_choice",
|
| 193 |
+
"acc": 0.6346718903036239,
|
| 194 |
+
"acc_norm": 0.6346718903036239
|
| 195 |
}
|
| 196 |
]
|
| 197 |
}
|
results/zero-shot/llama-3-8b.json
CHANGED
|
@@ -81,30 +81,6 @@
|
|
| 81 |
"exact_match": 0.28475336322869954,
|
| 82 |
"f1": 0.5013148868557868
|
| 83 |
},
|
| 84 |
-
{
|
| 85 |
-
"name": "turkish_plu_goal_inference",
|
| 86 |
-
"task": "multiple_choice",
|
| 87 |
-
"acc": 0.38948626045400236,
|
| 88 |
-
"acc_norm": 0.4169653524492234
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"name": "turkish_plu_next_event_prediction",
|
| 92 |
-
"task": "multiple_choice",
|
| 93 |
-
"acc": 0.4488549618320611,
|
| 94 |
-
"acc_norm": 0.5328244274809161
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_step_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.32189542483660133,
|
| 100 |
-
"acc_norm": 0.47058823529411764
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_step_ordering",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.6278158667972575,
|
| 106 |
-
"acc_norm": 0.6278158667972575
|
| 107 |
-
},
|
| 108 |
{
|
| 109 |
"name": "xcopa_tr",
|
| 110 |
"task": "multiple_choice",
|
|
@@ -185,6 +161,36 @@
|
|
| 185 |
"task": "multiple_choice",
|
| 186 |
"acc": 0.5428571428571428,
|
| 187 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
]
|
| 190 |
}
|
|
|
|
| 81 |
"exact_match": 0.28475336322869954,
|
| 82 |
"f1": 0.5013148868557868
|
| 83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
{
|
| 85 |
"name": "xcopa_tr",
|
| 86 |
"task": "multiple_choice",
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.5428571428571428,
|
| 163 |
"acc_norm": 0.5428571428571428
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkish_plu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.46496,
|
| 169 |
+
"acc_norm": 0.39616
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkish_plu_goal_inference",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.36917562724014336,
|
| 175 |
+
"acc_norm": 0.25925925925925924
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkish_plu_next_event_prediction",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.46106870229007635,
|
| 181 |
+
"acc_norm": 0.26259541984732826
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "turkish_plu_step_inference",
|
| 185 |
+
"task": "multiple_choice",
|
| 186 |
+
"acc": 0.3284313725490196,
|
| 187 |
+
"acc_norm": 0.33986928104575165
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "turkish_plu_step_ordering",
|
| 191 |
+
"task": "multiple_choice",
|
| 192 |
+
"acc": 0.6278158667972575,
|
| 193 |
+
"acc_norm": 0.6278158667972575
|
| 194 |
}
|
| 195 |
]
|
| 196 |
}
|
results/zero-shot/llama-3.1-8b-instruct.json
CHANGED
|
@@ -81,30 +81,6 @@
|
|
| 81 |
"exact_match": 0.23318385650224216,
|
| 82 |
"f1": 0.5062272078338648
|
| 83 |
},
|
| 84 |
-
{
|
| 85 |
-
"name": "turkish_plu_goal_inference",
|
| 86 |
-
"task": "multiple_choice",
|
| 87 |
-
"acc": 0.40860215053763443,
|
| 88 |
-
"acc_norm": 0.45997610513739545
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"name": "turkish_plu_next_event_prediction",
|
| 92 |
-
"task": "multiple_choice",
|
| 93 |
-
"acc": 0.4442748091603053,
|
| 94 |
-
"acc_norm": 0.5419847328244275
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_step_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.33169934640522875,
|
| 100 |
-
"acc_norm": 0.4624183006535948
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_step_ordering",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.633692458374143,
|
| 106 |
-
"acc_norm": 0.633692458374143
|
| 107 |
-
},
|
| 108 |
{
|
| 109 |
"name": "xcopa_tr",
|
| 110 |
"task": "multiple_choice",
|
|
@@ -185,6 +161,36 @@
|
|
| 185 |
"task": "multiple_choice",
|
| 186 |
"acc": 0.6428571428571429,
|
| 187 |
"acc_norm": 0.6428571428571429
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
]
|
| 190 |
}
|
|
|
|
| 81 |
"exact_match": 0.23318385650224216,
|
| 82 |
"f1": 0.5062272078338648
|
| 83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
{
|
| 85 |
"name": "xcopa_tr",
|
| 86 |
"task": "multiple_choice",
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.6428571428571429,
|
| 163 |
"acc_norm": 0.6428571428571429
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkish_plu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.4848,
|
| 169 |
+
"acc_norm": 0.40896
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkish_plu_goal_inference",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.40860215053763443,
|
| 175 |
+
"acc_norm": 0.27718040621266427
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkish_plu_next_event_prediction",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.44580152671755724,
|
| 181 |
+
"acc_norm": 0.2549618320610687
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "turkish_plu_step_inference",
|
| 185 |
+
"task": "multiple_choice",
|
| 186 |
+
"acc": 0.3431372549019608,
|
| 187 |
+
"acc_norm": 0.33986928104575165
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "turkish_plu_step_ordering",
|
| 191 |
+
"task": "multiple_choice",
|
| 192 |
+
"acc": 0.6571988246816847,
|
| 193 |
+
"acc_norm": 0.6571988246816847
|
| 194 |
}
|
| 195 |
]
|
| 196 |
}
|
results/zero-shot/llama-3.1-8b.json
CHANGED
|
@@ -81,30 +81,6 @@
|
|
| 81 |
"exact_match": 0.2757847533632287,
|
| 82 |
"f1": 0.5178366277473359
|
| 83 |
},
|
| 84 |
-
{
|
| 85 |
-
"name": "turkish_plu_goal_inference",
|
| 86 |
-
"task": "multiple_choice",
|
| 87 |
-
"acc": 0.4145758661887694,
|
| 88 |
-
"acc_norm": 0.4324970131421744
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"name": "turkish_plu_next_event_prediction",
|
| 92 |
-
"task": "multiple_choice",
|
| 93 |
-
"acc": 0.4488549618320611,
|
| 94 |
-
"acc_norm": 0.5358778625954198
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_step_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.3382352941176471,
|
| 100 |
-
"acc_norm": 0.4738562091503268
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_step_ordering",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.6425073457394711,
|
| 106 |
-
"acc_norm": 0.6425073457394711
|
| 107 |
-
},
|
| 108 |
{
|
| 109 |
"name": "xcopa_tr",
|
| 110 |
"task": "multiple_choice",
|
|
@@ -185,6 +161,36 @@
|
|
| 185 |
"task": "multiple_choice",
|
| 186 |
"acc": 0.5857142857142857,
|
| 187 |
"acc_norm": 0.5857142857142857
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
]
|
| 190 |
}
|
|
|
|
| 81 |
"exact_match": 0.2757847533632287,
|
| 82 |
"f1": 0.5178366277473359
|
| 83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
{
|
| 85 |
"name": "xcopa_tr",
|
| 86 |
"task": "multiple_choice",
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.5857142857142857,
|
| 163 |
"acc_norm": 0.5857142857142857
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkish_plu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.47552,
|
| 169 |
+
"acc_norm": 0.39872
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkish_plu_goal_inference",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.38829151732377537,
|
| 175 |
+
"acc_norm": 0.2628434886499403
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkish_plu_next_event_prediction",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.4595419847328244,
|
| 181 |
+
"acc_norm": 0.26106870229007634
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "turkish_plu_step_inference",
|
| 185 |
+
"task": "multiple_choice",
|
| 186 |
+
"acc": 0.35130718954248363,
|
| 187 |
+
"acc_norm": 0.3431372549019608
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "turkish_plu_step_ordering",
|
| 191 |
+
"task": "multiple_choice",
|
| 192 |
+
"acc": 0.6317335945151812,
|
| 193 |
+
"acc_norm": 0.6317335945151812
|
| 194 |
}
|
| 195 |
]
|
| 196 |
}
|
results/zero-shot/llama-3.2-1b.json
CHANGED
|
@@ -93,30 +93,6 @@
|
|
| 93 |
"exact_match": 0.06278026905829596,
|
| 94 |
"f1": 0.21486130318406463
|
| 95 |
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_goal_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.35842293906810035,
|
| 100 |
-
"acc_norm": 0.4026284348864994
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_next_event_prediction",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.3709923664122137,
|
| 106 |
-
"acc_norm": 0.467175572519084
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"name": "turkish_plu_step_inference",
|
| 110 |
-
"task": "multiple_choice",
|
| 111 |
-
"acc": 0.27941176470588236,
|
| 112 |
-
"acc_norm": 0.41830065359477125
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"name": "turkish_plu_step_ordering",
|
| 116 |
-
"task": "multiple_choice",
|
| 117 |
-
"acc": 0.5759059745347699,
|
| 118 |
-
"acc_norm": 0.5759059745347699
|
| 119 |
-
},
|
| 120 |
{
|
| 121 |
"name": "wiki_lingua_tr",
|
| 122 |
"task": "summarization",
|
|
@@ -217,6 +193,36 @@
|
|
| 217 |
"task": "multiple_choice",
|
| 218 |
"acc": 0.5285714285714286,
|
| 219 |
"acc_norm": 0.5285714285714286
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
}
|
| 221 |
]
|
| 222 |
}
|
|
|
|
| 93 |
"exact_match": 0.06278026905829596,
|
| 94 |
"f1": 0.21486130318406463
|
| 95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
{
|
| 97 |
"name": "wiki_lingua_tr",
|
| 98 |
"task": "summarization",
|
|
|
|
| 193 |
"task": "multiple_choice",
|
| 194 |
"acc": 0.5285714285714286,
|
| 195 |
"acc_norm": 0.5285714285714286
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"name": "turkish_plu",
|
| 199 |
+
"task": "multiple_choice",
|
| 200 |
+
"acc": 0.4208,
|
| 201 |
+
"acc_norm": 0.368
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"name": "turkish_plu_goal_inference",
|
| 205 |
+
"task": "multiple_choice",
|
| 206 |
+
"acc": 0.36200716845878134,
|
| 207 |
+
"acc_norm": 0.23894862604540024
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"name": "turkish_plu_next_event_prediction",
|
| 211 |
+
"task": "multiple_choice",
|
| 212 |
+
"acc": 0.37251908396946565,
|
| 213 |
+
"acc_norm": 0.2366412213740458
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"name": "turkish_plu_step_inference",
|
| 217 |
+
"task": "multiple_choice",
|
| 218 |
+
"acc": 0.29248366013071897,
|
| 219 |
+
"acc_norm": 0.3366013071895425
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"name": "turkish_plu_step_ordering",
|
| 223 |
+
"task": "multiple_choice",
|
| 224 |
+
"acc": 0.5768854064642507,
|
| 225 |
+
"acc_norm": 0.5768854064642507
|
| 226 |
}
|
| 227 |
]
|
| 228 |
}
|
results/zero-shot/llama-3.2-3b-instruct.json
CHANGED
|
@@ -93,30 +93,6 @@
|
|
| 93 |
"exact_match": 0.18721973094170405,
|
| 94 |
"f1": 0.5109898180473623
|
| 95 |
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_goal_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.3321385902031063,
|
| 100 |
-
"acc_norm": 0.3548387096774194
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_next_event_prediction",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.3648854961832061,
|
| 106 |
-
"acc_norm": 0.4488549618320611
|
| 107 |
-
},
|
| 108 |
-
{
|
| 109 |
-
"name": "turkish_plu_step_inference",
|
| 110 |
-
"task": "multiple_choice",
|
| 111 |
-
"acc": 0.24183006535947713,
|
| 112 |
-
"acc_norm": 0.3758169934640523
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"name": "turkish_plu_step_ordering",
|
| 116 |
-
"task": "multiple_choice",
|
| 117 |
-
"acc": 0.5710088148873653,
|
| 118 |
-
"acc_norm": 0.5710088148873653
|
| 119 |
-
},
|
| 120 |
{
|
| 121 |
"name": "wiki_lingua_tr",
|
| 122 |
"task": "summarization",
|
|
@@ -217,6 +193,36 @@
|
|
| 217 |
"task": "multiple_choice",
|
| 218 |
"acc": 0.5428571428571428,
|
| 219 |
"acc_norm": 0.5428571428571428
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
}
|
| 221 |
]
|
| 222 |
}
|
|
|
|
| 93 |
"exact_match": 0.18721973094170405,
|
| 94 |
"f1": 0.5109898180473623
|
| 95 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
{
|
| 97 |
"name": "wiki_lingua_tr",
|
| 98 |
"task": "summarization",
|
|
|
|
| 193 |
"task": "multiple_choice",
|
| 194 |
"acc": 0.5428571428571428,
|
| 195 |
"acc_norm": 0.5428571428571428
|
| 196 |
+
},
|
| 197 |
+
{
|
| 198 |
+
"name": "turkish_plu",
|
| 199 |
+
"task": "multiple_choice",
|
| 200 |
+
"acc": 0.44,
|
| 201 |
+
"acc_norm": 0.3952
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"name": "turkish_plu_goal_inference",
|
| 205 |
+
"task": "multiple_choice",
|
| 206 |
+
"acc": 0.3548387096774194,
|
| 207 |
+
"acc_norm": 0.26045400238948624
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"name": "turkish_plu_next_event_prediction",
|
| 211 |
+
"task": "multiple_choice",
|
| 212 |
+
"acc": 0.3938931297709924,
|
| 213 |
+
"acc_norm": 0.24427480916030533
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"name": "turkish_plu_step_inference",
|
| 217 |
+
"task": "multiple_choice",
|
| 218 |
+
"acc": 0.2777777777777778,
|
| 219 |
+
"acc_norm": 0.3382352941176471
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"name": "turkish_plu_step_ordering",
|
| 223 |
+
"task": "multiple_choice",
|
| 224 |
+
"acc": 0.6366307541625857,
|
| 225 |
+
"acc_norm": 0.6366307541625857
|
| 226 |
}
|
| 227 |
]
|
| 228 |
}
|
results/zero-shot/llama-3.2-3b.json
CHANGED
|
@@ -81,30 +81,6 @@
|
|
| 81 |
"exact_match": 0.21188340807174888,
|
| 82 |
"f1": 0.4583574684635767
|
| 83 |
},
|
| 84 |
-
{
|
| 85 |
-
"name": "turkish_plu_goal_inference",
|
| 86 |
-
"task": "multiple_choice",
|
| 87 |
-
"acc": 0.3906810035842294,
|
| 88 |
-
"acc_norm": 0.3906810035842294
|
| 89 |
-
},
|
| 90 |
-
{
|
| 91 |
-
"name": "turkish_plu_next_event_prediction",
|
| 92 |
-
"task": "multiple_choice",
|
| 93 |
-
"acc": 0.4122137404580153,
|
| 94 |
-
"acc_norm": 0.5389312977099237
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"name": "turkish_plu_step_inference",
|
| 98 |
-
"task": "multiple_choice",
|
| 99 |
-
"acc": 0.30718954248366015,
|
| 100 |
-
"acc_norm": 0.4493464052287582
|
| 101 |
-
},
|
| 102 |
-
{
|
| 103 |
-
"name": "turkish_plu_step_ordering",
|
| 104 |
-
"task": "multiple_choice",
|
| 105 |
-
"acc": 0.5974534769833496,
|
| 106 |
-
"acc_norm": 0.5974534769833496
|
| 107 |
-
},
|
| 108 |
{
|
| 109 |
"name": "xcopa_tr",
|
| 110 |
"task": "multiple_choice",
|
|
@@ -185,6 +161,36 @@
|
|
| 185 |
"task": "multiple_choice",
|
| 186 |
"acc": 0.5714285714285714,
|
| 187 |
"acc_norm": 0.5714285714285714
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
}
|
| 189 |
]
|
| 190 |
}
|
|
|
|
| 81 |
"exact_match": 0.21188340807174888,
|
| 82 |
"f1": 0.4583574684635767
|
| 83 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
{
|
| 85 |
"name": "xcopa_tr",
|
| 86 |
"task": "multiple_choice",
|
|
|
|
| 161 |
"task": "multiple_choice",
|
| 162 |
"acc": 0.5714285714285714,
|
| 163 |
"acc_norm": 0.5714285714285714
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"name": "turkish_plu",
|
| 167 |
+
"task": "multiple_choice",
|
| 168 |
+
"acc": 0.45408,
|
| 169 |
+
"acc_norm": 0.38752
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"name": "turkish_plu_goal_inference",
|
| 173 |
+
"task": "multiple_choice",
|
| 174 |
+
"acc": 0.4002389486260454,
|
| 175 |
+
"acc_norm": 0.25925925925925924
|
| 176 |
+
},
|
| 177 |
+
{
|
| 178 |
+
"name": "turkish_plu_next_event_prediction",
|
| 179 |
+
"task": "multiple_choice",
|
| 180 |
+
"acc": 0.43206106870229005,
|
| 181 |
+
"acc_norm": 0.26106870229007634
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"name": "turkish_plu_step_inference",
|
| 185 |
+
"task": "multiple_choice",
|
| 186 |
+
"acc": 0.315359477124183,
|
| 187 |
+
"acc_norm": 0.35130718954248363
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"name": "turkish_plu_step_ordering",
|
| 191 |
+
"task": "multiple_choice",
|
| 192 |
+
"acc": 0.5954946131243879,
|
| 193 |
+
"acc_norm": 0.5954946131243879
|
| 194 |
}
|
| 195 |
]
|
| 196 |
}
|