lm1-misc-pile / 1b12b12b / 1b112b12bpile / evaluation / lm1-1b1-12b-results_lm-eval_global_step23189_2023-01-24-13-53-32_2shots.json
{
"results": {
"anli_r1": {
"acc": 0.317,
"acc_stderr": 0.014721675438880229
},
"anli_r2": {
"acc": 0.334,
"acc_stderr": 0.01492201952373296
},
"anli_r3": {
"acc": 0.3325,
"acc_stderr": 0.013605417345710526
},
"cb": {
"acc": 0.35714285714285715,
"acc_stderr": 0.0646095738380922,
"f1": 0.2563323201621074
},
"copa": {
"acc": 0.65,
"acc_stderr": 0.0479372485441102
},
"hellaswag": {
"acc": 0.29685321649073887,
"acc_stderr": 0.004559375835805974,
"acc_norm": 0.32593108942441745,
"acc_norm_stderr": 0.0046776374633913965
},
"rte": {
"acc": 0.5234657039711191,
"acc_stderr": 0.03006330041190266
},
"winogrande": {
"acc": 0.5430149960536701,
"acc_stderr": 0.01400038676159829
},
"storycloze_2016": {
"acc": 0.584179583110636,
"acc_stderr": 0.01139738683314014
},
"boolq": {
"acc": 0.4792048929663609,
"acc_stderr": 0.008737488341370727
},
"arc_easy": {
"acc": 0.47095959595959597,
"acc_stderr": 0.01024246382639562,
"acc_norm": 0.4494949494949495,
"acc_norm_stderr": 0.010207308833916035
},
"arc_challenge": {
"acc": 0.20477815699658702,
"acc_stderr": 0.011792544338513398,
"acc_norm": 0.2431740614334471,
"acc_norm_stderr": 0.012536554144587089
},
"sciq": {
"acc": 0.841,
"acc_stderr": 0.011569479368271289,
"acc_norm": 0.819,
"acc_norm_stderr": 0.01218143617917791
},
"piqa": {
"acc": 0.6392818280739935,
"acc_stderr": 0.011204064809088518,
"acc_norm": 0.6360174102285092,
"acc_norm_stderr": 0.011225875703487171
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}
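The JSON above follows the standard lm-evaluation-harness output layout: a `results` map from task name to metrics (`acc` with its standard error, plus `acc_norm` and, for cb, `f1` where the task reports them) and a `versions` map recording each task's version. A minimal Python sketch for loading and tabulating such a file is below; the filename is taken from the path above, and the summary format is an illustrative assumption, not part of the harness.

```python
import json

# Path assumed from the repo listing above; adjust to wherever the file lives.
RESULTS_PATH = "lm1-1b1-12b-results_lm-eval_global_step23189_2023-01-24-13-53-32_2shots.json"

with open(RESULTS_PATH) as f:
    data = json.load(f)

# Print one line per task: accuracy with standard error, plus
# normalized accuracy for the tasks that report it.
for task, metrics in sorted(data["results"].items()):
    line = f"{task:20s} acc={metrics['acc']:.4f} ± {metrics['acc_stderr']:.4f}"
    if "acc_norm" in metrics:
        line += f"  acc_norm={metrics['acc_norm']:.4f} ± {metrics['acc_norm_stderr']:.4f}"
    print(line)
```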