lm1-misc-pile
/
421m32b32b
/421m32b32bpile
/evaluation
/rankeval
/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-24-13-57-00_4shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.338, | |
"acc_stderr": 0.01496596071022449 | |
}, | |
"anli_r2": { | |
"acc": 0.37, | |
"acc_stderr": 0.015275252316519362 | |
}, | |
"anli_r3": { | |
"acc": 0.33916666666666667, | |
"acc_stderr": 0.013672343491681815 | |
}, | |
"cb": { | |
"acc": 0.39285714285714285, | |
"acc_stderr": 0.0658538889806635, | |
"f1": 0.26161616161616164 | |
}, | |
"copa": { | |
"acc": 0.67, | |
"acc_stderr": 0.04725815626252607 | |
}, | |
"hellaswag": { | |
"acc": 0.3036247759410476, | |
"acc_stderr": 0.0045888279587751124, | |
"acc_norm": 0.33061143198566023, | |
"acc_norm_stderr": 0.0046947189182257555 | |
}, | |
"rte": { | |
"acc": 0.4548736462093863, | |
"acc_stderr": 0.029973636495415252 | |
}, | |
"winogrande": { | |
"acc": 0.5248618784530387, | |
"acc_stderr": 0.014035102883627752 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5980758952431855, | |
"acc_stderr": 0.011337815169572413 | |
}, | |
"boolq": { | |
"acc": 0.537920489296636, | |
"acc_stderr": 0.008719868567159636 | |
}, | |
"arc_easy": { | |
"acc": 0.4877946127946128, | |
"acc_stderr": 0.010256726235129026, | |
"acc_norm": 0.4591750841750842, | |
"acc_norm_stderr": 0.010225526906982613 | |
}, | |
"arc_challenge": { | |
"acc": 0.20392491467576793, | |
"acc_stderr": 0.011774262478702247, | |
"acc_norm": 0.2525597269624573, | |
"acc_norm_stderr": 0.012696728980207704 | |
}, | |
"sciq": { | |
"acc": 0.836, | |
"acc_stderr": 0.011715000693181323, | |
"acc_norm": 0.819, | |
"acc_norm_stderr": 0.012181436179177904 | |
}, | |
"piqa": { | |
"acc": 0.6371055495103374, | |
"acc_stderr": 0.011218667570840881, | |
"acc_norm": 0.6409140369967355, | |
"acc_norm_stderr": 0.011192949073844112 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |