lm1-misc-pile/421m32b32b/421m32b32bpile/evaluation/rankeval/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-24-13-57-00_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.338,0.01496596071022449,0
anli_r2,acc,0.37,0.015275252316519362,0
anli_r3,acc,0.33916666666666667,0.013672343491681815,0
arc_challenge,acc,0.20392491467576793,0.011774262478702247,0
arc_challenge,acc_norm,0.2525597269624573,0.012696728980207704,0
arc_easy,acc,0.4877946127946128,0.010256726235129026,0
arc_easy,acc_norm,0.4591750841750842,0.010225526906982613,0
boolq,acc,0.537920489296636,0.008719868567159636,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.26161616161616164,,1
copa,acc,0.67,0.04725815626252607,0
hellaswag,acc,0.3036247759410476,0.0045888279587751124,0
hellaswag,acc_norm,0.33061143198566023,0.0046947189182257555,0
piqa,acc,0.6371055495103374,0.011218667570840881,0
piqa,acc_norm,0.6409140369967355,0.011192949073844112,0
rte,acc,0.4548736462093863,0.029973636495415252,0
sciq,acc,0.836,0.011715000693181323,0
sciq,acc_norm,0.819,0.012181436179177904,0
storycloze_2016,acc,0.5980758952431855,0.011337815169572413,0
winogrande,acc,0.5248618784530387,0.014035102883627752,0