lm1-misc-pile/421m32b32b/421m32b32bpile/evaluation/rankeval/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-24-17-05-33_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.325,0.014818724459095526,0
anli_r2,acc,0.322,0.014782913600996662,0
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
arc_challenge,acc,0.20051194539249148,0.011700318050499358,0
arc_challenge,acc_norm,0.2363481228668942,0.01241496052430183,0
arc_easy,acc,0.48569023569023567,0.010255580881603624,0
arc_easy,acc_norm,0.4642255892255892,0.010233488709726556,0
boolq,acc,0.5382262996941896,0.00871946009810685,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.3176319176319176,,1
copa,acc,0.63,0.04852365870939099,0
hellaswag,acc,0.29954192391953793,0.004571212360565283,0
hellaswag,acc_norm,0.3345947022505477,0.0047088426001774385,0
piqa,acc,0.6398258977149075,0.011200375176667486,0
piqa,acc_norm,0.6436343852013058,0.011174109865864729,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.844,0.01148023500612236,0
sciq,acc_norm,0.826,0.01199449323097343,0
storycloze_2016,acc,0.5889898450026724,0.011377828319387507,0
winogrande,acc,0.5217048145224941,0.01403923921648463,0