lm1-misc-pile/421m32b32b/421m32b32bpile/evaluation/rankeval/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-24-17-05-33_5shots.csv
task,metric,value,err,version
anli_r1,acc,0.325,0.014818724459095526,0
anli_r2,acc,0.322,0.014782913600996662,0
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
arc_challenge,acc,0.20051194539249148,0.011700318050499358,0
arc_challenge,acc_norm,0.2363481228668942,0.01241496052430183,0
arc_easy,acc,0.48569023569023567,0.010255580881603624,0
arc_easy,acc_norm,0.4642255892255892,0.010233488709726556,0
boolq,acc,0.5382262996941896,0.00871946009810685,1
cb,acc,0.5,0.06741998624632421,1
cb,f1,0.3176319176319176,,1
copa,acc,0.63,0.04852365870939099,0
hellaswag,acc,0.29954192391953793,0.004571212360565283,0
hellaswag,acc_norm,0.3345947022505477,0.0047088426001774385,0
piqa,acc,0.6398258977149075,0.011200375176667486,0
piqa,acc_norm,0.6436343852013058,0.011174109865864729,0
rte,acc,0.5270758122743683,0.030052303463143706,0
sciq,acc,0.844,0.01148023500612236,0
sciq,acc_norm,0.826,0.01199449323097343,0
storycloze_2016,acc,0.5889898450026724,0.011377828319387507,0
winogrande,acc,0.5217048145224941,0.01403923921648463,0
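A minimal sketch for loading this results file with the Python standard library, assuming it is saved under the filename from the path above (the filename and the column layout task,metric,value,err,version are taken directly from this file; nothing else is assumed):

import csv

# Filename copied from the repository path shown above.
path = "lm1-421m-32b-results_lm-eval_global_step60336_2023-01-24-17-05-33_5shots.csv"

with open(path, newline="") as f:
    rows = list(csv.DictReader(f))

# Print each task's accuracy with its standard error where one is
# reported (the cb,f1 row has an empty err field, hence the fallback).
for row in rows:
    if row["metric"] == "acc":
        err = row["err"] or "n/a"
        print(f'{row["task"]}: {float(row["value"]):.3f} (err={err})')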