lm1-misc-pile/421m32b32b/421m32b32bpile/evaluation/rankeval/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-22-18-55-49_0shots.csv
task,metric,value,err,version
anli_r1,acc,0.339,0.01497675877162034,0
anli_r2,acc,0.342,0.015008706182121728,0
anli_r3,acc,0.36666666666666664,0.013916893275819934,0
arc_challenge,acc,0.19795221843003413,0.0116439909715734,0
arc_challenge,acc_norm,0.23378839590443687,0.012368225378507137,0
arc_easy,acc,0.4722222222222222,0.010243938285881115,0
arc_easy,acc_norm,0.4276094276094276,0.010151683397430685,0
boolq,acc,0.5948012232415902,0.008586427929715531,1
cb,acc,0.39285714285714285,0.0658538889806635,1
cb,f1,0.20750000000000002,,1
copa,acc,0.63,0.04852365870939099,0
hellaswag,acc,0.29944234216291576,0.004570777326263895,0
hellaswag,acc_norm,0.3315076677952599,0.004697929774670284,0
piqa,acc,0.6409140369967355,0.0111929490738441,0
piqa,acc_norm,0.6376496191512514,0.01121504021510457,0
rte,acc,0.5306859205776173,0.030039730592197812,0
sciq,acc,0.803,0.01258369378796813,0
sciq,acc_norm,0.717,0.014251810906481737,0
storycloze_2016,acc,0.5954035275253875,0.011350002165791937,0
winogrande,acc,0.5224940805051302,0.014038257824059886,0
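
The rows above are raw lm-eval output: one (task, metric) pair per line, with the score in value, its standard error in err, and the task version in version. Below is a minimal sketch of how such a file could be loaded and pivoted into a task-by-metric table with pandas; the filename matches the path above, and its actual location on disk is an assumption.

# Minimal sketch (assumption: pandas is installed and the CSV sits in the
# working directory under the filename shown above).
import pandas as pd

df = pd.read_csv(
    "lm1-421m-32b-results_lm-eval_global_step60336_2023-01-22-18-55-49_0shots.csv"
)
# Each row is one (task, metric) pair; pivot so acc / acc_norm / f1 become columns.
table = df.pivot(index="task", columns="metric", values="value")
print(table.round(3))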