lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-13-57-05_4shots.csv
task,metric,value,err,version
anli_r1,acc,0.324,0.014806864733738859,0
anli_r2,acc,0.34,0.014987482264363937,0
anli_r3,acc,0.335,0.013630871843821479,0
arc_challenge,acc,0.1689419795221843,0.010949795652485024,0
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406448,0
arc_easy,acc,0.32786195286195285,0.009632587076170018,0
arc_easy,acc_norm,0.3085016835016835,0.009477472342978126,0
boolq,acc,0.6146788990825688,0.008511930879680647,1
cb,acc,0.48214285714285715,0.0673769750864465,1
cb,f1,0.3356643356643356,,1
copa,acc,0.57,0.049756985195624284,0
hellaswag,acc,0.26538538139812784,0.004406358190678484,0
hellaswag,acc_norm,0.2664807807209719,0.004412149415717922,0
piqa,acc,0.5701849836779108,0.011550322268694083,0
piqa,acc_norm,0.5522306855277476,0.011601999796866812,0
rte,acc,0.5126353790613718,0.030086851767188564,0
sciq,acc,0.561,0.015701131345400774,0
sciq,acc_norm,0.554,0.015726771166750357,0
storycloze_2016,acc,0.5227151256012827,0.01155049419200895,0
winogrande,acc,0.5074980268350434,0.014050905521228573,0