task,metric,value,err,version anli_r1,acc,0.324,0.014806864733738859,0 anli_r2,acc,0.34,0.014987482264363937,0 anli_r3,acc,0.335,0.013630871843821479,0 arc_challenge,acc,0.1689419795221843,0.010949795652485024,0 arc_challenge,acc_norm,0.2090443686006826,0.011882746987406448,0 arc_easy,acc,0.32786195286195285,0.009632587076170018,0 arc_easy,acc_norm,0.3085016835016835,0.009477472342978126,0 boolq,acc,0.6146788990825688,0.008511930879680647,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3356643356643356,,1 copa,acc,0.57,0.049756985195624284,0 hellaswag,acc,0.26538538139812784,0.004406358190678484,0 hellaswag,acc_norm,0.2664807807209719,0.004412149415717922,0 piqa,acc,0.5701849836779108,0.011550322268694083,0 piqa,acc_norm,0.5522306855277476,0.011601999796866812,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.561,0.015701131345400774,0 sciq,acc_norm,0.554,0.015726771166750357,0 storycloze_2016,acc,0.5227151256012827,0.01155049419200895,0 winogrande,acc,0.5074980268350434,0.014050905521228573,0