task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095526,0 anli_r2,acc,0.322,0.014782913600996662,0 anli_r3,acc,0.3383333333333333,0.013664144006618275,0 arc_challenge,acc,0.20051194539249148,0.011700318050499358,0 arc_challenge,acc_norm,0.2363481228668942,0.01241496052430183,0 arc_easy,acc,0.48569023569023567,0.010255580881603624,0 arc_easy,acc_norm,0.4642255892255892,0.010233488709726556,0 boolq,acc,0.5382262996941896,0.00871946009810685,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3176319176319176,,1 copa,acc,0.63,0.04852365870939099,0 hellaswag,acc,0.29954192391953793,0.004571212360565283,0 hellaswag,acc_norm,0.3345947022505477,0.0047088426001774385,0 piqa,acc,0.6398258977149075,0.011200375176667486,0 piqa,acc_norm,0.6436343852013058,0.011174109865864729,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.844,0.01148023500612236,0 sciq,acc_norm,0.826,0.01199449323097343,0 storycloze_2016,acc,0.5889898450026724,0.011377828319387507,0 winogrande,acc,0.5217048145224941,0.01403923921648463,0