task,metric,value,err,version anli_r1,acc,0.339,0.01497675877162034,0 anli_r2,acc,0.342,0.015008706182121728,0 anli_r3,acc,0.36666666666666664,0.013916893275819934,0 arc_challenge,acc,0.19795221843003413,0.0116439909715734,0 arc_challenge,acc_norm,0.23378839590443687,0.012368225378507137,0 arc_easy,acc,0.4722222222222222,0.010243938285881115,0 arc_easy,acc_norm,0.4276094276094276,0.010151683397430685,0 boolq,acc,0.5948012232415902,0.008586427929715531,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.20750000000000002,,1 copa,acc,0.63,0.04852365870939099,0 hellaswag,acc,0.29944234216291576,0.004570777326263895,0 hellaswag,acc_norm,0.3315076677952599,0.004697929774670284,0 piqa,acc,0.6409140369967355,0.0111929490738441,0 piqa,acc_norm,0.6376496191512514,0.01121504021510457,0 rte,acc,0.5306859205776173,0.030039730592197812,0 sciq,acc,0.803,0.01258369378796813,0 sciq,acc_norm,0.717,0.014251810906481737,0 storycloze_2016,acc,0.5954035275253875,0.011350002165791937,0 winogrande,acc,0.5224940805051302,0.014038257824059886,0