task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934644,0 anli_r2,acc,0.361,0.015195720118175118,0 anli_r3,acc,0.3333333333333333,0.013613950010225603,0 arc_challenge,acc,0.20051194539249148,0.011700318050499361,0 arc_challenge,acc_norm,0.24146757679180889,0.012506564839739432,0 arc_easy,acc,0.4861111111111111,0.010255824507190342,0 arc_easy,acc_norm,0.4659090909090909,0.010235908103438688,0 boolq,acc,0.544954128440367,0.008709637955263421,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.27449576321756775,,1 copa,acc,0.64,0.04824181513244218,0 hellaswag,acc,0.3007369049990042,0.0045764127139515,0 hellaswag,acc_norm,0.3345947022505477,0.004708842600177446,0 piqa,acc,0.6425462459194777,0.011181692590867659,0 piqa,acc_norm,0.6512513601741022,0.011119263056159599,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.817,0.012233587399477825,0 sciq,acc_norm,0.81,0.012411851354816329,0 storycloze_2016,acc,0.5905932656333511,0.01137105952719707,0 winogrande,acc,0.5153906866614049,0.014045826789783665,0