task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928362,0 anli_r2,acc,0.314,0.014683991951087962,0 anli_r3,acc,0.345,0.01372842153945487,0 arc_challenge,acc,0.31399317406143346,0.013562691224726281,0 arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0 arc_easy,acc,0.6325757575757576,0.009892552616211555,0 arc_easy,acc_norm,0.6376262626262627,0.009863468202583773,0 boolq,acc,0.5767584097859327,0.008641391399113598,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.33259154725720075,,1 copa,acc,0.71,0.045604802157206845,0 hellaswag,acc,0.45717984465245964,0.004971449552787172,0 hellaswag,acc_norm,0.6106353316072496,0.00486609688094144,0 piqa,acc,0.7442872687704026,0.010178690109459862,0 piqa,acc_norm,0.7562568008705114,0.010017199471500609,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.926,0.008282064512704159,0 sciq,acc_norm,0.941,0.007454835650406725,0 storycloze_2016,acc,0.7226082308925709,0.010353267472010768,0 winogrande,acc,0.5974743488555643,0.013782866831703044,0