task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934661,0 anli_r2,acc,0.355,0.01513949154378053,0 anli_r3,acc,0.35083333333333333,0.013782212417178202,0 arc_challenge,acc,0.30716723549488056,0.013481034054980945,0 arc_challenge,acc_norm,0.3199658703071672,0.013631345807016195,0 arc_easy,acc,0.6300505050505051,0.009906656266021158,0 arc_easy,acc_norm,0.6317340067340067,0.009897286209010888,0 boolq,acc,0.5871559633027523,0.00861117243047287,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.41546499477533966,,1 copa,acc,0.72,0.04512608598542128,0 hellaswag,acc,0.4582752439753037,0.004972377085916326,0 hellaswag,acc_norm,0.6056562437761402,0.004877104939356237,0 piqa,acc,0.7448313384113167,0.01017157159252182,0 piqa,acc_norm,0.7546245919477693,0.010039831320422386,0 rte,acc,0.51985559566787,0.030072723167317184,0 sciq,acc,0.924,0.008384169266796401,0 sciq,acc_norm,0.93,0.008072494358323499,0 storycloze_2016,acc,0.709246392303581,0.010501233625213076,0 winogrande,acc,0.5895816890292028,0.013825107120035865,0