task,metric,value,err,version anli_r1,acc,0.338,0.01496596071022449,0 anli_r2,acc,0.37,0.015275252316519362,0 anli_r3,acc,0.33916666666666667,0.013672343491681815,0 arc_challenge,acc,0.20392491467576793,0.011774262478702247,0 arc_challenge,acc_norm,0.2525597269624573,0.012696728980207704,0 arc_easy,acc,0.4877946127946128,0.010256726235129026,0 arc_easy,acc_norm,0.4591750841750842,0.010225526906982613,0 boolq,acc,0.537920489296636,0.008719868567159636,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.26161616161616164,,1 copa,acc,0.67,0.04725815626252607,0 hellaswag,acc,0.3036247759410476,0.0045888279587751124,0 hellaswag,acc_norm,0.33061143198566023,0.0046947189182257555,0 piqa,acc,0.6371055495103374,0.011218667570840881,0 piqa,acc_norm,0.6409140369967355,0.011192949073844112,0 rte,acc,0.4548736462093863,0.029973636495415252,0 sciq,acc,0.836,0.011715000693181323,0 sciq,acc_norm,0.819,0.012181436179177904,0 storycloze_2016,acc,0.5980758952431855,0.011337815169572413,0 winogrande,acc,0.5248618784530387,0.014035102883627752,0