task,metric,value,err,version anli_r1,acc,0.32,0.014758652303574876,0 anli_r2,acc,0.34,0.014987482264363935,0 anli_r3,acc,0.3275,0.013553211167251951,0 arc_challenge,acc,0.3037542662116041,0.013438909184778757,0 arc_challenge,acc_norm,0.3293515358361775,0.013734057652635474,0 arc_easy,acc,0.6220538720538721,0.009949405744045457,0 arc_easy,acc_norm,0.6317340067340067,0.009897286209010888,0 boolq,acc,0.5978593272171254,0.008575926383211252,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.28456510809451985,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.4561840270862378,0.004970585328297621,0 hellaswag,acc_norm,0.6068512248556065,0.0048745114668368,0 piqa,acc,0.7475516866158868,0.010135665547362364,0 piqa,acc_norm,0.7524483133841132,0.010069703966857116,0 rte,acc,0.4548736462093863,0.029973636495415252,0 sciq,acc,0.921,0.008534156773333437,0 sciq,acc_norm,0.923,0.00843458014024065,0 storycloze_2016,acc,0.7145911277391769,0.010443395884062106,0 winogrande,acc,0.5951065509076559,0.013795927003124927,0