task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811483,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.2832764505119454,0.013167478735134575,0 arc_challenge,acc_norm,0.30204778156996587,0.013417519144716417,0 arc_easy,acc,0.6047979797979798,0.010031894052790973,0 arc_easy,acc_norm,0.5526094276094277,0.01020283238541565,0 boolq,acc,0.617125382262997,0.008501734385335951,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.18803418803418803,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.45140410276837284,0.004966158142645415,0 hellaswag,acc_norm,0.5865365465046803,0.004914480534533721,0 piqa,acc,0.7486398258977149,0.010121156016819259,0 piqa,acc_norm,0.7519042437431991,0.010077118315574706,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.859,0.011010914595992446,0 sciq,acc_norm,0.792,0.012841374572096928,0 storycloze_2016,acc,0.7017637626937466,0.01057924979557881,0 winogrande,acc,0.5737963693764798,0.013898585965412342,0