task,metric,value,err,version anli_r1,acc,0.35,0.01509065034144423,0 anli_r2,acc,0.346,0.015050266127564448,0 anli_r3,acc,0.355,0.013819249004047296,0 arc_challenge,acc,0.29180887372013653,0.013284525292403496,0 arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0 arc_easy,acc,0.6220538720538721,0.009949405744045469,0 arc_easy,acc_norm,0.5395622895622896,0.010227616386289017,0 boolq,acc,0.6376146788990825,0.008407308655864048,1 cb,acc,0.26785714285714285,0.05971290310957636,1 cb,f1,0.2374338624338624,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.5215096594303924,0.004985162074336112,0 hellaswag,acc_norm,0.6843258315076678,0.004638339207348913,0 piqa,acc,0.7627856365614799,0.009924694933586364,0 piqa,acc_norm,0.7747551686615887,0.00974664347103214,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.87,0.010640169792499361,0 sciq,acc_norm,0.807,0.012486268734370145,0 storycloze_2016,acc,0.7455905932656334,0.010071542492663043,0 winogrande,acc,0.5659037095501184,0.013929882555694054,0