task,metric,value,err,version anli_r1,acc,0.336,0.014944140233795018,0 anli_r2,acc,0.329,0.01486539538592835,0 anli_r3,acc,0.35083333333333333,0.013782212417178195,0 arc_challenge,acc,0.310580204778157,0.013522292098053059,0 arc_challenge,acc_norm,0.33532423208191126,0.013796182947785562,0 arc_easy,acc,0.6481481481481481,0.009799078929868706,0 arc_easy,acc_norm,0.6212121212121212,0.00995373765654204,0 boolq,acc,0.6382262996941896,0.008404238796949254,1 cb,acc,0.19642857142857142,0.05357142857142858,1 cb,f1,0.1668300653594771,,1 copa,acc,0.78,0.04163331998932263,0 hellaswag,acc,0.5200159330810595,0.004985781620467012,0 hellaswag,acc_norm,0.6863174666401115,0.004630407476835209,0 piqa,acc,0.7573449401523396,0.0100020025697087,0 piqa,acc_norm,0.764961915125136,0.009893146688805319,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.913,0.00891686663074591,0 sciq,acc_norm,0.889,0.009938701010583726,0 storycloze_2016,acc,0.743452699091395,0.01009926092771917,0 winogrande,acc,0.5864246250986582,0.013840971763195306,0