task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.353,0.015120172605483708,0 anli_r3,acc,0.3275,0.013553211167251942,0 arc_challenge,acc,0.19197952218430034,0.011509598906598095,0 arc_challenge,acc_norm,0.23378839590443687,0.012368225378507135,0 arc_easy,acc,0.4831649831649832,0.010253966261288888,0 arc_easy,acc_norm,0.4478114478114478,0.010203742451111506,0 boolq,acc,0.5532110091743119,0.008695392261996197,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3421052631578947,,1 copa,acc,0.67,0.04725815626252607,0 hellaswag,acc,0.2976498705437164,0.004562902604938726,0 hellaswag,acc_norm,0.3331009759012149,0.0047035905585525025,0 piqa,acc,0.6392818280739935,0.011204064809088515,0 piqa,acc_norm,0.6458106637649619,0.011158755672626109,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.824,0.012048616898597516,0 sciq,acc_norm,0.78,0.01310617304066176,0 storycloze_2016,acc,0.5911277391769107,0.011368775493925615,0 winogrande,acc,0.5280189423835833,0.014030404213405786,0