task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363935,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.3325,0.013605417345710526,0 arc_challenge,acc,0.3310580204778157,0.013752062419817834,0 arc_challenge,acc_norm,0.3447098976109215,0.013888816286782112,0 arc_easy,acc,0.6536195286195287,0.009763542075695738,0 arc_easy,acc_norm,0.6401515151515151,0.009848484848484836,0 boolq,acc,0.6409785932721712,0.008390241754319908,1 cb,acc,0.23214285714285715,0.0569293902400011,1 cb,f1,0.223351041141572,,1 copa,acc,0.76,0.042923469599092816,0 hellaswag,acc,0.5209121688906593,0.004985415250690905,0 hellaswag,acc_norm,0.689205337582155,0.004618730353217064,0 piqa,acc,0.7606093579978237,0.009955884250291688,0 piqa,acc_norm,0.7747551686615887,0.009746643471032136,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.923,0.008434580140240648,0 sciq,acc_norm,0.912,0.008963053962592074,0 storycloze_2016,acc,0.7498663816141101,0.010015143382536456,0 winogrande,acc,0.5935280189423836,0.01380444869775337,0