task,metric,value,err,version anli_r1,acc,0.348,0.01507060460376841,0 anli_r2,acc,0.344,0.015029633724408947,0 anli_r3,acc,0.3325,0.013605417345710528,0 arc_challenge,acc,0.3148464163822526,0.01357265770308495,0 arc_challenge,acc_norm,0.3250853242320819,0.013688147309729119,0 arc_easy,acc,0.648989898989899,0.009793703885101042,0 arc_easy,acc_norm,0.6393097643097643,0.009853512108416748,0 boolq,acc,0.6363914373088685,0.008413404209789989,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.3185837135128588,,1 copa,acc,0.78,0.04163331998932263,0 hellaswag,acc,0.5206134236207927,0.00498553915978342,0 hellaswag,acc_norm,0.6902011551483768,0.00461465517501001,0 piqa,acc,0.7584330794341676,0.009986718001804467,0 piqa,acc_norm,0.7665941240478781,0.009869247889520991,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.917,0.008728527206074792,0 sciq,acc_norm,0.911,0.009008893392651521,0 storycloze_2016,acc,0.7504008551576697,0.010008002459430848,0 winogrande,acc,0.601420678768745,0.01376035717687383,0