|
task,metric,value,err,version
|
|
anli_r1,acc,0.348,0.01507060460376841,0
|
|
anli_r2,acc,0.344,0.015029633724408947,0
|
|
anli_r3,acc,0.3325,0.013605417345710528,0
|
|
arc_challenge,acc,0.3148464163822526,0.01357265770308495,0
|
|
arc_challenge,acc_norm,0.3250853242320819,0.013688147309729119,0
|
|
arc_easy,acc,0.648989898989899,0.009793703885101042,0
|
|
arc_easy,acc_norm,0.6393097643097643,0.009853512108416748,0
|
|
boolq,acc,0.6363914373088685,0.008413404209789989,1
|
|
cb,acc,0.3392857142857143,0.06384226561930825,1
|
|
cb,f1,0.3185837135128588,,1
|
|
copa,acc,0.78,0.04163331998932263,0
|
|
hellaswag,acc,0.5206134236207927,0.00498553915978342,0
|
|
hellaswag,acc_norm,0.6902011551483768,0.00461465517501001,0
|
|
piqa,acc,0.7584330794341676,0.009986718001804467,0
|
|
piqa,acc_norm,0.7665941240478781,0.009869247889520991,0
|
|
rte,acc,0.48014440433212996,0.0300727231673172,0
|
|
sciq,acc,0.917,0.008728527206074792,0
|
|
sciq,acc_norm,0.911,0.009008893392651521,0
|
|
storycloze_2016,acc,0.7504008551576697,0.010008002459430848,0
|
|
winogrande,acc,0.601420678768745,0.01376035717687383,0
|
|
|