|
task,metric,value,err,version
|
|
anli_r1,acc,0.344,0.015029633724408947,0
|
|
anli_r2,acc,0.345,0.015039986742055238,0
|
|
anli_r3,acc,0.34833333333333333,0.013759437498874072,0
|
|
arc_challenge,acc,0.3199658703071672,0.013631345807016196,0
|
|
arc_challenge,acc_norm,0.3447098976109215,0.01388881628678211,0
|
|
arc_easy,acc,0.6632996632996633,0.009697166595752475,0
|
|
arc_easy,acc_norm,0.6447811447811448,0.009820245899287124,0
|
|
boolq,acc,0.636085626911315,0.00841491890912884,1
|
|
cb,acc,0.21428571428571427,0.055328333517248834,1
|
|
cb,f1,0.1997113997113997,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.5206134236207927,0.004985539159783419,0
|
|
hellaswag,acc_norm,0.6900019916351324,0.004615472210316043,0
|
|
piqa,acc,0.7665941240478781,0.009869247889521007,0
|
|
piqa,acc_norm,0.7682263329706203,0.00984514377279404,0
|
|
rte,acc,0.48736462093862815,0.030086851767188564,0
|
|
sciq,acc,0.927,0.008230354715244062,0
|
|
sciq,acc_norm,0.908,0.009144376393151108,0
|
|
storycloze_2016,acc,0.7509353287012293,0.010000841162740146,0
|
|
winogrande,acc,0.6077348066298343,0.013722400462000888,0
|
|
|