|
task,metric,value,err,version
|
|
anli_r1,acc,0.35,0.01509065034144423,0
|
|
anli_r2,acc,0.346,0.015050266127564448,0
|
|
anli_r3,acc,0.355,0.013819249004047296,0
|
|
arc_challenge,acc,0.29180887372013653,0.013284525292403496,0
|
|
arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0
|
|
arc_easy,acc,0.6220538720538721,0.009949405744045469,0
|
|
arc_easy,acc_norm,0.5395622895622896,0.010227616386289017,0
|
|
boolq,acc,0.6376146788990825,0.008407308655864048,1
|
|
cb,acc,0.26785714285714285,0.05971290310957636,1
|
|
cb,f1,0.2374338624338624,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.5215096594303924,0.004985162074336112,0
|
|
hellaswag,acc_norm,0.6843258315076678,0.004638339207348913,0
|
|
piqa,acc,0.7627856365614799,0.009924694933586364,0
|
|
piqa,acc_norm,0.7747551686615887,0.00974664347103214,0
|
|
rte,acc,0.5270758122743683,0.030052303463143706,0
|
|
sciq,acc,0.87,0.010640169792499361,0
|
|
sciq,acc_norm,0.807,0.012486268734370145,0
|
|
storycloze_2016,acc,0.7455905932656334,0.010071542492663043,0
|
|
winogrande,acc,0.5659037095501184,0.013929882555694054,0
|
|
|