|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363935,0
|
|
anli_r2,acc,0.337,0.014955087918653605,0
|
|
anli_r3,acc,0.3325,0.013605417345710526,0
|
|
arc_challenge,acc,0.3310580204778157,0.013752062419817834,0
|
|
arc_challenge,acc_norm,0.3447098976109215,0.013888816286782112,0
|
|
arc_easy,acc,0.6536195286195287,0.009763542075695738,0
|
|
arc_easy,acc_norm,0.6401515151515151,0.009848484848484836,0
|
|
boolq,acc,0.6409785932721712,0.008390241754319908,1
|
|
cb,acc,0.23214285714285715,0.0569293902400011,1
|
|
cb,f1,0.223351041141572,,1
|
|
copa,acc,0.76,0.042923469599092816,0
|
|
hellaswag,acc,0.5209121688906593,0.004985415250690905,0
|
|
hellaswag,acc_norm,0.689205337582155,0.004618730353217064,0
|
|
piqa,acc,0.7606093579978237,0.009955884250291688,0
|
|
piqa,acc_norm,0.7747551686615887,0.009746643471032136,0
|
|
rte,acc,0.5054151624548736,0.030094698123239966,0
|
|
sciq,acc,0.923,0.008434580140240648,0
|
|
sciq,acc_norm,0.912,0.008963053962592074,0
|
|
storycloze_2016,acc,0.7498663816141101,0.010015143382536456,0
|
|
winogrande,acc,0.5935280189423836,0.01380444869775337,0
|
|
|