lm1-misc-pile/421m32b32b/421m32b32bpile/evaluation/rankeval/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-23-18-55-40_1shots.csv
task,metric,value,err,version
anli_r1,acc,0.335,0.014933117490932575,0
anli_r2,acc,0.353,0.015120172605483708,0
anli_r3,acc,0.3275,0.013553211167251942,0
arc_challenge,acc,0.19197952218430034,0.011509598906598095,0
arc_challenge,acc_norm,0.23378839590443687,0.012368225378507135,0
arc_easy,acc,0.4831649831649832,0.010253966261288888,0
arc_easy,acc_norm,0.4478114478114478,0.010203742451111506,0
boolq,acc,0.5532110091743119,0.008695392261996197,1
cb,acc,0.48214285714285715,0.06737697508644648,1
cb,f1,0.3421052631578947,,1
copa,acc,0.67,0.04725815626252607,0
hellaswag,acc,0.2976498705437164,0.004562902604938726,0
hellaswag,acc_norm,0.3331009759012149,0.0047035905585525025,0
piqa,acc,0.6392818280739935,0.011204064809088515,0
piqa,acc_norm,0.6458106637649619,0.011158755672626109,0
rte,acc,0.5306859205776173,0.03003973059219781,0
sciq,acc,0.824,0.012048616898597516,0
sciq,acc_norm,0.78,0.01310617304066176,0
storycloze_2016,acc,0.5911277391769107,0.011368775493925615,0
winogrande,acc,0.5280189423835833,0.014030404213405786,0
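The CSV above uses the lm-eval column layout (task, metric, value, err as standard error, version as the task version). A minimal sketch of loading and summarizing it with pandas follows; the local filename is assumed to match the path at the top, and the snippet is an illustration rather than part of the repository.

```python
import pandas as pd

# Assumed local copy of the results file named as in the path above.
path = "lm1-421m-32b-results_lm-eval_global_step60336_2023-01-23-18-55-40_1shots.csv"

# Columns: task, metric, value, err (standard error), version (lm-eval task version).
df = pd.read_csv(path)

# Pivot so each task is one row and each metric (acc, acc_norm, f1) is a column.
table = df.pivot_table(index="task", columns="metric", values="value")
print(table.round(4))

# Single summary number: mean over tasks that report plain accuracy.
print("mean acc:", round(df.loc[df["metric"] == "acc", "value"].mean(), 4))
```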