lm1-misc-pile
/
421m32b32b
/421m32b32bpile
/evaluation
/rankeval
/lm1-421m-32b-results_lm-eval_global_step60336_2023-01-23-18-55-40_1shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.335, | |
"acc_stderr": 0.014933117490932575 | |
}, | |
"anli_r2": { | |
"acc": 0.353, | |
"acc_stderr": 0.015120172605483708 | |
}, | |
"anli_r3": { | |
"acc": 0.3275, | |
"acc_stderr": 0.013553211167251942 | |
}, | |
"cb": { | |
"acc": 0.48214285714285715, | |
"acc_stderr": 0.06737697508644648, | |
"f1": 0.3421052631578947 | |
}, | |
"copa": { | |
"acc": 0.67, | |
"acc_stderr": 0.04725815626252607 | |
}, | |
"hellaswag": { | |
"acc": 0.2976498705437164, | |
"acc_stderr": 0.004562902604938726, | |
"acc_norm": 0.3331009759012149, | |
"acc_norm_stderr": 0.0047035905585525025 | |
}, | |
"rte": { | |
"acc": 0.5306859205776173, | |
"acc_stderr": 0.03003973059219781 | |
}, | |
"winogrande": { | |
"acc": 0.5280189423835833, | |
"acc_stderr": 0.014030404213405786 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5911277391769107, | |
"acc_stderr": 0.011368775493925615 | |
}, | |
"boolq": { | |
"acc": 0.5532110091743119, | |
"acc_stderr": 0.008695392261996197 | |
}, | |
"arc_easy": { | |
"acc": 0.4831649831649832, | |
"acc_stderr": 0.010253966261288888, | |
"acc_norm": 0.4478114478114478, | |
"acc_norm_stderr": 0.010203742451111506 | |
}, | |
"arc_challenge": { | |
"acc": 0.19197952218430034, | |
"acc_stderr": 0.011509598906598095, | |
"acc_norm": 0.23378839590443687, | |
"acc_norm_stderr": 0.012368225378507135 | |
}, | |
"sciq": { | |
"acc": 0.824, | |
"acc_stderr": 0.012048616898597516, | |
"acc_norm": 0.78, | |
"acc_norm_stderr": 0.01310617304066176 | |
}, | |
"piqa": { | |
"acc": 0.6392818280739935, | |
"acc_stderr": 0.011204064809088515, | |
"acc_norm": 0.6458106637649619, | |
"acc_norm_stderr": 0.011158755672626109 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |