lm1-misc-pile/1b11b51b5/evaluation/rankeval/lm1-1b1-1b5-results_lm-eval_global_step2891_2023-01-24-17-05-38_5shots.json
{
"results": {
"anli_r1": {
"acc": 0.344,
"acc_stderr": 0.015029633724408945
},
"anli_r2": {
"acc": 0.329,
"acc_stderr": 0.014865395385928355
},
"anli_r3": {
"acc": 0.35583333333333333,
"acc_stderr": 0.013826518748493322
},
"cb": {
"acc": 0.4642857142857143,
"acc_stderr": 0.06724777654937658,
"f1": 0.3235431235431235
},
"copa": {
"acc": 0.62,
"acc_stderr": 0.048783173121456316
},
"hellaswag": {
"acc": 0.2658832901812388,
"acc_stderr": 0.0044089948686501,
"acc_norm": 0.26558454491137223,
"acc_norm_stderr": 0.004407413723383407
},
"rte": {
"acc": 0.5523465703971119,
"acc_stderr": 0.02993107036293953
},
"winogrande": {
"acc": 0.5011838989739542,
"acc_stderr": 0.014052446290529015
},
"storycloze_2016": {
"acc": 0.5227151256012827,
"acc_stderr": 0.011550494192008948
},
"boolq": {
"acc": 0.6128440366972477,
"acc_stderr": 0.008519429207594412
},
"arc_easy": {
"acc": 0.3421717171717172,
"acc_stderr": 0.009735236771958743,
"acc_norm": 0.32365319865319864,
"acc_norm_stderr": 0.009600478182273768
},
"arc_challenge": {
"acc": 0.16467576791808874,
"acc_stderr": 0.010838369209479231,
"acc_norm": 0.20733788395904437,
"acc_norm_stderr": 0.011846905782971364
},
"sciq": {
"acc": 0.556,
"acc_stderr": 0.01571976816340209,
"acc_norm": 0.541,
"acc_norm_stderr": 0.015766025737882165
},
"piqa": {
"acc": 0.5669205658324266,
"acc_stderr": 0.01156086442315138,
"acc_norm": 0.5571273122959739,
"acc_norm_stderr": 0.011589430503509102
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}