lm1-misc-pile/1b12b12b/1b112b12bpile/evaluation/lm1-1b1-12b-results_lm-eval_global_step23189_2023-01-24-13-53-32_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.317,0.014721675438880229,0
anli_r2,acc,0.334,0.01492201952373296,0
anli_r3,acc,0.3325,0.013605417345710526,0
arc_challenge,acc,0.20477815699658702,0.011792544338513398,0
arc_challenge,acc_norm,0.2431740614334471,0.012536554144587089,0
arc_easy,acc,0.47095959595959597,0.01024246382639562,0
arc_easy,acc_norm,0.4494949494949495,0.010207308833916035,0
boolq,acc,0.4792048929663609,0.008737488341370727,1
cb,acc,0.35714285714285715,0.0646095738380922,1
cb,f1,0.2563323201621074,,1
copa,acc,0.65,0.0479372485441102,0
hellaswag,acc,0.29685321649073887,0.004559375835805974,0
hellaswag,acc_norm,0.32593108942441745,0.0046776374633913965,0
piqa,acc,0.6392818280739935,0.011204064809088518,0
piqa,acc_norm,0.6360174102285092,0.011225875703487171,0
rte,acc,0.5234657039711191,0.03006330041190266,0
sciq,acc,0.841,0.011569479368271289,0
sciq,acc_norm,0.819,0.01218143617917791,0
storycloze_2016,acc,0.584179583110636,0.01139738683314014,0
winogrande,acc,0.5430149960536701,0.01400038676159829,0