File size: 4,006 Bytes
d4518f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
[
{
"loss": 2.0908,
"grad_norm": 0.5866689682006836,
"learning_rate": 8.923190911336132e-05,
"epoch": 0.22189421894218941,
"step": 451
},
{
"eval_loss": 1.9633848667144775,
"eval_runtime": 1103.6079,
"eval_samples_per_second": 26.193,
"eval_steps_per_second": 1.637,
"epoch": 0.22189421894218941,
"step": 451
},
{
"loss": 1.9265,
"grad_norm": 0.5757979154586792,
"learning_rate": 7.809335638429242e-05,
"epoch": 0.44378843788437883,
"step": 902
},
{
"eval_loss": 1.9037492275238037,
"eval_runtime": 1102.9066,
"eval_samples_per_second": 26.21,
"eval_steps_per_second": 1.638,
"epoch": 0.44378843788437883,
"step": 902
},
{
"loss": 1.8852,
"grad_norm": 0.5880784392356873,
"learning_rate": 6.695480365522352e-05,
"epoch": 0.6656826568265682,
"step": 1353
},
{
"eval_loss": 1.8703107833862305,
"eval_runtime": 1103.8364,
"eval_samples_per_second": 26.188,
"eval_steps_per_second": 1.637,
"epoch": 0.6656826568265682,
"step": 1353
},
{
"loss": 1.8585,
"grad_norm": 0.6274667978286743,
"learning_rate": 5.581625092615461e-05,
"epoch": 0.8875768757687577,
"step": 1804
},
{
"eval_loss": 1.8478941917419434,
"eval_runtime": 1103.7709,
"eval_samples_per_second": 26.189,
"eval_steps_per_second": 1.637,
"epoch": 0.8875768757687577,
"step": 1804
},
{
"loss": 1.8051,
"grad_norm": 0.6508978009223938,
"learning_rate": 4.4677698197085704e-05,
"epoch": 1.1094710947109472,
"step": 2255
},
{
"eval_loss": 1.835593819618225,
"eval_runtime": 1103.8475,
"eval_samples_per_second": 26.187,
"eval_steps_per_second": 1.637,
"epoch": 1.1094710947109472,
"step": 2255
},
{
"loss": 1.7622,
"grad_norm": 0.6831102967262268,
"learning_rate": 3.3539145468016795e-05,
"epoch": 1.3313653136531365,
"step": 2706
},
{
"eval_loss": 1.8246678113937378,
"eval_runtime": 1103.4364,
"eval_samples_per_second": 26.197,
"eval_steps_per_second": 1.638,
"epoch": 1.3313653136531365,
"step": 2706
},
{
"loss": 1.7536,
"grad_norm": 0.6920585036277771,
"learning_rate": 2.240059273894789e-05,
"epoch": 1.5532595325953258,
"step": 3157
},
{
"eval_loss": 1.8157387971878052,
"eval_runtime": 1103.7228,
"eval_samples_per_second": 26.19,
"eval_steps_per_second": 1.637,
"epoch": 1.5532595325953258,
"step": 3157
},
{
"loss": 1.7467,
"grad_norm": 0.6837635040283203,
"learning_rate": 1.1262040009878982e-05,
"epoch": 1.7751537515375153,
"step": 3608
},
{
"eval_loss": 1.808944821357727,
"eval_runtime": 1103.6481,
"eval_samples_per_second": 26.192,
"eval_steps_per_second": 1.637,
"epoch": 1.7751537515375153,
"step": 3608
},
{
"loss": 1.7457,
"grad_norm": 0.6841686367988586,
"learning_rate": 1.2348728081007656e-07,
"epoch": 1.9970479704797048,
"step": 4059
},
{
"eval_loss": 1.805881142616272,
"eval_runtime": 1103.2943,
"eval_samples_per_second": 26.201,
"eval_steps_per_second": 1.638,
"epoch": 1.9970479704797048,
"step": 4059
},
{
"train_runtime": 69487.1891,
"train_samples_per_second": 7.488,
"train_steps_per_second": 0.058,
"total_flos": 1.0874326325169095e+19,
"train_loss": 1.841461892437747,
"epoch": 1.9995079950799508,
"step": 4064
}
] |