Text Generation
Transformers
PyTorch
English
llama
text-generation-inference
recycled-wizardlm-7b-v1.0 / trainer_state.json
Ming Li
Initial commit
4631d5e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9952,
"eval_steps": 500,
"global_step": 1638,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.8079,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 0.7828,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.7845,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.7802,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.7728,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.7783,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.7707,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.7661,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.7169,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 4.000000000000001e-06,
"loss": 0.7298,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 4.4e-06,
"loss": 0.7293,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 4.800000000000001e-06,
"loss": 0.6884,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 5.2e-06,
"loss": 0.7098,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 5.600000000000001e-06,
"loss": 0.694,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 6e-06,
"loss": 0.6775,
"step": 15
},
{
"epoch": 0.03,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.7008,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 6.800000000000001e-06,
"loss": 0.6801,
"step": 17
},
{
"epoch": 0.03,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.706,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 7.600000000000001e-06,
"loss": 0.6867,
"step": 19
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 0.67,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 8.400000000000001e-06,
"loss": 0.6414,
"step": 21
},
{
"epoch": 0.04,
"learning_rate": 8.8e-06,
"loss": 0.6347,
"step": 22
},
{
"epoch": 0.04,
"learning_rate": 9.200000000000002e-06,
"loss": 0.681,
"step": 23
},
{
"epoch": 0.04,
"learning_rate": 9.600000000000001e-06,
"loss": 0.6419,
"step": 24
},
{
"epoch": 0.05,
"learning_rate": 1e-05,
"loss": 0.6479,
"step": 25
},
{
"epoch": 0.05,
"learning_rate": 1.04e-05,
"loss": 0.677,
"step": 26
},
{
"epoch": 0.05,
"learning_rate": 1.0800000000000002e-05,
"loss": 0.6524,
"step": 27
},
{
"epoch": 0.05,
"learning_rate": 1.1200000000000001e-05,
"loss": 0.6438,
"step": 28
},
{
"epoch": 0.05,
"learning_rate": 1.16e-05,
"loss": 0.6681,
"step": 29
},
{
"epoch": 0.05,
"learning_rate": 1.2e-05,
"loss": 0.6418,
"step": 30
},
{
"epoch": 0.06,
"learning_rate": 1.2400000000000002e-05,
"loss": 0.6767,
"step": 31
},
{
"epoch": 0.06,
"learning_rate": 1.2800000000000001e-05,
"loss": 0.6286,
"step": 32
},
{
"epoch": 0.06,
"learning_rate": 1.3200000000000002e-05,
"loss": 0.6145,
"step": 33
},
{
"epoch": 0.06,
"learning_rate": 1.3600000000000002e-05,
"loss": 0.6542,
"step": 34
},
{
"epoch": 0.06,
"learning_rate": 1.4e-05,
"loss": 0.6431,
"step": 35
},
{
"epoch": 0.07,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.6634,
"step": 36
},
{
"epoch": 0.07,
"learning_rate": 1.48e-05,
"loss": 0.6329,
"step": 37
},
{
"epoch": 0.07,
"learning_rate": 1.5200000000000002e-05,
"loss": 0.6286,
"step": 38
},
{
"epoch": 0.07,
"learning_rate": 1.5600000000000003e-05,
"loss": 0.6587,
"step": 39
},
{
"epoch": 0.07,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.6214,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 1.64e-05,
"loss": 0.6435,
"step": 41
},
{
"epoch": 0.08,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.6204,
"step": 42
},
{
"epoch": 0.08,
"learning_rate": 1.72e-05,
"loss": 0.6182,
"step": 43
},
{
"epoch": 0.08,
"learning_rate": 1.76e-05,
"loss": 0.6466,
"step": 44
},
{
"epoch": 0.08,
"learning_rate": 1.8e-05,
"loss": 0.6283,
"step": 45
},
{
"epoch": 0.08,
"learning_rate": 1.8400000000000003e-05,
"loss": 0.6419,
"step": 46
},
{
"epoch": 0.09,
"learning_rate": 1.88e-05,
"loss": 0.5917,
"step": 47
},
{
"epoch": 0.09,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.6473,
"step": 48
},
{
"epoch": 0.09,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.5984,
"step": 49
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.6266,
"step": 50
},
{
"epoch": 0.09,
"learning_rate": 1.9999980431000962e-05,
"loss": 0.592,
"step": 51
},
{
"epoch": 0.1,
"learning_rate": 1.999992172408044e-05,
"loss": 0.6053,
"step": 52
},
{
"epoch": 0.1,
"learning_rate": 1.999982387946819e-05,
"loss": 0.6411,
"step": 53
},
{
"epoch": 0.1,
"learning_rate": 1.999968689754717e-05,
"loss": 0.6284,
"step": 54
},
{
"epoch": 0.1,
"learning_rate": 1.999951077885349e-05,
"loss": 0.6482,
"step": 55
},
{
"epoch": 0.1,
"learning_rate": 1.9999295524076455e-05,
"loss": 0.6372,
"step": 56
},
{
"epoch": 0.1,
"learning_rate": 1.9999041134058514e-05,
"loss": 0.6351,
"step": 57
},
{
"epoch": 0.11,
"learning_rate": 1.999874760979531e-05,
"loss": 0.634,
"step": 58
},
{
"epoch": 0.11,
"learning_rate": 1.999841495243563e-05,
"loss": 0.6034,
"step": 59
},
{
"epoch": 0.11,
"learning_rate": 1.9998043163281435e-05,
"loss": 0.6062,
"step": 60
},
{
"epoch": 0.11,
"learning_rate": 1.999763224378783e-05,
"loss": 0.6315,
"step": 61
},
{
"epoch": 0.11,
"learning_rate": 1.999718219556307e-05,
"loss": 0.6283,
"step": 62
},
{
"epoch": 0.12,
"learning_rate": 1.999669302036856e-05,
"loss": 0.626,
"step": 63
},
{
"epoch": 0.12,
"learning_rate": 1.999616472011883e-05,
"loss": 0.6246,
"step": 64
},
{
"epoch": 0.12,
"learning_rate": 1.9995597296881542e-05,
"loss": 0.6058,
"step": 65
},
{
"epoch": 0.12,
"learning_rate": 1.9994990752877473e-05,
"loss": 0.5862,
"step": 66
},
{
"epoch": 0.12,
"learning_rate": 1.999434509048052e-05,
"loss": 0.6235,
"step": 67
},
{
"epoch": 0.12,
"learning_rate": 1.9993660312217674e-05,
"loss": 0.5882,
"step": 68
},
{
"epoch": 0.13,
"learning_rate": 1.999293642076902e-05,
"loss": 0.633,
"step": 69
},
{
"epoch": 0.13,
"learning_rate": 1.999217341896772e-05,
"loss": 0.5946,
"step": 70
},
{
"epoch": 0.13,
"learning_rate": 1.9991371309800024e-05,
"loss": 0.6311,
"step": 71
},
{
"epoch": 0.13,
"learning_rate": 1.999053009640521e-05,
"loss": 0.6261,
"step": 72
},
{
"epoch": 0.13,
"learning_rate": 1.998964978207563e-05,
"loss": 0.6138,
"step": 73
},
{
"epoch": 0.14,
"learning_rate": 1.9988730370256654e-05,
"loss": 0.6095,
"step": 74
},
{
"epoch": 0.14,
"learning_rate": 1.9987771864546677e-05,
"loss": 0.6492,
"step": 75
},
{
"epoch": 0.14,
"learning_rate": 1.9986774268697098e-05,
"loss": 0.586,
"step": 76
},
{
"epoch": 0.14,
"learning_rate": 1.9985737586612307e-05,
"loss": 0.5801,
"step": 77
},
{
"epoch": 0.14,
"learning_rate": 1.998466182234967e-05,
"loss": 0.6112,
"step": 78
},
{
"epoch": 0.14,
"learning_rate": 1.998354698011951e-05,
"loss": 0.6219,
"step": 79
},
{
"epoch": 0.15,
"learning_rate": 1.9982393064285106e-05,
"loss": 0.6038,
"step": 80
},
{
"epoch": 0.15,
"learning_rate": 1.9981200079362645e-05,
"loss": 0.6138,
"step": 81
},
{
"epoch": 0.15,
"learning_rate": 1.997996803002123e-05,
"loss": 0.6318,
"step": 82
},
{
"epoch": 0.15,
"learning_rate": 1.997869692108286e-05,
"loss": 0.6058,
"step": 83
},
{
"epoch": 0.15,
"learning_rate": 1.99773867575224e-05,
"loss": 0.6175,
"step": 84
},
{
"epoch": 0.16,
"learning_rate": 1.997603754446757e-05,
"loss": 0.5948,
"step": 85
},
{
"epoch": 0.16,
"learning_rate": 1.997464928719892e-05,
"loss": 0.5982,
"step": 86
},
{
"epoch": 0.16,
"learning_rate": 1.9973221991149804e-05,
"loss": 0.6268,
"step": 87
},
{
"epoch": 0.16,
"learning_rate": 1.9971755661906376e-05,
"loss": 0.5966,
"step": 88
},
{
"epoch": 0.16,
"learning_rate": 1.9970250305207557e-05,
"loss": 0.5967,
"step": 89
},
{
"epoch": 0.16,
"learning_rate": 1.9968705926945015e-05,
"loss": 0.6143,
"step": 90
},
{
"epoch": 0.17,
"learning_rate": 1.996712253316313e-05,
"loss": 0.6139,
"step": 91
},
{
"epoch": 0.17,
"learning_rate": 1.9965500130058996e-05,
"loss": 0.5995,
"step": 92
},
{
"epoch": 0.17,
"learning_rate": 1.9963838723982368e-05,
"loss": 0.5798,
"step": 93
},
{
"epoch": 0.17,
"learning_rate": 1.9962138321435658e-05,
"loss": 0.607,
"step": 94
},
{
"epoch": 0.17,
"learning_rate": 1.99603989290739e-05,
"loss": 0.6157,
"step": 95
},
{
"epoch": 0.18,
"learning_rate": 1.9958620553704737e-05,
"loss": 0.643,
"step": 96
},
{
"epoch": 0.18,
"learning_rate": 1.9956803202288358e-05,
"loss": 0.6246,
"step": 97
},
{
"epoch": 0.18,
"learning_rate": 1.9954946881937525e-05,
"loss": 0.6213,
"step": 98
},
{
"epoch": 0.18,
"learning_rate": 1.99530515999175e-05,
"loss": 0.5969,
"step": 99
},
{
"epoch": 0.18,
"learning_rate": 1.9951117363646036e-05,
"loss": 0.6109,
"step": 100
},
{
"epoch": 0.18,
"learning_rate": 1.994914418069335e-05,
"loss": 0.6268,
"step": 101
},
{
"epoch": 0.19,
"learning_rate": 1.9947132058782082e-05,
"loss": 0.5923,
"step": 102
},
{
"epoch": 0.19,
"learning_rate": 1.994508100578728e-05,
"loss": 0.6107,
"step": 103
},
{
"epoch": 0.19,
"learning_rate": 1.9942991029736346e-05,
"loss": 0.6048,
"step": 104
},
{
"epoch": 0.19,
"learning_rate": 1.9940862138809036e-05,
"loss": 0.6215,
"step": 105
},
{
"epoch": 0.19,
"learning_rate": 1.9938694341337398e-05,
"loss": 0.614,
"step": 106
},
{
"epoch": 0.2,
"learning_rate": 1.993648764580576e-05,
"loss": 0.5971,
"step": 107
},
{
"epoch": 0.2,
"learning_rate": 1.993424206085068e-05,
"loss": 0.6248,
"step": 108
},
{
"epoch": 0.2,
"learning_rate": 1.993195759526094e-05,
"loss": 0.612,
"step": 109
},
{
"epoch": 0.2,
"learning_rate": 1.9929634257977467e-05,
"loss": 0.6018,
"step": 110
},
{
"epoch": 0.2,
"learning_rate": 1.992727205809335e-05,
"loss": 0.5908,
"step": 111
},
{
"epoch": 0.2,
"learning_rate": 1.992487100485376e-05,
"loss": 0.6068,
"step": 112
},
{
"epoch": 0.21,
"learning_rate": 1.9922431107655942e-05,
"loss": 0.6192,
"step": 113
},
{
"epoch": 0.21,
"learning_rate": 1.991995237604916e-05,
"loss": 0.5977,
"step": 114
},
{
"epoch": 0.21,
"learning_rate": 1.991743481973468e-05,
"loss": 0.6301,
"step": 115
},
{
"epoch": 0.21,
"learning_rate": 1.9914878448565712e-05,
"loss": 0.604,
"step": 116
},
{
"epoch": 0.21,
"learning_rate": 1.991228327254738e-05,
"loss": 0.5903,
"step": 117
},
{
"epoch": 0.22,
"learning_rate": 1.9909649301836675e-05,
"loss": 0.5724,
"step": 118
},
{
"epoch": 0.22,
"learning_rate": 1.9906976546742443e-05,
"loss": 0.5983,
"step": 119
},
{
"epoch": 0.22,
"learning_rate": 1.990426501772531e-05,
"loss": 0.6081,
"step": 120
},
{
"epoch": 0.22,
"learning_rate": 1.990151472539765e-05,
"loss": 0.6154,
"step": 121
},
{
"epoch": 0.22,
"learning_rate": 1.989872568052357e-05,
"loss": 0.6173,
"step": 122
},
{
"epoch": 0.22,
"learning_rate": 1.9895897894018822e-05,
"loss": 0.6004,
"step": 123
},
{
"epoch": 0.23,
"learning_rate": 1.98930313769508e-05,
"loss": 0.6188,
"step": 124
},
{
"epoch": 0.23,
"learning_rate": 1.9890126140538476e-05,
"loss": 0.5958,
"step": 125
},
{
"epoch": 0.23,
"learning_rate": 1.988718219615237e-05,
"loss": 0.6109,
"step": 126
},
{
"epoch": 0.23,
"learning_rate": 1.9884199555314484e-05,
"loss": 0.6114,
"step": 127
},
{
"epoch": 0.23,
"learning_rate": 1.9881178229698278e-05,
"loss": 0.5804,
"step": 128
},
{
"epoch": 0.24,
"learning_rate": 1.987811823112862e-05,
"loss": 0.6244,
"step": 129
},
{
"epoch": 0.24,
"learning_rate": 1.987501957158173e-05,
"loss": 0.5762,
"step": 130
},
{
"epoch": 0.24,
"learning_rate": 1.9871882263185137e-05,
"loss": 0.5868,
"step": 131
},
{
"epoch": 0.24,
"learning_rate": 1.9868706318217645e-05,
"loss": 0.6118,
"step": 132
},
{
"epoch": 0.24,
"learning_rate": 1.986549174910926e-05,
"loss": 0.6156,
"step": 133
},
{
"epoch": 0.25,
"learning_rate": 1.9862238568441166e-05,
"loss": 0.6046,
"step": 134
},
{
"epoch": 0.25,
"learning_rate": 1.985894678894566e-05,
"loss": 0.6137,
"step": 135
},
{
"epoch": 0.25,
"learning_rate": 1.9855616423506106e-05,
"loss": 0.5729,
"step": 136
},
{
"epoch": 0.25,
"learning_rate": 1.9852247485156892e-05,
"loss": 0.5827,
"step": 137
},
{
"epoch": 0.25,
"learning_rate": 1.9848839987083366e-05,
"loss": 0.6024,
"step": 138
},
{
"epoch": 0.25,
"learning_rate": 1.9845393942621793e-05,
"loss": 0.6106,
"step": 139
},
{
"epoch": 0.26,
"learning_rate": 1.98419093652593e-05,
"loss": 0.5903,
"step": 140
},
{
"epoch": 0.26,
"learning_rate": 1.983838626863383e-05,
"loss": 0.5913,
"step": 141
},
{
"epoch": 0.26,
"learning_rate": 1.9834824666534073e-05,
"loss": 0.5789,
"step": 142
},
{
"epoch": 0.26,
"learning_rate": 1.9831224572899424e-05,
"loss": 0.5984,
"step": 143
},
{
"epoch": 0.26,
"learning_rate": 1.9827586001819933e-05,
"loss": 0.6024,
"step": 144
},
{
"epoch": 0.27,
"learning_rate": 1.9823908967536242e-05,
"loss": 0.6001,
"step": 145
},
{
"epoch": 0.27,
"learning_rate": 1.9820193484439523e-05,
"loss": 0.5916,
"step": 146
},
{
"epoch": 0.27,
"learning_rate": 1.981643956707143e-05,
"loss": 0.5942,
"step": 147
},
{
"epoch": 0.27,
"learning_rate": 1.981264723012405e-05,
"loss": 0.5926,
"step": 148
},
{
"epoch": 0.27,
"learning_rate": 1.9808816488439834e-05,
"loss": 0.5747,
"step": 149
},
{
"epoch": 0.27,
"learning_rate": 1.9804947357011525e-05,
"loss": 0.5834,
"step": 150
},
{
"epoch": 0.28,
"learning_rate": 1.980103985098214e-05,
"loss": 0.6075,
"step": 151
},
{
"epoch": 0.28,
"learning_rate": 1.9797093985644866e-05,
"loss": 0.6352,
"step": 152
},
{
"epoch": 0.28,
"learning_rate": 1.979310977644304e-05,
"loss": 0.5982,
"step": 153
},
{
"epoch": 0.28,
"learning_rate": 1.9789087238970056e-05,
"loss": 0.6055,
"step": 154
},
{
"epoch": 0.28,
"learning_rate": 1.9785026388969316e-05,
"loss": 0.5945,
"step": 155
},
{
"epoch": 0.29,
"learning_rate": 1.978092724233418e-05,
"loss": 0.6098,
"step": 156
},
{
"epoch": 0.29,
"learning_rate": 1.9776789815107882e-05,
"loss": 0.6038,
"step": 157
},
{
"epoch": 0.29,
"learning_rate": 1.9772614123483488e-05,
"loss": 0.6047,
"step": 158
},
{
"epoch": 0.29,
"learning_rate": 1.9768400183803814e-05,
"loss": 0.5774,
"step": 159
},
{
"epoch": 0.29,
"learning_rate": 1.9764148012561384e-05,
"loss": 0.6083,
"step": 160
},
{
"epoch": 0.29,
"learning_rate": 1.9759857626398336e-05,
"loss": 0.6183,
"step": 161
},
{
"epoch": 0.3,
"learning_rate": 1.9755529042106394e-05,
"loss": 0.5771,
"step": 162
},
{
"epoch": 0.3,
"learning_rate": 1.975116227662676e-05,
"loss": 0.6356,
"step": 163
},
{
"epoch": 0.3,
"learning_rate": 1.974675734705008e-05,
"loss": 0.5723,
"step": 164
},
{
"epoch": 0.3,
"learning_rate": 1.9742314270616375e-05,
"loss": 0.584,
"step": 165
},
{
"epoch": 0.3,
"learning_rate": 1.973783306471495e-05,
"loss": 0.5755,
"step": 166
},
{
"epoch": 0.31,
"learning_rate": 1.973331374688435e-05,
"loss": 0.6151,
"step": 167
},
{
"epoch": 0.31,
"learning_rate": 1.972875633481228e-05,
"loss": 0.5881,
"step": 168
},
{
"epoch": 0.31,
"learning_rate": 1.9724160846335535e-05,
"loss": 0.5915,
"step": 169
},
{
"epoch": 0.31,
"learning_rate": 1.9719527299439944e-05,
"loss": 0.6112,
"step": 170
},
{
"epoch": 0.31,
"learning_rate": 1.9714855712260275e-05,
"loss": 0.5792,
"step": 171
},
{
"epoch": 0.31,
"learning_rate": 1.971014610308019e-05,
"loss": 0.5869,
"step": 172
},
{
"epoch": 0.32,
"learning_rate": 1.9705398490332154e-05,
"loss": 0.5925,
"step": 173
},
{
"epoch": 0.32,
"learning_rate": 1.9700612892597376e-05,
"loss": 0.5889,
"step": 174
},
{
"epoch": 0.32,
"learning_rate": 1.9695789328605722e-05,
"loss": 0.5877,
"step": 175
},
{
"epoch": 0.32,
"learning_rate": 1.969092781723566e-05,
"loss": 0.5891,
"step": 176
},
{
"epoch": 0.32,
"learning_rate": 1.968602837751417e-05,
"loss": 0.6091,
"step": 177
},
{
"epoch": 0.33,
"learning_rate": 1.968109102861668e-05,
"loss": 0.6106,
"step": 178
},
{
"epoch": 0.33,
"learning_rate": 1.9676115789866982e-05,
"loss": 0.616,
"step": 179
},
{
"epoch": 0.33,
"learning_rate": 1.9671102680737172e-05,
"loss": 0.6002,
"step": 180
},
{
"epoch": 0.33,
"learning_rate": 1.9666051720847546e-05,
"loss": 0.6018,
"step": 181
},
{
"epoch": 0.33,
"learning_rate": 1.9660962929966553e-05,
"loss": 0.5887,
"step": 182
},
{
"epoch": 0.33,
"learning_rate": 1.9655836328010708e-05,
"loss": 0.6081,
"step": 183
},
{
"epoch": 0.34,
"learning_rate": 1.9650671935044497e-05,
"loss": 0.5922,
"step": 184
},
{
"epoch": 0.34,
"learning_rate": 1.9645469771280326e-05,
"loss": 0.6012,
"step": 185
},
{
"epoch": 0.34,
"learning_rate": 1.9640229857078417e-05,
"loss": 0.6147,
"step": 186
},
{
"epoch": 0.34,
"learning_rate": 1.963495221294675e-05,
"loss": 0.5961,
"step": 187
},
{
"epoch": 0.34,
"learning_rate": 1.9629636859540962e-05,
"loss": 0.6114,
"step": 188
},
{
"epoch": 0.35,
"learning_rate": 1.9624283817664287e-05,
"loss": 0.558,
"step": 189
},
{
"epoch": 0.35,
"learning_rate": 1.9618893108267457e-05,
"loss": 0.5626,
"step": 190
},
{
"epoch": 0.35,
"learning_rate": 1.961346475244863e-05,
"loss": 0.5684,
"step": 191
},
{
"epoch": 0.35,
"learning_rate": 1.9607998771453306e-05,
"loss": 0.5819,
"step": 192
},
{
"epoch": 0.35,
"learning_rate": 1.9602495186674237e-05,
"loss": 0.5933,
"step": 193
},
{
"epoch": 0.35,
"learning_rate": 1.9596954019651354e-05,
"loss": 0.5997,
"step": 194
},
{
"epoch": 0.36,
"learning_rate": 1.9591375292071677e-05,
"loss": 0.5898,
"step": 195
},
{
"epoch": 0.36,
"learning_rate": 1.9585759025769225e-05,
"loss": 0.6317,
"step": 196
},
{
"epoch": 0.36,
"learning_rate": 1.958010524272494e-05,
"loss": 0.5999,
"step": 197
},
{
"epoch": 0.36,
"learning_rate": 1.95744139650666e-05,
"loss": 0.5931,
"step": 198
},
{
"epoch": 0.36,
"learning_rate": 1.9568685215068726e-05,
"loss": 0.5992,
"step": 199
},
{
"epoch": 0.37,
"learning_rate": 1.95629190151525e-05,
"loss": 0.6168,
"step": 200
},
{
"epoch": 0.37,
"learning_rate": 1.9557115387885667e-05,
"loss": 0.5959,
"step": 201
},
{
"epoch": 0.37,
"learning_rate": 1.955127435598247e-05,
"loss": 0.5897,
"step": 202
},
{
"epoch": 0.37,
"learning_rate": 1.9545395942303538e-05,
"loss": 0.5889,
"step": 203
},
{
"epoch": 0.37,
"learning_rate": 1.95394801698558e-05,
"loss": 0.5823,
"step": 204
},
{
"epoch": 0.37,
"learning_rate": 1.953352706179241e-05,
"loss": 0.6082,
"step": 205
},
{
"epoch": 0.38,
"learning_rate": 1.9527536641412637e-05,
"loss": 0.5797,
"step": 206
},
{
"epoch": 0.38,
"learning_rate": 1.9521508932161796e-05,
"loss": 0.5849,
"step": 207
},
{
"epoch": 0.38,
"learning_rate": 1.951544395763112e-05,
"loss": 0.5984,
"step": 208
},
{
"epoch": 0.38,
"learning_rate": 1.9509341741557716e-05,
"loss": 0.5976,
"step": 209
},
{
"epoch": 0.38,
"learning_rate": 1.9503202307824433e-05,
"loss": 0.5769,
"step": 210
},
{
"epoch": 0.39,
"learning_rate": 1.9497025680459786e-05,
"loss": 0.5792,
"step": 211
},
{
"epoch": 0.39,
"learning_rate": 1.949081188363786e-05,
"loss": 0.5952,
"step": 212
},
{
"epoch": 0.39,
"learning_rate": 1.9484560941678207e-05,
"loss": 0.5986,
"step": 213
},
{
"epoch": 0.39,
"learning_rate": 1.9478272879045764e-05,
"loss": 0.5843,
"step": 214
},
{
"epoch": 0.39,
"learning_rate": 1.947194772035075e-05,
"loss": 0.5933,
"step": 215
},
{
"epoch": 0.39,
"learning_rate": 1.9465585490348574e-05,
"loss": 0.5783,
"step": 216
},
{
"epoch": 0.4,
"learning_rate": 1.9459186213939723e-05,
"loss": 0.5939,
"step": 217
},
{
"epoch": 0.4,
"learning_rate": 1.945274991616969e-05,
"loss": 0.597,
"step": 218
},
{
"epoch": 0.4,
"learning_rate": 1.944627662222885e-05,
"loss": 0.5597,
"step": 219
},
{
"epoch": 0.4,
"learning_rate": 1.9439766357452386e-05,
"loss": 0.5595,
"step": 220
},
{
"epoch": 0.4,
"learning_rate": 1.9433219147320166e-05,
"loss": 0.608,
"step": 221
},
{
"epoch": 0.41,
"learning_rate": 1.942663501745666e-05,
"loss": 0.6106,
"step": 222
},
{
"epoch": 0.41,
"learning_rate": 1.9420013993630843e-05,
"loss": 0.5993,
"step": 223
},
{
"epoch": 0.41,
"learning_rate": 1.9413356101756063e-05,
"loss": 0.5813,
"step": 224
},
{
"epoch": 0.41,
"learning_rate": 1.940666136788999e-05,
"loss": 0.5912,
"step": 225
},
{
"epoch": 0.41,
"learning_rate": 1.9399929818234457e-05,
"loss": 0.581,
"step": 226
},
{
"epoch": 0.42,
"learning_rate": 1.939316147913541e-05,
"loss": 0.5803,
"step": 227
},
{
"epoch": 0.42,
"learning_rate": 1.9386356377082776e-05,
"loss": 0.5895,
"step": 228
},
{
"epoch": 0.42,
"learning_rate": 1.937951453871036e-05,
"loss": 0.5689,
"step": 229
},
{
"epoch": 0.42,
"learning_rate": 1.9372635990795744e-05,
"loss": 0.6018,
"step": 230
},
{
"epoch": 0.42,
"learning_rate": 1.9365720760260193e-05,
"loss": 0.6061,
"step": 231
},
{
"epoch": 0.42,
"learning_rate": 1.935876887416853e-05,
"loss": 0.5929,
"step": 232
},
{
"epoch": 0.43,
"learning_rate": 1.9351780359729047e-05,
"loss": 0.5928,
"step": 233
},
{
"epoch": 0.43,
"learning_rate": 1.934475524429339e-05,
"loss": 0.587,
"step": 234
},
{
"epoch": 0.43,
"learning_rate": 1.9337693555356458e-05,
"loss": 0.6007,
"step": 235
},
{
"epoch": 0.43,
"learning_rate": 1.9330595320556286e-05,
"loss": 0.5837,
"step": 236
},
{
"epoch": 0.43,
"learning_rate": 1.932346056767394e-05,
"loss": 0.5752,
"step": 237
},
{
"epoch": 0.44,
"learning_rate": 1.931628932463342e-05,
"loss": 0.6123,
"step": 238
},
{
"epoch": 0.44,
"learning_rate": 1.9309081619501533e-05,
"loss": 0.5801,
"step": 239
},
{
"epoch": 0.44,
"learning_rate": 1.9301837480487794e-05,
"loss": 0.596,
"step": 240
},
{
"epoch": 0.44,
"learning_rate": 1.929455693594431e-05,
"loss": 0.6054,
"step": 241
},
{
"epoch": 0.44,
"learning_rate": 1.9287240014365682e-05,
"loss": 0.5945,
"step": 242
},
{
"epoch": 0.44,
"learning_rate": 1.9279886744388875e-05,
"loss": 0.5767,
"step": 243
},
{
"epoch": 0.45,
"learning_rate": 1.9272497154793107e-05,
"loss": 0.5914,
"step": 244
},
{
"epoch": 0.45,
"learning_rate": 1.926507127449976e-05,
"loss": 0.6313,
"step": 245
},
{
"epoch": 0.45,
"learning_rate": 1.925760913257224e-05,
"loss": 0.5949,
"step": 246
},
{
"epoch": 0.45,
"learning_rate": 1.9250110758215882e-05,
"loss": 0.5606,
"step": 247
},
{
"epoch": 0.45,
"learning_rate": 1.9242576180777816e-05,
"loss": 0.5818,
"step": 248
},
{
"epoch": 0.46,
"learning_rate": 1.923500542974687e-05,
"loss": 0.5786,
"step": 249
},
{
"epoch": 0.46,
"learning_rate": 1.922739853475345e-05,
"loss": 0.5931,
"step": 250
},
{
"epoch": 0.46,
"learning_rate": 1.9219755525569418e-05,
"loss": 0.5688,
"step": 251
},
{
"epoch": 0.46,
"learning_rate": 1.921207643210799e-05,
"loss": 0.6043,
"step": 252
},
{
"epoch": 0.46,
"learning_rate": 1.9204361284423586e-05,
"loss": 0.6096,
"step": 253
},
{
"epoch": 0.46,
"learning_rate": 1.9196610112711763e-05,
"loss": 0.5528,
"step": 254
},
{
"epoch": 0.47,
"learning_rate": 1.9188822947309046e-05,
"loss": 0.5853,
"step": 255
},
{
"epoch": 0.47,
"learning_rate": 1.918099981869285e-05,
"loss": 0.5707,
"step": 256
},
{
"epoch": 0.47,
"learning_rate": 1.9173140757481325e-05,
"loss": 0.5716,
"step": 257
},
{
"epoch": 0.47,
"learning_rate": 1.9165245794433272e-05,
"loss": 0.6002,
"step": 258
},
{
"epoch": 0.47,
"learning_rate": 1.9157314960447988e-05,
"loss": 0.5648,
"step": 259
},
{
"epoch": 0.48,
"learning_rate": 1.9149348286565176e-05,
"loss": 0.5971,
"step": 260
},
{
"epoch": 0.48,
"learning_rate": 1.91413458039648e-05,
"loss": 0.5905,
"step": 261
},
{
"epoch": 0.48,
"learning_rate": 1.9133307543966976e-05,
"loss": 0.5786,
"step": 262
},
{
"epoch": 0.48,
"learning_rate": 1.9125233538031836e-05,
"loss": 0.5853,
"step": 263
},
{
"epoch": 0.48,
"learning_rate": 1.9117123817759438e-05,
"loss": 0.6005,
"step": 264
},
{
"epoch": 0.48,
"learning_rate": 1.9108978414889595e-05,
"loss": 0.569,
"step": 265
},
{
"epoch": 0.49,
"learning_rate": 1.910079736130178e-05,
"loss": 0.6134,
"step": 266
},
{
"epoch": 0.49,
"learning_rate": 1.9092580689015007e-05,
"loss": 0.5816,
"step": 267
},
{
"epoch": 0.49,
"learning_rate": 1.9084328430187677e-05,
"loss": 0.5978,
"step": 268
},
{
"epoch": 0.49,
"learning_rate": 1.907604061711749e-05,
"loss": 0.5856,
"step": 269
},
{
"epoch": 0.49,
"learning_rate": 1.906771728224128e-05,
"loss": 0.5805,
"step": 270
},
{
"epoch": 0.5,
"learning_rate": 1.9059358458134914e-05,
"loss": 0.5881,
"step": 271
},
{
"epoch": 0.5,
"learning_rate": 1.9050964177513158e-05,
"loss": 0.5941,
"step": 272
},
{
"epoch": 0.5,
"learning_rate": 1.9042534473229544e-05,
"loss": 0.5932,
"step": 273
},
{
"epoch": 0.5,
"learning_rate": 1.903406937827625e-05,
"loss": 0.5798,
"step": 274
},
{
"epoch": 0.5,
"learning_rate": 1.902556892578396e-05,
"loss": 0.5978,
"step": 275
},
{
"epoch": 0.5,
"learning_rate": 1.9017033149021747e-05,
"loss": 0.5805,
"step": 276
},
{
"epoch": 0.51,
"learning_rate": 1.9008462081396924e-05,
"loss": 0.5915,
"step": 277
},
{
"epoch": 0.51,
"learning_rate": 1.8999855756454945e-05,
"loss": 0.5872,
"step": 278
},
{
"epoch": 0.51,
"learning_rate": 1.8991214207879233e-05,
"loss": 0.564,
"step": 279
},
{
"epoch": 0.51,
"learning_rate": 1.898253746949109e-05,
"loss": 0.571,
"step": 280
},
{
"epoch": 0.51,
"learning_rate": 1.897382557524952e-05,
"loss": 0.5675,
"step": 281
},
{
"epoch": 0.52,
"learning_rate": 1.8965078559251144e-05,
"loss": 0.5753,
"step": 282
},
{
"epoch": 0.52,
"learning_rate": 1.8956296455730027e-05,
"loss": 0.5819,
"step": 283
},
{
"epoch": 0.52,
"learning_rate": 1.8947479299057562e-05,
"loss": 0.5728,
"step": 284
},
{
"epoch": 0.52,
"learning_rate": 1.893862712374234e-05,
"loss": 0.5565,
"step": 285
},
{
"epoch": 0.52,
"learning_rate": 1.8929739964430002e-05,
"loss": 0.5988,
"step": 286
},
{
"epoch": 0.52,
"learning_rate": 1.8920817855903103e-05,
"loss": 0.5909,
"step": 287
},
{
"epoch": 0.53,
"learning_rate": 1.8911860833081e-05,
"loss": 0.5718,
"step": 288
},
{
"epoch": 0.53,
"learning_rate": 1.8902868931019683e-05,
"loss": 0.5807,
"step": 289
},
{
"epoch": 0.53,
"learning_rate": 1.8893842184911656e-05,
"loss": 0.5766,
"step": 290
},
{
"epoch": 0.53,
"learning_rate": 1.8884780630085795e-05,
"loss": 0.5602,
"step": 291
},
{
"epoch": 0.53,
"learning_rate": 1.8875684302007215e-05,
"loss": 0.5749,
"step": 292
},
{
"epoch": 0.54,
"learning_rate": 1.886655323627712e-05,
"loss": 0.5826,
"step": 293
},
{
"epoch": 0.54,
"learning_rate": 1.8857387468632675e-05,
"loss": 0.5909,
"step": 294
},
{
"epoch": 0.54,
"learning_rate": 1.884818703494686e-05,
"loss": 0.5728,
"step": 295
},
{
"epoch": 0.54,
"learning_rate": 1.8838951971228326e-05,
"loss": 0.5622,
"step": 296
},
{
"epoch": 0.54,
"learning_rate": 1.8829682313621268e-05,
"loss": 0.5836,
"step": 297
},
{
"epoch": 0.54,
"learning_rate": 1.8820378098405272e-05,
"loss": 0.5974,
"step": 298
},
{
"epoch": 0.55,
"learning_rate": 1.8811039361995173e-05,
"loss": 0.5754,
"step": 299
},
{
"epoch": 0.55,
"learning_rate": 1.880166614094091e-05,
"loss": 0.5675,
"step": 300
},
{
"epoch": 0.55,
"learning_rate": 1.87922584719274e-05,
"loss": 0.5901,
"step": 301
},
{
"epoch": 0.55,
"learning_rate": 1.878281639177437e-05,
"loss": 0.582,
"step": 302
},
{
"epoch": 0.55,
"learning_rate": 1.8773339937436237e-05,
"loss": 0.5656,
"step": 303
},
{
"epoch": 0.56,
"learning_rate": 1.876382914600195e-05,
"loss": 0.5803,
"step": 304
},
{
"epoch": 0.56,
"learning_rate": 1.875428405469483e-05,
"loss": 0.5909,
"step": 305
},
{
"epoch": 0.56,
"learning_rate": 1.8744704700872463e-05,
"loss": 0.5609,
"step": 306
},
{
"epoch": 0.56,
"learning_rate": 1.8735091122026522e-05,
"loss": 0.5477,
"step": 307
},
{
"epoch": 0.56,
"learning_rate": 1.8725443355782623e-05,
"loss": 0.5823,
"step": 308
},
{
"epoch": 0.57,
"learning_rate": 1.8715761439900203e-05,
"loss": 0.5714,
"step": 309
},
{
"epoch": 0.57,
"learning_rate": 1.870604541227233e-05,
"loss": 0.5884,
"step": 310
},
{
"epoch": 0.57,
"learning_rate": 1.86962953109256e-05,
"loss": 0.5589,
"step": 311
},
{
"epoch": 0.57,
"learning_rate": 1.8686511174019956e-05,
"loss": 0.5694,
"step": 312
},
{
"epoch": 0.57,
"learning_rate": 1.8676693039848548e-05,
"loss": 0.5743,
"step": 313
},
{
"epoch": 0.57,
"learning_rate": 1.866684094683759e-05,
"loss": 0.5731,
"step": 314
},
{
"epoch": 0.58,
"learning_rate": 1.86569549335462e-05,
"loss": 0.5609,
"step": 315
},
{
"epoch": 0.58,
"learning_rate": 1.864703503866626e-05,
"loss": 0.5631,
"step": 316
},
{
"epoch": 0.58,
"learning_rate": 1.8637081301022248e-05,
"loss": 0.5614,
"step": 317
},
{
"epoch": 0.58,
"learning_rate": 1.86270937595711e-05,
"loss": 0.593,
"step": 318
},
{
"epoch": 0.58,
"learning_rate": 1.8617072453402058e-05,
"loss": 0.6037,
"step": 319
},
{
"epoch": 0.59,
"learning_rate": 1.86070174217365e-05,
"loss": 0.6023,
"step": 320
},
{
"epoch": 0.59,
"learning_rate": 1.8596928703927816e-05,
"loss": 0.5916,
"step": 321
},
{
"epoch": 0.59,
"learning_rate": 1.8586806339461226e-05,
"loss": 0.5926,
"step": 322
},
{
"epoch": 0.59,
"learning_rate": 1.8576650367953634e-05,
"loss": 0.5685,
"step": 323
},
{
"epoch": 0.59,
"learning_rate": 1.8566460829153484e-05,
"loss": 0.5785,
"step": 324
},
{
"epoch": 0.59,
"learning_rate": 1.8556237762940585e-05,
"loss": 0.5775,
"step": 325
},
{
"epoch": 0.6,
"learning_rate": 1.8545981209325975e-05,
"loss": 0.5783,
"step": 326
},
{
"epoch": 0.6,
"learning_rate": 1.8535691208451757e-05,
"loss": 0.5943,
"step": 327
},
{
"epoch": 0.6,
"learning_rate": 1.8525367800590927e-05,
"loss": 0.597,
"step": 328
},
{
"epoch": 0.6,
"learning_rate": 1.8515011026147238e-05,
"loss": 0.5838,
"step": 329
},
{
"epoch": 0.6,
"learning_rate": 1.8504620925655034e-05,
"loss": 0.5721,
"step": 330
},
{
"epoch": 0.61,
"learning_rate": 1.8494197539779083e-05,
"loss": 0.5873,
"step": 331
},
{
"epoch": 0.61,
"learning_rate": 1.848374090931444e-05,
"loss": 0.5705,
"step": 332
},
{
"epoch": 0.61,
"learning_rate": 1.8473251075186257e-05,
"loss": 0.5626,
"step": 333
},
{
"epoch": 0.61,
"learning_rate": 1.8462728078449642e-05,
"loss": 0.5775,
"step": 334
},
{
"epoch": 0.61,
"learning_rate": 1.8452171960289506e-05,
"loss": 0.5936,
"step": 335
},
{
"epoch": 0.61,
"learning_rate": 1.8441582762020374e-05,
"loss": 0.5872,
"step": 336
},
{
"epoch": 0.62,
"learning_rate": 1.8430960525086255e-05,
"loss": 0.5848,
"step": 337
},
{
"epoch": 0.62,
"learning_rate": 1.8420305291060457e-05,
"loss": 0.5815,
"step": 338
},
{
"epoch": 0.62,
"learning_rate": 1.8409617101645425e-05,
"loss": 0.6121,
"step": 339
},
{
"epoch": 0.62,
"learning_rate": 1.83988959986726e-05,
"loss": 0.5709,
"step": 340
},
{
"epoch": 0.62,
"learning_rate": 1.8388142024102234e-05,
"loss": 0.5965,
"step": 341
},
{
"epoch": 0.63,
"learning_rate": 1.8377355220023223e-05,
"loss": 0.5989,
"step": 342
},
{
"epoch": 0.63,
"learning_rate": 1.8366535628652966e-05,
"loss": 0.6011,
"step": 343
},
{
"epoch": 0.63,
"learning_rate": 1.8355683292337174e-05,
"loss": 0.5804,
"step": 344
},
{
"epoch": 0.63,
"learning_rate": 1.834479825354972e-05,
"loss": 0.5699,
"step": 345
},
{
"epoch": 0.63,
"learning_rate": 1.8333880554892466e-05,
"loss": 0.5538,
"step": 346
},
{
"epoch": 0.63,
"learning_rate": 1.83229302390951e-05,
"loss": 0.5755,
"step": 347
},
{
"epoch": 0.64,
"learning_rate": 1.8311947349014968e-05,
"loss": 0.5812,
"step": 348
},
{
"epoch": 0.64,
"learning_rate": 1.8300931927636895e-05,
"loss": 0.5754,
"step": 349
},
{
"epoch": 0.64,
"learning_rate": 1.8289884018073042e-05,
"loss": 0.5936,
"step": 350
},
{
"epoch": 0.64,
"learning_rate": 1.8278803663562718e-05,
"loss": 0.6044,
"step": 351
},
{
"epoch": 0.64,
"learning_rate": 1.82676909074722e-05,
"loss": 0.5521,
"step": 352
},
{
"epoch": 0.65,
"learning_rate": 1.8256545793294606e-05,
"loss": 0.5978,
"step": 353
},
{
"epoch": 0.65,
"learning_rate": 1.8245368364649675e-05,
"loss": 0.5588,
"step": 354
},
{
"epoch": 0.65,
"learning_rate": 1.8234158665283618e-05,
"loss": 0.583,
"step": 355
},
{
"epoch": 0.65,
"learning_rate": 1.8222916739068964e-05,
"loss": 0.5646,
"step": 356
},
{
"epoch": 0.65,
"learning_rate": 1.8211642630004358e-05,
"loss": 0.5844,
"step": 357
},
{
"epoch": 0.65,
"learning_rate": 1.8200336382214406e-05,
"loss": 0.5684,
"step": 358
},
{
"epoch": 0.66,
"learning_rate": 1.8188998039949502e-05,
"loss": 0.5786,
"step": 359
},
{
"epoch": 0.66,
"learning_rate": 1.8177627647585644e-05,
"loss": 0.5932,
"step": 360
},
{
"epoch": 0.66,
"learning_rate": 1.8166225249624266e-05,
"loss": 0.5567,
"step": 361
},
{
"epoch": 0.66,
"learning_rate": 1.8154790890692082e-05,
"loss": 0.5618,
"step": 362
},
{
"epoch": 0.66,
"learning_rate": 1.8143324615540878e-05,
"loss": 0.5652,
"step": 363
},
{
"epoch": 0.67,
"learning_rate": 1.813182646904736e-05,
"loss": 0.5642,
"step": 364
},
{
"epoch": 0.67,
"learning_rate": 1.812029649621297e-05,
"loss": 0.5469,
"step": 365
},
{
"epoch": 0.67,
"learning_rate": 1.8108734742163717e-05,
"loss": 0.5477,
"step": 366
},
{
"epoch": 0.67,
"learning_rate": 1.809714125214999e-05,
"loss": 0.5935,
"step": 367
},
{
"epoch": 0.67,
"learning_rate": 1.8085516071546385e-05,
"loss": 0.5882,
"step": 368
},
{
"epoch": 0.67,
"learning_rate": 1.807385924585154e-05,
"loss": 0.5835,
"step": 369
},
{
"epoch": 0.68,
"learning_rate": 1.8062170820687925e-05,
"loss": 0.5568,
"step": 370
},
{
"epoch": 0.68,
"learning_rate": 1.805045084180171e-05,
"loss": 0.561,
"step": 371
},
{
"epoch": 0.68,
"learning_rate": 1.803869935506253e-05,
"loss": 0.5967,
"step": 372
},
{
"epoch": 0.68,
"learning_rate": 1.8026916406463368e-05,
"loss": 0.5683,
"step": 373
},
{
"epoch": 0.68,
"learning_rate": 1.8015102042120314e-05,
"loss": 0.567,
"step": 374
},
{
"epoch": 0.69,
"learning_rate": 1.8003256308272434e-05,
"loss": 0.5758,
"step": 375
},
{
"epoch": 0.69,
"learning_rate": 1.799137925128155e-05,
"loss": 0.5801,
"step": 376
},
{
"epoch": 0.69,
"learning_rate": 1.797947091763209e-05,
"loss": 0.5398,
"step": 377
},
{
"epoch": 0.69,
"learning_rate": 1.7967531353930893e-05,
"loss": 0.5696,
"step": 378
},
{
"epoch": 0.69,
"learning_rate": 1.795556060690701e-05,
"loss": 0.5672,
"step": 379
},
{
"epoch": 0.69,
"learning_rate": 1.7943558723411555e-05,
"loss": 0.5797,
"step": 380
},
{
"epoch": 0.7,
"learning_rate": 1.7931525750417497e-05,
"loss": 0.5604,
"step": 381
},
{
"epoch": 0.7,
"learning_rate": 1.791946173501948e-05,
"loss": 0.5656,
"step": 382
},
{
"epoch": 0.7,
"learning_rate": 1.790736672443365e-05,
"loss": 0.579,
"step": 383
},
{
"epoch": 0.7,
"learning_rate": 1.7895240765997455e-05,
"loss": 0.551,
"step": 384
},
{
"epoch": 0.7,
"learning_rate": 1.788308390716947e-05,
"loss": 0.559,
"step": 385
},
{
"epoch": 0.71,
"learning_rate": 1.7870896195529205e-05,
"loss": 0.5617,
"step": 386
},
{
"epoch": 0.71,
"learning_rate": 1.7858677678776923e-05,
"loss": 0.5522,
"step": 387
},
{
"epoch": 0.71,
"learning_rate": 1.7846428404733456e-05,
"loss": 0.5895,
"step": 388
},
{
"epoch": 0.71,
"learning_rate": 1.7834148421340006e-05,
"loss": 0.5826,
"step": 389
},
{
"epoch": 0.71,
"learning_rate": 1.7821837776657968e-05,
"loss": 0.5902,
"step": 390
},
{
"epoch": 0.71,
"learning_rate": 1.780949651886875e-05,
"loss": 0.5567,
"step": 391
},
{
"epoch": 0.72,
"learning_rate": 1.7797124696273553e-05,
"loss": 0.5509,
"step": 392
},
{
"epoch": 0.72,
"learning_rate": 1.7784722357293225e-05,
"loss": 0.5516,
"step": 393
},
{
"epoch": 0.72,
"learning_rate": 1.7772289550468033e-05,
"loss": 0.5642,
"step": 394
},
{
"epoch": 0.72,
"learning_rate": 1.775982632445749e-05,
"loss": 0.5823,
"step": 395
},
{
"epoch": 0.72,
"learning_rate": 1.774733272804017e-05,
"loss": 0.5602,
"step": 396
},
{
"epoch": 0.73,
"learning_rate": 1.7734808810113512e-05,
"loss": 0.5983,
"step": 397
},
{
"epoch": 0.73,
"learning_rate": 1.772225461969362e-05,
"loss": 0.5519,
"step": 398
},
{
"epoch": 0.73,
"learning_rate": 1.7709670205915084e-05,
"loss": 0.5914,
"step": 399
},
{
"epoch": 0.73,
"learning_rate": 1.7697055618030777e-05,
"loss": 0.5604,
"step": 400
},
{
"epoch": 0.73,
"learning_rate": 1.7684410905411675e-05,
"loss": 0.5462,
"step": 401
},
{
"epoch": 0.74,
"learning_rate": 1.7671736117546646e-05,
"loss": 0.534,
"step": 402
},
{
"epoch": 0.74,
"learning_rate": 1.765903130404228e-05,
"loss": 0.5752,
"step": 403
},
{
"epoch": 0.74,
"learning_rate": 1.7646296514622666e-05,
"loss": 0.5551,
"step": 404
},
{
"epoch": 0.74,
"learning_rate": 1.7633531799129227e-05,
"loss": 0.5619,
"step": 405
},
{
"epoch": 0.74,
"learning_rate": 1.76207372075205e-05,
"loss": 0.5663,
"step": 406
},
{
"epoch": 0.74,
"learning_rate": 1.7607912789871956e-05,
"loss": 0.562,
"step": 407
},
{
"epoch": 0.75,
"learning_rate": 1.75950585963758e-05,
"loss": 0.5543,
"step": 408
},
{
"epoch": 0.75,
"learning_rate": 1.7582174677340767e-05,
"loss": 0.5614,
"step": 409
},
{
"epoch": 0.75,
"learning_rate": 1.7569261083191942e-05,
"loss": 0.5856,
"step": 410
},
{
"epoch": 0.75,
"learning_rate": 1.7556317864470543e-05,
"loss": 0.6035,
"step": 411
},
{
"epoch": 0.75,
"learning_rate": 1.7543345071833745e-05,
"loss": 0.5694,
"step": 412
},
{
"epoch": 0.76,
"learning_rate": 1.7530342756054452e-05,
"loss": 0.5486,
"step": 413
},
{
"epoch": 0.76,
"learning_rate": 1.751731096802113e-05,
"loss": 0.567,
"step": 414
},
{
"epoch": 0.76,
"learning_rate": 1.7504249758737587e-05,
"loss": 0.5631,
"step": 415
},
{
"epoch": 0.76,
"learning_rate": 1.7491159179322785e-05,
"loss": 0.5486,
"step": 416
},
{
"epoch": 0.76,
"learning_rate": 1.747803928101062e-05,
"loss": 0.5754,
"step": 417
},
{
"epoch": 0.76,
"learning_rate": 1.746489011514976e-05,
"loss": 0.5776,
"step": 418
},
{
"epoch": 0.77,
"learning_rate": 1.7451711733203406e-05,
"loss": 0.5888,
"step": 419
},
{
"epoch": 0.77,
"learning_rate": 1.7438504186749105e-05,
"loss": 0.5417,
"step": 420
},
{
"epoch": 0.77,
"learning_rate": 1.7425267527478543e-05,
"loss": 0.5689,
"step": 421
},
{
"epoch": 0.77,
"learning_rate": 1.7412001807197362e-05,
"loss": 0.5896,
"step": 422
},
{
"epoch": 0.77,
"learning_rate": 1.7398707077824936e-05,
"loss": 0.5549,
"step": 423
},
{
"epoch": 0.78,
"learning_rate": 1.7385383391394174e-05,
"loss": 0.5503,
"step": 424
},
{
"epoch": 0.78,
"learning_rate": 1.737203080005131e-05,
"loss": 0.5957,
"step": 425
},
{
"epoch": 0.78,
"learning_rate": 1.735864935605572e-05,
"loss": 0.5354,
"step": 426
},
{
"epoch": 0.78,
"learning_rate": 1.7345239111779697e-05,
"loss": 0.558,
"step": 427
},
{
"epoch": 0.78,
"learning_rate": 1.733180011970825e-05,
"loss": 0.5548,
"step": 428
},
{
"epoch": 0.78,
"learning_rate": 1.7318332432438906e-05,
"loss": 0.5616,
"step": 429
},
{
"epoch": 0.79,
"learning_rate": 1.7304836102681494e-05,
"loss": 0.5659,
"step": 430
},
{
"epoch": 0.79,
"learning_rate": 1.729131118325795e-05,
"loss": 0.594,
"step": 431
},
{
"epoch": 0.79,
"learning_rate": 1.72777577271021e-05,
"loss": 0.5509,
"step": 432
},
{
"epoch": 0.79,
"learning_rate": 1.726417578725946e-05,
"loss": 0.5784,
"step": 433
},
{
"epoch": 0.79,
"learning_rate": 1.7250565416887016e-05,
"loss": 0.575,
"step": 434
},
{
"epoch": 0.8,
"learning_rate": 1.7236926669253043e-05,
"loss": 0.5795,
"step": 435
},
{
"epoch": 0.8,
"learning_rate": 1.7223259597736863e-05,
"loss": 0.5651,
"step": 436
},
{
"epoch": 0.8,
"learning_rate": 1.7209564255828653e-05,
"loss": 0.549,
"step": 437
},
{
"epoch": 0.8,
"learning_rate": 1.7195840697129252e-05,
"loss": 0.5601,
"step": 438
},
{
"epoch": 0.8,
"learning_rate": 1.718208897534991e-05,
"loss": 0.5688,
"step": 439
},
{
"epoch": 0.8,
"learning_rate": 1.716830914431212e-05,
"loss": 0.5515,
"step": 440
},
{
"epoch": 0.81,
"learning_rate": 1.715450125794738e-05,
"loss": 0.5623,
"step": 441
},
{
"epoch": 0.81,
"learning_rate": 1.7140665370296995e-05,
"loss": 0.5534,
"step": 442
},
{
"epoch": 0.81,
"learning_rate": 1.7126801535511854e-05,
"loss": 0.5982,
"step": 443
},
{
"epoch": 0.81,
"learning_rate": 1.7112909807852237e-05,
"loss": 0.5784,
"step": 444
},
{
"epoch": 0.81,
"learning_rate": 1.709899024168758e-05,
"loss": 0.5545,
"step": 445
},
{
"epoch": 0.82,
"learning_rate": 1.7085042891496283e-05,
"loss": 0.5658,
"step": 446
},
{
"epoch": 0.82,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.5827,
"step": 447
},
{
"epoch": 0.82,
"learning_rate": 1.705706505749083e-05,
"loss": 0.5529,
"step": 448
},
{
"epoch": 0.82,
"learning_rate": 1.704303468317632e-05,
"loss": 0.5623,
"step": 449
},
{
"epoch": 0.82,
"learning_rate": 1.702897674383402e-05,
"loss": 0.5638,
"step": 450
},
{
"epoch": 0.82,
"learning_rate": 1.7014891294483893e-05,
"loss": 0.5845,
"step": 451
},
{
"epoch": 0.83,
"learning_rate": 1.7000778390253566e-05,
"loss": 0.5605,
"step": 452
},
{
"epoch": 0.83,
"learning_rate": 1.6986638086378124e-05,
"loss": 0.5594,
"step": 453
},
{
"epoch": 0.83,
"learning_rate": 1.6972470438199883e-05,
"loss": 0.5823,
"step": 454
},
{
"epoch": 0.83,
"learning_rate": 1.6958275501168183e-05,
"loss": 0.5924,
"step": 455
},
{
"epoch": 0.83,
"learning_rate": 1.6944053330839164e-05,
"loss": 0.5598,
"step": 456
},
{
"epoch": 0.84,
"learning_rate": 1.692980398287555e-05,
"loss": 0.5459,
"step": 457
},
{
"epoch": 0.84,
"learning_rate": 1.6915527513046445e-05,
"loss": 0.5401,
"step": 458
},
{
"epoch": 0.84,
"learning_rate": 1.690122397722709e-05,
"loss": 0.5689,
"step": 459
},
{
"epoch": 0.84,
"learning_rate": 1.6886893431398664e-05,
"loss": 0.586,
"step": 460
},
{
"epoch": 0.84,
"learning_rate": 1.6872535931648046e-05,
"loss": 0.5453,
"step": 461
},
{
"epoch": 0.84,
"learning_rate": 1.685815153416762e-05,
"loss": 0.549,
"step": 462
},
{
"epoch": 0.85,
"learning_rate": 1.6843740295255044e-05,
"loss": 0.5612,
"step": 463
},
{
"epoch": 0.85,
"learning_rate": 1.6829302271313012e-05,
"loss": 0.5649,
"step": 464
},
{
"epoch": 0.85,
"learning_rate": 1.6814837518849064e-05,
"loss": 0.5521,
"step": 465
},
{
"epoch": 0.85,
"learning_rate": 1.680034609447535e-05,
"loss": 0.5812,
"step": 466
},
{
"epoch": 0.85,
"learning_rate": 1.6785828054908393e-05,
"loss": 0.5477,
"step": 467
},
{
"epoch": 0.86,
"learning_rate": 1.6771283456968905e-05,
"loss": 0.5471,
"step": 468
},
{
"epoch": 0.86,
"learning_rate": 1.6756712357581526e-05,
"loss": 0.5506,
"step": 469
},
{
"epoch": 0.86,
"learning_rate": 1.674211481377462e-05,
"loss": 0.5484,
"step": 470
},
{
"epoch": 0.86,
"learning_rate": 1.6727490882680052e-05,
"loss": 0.5591,
"step": 471
},
{
"epoch": 0.86,
"learning_rate": 1.6712840621532963e-05,
"loss": 0.5707,
"step": 472
},
{
"epoch": 0.86,
"learning_rate": 1.669816408767154e-05,
"loss": 0.5579,
"step": 473
},
{
"epoch": 0.87,
"learning_rate": 1.66834613385368e-05,
"loss": 0.5469,
"step": 474
},
{
"epoch": 0.87,
"learning_rate": 1.6668732431672357e-05,
"loss": 0.5561,
"step": 475
},
{
"epoch": 0.87,
"learning_rate": 1.6653977424724208e-05,
"loss": 0.5755,
"step": 476
},
{
"epoch": 0.87,
"learning_rate": 1.663919637544049e-05,
"loss": 0.5522,
"step": 477
},
{
"epoch": 0.87,
"learning_rate": 1.662438934167128e-05,
"loss": 0.5507,
"step": 478
},
{
"epoch": 0.88,
"learning_rate": 1.660955638136834e-05,
"loss": 0.5565,
"step": 479
},
{
"epoch": 0.88,
"learning_rate": 1.65946975525849e-05,
"loss": 0.5865,
"step": 480
},
{
"epoch": 0.88,
"learning_rate": 1.657981291347545e-05,
"loss": 0.5546,
"step": 481
},
{
"epoch": 0.88,
"learning_rate": 1.6564902522295484e-05,
"loss": 0.5583,
"step": 482
},
{
"epoch": 0.88,
"learning_rate": 1.654996643740129e-05,
"loss": 0.5629,
"step": 483
},
{
"epoch": 0.89,
"learning_rate": 1.6535004717249713e-05,
"loss": 0.5689,
"step": 484
},
{
"epoch": 0.89,
"learning_rate": 1.6520017420397933e-05,
"loss": 0.5913,
"step": 485
},
{
"epoch": 0.89,
"learning_rate": 1.6505004605503227e-05,
"loss": 0.556,
"step": 486
},
{
"epoch": 0.89,
"learning_rate": 1.648996633132274e-05,
"loss": 0.5533,
"step": 487
},
{
"epoch": 0.89,
"learning_rate": 1.647490265671328e-05,
"loss": 0.5738,
"step": 488
},
{
"epoch": 0.89,
"learning_rate": 1.645981364063105e-05,
"loss": 0.5692,
"step": 489
},
{
"epoch": 0.9,
"learning_rate": 1.644469934213143e-05,
"loss": 0.572,
"step": 490
},
{
"epoch": 0.9,
"learning_rate": 1.642955982036877e-05,
"loss": 0.5626,
"step": 491
},
{
"epoch": 0.9,
"learning_rate": 1.641439513459612e-05,
"loss": 0.5657,
"step": 492
},
{
"epoch": 0.9,
"learning_rate": 1.6399205344165022e-05,
"loss": 0.566,
"step": 493
},
{
"epoch": 0.9,
"learning_rate": 1.6383990508525283e-05,
"loss": 0.5658,
"step": 494
},
{
"epoch": 0.91,
"learning_rate": 1.636875068722472e-05,
"loss": 0.5354,
"step": 495
},
{
"epoch": 0.91,
"learning_rate": 1.635348593990894e-05,
"loss": 0.5572,
"step": 496
},
{
"epoch": 0.91,
"learning_rate": 1.633819632632111e-05,
"loss": 0.5348,
"step": 497
},
{
"epoch": 0.91,
"learning_rate": 1.6322881906301724e-05,
"loss": 0.5562,
"step": 498
},
{
"epoch": 0.91,
"learning_rate": 1.630754273978834e-05,
"loss": 0.5529,
"step": 499
},
{
"epoch": 0.91,
"learning_rate": 1.62921788868154e-05,
"loss": 0.5645,
"step": 500
},
{
"epoch": 0.92,
"learning_rate": 1.6276790407513943e-05,
"loss": 0.5347,
"step": 501
},
{
"epoch": 0.92,
"learning_rate": 1.6261377362111396e-05,
"loss": 0.5566,
"step": 502
},
{
"epoch": 0.92,
"learning_rate": 1.6245939810931336e-05,
"loss": 0.5584,
"step": 503
},
{
"epoch": 0.92,
"learning_rate": 1.623047781439324e-05,
"loss": 0.5484,
"step": 504
},
{
"epoch": 0.92,
"learning_rate": 1.6214991433012274e-05,
"loss": 0.584,
"step": 505
},
{
"epoch": 0.93,
"learning_rate": 1.6199480727399035e-05,
"loss": 0.5531,
"step": 506
},
{
"epoch": 0.93,
"learning_rate": 1.6183945758259316e-05,
"loss": 0.5728,
"step": 507
},
{
"epoch": 0.93,
"learning_rate": 1.616838658639388e-05,
"loss": 0.5342,
"step": 508
},
{
"epoch": 0.93,
"learning_rate": 1.615280327269821e-05,
"loss": 0.566,
"step": 509
},
{
"epoch": 0.93,
"learning_rate": 1.6137195878162267e-05,
"loss": 0.5672,
"step": 510
},
{
"epoch": 0.93,
"learning_rate": 1.6121564463870285e-05,
"loss": 0.6008,
"step": 511
},
{
"epoch": 0.94,
"learning_rate": 1.610590909100048e-05,
"loss": 0.5723,
"step": 512
},
{
"epoch": 0.94,
"learning_rate": 1.6090229820824846e-05,
"loss": 0.5521,
"step": 513
},
{
"epoch": 0.94,
"learning_rate": 1.6074526714708913e-05,
"loss": 0.5584,
"step": 514
},
{
"epoch": 0.94,
"learning_rate": 1.605879983411149e-05,
"loss": 0.5813,
"step": 515
},
{
"epoch": 0.94,
"learning_rate": 1.6043049240584445e-05,
"loss": 0.5368,
"step": 516
},
{
"epoch": 0.95,
"learning_rate": 1.6027274995772445e-05,
"loss": 0.5588,
"step": 517
},
{
"epoch": 0.95,
"learning_rate": 1.6011477161412724e-05,
"loss": 0.5642,
"step": 518
},
{
"epoch": 0.95,
"learning_rate": 1.5995655799334845e-05,
"loss": 0.5855,
"step": 519
},
{
"epoch": 0.95,
"learning_rate": 1.597981097146045e-05,
"loss": 0.5612,
"step": 520
},
{
"epoch": 0.95,
"learning_rate": 1.5963942739803028e-05,
"loss": 0.5509,
"step": 521
},
{
"epoch": 0.95,
"learning_rate": 1.594805116646766e-05,
"loss": 0.568,
"step": 522
},
{
"epoch": 0.96,
"learning_rate": 1.5932136313650776e-05,
"loss": 0.567,
"step": 523
},
{
"epoch": 0.96,
"learning_rate": 1.5916198243639933e-05,
"loss": 0.5717,
"step": 524
},
{
"epoch": 0.96,
"learning_rate": 1.590023701881354e-05,
"loss": 0.5676,
"step": 525
},
{
"epoch": 0.96,
"learning_rate": 1.5884252701640638e-05,
"loss": 0.5679,
"step": 526
},
{
"epoch": 0.96,
"learning_rate": 1.5868245354680642e-05,
"loss": 0.5579,
"step": 527
},
{
"epoch": 0.97,
"learning_rate": 1.585221504058311e-05,
"loss": 0.5628,
"step": 528
},
{
"epoch": 0.97,
"learning_rate": 1.5836161822087474e-05,
"loss": 0.5745,
"step": 529
},
{
"epoch": 0.97,
"learning_rate": 1.5820085762022827e-05,
"loss": 0.5609,
"step": 530
},
{
"epoch": 0.97,
"learning_rate": 1.580398692330764e-05,
"loss": 0.5385,
"step": 531
},
{
"epoch": 0.97,
"learning_rate": 1.578786536894955e-05,
"loss": 0.5622,
"step": 532
},
{
"epoch": 0.97,
"learning_rate": 1.577172116204509e-05,
"loss": 0.5523,
"step": 533
},
{
"epoch": 0.98,
"learning_rate": 1.5755554365779458e-05,
"loss": 0.5268,
"step": 534
},
{
"epoch": 0.98,
"learning_rate": 1.5739365043426256e-05,
"loss": 0.5557,
"step": 535
},
{
"epoch": 0.98,
"learning_rate": 1.572315325834725e-05,
"loss": 0.557,
"step": 536
},
{
"epoch": 0.98,
"learning_rate": 1.5706919073992125e-05,
"loss": 0.5497,
"step": 537
},
{
"epoch": 0.98,
"learning_rate": 1.5690662553898224e-05,
"loss": 0.5465,
"step": 538
},
{
"epoch": 0.99,
"learning_rate": 1.5674383761690316e-05,
"loss": 0.5601,
"step": 539
},
{
"epoch": 0.99,
"learning_rate": 1.5658082761080332e-05,
"loss": 0.5325,
"step": 540
},
{
"epoch": 0.99,
"learning_rate": 1.5641759615867127e-05,
"loss": 0.5747,
"step": 541
},
{
"epoch": 0.99,
"learning_rate": 1.562541438993622e-05,
"loss": 0.5634,
"step": 542
},
{
"epoch": 0.99,
"learning_rate": 1.560904714725956e-05,
"loss": 0.5313,
"step": 543
},
{
"epoch": 0.99,
"learning_rate": 1.559265795189525e-05,
"loss": 0.5649,
"step": 544
},
{
"epoch": 1.0,
"learning_rate": 1.5576246867987324e-05,
"loss": 0.5602,
"step": 545
},
{
"epoch": 1.0,
"learning_rate": 1.5559813959765482e-05,
"loss": 0.5628,
"step": 546
},
{
"epoch": 1.0,
"learning_rate": 1.554335929154483e-05,
"loss": 0.5351,
"step": 547
},
{
"epoch": 1.0,
"learning_rate": 1.5526882927725652e-05,
"loss": 0.4116,
"step": 548
},
{
"epoch": 1.0,
"learning_rate": 1.5510384932793132e-05,
"loss": 0.3934,
"step": 549
},
{
"epoch": 1.01,
"learning_rate": 1.5493865371317125e-05,
"loss": 0.3794,
"step": 550
},
{
"epoch": 1.01,
"learning_rate": 1.5477324307951883e-05,
"loss": 0.4035,
"step": 551
},
{
"epoch": 1.01,
"learning_rate": 1.5460761807435817e-05,
"loss": 0.3856,
"step": 552
},
{
"epoch": 1.01,
"learning_rate": 1.5444177934591242e-05,
"loss": 0.3793,
"step": 553
},
{
"epoch": 1.01,
"learning_rate": 1.542757275432411e-05,
"loss": 0.3793,
"step": 554
},
{
"epoch": 1.01,
"learning_rate": 1.541094633162378e-05,
"loss": 0.4176,
"step": 555
},
{
"epoch": 1.02,
"learning_rate": 1.5394298731562736e-05,
"loss": 0.3934,
"step": 556
},
{
"epoch": 1.02,
"learning_rate": 1.5377630019296358e-05,
"loss": 0.3836,
"step": 557
},
{
"epoch": 1.02,
"learning_rate": 1.536094026006264e-05,
"loss": 0.3855,
"step": 558
},
{
"epoch": 1.02,
"learning_rate": 1.5344229519181964e-05,
"loss": 0.3732,
"step": 559
},
{
"epoch": 1.02,
"learning_rate": 1.5327497862056825e-05,
"loss": 0.384,
"step": 560
},
{
"epoch": 1.03,
"learning_rate": 1.5310745354171576e-05,
"loss": 0.3938,
"step": 561
},
{
"epoch": 1.03,
"learning_rate": 1.5293972061092187e-05,
"loss": 0.3776,
"step": 562
},
{
"epoch": 1.03,
"learning_rate": 1.5277178048465958e-05,
"loss": 0.3957,
"step": 563
},
{
"epoch": 1.03,
"learning_rate": 1.52603633820213e-05,
"loss": 0.36,
"step": 564
},
{
"epoch": 1.03,
"learning_rate": 1.524352812756745e-05,
"loss": 0.3807,
"step": 565
},
{
"epoch": 1.03,
"learning_rate": 1.5226672350994222e-05,
"loss": 0.3937,
"step": 566
},
{
"epoch": 1.04,
"learning_rate": 1.5209796118271753e-05,
"loss": 0.3751,
"step": 567
},
{
"epoch": 1.04,
"learning_rate": 1.5192899495450237e-05,
"loss": 0.3935,
"step": 568
},
{
"epoch": 1.04,
"learning_rate": 1.5175982548659675e-05,
"loss": 0.3704,
"step": 569
},
{
"epoch": 1.04,
"learning_rate": 1.5159045344109613e-05,
"loss": 0.3622,
"step": 570
},
{
"epoch": 1.04,
"learning_rate": 1.5142087948088873e-05,
"loss": 0.3732,
"step": 571
},
{
"epoch": 1.05,
"learning_rate": 1.512511042696531e-05,
"loss": 0.3859,
"step": 572
},
{
"epoch": 1.05,
"learning_rate": 1.5108112847185545e-05,
"loss": 0.3515,
"step": 573
},
{
"epoch": 1.05,
"learning_rate": 1.5091095275274701e-05,
"loss": 0.3677,
"step": 574
},
{
"epoch": 1.05,
"learning_rate": 1.5074057777836148e-05,
"loss": 0.3912,
"step": 575
},
{
"epoch": 1.05,
"learning_rate": 1.505700042155124e-05,
"loss": 0.3717,
"step": 576
},
{
"epoch": 1.06,
"learning_rate": 1.5039923273179055e-05,
"loss": 0.3761,
"step": 577
},
{
"epoch": 1.06,
"learning_rate": 1.5022826399556135e-05,
"loss": 0.3749,
"step": 578
},
{
"epoch": 1.06,
"learning_rate": 1.5005709867596216e-05,
"loss": 0.3833,
"step": 579
},
{
"epoch": 1.06,
"learning_rate": 1.498857374428998e-05,
"loss": 0.3908,
"step": 580
},
{
"epoch": 1.06,
"learning_rate": 1.4971418096704784e-05,
"loss": 0.3764,
"step": 581
},
{
"epoch": 1.06,
"learning_rate": 1.4954242991984398e-05,
"loss": 0.3663,
"step": 582
},
{
"epoch": 1.07,
"learning_rate": 1.4937048497348743e-05,
"loss": 0.3787,
"step": 583
},
{
"epoch": 1.07,
"learning_rate": 1.4919834680093628e-05,
"loss": 0.3826,
"step": 584
},
{
"epoch": 1.07,
"learning_rate": 1.4902601607590489e-05,
"loss": 0.3784,
"step": 585
},
{
"epoch": 1.07,
"learning_rate": 1.4885349347286118e-05,
"loss": 0.386,
"step": 586
},
{
"epoch": 1.07,
"learning_rate": 1.4868077966702414e-05,
"loss": 0.3729,
"step": 587
},
{
"epoch": 1.08,
"learning_rate": 1.4850787533436101e-05,
"loss": 0.377,
"step": 588
},
{
"epoch": 1.08,
"learning_rate": 1.4833478115158472e-05,
"loss": 0.4029,
"step": 589
},
{
"epoch": 1.08,
"learning_rate": 1.4816149779615128e-05,
"loss": 0.376,
"step": 590
},
{
"epoch": 1.08,
"learning_rate": 1.47988025946257e-05,
"loss": 0.3731,
"step": 591
},
{
"epoch": 1.08,
"learning_rate": 1.4781436628083601e-05,
"loss": 0.3813,
"step": 592
},
{
"epoch": 1.08,
"learning_rate": 1.4764051947955746e-05,
"loss": 0.3864,
"step": 593
},
{
"epoch": 1.09,
"learning_rate": 1.4746648622282294e-05,
"loss": 0.3943,
"step": 594
},
{
"epoch": 1.09,
"learning_rate": 1.4729226719176377e-05,
"loss": 0.3903,
"step": 595
},
{
"epoch": 1.09,
"learning_rate": 1.4711786306823838e-05,
"loss": 0.3799,
"step": 596
},
{
"epoch": 1.09,
"learning_rate": 1.4694327453482956e-05,
"loss": 0.385,
"step": 597
},
{
"epoch": 1.09,
"learning_rate": 1.4676850227484191e-05,
"loss": 0.3902,
"step": 598
},
{
"epoch": 1.1,
"learning_rate": 1.4659354697229903e-05,
"loss": 0.3802,
"step": 599
},
{
"epoch": 1.1,
"learning_rate": 1.46418409311941e-05,
"loss": 0.3914,
"step": 600
},
{
"epoch": 1.1,
"learning_rate": 1.4624308997922151e-05,
"loss": 0.3899,
"step": 601
},
{
"epoch": 1.1,
"learning_rate": 1.4606758966030536e-05,
"loss": 0.4017,
"step": 602
},
{
"epoch": 1.1,
"learning_rate": 1.4589190904206565e-05,
"loss": 0.38,
"step": 603
},
{
"epoch": 1.1,
"learning_rate": 1.4571604881208118e-05,
"loss": 0.3905,
"step": 604
},
{
"epoch": 1.11,
"learning_rate": 1.4554000965863367e-05,
"loss": 0.3738,
"step": 605
},
{
"epoch": 1.11,
"learning_rate": 1.453637922707051e-05,
"loss": 0.3874,
"step": 606
},
{
"epoch": 1.11,
"learning_rate": 1.4518739733797507e-05,
"loss": 0.3871,
"step": 607
},
{
"epoch": 1.11,
"learning_rate": 1.4501082555081802e-05,
"loss": 0.3947,
"step": 608
},
{
"epoch": 1.11,
"learning_rate": 1.448340776003006e-05,
"loss": 0.3709,
"step": 609
},
{
"epoch": 1.12,
"learning_rate": 1.4465715417817889e-05,
"loss": 0.3882,
"step": 610
},
{
"epoch": 1.12,
"learning_rate": 1.4448005597689573e-05,
"loss": 0.3874,
"step": 611
},
{
"epoch": 1.12,
"learning_rate": 1.4430278368957809e-05,
"loss": 0.3688,
"step": 612
},
{
"epoch": 1.12,
"learning_rate": 1.4412533801003412e-05,
"loss": 0.3661,
"step": 613
},
{
"epoch": 1.12,
"learning_rate": 1.4394771963275079e-05,
"loss": 0.3673,
"step": 614
},
{
"epoch": 1.12,
"learning_rate": 1.4376992925289077e-05,
"loss": 0.3727,
"step": 615
},
{
"epoch": 1.13,
"learning_rate": 1.4359196756629006e-05,
"loss": 0.4,
"step": 616
},
{
"epoch": 1.13,
"learning_rate": 1.4341383526945507e-05,
"loss": 0.3828,
"step": 617
},
{
"epoch": 1.13,
"learning_rate": 1.4323553305956e-05,
"loss": 0.3674,
"step": 618
},
{
"epoch": 1.13,
"learning_rate": 1.4305706163444391e-05,
"loss": 0.3801,
"step": 619
},
{
"epoch": 1.13,
"learning_rate": 1.4287842169260827e-05,
"loss": 0.3809,
"step": 620
},
{
"epoch": 1.14,
"learning_rate": 1.426996139332141e-05,
"loss": 0.3966,
"step": 621
},
{
"epoch": 1.14,
"learning_rate": 1.425206390560791e-05,
"loss": 0.3815,
"step": 622
},
{
"epoch": 1.14,
"learning_rate": 1.4234149776167518e-05,
"loss": 0.3607,
"step": 623
},
{
"epoch": 1.14,
"learning_rate": 1.4216219075112542e-05,
"loss": 0.3858,
"step": 624
},
{
"epoch": 1.14,
"learning_rate": 1.4198271872620162e-05,
"loss": 0.3859,
"step": 625
},
{
"epoch": 1.14,
"learning_rate": 1.4180308238932137e-05,
"loss": 0.3776,
"step": 626
},
{
"epoch": 1.15,
"learning_rate": 1.4162328244354528e-05,
"loss": 0.3681,
"step": 627
},
{
"epoch": 1.15,
"learning_rate": 1.4144331959257438e-05,
"loss": 0.381,
"step": 628
},
{
"epoch": 1.15,
"learning_rate": 1.4126319454074725e-05,
"loss": 0.3737,
"step": 629
},
{
"epoch": 1.15,
"learning_rate": 1.4108290799303721e-05,
"loss": 0.3595,
"step": 630
},
{
"epoch": 1.15,
"learning_rate": 1.4090246065504981e-05,
"loss": 0.3937,
"step": 631
},
{
"epoch": 1.16,
"learning_rate": 1.4072185323301977e-05,
"loss": 0.3699,
"step": 632
},
{
"epoch": 1.16,
"learning_rate": 1.4054108643380838e-05,
"loss": 0.3801,
"step": 633
},
{
"epoch": 1.16,
"learning_rate": 1.4036016096490066e-05,
"loss": 0.3492,
"step": 634
},
{
"epoch": 1.16,
"learning_rate": 1.4017907753440278e-05,
"loss": 0.3773,
"step": 635
},
{
"epoch": 1.16,
"learning_rate": 1.3999783685103893e-05,
"loss": 0.3756,
"step": 636
},
{
"epoch": 1.16,
"learning_rate": 1.398164396241489e-05,
"loss": 0.384,
"step": 637
},
{
"epoch": 1.17,
"learning_rate": 1.3963488656368518e-05,
"loss": 0.4027,
"step": 638
},
{
"epoch": 1.17,
"learning_rate": 1.3945317838021001e-05,
"loss": 0.3945,
"step": 639
},
{
"epoch": 1.17,
"learning_rate": 1.3927131578489292e-05,
"loss": 0.3563,
"step": 640
},
{
"epoch": 1.17,
"learning_rate": 1.3908929948950763e-05,
"loss": 0.384,
"step": 641
},
{
"epoch": 1.17,
"learning_rate": 1.3890713020642951e-05,
"loss": 0.385,
"step": 642
},
{
"epoch": 1.18,
"learning_rate": 1.3872480864863272e-05,
"loss": 0.402,
"step": 643
},
{
"epoch": 1.18,
"learning_rate": 1.3854233552968726e-05,
"loss": 0.3818,
"step": 644
},
{
"epoch": 1.18,
"learning_rate": 1.383597115637564e-05,
"loss": 0.3692,
"step": 645
},
{
"epoch": 1.18,
"learning_rate": 1.3817693746559382e-05,
"loss": 0.3782,
"step": 646
},
{
"epoch": 1.18,
"learning_rate": 1.3799401395054073e-05,
"loss": 0.3891,
"step": 647
},
{
"epoch": 1.18,
"learning_rate": 1.3781094173452316e-05,
"loss": 0.3803,
"step": 648
},
{
"epoch": 1.19,
"learning_rate": 1.3762772153404909e-05,
"loss": 0.3734,
"step": 649
},
{
"epoch": 1.19,
"learning_rate": 1.3744435406620571e-05,
"loss": 0.3802,
"step": 650
},
{
"epoch": 1.19,
"learning_rate": 1.3726084004865659e-05,
"loss": 0.383,
"step": 651
},
{
"epoch": 1.19,
"learning_rate": 1.3707718019963887e-05,
"loss": 0.3854,
"step": 652
},
{
"epoch": 1.19,
"learning_rate": 1.3689337523796043e-05,
"loss": 0.3889,
"step": 653
},
{
"epoch": 1.2,
"learning_rate": 1.3670942588299708e-05,
"loss": 0.3806,
"step": 654
},
{
"epoch": 1.2,
"learning_rate": 1.3652533285468972e-05,
"loss": 0.3754,
"step": 655
},
{
"epoch": 1.2,
"learning_rate": 1.3634109687354169e-05,
"loss": 0.3696,
"step": 656
},
{
"epoch": 1.2,
"learning_rate": 1.3615671866061573e-05,
"loss": 0.3823,
"step": 657
},
{
"epoch": 1.2,
"learning_rate": 1.3597219893753119e-05,
"loss": 0.3897,
"step": 658
},
{
"epoch": 1.21,
"learning_rate": 1.3578753842646137e-05,
"loss": 0.3893,
"step": 659
},
{
"epoch": 1.21,
"learning_rate": 1.3560273785013057e-05,
"loss": 0.3894,
"step": 660
},
{
"epoch": 1.21,
"learning_rate": 1.3541779793181122e-05,
"loss": 0.3959,
"step": 661
},
{
"epoch": 1.21,
"learning_rate": 1.3523271939532112e-05,
"loss": 0.3988,
"step": 662
},
{
"epoch": 1.21,
"learning_rate": 1.3504750296502064e-05,
"loss": 0.3687,
"step": 663
},
{
"epoch": 1.21,
"learning_rate": 1.3486214936580977e-05,
"loss": 0.3587,
"step": 664
},
{
"epoch": 1.22,
"learning_rate": 1.3467665932312545e-05,
"loss": 0.3789,
"step": 665
},
{
"epoch": 1.22,
"learning_rate": 1.3449103356293853e-05,
"loss": 0.3779,
"step": 666
},
{
"epoch": 1.22,
"learning_rate": 1.343052728117511e-05,
"loss": 0.3618,
"step": 667
},
{
"epoch": 1.22,
"learning_rate": 1.341193777965935e-05,
"loss": 0.3809,
"step": 668
},
{
"epoch": 1.22,
"learning_rate": 1.3393334924502168e-05,
"loss": 0.3936,
"step": 669
},
{
"epoch": 1.23,
"learning_rate": 1.3374718788511412e-05,
"loss": 0.403,
"step": 670
},
{
"epoch": 1.23,
"learning_rate": 1.3356089444546906e-05,
"loss": 0.3877,
"step": 671
},
{
"epoch": 1.23,
"learning_rate": 1.3337446965520183e-05,
"loss": 0.3733,
"step": 672
},
{
"epoch": 1.23,
"learning_rate": 1.3318791424394161e-05,
"loss": 0.3867,
"step": 673
},
{
"epoch": 1.23,
"learning_rate": 1.330012289418291e-05,
"loss": 0.3893,
"step": 674
},
{
"epoch": 1.23,
"learning_rate": 1.3281441447951304e-05,
"loss": 0.3911,
"step": 675
},
{
"epoch": 1.24,
"learning_rate": 1.3262747158814791e-05,
"loss": 0.3739,
"step": 676
},
{
"epoch": 1.24,
"learning_rate": 1.3244040099939078e-05,
"loss": 0.3748,
"step": 677
},
{
"epoch": 1.24,
"learning_rate": 1.3225320344539845e-05,
"loss": 0.3797,
"step": 678
},
{
"epoch": 1.24,
"learning_rate": 1.3206587965882465e-05,
"loss": 0.3707,
"step": 679
},
{
"epoch": 1.24,
"learning_rate": 1.3187843037281725e-05,
"loss": 0.38,
"step": 680
},
{
"epoch": 1.25,
"learning_rate": 1.3169085632101515e-05,
"loss": 0.3798,
"step": 681
},
{
"epoch": 1.25,
"learning_rate": 1.3150315823754572e-05,
"loss": 0.3683,
"step": 682
},
{
"epoch": 1.25,
"learning_rate": 1.313153368570216e-05,
"loss": 0.3678,
"step": 683
},
{
"epoch": 1.25,
"learning_rate": 1.3112739291453812e-05,
"loss": 0.3823,
"step": 684
},
{
"epoch": 1.25,
"learning_rate": 1.3093932714567026e-05,
"loss": 0.3803,
"step": 685
},
{
"epoch": 1.25,
"learning_rate": 1.3075114028646976e-05,
"loss": 0.4052,
"step": 686
},
{
"epoch": 1.26,
"learning_rate": 1.3056283307346234e-05,
"loss": 0.3622,
"step": 687
},
{
"epoch": 1.26,
"learning_rate": 1.3037440624364468e-05,
"loss": 0.3754,
"step": 688
},
{
"epoch": 1.26,
"learning_rate": 1.3018586053448173e-05,
"loss": 0.3776,
"step": 689
},
{
"epoch": 1.26,
"learning_rate": 1.299971966839036e-05,
"loss": 0.3856,
"step": 690
},
{
"epoch": 1.26,
"learning_rate": 1.298084154303029e-05,
"loss": 0.3818,
"step": 691
},
{
"epoch": 1.27,
"learning_rate": 1.2961951751253158e-05,
"loss": 0.3781,
"step": 692
},
{
"epoch": 1.27,
"learning_rate": 1.2943050366989833e-05,
"loss": 0.3672,
"step": 693
},
{
"epoch": 1.27,
"learning_rate": 1.292413746421655e-05,
"loss": 0.3942,
"step": 694
},
{
"epoch": 1.27,
"learning_rate": 1.290521311695462e-05,
"loss": 0.3921,
"step": 695
},
{
"epoch": 1.27,
"learning_rate": 1.2886277399270153e-05,
"loss": 0.3753,
"step": 696
},
{
"epoch": 1.27,
"learning_rate": 1.2867330385273756e-05,
"loss": 0.3896,
"step": 697
},
{
"epoch": 1.28,
"learning_rate": 1.2848372149120248e-05,
"loss": 0.3903,
"step": 698
},
{
"epoch": 1.28,
"learning_rate": 1.282940276500837e-05,
"loss": 0.3835,
"step": 699
},
{
"epoch": 1.28,
"learning_rate": 1.2810422307180497e-05,
"loss": 0.3968,
"step": 700
},
{
"epoch": 1.28,
"learning_rate": 1.2791430849922335e-05,
"loss": 0.3778,
"step": 701
},
{
"epoch": 1.28,
"learning_rate": 1.2772428467562653e-05,
"loss": 0.3746,
"step": 702
},
{
"epoch": 1.29,
"learning_rate": 1.2753415234472965e-05,
"loss": 0.4004,
"step": 703
},
{
"epoch": 1.29,
"learning_rate": 1.2734391225067264e-05,
"loss": 0.3841,
"step": 704
},
{
"epoch": 1.29,
"learning_rate": 1.2715356513801714e-05,
"loss": 0.3868,
"step": 705
},
{
"epoch": 1.29,
"learning_rate": 1.2696311175174358e-05,
"loss": 0.3962,
"step": 706
},
{
"epoch": 1.29,
"learning_rate": 1.2677255283724844e-05,
"loss": 0.3979,
"step": 707
},
{
"epoch": 1.29,
"learning_rate": 1.265818891403412e-05,
"loss": 0.3917,
"step": 708
},
{
"epoch": 1.3,
"learning_rate": 1.2639112140724133e-05,
"loss": 0.3858,
"step": 709
},
{
"epoch": 1.3,
"learning_rate": 1.2620025038457555e-05,
"loss": 0.3887,
"step": 710
},
{
"epoch": 1.3,
"learning_rate": 1.260092768193749e-05,
"loss": 0.4006,
"step": 711
},
{
"epoch": 1.3,
"learning_rate": 1.258182014590716e-05,
"loss": 0.3572,
"step": 712
},
{
"epoch": 1.3,
"learning_rate": 1.256270250514964e-05,
"loss": 0.3722,
"step": 713
},
{
"epoch": 1.31,
"learning_rate": 1.2543574834487551e-05,
"loss": 0.3939,
"step": 714
},
{
"epoch": 1.31,
"learning_rate": 1.252443720878276e-05,
"loss": 0.3948,
"step": 715
},
{
"epoch": 1.31,
"learning_rate": 1.2505289702936109e-05,
"loss": 0.3963,
"step": 716
},
{
"epoch": 1.31,
"learning_rate": 1.2486132391887103e-05,
"loss": 0.3698,
"step": 717
},
{
"epoch": 1.31,
"learning_rate": 1.2466965350613615e-05,
"loss": 0.3875,
"step": 718
},
{
"epoch": 1.31,
"learning_rate": 1.2447788654131616e-05,
"loss": 0.3684,
"step": 719
},
{
"epoch": 1.32,
"learning_rate": 1.2428602377494851e-05,
"loss": 0.3773,
"step": 720
},
{
"epoch": 1.32,
"learning_rate": 1.240940659579457e-05,
"loss": 0.3706,
"step": 721
},
{
"epoch": 1.32,
"learning_rate": 1.239020138415922e-05,
"loss": 0.3714,
"step": 722
},
{
"epoch": 1.32,
"learning_rate": 1.2370986817754146e-05,
"loss": 0.39,
"step": 723
},
{
"epoch": 1.32,
"learning_rate": 1.2351762971781324e-05,
"loss": 0.396,
"step": 724
},
{
"epoch": 1.33,
"learning_rate": 1.2332529921479037e-05,
"loss": 0.398,
"step": 725
},
{
"epoch": 1.33,
"learning_rate": 1.2313287742121592e-05,
"loss": 0.3778,
"step": 726
},
{
"epoch": 1.33,
"learning_rate": 1.2294036509019025e-05,
"loss": 0.3743,
"step": 727
},
{
"epoch": 1.33,
"learning_rate": 1.227477629751681e-05,
"loss": 0.3706,
"step": 728
},
{
"epoch": 1.33,
"learning_rate": 1.225550718299556e-05,
"loss": 0.3777,
"step": 729
},
{
"epoch": 1.33,
"learning_rate": 1.2236229240870731e-05,
"loss": 0.3891,
"step": 730
},
{
"epoch": 1.34,
"learning_rate": 1.2216942546592333e-05,
"loss": 0.3915,
"step": 731
},
{
"epoch": 1.34,
"learning_rate": 1.2197647175644618e-05,
"loss": 0.3677,
"step": 732
},
{
"epoch": 1.34,
"learning_rate": 1.2178343203545811e-05,
"loss": 0.38,
"step": 733
},
{
"epoch": 1.34,
"learning_rate": 1.2159030705847792e-05,
"loss": 0.367,
"step": 734
},
{
"epoch": 1.34,
"learning_rate": 1.2139709758135814e-05,
"loss": 0.3878,
"step": 735
},
{
"epoch": 1.35,
"learning_rate": 1.2120380436028194e-05,
"loss": 0.3845,
"step": 736
},
{
"epoch": 1.35,
"learning_rate": 1.2101042815176031e-05,
"loss": 0.3796,
"step": 737
},
{
"epoch": 1.35,
"learning_rate": 1.2081696971262905e-05,
"loss": 0.3773,
"step": 738
},
{
"epoch": 1.35,
"learning_rate": 1.2062342980004573e-05,
"loss": 0.3682,
"step": 739
},
{
"epoch": 1.35,
"learning_rate": 1.204298091714868e-05,
"loss": 0.3771,
"step": 740
},
{
"epoch": 1.35,
"learning_rate": 1.2023610858474465e-05,
"loss": 0.3758,
"step": 741
},
{
"epoch": 1.36,
"learning_rate": 1.2004232879792465e-05,
"loss": 0.3739,
"step": 742
},
{
"epoch": 1.36,
"learning_rate": 1.1984847056944207e-05,
"loss": 0.3826,
"step": 743
},
{
"epoch": 1.36,
"learning_rate": 1.1965453465801916e-05,
"loss": 0.3796,
"step": 744
},
{
"epoch": 1.36,
"learning_rate": 1.1946052182268228e-05,
"loss": 0.3728,
"step": 745
},
{
"epoch": 1.36,
"learning_rate": 1.1926643282275884e-05,
"loss": 0.3811,
"step": 746
},
{
"epoch": 1.37,
"learning_rate": 1.1907226841787436e-05,
"loss": 0.3794,
"step": 747
},
{
"epoch": 1.37,
"learning_rate": 1.1887802936794938e-05,
"loss": 0.3681,
"step": 748
},
{
"epoch": 1.37,
"learning_rate": 1.1868371643319669e-05,
"loss": 0.3765,
"step": 749
},
{
"epoch": 1.37,
"learning_rate": 1.1848933037411825e-05,
"loss": 0.3898,
"step": 750
},
{
"epoch": 1.37,
"learning_rate": 1.1829487195150211e-05,
"loss": 0.4043,
"step": 751
},
{
"epoch": 1.38,
"learning_rate": 1.1810034192641969e-05,
"loss": 0.3779,
"step": 752
},
{
"epoch": 1.38,
"learning_rate": 1.1790574106022247e-05,
"loss": 0.3954,
"step": 753
},
{
"epoch": 1.38,
"learning_rate": 1.1771107011453934e-05,
"loss": 0.3675,
"step": 754
},
{
"epoch": 1.38,
"learning_rate": 1.1751632985127341e-05,
"loss": 0.3658,
"step": 755
},
{
"epoch": 1.38,
"learning_rate": 1.1732152103259913e-05,
"loss": 0.3647,
"step": 756
},
{
"epoch": 1.38,
"learning_rate": 1.171266444209591e-05,
"loss": 0.3907,
"step": 757
},
{
"epoch": 1.39,
"learning_rate": 1.1693170077906145e-05,
"loss": 0.393,
"step": 758
},
{
"epoch": 1.39,
"learning_rate": 1.1673669086987653e-05,
"loss": 0.3661,
"step": 759
},
{
"epoch": 1.39,
"learning_rate": 1.1654161545663413e-05,
"loss": 0.3805,
"step": 760
},
{
"epoch": 1.39,
"learning_rate": 1.1634647530282035e-05,
"loss": 0.3897,
"step": 761
},
{
"epoch": 1.39,
"learning_rate": 1.1615127117217465e-05,
"loss": 0.379,
"step": 762
},
{
"epoch": 1.4,
"learning_rate": 1.1595600382868692e-05,
"loss": 0.3773,
"step": 763
},
{
"epoch": 1.4,
"learning_rate": 1.157606740365945e-05,
"loss": 0.3856,
"step": 764
},
{
"epoch": 1.4,
"learning_rate": 1.155652825603791e-05,
"loss": 0.4042,
"step": 765
},
{
"epoch": 1.4,
"learning_rate": 1.1536983016476375e-05,
"loss": 0.3684,
"step": 766
},
{
"epoch": 1.4,
"learning_rate": 1.1517431761471009e-05,
"loss": 0.3628,
"step": 767
},
{
"epoch": 1.4,
"learning_rate": 1.1497874567541505e-05,
"loss": 0.3742,
"step": 768
},
{
"epoch": 1.41,
"learning_rate": 1.1478311511230808e-05,
"loss": 0.3806,
"step": 769
},
{
"epoch": 1.41,
"learning_rate": 1.1458742669104806e-05,
"loss": 0.38,
"step": 770
},
{
"epoch": 1.41,
"learning_rate": 1.143916811775202e-05,
"loss": 0.3914,
"step": 771
},
{
"epoch": 1.41,
"learning_rate": 1.1419587933783334e-05,
"loss": 0.3888,
"step": 772
},
{
"epoch": 1.41,
"learning_rate": 1.140000219383167e-05,
"loss": 0.378,
"step": 773
},
{
"epoch": 1.42,
"learning_rate": 1.1380410974551683e-05,
"loss": 0.4107,
"step": 774
},
{
"epoch": 1.42,
"learning_rate": 1.1360814352619489e-05,
"loss": 0.3612,
"step": 775
},
{
"epoch": 1.42,
"learning_rate": 1.1341212404732348e-05,
"loss": 0.3933,
"step": 776
},
{
"epoch": 1.42,
"learning_rate": 1.1321605207608355e-05,
"loss": 0.3796,
"step": 777
},
{
"epoch": 1.42,
"learning_rate": 1.1301992837986154e-05,
"loss": 0.3918,
"step": 778
},
{
"epoch": 1.42,
"learning_rate": 1.1282375372624632e-05,
"loss": 0.3687,
"step": 779
},
{
"epoch": 1.43,
"learning_rate": 1.1262752888302624e-05,
"loss": 0.3687,
"step": 780
},
{
"epoch": 1.43,
"learning_rate": 1.1243125461818605e-05,
"loss": 0.3629,
"step": 781
},
{
"epoch": 1.43,
"learning_rate": 1.1223493169990394e-05,
"loss": 0.3728,
"step": 782
},
{
"epoch": 1.43,
"learning_rate": 1.1203856089654847e-05,
"loss": 0.3909,
"step": 783
},
{
"epoch": 1.43,
"learning_rate": 1.1184214297667571e-05,
"loss": 0.3912,
"step": 784
},
{
"epoch": 1.44,
"learning_rate": 1.1164567870902603e-05,
"loss": 0.3864,
"step": 785
},
{
"epoch": 1.44,
"learning_rate": 1.1144916886252126e-05,
"loss": 0.3741,
"step": 786
},
{
"epoch": 1.44,
"learning_rate": 1.1125261420626162e-05,
"loss": 0.3811,
"step": 787
},
{
"epoch": 1.44,
"learning_rate": 1.110560155095226e-05,
"loss": 0.3891,
"step": 788
},
{
"epoch": 1.44,
"learning_rate": 1.1085937354175225e-05,
"loss": 0.3873,
"step": 789
},
{
"epoch": 1.44,
"learning_rate": 1.1066268907256783e-05,
"loss": 0.3624,
"step": 790
},
{
"epoch": 1.45,
"learning_rate": 1.1046596287175297e-05,
"loss": 0.3776,
"step": 791
},
{
"epoch": 1.45,
"learning_rate": 1.1026919570925463e-05,
"loss": 0.4078,
"step": 792
},
{
"epoch": 1.45,
"learning_rate": 1.1007238835518008e-05,
"loss": 0.379,
"step": 793
},
{
"epoch": 1.45,
"learning_rate": 1.0987554157979392e-05,
"loss": 0.3804,
"step": 794
},
{
"epoch": 1.45,
"learning_rate": 1.0967865615351508e-05,
"loss": 0.383,
"step": 795
},
{
"epoch": 1.46,
"learning_rate": 1.0948173284691359e-05,
"loss": 0.3762,
"step": 796
},
{
"epoch": 1.46,
"learning_rate": 1.0928477243070793e-05,
"loss": 0.3947,
"step": 797
},
{
"epoch": 1.46,
"learning_rate": 1.090877756757617e-05,
"loss": 0.3487,
"step": 798
},
{
"epoch": 1.46,
"learning_rate": 1.0889074335308081e-05,
"loss": 0.3884,
"step": 799
},
{
"epoch": 1.46,
"learning_rate": 1.0869367623381026e-05,
"loss": 0.3777,
"step": 800
},
{
"epoch": 1.46,
"learning_rate": 1.0849657508923137e-05,
"loss": 0.403,
"step": 801
},
{
"epoch": 1.47,
"learning_rate": 1.0829944069075848e-05,
"loss": 0.3675,
"step": 802
},
{
"epoch": 1.47,
"learning_rate": 1.0810227380993628e-05,
"loss": 0.3893,
"step": 803
},
{
"epoch": 1.47,
"learning_rate": 1.0790507521843636e-05,
"loss": 0.3923,
"step": 804
},
{
"epoch": 1.47,
"learning_rate": 1.0770784568805457e-05,
"loss": 0.4103,
"step": 805
},
{
"epoch": 1.47,
"learning_rate": 1.0751058599070782e-05,
"loss": 0.4027,
"step": 806
},
{
"epoch": 1.48,
"learning_rate": 1.0731329689843105e-05,
"loss": 0.4037,
"step": 807
},
{
"epoch": 1.48,
"learning_rate": 1.071159791833743e-05,
"loss": 0.3827,
"step": 808
},
{
"epoch": 1.48,
"learning_rate": 1.069186336177996e-05,
"loss": 0.3761,
"step": 809
},
{
"epoch": 1.48,
"learning_rate": 1.0672126097407796e-05,
"loss": 0.376,
"step": 810
},
{
"epoch": 1.48,
"learning_rate": 1.0652386202468638e-05,
"loss": 0.4017,
"step": 811
},
{
"epoch": 1.48,
"learning_rate": 1.0632643754220496e-05,
"loss": 0.3802,
"step": 812
},
{
"epoch": 1.49,
"learning_rate": 1.0612898829931341e-05,
"loss": 0.3995,
"step": 813
},
{
"epoch": 1.49,
"learning_rate": 1.0593151506878868e-05,
"loss": 0.39,
"step": 814
},
{
"epoch": 1.49,
"learning_rate": 1.057340186235014e-05,
"loss": 0.3782,
"step": 815
},
{
"epoch": 1.49,
"learning_rate": 1.0553649973641314e-05,
"loss": 0.3838,
"step": 816
},
{
"epoch": 1.49,
"learning_rate": 1.0533895918057326e-05,
"loss": 0.3689,
"step": 817
},
{
"epoch": 1.5,
"learning_rate": 1.0514139772911598e-05,
"loss": 0.3748,
"step": 818
},
{
"epoch": 1.5,
"learning_rate": 1.0494381615525726e-05,
"loss": 0.3812,
"step": 819
},
{
"epoch": 1.5,
"learning_rate": 1.0474621523229182e-05,
"loss": 0.3822,
"step": 820
},
{
"epoch": 1.5,
"learning_rate": 1.0454859573359013e-05,
"loss": 0.3758,
"step": 821
},
{
"epoch": 1.5,
"learning_rate": 1.0435095843259531e-05,
"loss": 0.3901,
"step": 822
},
{
"epoch": 1.5,
"learning_rate": 1.0415330410282021e-05,
"loss": 0.3999,
"step": 823
},
{
"epoch": 1.51,
"learning_rate": 1.0395563351784433e-05,
"loss": 0.3697,
"step": 824
},
{
"epoch": 1.51,
"learning_rate": 1.0375794745131074e-05,
"loss": 0.3819,
"step": 825
},
{
"epoch": 1.51,
"learning_rate": 1.0356024667692314e-05,
"loss": 0.359,
"step": 826
},
{
"epoch": 1.51,
"learning_rate": 1.0336253196844276e-05,
"loss": 0.3813,
"step": 827
},
{
"epoch": 1.51,
"learning_rate": 1.0316480409968543e-05,
"loss": 0.3665,
"step": 828
},
{
"epoch": 1.52,
"learning_rate": 1.0296706384451842e-05,
"loss": 0.3875,
"step": 829
},
{
"epoch": 1.52,
"learning_rate": 1.0276931197685753e-05,
"loss": 0.3858,
"step": 830
},
{
"epoch": 1.52,
"learning_rate": 1.0257154927066393e-05,
"loss": 0.3853,
"step": 831
},
{
"epoch": 1.52,
"learning_rate": 1.0237377649994129e-05,
"loss": 0.3802,
"step": 832
},
{
"epoch": 1.52,
"learning_rate": 1.0217599443873265e-05,
"loss": 0.3795,
"step": 833
},
{
"epoch": 1.53,
"learning_rate": 1.0197820386111738e-05,
"loss": 0.3867,
"step": 834
},
{
"epoch": 1.53,
"learning_rate": 1.0178040554120825e-05,
"loss": 0.384,
"step": 835
},
{
"epoch": 1.53,
"learning_rate": 1.0158260025314821e-05,
"loss": 0.3789,
"step": 836
},
{
"epoch": 1.53,
"learning_rate": 1.0138478877110765e-05,
"loss": 0.3767,
"step": 837
},
{
"epoch": 1.53,
"learning_rate": 1.0118697186928107e-05,
"loss": 0.3634,
"step": 838
},
{
"epoch": 1.53,
"learning_rate": 1.0098915032188416e-05,
"loss": 0.3804,
"step": 839
},
{
"epoch": 1.54,
"learning_rate": 1.0079132490315095e-05,
"loss": 0.3881,
"step": 840
},
{
"epoch": 1.54,
"learning_rate": 1.0059349638733045e-05,
"loss": 0.3893,
"step": 841
},
{
"epoch": 1.54,
"learning_rate": 1.0039566554868393e-05,
"loss": 0.3855,
"step": 842
},
{
"epoch": 1.54,
"learning_rate": 1.0019783316148168e-05,
"loss": 0.3824,
"step": 843
},
{
"epoch": 1.54,
"learning_rate": 1e-05,
"loss": 0.3813,
"step": 844
},
{
"epoch": 1.55,
"learning_rate": 9.980216683851837e-06,
"loss": 0.392,
"step": 845
},
{
"epoch": 1.55,
"learning_rate": 9.960433445131608e-06,
"loss": 0.381,
"step": 846
},
{
"epoch": 1.55,
"learning_rate": 9.940650361266956e-06,
"loss": 0.3543,
"step": 847
},
{
"epoch": 1.55,
"learning_rate": 9.92086750968491e-06,
"loss": 0.3672,
"step": 848
},
{
"epoch": 1.55,
"learning_rate": 9.901084967811586e-06,
"loss": 0.386,
"step": 849
},
{
"epoch": 1.55,
"learning_rate": 9.881302813071898e-06,
"loss": 0.3724,
"step": 850
},
{
"epoch": 1.56,
"learning_rate": 9.86152112288924e-06,
"loss": 0.3985,
"step": 851
},
{
"epoch": 1.56,
"learning_rate": 9.841739974685179e-06,
"loss": 0.3916,
"step": 852
},
{
"epoch": 1.56,
"learning_rate": 9.821959445879178e-06,
"loss": 0.3681,
"step": 853
},
{
"epoch": 1.56,
"learning_rate": 9.802179613888264e-06,
"loss": 0.3812,
"step": 854
},
{
"epoch": 1.56,
"learning_rate": 9.782400556126737e-06,
"loss": 0.3759,
"step": 855
},
{
"epoch": 1.57,
"learning_rate": 9.762622350005873e-06,
"loss": 0.3888,
"step": 856
},
{
"epoch": 1.57,
"learning_rate": 9.742845072933612e-06,
"loss": 0.3792,
"step": 857
},
{
"epoch": 1.57,
"learning_rate": 9.723068802314247e-06,
"loss": 0.3717,
"step": 858
},
{
"epoch": 1.57,
"learning_rate": 9.70329361554816e-06,
"loss": 0.3732,
"step": 859
},
{
"epoch": 1.57,
"learning_rate": 9.683519590031459e-06,
"loss": 0.3937,
"step": 860
},
{
"epoch": 1.57,
"learning_rate": 9.663746803155729e-06,
"loss": 0.3776,
"step": 861
},
{
"epoch": 1.58,
"learning_rate": 9.643975332307687e-06,
"loss": 0.3752,
"step": 862
},
{
"epoch": 1.58,
"learning_rate": 9.62420525486893e-06,
"loss": 0.3611,
"step": 863
},
{
"epoch": 1.58,
"learning_rate": 9.604436648215572e-06,
"loss": 0.3819,
"step": 864
},
{
"epoch": 1.58,
"learning_rate": 9.58466958971798e-06,
"loss": 0.3577,
"step": 865
},
{
"epoch": 1.58,
"learning_rate": 9.564904156740474e-06,
"loss": 0.3588,
"step": 866
},
{
"epoch": 1.59,
"learning_rate": 9.545140426640992e-06,
"loss": 0.4042,
"step": 867
},
{
"epoch": 1.59,
"learning_rate": 9.525378476770821e-06,
"loss": 0.3754,
"step": 868
},
{
"epoch": 1.59,
"learning_rate": 9.505618384474277e-06,
"loss": 0.3836,
"step": 869
},
{
"epoch": 1.59,
"learning_rate": 9.485860227088406e-06,
"loss": 0.3757,
"step": 870
},
{
"epoch": 1.59,
"learning_rate": 9.466104081942674e-06,
"loss": 0.3551,
"step": 871
},
{
"epoch": 1.59,
"learning_rate": 9.44635002635869e-06,
"loss": 0.3712,
"step": 872
},
{
"epoch": 1.6,
"learning_rate": 9.426598137649866e-06,
"loss": 0.3697,
"step": 873
},
{
"epoch": 1.6,
"learning_rate": 9.406848493121134e-06,
"loss": 0.3597,
"step": 874
},
{
"epoch": 1.6,
"learning_rate": 9.38710117006866e-06,
"loss": 0.3892,
"step": 875
},
{
"epoch": 1.6,
"learning_rate": 9.36735624577951e-06,
"loss": 0.3965,
"step": 876
},
{
"epoch": 1.6,
"learning_rate": 9.347613797531362e-06,
"loss": 0.4039,
"step": 877
},
{
"epoch": 1.61,
"learning_rate": 9.327873902592207e-06,
"loss": 0.362,
"step": 878
},
{
"epoch": 1.61,
"learning_rate": 9.308136638220044e-06,
"loss": 0.3831,
"step": 879
},
{
"epoch": 1.61,
"learning_rate": 9.288402081662571e-06,
"loss": 0.3894,
"step": 880
},
{
"epoch": 1.61,
"learning_rate": 9.268670310156897e-06,
"loss": 0.3932,
"step": 881
},
{
"epoch": 1.61,
"learning_rate": 9.248941400929223e-06,
"loss": 0.3789,
"step": 882
},
{
"epoch": 1.61,
"learning_rate": 9.229215431194543e-06,
"loss": 0.3819,
"step": 883
},
{
"epoch": 1.62,
"learning_rate": 9.209492478156367e-06,
"loss": 0.3734,
"step": 884
},
{
"epoch": 1.62,
"learning_rate": 9.189772619006377e-06,
"loss": 0.4128,
"step": 885
},
{
"epoch": 1.62,
"learning_rate": 9.170055930924152e-06,
"loss": 0.3861,
"step": 886
},
{
"epoch": 1.62,
"learning_rate": 9.150342491076866e-06,
"loss": 0.391,
"step": 887
},
{
"epoch": 1.62,
"learning_rate": 9.130632376618978e-06,
"loss": 0.3616,
"step": 888
},
{
"epoch": 1.63,
"learning_rate": 9.110925664691922e-06,
"loss": 0.3611,
"step": 889
},
{
"epoch": 1.63,
"learning_rate": 9.091222432423832e-06,
"loss": 0.371,
"step": 890
},
{
"epoch": 1.63,
"learning_rate": 9.071522756929212e-06,
"loss": 0.3951,
"step": 891
},
{
"epoch": 1.63,
"learning_rate": 9.051826715308646e-06,
"loss": 0.3876,
"step": 892
},
{
"epoch": 1.63,
"learning_rate": 9.032134384648496e-06,
"loss": 0.3813,
"step": 893
},
{
"epoch": 1.63,
"learning_rate": 9.012445842020611e-06,
"loss": 0.3745,
"step": 894
},
{
"epoch": 1.64,
"learning_rate": 8.992761164481997e-06,
"loss": 0.4143,
"step": 895
},
{
"epoch": 1.64,
"learning_rate": 8.97308042907454e-06,
"loss": 0.3763,
"step": 896
},
{
"epoch": 1.64,
"learning_rate": 8.953403712824706e-06,
"loss": 0.3852,
"step": 897
},
{
"epoch": 1.64,
"learning_rate": 8.93373109274322e-06,
"loss": 0.3908,
"step": 898
},
{
"epoch": 1.64,
"learning_rate": 8.914062645824774e-06,
"loss": 0.3802,
"step": 899
},
{
"epoch": 1.65,
"learning_rate": 8.894398449047741e-06,
"loss": 0.374,
"step": 900
},
{
"epoch": 1.65,
"learning_rate": 8.874738579373843e-06,
"loss": 0.3823,
"step": 901
},
{
"epoch": 1.65,
"learning_rate": 8.855083113747876e-06,
"loss": 0.3841,
"step": 902
},
{
"epoch": 1.65,
"learning_rate": 8.8354321290974e-06,
"loss": 0.3846,
"step": 903
},
{
"epoch": 1.65,
"learning_rate": 8.815785702332434e-06,
"loss": 0.396,
"step": 904
},
{
"epoch": 1.65,
"learning_rate": 8.796143910345153e-06,
"loss": 0.3734,
"step": 905
},
{
"epoch": 1.66,
"learning_rate": 8.776506830009607e-06,
"loss": 0.3672,
"step": 906
},
{
"epoch": 1.66,
"learning_rate": 8.756874538181397e-06,
"loss": 0.3665,
"step": 907
},
{
"epoch": 1.66,
"learning_rate": 8.737247111697378e-06,
"loss": 0.379,
"step": 908
},
{
"epoch": 1.66,
"learning_rate": 8.717624627375371e-06,
"loss": 0.3837,
"step": 909
},
{
"epoch": 1.66,
"learning_rate": 8.698007162013851e-06,
"loss": 0.3702,
"step": 910
},
{
"epoch": 1.67,
"learning_rate": 8.67839479239165e-06,
"loss": 0.3712,
"step": 911
},
{
"epoch": 1.67,
"learning_rate": 8.658787595267654e-06,
"loss": 0.3573,
"step": 912
},
{
"epoch": 1.67,
"learning_rate": 8.639185647380513e-06,
"loss": 0.3914,
"step": 913
},
{
"epoch": 1.67,
"learning_rate": 8.619589025448318e-06,
"loss": 0.3673,
"step": 914
},
{
"epoch": 1.67,
"learning_rate": 8.599997806168335e-06,
"loss": 0.3679,
"step": 915
},
{
"epoch": 1.67,
"learning_rate": 8.58041206621667e-06,
"loss": 0.3771,
"step": 916
},
{
"epoch": 1.68,
"learning_rate": 8.56083188224798e-06,
"loss": 0.3701,
"step": 917
},
{
"epoch": 1.68,
"learning_rate": 8.541257330895198e-06,
"loss": 0.3792,
"step": 918
},
{
"epoch": 1.68,
"learning_rate": 8.521688488769194e-06,
"loss": 0.3777,
"step": 919
},
{
"epoch": 1.68,
"learning_rate": 8.502125432458495e-06,
"loss": 0.366,
"step": 920
},
{
"epoch": 1.68,
"learning_rate": 8.482568238528994e-06,
"loss": 0.3835,
"step": 921
},
{
"epoch": 1.69,
"learning_rate": 8.463016983523629e-06,
"loss": 0.3991,
"step": 922
},
{
"epoch": 1.69,
"learning_rate": 8.443471743962098e-06,
"loss": 0.3829,
"step": 923
},
{
"epoch": 1.69,
"learning_rate": 8.423932596340551e-06,
"loss": 0.38,
"step": 924
},
{
"epoch": 1.69,
"learning_rate": 8.404399617131311e-06,
"loss": 0.363,
"step": 925
},
{
"epoch": 1.69,
"learning_rate": 8.384872882782542e-06,
"loss": 0.393,
"step": 926
},
{
"epoch": 1.7,
"learning_rate": 8.365352469717969e-06,
"loss": 0.3717,
"step": 927
},
{
"epoch": 1.7,
"learning_rate": 8.345838454336589e-06,
"loss": 0.3624,
"step": 928
},
{
"epoch": 1.7,
"learning_rate": 8.32633091301235e-06,
"loss": 0.3872,
"step": 929
},
{
"epoch": 1.7,
"learning_rate": 8.306829922093857e-06,
"loss": 0.3662,
"step": 930
},
{
"epoch": 1.7,
"learning_rate": 8.287335557904092e-06,
"loss": 0.3882,
"step": 931
},
{
"epoch": 1.7,
"learning_rate": 8.267847896740092e-06,
"loss": 0.3654,
"step": 932
},
{
"epoch": 1.71,
"learning_rate": 8.248367014872659e-06,
"loss": 0.3864,
"step": 933
},
{
"epoch": 1.71,
"learning_rate": 8.228892988546067e-06,
"loss": 0.3648,
"step": 934
},
{
"epoch": 1.71,
"learning_rate": 8.209425893977758e-06,
"loss": 0.3853,
"step": 935
},
{
"epoch": 1.71,
"learning_rate": 8.189965807358033e-06,
"loss": 0.3634,
"step": 936
},
{
"epoch": 1.71,
"learning_rate": 8.170512804849792e-06,
"loss": 0.3776,
"step": 937
},
{
"epoch": 1.72,
"learning_rate": 8.151066962588181e-06,
"loss": 0.377,
"step": 938
},
{
"epoch": 1.72,
"learning_rate": 8.131628356680331e-06,
"loss": 0.375,
"step": 939
},
{
"epoch": 1.72,
"learning_rate": 8.112197063205063e-06,
"loss": 0.369,
"step": 940
},
{
"epoch": 1.72,
"learning_rate": 8.092773158212567e-06,
"loss": 0.3819,
"step": 941
},
{
"epoch": 1.72,
"learning_rate": 8.073356717724116e-06,
"loss": 0.372,
"step": 942
},
{
"epoch": 1.72,
"learning_rate": 8.053947817731773e-06,
"loss": 0.3779,
"step": 943
},
{
"epoch": 1.73,
"learning_rate": 8.034546534198087e-06,
"loss": 0.3772,
"step": 944
},
{
"epoch": 1.73,
"learning_rate": 8.015152943055797e-06,
"loss": 0.3895,
"step": 945
},
{
"epoch": 1.73,
"learning_rate": 7.995767120207537e-06,
"loss": 0.3579,
"step": 946
},
{
"epoch": 1.73,
"learning_rate": 7.976389141525536e-06,
"loss": 0.3775,
"step": 947
},
{
"epoch": 1.73,
"learning_rate": 7.957019082851322e-06,
"loss": 0.3559,
"step": 948
},
{
"epoch": 1.74,
"learning_rate": 7.93765701999543e-06,
"loss": 0.3775,
"step": 949
},
{
"epoch": 1.74,
"learning_rate": 7.918303028737097e-06,
"loss": 0.3761,
"step": 950
},
{
"epoch": 1.74,
"learning_rate": 7.898957184823974e-06,
"loss": 0.3846,
"step": 951
},
{
"epoch": 1.74,
"learning_rate": 7.879619563971808e-06,
"loss": 0.3969,
"step": 952
},
{
"epoch": 1.74,
"learning_rate": 7.860290241864191e-06,
"loss": 0.3808,
"step": 953
},
{
"epoch": 1.74,
"learning_rate": 7.840969294152212e-06,
"loss": 0.4052,
"step": 954
},
{
"epoch": 1.75,
"learning_rate": 7.821656796454192e-06,
"loss": 0.366,
"step": 955
},
{
"epoch": 1.75,
"learning_rate": 7.802352824355385e-06,
"loss": 0.3763,
"step": 956
},
{
"epoch": 1.75,
"learning_rate": 7.783057453407673e-06,
"loss": 0.3634,
"step": 957
},
{
"epoch": 1.75,
"learning_rate": 7.76377075912927e-06,
"loss": 0.3709,
"step": 958
},
{
"epoch": 1.75,
"learning_rate": 7.744492817004443e-06,
"loss": 0.4129,
"step": 959
},
{
"epoch": 1.76,
"learning_rate": 7.725223702483193e-06,
"loss": 0.3709,
"step": 960
},
{
"epoch": 1.76,
"learning_rate": 7.705963490980977e-06,
"loss": 0.3795,
"step": 961
},
{
"epoch": 1.76,
"learning_rate": 7.68671225787841e-06,
"loss": 0.3902,
"step": 962
},
{
"epoch": 1.76,
"learning_rate": 7.667470078520966e-06,
"loss": 0.3665,
"step": 963
},
{
"epoch": 1.76,
"learning_rate": 7.648237028218676e-06,
"loss": 0.3944,
"step": 964
},
{
"epoch": 1.76,
"learning_rate": 7.6290131822458554e-06,
"loss": 0.3958,
"step": 965
},
{
"epoch": 1.77,
"learning_rate": 7.609798615840785e-06,
"loss": 0.3705,
"step": 966
},
{
"epoch": 1.77,
"learning_rate": 7.590593404205432e-06,
"loss": 0.3614,
"step": 967
},
{
"epoch": 1.77,
"learning_rate": 7.571397622505151e-06,
"loss": 0.3722,
"step": 968
},
{
"epoch": 1.77,
"learning_rate": 7.552211345868388e-06,
"loss": 0.3805,
"step": 969
},
{
"epoch": 1.77,
"learning_rate": 7.533034649386385e-06,
"loss": 0.3705,
"step": 970
},
{
"epoch": 1.78,
"learning_rate": 7.513867608112901e-06,
"loss": 0.3765,
"step": 971
},
{
"epoch": 1.78,
"learning_rate": 7.494710297063894e-06,
"loss": 0.3618,
"step": 972
},
{
"epoch": 1.78,
"learning_rate": 7.4755627912172405e-06,
"loss": 0.3753,
"step": 973
},
{
"epoch": 1.78,
"learning_rate": 7.456425165512453e-06,
"loss": 0.3808,
"step": 974
},
{
"epoch": 1.78,
"learning_rate": 7.437297494850362e-06,
"loss": 0.3847,
"step": 975
},
{
"epoch": 1.78,
"learning_rate": 7.418179854092842e-06,
"loss": 0.3931,
"step": 976
},
{
"epoch": 1.79,
"learning_rate": 7.399072318062514e-06,
"loss": 0.3744,
"step": 977
},
{
"epoch": 1.79,
"learning_rate": 7.379974961542447e-06,
"loss": 0.3786,
"step": 978
},
{
"epoch": 1.79,
"learning_rate": 7.3608878592758695e-06,
"loss": 0.383,
"step": 979
},
{
"epoch": 1.79,
"learning_rate": 7.341811085965884e-06,
"loss": 0.3758,
"step": 980
},
{
"epoch": 1.79,
"learning_rate": 7.322744716275159e-06,
"loss": 0.3809,
"step": 981
},
{
"epoch": 1.8,
"learning_rate": 7.303688824825648e-06,
"loss": 0.3748,
"step": 982
},
{
"epoch": 1.8,
"learning_rate": 7.2846434861982905e-06,
"loss": 0.3634,
"step": 983
},
{
"epoch": 1.8,
"learning_rate": 7.2656087749327385e-06,
"loss": 0.3976,
"step": 984
},
{
"epoch": 1.8,
"learning_rate": 7.246584765527038e-06,
"loss": 0.3683,
"step": 985
},
{
"epoch": 1.8,
"learning_rate": 7.22757153243735e-06,
"loss": 0.3628,
"step": 986
},
{
"epoch": 1.8,
"learning_rate": 7.208569150077668e-06,
"loss": 0.3715,
"step": 987
},
{
"epoch": 1.81,
"learning_rate": 7.189577692819508e-06,
"loss": 0.3725,
"step": 988
},
{
"epoch": 1.81,
"learning_rate": 7.170597234991632e-06,
"loss": 0.3686,
"step": 989
},
{
"epoch": 1.81,
"learning_rate": 7.151627850879757e-06,
"loss": 0.3601,
"step": 990
},
{
"epoch": 1.81,
"learning_rate": 7.132669614726249e-06,
"loss": 0.3532,
"step": 991
},
{
"epoch": 1.81,
"learning_rate": 7.113722600729848e-06,
"loss": 0.3715,
"step": 992
},
{
"epoch": 1.82,
"learning_rate": 7.094786883045383e-06,
"loss": 0.3907,
"step": 993
},
{
"epoch": 1.82,
"learning_rate": 7.075862535783454e-06,
"loss": 0.359,
"step": 994
},
{
"epoch": 1.82,
"learning_rate": 7.056949633010166e-06,
"loss": 0.3632,
"step": 995
},
{
"epoch": 1.82,
"learning_rate": 7.038048248746842e-06,
"loss": 0.3876,
"step": 996
},
{
"epoch": 1.82,
"learning_rate": 7.019158456969714e-06,
"loss": 0.3827,
"step": 997
},
{
"epoch": 1.82,
"learning_rate": 7.000280331609641e-06,
"loss": 0.3578,
"step": 998
},
{
"epoch": 1.83,
"learning_rate": 6.981413946551832e-06,
"loss": 0.3849,
"step": 999
},
{
"epoch": 1.83,
"learning_rate": 6.962559375635536e-06,
"loss": 0.3857,
"step": 1000
},
{
"epoch": 1.83,
"learning_rate": 6.943716692653771e-06,
"loss": 0.3659,
"step": 1001
},
{
"epoch": 1.83,
"learning_rate": 6.924885971353027e-06,
"loss": 0.3739,
"step": 1002
},
{
"epoch": 1.83,
"learning_rate": 6.906067285432978e-06,
"loss": 0.3648,
"step": 1003
},
{
"epoch": 1.84,
"learning_rate": 6.8872607085461875e-06,
"loss": 0.3642,
"step": 1004
},
{
"epoch": 1.84,
"learning_rate": 6.868466314297841e-06,
"loss": 0.3777,
"step": 1005
},
{
"epoch": 1.84,
"learning_rate": 6.849684176245432e-06,
"loss": 0.367,
"step": 1006
},
{
"epoch": 1.84,
"learning_rate": 6.830914367898485e-06,
"loss": 0.3904,
"step": 1007
},
{
"epoch": 1.84,
"learning_rate": 6.812156962718279e-06,
"loss": 0.3662,
"step": 1008
},
{
"epoch": 1.85,
"learning_rate": 6.793412034117537e-06,
"loss": 0.3596,
"step": 1009
},
{
"epoch": 1.85,
"learning_rate": 6.774679655460159e-06,
"loss": 0.3785,
"step": 1010
},
{
"epoch": 1.85,
"learning_rate": 6.755959900060925e-06,
"loss": 0.3715,
"step": 1011
},
{
"epoch": 1.85,
"learning_rate": 6.737252841185212e-06,
"loss": 0.3673,
"step": 1012
},
{
"epoch": 1.85,
"learning_rate": 6.718558552048701e-06,
"loss": 0.3782,
"step": 1013
},
{
"epoch": 1.85,
"learning_rate": 6.699877105817093e-06,
"loss": 0.3803,
"step": 1014
},
{
"epoch": 1.86,
"learning_rate": 6.6812085756058394e-06,
"loss": 0.3609,
"step": 1015
},
{
"epoch": 1.86,
"learning_rate": 6.662553034479824e-06,
"loss": 0.3797,
"step": 1016
},
{
"epoch": 1.86,
"learning_rate": 6.643910555453094e-06,
"loss": 0.3638,
"step": 1017
},
{
"epoch": 1.86,
"learning_rate": 6.625281211488592e-06,
"loss": 0.364,
"step": 1018
},
{
"epoch": 1.86,
"learning_rate": 6.606665075497835e-06,
"loss": 0.3955,
"step": 1019
},
{
"epoch": 1.87,
"learning_rate": 6.588062220340651e-06,
"loss": 0.373,
"step": 1020
},
{
"epoch": 1.87,
"learning_rate": 6.569472718824894e-06,
"loss": 0.3893,
"step": 1021
},
{
"epoch": 1.87,
"learning_rate": 6.55089664370615e-06,
"loss": 0.3562,
"step": 1022
},
{
"epoch": 1.87,
"learning_rate": 6.532334067687458e-06,
"loss": 0.3911,
"step": 1023
},
{
"epoch": 1.87,
"learning_rate": 6.513785063419025e-06,
"loss": 0.3592,
"step": 1024
},
{
"epoch": 1.87,
"learning_rate": 6.4952497034979415e-06,
"loss": 0.3709,
"step": 1025
},
{
"epoch": 1.88,
"learning_rate": 6.47672806046789e-06,
"loss": 0.3692,
"step": 1026
},
{
"epoch": 1.88,
"learning_rate": 6.45822020681888e-06,
"loss": 0.3548,
"step": 1027
},
{
"epoch": 1.88,
"learning_rate": 6.4397262149869475e-06,
"loss": 0.3748,
"step": 1028
},
{
"epoch": 1.88,
"learning_rate": 6.421246157353863e-06,
"loss": 0.404,
"step": 1029
},
{
"epoch": 1.88,
"learning_rate": 6.402780106246884e-06,
"loss": 0.3677,
"step": 1030
},
{
"epoch": 1.89,
"learning_rate": 6.384328133938432e-06,
"loss": 0.3834,
"step": 1031
},
{
"epoch": 1.89,
"learning_rate": 6.365890312645833e-06,
"loss": 0.3599,
"step": 1032
},
{
"epoch": 1.89,
"learning_rate": 6.347466714531031e-06,
"loss": 0.3856,
"step": 1033
},
{
"epoch": 1.89,
"learning_rate": 6.329057411700299e-06,
"loss": 0.3641,
"step": 1034
},
{
"epoch": 1.89,
"learning_rate": 6.310662476203959e-06,
"loss": 0.3804,
"step": 1035
},
{
"epoch": 1.89,
"learning_rate": 6.292281980036116e-06,
"loss": 0.3792,
"step": 1036
},
{
"epoch": 1.9,
"learning_rate": 6.2739159951343435e-06,
"loss": 0.3855,
"step": 1037
},
{
"epoch": 1.9,
"learning_rate": 6.25556459337943e-06,
"loss": 0.3672,
"step": 1038
},
{
"epoch": 1.9,
"learning_rate": 6.237227846595093e-06,
"loss": 0.3656,
"step": 1039
},
{
"epoch": 1.9,
"learning_rate": 6.218905826547688e-06,
"loss": 0.3711,
"step": 1040
},
{
"epoch": 1.9,
"learning_rate": 6.200598604945929e-06,
"loss": 0.3792,
"step": 1041
},
{
"epoch": 1.91,
"learning_rate": 6.1823062534406196e-06,
"loss": 0.3718,
"step": 1042
},
{
"epoch": 1.91,
"learning_rate": 6.1640288436243635e-06,
"loss": 0.3758,
"step": 1043
},
{
"epoch": 1.91,
"learning_rate": 6.14576644703128e-06,
"loss": 0.3754,
"step": 1044
},
{
"epoch": 1.91,
"learning_rate": 6.127519135136732e-06,
"loss": 0.3731,
"step": 1045
},
{
"epoch": 1.91,
"learning_rate": 6.109286979357052e-06,
"loss": 0.3542,
"step": 1046
},
{
"epoch": 1.91,
"learning_rate": 6.091070051049241e-06,
"loss": 0.3595,
"step": 1047
},
{
"epoch": 1.92,
"learning_rate": 6.072868421510711e-06,
"loss": 0.397,
"step": 1048
},
{
"epoch": 1.92,
"learning_rate": 6.0546821619790005e-06,
"loss": 0.3925,
"step": 1049
},
{
"epoch": 1.92,
"learning_rate": 6.036511343631488e-06,
"loss": 0.3714,
"step": 1050
},
{
"epoch": 1.92,
"learning_rate": 6.0183560375851095e-06,
"loss": 0.371,
"step": 1051
},
{
"epoch": 1.92,
"learning_rate": 6.00021631489611e-06,
"loss": 0.3731,
"step": 1052
},
{
"epoch": 1.93,
"learning_rate": 5.982092246559728e-06,
"loss": 0.386,
"step": 1053
},
{
"epoch": 1.93,
"learning_rate": 5.963983903509936e-06,
"loss": 0.3633,
"step": 1054
},
{
"epoch": 1.93,
"learning_rate": 5.945891356619166e-06,
"loss": 0.3781,
"step": 1055
},
{
"epoch": 1.93,
"learning_rate": 5.927814676698027e-06,
"loss": 0.3689,
"step": 1056
},
{
"epoch": 1.93,
"learning_rate": 5.909753934495019e-06,
"loss": 0.3588,
"step": 1057
},
{
"epoch": 1.93,
"learning_rate": 5.891709200696281e-06,
"loss": 0.3772,
"step": 1058
},
{
"epoch": 1.94,
"learning_rate": 5.873680545925281e-06,
"loss": 0.3636,
"step": 1059
},
{
"epoch": 1.94,
"learning_rate": 5.855668040742564e-06,
"loss": 0.3679,
"step": 1060
},
{
"epoch": 1.94,
"learning_rate": 5.837671755645473e-06,
"loss": 0.3825,
"step": 1061
},
{
"epoch": 1.94,
"learning_rate": 5.819691761067866e-06,
"loss": 0.3932,
"step": 1062
},
{
"epoch": 1.94,
"learning_rate": 5.801728127379837e-06,
"loss": 0.3792,
"step": 1063
},
{
"epoch": 1.95,
"learning_rate": 5.783780924887462e-06,
"loss": 0.3635,
"step": 1064
},
{
"epoch": 1.95,
"learning_rate": 5.765850223832489e-06,
"loss": 0.3677,
"step": 1065
},
{
"epoch": 1.95,
"learning_rate": 5.74793609439209e-06,
"loss": 0.3854,
"step": 1066
},
{
"epoch": 1.95,
"learning_rate": 5.730038606678593e-06,
"loss": 0.3634,
"step": 1067
},
{
"epoch": 1.95,
"learning_rate": 5.712157830739176e-06,
"loss": 0.381,
"step": 1068
},
{
"epoch": 1.95,
"learning_rate": 5.69429383655561e-06,
"loss": 0.3723,
"step": 1069
},
{
"epoch": 1.96,
"learning_rate": 5.676446694044003e-06,
"loss": 0.3711,
"step": 1070
},
{
"epoch": 1.96,
"learning_rate": 5.658616473054495e-06,
"loss": 0.3741,
"step": 1071
},
{
"epoch": 1.96,
"learning_rate": 5.640803243370997e-06,
"loss": 0.3747,
"step": 1072
},
{
"epoch": 1.96,
"learning_rate": 5.623007074710928e-06,
"loss": 0.3629,
"step": 1073
},
{
"epoch": 1.96,
"learning_rate": 5.605228036724928e-06,
"loss": 0.3681,
"step": 1074
},
{
"epoch": 1.97,
"learning_rate": 5.587466198996592e-06,
"loss": 0.3885,
"step": 1075
},
{
"epoch": 1.97,
"learning_rate": 5.569721631042193e-06,
"loss": 0.3743,
"step": 1076
},
{
"epoch": 1.97,
"learning_rate": 5.551994402310428e-06,
"loss": 0.3781,
"step": 1077
},
{
"epoch": 1.97,
"learning_rate": 5.5342845821821145e-06,
"loss": 0.3666,
"step": 1078
},
{
"epoch": 1.97,
"learning_rate": 5.51659223996994e-06,
"loss": 0.3805,
"step": 1079
},
{
"epoch": 1.97,
"learning_rate": 5.498917444918199e-06,
"loss": 0.3554,
"step": 1080
},
{
"epoch": 1.98,
"learning_rate": 5.481260266202496e-06,
"loss": 0.3702,
"step": 1081
},
{
"epoch": 1.98,
"learning_rate": 5.463620772929494e-06,
"loss": 0.3821,
"step": 1082
},
{
"epoch": 1.98,
"learning_rate": 5.445999034136637e-06,
"loss": 0.3757,
"step": 1083
},
{
"epoch": 1.98,
"learning_rate": 5.428395118791887e-06,
"loss": 0.3588,
"step": 1084
},
{
"epoch": 1.98,
"learning_rate": 5.410809095793436e-06,
"loss": 0.3819,
"step": 1085
},
{
"epoch": 1.99,
"learning_rate": 5.393241033969466e-06,
"loss": 0.3716,
"step": 1086
},
{
"epoch": 1.99,
"learning_rate": 5.375691002077853e-06,
"loss": 0.3851,
"step": 1087
},
{
"epoch": 1.99,
"learning_rate": 5.358159068805902e-06,
"loss": 0.3566,
"step": 1088
},
{
"epoch": 1.99,
"learning_rate": 5.340645302770097e-06,
"loss": 0.3563,
"step": 1089
},
{
"epoch": 1.99,
"learning_rate": 5.323149772515812e-06,
"loss": 0.3708,
"step": 1090
},
{
"epoch": 1.99,
"learning_rate": 5.305672546517047e-06,
"loss": 0.3732,
"step": 1091
},
{
"epoch": 2.0,
"learning_rate": 5.288213693176165e-06,
"loss": 0.3659,
"step": 1092
},
{
"epoch": 2.0,
"learning_rate": 5.270773280823627e-06,
"loss": 0.3716,
"step": 1093
},
{
"epoch": 2.0,
"learning_rate": 5.253351377717707e-06,
"loss": 0.3442,
"step": 1094
},
{
"epoch": 2.0,
"learning_rate": 5.235948052044255e-06,
"loss": 0.238,
"step": 1095
},
{
"epoch": 2.0,
"learning_rate": 5.218563371916403e-06,
"loss": 0.2582,
"step": 1096
},
{
"epoch": 2.01,
"learning_rate": 5.2011974053743005e-06,
"loss": 0.2428,
"step": 1097
},
{
"epoch": 2.01,
"learning_rate": 5.183850220384874e-06,
"loss": 0.23,
"step": 1098
},
{
"epoch": 2.01,
"learning_rate": 5.166521884841533e-06,
"loss": 0.2213,
"step": 1099
},
{
"epoch": 2.01,
"learning_rate": 5.149212466563901e-06,
"loss": 0.241,
"step": 1100
},
{
"epoch": 2.01,
"learning_rate": 5.131922033297587e-06,
"loss": 0.2402,
"step": 1101
},
{
"epoch": 2.02,
"learning_rate": 5.114650652713885e-06,
"loss": 0.2182,
"step": 1102
},
{
"epoch": 2.02,
"learning_rate": 5.097398392409514e-06,
"loss": 0.2325,
"step": 1103
},
{
"epoch": 2.02,
"learning_rate": 5.0801653199063745e-06,
"loss": 0.2307,
"step": 1104
},
{
"epoch": 2.02,
"learning_rate": 5.062951502651261e-06,
"loss": 0.2159,
"step": 1105
},
{
"epoch": 2.02,
"learning_rate": 5.045757008015606e-06,
"loss": 0.2085,
"step": 1106
},
{
"epoch": 2.02,
"learning_rate": 5.0285819032952195e-06,
"loss": 0.2257,
"step": 1107
},
{
"epoch": 2.03,
"learning_rate": 5.0114262557100246e-06,
"loss": 0.227,
"step": 1108
},
{
"epoch": 2.03,
"learning_rate": 4.99429013240379e-06,
"loss": 0.2344,
"step": 1109
},
{
"epoch": 2.03,
"learning_rate": 4.9771736004438685e-06,
"loss": 0.2166,
"step": 1110
},
{
"epoch": 2.03,
"learning_rate": 4.960076726820947e-06,
"loss": 0.2376,
"step": 1111
},
{
"epoch": 2.03,
"learning_rate": 4.942999578448763e-06,
"loss": 0.2309,
"step": 1112
},
{
"epoch": 2.04,
"learning_rate": 4.925942222163852e-06,
"loss": 0.2286,
"step": 1113
},
{
"epoch": 2.04,
"learning_rate": 4.908904724725299e-06,
"loss": 0.2108,
"step": 1114
},
{
"epoch": 2.04,
"learning_rate": 4.8918871528144574e-06,
"loss": 0.229,
"step": 1115
},
{
"epoch": 2.04,
"learning_rate": 4.874889573034693e-06,
"loss": 0.2102,
"step": 1116
},
{
"epoch": 2.04,
"learning_rate": 4.857912051911131e-06,
"loss": 0.2198,
"step": 1117
},
{
"epoch": 2.04,
"learning_rate": 4.840954655890392e-06,
"loss": 0.239,
"step": 1118
},
{
"epoch": 2.05,
"learning_rate": 4.824017451340325e-06,
"loss": 0.226,
"step": 1119
},
{
"epoch": 2.05,
"learning_rate": 4.8071005045497644e-06,
"loss": 0.2263,
"step": 1120
},
{
"epoch": 2.05,
"learning_rate": 4.7902038817282504e-06,
"loss": 0.2212,
"step": 1121
},
{
"epoch": 2.05,
"learning_rate": 4.773327649005778e-06,
"loss": 0.2362,
"step": 1122
},
{
"epoch": 2.05,
"learning_rate": 4.756471872432551e-06,
"loss": 0.2148,
"step": 1123
},
{
"epoch": 2.06,
"learning_rate": 4.739636617978701e-06,
"loss": 0.2317,
"step": 1124
},
{
"epoch": 2.06,
"learning_rate": 4.7228219515340446e-06,
"loss": 0.243,
"step": 1125
},
{
"epoch": 2.06,
"learning_rate": 4.706027938907819e-06,
"loss": 0.2238,
"step": 1126
},
{
"epoch": 2.06,
"learning_rate": 4.689254645828427e-06,
"loss": 0.2252,
"step": 1127
},
{
"epoch": 2.06,
"learning_rate": 4.6725021379431764e-06,
"loss": 0.2343,
"step": 1128
},
{
"epoch": 2.06,
"learning_rate": 4.655770480818039e-06,
"loss": 0.2297,
"step": 1129
},
{
"epoch": 2.07,
"learning_rate": 4.639059739937365e-06,
"loss": 0.233,
"step": 1130
},
{
"epoch": 2.07,
"learning_rate": 4.622369980703645e-06,
"loss": 0.2335,
"step": 1131
},
{
"epoch": 2.07,
"learning_rate": 4.605701268437265e-06,
"loss": 0.2341,
"step": 1132
},
{
"epoch": 2.07,
"learning_rate": 4.589053668376222e-06,
"loss": 0.2298,
"step": 1133
},
{
"epoch": 2.07,
"learning_rate": 4.572427245675891e-06,
"loss": 0.2288,
"step": 1134
},
{
"epoch": 2.08,
"learning_rate": 4.555822065408762e-06,
"loss": 0.2173,
"step": 1135
},
{
"epoch": 2.08,
"learning_rate": 4.539238192564187e-06,
"loss": 0.2377,
"step": 1136
},
{
"epoch": 2.08,
"learning_rate": 4.522675692048123e-06,
"loss": 0.2165,
"step": 1137
},
{
"epoch": 2.08,
"learning_rate": 4.506134628682878e-06,
"loss": 0.219,
"step": 1138
},
{
"epoch": 2.08,
"learning_rate": 4.489615067206869e-06,
"loss": 0.2225,
"step": 1139
},
{
"epoch": 2.08,
"learning_rate": 4.473117072274352e-06,
"loss": 0.2371,
"step": 1140
},
{
"epoch": 2.09,
"learning_rate": 4.45664070845517e-06,
"loss": 0.2341,
"step": 1141
},
{
"epoch": 2.09,
"learning_rate": 4.440186040234524e-06,
"loss": 0.2222,
"step": 1142
},
{
"epoch": 2.09,
"learning_rate": 4.423753132012681e-06,
"loss": 0.2418,
"step": 1143
},
{
"epoch": 2.09,
"learning_rate": 4.407342048104753e-06,
"loss": 0.255,
"step": 1144
},
{
"epoch": 2.09,
"learning_rate": 4.390952852740445e-06,
"loss": 0.2225,
"step": 1145
},
{
"epoch": 2.1,
"learning_rate": 4.374585610063784e-06,
"loss": 0.2304,
"step": 1146
},
{
"epoch": 2.1,
"learning_rate": 4.358240384132876e-06,
"loss": 0.2242,
"step": 1147
},
{
"epoch": 2.1,
"learning_rate": 4.341917238919671e-06,
"loss": 0.2019,
"step": 1148
},
{
"epoch": 2.1,
"learning_rate": 4.325616238309687e-06,
"loss": 0.228,
"step": 1149
},
{
"epoch": 2.1,
"learning_rate": 4.309337446101779e-06,
"loss": 0.236,
"step": 1150
},
{
"epoch": 2.1,
"learning_rate": 4.293080926007878e-06,
"loss": 0.2225,
"step": 1151
},
{
"epoch": 2.11,
"learning_rate": 4.2768467416527536e-06,
"loss": 0.2296,
"step": 1152
},
{
"epoch": 2.11,
"learning_rate": 4.260634956573746e-06,
"loss": 0.2249,
"step": 1153
},
{
"epoch": 2.11,
"learning_rate": 4.244445634220545e-06,
"loss": 0.2215,
"step": 1154
},
{
"epoch": 2.11,
"learning_rate": 4.228278837954914e-06,
"loss": 0.2291,
"step": 1155
},
{
"epoch": 2.11,
"learning_rate": 4.212134631050453e-06,
"loss": 0.2361,
"step": 1156
},
{
"epoch": 2.12,
"learning_rate": 4.196013076692363e-06,
"loss": 0.2341,
"step": 1157
},
{
"epoch": 2.12,
"learning_rate": 4.179914237977177e-06,
"loss": 0.2222,
"step": 1158
},
{
"epoch": 2.12,
"learning_rate": 4.163838177912527e-06,
"loss": 0.2285,
"step": 1159
},
{
"epoch": 2.12,
"learning_rate": 4.147784959416894e-06,
"loss": 0.2218,
"step": 1160
},
{
"epoch": 2.12,
"learning_rate": 4.13175464531936e-06,
"loss": 0.2199,
"step": 1161
},
{
"epoch": 2.12,
"learning_rate": 4.1157472983593636e-06,
"loss": 0.2321,
"step": 1162
},
{
"epoch": 2.13,
"learning_rate": 4.099762981186463e-06,
"loss": 0.2171,
"step": 1163
},
{
"epoch": 2.13,
"learning_rate": 4.083801756360071e-06,
"loss": 0.2307,
"step": 1164
},
{
"epoch": 2.13,
"learning_rate": 4.067863686349226e-06,
"loss": 0.22,
"step": 1165
},
{
"epoch": 2.13,
"learning_rate": 4.051948833532342e-06,
"loss": 0.219,
"step": 1166
},
{
"epoch": 2.13,
"learning_rate": 4.036057260196973e-06,
"loss": 0.2258,
"step": 1167
},
{
"epoch": 2.14,
"learning_rate": 4.02018902853955e-06,
"loss": 0.2309,
"step": 1168
},
{
"epoch": 2.14,
"learning_rate": 4.004344200665159e-06,
"loss": 0.2321,
"step": 1169
},
{
"epoch": 2.14,
"learning_rate": 3.988522838587281e-06,
"loss": 0.217,
"step": 1170
},
{
"epoch": 2.14,
"learning_rate": 3.972725004227561e-06,
"loss": 0.2199,
"step": 1171
},
{
"epoch": 2.14,
"learning_rate": 3.956950759415556e-06,
"loss": 0.2276,
"step": 1172
},
{
"epoch": 2.14,
"learning_rate": 3.9412001658885114e-06,
"loss": 0.2356,
"step": 1173
},
{
"epoch": 2.15,
"learning_rate": 3.925473285291092e-06,
"loss": 0.2261,
"step": 1174
},
{
"epoch": 2.15,
"learning_rate": 3.909770179175155e-06,
"loss": 0.2175,
"step": 1175
},
{
"epoch": 2.15,
"learning_rate": 3.894090908999524e-06,
"loss": 0.2233,
"step": 1176
},
{
"epoch": 2.15,
"learning_rate": 3.878435536129721e-06,
"loss": 0.2272,
"step": 1177
},
{
"epoch": 2.15,
"learning_rate": 3.862804121837733e-06,
"loss": 0.2215,
"step": 1178
},
{
"epoch": 2.16,
"learning_rate": 3.847196727301796e-06,
"loss": 0.2259,
"step": 1179
},
{
"epoch": 2.16,
"learning_rate": 3.831613413606124e-06,
"loss": 0.2329,
"step": 1180
},
{
"epoch": 2.16,
"learning_rate": 3.816054241740685e-06,
"loss": 0.2217,
"step": 1181
},
{
"epoch": 2.16,
"learning_rate": 3.8005192726009664e-06,
"loss": 0.2215,
"step": 1182
},
{
"epoch": 2.16,
"learning_rate": 3.785008566987728e-06,
"loss": 0.2322,
"step": 1183
},
{
"epoch": 2.17,
"learning_rate": 3.7695221856067597e-06,
"loss": 0.2227,
"step": 1184
},
{
"epoch": 2.17,
"learning_rate": 3.754060189068671e-06,
"loss": 0.2436,
"step": 1185
},
{
"epoch": 2.17,
"learning_rate": 3.7386226378886082e-06,
"loss": 0.2193,
"step": 1186
},
{
"epoch": 2.17,
"learning_rate": 3.723209592486059e-06,
"loss": 0.2235,
"step": 1187
},
{
"epoch": 2.17,
"learning_rate": 3.707821113184601e-06,
"loss": 0.2304,
"step": 1188
},
{
"epoch": 2.17,
"learning_rate": 3.6924572602116606e-06,
"loss": 0.2248,
"step": 1189
},
{
"epoch": 2.18,
"learning_rate": 3.6771180936982785e-06,
"loss": 0.2369,
"step": 1190
},
{
"epoch": 2.18,
"learning_rate": 3.6618036736788888e-06,
"loss": 0.2376,
"step": 1191
},
{
"epoch": 2.18,
"learning_rate": 3.646514060091061e-06,
"loss": 0.2486,
"step": 1192
},
{
"epoch": 2.18,
"learning_rate": 3.6312493127752835e-06,
"loss": 0.2493,
"step": 1193
},
{
"epoch": 2.18,
"learning_rate": 3.61600949147472e-06,
"loss": 0.2367,
"step": 1194
},
{
"epoch": 2.19,
"learning_rate": 3.6007946558349815e-06,
"loss": 0.24,
"step": 1195
},
{
"epoch": 2.19,
"learning_rate": 3.5856048654038867e-06,
"loss": 0.2247,
"step": 1196
},
{
"epoch": 2.19,
"learning_rate": 3.570440179631234e-06,
"loss": 0.225,
"step": 1197
},
{
"epoch": 2.19,
"learning_rate": 3.555300657868571e-06,
"loss": 0.2321,
"step": 1198
},
{
"epoch": 2.19,
"learning_rate": 3.5401863593689554e-06,
"loss": 0.2221,
"step": 1199
},
{
"epoch": 2.19,
"learning_rate": 3.5250973432867195e-06,
"loss": 0.2307,
"step": 1200
},
{
"epoch": 2.2,
"learning_rate": 3.5100336686772596e-06,
"loss": 0.2337,
"step": 1201
},
{
"epoch": 2.2,
"learning_rate": 3.4949953944967785e-06,
"loss": 0.2388,
"step": 1202
},
{
"epoch": 2.2,
"learning_rate": 3.4799825796020716e-06,
"loss": 0.22,
"step": 1203
},
{
"epoch": 2.2,
"learning_rate": 3.46499528275029e-06,
"loss": 0.2314,
"step": 1204
},
{
"epoch": 2.2,
"learning_rate": 3.450033562598714e-06,
"loss": 0.2283,
"step": 1205
},
{
"epoch": 2.21,
"learning_rate": 3.4350974777045175e-06,
"loss": 0.2371,
"step": 1206
},
{
"epoch": 2.21,
"learning_rate": 3.4201870865245525e-06,
"loss": 0.2307,
"step": 1207
},
{
"epoch": 2.21,
"learning_rate": 3.4053024474151032e-06,
"loss": 0.2241,
"step": 1208
},
{
"epoch": 2.21,
"learning_rate": 3.3904436186316636e-06,
"loss": 0.2346,
"step": 1209
},
{
"epoch": 2.21,
"learning_rate": 3.3756106583287206e-06,
"loss": 0.2175,
"step": 1210
},
{
"epoch": 2.21,
"learning_rate": 3.360803624559509e-06,
"loss": 0.2314,
"step": 1211
},
{
"epoch": 2.22,
"learning_rate": 3.346022575275795e-06,
"loss": 0.2318,
"step": 1212
},
{
"epoch": 2.22,
"learning_rate": 3.3312675683276453e-06,
"loss": 0.2212,
"step": 1213
},
{
"epoch": 2.22,
"learning_rate": 3.3165386614632045e-06,
"loss": 0.2371,
"step": 1214
},
{
"epoch": 2.22,
"learning_rate": 3.3018359123284604e-06,
"loss": 0.2321,
"step": 1215
},
{
"epoch": 2.22,
"learning_rate": 3.2871593784670386e-06,
"loss": 0.2155,
"step": 1216
},
{
"epoch": 2.23,
"learning_rate": 3.2725091173199497e-06,
"loss": 0.2462,
"step": 1217
},
{
"epoch": 2.23,
"learning_rate": 3.2578851862253802e-06,
"loss": 0.2124,
"step": 1218
},
{
"epoch": 2.23,
"learning_rate": 3.2432876424184756e-06,
"loss": 0.2309,
"step": 1219
},
{
"epoch": 2.23,
"learning_rate": 3.2287165430310985e-06,
"loss": 0.2306,
"step": 1220
},
{
"epoch": 2.23,
"learning_rate": 3.2141719450916065e-06,
"loss": 0.228,
"step": 1221
},
{
"epoch": 2.23,
"learning_rate": 3.1996539055246543e-06,
"loss": 0.223,
"step": 1222
},
{
"epoch": 2.24,
"learning_rate": 3.1851624811509385e-06,
"loss": 0.2313,
"step": 1223
},
{
"epoch": 2.24,
"learning_rate": 3.17069772868699e-06,
"loss": 0.2217,
"step": 1224
},
{
"epoch": 2.24,
"learning_rate": 3.1562597047449597e-06,
"loss": 0.2358,
"step": 1225
},
{
"epoch": 2.24,
"learning_rate": 3.141848465832381e-06,
"loss": 0.2052,
"step": 1226
},
{
"epoch": 2.24,
"learning_rate": 3.1274640683519577e-06,
"loss": 0.2137,
"step": 1227
},
{
"epoch": 2.25,
"learning_rate": 3.1131065686013417e-06,
"loss": 0.22,
"step": 1228
},
{
"epoch": 2.25,
"learning_rate": 3.0987760227729124e-06,
"loss": 0.2189,
"step": 1229
},
{
"epoch": 2.25,
"learning_rate": 3.084472486953558e-06,
"loss": 0.2226,
"step": 1230
},
{
"epoch": 2.25,
"learning_rate": 3.0701960171244504e-06,
"loss": 0.2232,
"step": 1231
},
{
"epoch": 2.25,
"learning_rate": 3.055946669160841e-06,
"loss": 0.2341,
"step": 1232
},
{
"epoch": 2.25,
"learning_rate": 3.0417244988318217e-06,
"loss": 0.2355,
"step": 1233
},
{
"epoch": 2.26,
"learning_rate": 3.0275295618001177e-06,
"loss": 0.2165,
"step": 1234
},
{
"epoch": 2.26,
"learning_rate": 3.0133619136218772e-06,
"loss": 0.2178,
"step": 1235
},
{
"epoch": 2.26,
"learning_rate": 2.9992216097464346e-06,
"loss": 0.2284,
"step": 1236
},
{
"epoch": 2.26,
"learning_rate": 2.98510870551611e-06,
"loss": 0.2317,
"step": 1237
},
{
"epoch": 2.26,
"learning_rate": 2.9710232561659834e-06,
"loss": 0.2226,
"step": 1238
},
{
"epoch": 2.27,
"learning_rate": 2.9569653168236847e-06,
"loss": 0.2357,
"step": 1239
},
{
"epoch": 2.27,
"learning_rate": 2.942934942509171e-06,
"loss": 0.2228,
"step": 1240
},
{
"epoch": 2.27,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.2308,
"step": 1241
},
{
"epoch": 2.27,
"learning_rate": 2.914957108503722e-06,
"loss": 0.2207,
"step": 1242
},
{
"epoch": 2.27,
"learning_rate": 2.9010097583124208e-06,
"loss": 0.2319,
"step": 1243
},
{
"epoch": 2.27,
"learning_rate": 2.8870901921477656e-06,
"loss": 0.2325,
"step": 1244
},
{
"epoch": 2.28,
"learning_rate": 2.873198464488147e-06,
"loss": 0.2352,
"step": 1245
},
{
"epoch": 2.28,
"learning_rate": 2.8593346297030077e-06,
"loss": 0.2267,
"step": 1246
},
{
"epoch": 2.28,
"learning_rate": 2.8454987420526215e-06,
"loss": 0.2375,
"step": 1247
},
{
"epoch": 2.28,
"learning_rate": 2.831690855687882e-06,
"loss": 0.2489,
"step": 1248
},
{
"epoch": 2.28,
"learning_rate": 2.8179110246500905e-06,
"loss": 0.2138,
"step": 1249
},
{
"epoch": 2.29,
"learning_rate": 2.8041593028707513e-06,
"loss": 0.2278,
"step": 1250
},
{
"epoch": 2.29,
"learning_rate": 2.790435744171348e-06,
"loss": 0.2378,
"step": 1251
},
{
"epoch": 2.29,
"learning_rate": 2.7767404022631404e-06,
"loss": 0.2267,
"step": 1252
},
{
"epoch": 2.29,
"learning_rate": 2.7630733307469593e-06,
"loss": 0.2359,
"step": 1253
},
{
"epoch": 2.29,
"learning_rate": 2.749434583112984e-06,
"loss": 0.2303,
"step": 1254
},
{
"epoch": 2.29,
"learning_rate": 2.7358242127405434e-06,
"loss": 0.2433,
"step": 1255
},
{
"epoch": 2.3,
"learning_rate": 2.7222422728979015e-06,
"loss": 0.2192,
"step": 1256
},
{
"epoch": 2.3,
"learning_rate": 2.708688816742051e-06,
"loss": 0.2309,
"step": 1257
},
{
"epoch": 2.3,
"learning_rate": 2.695163897318508e-06,
"loss": 0.2202,
"step": 1258
},
{
"epoch": 2.3,
"learning_rate": 2.681667567561095e-06,
"loss": 0.2278,
"step": 1259
},
{
"epoch": 2.3,
"learning_rate": 2.668199880291751e-06,
"loss": 0.2145,
"step": 1260
},
{
"epoch": 2.31,
"learning_rate": 2.6547608882203056e-06,
"loss": 0.223,
"step": 1261
},
{
"epoch": 2.31,
"learning_rate": 2.6413506439442804e-06,
"loss": 0.2479,
"step": 1262
},
{
"epoch": 2.31,
"learning_rate": 2.6279691999486945e-06,
"loss": 0.2328,
"step": 1263
},
{
"epoch": 2.31,
"learning_rate": 2.614616608605833e-06,
"loss": 0.2407,
"step": 1264
},
{
"epoch": 2.31,
"learning_rate": 2.601292922175066e-06,
"loss": 0.2036,
"step": 1265
},
{
"epoch": 2.31,
"learning_rate": 2.587998192802639e-06,
"loss": 0.2284,
"step": 1266
},
{
"epoch": 2.32,
"learning_rate": 2.5747324725214595e-06,
"loss": 0.2138,
"step": 1267
},
{
"epoch": 2.32,
"learning_rate": 2.5614958132508983e-06,
"loss": 0.2319,
"step": 1268
},
{
"epoch": 2.32,
"learning_rate": 2.5482882667965945e-06,
"loss": 0.2272,
"step": 1269
},
{
"epoch": 2.32,
"learning_rate": 2.5351098848502385e-06,
"loss": 0.2344,
"step": 1270
},
{
"epoch": 2.32,
"learning_rate": 2.5219607189893804e-06,
"loss": 0.234,
"step": 1271
},
{
"epoch": 2.33,
"learning_rate": 2.508840820677221e-06,
"loss": 0.2281,
"step": 1272
},
{
"epoch": 2.33,
"learning_rate": 2.495750241262417e-06,
"loss": 0.2278,
"step": 1273
},
{
"epoch": 2.33,
"learning_rate": 2.482689031978872e-06,
"loss": 0.2355,
"step": 1274
},
{
"epoch": 2.33,
"learning_rate": 2.4696572439455503e-06,
"loss": 0.2283,
"step": 1275
},
{
"epoch": 2.33,
"learning_rate": 2.4566549281662587e-06,
"loss": 0.2226,
"step": 1276
},
{
"epoch": 2.34,
"learning_rate": 2.443682135529456e-06,
"loss": 0.2282,
"step": 1277
},
{
"epoch": 2.34,
"learning_rate": 2.430738916808061e-06,
"loss": 0.2181,
"step": 1278
},
{
"epoch": 2.34,
"learning_rate": 2.417825322659236e-06,
"loss": 0.2316,
"step": 1279
},
{
"epoch": 2.34,
"learning_rate": 2.4049414036242057e-06,
"loss": 0.2322,
"step": 1280
},
{
"epoch": 2.34,
"learning_rate": 2.392087210128048e-06,
"loss": 0.2192,
"step": 1281
},
{
"epoch": 2.34,
"learning_rate": 2.379262792479504e-06,
"loss": 0.2271,
"step": 1282
},
{
"epoch": 2.35,
"learning_rate": 2.3664682008707753e-06,
"loss": 0.2293,
"step": 1283
},
{
"epoch": 2.35,
"learning_rate": 2.3537034853773357e-06,
"loss": 0.2374,
"step": 1284
},
{
"epoch": 2.35,
"learning_rate": 2.340968695957724e-06,
"loss": 0.2176,
"step": 1285
},
{
"epoch": 2.35,
"learning_rate": 2.328263882453353e-06,
"loss": 0.2213,
"step": 1286
},
{
"epoch": 2.35,
"learning_rate": 2.3155890945883274e-06,
"loss": 0.2323,
"step": 1287
},
{
"epoch": 2.36,
"learning_rate": 2.3029443819692255e-06,
"loss": 0.229,
"step": 1288
},
{
"epoch": 2.36,
"learning_rate": 2.29032979408492e-06,
"loss": 0.2237,
"step": 1289
},
{
"epoch": 2.36,
"learning_rate": 2.2777453803063834e-06,
"loss": 0.2139,
"step": 1290
},
{
"epoch": 2.36,
"learning_rate": 2.265191189886492e-06,
"loss": 0.2132,
"step": 1291
},
{
"epoch": 2.36,
"learning_rate": 2.252667271959834e-06,
"loss": 0.2254,
"step": 1292
},
{
"epoch": 2.36,
"learning_rate": 2.240173675542513e-06,
"loss": 0.2262,
"step": 1293
},
{
"epoch": 2.37,
"learning_rate": 2.2277104495319714e-06,
"loss": 0.2137,
"step": 1294
},
{
"epoch": 2.37,
"learning_rate": 2.215277642706778e-06,
"loss": 0.2216,
"step": 1295
},
{
"epoch": 2.37,
"learning_rate": 2.202875303726445e-06,
"loss": 0.2329,
"step": 1296
},
{
"epoch": 2.37,
"learning_rate": 2.190503481131252e-06,
"loss": 0.2147,
"step": 1297
},
{
"epoch": 2.37,
"learning_rate": 2.1781622233420353e-06,
"loss": 0.2178,
"step": 1298
},
{
"epoch": 2.38,
"learning_rate": 2.1658515786599985e-06,
"loss": 0.2366,
"step": 1299
},
{
"epoch": 2.38,
"learning_rate": 2.1535715952665494e-06,
"loss": 0.2285,
"step": 1300
},
{
"epoch": 2.38,
"learning_rate": 2.1413223212230806e-06,
"loss": 0.2186,
"step": 1301
},
{
"epoch": 2.38,
"learning_rate": 2.129103804470797e-06,
"loss": 0.2248,
"step": 1302
},
{
"epoch": 2.38,
"learning_rate": 2.1169160928305323e-06,
"loss": 0.2256,
"step": 1303
},
{
"epoch": 2.38,
"learning_rate": 2.1047592340025456e-06,
"loss": 0.231,
"step": 1304
},
{
"epoch": 2.39,
"learning_rate": 2.0926332755663493e-06,
"loss": 0.2316,
"step": 1305
},
{
"epoch": 2.39,
"learning_rate": 2.080538264980523e-06,
"loss": 0.2235,
"step": 1306
},
{
"epoch": 2.39,
"learning_rate": 2.068474249582508e-06,
"loss": 0.2222,
"step": 1307
},
{
"epoch": 2.39,
"learning_rate": 2.056441276588448e-06,
"loss": 0.2166,
"step": 1308
},
{
"epoch": 2.39,
"learning_rate": 2.044439393092994e-06,
"loss": 0.2282,
"step": 1309
},
{
"epoch": 2.4,
"learning_rate": 2.0324686460691124e-06,
"loss": 0.2286,
"step": 1310
},
{
"epoch": 2.4,
"learning_rate": 2.020529082367909e-06,
"loss": 0.2274,
"step": 1311
},
{
"epoch": 2.4,
"learning_rate": 2.0086207487184504e-06,
"loss": 0.23,
"step": 1312
},
{
"epoch": 2.4,
"learning_rate": 1.9967436917275683e-06,
"loss": 0.2103,
"step": 1313
},
{
"epoch": 2.4,
"learning_rate": 1.984897957879687e-06,
"loss": 0.2535,
"step": 1314
},
{
"epoch": 2.4,
"learning_rate": 1.9730835935366355e-06,
"loss": 0.2315,
"step": 1315
},
{
"epoch": 2.41,
"learning_rate": 1.9613006449374715e-06,
"loss": 0.2072,
"step": 1316
},
{
"epoch": 2.41,
"learning_rate": 1.949549158198295e-06,
"loss": 0.2247,
"step": 1317
},
{
"epoch": 2.41,
"learning_rate": 1.937829179312076e-06,
"loss": 0.2368,
"step": 1318
},
{
"epoch": 2.41,
"learning_rate": 1.9261407541484657e-06,
"loss": 0.2217,
"step": 1319
},
{
"epoch": 2.41,
"learning_rate": 1.9144839284536177e-06,
"loss": 0.2369,
"step": 1320
},
{
"epoch": 2.42,
"learning_rate": 1.9028587478500126e-06,
"loss": 0.2233,
"step": 1321
},
{
"epoch": 2.42,
"learning_rate": 1.8912652578362857e-06,
"loss": 0.2573,
"step": 1322
},
{
"epoch": 2.42,
"learning_rate": 1.8797035037870326e-06,
"loss": 0.2232,
"step": 1323
},
{
"epoch": 2.42,
"learning_rate": 1.8681735309526438e-06,
"loss": 0.2237,
"step": 1324
},
{
"epoch": 2.42,
"learning_rate": 1.8566753844591257e-06,
"loss": 0.2256,
"step": 1325
},
{
"epoch": 2.42,
"learning_rate": 1.8452091093079217e-06,
"loss": 0.2164,
"step": 1326
},
{
"epoch": 2.43,
"learning_rate": 1.833774750375734e-06,
"loss": 0.2266,
"step": 1327
},
{
"epoch": 2.43,
"learning_rate": 1.822372352414361e-06,
"loss": 0.2283,
"step": 1328
},
{
"epoch": 2.43,
"learning_rate": 1.8110019600505014e-06,
"loss": 0.2344,
"step": 1329
},
{
"epoch": 2.43,
"learning_rate": 1.7996636177855931e-06,
"loss": 0.2167,
"step": 1330
},
{
"epoch": 2.43,
"learning_rate": 1.7883573699956425e-06,
"loss": 0.2197,
"step": 1331
},
{
"epoch": 2.44,
"learning_rate": 1.7770832609310374e-06,
"loss": 0.2177,
"step": 1332
},
{
"epoch": 2.44,
"learning_rate": 1.765841334716384e-06,
"loss": 0.2287,
"step": 1333
},
{
"epoch": 2.44,
"learning_rate": 1.75463163535033e-06,
"loss": 0.2278,
"step": 1334
},
{
"epoch": 2.44,
"learning_rate": 1.7434542067053972e-06,
"loss": 0.2334,
"step": 1335
},
{
"epoch": 2.44,
"learning_rate": 1.7323090925277986e-06,
"loss": 0.2454,
"step": 1336
},
{
"epoch": 2.44,
"learning_rate": 1.7211963364372852e-06,
"loss": 0.2329,
"step": 1337
},
{
"epoch": 2.45,
"learning_rate": 1.7101159819269585e-06,
"loss": 0.2198,
"step": 1338
},
{
"epoch": 2.45,
"learning_rate": 1.6990680723631048e-06,
"loss": 0.2246,
"step": 1339
},
{
"epoch": 2.45,
"learning_rate": 1.6880526509850347e-06,
"loss": 0.2284,
"step": 1340
},
{
"epoch": 2.45,
"learning_rate": 1.6770697609049024e-06,
"loss": 0.2225,
"step": 1341
},
{
"epoch": 2.45,
"learning_rate": 1.6661194451075346e-06,
"loss": 0.2256,
"step": 1342
},
{
"epoch": 2.46,
"learning_rate": 1.6552017464502813e-06,
"loss": 0.238,
"step": 1343
},
{
"epoch": 2.46,
"learning_rate": 1.6443167076628285e-06,
"loss": 0.2221,
"step": 1344
},
{
"epoch": 2.46,
"learning_rate": 1.6334643713470344e-06,
"loss": 0.2208,
"step": 1345
},
{
"epoch": 2.46,
"learning_rate": 1.6226447799767775e-06,
"loss": 0.2333,
"step": 1346
},
{
"epoch": 2.46,
"learning_rate": 1.6118579758977693e-06,
"loss": 0.2256,
"step": 1347
},
{
"epoch": 2.46,
"learning_rate": 1.6011040013274015e-06,
"loss": 0.2213,
"step": 1348
},
{
"epoch": 2.47,
"learning_rate": 1.5903828983545778e-06,
"loss": 0.2201,
"step": 1349
},
{
"epoch": 2.47,
"learning_rate": 1.5796947089395475e-06,
"loss": 0.2105,
"step": 1350
},
{
"epoch": 2.47,
"learning_rate": 1.5690394749137471e-06,
"loss": 0.234,
"step": 1351
},
{
"epoch": 2.47,
"learning_rate": 1.5584172379796247e-06,
"loss": 0.2156,
"step": 1352
},
{
"epoch": 2.47,
"learning_rate": 1.547828039710496e-06,
"loss": 0.2157,
"step": 1353
},
{
"epoch": 2.48,
"learning_rate": 1.5372719215503584e-06,
"loss": 0.2254,
"step": 1354
},
{
"epoch": 2.48,
"learning_rate": 1.5267489248137445e-06,
"loss": 0.2256,
"step": 1355
},
{
"epoch": 2.48,
"learning_rate": 1.5162590906855612e-06,
"loss": 0.2369,
"step": 1356
},
{
"epoch": 2.48,
"learning_rate": 1.5058024602209164e-06,
"loss": 0.2076,
"step": 1357
},
{
"epoch": 2.48,
"learning_rate": 1.4953790743449703e-06,
"loss": 0.2173,
"step": 1358
},
{
"epoch": 2.49,
"learning_rate": 1.4849889738527656e-06,
"loss": 0.2138,
"step": 1359
},
{
"epoch": 2.49,
"learning_rate": 1.4746321994090774e-06,
"loss": 0.2123,
"step": 1360
},
{
"epoch": 2.49,
"learning_rate": 1.4643087915482445e-06,
"loss": 0.2198,
"step": 1361
},
{
"epoch": 2.49,
"learning_rate": 1.4540187906740245e-06,
"loss": 0.2285,
"step": 1362
},
{
"epoch": 2.49,
"learning_rate": 1.4437622370594172e-06,
"loss": 0.2239,
"step": 1363
},
{
"epoch": 2.49,
"learning_rate": 1.4335391708465185e-06,
"loss": 0.2292,
"step": 1364
},
{
"epoch": 2.5,
"learning_rate": 1.4233496320463668e-06,
"loss": 0.2247,
"step": 1365
},
{
"epoch": 2.5,
"learning_rate": 1.4131936605387764e-06,
"loss": 0.2128,
"step": 1366
},
{
"epoch": 2.5,
"learning_rate": 1.4030712960721848e-06,
"loss": 0.2246,
"step": 1367
},
{
"epoch": 2.5,
"learning_rate": 1.3929825782635009e-06,
"loss": 0.2223,
"step": 1368
},
{
"epoch": 2.5,
"learning_rate": 1.3829275465979476e-06,
"loss": 0.2205,
"step": 1369
},
{
"epoch": 2.51,
"learning_rate": 1.3729062404289017e-06,
"loss": 0.2226,
"step": 1370
},
{
"epoch": 2.51,
"learning_rate": 1.3629186989777542e-06,
"loss": 0.236,
"step": 1371
},
{
"epoch": 2.51,
"learning_rate": 1.3529649613337425e-06,
"loss": 0.2163,
"step": 1372
},
{
"epoch": 2.51,
"learning_rate": 1.3430450664537986e-06,
"loss": 0.2407,
"step": 1373
},
{
"epoch": 2.51,
"learning_rate": 1.3331590531624116e-06,
"loss": 0.2281,
"step": 1374
},
{
"epoch": 2.51,
"learning_rate": 1.3233069601514537e-06,
"loss": 0.2149,
"step": 1375
},
{
"epoch": 2.52,
"learning_rate": 1.3134888259800472e-06,
"loss": 0.2262,
"step": 1376
},
{
"epoch": 2.52,
"learning_rate": 1.303704689074402e-06,
"loss": 0.2205,
"step": 1377
},
{
"epoch": 2.52,
"learning_rate": 1.2939545877276726e-06,
"loss": 0.2056,
"step": 1378
},
{
"epoch": 2.52,
"learning_rate": 1.2842385600998032e-06,
"loss": 0.2155,
"step": 1379
},
{
"epoch": 2.52,
"learning_rate": 1.2745566442173773e-06,
"loss": 0.23,
"step": 1380
},
{
"epoch": 2.53,
"learning_rate": 1.264908877973482e-06,
"loss": 0.2223,
"step": 1381
},
{
"epoch": 2.53,
"learning_rate": 1.2552952991275402e-06,
"loss": 0.2245,
"step": 1382
},
{
"epoch": 2.53,
"learning_rate": 1.2457159453051715e-06,
"loss": 0.2398,
"step": 1383
},
{
"epoch": 2.53,
"learning_rate": 1.2361708539980565e-06,
"loss": 0.2337,
"step": 1384
},
{
"epoch": 2.53,
"learning_rate": 1.2266600625637659e-06,
"loss": 0.2271,
"step": 1385
},
{
"epoch": 2.53,
"learning_rate": 1.2171836082256316e-06,
"loss": 0.2274,
"step": 1386
},
{
"epoch": 2.54,
"learning_rate": 1.2077415280726047e-06,
"loss": 0.2086,
"step": 1387
},
{
"epoch": 2.54,
"learning_rate": 1.1983338590590932e-06,
"loss": 0.2182,
"step": 1388
},
{
"epoch": 2.54,
"learning_rate": 1.1889606380048301e-06,
"loss": 0.2216,
"step": 1389
},
{
"epoch": 2.54,
"learning_rate": 1.1796219015947286e-06,
"loss": 0.2282,
"step": 1390
},
{
"epoch": 2.54,
"learning_rate": 1.1703176863787313e-06,
"loss": 0.2226,
"step": 1391
},
{
"epoch": 2.55,
"learning_rate": 1.1610480287716764e-06,
"loss": 0.2225,
"step": 1392
},
{
"epoch": 2.55,
"learning_rate": 1.151812965053144e-06,
"loss": 0.2129,
"step": 1393
},
{
"epoch": 2.55,
"learning_rate": 1.1426125313673287e-06,
"loss": 0.2307,
"step": 1394
},
{
"epoch": 2.55,
"learning_rate": 1.1334467637228818e-06,
"loss": 0.2319,
"step": 1395
},
{
"epoch": 2.55,
"learning_rate": 1.1243156979927873e-06,
"loss": 0.2159,
"step": 1396
},
{
"epoch": 2.55,
"learning_rate": 1.1152193699142067e-06,
"loss": 0.2068,
"step": 1397
},
{
"epoch": 2.56,
"learning_rate": 1.1061578150883445e-06,
"loss": 0.2347,
"step": 1398
},
{
"epoch": 2.56,
"learning_rate": 1.0971310689803173e-06,
"loss": 0.221,
"step": 1399
},
{
"epoch": 2.56,
"learning_rate": 1.088139166919e-06,
"loss": 0.2269,
"step": 1400
},
{
"epoch": 2.56,
"learning_rate": 1.0791821440968963e-06,
"loss": 0.2128,
"step": 1401
},
{
"epoch": 2.56,
"learning_rate": 1.070260035570002e-06,
"loss": 0.2271,
"step": 1402
},
{
"epoch": 2.57,
"learning_rate": 1.0613728762576625e-06,
"loss": 0.2259,
"step": 1403
},
{
"epoch": 2.57,
"learning_rate": 1.0525207009424377e-06,
"loss": 0.2132,
"step": 1404
},
{
"epoch": 2.57,
"learning_rate": 1.043703544269975e-06,
"loss": 0.2284,
"step": 1405
},
{
"epoch": 2.57,
"learning_rate": 1.0349214407488573e-06,
"loss": 0.229,
"step": 1406
},
{
"epoch": 2.57,
"learning_rate": 1.026174424750479e-06,
"loss": 0.2467,
"step": 1407
},
{
"epoch": 2.57,
"learning_rate": 1.0174625305089125e-06,
"loss": 0.2324,
"step": 1408
},
{
"epoch": 2.58,
"learning_rate": 1.0087857921207667e-06,
"loss": 0.2332,
"step": 1409
},
{
"epoch": 2.58,
"learning_rate": 1.0001442435450581e-06,
"loss": 0.2395,
"step": 1410
},
{
"epoch": 2.58,
"learning_rate": 9.915379186030771e-07,
"loss": 0.2356,
"step": 1411
},
{
"epoch": 2.58,
"learning_rate": 9.829668509782576e-07,
"loss": 0.2155,
"step": 1412
},
{
"epoch": 2.58,
"learning_rate": 9.744310742160434e-07,
"loss": 0.2289,
"step": 1413
},
{
"epoch": 2.59,
"learning_rate": 9.65930621723752e-07,
"loss": 0.2332,
"step": 1414
},
{
"epoch": 2.59,
"learning_rate": 9.57465526770457e-07,
"loss": 0.2308,
"step": 1415
},
{
"epoch": 2.59,
"learning_rate": 9.490358224868445e-07,
"loss": 0.2226,
"step": 1416
},
{
"epoch": 2.59,
"learning_rate": 9.406415418650872e-07,
"loss": 0.2356,
"step": 1417
},
{
"epoch": 2.59,
"learning_rate": 9.322827177587212e-07,
"loss": 0.2406,
"step": 1418
},
{
"epoch": 2.59,
"learning_rate": 9.239593828825133e-07,
"loss": 0.2185,
"step": 1419
},
{
"epoch": 2.6,
"learning_rate": 9.156715698123231e-07,
"loss": 0.2246,
"step": 1420
},
{
"epoch": 2.6,
"learning_rate": 9.074193109849971e-07,
"loss": 0.2206,
"step": 1421
},
{
"epoch": 2.6,
"learning_rate": 8.992026386982222e-07,
"loss": 0.2167,
"step": 1422
},
{
"epoch": 2.6,
"learning_rate": 8.910215851104087e-07,
"loss": 0.2406,
"step": 1423
},
{
"epoch": 2.6,
"learning_rate": 8.828761822405641e-07,
"loss": 0.2078,
"step": 1424
},
{
"epoch": 2.61,
"learning_rate": 8.747664619681639e-07,
"loss": 0.219,
"step": 1425
},
{
"epoch": 2.61,
"learning_rate": 8.666924560330293e-07,
"loss": 0.2145,
"step": 1426
},
{
"epoch": 2.61,
"learning_rate": 8.586541960352035e-07,
"loss": 0.2229,
"step": 1427
},
{
"epoch": 2.61,
"learning_rate": 8.506517134348269e-07,
"loss": 0.2193,
"step": 1428
},
{
"epoch": 2.61,
"learning_rate": 8.426850395520126e-07,
"loss": 0.2321,
"step": 1429
},
{
"epoch": 2.61,
"learning_rate": 8.347542055667313e-07,
"loss": 0.2325,
"step": 1430
},
{
"epoch": 2.62,
"learning_rate": 8.268592425186761e-07,
"loss": 0.2269,
"step": 1431
},
{
"epoch": 2.62,
"learning_rate": 8.190001813071524e-07,
"loss": 0.2141,
"step": 1432
},
{
"epoch": 2.62,
"learning_rate": 8.111770526909535e-07,
"loss": 0.2299,
"step": 1433
},
{
"epoch": 2.62,
"learning_rate": 8.033898872882396e-07,
"loss": 0.2205,
"step": 1434
},
{
"epoch": 2.62,
"learning_rate": 7.956387155764145e-07,
"loss": 0.2404,
"step": 1435
},
{
"epoch": 2.63,
"learning_rate": 7.87923567892015e-07,
"loss": 0.2244,
"step": 1436
},
{
"epoch": 2.63,
"learning_rate": 7.802444744305826e-07,
"loss": 0.2343,
"step": 1437
},
{
"epoch": 2.63,
"learning_rate": 7.726014652465508e-07,
"loss": 0.2221,
"step": 1438
},
{
"epoch": 2.63,
"learning_rate": 7.649945702531314e-07,
"loss": 0.2429,
"step": 1439
},
{
"epoch": 2.63,
"learning_rate": 7.574238192221872e-07,
"loss": 0.2117,
"step": 1440
},
{
"epoch": 2.63,
"learning_rate": 7.498892417841208e-07,
"loss": 0.2254,
"step": 1441
},
{
"epoch": 2.64,
"learning_rate": 7.42390867427758e-07,
"loss": 0.2282,
"step": 1442
},
{
"epoch": 2.64,
"learning_rate": 7.349287255002401e-07,
"loss": 0.2273,
"step": 1443
},
{
"epoch": 2.64,
"learning_rate": 7.275028452068944e-07,
"loss": 0.2244,
"step": 1444
},
{
"epoch": 2.64,
"learning_rate": 7.20113255611129e-07,
"loss": 0.2246,
"step": 1445
},
{
"epoch": 2.64,
"learning_rate": 7.127599856343192e-07,
"loss": 0.224,
"step": 1446
},
{
"epoch": 2.65,
"learning_rate": 7.054430640556898e-07,
"loss": 0.2178,
"step": 1447
},
{
"epoch": 2.65,
"learning_rate": 6.981625195122077e-07,
"loss": 0.2191,
"step": 1448
},
{
"epoch": 2.65,
"learning_rate": 6.909183804984698e-07,
"loss": 0.2344,
"step": 1449
},
{
"epoch": 2.65,
"learning_rate": 6.837106753665823e-07,
"loss": 0.2256,
"step": 1450
},
{
"epoch": 2.65,
"learning_rate": 6.76539432326061e-07,
"loss": 0.2204,
"step": 1451
},
{
"epoch": 2.66,
"learning_rate": 6.69404679443717e-07,
"loss": 0.2277,
"step": 1452
},
{
"epoch": 2.66,
"learning_rate": 6.623064446435434e-07,
"loss": 0.2196,
"step": 1453
},
{
"epoch": 2.66,
"learning_rate": 6.55244755706611e-07,
"loss": 0.2256,
"step": 1454
},
{
"epoch": 2.66,
"learning_rate": 6.482196402709562e-07,
"loss": 0.2312,
"step": 1455
},
{
"epoch": 2.66,
"learning_rate": 6.41231125831474e-07,
"loss": 0.2293,
"step": 1456
},
{
"epoch": 2.66,
"learning_rate": 6.342792397398101e-07,
"loss": 0.2246,
"step": 1457
},
{
"epoch": 2.67,
"learning_rate": 6.273640092042577e-07,
"loss": 0.2194,
"step": 1458
},
{
"epoch": 2.67,
"learning_rate": 6.204854612896427e-07,
"loss": 0.2295,
"step": 1459
},
{
"epoch": 2.67,
"learning_rate": 6.136436229172238e-07,
"loss": 0.2144,
"step": 1460
},
{
"epoch": 2.67,
"learning_rate": 6.06838520864591e-07,
"loss": 0.2115,
"step": 1461
},
{
"epoch": 2.67,
"learning_rate": 6.000701817655474e-07,
"loss": 0.238,
"step": 1462
},
{
"epoch": 2.68,
"learning_rate": 5.933386321100155e-07,
"loss": 0.2273,
"step": 1463
},
{
"epoch": 2.68,
"learning_rate": 5.866438982439382e-07,
"loss": 0.2274,
"step": 1464
},
{
"epoch": 2.68,
"learning_rate": 5.799860063691609e-07,
"loss": 0.2175,
"step": 1465
},
{
"epoch": 2.68,
"learning_rate": 5.733649825433385e-07,
"loss": 0.2202,
"step": 1466
},
{
"epoch": 2.68,
"learning_rate": 5.667808526798358e-07,
"loss": 0.2238,
"step": 1467
},
{
"epoch": 2.68,
"learning_rate": 5.602336425476174e-07,
"loss": 0.2211,
"step": 1468
},
{
"epoch": 2.69,
"learning_rate": 5.537233777711526e-07,
"loss": 0.203,
"step": 1469
},
{
"epoch": 2.69,
"learning_rate": 5.472500838303141e-07,
"loss": 0.2197,
"step": 1470
},
{
"epoch": 2.69,
"learning_rate": 5.408137860602803e-07,
"loss": 0.2118,
"step": 1471
},
{
"epoch": 2.69,
"learning_rate": 5.344145096514308e-07,
"loss": 0.2388,
"step": 1472
},
{
"epoch": 2.69,
"learning_rate": 5.280522796492504e-07,
"loss": 0.2448,
"step": 1473
},
{
"epoch": 2.7,
"learning_rate": 5.217271209542384e-07,
"loss": 0.2383,
"step": 1474
},
{
"epoch": 2.7,
"learning_rate": 5.154390583217961e-07,
"loss": 0.2271,
"step": 1475
},
{
"epoch": 2.7,
"learning_rate": 5.091881163621426e-07,
"loss": 0.2254,
"step": 1476
},
{
"epoch": 2.7,
"learning_rate": 5.029743195402149e-07,
"loss": 0.2384,
"step": 1477
},
{
"epoch": 2.7,
"learning_rate": 4.96797692175568e-07,
"loss": 0.2375,
"step": 1478
},
{
"epoch": 2.7,
"learning_rate": 4.906582584422859e-07,
"loss": 0.21,
"step": 1479
},
{
"epoch": 2.71,
"learning_rate": 4.845560423688822e-07,
"loss": 0.2286,
"step": 1480
},
{
"epoch": 2.71,
"learning_rate": 4.784910678382093e-07,
"loss": 0.2467,
"step": 1481
},
{
"epoch": 2.71,
"learning_rate": 4.724633585873628e-07,
"loss": 0.2318,
"step": 1482
},
{
"epoch": 2.71,
"learning_rate": 4.6647293820759167e-07,
"loss": 0.2179,
"step": 1483
},
{
"epoch": 2.71,
"learning_rate": 4.6051983014420155e-07,
"loss": 0.2177,
"step": 1484
},
{
"epoch": 2.72,
"learning_rate": 4.546040576964628e-07,
"loss": 0.2291,
"step": 1485
},
{
"epoch": 2.72,
"learning_rate": 4.487256440175292e-07,
"loss": 0.2166,
"step": 1486
},
{
"epoch": 2.72,
"learning_rate": 4.428846121143326e-07,
"loss": 0.2256,
"step": 1487
},
{
"epoch": 2.72,
"learning_rate": 4.370809848475033e-07,
"loss": 0.2134,
"step": 1488
},
{
"epoch": 2.72,
"learning_rate": 4.313147849312749e-07,
"loss": 0.2329,
"step": 1489
},
{
"epoch": 2.72,
"learning_rate": 4.2558603493340066e-07,
"loss": 0.2207,
"step": 1490
},
{
"epoch": 2.73,
"learning_rate": 4.198947572750611e-07,
"loss": 0.2215,
"step": 1491
},
{
"epoch": 2.73,
"learning_rate": 4.142409742307774e-07,
"loss": 0.2353,
"step": 1492
},
{
"epoch": 2.73,
"learning_rate": 4.0862470792832565e-07,
"loss": 0.2102,
"step": 1493
},
{
"epoch": 2.73,
"learning_rate": 4.030459803486464e-07,
"loss": 0.2189,
"step": 1494
},
{
"epoch": 2.73,
"learning_rate": 3.975048133257631e-07,
"loss": 0.2178,
"step": 1495
},
{
"epoch": 2.74,
"learning_rate": 3.9200122854669674e-07,
"loss": 0.2272,
"step": 1496
},
{
"epoch": 2.74,
"learning_rate": 3.8653524755137063e-07,
"loss": 0.2198,
"step": 1497
},
{
"epoch": 2.74,
"learning_rate": 3.8110689173254443e-07,
"loss": 0.2215,
"step": 1498
},
{
"epoch": 2.74,
"learning_rate": 3.7571618233571584e-07,
"loss": 0.2324,
"step": 1499
},
{
"epoch": 2.74,
"learning_rate": 3.703631404590402e-07,
"loss": 0.2319,
"step": 1500
},
{
"epoch": 2.74,
"learning_rate": 3.6504778705325314e-07,
"loss": 0.2204,
"step": 1501
},
{
"epoch": 2.75,
"learning_rate": 3.5977014292158496e-07,
"loss": 0.2236,
"step": 1502
},
{
"epoch": 2.75,
"learning_rate": 3.545302287196772e-07,
"loss": 0.2181,
"step": 1503
},
{
"epoch": 2.75,
"learning_rate": 3.493280649555042e-07,
"loss": 0.2298,
"step": 1504
},
{
"epoch": 2.75,
"learning_rate": 3.441636719892938e-07,
"loss": 0.2221,
"step": 1505
},
{
"epoch": 2.75,
"learning_rate": 3.3903707003344775e-07,
"loss": 0.221,
"step": 1506
},
{
"epoch": 2.76,
"learning_rate": 3.339482791524562e-07,
"loss": 0.2164,
"step": 1507
},
{
"epoch": 2.76,
"learning_rate": 3.28897319262832e-07,
"loss": 0.2261,
"step": 1508
},
{
"epoch": 2.76,
"learning_rate": 3.2388421013301973e-07,
"loss": 0.2268,
"step": 1509
},
{
"epoch": 2.76,
"learning_rate": 3.1890897138332266e-07,
"loss": 0.2278,
"step": 1510
},
{
"epoch": 2.76,
"learning_rate": 3.139716224858336e-07,
"loss": 0.2172,
"step": 1511
},
{
"epoch": 2.76,
"learning_rate": 3.090721827643439e-07,
"loss": 0.2258,
"step": 1512
},
{
"epoch": 2.77,
"learning_rate": 3.042106713942816e-07,
"loss": 0.2308,
"step": 1513
},
{
"epoch": 2.77,
"learning_rate": 2.993871074026289e-07,
"loss": 0.231,
"step": 1514
},
{
"epoch": 2.77,
"learning_rate": 2.9460150966784786e-07,
"loss": 0.2189,
"step": 1515
},
{
"epoch": 2.77,
"learning_rate": 2.898538969198117e-07,
"loss": 0.2305,
"step": 1516
},
{
"epoch": 2.77,
"learning_rate": 2.85144287739727e-07,
"loss": 0.2188,
"step": 1517
},
{
"epoch": 2.78,
"learning_rate": 2.8047270056005937e-07,
"loss": 0.2247,
"step": 1518
},
{
"epoch": 2.78,
"learning_rate": 2.7583915366446554e-07,
"loss": 0.2252,
"step": 1519
},
{
"epoch": 2.78,
"learning_rate": 2.712436651877237e-07,
"loss": 0.2302,
"step": 1520
},
{
"epoch": 2.78,
"learning_rate": 2.666862531156533e-07,
"loss": 0.2167,
"step": 1521
},
{
"epoch": 2.78,
"learning_rate": 2.6216693528505197e-07,
"loss": 0.229,
"step": 1522
},
{
"epoch": 2.78,
"learning_rate": 2.5768572938362767e-07,
"loss": 0.2085,
"step": 1523
},
{
"epoch": 2.79,
"learning_rate": 2.5324265294992103e-07,
"loss": 0.2208,
"step": 1524
},
{
"epoch": 2.79,
"learning_rate": 2.488377233732431e-07,
"loss": 0.219,
"step": 1525
},
{
"epoch": 2.79,
"learning_rate": 2.4447095789360885e-07,
"loss": 0.2299,
"step": 1526
},
{
"epoch": 2.79,
"learning_rate": 2.401423736016628e-07,
"loss": 0.2214,
"step": 1527
},
{
"epoch": 2.79,
"learning_rate": 2.3585198743861782e-07,
"loss": 0.2281,
"step": 1528
},
{
"epoch": 2.8,
"learning_rate": 2.3159981619618633e-07,
"loss": 0.2118,
"step": 1529
},
{
"epoch": 2.8,
"learning_rate": 2.273858765165149e-07,
"loss": 0.2244,
"step": 1530
},
{
"epoch": 2.8,
"learning_rate": 2.2321018489211977e-07,
"loss": 0.2415,
"step": 1531
},
{
"epoch": 2.8,
"learning_rate": 2.190727576658225e-07,
"loss": 0.2179,
"step": 1532
},
{
"epoch": 2.8,
"learning_rate": 2.1497361103068548e-07,
"loss": 0.2175,
"step": 1533
},
{
"epoch": 2.81,
"learning_rate": 2.1091276102994662e-07,
"loss": 0.2237,
"step": 1534
},
{
"epoch": 2.81,
"learning_rate": 2.068902235569603e-07,
"loss": 0.2146,
"step": 1535
},
{
"epoch": 2.81,
"learning_rate": 2.0290601435513423e-07,
"loss": 0.2036,
"step": 1536
},
{
"epoch": 2.81,
"learning_rate": 1.9896014901786386e-07,
"loss": 0.2328,
"step": 1537
},
{
"epoch": 2.81,
"learning_rate": 1.9505264298847693e-07,
"loss": 0.2265,
"step": 1538
},
{
"epoch": 2.81,
"learning_rate": 1.9118351156017124e-07,
"loss": 0.2165,
"step": 1539
},
{
"epoch": 2.82,
"learning_rate": 1.8735276987595031e-07,
"loss": 0.2332,
"step": 1540
},
{
"epoch": 2.82,
"learning_rate": 1.8356043292857008e-07,
"loss": 0.2325,
"step": 1541
},
{
"epoch": 2.82,
"learning_rate": 1.7980651556048e-07,
"loss": 0.24,
"step": 1542
},
{
"epoch": 2.82,
"learning_rate": 1.760910324637599e-07,
"loss": 0.2246,
"step": 1543
},
{
"epoch": 2.82,
"learning_rate": 1.7241399818006765e-07,
"loss": 0.2169,
"step": 1544
},
{
"epoch": 2.83,
"learning_rate": 1.6877542710057814e-07,
"loss": 0.2167,
"step": 1545
},
{
"epoch": 2.83,
"learning_rate": 1.651753334659323e-07,
"loss": 0.2103,
"step": 1546
},
{
"epoch": 2.83,
"learning_rate": 1.6161373136617365e-07,
"loss": 0.224,
"step": 1547
},
{
"epoch": 2.83,
"learning_rate": 1.5809063474070075e-07,
"loss": 0.2212,
"step": 1548
},
{
"epoch": 2.83,
"learning_rate": 1.5460605737820823e-07,
"loss": 0.2103,
"step": 1549
},
{
"epoch": 2.83,
"learning_rate": 1.5116001291663463e-07,
"loss": 0.2193,
"step": 1550
},
{
"epoch": 2.84,
"learning_rate": 1.4775251484310803e-07,
"loss": 0.2057,
"step": 1551
},
{
"epoch": 2.84,
"learning_rate": 1.443835764938939e-07,
"loss": 0.202,
"step": 1552
},
{
"epoch": 2.84,
"learning_rate": 1.4105321105434056e-07,
"loss": 0.2245,
"step": 1553
},
{
"epoch": 2.84,
"learning_rate": 1.3776143155883492e-07,
"loss": 0.2147,
"step": 1554
},
{
"epoch": 2.84,
"learning_rate": 1.3450825089074137e-07,
"loss": 0.2134,
"step": 1555
},
{
"epoch": 2.85,
"learning_rate": 1.3129368178235845e-07,
"loss": 0.2199,
"step": 1556
},
{
"epoch": 2.85,
"learning_rate": 1.281177368148645e-07,
"loss": 0.2249,
"step": 1557
},
{
"epoch": 2.85,
"learning_rate": 1.2498042841827317e-07,
"loss": 0.2215,
"step": 1558
},
{
"epoch": 2.85,
"learning_rate": 1.2188176887138137e-07,
"loss": 0.2268,
"step": 1559
},
{
"epoch": 2.85,
"learning_rate": 1.1882177030172248e-07,
"loss": 0.225,
"step": 1560
},
{
"epoch": 2.85,
"learning_rate": 1.1580044468551876e-07,
"loss": 0.231,
"step": 1561
},
{
"epoch": 2.86,
"learning_rate": 1.128178038476324e-07,
"loss": 0.2231,
"step": 1562
},
{
"epoch": 2.86,
"learning_rate": 1.0987385946152451e-07,
"loss": 0.2284,
"step": 1563
},
{
"epoch": 2.86,
"learning_rate": 1.0696862304920175e-07,
"loss": 0.229,
"step": 1564
},
{
"epoch": 2.86,
"learning_rate": 1.0410210598118086e-07,
"loss": 0.2214,
"step": 1565
},
{
"epoch": 2.86,
"learning_rate": 1.0127431947643318e-07,
"loss": 0.2261,
"step": 1566
},
{
"epoch": 2.87,
"learning_rate": 9.848527460234902e-08,
"loss": 0.2347,
"step": 1567
},
{
"epoch": 2.87,
"learning_rate": 9.573498227469336e-08,
"loss": 0.2339,
"step": 1568
},
{
"epoch": 2.87,
"learning_rate": 9.302345325755801e-08,
"loss": 0.2262,
"step": 1569
},
{
"epoch": 2.87,
"learning_rate": 9.03506981633262e-08,
"loss": 0.2076,
"step": 1570
},
{
"epoch": 2.87,
"learning_rate": 8.771672745262583e-08,
"loss": 0.2345,
"step": 1571
},
{
"epoch": 2.87,
"learning_rate": 8.512155143429068e-08,
"loss": 0.2214,
"step": 1572
},
{
"epoch": 2.88,
"learning_rate": 8.256518026532046e-08,
"loss": 0.218,
"step": 1573
},
{
"epoch": 2.88,
"learning_rate": 8.004762395083965e-08,
"loss": 0.2267,
"step": 1574
},
{
"epoch": 2.88,
"learning_rate": 7.756889234405984e-08,
"loss": 0.2094,
"step": 1575
},
{
"epoch": 2.88,
"learning_rate": 7.512899514624084e-08,
"loss": 0.2129,
"step": 1576
},
{
"epoch": 2.88,
"learning_rate": 7.272794190665178e-08,
"loss": 0.2206,
"step": 1577
},
{
"epoch": 2.89,
"learning_rate": 7.036574202253343e-08,
"loss": 0.2357,
"step": 1578
},
{
"epoch": 2.89,
"learning_rate": 6.804240473906486e-08,
"loss": 0.2163,
"step": 1579
},
{
"epoch": 2.89,
"learning_rate": 6.575793914932127e-08,
"loss": 0.2225,
"step": 1580
},
{
"epoch": 2.89,
"learning_rate": 6.351235419424506e-08,
"loss": 0.2033,
"step": 1581
},
{
"epoch": 2.89,
"learning_rate": 6.130565866260485e-08,
"loss": 0.2257,
"step": 1582
},
{
"epoch": 2.89,
"learning_rate": 5.913786119096654e-08,
"loss": 0.2178,
"step": 1583
},
{
"epoch": 2.9,
"learning_rate": 5.7008970263654485e-08,
"loss": 0.2265,
"step": 1584
},
{
"epoch": 2.9,
"learning_rate": 5.491899421272262e-08,
"loss": 0.2275,
"step": 1585
},
{
"epoch": 2.9,
"learning_rate": 5.2867941217917826e-08,
"loss": 0.2143,
"step": 1586
},
{
"epoch": 2.9,
"learning_rate": 5.085581930665107e-08,
"loss": 0.2268,
"step": 1587
},
{
"epoch": 2.9,
"learning_rate": 4.8882636353965174e-08,
"loss": 0.2317,
"step": 1588
},
{
"epoch": 2.91,
"learning_rate": 4.6948400082502676e-08,
"loss": 0.223,
"step": 1589
},
{
"epoch": 2.91,
"learning_rate": 4.505311806247803e-08,
"loss": 0.2234,
"step": 1590
},
{
"epoch": 2.91,
"learning_rate": 4.319679771164431e-08,
"loss": 0.2162,
"step": 1591
},
{
"epoch": 2.91,
"learning_rate": 4.1379446295268796e-08,
"loss": 0.2307,
"step": 1592
},
{
"epoch": 2.91,
"learning_rate": 3.960107092609966e-08,
"loss": 0.2119,
"step": 1593
},
{
"epoch": 2.91,
"learning_rate": 3.786167856434375e-08,
"loss": 0.2124,
"step": 1594
},
{
"epoch": 2.92,
"learning_rate": 3.6161276017634416e-08,
"loss": 0.2148,
"step": 1595
},
{
"epoch": 2.92,
"learning_rate": 3.449986994100596e-08,
"loss": 0.2364,
"step": 1596
},
{
"epoch": 2.92,
"learning_rate": 3.287746683687032e-08,
"loss": 0.2057,
"step": 1597
},
{
"epoch": 2.92,
"learning_rate": 3.1294073054987105e-08,
"loss": 0.2212,
"step": 1598
},
{
"epoch": 2.92,
"learning_rate": 2.9749694792442498e-08,
"loss": 0.2166,
"step": 1599
},
{
"epoch": 2.93,
"learning_rate": 2.8244338093625923e-08,
"loss": 0.2194,
"step": 1600
},
{
"epoch": 2.93,
"learning_rate": 2.67780088502001e-08,
"loss": 0.2327,
"step": 1601
},
{
"epoch": 2.93,
"learning_rate": 2.5350712801084365e-08,
"loss": 0.2194,
"step": 1602
},
{
"epoch": 2.93,
"learning_rate": 2.3962455532430262e-08,
"loss": 0.2273,
"step": 1603
},
{
"epoch": 2.93,
"learning_rate": 2.261324247759933e-08,
"loss": 0.2118,
"step": 1604
},
{
"epoch": 2.93,
"learning_rate": 2.1303078917139787e-08,
"loss": 0.2252,
"step": 1605
},
{
"epoch": 2.94,
"learning_rate": 2.0031969978770993e-08,
"loss": 0.2274,
"step": 1606
},
{
"epoch": 2.94,
"learning_rate": 1.8799920637359027e-08,
"loss": 0.2256,
"step": 1607
},
{
"epoch": 2.94,
"learning_rate": 1.760693571489669e-08,
"loss": 0.2135,
"step": 1608
},
{
"epoch": 2.94,
"learning_rate": 1.6453019880490194e-08,
"loss": 0.2098,
"step": 1609
},
{
"epoch": 2.94,
"learning_rate": 1.533817765033252e-08,
"loss": 0.2179,
"step": 1610
},
{
"epoch": 2.95,
"learning_rate": 1.4262413387695629e-08,
"loss": 0.2273,
"step": 1611
},
{
"epoch": 2.95,
"learning_rate": 1.3225731302903833e-08,
"loss": 0.2448,
"step": 1612
},
{
"epoch": 2.95,
"learning_rate": 1.2228135453324907e-08,
"loss": 0.2242,
"step": 1613
},
{
"epoch": 2.95,
"learning_rate": 1.1269629743346777e-08,
"loss": 0.2096,
"step": 1614
},
{
"epoch": 2.95,
"learning_rate": 1.0350217924370853e-08,
"loss": 0.2198,
"step": 1615
},
{
"epoch": 2.95,
"learning_rate": 9.469903594790941e-09,
"loss": 0.2105,
"step": 1616
},
{
"epoch": 2.96,
"learning_rate": 8.628690199979917e-09,
"loss": 0.2255,
"step": 1617
},
{
"epoch": 2.96,
"learning_rate": 7.826581032279734e-09,
"loss": 0.2211,
"step": 1618
},
{
"epoch": 2.96,
"learning_rate": 7.063579230983664e-09,
"loss": 0.2208,
"step": 1619
},
{
"epoch": 2.96,
"learning_rate": 6.3396877823296286e-09,
"loss": 0.2099,
"step": 1620
},
{
"epoch": 2.96,
"learning_rate": 5.654909519483554e-09,
"loss": 0.2167,
"step": 1621
},
{
"epoch": 2.97,
"learning_rate": 5.0092471225293705e-09,
"loss": 0.2323,
"step": 1622
},
{
"epoch": 2.97,
"learning_rate": 4.402703118461249e-09,
"loss": 0.225,
"step": 1623
},
{
"epoch": 2.97,
"learning_rate": 3.835279881171383e-09,
"loss": 0.2259,
"step": 1624
},
{
"epoch": 2.97,
"learning_rate": 3.3069796314399993e-09,
"loss": 0.2149,
"step": 1625
},
{
"epoch": 2.97,
"learning_rate": 2.8178044369286948e-09,
"loss": 0.2228,
"step": 1626
},
{
"epoch": 2.98,
"learning_rate": 2.367756212171557e-09,
"loss": 0.2286,
"step": 1627
},
{
"epoch": 2.98,
"learning_rate": 1.9568367185673897e-09,
"loss": 0.2264,
"step": 1628
},
{
"epoch": 2.98,
"learning_rate": 1.5850475643719443e-09,
"loss": 0.2208,
"step": 1629
},
{
"epoch": 2.98,
"learning_rate": 1.2523902046934767e-09,
"loss": 0.2182,
"step": 1630
},
{
"epoch": 2.98,
"learning_rate": 9.58865941487197e-10,
"loss": 0.2207,
"step": 1631
},
{
"epoch": 2.98,
"learning_rate": 7.044759235486087e-10,
"loss": 0.2339,
"step": 1632
},
{
"epoch": 2.99,
"learning_rate": 4.892211465079566e-10,
"loss": 0.2241,
"step": 1633
},
{
"epoch": 2.99,
"learning_rate": 3.1310245283022735e-10,
"loss": 0.2129,
"step": 1634
},
{
"epoch": 2.99,
"learning_rate": 1.761205318095982e-10,
"loss": 0.1959,
"step": 1635
},
{
"epoch": 2.99,
"learning_rate": 7.827591956388603e-11,
"loss": 0.2139,
"step": 1636
},
{
"epoch": 2.99,
"learning_rate": 1.956899903787779e-11,
"loss": 0.2199,
"step": 1637
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.22,
"step": 1638
},
{
"epoch": 3.0,
"step": 1638,
"total_flos": 4.3401313435024097e+18,
"train_loss": 0.3987319528856411,
"train_runtime": 82484.0165,
"train_samples_per_second": 2.546,
"train_steps_per_second": 0.02
}
],
"logging_steps": 1.0,
"max_steps": 1638,
"num_train_epochs": 3,
"save_steps": 2000,
"total_flos": 4.3401313435024097e+18,
"trial_name": null,
"trial_params": null
}