Mistral-Small-Sisyphus-24b-2503 / trainer_state.json
Fizzarolli's picture
Upload folder using huggingface_hub
732dc12 verified
raw
history blame
190 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001669449081803005,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000002e-07,
"loss": 2.0754,
"step": 1
},
{
"epoch": 0.00333889816360601,
"grad_norm": 0.0,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.8508,
"step": 2
},
{
"epoch": 0.005008347245409015,
"grad_norm": 0.0,
"learning_rate": 6.000000000000001e-07,
"loss": 1.7493,
"step": 3
},
{
"epoch": 0.00667779632721202,
"grad_norm": 0.0,
"learning_rate": 8.000000000000001e-07,
"loss": 1.7768,
"step": 4
},
{
"epoch": 0.008347245409015025,
"grad_norm": 0.0,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.8859,
"step": 5
},
{
"epoch": 0.01001669449081803,
"grad_norm": 0.0,
"learning_rate": 1.2000000000000002e-06,
"loss": 2.1114,
"step": 6
},
{
"epoch": 0.011686143572621035,
"grad_norm": 0.0,
"learning_rate": 1.4000000000000001e-06,
"loss": 1.8268,
"step": 7
},
{
"epoch": 0.01335559265442404,
"grad_norm": 0.0,
"learning_rate": 1.6000000000000001e-06,
"loss": 1.427,
"step": 8
},
{
"epoch": 0.015025041736227046,
"grad_norm": 0.0,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.8472,
"step": 9
},
{
"epoch": 0.01669449081803005,
"grad_norm": 0.0,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.7571,
"step": 10
},
{
"epoch": 0.018363939899833055,
"grad_norm": 0.0,
"learning_rate": 2.2e-06,
"loss": 1.8135,
"step": 11
},
{
"epoch": 0.02003338898163606,
"grad_norm": 0.0,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.6594,
"step": 12
},
{
"epoch": 0.021702838063439065,
"grad_norm": 0.0,
"learning_rate": 2.6e-06,
"loss": 1.8615,
"step": 13
},
{
"epoch": 0.02337228714524207,
"grad_norm": 0.0,
"learning_rate": 2.8000000000000003e-06,
"loss": 1.9803,
"step": 14
},
{
"epoch": 0.025041736227045076,
"grad_norm": 0.0,
"learning_rate": 3e-06,
"loss": 1.9604,
"step": 15
},
{
"epoch": 0.02671118530884808,
"grad_norm": 0.0,
"learning_rate": 3.2000000000000003e-06,
"loss": 1.8476,
"step": 16
},
{
"epoch": 0.028380634390651086,
"grad_norm": 0.0,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.71,
"step": 17
},
{
"epoch": 0.03005008347245409,
"grad_norm": 0.0,
"learning_rate": 3.6000000000000003e-06,
"loss": 1.9865,
"step": 18
},
{
"epoch": 0.03171953255425709,
"grad_norm": 0.0,
"learning_rate": 3.8000000000000005e-06,
"loss": 1.9356,
"step": 19
},
{
"epoch": 0.0333889816360601,
"grad_norm": 0.0,
"learning_rate": 4.000000000000001e-06,
"loss": 1.6531,
"step": 20
},
{
"epoch": 0.035058430717863104,
"grad_norm": 0.0,
"learning_rate": 4.2000000000000004e-06,
"loss": 1.6921,
"step": 21
},
{
"epoch": 0.03672787979966611,
"grad_norm": 0.0,
"learning_rate": 4.4e-06,
"loss": 1.7874,
"step": 22
},
{
"epoch": 0.038397328881469114,
"grad_norm": 0.0,
"learning_rate": 4.600000000000001e-06,
"loss": 1.8481,
"step": 23
},
{
"epoch": 0.04006677796327212,
"grad_norm": 0.0,
"learning_rate": 4.800000000000001e-06,
"loss": 1.6141,
"step": 24
},
{
"epoch": 0.041736227045075125,
"grad_norm": 0.0,
"learning_rate": 5e-06,
"loss": 1.621,
"step": 25
},
{
"epoch": 0.04340567612687813,
"grad_norm": 0.0,
"learning_rate": 5.2e-06,
"loss": 1.5873,
"step": 26
},
{
"epoch": 0.045075125208681135,
"grad_norm": 0.0,
"learning_rate": 5.400000000000001e-06,
"loss": 1.6264,
"step": 27
},
{
"epoch": 0.04674457429048414,
"grad_norm": 0.0,
"learning_rate": 5.600000000000001e-06,
"loss": 1.7444,
"step": 28
},
{
"epoch": 0.048414023372287146,
"grad_norm": 0.0,
"learning_rate": 5.8e-06,
"loss": 1.9601,
"step": 29
},
{
"epoch": 0.05008347245409015,
"grad_norm": 0.0,
"learning_rate": 6e-06,
"loss": 1.7666,
"step": 30
},
{
"epoch": 0.05175292153589316,
"grad_norm": 0.0,
"learning_rate": 6.200000000000001e-06,
"loss": 1.973,
"step": 31
},
{
"epoch": 0.05342237061769616,
"grad_norm": 0.0,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.6661,
"step": 32
},
{
"epoch": 0.05509181969949917,
"grad_norm": 0.0,
"learning_rate": 6.600000000000001e-06,
"loss": 1.9688,
"step": 33
},
{
"epoch": 0.05676126878130217,
"grad_norm": 0.0,
"learning_rate": 6.800000000000001e-06,
"loss": 1.9153,
"step": 34
},
{
"epoch": 0.05843071786310518,
"grad_norm": 0.0,
"learning_rate": 7e-06,
"loss": 1.804,
"step": 35
},
{
"epoch": 0.06010016694490818,
"grad_norm": 0.0,
"learning_rate": 7.2000000000000005e-06,
"loss": 1.7318,
"step": 36
},
{
"epoch": 0.06176961602671119,
"grad_norm": 0.0,
"learning_rate": 7.4e-06,
"loss": 2.0672,
"step": 37
},
{
"epoch": 0.06343906510851419,
"grad_norm": 0.0,
"learning_rate": 7.600000000000001e-06,
"loss": 1.8864,
"step": 38
},
{
"epoch": 0.0651085141903172,
"grad_norm": 0.0,
"learning_rate": 7.800000000000002e-06,
"loss": 1.7625,
"step": 39
},
{
"epoch": 0.0667779632721202,
"grad_norm": 0.0,
"learning_rate": 8.000000000000001e-06,
"loss": 1.7466,
"step": 40
},
{
"epoch": 0.06844741235392321,
"grad_norm": 0.0,
"learning_rate": 8.2e-06,
"loss": 1.5579,
"step": 41
},
{
"epoch": 0.07011686143572621,
"grad_norm": 0.0,
"learning_rate": 8.400000000000001e-06,
"loss": 1.83,
"step": 42
},
{
"epoch": 0.07178631051752922,
"grad_norm": 0.0,
"learning_rate": 8.6e-06,
"loss": 1.9467,
"step": 43
},
{
"epoch": 0.07345575959933222,
"grad_norm": 0.0,
"learning_rate": 8.8e-06,
"loss": 1.9879,
"step": 44
},
{
"epoch": 0.07512520868113523,
"grad_norm": 0.0,
"learning_rate": 9e-06,
"loss": 1.9357,
"step": 45
},
{
"epoch": 0.07679465776293823,
"grad_norm": 0.0,
"learning_rate": 9.200000000000002e-06,
"loss": 1.7311,
"step": 46
},
{
"epoch": 0.07846410684474124,
"grad_norm": 0.0,
"learning_rate": 9.4e-06,
"loss": 1.658,
"step": 47
},
{
"epoch": 0.08013355592654424,
"grad_norm": 0.0,
"learning_rate": 9.600000000000001e-06,
"loss": 1.6306,
"step": 48
},
{
"epoch": 0.08180300500834725,
"grad_norm": 0.0,
"learning_rate": 9.800000000000001e-06,
"loss": 1.783,
"step": 49
},
{
"epoch": 0.08347245409015025,
"grad_norm": 0.0,
"learning_rate": 1e-05,
"loss": 1.8001,
"step": 50
},
{
"epoch": 0.08514190317195326,
"grad_norm": 0.0,
"learning_rate": 9.999981277850585e-06,
"loss": 1.6999,
"step": 51
},
{
"epoch": 0.08681135225375626,
"grad_norm": 0.0,
"learning_rate": 9.999925111542544e-06,
"loss": 1.7434,
"step": 52
},
{
"epoch": 0.08848080133555926,
"grad_norm": 0.0,
"learning_rate": 9.999831501496497e-06,
"loss": 1.8493,
"step": 53
},
{
"epoch": 0.09015025041736227,
"grad_norm": 0.0,
"learning_rate": 9.999700448413483e-06,
"loss": 2.01,
"step": 54
},
{
"epoch": 0.09181969949916527,
"grad_norm": 0.0,
"learning_rate": 9.999531953274934e-06,
"loss": 1.8475,
"step": 55
},
{
"epoch": 0.09348914858096828,
"grad_norm": 0.0,
"learning_rate": 9.999326017342688e-06,
"loss": 1.5294,
"step": 56
},
{
"epoch": 0.09515859766277128,
"grad_norm": 0.0,
"learning_rate": 9.999082642158972e-06,
"loss": 1.6968,
"step": 57
},
{
"epoch": 0.09682804674457429,
"grad_norm": 0.0,
"learning_rate": 9.998801829546387e-06,
"loss": 1.6576,
"step": 58
},
{
"epoch": 0.09849749582637729,
"grad_norm": 0.0,
"learning_rate": 9.9984835816079e-06,
"loss": 1.9567,
"step": 59
},
{
"epoch": 0.1001669449081803,
"grad_norm": 0.0,
"learning_rate": 9.998127900726825e-06,
"loss": 1.5722,
"step": 60
},
{
"epoch": 0.1018363939899833,
"grad_norm": 0.0,
"learning_rate": 9.997734789566809e-06,
"loss": 1.5099,
"step": 61
},
{
"epoch": 0.10350584307178631,
"grad_norm": 0.0,
"learning_rate": 9.997304251071802e-06,
"loss": 1.6942,
"step": 62
},
{
"epoch": 0.10517529215358931,
"grad_norm": 0.0,
"learning_rate": 9.996836288466046e-06,
"loss": 1.4786,
"step": 63
},
{
"epoch": 0.10684474123539232,
"grad_norm": 0.0,
"learning_rate": 9.99633090525405e-06,
"loss": 1.7518,
"step": 64
},
{
"epoch": 0.10851419031719532,
"grad_norm": 0.0,
"learning_rate": 9.99578810522056e-06,
"loss": 1.8813,
"step": 65
},
{
"epoch": 0.11018363939899833,
"grad_norm": 0.0,
"learning_rate": 9.995207892430525e-06,
"loss": 1.6325,
"step": 66
},
{
"epoch": 0.11185308848080133,
"grad_norm": 0.0,
"learning_rate": 9.994590271229077e-06,
"loss": 1.6373,
"step": 67
},
{
"epoch": 0.11352253756260434,
"grad_norm": 0.0,
"learning_rate": 9.9939352462415e-06,
"loss": 1.7568,
"step": 68
},
{
"epoch": 0.11519198664440734,
"grad_norm": 0.0,
"learning_rate": 9.993242822373178e-06,
"loss": 1.8238,
"step": 69
},
{
"epoch": 0.11686143572621036,
"grad_norm": 0.0,
"learning_rate": 9.99251300480958e-06,
"loss": 1.8395,
"step": 70
},
{
"epoch": 0.11853088480801335,
"grad_norm": 0.0,
"learning_rate": 9.991745799016206e-06,
"loss": 1.4879,
"step": 71
},
{
"epoch": 0.12020033388981637,
"grad_norm": 0.0,
"learning_rate": 9.990941210738553e-06,
"loss": 1.9031,
"step": 72
},
{
"epoch": 0.12186978297161936,
"grad_norm": 0.0,
"learning_rate": 9.990099246002071e-06,
"loss": 1.7939,
"step": 73
},
{
"epoch": 0.12353923205342238,
"grad_norm": 0.0,
"learning_rate": 9.989219911112114e-06,
"loss": 1.7135,
"step": 74
},
{
"epoch": 0.12520868113522537,
"grad_norm": 0.0,
"learning_rate": 9.988303212653898e-06,
"loss": 1.7721,
"step": 75
},
{
"epoch": 0.12687813021702837,
"grad_norm": 0.0,
"learning_rate": 9.98734915749245e-06,
"loss": 1.7121,
"step": 76
},
{
"epoch": 0.1285475792988314,
"grad_norm": 0.0,
"learning_rate": 9.986357752772555e-06,
"loss": 1.6391,
"step": 77
},
{
"epoch": 0.1302170283806344,
"grad_norm": 0.0,
"learning_rate": 9.985329005918702e-06,
"loss": 1.7974,
"step": 78
},
{
"epoch": 0.1318864774624374,
"grad_norm": 0.0,
"learning_rate": 9.984262924635036e-06,
"loss": 1.5529,
"step": 79
},
{
"epoch": 0.1335559265442404,
"grad_norm": 0.0,
"learning_rate": 9.983159516905287e-06,
"loss": 1.9062,
"step": 80
},
{
"epoch": 0.1352253756260434,
"grad_norm": 0.0,
"learning_rate": 9.982018790992722e-06,
"loss": 1.7188,
"step": 81
},
{
"epoch": 0.13689482470784642,
"grad_norm": 0.0,
"learning_rate": 9.980840755440075e-06,
"loss": 1.6821,
"step": 82
},
{
"epoch": 0.13856427378964942,
"grad_norm": 0.0,
"learning_rate": 9.979625419069495e-06,
"loss": 2.0225,
"step": 83
},
{
"epoch": 0.14023372287145242,
"grad_norm": 0.0,
"learning_rate": 9.978372790982457e-06,
"loss": 1.7347,
"step": 84
},
{
"epoch": 0.1419031719532554,
"grad_norm": 0.0,
"learning_rate": 9.977082880559725e-06,
"loss": 1.7869,
"step": 85
},
{
"epoch": 0.14357262103505844,
"grad_norm": 0.0,
"learning_rate": 9.975755697461254e-06,
"loss": 1.7912,
"step": 86
},
{
"epoch": 0.14524207011686144,
"grad_norm": 0.0,
"learning_rate": 9.974391251626132e-06,
"loss": 1.6523,
"step": 87
},
{
"epoch": 0.14691151919866444,
"grad_norm": 0.0,
"learning_rate": 9.972989553272501e-06,
"loss": 1.7847,
"step": 88
},
{
"epoch": 0.14858096828046743,
"grad_norm": 0.0,
"learning_rate": 9.971550612897487e-06,
"loss": 1.5771,
"step": 89
},
{
"epoch": 0.15025041736227046,
"grad_norm": 0.0,
"learning_rate": 9.970074441277111e-06,
"loss": 1.721,
"step": 90
},
{
"epoch": 0.15191986644407346,
"grad_norm": 0.0,
"learning_rate": 9.968561049466214e-06,
"loss": 1.7414,
"step": 91
},
{
"epoch": 0.15358931552587646,
"grad_norm": 0.0,
"learning_rate": 9.967010448798376e-06,
"loss": 1.6912,
"step": 92
},
{
"epoch": 0.15525876460767946,
"grad_norm": 0.0,
"learning_rate": 9.965422650885829e-06,
"loss": 1.5043,
"step": 93
},
{
"epoch": 0.15692821368948248,
"grad_norm": 0.0,
"learning_rate": 9.963797667619368e-06,
"loss": 1.654,
"step": 94
},
{
"epoch": 0.15859766277128548,
"grad_norm": 0.0,
"learning_rate": 9.962135511168263e-06,
"loss": 1.6316,
"step": 95
},
{
"epoch": 0.16026711185308848,
"grad_norm": 0.0,
"learning_rate": 9.960436193980175e-06,
"loss": 1.5251,
"step": 96
},
{
"epoch": 0.16193656093489148,
"grad_norm": 0.0,
"learning_rate": 9.958699728781046e-06,
"loss": 1.919,
"step": 97
},
{
"epoch": 0.1636060100166945,
"grad_norm": 0.0,
"learning_rate": 9.956926128575026e-06,
"loss": 1.8145,
"step": 98
},
{
"epoch": 0.1652754590984975,
"grad_norm": 0.0,
"learning_rate": 9.955115406644357e-06,
"loss": 1.7591,
"step": 99
},
{
"epoch": 0.1669449081803005,
"grad_norm": 0.0,
"learning_rate": 9.953267576549279e-06,
"loss": 1.7051,
"step": 100
},
{
"epoch": 0.1686143572621035,
"grad_norm": 0.0,
"learning_rate": 9.951382652127935e-06,
"loss": 1.6331,
"step": 101
},
{
"epoch": 0.17028380634390652,
"grad_norm": 0.0,
"learning_rate": 9.949460647496258e-06,
"loss": 1.8922,
"step": 102
},
{
"epoch": 0.17195325542570952,
"grad_norm": 0.0,
"learning_rate": 9.947501577047874e-06,
"loss": 1.9153,
"step": 103
},
{
"epoch": 0.17362270450751252,
"grad_norm": 0.0,
"learning_rate": 9.945505455453983e-06,
"loss": 1.7728,
"step": 104
},
{
"epoch": 0.17529215358931552,
"grad_norm": 0.0,
"learning_rate": 9.943472297663262e-06,
"loss": 1.615,
"step": 105
},
{
"epoch": 0.17696160267111852,
"grad_norm": 0.0,
"learning_rate": 9.941402118901743e-06,
"loss": 1.6637,
"step": 106
},
{
"epoch": 0.17863105175292154,
"grad_norm": 0.0,
"learning_rate": 9.939294934672707e-06,
"loss": 1.6815,
"step": 107
},
{
"epoch": 0.18030050083472454,
"grad_norm": 0.0,
"learning_rate": 9.93715076075656e-06,
"loss": 1.7668,
"step": 108
},
{
"epoch": 0.18196994991652754,
"grad_norm": 0.0,
"learning_rate": 9.934969613210718e-06,
"loss": 1.4658,
"step": 109
},
{
"epoch": 0.18363939899833054,
"grad_norm": 0.0,
"learning_rate": 9.932751508369492e-06,
"loss": 1.7838,
"step": 110
},
{
"epoch": 0.18530884808013356,
"grad_norm": 0.0,
"learning_rate": 9.930496462843954e-06,
"loss": 1.666,
"step": 111
},
{
"epoch": 0.18697829716193656,
"grad_norm": 0.0,
"learning_rate": 9.92820449352183e-06,
"loss": 1.7945,
"step": 112
},
{
"epoch": 0.18864774624373956,
"grad_norm": 0.0,
"learning_rate": 9.92587561756735e-06,
"loss": 1.762,
"step": 113
},
{
"epoch": 0.19031719532554256,
"grad_norm": 0.0,
"learning_rate": 9.923509852421144e-06,
"loss": 1.5722,
"step": 114
},
{
"epoch": 0.19198664440734559,
"grad_norm": 0.0,
"learning_rate": 9.921107215800095e-06,
"loss": 1.7682,
"step": 115
},
{
"epoch": 0.19365609348914858,
"grad_norm": 0.0,
"learning_rate": 9.91866772569721e-06,
"loss": 1.8919,
"step": 116
},
{
"epoch": 0.19532554257095158,
"grad_norm": 0.0,
"learning_rate": 9.91619140038149e-06,
"loss": 1.9279,
"step": 117
},
{
"epoch": 0.19699499165275458,
"grad_norm": 0.0,
"learning_rate": 9.913678258397785e-06,
"loss": 1.784,
"step": 118
},
{
"epoch": 0.1986644407345576,
"grad_norm": 0.0,
"learning_rate": 9.911128318566668e-06,
"loss": 1.978,
"step": 119
},
{
"epoch": 0.2003338898163606,
"grad_norm": 0.0,
"learning_rate": 9.908541599984276e-06,
"loss": 1.6277,
"step": 120
},
{
"epoch": 0.2020033388981636,
"grad_norm": 0.0,
"learning_rate": 9.905918122022183e-06,
"loss": 1.55,
"step": 121
},
{
"epoch": 0.2036727879799666,
"grad_norm": 0.0,
"learning_rate": 9.90325790432725e-06,
"loss": 1.6347,
"step": 122
},
{
"epoch": 0.20534223706176963,
"grad_norm": 0.0,
"learning_rate": 9.90056096682147e-06,
"loss": 1.786,
"step": 123
},
{
"epoch": 0.20701168614357263,
"grad_norm": 0.0,
"learning_rate": 9.897827329701834e-06,
"loss": 1.7375,
"step": 124
},
{
"epoch": 0.20868113522537562,
"grad_norm": 0.0,
"learning_rate": 9.895057013440163e-06,
"loss": 1.6394,
"step": 125
},
{
"epoch": 0.21035058430717862,
"grad_norm": 0.0,
"learning_rate": 9.892250038782972e-06,
"loss": 1.6817,
"step": 126
},
{
"epoch": 0.21202003338898165,
"grad_norm": 0.0,
"learning_rate": 9.889406426751296e-06,
"loss": 1.9121,
"step": 127
},
{
"epoch": 0.21368948247078465,
"grad_norm": 0.0,
"learning_rate": 9.88652619864055e-06,
"loss": 1.4702,
"step": 128
},
{
"epoch": 0.21535893155258765,
"grad_norm": 0.0,
"learning_rate": 9.883609376020356e-06,
"loss": 1.6333,
"step": 129
},
{
"epoch": 0.21702838063439064,
"grad_norm": 0.0,
"learning_rate": 9.880655980734391e-06,
"loss": 1.64,
"step": 130
},
{
"epoch": 0.21869782971619364,
"grad_norm": 0.0,
"learning_rate": 9.877666034900216e-06,
"loss": 1.6464,
"step": 131
},
{
"epoch": 0.22036727879799667,
"grad_norm": 0.0,
"learning_rate": 9.874639560909118e-06,
"loss": 1.7207,
"step": 132
},
{
"epoch": 0.22203672787979967,
"grad_norm": 0.0,
"learning_rate": 9.871576581425937e-06,
"loss": 1.8864,
"step": 133
},
{
"epoch": 0.22370617696160267,
"grad_norm": 0.0,
"learning_rate": 9.868477119388897e-06,
"loss": 1.7744,
"step": 134
},
{
"epoch": 0.22537562604340566,
"grad_norm": 0.0,
"learning_rate": 9.86534119800943e-06,
"loss": 1.592,
"step": 135
},
{
"epoch": 0.2270450751252087,
"grad_norm": 0.0,
"learning_rate": 9.862168840772018e-06,
"loss": 1.7051,
"step": 136
},
{
"epoch": 0.2287145242070117,
"grad_norm": 0.0,
"learning_rate": 9.858960071433994e-06,
"loss": 1.7263,
"step": 137
},
{
"epoch": 0.2303839732888147,
"grad_norm": 0.0,
"learning_rate": 9.855714914025386e-06,
"loss": 1.6824,
"step": 138
},
{
"epoch": 0.23205342237061768,
"grad_norm": 0.0,
"learning_rate": 9.852433392848718e-06,
"loss": 1.8981,
"step": 139
},
{
"epoch": 0.2337228714524207,
"grad_norm": 0.0,
"learning_rate": 9.849115532478848e-06,
"loss": 1.4935,
"step": 140
},
{
"epoch": 0.2353923205342237,
"grad_norm": 0.0,
"learning_rate": 9.84576135776276e-06,
"loss": 1.3096,
"step": 141
},
{
"epoch": 0.2370617696160267,
"grad_norm": 0.0,
"learning_rate": 9.842370893819404e-06,
"loss": 1.963,
"step": 142
},
{
"epoch": 0.2387312186978297,
"grad_norm": 0.0,
"learning_rate": 9.838944166039486e-06,
"loss": 1.7601,
"step": 143
},
{
"epoch": 0.24040066777963273,
"grad_norm": 0.0,
"learning_rate": 9.83548120008529e-06,
"loss": 1.694,
"step": 144
},
{
"epoch": 0.24207011686143573,
"grad_norm": 0.0,
"learning_rate": 9.831982021890483e-06,
"loss": 1.697,
"step": 145
},
{
"epoch": 0.24373956594323873,
"grad_norm": 0.0,
"learning_rate": 9.828446657659919e-06,
"loss": 1.3733,
"step": 146
},
{
"epoch": 0.24540901502504173,
"grad_norm": 0.0,
"learning_rate": 9.824875133869447e-06,
"loss": 1.9497,
"step": 147
},
{
"epoch": 0.24707846410684475,
"grad_norm": 0.0,
"learning_rate": 9.821267477265705e-06,
"loss": 1.8237,
"step": 148
},
{
"epoch": 0.24874791318864775,
"grad_norm": 0.0,
"learning_rate": 9.817623714865931e-06,
"loss": 1.782,
"step": 149
},
{
"epoch": 0.25041736227045075,
"grad_norm": 0.0,
"learning_rate": 9.813943873957748e-06,
"loss": 1.7703,
"step": 150
},
{
"epoch": 0.25208681135225375,
"grad_norm": 0.0,
"learning_rate": 9.810227982098968e-06,
"loss": 1.7999,
"step": 151
},
{
"epoch": 0.25375626043405675,
"grad_norm": 0.0,
"learning_rate": 9.806476067117384e-06,
"loss": 1.5728,
"step": 152
},
{
"epoch": 0.25542570951585974,
"grad_norm": 0.0,
"learning_rate": 9.802688157110564e-06,
"loss": 1.5857,
"step": 153
},
{
"epoch": 0.2570951585976628,
"grad_norm": 0.0,
"learning_rate": 9.798864280445633e-06,
"loss": 1.6758,
"step": 154
},
{
"epoch": 0.2587646076794658,
"grad_norm": 0.0,
"learning_rate": 9.795004465759067e-06,
"loss": 1.5893,
"step": 155
},
{
"epoch": 0.2604340567612688,
"grad_norm": 0.0,
"learning_rate": 9.791108741956476e-06,
"loss": 1.8483,
"step": 156
},
{
"epoch": 0.2621035058430718,
"grad_norm": 0.0,
"learning_rate": 9.787177138212391e-06,
"loss": 1.9552,
"step": 157
},
{
"epoch": 0.2637729549248748,
"grad_norm": 0.0,
"learning_rate": 9.78320968397004e-06,
"loss": 1.7145,
"step": 158
},
{
"epoch": 0.2654424040066778,
"grad_norm": 0.0,
"learning_rate": 9.779206408941131e-06,
"loss": 1.6563,
"step": 159
},
{
"epoch": 0.2671118530884808,
"grad_norm": 0.0,
"learning_rate": 9.77516734310563e-06,
"loss": 1.7699,
"step": 160
},
{
"epoch": 0.2687813021702838,
"grad_norm": 0.0,
"learning_rate": 9.771092516711538e-06,
"loss": 1.6084,
"step": 161
},
{
"epoch": 0.2704507512520868,
"grad_norm": 0.0,
"learning_rate": 9.766981960274653e-06,
"loss": 1.6172,
"step": 162
},
{
"epoch": 0.27212020033388984,
"grad_norm": 0.0,
"learning_rate": 9.76283570457836e-06,
"loss": 1.7309,
"step": 163
},
{
"epoch": 0.27378964941569284,
"grad_norm": 0.0,
"learning_rate": 9.758653780673381e-06,
"loss": 1.8666,
"step": 164
},
{
"epoch": 0.27545909849749584,
"grad_norm": 0.0,
"learning_rate": 9.754436219877564e-06,
"loss": 1.9176,
"step": 165
},
{
"epoch": 0.27712854757929883,
"grad_norm": 0.0,
"learning_rate": 9.750183053775625e-06,
"loss": 1.9063,
"step": 166
},
{
"epoch": 0.27879799666110183,
"grad_norm": 0.0,
"learning_rate": 9.745894314218933e-06,
"loss": 1.8567,
"step": 167
},
{
"epoch": 0.28046744574290483,
"grad_norm": 0.0,
"learning_rate": 9.741570033325254e-06,
"loss": 1.861,
"step": 168
},
{
"epoch": 0.28213689482470783,
"grad_norm": 0.0,
"learning_rate": 9.737210243478522e-06,
"loss": 1.7261,
"step": 169
},
{
"epoch": 0.2838063439065108,
"grad_norm": 0.0,
"learning_rate": 9.732814977328593e-06,
"loss": 1.6596,
"step": 170
},
{
"epoch": 0.2854757929883139,
"grad_norm": 0.0,
"learning_rate": 9.728384267790997e-06,
"loss": 1.9513,
"step": 171
},
{
"epoch": 0.2871452420701169,
"grad_norm": 0.0,
"learning_rate": 9.723918148046696e-06,
"loss": 1.6785,
"step": 172
},
{
"epoch": 0.2888146911519199,
"grad_norm": 0.0,
"learning_rate": 9.719416651541839e-06,
"loss": 1.7828,
"step": 173
},
{
"epoch": 0.2904841402337229,
"grad_norm": 0.0,
"learning_rate": 9.714879811987496e-06,
"loss": 1.9663,
"step": 174
},
{
"epoch": 0.2921535893155259,
"grad_norm": 0.0,
"learning_rate": 9.710307663359426e-06,
"loss": 1.6448,
"step": 175
},
{
"epoch": 0.2938230383973289,
"grad_norm": 0.0,
"learning_rate": 9.705700239897809e-06,
"loss": 1.5589,
"step": 176
},
{
"epoch": 0.29549248747913187,
"grad_norm": 0.0,
"learning_rate": 9.701057576106991e-06,
"loss": 1.9398,
"step": 177
},
{
"epoch": 0.29716193656093487,
"grad_norm": 0.0,
"learning_rate": 9.69637970675523e-06,
"loss": 1.7781,
"step": 178
},
{
"epoch": 0.2988313856427379,
"grad_norm": 0.0,
"learning_rate": 9.691666666874438e-06,
"loss": 1.7034,
"step": 179
},
{
"epoch": 0.3005008347245409,
"grad_norm": 0.0,
"learning_rate": 9.686918491759904e-06,
"loss": 1.9747,
"step": 180
},
{
"epoch": 0.3021702838063439,
"grad_norm": 0.0,
"learning_rate": 9.682135216970048e-06,
"loss": 1.627,
"step": 181
},
{
"epoch": 0.3038397328881469,
"grad_norm": 0.0,
"learning_rate": 9.677316878326144e-06,
"loss": 1.6952,
"step": 182
},
{
"epoch": 0.3055091819699499,
"grad_norm": 0.0,
"learning_rate": 9.672463511912056e-06,
"loss": 1.9787,
"step": 183
},
{
"epoch": 0.3071786310517529,
"grad_norm": 0.0,
"learning_rate": 9.667575154073962e-06,
"loss": 1.7576,
"step": 184
},
{
"epoch": 0.3088480801335559,
"grad_norm": 0.0,
"learning_rate": 9.66265184142009e-06,
"loss": 1.4266,
"step": 185
},
{
"epoch": 0.3105175292153589,
"grad_norm": 0.0,
"learning_rate": 9.657693610820437e-06,
"loss": 1.84,
"step": 186
},
{
"epoch": 0.3121869782971619,
"grad_norm": 0.0,
"learning_rate": 9.652700499406497e-06,
"loss": 1.9044,
"step": 187
},
{
"epoch": 0.31385642737896496,
"grad_norm": 0.0,
"learning_rate": 9.647672544570981e-06,
"loss": 1.9096,
"step": 188
},
{
"epoch": 0.31552587646076796,
"grad_norm": 0.0,
"learning_rate": 9.642609783967539e-06,
"loss": 1.9484,
"step": 189
},
{
"epoch": 0.31719532554257096,
"grad_norm": 0.0,
"learning_rate": 9.637512255510475e-06,
"loss": 1.832,
"step": 190
},
{
"epoch": 0.31886477462437396,
"grad_norm": 0.0,
"learning_rate": 9.632379997374462e-06,
"loss": 1.8468,
"step": 191
},
{
"epoch": 0.32053422370617696,
"grad_norm": 0.0,
"learning_rate": 9.627213047994265e-06,
"loss": 1.5239,
"step": 192
},
{
"epoch": 0.32220367278797996,
"grad_norm": 0.0,
"learning_rate": 9.622011446064439e-06,
"loss": 1.9019,
"step": 193
},
{
"epoch": 0.32387312186978295,
"grad_norm": 0.0,
"learning_rate": 9.616775230539057e-06,
"loss": 1.6285,
"step": 194
},
{
"epoch": 0.32554257095158595,
"grad_norm": 0.0,
"learning_rate": 9.611504440631398e-06,
"loss": 1.7333,
"step": 195
},
{
"epoch": 0.327212020033389,
"grad_norm": 0.0,
"learning_rate": 9.606199115813672e-06,
"loss": 1.4345,
"step": 196
},
{
"epoch": 0.328881469115192,
"grad_norm": 0.0,
"learning_rate": 9.600859295816708e-06,
"loss": 1.7107,
"step": 197
},
{
"epoch": 0.330550918196995,
"grad_norm": 0.0,
"learning_rate": 9.595485020629676e-06,
"loss": 1.7476,
"step": 198
},
{
"epoch": 0.332220367278798,
"grad_norm": 0.0,
"learning_rate": 9.590076330499763e-06,
"loss": 1.8789,
"step": 199
},
{
"epoch": 0.333889816360601,
"grad_norm": 0.0,
"learning_rate": 9.584633265931894e-06,
"loss": 1.8005,
"step": 200
},
{
"epoch": 0.335559265442404,
"grad_norm": 0.0,
"learning_rate": 9.579155867688415e-06,
"loss": 1.9923,
"step": 201
},
{
"epoch": 0.337228714524207,
"grad_norm": 0.0,
"learning_rate": 9.573644176788795e-06,
"loss": 1.827,
"step": 202
},
{
"epoch": 0.33889816360601,
"grad_norm": 0.0,
"learning_rate": 9.568098234509312e-06,
"loss": 1.8459,
"step": 203
},
{
"epoch": 0.34056761268781305,
"grad_norm": 0.0,
"learning_rate": 9.562518082382751e-06,
"loss": 1.6333,
"step": 204
},
{
"epoch": 0.34223706176961605,
"grad_norm": 0.0,
"learning_rate": 9.55690376219809e-06,
"loss": 1.7023,
"step": 205
},
{
"epoch": 0.34390651085141904,
"grad_norm": 0.0,
"learning_rate": 9.551255316000183e-06,
"loss": 1.7017,
"step": 206
},
{
"epoch": 0.34557595993322204,
"grad_norm": 0.0,
"learning_rate": 9.545572786089452e-06,
"loss": 1.7043,
"step": 207
},
{
"epoch": 0.34724540901502504,
"grad_norm": 0.0,
"learning_rate": 9.539856215021568e-06,
"loss": 1.7999,
"step": 208
},
{
"epoch": 0.34891485809682804,
"grad_norm": 0.0,
"learning_rate": 9.53410564560713e-06,
"loss": 1.409,
"step": 209
},
{
"epoch": 0.35058430717863104,
"grad_norm": 0.0,
"learning_rate": 9.528321120911345e-06,
"loss": 1.4238,
"step": 210
},
{
"epoch": 0.35225375626043404,
"grad_norm": 0.0,
"learning_rate": 9.522502684253709e-06,
"loss": 1.958,
"step": 211
},
{
"epoch": 0.35392320534223703,
"grad_norm": 0.0,
"learning_rate": 9.516650379207677e-06,
"loss": 1.8037,
"step": 212
},
{
"epoch": 0.3555926544240401,
"grad_norm": 0.0,
"learning_rate": 9.510764249600339e-06,
"loss": 1.8266,
"step": 213
},
{
"epoch": 0.3572621035058431,
"grad_norm": 0.0,
"learning_rate": 9.504844339512096e-06,
"loss": 1.9888,
"step": 214
},
{
"epoch": 0.3589315525876461,
"grad_norm": 0.0,
"learning_rate": 9.498890693276326e-06,
"loss": 1.8452,
"step": 215
},
{
"epoch": 0.3606010016694491,
"grad_norm": 0.0,
"learning_rate": 9.492903355479047e-06,
"loss": 1.7189,
"step": 216
},
{
"epoch": 0.3622704507512521,
"grad_norm": 0.0,
"learning_rate": 9.486882370958596e-06,
"loss": 1.7222,
"step": 217
},
{
"epoch": 0.3639398998330551,
"grad_norm": 0.0,
"learning_rate": 9.480827784805278e-06,
"loss": 1.8466,
"step": 218
},
{
"epoch": 0.3656093489148581,
"grad_norm": 0.0,
"learning_rate": 9.474739642361043e-06,
"loss": 1.7537,
"step": 219
},
{
"epoch": 0.3672787979966611,
"grad_norm": 0.0,
"learning_rate": 9.468617989219136e-06,
"loss": 1.9107,
"step": 220
},
{
"epoch": 0.36894824707846413,
"grad_norm": 0.0,
"learning_rate": 9.462462871223755e-06,
"loss": 1.5911,
"step": 221
},
{
"epoch": 0.37061769616026713,
"grad_norm": 0.0,
"learning_rate": 9.45627433446972e-06,
"loss": 1.5626,
"step": 222
},
{
"epoch": 0.3722871452420701,
"grad_norm": 0.0,
"learning_rate": 9.450052425302112e-06,
"loss": 1.6464,
"step": 223
},
{
"epoch": 0.3739565943238731,
"grad_norm": 0.0,
"learning_rate": 9.443797190315938e-06,
"loss": 1.8094,
"step": 224
},
{
"epoch": 0.3756260434056761,
"grad_norm": 0.0,
"learning_rate": 9.437508676355774e-06,
"loss": 1.5682,
"step": 225
},
{
"epoch": 0.3772954924874791,
"grad_norm": 0.0,
"learning_rate": 9.431186930515419e-06,
"loss": 1.5277,
"step": 226
},
{
"epoch": 0.3789649415692821,
"grad_norm": 0.0,
"learning_rate": 9.424832000137542e-06,
"loss": 1.8445,
"step": 227
},
{
"epoch": 0.3806343906510851,
"grad_norm": 0.0,
"learning_rate": 9.418443932813328e-06,
"loss": 1.6292,
"step": 228
},
{
"epoch": 0.3823038397328882,
"grad_norm": 0.0,
"learning_rate": 9.412022776382113e-06,
"loss": 2.0086,
"step": 229
},
{
"epoch": 0.38397328881469117,
"grad_norm": 0.0,
"learning_rate": 9.405568578931042e-06,
"loss": 1.9954,
"step": 230
},
{
"epoch": 0.38564273789649417,
"grad_norm": 0.0,
"learning_rate": 9.399081388794688e-06,
"loss": 1.9507,
"step": 231
},
{
"epoch": 0.38731218697829717,
"grad_norm": 0.0,
"learning_rate": 9.392561254554712e-06,
"loss": 1.9047,
"step": 232
},
{
"epoch": 0.38898163606010017,
"grad_norm": 0.0,
"learning_rate": 9.386008225039486e-06,
"loss": 1.81,
"step": 233
},
{
"epoch": 0.39065108514190316,
"grad_norm": 0.0,
"learning_rate": 9.379422349323728e-06,
"loss": 1.5756,
"step": 234
},
{
"epoch": 0.39232053422370616,
"grad_norm": 0.0,
"learning_rate": 9.372803676728138e-06,
"loss": 1.5665,
"step": 235
},
{
"epoch": 0.39398998330550916,
"grad_norm": 0.0,
"learning_rate": 9.366152256819025e-06,
"loss": 1.6799,
"step": 236
},
{
"epoch": 0.39565943238731216,
"grad_norm": 0.0,
"learning_rate": 9.359468139407942e-06,
"loss": 1.4389,
"step": 237
},
{
"epoch": 0.3973288814691152,
"grad_norm": 0.0,
"learning_rate": 9.352751374551305e-06,
"loss": 1.8924,
"step": 238
},
{
"epoch": 0.3989983305509182,
"grad_norm": 0.0,
"learning_rate": 9.346002012550027e-06,
"loss": 1.8713,
"step": 239
},
{
"epoch": 0.4006677796327212,
"grad_norm": 0.0,
"learning_rate": 9.339220103949132e-06,
"loss": 1.7269,
"step": 240
},
{
"epoch": 0.4023372287145242,
"grad_norm": 0.0,
"learning_rate": 9.332405699537382e-06,
"loss": 1.651,
"step": 241
},
{
"epoch": 0.4040066777963272,
"grad_norm": 0.0,
"learning_rate": 9.325558850346897e-06,
"loss": 1.6421,
"step": 242
},
{
"epoch": 0.4056761268781302,
"grad_norm": 0.0,
"learning_rate": 9.318679607652768e-06,
"loss": 1.6585,
"step": 243
},
{
"epoch": 0.4073455759599332,
"grad_norm": 0.0,
"learning_rate": 9.311768022972682e-06,
"loss": 1.8029,
"step": 244
},
{
"epoch": 0.4090150250417362,
"grad_norm": 0.0,
"learning_rate": 9.304824148066526e-06,
"loss": 1.5796,
"step": 245
},
{
"epoch": 0.41068447412353926,
"grad_norm": 0.0,
"learning_rate": 9.297848034936007e-06,
"loss": 1.7035,
"step": 246
},
{
"epoch": 0.41235392320534225,
"grad_norm": 0.0,
"learning_rate": 9.290839735824254e-06,
"loss": 1.8096,
"step": 247
},
{
"epoch": 0.41402337228714525,
"grad_norm": 0.0,
"learning_rate": 9.283799303215442e-06,
"loss": 1.7213,
"step": 248
},
{
"epoch": 0.41569282136894825,
"grad_norm": 0.0,
"learning_rate": 9.276726789834378e-06,
"loss": 1.8537,
"step": 249
},
{
"epoch": 0.41736227045075125,
"grad_norm": 0.0,
"learning_rate": 9.269622248646124e-06,
"loss": 1.7887,
"step": 250
},
{
"epoch": 0.41903171953255425,
"grad_norm": 0.0,
"learning_rate": 9.262485732855597e-06,
"loss": 1.6711,
"step": 251
},
{
"epoch": 0.42070116861435725,
"grad_norm": 0.0,
"learning_rate": 9.255317295907158e-06,
"loss": 1.7755,
"step": 252
},
{
"epoch": 0.42237061769616024,
"grad_norm": 0.0,
"learning_rate": 9.24811699148423e-06,
"loss": 1.8195,
"step": 253
},
{
"epoch": 0.4240400667779633,
"grad_norm": 0.0,
"learning_rate": 9.240884873508876e-06,
"loss": 2.0062,
"step": 254
},
{
"epoch": 0.4257095158597663,
"grad_norm": 0.0,
"learning_rate": 9.233620996141421e-06,
"loss": 1.7525,
"step": 255
},
{
"epoch": 0.4273789649415693,
"grad_norm": 0.0,
"learning_rate": 9.226325413780021e-06,
"loss": 1.338,
"step": 256
},
{
"epoch": 0.4290484140233723,
"grad_norm": 0.0,
"learning_rate": 9.218998181060271e-06,
"loss": 1.7117,
"step": 257
},
{
"epoch": 0.4307178631051753,
"grad_norm": 0.0,
"learning_rate": 9.211639352854786e-06,
"loss": 1.7551,
"step": 258
},
{
"epoch": 0.4323873121869783,
"grad_norm": 0.0,
"learning_rate": 9.204248984272802e-06,
"loss": 1.4557,
"step": 259
},
{
"epoch": 0.4340567612687813,
"grad_norm": 0.0,
"learning_rate": 9.196827130659752e-06,
"loss": 1.6702,
"step": 260
},
{
"epoch": 0.4357262103505843,
"grad_norm": 0.0,
"learning_rate": 9.189373847596853e-06,
"loss": 1.6316,
"step": 261
},
{
"epoch": 0.4373956594323873,
"grad_norm": 0.0,
"learning_rate": 9.181889190900702e-06,
"loss": 1.6081,
"step": 262
},
{
"epoch": 0.43906510851419034,
"grad_norm": 0.0,
"learning_rate": 9.174373216622841e-06,
"loss": 1.4578,
"step": 263
},
{
"epoch": 0.44073455759599334,
"grad_norm": 0.0,
"learning_rate": 9.166825981049345e-06,
"loss": 1.8555,
"step": 264
},
{
"epoch": 0.44240400667779634,
"grad_norm": 0.0,
"learning_rate": 9.15924754070041e-06,
"loss": 1.7215,
"step": 265
},
{
"epoch": 0.44407345575959933,
"grad_norm": 0.0,
"learning_rate": 9.151637952329903e-06,
"loss": 1.8248,
"step": 266
},
{
"epoch": 0.44574290484140233,
"grad_norm": 0.0,
"learning_rate": 9.143997272924974e-06,
"loss": 1.8428,
"step": 267
},
{
"epoch": 0.44741235392320533,
"grad_norm": 0.0,
"learning_rate": 9.136325559705593e-06,
"loss": 1.8746,
"step": 268
},
{
"epoch": 0.44908180300500833,
"grad_norm": 0.0,
"learning_rate": 9.128622870124147e-06,
"loss": 1.7128,
"step": 269
},
{
"epoch": 0.4507512520868113,
"grad_norm": 0.0,
"learning_rate": 9.120889261864999e-06,
"loss": 1.569,
"step": 270
},
{
"epoch": 0.4524207011686144,
"grad_norm": 0.0,
"learning_rate": 9.113124792844053e-06,
"loss": 1.6879,
"step": 271
},
{
"epoch": 0.4540901502504174,
"grad_norm": 0.0,
"learning_rate": 9.105329521208334e-06,
"loss": 1.7053,
"step": 272
},
{
"epoch": 0.4557595993322204,
"grad_norm": 0.0,
"learning_rate": 9.097503505335534e-06,
"loss": 1.763,
"step": 273
},
{
"epoch": 0.4574290484140234,
"grad_norm": 0.0,
"learning_rate": 9.089646803833589e-06,
"loss": 1.6282,
"step": 274
},
{
"epoch": 0.4590984974958264,
"grad_norm": 0.0,
"learning_rate": 9.081759475540236e-06,
"loss": 1.7139,
"step": 275
},
{
"epoch": 0.4607679465776294,
"grad_norm": 0.0,
"learning_rate": 9.073841579522571e-06,
"loss": 1.7466,
"step": 276
},
{
"epoch": 0.46243739565943237,
"grad_norm": 0.0,
"learning_rate": 9.065893175076604e-06,
"loss": 1.7338,
"step": 277
},
{
"epoch": 0.46410684474123537,
"grad_norm": 0.0,
"learning_rate": 9.057914321726824e-06,
"loss": 1.9141,
"step": 278
},
{
"epoch": 0.4657762938230384,
"grad_norm": 0.0,
"learning_rate": 9.049905079225744e-06,
"loss": 1.6576,
"step": 279
},
{
"epoch": 0.4674457429048414,
"grad_norm": 0.0,
"learning_rate": 9.041865507553458e-06,
"loss": 1.7862,
"step": 280
},
{
"epoch": 0.4691151919866444,
"grad_norm": 0.0,
"learning_rate": 9.033795666917191e-06,
"loss": 1.7305,
"step": 281
},
{
"epoch": 0.4707846410684474,
"grad_norm": 0.0,
"learning_rate": 9.025695617750848e-06,
"loss": 1.9305,
"step": 282
},
{
"epoch": 0.4724540901502504,
"grad_norm": 0.0,
"learning_rate": 9.01756542071456e-06,
"loss": 1.6688,
"step": 283
},
{
"epoch": 0.4741235392320534,
"grad_norm": 0.0,
"learning_rate": 9.009405136694234e-06,
"loss": 1.9435,
"step": 284
},
{
"epoch": 0.4757929883138564,
"grad_norm": 0.0,
"learning_rate": 9.001214826801092e-06,
"loss": 2.0056,
"step": 285
},
{
"epoch": 0.4774624373956594,
"grad_norm": 0.0,
"learning_rate": 8.992994552371217e-06,
"loss": 1.5892,
"step": 286
},
{
"epoch": 0.4791318864774624,
"grad_norm": 0.0,
"learning_rate": 8.98474437496509e-06,
"loss": 1.6205,
"step": 287
},
{
"epoch": 0.48080133555926546,
"grad_norm": 0.0,
"learning_rate": 8.976464356367133e-06,
"loss": 1.7236,
"step": 288
},
{
"epoch": 0.48247078464106846,
"grad_norm": 0.0,
"learning_rate": 8.968154558585244e-06,
"loss": 1.646,
"step": 289
},
{
"epoch": 0.48414023372287146,
"grad_norm": 0.0,
"learning_rate": 8.959815043850336e-06,
"loss": 1.5001,
"step": 290
},
{
"epoch": 0.48580968280467446,
"grad_norm": 0.0,
"learning_rate": 8.951445874615862e-06,
"loss": 1.7625,
"step": 291
},
{
"epoch": 0.48747913188647746,
"grad_norm": 0.0,
"learning_rate": 8.943047113557358e-06,
"loss": 1.848,
"step": 292
},
{
"epoch": 0.48914858096828046,
"grad_norm": 0.0,
"learning_rate": 8.934618823571968e-06,
"loss": 1.6042,
"step": 293
},
{
"epoch": 0.49081803005008345,
"grad_norm": 0.0,
"learning_rate": 8.926161067777973e-06,
"loss": 1.6858,
"step": 294
},
{
"epoch": 0.49248747913188645,
"grad_norm": 0.0,
"learning_rate": 8.917673909514321e-06,
"loss": 2.0782,
"step": 295
},
{
"epoch": 0.4941569282136895,
"grad_norm": 0.0,
"learning_rate": 8.90915741234015e-06,
"loss": 1.6056,
"step": 296
},
{
"epoch": 0.4958263772954925,
"grad_norm": 0.0,
"learning_rate": 8.900611640034313e-06,
"loss": 1.9565,
"step": 297
},
{
"epoch": 0.4974958263772955,
"grad_norm": 0.0,
"learning_rate": 8.892036656594898e-06,
"loss": 1.4701,
"step": 298
},
{
"epoch": 0.4991652754590985,
"grad_norm": 0.0,
"learning_rate": 8.883432526238757e-06,
"loss": 1.4169,
"step": 299
},
{
"epoch": 0.5008347245409015,
"grad_norm": 0.0,
"learning_rate": 8.874799313401014e-06,
"loss": 1.6465,
"step": 300
},
{
"epoch": 0.5025041736227045,
"grad_norm": 0.0,
"learning_rate": 8.866137082734591e-06,
"loss": 1.5715,
"step": 301
},
{
"epoch": 0.5041736227045075,
"grad_norm": 0.0,
"learning_rate": 8.857445899109716e-06,
"loss": 1.848,
"step": 302
},
{
"epoch": 0.5058430717863105,
"grad_norm": 0.0,
"learning_rate": 8.848725827613445e-06,
"loss": 1.7313,
"step": 303
},
{
"epoch": 0.5075125208681135,
"grad_norm": 0.0,
"learning_rate": 8.839976933549173e-06,
"loss": 1.8501,
"step": 304
},
{
"epoch": 0.5091819699499165,
"grad_norm": 0.0,
"learning_rate": 8.831199282436136e-06,
"loss": 1.5231,
"step": 305
},
{
"epoch": 0.5108514190317195,
"grad_norm": 0.0,
"learning_rate": 8.822392940008937e-06,
"loss": 1.4616,
"step": 306
},
{
"epoch": 0.5125208681135225,
"grad_norm": 0.0,
"learning_rate": 8.813557972217038e-06,
"loss": 1.5747,
"step": 307
},
{
"epoch": 0.5141903171953256,
"grad_norm": 0.0,
"learning_rate": 8.804694445224274e-06,
"loss": 1.5613,
"step": 308
},
{
"epoch": 0.5158597662771286,
"grad_norm": 0.0,
"learning_rate": 8.795802425408352e-06,
"loss": 1.3077,
"step": 309
},
{
"epoch": 0.5175292153589316,
"grad_norm": 0.0,
"learning_rate": 8.786881979360368e-06,
"loss": 1.5789,
"step": 310
},
{
"epoch": 0.5191986644407346,
"grad_norm": 0.0,
"learning_rate": 8.777933173884288e-06,
"loss": 1.7127,
"step": 311
},
{
"epoch": 0.5208681135225376,
"grad_norm": 0.0,
"learning_rate": 8.76895607599646e-06,
"loss": 1.7422,
"step": 312
},
{
"epoch": 0.5225375626043406,
"grad_norm": 0.0,
"learning_rate": 8.759950752925114e-06,
"loss": 1.6902,
"step": 313
},
{
"epoch": 0.5242070116861436,
"grad_norm": 0.0,
"learning_rate": 8.750917272109849e-06,
"loss": 1.763,
"step": 314
},
{
"epoch": 0.5258764607679466,
"grad_norm": 0.0,
"learning_rate": 8.741855701201138e-06,
"loss": 1.7908,
"step": 315
},
{
"epoch": 0.5275459098497496,
"grad_norm": 0.0,
"learning_rate": 8.732766108059814e-06,
"loss": 1.6755,
"step": 316
},
{
"epoch": 0.5292153589315526,
"grad_norm": 0.0,
"learning_rate": 8.723648560756565e-06,
"loss": 1.7597,
"step": 317
},
{
"epoch": 0.5308848080133556,
"grad_norm": 0.0,
"learning_rate": 8.714503127571425e-06,
"loss": 1.9694,
"step": 318
},
{
"epoch": 0.5325542570951586,
"grad_norm": 0.0,
"learning_rate": 8.705329876993262e-06,
"loss": 1.6419,
"step": 319
},
{
"epoch": 0.5342237061769616,
"grad_norm": 0.0,
"learning_rate": 8.696128877719258e-06,
"loss": 1.8573,
"step": 320
},
{
"epoch": 0.5358931552587646,
"grad_norm": 0.0,
"learning_rate": 8.686900198654413e-06,
"loss": 1.7608,
"step": 321
},
{
"epoch": 0.5375626043405676,
"grad_norm": 0.0,
"learning_rate": 8.677643908911007e-06,
"loss": 2.0053,
"step": 322
},
{
"epoch": 0.5392320534223706,
"grad_norm": 0.0,
"learning_rate": 8.668360077808093e-06,
"loss": 1.8578,
"step": 323
},
{
"epoch": 0.5409015025041736,
"grad_norm": 0.0,
"learning_rate": 8.659048774870986e-06,
"loss": 1.6681,
"step": 324
},
{
"epoch": 0.5425709515859767,
"grad_norm": 0.0,
"learning_rate": 8.649710069830723e-06,
"loss": 1.6123,
"step": 325
},
{
"epoch": 0.5442404006677797,
"grad_norm": 0.0,
"learning_rate": 8.64034403262356e-06,
"loss": 1.7475,
"step": 326
},
{
"epoch": 0.5459098497495827,
"grad_norm": 0.0,
"learning_rate": 8.630950733390434e-06,
"loss": 1.3078,
"step": 327
},
{
"epoch": 0.5475792988313857,
"grad_norm": 0.0,
"learning_rate": 8.621530242476446e-06,
"loss": 1.6937,
"step": 328
},
{
"epoch": 0.5492487479131887,
"grad_norm": 0.0,
"learning_rate": 8.612082630430333e-06,
"loss": 1.9197,
"step": 329
},
{
"epoch": 0.5509181969949917,
"grad_norm": 0.0,
"learning_rate": 8.602607968003935e-06,
"loss": 1.7944,
"step": 330
},
{
"epoch": 0.5525876460767947,
"grad_norm": 0.0,
"learning_rate": 8.593106326151672e-06,
"loss": 1.83,
"step": 331
},
{
"epoch": 0.5542570951585977,
"grad_norm": 0.0,
"learning_rate": 8.583577776030005e-06,
"loss": 2.0895,
"step": 332
},
{
"epoch": 0.5559265442404007,
"grad_norm": 0.0,
"learning_rate": 8.574022388996913e-06,
"loss": 1.6171,
"step": 333
},
{
"epoch": 0.5575959933222037,
"grad_norm": 0.0,
"learning_rate": 8.564440236611344e-06,
"loss": 1.486,
"step": 334
},
{
"epoch": 0.5592654424040067,
"grad_norm": 0.0,
"learning_rate": 8.5548313906327e-06,
"loss": 1.7145,
"step": 335
},
{
"epoch": 0.5609348914858097,
"grad_norm": 0.0,
"learning_rate": 8.545195923020273e-06,
"loss": 1.713,
"step": 336
},
{
"epoch": 0.5626043405676127,
"grad_norm": 0.0,
"learning_rate": 8.535533905932739e-06,
"loss": 1.756,
"step": 337
},
{
"epoch": 0.5642737896494157,
"grad_norm": 0.0,
"learning_rate": 8.525845411727581e-06,
"loss": 1.5199,
"step": 338
},
{
"epoch": 0.5659432387312187,
"grad_norm": 0.0,
"learning_rate": 8.516130512960576e-06,
"loss": 1.7866,
"step": 339
},
{
"epoch": 0.5676126878130217,
"grad_norm": 0.0,
"learning_rate": 8.506389282385242e-06,
"loss": 1.9604,
"step": 340
},
{
"epoch": 0.5692821368948247,
"grad_norm": 0.0,
"learning_rate": 8.49662179295228e-06,
"loss": 1.8083,
"step": 341
},
{
"epoch": 0.5709515859766278,
"grad_norm": 0.0,
"learning_rate": 8.486828117809057e-06,
"loss": 1.8973,
"step": 342
},
{
"epoch": 0.5726210350584308,
"grad_norm": 0.0,
"learning_rate": 8.47700833029903e-06,
"loss": 1.6784,
"step": 343
},
{
"epoch": 0.5742904841402338,
"grad_norm": 0.0,
"learning_rate": 8.467162503961209e-06,
"loss": 1.7076,
"step": 344
},
{
"epoch": 0.5759599332220368,
"grad_norm": 0.0,
"learning_rate": 8.45729071252961e-06,
"loss": 1.4877,
"step": 345
},
{
"epoch": 0.5776293823038398,
"grad_norm": 0.0,
"learning_rate": 8.447393029932692e-06,
"loss": 1.8807,
"step": 346
},
{
"epoch": 0.5792988313856428,
"grad_norm": 0.0,
"learning_rate": 8.43746953029281e-06,
"loss": 1.8924,
"step": 347
},
{
"epoch": 0.5809682804674458,
"grad_norm": 0.0,
"learning_rate": 8.427520287925669e-06,
"loss": 1.7638,
"step": 348
},
{
"epoch": 0.5826377295492488,
"grad_norm": 0.0,
"learning_rate": 8.417545377339739e-06,
"loss": 1.8664,
"step": 349
},
{
"epoch": 0.5843071786310517,
"grad_norm": 0.0,
"learning_rate": 8.407544873235736e-06,
"loss": 1.6039,
"step": 350
},
{
"epoch": 0.5859766277128547,
"grad_norm": 0.0,
"learning_rate": 8.39751885050603e-06,
"loss": 1.8483,
"step": 351
},
{
"epoch": 0.5876460767946577,
"grad_norm": 0.0,
"learning_rate": 8.387467384234096e-06,
"loss": 1.9636,
"step": 352
},
{
"epoch": 0.5893155258764607,
"grad_norm": 0.0,
"learning_rate": 8.377390549693959e-06,
"loss": 1.3699,
"step": 353
},
{
"epoch": 0.5909849749582637,
"grad_norm": 0.0,
"learning_rate": 8.367288422349617e-06,
"loss": 1.7837,
"step": 354
},
{
"epoch": 0.5926544240400667,
"grad_norm": 0.0,
"learning_rate": 8.35716107785449e-06,
"loss": 1.7749,
"step": 355
},
{
"epoch": 0.5943238731218697,
"grad_norm": 0.0,
"learning_rate": 8.347008592050834e-06,
"loss": 1.6986,
"step": 356
},
{
"epoch": 0.5959933222036727,
"grad_norm": 0.0,
"learning_rate": 8.336831040969196e-06,
"loss": 1.7904,
"step": 357
},
{
"epoch": 0.5976627712854758,
"grad_norm": 0.0,
"learning_rate": 8.326628500827826e-06,
"loss": 1.9549,
"step": 358
},
{
"epoch": 0.5993322203672788,
"grad_norm": 0.0,
"learning_rate": 8.316401048032121e-06,
"loss": 1.7828,
"step": 359
},
{
"epoch": 0.6010016694490818,
"grad_norm": 0.0,
"learning_rate": 8.306148759174036e-06,
"loss": 1.7061,
"step": 360
},
{
"epoch": 0.6026711185308848,
"grad_norm": 0.0,
"learning_rate": 8.295871711031527e-06,
"loss": 1.5943,
"step": 361
},
{
"epoch": 0.6043405676126878,
"grad_norm": 0.0,
"learning_rate": 8.285569980567965e-06,
"loss": 1.7353,
"step": 362
},
{
"epoch": 0.6060100166944908,
"grad_norm": 0.0,
"learning_rate": 8.275243644931565e-06,
"loss": 1.7265,
"step": 363
},
{
"epoch": 0.6076794657762938,
"grad_norm": 0.0,
"learning_rate": 8.264892781454807e-06,
"loss": 1.9438,
"step": 364
},
{
"epoch": 0.6093489148580968,
"grad_norm": 0.0,
"learning_rate": 8.254517467653858e-06,
"loss": 1.6295,
"step": 365
},
{
"epoch": 0.6110183639398998,
"grad_norm": 0.0,
"learning_rate": 8.244117781227982e-06,
"loss": 2.0601,
"step": 366
},
{
"epoch": 0.6126878130217028,
"grad_norm": 0.0,
"learning_rate": 8.23369380005898e-06,
"loss": 1.6861,
"step": 367
},
{
"epoch": 0.6143572621035058,
"grad_norm": 0.0,
"learning_rate": 8.22324560221058e-06,
"loss": 1.68,
"step": 368
},
{
"epoch": 0.6160267111853088,
"grad_norm": 0.0,
"learning_rate": 8.21277326592787e-06,
"loss": 1.683,
"step": 369
},
{
"epoch": 0.6176961602671118,
"grad_norm": 0.0,
"learning_rate": 8.202276869636713e-06,
"loss": 1.4055,
"step": 370
},
{
"epoch": 0.6193656093489148,
"grad_norm": 0.0,
"learning_rate": 8.191756491943146e-06,
"loss": 1.7286,
"step": 371
},
{
"epoch": 0.6210350584307178,
"grad_norm": 0.0,
"learning_rate": 8.1812122116328e-06,
"loss": 1.7372,
"step": 372
},
{
"epoch": 0.6227045075125208,
"grad_norm": 0.0,
"learning_rate": 8.170644107670313e-06,
"loss": 1.5938,
"step": 373
},
{
"epoch": 0.6243739565943238,
"grad_norm": 0.0,
"learning_rate": 8.160052259198737e-06,
"loss": 1.5759,
"step": 374
},
{
"epoch": 0.6260434056761269,
"grad_norm": 0.0,
"learning_rate": 8.149436745538934e-06,
"loss": 1.6956,
"step": 375
},
{
"epoch": 0.6277128547579299,
"grad_norm": 0.0,
"learning_rate": 8.138797646189e-06,
"loss": 1.5602,
"step": 376
},
{
"epoch": 0.6293823038397329,
"grad_norm": 0.0,
"learning_rate": 8.128135040823661e-06,
"loss": 1.5855,
"step": 377
},
{
"epoch": 0.6310517529215359,
"grad_norm": 0.0,
"learning_rate": 8.117449009293668e-06,
"loss": 1.9672,
"step": 378
},
{
"epoch": 0.6327212020033389,
"grad_norm": 0.0,
"learning_rate": 8.106739631625216e-06,
"loss": 1.8238,
"step": 379
},
{
"epoch": 0.6343906510851419,
"grad_norm": 0.0,
"learning_rate": 8.096006988019331e-06,
"loss": 1.8443,
"step": 380
},
{
"epoch": 0.6360601001669449,
"grad_norm": 0.0,
"learning_rate": 8.085251158851278e-06,
"loss": 1.7355,
"step": 381
},
{
"epoch": 0.6377295492487479,
"grad_norm": 0.0,
"learning_rate": 8.074472224669952e-06,
"loss": 1.6855,
"step": 382
},
{
"epoch": 0.6393989983305509,
"grad_norm": 0.0,
"learning_rate": 8.063670266197278e-06,
"loss": 1.9201,
"step": 383
},
{
"epoch": 0.6410684474123539,
"grad_norm": 0.0,
"learning_rate": 8.052845364327609e-06,
"loss": 1.6821,
"step": 384
},
{
"epoch": 0.6427378964941569,
"grad_norm": 0.0,
"learning_rate": 8.041997600127118e-06,
"loss": 1.8732,
"step": 385
},
{
"epoch": 0.6444073455759599,
"grad_norm": 0.0,
"learning_rate": 8.031127054833192e-06,
"loss": 1.5744,
"step": 386
},
{
"epoch": 0.6460767946577629,
"grad_norm": 0.0,
"learning_rate": 8.020233809853815e-06,
"loss": 1.6747,
"step": 387
},
{
"epoch": 0.6477462437395659,
"grad_norm": 0.0,
"learning_rate": 8.009317946766975e-06,
"loss": 1.6959,
"step": 388
},
{
"epoch": 0.6494156928213689,
"grad_norm": 0.0,
"learning_rate": 7.998379547320038e-06,
"loss": 1.464,
"step": 389
},
{
"epoch": 0.6510851419031719,
"grad_norm": 0.0,
"learning_rate": 7.987418693429145e-06,
"loss": 1.7419,
"step": 390
},
{
"epoch": 0.6527545909849749,
"grad_norm": 0.0,
"learning_rate": 7.976435467178592e-06,
"loss": 1.801,
"step": 391
},
{
"epoch": 0.654424040066778,
"grad_norm": 0.0,
"learning_rate": 7.965429950820222e-06,
"loss": 1.7309,
"step": 392
},
{
"epoch": 0.656093489148581,
"grad_norm": 0.0,
"learning_rate": 7.954402226772804e-06,
"loss": 1.6584,
"step": 393
},
{
"epoch": 0.657762938230384,
"grad_norm": 0.0,
"learning_rate": 7.943352377621414e-06,
"loss": 1.8112,
"step": 394
},
{
"epoch": 0.659432387312187,
"grad_norm": 0.0,
"learning_rate": 7.932280486116825e-06,
"loss": 1.9561,
"step": 395
},
{
"epoch": 0.66110183639399,
"grad_norm": 0.0,
"learning_rate": 7.92118663517488e-06,
"loss": 1.7664,
"step": 396
},
{
"epoch": 0.662771285475793,
"grad_norm": 0.0,
"learning_rate": 7.910070907875871e-06,
"loss": 1.5639,
"step": 397
},
{
"epoch": 0.664440734557596,
"grad_norm": 0.0,
"learning_rate": 7.898933387463924e-06,
"loss": 1.401,
"step": 398
},
{
"epoch": 0.666110183639399,
"grad_norm": 0.0,
"learning_rate": 7.887774157346365e-06,
"loss": 1.5964,
"step": 399
},
{
"epoch": 0.667779632721202,
"grad_norm": 0.0,
"learning_rate": 7.876593301093104e-06,
"loss": 1.6001,
"step": 400
},
{
"epoch": 0.669449081803005,
"grad_norm": 0.0,
"learning_rate": 7.865390902436005e-06,
"loss": 1.7532,
"step": 401
},
{
"epoch": 0.671118530884808,
"grad_norm": 0.0,
"learning_rate": 7.854167045268265e-06,
"loss": 1.6746,
"step": 402
},
{
"epoch": 0.672787979966611,
"grad_norm": 0.0,
"learning_rate": 7.842921813643767e-06,
"loss": 1.9211,
"step": 403
},
{
"epoch": 0.674457429048414,
"grad_norm": 0.0,
"learning_rate": 7.831655291776484e-06,
"loss": 1.9482,
"step": 404
},
{
"epoch": 0.676126878130217,
"grad_norm": 0.0,
"learning_rate": 7.82036756403981e-06,
"loss": 1.8791,
"step": 405
},
{
"epoch": 0.67779632721202,
"grad_norm": 0.0,
"learning_rate": 7.809058714965962e-06,
"loss": 1.8097,
"step": 406
},
{
"epoch": 0.679465776293823,
"grad_norm": 0.0,
"learning_rate": 7.797728829245321e-06,
"loss": 1.6008,
"step": 407
},
{
"epoch": 0.6811352253756261,
"grad_norm": 0.0,
"learning_rate": 7.786377991725813e-06,
"loss": 1.5568,
"step": 408
},
{
"epoch": 0.6828046744574291,
"grad_norm": 0.0,
"learning_rate": 7.775006287412268e-06,
"loss": 1.5637,
"step": 409
},
{
"epoch": 0.6844741235392321,
"grad_norm": 0.0,
"learning_rate": 7.763613801465785e-06,
"loss": 1.8548,
"step": 410
},
{
"epoch": 0.6861435726210351,
"grad_norm": 0.0,
"learning_rate": 7.752200619203094e-06,
"loss": 1.6665,
"step": 411
},
{
"epoch": 0.6878130217028381,
"grad_norm": 0.0,
"learning_rate": 7.740766826095918e-06,
"loss": 1.8327,
"step": 412
},
{
"epoch": 0.6894824707846411,
"grad_norm": 0.0,
"learning_rate": 7.729312507770326e-06,
"loss": 1.7214,
"step": 413
},
{
"epoch": 0.6911519198664441,
"grad_norm": 0.0,
"learning_rate": 7.717837750006106e-06,
"loss": 1.4923,
"step": 414
},
{
"epoch": 0.6928213689482471,
"grad_norm": 0.0,
"learning_rate": 7.706342638736108e-06,
"loss": 1.9765,
"step": 415
},
{
"epoch": 0.6944908180300501,
"grad_norm": 0.0,
"learning_rate": 7.694827260045608e-06,
"loss": 1.8433,
"step": 416
},
{
"epoch": 0.6961602671118531,
"grad_norm": 0.0,
"learning_rate": 7.683291700171663e-06,
"loss": 1.7273,
"step": 417
},
{
"epoch": 0.6978297161936561,
"grad_norm": 0.0,
"learning_rate": 7.671736045502462e-06,
"loss": 1.9375,
"step": 418
},
{
"epoch": 0.6994991652754591,
"grad_norm": 0.0,
"learning_rate": 7.660160382576683e-06,
"loss": 1.8932,
"step": 419
},
{
"epoch": 0.7011686143572621,
"grad_norm": 0.0,
"learning_rate": 7.648564798082842e-06,
"loss": 1.8344,
"step": 420
},
{
"epoch": 0.7028380634390651,
"grad_norm": 0.0,
"learning_rate": 7.636949378858647e-06,
"loss": 1.888,
"step": 421
},
{
"epoch": 0.7045075125208681,
"grad_norm": 0.0,
"learning_rate": 7.625314211890342e-06,
"loss": 1.6684,
"step": 422
},
{
"epoch": 0.7061769616026711,
"grad_norm": 0.0,
"learning_rate": 7.613659384312062e-06,
"loss": 1.6708,
"step": 423
},
{
"epoch": 0.7078464106844741,
"grad_norm": 0.0,
"learning_rate": 7.601984983405173e-06,
"loss": 1.619,
"step": 424
},
{
"epoch": 0.7095158597662772,
"grad_norm": 0.0,
"learning_rate": 7.590291096597631e-06,
"loss": 1.7182,
"step": 425
},
{
"epoch": 0.7111853088480802,
"grad_norm": 0.0,
"learning_rate": 7.57857781146331e-06,
"loss": 1.8545,
"step": 426
},
{
"epoch": 0.7128547579298832,
"grad_norm": 0.0,
"learning_rate": 7.566845215721362e-06,
"loss": 1.8239,
"step": 427
},
{
"epoch": 0.7145242070116862,
"grad_norm": 0.0,
"learning_rate": 7.555093397235553e-06,
"loss": 1.7683,
"step": 428
},
{
"epoch": 0.7161936560934892,
"grad_norm": 0.0,
"learning_rate": 7.543322444013601e-06,
"loss": 1.8621,
"step": 429
},
{
"epoch": 0.7178631051752922,
"grad_norm": 0.0,
"learning_rate": 7.531532444206524e-06,
"loss": 1.916,
"step": 430
},
{
"epoch": 0.7195325542570952,
"grad_norm": 0.0,
"learning_rate": 7.519723486107977e-06,
"loss": 1.6987,
"step": 431
},
{
"epoch": 0.7212020033388982,
"grad_norm": 0.0,
"learning_rate": 7.507895658153594e-06,
"loss": 1.6218,
"step": 432
},
{
"epoch": 0.7228714524207012,
"grad_norm": 0.0,
"learning_rate": 7.496049048920317e-06,
"loss": 1.6171,
"step": 433
},
{
"epoch": 0.7245409015025042,
"grad_norm": 0.0,
"learning_rate": 7.484183747125743e-06,
"loss": 1.8034,
"step": 434
},
{
"epoch": 0.7262103505843072,
"grad_norm": 0.0,
"learning_rate": 7.472299841627452e-06,
"loss": 1.8408,
"step": 435
},
{
"epoch": 0.7278797996661102,
"grad_norm": 0.0,
"learning_rate": 7.460397421422346e-06,
"loss": 1.857,
"step": 436
},
{
"epoch": 0.7295492487479132,
"grad_norm": 0.0,
"learning_rate": 7.448476575645982e-06,
"loss": 1.6,
"step": 437
},
{
"epoch": 0.7312186978297162,
"grad_norm": 0.0,
"learning_rate": 7.4365373935719e-06,
"loss": 1.9061,
"step": 438
},
{
"epoch": 0.7328881469115192,
"grad_norm": 0.0,
"learning_rate": 7.424579964610963e-06,
"loss": 1.8586,
"step": 439
},
{
"epoch": 0.7345575959933222,
"grad_norm": 0.0,
"learning_rate": 7.412604378310677e-06,
"loss": 2.0079,
"step": 440
},
{
"epoch": 0.7362270450751253,
"grad_norm": 0.0,
"learning_rate": 7.400610724354531e-06,
"loss": 1.7882,
"step": 441
},
{
"epoch": 0.7378964941569283,
"grad_norm": 0.0,
"learning_rate": 7.388599092561315e-06,
"loss": 1.8505,
"step": 442
},
{
"epoch": 0.7395659432387313,
"grad_norm": 0.0,
"learning_rate": 7.376569572884457e-06,
"loss": 1.5792,
"step": 443
},
{
"epoch": 0.7412353923205343,
"grad_norm": 0.0,
"learning_rate": 7.364522255411342e-06,
"loss": 1.7987,
"step": 444
},
{
"epoch": 0.7429048414023373,
"grad_norm": 0.0,
"learning_rate": 7.3524572303626415e-06,
"loss": 1.814,
"step": 445
},
{
"epoch": 0.7445742904841403,
"grad_norm": 0.0,
"learning_rate": 7.340374588091638e-06,
"loss": 1.54,
"step": 446
},
{
"epoch": 0.7462437395659433,
"grad_norm": 0.0,
"learning_rate": 7.328274419083541e-06,
"loss": 1.9338,
"step": 447
},
{
"epoch": 0.7479131886477463,
"grad_norm": 0.0,
"learning_rate": 7.316156813954821e-06,
"loss": 1.81,
"step": 448
},
{
"epoch": 0.7495826377295493,
"grad_norm": 0.0,
"learning_rate": 7.304021863452525e-06,
"loss": 1.7657,
"step": 449
},
{
"epoch": 0.7512520868113522,
"grad_norm": 0.0,
"learning_rate": 7.291869658453594e-06,
"loss": 1.8334,
"step": 450
},
{
"epoch": 0.7529215358931552,
"grad_norm": 0.0,
"learning_rate": 7.279700289964187e-06,
"loss": 1.6198,
"step": 451
},
{
"epoch": 0.7545909849749582,
"grad_norm": 0.0,
"learning_rate": 7.267513849119001e-06,
"loss": 1.8972,
"step": 452
},
{
"epoch": 0.7562604340567612,
"grad_norm": 0.0,
"learning_rate": 7.255310427180579e-06,
"loss": 1.819,
"step": 453
},
{
"epoch": 0.7579298831385642,
"grad_norm": 0.0,
"learning_rate": 7.243090115538639e-06,
"loss": 1.5967,
"step": 454
},
{
"epoch": 0.7595993322203672,
"grad_norm": 0.0,
"learning_rate": 7.230853005709378e-06,
"loss": 1.8483,
"step": 455
},
{
"epoch": 0.7612687813021702,
"grad_norm": 0.0,
"learning_rate": 7.218599189334799e-06,
"loss": 1.7642,
"step": 456
},
{
"epoch": 0.7629382303839732,
"grad_norm": 0.0,
"learning_rate": 7.206328758182013e-06,
"loss": 1.6935,
"step": 457
},
{
"epoch": 0.7646076794657763,
"grad_norm": 0.0,
"learning_rate": 7.194041804142556e-06,
"loss": 1.6039,
"step": 458
},
{
"epoch": 0.7662771285475793,
"grad_norm": 0.0,
"learning_rate": 7.181738419231708e-06,
"loss": 1.4567,
"step": 459
},
{
"epoch": 0.7679465776293823,
"grad_norm": 0.0,
"learning_rate": 7.169418695587791e-06,
"loss": 1.5594,
"step": 460
},
{
"epoch": 0.7696160267111853,
"grad_norm": 0.0,
"learning_rate": 7.157082725471488e-06,
"loss": 1.8546,
"step": 461
},
{
"epoch": 0.7712854757929883,
"grad_norm": 0.0,
"learning_rate": 7.144730601265148e-06,
"loss": 1.8287,
"step": 462
},
{
"epoch": 0.7729549248747913,
"grad_norm": 0.0,
"learning_rate": 7.132362415472099e-06,
"loss": 1.6402,
"step": 463
},
{
"epoch": 0.7746243739565943,
"grad_norm": 0.0,
"learning_rate": 7.1199782607159494e-06,
"loss": 1.549,
"step": 464
},
{
"epoch": 0.7762938230383973,
"grad_norm": 0.0,
"learning_rate": 7.107578229739895e-06,
"loss": 1.5773,
"step": 465
},
{
"epoch": 0.7779632721202003,
"grad_norm": 0.0,
"learning_rate": 7.095162415406034e-06,
"loss": 2.0086,
"step": 466
},
{
"epoch": 0.7796327212020033,
"grad_norm": 0.0,
"learning_rate": 7.082730910694655e-06,
"loss": 1.635,
"step": 467
},
{
"epoch": 0.7813021702838063,
"grad_norm": 0.0,
"learning_rate": 7.070283808703553e-06,
"loss": 1.7928,
"step": 468
},
{
"epoch": 0.7829716193656093,
"grad_norm": 0.0,
"learning_rate": 7.057821202647332e-06,
"loss": 1.839,
"step": 469
},
{
"epoch": 0.7846410684474123,
"grad_norm": 0.0,
"learning_rate": 7.045343185856701e-06,
"loss": 1.6673,
"step": 470
},
{
"epoch": 0.7863105175292153,
"grad_norm": 0.0,
"learning_rate": 7.032849851777774e-06,
"loss": 1.7299,
"step": 471
},
{
"epoch": 0.7879799666110183,
"grad_norm": 0.0,
"learning_rate": 7.020341293971383e-06,
"loss": 1.8863,
"step": 472
},
{
"epoch": 0.7896494156928213,
"grad_norm": 0.0,
"learning_rate": 7.0078176061123595e-06,
"loss": 1.8666,
"step": 473
},
{
"epoch": 0.7913188647746243,
"grad_norm": 0.0,
"learning_rate": 6.995278881988847e-06,
"loss": 1.3643,
"step": 474
},
{
"epoch": 0.7929883138564274,
"grad_norm": 0.0,
"learning_rate": 6.982725215501592e-06,
"loss": 1.6456,
"step": 475
},
{
"epoch": 0.7946577629382304,
"grad_norm": 0.0,
"learning_rate": 6.970156700663244e-06,
"loss": 1.8537,
"step": 476
},
{
"epoch": 0.7963272120200334,
"grad_norm": 0.0,
"learning_rate": 6.957573431597646e-06,
"loss": 1.8776,
"step": 477
},
{
"epoch": 0.7979966611018364,
"grad_norm": 0.0,
"learning_rate": 6.9449755025391355e-06,
"loss": 1.8815,
"step": 478
},
{
"epoch": 0.7996661101836394,
"grad_norm": 0.0,
"learning_rate": 6.932363007831837e-06,
"loss": 1.8574,
"step": 479
},
{
"epoch": 0.8013355592654424,
"grad_norm": 0.0,
"learning_rate": 6.919736041928956e-06,
"loss": 1.8733,
"step": 480
},
{
"epoch": 0.8030050083472454,
"grad_norm": 0.0,
"learning_rate": 6.907094699392066e-06,
"loss": 1.7342,
"step": 481
},
{
"epoch": 0.8046744574290484,
"grad_norm": 0.0,
"learning_rate": 6.894439074890413e-06,
"loss": 1.6841,
"step": 482
},
{
"epoch": 0.8063439065108514,
"grad_norm": 0.0,
"learning_rate": 6.881769263200192e-06,
"loss": 1.6834,
"step": 483
},
{
"epoch": 0.8080133555926544,
"grad_norm": 0.0,
"learning_rate": 6.869085359203844e-06,
"loss": 1.8617,
"step": 484
},
{
"epoch": 0.8096828046744574,
"grad_norm": 0.0,
"learning_rate": 6.8563874578893505e-06,
"loss": 1.8515,
"step": 485
},
{
"epoch": 0.8113522537562604,
"grad_norm": 0.0,
"learning_rate": 6.843675654349513e-06,
"loss": 1.672,
"step": 486
},
{
"epoch": 0.8130217028380634,
"grad_norm": 0.0,
"learning_rate": 6.830950043781245e-06,
"loss": 1.8821,
"step": 487
},
{
"epoch": 0.8146911519198664,
"grad_norm": 0.0,
"learning_rate": 6.818210721484859e-06,
"loss": 1.9313,
"step": 488
},
{
"epoch": 0.8163606010016694,
"grad_norm": 0.0,
"learning_rate": 6.805457782863354e-06,
"loss": 1.8146,
"step": 489
},
{
"epoch": 0.8180300500834724,
"grad_norm": 0.0,
"learning_rate": 6.792691323421698e-06,
"loss": 1.6788,
"step": 490
},
{
"epoch": 0.8196994991652755,
"grad_norm": 0.0,
"learning_rate": 6.779911438766117e-06,
"loss": 1.8528,
"step": 491
},
{
"epoch": 0.8213689482470785,
"grad_norm": 0.0,
"learning_rate": 6.767118224603374e-06,
"loss": 1.6756,
"step": 492
},
{
"epoch": 0.8230383973288815,
"grad_norm": 0.0,
"learning_rate": 6.754311776740057e-06,
"loss": 1.4272,
"step": 493
},
{
"epoch": 0.8247078464106845,
"grad_norm": 0.0,
"learning_rate": 6.741492191081856e-06,
"loss": 1.7705,
"step": 494
},
{
"epoch": 0.8263772954924875,
"grad_norm": 0.0,
"learning_rate": 6.728659563632853e-06,
"loss": 1.5004,
"step": 495
},
{
"epoch": 0.8280467445742905,
"grad_norm": 0.0,
"learning_rate": 6.715813990494793e-06,
"loss": 2.0629,
"step": 496
},
{
"epoch": 0.8297161936560935,
"grad_norm": 0.0,
"learning_rate": 6.702955567866372e-06,
"loss": 2.0329,
"step": 497
},
{
"epoch": 0.8313856427378965,
"grad_norm": 0.0,
"learning_rate": 6.690084392042514e-06,
"loss": 1.7926,
"step": 498
},
{
"epoch": 0.8330550918196995,
"grad_norm": 0.0,
"learning_rate": 6.677200559413652e-06,
"loss": 1.6044,
"step": 499
},
{
"epoch": 0.8347245409015025,
"grad_norm": 0.0,
"learning_rate": 6.664304166465e-06,
"loss": 1.2786,
"step": 500
},
{
"epoch": 0.8363939899833055,
"grad_norm": 0.0,
"learning_rate": 6.651395309775837e-06,
"loss": 1.8032,
"step": 501
},
{
"epoch": 0.8380634390651085,
"grad_norm": 0.0,
"learning_rate": 6.638474086018778e-06,
"loss": 1.807,
"step": 502
},
{
"epoch": 0.8397328881469115,
"grad_norm": 0.0,
"learning_rate": 6.62554059195906e-06,
"loss": 1.5938,
"step": 503
},
{
"epoch": 0.8414023372287145,
"grad_norm": 0.0,
"learning_rate": 6.612594924453801e-06,
"loss": 1.5486,
"step": 504
},
{
"epoch": 0.8430717863105175,
"grad_norm": 0.0,
"learning_rate": 6.599637180451295e-06,
"loss": 1.7077,
"step": 505
},
{
"epoch": 0.8447412353923205,
"grad_norm": 0.0,
"learning_rate": 6.5866674569902676e-06,
"loss": 1.4935,
"step": 506
},
{
"epoch": 0.8464106844741235,
"grad_norm": 0.0,
"learning_rate": 6.5736858511991585e-06,
"loss": 1.5449,
"step": 507
},
{
"epoch": 0.8480801335559266,
"grad_norm": 0.0,
"learning_rate": 6.5606924602953925e-06,
"loss": 1.3814,
"step": 508
},
{
"epoch": 0.8497495826377296,
"grad_norm": 0.0,
"learning_rate": 6.547687381584653e-06,
"loss": 2.0371,
"step": 509
},
{
"epoch": 0.8514190317195326,
"grad_norm": 0.0,
"learning_rate": 6.534670712460151e-06,
"loss": 1.8953,
"step": 510
},
{
"epoch": 0.8530884808013356,
"grad_norm": 0.0,
"learning_rate": 6.521642550401894e-06,
"loss": 1.63,
"step": 511
},
{
"epoch": 0.8547579298831386,
"grad_norm": 0.0,
"learning_rate": 6.508602992975963e-06,
"loss": 1.7057,
"step": 512
},
{
"epoch": 0.8564273789649416,
"grad_norm": 0.0,
"learning_rate": 6.495552137833774e-06,
"loss": 1.6038,
"step": 513
},
{
"epoch": 0.8580968280467446,
"grad_norm": 0.0,
"learning_rate": 6.4824900827113506e-06,
"loss": 1.6273,
"step": 514
},
{
"epoch": 0.8597662771285476,
"grad_norm": 0.0,
"learning_rate": 6.469416925428593e-06,
"loss": 1.8133,
"step": 515
},
{
"epoch": 0.8614357262103506,
"grad_norm": 0.0,
"learning_rate": 6.456332763888544e-06,
"loss": 1.6532,
"step": 516
},
{
"epoch": 0.8631051752921536,
"grad_norm": 0.0,
"learning_rate": 6.443237696076652e-06,
"loss": 1.9142,
"step": 517
},
{
"epoch": 0.8647746243739566,
"grad_norm": 0.0,
"learning_rate": 6.430131820060043e-06,
"loss": 1.7401,
"step": 518
},
{
"epoch": 0.8664440734557596,
"grad_norm": 0.0,
"learning_rate": 6.417015233986786e-06,
"loss": 1.5529,
"step": 519
},
{
"epoch": 0.8681135225375626,
"grad_norm": 0.0,
"learning_rate": 6.403888036085155e-06,
"loss": 1.5511,
"step": 520
},
{
"epoch": 0.8697829716193656,
"grad_norm": 0.0,
"learning_rate": 6.390750324662895e-06,
"loss": 1.7932,
"step": 521
},
{
"epoch": 0.8714524207011686,
"grad_norm": 0.0,
"learning_rate": 6.3776021981064825e-06,
"loss": 1.7507,
"step": 522
},
{
"epoch": 0.8731218697829716,
"grad_norm": 0.0,
"learning_rate": 6.364443754880395e-06,
"loss": 1.8488,
"step": 523
},
{
"epoch": 0.8747913188647746,
"grad_norm": 0.0,
"learning_rate": 6.3512750935263664e-06,
"loss": 1.4568,
"step": 524
},
{
"epoch": 0.8764607679465777,
"grad_norm": 0.0,
"learning_rate": 6.338096312662658e-06,
"loss": 1.8529,
"step": 525
},
{
"epoch": 0.8781302170283807,
"grad_norm": 0.0,
"learning_rate": 6.32490751098331e-06,
"loss": 1.6563,
"step": 526
},
{
"epoch": 0.8797996661101837,
"grad_norm": 0.0,
"learning_rate": 6.311708787257408e-06,
"loss": 1.6838,
"step": 527
},
{
"epoch": 0.8814691151919867,
"grad_norm": 0.0,
"learning_rate": 6.298500240328342e-06,
"loss": 1.7945,
"step": 528
},
{
"epoch": 0.8831385642737897,
"grad_norm": 0.0,
"learning_rate": 6.285281969113072e-06,
"loss": 1.4784,
"step": 529
},
{
"epoch": 0.8848080133555927,
"grad_norm": 0.0,
"learning_rate": 6.272054072601374e-06,
"loss": 1.7104,
"step": 530
},
{
"epoch": 0.8864774624373957,
"grad_norm": 0.0,
"learning_rate": 6.258816649855109e-06,
"loss": 1.5431,
"step": 531
},
{
"epoch": 0.8881469115191987,
"grad_norm": 0.0,
"learning_rate": 6.245569800007484e-06,
"loss": 1.5852,
"step": 532
},
{
"epoch": 0.8898163606010017,
"grad_norm": 0.0,
"learning_rate": 6.232313622262297e-06,
"loss": 1.7488,
"step": 533
},
{
"epoch": 0.8914858096828047,
"grad_norm": 0.0,
"learning_rate": 6.219048215893204e-06,
"loss": 1.6947,
"step": 534
},
{
"epoch": 0.8931552587646077,
"grad_norm": 0.0,
"learning_rate": 6.2057736802429724e-06,
"loss": 1.6526,
"step": 535
},
{
"epoch": 0.8948247078464107,
"grad_norm": 0.0,
"learning_rate": 6.192490114722741e-06,
"loss": 1.4266,
"step": 536
},
{
"epoch": 0.8964941569282137,
"grad_norm": 0.0,
"learning_rate": 6.179197618811267e-06,
"loss": 1.4984,
"step": 537
},
{
"epoch": 0.8981636060100167,
"grad_norm": 0.0,
"learning_rate": 6.1658962920541875e-06,
"loss": 1.7565,
"step": 538
},
{
"epoch": 0.8998330550918197,
"grad_norm": 0.0,
"learning_rate": 6.152586234063277e-06,
"loss": 1.6057,
"step": 539
},
{
"epoch": 0.9015025041736227,
"grad_norm": 0.0,
"learning_rate": 6.139267544515689e-06,
"loss": 1.4978,
"step": 540
},
{
"epoch": 0.9031719532554258,
"grad_norm": 0.0,
"learning_rate": 6.125940323153223e-06,
"loss": 1.5634,
"step": 541
},
{
"epoch": 0.9048414023372288,
"grad_norm": 0.0,
"learning_rate": 6.112604669781572e-06,
"loss": 1.8368,
"step": 542
},
{
"epoch": 0.9065108514190318,
"grad_norm": 0.0,
"learning_rate": 6.0992606842695745e-06,
"loss": 1.4745,
"step": 543
},
{
"epoch": 0.9081803005008348,
"grad_norm": 0.0,
"learning_rate": 6.0859084665484645e-06,
"loss": 1.6676,
"step": 544
},
{
"epoch": 0.9098497495826378,
"grad_norm": 0.0,
"learning_rate": 6.07254811661113e-06,
"loss": 1.7473,
"step": 545
},
{
"epoch": 0.9115191986644408,
"grad_norm": 0.0,
"learning_rate": 6.059179734511357e-06,
"loss": 1.8217,
"step": 546
},
{
"epoch": 0.9131886477462438,
"grad_norm": 0.0,
"learning_rate": 6.045803420363085e-06,
"loss": 2.0666,
"step": 547
},
{
"epoch": 0.9148580968280468,
"grad_norm": 0.0,
"learning_rate": 6.032419274339654e-06,
"loss": 1.7439,
"step": 548
},
{
"epoch": 0.9165275459098498,
"grad_norm": 0.0,
"learning_rate": 6.019027396673058e-06,
"loss": 1.6459,
"step": 549
},
{
"epoch": 0.9181969949916527,
"grad_norm": 0.0,
"learning_rate": 6.005627887653189e-06,
"loss": 1.6537,
"step": 550
},
{
"epoch": 0.9198664440734557,
"grad_norm": 0.0,
"learning_rate": 5.9922208476270914e-06,
"loss": 1.4779,
"step": 551
},
{
"epoch": 0.9215358931552587,
"grad_norm": 0.0,
"learning_rate": 5.978806376998209e-06,
"loss": 1.6659,
"step": 552
},
{
"epoch": 0.9232053422370617,
"grad_norm": 0.0,
"learning_rate": 5.965384576225632e-06,
"loss": 1.8208,
"step": 553
},
{
"epoch": 0.9248747913188647,
"grad_norm": 0.0,
"learning_rate": 5.951955545823342e-06,
"loss": 1.7005,
"step": 554
},
{
"epoch": 0.9265442404006677,
"grad_norm": 0.0,
"learning_rate": 5.938519386359466e-06,
"loss": 1.7261,
"step": 555
},
{
"epoch": 0.9282136894824707,
"grad_norm": 0.0,
"learning_rate": 5.925076198455517e-06,
"loss": 1.8591,
"step": 556
},
{
"epoch": 0.9298831385642737,
"grad_norm": 0.0,
"learning_rate": 5.911626082785644e-06,
"loss": 1.9504,
"step": 557
},
{
"epoch": 0.9315525876460768,
"grad_norm": 0.0,
"learning_rate": 5.898169140075878e-06,
"loss": 1.8146,
"step": 558
},
{
"epoch": 0.9332220367278798,
"grad_norm": 0.0,
"learning_rate": 5.884705471103376e-06,
"loss": 1.7633,
"step": 559
},
{
"epoch": 0.9348914858096828,
"grad_norm": 0.0,
"learning_rate": 5.871235176695664e-06,
"loss": 1.7236,
"step": 560
},
{
"epoch": 0.9365609348914858,
"grad_norm": 0.0,
"learning_rate": 5.857758357729892e-06,
"loss": 1.9265,
"step": 561
},
{
"epoch": 0.9382303839732888,
"grad_norm": 0.0,
"learning_rate": 5.844275115132064e-06,
"loss": 1.5696,
"step": 562
},
{
"epoch": 0.9398998330550918,
"grad_norm": 0.0,
"learning_rate": 5.830785549876296e-06,
"loss": 1.8268,
"step": 563
},
{
"epoch": 0.9415692821368948,
"grad_norm": 0.0,
"learning_rate": 5.817289762984048e-06,
"loss": 1.5622,
"step": 564
},
{
"epoch": 0.9432387312186978,
"grad_norm": 0.0,
"learning_rate": 5.803787855523377e-06,
"loss": 1.7594,
"step": 565
},
{
"epoch": 0.9449081803005008,
"grad_norm": 0.0,
"learning_rate": 5.790279928608173e-06,
"loss": 1.6731,
"step": 566
},
{
"epoch": 0.9465776293823038,
"grad_norm": 0.0,
"learning_rate": 5.776766083397409e-06,
"loss": 1.5842,
"step": 567
},
{
"epoch": 0.9482470784641068,
"grad_norm": 0.0,
"learning_rate": 5.763246421094373e-06,
"loss": 1.8225,
"step": 568
},
{
"epoch": 0.9499165275459098,
"grad_norm": 0.0,
"learning_rate": 5.749721042945924e-06,
"loss": 1.591,
"step": 569
},
{
"epoch": 0.9515859766277128,
"grad_norm": 0.0,
"learning_rate": 5.736190050241719e-06,
"loss": 1.7563,
"step": 570
},
{
"epoch": 0.9532554257095158,
"grad_norm": 0.0,
"learning_rate": 5.722653544313467e-06,
"loss": 1.707,
"step": 571
},
{
"epoch": 0.9549248747913188,
"grad_norm": 0.0,
"learning_rate": 5.709111626534161e-06,
"loss": 1.8462,
"step": 572
},
{
"epoch": 0.9565943238731218,
"grad_norm": 0.0,
"learning_rate": 5.695564398317326e-06,
"loss": 1.6125,
"step": 573
},
{
"epoch": 0.9582637729549248,
"grad_norm": 0.0,
"learning_rate": 5.6820119611162515e-06,
"loss": 1.6431,
"step": 574
},
{
"epoch": 0.9599332220367279,
"grad_norm": 0.0,
"learning_rate": 5.668454416423243e-06,
"loss": 1.9529,
"step": 575
},
{
"epoch": 0.9616026711185309,
"grad_norm": 0.0,
"learning_rate": 5.65489186576885e-06,
"loss": 1.8668,
"step": 576
},
{
"epoch": 0.9632721202003339,
"grad_norm": 0.0,
"learning_rate": 5.64132441072111e-06,
"loss": 1.9664,
"step": 577
},
{
"epoch": 0.9649415692821369,
"grad_norm": 0.0,
"learning_rate": 5.627752152884794e-06,
"loss": 1.6759,
"step": 578
},
{
"epoch": 0.9666110183639399,
"grad_norm": 0.0,
"learning_rate": 5.614175193900639e-06,
"loss": 1.8378,
"step": 579
},
{
"epoch": 0.9682804674457429,
"grad_norm": 0.0,
"learning_rate": 5.600593635444583e-06,
"loss": 1.2707,
"step": 580
},
{
"epoch": 0.9699499165275459,
"grad_norm": 0.0,
"learning_rate": 5.587007579227014e-06,
"loss": 1.6216,
"step": 581
},
{
"epoch": 0.9716193656093489,
"grad_norm": 0.0,
"learning_rate": 5.573417126992004e-06,
"loss": 1.688,
"step": 582
},
{
"epoch": 0.9732888146911519,
"grad_norm": 0.0,
"learning_rate": 5.559822380516539e-06,
"loss": 1.8716,
"step": 583
},
{
"epoch": 0.9749582637729549,
"grad_norm": 0.0,
"learning_rate": 5.546223441609775e-06,
"loss": 1.938,
"step": 584
},
{
"epoch": 0.9766277128547579,
"grad_norm": 0.0,
"learning_rate": 5.532620412112255e-06,
"loss": 1.5521,
"step": 585
},
{
"epoch": 0.9782971619365609,
"grad_norm": 0.0,
"learning_rate": 5.51901339389516e-06,
"loss": 1.7355,
"step": 586
},
{
"epoch": 0.9799666110183639,
"grad_norm": 0.0,
"learning_rate": 5.5054024888595415e-06,
"loss": 1.3318,
"step": 587
},
{
"epoch": 0.9816360601001669,
"grad_norm": 0.0,
"learning_rate": 5.491787798935557e-06,
"loss": 1.7539,
"step": 588
},
{
"epoch": 0.9833055091819699,
"grad_norm": 0.0,
"learning_rate": 5.478169426081712e-06,
"loss": 1.8847,
"step": 589
},
{
"epoch": 0.9849749582637729,
"grad_norm": 0.0,
"learning_rate": 5.464547472284091e-06,
"loss": 1.6883,
"step": 590
},
{
"epoch": 0.986644407345576,
"grad_norm": 0.0,
"learning_rate": 5.450922039555594e-06,
"loss": 1.7047,
"step": 591
},
{
"epoch": 0.988313856427379,
"grad_norm": 0.0,
"learning_rate": 5.437293229935178e-06,
"loss": 1.5539,
"step": 592
},
{
"epoch": 0.989983305509182,
"grad_norm": 0.0,
"learning_rate": 5.4236611454870865e-06,
"loss": 1.6321,
"step": 593
},
{
"epoch": 0.991652754590985,
"grad_norm": 0.0,
"learning_rate": 5.4100258883000874e-06,
"loss": 1.7593,
"step": 594
},
{
"epoch": 0.993322203672788,
"grad_norm": 0.0,
"learning_rate": 5.39638756048671e-06,
"loss": 1.7527,
"step": 595
},
{
"epoch": 0.994991652754591,
"grad_norm": 0.0,
"learning_rate": 5.38274626418248e-06,
"loss": 1.3562,
"step": 596
},
{
"epoch": 0.996661101836394,
"grad_norm": 0.0,
"learning_rate": 5.3691021015451494e-06,
"loss": 1.4055,
"step": 597
},
{
"epoch": 0.998330550918197,
"grad_norm": 0.0,
"learning_rate": 5.355455174753941e-06,
"loss": 1.7539,
"step": 598
},
{
"epoch": 1.0,
"grad_norm": 0.0,
"learning_rate": 5.341805586008778e-06,
"loss": 1.7956,
"step": 599
},
{
"epoch": 1.001669449081803,
"grad_norm": 0.0,
"learning_rate": 5.328153437529512e-06,
"loss": 2.0003,
"step": 600
},
{
"epoch": 1.003338898163606,
"grad_norm": 0.0,
"learning_rate": 5.31449883155517e-06,
"loss": 1.7132,
"step": 601
},
{
"epoch": 1.005008347245409,
"grad_norm": 0.0,
"learning_rate": 5.300841870343183e-06,
"loss": 1.3768,
"step": 602
},
{
"epoch": 1.006677796327212,
"grad_norm": 0.0,
"learning_rate": 5.287182656168618e-06,
"loss": 1.7136,
"step": 603
},
{
"epoch": 1.008347245409015,
"grad_norm": 0.0,
"learning_rate": 5.273521291323411e-06,
"loss": 1.7554,
"step": 604
},
{
"epoch": 1.010016694490818,
"grad_norm": 0.0,
"learning_rate": 5.259857878115611e-06,
"loss": 1.2793,
"step": 605
},
{
"epoch": 1.011686143572621,
"grad_norm": 0.0,
"learning_rate": 5.2461925188686035e-06,
"loss": 1.7101,
"step": 606
},
{
"epoch": 1.013355592654424,
"grad_norm": 0.0,
"learning_rate": 5.232525315920346e-06,
"loss": 1.9689,
"step": 607
},
{
"epoch": 1.015025041736227,
"grad_norm": 0.0,
"learning_rate": 5.218856371622605e-06,
"loss": 1.7394,
"step": 608
},
{
"epoch": 1.01669449081803,
"grad_norm": 0.0,
"learning_rate": 5.205185788340189e-06,
"loss": 1.7766,
"step": 609
},
{
"epoch": 1.018363939899833,
"grad_norm": 0.0,
"learning_rate": 5.191513668450178e-06,
"loss": 1.7382,
"step": 610
},
{
"epoch": 1.020033388981636,
"grad_norm": 0.0,
"learning_rate": 5.17784011434116e-06,
"loss": 1.6687,
"step": 611
},
{
"epoch": 1.021702838063439,
"grad_norm": 0.0,
"learning_rate": 5.1641652284124645e-06,
"loss": 1.7971,
"step": 612
},
{
"epoch": 1.023372287145242,
"grad_norm": 0.0,
"learning_rate": 5.150489113073394e-06,
"loss": 1.9121,
"step": 613
},
{
"epoch": 1.025041736227045,
"grad_norm": 0.0,
"learning_rate": 5.136811870742462e-06,
"loss": 1.9295,
"step": 614
},
{
"epoch": 1.026711185308848,
"grad_norm": 0.0,
"learning_rate": 5.123133603846613e-06,
"loss": 1.9907,
"step": 615
},
{
"epoch": 1.0283806343906512,
"grad_norm": 0.0,
"learning_rate": 5.109454414820475e-06,
"loss": 1.7408,
"step": 616
},
{
"epoch": 1.0300500834724542,
"grad_norm": 0.0,
"learning_rate": 5.095774406105572e-06,
"loss": 1.9591,
"step": 617
},
{
"epoch": 1.0317195325542572,
"grad_norm": 0.0,
"learning_rate": 5.0820936801495716e-06,
"loss": 1.8002,
"step": 618
},
{
"epoch": 1.0333889816360602,
"grad_norm": 0.0,
"learning_rate": 5.068412339405514e-06,
"loss": 1.3725,
"step": 619
},
{
"epoch": 1.0350584307178632,
"grad_norm": 0.0,
"learning_rate": 5.054730486331041e-06,
"loss": 1.7209,
"step": 620
},
{
"epoch": 1.0367278797996662,
"grad_norm": 0.0,
"learning_rate": 5.0410482233876275e-06,
"loss": 1.5783,
"step": 621
},
{
"epoch": 1.0383973288814692,
"grad_norm": 0.0,
"learning_rate": 5.0273656530398285e-06,
"loss": 1.6146,
"step": 622
},
{
"epoch": 1.0400667779632722,
"grad_norm": 0.0,
"learning_rate": 5.013682877754491e-06,
"loss": 1.6253,
"step": 623
},
{
"epoch": 1.0417362270450752,
"grad_norm": 0.0,
"learning_rate": 5e-06,
"loss": 1.6338,
"step": 624
},
{
"epoch": 1.0434056761268782,
"grad_norm": 0.0,
"learning_rate": 4.986317122245508e-06,
"loss": 1.7018,
"step": 625
},
{
"epoch": 1.0450751252086812,
"grad_norm": 0.0,
"learning_rate": 4.972634346960173e-06,
"loss": 1.9059,
"step": 626
},
{
"epoch": 1.0467445742904842,
"grad_norm": 0.0,
"learning_rate": 4.9589517766123725e-06,
"loss": 1.6677,
"step": 627
},
{
"epoch": 1.0484140233722872,
"grad_norm": 0.0,
"learning_rate": 4.945269513668962e-06,
"loss": 1.8649,
"step": 628
},
{
"epoch": 1.0500834724540902,
"grad_norm": 0.0,
"learning_rate": 4.931587660594488e-06,
"loss": 1.503,
"step": 629
},
{
"epoch": 1.0517529215358932,
"grad_norm": 0.0,
"learning_rate": 4.917906319850431e-06,
"loss": 1.6686,
"step": 630
},
{
"epoch": 1.0534223706176962,
"grad_norm": 0.0,
"learning_rate": 4.90422559389443e-06,
"loss": 1.4966,
"step": 631
},
{
"epoch": 1.0550918196994992,
"grad_norm": 0.0,
"learning_rate": 4.890545585179527e-06,
"loss": 2.0538,
"step": 632
},
{
"epoch": 1.0567612687813022,
"grad_norm": 0.0,
"learning_rate": 4.876866396153388e-06,
"loss": 1.458,
"step": 633
},
{
"epoch": 1.0584307178631052,
"grad_norm": 0.0,
"learning_rate": 4.863188129257539e-06,
"loss": 1.7656,
"step": 634
},
{
"epoch": 1.0601001669449082,
"grad_norm": 0.0,
"learning_rate": 4.849510886926606e-06,
"loss": 1.8316,
"step": 635
},
{
"epoch": 1.0617696160267112,
"grad_norm": 0.0,
"learning_rate": 4.835834771587537e-06,
"loss": 1.6928,
"step": 636
},
{
"epoch": 1.0634390651085142,
"grad_norm": 0.0,
"learning_rate": 4.822159885658843e-06,
"loss": 1.7323,
"step": 637
},
{
"epoch": 1.0651085141903172,
"grad_norm": 0.0,
"learning_rate": 4.808486331549824e-06,
"loss": 1.7975,
"step": 638
},
{
"epoch": 1.0667779632721202,
"grad_norm": 0.0,
"learning_rate": 4.794814211659813e-06,
"loss": 1.819,
"step": 639
},
{
"epoch": 1.0684474123539232,
"grad_norm": 0.0,
"learning_rate": 4.781143628377396e-06,
"loss": 1.4956,
"step": 640
},
{
"epoch": 1.0701168614357262,
"grad_norm": 0.0,
"learning_rate": 4.767474684079655e-06,
"loss": 1.767,
"step": 641
},
{
"epoch": 1.0717863105175292,
"grad_norm": 0.0,
"learning_rate": 4.753807481131398e-06,
"loss": 1.9048,
"step": 642
},
{
"epoch": 1.0734557595993321,
"grad_norm": 0.0,
"learning_rate": 4.74014212188439e-06,
"loss": 1.7265,
"step": 643
},
{
"epoch": 1.0751252086811351,
"grad_norm": 0.0,
"learning_rate": 4.72647870867659e-06,
"loss": 1.4845,
"step": 644
},
{
"epoch": 1.0767946577629381,
"grad_norm": 0.0,
"learning_rate": 4.712817343831384e-06,
"loss": 1.9606,
"step": 645
},
{
"epoch": 1.0784641068447411,
"grad_norm": 0.0,
"learning_rate": 4.699158129656818e-06,
"loss": 1.6695,
"step": 646
},
{
"epoch": 1.0801335559265441,
"grad_norm": 0.0,
"learning_rate": 4.685501168444831e-06,
"loss": 1.7123,
"step": 647
},
{
"epoch": 1.0818030050083474,
"grad_norm": 0.0,
"learning_rate": 4.671846562470489e-06,
"loss": 1.7025,
"step": 648
},
{
"epoch": 1.0834724540901504,
"grad_norm": 0.0,
"learning_rate": 4.658194413991224e-06,
"loss": 1.6985,
"step": 649
},
{
"epoch": 1.0851419031719534,
"grad_norm": 0.0,
"learning_rate": 4.644544825246059e-06,
"loss": 1.6561,
"step": 650
},
{
"epoch": 1.0868113522537564,
"grad_norm": 0.0,
"learning_rate": 4.630897898454852e-06,
"loss": 1.6168,
"step": 651
},
{
"epoch": 1.0884808013355594,
"grad_norm": 0.0,
"learning_rate": 4.617253735817522e-06,
"loss": 1.8791,
"step": 652
},
{
"epoch": 1.0901502504173624,
"grad_norm": 0.0,
"learning_rate": 4.603612439513293e-06,
"loss": 1.7602,
"step": 653
},
{
"epoch": 1.0918196994991654,
"grad_norm": 0.0,
"learning_rate": 4.589974111699914e-06,
"loss": 1.7051,
"step": 654
},
{
"epoch": 1.0934891485809684,
"grad_norm": 0.0,
"learning_rate": 4.576338854512916e-06,
"loss": 1.5697,
"step": 655
},
{
"epoch": 1.0951585976627713,
"grad_norm": 0.0,
"learning_rate": 4.562706770064824e-06,
"loss": 1.6602,
"step": 656
},
{
"epoch": 1.0968280467445743,
"grad_norm": 0.0,
"learning_rate": 4.549077960444407e-06,
"loss": 1.6243,
"step": 657
},
{
"epoch": 1.0984974958263773,
"grad_norm": 0.0,
"learning_rate": 4.535452527715911e-06,
"loss": 1.902,
"step": 658
},
{
"epoch": 1.1001669449081803,
"grad_norm": 0.0,
"learning_rate": 4.521830573918289e-06,
"loss": 1.8197,
"step": 659
},
{
"epoch": 1.1018363939899833,
"grad_norm": 0.0,
"learning_rate": 4.508212201064446e-06,
"loss": 1.8519,
"step": 660
},
{
"epoch": 1.1035058430717863,
"grad_norm": 0.0,
"learning_rate": 4.49459751114046e-06,
"loss": 1.9204,
"step": 661
},
{
"epoch": 1.1051752921535893,
"grad_norm": 0.0,
"learning_rate": 4.480986606104842e-06,
"loss": 1.3943,
"step": 662
},
{
"epoch": 1.1068447412353923,
"grad_norm": 0.0,
"learning_rate": 4.467379587887747e-06,
"loss": 1.6344,
"step": 663
},
{
"epoch": 1.1085141903171953,
"grad_norm": 0.0,
"learning_rate": 4.453776558390225e-06,
"loss": 1.8236,
"step": 664
},
{
"epoch": 1.1101836393989983,
"grad_norm": 0.0,
"learning_rate": 4.4401776194834615e-06,
"loss": 1.6879,
"step": 665
},
{
"epoch": 1.1118530884808013,
"grad_norm": 0.0,
"learning_rate": 4.426582873007999e-06,
"loss": 1.7198,
"step": 666
},
{
"epoch": 1.1135225375626043,
"grad_norm": 0.0,
"learning_rate": 4.412992420772988e-06,
"loss": 1.9099,
"step": 667
},
{
"epoch": 1.1151919866444073,
"grad_norm": 0.0,
"learning_rate": 4.3994063645554185e-06,
"loss": 1.9572,
"step": 668
},
{
"epoch": 1.1168614357262103,
"grad_norm": 0.0,
"learning_rate": 4.385824806099364e-06,
"loss": 1.9276,
"step": 669
},
{
"epoch": 1.1185308848080133,
"grad_norm": 0.0,
"learning_rate": 4.3722478471152065e-06,
"loss": 1.6846,
"step": 670
},
{
"epoch": 1.1202003338898163,
"grad_norm": 0.0,
"learning_rate": 4.3586755892788896e-06,
"loss": 1.5223,
"step": 671
},
{
"epoch": 1.1218697829716193,
"grad_norm": 0.0,
"learning_rate": 4.345108134231152e-06,
"loss": 1.9143,
"step": 672
},
{
"epoch": 1.1235392320534223,
"grad_norm": 0.0,
"learning_rate": 4.331545583576758e-06,
"loss": 1.3943,
"step": 673
},
{
"epoch": 1.1252086811352253,
"grad_norm": 0.0,
"learning_rate": 4.31798803888375e-06,
"loss": 1.6374,
"step": 674
},
{
"epoch": 1.1268781302170283,
"grad_norm": 0.0,
"learning_rate": 4.304435601682675e-06,
"loss": 1.9188,
"step": 675
},
{
"epoch": 1.1285475792988313,
"grad_norm": 0.0,
"learning_rate": 4.290888373465841e-06,
"loss": 1.9335,
"step": 676
},
{
"epoch": 1.1302170283806343,
"grad_norm": 0.0,
"learning_rate": 4.277346455686535e-06,
"loss": 1.5135,
"step": 677
},
{
"epoch": 1.1318864774624373,
"grad_norm": 0.0,
"learning_rate": 4.2638099497582835e-06,
"loss": 1.6542,
"step": 678
},
{
"epoch": 1.1335559265442403,
"grad_norm": 0.0,
"learning_rate": 4.2502789570540775e-06,
"loss": 1.429,
"step": 679
},
{
"epoch": 1.1352253756260433,
"grad_norm": 0.0,
"learning_rate": 4.236753578905627e-06,
"loss": 1.3608,
"step": 680
},
{
"epoch": 1.1368948247078463,
"grad_norm": 0.0,
"learning_rate": 4.223233916602593e-06,
"loss": 1.6528,
"step": 681
},
{
"epoch": 1.1385642737896493,
"grad_norm": 0.0,
"learning_rate": 4.209720071391827e-06,
"loss": 1.6002,
"step": 682
},
{
"epoch": 1.1402337228714523,
"grad_norm": 0.0,
"learning_rate": 4.196212144476626e-06,
"loss": 1.7644,
"step": 683
},
{
"epoch": 1.1419031719532553,
"grad_norm": 0.0,
"learning_rate": 4.1827102370159526e-06,
"loss": 1.5274,
"step": 684
},
{
"epoch": 1.1435726210350585,
"grad_norm": 0.0,
"learning_rate": 4.169214450123706e-06,
"loss": 1.5075,
"step": 685
},
{
"epoch": 1.1452420701168615,
"grad_norm": 0.0,
"learning_rate": 4.155724884867937e-06,
"loss": 1.6292,
"step": 686
},
{
"epoch": 1.1469115191986645,
"grad_norm": 0.0,
"learning_rate": 4.142241642270109e-06,
"loss": 1.752,
"step": 687
},
{
"epoch": 1.1485809682804675,
"grad_norm": 0.0,
"learning_rate": 4.1287648233043366e-06,
"loss": 1.7797,
"step": 688
},
{
"epoch": 1.1502504173622705,
"grad_norm": 0.0,
"learning_rate": 4.115294528896627e-06,
"loss": 1.8398,
"step": 689
},
{
"epoch": 1.1519198664440735,
"grad_norm": 0.0,
"learning_rate": 4.1018308599241245e-06,
"loss": 1.6796,
"step": 690
},
{
"epoch": 1.1535893155258765,
"grad_norm": 0.0,
"learning_rate": 4.088373917214357e-06,
"loss": 1.8816,
"step": 691
},
{
"epoch": 1.1552587646076795,
"grad_norm": 0.0,
"learning_rate": 4.074923801544485e-06,
"loss": 1.7933,
"step": 692
},
{
"epoch": 1.1569282136894825,
"grad_norm": 0.0,
"learning_rate": 4.0614806136405355e-06,
"loss": 1.6684,
"step": 693
},
{
"epoch": 1.1585976627712855,
"grad_norm": 0.0,
"learning_rate": 4.048044454176658e-06,
"loss": 1.3224,
"step": 694
},
{
"epoch": 1.1602671118530885,
"grad_norm": 0.0,
"learning_rate": 4.034615423774369e-06,
"loss": 1.787,
"step": 695
},
{
"epoch": 1.1619365609348915,
"grad_norm": 0.0,
"learning_rate": 4.0211936230017915e-06,
"loss": 1.6381,
"step": 696
},
{
"epoch": 1.1636060100166945,
"grad_norm": 0.0,
"learning_rate": 4.00777915237291e-06,
"loss": 1.7108,
"step": 697
},
{
"epoch": 1.1652754590984975,
"grad_norm": 0.0,
"learning_rate": 3.9943721123468124e-06,
"loss": 1.8407,
"step": 698
},
{
"epoch": 1.1669449081803005,
"grad_norm": 0.0,
"learning_rate": 3.980972603326945e-06,
"loss": 1.7378,
"step": 699
},
{
"epoch": 1.1686143572621035,
"grad_norm": 0.0,
"learning_rate": 3.967580725660348e-06,
"loss": 1.8234,
"step": 700
},
{
"epoch": 1.1702838063439065,
"grad_norm": 0.0,
"learning_rate": 3.954196579636918e-06,
"loss": 1.7271,
"step": 701
},
{
"epoch": 1.1719532554257095,
"grad_norm": 0.0,
"learning_rate": 3.940820265488644e-06,
"loss": 1.7063,
"step": 702
},
{
"epoch": 1.1736227045075125,
"grad_norm": 0.0,
"learning_rate": 3.9274518833888704e-06,
"loss": 1.6849,
"step": 703
},
{
"epoch": 1.1752921535893155,
"grad_norm": 0.0,
"learning_rate": 3.914091533451537e-06,
"loss": 1.7138,
"step": 704
},
{
"epoch": 1.1769616026711185,
"grad_norm": 0.0,
"learning_rate": 3.900739315730426e-06,
"loss": 1.8181,
"step": 705
},
{
"epoch": 1.1786310517529215,
"grad_norm": 0.0,
"learning_rate": 3.887395330218429e-06,
"loss": 1.6003,
"step": 706
},
{
"epoch": 1.1803005008347245,
"grad_norm": 0.0,
"learning_rate": 3.874059676846778e-06,
"loss": 1.775,
"step": 707
},
{
"epoch": 1.1819699499165275,
"grad_norm": 0.0,
"learning_rate": 3.860732455484314e-06,
"loss": 1.4583,
"step": 708
},
{
"epoch": 1.1836393989983305,
"grad_norm": 0.0,
"learning_rate": 3.847413765936724e-06,
"loss": 1.6664,
"step": 709
},
{
"epoch": 1.1853088480801335,
"grad_norm": 0.0,
"learning_rate": 3.8341037079458125e-06,
"loss": 1.7606,
"step": 710
},
{
"epoch": 1.1869782971619365,
"grad_norm": 0.0,
"learning_rate": 3.820802381188735e-06,
"loss": 1.4833,
"step": 711
},
{
"epoch": 1.1886477462437395,
"grad_norm": 0.0,
"learning_rate": 3.8075098852772607e-06,
"loss": 1.771,
"step": 712
},
{
"epoch": 1.1903171953255425,
"grad_norm": 0.0,
"learning_rate": 3.7942263197570297e-06,
"loss": 1.6852,
"step": 713
},
{
"epoch": 1.1919866444073457,
"grad_norm": 0.0,
"learning_rate": 3.7809517841067976e-06,
"loss": 1.8644,
"step": 714
},
{
"epoch": 1.1936560934891487,
"grad_norm": 0.0,
"learning_rate": 3.7676863777377055e-06,
"loss": 1.7803,
"step": 715
},
{
"epoch": 1.1953255425709517,
"grad_norm": 0.0,
"learning_rate": 3.7544301999925176e-06,
"loss": 1.8378,
"step": 716
},
{
"epoch": 1.1969949916527547,
"grad_norm": 0.0,
"learning_rate": 3.7411833501448924e-06,
"loss": 1.7448,
"step": 717
},
{
"epoch": 1.1986644407345577,
"grad_norm": 0.0,
"learning_rate": 3.727945927398628e-06,
"loss": 1.5136,
"step": 718
},
{
"epoch": 1.2003338898163607,
"grad_norm": 0.0,
"learning_rate": 3.7147180308869296e-06,
"loss": 1.9288,
"step": 719
},
{
"epoch": 1.2020033388981637,
"grad_norm": 0.0,
"learning_rate": 3.7014997596716596e-06,
"loss": 1.606,
"step": 720
},
{
"epoch": 1.2036727879799667,
"grad_norm": 0.0,
"learning_rate": 3.688291212742594e-06,
"loss": 1.8199,
"step": 721
},
{
"epoch": 1.2053422370617697,
"grad_norm": 0.0,
"learning_rate": 3.675092489016693e-06,
"loss": 1.7053,
"step": 722
},
{
"epoch": 1.2070116861435727,
"grad_norm": 0.0,
"learning_rate": 3.6619036873373435e-06,
"loss": 1.7121,
"step": 723
},
{
"epoch": 1.2086811352253757,
"grad_norm": 0.0,
"learning_rate": 3.6487249064736352e-06,
"loss": 1.4813,
"step": 724
},
{
"epoch": 1.2103505843071787,
"grad_norm": 0.0,
"learning_rate": 3.6355562451196065e-06,
"loss": 1.7326,
"step": 725
},
{
"epoch": 1.2120200333889817,
"grad_norm": 0.0,
"learning_rate": 3.622397801893518e-06,
"loss": 1.6339,
"step": 726
},
{
"epoch": 1.2136894824707847,
"grad_norm": 0.0,
"learning_rate": 3.6092496753371064e-06,
"loss": 1.6062,
"step": 727
},
{
"epoch": 1.2153589315525877,
"grad_norm": 0.0,
"learning_rate": 3.5961119639148443e-06,
"loss": 1.784,
"step": 728
},
{
"epoch": 1.2170283806343907,
"grad_norm": 0.0,
"learning_rate": 3.582984766013215e-06,
"loss": 1.5648,
"step": 729
},
{
"epoch": 1.2186978297161937,
"grad_norm": 0.0,
"learning_rate": 3.569868179939958e-06,
"loss": 1.674,
"step": 730
},
{
"epoch": 1.2203672787979967,
"grad_norm": 0.0,
"learning_rate": 3.556762303923351e-06,
"loss": 1.6671,
"step": 731
},
{
"epoch": 1.2220367278797997,
"grad_norm": 0.0,
"learning_rate": 3.543667236111458e-06,
"loss": 1.4856,
"step": 732
},
{
"epoch": 1.2237061769616027,
"grad_norm": 0.0,
"learning_rate": 3.530583074571407e-06,
"loss": 1.7501,
"step": 733
},
{
"epoch": 1.2253756260434057,
"grad_norm": 0.0,
"learning_rate": 3.5175099172886507e-06,
"loss": 1.611,
"step": 734
},
{
"epoch": 1.2270450751252087,
"grad_norm": 0.0,
"learning_rate": 3.504447862166227e-06,
"loss": 2.1473,
"step": 735
},
{
"epoch": 1.2287145242070117,
"grad_norm": 0.0,
"learning_rate": 3.4913970070240388e-06,
"loss": 1.6351,
"step": 736
},
{
"epoch": 1.2303839732888147,
"grad_norm": 0.0,
"learning_rate": 3.4783574495981075e-06,
"loss": 1.7557,
"step": 737
},
{
"epoch": 1.2320534223706177,
"grad_norm": 0.0,
"learning_rate": 3.4653292875398523e-06,
"loss": 1.7185,
"step": 738
},
{
"epoch": 1.2337228714524207,
"grad_norm": 0.0,
"learning_rate": 3.4523126184153483e-06,
"loss": 1.6528,
"step": 739
},
{
"epoch": 1.2353923205342237,
"grad_norm": 0.0,
"learning_rate": 3.4393075397046105e-06,
"loss": 1.5345,
"step": 740
},
{
"epoch": 1.2370617696160267,
"grad_norm": 0.0,
"learning_rate": 3.426314148800843e-06,
"loss": 1.4437,
"step": 741
},
{
"epoch": 1.2387312186978297,
"grad_norm": 0.0,
"learning_rate": 3.4133325430097337e-06,
"loss": 1.8744,
"step": 742
},
{
"epoch": 1.2404006677796326,
"grad_norm": 0.0,
"learning_rate": 3.400362819548706e-06,
"loss": 1.625,
"step": 743
},
{
"epoch": 1.2420701168614356,
"grad_norm": 0.0,
"learning_rate": 3.3874050755461984e-06,
"loss": 1.8173,
"step": 744
},
{
"epoch": 1.2437395659432386,
"grad_norm": 0.0,
"learning_rate": 3.374459408040942e-06,
"loss": 1.9883,
"step": 745
},
{
"epoch": 1.2454090150250416,
"grad_norm": 0.0,
"learning_rate": 3.3615259139812227e-06,
"loss": 1.6616,
"step": 746
},
{
"epoch": 1.2470784641068446,
"grad_norm": 0.0,
"learning_rate": 3.3486046902241663e-06,
"loss": 1.5479,
"step": 747
},
{
"epoch": 1.2487479131886476,
"grad_norm": 0.0,
"learning_rate": 3.335695833535001e-06,
"loss": 1.943,
"step": 748
},
{
"epoch": 1.2504173622704506,
"grad_norm": 0.0,
"learning_rate": 3.322799440586349e-06,
"loss": 1.8239,
"step": 749
},
{
"epoch": 1.2520868113522536,
"grad_norm": 0.0,
"learning_rate": 3.309915607957487e-06,
"loss": 1.7892,
"step": 750
},
{
"epoch": 1.2537562604340566,
"grad_norm": 0.0,
"learning_rate": 3.2970444321336294e-06,
"loss": 1.3755,
"step": 751
},
{
"epoch": 1.2554257095158596,
"grad_norm": 0.0,
"learning_rate": 3.2841860095052096e-06,
"loss": 1.9403,
"step": 752
},
{
"epoch": 1.2570951585976629,
"grad_norm": 0.0,
"learning_rate": 3.27134043636715e-06,
"loss": 1.4275,
"step": 753
},
{
"epoch": 1.2587646076794659,
"grad_norm": 0.0,
"learning_rate": 3.258507808918146e-06,
"loss": 1.8757,
"step": 754
},
{
"epoch": 1.2604340567612689,
"grad_norm": 0.0,
"learning_rate": 3.245688223259944e-06,
"loss": 1.5396,
"step": 755
},
{
"epoch": 1.2621035058430718,
"grad_norm": 0.0,
"learning_rate": 3.232881775396626e-06,
"loss": 1.5604,
"step": 756
},
{
"epoch": 1.2637729549248748,
"grad_norm": 0.0,
"learning_rate": 3.2200885612338846e-06,
"loss": 1.7815,
"step": 757
},
{
"epoch": 1.2654424040066778,
"grad_norm": 0.0,
"learning_rate": 3.2073086765783023e-06,
"loss": 1.5858,
"step": 758
},
{
"epoch": 1.2671118530884808,
"grad_norm": 0.0,
"learning_rate": 3.1945422171366482e-06,
"loss": 1.7903,
"step": 759
},
{
"epoch": 1.2687813021702838,
"grad_norm": 0.0,
"learning_rate": 3.1817892785151426e-06,
"loss": 1.6163,
"step": 760
},
{
"epoch": 1.2704507512520868,
"grad_norm": 0.0,
"learning_rate": 3.1690499562187573e-06,
"loss": 1.6819,
"step": 761
},
{
"epoch": 1.2721202003338898,
"grad_norm": 0.0,
"learning_rate": 3.1563243456504877e-06,
"loss": 1.6804,
"step": 762
},
{
"epoch": 1.2737896494156928,
"grad_norm": 0.0,
"learning_rate": 3.1436125421106507e-06,
"loss": 2.0157,
"step": 763
},
{
"epoch": 1.2754590984974958,
"grad_norm": 0.0,
"learning_rate": 3.1309146407961565e-06,
"loss": 1.9822,
"step": 764
},
{
"epoch": 1.2771285475792988,
"grad_norm": 0.0,
"learning_rate": 3.118230736799809e-06,
"loss": 1.6525,
"step": 765
},
{
"epoch": 1.2787979966611018,
"grad_norm": 0.0,
"learning_rate": 3.1055609251095874e-06,
"loss": 1.9026,
"step": 766
},
{
"epoch": 1.2804674457429048,
"grad_norm": 0.0,
"learning_rate": 3.0929053006079336e-06,
"loss": 1.7447,
"step": 767
},
{
"epoch": 1.2821368948247078,
"grad_norm": 0.0,
"learning_rate": 3.0802639580710465e-06,
"loss": 1.7471,
"step": 768
},
{
"epoch": 1.2838063439065108,
"grad_norm": 0.0,
"learning_rate": 3.067636992168165e-06,
"loss": 1.6875,
"step": 769
},
{
"epoch": 1.2854757929883138,
"grad_norm": 0.0,
"learning_rate": 3.0550244974608675e-06,
"loss": 1.68,
"step": 770
},
{
"epoch": 1.2871452420701168,
"grad_norm": 0.0,
"learning_rate": 3.0424265684023556e-06,
"loss": 1.6858,
"step": 771
},
{
"epoch": 1.2888146911519198,
"grad_norm": 0.0,
"learning_rate": 3.0298432993367577e-06,
"loss": 1.9605,
"step": 772
},
{
"epoch": 1.2904841402337228,
"grad_norm": 0.0,
"learning_rate": 3.0172747844984098e-06,
"loss": 1.6079,
"step": 773
},
{
"epoch": 1.2921535893155258,
"grad_norm": 0.0,
"learning_rate": 3.0047211180111537e-06,
"loss": 1.6149,
"step": 774
},
{
"epoch": 1.2938230383973288,
"grad_norm": 0.0,
"learning_rate": 2.9921823938876426e-06,
"loss": 1.831,
"step": 775
},
{
"epoch": 1.2954924874791318,
"grad_norm": 0.0,
"learning_rate": 2.979658706028619e-06,
"loss": 1.5783,
"step": 776
},
{
"epoch": 1.2971619365609348,
"grad_norm": 0.0,
"learning_rate": 2.9671501482222277e-06,
"loss": 1.5288,
"step": 777
},
{
"epoch": 1.298831385642738,
"grad_norm": 0.0,
"learning_rate": 2.9546568141433007e-06,
"loss": 1.5322,
"step": 778
},
{
"epoch": 1.300500834724541,
"grad_norm": 0.0,
"learning_rate": 2.9421787973526694e-06,
"loss": 1.6689,
"step": 779
},
{
"epoch": 1.302170283806344,
"grad_norm": 0.0,
"learning_rate": 2.9297161912964476e-06,
"loss": 1.4969,
"step": 780
},
{
"epoch": 1.303839732888147,
"grad_norm": 0.0,
"learning_rate": 2.917269089305347e-06,
"loss": 1.7739,
"step": 781
},
{
"epoch": 1.30550918196995,
"grad_norm": 0.0,
"learning_rate": 2.904837584593968e-06,
"loss": 1.3092,
"step": 782
},
{
"epoch": 1.307178631051753,
"grad_norm": 0.0,
"learning_rate": 2.8924217702601048e-06,
"loss": 1.6546,
"step": 783
},
{
"epoch": 1.308848080133556,
"grad_norm": 0.0,
"learning_rate": 2.880021739284053e-06,
"loss": 1.7141,
"step": 784
},
{
"epoch": 1.310517529215359,
"grad_norm": 0.0,
"learning_rate": 2.8676375845279013e-06,
"loss": 1.9269,
"step": 785
},
{
"epoch": 1.312186978297162,
"grad_norm": 0.0,
"learning_rate": 2.8552693987348533e-06,
"loss": 1.6174,
"step": 786
},
{
"epoch": 1.313856427378965,
"grad_norm": 0.0,
"learning_rate": 2.8429172745285127e-06,
"loss": 1.672,
"step": 787
},
{
"epoch": 1.315525876460768,
"grad_norm": 0.0,
"learning_rate": 2.83058130441221e-06,
"loss": 1.8326,
"step": 788
},
{
"epoch": 1.317195325542571,
"grad_norm": 0.0,
"learning_rate": 2.8182615807682933e-06,
"loss": 1.7942,
"step": 789
},
{
"epoch": 1.318864774624374,
"grad_norm": 0.0,
"learning_rate": 2.8059581958574434e-06,
"loss": 1.6053,
"step": 790
},
{
"epoch": 1.320534223706177,
"grad_norm": 0.0,
"learning_rate": 2.793671241817989e-06,
"loss": 1.6082,
"step": 791
},
{
"epoch": 1.32220367278798,
"grad_norm": 0.0,
"learning_rate": 2.781400810665201e-06,
"loss": 1.7061,
"step": 792
},
{
"epoch": 1.323873121869783,
"grad_norm": 0.0,
"learning_rate": 2.769146994290623e-06,
"loss": 1.8037,
"step": 793
},
{
"epoch": 1.325542570951586,
"grad_norm": 0.0,
"learning_rate": 2.7569098844613616e-06,
"loss": 1.8475,
"step": 794
},
{
"epoch": 1.327212020033389,
"grad_norm": 0.0,
"learning_rate": 2.744689572819421e-06,
"loss": 1.854,
"step": 795
},
{
"epoch": 1.328881469115192,
"grad_norm": 0.0,
"learning_rate": 2.7324861508810007e-06,
"loss": 1.9131,
"step": 796
},
{
"epoch": 1.330550918196995,
"grad_norm": 0.0,
"learning_rate": 2.7202997100358117e-06,
"loss": 1.5491,
"step": 797
},
{
"epoch": 1.332220367278798,
"grad_norm": 0.0,
"learning_rate": 2.708130341546407e-06,
"loss": 1.4143,
"step": 798
},
{
"epoch": 1.333889816360601,
"grad_norm": 0.0,
"learning_rate": 2.695978136547476e-06,
"loss": 1.8685,
"step": 799
},
{
"epoch": 1.335559265442404,
"grad_norm": 0.0,
"learning_rate": 2.6838431860451797e-06,
"loss": 1.7486,
"step": 800
},
{
"epoch": 1.337228714524207,
"grad_norm": 0.0,
"learning_rate": 2.6717255809164615e-06,
"loss": 1.4641,
"step": 801
},
{
"epoch": 1.33889816360601,
"grad_norm": 0.0,
"learning_rate": 2.659625411908366e-06,
"loss": 1.6572,
"step": 802
},
{
"epoch": 1.340567612687813,
"grad_norm": 0.0,
"learning_rate": 2.6475427696373598e-06,
"loss": 1.5809,
"step": 803
},
{
"epoch": 1.342237061769616,
"grad_norm": 0.0,
"learning_rate": 2.635477744588658e-06,
"loss": 1.5445,
"step": 804
},
{
"epoch": 1.343906510851419,
"grad_norm": 0.0,
"learning_rate": 2.6234304271155443e-06,
"loss": 1.8947,
"step": 805
},
{
"epoch": 1.345575959933222,
"grad_norm": 0.0,
"learning_rate": 2.611400907438685e-06,
"loss": 1.8787,
"step": 806
},
{
"epoch": 1.347245409015025,
"grad_norm": 0.0,
"learning_rate": 2.5993892756454702e-06,
"loss": 1.9229,
"step": 807
},
{
"epoch": 1.348914858096828,
"grad_norm": 0.0,
"learning_rate": 2.587395621689325e-06,
"loss": 1.7786,
"step": 808
},
{
"epoch": 1.350584307178631,
"grad_norm": 0.0,
"learning_rate": 2.57542003538904e-06,
"loss": 1.9414,
"step": 809
},
{
"epoch": 1.352253756260434,
"grad_norm": 0.0,
"learning_rate": 2.563462606428101e-06,
"loss": 1.7625,
"step": 810
},
{
"epoch": 1.353923205342237,
"grad_norm": 0.0,
"learning_rate": 2.5515234243540186e-06,
"loss": 1.924,
"step": 811
},
{
"epoch": 1.35559265442404,
"grad_norm": 0.0,
"learning_rate": 2.5396025785776545e-06,
"loss": 1.8949,
"step": 812
},
{
"epoch": 1.357262103505843,
"grad_norm": 0.0,
"learning_rate": 2.527700158372548e-06,
"loss": 1.784,
"step": 813
},
{
"epoch": 1.358931552587646,
"grad_norm": 0.0,
"learning_rate": 2.515816252874258e-06,
"loss": 1.8006,
"step": 814
},
{
"epoch": 1.360601001669449,
"grad_norm": 0.0,
"learning_rate": 2.5039509510796843e-06,
"loss": 1.8571,
"step": 815
},
{
"epoch": 1.362270450751252,
"grad_norm": 0.0,
"learning_rate": 2.4921043418464085e-06,
"loss": 1.5047,
"step": 816
},
{
"epoch": 1.363939899833055,
"grad_norm": 0.0,
"learning_rate": 2.4802765138920236e-06,
"loss": 1.6812,
"step": 817
},
{
"epoch": 1.365609348914858,
"grad_norm": 0.0,
"learning_rate": 2.4684675557934766e-06,
"loss": 1.8403,
"step": 818
},
{
"epoch": 1.367278797996661,
"grad_norm": 0.0,
"learning_rate": 2.456677555986401e-06,
"loss": 1.5725,
"step": 819
},
{
"epoch": 1.3689482470784642,
"grad_norm": 0.0,
"learning_rate": 2.4449066027644473e-06,
"loss": 1.4778,
"step": 820
},
{
"epoch": 1.3706176961602672,
"grad_norm": 0.0,
"learning_rate": 2.433154784278638e-06,
"loss": 1.8657,
"step": 821
},
{
"epoch": 1.3722871452420702,
"grad_norm": 0.0,
"learning_rate": 2.4214221885366918e-06,
"loss": 1.6946,
"step": 822
},
{
"epoch": 1.3739565943238732,
"grad_norm": 0.0,
"learning_rate": 2.4097089034023726e-06,
"loss": 1.6673,
"step": 823
},
{
"epoch": 1.3756260434056762,
"grad_norm": 0.0,
"learning_rate": 2.398015016594828e-06,
"loss": 1.7846,
"step": 824
},
{
"epoch": 1.3772954924874792,
"grad_norm": 0.0,
"learning_rate": 2.386340615687941e-06,
"loss": 1.7574,
"step": 825
},
{
"epoch": 1.3789649415692822,
"grad_norm": 0.0,
"learning_rate": 2.3746857881096586e-06,
"loss": 1.6527,
"step": 826
},
{
"epoch": 1.3806343906510852,
"grad_norm": 0.0,
"learning_rate": 2.363050621141354e-06,
"loss": 1.8629,
"step": 827
},
{
"epoch": 1.3823038397328882,
"grad_norm": 0.0,
"learning_rate": 2.351435201917159e-06,
"loss": 1.7808,
"step": 828
},
{
"epoch": 1.3839732888146912,
"grad_norm": 0.0,
"learning_rate": 2.339839617423318e-06,
"loss": 1.6724,
"step": 829
},
{
"epoch": 1.3856427378964942,
"grad_norm": 0.0,
"learning_rate": 2.32826395449754e-06,
"loss": 1.4843,
"step": 830
},
{
"epoch": 1.3873121869782972,
"grad_norm": 0.0,
"learning_rate": 2.316708299828338e-06,
"loss": 1.7324,
"step": 831
},
{
"epoch": 1.3889816360601002,
"grad_norm": 0.0,
"learning_rate": 2.3051727399543934e-06,
"loss": 1.8341,
"step": 832
},
{
"epoch": 1.3906510851419032,
"grad_norm": 0.0,
"learning_rate": 2.2936573612638922e-06,
"loss": 1.605,
"step": 833
},
{
"epoch": 1.3923205342237062,
"grad_norm": 0.0,
"learning_rate": 2.282162249993895e-06,
"loss": 1.6203,
"step": 834
},
{
"epoch": 1.3939899833055092,
"grad_norm": 0.0,
"learning_rate": 2.2706874922296756e-06,
"loss": 1.6853,
"step": 835
},
{
"epoch": 1.3956594323873122,
"grad_norm": 0.0,
"learning_rate": 2.259233173904084e-06,
"loss": 1.6005,
"step": 836
},
{
"epoch": 1.3973288814691152,
"grad_norm": 0.0,
"learning_rate": 2.2477993807969074e-06,
"loss": 1.7349,
"step": 837
},
{
"epoch": 1.3989983305509182,
"grad_norm": 0.0,
"learning_rate": 2.2363861985342156e-06,
"loss": 1.7476,
"step": 838
},
{
"epoch": 1.4006677796327212,
"grad_norm": 0.0,
"learning_rate": 2.224993712587734e-06,
"loss": 1.7429,
"step": 839
},
{
"epoch": 1.4023372287145242,
"grad_norm": 0.0,
"learning_rate": 2.2136220082741876e-06,
"loss": 1.6146,
"step": 840
},
{
"epoch": 1.4040066777963272,
"grad_norm": 0.0,
"learning_rate": 2.20227117075468e-06,
"loss": 1.7036,
"step": 841
},
{
"epoch": 1.4056761268781301,
"grad_norm": 0.0,
"learning_rate": 2.1909412850340395e-06,
"loss": 1.4333,
"step": 842
},
{
"epoch": 1.4073455759599331,
"grad_norm": 0.0,
"learning_rate": 2.1796324359601896e-06,
"loss": 1.9014,
"step": 843
},
{
"epoch": 1.4090150250417361,
"grad_norm": 0.0,
"learning_rate": 2.168344708223519e-06,
"loss": 1.7888,
"step": 844
},
{
"epoch": 1.4106844741235394,
"grad_norm": 0.0,
"learning_rate": 2.1570781863562328e-06,
"loss": 1.726,
"step": 845
},
{
"epoch": 1.4123539232053424,
"grad_norm": 0.0,
"learning_rate": 2.1458329547317384e-06,
"loss": 1.6039,
"step": 846
},
{
"epoch": 1.4140233722871454,
"grad_norm": 0.0,
"learning_rate": 2.134609097563995e-06,
"loss": 1.7142,
"step": 847
},
{
"epoch": 1.4156928213689484,
"grad_norm": 0.0,
"learning_rate": 2.1234066989068972e-06,
"loss": 1.5549,
"step": 848
},
{
"epoch": 1.4173622704507514,
"grad_norm": 0.0,
"learning_rate": 2.1122258426536373e-06,
"loss": 1.6325,
"step": 849
},
{
"epoch": 1.4190317195325544,
"grad_norm": 0.0,
"learning_rate": 2.1010666125360767e-06,
"loss": 1.6929,
"step": 850
},
{
"epoch": 1.4207011686143574,
"grad_norm": 0.0,
"learning_rate": 2.08992909212413e-06,
"loss": 1.7992,
"step": 851
},
{
"epoch": 1.4223706176961604,
"grad_norm": 0.0,
"learning_rate": 2.0788133648251207e-06,
"loss": 1.6507,
"step": 852
},
{
"epoch": 1.4240400667779634,
"grad_norm": 0.0,
"learning_rate": 2.067719513883176e-06,
"loss": 1.7738,
"step": 853
},
{
"epoch": 1.4257095158597664,
"grad_norm": 0.0,
"learning_rate": 2.0566476223785857e-06,
"loss": 1.778,
"step": 854
},
{
"epoch": 1.4273789649415694,
"grad_norm": 0.0,
"learning_rate": 2.045597773227199e-06,
"loss": 1.9336,
"step": 855
},
{
"epoch": 1.4290484140233723,
"grad_norm": 0.0,
"learning_rate": 2.0345700491797786e-06,
"loss": 1.642,
"step": 856
},
{
"epoch": 1.4307178631051753,
"grad_norm": 0.0,
"learning_rate": 2.0235645328214077e-06,
"loss": 1.5683,
"step": 857
},
{
"epoch": 1.4323873121869783,
"grad_norm": 0.0,
"learning_rate": 2.0125813065708568e-06,
"loss": 1.6782,
"step": 858
},
{
"epoch": 1.4340567612687813,
"grad_norm": 0.0,
"learning_rate": 2.001620452679962e-06,
"loss": 1.6515,
"step": 859
},
{
"epoch": 1.4357262103505843,
"grad_norm": 0.0,
"learning_rate": 1.9906820532330262e-06,
"loss": 1.7665,
"step": 860
},
{
"epoch": 1.4373956594323873,
"grad_norm": 0.0,
"learning_rate": 1.979766190146187e-06,
"loss": 1.4809,
"step": 861
},
{
"epoch": 1.4390651085141903,
"grad_norm": 0.0,
"learning_rate": 1.9688729451668116e-06,
"loss": 1.4875,
"step": 862
},
{
"epoch": 1.4407345575959933,
"grad_norm": 0.0,
"learning_rate": 1.9580023998728823e-06,
"loss": 1.7605,
"step": 863
},
{
"epoch": 1.4424040066777963,
"grad_norm": 0.0,
"learning_rate": 1.947154635672393e-06,
"loss": 1.5669,
"step": 864
},
{
"epoch": 1.4440734557595993,
"grad_norm": 0.0,
"learning_rate": 1.9363297338027236e-06,
"loss": 1.6956,
"step": 865
},
{
"epoch": 1.4457429048414023,
"grad_norm": 0.0,
"learning_rate": 1.925527775330049e-06,
"loss": 1.5113,
"step": 866
},
{
"epoch": 1.4474123539232053,
"grad_norm": 0.0,
"learning_rate": 1.9147488411487226e-06,
"loss": 1.5448,
"step": 867
},
{
"epoch": 1.4490818030050083,
"grad_norm": 0.0,
"learning_rate": 1.9039930119806698e-06,
"loss": 1.7054,
"step": 868
},
{
"epoch": 1.4507512520868113,
"grad_norm": 0.0,
"learning_rate": 1.8932603683747858e-06,
"loss": 1.7207,
"step": 869
},
{
"epoch": 1.4524207011686143,
"grad_norm": 0.0,
"learning_rate": 1.8825509907063328e-06,
"loss": 1.9051,
"step": 870
},
{
"epoch": 1.4540901502504173,
"grad_norm": 0.0,
"learning_rate": 1.8718649591763415e-06,
"loss": 1.6744,
"step": 871
},
{
"epoch": 1.4557595993322203,
"grad_norm": 0.0,
"learning_rate": 1.8612023538109998e-06,
"loss": 1.7186,
"step": 872
},
{
"epoch": 1.4574290484140233,
"grad_norm": 0.0,
"learning_rate": 1.8505632544610657e-06,
"loss": 1.7795,
"step": 873
},
{
"epoch": 1.4590984974958263,
"grad_norm": 0.0,
"learning_rate": 1.8399477408012645e-06,
"loss": 1.7003,
"step": 874
},
{
"epoch": 1.4607679465776293,
"grad_norm": 0.0,
"learning_rate": 1.8293558923296873e-06,
"loss": 1.6446,
"step": 875
},
{
"epoch": 1.4624373956594323,
"grad_norm": 0.0,
"learning_rate": 1.8187877883672024e-06,
"loss": 1.931,
"step": 876
},
{
"epoch": 1.4641068447412353,
"grad_norm": 0.0,
"learning_rate": 1.8082435080568556e-06,
"loss": 1.8991,
"step": 877
},
{
"epoch": 1.4657762938230383,
"grad_norm": 0.0,
"learning_rate": 1.797723130363288e-06,
"loss": 1.6597,
"step": 878
},
{
"epoch": 1.4674457429048413,
"grad_norm": 0.0,
"learning_rate": 1.7872267340721289e-06,
"loss": 1.6849,
"step": 879
},
{
"epoch": 1.4691151919866443,
"grad_norm": 0.0,
"learning_rate": 1.7767543977894198e-06,
"loss": 1.6496,
"step": 880
},
{
"epoch": 1.4707846410684473,
"grad_norm": 0.0,
"learning_rate": 1.7663061999410209e-06,
"loss": 1.6825,
"step": 881
},
{
"epoch": 1.4724540901502503,
"grad_norm": 0.0,
"learning_rate": 1.755882218772018e-06,
"loss": 1.3431,
"step": 882
},
{
"epoch": 1.4741235392320533,
"grad_norm": 0.0,
"learning_rate": 1.745482532346145e-06,
"loss": 1.8387,
"step": 883
},
{
"epoch": 1.4757929883138563,
"grad_norm": 0.0,
"learning_rate": 1.7351072185451934e-06,
"loss": 1.857,
"step": 884
},
{
"epoch": 1.4774624373956593,
"grad_norm": 0.0,
"learning_rate": 1.7247563550684366e-06,
"loss": 1.7855,
"step": 885
},
{
"epoch": 1.4791318864774623,
"grad_norm": 0.0,
"learning_rate": 1.7144300194320357e-06,
"loss": 1.7008,
"step": 886
},
{
"epoch": 1.4808013355592655,
"grad_norm": 0.0,
"learning_rate": 1.7041282889684746e-06,
"loss": 1.8663,
"step": 887
},
{
"epoch": 1.4824707846410685,
"grad_norm": 0.0,
"learning_rate": 1.6938512408259655e-06,
"loss": 1.9139,
"step": 888
},
{
"epoch": 1.4841402337228715,
"grad_norm": 0.0,
"learning_rate": 1.6835989519678802e-06,
"loss": 1.7092,
"step": 889
},
{
"epoch": 1.4858096828046745,
"grad_norm": 0.0,
"learning_rate": 1.6733714991721738e-06,
"loss": 1.6333,
"step": 890
},
{
"epoch": 1.4874791318864775,
"grad_norm": 0.0,
"learning_rate": 1.6631689590308049e-06,
"loss": 1.6067,
"step": 891
},
{
"epoch": 1.4891485809682805,
"grad_norm": 0.0,
"learning_rate": 1.652991407949167e-06,
"loss": 1.5876,
"step": 892
},
{
"epoch": 1.4908180300500835,
"grad_norm": 0.0,
"learning_rate": 1.6428389221455115e-06,
"loss": 1.8228,
"step": 893
},
{
"epoch": 1.4924874791318865,
"grad_norm": 0.0,
"learning_rate": 1.6327115776503833e-06,
"loss": 1.8408,
"step": 894
},
{
"epoch": 1.4941569282136895,
"grad_norm": 0.0,
"learning_rate": 1.622609450306043e-06,
"loss": 1.7807,
"step": 895
},
{
"epoch": 1.4958263772954925,
"grad_norm": 0.0,
"learning_rate": 1.6125326157659048e-06,
"loss": 1.7149,
"step": 896
},
{
"epoch": 1.4974958263772955,
"grad_norm": 0.0,
"learning_rate": 1.6024811494939723e-06,
"loss": 1.9643,
"step": 897
},
{
"epoch": 1.4991652754590985,
"grad_norm": 0.0,
"learning_rate": 1.5924551267642641e-06,
"loss": 1.6214,
"step": 898
},
{
"epoch": 1.5008347245409015,
"grad_norm": 0.0,
"learning_rate": 1.5824546226602611e-06,
"loss": 1.9501,
"step": 899
},
{
"epoch": 1.5025041736227045,
"grad_norm": 0.0,
"learning_rate": 1.572479712074333e-06,
"loss": 1.726,
"step": 900
},
{
"epoch": 1.5041736227045075,
"grad_norm": 0.0,
"learning_rate": 1.5625304697071897e-06,
"loss": 1.7017,
"step": 901
},
{
"epoch": 1.5058430717863105,
"grad_norm": 0.0,
"learning_rate": 1.5526069700673108e-06,
"loss": 1.5422,
"step": 902
},
{
"epoch": 1.5075125208681135,
"grad_norm": 0.0,
"learning_rate": 1.542709287470393e-06,
"loss": 1.6978,
"step": 903
},
{
"epoch": 1.5091819699499165,
"grad_norm": 0.0,
"learning_rate": 1.532837496038792e-06,
"loss": 1.7853,
"step": 904
},
{
"epoch": 1.5108514190317195,
"grad_norm": 0.0,
"learning_rate": 1.5229916697009706e-06,
"loss": 1.5383,
"step": 905
},
{
"epoch": 1.5125208681135225,
"grad_norm": 0.0,
"learning_rate": 1.5131718821909435e-06,
"loss": 1.6953,
"step": 906
},
{
"epoch": 1.5141903171953257,
"grad_norm": 0.0,
"learning_rate": 1.5033782070477192e-06,
"loss": 1.6438,
"step": 907
},
{
"epoch": 1.5158597662771287,
"grad_norm": 0.0,
"learning_rate": 1.4936107176147606e-06,
"loss": 1.8139,
"step": 908
},
{
"epoch": 1.5175292153589317,
"grad_norm": 0.0,
"learning_rate": 1.483869487039425e-06,
"loss": 1.5901,
"step": 909
},
{
"epoch": 1.5191986644407347,
"grad_norm": 0.0,
"learning_rate": 1.4741545882724213e-06,
"loss": 1.5955,
"step": 910
},
{
"epoch": 1.5208681135225377,
"grad_norm": 0.0,
"learning_rate": 1.4644660940672628e-06,
"loss": 1.5351,
"step": 911
},
{
"epoch": 1.5225375626043407,
"grad_norm": 0.0,
"learning_rate": 1.4548040769797255e-06,
"loss": 1.7605,
"step": 912
},
{
"epoch": 1.5242070116861437,
"grad_norm": 0.0,
"learning_rate": 1.4451686093673028e-06,
"loss": 1.6614,
"step": 913
},
{
"epoch": 1.5258764607679467,
"grad_norm": 0.0,
"learning_rate": 1.4355597633886576e-06,
"loss": 1.8486,
"step": 914
},
{
"epoch": 1.5275459098497497,
"grad_norm": 0.0,
"learning_rate": 1.425977611003091e-06,
"loss": 1.7763,
"step": 915
},
{
"epoch": 1.5292153589315527,
"grad_norm": 0.0,
"learning_rate": 1.416422223969996e-06,
"loss": 1.7033,
"step": 916
},
{
"epoch": 1.5308848080133557,
"grad_norm": 0.0,
"learning_rate": 1.4068936738483302e-06,
"loss": 1.5128,
"step": 917
},
{
"epoch": 1.5325542570951587,
"grad_norm": 0.0,
"learning_rate": 1.3973920319960654e-06,
"loss": 1.8173,
"step": 918
},
{
"epoch": 1.5342237061769617,
"grad_norm": 0.0,
"learning_rate": 1.3879173695696668e-06,
"loss": 1.7077,
"step": 919
},
{
"epoch": 1.5358931552587647,
"grad_norm": 0.0,
"learning_rate": 1.378469757523554e-06,
"loss": 1.6683,
"step": 920
},
{
"epoch": 1.5375626043405677,
"grad_norm": 0.0,
"learning_rate": 1.3690492666095672e-06,
"loss": 1.6297,
"step": 921
},
{
"epoch": 1.5392320534223707,
"grad_norm": 0.0,
"learning_rate": 1.3596559673764421e-06,
"loss": 1.5391,
"step": 922
},
{
"epoch": 1.5409015025041737,
"grad_norm": 0.0,
"learning_rate": 1.350289930169278e-06,
"loss": 1.7279,
"step": 923
},
{
"epoch": 1.5425709515859767,
"grad_norm": 0.0,
"learning_rate": 1.3409512251290164e-06,
"loss": 1.9615,
"step": 924
},
{
"epoch": 1.5442404006677797,
"grad_norm": 0.0,
"learning_rate": 1.3316399221919075e-06,
"loss": 1.5777,
"step": 925
},
{
"epoch": 1.5459098497495827,
"grad_norm": 0.0,
"learning_rate": 1.322356091088996e-06,
"loss": 1.6915,
"step": 926
},
{
"epoch": 1.5475792988313857,
"grad_norm": 0.0,
"learning_rate": 1.3130998013455875e-06,
"loss": 1.6709,
"step": 927
},
{
"epoch": 1.5492487479131887,
"grad_norm": 0.0,
"learning_rate": 1.303871122280742e-06,
"loss": 1.5226,
"step": 928
},
{
"epoch": 1.5509181969949917,
"grad_norm": 0.0,
"learning_rate": 1.2946701230067405e-06,
"loss": 1.736,
"step": 929
},
{
"epoch": 1.5525876460767947,
"grad_norm": 0.0,
"learning_rate": 1.2854968724285755e-06,
"loss": 1.3861,
"step": 930
},
{
"epoch": 1.5542570951585977,
"grad_norm": 0.0,
"learning_rate": 1.276351439243436e-06,
"loss": 1.824,
"step": 931
},
{
"epoch": 1.5559265442404007,
"grad_norm": 0.0,
"learning_rate": 1.2672338919401866e-06,
"loss": 1.8992,
"step": 932
},
{
"epoch": 1.5575959933222037,
"grad_norm": 0.0,
"learning_rate": 1.2581442987988635e-06,
"loss": 1.6777,
"step": 933
},
{
"epoch": 1.5592654424040067,
"grad_norm": 0.0,
"learning_rate": 1.2490827278901513e-06,
"loss": 1.5996,
"step": 934
},
{
"epoch": 1.5609348914858097,
"grad_norm": 0.0,
"learning_rate": 1.2400492470748877e-06,
"loss": 1.4484,
"step": 935
},
{
"epoch": 1.5626043405676127,
"grad_norm": 0.0,
"learning_rate": 1.2310439240035415e-06,
"loss": 1.6822,
"step": 936
},
{
"epoch": 1.5642737896494157,
"grad_norm": 0.0,
"learning_rate": 1.2220668261157132e-06,
"loss": 1.6954,
"step": 937
},
{
"epoch": 1.5659432387312187,
"grad_norm": 0.0,
"learning_rate": 1.2131180206396331e-06,
"loss": 1.88,
"step": 938
},
{
"epoch": 1.5676126878130217,
"grad_norm": 0.0,
"learning_rate": 1.2041975745916474e-06,
"loss": 1.7492,
"step": 939
},
{
"epoch": 1.5692821368948247,
"grad_norm": 0.0,
"learning_rate": 1.195305554775728e-06,
"loss": 1.5302,
"step": 940
},
{
"epoch": 1.5709515859766277,
"grad_norm": 0.0,
"learning_rate": 1.186442027782964e-06,
"loss": 1.7966,
"step": 941
},
{
"epoch": 1.5726210350584306,
"grad_norm": 0.0,
"learning_rate": 1.177607059991065e-06,
"loss": 1.5941,
"step": 942
},
{
"epoch": 1.5742904841402336,
"grad_norm": 0.0,
"learning_rate": 1.1688007175638655e-06,
"loss": 1.5734,
"step": 943
},
{
"epoch": 1.5759599332220366,
"grad_norm": 0.0,
"learning_rate": 1.1600230664508288e-06,
"loss": 1.8075,
"step": 944
},
{
"epoch": 1.5776293823038396,
"grad_norm": 0.0,
"learning_rate": 1.1512741723865562e-06,
"loss": 2.1487,
"step": 945
},
{
"epoch": 1.5792988313856426,
"grad_norm": 0.0,
"learning_rate": 1.1425541008902852e-06,
"loss": 1.7166,
"step": 946
},
{
"epoch": 1.5809682804674456,
"grad_norm": 0.0,
"learning_rate": 1.133862917265411e-06,
"loss": 1.7968,
"step": 947
},
{
"epoch": 1.5826377295492486,
"grad_norm": 0.0,
"learning_rate": 1.1252006865989868e-06,
"loss": 1.7527,
"step": 948
},
{
"epoch": 1.5843071786310516,
"grad_norm": 0.0,
"learning_rate": 1.1165674737612447e-06,
"loss": 1.7028,
"step": 949
},
{
"epoch": 1.5859766277128546,
"grad_norm": 0.0,
"learning_rate": 1.107963343405103e-06,
"loss": 1.6813,
"step": 950
},
{
"epoch": 1.5876460767946576,
"grad_norm": 0.0,
"learning_rate": 1.0993883599656885e-06,
"loss": 1.6206,
"step": 951
},
{
"epoch": 1.5893155258764606,
"grad_norm": 0.0,
"learning_rate": 1.0908425876598512e-06,
"loss": 1.7438,
"step": 952
},
{
"epoch": 1.5909849749582636,
"grad_norm": 0.0,
"learning_rate": 1.0823260904856791e-06,
"loss": 1.3356,
"step": 953
},
{
"epoch": 1.5926544240400666,
"grad_norm": 0.0,
"learning_rate": 1.0738389322220276e-06,
"loss": 1.7507,
"step": 954
},
{
"epoch": 1.5943238731218696,
"grad_norm": 0.0,
"learning_rate": 1.0653811764280336e-06,
"loss": 1.9094,
"step": 955
},
{
"epoch": 1.5959933222036726,
"grad_norm": 0.0,
"learning_rate": 1.0569528864426444e-06,
"loss": 1.7581,
"step": 956
},
{
"epoch": 1.5976627712854758,
"grad_norm": 0.0,
"learning_rate": 1.0485541253841391e-06,
"loss": 1.734,
"step": 957
},
{
"epoch": 1.5993322203672788,
"grad_norm": 0.0,
"learning_rate": 1.0401849561496647e-06,
"loss": 1.7455,
"step": 958
},
{
"epoch": 1.6010016694490818,
"grad_norm": 0.0,
"learning_rate": 1.031845441414756e-06,
"loss": 1.6586,
"step": 959
},
{
"epoch": 1.6026711185308848,
"grad_norm": 0.0,
"learning_rate": 1.0235356436328675e-06,
"loss": 1.5319,
"step": 960
},
{
"epoch": 1.6043405676126878,
"grad_norm": 0.0,
"learning_rate": 1.015255625034911e-06,
"loss": 1.6651,
"step": 961
},
{
"epoch": 1.6060100166944908,
"grad_norm": 0.0,
"learning_rate": 1.007005447628785e-06,
"loss": 1.4794,
"step": 962
},
{
"epoch": 1.6076794657762938,
"grad_norm": 0.0,
"learning_rate": 9.987851731989096e-07,
"loss": 1.9092,
"step": 963
},
{
"epoch": 1.6093489148580968,
"grad_norm": 0.0,
"learning_rate": 9.905948633057666e-07,
"loss": 1.6746,
"step": 964
},
{
"epoch": 1.6110183639398998,
"grad_norm": 0.0,
"learning_rate": 9.82434579285441e-07,
"loss": 1.7691,
"step": 965
},
{
"epoch": 1.6126878130217028,
"grad_norm": 0.0,
"learning_rate": 9.743043822491528e-07,
"loss": 1.8323,
"step": 966
},
{
"epoch": 1.6143572621035058,
"grad_norm": 0.0,
"learning_rate": 9.662043330828086e-07,
"loss": 1.7632,
"step": 967
},
{
"epoch": 1.6160267111853088,
"grad_norm": 0.0,
"learning_rate": 9.58134492446543e-07,
"loss": 1.6031,
"step": 968
},
{
"epoch": 1.6176961602671118,
"grad_norm": 0.0,
"learning_rate": 9.500949207742566e-07,
"loss": 1.9118,
"step": 969
},
{
"epoch": 1.6193656093489148,
"grad_norm": 0.0,
"learning_rate": 9.420856782731774e-07,
"loss": 1.7467,
"step": 970
},
{
"epoch": 1.6210350584307178,
"grad_norm": 0.0,
"learning_rate": 9.341068249233964e-07,
"loss": 1.7573,
"step": 971
},
{
"epoch": 1.6227045075125208,
"grad_norm": 0.0,
"learning_rate": 9.26158420477431e-07,
"loss": 1.757,
"step": 972
},
{
"epoch": 1.6243739565943238,
"grad_norm": 0.0,
"learning_rate": 9.182405244597647e-07,
"loss": 1.7199,
"step": 973
},
{
"epoch": 1.626043405676127,
"grad_norm": 0.0,
"learning_rate": 9.10353196166412e-07,
"loss": 1.5325,
"step": 974
},
{
"epoch": 1.62771285475793,
"grad_norm": 0.0,
"learning_rate": 9.024964946644682e-07,
"loss": 1.6199,
"step": 975
},
{
"epoch": 1.629382303839733,
"grad_norm": 0.0,
"learning_rate": 8.946704787916676e-07,
"loss": 1.6902,
"step": 976
},
{
"epoch": 1.631051752921536,
"grad_norm": 0.0,
"learning_rate": 8.868752071559478e-07,
"loss": 1.3334,
"step": 977
},
{
"epoch": 1.632721202003339,
"grad_norm": 0.0,
"learning_rate": 8.791107381350028e-07,
"loss": 1.9713,
"step": 978
},
{
"epoch": 1.634390651085142,
"grad_norm": 0.0,
"learning_rate": 8.713771298758539e-07,
"loss": 1.6646,
"step": 979
},
{
"epoch": 1.636060100166945,
"grad_norm": 0.0,
"learning_rate": 8.636744402944075e-07,
"loss": 1.8279,
"step": 980
},
{
"epoch": 1.637729549248748,
"grad_norm": 0.0,
"learning_rate": 8.560027270750276e-07,
"loss": 1.7358,
"step": 981
},
{
"epoch": 1.639398998330551,
"grad_norm": 0.0,
"learning_rate": 8.483620476700977e-07,
"loss": 2.0311,
"step": 982
},
{
"epoch": 1.641068447412354,
"grad_norm": 0.0,
"learning_rate": 8.40752459299593e-07,
"loss": 1.5454,
"step": 983
},
{
"epoch": 1.642737896494157,
"grad_norm": 0.0,
"learning_rate": 8.33174018950656e-07,
"loss": 1.782,
"step": 984
},
{
"epoch": 1.64440734557596,
"grad_norm": 0.0,
"learning_rate": 8.256267833771608e-07,
"loss": 1.6006,
"step": 985
},
{
"epoch": 1.646076794657763,
"grad_norm": 0.0,
"learning_rate": 8.181108090993001e-07,
"loss": 1.6482,
"step": 986
},
{
"epoch": 1.647746243739566,
"grad_norm": 0.0,
"learning_rate": 8.106261524031473e-07,
"loss": 1.6951,
"step": 987
},
{
"epoch": 1.649415692821369,
"grad_norm": 0.0,
"learning_rate": 8.031728693402502e-07,
"loss": 1.7212,
"step": 988
},
{
"epoch": 1.651085141903172,
"grad_norm": 0.0,
"learning_rate": 7.957510157271991e-07,
"loss": 1.7513,
"step": 989
},
{
"epoch": 1.652754590984975,
"grad_norm": 0.0,
"learning_rate": 7.883606471452138e-07,
"loss": 1.649,
"step": 990
},
{
"epoch": 1.654424040066778,
"grad_norm": 0.0,
"learning_rate": 7.810018189397301e-07,
"loss": 1.5066,
"step": 991
},
{
"epoch": 1.656093489148581,
"grad_norm": 0.0,
"learning_rate": 7.736745862199785e-07,
"loss": 1.6172,
"step": 992
},
{
"epoch": 1.657762938230384,
"grad_norm": 0.0,
"learning_rate": 7.663790038585794e-07,
"loss": 1.7814,
"step": 993
},
{
"epoch": 1.659432387312187,
"grad_norm": 0.0,
"learning_rate": 7.59115126491124e-07,
"loss": 1.5252,
"step": 994
},
{
"epoch": 1.66110183639399,
"grad_norm": 0.0,
"learning_rate": 7.518830085157735e-07,
"loss": 1.9539,
"step": 995
},
{
"epoch": 1.662771285475793,
"grad_norm": 0.0,
"learning_rate": 7.446827040928439e-07,
"loss": 1.7377,
"step": 996
},
{
"epoch": 1.664440734557596,
"grad_norm": 0.0,
"learning_rate": 7.375142671444046e-07,
"loss": 1.8891,
"step": 997
},
{
"epoch": 1.666110183639399,
"grad_norm": 0.0,
"learning_rate": 7.303777513538762e-07,
"loss": 1.9226,
"step": 998
},
{
"epoch": 1.667779632721202,
"grad_norm": 0.0,
"learning_rate": 7.232732101656231e-07,
"loss": 1.5819,
"step": 999
},
{
"epoch": 1.669449081803005,
"grad_norm": 0.0,
"learning_rate": 7.162006967845602e-07,
"loss": 1.7022,
"step": 1000
},
{
"epoch": 1.671118530884808,
"grad_norm": 0.0,
"learning_rate": 7.091602641757467e-07,
"loss": 1.5175,
"step": 1001
},
{
"epoch": 1.672787979966611,
"grad_norm": 0.0,
"learning_rate": 7.021519650639952e-07,
"loss": 1.8913,
"step": 1002
},
{
"epoch": 1.674457429048414,
"grad_norm": 0.0,
"learning_rate": 6.951758519334745e-07,
"loss": 1.801,
"step": 1003
},
{
"epoch": 1.676126878130217,
"grad_norm": 0.0,
"learning_rate": 6.882319770273193e-07,
"loss": 1.6252,
"step": 1004
},
{
"epoch": 1.67779632721202,
"grad_norm": 0.0,
"learning_rate": 6.813203923472328e-07,
"loss": 1.6969,
"step": 1005
},
{
"epoch": 1.679465776293823,
"grad_norm": 0.0,
"learning_rate": 6.744411496531045e-07,
"loss": 1.8072,
"step": 1006
},
{
"epoch": 1.681135225375626,
"grad_norm": 0.0,
"learning_rate": 6.67594300462619e-07,
"loss": 1.6769,
"step": 1007
},
{
"epoch": 1.682804674457429,
"grad_norm": 0.0,
"learning_rate": 6.607798960508693e-07,
"loss": 1.8645,
"step": 1008
},
{
"epoch": 1.684474123539232,
"grad_norm": 0.0,
"learning_rate": 6.539979874499747e-07,
"loss": 1.8879,
"step": 1009
},
{
"epoch": 1.686143572621035,
"grad_norm": 0.0,
"learning_rate": 6.472486254486954e-07,
"loss": 1.5467,
"step": 1010
},
{
"epoch": 1.687813021702838,
"grad_norm": 0.0,
"learning_rate": 6.405318605920602e-07,
"loss": 1.5909,
"step": 1011
},
{
"epoch": 1.689482470784641,
"grad_norm": 0.0,
"learning_rate": 6.338477431809764e-07,
"loss": 1.738,
"step": 1012
},
{
"epoch": 1.691151919866444,
"grad_norm": 0.0,
"learning_rate": 6.271963232718631e-07,
"loss": 1.914,
"step": 1013
},
{
"epoch": 1.692821368948247,
"grad_norm": 0.0,
"learning_rate": 6.205776506762729e-07,
"loss": 1.6797,
"step": 1014
},
{
"epoch": 1.69449081803005,
"grad_norm": 0.0,
"learning_rate": 6.139917749605151e-07,
"loss": 1.7716,
"step": 1015
},
{
"epoch": 1.696160267111853,
"grad_norm": 0.0,
"learning_rate": 6.074387454452891e-07,
"loss": 1.8465,
"step": 1016
},
{
"epoch": 1.697829716193656,
"grad_norm": 0.0,
"learning_rate": 6.009186112053134e-07,
"loss": 2.0192,
"step": 1017
},
{
"epoch": 1.699499165275459,
"grad_norm": 0.0,
"learning_rate": 5.944314210689611e-07,
"loss": 2.0092,
"step": 1018
},
{
"epoch": 1.701168614357262,
"grad_norm": 0.0,
"learning_rate": 5.879772236178871e-07,
"loss": 1.9489,
"step": 1019
},
{
"epoch": 1.702838063439065,
"grad_norm": 0.0,
"learning_rate": 5.815560671866721e-07,
"loss": 1.6819,
"step": 1020
},
{
"epoch": 1.704507512520868,
"grad_norm": 0.0,
"learning_rate": 5.751679998624571e-07,
"loss": 1.3891,
"step": 1021
},
{
"epoch": 1.706176961602671,
"grad_norm": 0.0,
"learning_rate": 5.688130694845817e-07,
"loss": 1.8861,
"step": 1022
},
{
"epoch": 1.707846410684474,
"grad_norm": 0.0,
"learning_rate": 5.624913236442287e-07,
"loss": 1.441,
"step": 1023
},
{
"epoch": 1.7095158597662772,
"grad_norm": 0.0,
"learning_rate": 5.562028096840638e-07,
"loss": 1.4711,
"step": 1024
},
{
"epoch": 1.7111853088480802,
"grad_norm": 0.0,
"learning_rate": 5.499475746978899e-07,
"loss": 1.7767,
"step": 1025
},
{
"epoch": 1.7128547579298832,
"grad_norm": 0.0,
"learning_rate": 5.437256655302814e-07,
"loss": 1.5217,
"step": 1026
},
{
"epoch": 1.7145242070116862,
"grad_norm": 0.0,
"learning_rate": 5.37537128776246e-07,
"loss": 1.8079,
"step": 1027
},
{
"epoch": 1.7161936560934892,
"grad_norm": 0.0,
"learning_rate": 5.313820107808665e-07,
"loss": 1.9587,
"step": 1028
},
{
"epoch": 1.7178631051752922,
"grad_norm": 0.0,
"learning_rate": 5.25260357638957e-07,
"loss": 1.8453,
"step": 1029
},
{
"epoch": 1.7195325542570952,
"grad_norm": 0.0,
"learning_rate": 5.191722151947227e-07,
"loss": 1.6909,
"step": 1030
},
{
"epoch": 1.7212020033388982,
"grad_norm": 0.0,
"learning_rate": 5.131176290414053e-07,
"loss": 1.6264,
"step": 1031
},
{
"epoch": 1.7228714524207012,
"grad_norm": 0.0,
"learning_rate": 5.07096644520954e-07,
"loss": 1.5964,
"step": 1032
},
{
"epoch": 1.7245409015025042,
"grad_norm": 0.0,
"learning_rate": 5.011093067236756e-07,
"loss": 1.9016,
"step": 1033
},
{
"epoch": 1.7262103505843072,
"grad_norm": 0.0,
"learning_rate": 4.951556604879049e-07,
"loss": 1.7335,
"step": 1034
},
{
"epoch": 1.7278797996661102,
"grad_norm": 0.0,
"learning_rate": 4.892357503996625e-07,
"loss": 1.5909,
"step": 1035
},
{
"epoch": 1.7295492487479132,
"grad_norm": 0.0,
"learning_rate": 4.83349620792325e-07,
"loss": 1.633,
"step": 1036
},
{
"epoch": 1.7312186978297162,
"grad_norm": 0.0,
"learning_rate": 4.77497315746292e-07,
"loss": 1.7341,
"step": 1037
},
{
"epoch": 1.7328881469115192,
"grad_norm": 0.0,
"learning_rate": 4.716788790886545e-07,
"loss": 1.632,
"step": 1038
},
{
"epoch": 1.7345575959933222,
"grad_norm": 0.0,
"learning_rate": 4.658943543928707e-07,
"loss": 1.7613,
"step": 1039
},
{
"epoch": 1.7362270450751254,
"grad_norm": 0.0,
"learning_rate": 4.601437849784318e-07,
"loss": 1.8181,
"step": 1040
},
{
"epoch": 1.7378964941569284,
"grad_norm": 0.0,
"learning_rate": 4.544272139105488e-07,
"loss": 1.6785,
"step": 1041
},
{
"epoch": 1.7395659432387314,
"grad_norm": 0.0,
"learning_rate": 4.487446839998194e-07,
"loss": 1.6502,
"step": 1042
},
{
"epoch": 1.7412353923205344,
"grad_norm": 0.0,
"learning_rate": 4.4309623780191214e-07,
"loss": 1.919,
"step": 1043
},
{
"epoch": 1.7429048414023374,
"grad_norm": 0.0,
"learning_rate": 4.374819176172501e-07,
"loss": 1.5251,
"step": 1044
},
{
"epoch": 1.7445742904841404,
"grad_norm": 0.0,
"learning_rate": 4.319017654906887e-07,
"loss": 1.8641,
"step": 1045
},
{
"epoch": 1.7462437395659434,
"grad_norm": 0.0,
"learning_rate": 4.263558232112064e-07,
"loss": 1.9693,
"step": 1046
},
{
"epoch": 1.7479131886477464,
"grad_norm": 0.0,
"learning_rate": 4.2084413231158473e-07,
"loss": 1.5452,
"step": 1047
},
{
"epoch": 1.7495826377295494,
"grad_norm": 0.0,
"learning_rate": 4.153667340681067e-07,
"loss": 1.8121,
"step": 1048
},
{
"epoch": 1.7512520868113524,
"grad_norm": 0.0,
"learning_rate": 4.099236695002379e-07,
"loss": 1.8385,
"step": 1049
},
{
"epoch": 1.7529215358931554,
"grad_norm": 0.0,
"learning_rate": 4.045149793703257e-07,
"loss": 1.9422,
"step": 1050
},
{
"epoch": 1.7545909849749584,
"grad_norm": 0.0,
"learning_rate": 3.9914070418329123e-07,
"loss": 1.849,
"step": 1051
},
{
"epoch": 1.7562604340567614,
"grad_norm": 0.0,
"learning_rate": 3.938008841863289e-07,
"loss": 1.712,
"step": 1052
},
{
"epoch": 1.7579298831385644,
"grad_norm": 0.0,
"learning_rate": 3.8849555936860296e-07,
"loss": 1.5905,
"step": 1053
},
{
"epoch": 1.7595993322203674,
"grad_norm": 0.0,
"learning_rate": 3.832247694609442e-07,
"loss": 1.8678,
"step": 1054
},
{
"epoch": 1.7612687813021703,
"grad_norm": 0.0,
"learning_rate": 3.779885539355621e-07,
"loss": 1.665,
"step": 1055
},
{
"epoch": 1.7629382303839733,
"grad_norm": 0.0,
"learning_rate": 3.7278695200573754e-07,
"loss": 1.5783,
"step": 1056
},
{
"epoch": 1.7646076794657763,
"grad_norm": 0.0,
"learning_rate": 3.6762000262554e-07,
"loss": 2.0622,
"step": 1057
},
{
"epoch": 1.7662771285475793,
"grad_norm": 0.0,
"learning_rate": 3.6248774448952695e-07,
"loss": 1.6632,
"step": 1058
},
{
"epoch": 1.7679465776293823,
"grad_norm": 0.0,
"learning_rate": 3.5739021603246104e-07,
"loss": 1.6519,
"step": 1059
},
{
"epoch": 1.7696160267111853,
"grad_norm": 0.0,
"learning_rate": 3.52327455429019e-07,
"loss": 1.9502,
"step": 1060
},
{
"epoch": 1.7712854757929883,
"grad_norm": 0.0,
"learning_rate": 3.472995005935037e-07,
"loss": 1.6183,
"step": 1061
},
{
"epoch": 1.7729549248747913,
"grad_norm": 0.0,
"learning_rate": 3.423063891795647e-07,
"loss": 1.6387,
"step": 1062
},
{
"epoch": 1.7746243739565943,
"grad_norm": 0.0,
"learning_rate": 3.3734815857991155e-07,
"loss": 1.5453,
"step": 1063
},
{
"epoch": 1.7762938230383973,
"grad_norm": 0.0,
"learning_rate": 3.324248459260393e-07,
"loss": 1.8939,
"step": 1064
},
{
"epoch": 1.7779632721202003,
"grad_norm": 0.0,
"learning_rate": 3.2753648808794505e-07,
"loss": 1.8887,
"step": 1065
},
{
"epoch": 1.7796327212020033,
"grad_norm": 0.0,
"learning_rate": 3.2268312167385687e-07,
"loss": 1.7644,
"step": 1066
},
{
"epoch": 1.7813021702838063,
"grad_norm": 0.0,
"learning_rate": 3.1786478302995305e-07,
"loss": 1.5679,
"step": 1067
},
{
"epoch": 1.7829716193656093,
"grad_norm": 0.0,
"learning_rate": 3.1308150824009785e-07,
"loss": 1.672,
"step": 1068
},
{
"epoch": 1.7846410684474123,
"grad_norm": 0.0,
"learning_rate": 3.0833333312556446e-07,
"loss": 1.6457,
"step": 1069
},
{
"epoch": 1.7863105175292153,
"grad_norm": 0.0,
"learning_rate": 3.0362029324477015e-07,
"loss": 1.4934,
"step": 1070
},
{
"epoch": 1.7879799666110183,
"grad_norm": 0.0,
"learning_rate": 2.9894242389301053e-07,
"loss": 1.946,
"step": 1071
},
{
"epoch": 1.7896494156928213,
"grad_norm": 0.0,
"learning_rate": 2.942997601021924e-07,
"loss": 1.7434,
"step": 1072
},
{
"epoch": 1.7913188647746243,
"grad_norm": 0.0,
"learning_rate": 2.896923366405746e-07,
"loss": 1.7937,
"step": 1073
},
{
"epoch": 1.7929883138564273,
"grad_norm": 0.0,
"learning_rate": 2.851201880125043e-07,
"loss": 2.0654,
"step": 1074
},
{
"epoch": 1.7946577629382303,
"grad_norm": 0.0,
"learning_rate": 2.8058334845816214e-07,
"loss": 1.8588,
"step": 1075
},
{
"epoch": 1.7963272120200333,
"grad_norm": 0.0,
"learning_rate": 2.760818519533037e-07,
"loss": 1.9593,
"step": 1076
},
{
"epoch": 1.7979966611018363,
"grad_norm": 0.0,
"learning_rate": 2.716157322090041e-07,
"loss": 1.8096,
"step": 1077
},
{
"epoch": 1.7996661101836393,
"grad_norm": 0.0,
"learning_rate": 2.6718502267140844e-07,
"loss": 1.3897,
"step": 1078
},
{
"epoch": 1.8013355592654423,
"grad_norm": 0.0,
"learning_rate": 2.6278975652147875e-07,
"loss": 1.6728,
"step": 1079
},
{
"epoch": 1.8030050083472453,
"grad_norm": 0.0,
"learning_rate": 2.584299666747475e-07,
"loss": 1.6031,
"step": 1080
},
{
"epoch": 1.8046744574290483,
"grad_norm": 0.0,
"learning_rate": 2.541056857810681e-07,
"loss": 1.6506,
"step": 1081
},
{
"epoch": 1.8063439065108513,
"grad_norm": 0.0,
"learning_rate": 2.4981694622437546e-07,
"loss": 1.6066,
"step": 1082
},
{
"epoch": 1.8080133555926543,
"grad_norm": 0.0,
"learning_rate": 2.4556378012243807e-07,
"loss": 1.5126,
"step": 1083
},
{
"epoch": 1.8096828046744573,
"grad_norm": 0.0,
"learning_rate": 2.4134621932661916e-07,
"loss": 1.6942,
"step": 1084
},
{
"epoch": 1.8113522537562603,
"grad_norm": 0.0,
"learning_rate": 2.3716429542164244e-07,
"loss": 1.7853,
"step": 1085
},
{
"epoch": 1.8130217028380633,
"grad_norm": 0.0,
"learning_rate": 2.330180397253473e-07,
"loss": 1.7304,
"step": 1086
},
{
"epoch": 1.8146911519198663,
"grad_norm": 0.0,
"learning_rate": 2.28907483288463e-07,
"loss": 1.5907,
"step": 1087
},
{
"epoch": 1.8163606010016693,
"grad_norm": 0.0,
"learning_rate": 2.2483265689436929e-07,
"loss": 1.9272,
"step": 1088
},
{
"epoch": 1.8180300500834723,
"grad_norm": 0.0,
"learning_rate": 2.2079359105886989e-07,
"loss": 1.8972,
"step": 1089
},
{
"epoch": 1.8196994991652755,
"grad_norm": 0.0,
"learning_rate": 2.167903160299617e-07,
"loss": 1.4341,
"step": 1090
},
{
"epoch": 1.8213689482470785,
"grad_norm": 0.0,
"learning_rate": 2.1282286178761046e-07,
"loss": 1.7007,
"step": 1091
},
{
"epoch": 1.8230383973288815,
"grad_norm": 0.0,
"learning_rate": 2.0889125804352595e-07,
"loss": 1.3616,
"step": 1092
},
{
"epoch": 1.8247078464106845,
"grad_norm": 0.0,
"learning_rate": 2.049955342409349e-07,
"loss": 1.7606,
"step": 1093
},
{
"epoch": 1.8263772954924875,
"grad_norm": 0.0,
"learning_rate": 2.0113571955436895e-07,
"loss": 1.4619,
"step": 1094
},
{
"epoch": 1.8280467445742905,
"grad_norm": 0.0,
"learning_rate": 1.9731184288943772e-07,
"loss": 1.5691,
"step": 1095
},
{
"epoch": 1.8297161936560935,
"grad_norm": 0.0,
"learning_rate": 1.9352393288261717e-07,
"loss": 1.6799,
"step": 1096
},
{
"epoch": 1.8313856427378965,
"grad_norm": 0.0,
"learning_rate": 1.8977201790103428e-07,
"loss": 1.4127,
"step": 1097
},
{
"epoch": 1.8330550918196995,
"grad_norm": 0.0,
"learning_rate": 1.8605612604225388e-07,
"loss": 1.811,
"step": 1098
},
{
"epoch": 1.8347245409015025,
"grad_norm": 0.0,
"learning_rate": 1.8237628513407046e-07,
"loss": 1.8761,
"step": 1099
},
{
"epoch": 1.8363939899833055,
"grad_norm": 0.0,
"learning_rate": 1.787325227342951e-07,
"loss": 1.7312,
"step": 1100
},
{
"epoch": 1.8380634390651085,
"grad_norm": 0.0,
"learning_rate": 1.751248661305538e-07,
"loss": 1.6336,
"step": 1101
},
{
"epoch": 1.8397328881469115,
"grad_norm": 0.0,
"learning_rate": 1.715533423400817e-07,
"loss": 1.9454,
"step": 1102
},
{
"epoch": 1.8414023372287145,
"grad_norm": 0.0,
"learning_rate": 1.680179781095187e-07,
"loss": 1.6344,
"step": 1103
},
{
"epoch": 1.8430717863105175,
"grad_norm": 0.0,
"learning_rate": 1.6451879991471186e-07,
"loss": 1.3738,
"step": 1104
},
{
"epoch": 1.8447412353923205,
"grad_norm": 0.0,
"learning_rate": 1.610558339605156e-07,
"loss": 1.5461,
"step": 1105
},
{
"epoch": 1.8464106844741235,
"grad_norm": 0.0,
"learning_rate": 1.576291061805979e-07,
"loss": 1.939,
"step": 1106
},
{
"epoch": 1.8480801335559267,
"grad_norm": 0.0,
"learning_rate": 1.542386422372405e-07,
"loss": 1.5495,
"step": 1107
},
{
"epoch": 1.8497495826377297,
"grad_norm": 0.0,
"learning_rate": 1.5088446752115403e-07,
"loss": 1.7424,
"step": 1108
},
{
"epoch": 1.8514190317195327,
"grad_norm": 0.0,
"learning_rate": 1.4756660715128267e-07,
"loss": 1.4635,
"step": 1109
},
{
"epoch": 1.8530884808013357,
"grad_norm": 0.0,
"learning_rate": 1.4428508597461587e-07,
"loss": 1.8355,
"step": 1110
},
{
"epoch": 1.8547579298831387,
"grad_norm": 0.0,
"learning_rate": 1.4103992856600634e-07,
"loss": 1.4809,
"step": 1111
},
{
"epoch": 1.8564273789649417,
"grad_norm": 0.0,
"learning_rate": 1.378311592279835e-07,
"loss": 1.5775,
"step": 1112
},
{
"epoch": 1.8580968280467447,
"grad_norm": 0.0,
"learning_rate": 1.346588019905698e-07,
"loss": 1.7013,
"step": 1113
},
{
"epoch": 1.8597662771285477,
"grad_norm": 0.0,
"learning_rate": 1.3152288061110518e-07,
"loss": 1.8002,
"step": 1114
},
{
"epoch": 1.8614357262103507,
"grad_norm": 0.0,
"learning_rate": 1.284234185740635e-07,
"loss": 1.6495,
"step": 1115
},
{
"epoch": 1.8631051752921537,
"grad_norm": 0.0,
"learning_rate": 1.253604390908819e-07,
"loss": 1.7394,
"step": 1116
},
{
"epoch": 1.8647746243739567,
"grad_norm": 0.0,
"learning_rate": 1.2233396509978513e-07,
"loss": 1.937,
"step": 1117
},
{
"epoch": 1.8664440734557597,
"grad_norm": 0.0,
"learning_rate": 1.193440192656109e-07,
"loss": 1.7579,
"step": 1118
},
{
"epoch": 1.8681135225375627,
"grad_norm": 0.0,
"learning_rate": 1.163906239796453e-07,
"loss": 1.9714,
"step": 1119
},
{
"epoch": 1.8697829716193657,
"grad_norm": 0.0,
"learning_rate": 1.1347380135945108e-07,
"loss": 1.576,
"step": 1120
},
{
"epoch": 1.8714524207011687,
"grad_norm": 0.0,
"learning_rate": 1.1059357324870456e-07,
"loss": 1.7355,
"step": 1121
},
{
"epoch": 1.8731218697829717,
"grad_norm": 0.0,
"learning_rate": 1.0774996121702907e-07,
"loss": 1.9441,
"step": 1122
},
{
"epoch": 1.8747913188647747,
"grad_norm": 0.0,
"learning_rate": 1.049429865598367e-07,
"loss": 1.829,
"step": 1123
},
{
"epoch": 1.8764607679465777,
"grad_norm": 0.0,
"learning_rate": 1.0217267029816736e-07,
"loss": 1.827,
"step": 1124
},
{
"epoch": 1.8781302170283807,
"grad_norm": 0.0,
"learning_rate": 9.943903317853055e-08,
"loss": 1.6159,
"step": 1125
},
{
"epoch": 1.8797996661101837,
"grad_norm": 0.0,
"learning_rate": 9.674209567275161e-08,
"loss": 1.6928,
"step": 1126
},
{
"epoch": 1.8814691151919867,
"grad_norm": 0.0,
"learning_rate": 9.408187797781743e-08,
"loss": 1.7798,
"step": 1127
},
{
"epoch": 1.8831385642737897,
"grad_norm": 0.0,
"learning_rate": 9.145840001572537e-08,
"loss": 1.6211,
"step": 1128
},
{
"epoch": 1.8848080133555927,
"grad_norm": 0.0,
"learning_rate": 8.887168143333402e-08,
"loss": 1.6973,
"step": 1129
},
{
"epoch": 1.8864774624373957,
"grad_norm": 0.0,
"learning_rate": 8.632174160221496e-08,
"loss": 1.893,
"step": 1130
},
{
"epoch": 1.8881469115191987,
"grad_norm": 0.0,
"learning_rate": 8.380859961851174e-08,
"loss": 1.6941,
"step": 1131
},
{
"epoch": 1.8898163606010017,
"grad_norm": 0.0,
"learning_rate": 8.133227430279055e-08,
"loss": 1.6935,
"step": 1132
},
{
"epoch": 1.8914858096828047,
"grad_norm": 0.0,
"learning_rate": 7.889278419990598e-08,
"loss": 1.7137,
"step": 1133
},
{
"epoch": 1.8931552587646077,
"grad_norm": 0.0,
"learning_rate": 7.649014757885597e-08,
"loss": 1.5314,
"step": 1134
},
{
"epoch": 1.8948247078464107,
"grad_norm": 0.0,
"learning_rate": 7.41243824326504e-08,
"loss": 1.8015,
"step": 1135
},
{
"epoch": 1.8964941569282137,
"grad_norm": 0.0,
"learning_rate": 7.179550647817224e-08,
"loss": 1.9739,
"step": 1136
},
{
"epoch": 1.8981636060100167,
"grad_norm": 0.0,
"learning_rate": 6.950353715604597e-08,
"loss": 1.7966,
"step": 1137
},
{
"epoch": 1.8998330550918197,
"grad_norm": 0.0,
"learning_rate": 6.724849163050995e-08,
"loss": 1.5907,
"step": 1138
},
{
"epoch": 1.9015025041736227,
"grad_norm": 0.0,
"learning_rate": 6.50303867892832e-08,
"loss": 1.5096,
"step": 1139
},
{
"epoch": 1.9031719532554257,
"grad_norm": 0.0,
"learning_rate": 6.28492392434421e-08,
"loss": 1.5267,
"step": 1140
},
{
"epoch": 1.9048414023372287,
"grad_norm": 0.0,
"learning_rate": 6.070506532729393e-08,
"loss": 2.113,
"step": 1141
},
{
"epoch": 1.9065108514190316,
"grad_norm": 0.0,
"learning_rate": 5.8597881098257924e-08,
"loss": 1.7329,
"step": 1142
},
{
"epoch": 1.9081803005008346,
"grad_norm": 0.0,
"learning_rate": 5.652770233673943e-08,
"loss": 1.512,
"step": 1143
},
{
"epoch": 1.9098497495826376,
"grad_norm": 0.0,
"learning_rate": 5.4494544546018216e-08,
"loss": 1.7529,
"step": 1144
},
{
"epoch": 1.9115191986644406,
"grad_norm": 0.0,
"learning_rate": 5.249842295212748e-08,
"loss": 1.6867,
"step": 1145
},
{
"epoch": 1.9131886477462436,
"grad_norm": 0.0,
"learning_rate": 5.0539352503741756e-08,
"loss": 1.5203,
"step": 1146
},
{
"epoch": 1.9148580968280466,
"grad_norm": 0.0,
"learning_rate": 4.861734787206529e-08,
"loss": 1.7982,
"step": 1147
},
{
"epoch": 1.9165275459098496,
"grad_norm": 0.0,
"learning_rate": 4.67324234507216e-08,
"loss": 1.5475,
"step": 1148
},
{
"epoch": 1.9181969949916526,
"grad_norm": 0.0,
"learning_rate": 4.48845933556441e-08,
"loss": 1.6388,
"step": 1149
},
{
"epoch": 1.9198664440734556,
"grad_norm": 0.0,
"learning_rate": 4.307387142497399e-08,
"loss": 1.6519,
"step": 1150
},
{
"epoch": 1.9215358931552586,
"grad_norm": 0.0,
"learning_rate": 4.130027121895419e-08,
"loss": 1.7686,
"step": 1151
},
{
"epoch": 1.9232053422370616,
"grad_norm": 0.0,
"learning_rate": 3.956380601982668e-08,
"loss": 1.7067,
"step": 1152
},
{
"epoch": 1.9248747913188646,
"grad_norm": 0.0,
"learning_rate": 3.786448883173755e-08,
"loss": 1.5475,
"step": 1153
},
{
"epoch": 1.9265442404006676,
"grad_norm": 0.0,
"learning_rate": 3.620233238063375e-08,
"loss": 1.8219,
"step": 1154
},
{
"epoch": 1.9282136894824706,
"grad_norm": 0.0,
"learning_rate": 3.45773491141721e-08,
"loss": 1.7498,
"step": 1155
},
{
"epoch": 1.9298831385642736,
"grad_norm": 0.0,
"learning_rate": 3.2989551201624836e-08,
"loss": 1.9549,
"step": 1156
},
{
"epoch": 1.9315525876460768,
"grad_norm": 0.0,
"learning_rate": 3.143895053378698e-08,
"loss": 1.6767,
"step": 1157
},
{
"epoch": 1.9332220367278798,
"grad_norm": 0.0,
"learning_rate": 2.992555872289082e-08,
"loss": 1.4431,
"step": 1158
},
{
"epoch": 1.9348914858096828,
"grad_norm": 0.0,
"learning_rate": 2.844938710251377e-08,
"loss": 1.519,
"step": 1159
},
{
"epoch": 1.9365609348914858,
"grad_norm": 0.0,
"learning_rate": 2.7010446727498974e-08,
"loss": 1.8625,
"step": 1160
},
{
"epoch": 1.9382303839732888,
"grad_norm": 0.0,
"learning_rate": 2.5608748373869285e-08,
"loss": 1.7547,
"step": 1161
},
{
"epoch": 1.9398998330550918,
"grad_norm": 0.0,
"learning_rate": 2.4244302538746765e-08,
"loss": 1.8016,
"step": 1162
},
{
"epoch": 1.9415692821368948,
"grad_norm": 0.0,
"learning_rate": 2.2917119440275524e-08,
"loss": 1.5698,
"step": 1163
},
{
"epoch": 1.9432387312186978,
"grad_norm": 0.0,
"learning_rate": 2.162720901754234e-08,
"loss": 1.6776,
"step": 1164
},
{
"epoch": 1.9449081803005008,
"grad_norm": 0.0,
"learning_rate": 2.037458093050726e-08,
"loss": 1.5975,
"step": 1165
},
{
"epoch": 1.9465776293823038,
"grad_norm": 0.0,
"learning_rate": 1.9159244559924795e-08,
"loss": 2.009,
"step": 1166
},
{
"epoch": 1.9482470784641068,
"grad_norm": 0.0,
"learning_rate": 1.7981209007278956e-08,
"loss": 1.6987,
"step": 1167
},
{
"epoch": 1.9499165275459098,
"grad_norm": 0.0,
"learning_rate": 1.6840483094713867e-08,
"loss": 1.8003,
"step": 1168
},
{
"epoch": 1.9515859766277128,
"grad_norm": 0.0,
"learning_rate": 1.573707536496494e-08,
"loss": 1.7123,
"step": 1169
},
{
"epoch": 1.9532554257095158,
"grad_norm": 0.0,
"learning_rate": 1.4670994081297796e-08,
"loss": 1.4862,
"step": 1170
},
{
"epoch": 1.9549248747913188,
"grad_norm": 0.0,
"learning_rate": 1.3642247227446114e-08,
"loss": 1.5492,
"step": 1171
},
{
"epoch": 1.9565943238731218,
"grad_norm": 0.0,
"learning_rate": 1.2650842507550554e-08,
"loss": 1.7895,
"step": 1172
},
{
"epoch": 1.9582637729549248,
"grad_norm": 0.0,
"learning_rate": 1.1696787346102134e-08,
"loss": 1.747,
"step": 1173
},
{
"epoch": 1.959933222036728,
"grad_norm": 0.0,
"learning_rate": 1.078008888788673e-08,
"loss": 1.734,
"step": 1174
},
{
"epoch": 1.961602671118531,
"grad_norm": 0.0,
"learning_rate": 9.900753997929557e-09,
"loss": 1.4903,
"step": 1175
},
{
"epoch": 1.963272120200334,
"grad_norm": 0.0,
"learning_rate": 9.058789261446876e-09,
"loss": 1.71,
"step": 1176
},
{
"epoch": 1.964941569282137,
"grad_norm": 0.0,
"learning_rate": 8.254200983794369e-09,
"loss": 1.4663,
"step": 1177
},
{
"epoch": 1.96661101836394,
"grad_norm": 0.0,
"learning_rate": 7.48699519042051e-09,
"loss": 1.807,
"step": 1178
},
{
"epoch": 1.968280467445743,
"grad_norm": 0.0,
"learning_rate": 6.757177626822709e-09,
"loss": 1.5051,
"step": 1179
},
{
"epoch": 1.969949916527546,
"grad_norm": 0.0,
"learning_rate": 6.0647537585017956e-09,
"loss": 1.3409,
"step": 1180
},
{
"epoch": 1.971619365609349,
"grad_norm": 0.0,
"learning_rate": 5.409728770923162e-09,
"loss": 1.7744,
"step": 1181
},
{
"epoch": 1.973288814691152,
"grad_norm": 0.0,
"learning_rate": 4.792107569476789e-09,
"loss": 1.7346,
"step": 1182
},
{
"epoch": 1.974958263772955,
"grad_norm": 0.0,
"learning_rate": 4.211894779441727e-09,
"loss": 1.5221,
"step": 1183
},
{
"epoch": 1.976627712854758,
"grad_norm": 0.0,
"learning_rate": 3.669094745950008e-09,
"loss": 1.795,
"step": 1184
},
{
"epoch": 1.978297161936561,
"grad_norm": 0.0,
"learning_rate": 3.16371153395445e-09,
"loss": 1.5513,
"step": 1185
},
{
"epoch": 1.979966611018364,
"grad_norm": 0.0,
"learning_rate": 2.6957489281997927e-09,
"loss": 1.687,
"step": 1186
},
{
"epoch": 1.981636060100167,
"grad_norm": 0.0,
"learning_rate": 2.2652104331921664e-09,
"loss": 1.7631,
"step": 1187
},
{
"epoch": 1.98330550918197,
"grad_norm": 0.0,
"learning_rate": 1.8720992731741104e-09,
"loss": 1.5843,
"step": 1188
},
{
"epoch": 1.984974958263773,
"grad_norm": 0.0,
"learning_rate": 1.5164183920995946e-09,
"loss": 1.6599,
"step": 1189
},
{
"epoch": 1.986644407345576,
"grad_norm": 0.0,
"learning_rate": 1.1981704536129234e-09,
"loss": 1.8012,
"step": 1190
},
{
"epoch": 1.988313856427379,
"grad_norm": 0.0,
"learning_rate": 9.173578410281992e-10,
"loss": 1.8889,
"step": 1191
},
{
"epoch": 1.989983305509182,
"grad_norm": 0.0,
"learning_rate": 6.739826573121111e-10,
"loss": 1.7534,
"step": 1192
},
{
"epoch": 1.991652754590985,
"grad_norm": 0.0,
"learning_rate": 4.680467250672837e-10,
"loss": 1.8212,
"step": 1193
},
{
"epoch": 1.993322203672788,
"grad_norm": 0.0,
"learning_rate": 2.995515865183984e-10,
"loss": 1.6632,
"step": 1194
},
{
"epoch": 1.994991652754591,
"grad_norm": 0.0,
"learning_rate": 1.6849850350275643e-10,
"loss": 1.7037,
"step": 1195
},
{
"epoch": 1.996661101836394,
"grad_norm": 0.0,
"learning_rate": 7.48884574575115e-11,
"loss": 1.8285,
"step": 1196
},
{
"epoch": 1.998330550918197,
"grad_norm": 0.0,
"learning_rate": 1.872214941633921e-11,
"loss": 1.4735,
"step": 1197
},
{
"epoch": 2.0,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 1.8333,
"step": 1198
}
],
"logging_steps": 1,
"max_steps": 1198,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3485232050772378e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}