{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 921,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.003257328990228013, "grad_norm": 8.991729736328125, "learning_rate": 5e-06, "loss": 4.95, "step": 1 },
    { "epoch": 0.006514657980456026, "grad_norm": 8.309410095214844, "learning_rate": 4.999985455791999e-06, "loss": 4.8245, "step": 2 },
    { "epoch": 0.009771986970684038, "grad_norm": 8.790390968322754, "learning_rate": 4.999941823337222e-06, "loss": 5.0692, "step": 3 },
    { "epoch": 0.013029315960912053, "grad_norm": 6.615326881408691, "learning_rate": 4.99986910314335e-06, "loss": 4.7744, "step": 4 },
    { "epoch": 0.016286644951140065, "grad_norm": 6.837432384490967, "learning_rate": 4.999767296056508e-06, "loss": 4.6511, "step": 5 },
    { "epoch": 0.019543973941368076, "grad_norm": 8.298238754272461, "learning_rate": 4.999636403261258e-06, "loss": 4.7926, "step": 6 },
    { "epoch": 0.02280130293159609, "grad_norm": 6.846872806549072, "learning_rate": 4.999476426280588e-06, "loss": 4.6072, "step": 7 },
    { "epoch": 0.026058631921824105, "grad_norm": 6.849335193634033, "learning_rate": 4.999287366975888e-06, "loss": 4.5674, "step": 8 },
    { "epoch": 0.029315960912052116, "grad_norm": 5.802831172943115, "learning_rate": 4.99906922754693e-06, "loss": 4.3631, "step": 9 },
    { "epoch": 0.03257328990228013, "grad_norm": 8.730732917785645, "learning_rate": 4.998822010531849e-06, "loss": 4.8049, "step": 10 },
    { "epoch": 0.035830618892508145, "grad_norm": 5.898006916046143, "learning_rate": 4.998545718807104e-06, "loss": 4.8731, "step": 11 },
    { "epoch": 0.03908794788273615, "grad_norm": 8.241886138916016, "learning_rate": 4.998240355587452e-06, "loss": 4.4484, "step": 12 },
    { "epoch": 0.04234527687296417, "grad_norm": 5.800337314605713, "learning_rate": 4.997905924425903e-06, "loss": 4.5168, "step": 13 },
    { "epoch": 0.04560260586319218, "grad_norm": 4.856197357177734, "learning_rate": 4.99754242921369e-06, "loss": 4.6223, "step": 14 },
    { "epoch": 0.048859934853420196, "grad_norm": 6.2050628662109375, "learning_rate": 4.997149874180209e-06, "loss": 4.3281, "step": 15 },
    { "epoch": 0.05211726384364821, "grad_norm": 5.954516410827637, "learning_rate": 4.996728263892985e-06, "loss": 4.5456, "step": 16 },
    { "epoch": 0.05537459283387622, "grad_norm": 5.490358352661133, "learning_rate": 4.9962776032576065e-06, "loss": 4.599, "step": 17 },
    { "epoch": 0.05863192182410423, "grad_norm": 5.2625885009765625, "learning_rate": 4.995797897517676e-06, "loss": 4.3262, "step": 18 },
    { "epoch": 0.06188925081433225, "grad_norm": 5.2583394050598145, "learning_rate": 4.995289152254744e-06, "loss": 4.3131, "step": 19 },
    { "epoch": 0.06514657980456026, "grad_norm": 8.310872077941895, "learning_rate": 4.994751373388249e-06, "loss": 4.3256, "step": 20 },
    { "epoch": 0.06840390879478828, "grad_norm": 4.853321552276611, "learning_rate": 4.994184567175446e-06, "loss": 4.1977, "step": 21 },
    { "epoch": 0.07166123778501629, "grad_norm": 7.600931644439697, "learning_rate": 4.9935887402113315e-06, "loss": 4.0224, "step": 22 },
    { "epoch": 0.0749185667752443, "grad_norm": 5.884659767150879, "learning_rate": 4.9929638994285715e-06, "loss": 4.2029, "step": 23 },
    { "epoch": 0.0781758957654723, "grad_norm": 5.0048699378967285, "learning_rate": 4.9923100520974165e-06, "loss": 4.434, "step": 24 },
    { "epoch": 0.08143322475570032, "grad_norm": 4.726573467254639, "learning_rate": 4.991627205825621e-06, "loss": 4.0566, "step": 25 },
    { "epoch": 0.08469055374592833, "grad_norm": 6.583544731140137, "learning_rate": 4.99091536855835e-06, "loss": 4.1312, "step": 26 },
    { "epoch": 0.08794788273615635, "grad_norm": 6.678943157196045, "learning_rate": 4.990174548578093e-06, "loss": 4.1762, "step": 27 },
    { "epoch": 0.09120521172638436, "grad_norm": 6.158233642578125, "learning_rate": 4.989404754504561e-06, "loss": 4.6873, "step": 28 },
    { "epoch": 0.09446254071661238, "grad_norm": 6.6103291511535645, "learning_rate": 4.9886059952945885e-06, "loss": 4.0162, "step": 29 },
    { "epoch": 0.09771986970684039, "grad_norm": 8.516252517700195, "learning_rate": 4.987778280242034e-06, "loss": 4.0497, "step": 30 },
    { "epoch": 0.10097719869706841, "grad_norm": 6.198530673980713, "learning_rate": 4.986921618977664e-06, "loss": 4.2576, "step": 31 },
    { "epoch": 0.10423452768729642, "grad_norm": 6.267817974090576, "learning_rate": 4.9860360214690465e-06, "loss": 4.3573, "step": 32 },
    { "epoch": 0.10749185667752444, "grad_norm": 4.247263431549072, "learning_rate": 4.985121498020433e-06, "loss": 4.4098, "step": 33 },
    { "epoch": 0.11074918566775244, "grad_norm": 6.026848793029785, "learning_rate": 4.9841780592726385e-06, "loss": 4.3846, "step": 34 },
    { "epoch": 0.11400651465798045, "grad_norm": 4.245572566986084, "learning_rate": 4.9832057162029194e-06, "loss": 4.1725, "step": 35 },
    { "epoch": 0.11726384364820847, "grad_norm": 6.846841812133789, "learning_rate": 4.982204480124844e-06, "loss": 4.5872, "step": 36 },
    { "epoch": 0.12052117263843648, "grad_norm": 7.242786407470703, "learning_rate": 4.981174362688159e-06, "loss": 4.0026, "step": 37 },
    { "epoch": 0.1237785016286645, "grad_norm": 5.995569705963135, "learning_rate": 4.980115375878659e-06, "loss": 4.2044, "step": 38 },
    { "epoch": 0.1270358306188925, "grad_norm": 6.272200107574463, "learning_rate": 4.979027532018045e-06, "loss": 4.0437, "step": 39 },
    { "epoch": 0.13029315960912052, "grad_norm": 6.379438877105713, "learning_rate": 4.977910843763777e-06, "loss": 4.4689, "step": 40 },
    { "epoch": 0.13355048859934854, "grad_norm": 5.513742923736572, "learning_rate": 4.976765324108932e-06, "loss": 4.2111, "step": 41 },
    { "epoch": 0.13680781758957655, "grad_norm": 7.665502548217773, "learning_rate": 4.975590986382053e-06, "loss": 3.5765, "step": 42 },
    { "epoch": 0.14006514657980457, "grad_norm": 4.788407802581787, "learning_rate": 4.974387844246987e-06, "loss": 4.3693, "step": 43 },
    { "epoch": 0.14332247557003258, "grad_norm": 5.028674602508545, "learning_rate": 4.973155911702736e-06, "loss": 4.1687, "step": 44 },
    { "epoch": 0.1465798045602606, "grad_norm": 8.316819190979004, "learning_rate": 4.971895203083285e-06, "loss": 4.0222, "step": 45 },
    { "epoch": 0.1498371335504886, "grad_norm": 5.167773723602295, "learning_rate": 4.970605733057441e-06, "loss": 4.1067, "step": 46 },
    { "epoch": 0.15309446254071662, "grad_norm": 9.906169891357422, "learning_rate": 4.969287516628661e-06, "loss": 3.4177, "step": 47 },
    { "epoch": 0.1563517915309446, "grad_norm": 5.868465423583984, "learning_rate": 4.967940569134875e-06, "loss": 4.2688, "step": 48 },
    { "epoch": 0.15960912052117263, "grad_norm": 4.171844005584717, "learning_rate": 4.9665649062483115e-06, "loss": 4.2632, "step": 49 },
    { "epoch": 0.16286644951140064, "grad_norm": 7.338898181915283, "learning_rate": 4.965160543975312e-06, "loss": 4.2469, "step": 50 },
    { "epoch": 0.16612377850162866, "grad_norm": 4.580145359039307, "learning_rate": 4.963727498656146e-06, "loss": 4.3323, "step": 51 },
    { "epoch": 0.16938110749185667, "grad_norm": 7.303488254547119, "learning_rate": 4.962265786964821e-06, "loss": 3.9707, "step": 52 },
    { "epoch": 0.17263843648208468, "grad_norm": 6.1601080894470215, "learning_rate": 4.960775425908887e-06, "loss": 4.0127, "step": 53 },
    { "epoch": 0.1758957654723127, "grad_norm": 4.484189510345459, "learning_rate": 4.959256432829242e-06, "loss": 4.2545, "step": 54 },
    { "epoch": 0.1791530944625407, "grad_norm": 5.867091178894043, "learning_rate": 4.957708825399928e-06, "loss": 4.178, "step": 55 },
    { "epoch": 0.18241042345276873, "grad_norm": 6.303260326385498, "learning_rate": 4.956132621627922e-06, "loss": 4.0996, "step": 56 },
    { "epoch": 0.18566775244299674, "grad_norm": 4.000598907470703, "learning_rate": 4.954527839852935e-06, "loss": 4.1641, "step": 57 },
    { "epoch": 0.18892508143322476, "grad_norm": 6.654462814331055, "learning_rate": 4.952894498747189e-06, "loss": 4.0859, "step": 58 },
    { "epoch": 0.19218241042345277, "grad_norm": 6.11088228225708, "learning_rate": 4.951232617315207e-06, "loss": 4.5088, "step": 59 },
    { "epoch": 0.19543973941368079, "grad_norm": 4.228384494781494, "learning_rate": 4.949542214893589e-06, "loss": 4.2673, "step": 60 },
    { "epoch": 0.1986970684039088, "grad_norm": 7.4116950035095215, "learning_rate": 4.947823311150785e-06, "loss": 3.9379, "step": 61 },
    { "epoch": 0.20195439739413681, "grad_norm": 6.180227279663086, "learning_rate": 4.946075926086872e-06, "loss": 4.0369, "step": 62 },
    { "epoch": 0.20521172638436483, "grad_norm": 4.931899547576904, "learning_rate": 4.9443000800333135e-06, "loss": 4.2796, "step": 63 },
    { "epoch": 0.20846905537459284, "grad_norm": 4.462183475494385, "learning_rate": 4.9424957936527295e-06, "loss": 4.2172, "step": 64 },
    { "epoch": 0.21172638436482086, "grad_norm": 8.078901290893555, "learning_rate": 4.940663087938654e-06, "loss": 3.7498, "step": 65 },
    { "epoch": 0.21498371335504887, "grad_norm": 5.682638645172119, "learning_rate": 4.938801984215289e-06, "loss": 3.915, "step": 66 },
    { "epoch": 0.2182410423452769, "grad_norm": 5.182820796966553, "learning_rate": 4.936912504137257e-06, "loss": 4.0408, "step": 67 },
    { "epoch": 0.22149837133550487, "grad_norm": 5.436476230621338, "learning_rate": 4.934994669689353e-06, "loss": 4.3085, "step": 68 },
    { "epoch": 0.2247557003257329, "grad_norm": 5.0985212326049805, "learning_rate": 4.933048503186282e-06, "loss": 4.348, "step": 69 },
    { "epoch": 0.2280130293159609, "grad_norm": 6.351657390594482, "learning_rate": 4.931074027272406e-06, "loss": 4.2169, "step": 70 },
    { "epoch": 0.23127035830618892, "grad_norm": 5.03814697265625, "learning_rate": 4.9290712649214735e-06, "loss": 4.0666, "step": 71 },
    { "epoch": 0.23452768729641693, "grad_norm": 4.342626094818115, "learning_rate": 4.9270402394363604e-06, "loss": 4.1099, "step": 72 },
    { "epoch": 0.23778501628664495, "grad_norm": 4.489912033081055, "learning_rate": 4.924980974448791e-06, "loss": 3.9711, "step": 73 },
    { "epoch": 0.24104234527687296, "grad_norm": 6.246860980987549, "learning_rate": 4.922893493919068e-06, "loss": 3.7721, "step": 74 },
    { "epoch": 0.24429967426710097, "grad_norm": 6.77864933013916, "learning_rate": 4.920777822135793e-06, "loss": 4.072, "step": 75 },
    { "epoch": 0.247557003257329, "grad_norm": 5.387760162353516, "learning_rate": 4.918633983715582e-06, "loss": 4.0807, "step": 76 },
    { "epoch": 0.250814332247557, "grad_norm": 5.91160249710083, "learning_rate": 4.91646200360278e-06, "loss": 4.1652, "step": 77 },
    { "epoch": 0.254071661237785, "grad_norm": 6.564038276672363, "learning_rate": 4.914261907069172e-06, "loss": 4.2095, "step": 78 },
    { "epoch": 0.25732899022801303, "grad_norm": 5.434807777404785, "learning_rate": 4.912033719713687e-06, "loss": 3.8395, "step": 79 },
    { "epoch": 0.26058631921824105, "grad_norm": 4.123232841491699, "learning_rate": 4.909777467462103e-06, "loss": 4.0243, "step": 80 },
    { "epoch": 0.26384364820846906, "grad_norm": 4.283027172088623, "learning_rate": 4.9074931765667386e-06, "loss": 4.1818, "step": 81 },
    { "epoch": 0.2671009771986971, "grad_norm": 7.180938243865967, "learning_rate": 4.905180873606157e-06, "loss": 4.3134, "step": 82 },
    { "epoch": 0.2703583061889251, "grad_norm": 7.892354488372803, "learning_rate": 4.90284058548485e-06, "loss": 3.7998, "step": 83 },
    { "epoch": 0.2736156351791531, "grad_norm": 5.744028568267822, "learning_rate": 4.900472339432928e-06, "loss": 4.0191, "step": 84 },
    { "epoch": 0.2768729641693811, "grad_norm": 7.475683212280273, "learning_rate": 4.898076163005802e-06, "loss": 4.2697, "step": 85 },
    { "epoch": 0.28013029315960913, "grad_norm": 7.0953240394592285, "learning_rate": 4.89565208408386e-06, "loss": 4.143, "step": 86 },
    { "epoch": 0.28338762214983715, "grad_norm": 5.530087947845459, "learning_rate": 4.893200130872152e-06, "loss": 4.055, "step": 87 },
    { "epoch": 0.28664495114006516, "grad_norm": 7.326042175292969, "learning_rate": 4.890720331900049e-06, "loss": 4.0776, "step": 88 },
    { "epoch": 0.2899022801302932, "grad_norm": 6.444993495941162, "learning_rate": 4.8882127160209234e-06, "loss": 4.3407, "step": 89 },
    { "epoch": 0.2931596091205212, "grad_norm": 5.226675033569336, "learning_rate": 4.885677312411802e-06, "loss": 3.9822, "step": 90 },
    { "epoch": 0.2964169381107492, "grad_norm": 4.015732765197754, "learning_rate": 4.883114150573037e-06, "loss": 4.2537, "step": 91 },
    { "epoch": 0.2996742671009772, "grad_norm": 7.535611152648926, "learning_rate": 4.880523260327954e-06, "loss": 4.0133, "step": 92 },
    { "epoch": 0.30293159609120524, "grad_norm": 6.678111553192139, "learning_rate": 4.8779046718225105e-06, "loss": 3.8224, "step": 93 },
    { "epoch": 0.30618892508143325, "grad_norm": 5.473388671875, "learning_rate": 4.875258415524945e-06, "loss": 4.2571, "step": 94 },
    { "epoch": 0.30944625407166126, "grad_norm": 5.16727876663208, "learning_rate": 4.872584522225417e-06, "loss": 4.1357, "step": 95 },
    { "epoch": 0.3127035830618892, "grad_norm": 5.79735803604126, "learning_rate": 4.8698830230356555e-06, "loss": 4.2043, "step": 96 },
    { "epoch": 0.31596091205211724, "grad_norm": 6.028461933135986, "learning_rate": 4.867153949388594e-06, "loss": 3.9955, "step": 97 },
    { "epoch": 0.31921824104234525, "grad_norm": 4.4654340744018555, "learning_rate": 4.864397333038002e-06, "loss": 4.0395, "step": 98 },
    { "epoch": 0.32247557003257327, "grad_norm": 4.826022624969482, "learning_rate": 4.861613206058123e-06, "loss": 3.8778, "step": 99 },
    { "epoch": 0.3257328990228013, "grad_norm": 5.090777397155762, "learning_rate": 4.858801600843295e-06, "loss": 4.0367, "step": 100 },
    { "epoch": 0.3289902280130293, "grad_norm": 5.630136966705322, "learning_rate": 4.855962550107574e-06, "loss": 4.2245, "step": 101 },
    { "epoch": 0.3322475570032573, "grad_norm": 4.38795804977417, "learning_rate": 4.853096086884354e-06, "loss": 4.2784, "step": 102 },
    { "epoch": 0.3355048859934853, "grad_norm": 7.042239665985107, "learning_rate": 4.850202244525987e-06, "loss": 3.6864, "step": 103 },
    { "epoch": 0.33876221498371334, "grad_norm": 5.578479766845703, "learning_rate": 4.847281056703388e-06, "loss": 3.9581, "step": 104 },
    { "epoch": 0.34201954397394135, "grad_norm": 6.32352352142334, "learning_rate": 4.844332557405649e-06, "loss": 4.3841, "step": 105 },
    { "epoch": 0.34527687296416937, "grad_norm": 6.242619514465332, "learning_rate": 4.841356780939638e-06, "loss": 4.0657, "step": 106 },
    { "epoch": 0.3485342019543974, "grad_norm": 5.670553684234619, "learning_rate": 4.838353761929605e-06, "loss": 4.1274, "step": 107 },
    { "epoch": 0.3517915309446254, "grad_norm": 5.2623748779296875, "learning_rate": 4.835323535316777e-06, "loss": 4.4132, "step": 108 },
    { "epoch": 0.3550488599348534, "grad_norm": 7.746681213378906, "learning_rate": 4.832266136358951e-06, "loss": 3.8874, "step": 109 },
    { "epoch": 0.3583061889250814, "grad_norm": 8.414247512817383, "learning_rate": 4.829181600630084e-06, "loss": 4.0962, "step": 110 },
    { "epoch": 0.36156351791530944, "grad_norm": 6.013370990753174, "learning_rate": 4.826069964019878e-06, "loss": 3.9995, "step": 111 },
    { "epoch": 0.36482084690553745, "grad_norm": 5.62821626663208, "learning_rate": 4.822931262733367e-06, "loss": 3.8944, "step": 112 },
    { "epoch": 0.36807817589576547, "grad_norm": 5.668822288513184, "learning_rate": 4.819765533290489e-06, "loss": 3.9762, "step": 113 },
    { "epoch": 0.3713355048859935, "grad_norm": 6.864443778991699, "learning_rate": 4.816572812525668e-06, "loss": 3.9365, "step": 114 },
    { "epoch": 0.3745928338762215, "grad_norm": 6.25184965133667, "learning_rate": 4.813353137587377e-06, "loss": 4.0144, "step": 115 },
    { "epoch": 0.3778501628664495, "grad_norm": 5.299489974975586, "learning_rate": 4.810106545937716e-06, "loss": 4.2232, "step": 116 },
    { "epoch": 0.3811074918566775, "grad_norm": 5.129780292510986, "learning_rate": 4.806833075351968e-06, "loss": 3.9329, "step": 117 },
    { "epoch": 0.38436482084690554, "grad_norm": 4.818124771118164, "learning_rate": 4.803532763918162e-06, "loss": 4.2085, "step": 118 },
    { "epoch": 0.38762214983713356, "grad_norm": 6.111238956451416, "learning_rate": 4.80020565003663e-06, "loss": 4.2062, "step": 119 },
    { "epoch": 0.39087947882736157, "grad_norm": 6.151260852813721, "learning_rate": 4.796851772419563e-06, "loss": 3.7084, "step": 120 },
    { "epoch": 0.3941368078175896, "grad_norm": 6.352945327758789, "learning_rate": 4.793471170090555e-06, "loss": 4.0596, "step": 121 },
    { "epoch": 0.3973941368078176, "grad_norm": 6.536398887634277, "learning_rate": 4.7900638823841525e-06, "loss": 4.1707, "step": 122 },
    { "epoch": 0.4006514657980456, "grad_norm": 7.450645923614502, "learning_rate": 4.786629948945397e-06, "loss": 3.7428, "step": 123 },
    { "epoch": 0.40390879478827363, "grad_norm": 7.927166938781738, "learning_rate": 4.783169409729363e-06, "loss": 3.6772, "step": 124 },
    { "epoch": 0.40716612377850164, "grad_norm": 4.440308094024658, "learning_rate": 4.779682305000689e-06, "loss": 3.8738, "step": 125 },
    { "epoch": 0.41042345276872966, "grad_norm": 6.223262310028076, "learning_rate": 4.7761686753331195e-06, "loss": 4.3548, "step": 126 },
    { "epoch": 0.41368078175895767, "grad_norm": 4.511125087738037, "learning_rate": 4.772628561609021e-06, "loss": 4.1852, "step": 127 },
    { "epoch": 0.4169381107491857, "grad_norm": 4.9265828132629395, "learning_rate": 4.769062005018916e-06, "loss": 4.0206, "step": 128 },
    { "epoch": 0.4201954397394137, "grad_norm": 5.572083950042725, "learning_rate": 4.765469047060996e-06, "loss": 3.8828, "step": 129 },
    { "epoch": 0.4234527687296417, "grad_norm": 5.799539089202881, "learning_rate": 4.761849729540643e-06, "loss": 3.7871, "step": 130 },
    { "epoch": 0.42671009771986973, "grad_norm": 5.041553020477295, "learning_rate": 4.758204094569942e-06, "loss": 4.2724, "step": 131 },
    { "epoch": 0.42996742671009774, "grad_norm": 4.930086135864258, "learning_rate": 4.754532184567193e-06, "loss": 4.2007, "step": 132 },
    { "epoch": 0.43322475570032576, "grad_norm": 4.0916056632995605, "learning_rate": 4.750834042256414e-06, "loss": 4.1914, "step": 133 },
    { "epoch": 0.4364820846905538, "grad_norm": 6.131953716278076, "learning_rate": 4.747109710666844e-06, "loss": 3.9056, "step": 134 },
    { "epoch": 0.43973941368078173, "grad_norm": 4.703607559204102, "learning_rate": 4.743359233132449e-06, "loss": 4.0255, "step": 135 },
    { "epoch": 0.44299674267100975, "grad_norm": 8.509166717529297, "learning_rate": 4.7395826532914056e-06, "loss": 4.4678, "step": 136 },
    { "epoch": 0.44625407166123776, "grad_norm": 4.48606538772583, "learning_rate": 4.735780015085607e-06, "loss": 3.6985, "step": 137 },
    { "epoch": 0.4495114006514658, "grad_norm": 4.333646297454834, "learning_rate": 4.73195136276014e-06, "loss": 4.2048, "step": 138 },
    { "epoch": 0.4527687296416938, "grad_norm": 7.2084197998046875, "learning_rate": 4.728096740862778e-06, "loss": 3.7695, "step": 139 },
    { "epoch": 0.4560260586319218, "grad_norm": 4.8456926345825195, "learning_rate": 4.72421619424346e-06, "loss": 4.1851, "step": 140 },
    { "epoch": 0.4592833876221498, "grad_norm": 4.744941234588623, "learning_rate": 4.720309768053766e-06, "loss": 4.2188, "step": 141 },
    { "epoch": 0.46254071661237783, "grad_norm": 6.611759662628174, "learning_rate": 4.716377507746397e-06, "loss": 3.9045, "step": 142 },
    { "epoch": 0.46579804560260585, "grad_norm": 7.642755031585693, "learning_rate": 4.712419459074643e-06, "loss": 3.4203, "step": 143 },
    { "epoch": 0.46905537459283386, "grad_norm": 4.797499656677246, "learning_rate": 4.708435668091849e-06, "loss": 3.9821, "step": 144 },
    { "epoch": 0.4723127035830619, "grad_norm": 5.430541515350342, "learning_rate": 4.704426181150884e-06, "loss": 3.7922, "step": 145 },
    { "epoch": 0.4755700325732899, "grad_norm": 5.740099906921387, "learning_rate": 4.700391044903597e-06, "loss": 4.0892, "step": 146 },
    { "epoch": 0.4788273615635179, "grad_norm": 4.150174617767334, "learning_rate": 4.696330306300277e-06, "loss": 4.0174, "step": 147 },
    { "epoch": 0.4820846905537459, "grad_norm": 8.157079696655273, "learning_rate": 4.692244012589107e-06, "loss": 3.9708, "step": 148 },
    { "epoch": 0.48534201954397393, "grad_norm": 5.4859466552734375, "learning_rate": 4.688132211315608e-06, "loss": 3.8235, "step": 149 },
    { "epoch": 0.48859934853420195, "grad_norm": 5.100396633148193, "learning_rate": 4.683994950322098e-06, "loss": 4.0108, "step": 150 },
    { "epoch": 0.49185667752442996, "grad_norm": 8.440217971801758, "learning_rate": 4.679832277747122e-06, "loss": 3.5701, "step": 151 },
    { "epoch": 0.495114006514658, "grad_norm": 6.211641788482666, "learning_rate": 4.675644242024902e-06, "loss": 3.7399, "step": 152 },
    { "epoch": 0.498371335504886, "grad_norm": 8.332008361816406, "learning_rate": 4.671430891884768e-06, "loss": 4.1424, "step": 153 },
    { "epoch": 0.501628664495114, "grad_norm": 6.8990607261657715, "learning_rate": 4.6671922763505915e-06, "loss": 3.9826, "step": 154 },
    { "epoch": 0.504885993485342, "grad_norm": 4.40015983581543, "learning_rate": 4.662928444740219e-06, "loss": 3.9535, "step": 155 },
    { "epoch": 0.50814332247557, "grad_norm": 6.208529949188232, "learning_rate": 4.658639446664893e-06, "loss": 3.6694, "step": 156 },
    { "epoch": 0.511400651465798, "grad_norm": 5.551063537597656, "learning_rate": 4.654325332028676e-06, "loss": 4.1019, "step": 157 },
    { "epoch": 0.5146579804560261, "grad_norm": 4.968599319458008, "learning_rate": 4.649986151027875e-06, "loss": 3.8717, "step": 158 },
    { "epoch": 0.5179153094462541, "grad_norm": 6.236921310424805, "learning_rate": 4.645621954150449e-06, "loss": 3.9015, "step": 159 },
    { "epoch": 0.5211726384364821, "grad_norm": 8.418625831604004, "learning_rate": 4.641232792175428e-06, "loss": 3.7186, "step": 160 },
    { "epoch": 0.5244299674267101, "grad_norm": 5.640598297119141, "learning_rate": 4.636818716172322e-06, "loss": 3.8935, "step": 161 },
    { "epoch": 0.5276872964169381, "grad_norm": 8.81381607055664, "learning_rate": 4.632379777500519e-06, "loss": 3.5697, "step": 162 },
    { "epoch": 0.5309446254071661, "grad_norm": 5.180728912353516, "learning_rate": 4.6279160278087e-06, "loss": 4.1232, "step": 163 },
    { "epoch": 0.5342019543973942, "grad_norm": 6.380191326141357, "learning_rate": 4.623427519034224e-06, "loss": 4.0181, "step": 164 },
    { "epoch": 0.5374592833876222, "grad_norm": 5.962952136993408, "learning_rate": 4.618914303402541e-06, "loss": 4.1417, "step": 165 },
    { "epoch": 0.5407166123778502, "grad_norm": 4.534554481506348, "learning_rate": 4.614376433426565e-06, "loss": 4.1021, "step": 166 },
    { "epoch": 0.5439739413680782, "grad_norm": 6.459926605224609, "learning_rate": 4.609813961906076e-06, "loss": 3.6299, "step": 167 },
    { "epoch": 0.5472312703583062, "grad_norm": 6.262177467346191, "learning_rate": 4.605226941927102e-06, "loss": 3.7101, "step": 168 },
    { "epoch": 0.5504885993485342, "grad_norm": 5.087495803833008, "learning_rate": 4.6006154268613015e-06, "loss": 3.8704, "step": 169 },
    { "epoch": 0.5537459283387622, "grad_norm": 5.9942193031311035, "learning_rate": 4.595979470365341e-06, "loss": 4.2554, "step": 170 },
    { "epoch": 0.5570032573289903, "grad_norm": 4.429585933685303, "learning_rate": 4.591319126380275e-06, "loss": 4.131, "step": 171 },
    { "epoch": 0.5602605863192183, "grad_norm": 4.608005523681641, "learning_rate": 4.586634449130911e-06, "loss": 3.7907, "step": 172 },
    { "epoch": 0.5635179153094463, "grad_norm": 6.586268424987793, "learning_rate": 4.581925493125187e-06, "loss": 4.0965, "step": 173 },
    { "epoch": 0.5667752442996743, "grad_norm": 6.221005916595459, "learning_rate": 4.577192313153531e-06, "loss": 3.733, "step": 174 },
    { "epoch": 0.5700325732899023, "grad_norm": 4.957738399505615, "learning_rate": 4.572434964288226e-06, "loss": 4.2729, "step": 175 },
    { "epoch": 0.5732899022801303, "grad_norm": 7.497585773468018, "learning_rate": 4.5676535018827685e-06, "loss": 3.7724, "step": 176 },
    { "epoch": 0.5765472312703583, "grad_norm": 5.472403526306152, "learning_rate": 4.562847981571226e-06, "loss": 4.2249, "step": 177 },
    { "epoch": 0.5798045602605864, "grad_norm": 4.5801568031311035, "learning_rate": 4.55801845926759e-06, "loss": 4.0099, "step": 178 },
    { "epoch": 0.5830618892508144, "grad_norm": 4.295859336853027, "learning_rate": 4.553164991165119e-06, "loss": 4.0598, "step": 179 },
    { "epoch": 0.5863192182410424, "grad_norm": 5.069422721862793, "learning_rate": 4.548287633735694e-06, "loss": 4.1214, "step": 180 },
    { "epoch": 0.5895765472312704, "grad_norm": 4.593508243560791, "learning_rate": 4.543386443729157e-06, "loss": 3.8567, "step": 181 },
    { "epoch": 0.5928338762214984, "grad_norm": 5.974512100219727, "learning_rate": 4.538461478172647e-06, "loss": 4.1337, "step": 182 },
    { "epoch": 0.5960912052117264, "grad_norm": 4.851910591125488, "learning_rate": 4.5335127943699445e-06, "loss": 4.0842, "step": 183 },
    { "epoch": 0.5993485342019544, "grad_norm": 7.21715784072876, "learning_rate": 4.528540449900799e-06, "loss": 3.7263, "step": 184 },
    { "epoch": 0.6026058631921825, "grad_norm": 5.761256217956543, "learning_rate": 4.523544502620258e-06, "loss": 3.7832, "step": 185 },
    { "epoch": 0.6058631921824105, "grad_norm": 6.854928493499756, "learning_rate": 4.518525010658001e-06, "loss": 3.858, "step": 186 },
    { "epoch": 0.6091205211726385, "grad_norm": 6.20596981048584, "learning_rate": 4.513482032417656e-06, "loss": 3.7106, "step": 187 },
    { "epoch": 0.6123778501628665, "grad_norm": 5.5723161697387695, "learning_rate": 4.508415626576122e-06, "loss": 3.9256, "step": 188 },
    { "epoch": 0.6156351791530945, "grad_norm": 5.427545547485352, "learning_rate": 4.503325852082886e-06, "loss": 4.0918, "step": 189 },
    { "epoch": 0.6188925081433225, "grad_norm": 4.53942346572876, "learning_rate": 4.4982127681593414e-06, "loss": 3.8621, "step": 190 },
    { "epoch": 0.6221498371335505, "grad_norm": 6.003444194793701, "learning_rate": 4.493076434298091e-06, "loss": 3.7557, "step": 191 },
    { "epoch": 0.6254071661237784, "grad_norm": 6.072404384613037, "learning_rate": 4.487916910262263e-06, "loss": 3.7174, "step": 192 },
    { "epoch": 0.6286644951140065, "grad_norm": 5.752810955047607, "learning_rate": 4.48273425608481e-06, "loss": 4.1727, "step": 193 },
    { "epoch": 0.6319218241042345, "grad_norm": 5.078545570373535, "learning_rate": 4.4775285320678106e-06, "loss": 4.2206, "step": 194 },
    { "epoch": 0.6351791530944625, "grad_norm": 5.559292316436768, "learning_rate": 4.4722997987817714e-06, "loss": 4.1013, "step": 195 },
    { "epoch": 0.6384364820846905, "grad_norm": 4.91386604309082, "learning_rate": 4.467048117064921e-06, "loss": 3.8137, "step": 196 },
    { "epoch": 0.6416938110749185, "grad_norm": 5.978386402130127, "learning_rate": 4.461773548022502e-06, "loss": 3.9084, "step": 197 },
    { "epoch": 0.6449511400651465, "grad_norm": 5.226948261260986, "learning_rate": 4.4564761530260545e-06, "loss": 4.1762, "step": 198 },
    { "epoch": 0.6482084690553745, "grad_norm": 5.278410911560059, "learning_rate": 4.451155993712711e-06, "loss": 4.245, "step": 199 },
    { "epoch": 0.6514657980456026, "grad_norm": 5.923932075500488, "learning_rate": 4.445813131984476e-06, "loss": 3.6665, "step": 200 },
    { "epoch": 0.6547231270358306, "grad_norm": 5.126694202423096, "learning_rate": 4.440447630007503e-06, "loss": 3.776, "step": 201 },
    { "epoch": 0.6579804560260586, "grad_norm": 5.655832767486572, "learning_rate": 4.435059550211371e-06, "loss": 4.1377, "step": 202 },
    { "epoch": 0.6612377850162866, "grad_norm": 5.701308250427246, "learning_rate": 4.429648955288366e-06, "loss": 3.9133, "step": 203 },
    { "epoch": 0.6644951140065146, "grad_norm": 5.6329545974731445, "learning_rate": 4.42421590819274e-06, "loss": 4.049, "step": 204 },
    { "epoch": 0.6677524429967426, "grad_norm": 5.668838024139404, "learning_rate": 4.418760472139988e-06, "loss": 3.7711, "step": 205 },
    { "epoch": 0.6710097719869706, "grad_norm": 4.098300933837891, "learning_rate": 4.413282710606107e-06, "loss": 4.0585, "step": 206 },
    { "epoch": 0.6742671009771987, "grad_norm": 5.279233455657959, "learning_rate": 4.407782687326859e-06, "loss": 3.6991, "step": 207 },
    { "epoch": 0.6775244299674267, "grad_norm": 6.012117385864258, "learning_rate": 4.4022604662970305e-06, "loss": 4.0131, "step": 208 },
    { "epoch": 0.6807817589576547, "grad_norm": 6.257996559143066, "learning_rate": 4.3967161117696864e-06, "loss": 3.9206, "step": 209 },
    { "epoch": 0.6840390879478827, "grad_norm": 4.88628625869751, "learning_rate": 4.391149688255423e-06, "loss": 3.8542, "step": 210 },
    { "epoch": 0.6872964169381107, "grad_norm": 4.90071964263916, "learning_rate": 4.385561260521618e-06, "loss": 3.87, "step": 211 },
    { "epoch": 0.6905537459283387, "grad_norm": 5.792081832885742, "learning_rate": 4.379950893591675e-06, "loss": 4.042, "step": 212 },
    { "epoch": 0.6938110749185668, "grad_norm": 5.57878303527832, "learning_rate": 4.3743186527442685e-06, "loss": 4.2162, "step": 213 },
    { "epoch": 0.6970684039087948, "grad_norm": 7.439386367797852, "learning_rate": 4.368664603512586e-06, "loss": 3.6792, "step": 214 },
    { "epoch": 0.7003257328990228, "grad_norm": 4.927967071533203, "learning_rate": 4.36298881168356e-06, "loss": 3.9544, "step": 215 },
    { "epoch": 0.7035830618892508, "grad_norm": 4.070362567901611, "learning_rate": 4.35729134329711e-06, "loss": 4.1422, "step": 216 },
    { "epoch": 0.7068403908794788, "grad_norm": 3.362583875656128, "learning_rate": 4.351572264645366e-06, "loss": 3.9582, "step": 217 },
    { "epoch": 0.7100977198697068, "grad_norm": 6.386814117431641, "learning_rate": 4.345831642271906e-06, "loss": 4.3383, "step": 218 },
    { "epoch": 0.7133550488599348, "grad_norm": 4.9985880851745605, "learning_rate": 4.3400695429709725e-06, "loss": 4.0183, "step": 219 },
    { "epoch": 0.7166123778501629, "grad_norm": 4.032768249511719, "learning_rate": 4.3342860337867045e-06, "loss": 3.978, "step": 220 },
    { "epoch": 0.7198697068403909, "grad_norm": 8.807585716247559, "learning_rate": 4.328481182012349e-06, "loss": 3.6196, "step": 221 },
    { "epoch": 0.7231270358306189, "grad_norm": 6.042029857635498, "learning_rate": 4.3226550551894815e-06, "loss": 3.9239, "step": 222 },
    { "epoch": 0.7263843648208469, "grad_norm": 5.543368816375732, "learning_rate": 4.316807721107226e-06, "loss": 3.7864, "step": 223 },
    { "epoch": 0.7296416938110749, "grad_norm": 5.127748966217041, "learning_rate": 4.310939247801455e-06, "loss": 4.0228, "step": 224 },
    { "epoch": 0.7328990228013029, "grad_norm": 4.826234817504883, "learning_rate": 4.305049703554005e-06, "loss": 3.9928, "step": 225 },
    { "epoch": 0.7361563517915309, "grad_norm": 7.3278632164001465, "learning_rate": 4.299139156891883e-06, "loss": 3.9358, "step": 226 },
    { "epoch": 0.739413680781759, "grad_norm": 4.310771942138672, "learning_rate": 4.293207676586464e-06, "loss": 3.9118, "step": 227 },
    { "epoch": 0.742671009771987, "grad_norm": 6.319010257720947, "learning_rate": 4.287255331652694e-06, "loss": 3.8571, "step": 228 },
    { "epoch": 0.745928338762215, "grad_norm": 6.4100422859191895, "learning_rate": 4.281282191348289e-06, "loss": 4.2424, "step": 229 },
    { "epoch": 0.749185667752443, "grad_norm": 4.665522575378418, "learning_rate": 4.275288325172924e-06, "loss": 4.0637, "step": 230 },
    { "epoch": 0.752442996742671, "grad_norm": 6.5943684577941895, "learning_rate": 4.269273802867427e-06, "loss": 3.4363, "step": 231 },
    { "epoch": 0.755700325732899, "grad_norm": 6.329523086547852, "learning_rate": 4.26323869441297e-06, "loss": 4.1044, "step": 232 },
    { "epoch": 0.758957654723127, "grad_norm": 6.649689674377441, "learning_rate": 4.257183070030252e-06, "loss": 3.7455, "step": 233 },
    { "epoch": 0.762214983713355, "grad_norm": 5.669809341430664, "learning_rate": 4.2511070001786806e-06, "loss": 3.9673, "step": 234 },
    { "epoch": 0.7654723127035831, "grad_norm": 5.982848644256592, "learning_rate": 4.245010555555554e-06, "loss": 3.8431, "step": 235 },
    { "epoch": 0.7687296416938111, "grad_norm": 4.247102737426758, "learning_rate": 4.23889380709524e-06, "loss": 4.1477, "step": 236 },
    { "epoch": 0.7719869706840391, "grad_norm": 6.063522815704346, "learning_rate": 4.232756825968348e-06, "loss": 4.0559, "step": 237 },
    { "epoch": 0.7752442996742671, "grad_norm": 7.113022804260254, "learning_rate": 4.226599683580902e-06, "loss": 3.4791, "step": 238 },
    { "epoch": 0.7785016286644951, "grad_norm": 7.49078369140625, "learning_rate": 4.22042245157351e-06, "loss": 4.0812, "step": 239 },
    { "epoch": 0.7817589576547231, "grad_norm": 6.068420886993408, "learning_rate": 4.214225201820529e-06, "loss": 3.6831, "step": 240 },
    { "epoch": 0.7850162866449512, "grad_norm": 4.464322566986084, "learning_rate": 4.20800800642923e-06, "loss": 3.9978, "step": 241 },
    { "epoch": 0.7882736156351792, "grad_norm": 4.343051910400391, "learning_rate": 4.201770937738962e-06, "loss": 4.0457, "step": 242 },
    { "epoch": 0.7915309446254072, "grad_norm": 5.923551082611084, "learning_rate": 4.195514068320302e-06, "loss": 3.6406, "step": 243 },
    { "epoch": 0.7947882736156352, "grad_norm": 5.177840232849121, "learning_rate": 4.1892374709742186e-06, "loss": 3.6954, "step": 244 },
    { "epoch": 0.7980456026058632, "grad_norm": 5.686809062957764, "learning_rate": 4.182941218731222e-06, "loss": 3.8836, "step": 245 },
    { "epoch": 0.8013029315960912, "grad_norm": 4.131414413452148, "learning_rate": 4.176625384850516e-06, "loss": 3.6393, "step": 246 },
    { "epoch": 0.8045602605863192, "grad_norm": 4.142212390899658, "learning_rate": 4.170290042819137e-06, "loss": 3.8273, "step": 247 },
    { "epoch": 0.8078175895765473, "grad_norm": 5.210618495941162, "learning_rate": 4.163935266351115e-06, "loss": 3.7623, "step": 248 },
    { "epoch": 0.8110749185667753, "grad_norm": 4.716037273406982, "learning_rate": 4.1575611293866025e-06, "loss": 4.2419, "step": 249 },
    { "epoch": 0.8143322475570033, "grad_norm": 7.191868782043457, "learning_rate": 4.151167706091017e-06, "loss": 4.1355, "step": 250 },
    { "epoch": 0.8175895765472313, "grad_norm": 5.328645706176758, "learning_rate": 4.1447550708541815e-06, "loss": 3.986, "step": 251 },
    { "epoch": 0.8208469055374593, "grad_norm": 4.756157875061035, "learning_rate": 4.138323298289456e-06, "loss": 4.0062, "step": 252 },
    { "epoch": 0.8241042345276873, "grad_norm": 5.154695987701416, "learning_rate": 4.131872463232872e-06, "loss": 3.9789, "step": 253 },
    { "epoch": 0.8273615635179153, "grad_norm": 5.032461643218994, "learning_rate": 4.125402640742259e-06, "loss": 3.7178, "step": 254 },
    { "epoch": 0.8306188925081434, "grad_norm": 4.375637054443359, "learning_rate": 4.11891390609637e-06, "loss": 3.9338, "step": 255 },
    { "epoch": 0.8338762214983714, "grad_norm": 7.046738624572754, "learning_rate": 4.112406334794014e-06, "loss": 3.9795, "step": 256 },
    { "epoch": 0.8371335504885994, "grad_norm": 5.982004642486572, "learning_rate": 4.105880002553164e-06, "loss": 4.0637, "step": 257 },
    { "epoch": 0.8403908794788274, "grad_norm": 5.773483753204346, "learning_rate": 4.099334985310089e-06, "loss": 3.6393, "step": 258 },
    { "epoch": 0.8436482084690554, "grad_norm": 4.926510810852051, "learning_rate": 4.092771359218462e-06, "loss": 4.0211, "step": 259 },
    { "epoch": 0.8469055374592834, "grad_norm": 6.6061530113220215, "learning_rate": 4.086189200648476e-06, "loss": 3.9733, "step": 260 },
    { "epoch": 0.8501628664495114, "grad_norm": 5.56645393371582, "learning_rate": 4.079588586185961e-06, "loss": 4.061, "step": 261 },
    { "epoch": 0.8534201954397395, "grad_norm": 5.1084675788879395, "learning_rate": 4.072969592631481e-06, "loss": 3.8312, "step": 262 },
    { "epoch": 0.8566775244299675, "grad_norm": 6.386423110961914, "learning_rate": 4.066332296999455e-06, "loss": 3.6092, "step": 263 },
    { "epoch": 0.8599348534201955, "grad_norm": 5.3035688400268555, "learning_rate": 4.0596767765172465e-06, "loss": 3.8357, "step": 264 },
    { "epoch": 0.8631921824104235, "grad_norm": 4.10700798034668, "learning_rate": 4.053003108624276e-06, "loss": 3.8495, "step": 265 },
    { "epoch": 0.8664495114006515, "grad_norm": 7.300857067108154, "learning_rate": 4.046311370971114e-06, "loss": 3.754, "step": 266 },
    { "epoch": 0.8697068403908795, "grad_norm": 5.575901508331299, "learning_rate": 4.039601641418582e-06, "loss": 3.9931, "step": 267 },
    { "epoch": 0.8729641693811075, "grad_norm": 4.204562664031982, "learning_rate": 4.032873998036841e-06, "loss": 3.9574, "step": 268 },
    { "epoch": 0.8762214983713354, "grad_norm": 6.569215297698975, "learning_rate": 4.026128519104484e-06, "loss": 3.7635, "step": 269 },
    { "epoch": 0.8794788273615635, "grad_norm": 6.4127020835876465, "learning_rate": 4.019365283107634e-06, "loss": 3.8189, "step": 270 },
    { "epoch": 0.8827361563517915, "grad_norm": 4.822223663330078, "learning_rate": 4.012584368739017e-06, "loss": 3.7812, "step": 271 },
    { "epoch": 0.8859934853420195, "grad_norm": 5.920554161071777, "learning_rate": 4.005785854897057e-06, "loss": 3.9864, "step": 272 },
    { "epoch": 0.8892508143322475, "grad_norm": 5.076132774353027, "learning_rate": 3.998969820684954e-06, "loss": 3.8068, "step": 273 },
    { "epoch": 0.8925081433224755, "grad_norm": 5.978939533233643, "learning_rate": 3.992136345409765e-06, "loss": 3.6497, "step": 274 },
    { "epoch": 0.8957654723127035, "grad_norm": 5.440021514892578, "learning_rate": 3.985285508581475e-06, "loss": 3.9533, "step": 275 },
    { "epoch": 0.8990228013029316, "grad_norm": 4.745472431182861, "learning_rate": 3.9784173899120836e-06, "loss": 4.1676, "step": 276 },
    { "epoch": 0.9022801302931596, "grad_norm": 5.8460259437561035, "learning_rate": 3.971532069314666e-06, "loss": 3.698, "step": 277 },
    { "epoch": 0.9055374592833876, "grad_norm": 7.4045820236206055, "learning_rate": 3.964629626902452e-06, "loss": 3.573, "step": 278 },
    { "epoch": 0.9087947882736156, "grad_norm": 6.916861534118652, "learning_rate": 3.957710142987886e-06, "loss": 3.5988, "step": 279 },
    { "epoch": 0.9120521172638436, "grad_norm": 6.336071014404297, "learning_rate": 3.9507736980817e-06, "loss": 3.4439, "step": 280 },
    { "epoch": 0.9153094462540716, "grad_norm": 6.711271286010742, "learning_rate": 3.943820372891972e-06, "loss": 3.9141, "step": 281 },
    { "epoch": 0.9185667752442996, "grad_norm": 4.0247802734375, "learning_rate": 3.936850248323189e-06, "loss": 4.0297, "step": 282 },
    { "epoch": 0.9218241042345277, "grad_norm": 7.413387775421143, "learning_rate": 3.929863405475303e-06, "loss": 3.6856, "step": 283 },
    { "epoch": 0.9250814332247557, "grad_norm": 6.697014808654785, "learning_rate": 3.92285992564279e-06, "loss": 3.6538, "step": 284 },
    { "epoch": 0.9283387622149837, "grad_norm": 6.154468059539795, "learning_rate": 3.915839890313706e-06, "loss": 3.2771, "step": 285 },
    { "epoch": 0.9315960912052117, "grad_norm": 7.574310779571533, "learning_rate": 3.908803381168732e-06, "loss": 4.1216, "step": 286 },
    { "epoch": 0.9348534201954397, "grad_norm": 3.7167022228240967, "learning_rate": 3.901750480080232e-06, "loss": 3.8945, "step": 287 },
    { "epoch": 0.9381107491856677, "grad_norm": 5.904863357543945, "learning_rate": 3.894681269111292e-06, "loss": 3.9315, "step": 288 },
    { "epoch": 0.9413680781758957, "grad_norm": 6.429924011230469, "learning_rate": 3.887595830514775e-06, "loss": 4.3139, "step": 289 },
    { "epoch": 0.9446254071661238, "grad_norm": 6.790943622589111, "learning_rate": 3.880494246732352e-06, "loss": 3.5595, "step": 290 },
    { "epoch": 0.9478827361563518, "grad_norm": 4.0954742431640625, "learning_rate": 3.873376600393555e-06, "loss": 4.0051, "step": 291 },
    { "epoch": 0.9511400651465798, "grad_norm": 6.410258769989014, "learning_rate": 3.866242974314805e-06, "loss": 3.5565, "step": 292 },
    { "epoch": 0.9543973941368078, "grad_norm": 6.833970546722412, "learning_rate": 3.859093451498456e-06, "loss": 4.0412, "step": 293 },
    { "epoch": 0.9576547231270358, "grad_norm": 8.363605499267578, "learning_rate": 3.851928115131826e-06, "loss": 4.552, "step": 294 },
    { "epoch": 0.9609120521172638, "grad_norm": 6.14967679977417, "learning_rate": 3.844747048586228e-06, "loss": 3.828, "step": 295 },
    { "epoch": 0.9641693811074918, "grad_norm": 7.708178520202637, "learning_rate": 3.8375503354160036e-06, "loss": 3.4753, "step": 296 },
    { "epoch": 0.9674267100977199, "grad_norm": 6.226506233215332, "learning_rate": 3.830338059357546e-06, "loss": 4.2654, "step": 297 },
    { "epoch": 0.9706840390879479, "grad_norm": 5.9939775466918945, "learning_rate": 3.823110304328331e-06, "loss": 3.7368, "step": 298 },
    { "epoch": 0.9739413680781759, "grad_norm": 5.407865047454834, "learning_rate": 3.815867154425936e-06, "loss": 3.8956, "step": 299 },
    { "epoch": 0.9771986970684039, "grad_norm": 5.42250919342041, "learning_rate": 3.808608693927065e-06, "loss": 3.9053, "step": 300 },
    { "epoch": 0.9804560260586319, "grad_norm": 6.157458782196045, "learning_rate": 3.801335007286564e-06, "loss": 3.7298, "step": 301 },
    { "epoch": 0.9837133550488599, "grad_norm": 4.539289951324463, "learning_rate": 3.7940461791364425e-06, "loss": 3.8612, "step": 302 },
    { "epoch": 0.9869706840390879, "grad_norm": 4.508944511413574, "learning_rate": 3.7867422942848877e-06, "loss": 3.9865, "step": 303 },
    { "epoch": 0.990228013029316, "grad_norm": 6.667001247406006, "learning_rate": 3.779423437715274e-06, "loss": 4.2309, "step": 304 },
    { "epoch": 0.993485342019544, "grad_norm": 4.5301289558410645, "learning_rate": 3.772089694585181e-06, "loss": 3.9932, "step": 305 },
    { "epoch": 0.996742671009772, "grad_norm": 6.858119010925293, "learning_rate": 3.764741150225396e-06, "loss": 3.7958, "step": 306 },
    { "epoch": 1.0, "grad_norm": 8.143805503845215, "learning_rate": 3.757377890138927e-06, "loss": 4.2006, "step": 307 },
    { "epoch": 1.003257328990228, "grad_norm": 5.382366180419922, "learning_rate": 3.7500000000000005e-06, "loss": 3.4269, "step": 308 },
    { "epoch": 1.006514657980456, "grad_norm": 4.062572002410889, "learning_rate": 3.742607565653073e-06, "loss": 3.8892, "step": 309 },
    { "epoch": 1.009771986970684, "grad_norm": 5.761571884155273, "learning_rate": 3.7352006731118266e-06, "loss": 3.8548, "step": 310 },
    { "epoch": 1.013029315960912, "grad_norm": 5.544031620025635, "learning_rate": 3.7277794085581697e-06, "loss": 3.5327, "step": 311 },
    { "epoch": 1.01628664495114, "grad_norm": 5.719707489013672, "learning_rate": 3.7203438583412343e-06, "loss": 3.6865, "step": 312 },
    { "epoch": 1.019543973941368, "grad_norm": 3.956254720687866, "learning_rate": 3.712894108976372e-06, "loss": 3.7916, "step": 313 },
    { "epoch": 1.022801302931596, "grad_norm": 5.764235019683838, "learning_rate": 3.7054302471441462e-06, "loss": 3.7351, "step": 314 },
    { "epoch": 1.0260586319218241, "grad_norm": 6.1902570724487305, "learning_rate": 3.697952359689324e-06, "loss": 3.6078, "step": 315 },
    { "epoch": 1.0293159609120521, "grad_norm": 6.540966987609863, "learning_rate": 3.690460533619866e-06, "loss": 3.4576, "step": 316 },
    { "epoch": 1.0325732899022801, "grad_norm": 4.03215217590332, "learning_rate": 3.6829548561059133e-06, "loss": 3.7704, "step": 317 },
    { "epoch": 1.0358306188925082, "grad_norm": 4.043310642242432, "learning_rate": 3.6754354144787734e-06, "loss": 3.6919, "step": 318 },
    { "epoch": 1.0390879478827362, "grad_norm": 6.889205455780029, "learning_rate": 3.6679022962299054e-06, "loss": 3.456, "step": 319 },
    { "epoch": 1.0423452768729642, "grad_norm": 5.511551856994629, "learning_rate": 3.660355589009901e-06, "loss": 3.6732, "step": 320 },
    { "epoch": 1.0456026058631922, "grad_norm": 5.021662712097168, "learning_rate": 3.652795380627462e-06, "loss": 3.8559, "step": 321 },
    { "epoch": 1.0488599348534202, "grad_norm": 7.119377613067627, "learning_rate": 3.6452217590483847e-06, "loss": 3.4773, "step": 322 },
    { "epoch": 1.0521172638436482, "grad_norm": 6.943653583526611, "learning_rate": 3.63763481239453e-06, "loss": 3.3563, "step": 323 },
    { "epoch": 1.0553745928338762, "grad_norm": 6.504844665527344, "learning_rate": 3.6300346289428025e-06, "loss": 3.1729, "step": 324 },
    { "epoch": 1.0586319218241043, "grad_norm": 7.149539947509766, "learning_rate": 3.622421297124122e-06, "loss": 3.1502, "step": 325 },
    { "epoch": 1.0618892508143323, "grad_norm": 6.777254104614258, "learning_rate": 3.6147949055223925e-06, "loss": 3.6956, "step": 326 },
    { "epoch": 1.0651465798045603, "grad_norm": 7.371313571929932, "learning_rate": 3.607155542873475e-06, "loss": 2.7985, "step": 327 },
    { "epoch": 1.0684039087947883, "grad_norm": 5.818310737609863, "learning_rate": 3.5995032980641538e-06, "loss": 3.3967, "step": 328 },
    { "epoch": 1.0716612377850163, "grad_norm": 5.086653709411621, "learning_rate": 3.5918382601311003e-06, "loss": 3.7434, "step": 329 },
    { "epoch": 1.0749185667752443, "grad_norm": 5.7530083656311035, "learning_rate": 3.5841605182598393e-06, "loss": 3.4631, "step": 330 },
    { "epoch": 1.0781758957654723, "grad_norm": 4.993615627288818, "learning_rate": 3.5764701617837118e-06, "loss": 3.4732, "step": 331 },
    { "epoch": 1.0814332247557004, "grad_norm": 4.600963592529297, "learning_rate": 3.568767280182831e-06, "loss": 3.7875, "step": 332 },
    { "epoch": 1.0846905537459284, "grad_norm": 7.0127081871032715, "learning_rate": 3.561051963083048e-06, "loss": 3.7191, "step": 333 },
    { "epoch": 1.0879478827361564, "grad_norm": 5.862270832061768, "learning_rate": 3.5533243002549044e-06, "loss": 3.4695, "step": 334 },
    { "epoch": 1.0912052117263844, "grad_norm": 4.856973648071289, "learning_rate": 3.5455843816125878e-06, "loss": 3.8399, "step": 335 },
    { "epoch": 1.0944625407166124, "grad_norm": 6.645925998687744, "learning_rate": 3.5378322972128886e-06, "loss": 3.502, "step": 336 },
    { "epoch": 1.0977198697068404, "grad_norm": 4.62603759765625, "learning_rate": 3.530068137254148e-06, "loss": 3.8772, "step": 337 },
    { "epoch": 1.1009771986970684, "grad_norm": 4.81012487411499, "learning_rate": 3.5222919920752126e-06, "loss": 3.5262, "step": 338 },
    { "epoch": 1.1042345276872965, "grad_norm": 8.265521049499512, "learning_rate": 3.5145039521543806e-06, "loss": 3.8737, "step": 339 },
    { "epoch": 1.1074918566775245, "grad_norm": 3.3320538997650146, "learning_rate": 3.50670410810835e-06, "loss": 3.7063, "step": 340 },
    { "epoch": 1.1107491856677525, "grad_norm": 6.44738245010376, "learning_rate": 3.498892550691164e-06, "loss": 3.8521, "step": 341 },
    { "epoch": 1.1140065146579805, "grad_norm": 5.850455284118652, "learning_rate": 3.491069370793155e-06, "loss": 3.2702, "step": 342 },
    { "epoch": 1.1172638436482085, "grad_norm": 4.26442289352417, "learning_rate": 3.4832346594398888e-06, "loss": 3.6619, "step": 343 },
    { "epoch": 1.1205211726384365, "grad_norm": 5.107183456420898, "learning_rate": 3.475388507791101e-06, "loss": 3.335, "step": 344 },
    { "epoch": 1.1237785016286646, "grad_norm": 4.226919174194336, "learning_rate": 3.4675310071396425e-06, "loss": 3.7333, "step": 345 },
    { "epoch": 1.1270358306188926, "grad_norm": 6.164919853210449, "learning_rate": 3.4596622489104113e-06, "loss": 3.8968, "step": 346 },
    { "epoch": 1.1302931596091206, "grad_norm": 5.433590888977051, "learning_rate": 3.451782324659293e-06, "loss": 3.4041, "step": 347 },
    { "epoch": 1.1335504885993486, "grad_norm": 4.933192253112793, "learning_rate": 3.443891326072093e-06, "loss": 3.5943, "step": 348 },
    { "epoch": 1.1368078175895766, "grad_norm": 5.232339859008789, "learning_rate": 3.4359893449634713e-06, "loss": 3.4548, "step": 349 },
    { "epoch": 1.1400651465798046, "grad_norm": 6.424132823944092, "learning_rate": 3.428076473275873e-06, "loss": 3.3356, "step": 350 },
    { "epoch": 1.1433224755700326, "grad_norm": 5.729311943054199, "learning_rate": 3.42015280307846e-06, "loss": 3.2489, "step": 351 },
    { "epoch": 1.1465798045602607, "grad_norm": 4.083253860473633, "learning_rate": 3.4122184265660398e-06, "loss": 3.3285, "step": 352 },
    { "epoch": 1.1498371335504887, "grad_norm": 5.305028915405273, "learning_rate": 3.404273436057988e-06, "loss": 3.7341, "step": 353 },
    { "epoch": 1.1530944625407167, "grad_norm": 6.984644889831543, "learning_rate": 3.39631792399718e-06, "loss": 3.054, "step": 354 },
    { "epoch": 1.1563517915309447, "grad_norm": 5.173111438751221, "learning_rate": 3.3883519829489155e-06, "loss": 3.7036, "step": 355 },
    { "epoch": 1.1596091205211727, "grad_norm": 5.271275043487549, "learning_rate": 3.3803757055998354e-06, "loss": 3.6782, "step": 356 },
    { "epoch": 1.1628664495114007, "grad_norm": 4.47402811050415, "learning_rate": 3.37238918475685e-06, "loss": 3.4326, "step": 357 },
    { "epoch": 1.1661237785016287, "grad_norm": 5.682492733001709, "learning_rate": 3.3643925133460563e-06, "loss": 3.4877, "step": 358 },
    { "epoch": 1.1693811074918568, "grad_norm": 4.897508144378662, "learning_rate": 3.356385784411656e-06, "loss": 3.4859, "step": 359 },
    { "epoch": 1.1726384364820848, "grad_norm": 7.068917274475098, "learning_rate": 3.348369091114873e-06, "loss": 3.7516, "step": 360 },
    { "epoch": 1.1758957654723128, "grad_norm": 5.376421928405762, "learning_rate": 3.3403425267328715e-06, "loss": 3.6323, "step": 361 },
    { "epoch": 1.1791530944625408, "grad_norm": 4.889153480529785, "learning_rate": 3.3323061846576692e-06, "loss": 3.5759, "step": 362 },
    { "epoch": 1.1824104234527688, "grad_norm": 4.02139139175415, "learning_rate": 3.324260158395051e-06, "loss": 3.8266, "step": 363 },
    { "epoch": 1.1856677524429968, "grad_norm": 5.562239170074463, "learning_rate": 3.3162045415634793e-06, "loss": 3.7196, "step": 364 },
    { "epoch": 1.1889250814332248, "grad_norm": 6.0636725425720215, "learning_rate": 3.308139427893008e-06, "loss": 3.4255, "step": 365 },
    { "epoch": 1.1921824104234529, "grad_norm": 4.625339031219482, "learning_rate": 3.3000649112241904e-06, "loss": 3.7757, "step": 366 },
    { "epoch": 1.1954397394136809, "grad_norm": 5.8719305992126465, "learning_rate": 3.291981085506987e-06, "loss": 3.7622, "step": 367 },
    { "epoch": 1.1986970684039089, "grad_norm": 7.395297527313232, "learning_rate": 3.2838880447996697e-06, "loss": 3.1235, "step": 368 },
    { "epoch": 1.201954397394137, "grad_norm": 4.938228607177734, "learning_rate": 3.2757858832677346e-06, "loss": 3.4661, "step": 369 },
    { "epoch": 1.205211726384365, "grad_norm": 4.547780990600586, "learning_rate": 3.2676746951827985e-06, "loss": 3.4966, "step": 370 },
    { "epoch": 1.208469055374593, "grad_norm": 5.542564868927002, "learning_rate": 3.2595545749215073e-06, "loss": 3.6376, "step": 371 },
    { "epoch": 1.211726384364821, "grad_norm": 5.063738822937012, "learning_rate": 3.251425616964436e-06,
|
"loss": 3.7234, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.214983713355049, |
|
"grad_norm": 7.921319961547852, |
|
"learning_rate": 3.243287915894987e-06, |
|
"loss": 3.2747, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.218241042345277, |
|
"grad_norm": 4.5900044441223145, |
|
"learning_rate": 3.2351415663982956e-06, |
|
"loss": 3.8113, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.221498371335505, |
|
"grad_norm": 5.936985969543457, |
|
"learning_rate": 3.2269866632601227e-06, |
|
"loss": 3.4585, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.224755700325733, |
|
"grad_norm": 5.615612983703613, |
|
"learning_rate": 3.2188233013657545e-06, |
|
"loss": 3.4545, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.228013029315961, |
|
"grad_norm": 4.169675827026367, |
|
"learning_rate": 3.2106515756988964e-06, |
|
"loss": 3.7711, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.231270358306189, |
|
"grad_norm": 6.375253677368164, |
|
"learning_rate": 3.202471581340572e-06, |
|
"loss": 3.6459, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.234527687296417, |
|
"grad_norm": 5.786118030548096, |
|
"learning_rate": 3.1942834134680123e-06, |
|
"loss": 3.4114, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.237785016286645, |
|
"grad_norm": 5.953446865081787, |
|
"learning_rate": 3.186087167353551e-06, |
|
"loss": 3.278, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.241042345276873, |
|
"grad_norm": 7.0892462730407715, |
|
"learning_rate": 3.177882938363514e-06, |
|
"loss": 3.4727, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.244299674267101, |
|
"grad_norm": 7.629514217376709, |
|
"learning_rate": 3.1696708219571128e-06, |
|
"loss": 2.858, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.247557003257329, |
|
"grad_norm": 8.90638256072998, |
|
"learning_rate": 3.1614509136853295e-06, |
|
"loss": 3.2422, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.2508143322475571, |
|
"grad_norm": 5.899227142333984, |
|
"learning_rate": 3.1532233091898094e-06, |
|
"loss": 3.5167, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.2540716612377851, |
|
"grad_norm": 6.004745960235596, |
|
"learning_rate": 3.144988104201745e-06, |
|
"loss": 3.4772, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.2573289902280131, |
|
"grad_norm": 5.448160648345947, |
|
"learning_rate": 3.1367453945407646e-06, |
|
"loss": 3.615, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.2605863192182412, |
|
"grad_norm": 7.477326393127441, |
|
"learning_rate": 3.1284952761138137e-06, |
|
"loss": 3.2672, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.2638436482084692, |
|
"grad_norm": 6.218169212341309, |
|
"learning_rate": 3.1202378449140437e-06, |
|
"loss": 3.4072, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.2671009771986972, |
|
"grad_norm": 6.035054683685303, |
|
"learning_rate": 3.111973197019693e-06, |
|
"loss": 3.4844, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.2703583061889252, |
|
"grad_norm": 5.626272201538086, |
|
"learning_rate": 3.1037014285929672e-06, |
|
"loss": 3.7772, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2736156351791532, |
|
"grad_norm": 3.958400011062622, |
|
"learning_rate": 3.095422635878923e-06, |
|
"loss": 3.8474, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.2768729641693812, |
|
"grad_norm": 6.569092750549316, |
|
"learning_rate": 3.087136915204347e-06, |
|
"loss": 3.4613, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.2801302931596092, |
|
"grad_norm": 4.738685607910156, |
|
"learning_rate": 3.0788443629766348e-06, |
|
"loss": 3.6467, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.2833876221498373, |
|
"grad_norm": 4.849681377410889, |
|
"learning_rate": 3.0705450756826707e-06, |
|
"loss": 3.7038, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.2866449511400653, |
|
"grad_norm": 7.685214519500732, |
|
"learning_rate": 3.0622391498877012e-06, |
|
"loss": 3.344, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.2899022801302933, |
|
"grad_norm": 5.982230186462402, |
|
"learning_rate": 3.053926682234219e-06, |
|
"loss": 3.3405, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.2931596091205213, |
|
"grad_norm": 4.924343585968018, |
|
"learning_rate": 3.0456077694408292e-06, |
|
"loss": 3.6514, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.2964169381107493, |
|
"grad_norm": 5.400077819824219, |
|
"learning_rate": 3.0372825083011314e-06, |
|
"loss": 3.3848, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.2996742671009773, |
|
"grad_norm": 6.141596794128418, |
|
"learning_rate": 3.0289509956825878e-06, |
|
"loss": 3.3221, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.3029315960912053, |
|
"grad_norm": 4.01882266998291, |
|
"learning_rate": 3.020613328525402e-06, |
|
"loss": 3.5865, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3061889250814334, |
|
"grad_norm": 4.70369815826416, |
|
"learning_rate": 3.0122696038413857e-06, |
|
"loss": 3.721, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.3094462540716614, |
|
"grad_norm": 4.236996173858643, |
|
"learning_rate": 3.0039199187128322e-06, |
|
"loss": 3.6175, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.3127035830618892, |
|
"grad_norm": 5.709525108337402, |
|
"learning_rate": 2.995564370291387e-06, |
|
"loss": 3.3072, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.3159609120521172, |
|
"grad_norm": 7.52371072769165, |
|
"learning_rate": 2.987203055796919e-06, |
|
"loss": 3.5663, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.3192182410423452, |
|
"grad_norm": 5.285858631134033, |
|
"learning_rate": 2.978836072516385e-06, |
|
"loss": 3.6411, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.3224755700325732, |
|
"grad_norm": 4.379350662231445, |
|
"learning_rate": 2.9704635178027012e-06, |
|
"loss": 3.483, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.3257328990228012, |
|
"grad_norm": 4.9760918617248535, |
|
"learning_rate": 2.9620854890736095e-06, |
|
"loss": 3.8238, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.3289902280130292, |
|
"grad_norm": 4.094661235809326, |
|
"learning_rate": 2.9537020838105434e-06, |
|
"loss": 3.5433, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.3322475570032573, |
|
"grad_norm": 4.46438455581665, |
|
"learning_rate": 2.9453133995574955e-06, |
|
"loss": 3.7084, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.3355048859934853, |
|
"grad_norm": 5.638455867767334, |
|
"learning_rate": 2.93691953391988e-06, |
|
"loss": 3.6346, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.3387622149837133, |
|
"grad_norm": 6.500235557556152, |
|
"learning_rate": 2.9285205845634007e-06, |
|
"loss": 3.2883, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.3420195439739413, |
|
"grad_norm": 6.090638160705566, |
|
"learning_rate": 2.920116649212909e-06, |
|
"loss": 3.374, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.3452768729641693, |
|
"grad_norm": 4.844772815704346, |
|
"learning_rate": 2.9117078256512725e-06, |
|
"loss": 3.7777, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.3485342019543973, |
|
"grad_norm": 5.7558274269104, |
|
"learning_rate": 2.9032942117182345e-06, |
|
"loss": 3.383, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.3517915309446253, |
|
"grad_norm": 6.560629367828369, |
|
"learning_rate": 2.8948759053092756e-06, |
|
"loss": 3.6765, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.3550488599348534, |
|
"grad_norm": 4.3585100173950195, |
|
"learning_rate": 2.8864530043744754e-06, |
|
"loss": 3.4845, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.3583061889250814, |
|
"grad_norm": 5.6310248374938965, |
|
"learning_rate": 2.8780256069173724e-06, |
|
"loss": 3.4286, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.3615635179153094, |
|
"grad_norm": 4.276134014129639, |
|
"learning_rate": 2.8695938109938244e-06, |
|
"loss": 3.6676, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.3648208469055374, |
|
"grad_norm": 7.116851806640625, |
|
"learning_rate": 2.8611577147108656e-06, |
|
"loss": 3.2261, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.3680781758957654, |
|
"grad_norm": 7.104155540466309, |
|
"learning_rate": 2.8527174162255677e-06, |
|
"loss": 3.2597, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3713355048859934, |
|
"grad_norm": 6.247655868530273, |
|
"learning_rate": 2.8442730137438964e-06, |
|
"loss": 3.175, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.3745928338762214, |
|
"grad_norm": 6.24178409576416, |
|
"learning_rate": 2.8358246055195677e-06, |
|
"loss": 3.4389, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.3778501628664495, |
|
"grad_norm": 7.848822593688965, |
|
"learning_rate": 2.8273722898529075e-06, |
|
"loss": 4.1385, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.3811074918566775, |
|
"grad_norm": 7.670816898345947, |
|
"learning_rate": 2.8189161650897045e-06, |
|
"loss": 3.0239, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.3843648208469055, |
|
"grad_norm": 7.171327590942383, |
|
"learning_rate": 2.8104563296200704e-06, |
|
"loss": 3.286, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.3876221498371335, |
|
"grad_norm": 6.696552276611328, |
|
"learning_rate": 2.8019928818772897e-06, |
|
"loss": 3.1105, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.3908794788273615, |
|
"grad_norm": 4.991602897644043, |
|
"learning_rate": 2.793525920336678e-06, |
|
"loss": 3.4433, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.3941368078175895, |
|
"grad_norm": 5.104501724243164, |
|
"learning_rate": 2.785055543514434e-06, |
|
"loss": 3.3963, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.3973941368078175, |
|
"grad_norm": 7.394959449768066, |
|
"learning_rate": 2.776581849966497e-06, |
|
"loss": 3.1235, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.4006514657980456, |
|
"grad_norm": 5.726476669311523, |
|
"learning_rate": 2.7681049382873963e-06, |
|
"loss": 3.5493, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.4039087947882736, |
|
"grad_norm": 5.474902153015137, |
|
"learning_rate": 2.7596249071091042e-06, |
|
"loss": 3.4242, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.4071661237785016, |
|
"grad_norm": 4.774505615234375, |
|
"learning_rate": 2.7511418550998907e-06, |
|
"loss": 3.387, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.4104234527687296, |
|
"grad_norm": 7.272900581359863, |
|
"learning_rate": 2.7426558809631748e-06, |
|
"loss": 3.715, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.4136807817589576, |
|
"grad_norm": 5.06602144241333, |
|
"learning_rate": 2.734167083436375e-06, |
|
"loss": 3.3789, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.4169381107491856, |
|
"grad_norm": 6.0230278968811035, |
|
"learning_rate": 2.72567556128976e-06, |
|
"loss": 3.8185, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.4201954397394136, |
|
"grad_norm": 7.184913635253906, |
|
"learning_rate": 2.7171814133253015e-06, |
|
"loss": 3.34, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.4234527687296417, |
|
"grad_norm": 5.576634883880615, |
|
"learning_rate": 2.708684738375524e-06, |
|
"loss": 3.4975, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.4267100977198697, |
|
"grad_norm": 5.067847728729248, |
|
"learning_rate": 2.7001856353023527e-06, |
|
"loss": 3.8647, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.4299674267100977, |
|
"grad_norm": 5.121155738830566, |
|
"learning_rate": 2.691684202995966e-06, |
|
"loss": 3.2674, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.4332247557003257, |
|
"grad_norm": 4.543545246124268, |
|
"learning_rate": 2.683180540373645e-06, |
|
"loss": 3.7039, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4364820846905537, |
|
"grad_norm": 6.284096717834473, |
|
"learning_rate": 2.6746747463786187e-06, |
|
"loss": 3.1269, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.4397394136807817, |
|
"grad_norm": 3.755789041519165, |
|
"learning_rate": 2.6661669199789176e-06, |
|
"loss": 3.6417, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.4429967426710097, |
|
"grad_norm": 4.775397300720215, |
|
"learning_rate": 2.657657160166219e-06, |
|
"loss": 3.3866, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.4462540716612378, |
|
"grad_norm": 8.88444995880127, |
|
"learning_rate": 2.6491455659546957e-06, |
|
"loss": 3.0159, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.4495114006514658, |
|
"grad_norm": 4.328442573547363, |
|
"learning_rate": 2.6406322363798657e-06, |
|
"loss": 3.5742, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.4527687296416938, |
|
"grad_norm": 8.353851318359375, |
|
"learning_rate": 2.6321172704974374e-06, |
|
"loss": 3.475, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.4560260586319218, |
|
"grad_norm": 4.968841552734375, |
|
"learning_rate": 2.6236007673821585e-06, |
|
"loss": 3.4786, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.4592833876221498, |
|
"grad_norm": 6.597938537597656, |
|
"learning_rate": 2.6150828261266644e-06, |
|
"loss": 3.4609, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.4625407166123778, |
|
"grad_norm": 6.208395957946777, |
|
"learning_rate": 2.6065635458403214e-06, |
|
"loss": 3.2623, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.4657980456026058, |
|
"grad_norm": 4.916355133056641, |
|
"learning_rate": 2.598043025648078e-06, |
|
"loss": 3.5028, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4690553745928339, |
|
"grad_norm": 4.385219097137451, |
|
"learning_rate": 2.589521364689308e-06, |
|
"loss": 3.5348, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.4723127035830619, |
|
"grad_norm": 4.333066940307617, |
|
"learning_rate": 2.5809986621166593e-06, |
|
"loss": 3.6123, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.47557003257329, |
|
"grad_norm": 7.059334754943848, |
|
"learning_rate": 2.572475017094899e-06, |
|
"loss": 3.6275, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.478827361563518, |
|
"grad_norm": 4.183060646057129, |
|
"learning_rate": 2.5639505287997584e-06, |
|
"loss": 3.6582, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.482084690553746, |
|
"grad_norm": 4.17788028717041, |
|
"learning_rate": 2.555425296416785e-06, |
|
"loss": 3.6591, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.485342019543974, |
|
"grad_norm": 5.921770095825195, |
|
"learning_rate": 2.5468994191401795e-06, |
|
"loss": 3.6156, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.488599348534202, |
|
"grad_norm": 7.887184143066406, |
|
"learning_rate": 2.5383729961716487e-06, |
|
"loss": 3.0889, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.49185667752443, |
|
"grad_norm": 4.037430763244629, |
|
"learning_rate": 2.5298461267192476e-06, |
|
"loss": 3.7687, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.495114006514658, |
|
"grad_norm": 5.913503646850586, |
|
"learning_rate": 2.521318909996226e-06, |
|
"loss": 3.3295, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.498371335504886, |
|
"grad_norm": 6.712876319885254, |
|
"learning_rate": 2.512791445219876e-06, |
|
"loss": 3.6517, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.501628664495114, |
|
"grad_norm": 4.9107770919799805, |
|
"learning_rate": 2.5042638316103733e-06, |
|
"loss": 3.6666, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.504885993485342, |
|
"grad_norm": 6.594664573669434, |
|
"learning_rate": 2.495736168389627e-06, |
|
"loss": 3.4433, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.50814332247557, |
|
"grad_norm": 5.935478687286377, |
|
"learning_rate": 2.487208554780125e-06, |
|
"loss": 3.7178, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.511400651465798, |
|
"grad_norm": 3.9866998195648193, |
|
"learning_rate": 2.4786810900037747e-06, |
|
"loss": 3.58, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.514657980456026, |
|
"grad_norm": 7.126434803009033, |
|
"learning_rate": 2.4701538732807532e-06, |
|
"loss": 3.2685, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.517915309446254, |
|
"grad_norm": 5.334031581878662, |
|
"learning_rate": 2.4616270038283517e-06, |
|
"loss": 3.3296, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.521172638436482, |
|
"grad_norm": 5.371250629425049, |
|
"learning_rate": 2.453100580859821e-06, |
|
"loss": 3.4741, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.52442996742671, |
|
"grad_norm": 5.606956481933594, |
|
"learning_rate": 2.4445747035832157e-06, |
|
"loss": 3.404, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.5276872964169381, |
|
"grad_norm": 5.078698635101318, |
|
"learning_rate": 2.436049471200242e-06, |
|
"loss": 3.587, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.5309446254071661, |
|
"grad_norm": 4.911012649536133, |
|
"learning_rate": 2.427524982905102e-06, |
|
"loss": 3.877, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5342019543973942, |
|
"grad_norm": 4.630041122436523, |
|
"learning_rate": 2.4190013378833416e-06, |
|
"loss": 3.8273, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.5374592833876222, |
|
"grad_norm": 7.098541259765625, |
|
"learning_rate": 2.4104786353106927e-06, |
|
"loss": 3.8225, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.5407166123778502, |
|
"grad_norm": 5.203646183013916, |
|
"learning_rate": 2.4019569743519223e-06, |
|
"loss": 3.4602, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.5439739413680782, |
|
"grad_norm": 8.603754997253418, |
|
"learning_rate": 2.393436454159679e-06, |
|
"loss": 2.8917, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.5472312703583062, |
|
"grad_norm": 5.113753318786621, |
|
"learning_rate": 2.384917173873336e-06, |
|
"loss": 3.4855, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.5504885993485342, |
|
"grad_norm": 5.767629146575928, |
|
"learning_rate": 2.376399232617842e-06, |
|
"loss": 3.3516, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.5537459283387622, |
|
"grad_norm": 6.133004188537598, |
|
"learning_rate": 2.3678827295025634e-06, |
|
"loss": 3.726, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.5570032573289903, |
|
"grad_norm": 6.120333194732666, |
|
"learning_rate": 2.359367763620135e-06, |
|
"loss": 3.2949, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.5602605863192183, |
|
"grad_norm": 7.7329792976379395, |
|
"learning_rate": 2.3508544340453047e-06, |
|
"loss": 3.6583, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.5635179153094463, |
|
"grad_norm": 6.699610710144043, |
|
"learning_rate": 2.342342839833782e-06, |
|
"loss": 3.4231, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5667752442996743, |
|
"grad_norm": 10.26463508605957, |
|
"learning_rate": 2.333833080021083e-06, |
|
"loss": 2.65, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.5700325732899023, |
|
"grad_norm": 6.48724365234375, |
|
"learning_rate": 2.3253252536213817e-06, |
|
"loss": 3.348, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.5732899022801303, |
|
"grad_norm": 4.94360876083374, |
|
"learning_rate": 2.316819459626356e-06, |
|
"loss": 3.6939, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.5765472312703583, |
|
"grad_norm": 6.517416954040527, |
|
"learning_rate": 2.3083157970040344e-06, |
|
"loss": 3.9084, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.5798045602605864, |
|
"grad_norm": 6.802534103393555, |
|
"learning_rate": 2.2998143646976477e-06, |
|
"loss": 3.08, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.5830618892508144, |
|
"grad_norm": 4.083236217498779, |
|
"learning_rate": 2.291315261624477e-06, |
|
"loss": 3.8346, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.5863192182410424, |
|
"grad_norm": 6.189084053039551, |
|
"learning_rate": 2.2828185866746993e-06, |
|
"loss": 3.7014, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.5895765472312704, |
|
"grad_norm": 6.799473285675049, |
|
"learning_rate": 2.2743244387102404e-06, |
|
"loss": 3.4053, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.5928338762214984, |
|
"grad_norm": 6.345086097717285, |
|
"learning_rate": 2.265832916563626e-06, |
|
"loss": 3.4448, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.5960912052117264, |
|
"grad_norm": 5.72438383102417, |
|
"learning_rate": 2.2573441190368256e-06, |
|
"loss": 3.5056, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.5993485342019544, |
|
"grad_norm": 4.741360187530518, |
|
"learning_rate": 2.2488581449001097e-06, |
|
"loss": 3.7963, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.6026058631921825, |
|
"grad_norm": 5.115567207336426, |
|
"learning_rate": 2.240375092890896e-06, |
|
"loss": 3.4823, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.6058631921824105, |
|
"grad_norm": 5.392317771911621, |
|
"learning_rate": 2.2318950617126045e-06, |
|
"loss": 3.9584, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.6091205211726385, |
|
"grad_norm": 4.737048149108887, |
|
"learning_rate": 2.2234181500335033e-06, |
|
"loss": 3.5862, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.6123778501628665, |
|
"grad_norm": 6.983661651611328, |
|
"learning_rate": 2.2149444564855664e-06, |
|
"loss": 3.1922, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.6156351791530945, |
|
"grad_norm": 4.231821060180664, |
|
"learning_rate": 2.2064740796633234e-06, |
|
"loss": 3.5761, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.6188925081433225, |
|
"grad_norm": 6.307882308959961, |
|
"learning_rate": 2.198007118122711e-06, |
|
"loss": 3.1334, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.6221498371335505, |
|
"grad_norm": 7.474777698516846, |
|
"learning_rate": 2.1895436703799305e-06, |
|
"loss": 3.2486, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.6254071661237783, |
|
"grad_norm": 5.582712173461914, |
|
"learning_rate": 2.1810838349102963e-06, |
|
"loss": 3.4783, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.6286644951140063, |
|
"grad_norm": 6.286925315856934, |
|
"learning_rate": 2.1726277101470933e-06, |
|
"loss": 3.9185, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6319218241042344, |
|
"grad_norm": 4.55825138092041, |
|
"learning_rate": 2.164175394480433e-06, |
|
"loss": 3.6785, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.6351791530944624, |
|
"grad_norm": 7.3772430419921875, |
|
"learning_rate": 2.1557269862561045e-06, |
|
"loss": 3.1871, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.6384364820846904, |
|
"grad_norm": 7.50354528427124, |
|
"learning_rate": 2.147282583774433e-06, |
|
"loss": 3.1048, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.6416938110749184, |
|
"grad_norm": 4.185749530792236, |
|
"learning_rate": 2.138842285289135e-06, |
|
"loss": 3.6414, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.6449511400651464, |
|
"grad_norm": 6.309109687805176, |
|
"learning_rate": 2.1304061890061764e-06, |
|
"loss": 3.3061, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.6482084690553744, |
|
"grad_norm": 6.976665019989014, |
|
"learning_rate": 2.1219743930826284e-06, |
|
"loss": 3.415, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.6514657980456025, |
|
"grad_norm": 7.75382661819458, |
|
"learning_rate": 2.1135469956255254e-06, |
|
"loss": 3.1916, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.6547231270358305, |
|
"grad_norm": 4.2923736572265625, |
|
"learning_rate": 2.1051240946907252e-06, |
|
"loss": 3.7138, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.6579804560260585, |
|
"grad_norm": 5.76967716217041, |
|
"learning_rate": 2.0967057882817664e-06, |
|
"loss": 3.7076, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.6612377850162865, |
|
"grad_norm": 5.392721176147461, |
|
"learning_rate": 2.0882921743487283e-06, |
|
"loss": 3.4379, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.6644951140065145, |
|
"grad_norm": 5.194660663604736, |
|
"learning_rate": 2.079883350787092e-06, |
|
"loss": 3.8899, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.6677524429967425, |
|
"grad_norm": 6.226246356964111, |
|
"learning_rate": 2.0714794154366e-06, |
|
"loss": 3.6434, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.6710097719869705, |
|
"grad_norm": 6.618658542633057, |
|
"learning_rate": 2.0630804660801203e-06, |
|
"loss": 3.1193, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.6742671009771986, |
|
"grad_norm": 6.338504314422607, |
|
"learning_rate": 2.0546866004425053e-06, |
|
"loss": 3.2267, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.6775244299674266, |
|
"grad_norm": 6.449606418609619, |
|
"learning_rate": 2.046297916189457e-06, |
|
"loss": 3.8425, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.6807817589576546, |
|
"grad_norm": 5.436049938201904, |
|
"learning_rate": 2.0379145109263914e-06, |
|
"loss": 3.7856, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.6840390879478826, |
|
"grad_norm": 4.790528297424316, |
|
"learning_rate": 2.0295364821972996e-06, |
|
"loss": 3.3842, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.6872964169381106, |
|
"grad_norm": 4.875779151916504, |
|
"learning_rate": 2.0211639274836155e-06, |
|
"loss": 3.9334, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.6905537459283386, |
|
"grad_norm": 8.919984817504883, |
|
"learning_rate": 2.0127969442030816e-06, |
|
"loss": 3.0129, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.6938110749185666, |
|
"grad_norm": 4.52994966506958, |
|
"learning_rate": 2.0044356297086136e-06, |
|
"loss": 3.7951, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.6970684039087947, |
|
"grad_norm": 5.16178035736084, |
|
"learning_rate": 1.996080081287169e-06, |
|
"loss": 3.7712, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.7003257328990227, |
|
"grad_norm": 5.213560104370117, |
|
"learning_rate": 1.987730396158615e-06, |
|
"loss": 3.6248, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.7035830618892507, |
|
"grad_norm": 5.562010765075684, |
|
"learning_rate": 1.979386671474598e-06, |
|
"loss": 3.8039, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.7068403908794787, |
|
"grad_norm": 5.998531341552734, |
|
"learning_rate": 1.9710490043174118e-06, |
|
"loss": 3.6943, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.7100977198697067, |
|
"grad_norm": 6.784372329711914, |
|
"learning_rate": 1.962717491698869e-06, |
|
"loss": 3.2823, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.7133550488599347, |
|
"grad_norm": 5.637960433959961, |
|
"learning_rate": 1.9543922305591708e-06, |
|
"loss": 3.4504, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.7166123778501627, |
|
"grad_norm": 8.034090995788574, |
|
"learning_rate": 1.9460733177657813e-06, |
|
"loss": 3.0897, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.7198697068403908, |
|
"grad_norm": 5.726444244384766, |
|
"learning_rate": 1.937760850112299e-06, |
|
"loss": 3.5505, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.7231270358306188, |
|
"grad_norm": 6.473389148712158, |
|
"learning_rate": 1.9294549243173306e-06, |
|
"loss": 3.2587, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.7263843648208468, |
|
"grad_norm": 4.210171222686768, |
|
"learning_rate": 1.9211556370233652e-06, |
|
"loss": 3.661, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7296416938110748, |
|
"grad_norm": 4.9132280349731445, |
|
"learning_rate": 1.9128630847956535e-06, |
|
"loss": 3.6479, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.7328990228013028, |
|
"grad_norm": 6.2500176429748535, |
|
"learning_rate": 1.9045773641210772e-06, |
|
"loss": 3.1326, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.7361563517915308, |
|
"grad_norm": 4.56230354309082, |
|
"learning_rate": 1.8962985714070327e-06, |
|
"loss": 3.7075, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.7394136807817588, |
|
"grad_norm": 5.635771751403809, |
|
"learning_rate": 1.8880268029803072e-06, |
|
"loss": 3.8035, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.7426710097719869, |
|
"grad_norm": 6.24404239654541, |
|
"learning_rate": 1.8797621550859563e-06, |
|
"loss": 3.1673, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.7459283387622149, |
|
"grad_norm": 5.211395740509033, |
|
"learning_rate": 1.871504723886187e-06, |
|
"loss": 3.6729, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.7491856677524429, |
|
"grad_norm": 6.440443515777588, |
|
"learning_rate": 1.8632546054592365e-06, |
|
"loss": 3.3578, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.752442996742671, |
|
"grad_norm": 5.6076436042785645, |
|
"learning_rate": 1.855011895798255e-06, |
|
"loss": 3.2926, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.755700325732899, |
|
"grad_norm": 4.676731586456299, |
|
"learning_rate": 1.8467766908101908e-06, |
|
"loss": 3.6138, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.758957654723127, |
|
"grad_norm": 4.973746299743652, |
|
"learning_rate": 1.8385490863146707e-06, |
|
"loss": 3.7079, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.762214983713355, |
|
"grad_norm": 6.0938897132873535, |
|
"learning_rate": 1.8303291780428879e-06, |
|
"loss": 3.5518, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.765472312703583, |
|
"grad_norm": 4.93472957611084, |
|
"learning_rate": 1.8221170616364864e-06, |
|
"loss": 3.4361, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.768729641693811, |
|
"grad_norm": 5.908854007720947, |
|
"learning_rate": 1.8139128326464495e-06, |
|
"loss": 3.7288, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.771986970684039, |
|
"grad_norm": 6.220162868499756, |
|
"learning_rate": 1.805716586531988e-06, |
|
"loss": 2.9277, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.775244299674267, |
|
"grad_norm": 4.5047287940979, |
|
"learning_rate": 1.7975284186594286e-06, |
|
"loss": 3.7543, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.778501628664495, |
|
"grad_norm": 6.460538864135742, |
|
"learning_rate": 1.789348424301104e-06, |
|
"loss": 3.4487, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.781758957654723, |
|
"grad_norm": 5.632236480712891, |
|
"learning_rate": 1.781176698634246e-06, |
|
"loss": 3.667, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.785016286644951, |
|
"grad_norm": 6.939136028289795, |
|
"learning_rate": 1.7730133367398775e-06, |
|
"loss": 3.1925, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.788273615635179, |
|
"grad_norm": 6.888382434844971, |
|
"learning_rate": 1.7648584336017044e-06, |
|
"loss": 3.2651, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.791530944625407, |
|
"grad_norm": 5.201353549957275, |
|
"learning_rate": 1.7567120841050133e-06, |
|
"loss": 3.905, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.794788273615635, |
|
"grad_norm": 6.2696309089660645, |
|
"learning_rate": 1.748574383035565e-06, |
|
"loss": 3.2628, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.798045602605863, |
|
"grad_norm": 5.081679821014404, |
|
"learning_rate": 1.740445425078493e-06, |
|
"loss": 3.4254, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.8013029315960911, |
|
"grad_norm": 5.418735980987549, |
|
"learning_rate": 1.7323253048172015e-06, |
|
"loss": 3.6783, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.8045602605863191, |
|
"grad_norm": 6.63270378112793, |
|
"learning_rate": 1.7242141167322656e-06, |
|
"loss": 3.6155, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.8078175895765471, |
|
"grad_norm": 5.2097625732421875, |
|
"learning_rate": 1.7161119552003303e-06, |
|
"loss": 3.4332, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.8110749185667752, |
|
"grad_norm": 7.32343053817749, |
|
"learning_rate": 1.7080189144930136e-06, |
|
"loss": 3.512, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.8143322475570032, |
|
"grad_norm": 6.2383012771606445, |
|
"learning_rate": 1.6999350887758098e-06, |
|
"loss": 3.2288, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.8175895765472312, |
|
"grad_norm": 6.8209733963012695, |
|
"learning_rate": 1.6918605721069925e-06, |
|
"loss": 3.2334, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.8208469055374592, |
|
"grad_norm": 4.392072677612305, |
|
"learning_rate": 1.6837954584365217e-06, |
|
"loss": 3.6259, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.8241042345276872, |
|
"grad_norm": 6.251183986663818, |
|
"learning_rate": 1.6757398416049502e-06, |
|
"loss": 3.3141, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.8273615635179152, |
|
"grad_norm": 7.095953941345215, |
|
"learning_rate": 1.6676938153423312e-06, |
|
"loss": 3.4431, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.8306188925081432, |
|
"grad_norm": 7.436147212982178, |
|
"learning_rate": 1.659657473267129e-06, |
|
"loss": 3.1196, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.8338762214983713, |
|
"grad_norm": 4.6533989906311035, |
|
"learning_rate": 1.6516309088851273e-06, |
|
"loss": 3.4344, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.8371335504885993, |
|
"grad_norm": 8.499824523925781, |
|
"learning_rate": 1.6436142155883442e-06, |
|
"loss": 3.129, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.8403908794788273, |
|
"grad_norm": 6.272755146026611, |
|
"learning_rate": 1.6356074866539434e-06, |
|
"loss": 3.5271, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.8436482084690553, |
|
"grad_norm": 5.055791854858398, |
|
"learning_rate": 1.6276108152431497e-06, |
|
"loss": 3.4117, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.8469055374592833, |
|
"grad_norm": 5.689114570617676, |
|
"learning_rate": 1.619624294400165e-06, |
|
"loss": 3.5649, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.8501628664495113, |
|
"grad_norm": 6.562279224395752, |
|
"learning_rate": 1.6116480170510852e-06, |
|
"loss": 3.3717, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.8534201954397393, |
|
"grad_norm": 4.955641269683838, |
|
"learning_rate": 1.6036820760028202e-06, |
|
"loss": 3.4331, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.8566775244299674, |
|
"grad_norm": 6.789297103881836, |
|
"learning_rate": 1.5957265639420128e-06, |
|
"loss": 3.8255, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.8599348534201954, |
|
"grad_norm": 4.328821659088135, |
|
"learning_rate": 1.5877815734339608e-06, |
|
"loss": 3.5312, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.8631921824104234, |
|
"grad_norm": 8.573647499084473, |
|
"learning_rate": 1.5798471969215394e-06, |
|
"loss": 3.3997, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.8664495114006514, |
|
"grad_norm": 5.302987098693848, |
|
"learning_rate": 1.5719235267241273e-06, |
|
"loss": 3.4652, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.8697068403908794, |
|
"grad_norm": 6.52237606048584, |
|
"learning_rate": 1.5640106550365298e-06, |
|
"loss": 3.5065, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.8729641693811074, |
|
"grad_norm": 6.677807807922363, |
|
"learning_rate": 1.556108673927908e-06, |
|
"loss": 2.987, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.8762214983713354, |
|
"grad_norm": 5.502460956573486, |
|
"learning_rate": 1.5482176753407075e-06, |
|
"loss": 3.6778, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.8794788273615635, |
|
"grad_norm": 5.219401836395264, |
|
"learning_rate": 1.54033775108959e-06, |
|
"loss": 3.5377, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.8827361563517915, |
|
"grad_norm": 6.289682865142822, |
|
"learning_rate": 1.5324689928603586e-06, |
|
"loss": 3.4786, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.8859934853420195, |
|
"grad_norm": 5.785194396972656, |
|
"learning_rate": 1.5246114922089e-06, |
|
"loss": 3.5605, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.8892508143322475, |
|
"grad_norm": 5.868037700653076, |
|
"learning_rate": 1.5167653405601125e-06, |
|
"loss": 3.5138, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.8925081433224755, |
|
"grad_norm": 5.104717254638672, |
|
"learning_rate": 1.5089306292068456e-06, |
|
"loss": 3.6772, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.8957654723127035, |
|
"grad_norm": 6.35374641418457, |
|
"learning_rate": 1.5011074493088372e-06, |
|
"loss": 3.8539, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.8990228013029316, |
|
"grad_norm": 7.706153392791748, |
|
"learning_rate": 1.4932958918916512e-06, |
|
"loss": 3.5753, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.9022801302931596, |
|
"grad_norm": 6.74048376083374, |
|
"learning_rate": 1.4854960478456207e-06, |
|
"loss": 3.5602, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.9055374592833876, |
|
"grad_norm": 6.806337833404541, |
|
"learning_rate": 1.4777080079247884e-06, |
|
"loss": 3.5366, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.9087947882736156, |
|
"grad_norm": 6.2833428382873535, |
|
"learning_rate": 1.469931862745853e-06, |
|
"loss": 3.1881, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.9120521172638436, |
|
"grad_norm": 8.585888862609863, |
|
"learning_rate": 1.4621677027871129e-06, |
|
"loss": 3.4442, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.9153094462540716, |
|
"grad_norm": 4.6327433586120605, |
|
"learning_rate": 1.4544156183874129e-06, |
|
"loss": 3.7583, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.9185667752442996, |
|
"grad_norm": 5.917350769042969, |
|
"learning_rate": 1.446675699745097e-06, |
|
"loss": 3.7279, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.9218241042345277, |
|
"grad_norm": 5.715301990509033, |
|
"learning_rate": 1.4389480369169528e-06, |
|
"loss": 3.2236, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.9250814332247557, |
|
"grad_norm": 6.128412246704102, |
|
"learning_rate": 1.4312327198171705e-06, |
|
"loss": 3.5584, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.9283387622149837, |
|
"grad_norm": 4.744357109069824, |
|
"learning_rate": 1.42352983821629e-06, |
|
"loss": 3.6764, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.9315960912052117, |
|
"grad_norm": 6.520962715148926, |
|
"learning_rate": 1.4158394817401611e-06, |
|
"loss": 3.179, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.9348534201954397, |
|
"grad_norm": 6.759045124053955, |
|
"learning_rate": 1.408161739868901e-06, |
|
"loss": 3.6262, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.9381107491856677, |
|
"grad_norm": 5.7115936279296875, |
|
"learning_rate": 1.400496701935847e-06, |
|
"loss": 3.6611, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.9413680781758957, |
|
"grad_norm": 7.379328727722168, |
|
"learning_rate": 1.3928444571265262e-06, |
|
"loss": 3.1594, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.9446254071661238, |
|
"grad_norm": 5.522282600402832, |
|
"learning_rate": 1.3852050944776088e-06, |
|
"loss": 3.6881, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.9478827361563518, |
|
"grad_norm": 6.506474494934082, |
|
"learning_rate": 1.37757870287588e-06, |
|
"loss": 3.5295, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.9511400651465798, |
|
"grad_norm": 5.814865589141846, |
|
"learning_rate": 1.3699653710571987e-06, |
|
"loss": 3.5468, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.9543973941368078, |
|
"grad_norm": 4.4180908203125, |
|
"learning_rate": 1.362365187605471e-06, |
|
"loss": 3.7826, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9576547231270358, |
|
"grad_norm": 6.508097171783447, |
|
"learning_rate": 1.354778240951617e-06, |
|
"loss": 3.8519, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.9609120521172638, |
|
"grad_norm": 5.066211700439453, |
|
"learning_rate": 1.3472046193725386e-06, |
|
"loss": 3.6248, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.9641693811074918, |
|
"grad_norm": 5.988237380981445, |
|
"learning_rate": 1.3396444109901008e-06, |
|
"loss": 3.5285, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.9674267100977199, |
|
"grad_norm": 5.127301216125488, |
|
"learning_rate": 1.3320977037700952e-06, |
|
"loss": 3.7062, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.9706840390879479, |
|
"grad_norm": 5.3811798095703125, |
|
"learning_rate": 1.324564585521228e-06, |
|
"loss": 3.4126, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.9739413680781759, |
|
"grad_norm": 7.705234527587891, |
|
"learning_rate": 1.3170451438940882e-06, |
|
"loss": 3.2128, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.977198697068404, |
|
"grad_norm": 4.88129997253418, |
|
"learning_rate": 1.3095394663801348e-06, |
|
"loss": 3.7014, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.980456026058632, |
|
"grad_norm": 7.479042053222656, |
|
"learning_rate": 1.302047640310677e-06, |
|
"loss": 3.2829, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.98371335504886, |
|
"grad_norm": 4.332832336425781, |
|
"learning_rate": 1.2945697528558542e-06, |
|
"loss": 3.6514, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.986970684039088, |
|
"grad_norm": 4.368464946746826, |
|
"learning_rate": 1.2871058910236293e-06, |
|
"loss": 3.7944, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.990228013029316, |
|
"grad_norm": 6.625438213348389, |
|
"learning_rate": 1.2796561416587666e-06, |
|
"loss": 3.5045, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.993485342019544, |
|
"grad_norm": 4.590055465698242, |
|
"learning_rate": 1.2722205914418318e-06, |
|
"loss": 3.29, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.996742671009772, |
|
"grad_norm": 5.7786335945129395, |
|
"learning_rate": 1.2647993268881744e-06, |
|
"loss": 3.3603, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.448220252990723, |
|
"learning_rate": 1.2573924343469274e-06, |
|
"loss": 3.4126, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.003257328990228, |
|
"grad_norm": 4.431787490844727, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"loss": 3.6297, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.006514657980456, |
|
"grad_norm": 7.178677558898926, |
|
"learning_rate": 1.242622109861074e-06, |
|
"loss": 3.1546, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.009771986970684, |
|
"grad_norm": 6.207696914672852, |
|
"learning_rate": 1.2352588497746046e-06, |
|
"loss": 3.1396, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.013029315960912, |
|
"grad_norm": 5.861324787139893, |
|
"learning_rate": 1.2279103054148197e-06, |
|
"loss": 3.0488, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.01628664495114, |
|
"grad_norm": 5.195066928863525, |
|
"learning_rate": 1.2205765622847273e-06, |
|
"loss": 3.2397, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.019543973941368, |
|
"grad_norm": 4.896651744842529, |
|
"learning_rate": 1.2132577057151138e-06, |
|
"loss": 3.5348, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.022801302931596, |
|
"grad_norm": 6.000244617462158, |
|
"learning_rate": 1.2059538208635587e-06, |
|
"loss": 3.1314, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.026058631921824, |
|
"grad_norm": 5.479099273681641, |
|
"learning_rate": 1.1986649927134371e-06, |
|
"loss": 3.6143, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.029315960912052, |
|
"grad_norm": 6.785484313964844, |
|
"learning_rate": 1.1913913060729356e-06, |
|
"loss": 2.8881, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.03257328990228, |
|
"grad_norm": 5.43539571762085, |
|
"learning_rate": 1.1841328455740644e-06, |
|
"loss": 3.297, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.035830618892508, |
|
"grad_norm": 6.371501445770264, |
|
"learning_rate": 1.1768896956716693e-06, |
|
"loss": 3.6874, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.039087947882736, |
|
"grad_norm": 5.3042707443237305, |
|
"learning_rate": 1.169661940642455e-06, |
|
"loss": 3.2485, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.042345276872964, |
|
"grad_norm": 5.197566032409668, |
|
"learning_rate": 1.1624496645839975e-06, |
|
"loss": 3.2668, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.045602605863192, |
|
"grad_norm": 7.447043418884277, |
|
"learning_rate": 1.1552529514137734e-06, |
|
"loss": 3.3702, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.04885993485342, |
|
"grad_norm": 4.471124172210693, |
|
"learning_rate": 1.1480718848681752e-06, |
|
"loss": 3.47, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.0521172638436482, |
|
"grad_norm": 6.143259048461914, |
|
"learning_rate": 1.1409065485015445e-06, |
|
"loss": 3.1034, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.0553745928338762, |
|
"grad_norm": 6.116540908813477, |
|
"learning_rate": 1.1337570256851962e-06, |
|
"loss": 3.4506, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.0586319218241043, |
|
"grad_norm": 6.157170295715332, |
|
"learning_rate": 1.1266233996064457e-06, |
|
"loss": 3.4515, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.0618892508143323, |
|
"grad_norm": 5.997113227844238, |
|
"learning_rate": 1.1195057532676487e-06, |
|
"loss": 3.0388, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.0651465798045603, |
|
"grad_norm": 5.394718647003174, |
|
"learning_rate": 1.112404169485226e-06, |
|
"loss": 3.0905, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.0684039087947883, |
|
"grad_norm": 8.713213920593262, |
|
"learning_rate": 1.1053187308887087e-06, |
|
"loss": 2.8543, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.0716612377850163, |
|
"grad_norm": 5.968896389007568, |
|
"learning_rate": 1.098249519919769e-06, |
|
"loss": 3.3605, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.0749185667752443, |
|
"grad_norm": 7.154015064239502, |
|
"learning_rate": 1.091196618831268e-06, |
|
"loss": 2.6968, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.0781758957654723, |
|
"grad_norm": 6.021311283111572, |
|
"learning_rate": 1.084160109686295e-06, |
|
"loss": 3.3583, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.0814332247557004, |
|
"grad_norm": 5.368682861328125, |
|
"learning_rate": 1.07714007435721e-06, |
|
"loss": 3.7121, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.0846905537459284, |
|
"grad_norm": 5.130136966705322, |
|
"learning_rate": 1.070136594524698e-06, |
|
"loss": 3.2675, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.0879478827361564, |
|
"grad_norm": 6.868380069732666, |
|
"learning_rate": 1.0631497516768113e-06, |
|
"loss": 3.0014, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.0912052117263844, |
|
"grad_norm": 7.281705379486084, |
|
"learning_rate": 1.0561796271080283e-06, |
|
"loss": 3.2664, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0944625407166124, |
|
"grad_norm": 4.6429443359375, |
|
"learning_rate": 1.0492263019183002e-06, |
|
"loss": 3.177, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.0977198697068404, |
|
"grad_norm": 7.4768571853637695, |
|
"learning_rate": 1.042289857012115e-06, |
|
"loss": 2.9303, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.1009771986970684, |
|
"grad_norm": 5.8586745262146, |
|
"learning_rate": 1.0353703730975493e-06, |
|
"loss": 3.388, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.1042345276872965, |
|
"grad_norm": 4.739225387573242, |
|
"learning_rate": 1.0284679306853343e-06, |
|
"loss": 3.1248, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.1074918566775245, |
|
"grad_norm": 7.75331449508667, |
|
"learning_rate": 1.0215826100879175e-06, |
|
"loss": 2.3332, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.1107491856677525, |
|
"grad_norm": 4.79758882522583, |
|
"learning_rate": 1.0147144914185253e-06, |
|
"loss": 3.2238, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.1140065146579805, |
|
"grad_norm": 4.328355312347412, |
|
"learning_rate": 1.0078636545902363e-06, |
|
"loss": 3.3991, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.1172638436482085, |
|
"grad_norm": 4.247138023376465, |
|
"learning_rate": 1.0010301793150456e-06, |
|
"loss": 3.5185, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.1205211726384365, |
|
"grad_norm": 6.359057903289795, |
|
"learning_rate": 9.942141451029436e-07, |
|
"loss": 3.6669, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.1237785016286646, |
|
"grad_norm": 7.362981796264648, |
|
"learning_rate": 9.874156312609837e-07, |
|
"loss": 2.862, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.1270358306188926, |
|
"grad_norm": 8.532692909240723, |
|
"learning_rate": 9.806347168923667e-07, |
|
"loss": 2.494, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.1302931596091206, |
|
"grad_norm": 8.926546096801758, |
|
"learning_rate": 9.738714808955167e-07, |
|
"loss": 2.6494, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.1335504885993486, |
|
"grad_norm": 4.513092517852783, |
|
"learning_rate": 9.671260019631603e-07, |
|
"loss": 3.2468, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.1368078175895766, |
|
"grad_norm": 6.006774425506592, |
|
"learning_rate": 9.603983585814188e-07, |
|
"loss": 3.3401, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.1400651465798046, |
|
"grad_norm": 6.242079734802246, |
|
"learning_rate": 9.53688629028886e-07, |
|
"loss": 3.1088, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.1433224755700326, |
|
"grad_norm": 5.460093021392822, |
|
"learning_rate": 9.469968913757254e-07, |
|
"loss": 3.3652, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.1465798045602607, |
|
"grad_norm": 7.09798002243042, |
|
"learning_rate": 9.403232234827548e-07, |
|
"loss": 3.2013, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.1498371335504887, |
|
"grad_norm": 8.465774536132812, |
|
"learning_rate": 9.336677030005459e-07, |
|
"loss": 2.8236, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.1530944625407167, |
|
"grad_norm": 7.581086158752441, |
|
"learning_rate": 9.270304073685193e-07, |
|
"loss": 2.9084, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.1563517915309447, |
|
"grad_norm": 6.972714424133301, |
|
"learning_rate": 9.2041141381404e-07, |
|
"loss": 3.0302, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.1596091205211727, |
|
"grad_norm": 8.414934158325195, |
|
"learning_rate": 9.138107993515244e-07, |
|
"loss": 2.6214, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.1628664495114007, |
|
"grad_norm": 8.747556686401367, |
|
"learning_rate": 9.07228640781539e-07, |
|
"loss": 3.0217, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.1661237785016287, |
|
"grad_norm": 4.145984172821045, |
|
"learning_rate": 9.006650146899121e-07, |
|
"loss": 3.6553, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.1693811074918568, |
|
"grad_norm": 6.007801532745361, |
|
"learning_rate": 8.941199974468362e-07, |
|
"loss": 2.8565, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.1726384364820848, |
|
"grad_norm": 5.257616996765137, |
|
"learning_rate": 8.875936652059872e-07, |
|
"loss": 3.7403, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.175895765472313, |
|
"grad_norm": 5.72130823135376, |
|
"learning_rate": 8.810860939036301e-07, |
|
"loss": 3.1205, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.179153094462541, |
|
"grad_norm": 4.439971446990967, |
|
"learning_rate": 8.745973592577417e-07, |
|
"loss": 3.4111, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.182410423452769, |
|
"grad_norm": 5.781883239746094, |
|
"learning_rate": 8.681275367671288e-07, |
|
"loss": 3.0331, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.185667752442997, |
|
"grad_norm": 7.093985080718994, |
|
"learning_rate": 8.616767017105443e-07, |
|
"loss": 2.986, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.188925081433225, |
|
"grad_norm": 5.910228252410889, |
|
"learning_rate": 8.552449291458198e-07, |
|
"loss": 3.4674, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.192182410423453, |
|
"grad_norm": 6.024210453033447, |
|
"learning_rate": 8.488322939089838e-07, |
|
"loss": 3.3857, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.195439739413681, |
|
"grad_norm": 6.586373805999756, |
|
"learning_rate": 8.424388706133984e-07, |
|
"loss": 3.4031, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.198697068403909, |
|
"grad_norm": 7.641358852386475, |
|
"learning_rate": 8.360647336488847e-07, |
|
"loss": 2.7999, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.201954397394137, |
|
"grad_norm": 7.232587814331055, |
|
"learning_rate": 8.297099571808626e-07, |
|
"loss": 3.169, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.205211726384365, |
|
"grad_norm": 5.160323143005371, |
|
"learning_rate": 8.233746151494856e-07, |
|
"loss": 3.1441, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.208469055374593, |
|
"grad_norm": 6.639790058135986, |
|
"learning_rate": 8.170587812687777e-07, |
|
"loss": 3.3241, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.211726384364821, |
|
"grad_norm": 6.677544593811035, |
|
"learning_rate": 8.10762529025782e-07, |
|
"loss": 3.159, |
"step": 679
},
{
"epoch": 2.214983713355049,
"grad_norm": 5.903120994567871,
"learning_rate": 8.044859316796988e-07,
"loss": 3.3389,
"step": 680
},
{
"epoch": 2.218241042345277,
"grad_norm": 4.54970121383667,
"learning_rate": 7.982290622610392e-07,
"loss": 3.665,
"step": 681
},
{
"epoch": 2.221498371335505,
"grad_norm": 6.834527969360352,
"learning_rate": 7.919919935707702e-07,
"loss": 2.8488,
"step": 682
},
{
"epoch": 2.224755700325733,
"grad_norm": 5.058896541595459,
"learning_rate": 7.857747981794717e-07,
"loss": 3.2633,
"step": 683
},
{
"epoch": 2.228013029315961,
"grad_norm": 5.747068405151367,
"learning_rate": 7.795775484264911e-07,
"loss": 3.2937,
"step": 684
},
{
"epoch": 2.231270358306189,
"grad_norm": 5.85347318649292,
"learning_rate": 7.734003164190984e-07,
"loss": 3.382,
"step": 685
},
{
"epoch": 2.234527687296417,
"grad_norm": 6.000707626342773,
"learning_rate": 7.672431740316527e-07,
"loss": 3.2161,
"step": 686
},
{
"epoch": 2.237785016286645,
"grad_norm": 7.966262340545654,
"learning_rate": 7.611061929047603e-07,
"loss": 3.3168,
"step": 687
},
{
"epoch": 2.241042345276873,
"grad_norm": 5.129033088684082,
"learning_rate": 7.54989444444447e-07,
"loss": 3.249,
"step": 688
},
{
"epoch": 2.244299674267101,
"grad_norm": 5.477846622467041,
"learning_rate": 7.488929998213202e-07,
"loss": 3.1673,
"step": 689
},
{
"epoch": 2.247557003257329,
"grad_norm": 6.5664472579956055,
"learning_rate": 7.42816929969748e-07,
"loss": 2.7741,
"step": 690
},
{
"epoch": 2.250814332247557,
"grad_norm": 5.1526970863342285,
"learning_rate": 7.367613055870302e-07,
"loss": 3.2483,
"step": 691
},
{
"epoch": 2.254071661237785,
"grad_norm": 4.2331719398498535,
"learning_rate": 7.307261971325733e-07,
"loss": 3.499,
"step": 692
},
{
"epoch": 2.257328990228013,
"grad_norm": 6.132472991943359,
"learning_rate": 7.247116748270774e-07,
"loss": 3.4622,
"step": 693
},
{
"epoch": 2.260586319218241,
"grad_norm": 6.091642379760742,
"learning_rate": 7.187178086517116e-07,
"loss": 3.1034,
"step": 694
},
{
"epoch": 2.263843648208469,
"grad_norm": 7.409130096435547,
"learning_rate": 7.127446683473066e-07,
"loss": 3.3242,
"step": 695
},
{
"epoch": 2.267100977198697,
"grad_norm": 4.992582321166992,
"learning_rate": 7.067923234135368e-07,
"loss": 3.578,
"step": 696
},
{
"epoch": 2.270358306188925,
"grad_norm": 7.707672595977783,
"learning_rate": 7.008608431081179e-07,
"loss": 3.3818,
"step": 697
},
{
"epoch": 2.273615635179153,
"grad_norm": 4.5212483406066895,
"learning_rate": 6.949502964459951e-07,
"loss": 3.4887,
"step": 698
},
{
"epoch": 2.2768729641693812,
"grad_norm": 5.691729545593262,
"learning_rate": 6.890607521985454e-07,
"loss": 3.2088,
"step": 699
},
{
"epoch": 2.2801302931596092,
"grad_norm": 5.936895847320557,
"learning_rate": 6.831922788927744e-07,
"loss": 3.0461,
"step": 700
},
{
"epoch": 2.2833876221498373,
"grad_norm": 5.542623043060303,
"learning_rate": 6.773449448105182e-07,
"loss": 3.5307,
"step": 701
},
{
"epoch": 2.2866449511400653,
"grad_norm": 6.767984867095947,
"learning_rate": 6.715188179876525e-07,
"loss": 2.976,
"step": 702
},
{
"epoch": 2.2899022801302933,
"grad_norm": 5.975040435791016,
"learning_rate": 6.657139662132961e-07,
"loss": 3.1286,
"step": 703
},
{
"epoch": 2.2931596091205213,
"grad_norm": 6.311220169067383,
"learning_rate": 6.59930457029028e-07,
"loss": 3.4112,
"step": 704
},
{
"epoch": 2.2964169381107493,
"grad_norm": 6.7899932861328125,
"learning_rate": 6.541683577280947e-07,
"loss": 3.283,
"step": 705
},
{
"epoch": 2.2996742671009773,
"grad_norm": 4.831838607788086,
"learning_rate": 6.484277353546342e-07,
"loss": 3.481,
"step": 706
},
{
"epoch": 2.3029315960912053,
"grad_norm": 8.137195587158203,
"learning_rate": 6.427086567028912e-07,
"loss": 2.7856,
"step": 707
},
{
"epoch": 2.3061889250814334,
"grad_norm": 5.096379280090332,
"learning_rate": 6.370111883164406e-07,
"loss": 3.2754,
"step": 708
},
{
"epoch": 2.3094462540716614,
"grad_norm": 4.889212608337402,
"learning_rate": 6.313353964874155e-07,
"loss": 3.4334,
"step": 709
},
{
"epoch": 2.3127035830618894,
"grad_norm": 5.239722728729248,
"learning_rate": 6.256813472557322e-07,
"loss": 3.4799,
"step": 710
},
{
"epoch": 2.3159609120521174,
"grad_norm": 5.829878807067871,
"learning_rate": 6.200491064083264e-07,
"loss": 3.2489,
"step": 711
},
{
"epoch": 2.3192182410423454,
"grad_norm": 6.974037170410156,
"learning_rate": 6.144387394783829e-07,
"loss": 3.3498,
"step": 712
},
{
"epoch": 2.3224755700325734,
"grad_norm": 4.331153869628906,
"learning_rate": 6.088503117445774e-07,
"loss": 3.3922,
"step": 713
},
{
"epoch": 2.3257328990228014,
"grad_norm": 6.132293224334717,
"learning_rate": 6.032838882303144e-07,
"loss": 3.5287,
"step": 714
},
{
"epoch": 2.3289902280130295,
"grad_norm": 4.317237377166748,
"learning_rate": 5.977395337029701e-07,
"loss": 3.4481,
"step": 715
},
{
"epoch": 2.3322475570032575,
"grad_norm": 6.361827373504639,
"learning_rate": 5.922173126731418e-07,
"loss": 3.3759,
"step": 716
},
{
"epoch": 2.3355048859934855,
"grad_norm": 6.041193008422852,
"learning_rate": 5.867172893938936e-07,
"loss": 3.0176,
"step": 717
},
{
"epoch": 2.3387622149837135,
"grad_norm": 7.7040228843688965,
"learning_rate": 5.812395278600127e-07,
"loss": 2.7771,
"step": 718
},
{
"epoch": 2.3420195439739415,
"grad_norm": 6.479687213897705,
"learning_rate": 5.757840918072601e-07,
"loss": 3.2959,
"step": 719
},
{
"epoch": 2.3452768729641695,
"grad_norm": 6.083302021026611,
"learning_rate": 5.703510447116351e-07,
"loss": 3.5096,
"step": 720
},
{
"epoch": 2.3485342019543975,
"grad_norm": 6.406294822692871,
"learning_rate": 5.64940449788629e-07,
"loss": 3.426,
"step": 721
},
{
"epoch": 2.3517915309446256,
"grad_norm": 8.7530517578125,
"learning_rate": 5.595523699924979e-07,
"loss": 3.1966,
"step": 722
},
{
"epoch": 2.3550488599348536,
"grad_norm": 6.54054069519043,
"learning_rate": 5.541868680155243e-07,
"loss": 3.5258,
"step": 723
},
{
"epoch": 2.3583061889250816,
"grad_norm": 5.455934524536133,
"learning_rate": 5.48844006287289e-07,
"loss": 3.0753,
"step": 724
},
{
"epoch": 2.3615635179153096,
"grad_norm": 6.472217559814453,
"learning_rate": 5.435238469739465e-07,
"loss": 3.6595,
"step": 725
},
{
"epoch": 2.3648208469055376,
"grad_norm": 5.907280445098877,
"learning_rate": 5.382264519774988e-07,
"loss": 3.126,
"step": 726
},
{
"epoch": 2.3680781758957656,
"grad_norm": 6.544402122497559,
"learning_rate": 5.329518829350788e-07,
"loss": 3.331,
"step": 727
},
{
"epoch": 2.3713355048859937,
"grad_norm": 6.544687271118164,
"learning_rate": 5.277002012182287e-07,
"loss": 2.8258,
"step": 728
},
{
"epoch": 2.3745928338762217,
"grad_norm": 5.569584369659424,
"learning_rate": 5.224714679321898e-07,
"loss": 3.6026,
"step": 729
},
{
"epoch": 2.3778501628664497,
"grad_norm": 5.201624393463135,
"learning_rate": 5.172657439151913e-07,
"loss": 3.3477,
"step": 730
},
{
"epoch": 2.3811074918566777,
"grad_norm": 6.729457855224609,
"learning_rate": 5.120830897377371e-07,
"loss": 2.9732,
"step": 731
},
{
"epoch": 2.3843648208469057,
"grad_norm": 6.149277210235596,
"learning_rate": 5.069235657019095e-07,
"loss": 2.8766,
"step": 732
},
{
"epoch": 2.3876221498371337,
"grad_norm": 6.09014368057251,
"learning_rate": 5.017872318406594e-07,
"loss": 3.522,
"step": 733
},
{
"epoch": 2.3908794788273617,
"grad_norm": 6.65205192565918,
"learning_rate": 4.966741479171147e-07,
"loss": 3.6316,
"step": 734
},
{
"epoch": 2.3941368078175898,
"grad_norm": 6.388942718505859,
"learning_rate": 4.915843734238789e-07,
"loss": 2.9395,
"step": 735
},
{
"epoch": 2.3973941368078178,
"grad_norm": 5.279838562011719,
"learning_rate": 4.865179675823442e-07,
"loss": 3.2396,
"step": 736
},
{
"epoch": 2.400651465798046,
"grad_norm": 5.323233127593994,
"learning_rate": 4.81474989341999e-07,
"loss": 3.6813,
"step": 737
},
{
"epoch": 2.403908794788274,
"grad_norm": 8.088998794555664,
"learning_rate": 4.764554973797417e-07,
"loss": 2.8468,
"step": 738
},
{
"epoch": 2.407166123778502,
"grad_norm": 4.269301414489746,
"learning_rate": 4.71459550099202e-07,
"loss": 3.512,
"step": 739
},
{
"epoch": 2.41042345276873,
"grad_norm": 4.38401985168457,
"learning_rate": 4.664872056300557e-07,
"loss": 3.0347,
"step": 740
},
{
"epoch": 2.413680781758958,
"grad_norm": 5.353303909301758,
"learning_rate": 4.6153852182735354e-07,
"loss": 3.5764,
"step": 741
},
{
"epoch": 2.416938110749186,
"grad_norm": 4.9428863525390625,
"learning_rate": 4.5661355627084375e-07,
"loss": 3.5818,
"step": 742
},
{
"epoch": 2.420195439739414,
"grad_norm": 6.521563529968262,
"learning_rate": 4.517123662643061e-07,
"loss": 2.7058,
"step": 743
},
{
"epoch": 2.423452768729642,
"grad_norm": 8.82436466217041,
"learning_rate": 4.468350088348811e-07,
"loss": 3.3391,
"step": 744
},
{
"epoch": 2.42671009771987,
"grad_norm": 6.4387078285217285,
"learning_rate": 4.419815407324102e-07,
"loss": 3.323,
"step": 745
},
{
"epoch": 2.429967426710098,
"grad_norm": 6.8446736335754395,
"learning_rate": 4.371520184287736e-07,
"loss": 2.9756,
"step": 746
},
{
"epoch": 2.433224755700326,
"grad_norm": 7.508485794067383,
"learning_rate": 4.323464981172315e-07,
"loss": 2.9233,
"step": 747
},
{
"epoch": 2.436482084690554,
"grad_norm": 4.2981791496276855,
"learning_rate": 4.275650357117747e-07,
"loss": 3.4482,
"step": 748
},
{
"epoch": 2.4397394136807815,
"grad_norm": 4.362144947052002,
"learning_rate": 4.228076868464695e-07,
"loss": 3.5062,
"step": 749
},
{
"epoch": 2.44299674267101,
"grad_norm": 4.476921081542969,
"learning_rate": 4.180745068748135e-07,
"loss": 3.6147,
"step": 750
},
{
"epoch": 2.4462540716612375,
"grad_norm": 4.931582450866699,
"learning_rate": 4.1336555086908895e-07,
"loss": 3.4297,
"step": 751
},
{
"epoch": 2.449511400651466,
"grad_norm": 5.8569207191467285,
"learning_rate": 4.086808736197254e-07,
"loss": 3.3144,
"step": 752
},
{
"epoch": 2.4527687296416936,
"grad_norm": 7.034333229064941,
"learning_rate": 4.0402052963465913e-07,
"loss": 3.6045,
"step": 753
},
{
"epoch": 2.456026058631922,
"grad_norm": 4.413856506347656,
"learning_rate": 3.9938457313869914e-07,
"loss": 3.3704,
"step": 754
},
{
"epoch": 2.4592833876221496,
"grad_norm": 5.120318412780762,
"learning_rate": 3.9477305807289895e-07,
"loss": 3.4678,
"step": 755
},
{
"epoch": 2.462540716612378,
"grad_norm": 6.234142780303955,
"learning_rate": 3.9018603809392484e-07,
"loss": 3.3536,
"step": 756
},
{
"epoch": 2.4657980456026056,
"grad_norm": 6.449676513671875,
"learning_rate": 3.856235665734359e-07,
"loss": 2.9766,
"step": 757
},
{
"epoch": 2.469055374592834,
"grad_norm": 4.504249095916748,
"learning_rate": 3.8108569659745907e-07,
"loss": 3.2879,
"step": 758
},
{
"epoch": 2.4723127035830617,
"grad_norm": 8.269672393798828,
"learning_rate": 3.7657248096577504e-07,
"loss": 2.8618,
"step": 759
},
{
"epoch": 2.47557003257329,
"grad_norm": 5.094267845153809,
"learning_rate": 3.720839721913011e-07,
"loss": 3.4205,
"step": 760
},
{
"epoch": 2.4788273615635177,
"grad_norm": 6.253308296203613,
"learning_rate": 3.67620222499481e-07,
"loss": 3.1134,
"step": 761
},
{
"epoch": 2.482084690553746,
"grad_norm": 5.233059406280518,
"learning_rate": 3.631812838276791e-07,
"loss": 3.4115,
"step": 762
},
{
"epoch": 2.4853420195439737,
"grad_norm": 5.0791826248168945,
"learning_rate": 3.587672078245716e-07,
"loss": 3.5843,
"step": 763
},
{
"epoch": 2.488599348534202,
"grad_norm": 6.180685520172119,
"learning_rate": 3.543780458495513e-07,
"loss": 3.4273,
"step": 764
},
{
"epoch": 2.4918566775244297,
"grad_norm": 8.303179740905762,
"learning_rate": 3.5001384897212556e-07,
"loss": 3.7289,
"step": 765
},
{
"epoch": 2.495114006514658,
"grad_norm": 7.029627799987793,
"learning_rate": 3.456746679713238e-07,
"loss": 2.7946,
"step": 766
},
{
"epoch": 2.4983713355048858,
"grad_norm": 5.279702663421631,
"learning_rate": 3.41360553335108e-07,
"loss": 3.1989,
"step": 767
},
{
"epoch": 2.5016286644951142,
"grad_norm": 7.8925933837890625,
"learning_rate": 3.3707155525978116e-07,
"loss": 3.0259,
"step": 768
},
{
"epoch": 2.504885993485342,
"grad_norm": 5.007329940795898,
"learning_rate": 3.328077236494087e-07,
"loss": 3.4699,
"step": 769
},
{
"epoch": 2.5081433224755703,
"grad_norm": 5.810083866119385,
"learning_rate": 3.2856910811523256e-07,
"loss": 3.4737,
"step": 770
},
{
"epoch": 2.511400651465798,
"grad_norm": 4.268073081970215,
"learning_rate": 3.243557579750986e-07,
"loss": 3.5634,
"step": 771
},
{
"epoch": 2.5146579804560263,
"grad_norm": 6.415236473083496,
"learning_rate": 3.2016772225287844e-07,
"loss": 3.0534,
"step": 772
},
{
"epoch": 2.517915309446254,
"grad_norm": 7.551133155822754,
"learning_rate": 3.16005049677903e-07,
"loss": 2.9791,
"step": 773
},
{
"epoch": 2.5211726384364823,
"grad_norm": 5.403765678405762,
"learning_rate": 3.118677886843921e-07,
"loss": 3.4118,
"step": 774
},
{
"epoch": 2.52442996742671,
"grad_norm": 6.152907371520996,
"learning_rate": 3.077559874108937e-07,
"loss": 3.1713,
"step": 775
},
{
"epoch": 2.5276872964169383,
"grad_norm": 7.548793315887451,
"learning_rate": 3.03669693699723e-07,
"loss": 2.837,
"step": 776
},
{
"epoch": 2.530944625407166,
"grad_norm": 6.374703407287598,
"learning_rate": 2.996089550964029e-07,
"loss": 3.3303,
"step": 777
},
{
"epoch": 2.5342019543973944,
"grad_norm": 4.437128067016602,
"learning_rate": 2.9557381884911667e-07,
"loss": 3.4547,
"step": 778
},
{
"epoch": 2.537459283387622,
"grad_norm": 5.333934307098389,
"learning_rate": 2.9156433190815155e-07,
"loss": 3.5414,
"step": 779
},
{
"epoch": 2.5407166123778504,
"grad_norm": 6.211347579956055,
"learning_rate": 2.875805409253582e-07,
"loss": 3.0307,
"step": 780
},
{
"epoch": 2.543973941368078,
"grad_norm": 5.27606725692749,
"learning_rate": 2.836224922536035e-07,
"loss": 3.1969,
"step": 781
},
{
"epoch": 2.5472312703583064,
"grad_norm": 6.280512809753418,
"learning_rate": 2.796902319462344e-07,
"loss": 2.9867,
"step": 782
},
{
"epoch": 2.550488599348534,
"grad_norm": 4.462002277374268,
"learning_rate": 2.7578380575654096e-07,
"loss": 3.4119,
"step": 783
},
{
"epoch": 2.5537459283387625,
"grad_norm": 5.325382709503174,
"learning_rate": 2.71903259137222e-07,
"loss": 3.1448,
"step": 784
},
{
"epoch": 2.55700325732899,
"grad_norm": 7.347408771514893,
"learning_rate": 2.680486372398605e-07,
"loss": 3.0755,
"step": 785
},
{
"epoch": 2.5602605863192185,
"grad_norm": 5.137568950653076,
"learning_rate": 2.642199849143937e-07,
"loss": 3.6602,
"step": 786
},
{
"epoch": 2.563517915309446,
"grad_norm": 5.19792366027832,
"learning_rate": 2.604173467085949e-07,
"loss": 3.1483,
"step": 787
},
{
"epoch": 2.5667752442996745,
"grad_norm": 5.868303298950195,
"learning_rate": 2.566407668675519e-07,
"loss": 3.1571,
"step": 788
},
{
"epoch": 2.570032573289902,
"grad_norm": 7.622596263885498,
"learning_rate": 2.5289028933315587e-07,
"loss": 2.7567,
"step": 789
},
{
"epoch": 2.5732899022801305,
"grad_norm": 5.980817794799805,
"learning_rate": 2.4916595774358704e-07,
"loss": 3.3892,
"step": 790
},
{
"epoch": 2.576547231270358,
"grad_norm": 5.690640926361084,
"learning_rate": 2.4546781543280716e-07,
"loss": 2.9866,
"step": 791
},
{
"epoch": 2.5798045602605866,
"grad_norm": 7.25706148147583,
"learning_rate": 2.4179590543005835e-07,
"loss": 3.3073,
"step": 792
},
{
"epoch": 2.583061889250814,
"grad_norm": 8.114654541015625,
"learning_rate": 2.3815027045935774e-07,
"loss": 2.6904,
"step": 793
},
{
"epoch": 2.5863192182410426,
"grad_norm": 6.251055717468262,
"learning_rate": 2.345309529390047e-07,
"loss": 3.1565,
"step": 794
},
{
"epoch": 2.58957654723127,
"grad_norm": 3.57120418548584,
"learning_rate": 2.3093799498108388e-07,
"loss": 3.4674,
"step": 795
},
{
"epoch": 2.5928338762214986,
"grad_norm": 5.681574821472168,
"learning_rate": 2.2737143839097893e-07,
"loss": 3.298,
"step": 796
},
{
"epoch": 2.596091205211726,
"grad_norm": 5.519050121307373,
"learning_rate": 2.238313246668808e-07,
"loss": 2.9638,
"step": 797
},
{
"epoch": 2.5993485342019547,
"grad_norm": 5.44046688079834,
"learning_rate": 2.2031769499931105e-07,
"loss": 3.4083,
"step": 798
},
{
"epoch": 2.6026058631921822,
"grad_norm": 4.957735538482666,
"learning_rate": 2.168305902706383e-07,
"loss": 3.4802,
"step": 799
},
{
"epoch": 2.6058631921824107,
"grad_norm": 10.058741569519043,
"learning_rate": 2.13370051054603e-07,
"loss": 2.2093,
"step": 800
},
{
"epoch": 2.6091205211726383,
"grad_norm": 5.969457626342773,
"learning_rate": 2.0993611761584765e-07,
"loss": 3.3213,
"step": 801
},
{
"epoch": 2.6123778501628667,
"grad_norm": 6.594350814819336,
"learning_rate": 2.0652882990944535e-07,
"loss": 2.8912,
"step": 802
},
{
"epoch": 2.6156351791530943,
"grad_norm": 7.069642543792725,
"learning_rate": 2.031482275804375e-07,
"loss": 3.3256,
"step": 803
},
{
"epoch": 2.6188925081433228,
"grad_norm": 4.171195983886719,
"learning_rate": 1.9979434996337005e-07,
"loss": 3.3647,
"step": 804
},
{
"epoch": 2.6221498371335503,
"grad_norm": 5.1055378913879395,
"learning_rate": 1.964672360818387e-07,
"loss": 3.5159,
"step": 805
},
{
"epoch": 2.6254071661237783,
"grad_norm": 6.381522178649902,
"learning_rate": 1.9316692464803276e-07,
"loss": 3.4417,
"step": 806
},
{
"epoch": 2.6286644951140063,
"grad_norm": 5.769819259643555,
"learning_rate": 1.898934540622846e-07,
"loss": 3.4975,
"step": 807
},
{
"epoch": 2.6319218241042344,
"grad_norm": 8.080145835876465,
"learning_rate": 1.866468624126236e-07,
"loss": 2.8327,
"step": 808
},
{
"epoch": 2.6351791530944624,
"grad_norm": 5.503101825714111,
"learning_rate": 1.834271874743332e-07,
"loss": 3.3785,
"step": 809
},
{
"epoch": 2.6384364820846904,
"grad_norm": 5.769625663757324,
"learning_rate": 1.802344667095113e-07,
"loss": 3.5412,
"step": 810
},
{
"epoch": 2.6416938110749184,
"grad_norm": 7.4825005531311035,
"learning_rate": 1.7706873726663383e-07,
"loss": 2.9187,
"step": 811
},
{
"epoch": 2.6449511400651464,
"grad_norm": 5.458498001098633,
"learning_rate": 1.7393003598012243e-07,
"loss": 3.3779,
"step": 812
},
{
"epoch": 2.6482084690553744,
"grad_norm": 5.304836750030518,
"learning_rate": 1.7081839936991724e-07,
"loss": 3.5122,
"step": 813
},
{
"epoch": 2.6514657980456025,
"grad_norm": 5.748223304748535,
"learning_rate": 1.6773386364104972e-07,
"loss": 3.061,
"step": 814
},
{
"epoch": 2.6547231270358305,
"grad_norm": 4.899912357330322,
"learning_rate": 1.6467646468322358e-07,
"loss": 3.3704,
"step": 815
},
{
"epoch": 2.6579804560260585,
"grad_norm": 7.006373882293701,
"learning_rate": 1.6164623807039538e-07,
"loss": 3.1052,
"step": 816
},
{
"epoch": 2.6612377850162865,
"grad_norm": 6.537415981292725,
"learning_rate": 1.586432190603626e-07,
"loss": 3.1061,
"step": 817
},
{
"epoch": 2.6644951140065145,
"grad_norm": 6.889453411102295,
"learning_rate": 1.556674425943519e-07,
"loss": 2.9351,
"step": 818
},
{
"epoch": 2.6677524429967425,
"grad_norm": 6.120772361755371,
"learning_rate": 1.5271894329661223e-07,
"loss": 3.5839,
"step": 819
},
{
"epoch": 2.6710097719869705,
"grad_norm": 3.951160192489624,
"learning_rate": 1.4979775547401376e-07,
"loss": 3.4196,
"step": 820
},
{
"epoch": 2.6742671009771986,
"grad_norm": 6.393338203430176,
"learning_rate": 1.469039131156466e-07,
"loss": 3.1414,
"step": 821
},
{
"epoch": 2.6775244299674266,
"grad_norm": 4.9038896560668945,
"learning_rate": 1.440374498924277e-07,
"loss": 3.5466,
"step": 822
},
{
"epoch": 2.6807817589576546,
"grad_norm": 7.4714484214782715,
"learning_rate": 1.4119839915670563e-07,
"loss": 2.9009,
"step": 823
},
{
"epoch": 2.6840390879478826,
"grad_norm": 8.087553977966309,
"learning_rate": 1.3838679394187705e-07,
"loss": 2.9414,
"step": 824
},
{
"epoch": 2.6872964169381106,
"grad_norm": 5.134854793548584,
"learning_rate": 1.3560266696199864e-07,
"loss": 3.4754,
"step": 825
},
{
"epoch": 2.6905537459283386,
"grad_norm": 5.320379734039307,
"learning_rate": 1.3284605061140764e-07,
"loss": 3.5239,
"step": 826
},
{
"epoch": 2.6938110749185666,
"grad_norm": 5.643535614013672,
"learning_rate": 1.3011697696434565e-07,
"loss": 3.3429,
"step": 827
},
{
"epoch": 2.6970684039087947,
"grad_norm": 7.376565933227539,
"learning_rate": 1.274154777745837e-07,
"loss": 3.3408,
"step": 828
},
{
"epoch": 2.7003257328990227,
"grad_norm": 6.498621940612793,
"learning_rate": 1.24741584475056e-07,
"loss": 3.0349,
"step": 829
},
{
"epoch": 2.7035830618892507,
"grad_norm": 6.7927751541137695,
"learning_rate": 1.220953281774895e-07,
"loss": 3.3748,
"step": 830
},
{
"epoch": 2.7068403908794787,
"grad_norm": 5.855138301849365,
"learning_rate": 1.1947673967204643e-07,
"loss": 3.5825,
"step": 831
},
{
"epoch": 2.7100977198697067,
"grad_norm": 6.454092979431152,
"learning_rate": 1.1688584942696369e-07,
"loss": 3.4411,
"step": 832
},
{
"epoch": 2.7133550488599347,
"grad_norm": 7.363604545593262,
"learning_rate": 1.1432268758819809e-07,
"loss": 3.3443,
"step": 833
},
{
"epoch": 2.7166123778501627,
"grad_norm": 4.227643966674805,
"learning_rate": 1.1178728397907734e-07,
"loss": 3.3621,
"step": 834
},
{
"epoch": 2.7198697068403908,
"grad_norm": 4.559384346008301,
"learning_rate": 1.0927966809995084e-07,
"loss": 3.3748,
"step": 835
},
{
"epoch": 2.7231270358306188,
"grad_norm": 6.832982063293457,
"learning_rate": 1.0679986912784879e-07,
"loss": 3.1433,
"step": 836
},
{
"epoch": 2.726384364820847,
"grad_norm": 7.431112289428711,
"learning_rate": 1.043479159161398e-07,
"loss": 3.0649,
"step": 837
},
{
"epoch": 2.729641693811075,
"grad_norm": 7.875669479370117,
"learning_rate": 1.019238369941991e-07,
"loss": 2.4024,
"step": 838
},
{
"epoch": 2.732899022801303,
"grad_norm": 8.269497871398926,
"learning_rate": 9.952766056707225e-08,
"loss": 2.7252,
"step": 839
},
{
"epoch": 2.736156351791531,
"grad_norm": 7.376986503601074,
"learning_rate": 9.715941451515027e-08,
"loss": 3.1179,
"step": 840
},
{
"epoch": 2.739413680781759,
"grad_norm": 4.784783840179443,
"learning_rate": 9.481912639384388e-08,
"loss": 3.629,
"step": 841
},
{
"epoch": 2.742671009771987,
"grad_norm": 7.07935905456543,
"learning_rate": 9.25068234332624e-08,
"loss": 2.742,
"step": 842
},
{
"epoch": 2.745928338762215,
"grad_norm": 6.771409034729004,
"learning_rate": 9.02225325378986e-08,
"loss": 3.0037,
"step": 843
},
{
"epoch": 2.749185667752443,
"grad_norm": 6.435185432434082,
"learning_rate": 8.796628028631321e-08,
"loss": 2.9888,
"step": 844
},
{
"epoch": 2.752442996742671,
"grad_norm": 6.06053352355957,
"learning_rate": 8.57380929308288e-08,
"loss": 3.5483,
"step": 845
},
{
"epoch": 2.755700325732899,
"grad_norm": 6.068765640258789,
"learning_rate": 8.353799639722076e-08,
"loss": 3.0495,
"step": 846
},
{
"epoch": 2.758957654723127,
"grad_norm": 4.580106258392334,
"learning_rate": 8.136601628441876e-08,
"loss": 3.6901,
"step": 847
},
{
"epoch": 2.762214983713355,
"grad_norm": 4.707021713256836,
"learning_rate": 7.922217786420772e-08,
"loss": 3.622,
"step": 848
},
{
"epoch": 2.765472312703583,
"grad_norm": 6.918179988861084,
"learning_rate": 7.710650608093257e-08,
"loss": 3.1714,
"step": 849
},
{
"epoch": 2.768729641693811,
"grad_norm": 7.90523624420166,
"learning_rate": 7.501902555120982e-08,
"loss": 2.7148,
"step": 850
},
{
"epoch": 2.771986970684039,
"grad_norm": 5.939011096954346,
"learning_rate": 7.295976056364034e-08,
"loss": 3.4794,
"step": 851
},
{
"epoch": 2.775244299674267,
"grad_norm": 6.008779525756836,
"learning_rate": 7.092873507852676e-08,
"loss": 3.116,
"step": 852
},
{
"epoch": 2.778501628664495,
"grad_norm": 4.6474409103393555,
"learning_rate": 6.892597272759483e-08,
"loss": 3.323,
"step": 853
},
{
"epoch": 2.781758957654723,
"grad_norm": 7.1137213706970215,
"learning_rate": 6.695149681371804e-08,
"loss": 3.0716,
"step": 854
},
{
"epoch": 2.785016286644951,
"grad_norm": 8.155078887939453,
"learning_rate": 6.500533031064737e-08,
"loss": 2.8788,
"step": 855
},
{
"epoch": 2.788273615635179,
"grad_norm": 6.032046318054199,
"learning_rate": 6.30874958627431e-08,
"loss": 3.2304,
"step": 856
},
{
"epoch": 2.791530944625407,
"grad_norm": 6.509833812713623,
"learning_rate": 6.119801578471196e-08,
"loss": 3.0426,
"step": 857
},
{
"epoch": 2.794788273615635,
"grad_norm": 5.527968406677246,
"learning_rate": 5.9336912061346284e-08,
"loss": 3.3721,
"step": 858
},
{
"epoch": 2.798045602605863,
"grad_norm": 5.313396453857422,
"learning_rate": 5.750420634727083e-08,
"loss": 3.2306,
"step": 859
},
{
"epoch": 2.801302931596091,
"grad_norm": 6.423414707183838,
"learning_rate": 5.5699919966686886e-08,
"loss": 2.9182,
"step": 860
},
{
"epoch": 2.804560260586319,
"grad_norm": 5.296036243438721,
"learning_rate": 5.3924073913128874e-08,
"loss": 3.2474,
"step": 861
},
{
"epoch": 2.807817589576547,
"grad_norm": 5.368740081787109,
"learning_rate": 5.217668884921506e-08,
"loss": 3.5761,
"step": 862
},
{
"epoch": 2.811074918566775,
"grad_norm": 4.735677242279053,
"learning_rate": 5.0457785106411414e-08,
"loss": 3.2216,
"step": 863
},
{
"epoch": 2.814332247557003,
"grad_norm": 7.0894575119018555,
"learning_rate": 4.876738268479342e-08,
"loss": 2.87,
"step": 864
},
{
"epoch": 2.817589576547231,
"grad_norm": 4.9213948249816895,
"learning_rate": 4.710550125281155e-08,
"loss": 3.1997,
"step": 865
},
{
"epoch": 2.820846905537459,
"grad_norm": 6.972508430480957,
"learning_rate": 4.54721601470659e-08,
"loss": 3.2063,
"step": 866
},
{
"epoch": 2.824104234527687,
"grad_norm": 8.271316528320312,
"learning_rate": 4.3867378372078604e-08,
"loss": 2.9345,
"step": 867
},
{
"epoch": 2.8273615635179152,
"grad_norm": 4.559358596801758,
"learning_rate": 4.2291174600073425e-08,
"loss": 3.5209,
"step": 868
},
{
"epoch": 2.8306188925081432,
"grad_norm": 4.966446876525879,
"learning_rate": 4.074356717075845e-08,
"loss": 3.3913,
"step": 869
},
{
"epoch": 2.8338762214983713,
"grad_norm": 4.800805568695068,
"learning_rate": 3.9224574091113745e-08,
"loss": 3.4585,
"step": 870
},
{
"epoch": 2.8371335504885993,
"grad_norm": 5.908380508422852,
"learning_rate": 3.773421303518043e-08,
"loss": 3.2263,
"step": 871
},
{
"epoch": 2.8403908794788273,
"grad_norm": 6.047445297241211,
"learning_rate": 3.627250134385474e-08,
"loss": 3.0944,
"step": 872
},
{
"epoch": 2.8436482084690553,
"grad_norm": 6.993686199188232,
"learning_rate": 3.4839456024688686e-08,
"loss": 3.0605,
"step": 873
},
{
"epoch": 2.8469055374592833,
"grad_norm": 5.375525951385498,
"learning_rate": 3.343509375168863e-08,
"loss": 3.5609,
"step": 874
},
{
"epoch": 2.8501628664495113,
"grad_norm": 5.510409355163574,
"learning_rate": 3.205943086512508e-08,
"loss": 3.0868,
"step": 875
},
{
"epoch": 2.8534201954397393,
"grad_norm": 7.239731788635254,
"learning_rate": 3.0712483371339306e-08,
"loss": 2.7588,
"step": 876
},
{
"epoch": 2.8566775244299674,
"grad_norm": 8.21719741821289,
"learning_rate": 2.939426694255898e-08,
"loss": 2.9941,
"step": 877
},
{
"epoch": 2.8599348534201954,
"grad_norm": 7.529686450958252,
"learning_rate": 2.8104796916715304e-08,
"loss": 3.0337,
"step": 878
},
{
"epoch": 2.8631921824104234,
"grad_norm": 4.830805778503418,
"learning_rate": 2.6844088297264258e-08,
"loss": 3.1156,
"step": 879
},
{
"epoch": 2.8664495114006514,
"grad_norm": 4.539928913116455,
"learning_rate": 2.5612155753013125e-08,
"loss": 3.342,
"step": 880
},
{
"epoch": 2.8697068403908794,
"grad_norm": 7.2940874099731445,
"learning_rate": 2.4409013617947842e-08,
"loss": 2.9825,
"step": 881
},
{
"epoch": 2.8729641693811074,
"grad_norm": 5.533882141113281,
"learning_rate": 2.3234675891068147e-08,
"loss": 3.2988,
"step": 882
},
{
"epoch": 2.8762214983713354,
"grad_norm": 5.131170749664307,
"learning_rate": 2.2089156236224096e-08,
"loss": 3.7499,
"step": 883
},
{
"epoch": 2.8794788273615635,
"grad_norm": 6.615204811096191,
"learning_rate": 2.097246798195618e-08,
"loss": 3.1587,
"step": 884
},
{
"epoch": 2.8827361563517915,
"grad_norm": 5.77875280380249,
"learning_rate": 1.988462412134129e-08,
"loss": 3.1013,
"step": 885
},
{
"epoch": 2.8859934853420195,
"grad_norm": 7.753470420837402,
"learning_rate": 1.8825637311841727e-08,
"loss": 2.9051,
"step": 886
},
{
"epoch": 2.8892508143322475,
"grad_norm": 6.783411026000977,
"learning_rate": 1.7795519875157262e-08,
"loss": 3.0544,
"step": 887
},
{
"epoch": 2.8925081433224755,
"grad_norm": 6.783967971801758,
"learning_rate": 1.6794283797080813e-08,
"loss": 3.123,
"step": 888
},
{
"epoch": 2.8957654723127035,
"grad_norm": 5.180431842803955,
"learning_rate": 1.5821940727361874e-08,
"loss": 3.2508,
"step": 889
},
{
"epoch": 2.8990228013029316,
"grad_norm": 4.254175186157227,
"learning_rate": 1.487850197956775e-08,
"loss": 3.6052,
"step": 890
},
{
"epoch": 2.9022801302931596,
"grad_norm": 6.317099571228027,
"learning_rate": 1.3963978530954491e-08,
"loss": 3.1259,
"step": 891
},
{
"epoch": 2.9055374592833876,
"grad_norm": 6.046303749084473,
"learning_rate": 1.3078381022336717e-08,
"loss": 3.0762,
"step": 892
},
{
"epoch": 2.9087947882736156,
"grad_norm": 4.559985160827637,
"learning_rate": 1.2221719757966877e-08,
"loss": 3.5281,
"step": 893
},
{
"epoch": 2.9120521172638436,
"grad_norm": 6.569419860839844,
"learning_rate": 1.139400470541202e-08,
"loss": 3.0477,
"step": 894
},
{
"epoch": 2.9153094462540716,
"grad_norm": 6.211750507354736,
"learning_rate": 1.0595245495439999e-08,
"loss": 3.3027,
"step": 895
},
{
"epoch": 2.9185667752442996,
"grad_norm": 7.381767749786377,
"learning_rate": 9.825451421907328e-09,
"loss": 3.2725,
"step": 896
},
{
"epoch": 2.9218241042345277,
"grad_norm": 7.202872276306152,
"learning_rate": 9.084631441649837e-09,
"loss": 3.1016,
"step": 897
},
{
"epoch": 2.9250814332247557,
"grad_norm": 4.793407440185547,
"learning_rate": 8.372794174379418e-09,
"loss": 3.5427,
"step": 898
},
{
"epoch": 2.9283387622149837,
"grad_norm": 6.582172870635986,
"learning_rate": 7.689947902583816e-09,
"loss": 3.0381,
"step": 899
},
{
"epoch": 2.9315960912052117,
"grad_norm": 5.219425201416016,
"learning_rate": 7.03610057142895e-09,
"loss": 3.5185,
"step": 900
},
{
"epoch": 2.9348534201954397,
"grad_norm": 5.58571195602417,
"learning_rate": 6.411259788668967e-09,
"loss": 3.4211,
"step": 901
},
{
"epoch": 2.9381107491856677,
"grad_norm": 4.745430946350098,
"learning_rate": 5.815432824554379e-09,
"loss": 3.595,
"step": 902
},
{
"epoch": 2.9413680781758957,
"grad_norm": 6.919724941253662,
"learning_rate": 5.2486266117510176e-09,
"loss": 3.5303,
"step": 903
},
{
"epoch": 2.9446254071661238,
"grad_norm": 6.697390556335449,
"learning_rate": 4.710847745256209e-09,
"loss": 3.3494,
"step": 904
},
{
"epoch": 2.9478827361563518,
"grad_norm": 4.164398670196533,
"learning_rate": 4.202102482324666e-09,
"loss": 3.5631,
"step": 905
},
{
"epoch": 2.95114006514658,
"grad_norm": 9.10593032836914,
"learning_rate": 3.7223967423935524e-09,
"loss": 3.1123,
"step": 906
},
{
"epoch": 2.954397394136808,
"grad_norm": 4.9842634201049805,
"learning_rate": 3.271736107015033e-09,
"loss": 3.534,
"step": 907
},
{
"epoch": 2.957654723127036,
"grad_norm": 5.11368989944458,
"learning_rate": 2.850125819790772e-09,
"loss": 3.0846,
"step": 908
},
{
"epoch": 2.960912052117264,
"grad_norm": 6.25923490524292,
"learning_rate": 2.45757078631087e-09,
"loss": 3.0751,
"step": 909
},
{
"epoch": 2.964169381107492,
"grad_norm": 5.586625576019287,
"learning_rate": 2.0940755740969654e-09,
"loss": 3.3112,
"step": 910
},
{
"epoch": 2.96742671009772,
"grad_norm": 4.510610580444336,
"learning_rate": 1.7596444125489442e-09,
"loss": 3.4386,
"step": 911
},
{
"epoch": 2.970684039087948,
"grad_norm": 6.0876617431640625,
"learning_rate": 1.4542811928963673e-09,
"loss": 3.3032,
"step": 912
},
{
"epoch": 2.973941368078176,
"grad_norm": 7.20369815826416,
"learning_rate": 1.1779894681515635e-09,
"loss": 3.3911,
"step": 913
},
{
"epoch": 2.977198697068404,
"grad_norm": 5.171374797821045,
"learning_rate": 9.307724530702166e-10,
"loss": 3.2326,
"step": 914
},
{
"epoch": 2.980456026058632,
"grad_norm": 5.716080665588379,
"learning_rate": 7.12633024113063e-10,
"loss": 3.4967,
"step": 915
},
{
"epoch": 2.98371335504886,
"grad_norm": 7.97988748550415,
"learning_rate": 5.235737194120294e-10,
"loss": 2.6134,
"step": 916
},
{
"epoch": 2.986970684039088,
"grad_norm": 4.841028213500977,
"learning_rate": 3.6359673874164505e-10,
"loss": 3.2698,
"step": 917
},
{
"epoch": 2.990228013029316,
"grad_norm": 6.722901821136475,
"learning_rate": 2.3270394349267367e-10,
"loss": 2.7767,
"step": 918
},
{
"epoch": 2.993485342019544,
"grad_norm": 6.788003921508789,
"learning_rate": 1.3089685665046426e-10,
"loss": 2.9311,
"step": 919
},
{
"epoch": 2.996742671009772,
"grad_norm": 5.636250019073486,
"learning_rate": 5.817666277802003e-11,
"loss": 3.2762,
"step": 920
},
{
"epoch": 3.0,
"grad_norm": 4.332888126373291,
"learning_rate": 1.4544208001288085e-11,
"loss": 3.3124,
"step": 921
}
],
"logging_steps": 1,
"max_steps": 921,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8679148719759360.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}