{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5349023803155925,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010698047606311847,
      "grad_norm": 1.7288448810577393,
      "learning_rate": 2.02e-06,
      "loss": 2.6627,
      "step": 1
    },
    {
      "epoch": 0.0021396095212623694,
      "grad_norm": 1.7482303380966187,
      "learning_rate": 4.04e-06,
      "loss": 2.8135,
      "step": 2
    },
    {
      "epoch": 0.0032094142818935543,
      "grad_norm": 2.0931556224823,
      "learning_rate": 6.06e-06,
      "loss": 2.6959,
      "step": 3
    },
    {
      "epoch": 0.004279219042524739,
      "grad_norm": 2.3097078800201416,
      "learning_rate": 8.08e-06,
      "loss": 2.8474,
      "step": 4
    },
    {
      "epoch": 0.005349023803155924,
      "grad_norm": 2.0344951152801514,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 2.7842,
      "step": 5
    },
    {
      "epoch": 0.006418828563787109,
      "grad_norm": 1.9919047355651855,
      "learning_rate": 1.212e-05,
      "loss": 3.089,
      "step": 6
    },
    {
      "epoch": 0.007488633324418294,
      "grad_norm": 0.9272979497909546,
      "learning_rate": 1.4140000000000002e-05,
      "loss": 2.3151,
      "step": 7
    },
    {
      "epoch": 0.008558438085049478,
      "grad_norm": 1.6376522779464722,
      "learning_rate": 1.616e-05,
      "loss": 2.824,
      "step": 8
    },
    {
      "epoch": 0.009628242845680663,
      "grad_norm": 1.8005309104919434,
      "learning_rate": 1.818e-05,
      "loss": 2.6834,
      "step": 9
    },
    {
      "epoch": 0.010698047606311848,
      "grad_norm": 1.8941646814346313,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 2.6678,
      "step": 10
    },
    {
      "epoch": 0.011767852366943032,
      "grad_norm": 1.860886573791504,
      "learning_rate": 2.222e-05,
      "loss": 2.7923,
      "step": 11
    },
    {
      "epoch": 0.012837657127574217,
      "grad_norm": 1.6937692165374756,
      "learning_rate": 2.424e-05,
      "loss": 2.4482,
      "step": 12
    },
    {
      "epoch": 0.013907461888205403,
      "grad_norm": 0.988033652305603,
      "learning_rate": 2.6260000000000003e-05,
      "loss": 2.3207,
      "step": 13
    },
    {
      "epoch": 0.014977266648836588,
      "grad_norm": 1.067140817642212,
      "learning_rate": 2.8280000000000004e-05,
      "loss": 2.3765,
      "step": 14
    },
    {
      "epoch": 0.016047071409467772,
      "grad_norm": 1.0069628953933716,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 2.2754,
      "step": 15
    },
    {
      "epoch": 0.017116876170098955,
      "grad_norm": 0.8966661095619202,
      "learning_rate": 3.232e-05,
      "loss": 1.9436,
      "step": 16
    },
    {
      "epoch": 0.018186680930730142,
      "grad_norm": 0.8621421456336975,
      "learning_rate": 3.434e-05,
      "loss": 2.0105,
      "step": 17
    },
    {
      "epoch": 0.019256485691361326,
      "grad_norm": 0.6790559887886047,
      "learning_rate": 3.636e-05,
      "loss": 2.1134,
      "step": 18
    },
    {
      "epoch": 0.02032629045199251,
      "grad_norm": 0.8534191250801086,
      "learning_rate": 3.838e-05,
      "loss": 2.6721,
      "step": 19
    },
    {
      "epoch": 0.021396095212623697,
      "grad_norm": 0.6541608572006226,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 2.5973,
      "step": 20
    },
    {
      "epoch": 0.02246589997325488,
      "grad_norm": 0.5651280283927917,
      "learning_rate": 4.242e-05,
      "loss": 2.068,
      "step": 21
    },
    {
      "epoch": 0.023535704733886064,
      "grad_norm": 0.6473293900489807,
      "learning_rate": 4.444e-05,
      "loss": 2.2025,
      "step": 22
    },
    {
      "epoch": 0.02460550949451725,
      "grad_norm": 0.6129287481307983,
      "learning_rate": 4.6460000000000006e-05,
      "loss": 2.2035,
      "step": 23
    },
    {
      "epoch": 0.025675314255148435,
      "grad_norm": 0.8293673992156982,
      "learning_rate": 4.848e-05,
      "loss": 2.1585,
      "step": 24
    },
    {
      "epoch": 0.026745119015779622,
      "grad_norm": 0.6636125445365906,
      "learning_rate": 5.05e-05,
      "loss": 1.9494,
      "step": 25
    },
    {
      "epoch": 0.027814923776410806,
      "grad_norm": 0.7317002415657043,
      "learning_rate": 5.2520000000000005e-05,
      "loss": 2.3731,
      "step": 26
    },
    {
      "epoch": 0.02888472853704199,
      "grad_norm": 0.5912388563156128,
      "learning_rate": 5.454e-05,
      "loss": 1.9946,
      "step": 27
    },
    {
      "epoch": 0.029954533297673176,
      "grad_norm": 0.509111225605011,
      "learning_rate": 5.656000000000001e-05,
      "loss": 1.906,
      "step": 28
    },
    {
      "epoch": 0.03102433805830436,
      "grad_norm": 0.5736451745033264,
      "learning_rate": 5.858e-05,
      "loss": 2.1449,
      "step": 29
    },
    {
      "epoch": 0.032094142818935543,
      "grad_norm": 0.5325722098350525,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 1.8039,
      "step": 30
    },
    {
      "epoch": 0.03316394757956673,
      "grad_norm": 0.6035159826278687,
      "learning_rate": 6.262000000000001e-05,
      "loss": 2.0611,
      "step": 31
    },
    {
      "epoch": 0.03423375234019791,
      "grad_norm": 0.7616627812385559,
      "learning_rate": 6.464e-05,
      "loss": 2.519,
      "step": 32
    },
    {
      "epoch": 0.0353035571008291,
      "grad_norm": 0.5022464394569397,
      "learning_rate": 6.666e-05,
      "loss": 2.1251,
      "step": 33
    },
    {
      "epoch": 0.036373361861460285,
      "grad_norm": 0.5941365957260132,
      "learning_rate": 6.868e-05,
      "loss": 2.0392,
      "step": 34
    },
    {
      "epoch": 0.03744316662209147,
      "grad_norm": 0.5984216928482056,
      "learning_rate": 7.07e-05,
      "loss": 2.0259,
      "step": 35
    },
    {
      "epoch": 0.03851297138272265,
      "grad_norm": 0.5933278799057007,
      "learning_rate": 7.272e-05,
      "loss": 2.0123,
      "step": 36
    },
    {
      "epoch": 0.039582776143353836,
      "grad_norm": 0.5975232720375061,
      "learning_rate": 7.474e-05,
      "loss": 2.0471,
      "step": 37
    },
    {
      "epoch": 0.04065258090398502,
      "grad_norm": 0.7600012421607971,
      "learning_rate": 7.676e-05,
      "loss": 1.9952,
      "step": 38
    },
    {
      "epoch": 0.04172238566461621,
      "grad_norm": 0.641670286655426,
      "learning_rate": 7.878e-05,
      "loss": 2.0748,
      "step": 39
    },
    {
      "epoch": 0.042792190425247394,
      "grad_norm": 0.616679310798645,
      "learning_rate": 8.080000000000001e-05,
      "loss": 2.0396,
      "step": 40
    },
    {
      "epoch": 0.04386199518587858,
      "grad_norm": 0.7081180214881897,
      "learning_rate": 8.282e-05,
      "loss": 1.7703,
      "step": 41
    },
    {
      "epoch": 0.04493179994650976,
      "grad_norm": 0.7007136940956116,
      "learning_rate": 8.484e-05,
      "loss": 2.2135,
      "step": 42
    },
    {
      "epoch": 0.046001604707140945,
      "grad_norm": 0.729378879070282,
      "learning_rate": 8.686e-05,
      "loss": 1.8132,
      "step": 43
    },
    {
      "epoch": 0.04707140946777213,
      "grad_norm": 0.6952872276306152,
      "learning_rate": 8.888e-05,
      "loss": 2.2158,
      "step": 44
    },
    {
      "epoch": 0.04814121422840332,
      "grad_norm": 0.605029284954071,
      "learning_rate": 9.09e-05,
      "loss": 2.333,
      "step": 45
    },
    {
      "epoch": 0.0492110189890345,
      "grad_norm": 0.6177989840507507,
      "learning_rate": 9.292000000000001e-05,
      "loss": 1.6994,
      "step": 46
    },
    {
      "epoch": 0.050280823749665686,
      "grad_norm": 0.5986810922622681,
      "learning_rate": 9.494e-05,
      "loss": 1.8176,
      "step": 47
    },
    {
      "epoch": 0.05135062851029687,
      "grad_norm": 0.6833954453468323,
      "learning_rate": 9.696e-05,
      "loss": 1.9501,
      "step": 48
    },
    {
      "epoch": 0.05242043327092805,
      "grad_norm": 0.8341196775436401,
      "learning_rate": 9.898e-05,
      "loss": 1.8998,
      "step": 49
    },
    {
      "epoch": 0.053490238031559244,
      "grad_norm": 1.0498520135879517,
      "learning_rate": 0.000101,
      "loss": 1.6787,
      "step": 50
    },
    {
      "epoch": 0.05456004279219043,
      "grad_norm": 0.6734796762466431,
      "learning_rate": 0.00010302,
      "loss": 1.6638,
      "step": 51
    },
    {
      "epoch": 0.05562984755282161,
      "grad_norm": 0.6819894313812256,
      "learning_rate": 0.00010504000000000001,
      "loss": 2.0493,
      "step": 52
    },
    {
      "epoch": 0.056699652313452795,
      "grad_norm": 0.8042811155319214,
      "learning_rate": 0.00010706000000000001,
      "loss": 1.9026,
      "step": 53
    },
    {
      "epoch": 0.05776945707408398,
      "grad_norm": 0.9070219993591309,
      "learning_rate": 0.00010908,
      "loss": 2.0546,
      "step": 54
    },
    {
      "epoch": 0.05883926183471516,
      "grad_norm": 0.8149216771125793,
      "learning_rate": 0.00011110000000000002,
      "loss": 1.5653,
      "step": 55
    },
    {
      "epoch": 0.05990906659534635,
      "grad_norm": 0.8578245043754578,
      "learning_rate": 0.00011312000000000001,
      "loss": 1.7776,
      "step": 56
    },
    {
      "epoch": 0.060978871355977536,
      "grad_norm": 0.6369242668151855,
      "learning_rate": 0.00011514,
      "loss": 1.4798,
      "step": 57
    },
    {
      "epoch": 0.06204867611660872,
      "grad_norm": 0.8663727045059204,
      "learning_rate": 0.00011716,
      "loss": 1.8753,
      "step": 58
    },
    {
      "epoch": 0.06311848087723991,
      "grad_norm": 0.844870924949646,
      "learning_rate": 0.00011918,
      "loss": 1.7667,
      "step": 59
    },
    {
      "epoch": 0.06418828563787109,
      "grad_norm": 0.8465819954872131,
      "learning_rate": 0.00012119999999999999,
      "loss": 1.6944,
      "step": 60
    },
    {
      "epoch": 0.06525809039850228,
      "grad_norm": 0.807226300239563,
      "learning_rate": 0.00012322,
      "loss": 1.723,
      "step": 61
    },
    {
      "epoch": 0.06632789515913345,
      "grad_norm": 0.8274418711662292,
      "learning_rate": 0.00012524000000000001,
      "loss": 1.6919,
      "step": 62
    },
    {
      "epoch": 0.06739769991976464,
      "grad_norm": 0.8100050091743469,
      "learning_rate": 0.00012726,
      "loss": 1.8884,
      "step": 63
    },
    {
      "epoch": 0.06846750468039582,
      "grad_norm": 1.0959488153457642,
      "learning_rate": 0.00012928,
      "loss": 1.932,
      "step": 64
    },
    {
      "epoch": 0.06953730944102701,
      "grad_norm": 1.0040597915649414,
      "learning_rate": 0.00013130000000000002,
      "loss": 1.8512,
      "step": 65
    },
    {
      "epoch": 0.0706071142016582,
      "grad_norm": 1.096177101135254,
      "learning_rate": 0.00013332,
      "loss": 1.8671,
      "step": 66
    },
    {
      "epoch": 0.07167691896228938,
      "grad_norm": 1.0180017948150635,
      "learning_rate": 0.00013534000000000002,
      "loss": 1.8652,
      "step": 67
    },
    {
      "epoch": 0.07274672372292057,
      "grad_norm": 0.8910913467407227,
      "learning_rate": 0.00013736,
      "loss": 1.6867,
      "step": 68
    },
    {
      "epoch": 0.07381652848355175,
      "grad_norm": 0.7205930352210999,
      "learning_rate": 0.00013937999999999998,
      "loss": 1.3372,
      "step": 69
    },
    {
      "epoch": 0.07488633324418294,
      "grad_norm": 0.8050198554992676,
      "learning_rate": 0.0001414,
      "loss": 1.8053,
      "step": 70
    },
    {
      "epoch": 0.07595613800481413,
      "grad_norm": 0.9537407159805298,
      "learning_rate": 0.00014342,
      "loss": 1.9222,
      "step": 71
    },
    {
      "epoch": 0.0770259427654453,
      "grad_norm": 0.8652852773666382,
      "learning_rate": 0.00014544,
      "loss": 1.6321,
      "step": 72
    },
    {
      "epoch": 0.0780957475260765,
      "grad_norm": 0.9287482500076294,
      "learning_rate": 0.00014746,
      "loss": 1.6623,
      "step": 73
    },
    {
      "epoch": 0.07916555228670767,
      "grad_norm": 0.8908249735832214,
      "learning_rate": 0.00014948,
      "loss": 1.9778,
      "step": 74
    },
    {
      "epoch": 0.08023535704733886,
      "grad_norm": 1.09617280960083,
      "learning_rate": 0.0001515,
      "loss": 1.6459,
      "step": 75
    },
    {
      "epoch": 0.08130516180797004,
      "grad_norm": 0.7375260591506958,
      "learning_rate": 0.00015352,
      "loss": 1.5546,
      "step": 76
    },
    {
      "epoch": 0.08237496656860123,
      "grad_norm": 0.8195641040802002,
      "learning_rate": 0.00015554000000000002,
      "loss": 1.6602,
      "step": 77
    },
    {
      "epoch": 0.08344477132923242,
      "grad_norm": 0.8275863528251648,
      "learning_rate": 0.00015756,
      "loss": 1.7362,
      "step": 78
    },
    {
      "epoch": 0.0845145760898636,
      "grad_norm": 0.8596509695053101,
      "learning_rate": 0.00015958000000000001,
      "loss": 1.8343,
      "step": 79
    },
    {
      "epoch": 0.08558438085049479,
      "grad_norm": 0.8495697975158691,
      "learning_rate": 0.00016160000000000002,
      "loss": 1.5171,
      "step": 80
    },
    {
      "epoch": 0.08665418561112596,
      "grad_norm": 0.8796412944793701,
      "learning_rate": 0.00016362,
      "loss": 1.3298,
      "step": 81
    },
    {
      "epoch": 0.08772399037175715,
      "grad_norm": 0.7089576125144958,
      "learning_rate": 0.00016564,
      "loss": 1.5468,
      "step": 82
    },
    {
      "epoch": 0.08879379513238835,
      "grad_norm": 0.8609843850135803,
      "learning_rate": 0.00016766,
      "loss": 1.8827,
      "step": 83
    },
    {
      "epoch": 0.08986359989301952,
      "grad_norm": 0.8612833023071289,
      "learning_rate": 0.00016968,
      "loss": 1.7344,
      "step": 84
    },
    {
      "epoch": 0.09093340465365071,
      "grad_norm": 0.8177778124809265,
      "learning_rate": 0.0001717,
      "loss": 1.6543,
      "step": 85
    },
    {
      "epoch": 0.09200320941428189,
      "grad_norm": 0.9328483939170837,
      "learning_rate": 0.00017372,
      "loss": 1.452,
      "step": 86
    },
    {
      "epoch": 0.09307301417491308,
      "grad_norm": 1.2951349020004272,
      "learning_rate": 0.00017574,
      "loss": 1.5672,
      "step": 87
    },
    {
      "epoch": 0.09414281893554426,
      "grad_norm": 1.0605595111846924,
      "learning_rate": 0.00017776,
      "loss": 1.7043,
      "step": 88
    },
    {
      "epoch": 0.09521262369617545,
      "grad_norm": 0.8237940073013306,
      "learning_rate": 0.00017978000000000002,
      "loss": 1.7331,
      "step": 89
    },
    {
      "epoch": 0.09628242845680664,
      "grad_norm": 0.8034561276435852,
      "learning_rate": 0.0001818,
      "loss": 1.6586,
      "step": 90
    },
    {
      "epoch": 0.09735223321743781,
      "grad_norm": 0.7969396114349365,
      "learning_rate": 0.00018382,
      "loss": 1.5385,
      "step": 91
    },
    {
      "epoch": 0.098422037978069,
      "grad_norm": 0.792550265789032,
      "learning_rate": 0.00018584000000000002,
      "loss": 1.3909,
      "step": 92
    },
    {
      "epoch": 0.09949184273870018,
      "grad_norm": 0.8620875477790833,
      "learning_rate": 0.00018786,
      "loss": 1.8194,
      "step": 93
    },
    {
      "epoch": 0.10056164749933137,
      "grad_norm": 0.6592405438423157,
      "learning_rate": 0.00018988,
      "loss": 1.5115,
      "step": 94
    },
    {
      "epoch": 0.10163145225996256,
      "grad_norm": 0.9358750581741333,
      "learning_rate": 0.0001919,
      "loss": 1.5659,
      "step": 95
    },
    {
      "epoch": 0.10270125702059374,
      "grad_norm": 0.7691030502319336,
      "learning_rate": 0.00019392,
      "loss": 1.6011,
      "step": 96
    },
    {
      "epoch": 0.10377106178122493,
      "grad_norm": 0.8470796942710876,
      "learning_rate": 0.00019594,
      "loss": 1.9553,
      "step": 97
    },
    {
      "epoch": 0.1048408665418561,
      "grad_norm": 0.8427095413208008,
      "learning_rate": 0.00019796,
      "loss": 1.5329,
      "step": 98
    },
    {
      "epoch": 0.1059106713024873,
      "grad_norm": 0.8238065838813782,
      "learning_rate": 0.00019998,
      "loss": 1.8338,
      "step": 99
    },
    {
      "epoch": 0.10698047606311849,
      "grad_norm": 0.8195500373840332,
      "learning_rate": 0.000202,
      "loss": 1.4167,
      "step": 100
    },
    {
      "epoch": 0.10805028082374966,
      "grad_norm": 0.7097288966178894,
      "learning_rate": 0.00020199688492212377,
      "loss": 1.9694,
      "step": 101
    },
    {
      "epoch": 0.10912008558438085,
      "grad_norm": 0.8986706137657166,
      "learning_rate": 0.00020198753988064772,
      "loss": 1.7537,
      "step": 102
    },
    {
      "epoch": 0.11018989034501203,
      "grad_norm": 0.8444643616676331,
      "learning_rate": 0.00020197196545201806,
      "loss": 1.8293,
      "step": 103
    },
    {
      "epoch": 0.11125969510564322,
      "grad_norm": 0.961504340171814,
      "learning_rate": 0.0002019501625969389,
      "loss": 1.7586,
      "step": 104
    },
    {
      "epoch": 0.1123294998662744,
      "grad_norm": 1.3284465074539185,
      "learning_rate": 0.00020192213266031304,
      "loss": 1.5878,
      "step": 105
    },
    {
      "epoch": 0.11339930462690559,
      "grad_norm": 0.8253253102302551,
      "learning_rate": 0.00020188787737115897,
      "loss": 1.3733,
      "step": 106
    },
    {
      "epoch": 0.11446910938753678,
      "grad_norm": 0.7246334552764893,
      "learning_rate": 0.00020184739884250436,
      "loss": 1.9136,
      "step": 107
    },
    {
      "epoch": 0.11553891414816796,
      "grad_norm": 0.7832014560699463,
      "learning_rate": 0.00020180069957125544,
      "loss": 1.5472,
      "step": 108
    },
    {
      "epoch": 0.11660871890879915,
      "grad_norm": 1.0413076877593994,
      "learning_rate": 0.0002017477824380433,
      "loss": 1.5569,
      "step": 109
    },
    {
      "epoch": 0.11767852366943032,
      "grad_norm": 0.8862333297729492,
      "learning_rate": 0.00020168865070704594,
      "loss": 1.4746,
      "step": 110
    },
    {
      "epoch": 0.11874832843006151,
      "grad_norm": 1.0966826677322388,
      "learning_rate": 0.00020162330802578706,
      "loss": 1.6068,
      "step": 111
    },
    {
      "epoch": 0.1198181331906927,
      "grad_norm": 1.2533254623413086,
      "learning_rate": 0.00020155175842491107,
      "loss": 1.2275,
      "step": 112
    },
    {
      "epoch": 0.12088793795132388,
      "grad_norm": 0.8213242888450623,
      "learning_rate": 0.0002014740063179344,
      "loss": 1.665,
      "step": 113
    },
    {
      "epoch": 0.12195774271195507,
      "grad_norm": 0.907412052154541,
      "learning_rate": 0.00020139005650097317,
      "loss": 1.5866,
      "step": 114
    },
    {
      "epoch": 0.12302754747258625,
      "grad_norm": 1.1394984722137451,
      "learning_rate": 0.00020129991415244756,
      "loss": 1.7026,
      "step": 115
    },
    {
      "epoch": 0.12409735223321744,
      "grad_norm": 0.8012979626655579,
      "learning_rate": 0.00020120358483276227,
      "loss": 1.4903,
      "step": 116
    },
    {
      "epoch": 0.12516715699384862,
      "grad_norm": 0.7759647369384766,
      "learning_rate": 0.00020110107448396346,
      "loss": 1.6247,
      "step": 117
    },
    {
      "epoch": 0.12623696175447982,
      "grad_norm": 0.8149462342262268,
      "learning_rate": 0.0002009923894293723,
      "loss": 1.7867,
      "step": 118
    },
    {
      "epoch": 0.127306766515111,
      "grad_norm": 0.6406301259994507,
      "learning_rate": 0.00020087753637319499,
      "loss": 1.8543,
      "step": 119
    },
    {
      "epoch": 0.12837657127574217,
      "grad_norm": 0.6905284523963928,
      "learning_rate": 0.00020075652240010892,
      "loss": 1.7696,
      "step": 120
    },
    {
      "epoch": 0.12944637603637335,
      "grad_norm": 0.7446596622467041,
      "learning_rate": 0.00020062935497482606,
      "loss": 1.6399,
      "step": 121
    },
    {
      "epoch": 0.13051618079700456,
      "grad_norm": 0.6539953351020813,
      "learning_rate": 0.00020049604194163217,
      "loss": 1.3393,
      "step": 122
    },
    {
      "epoch": 0.13158598555763573,
      "grad_norm": 0.9102329611778259,
      "learning_rate": 0.00020035659152390313,
      "loss": 1.4309,
      "step": 123
    },
    {
      "epoch": 0.1326557903182669,
      "grad_norm": 0.9899008870124817,
      "learning_rate": 0.00020021101232359757,
      "loss": 1.4651,
      "step": 124
    },
    {
      "epoch": 0.1337255950788981,
      "grad_norm": 0.9645147323608398,
      "learning_rate": 0.0002000593133207263,
      "loss": 1.3662,
      "step": 125
    },
    {
      "epoch": 0.1347953998395293,
      "grad_norm": 0.5051845908164978,
      "learning_rate": 0.00019990150387279835,
      "loss": 1.7894,
      "step": 126
    },
    {
      "epoch": 0.13586520460016047,
      "grad_norm": 0.9146197438240051,
      "learning_rate": 0.00019973759371424388,
      "loss": 1.3153,
      "step": 127
    },
    {
      "epoch": 0.13693500936079164,
      "grad_norm": 0.9138407707214355,
      "learning_rate": 0.0001995675929558135,
      "loss": 1.6197,
      "step": 128
    },
    {
      "epoch": 0.13800481412142285,
      "grad_norm": 0.8355864882469177,
      "learning_rate": 0.0001993915120839548,
      "loss": 1.5158,
      "step": 129
    },
    {
      "epoch": 0.13907461888205402,
      "grad_norm": 0.7430102825164795,
      "learning_rate": 0.00019920936196016534,
      "loss": 1.4604,
      "step": 130
    },
    {
      "epoch": 0.1401444236426852,
      "grad_norm": 0.8076910376548767,
      "learning_rate": 0.0001990211538203228,
      "loss": 1.2259,
      "step": 131
    },
    {
      "epoch": 0.1412142284033164,
      "grad_norm": 0.5874120593070984,
      "learning_rate": 0.00019882689927399174,
      "loss": 1.3672,
      "step": 132
    },
    {
      "epoch": 0.14228403316394758,
      "grad_norm": 0.9105700254440308,
      "learning_rate": 0.00019862661030370764,
      "loss": 1.7311,
      "step": 133
    },
    {
      "epoch": 0.14335383792457876,
      "grad_norm": 0.8008758425712585,
      "learning_rate": 0.00019842029926423762,
      "loss": 1.6541,
      "step": 134
    },
    {
      "epoch": 0.14442364268520994,
      "grad_norm": 0.8042259216308594,
      "learning_rate": 0.00019820797888181837,
      "loss": 1.4167,
      "step": 135
    },
    {
      "epoch": 0.14549344744584114,
      "grad_norm": 0.7910740971565247,
      "learning_rate": 0.00019798966225337126,
      "loss": 1.7123,
      "step": 136
    },
    {
      "epoch": 0.14656325220647232,
      "grad_norm": 1.0059454441070557,
      "learning_rate": 0.00019776536284569425,
      "loss": 1.2412,
      "step": 137
    },
    {
      "epoch": 0.1476330569671035,
      "grad_norm": 0.5858060717582703,
      "learning_rate": 0.00019753509449463134,
      "loss": 1.5322,
      "step": 138
    },
    {
      "epoch": 0.1487028617277347,
      "grad_norm": 0.7342561483383179,
      "learning_rate": 0.00019729887140421912,
      "loss": 1.6668,
      "step": 139
    },
    {
      "epoch": 0.14977266648836587,
      "grad_norm": 0.8164688348770142,
      "learning_rate": 0.00019705670814581052,
      "loss": 1.3033,
      "step": 140
    },
    {
      "epoch": 0.15084247124899705,
      "grad_norm": 1.2722523212432861,
      "learning_rate": 0.00019680861965717597,
      "loss": 1.6553,
      "step": 141
    },
    {
      "epoch": 0.15191227600962826,
      "grad_norm": 0.8512006402015686,
      "learning_rate": 0.0001965546212415821,
      "loss": 1.5321,
      "step": 142
    },
    {
      "epoch": 0.15298208077025943,
      "grad_norm": 0.9400825500488281,
      "learning_rate": 0.00019629472856684755,
      "loss": 1.2994,
      "step": 143
    },
    {
      "epoch": 0.1540518855308906,
      "grad_norm": 0.6759757399559021,
      "learning_rate": 0.00019602895766437678,
      "loss": 1.6195,
      "step": 144
    },
    {
      "epoch": 0.15512169029152179,
      "grad_norm": 0.5927717089653015,
      "learning_rate": 0.00019575732492817092,
      "loss": 1.4048,
      "step": 145
    },
    {
      "epoch": 0.156191495052153,
      "grad_norm": 0.7187090516090393,
      "learning_rate": 0.00019547984711381662,
      "loss": 1.6072,
      "step": 146
    },
    {
      "epoch": 0.15726129981278417,
      "grad_norm": 0.7754374146461487,
      "learning_rate": 0.0001951965413374525,
      "loss": 1.407,
      "step": 147
    },
    {
      "epoch": 0.15833110457341534,
      "grad_norm": 1.0608164072036743,
      "learning_rate": 0.00019490742507471338,
      "loss": 1.5289,
      "step": 148
    },
    {
      "epoch": 0.15940090933404655,
      "grad_norm": 1.0499017238616943,
      "learning_rate": 0.0001946125161596522,
      "loss": 1.3187,
      "step": 149
    },
    {
      "epoch": 0.16047071409467772,
      "grad_norm": 0.9605572819709778,
      "learning_rate": 0.00019431183278363997,
      "loss": 1.203,
      "step": 150
    },
    {
      "epoch": 0.1615405188553089,
      "grad_norm": 0.6870492696762085,
      "learning_rate": 0.00019400539349424367,
      "loss": 1.9006,
      "step": 151
    },
    {
      "epoch": 0.16261032361594008,
      "grad_norm": 0.8582913875579834,
      "learning_rate": 0.0001936932171940821,
      "loss": 1.828,
      "step": 152
    },
    {
      "epoch": 0.16368012837657128,
      "grad_norm": 0.8130080699920654,
      "learning_rate": 0.00019337532313966,
      "loss": 1.5142,
      "step": 153
    },
    {
      "epoch": 0.16474993313720246,
      "grad_norm": 1.1094551086425781,
      "learning_rate": 0.00019305173094017996,
      "loss": 1.3347,
      "step": 154
    },
    {
      "epoch": 0.16581973789783364,
      "grad_norm": 0.7829759120941162,
      "learning_rate": 0.0001927224605563332,
      "loss": 1.2676,
      "step": 155
    },
    {
      "epoch": 0.16688954265846484,
      "grad_norm": 0.8161009550094604,
      "learning_rate": 0.00019238753229906797,
      "loss": 1.2813,
      "step": 156
    },
    {
      "epoch": 0.16795934741909602,
      "grad_norm": 0.794654369354248,
      "learning_rate": 0.00019204696682833682,
      "loss": 1.9873,
      "step": 157
    },
    {
      "epoch": 0.1690291521797272,
      "grad_norm": 0.7361302375793457,
      "learning_rate": 0.00019170078515182216,
      "loss": 1.6699,
      "step": 158
    },
    {
      "epoch": 0.1700989569403584,
      "grad_norm": 0.7557123899459839,
      "learning_rate": 0.00019134900862364054,
      "loss": 1.4325,
      "step": 159
    },
    {
      "epoch": 0.17116876170098957,
      "grad_norm": 0.863318681716919,
      "learning_rate": 0.00019099165894302515,
      "loss": 1.5635,
      "step": 160
    },
    {
      "epoch": 0.17223856646162075,
      "grad_norm": 0.8100736141204834,
      "learning_rate": 0.00019062875815298763,
      "loss": 1.3452,
      "step": 161
    },
    {
      "epoch": 0.17330837122225193,
      "grad_norm": 0.8432340025901794,
      "learning_rate": 0.00019026032863895805,
      "loss": 1.3103,
      "step": 162
    },
    {
      "epoch": 0.17437817598288313,
      "grad_norm": 0.8291115164756775,
      "learning_rate": 0.00018988639312740433,
      "loss": 1.3356,
      "step": 163
    },
    {
      "epoch": 0.1754479807435143,
      "grad_norm": 0.6323521733283997,
      "learning_rate": 0.0001895069746844302,
      "loss": 1.2827,
      "step": 164
    },
    {
      "epoch": 0.17651778550414549,
      "grad_norm": 0.6580619812011719,
      "learning_rate": 0.00018912209671435252,
      "loss": 1.483,
      "step": 165
    },
    {
      "epoch": 0.1775875902647767,
      "grad_norm": 0.927400529384613,
      "learning_rate": 0.00018873178295825732,
      "loss": 1.214,
      "step": 166
    },
    {
      "epoch": 0.17865739502540787,
      "grad_norm": 1.0609151124954224,
      "learning_rate": 0.00018833605749253566,
      "loss": 1.5702,
      "step": 167
    },
    {
      "epoch": 0.17972719978603904,
      "grad_norm": 0.9172216653823853,
      "learning_rate": 0.00018793494472739831,
      "loss": 1.4003,
      "step": 168
    },
    {
      "epoch": 0.18079700454667022,
      "grad_norm": 0.793006956577301,
      "learning_rate": 0.00018752846940537003,
      "loss": 1.5463,
      "step": 169
    },
    {
      "epoch": 0.18186680930730142,
      "grad_norm": 0.608418345451355,
      "learning_rate": 0.0001871166565997633,
      "loss": 1.4574,
      "step": 170
    },
    {
      "epoch": 0.1829366140679326,
      "grad_norm": 1.0822337865829468,
      "learning_rate": 0.00018669953171313188,
      "loss": 1.5066,
      "step": 171
    },
    {
      "epoch": 0.18400641882856378,
      "grad_norm": 0.8970229625701904,
      "learning_rate": 0.00018627712047570352,
      "loss": 1.5338,
      "step": 172
    },
    {
      "epoch": 0.18507622358919498,
      "grad_norm": 0.9221557378768921,
      "learning_rate": 0.0001858494489437931,
      "loss": 1.2652,
      "step": 173
    },
    {
      "epoch": 0.18614602834982616,
      "grad_norm": 0.8210504651069641,
      "learning_rate": 0.0001854165434981953,
      "loss": 1.6507,
      "step": 174
    },
    {
      "epoch": 0.18721583311045734,
      "grad_norm": 0.9927299618721008,
      "learning_rate": 0.00018497843084255708,
      "loss": 1.1338,
      "step": 175
    },
    {
      "epoch": 0.1882856378710885,
      "grad_norm": 0.640484631061554,
      "learning_rate": 0.00018453513800173072,
      "loss": 1.2064,
      "step": 176
    },
    {
      "epoch": 0.18935544263171972,
      "grad_norm": 1.0150686502456665,
      "learning_rate": 0.00018408669232010684,
      "loss": 1.4428,
      "step": 177
    },
    {
      "epoch": 0.1904252473923509,
      "grad_norm": 1.0852081775665283,
      "learning_rate": 0.00018363312145992737,
      "loss": 1.612,
      "step": 178
    },
    {
      "epoch": 0.19149505215298207,
      "grad_norm": 0.884283185005188,
      "learning_rate": 0.0001831744533995795,
      "loss": 1.4523,
      "step": 179
    },
    {
      "epoch": 0.19256485691361327,
      "grad_norm": 0.7068943977355957,
      "learning_rate": 0.00018271071643186968,
      "loss": 0.9464,
      "step": 180
    },
    {
      "epoch": 0.19363466167424445,
      "grad_norm": 0.9215288162231445,
      "learning_rate": 0.00018224193916227852,
      "loss": 1.255,
      "step": 181
    },
    {
      "epoch": 0.19470446643487563,
      "grad_norm": 0.6173170208930969,
      "learning_rate": 0.00018176815050719615,
      "loss": 1.556,
      "step": 182
    },
    {
      "epoch": 0.19577427119550683,
      "grad_norm": 0.7621954083442688,
      "learning_rate": 0.00018128937969213852,
      "loss": 1.7331,
      "step": 183
    },
    {
      "epoch": 0.196844075956138,
      "grad_norm": 0.8637974858283997,
      "learning_rate": 0.00018080565624994474,
      "loss": 1.2933,
      "step": 184
    },
    {
      "epoch": 0.19791388071676919,
      "grad_norm": 0.9591987729072571,
      "learning_rate": 0.00018031701001895524,
      "loss": 1.4638,
      "step": 185
    },
    {
      "epoch": 0.19898368547740036,
      "grad_norm": 0.7968719601631165,
      "learning_rate": 0.0001798234711411713,
      "loss": 1.2952,
      "step": 186
    },
    {
      "epoch": 0.20005349023803157,
      "grad_norm": 1.2682313919067383,
      "learning_rate": 0.00017932507006039567,
      "loss": 1.3475,
      "step": 187
    },
    {
      "epoch": 0.20112329499866274,
      "grad_norm": 0.5984092354774475,
      "learning_rate": 0.0001788218375203547,
      "loss": 1.4973,
      "step": 188
    },
    {
      "epoch": 0.20219309975929392,
      "grad_norm": 0.7179667949676514,
      "learning_rate": 0.00017831380456280192,
      "loss": 1.4728,
      "step": 189
    },
    {
      "epoch": 0.20326290451992512,
      "grad_norm": 1.1618380546569824,
      "learning_rate": 0.00017780100252560313,
      "loss": 1.6948,
      "step": 190
    },
    {
      "epoch": 0.2043327092805563,
      "grad_norm": 0.7051059603691101,
      "learning_rate": 0.00017728346304080357,
      "loss": 1.1708,
      "step": 191
    },
    {
      "epoch": 0.20540251404118748,
      "grad_norm": 0.8025861382484436,
      "learning_rate": 0.0001767612180326764,
      "loss": 1.2174,
      "step": 192
    },
    {
      "epoch": 0.20647231880181866,
      "grad_norm": 0.88255774974823,
      "learning_rate": 0.00017623429971575384,
      "loss": 1.4061,
      "step": 193
    },
    {
      "epoch": 0.20754212356244986,
      "grad_norm": 0.7437373995780945,
      "learning_rate": 0.0001757027405928396,
      "loss": 1.1144,
      "step": 194
    },
    {
      "epoch": 0.20861192832308104,
      "grad_norm": 1.0885889530181885,
      "learning_rate": 0.00017516657345300425,
      "loss": 1.6319,
      "step": 195
    },
    {
      "epoch": 0.2096817330837122,
      "grad_norm": 0.7450828552246094,
      "learning_rate": 0.00017462583136956258,
      "loss": 1.2593,
      "step": 196
    },
    {
      "epoch": 0.21075153784434342,
      "grad_norm": 0.7687603831291199,
      "learning_rate": 0.00017408054769803337,
      "loss": 1.4812,
      "step": 197
    },
    {
      "epoch": 0.2118213426049746,
      "grad_norm": 0.9665757417678833,
      "learning_rate": 0.00017353075607408209,
      "loss": 1.2375,
      "step": 198
    },
    {
      "epoch": 0.21289114736560577,
      "grad_norm": 0.9301120042800903,
      "learning_rate": 0.00017297649041144575,
      "loss": 1.4734,
      "step": 199
    },
    {
      "epoch": 0.21396095212623698,
      "grad_norm": 0.9758381843566895,
      "learning_rate": 0.0001724177848998413,
      "loss": 1.1196,
      "step": 200
    },
    {
      "epoch": 0.21503075688686815,
      "grad_norm": 0.6634808778762817,
      "learning_rate": 0.00017185467400285644,
      "loss": 1.3473,
      "step": 201
    },
    {
      "epoch": 0.21610056164749933,
      "grad_norm": 1.0161389112472534,
      "learning_rate": 0.00017128719245582374,
      "loss": 1.5335,
      "step": 202
    },
    {
      "epoch": 0.2171703664081305,
      "grad_norm": 1.047826886177063,
      "learning_rate": 0.00017071537526367817,
      "loss": 1.4653,
      "step": 203
    },
    {
      "epoch": 0.2182401711687617,
      "grad_norm": 0.8670541644096375,
      "learning_rate": 0.00017013925769879755,
      "loss": 1.1689,
      "step": 204
    },
    {
      "epoch": 0.2193099759293929,
      "grad_norm": 1.1081433296203613,
      "learning_rate": 0.00016955887529882714,
      "loss": 1.3757,
      "step": 205
    },
    {
      "epoch": 0.22037978069002406,
      "grad_norm": 1.3014432191848755,
      "learning_rate": 0.0001689742638644871,
      "loss": 1.1107,
      "step": 206
    },
    {
      "epoch": 0.22144958545065527,
      "grad_norm": 0.779435396194458,
      "learning_rate": 0.00016838545945736458,
      "loss": 1.4839,
      "step": 207
    },
    {
      "epoch": 0.22251939021128644,
      "grad_norm": 1.2251975536346436,
      "learning_rate": 0.00016779249839768884,
      "loss": 1.6717,
      "step": 208
    },
    {
      "epoch": 0.22358919497191762,
      "grad_norm": 1.197913646697998,
      "learning_rate": 0.00016719541726209117,
      "loss": 1.4974,
      "step": 209
    },
    {
      "epoch": 0.2246589997325488,
      "grad_norm": 1.0760390758514404,
      "learning_rate": 0.00016659425288134854,
      "loss": 1.3019,
      "step": 210
    },
    {
      "epoch": 0.22572880449318,
      "grad_norm": 1.208444356918335,
      "learning_rate": 0.00016598904233811168,
      "loss": 1.1918,
      "step": 211
    },
    {
      "epoch": 0.22679860925381118,
      "grad_norm": 1.3097633123397827,
      "learning_rate": 0.00016537982296461768,
      "loss": 1.1058,
      "step": 212
    },
    {
      "epoch": 0.22786841401444236,
      "grad_norm": 0.8760459423065186,
      "learning_rate": 0.00016476663234038717,
      "loss": 1.6432,
      "step": 213
    },
    {
      "epoch": 0.22893821877507356,
      "grad_norm": 0.7652115225791931,
      "learning_rate": 0.00016414950828990625,
      "loss": 1.7073,
      "step": 214
    },
    {
      "epoch": 0.23000802353570474,
      "grad_norm": 0.8084537386894226,
      "learning_rate": 0.00016352848888029326,
      "loss": 1.3418,
      "step": 215
    },
    {
      "epoch": 0.2310778282963359,
      "grad_norm": 1.2894420623779297,
      "learning_rate": 0.00016290361241895064,
      "loss": 1.4992,
      "step": 216
    },
    {
      "epoch": 0.2321476330569671,
      "grad_norm": 0.9421677589416504,
      "learning_rate": 0.00016227491745120196,
      "loss": 1.3181,
      "step": 217
    },
    {
      "epoch": 0.2332174378175983,
      "grad_norm": 1.0605345964431763,
      "learning_rate": 0.0001616424427579143,
      "loss": 1.4584,
      "step": 218
    },
    {
      "epoch": 0.23428724257822947,
      "grad_norm": 0.8410621285438538,
      "learning_rate": 0.0001610062273531059,
      "loss": 1.5476,
      "step": 219
    },
    {
      "epoch": 0.23535704733886065,
      "grad_norm": 0.6994707584381104,
      "learning_rate": 0.00016036631048153979,
      "loss": 1.176,
      "step": 220
    },
    {
      "epoch": 0.23642685209949185,
      "grad_norm": 1.1372922658920288,
      "learning_rate": 0.0001597227316163029,
      "loss": 1.3494,
      "step": 221
    },
    {
      "epoch": 0.23749665686012303,
      "grad_norm": 1.1717870235443115,
      "learning_rate": 0.00015907553045637116,
      "loss": 1.313,
      "step": 222
    },
    {
      "epoch": 0.2385664616207542,
      "grad_norm": 1.0904631614685059,
      "learning_rate": 0.00015842474692416068,
      "loss": 1.3035,
      "step": 223
    },
    {
      "epoch": 0.2396362663813854,
      "grad_norm": 1.2501357793807983,
      "learning_rate": 0.0001577704211630652,
      "loss": 1.2295,
      "step": 224
    },
    {
      "epoch": 0.2407060711420166,
      "grad_norm": 1.003653645515442,
      "learning_rate": 0.00015711259353497981,
      "loss": 1.0317,
      "step": 225
    },
    {
      "epoch": 0.24177587590264776,
      "grad_norm": 0.6239796876907349,
      "learning_rate": 0.0001564513046178113,
      "loss": 1.7061,
      "step": 226
    },
    {
      "epoch": 0.24284568066327894,
      "grad_norm": 0.8568412065505981,
      "learning_rate": 0.000155786595202975,
      "loss": 1.37,
      "step": 227
    },
    {
      "epoch": 0.24391548542391014,
      "grad_norm": 0.9232256412506104,
      "learning_rate": 0.00015511850629287865,
      "loss": 1.1996,
      "step": 228
    },
    {
      "epoch": 0.24498529018454132,
      "grad_norm": 1.1868820190429688,
      "learning_rate": 0.00015444707909839325,
      "loss": 1.5739,
      "step": 229
    },
    {
      "epoch": 0.2460550949451725,
      "grad_norm": 1.0000498294830322,
      "learning_rate": 0.00015377235503631083,
      "loss": 1.1122,
      "step": 230
    },
    {
      "epoch": 0.2471248997058037,
      "grad_norm": 1.3450075387954712,
      "learning_rate": 0.0001530943757267898,
      "loss": 1.1177,
      "step": 231
    },
    {
      "epoch": 0.24819470446643488,
      "grad_norm": 0.6483561992645264,
      "learning_rate": 0.00015241318299078751,
      "loss": 1.5464,
      "step": 232
    },
    {
      "epoch": 0.24926450922706606,
      "grad_norm": 0.8778035044670105,
      "learning_rate": 0.00015172881884748063,
      "loss": 1.3679,
      "step": 233
    },
    {
      "epoch": 0.25033431398769723,
      "grad_norm": 1.1430999040603638,
      "learning_rate": 0.00015104132551167318,
      "loss": 1.7691,
      "step": 234
    },
    {
      "epoch": 0.2514041187483284,
      "grad_norm": 1.1102079153060913,
      "learning_rate": 0.00015035074539119248,
      "loss": 1.4866,
      "step": 235
    },
    {
      "epoch": 0.25247392350895964,
      "grad_norm": 1.0472543239593506,
      "learning_rate": 0.00014965712108427323,
      "loss": 1.1737,
      "step": 236
    },
    {
      "epoch": 0.2535437282695908,
      "grad_norm": 1.1687076091766357,
      "learning_rate": 0.00014896049537693005,
      "loss": 1.1114,
      "step": 237
    },
    {
      "epoch": 0.254613533030222,
      "grad_norm": 0.5792782306671143,
      "learning_rate": 0.00014826091124031792,
      "loss": 1.1166,
      "step": 238
    },
    {
      "epoch": 0.25568333779085317,
      "grad_norm": 0.8307198286056519,
      "learning_rate": 0.0001475584118280817,
      "loss": 1.4741,
      "step": 239
    },
    {
      "epoch": 0.25675314255148435,
      "grad_norm": 0.6716192960739136,
      "learning_rate": 0.00014685304047369423,
      "loss": 1.2407,
      "step": 240
    },
    {
      "epoch": 0.2578229473121155,
      "grad_norm": 1.034075140953064,
      "learning_rate": 0.00014614484068778324,
      "loss": 1.4235,
      "step": 241
    },
    {
      "epoch": 0.2588927520727467,
      "grad_norm": 0.9473350644111633,
      "learning_rate": 0.00014543385615544744,
      "loss": 1.4101,
      "step": 242
    },
    {
      "epoch": 0.25996255683337793,
      "grad_norm": 1.1106480360031128,
      "learning_rate": 0.00014472013073356184,
      "loss": 0.9895,
      "step": 243
    },
    {
      "epoch": 0.2610323615940091,
      "grad_norm": 0.8599668145179749,
      "learning_rate": 0.00014400370844807234,
      "loss": 1.2244,
      "step": 244
    },
    {
      "epoch": 0.2621021663546403,
      "grad_norm": 0.7908911108970642,
      "learning_rate": 0.00014328463349128025,
      "loss": 1.5923,
      "step": 245
    },
    {
      "epoch": 0.26317197111527146,
      "grad_norm": 0.8245794773101807,
      "learning_rate": 0.000142562950219116,
      "loss": 1.4023,
      "step": 246
    },
    {
      "epoch": 0.26424177587590264,
      "grad_norm": 0.9234296679496765,
      "learning_rate": 0.00014183870314840325,
      "loss": 1.3907,
      "step": 247
    },
    {
      "epoch": 0.2653115806365338,
      "grad_norm": 1.1608610153198242,
      "learning_rate": 0.00014111193695411285,
      "loss": 1.4156,
      "step": 248
    },
    {
      "epoch": 0.266381385397165,
      "grad_norm": 1.186960220336914,
      "learning_rate": 0.00014038269646660703,
      "loss": 0.9267,
      "step": 249
    },
    {
      "epoch": 0.2674511901577962,
      "grad_norm": 1.0800729990005493,
      "learning_rate": 0.00013965102666887408,
      "loss": 1.1525,
      "step": 250
    },
    {
      "epoch": 0.2685209949184274,
      "grad_norm": 0.5610882639884949,
      "learning_rate": 0.0001389169726937536,
      "loss": 1.4339,
      "step": 251
    },
    {
      "epoch": 0.2695907996790586,
      "grad_norm": 0.9096266627311707,
      "learning_rate": 0.0001381805798211525,
      "loss": 1.4273,
      "step": 252
    },
    {
      "epoch": 0.27066060443968976,
      "grad_norm": 0.930641770362854,
      "learning_rate": 0.00013744189347525182,
      "loss": 1.0906,
      "step": 253
    },
    {
      "epoch": 0.27173040920032093,
      "grad_norm": 1.0747754573822021,
      "learning_rate": 0.00013670095922170498,
      "loss": 1.3499,
      "step": 254
    },
    {
      "epoch": 0.2728002139609521,
      "grad_norm": 1.0504320859909058,
      "learning_rate": 0.00013595782276482678,
      "loss": 0.9918,
      "step": 255
    },
    {
      "epoch": 0.2738700187215833,
      "grad_norm": 1.225920557975769,
      "learning_rate": 0.00013521252994477446,
      "loss": 1.2121,
      "step": 256
    },
    {
      "epoch": 0.2749398234822145,
      "grad_norm": 0.6419029235839844,
      "learning_rate": 0.00013446512673471965,
      "loss": 1.4319,
      "step": 257
    },
    {
      "epoch": 0.2760096282428457,
      "grad_norm": 0.9120138883590698,
      "learning_rate": 0.0001337156592380131,
      "loss": 1.7584,
      "step": 258
    },
    {
      "epoch": 0.27707943300347687,
      "grad_norm": 0.8686931133270264,
      "learning_rate": 0.0001329641736853402,
      "loss": 1.1114,
      "step": 259
    },
    {
      "epoch": 0.27814923776410805,
      "grad_norm": 1.1766819953918457,
      "learning_rate": 0.0001322107164318697,
      "loss": 1.322,
      "step": 260
    },
    {
      "epoch": 0.2792190425247392,
      "grad_norm": 1.1522575616836548,
      "learning_rate": 0.00013145533395439405,
      "loss": 1.6013,
      "step": 261
    },
    {
      "epoch": 0.2802888472853704,
      "grad_norm": 1.0668003559112549,
      "learning_rate": 0.0001306980728484627,
      "loss": 0.9711,
      "step": 262
    },
    {
      "epoch": 0.2813586520460016,
      "grad_norm": 0.7991006374359131,
      "learning_rate": 0.00012993897982550764,
      "loss": 1.5706,
      "step": 263
    },
    {
      "epoch": 0.2824284568066328,
      "grad_norm": 0.7399187684059143,
      "learning_rate": 0.00012917810170996218,
      "loss": 1.4344,
      "step": 264
    },
    {
      "epoch": 0.283498261567264,
      "grad_norm": 0.8639386296272278,
      "learning_rate": 0.0001284154854363725,
      "loss": 1.2841,
      "step": 265
    },
    {
      "epoch": 0.28456806632789516,
      "grad_norm": 1.039579153060913,
      "learning_rate": 0.00012765117804650267,
      "loss": 1.2761,
      "step": 266
    },
    {
      "epoch": 0.28563787108852634,
      "grad_norm": 1.2643564939498901,
      "learning_rate": 0.00012688522668643268,
      "loss": 0.9961,
      "step": 267
    },
    {
      "epoch": 0.2867076758491575,
      "grad_norm": 1.101484775543213,
      "learning_rate": 0.00012611767860365038,
      "loss": 1.0579,
      "step": 268
    },
    {
      "epoch": 0.2877774806097887,
      "grad_norm": 0.8096588253974915,
      "learning_rate": 0.00012534858114413692,
      "loss": 1.4865,
      "step": 269
    },
    {
      "epoch": 0.28884728537041987,
      "grad_norm": 0.9671081900596619,
      "learning_rate": 0.00012457798174944645,
      "loss": 1.712,
      "step": 270
    },
    {
      "epoch": 0.2899170901310511,
      "grad_norm": 0.8346510529518127,
      "learning_rate": 0.0001238059279537795,
      "loss": 1.4498,
      "step": 271
    },
    {
      "epoch": 0.2909868948916823,
      "grad_norm": 0.8946216702461243,
      "learning_rate": 0.00012303246738105082,
      "loss": 1.1354,
      "step": 272
    },
    {
      "epoch": 0.29205669965231346,
      "grad_norm": 1.1908957958221436,
      "learning_rate": 0.00012225764774195186,
      "loss": 1.2882,
      "step": 273
    },
    {
      "epoch": 0.29312650441294463,
      "grad_norm": 1.5412572622299194,
      "learning_rate": 0.00012148151683100776,
      "loss": 1.3073,
      "step": 274
    },
    {
      "epoch": 0.2941963091735758,
      "grad_norm": 1.269574522972107,
      "learning_rate": 0.00012070412252362897,
      "loss": 0.7722,
      "step": 275
    },
    {
      "epoch": 0.295266113934207,
      "grad_norm": 0.6998116970062256,
      "learning_rate": 0.0001199255127731582,
      "loss": 1.6273,
      "step": 276
    },
    {
      "epoch": 0.2963359186948382,
      "grad_norm": 1.2294608354568481,
      "learning_rate": 0.00011914573560791246,
      "loss": 1.3577,
      "step": 277
    },
    {
      "epoch": 0.2974057234554694,
      "grad_norm": 1.464188814163208,
      "learning_rate": 0.00011836483912822035,
      "loss": 1.5042,
      "step": 278
    },
    {
      "epoch": 0.29847552821610057,
      "grad_norm": 0.8934053778648376,
      "learning_rate": 0.00011758287150345516,
      "loss": 1.2751,
      "step": 279
    },
    {
      "epoch": 0.29954533297673175,
      "grad_norm": 1.2757556438446045,
      "learning_rate": 0.00011679988096906333,
      "loss": 1.0978,
      "step": 280
    },
    {
      "epoch": 0.3006151377373629,
      "grad_norm": 1.4734572172164917,
      "learning_rate": 0.00011601591582358924,
      "loss": 0.9759,
      "step": 281
    },
    {
      "epoch": 0.3016849424979941,
      "grad_norm": 0.5812798738479614,
      "learning_rate": 0.00011523102442569585,
      "loss": 1.8345,
      "step": 282
    },
    {
      "epoch": 0.3027547472586253,
      "grad_norm": 0.6622848510742188,
      "learning_rate": 0.00011444525519118179,
      "loss": 1.3788,
      "step": 283
    },
    {
      "epoch": 0.3038245520192565,
      "grad_norm": 0.8406071066856384,
      "learning_rate": 0.00011365865658999474,
      "loss": 1.2666,
      "step": 284
    },
    {
      "epoch": 0.3048943567798877,
      "grad_norm": 0.8677452802658081,
      "learning_rate": 0.00011287127714324162,
      "loss": 1.2111,
      "step": 285
    },
    {
      "epoch": 0.30596416154051886,
      "grad_norm": 1.3912626504898071,
      "learning_rate": 0.00011208316542019556,
      "loss": 1.0299,
      "step": 286
    },
    {
      "epoch": 0.30703396630115004,
      "grad_norm": 1.1326332092285156,
      "learning_rate": 0.00011129437003530006,
      "loss": 0.8733,
      "step": 287
    },
    {
      "epoch": 0.3081037710617812,
      "grad_norm": 0.8693293333053589,
      "learning_rate": 0.00011050493964516997,
      "loss": 1.2772,
      "step": 288
    },
    {
      "epoch": 0.3091735758224124,
      "grad_norm": 0.9244024753570557,
      "learning_rate": 0.00010971492294559029,
      "loss": 1.6244,
      "step": 289
    },
    {
      "epoch": 0.31024338058304357,
      "grad_norm": 0.99155592918396,
      "learning_rate": 0.00010892436866851235,
      "loss": 1.3652,
      "step": 290
    },
    {
      "epoch": 0.3113131853436748,
      "grad_norm": 1.061529517173767,
      "learning_rate": 0.00010813332557904784,
      "loss": 1.2438,
      "step": 291
    },
    {
      "epoch": 0.312382990104306,
      "grad_norm": 0.8850228190422058,
      "learning_rate": 0.00010734184247246066,
      "loss": 1.1902,
      "step": 292
    },
    {
      "epoch": 0.31345279486493716,
      "grad_norm": 1.0577521324157715,
      "learning_rate": 0.00010654996817115704,
      "loss": 0.8845,
      "step": 293
    },
    {
      "epoch": 0.31452259962556833,
      "grad_norm": 0.9758553504943848,
      "learning_rate": 0.00010575775152167391,
      "loss": 1.4599,
      "step": 294
    },
    {
      "epoch": 0.3155924043861995,
      "grad_norm": 0.6535763740539551,
      "learning_rate": 0.00010496524139166594,
      "loss": 1.7197,
      "step": 295
    },
    {
      "epoch": 0.3166622091468307,
      "grad_norm": 0.8583334684371948,
      "learning_rate": 0.00010417248666689095,
      "loss": 1.0697,
      "step": 296
    },
    {
      "epoch": 0.31773201390746186,
      "grad_norm": 0.9801604747772217,
      "learning_rate": 0.00010337953624819464,
      "loss": 1.3483,
      "step": 297
    },
    {
      "epoch": 0.3188018186680931,
      "grad_norm": 0.8529968857765198,
      "learning_rate": 0.0001025864390484939,
      "loss": 0.9852,
      "step": 298
    },
    {
      "epoch": 0.31987162342872427,
      "grad_norm": 1.1832444667816162,
      "learning_rate": 0.00010179324398975984,
      "loss": 0.9118,
      "step": 299
    },
    {
      "epoch": 0.32094142818935545,
      "grad_norm": 1.5987980365753174,
      "learning_rate": 0.000101,
      "loss": 0.8357,
      "step": 300
    },
    {
      "epoch": 0.3220112329499866,
      "grad_norm": 0.8217248916625977,
      "learning_rate": 0.00010020675601024019,
      "loss": 1.4393,
      "step": 301
    },
    {
      "epoch": 0.3230810377106178,
      "grad_norm": 0.8473594188690186,
      "learning_rate": 9.941356095150613e-05,
      "loss": 1.4425,
      "step": 302
    },
    {
      "epoch": 0.324150842471249,
      "grad_norm": 1.3870422840118408,
      "learning_rate": 9.862046375180539e-05,
      "loss": 1.6543,
      "step": 303
    },
    {
      "epoch": 0.32522064723188016,
      "grad_norm": 1.0462790727615356,
      "learning_rate": 9.782751333310905e-05,
      "loss": 1.1367,
      "step": 304
    },
    {
      "epoch": 0.3262904519925114,
      "grad_norm": 0.9303992986679077,
      "learning_rate": 9.70347586083341e-05,
      "loss": 1.2189,
      "step": 305
    },
    {
      "epoch": 0.32736025675314256,
      "grad_norm": 1.2022167444229126,
      "learning_rate": 9.62422484783261e-05,
      "loss": 0.9073,
      "step": 306
    },
    {
      "epoch": 0.32843006151377374,
      "grad_norm": 0.7831782102584839,
      "learning_rate": 9.5450031828843e-05,
      "loss": 1.3442,
      "step": 307
    },
    {
      "epoch": 0.3294998662744049,
      "grad_norm": 1.0228748321533203,
      "learning_rate": 9.465815752753935e-05,
      "loss": 1.3841,
      "step": 308
    },
    {
      "epoch": 0.3305696710350361,
      "grad_norm": 1.2510477304458618,
      "learning_rate": 9.386667442095219e-05,
      "loss": 1.387,
      "step": 309
    },
    {
      "epoch": 0.33163947579566727,
      "grad_norm": 1.1324783563613892,
      "learning_rate": 9.307563133148767e-05,
      "loss": 1.187,
      "step": 310
    },
    {
      "epoch": 0.33270928055629845,
      "grad_norm": 1.0329921245574951,
      "learning_rate": 9.228507705440976e-05,
      "loss": 1.0911,
      "step": 311
    },
    {
      "epoch": 0.3337790853169297,
      "grad_norm": 1.0315637588500977,
      "learning_rate": 9.149506035483005e-05,
      "loss": 0.8645,
      "step": 312
    },
    {
      "epoch": 0.33484889007756086,
      "grad_norm": 0.8189682364463806,
      "learning_rate": 9.070562996469997e-05,
      "loss": 1.6589,
      "step": 313
    },
    {
      "epoch": 0.33591869483819203,
      "grad_norm": 0.7126203775405884,
      "learning_rate": 8.991683457980443e-05,
      "loss": 1.2723,
      "step": 314
    },
    {
      "epoch": 0.3369884995988232,
      "grad_norm": 1.3155546188354492,
      "learning_rate": 8.912872285675841e-05,
      "loss": 1.5234,
      "step": 315
    },
    {
      "epoch": 0.3380583043594544,
      "grad_norm": 1.2776226997375488,
      "learning_rate": 8.834134341000527e-05,
      "loss": 1.3699,
      "step": 316
    },
    {
      "epoch": 0.33912810912008556,
      "grad_norm": 0.9939321279525757,
      "learning_rate": 8.755474480881823e-05,
      "loss": 1.0941,
      "step": 317
    },
    {
      "epoch": 0.3401979138807168,
      "grad_norm": 1.078808307647705,
      "learning_rate": 8.676897557430415e-05,
      "loss": 0.9849,
      "step": 318
    },
    {
      "epoch": 0.341267718641348,
      "grad_norm": 0.692589282989502,
      "learning_rate": 8.598408417641078e-05,
      "loss": 0.9682,
      "step": 319
    },
    {
      "epoch": 0.34233752340197915,
      "grad_norm": 0.69404137134552,
      "learning_rate": 8.520011903093666e-05,
      "loss": 1.4386,
      "step": 320
    },
    {
      "epoch": 0.3434073281626103,
      "grad_norm": 1.0537830591201782,
      "learning_rate": 8.441712849654485e-05,
      "loss": 1.3422,
      "step": 321
    },
    {
      "epoch": 0.3444771329232415,
      "grad_norm": 1.2558127641677856,
      "learning_rate": 8.363516087177962e-05,
      "loss": 1.1179,
      "step": 322
    },
    {
      "epoch": 0.3455469376838727,
      "grad_norm": 0.9815801382064819,
      "learning_rate": 8.285426439208755e-05,
      "loss": 1.159,
      "step": 323
    },
    {
      "epoch": 0.34661674244450386,
      "grad_norm": 1.1159148216247559,
      "learning_rate": 8.20744872268418e-05,
      "loss": 1.0194,
      "step": 324
    },
    {
      "epoch": 0.3476865472051351,
      "grad_norm": 0.904017448425293,
      "learning_rate": 8.129587747637105e-05,
      "loss": 0.8047,
      "step": 325
    },
    {
      "epoch": 0.34875635196576626,
      "grad_norm": 0.6387720108032227,
      "learning_rate": 8.051848316899227e-05,
      "loss": 1.5516,
      "step": 326
    },
    {
      "epoch": 0.34982615672639744,
      "grad_norm": 0.9615854024887085,
      "learning_rate": 7.974235225804814e-05,
      "loss": 1.4077,
      "step": 327
    },
    {
      "epoch": 0.3508959614870286,
      "grad_norm": 0.9426089525222778,
      "learning_rate": 7.896753261894923e-05,
      "loss": 1.4134,
      "step": 328
    },
    {
      "epoch": 0.3519657662476598,
      "grad_norm": 1.0092761516571045,
      "learning_rate": 7.819407204622054e-05,
      "loss": 1.1219,
      "step": 329
    },
    {
      "epoch": 0.35303557100829097,
      "grad_norm": 0.9588394165039062,
      "learning_rate": 7.74220182505536e-05,
      "loss": 0.9877,
      "step": 330
    },
    {
      "epoch": 0.35410537576892215,
      "grad_norm": 1.3635826110839844,
      "learning_rate": 7.665141885586312e-05,
      "loss": 0.8919,
      "step": 331
    },
    {
      "epoch": 0.3551751805295534,
      "grad_norm": 0.543036699295044,
      "learning_rate": 7.588232139634968e-05,
      "loss": 1.2026,
      "step": 332
    },
    {
      "epoch": 0.35624498529018456,
      "grad_norm": 1.009917974472046,
      "learning_rate": 7.511477331356733e-05,
      "loss": 1.4086,
      "step": 333
    },
    {
      "epoch": 0.35731479005081573,
      "grad_norm": 0.9390615224838257,
      "learning_rate": 7.434882195349736e-05,
      "loss": 1.2364,
      "step": 334
    },
    {
      "epoch": 0.3583845948114469,
      "grad_norm": 1.1638097763061523,
      "learning_rate": 7.358451456362751e-05,
      "loss": 1.4135,
      "step": 335
    },
    {
      "epoch": 0.3594543995720781,
      "grad_norm": 1.2236016988754272,
      "learning_rate": 7.282189829003785e-05,
      "loss": 1.1124,
      "step": 336
    },
    {
      "epoch": 0.36052420433270926,
      "grad_norm": 1.398525357246399,
      "learning_rate": 7.206102017449237e-05,
      "loss": 0.8598,
      "step": 337
    },
    {
      "epoch": 0.36159400909334044,
      "grad_norm": 1.0281710624694824,
      "learning_rate": 7.130192715153731e-05,
      "loss": 1.48,
      "step": 338
    },
    {
      "epoch": 0.3626638138539717,
      "grad_norm": 0.7850888967514038,
      "learning_rate": 7.054466604560595e-05,
      "loss": 1.1451,
      "step": 339
    },
    {
      "epoch": 0.36373361861460285,
      "grad_norm": 1.0542351007461548,
      "learning_rate": 6.978928356813031e-05,
      "loss": 1.2867,
      "step": 340
    },
    {
      "epoch": 0.364803423375234,
      "grad_norm": 1.0121204853057861,
      "learning_rate": 6.90358263146598e-05,
      "loss": 0.8742,
      "step": 341
    },
    {
      "epoch": 0.3658732281358652,
      "grad_norm": 1.2728453874588013,
      "learning_rate": 6.828434076198693e-05,
      "loss": 0.9807,
      "step": 342
    },
    {
      "epoch": 0.3669430328964964,
      "grad_norm": 1.3085882663726807,
      "learning_rate": 6.753487326528033e-05,
      "loss": 0.9157,
      "step": 343
    },
    {
      "epoch": 0.36801283765712756,
      "grad_norm": 0.9796567559242249,
      "learning_rate": 6.678747005522557e-05,
      "loss": 0.9743,
      "step": 344
    },
    {
      "epoch": 0.36908264241775873,
      "grad_norm": 0.6303224563598633,
      "learning_rate": 6.60421772351732e-05,
      "loss": 1.0932,
      "step": 345
    },
    {
      "epoch": 0.37015244717838997,
      "grad_norm": 1.1777071952819824,
      "learning_rate": 6.529904077829505e-05,
      "loss": 1.2724,
      "step": 346
    },
    {
      "epoch": 0.37122225193902114,
      "grad_norm": 0.9197458624839783,
      "learning_rate": 6.455810652474817e-05,
      "loss": 1.4357,
      "step": 347
    },
    {
      "epoch": 0.3722920566996523,
      "grad_norm": 1.5600042343139648,
      "learning_rate": 6.381942017884753e-05,
      "loss": 1.1761,
      "step": 348
    },
    {
      "epoch": 0.3733618614602835,
      "grad_norm": 1.1098041534423828,
      "learning_rate": 6.30830273062464e-05,
      "loss": 1.0017,
      "step": 349
    },
    {
      "epoch": 0.37443166622091467,
      "grad_norm": 1.2631731033325195,
      "learning_rate": 6.234897333112594e-05,
      "loss": 0.8865,
      "step": 350
    },
    {
      "epoch": 0.37550147098154585,
      "grad_norm": 0.8299064040184021,
      "learning_rate": 6.161730353339302e-05,
      "loss": 1.2372,
      "step": 351
    },
    {
      "epoch": 0.376571275742177,
      "grad_norm": 1.0870029926300049,
      "learning_rate": 6.088806304588717e-05,
      "loss": 1.6713,
      "step": 352
    },
    {
      "epoch": 0.37764108050280826,
      "grad_norm": 1.1188887357711792,
      "learning_rate": 6.0161296851596766e-05,
      "loss": 1.3496,
      "step": 353
    },
    {
      "epoch": 0.37871088526343943,
      "grad_norm": 1.0576889514923096,
      "learning_rate": 5.943704978088402e-05,
      "loss": 1.211,
      "step": 354
    },
    {
      "epoch": 0.3797806900240706,
      "grad_norm": 1.3803176879882812,
      "learning_rate": 5.871536650871979e-05,
      "loss": 1.0773,
      "step": 355
    },
    {
      "epoch": 0.3808504947847018,
      "grad_norm": 1.1492490768432617,
      "learning_rate": 5.7996291551927666e-05,
      "loss": 0.9198,
      "step": 356
    },
    {
      "epoch": 0.38192029954533296,
      "grad_norm": 0.6801052093505859,
      "learning_rate": 5.7279869266438234e-05,
      "loss": 1.4337,
      "step": 357
    },
    {
      "epoch": 0.38299010430596414,
      "grad_norm": 1.2174427509307861,
      "learning_rate": 5.656614384455257e-05,
      "loss": 1.2353,
      "step": 358
    },
    {
      "epoch": 0.3840599090665954,
      "grad_norm": 1.1382440328598022,
      "learning_rate": 5.585515931221677e-05,
      "loss": 1.3431,
      "step": 359
    },
    {
      "epoch": 0.38512971382722655,
      "grad_norm": 0.9358460307121277,
      "learning_rate": 5.514695952630578e-05,
      "loss": 0.9496,
      "step": 360
    },
    {
      "epoch": 0.3861995185878577,
      "grad_norm": 1.1950464248657227,
      "learning_rate": 5.444158817191832e-05,
      "loss": 0.9529,
      "step": 361
    },
    {
      "epoch": 0.3872693233484889,
      "grad_norm": 1.0980271100997925,
      "learning_rate": 5.373908875968211e-05,
      "loss": 1.1347,
      "step": 362
    },
    {
      "epoch": 0.3883391281091201,
      "grad_norm": 0.7558470368385315,
      "learning_rate": 5.3039504623069965e-05,
      "loss": 0.9622,
      "step": 363
    },
    {
      "epoch": 0.38940893286975126,
      "grad_norm": 1.0493141412734985,
      "learning_rate": 5.234287891572674e-05,
      "loss": 1.1268,
      "step": 364
    },
    {
      "epoch": 0.39047873763038243,
      "grad_norm": 1.2050443887710571,
      "learning_rate": 5.164925460880758e-05,
      "loss": 1.2384,
      "step": 365
    },
    {
      "epoch": 0.39154854239101367,
      "grad_norm": 1.2287790775299072,
      "learning_rate": 5.095867448832683e-05,
      "loss": 1.2972,
      "step": 366
    },
    {
      "epoch": 0.39261834715164484,
      "grad_norm": 1.3510652780532837,
      "learning_rate": 5.027118115251938e-05,
      "loss": 1.3639,
      "step": 367
    },
    {
      "epoch": 0.393688151912276,
      "grad_norm": 1.2277965545654297,
      "learning_rate": 4.95868170092125e-05,
      "loss": 1.1627,
      "step": 368
    },
    {
      "epoch": 0.3947579566729072,
      "grad_norm": 1.1306562423706055,
      "learning_rate": 4.890562427321021e-05,
      "loss": 1.1845,
      "step": 369
    },
    {
      "epoch": 0.39582776143353837,
      "grad_norm": 0.8683123588562012,
      "learning_rate": 4.822764496368917e-05,
      "loss": 1.1248,
      "step": 370
    },
    {
      "epoch": 0.39689756619416955,
      "grad_norm": 1.0992448329925537,
      "learning_rate": 4.755292090160676e-05,
      "loss": 1.4496,
      "step": 371
    },
    {
      "epoch": 0.3979673709548007,
      "grad_norm": 1.0645350217819214,
      "learning_rate": 4.6881493707121315e-05,
      "loss": 0.943,
      "step": 372
    },
    {
      "epoch": 0.39903717571543196,
      "grad_norm": 1.105506420135498,
      "learning_rate": 4.621340479702503e-05,
      "loss": 1.0031,
      "step": 373
    },
|
{ |
|
"epoch": 0.40010698047606313, |
|
"grad_norm": 0.8552220463752747, |
|
"learning_rate": 4.554869538218868e-05, |
|
"loss": 1.0468, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.4011767852366943, |
|
"grad_norm": 0.8983728289604187, |
|
"learning_rate": 4.48874064650202e-05, |
|
"loss": 0.8188, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4022465899973255, |
|
"grad_norm": 0.6168304085731506, |
|
"learning_rate": 4.422957883693483e-05, |
|
"loss": 1.394, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.40331639475795666, |
|
"grad_norm": 1.0310661792755127, |
|
"learning_rate": 4.357525307583933e-05, |
|
"loss": 1.2572, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.40438619951858784, |
|
"grad_norm": 0.9725519418716431, |
|
"learning_rate": 4.29244695436289e-05, |
|
"loss": 1.2638, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.405456004279219, |
|
"grad_norm": 0.768764317035675, |
|
"learning_rate": 4.227726838369711e-05, |
|
"loss": 1.1837, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.40652580903985025, |
|
"grad_norm": 1.267078161239624, |
|
"learning_rate": 4.1633689518460225e-05, |
|
"loss": 1.0828, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4075956138004814, |
|
"grad_norm": 1.0793355703353882, |
|
"learning_rate": 4.0993772646894116e-05, |
|
"loss": 0.9099, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.4086654185611126, |
|
"grad_norm": 0.7304561138153076, |
|
"learning_rate": 4.035755724208573e-05, |
|
"loss": 1.3844, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.4097352233217438, |
|
"grad_norm": 0.9201098680496216, |
|
"learning_rate": 3.972508254879805e-05, |
|
"loss": 1.3788, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.41080502808237496, |
|
"grad_norm": 1.1659053564071655, |
|
"learning_rate": 3.90963875810494e-05, |
|
"loss": 1.3056, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.41187483284300613, |
|
"grad_norm": 0.9076784253120422, |
|
"learning_rate": 3.847151111970676e-05, |
|
"loss": 1.2791, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4129446376036373, |
|
"grad_norm": 1.2633960247039795, |
|
"learning_rate": 3.785049171009381e-05, |
|
"loss": 1.2781, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.41401444236426854, |
|
"grad_norm": 0.8448993563652039, |
|
"learning_rate": 3.723336765961285e-05, |
|
"loss": 0.7594, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.4150842471248997, |
|
"grad_norm": 0.9559032917022705, |
|
"learning_rate": 3.662017703538234e-05, |
|
"loss": 1.238, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.4161540518855309, |
|
"grad_norm": 0.7692968845367432, |
|
"learning_rate": 3.601095766188833e-05, |
|
"loss": 1.0494, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.4172238566461621, |
|
"grad_norm": 0.9550884962081909, |
|
"learning_rate": 3.540574711865146e-05, |
|
"loss": 1.2024, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41829366140679325, |
|
"grad_norm": 0.9791415333747864, |
|
"learning_rate": 3.4804582737908825e-05, |
|
"loss": 1.1066, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.4193634661674244, |
|
"grad_norm": 0.876552402973175, |
|
"learning_rate": 3.420750160231118e-05, |
|
"loss": 0.9091, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.4204332709280556, |
|
"grad_norm": 1.0393965244293213, |
|
"learning_rate": 3.361454054263541e-05, |
|
"loss": 0.7999, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.42150307568868683, |
|
"grad_norm": 0.6238046288490295, |
|
"learning_rate": 3.302573613551292e-05, |
|
"loss": 0.9947, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.422572880449318, |
|
"grad_norm": 1.2074321508407593, |
|
"learning_rate": 3.244112470117288e-05, |
|
"loss": 1.4528, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.4236426852099492, |
|
"grad_norm": 0.8338631987571716, |
|
"learning_rate": 3.186074230120244e-05, |
|
"loss": 1.2835, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.42471248997058036, |
|
"grad_norm": 0.9252687096595764, |
|
"learning_rate": 3.1284624736321846e-05, |
|
"loss": 1.1478, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.42578229473121154, |
|
"grad_norm": 0.896878182888031, |
|
"learning_rate": 3.071280754417626e-05, |
|
"loss": 1.0608, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.4268520994918427, |
|
"grad_norm": 1.1846908330917358, |
|
"learning_rate": 3.0145325997143577e-05, |
|
"loss": 0.9898, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.42792190425247395, |
|
"grad_norm": 0.8462516665458679, |
|
"learning_rate": 2.9582215100158706e-05, |
|
"loss": 0.6714, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4289917090131051, |
|
"grad_norm": 0.5424546003341675, |
|
"learning_rate": 2.902350958855426e-05, |
|
"loss": 1.304, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.4300615137737363, |
|
"grad_norm": 1.0380676984786987, |
|
"learning_rate": 2.846924392591794e-05, |
|
"loss": 1.4114, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.4311313185343675, |
|
"grad_norm": 1.0235589742660522, |
|
"learning_rate": 2.791945230196663e-05, |
|
"loss": 1.1948, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.43220112329499866, |
|
"grad_norm": 0.9752116799354553, |
|
"learning_rate": 2.7374168630437456e-05, |
|
"loss": 0.8311, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.43327092805562983, |
|
"grad_norm": 1.1082531213760376, |
|
"learning_rate": 2.6833426546995782e-05, |
|
"loss": 0.8029, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.434340732816261, |
|
"grad_norm": 1.0731070041656494, |
|
"learning_rate": 2.629725940716041e-05, |
|
"loss": 0.6362, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.43541053757689224, |
|
"grad_norm": 0.7748804688453674, |
|
"learning_rate": 2.57657002842462e-05, |
|
"loss": 1.3502, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.4364803423375234, |
|
"grad_norm": 0.9873160719871521, |
|
"learning_rate": 2.523878196732358e-05, |
|
"loss": 1.4015, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.4375501470981546, |
|
"grad_norm": 1.168376088142395, |
|
"learning_rate": 2.4716536959196462e-05, |
|
"loss": 0.9562, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.4386199518587858, |
|
"grad_norm": 0.9947543740272522, |
|
"learning_rate": 2.4198997474396877e-05, |
|
"loss": 1.0764, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43968975661941695, |
|
"grad_norm": 1.0038477182388306, |
|
"learning_rate": 2.3686195437198112e-05, |
|
"loss": 1.1025, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.4407595613800481, |
|
"grad_norm": 1.073735237121582, |
|
"learning_rate": 2.31781624796453e-05, |
|
"loss": 0.9347, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4418293661406793, |
|
"grad_norm": 0.8761813044548035, |
|
"learning_rate": 2.2674929939604332e-05, |
|
"loss": 1.3846, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.44289917090131053, |
|
"grad_norm": 0.707740068435669, |
|
"learning_rate": 2.217652885882869e-05, |
|
"loss": 1.2739, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.4439689756619417, |
|
"grad_norm": 0.8587148189544678, |
|
"learning_rate": 2.1682989981044783e-05, |
|
"loss": 1.4457, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.4450387804225729, |
|
"grad_norm": 0.9313194155693054, |
|
"learning_rate": 2.119434375005527e-05, |
|
"loss": 1.3368, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.44610858518320406, |
|
"grad_norm": 0.9590698480606079, |
|
"learning_rate": 2.071062030786149e-05, |
|
"loss": 1.0, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.44717838994383524, |
|
"grad_norm": 1.0751738548278809, |
|
"learning_rate": 2.0231849492803852e-05, |
|
"loss": 0.8612, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.4482481947044664, |
|
"grad_norm": 0.8183998465538025, |
|
"learning_rate": 1.9758060837721467e-05, |
|
"loss": 1.1862, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.4493179994650976, |
|
"grad_norm": 0.9243028163909912, |
|
"learning_rate": 1.928928356813032e-05, |
|
"loss": 1.6195, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4503878042257288, |
|
"grad_norm": 1.1141655445098877, |
|
"learning_rate": 1.882554660042052e-05, |
|
"loss": 1.2306, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.45145760898636, |
|
"grad_norm": 0.9904926419258118, |
|
"learning_rate": 1.8366878540072614e-05, |
|
"loss": 1.1581, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4525274137469912, |
|
"grad_norm": 0.8445707559585571, |
|
"learning_rate": 1.7913307679893173e-05, |
|
"loss": 1.1488, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.45359721850762236, |
|
"grad_norm": 1.067460060119629, |
|
"learning_rate": 1.7464861998269243e-05, |
|
"loss": 0.9826, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.45466702326825353, |
|
"grad_norm": 1.1314057111740112, |
|
"learning_rate": 1.702156915744292e-05, |
|
"loss": 0.6977, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4557368280288847, |
|
"grad_norm": 0.8268198370933533, |
|
"learning_rate": 1.6583456501804725e-05, |
|
"loss": 1.6189, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.4568066327895159, |
|
"grad_norm": 0.9529428482055664, |
|
"learning_rate": 1.6150551056206867e-05, |
|
"loss": 1.2327, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.4578764375501471, |
|
"grad_norm": 1.1057630777359009, |
|
"learning_rate": 1.57228795242965e-05, |
|
"loss": 1.1263, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.4589462423107783, |
|
"grad_norm": 0.8888829350471497, |
|
"learning_rate": 1.5300468286868137e-05, |
|
"loss": 0.9244, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.4600160470714095, |
|
"grad_norm": 1.019853949546814, |
|
"learning_rate": 1.488334340023669e-05, |
|
"loss": 1.0842, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46108585183204065, |
|
"grad_norm": 1.1532846689224243, |
|
"learning_rate": 1.4471530594629996e-05, |
|
"loss": 0.7479, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.4621556565926718, |
|
"grad_norm": 0.9575594663619995, |
|
"learning_rate": 1.4065055272601703e-05, |
|
"loss": 1.3278, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.463225461353303, |
|
"grad_norm": 0.9033411145210266, |
|
"learning_rate": 1.3663942507464348e-05, |
|
"loss": 1.6109, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.4642952661139342, |
|
"grad_norm": 0.9153274297714233, |
|
"learning_rate": 1.3268217041742701e-05, |
|
"loss": 0.9475, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.4653650708745654, |
|
"grad_norm": 1.0005955696105957, |
|
"learning_rate": 1.2877903285647486e-05, |
|
"loss": 0.9418, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4664348756351966, |
|
"grad_norm": 1.0038702487945557, |
|
"learning_rate": 1.2493025315569801e-05, |
|
"loss": 0.8441, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.46750468039582777, |
|
"grad_norm": 1.012077808380127, |
|
"learning_rate": 1.2113606872595673e-05, |
|
"loss": 0.5747, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.46857448515645894, |
|
"grad_norm": 0.9703443646430969, |
|
"learning_rate": 1.173967136104196e-05, |
|
"loss": 1.2232, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.4696442899170901, |
|
"grad_norm": 1.00784170627594, |
|
"learning_rate": 1.1371241847012401e-05, |
|
"loss": 1.1904, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.4707140946777213, |
|
"grad_norm": 0.7357102036476135, |
|
"learning_rate": 1.1008341056974854e-05, |
|
"loss": 1.2484, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4717838994383525, |
|
"grad_norm": 1.120018482208252, |
|
"learning_rate": 1.0650991376359473e-05, |
|
"loss": 1.0129, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.4728537041989837, |
|
"grad_norm": 1.1301084756851196, |
|
"learning_rate": 1.029921484817783e-05, |
|
"loss": 1.0191, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.4739235089596149, |
|
"grad_norm": 0.9591827988624573, |
|
"learning_rate": 9.953033171663175e-06, |
|
"loss": 0.7102, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.47499331372024606, |
|
"grad_norm": 0.9715290665626526, |
|
"learning_rate": 9.612467700932045e-06, |
|
"loss": 0.9119, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.47606311848087723, |
|
"grad_norm": 0.8477181196212769, |
|
"learning_rate": 9.277539443666783e-06, |
|
"loss": 1.434, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4771329232415084, |
|
"grad_norm": 1.0625755786895752, |
|
"learning_rate": 8.948269059820025e-06, |
|
"loss": 0.9963, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4782027280021396, |
|
"grad_norm": 0.9794643521308899, |
|
"learning_rate": 8.624676860340025e-06, |
|
"loss": 0.9641, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.4792725327627708, |
|
"grad_norm": 1.0721542835235596, |
|
"learning_rate": 8.306782805917904e-06, |
|
"loss": 1.2077, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.480342337523402, |
|
"grad_norm": 1.1416406631469727, |
|
"learning_rate": 7.994606505756355e-06, |
|
"loss": 0.793, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4814121422840332, |
|
"grad_norm": 1.3246476650238037, |
|
"learning_rate": 7.68816721636004e-06, |
|
"loss": 0.6877, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48248194704466435, |
|
"grad_norm": 0.7753664255142212, |
|
"learning_rate": 7.3874838403478e-06, |
|
"loss": 1.5715, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4835517518052955, |
|
"grad_norm": 0.8879972696304321, |
|
"learning_rate": 7.092574925286614e-06, |
|
"loss": 1.5482, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4846215565659267, |
|
"grad_norm": 0.9376313090324402, |
|
"learning_rate": 6.803458662547507e-06, |
|
"loss": 1.0972, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.4856913613265579, |
|
"grad_norm": 1.133258581161499, |
|
"learning_rate": 6.520152886183406e-06, |
|
"loss": 0.8188, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.4867611660871891, |
|
"grad_norm": 1.023056983947754, |
|
"learning_rate": 6.242675071829111e-06, |
|
"loss": 0.9871, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4878309708478203, |
|
"grad_norm": 1.0413668155670166, |
|
"learning_rate": 5.971042335623229e-06, |
|
"loss": 0.8659, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.48890077560845147, |
|
"grad_norm": 0.8116885423660278, |
|
"learning_rate": 5.705271433152458e-06, |
|
"loss": 1.4952, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.48997058036908264, |
|
"grad_norm": 0.7350386381149292, |
|
"learning_rate": 5.445378758417925e-06, |
|
"loss": 1.3216, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.4910403851297138, |
|
"grad_norm": 0.9509938359260559, |
|
"learning_rate": 5.191380342824035e-06, |
|
"loss": 1.033, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.492110189890345, |
|
"grad_norm": 0.8455548882484436, |
|
"learning_rate": 4.943291854189493e-06, |
|
"loss": 0.9927, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4931799946509762, |
|
"grad_norm": 1.0387928485870361, |
|
"learning_rate": 4.701128595780878e-06, |
|
"loss": 0.8588, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4942497994116074, |
|
"grad_norm": 0.9830685257911682, |
|
"learning_rate": 4.464905505368658e-06, |
|
"loss": 0.6431, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.4953196041722386, |
|
"grad_norm": 0.8270193338394165, |
|
"learning_rate": 4.23463715430577e-06, |
|
"loss": 1.4119, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.49638940893286976, |
|
"grad_norm": 0.9378442764282227, |
|
"learning_rate": 4.010337746628751e-06, |
|
"loss": 1.3177, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.49745921369350093, |
|
"grad_norm": 1.149201512336731, |
|
"learning_rate": 3.792021118181636e-06, |
|
"loss": 1.1545, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4985290184541321, |
|
"grad_norm": 1.167047381401062, |
|
"learning_rate": 3.5797007357623945e-06, |
|
"loss": 1.4185, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.4995988232147633, |
|
"grad_norm": 1.0257937908172607, |
|
"learning_rate": 3.3733896962923658e-06, |
|
"loss": 0.8571, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.5006686279753945, |
|
"grad_norm": 1.0537946224212646, |
|
"learning_rate": 3.1731007260082616e-06, |
|
"loss": 1.0841, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5017384327360257, |
|
"grad_norm": 0.7948933839797974, |
|
"learning_rate": 2.9788461796772114e-06, |
|
"loss": 0.9228, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.5028082374966568, |
|
"grad_norm": 0.8775575160980225, |
|
"learning_rate": 2.790638039834668e-06, |
|
"loss": 1.0854, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.503878042257288, |
|
"grad_norm": 0.8913066387176514, |
|
"learning_rate": 2.6084879160452166e-06, |
|
"loss": 1.2876, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.5049478470179193, |
|
"grad_norm": 1.0341969728469849, |
|
"learning_rate": 2.432407044186509e-06, |
|
"loss": 1.0843, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5060176517785504, |
|
"grad_norm": 1.077120304107666, |
|
"learning_rate": 2.26240628575615e-06, |
|
"loss": 1.1631, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.5070874565391816, |
|
"grad_norm": 1.2969963550567627, |
|
"learning_rate": 2.098496127201648e-06, |
|
"loss": 0.9719, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.5081572612998128, |
|
"grad_norm": 0.9883275032043457, |
|
"learning_rate": 1.9406866792737267e-06, |
|
"loss": 0.7292, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.509227066060444, |
|
"grad_norm": 0.8158979415893555, |
|
"learning_rate": 1.7889876764024505e-06, |
|
"loss": 1.7438, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5102968708210751, |
|
"grad_norm": 1.0562134981155396, |
|
"learning_rate": 1.6434084760968697e-06, |
|
"loss": 1.1742, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.5113666755817063, |
|
"grad_norm": 1.1257410049438477, |
|
"learning_rate": 1.5039580583678393e-06, |
|
"loss": 1.4287, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.5124364803423376, |
|
"grad_norm": 0.9307600855827332, |
|
"learning_rate": 1.3706450251739613e-06, |
|
"loss": 1.0974, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.5135062851029687, |
|
"grad_norm": 1.2765836715698242, |
|
"learning_rate": 1.2434775998910964e-06, |
|
"loss": 0.913, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5145760898635999, |
|
"grad_norm": 0.960442304611206, |
|
"learning_rate": 1.1224636268050439e-06, |
|
"loss": 0.7802, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.515645894624231, |
|
"grad_norm": 0.7446873188018799, |
|
"learning_rate": 1.0076105706276888e-06, |
|
"loss": 1.3411, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5167156993848623, |
|
"grad_norm": 0.8171153664588928, |
|
"learning_rate": 8.989255160365527e-07, |
|
"loss": 1.1665, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.5177855041454934, |
|
"grad_norm": 1.2924435138702393, |
|
"learning_rate": 7.964151672377458e-07, |
|
"loss": 1.0651, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5188553089061246, |
|
"grad_norm": 0.8738812804222107, |
|
"learning_rate": 7.000858475524444e-07, |
|
"loss": 0.9887, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5199251136667559, |
|
"grad_norm": 1.1796964406967163, |
|
"learning_rate": 6.099434990268609e-07, |
|
"loss": 1.0128, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.520994918427387, |
|
"grad_norm": 0.9124283194541931, |
|
"learning_rate": 5.259936820656257e-07, |
|
"loss": 0.5644, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.5220647231880182, |
|
"grad_norm": 0.7489669322967529, |
|
"learning_rate": 4.482415750889204e-07, |
|
"loss": 1.1193, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5231345279486493, |
|
"grad_norm": 0.969879150390625, |
|
"learning_rate": 3.766919742129331e-07, |
|
"loss": 1.2911, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.5242043327092806, |
|
"grad_norm": 0.9446693062782288, |
|
"learning_rate": 3.1134929295407564e-07, |
|
"loss": 1.2359, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5252741374699117, |
|
"grad_norm": 0.8747495412826538, |
|
"learning_rate": 2.5221756195672563e-07, |
|
"loss": 1.0005, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.5263439422305429, |
|
"grad_norm": 1.1830285787582397, |
|
"learning_rate": 1.9930042874457254e-07, |
|
"loss": 0.8599, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.5274137469911742, |
|
"grad_norm": 0.886387825012207, |
|
"learning_rate": 1.5260115749566882e-07, |
|
"loss": 0.8124, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.5284835517518053, |
|
"grad_norm": 0.7844628691673279, |
|
"learning_rate": 1.1212262884103974e-07, |
|
"loss": 1.4931, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5295533565124365, |
|
"grad_norm": 0.8858184218406677, |
|
"learning_rate": 7.7867339686987e-08, |
|
"loss": 1.2632, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5306231612730676, |
|
"grad_norm": 0.9755517840385437, |
|
"learning_rate": 4.98374030611084e-08, |
|
"loss": 1.1289, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5316929660336989, |
|
"grad_norm": 0.9477503299713135, |
|
"learning_rate": 2.8034547981943713e-08, |
|
"loss": 1.1403, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.53276277079433, |
|
"grad_norm": 0.9157533645629883, |
|
"learning_rate": 1.246011935228064e-08, |
|
"loss": 0.9965, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5338325755549612, |
|
"grad_norm": 1.1439539194107056, |
|
"learning_rate": 3.115077876243988e-09, |
|
"loss": 1.0751, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.5349023803155925, |
|
"grad_norm": 1.0133370161056519, |
|
"learning_rate": 0.0, |
|
"loss": 0.8007, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0690671548959293e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |