|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7733994606041957, |
|
"eval_steps": 500, |
|
"global_step": 21364, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011222328814234257, |
|
"grad_norm": 37.096622467041016, |
|
"learning_rate": 1.0157273918741808e-06, |
|
"loss": 8.8686, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0022444657628468514, |
|
"grad_norm": 13.880346298217773, |
|
"learning_rate": 2.0314547837483616e-06, |
|
"loss": 7.6419, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0033666986442702773, |
|
"grad_norm": 16.09684944152832, |
|
"learning_rate": 3.0471821756225426e-06, |
|
"loss": 6.4382, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.004488931525693703, |
|
"grad_norm": 19.170230865478516, |
|
"learning_rate": 4.062909567496723e-06, |
|
"loss": 5.3399, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.005611164407117128, |
|
"grad_norm": 24.654130935668945, |
|
"learning_rate": 5.078636959370905e-06, |
|
"loss": 4.7646, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.006733397288540555, |
|
"grad_norm": 24.712974548339844, |
|
"learning_rate": 6.094364351245085e-06, |
|
"loss": 4.4667, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.00785563016996398, |
|
"grad_norm": 17.238990783691406, |
|
"learning_rate": 7.110091743119267e-06, |
|
"loss": 4.2168, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.008977863051387406, |
|
"grad_norm": 20.40213394165039, |
|
"learning_rate": 8.125819134993446e-06, |
|
"loss": 4.0355, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.010100095932810832, |
|
"grad_norm": 15.052313804626465, |
|
"learning_rate": 9.141546526867629e-06, |
|
"loss": 3.8458, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.011222328814234257, |
|
"grad_norm": 18.802026748657227, |
|
"learning_rate": 1.015727391874181e-05, |
|
"loss": 3.6688, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.012344561695657683, |
|
"grad_norm": 16.62171745300293, |
|
"learning_rate": 1.117300131061599e-05, |
|
"loss": 3.52, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.01346679457708111, |
|
"grad_norm": 16.29236602783203, |
|
"learning_rate": 1.218872870249017e-05, |
|
"loss": 3.402, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.014589027458504534, |
|
"grad_norm": 11.65068531036377, |
|
"learning_rate": 1.3204456094364351e-05, |
|
"loss": 3.2829, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.01571126033992796, |
|
"grad_norm": 10.617654800415039, |
|
"learning_rate": 1.4220183486238533e-05, |
|
"loss": 3.2008, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.016833493221351387, |
|
"grad_norm": 10.611294746398926, |
|
"learning_rate": 1.5235910878112714e-05, |
|
"loss": 3.1249, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.01795572610277481, |
|
"grad_norm": 9.946114540100098, |
|
"learning_rate": 1.6251638269986893e-05, |
|
"loss": 3.0503, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.019077958984198236, |
|
"grad_norm": 10.92148494720459, |
|
"learning_rate": 1.7267365661861077e-05, |
|
"loss": 2.9903, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.020200191865621664, |
|
"grad_norm": 8.329671859741211, |
|
"learning_rate": 1.8283093053735257e-05, |
|
"loss": 2.9261, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.02132242474704509, |
|
"grad_norm": 7.897571086883545, |
|
"learning_rate": 1.9298820445609438e-05, |
|
"loss": 2.889, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.022444657628468513, |
|
"grad_norm": 7.548309326171875, |
|
"learning_rate": 2.031454783748362e-05, |
|
"loss": 2.7945, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.02356689050989194, |
|
"grad_norm": 8.54383659362793, |
|
"learning_rate": 2.13302752293578e-05, |
|
"loss": 2.7538, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.024689123391315366, |
|
"grad_norm": 7.025435924530029, |
|
"learning_rate": 2.234600262123198e-05, |
|
"loss": 2.7075, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.02581135627273879, |
|
"grad_norm": 7.59956169128418, |
|
"learning_rate": 2.336173001310616e-05, |
|
"loss": 2.6625, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.02693358915416222, |
|
"grad_norm": 6.982921123504639, |
|
"learning_rate": 2.437745740498034e-05, |
|
"loss": 2.6248, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.028055822035585643, |
|
"grad_norm": 6.033556938171387, |
|
"learning_rate": 2.5393184796854525e-05, |
|
"loss": 2.5724, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.029178054917009068, |
|
"grad_norm": 6.674008846282959, |
|
"learning_rate": 2.6408912188728702e-05, |
|
"loss": 2.5292, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.030300287798432492, |
|
"grad_norm": 6.499022006988525, |
|
"learning_rate": 2.7424639580602886e-05, |
|
"loss": 2.496, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.03142252067985592, |
|
"grad_norm": 6.163687229156494, |
|
"learning_rate": 2.8440366972477066e-05, |
|
"loss": 2.4541, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.032544753561279345, |
|
"grad_norm": 5.20266580581665, |
|
"learning_rate": 2.9456094364351244e-05, |
|
"loss": 2.449, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.03366698644270277, |
|
"grad_norm": 5.6633830070495605, |
|
"learning_rate": 3.0471821756225428e-05, |
|
"loss": 2.4085, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.034789219324126194, |
|
"grad_norm": 6.414912700653076, |
|
"learning_rate": 3.148754914809961e-05, |
|
"loss": 2.3791, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.03591145220554962, |
|
"grad_norm": 4.983119964599609, |
|
"learning_rate": 3.2503276539973785e-05, |
|
"loss": 2.3505, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.03703368508697305, |
|
"grad_norm": 5.280698299407959, |
|
"learning_rate": 3.351900393184797e-05, |
|
"loss": 2.3191, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.03815591796839647, |
|
"grad_norm": 5.565277099609375, |
|
"learning_rate": 3.453473132372215e-05, |
|
"loss": 2.2957, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.0392781508498199, |
|
"grad_norm": 5.02451753616333, |
|
"learning_rate": 3.555045871559633e-05, |
|
"loss": 2.2618, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.04040038373124333, |
|
"grad_norm": 4.424225807189941, |
|
"learning_rate": 3.6566186107470514e-05, |
|
"loss": 2.2512, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.04152261661266675, |
|
"grad_norm": 6.270051002502441, |
|
"learning_rate": 3.7581913499344695e-05, |
|
"loss": 2.2354, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.04264484949409018, |
|
"grad_norm": 14.256332397460938, |
|
"learning_rate": 3.8597640891218876e-05, |
|
"loss": 2.2364, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.043767082375513605, |
|
"grad_norm": 4.837010383605957, |
|
"learning_rate": 3.9613368283093056e-05, |
|
"loss": 2.2346, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.044889315256937026, |
|
"grad_norm": 3.9555633068084717, |
|
"learning_rate": 4.062909567496724e-05, |
|
"loss": 2.2003, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.046011548138360454, |
|
"grad_norm": 4.136904716491699, |
|
"learning_rate": 4.164482306684142e-05, |
|
"loss": 2.2056, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.04713378101978388, |
|
"grad_norm": 4.25378942489624, |
|
"learning_rate": 4.26605504587156e-05, |
|
"loss": 2.1395, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.048256013901207304, |
|
"grad_norm": 3.6108360290527344, |
|
"learning_rate": 4.367627785058978e-05, |
|
"loss": 2.1296, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.04937824678263073, |
|
"grad_norm": 3.66212797164917, |
|
"learning_rate": 4.469200524246396e-05, |
|
"loss": 2.1316, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.05050047966405416, |
|
"grad_norm": 3.5523183345794678, |
|
"learning_rate": 4.570773263433814e-05, |
|
"loss": 2.1381, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.05162271254547758, |
|
"grad_norm": 3.710803747177124, |
|
"learning_rate": 4.672346002621232e-05, |
|
"loss": 2.1296, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.05274494542690101, |
|
"grad_norm": 3.346266031265259, |
|
"learning_rate": 4.77391874180865e-05, |
|
"loss": 2.0755, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 0.05386717830832444, |
|
"grad_norm": 3.264901876449585, |
|
"learning_rate": 4.875491480996068e-05, |
|
"loss": 2.0902, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.05498941118974786, |
|
"grad_norm": 3.031913995742798, |
|
"learning_rate": 4.977064220183487e-05, |
|
"loss": 2.1002, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 0.056111644071171286, |
|
"grad_norm": 3.3827006816864014, |
|
"learning_rate": 4.9999915451558777e-05, |
|
"loss": 2.111, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.057233876952594714, |
|
"grad_norm": 3.5572054386138916, |
|
"learning_rate": 4.999955597496219e-05, |
|
"loss": 2.0809, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 0.058356109834018136, |
|
"grad_norm": 3.2875311374664307, |
|
"learning_rate": 4.9998914381774255e-05, |
|
"loss": 2.0562, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.059478342715441564, |
|
"grad_norm": 2.903362274169922, |
|
"learning_rate": 4.999799067923527e-05, |
|
"loss": 2.0598, |
|
"step": 1643 |
|
}, |
|
{ |
|
"epoch": 0.060600575596864985, |
|
"grad_norm": 2.980804681777954, |
|
"learning_rate": 4.999678487776908e-05, |
|
"loss": 2.0458, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 0.06172280847828841, |
|
"grad_norm": 2.880610466003418, |
|
"learning_rate": 4.9995296990983006e-05, |
|
"loss": 2.0433, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.06284504135971183, |
|
"grad_norm": 2.7269234657287598, |
|
"learning_rate": 4.999352703566763e-05, |
|
"loss": 2.0189, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.06396727424113527, |
|
"grad_norm": 2.808084487915039, |
|
"learning_rate": 4.999147503179668e-05, |
|
"loss": 2.0083, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 0.06508950712255869, |
|
"grad_norm": 2.925065279006958, |
|
"learning_rate": 4.998914100252672e-05, |
|
"loss": 2.001, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 0.06621174000398211, |
|
"grad_norm": 2.996300458908081, |
|
"learning_rate": 4.998652497419696e-05, |
|
"loss": 1.9877, |
|
"step": 1829 |
|
}, |
|
{ |
|
"epoch": 0.06733397288540555, |
|
"grad_norm": 2.6028084754943848, |
|
"learning_rate": 4.9983626976328927e-05, |
|
"loss": 1.9778, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.06845620576682897, |
|
"grad_norm": 2.4577603340148926, |
|
"learning_rate": 4.998044704162613e-05, |
|
"loss": 1.9998, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 0.06957843864825239, |
|
"grad_norm": 2.4269509315490723, |
|
"learning_rate": 4.9976985205973705e-05, |
|
"loss": 1.9813, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 0.07070067152967582, |
|
"grad_norm": 2.6069250106811523, |
|
"learning_rate": 4.997324150843799e-05, |
|
"loss": 1.9781, |
|
"step": 1953 |
|
}, |
|
{ |
|
"epoch": 0.07182290441109924, |
|
"grad_norm": 2.5287699699401855, |
|
"learning_rate": 4.99692159912661e-05, |
|
"loss": 1.9684, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.07294513729252267, |
|
"grad_norm": 2.6519899368286133, |
|
"learning_rate": 4.996490869988546e-05, |
|
"loss": 1.9821, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.0740673701739461, |
|
"grad_norm": 2.525928497314453, |
|
"learning_rate": 4.996031968290326e-05, |
|
"loss": 1.9512, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 0.07518960305536952, |
|
"grad_norm": 2.4517486095428467, |
|
"learning_rate": 4.995544899210594e-05, |
|
"loss": 1.9283, |
|
"step": 2077 |
|
}, |
|
{ |
|
"epoch": 0.07631183593679294, |
|
"grad_norm": 2.7807457447052, |
|
"learning_rate": 4.9950296682458583e-05, |
|
"loss": 1.9448, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 0.07743406881821638, |
|
"grad_norm": 2.4739558696746826, |
|
"learning_rate": 4.994486281210429e-05, |
|
"loss": 1.946, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 0.0785563016996398, |
|
"grad_norm": 2.6515214443206787, |
|
"learning_rate": 4.9939147442363566e-05, |
|
"loss": 1.9474, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.07967853458106322, |
|
"grad_norm": 2.8361852169036865, |
|
"learning_rate": 4.9933150637733574e-05, |
|
"loss": 1.9463, |
|
"step": 2201 |
|
}, |
|
{ |
|
"epoch": 0.08080076746248666, |
|
"grad_norm": 2.332261323928833, |
|
"learning_rate": 4.992687246588743e-05, |
|
"loss": 1.9607, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 0.08192300034391008, |
|
"grad_norm": 2.3486499786376953, |
|
"learning_rate": 4.992031299767347e-05, |
|
"loss": 1.9248, |
|
"step": 2263 |
|
}, |
|
{ |
|
"epoch": 0.0830452332253335, |
|
"grad_norm": 3.125208616256714, |
|
"learning_rate": 4.9913472307114386e-05, |
|
"loss": 1.9088, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 0.08416746610675693, |
|
"grad_norm": 2.2809853553771973, |
|
"learning_rate": 4.9906350471406446e-05, |
|
"loss": 1.9199, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.08528969898818035, |
|
"grad_norm": 2.567641258239746, |
|
"learning_rate": 4.989894757091861e-05, |
|
"loss": 1.9054, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 0.08641193186960378, |
|
"grad_norm": 2.2755303382873535, |
|
"learning_rate": 4.989126368919158e-05, |
|
"loss": 1.903, |
|
"step": 2387 |
|
}, |
|
{ |
|
"epoch": 0.08753416475102721, |
|
"grad_norm": 2.147775888442993, |
|
"learning_rate": 4.988329891293693e-05, |
|
"loss": 1.8993, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 0.08865639763245063, |
|
"grad_norm": 2.2279839515686035, |
|
"learning_rate": 4.987505333203608e-05, |
|
"loss": 1.905, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 0.08977863051387405, |
|
"grad_norm": 2.317538022994995, |
|
"learning_rate": 4.9866527039539276e-05, |
|
"loss": 1.8776, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.09090086339529749, |
|
"grad_norm": 2.296868324279785, |
|
"learning_rate": 4.9857720131664594e-05, |
|
"loss": 1.8714, |
|
"step": 2511 |
|
}, |
|
{ |
|
"epoch": 0.09202309627672091, |
|
"grad_norm": 2.282538890838623, |
|
"learning_rate": 4.9848632707796773e-05, |
|
"loss": 1.8765, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 0.09314532915814433, |
|
"grad_norm": 2.1396827697753906, |
|
"learning_rate": 4.9839264870486155e-05, |
|
"loss": 1.8827, |
|
"step": 2573 |
|
}, |
|
{ |
|
"epoch": 0.09426756203956776, |
|
"grad_norm": 2.1897048950195312, |
|
"learning_rate": 4.9829616725447526e-05, |
|
"loss": 1.8655, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 0.09538979492099119, |
|
"grad_norm": 2.1385130882263184, |
|
"learning_rate": 4.981968838155888e-05, |
|
"loss": 1.8768, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.09651202780241461, |
|
"grad_norm": 2.264171600341797, |
|
"learning_rate": 4.980947995086024e-05, |
|
"loss": 1.8734, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 0.09763426068383804, |
|
"grad_norm": 2.089871883392334, |
|
"learning_rate": 4.979899154855234e-05, |
|
"loss": 1.8516, |
|
"step": 2697 |
|
}, |
|
{ |
|
"epoch": 0.09875649356526146, |
|
"grad_norm": 2.092179298400879, |
|
"learning_rate": 4.9788223292995386e-05, |
|
"loss": 1.8729, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 0.09987872644668488, |
|
"grad_norm": 2.3216769695281982, |
|
"learning_rate": 4.977717530570768e-05, |
|
"loss": 1.8673, |
|
"step": 2759 |
|
}, |
|
{ |
|
"epoch": 0.10100095932810832, |
|
"grad_norm": 2.104457139968872, |
|
"learning_rate": 4.976584771136425e-05, |
|
"loss": 1.8734, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.10212319220953174, |
|
"grad_norm": 2.236363649368286, |
|
"learning_rate": 4.975424063779547e-05, |
|
"loss": 1.8316, |
|
"step": 2821 |
|
}, |
|
{ |
|
"epoch": 0.10324542509095516, |
|
"grad_norm": 2.264967203140259, |
|
"learning_rate": 4.974235421598557e-05, |
|
"loss": 1.8614, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 0.1043676579723786, |
|
"grad_norm": 2.1815454959869385, |
|
"learning_rate": 4.973018858007122e-05, |
|
"loss": 1.8365, |
|
"step": 2883 |
|
}, |
|
{ |
|
"epoch": 0.10548989085380202, |
|
"grad_norm": 2.049677848815918, |
|
"learning_rate": 4.9717743867339963e-05, |
|
"loss": 1.8454, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 0.10661212373522544, |
|
"grad_norm": 1.9844895601272583, |
|
"learning_rate": 4.9705020218228695e-05, |
|
"loss": 1.8419, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.10773435661664887, |
|
"grad_norm": 2.052708387374878, |
|
"learning_rate": 4.969201777632205e-05, |
|
"loss": 1.8509, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.1088565894980723, |
|
"grad_norm": 2.014535665512085, |
|
"learning_rate": 4.9678736688350846e-05, |
|
"loss": 1.8129, |
|
"step": 3007 |
|
}, |
|
{ |
|
"epoch": 0.10997882237949572, |
|
"grad_norm": 1.9768311977386475, |
|
"learning_rate": 4.966517710419033e-05, |
|
"loss": 1.8375, |
|
"step": 3038 |
|
}, |
|
{ |
|
"epoch": 0.11110105526091915, |
|
"grad_norm": 2.046293258666992, |
|
"learning_rate": 4.965133917685858e-05, |
|
"loss": 1.8132, |
|
"step": 3069 |
|
}, |
|
{ |
|
"epoch": 0.11222328814234257, |
|
"grad_norm": 2.104555368423462, |
|
"learning_rate": 4.9637223062514714e-05, |
|
"loss": 1.8147, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.113345521023766, |
|
"grad_norm": 2.04533052444458, |
|
"learning_rate": 4.962282892045718e-05, |
|
"loss": 1.8591, |
|
"step": 3131 |
|
}, |
|
{ |
|
"epoch": 0.11446775390518943, |
|
"grad_norm": 1.967282772064209, |
|
"learning_rate": 4.9608156913121904e-05, |
|
"loss": 1.7966, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 0.11558998678661285, |
|
"grad_norm": 2.092106342315674, |
|
"learning_rate": 4.959320720608049e-05, |
|
"loss": 1.8301, |
|
"step": 3193 |
|
}, |
|
{ |
|
"epoch": 0.11671221966803627, |
|
"grad_norm": 2.0512046813964844, |
|
"learning_rate": 4.9577979968038354e-05, |
|
"loss": 1.8211, |
|
"step": 3224 |
|
}, |
|
{ |
|
"epoch": 0.11783445254945969, |
|
"grad_norm": 1.9260915517807007, |
|
"learning_rate": 4.956247537083282e-05, |
|
"loss": 1.7989, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.11895668543088313, |
|
"grad_norm": 2.0938026905059814, |
|
"learning_rate": 4.9546693589431145e-05, |
|
"loss": 1.8336, |
|
"step": 3286 |
|
}, |
|
{ |
|
"epoch": 0.12007891831230655, |
|
"grad_norm": 1.9972988367080688, |
|
"learning_rate": 4.9530634801928595e-05, |
|
"loss": 1.8147, |
|
"step": 3317 |
|
}, |
|
{ |
|
"epoch": 0.12120115119372997, |
|
"grad_norm": 1.9120224714279175, |
|
"learning_rate": 4.9514299189546395e-05, |
|
"loss": 1.8028, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 0.1223233840751534, |
|
"grad_norm": 1.959033727645874, |
|
"learning_rate": 4.949768693662973e-05, |
|
"loss": 1.8281, |
|
"step": 3379 |
|
}, |
|
{ |
|
"epoch": 0.12344561695657683, |
|
"grad_norm": 1.9182357788085938, |
|
"learning_rate": 4.948079823064559e-05, |
|
"loss": 1.8165, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.12456784983800025, |
|
"grad_norm": 1.9079999923706055, |
|
"learning_rate": 4.946363326218074e-05, |
|
"loss": 1.7916, |
|
"step": 3441 |
|
}, |
|
{ |
|
"epoch": 0.12569008271942367, |
|
"grad_norm": 1.916276216506958, |
|
"learning_rate": 4.9446192224939525e-05, |
|
"loss": 1.8086, |
|
"step": 3472 |
|
}, |
|
{ |
|
"epoch": 0.1268123156008471, |
|
"grad_norm": 1.903389811515808, |
|
"learning_rate": 4.942847531574167e-05, |
|
"loss": 1.8116, |
|
"step": 3503 |
|
}, |
|
{ |
|
"epoch": 0.12793454848227054, |
|
"grad_norm": 2.064885139465332, |
|
"learning_rate": 4.941048273452008e-05, |
|
"loss": 1.8144, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 0.12905678136369395, |
|
"grad_norm": 2.1314241886138916, |
|
"learning_rate": 4.9392214684318605e-05, |
|
"loss": 1.7943, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.13017901424511738, |
|
"grad_norm": 2.0061681270599365, |
|
"learning_rate": 4.93736713712897e-05, |
|
"loss": 1.794, |
|
"step": 3596 |
|
}, |
|
{ |
|
"epoch": 0.13130124712654082, |
|
"grad_norm": 1.9408286809921265, |
|
"learning_rate": 4.9354853004692124e-05, |
|
"loss": 1.7882, |
|
"step": 3627 |
|
}, |
|
{ |
|
"epoch": 0.13242348000796422, |
|
"grad_norm": 1.8884766101837158, |
|
"learning_rate": 4.93357597968886e-05, |
|
"loss": 1.7846, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 0.13354571288938766, |
|
"grad_norm": 1.9393378496170044, |
|
"learning_rate": 4.931639196334338e-05, |
|
"loss": 1.7923, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 0.1346679457708111, |
|
"grad_norm": 1.8815410137176514, |
|
"learning_rate": 4.9296749722619826e-05, |
|
"loss": 1.7939, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.1357901786522345, |
|
"grad_norm": 1.8603038787841797, |
|
"learning_rate": 4.9276833296377966e-05, |
|
"loss": 1.7589, |
|
"step": 3751 |
|
}, |
|
{ |
|
"epoch": 0.13691241153365794, |
|
"grad_norm": 1.775247573852539, |
|
"learning_rate": 4.925664290937196e-05, |
|
"loss": 1.7897, |
|
"step": 3782 |
|
}, |
|
{ |
|
"epoch": 0.13803464441508137, |
|
"grad_norm": 1.8576780557632446, |
|
"learning_rate": 4.9236178789447576e-05, |
|
"loss": 1.7908, |
|
"step": 3813 |
|
}, |
|
{ |
|
"epoch": 0.13915687729650478, |
|
"grad_norm": 1.800264596939087, |
|
"learning_rate": 4.921544116753962e-05, |
|
"loss": 1.7736, |
|
"step": 3844 |
|
}, |
|
{ |
|
"epoch": 0.1402791101779282, |
|
"grad_norm": 1.9730401039123535, |
|
"learning_rate": 4.919443027766935e-05, |
|
"loss": 1.7639, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.14140134305935165, |
|
"grad_norm": 1.8654968738555908, |
|
"learning_rate": 4.91731463569418e-05, |
|
"loss": 1.7477, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 0.14252357594077505, |
|
"grad_norm": 1.8131386041641235, |
|
"learning_rate": 4.915158964554312e-05, |
|
"loss": 1.7887, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 0.1436458088221985, |
|
"grad_norm": 1.8576264381408691, |
|
"learning_rate": 4.912976038673786e-05, |
|
"loss": 1.7779, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.14476804170362192, |
|
"grad_norm": 1.8940199613571167, |
|
"learning_rate": 4.9107658826866254e-05, |
|
"loss": 1.7653, |
|
"step": 3999 |
|
}, |
|
{ |
|
"epoch": 0.14589027458504533, |
|
"grad_norm": 1.7727802991867065, |
|
"learning_rate": 4.908528521534139e-05, |
|
"loss": 1.7809, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.14701250746646877, |
|
"grad_norm": 1.7416553497314453, |
|
"learning_rate": 4.906263980464644e-05, |
|
"loss": 1.7605, |
|
"step": 4061 |
|
}, |
|
{ |
|
"epoch": 0.1481347403478922, |
|
"grad_norm": 1.82987642288208, |
|
"learning_rate": 4.903972285033178e-05, |
|
"loss": 1.7554, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 0.1492569732293156, |
|
"grad_norm": 1.916339635848999, |
|
"learning_rate": 4.901653461101213e-05, |
|
"loss": 1.7872, |
|
"step": 4123 |
|
}, |
|
{ |
|
"epoch": 0.15037920611073904, |
|
"grad_norm": 1.8903008699417114, |
|
"learning_rate": 4.8993075348363626e-05, |
|
"loss": 1.782, |
|
"step": 4154 |
|
}, |
|
{ |
|
"epoch": 0.15150143899216248, |
|
"grad_norm": 1.9334847927093506, |
|
"learning_rate": 4.896934532712084e-05, |
|
"loss": 1.7565, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.1526236718735859, |
|
"grad_norm": 1.7778478860855103, |
|
"learning_rate": 4.8945344815073846e-05, |
|
"loss": 1.7613, |
|
"step": 4216 |
|
}, |
|
{ |
|
"epoch": 0.15374590475500932, |
|
"grad_norm": 1.7348295450210571, |
|
"learning_rate": 4.892107408306516e-05, |
|
"loss": 1.7512, |
|
"step": 4247 |
|
}, |
|
{ |
|
"epoch": 0.15486813763643276, |
|
"grad_norm": 1.7189710140228271, |
|
"learning_rate": 4.889653340498669e-05, |
|
"loss": 1.741, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 0.15599037051785616, |
|
"grad_norm": 1.8557075262069702, |
|
"learning_rate": 4.8871723057776664e-05, |
|
"loss": 1.7471, |
|
"step": 4309 |
|
}, |
|
{ |
|
"epoch": 0.1571126033992796, |
|
"grad_norm": 1.7188880443572998, |
|
"learning_rate": 4.8846643321416476e-05, |
|
"loss": 1.7492, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.15823483628070303, |
|
"grad_norm": 1.6712063550949097, |
|
"learning_rate": 4.882129447892753e-05, |
|
"loss": 1.7434, |
|
"step": 4371 |
|
}, |
|
{ |
|
"epoch": 0.15935706916212644, |
|
"grad_norm": 1.7652437686920166, |
|
"learning_rate": 4.8795676816368076e-05, |
|
"loss": 1.7422, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 0.16047930204354988, |
|
"grad_norm": 1.7910144329071045, |
|
"learning_rate": 4.876979062282995e-05, |
|
"loss": 1.7635, |
|
"step": 4433 |
|
}, |
|
{ |
|
"epoch": 0.1616015349249733, |
|
"grad_norm": 1.9248684644699097, |
|
"learning_rate": 4.8743636190435325e-05, |
|
"loss": 1.7401, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 0.16272376780639672, |
|
"grad_norm": 1.828202486038208, |
|
"learning_rate": 4.871721381433344e-05, |
|
"loss": 1.7419, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.16384600068782015, |
|
"grad_norm": 1.7170790433883667, |
|
"learning_rate": 4.869052379269719e-05, |
|
"loss": 1.7562, |
|
"step": 4526 |
|
}, |
|
{ |
|
"epoch": 0.1649682335692436, |
|
"grad_norm": 1.753203272819519, |
|
"learning_rate": 4.866356642671985e-05, |
|
"loss": 1.7569, |
|
"step": 4557 |
|
}, |
|
{ |
|
"epoch": 0.166090466450667, |
|
"grad_norm": 1.7906442880630493, |
|
"learning_rate": 4.8636342020611634e-05, |
|
"loss": 1.7376, |
|
"step": 4588 |
|
}, |
|
{ |
|
"epoch": 0.16721269933209043, |
|
"grad_norm": 1.7113378047943115, |
|
"learning_rate": 4.860885088159626e-05, |
|
"loss": 1.7386, |
|
"step": 4619 |
|
}, |
|
{ |
|
"epoch": 0.16833493221351387, |
|
"grad_norm": 1.7997937202453613, |
|
"learning_rate": 4.858109331990751e-05, |
|
"loss": 1.7531, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.16945716509493727, |
|
"grad_norm": 1.76421320438385, |
|
"learning_rate": 4.855306964878567e-05, |
|
"loss": 1.7402, |
|
"step": 4681 |
|
}, |
|
{ |
|
"epoch": 0.1705793979763607, |
|
"grad_norm": 1.7803616523742676, |
|
"learning_rate": 4.8524780184474084e-05, |
|
"loss": 1.7345, |
|
"step": 4712 |
|
}, |
|
{ |
|
"epoch": 0.17170163085778414, |
|
"grad_norm": 1.7763142585754395, |
|
"learning_rate": 4.8496225246215496e-05, |
|
"loss": 1.7469, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 0.17282386373920755, |
|
"grad_norm": 1.728219747543335, |
|
"learning_rate": 4.8467405156248505e-05, |
|
"loss": 1.7182, |
|
"step": 4774 |
|
}, |
|
{ |
|
"epoch": 0.17394609662063099, |
|
"grad_norm": 1.7837860584259033, |
|
"learning_rate": 4.843832023980392e-05, |
|
"loss": 1.739, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.17506832950205442, |
|
"grad_norm": 1.7005128860473633, |
|
"learning_rate": 4.840897082510106e-05, |
|
"loss": 1.7377, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 0.17619056238347783, |
|
"grad_norm": 1.6570392847061157, |
|
"learning_rate": 4.8379357243344084e-05, |
|
"loss": 1.712, |
|
"step": 4867 |
|
}, |
|
{ |
|
"epoch": 0.17731279526490126, |
|
"grad_norm": 1.6575350761413574, |
|
"learning_rate": 4.8349479828718236e-05, |
|
"loss": 1.7147, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 0.1784350281463247, |
|
"grad_norm": 1.8768808841705322, |
|
"learning_rate": 4.8319338918386075e-05, |
|
"loss": 1.7312, |
|
"step": 4929 |
|
}, |
|
{ |
|
"epoch": 0.1795572610277481, |
|
"grad_norm": 1.7145389318466187, |
|
"learning_rate": 4.828893485248369e-05, |
|
"loss": 1.7221, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.18067949390917154, |
|
"grad_norm": 1.834173560142517, |
|
"learning_rate": 4.825826797411682e-05, |
|
"loss": 1.7322, |
|
"step": 4991 |
|
}, |
|
{ |
|
"epoch": 0.18180172679059498, |
|
"grad_norm": 1.7125933170318604, |
|
"learning_rate": 4.822733862935702e-05, |
|
"loss": 1.7156, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 0.18292395967201838, |
|
"grad_norm": 1.7470024824142456, |
|
"learning_rate": 4.819614716723775e-05, |
|
"loss": 1.7176, |
|
"step": 5053 |
|
}, |
|
{ |
|
"epoch": 0.18404619255344182, |
|
"grad_norm": 1.7042289972305298, |
|
"learning_rate": 4.8164693939750425e-05, |
|
"loss": 1.7192, |
|
"step": 5084 |
|
}, |
|
{ |
|
"epoch": 0.18516842543486525, |
|
"grad_norm": 1.6803418397903442, |
|
"learning_rate": 4.813297930184042e-05, |
|
"loss": 1.7197, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.18629065831628866, |
|
"grad_norm": 1.7296956777572632, |
|
"learning_rate": 4.810100361140314e-05, |
|
"loss": 1.72, |
|
"step": 5146 |
|
}, |
|
{ |
|
"epoch": 0.1874128911977121, |
|
"grad_norm": 1.6245464086532593, |
|
"learning_rate": 4.8068767229279885e-05, |
|
"loss": 1.7081, |
|
"step": 5177 |
|
}, |
|
{ |
|
"epoch": 0.18853512407913553, |
|
"grad_norm": 1.7138885259628296, |
|
"learning_rate": 4.8036270519253854e-05, |
|
"loss": 1.7068, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 0.18965735696055894, |
|
"grad_norm": 1.704185128211975, |
|
"learning_rate": 4.8003513848046e-05, |
|
"loss": 1.7219, |
|
"step": 5239 |
|
}, |
|
{ |
|
"epoch": 0.19077958984198237, |
|
"grad_norm": 1.712551236152649, |
|
"learning_rate": 4.79704975853109e-05, |
|
"loss": 1.7118, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.1919018227234058, |
|
"grad_norm": 1.7193052768707275, |
|
"learning_rate": 4.793722210363262e-05, |
|
"loss": 1.7195, |
|
"step": 5301 |
|
}, |
|
{ |
|
"epoch": 0.19302405560482921, |
|
"grad_norm": 1.5574607849121094, |
|
"learning_rate": 4.7903687778520414e-05, |
|
"loss": 1.7286, |
|
"step": 5332 |
|
}, |
|
{ |
|
"epoch": 0.19414628848625265, |
|
"grad_norm": 1.7480719089508057, |
|
"learning_rate": 4.7869894988404593e-05, |
|
"loss": 1.6957, |
|
"step": 5363 |
|
}, |
|
{ |
|
"epoch": 0.19526852136767608, |
|
"grad_norm": 1.7487633228302002, |
|
"learning_rate": 4.783584411463221e-05, |
|
"loss": 1.7203, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 0.1963907542490995, |
|
"grad_norm": 1.6720587015151978, |
|
"learning_rate": 4.780153554146274e-05, |
|
"loss": 1.7009, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.19751298713052293, |
|
"grad_norm": 1.6622951030731201, |
|
"learning_rate": 4.7766969656063766e-05, |
|
"loss": 1.7049, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 0.19863522001194636, |
|
"grad_norm": 1.656158208847046, |
|
"learning_rate": 4.773214684850662e-05, |
|
"loss": 1.7104, |
|
"step": 5487 |
|
}, |
|
{ |
|
"epoch": 0.19975745289336977, |
|
"grad_norm": 1.6559454202651978, |
|
"learning_rate": 4.769706751176193e-05, |
|
"loss": 1.7089, |
|
"step": 5518 |
|
}, |
|
{ |
|
"epoch": 0.2008796857747932, |
|
"grad_norm": 1.7262494564056396, |
|
"learning_rate": 4.7661732041695264e-05, |
|
"loss": 1.7143, |
|
"step": 5549 |
|
}, |
|
{ |
|
"epoch": 0.20200191865621664, |
|
"grad_norm": 1.6877381801605225, |
|
"learning_rate": 4.762614083706258e-05, |
|
"loss": 1.7134, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.20312415153764005, |
|
"grad_norm": 1.5669549703598022, |
|
"learning_rate": 4.759029429950581e-05, |
|
"loss": 1.7213, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 0.20424638441906348, |
|
"grad_norm": 1.7044217586517334, |
|
"learning_rate": 4.7554192833548235e-05, |
|
"loss": 1.7185, |
|
"step": 5642 |
|
}, |
|
{ |
|
"epoch": 0.20536861730048692, |
|
"grad_norm": 1.6999757289886475, |
|
"learning_rate": 4.751783684659e-05, |
|
"loss": 1.7163, |
|
"step": 5673 |
|
}, |
|
{ |
|
"epoch": 0.20649085018191032, |
|
"grad_norm": 1.6043522357940674, |
|
"learning_rate": 4.748122674890348e-05, |
|
"loss": 1.7031, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 0.20761308306333376, |
|
"grad_norm": 1.7062305212020874, |
|
"learning_rate": 4.7444362953628654e-05, |
|
"loss": 1.7035, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 0.2087353159447572, |
|
"grad_norm": 1.6612005233764648, |
|
"learning_rate": 4.7407245876768424e-05, |
|
"loss": 1.6981, |
|
"step": 5766 |
|
}, |
|
{ |
|
"epoch": 0.2098575488261806, |
|
"grad_norm": 1.7277076244354248, |
|
"learning_rate": 4.736987593718397e-05, |
|
"loss": 1.7161, |
|
"step": 5797 |
|
}, |
|
{ |
|
"epoch": 0.21097978170760404, |
|
"grad_norm": 1.705458402633667, |
|
"learning_rate": 4.733225355658999e-05, |
|
"loss": 1.6854, |
|
"step": 5828 |
|
}, |
|
{ |
|
"epoch": 0.21210201458902747, |
|
"grad_norm": 1.629443883895874, |
|
"learning_rate": 4.7294379159549926e-05, |
|
"loss": 1.7025, |
|
"step": 5859 |
|
}, |
|
{ |
|
"epoch": 0.21322424747045088, |
|
"grad_norm": 1.613192081451416, |
|
"learning_rate": 4.725625317347119e-05, |
|
"loss": 1.6992, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.2143464803518743, |
|
"grad_norm": 1.6801332235336304, |
|
"learning_rate": 4.7217876028600374e-05, |
|
"loss": 1.6798, |
|
"step": 5921 |
|
}, |
|
{ |
|
"epoch": 0.21546871323329775, |
|
"grad_norm": 1.6418830156326294, |
|
"learning_rate": 4.717924815801832e-05, |
|
"loss": 1.6918, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.21659094611472116, |
|
"grad_norm": 1.6128371953964233, |
|
"learning_rate": 4.714036999763532e-05, |
|
"loss": 1.706, |
|
"step": 5983 |
|
}, |
|
{ |
|
"epoch": 0.2177131789961446, |
|
"grad_norm": 1.71291983127594, |
|
"learning_rate": 4.7101241986186116e-05, |
|
"loss": 1.6861, |
|
"step": 6014 |
|
}, |
|
{ |
|
"epoch": 0.21883541187756803, |
|
"grad_norm": 1.5903745889663696, |
|
"learning_rate": 4.7061864565225e-05, |
|
"loss": 1.6886, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 0.21995764475899143, |
|
"grad_norm": 1.71088445186615, |
|
"learning_rate": 4.702223817912081e-05, |
|
"loss": 1.7003, |
|
"step": 6076 |
|
}, |
|
{ |
|
"epoch": 0.22107987764041487, |
|
"grad_norm": 1.541530966758728, |
|
"learning_rate": 4.698236327505195e-05, |
|
"loss": 1.6956, |
|
"step": 6107 |
|
}, |
|
{ |
|
"epoch": 0.2222021105218383, |
|
"grad_norm": 1.539455533027649, |
|
"learning_rate": 4.694224030300127e-05, |
|
"loss": 1.6833, |
|
"step": 6138 |
|
}, |
|
{ |
|
"epoch": 0.2233243434032617, |
|
"grad_norm": 1.688120722770691, |
|
"learning_rate": 4.690186971575107e-05, |
|
"loss": 1.6973, |
|
"step": 6169 |
|
}, |
|
{ |
|
"epoch": 0.22444657628468515, |
|
"grad_norm": 1.6934964656829834, |
|
"learning_rate": 4.6861251968877916e-05, |
|
"loss": 1.6979, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.22556880916610858, |
|
"grad_norm": 1.6558688879013062, |
|
"learning_rate": 4.68203875207476e-05, |
|
"loss": 1.6925, |
|
"step": 6231 |
|
}, |
|
{ |
|
"epoch": 0.226691042047532, |
|
"grad_norm": 1.6245280504226685, |
|
"learning_rate": 4.677927683250983e-05, |
|
"loss": 1.6688, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 0.22781327492895542, |
|
"grad_norm": 1.5808422565460205, |
|
"learning_rate": 4.6737920368093156e-05, |
|
"loss": 1.688, |
|
"step": 6293 |
|
}, |
|
{ |
|
"epoch": 0.22893550781037886, |
|
"grad_norm": 1.5224875211715698, |
|
"learning_rate": 4.669631859419965e-05, |
|
"loss": 1.6864, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 0.23005774069180226, |
|
"grad_norm": 1.5904366970062256, |
|
"learning_rate": 4.6654471980299676e-05, |
|
"loss": 1.6893, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 0.2311799735732257, |
|
"grad_norm": 1.6145131587982178, |
|
"learning_rate": 4.661238099862658e-05, |
|
"loss": 1.6818, |
|
"step": 6386 |
|
}, |
|
{ |
|
"epoch": 0.23230220645464913, |
|
"grad_norm": 1.6297610998153687, |
|
"learning_rate": 4.657004612417138e-05, |
|
"loss": 1.687, |
|
"step": 6417 |
|
}, |
|
{ |
|
"epoch": 0.23342443933607254, |
|
"grad_norm": 1.6199692487716675, |
|
"learning_rate": 4.6527467834677374e-05, |
|
"loss": 1.6945, |
|
"step": 6448 |
|
}, |
|
{ |
|
"epoch": 0.23454667221749598, |
|
"grad_norm": 1.5439369678497314, |
|
"learning_rate": 4.648464661063478e-05, |
|
"loss": 1.6926, |
|
"step": 6479 |
|
}, |
|
{ |
|
"epoch": 0.23566890509891938, |
|
"grad_norm": 1.6095410585403442, |
|
"learning_rate": 4.6441582935275264e-05, |
|
"loss": 1.689, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.23679113798034282, |
|
"grad_norm": 1.4971855878829956, |
|
"learning_rate": 4.6398277294566586e-05, |
|
"loss": 1.6622, |
|
"step": 6541 |
|
}, |
|
{ |
|
"epoch": 0.23791337086176625, |
|
"grad_norm": 1.53174889087677, |
|
"learning_rate": 4.6354730177207e-05, |
|
"loss": 1.6785, |
|
"step": 6572 |
|
}, |
|
{ |
|
"epoch": 0.23903560374318966, |
|
"grad_norm": 1.4567692279815674, |
|
"learning_rate": 4.6310942074619787e-05, |
|
"loss": 1.6776, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 0.2401578366246131, |
|
"grad_norm": 1.6813284158706665, |
|
"learning_rate": 4.626691348094777e-05, |
|
"loss": 1.6692, |
|
"step": 6634 |
|
}, |
|
{ |
|
"epoch": 0.24128006950603653, |
|
"grad_norm": 1.5593857765197754, |
|
"learning_rate": 4.622264489304762e-05, |
|
"loss": 1.6811, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 0.24240230238745994, |
|
"grad_norm": 1.5681389570236206, |
|
"learning_rate": 4.617813681048434e-05, |
|
"loss": 1.689, |
|
"step": 6696 |
|
}, |
|
{ |
|
"epoch": 0.24352453526888337, |
|
"grad_norm": 1.6402842998504639, |
|
"learning_rate": 4.61333897355256e-05, |
|
"loss": 1.6621, |
|
"step": 6727 |
|
}, |
|
{ |
|
"epoch": 0.2446467681503068, |
|
"grad_norm": 1.642669677734375, |
|
"learning_rate": 4.608840417313604e-05, |
|
"loss": 1.6562, |
|
"step": 6758 |
|
}, |
|
{ |
|
"epoch": 0.24576900103173022, |
|
"grad_norm": 1.6442660093307495, |
|
"learning_rate": 4.6043180630971646e-05, |
|
"loss": 1.6721, |
|
"step": 6789 |
|
}, |
|
{ |
|
"epoch": 0.24689123391315365, |
|
"grad_norm": 1.5577408075332642, |
|
"learning_rate": 4.599771961937391e-05, |
|
"loss": 1.6837, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.2480134667945771, |
|
"grad_norm": 1.8555899858474731, |
|
"learning_rate": 4.5952021651364204e-05, |
|
"loss": 1.6739, |
|
"step": 6851 |
|
}, |
|
{ |
|
"epoch": 0.2491356996760005, |
|
"grad_norm": 1.667812466621399, |
|
"learning_rate": 4.590608724263786e-05, |
|
"loss": 1.6704, |
|
"step": 6882 |
|
}, |
|
{ |
|
"epoch": 0.25025793255742396, |
|
"grad_norm": 1.6642868518829346, |
|
"learning_rate": 4.585991691155845e-05, |
|
"loss": 1.6784, |
|
"step": 6913 |
|
}, |
|
{ |
|
"epoch": 0.25138016543884734, |
|
"grad_norm": 1.6429824829101562, |
|
"learning_rate": 4.581351117915188e-05, |
|
"loss": 1.6729, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 0.25250239832027077, |
|
"grad_norm": 1.6268694400787354, |
|
"learning_rate": 4.5766870569100534e-05, |
|
"loss": 1.6657, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.2536246312016942, |
|
"grad_norm": 1.496177315711975, |
|
"learning_rate": 4.571999560773736e-05, |
|
"loss": 1.6611, |
|
"step": 7006 |
|
}, |
|
{ |
|
"epoch": 0.25474686408311764, |
|
"grad_norm": 1.7032805681228638, |
|
"learning_rate": 4.5672886824039915e-05, |
|
"loss": 1.6816, |
|
"step": 7037 |
|
}, |
|
{ |
|
"epoch": 0.2558690969645411, |
|
"grad_norm": 1.791925072669983, |
|
"learning_rate": 4.5625544749624435e-05, |
|
"loss": 1.6689, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 0.2569913298459645, |
|
"grad_norm": 1.5614711046218872, |
|
"learning_rate": 4.5577969918739794e-05, |
|
"loss": 1.6647, |
|
"step": 7099 |
|
}, |
|
{ |
|
"epoch": 0.2581135627273879, |
|
"grad_norm": 1.517112135887146, |
|
"learning_rate": 4.5530162868261486e-05, |
|
"loss": 1.6614, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.2592357956088113, |
|
"grad_norm": 1.5636824369430542, |
|
"learning_rate": 4.548212413768558e-05, |
|
"loss": 1.6599, |
|
"step": 7161 |
|
}, |
|
{ |
|
"epoch": 0.26035802849023476, |
|
"grad_norm": 1.5803399085998535, |
|
"learning_rate": 4.543385426912261e-05, |
|
"loss": 1.6558, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 0.2614802613716582, |
|
"grad_norm": 1.6228526830673218, |
|
"learning_rate": 4.53853538072915e-05, |
|
"loss": 1.6778, |
|
"step": 7223 |
|
}, |
|
{ |
|
"epoch": 0.26260249425308163, |
|
"grad_norm": 1.5660549402236938, |
|
"learning_rate": 4.533662329951336e-05, |
|
"loss": 1.6827, |
|
"step": 7254 |
|
}, |
|
{ |
|
"epoch": 0.26372472713450507, |
|
"grad_norm": 1.555421233177185, |
|
"learning_rate": 4.528766329570536e-05, |
|
"loss": 1.6755, |
|
"step": 7285 |
|
}, |
|
{ |
|
"epoch": 0.26484696001592845, |
|
"grad_norm": 1.603285312652588, |
|
"learning_rate": 4.523847434837447e-05, |
|
"loss": 1.6455, |
|
"step": 7316 |
|
}, |
|
{ |
|
"epoch": 0.2659691928973519, |
|
"grad_norm": 1.510772943496704, |
|
"learning_rate": 4.518905701261128e-05, |
|
"loss": 1.6736, |
|
"step": 7347 |
|
}, |
|
{ |
|
"epoch": 0.2670914257787753, |
|
"grad_norm": 1.6260360479354858, |
|
"learning_rate": 4.5139411846083715e-05, |
|
"loss": 1.6643, |
|
"step": 7378 |
|
}, |
|
{ |
|
"epoch": 0.26821365866019875, |
|
"grad_norm": 3.0237209796905518, |
|
"learning_rate": 4.508953940903073e-05, |
|
"loss": 1.6615, |
|
"step": 7409 |
|
}, |
|
{ |
|
"epoch": 0.2693358915416222, |
|
"grad_norm": 1.4725430011749268, |
|
"learning_rate": 4.5039440264255994e-05, |
|
"loss": 1.6582, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.2704581244230456, |
|
"grad_norm": 1.5135307312011719, |
|
"learning_rate": 4.498911497712155e-05, |
|
"loss": 1.6754, |
|
"step": 7471 |
|
}, |
|
{ |
|
"epoch": 0.271580357304469, |
|
"grad_norm": 1.5741811990737915, |
|
"learning_rate": 4.493856411554142e-05, |
|
"loss": 1.6889, |
|
"step": 7502 |
|
}, |
|
{ |
|
"epoch": 0.27270259018589244, |
|
"grad_norm": 1.5469688177108765, |
|
"learning_rate": 4.4887788249975206e-05, |
|
"loss": 1.6542, |
|
"step": 7533 |
|
}, |
|
{ |
|
"epoch": 0.27382482306731587, |
|
"grad_norm": 1.4596927165985107, |
|
"learning_rate": 4.4836787953421656e-05, |
|
"loss": 1.6365, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 0.2749470559487393, |
|
"grad_norm": 1.566522479057312, |
|
"learning_rate": 4.478556380141218e-05, |
|
"loss": 1.657, |
|
"step": 7595 |
|
}, |
|
{ |
|
"epoch": 0.27606928883016274, |
|
"grad_norm": 1.5141624212265015, |
|
"learning_rate": 4.4734116372004375e-05, |
|
"loss": 1.6695, |
|
"step": 7626 |
|
}, |
|
{ |
|
"epoch": 0.2771915217115862, |
|
"grad_norm": 1.4138630628585815, |
|
"learning_rate": 4.4682446245775477e-05, |
|
"loss": 1.6638, |
|
"step": 7657 |
|
}, |
|
{ |
|
"epoch": 0.27831375459300955, |
|
"grad_norm": 1.4885402917861938, |
|
"learning_rate": 4.463055400581586e-05, |
|
"loss": 1.6817, |
|
"step": 7688 |
|
}, |
|
{ |
|
"epoch": 0.279435987474433, |
|
"grad_norm": 1.645486831665039, |
|
"learning_rate": 4.4578440237722374e-05, |
|
"loss": 1.6392, |
|
"step": 7719 |
|
}, |
|
{ |
|
"epoch": 0.2805582203558564, |
|
"grad_norm": 1.5977535247802734, |
|
"learning_rate": 4.452610552959183e-05, |
|
"loss": 1.6557, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.28168045323727986, |
|
"grad_norm": 1.6347745656967163, |
|
"learning_rate": 4.447355047201428e-05, |
|
"loss": 1.6573, |
|
"step": 7781 |
|
}, |
|
{ |
|
"epoch": 0.2828026861187033, |
|
"grad_norm": 1.5288081169128418, |
|
"learning_rate": 4.4420775658066414e-05, |
|
"loss": 1.638, |
|
"step": 7812 |
|
}, |
|
{ |
|
"epoch": 0.28392491900012673, |
|
"grad_norm": 1.4643625020980835, |
|
"learning_rate": 4.436778168330484e-05, |
|
"loss": 1.6402, |
|
"step": 7843 |
|
}, |
|
{ |
|
"epoch": 0.2850471518815501, |
|
"grad_norm": 1.568663239479065, |
|
"learning_rate": 4.4314569145759353e-05, |
|
"loss": 1.6565, |
|
"step": 7874 |
|
}, |
|
{ |
|
"epoch": 0.28616938476297354, |
|
"grad_norm": 1.476515293121338, |
|
"learning_rate": 4.42611386459262e-05, |
|
"loss": 1.6709, |
|
"step": 7905 |
|
}, |
|
{ |
|
"epoch": 0.287291617644397, |
|
"grad_norm": 1.532404899597168, |
|
"learning_rate": 4.420749078676133e-05, |
|
"loss": 1.6333, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.2884138505258204, |
|
"grad_norm": 1.5388779640197754, |
|
"learning_rate": 4.4153626173673516e-05, |
|
"loss": 1.6494, |
|
"step": 7967 |
|
}, |
|
{ |
|
"epoch": 0.28953608340724385, |
|
"grad_norm": 1.5787324905395508, |
|
"learning_rate": 4.409954541451762e-05, |
|
"loss": 1.6362, |
|
"step": 7998 |
|
}, |
|
{ |
|
"epoch": 0.2906583162886673, |
|
"grad_norm": 1.4780092239379883, |
|
"learning_rate": 4.404524911958764e-05, |
|
"loss": 1.643, |
|
"step": 8029 |
|
}, |
|
{ |
|
"epoch": 0.29178054917009066, |
|
"grad_norm": 1.5434736013412476, |
|
"learning_rate": 4.399073790160989e-05, |
|
"loss": 1.6472, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.2929027820515141, |
|
"grad_norm": 1.4898840188980103, |
|
"learning_rate": 4.393601237573607e-05, |
|
"loss": 1.6483, |
|
"step": 8091 |
|
}, |
|
{ |
|
"epoch": 0.29402501493293753, |
|
"grad_norm": 1.5529502630233765, |
|
"learning_rate": 4.388107315953628e-05, |
|
"loss": 1.6291, |
|
"step": 8122 |
|
}, |
|
{ |
|
"epoch": 0.29514724781436097, |
|
"grad_norm": 1.4831997156143188, |
|
"learning_rate": 4.382592087299212e-05, |
|
"loss": 1.6518, |
|
"step": 8153 |
|
}, |
|
{ |
|
"epoch": 0.2962694806957844, |
|
"grad_norm": 1.4568578004837036, |
|
"learning_rate": 4.377055613848964e-05, |
|
"loss": 1.6465, |
|
"step": 8184 |
|
}, |
|
{ |
|
"epoch": 0.29739171357720784, |
|
"grad_norm": 1.4941576719284058, |
|
"learning_rate": 4.3714979580812355e-05, |
|
"loss": 1.634, |
|
"step": 8215 |
|
}, |
|
{ |
|
"epoch": 0.2985139464586312, |
|
"grad_norm": 1.5891722440719604, |
|
"learning_rate": 4.365919182713416e-05, |
|
"loss": 1.6422, |
|
"step": 8246 |
|
}, |
|
{ |
|
"epoch": 0.29963617934005465, |
|
"grad_norm": 1.5435233116149902, |
|
"learning_rate": 4.360319350701226e-05, |
|
"loss": 1.6446, |
|
"step": 8277 |
|
}, |
|
{ |
|
"epoch": 0.3007584122214781, |
|
"grad_norm": 1.4754277467727661, |
|
"learning_rate": 4.3546985252380115e-05, |
|
"loss": 1.655, |
|
"step": 8308 |
|
}, |
|
{ |
|
"epoch": 0.3018806451029015, |
|
"grad_norm": 1.5463342666625977, |
|
"learning_rate": 4.349056769754021e-05, |
|
"loss": 1.6407, |
|
"step": 8339 |
|
}, |
|
{ |
|
"epoch": 0.30300287798432496, |
|
"grad_norm": 1.4847484827041626, |
|
"learning_rate": 4.3433941479156994e-05, |
|
"loss": 1.65, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.3041251108657484, |
|
"grad_norm": 1.475669264793396, |
|
"learning_rate": 4.3377107236249647e-05, |
|
"loss": 1.6398, |
|
"step": 8401 |
|
}, |
|
{ |
|
"epoch": 0.3052473437471718, |
|
"grad_norm": 1.558566689491272, |
|
"learning_rate": 4.332006561018488e-05, |
|
"loss": 1.6501, |
|
"step": 8432 |
|
}, |
|
{ |
|
"epoch": 0.3063695766285952, |
|
"grad_norm": 1.5497310161590576, |
|
"learning_rate": 4.3262817244669683e-05, |
|
"loss": 1.6371, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 0.30749180951001864, |
|
"grad_norm": 1.464553952217102, |
|
"learning_rate": 4.3205362785744083e-05, |
|
"loss": 1.6766, |
|
"step": 8494 |
|
}, |
|
{ |
|
"epoch": 0.3086140423914421, |
|
"grad_norm": 1.5198413133621216, |
|
"learning_rate": 4.314770288177384e-05, |
|
"loss": 1.633, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.3097362752728655, |
|
"grad_norm": 1.5493290424346924, |
|
"learning_rate": 4.308983818344313e-05, |
|
"loss": 1.6465, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 0.31085850815428895, |
|
"grad_norm": 1.4413405656814575, |
|
"learning_rate": 4.3031769343747206e-05, |
|
"loss": 1.6463, |
|
"step": 8587 |
|
}, |
|
{ |
|
"epoch": 0.31198074103571233, |
|
"grad_norm": 1.508507251739502, |
|
"learning_rate": 4.297349701798505e-05, |
|
"loss": 1.6262, |
|
"step": 8618 |
|
}, |
|
{ |
|
"epoch": 0.31310297391713576, |
|
"grad_norm": 1.560054063796997, |
|
"learning_rate": 4.2915021863751916e-05, |
|
"loss": 1.6484, |
|
"step": 8649 |
|
}, |
|
{ |
|
"epoch": 0.3142252067985592, |
|
"grad_norm": 1.495651125907898, |
|
"learning_rate": 4.285634454093198e-05, |
|
"loss": 1.6329, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.31534743967998263, |
|
"grad_norm": 1.481740117073059, |
|
"learning_rate": 4.279746571169086e-05, |
|
"loss": 1.6274, |
|
"step": 8711 |
|
}, |
|
{ |
|
"epoch": 0.31646967256140607, |
|
"grad_norm": 1.53792142868042, |
|
"learning_rate": 4.2738386040468136e-05, |
|
"loss": 1.6252, |
|
"step": 8742 |
|
}, |
|
{ |
|
"epoch": 0.31759190544282945, |
|
"grad_norm": 1.4411643743515015, |
|
"learning_rate": 4.2679106193969866e-05, |
|
"loss": 1.6423, |
|
"step": 8773 |
|
}, |
|
{ |
|
"epoch": 0.3187141383242529, |
|
"grad_norm": 1.5158967971801758, |
|
"learning_rate": 4.261962684116106e-05, |
|
"loss": 1.6596, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 0.3198363712056763, |
|
"grad_norm": 1.6026604175567627, |
|
"learning_rate": 4.2559948653258145e-05, |
|
"loss": 1.6399, |
|
"step": 8835 |
|
}, |
|
{ |
|
"epoch": 0.32095860408709975, |
|
"grad_norm": 1.4422760009765625, |
|
"learning_rate": 4.250007230372134e-05, |
|
"loss": 1.646, |
|
"step": 8866 |
|
}, |
|
{ |
|
"epoch": 0.3220808369685232, |
|
"grad_norm": 1.4450057744979858, |
|
"learning_rate": 4.2439998468247126e-05, |
|
"loss": 1.6311, |
|
"step": 8897 |
|
}, |
|
{ |
|
"epoch": 0.3232030698499466, |
|
"grad_norm": 1.432768702507019, |
|
"learning_rate": 4.2379727824760566e-05, |
|
"loss": 1.6234, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 0.32432530273137, |
|
"grad_norm": 1.5206103324890137, |
|
"learning_rate": 4.231926105340768e-05, |
|
"loss": 1.6268, |
|
"step": 8959 |
|
}, |
|
{ |
|
"epoch": 0.32544753561279344, |
|
"grad_norm": 1.5703397989273071, |
|
"learning_rate": 4.225859883654776e-05, |
|
"loss": 1.6409, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.32656976849421687, |
|
"grad_norm": 1.4549362659454346, |
|
"learning_rate": 4.219774185874569e-05, |
|
"loss": 1.6471, |
|
"step": 9021 |
|
}, |
|
{ |
|
"epoch": 0.3276920013756403, |
|
"grad_norm": 1.669263243675232, |
|
"learning_rate": 4.213669080676418e-05, |
|
"loss": 1.6355, |
|
"step": 9052 |
|
}, |
|
{ |
|
"epoch": 0.32881423425706374, |
|
"grad_norm": 1.4004725217819214, |
|
"learning_rate": 4.2075446369556056e-05, |
|
"loss": 1.6046, |
|
"step": 9083 |
|
}, |
|
{ |
|
"epoch": 0.3299364671384872, |
|
"grad_norm": 1.4844101667404175, |
|
"learning_rate": 4.201400923825648e-05, |
|
"loss": 1.6357, |
|
"step": 9114 |
|
}, |
|
{ |
|
"epoch": 0.33105870001991056, |
|
"grad_norm": 1.5377836227416992, |
|
"learning_rate": 4.195238010617511e-05, |
|
"loss": 1.6425, |
|
"step": 9145 |
|
}, |
|
{ |
|
"epoch": 0.332180932901334, |
|
"grad_norm": 1.4880887269973755, |
|
"learning_rate": 4.1890559668788344e-05, |
|
"loss": 1.6368, |
|
"step": 9176 |
|
}, |
|
{ |
|
"epoch": 0.3333031657827574, |
|
"grad_norm": 1.5786559581756592, |
|
"learning_rate": 4.1828548623731405e-05, |
|
"loss": 1.6327, |
|
"step": 9207 |
|
}, |
|
{ |
|
"epoch": 0.33442539866418086, |
|
"grad_norm": 1.4619288444519043, |
|
"learning_rate": 4.1766347670790506e-05, |
|
"loss": 1.6431, |
|
"step": 9238 |
|
}, |
|
{ |
|
"epoch": 0.3355476315456043, |
|
"grad_norm": 1.4946295022964478, |
|
"learning_rate": 4.170395751189495e-05, |
|
"loss": 1.6265, |
|
"step": 9269 |
|
}, |
|
{ |
|
"epoch": 0.33666986442702773, |
|
"grad_norm": 1.4698960781097412, |
|
"learning_rate": 4.164137885110921e-05, |
|
"loss": 1.6356, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.3377920973084511, |
|
"grad_norm": 1.4136701822280884, |
|
"learning_rate": 4.157861239462495e-05, |
|
"loss": 1.606, |
|
"step": 9331 |
|
}, |
|
{ |
|
"epoch": 0.33891433018987455, |
|
"grad_norm": 1.5250601768493652, |
|
"learning_rate": 4.1515658850753114e-05, |
|
"loss": 1.6266, |
|
"step": 9362 |
|
}, |
|
{ |
|
"epoch": 0.340036563071298, |
|
"grad_norm": 1.5827070474624634, |
|
"learning_rate": 4.145251892991588e-05, |
|
"loss": 1.618, |
|
"step": 9393 |
|
}, |
|
{ |
|
"epoch": 0.3411587959527214, |
|
"grad_norm": 1.4887738227844238, |
|
"learning_rate": 4.138919334463868e-05, |
|
"loss": 1.6196, |
|
"step": 9424 |
|
}, |
|
{ |
|
"epoch": 0.34228102883414485, |
|
"grad_norm": 1.5627696514129639, |
|
"learning_rate": 4.1325682809542124e-05, |
|
"loss": 1.6155, |
|
"step": 9455 |
|
}, |
|
{ |
|
"epoch": 0.3434032617155683, |
|
"grad_norm": 1.4552607536315918, |
|
"learning_rate": 4.126198804133398e-05, |
|
"loss": 1.6272, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 0.34452549459699167, |
|
"grad_norm": 1.5104546546936035, |
|
"learning_rate": 4.1198109758801055e-05, |
|
"loss": 1.6245, |
|
"step": 9517 |
|
}, |
|
{ |
|
"epoch": 0.3456477274784151, |
|
"grad_norm": 1.4588383436203003, |
|
"learning_rate": 4.113404868280107e-05, |
|
"loss": 1.6285, |
|
"step": 9548 |
|
}, |
|
{ |
|
"epoch": 0.34676996035983854, |
|
"grad_norm": 1.40166437625885, |
|
"learning_rate": 4.106980553625457e-05, |
|
"loss": 1.6181, |
|
"step": 9579 |
|
}, |
|
{ |
|
"epoch": 0.34789219324126197, |
|
"grad_norm": 1.4949356317520142, |
|
"learning_rate": 4.100538104413674e-05, |
|
"loss": 1.6148, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.3490144261226854, |
|
"grad_norm": 1.4863393306732178, |
|
"learning_rate": 4.09407759334692e-05, |
|
"loss": 1.6218, |
|
"step": 9641 |
|
}, |
|
{ |
|
"epoch": 0.35013665900410884, |
|
"grad_norm": 1.4831593036651611, |
|
"learning_rate": 4.087599093331186e-05, |
|
"loss": 1.6201, |
|
"step": 9672 |
|
}, |
|
{ |
|
"epoch": 0.3512588918855322, |
|
"grad_norm": 1.487328052520752, |
|
"learning_rate": 4.081102677475462e-05, |
|
"loss": 1.6203, |
|
"step": 9703 |
|
}, |
|
{ |
|
"epoch": 0.35238112476695566, |
|
"grad_norm": 1.560600996017456, |
|
"learning_rate": 4.0745884190909194e-05, |
|
"loss": 1.6099, |
|
"step": 9734 |
|
}, |
|
{ |
|
"epoch": 0.3535033576483791, |
|
"grad_norm": 1.45511794090271, |
|
"learning_rate": 4.0680563916900796e-05, |
|
"loss": 1.6494, |
|
"step": 9765 |
|
}, |
|
{ |
|
"epoch": 0.3546255905298025, |
|
"grad_norm": 1.4966280460357666, |
|
"learning_rate": 4.0615066689859815e-05, |
|
"loss": 1.6157, |
|
"step": 9796 |
|
}, |
|
{ |
|
"epoch": 0.35574782341122596, |
|
"grad_norm": 1.4888532161712646, |
|
"learning_rate": 4.0549393248913584e-05, |
|
"loss": 1.6203, |
|
"step": 9827 |
|
}, |
|
{ |
|
"epoch": 0.3568700562926494, |
|
"grad_norm": 1.5495861768722534, |
|
"learning_rate": 4.048354433517794e-05, |
|
"loss": 1.6131, |
|
"step": 9858 |
|
}, |
|
{ |
|
"epoch": 0.3579922891740728, |
|
"grad_norm": 1.4991432428359985, |
|
"learning_rate": 4.0417520691748916e-05, |
|
"loss": 1.6371, |
|
"step": 9889 |
|
}, |
|
{ |
|
"epoch": 0.3591145220554962, |
|
"grad_norm": 1.5163663625717163, |
|
"learning_rate": 4.035132306369438e-05, |
|
"loss": 1.5911, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.36023675493691965, |
|
"grad_norm": 1.439622402191162, |
|
"learning_rate": 4.028495219804555e-05, |
|
"loss": 1.6218, |
|
"step": 9951 |
|
}, |
|
{ |
|
"epoch": 0.3613589878183431, |
|
"grad_norm": 1.4068893194198608, |
|
"learning_rate": 4.021840884378864e-05, |
|
"loss": 1.6284, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 0.3624812206997665, |
|
"grad_norm": 1.4577332735061646, |
|
"learning_rate": 4.015169375185633e-05, |
|
"loss": 1.6104, |
|
"step": 10013 |
|
}, |
|
{ |
|
"epoch": 0.36360345358118995, |
|
"grad_norm": 1.448833703994751, |
|
"learning_rate": 4.0084807675119396e-05, |
|
"loss": 1.6299, |
|
"step": 10044 |
|
}, |
|
{ |
|
"epoch": 0.36472568646261333, |
|
"grad_norm": 1.440450668334961, |
|
"learning_rate": 4.0017751368378106e-05, |
|
"loss": 1.6255, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 0.36584791934403676, |
|
"grad_norm": 1.3380858898162842, |
|
"learning_rate": 3.995052558835377e-05, |
|
"loss": 1.6162, |
|
"step": 10106 |
|
}, |
|
{ |
|
"epoch": 0.3669701522254602, |
|
"grad_norm": 1.4549713134765625, |
|
"learning_rate": 3.988313109368017e-05, |
|
"loss": 1.6181, |
|
"step": 10137 |
|
}, |
|
{ |
|
"epoch": 0.36809238510688363, |
|
"grad_norm": 1.4933863878250122, |
|
"learning_rate": 3.981556864489504e-05, |
|
"loss": 1.634, |
|
"step": 10168 |
|
}, |
|
{ |
|
"epoch": 0.36921461798830707, |
|
"grad_norm": 1.5157703161239624, |
|
"learning_rate": 3.974783900443142e-05, |
|
"loss": 1.6258, |
|
"step": 10199 |
|
}, |
|
{ |
|
"epoch": 0.3703368508697305, |
|
"grad_norm": 1.464006781578064, |
|
"learning_rate": 3.9679942936609095e-05, |
|
"loss": 1.6235, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.3714590837511539, |
|
"grad_norm": 1.3768154382705688, |
|
"learning_rate": 3.961188120762596e-05, |
|
"loss": 1.6044, |
|
"step": 10261 |
|
}, |
|
{ |
|
"epoch": 0.3725813166325773, |
|
"grad_norm": 1.4427024126052856, |
|
"learning_rate": 3.954365458554938e-05, |
|
"loss": 1.6403, |
|
"step": 10292 |
|
}, |
|
{ |
|
"epoch": 0.37370354951400075, |
|
"grad_norm": 1.3831264972686768, |
|
"learning_rate": 3.947526384030751e-05, |
|
"loss": 1.6136, |
|
"step": 10323 |
|
}, |
|
{ |
|
"epoch": 0.3748257823954242, |
|
"grad_norm": 1.4275633096694946, |
|
"learning_rate": 3.9406709743680624e-05, |
|
"loss": 1.6167, |
|
"step": 10354 |
|
}, |
|
{ |
|
"epoch": 0.3759480152768476, |
|
"grad_norm": 1.4378384351730347, |
|
"learning_rate": 3.9337993069292366e-05, |
|
"loss": 1.6231, |
|
"step": 10385 |
|
}, |
|
{ |
|
"epoch": 0.37707024815827106, |
|
"grad_norm": 1.3743884563446045, |
|
"learning_rate": 3.926911459260109e-05, |
|
"loss": 1.6171, |
|
"step": 10416 |
|
}, |
|
{ |
|
"epoch": 0.37819248103969444, |
|
"grad_norm": 1.496160864830017, |
|
"learning_rate": 3.920007509089102e-05, |
|
"loss": 1.6234, |
|
"step": 10447 |
|
}, |
|
{ |
|
"epoch": 0.3793147139211179, |
|
"grad_norm": 1.4610028266906738, |
|
"learning_rate": 3.913087534326357e-05, |
|
"loss": 1.5963, |
|
"step": 10478 |
|
}, |
|
{ |
|
"epoch": 0.3804369468025413, |
|
"grad_norm": 1.483314037322998, |
|
"learning_rate": 3.9061516130628475e-05, |
|
"loss": 1.6021, |
|
"step": 10509 |
|
}, |
|
{ |
|
"epoch": 0.38155917968396474, |
|
"grad_norm": 1.4944846630096436, |
|
"learning_rate": 3.8991998235695025e-05, |
|
"loss": 1.5833, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.3826814125653882, |
|
"grad_norm": 1.3831861019134521, |
|
"learning_rate": 3.8922322442963224e-05, |
|
"loss": 1.624, |
|
"step": 10571 |
|
}, |
|
{ |
|
"epoch": 0.3838036454468116, |
|
"grad_norm": 1.4178634881973267, |
|
"learning_rate": 3.885248953871491e-05, |
|
"loss": 1.6188, |
|
"step": 10602 |
|
}, |
|
{ |
|
"epoch": 0.384925878328235, |
|
"grad_norm": 1.4889320135116577, |
|
"learning_rate": 3.8782500311004915e-05, |
|
"loss": 1.608, |
|
"step": 10633 |
|
}, |
|
{ |
|
"epoch": 0.38604811120965843, |
|
"grad_norm": 1.3335620164871216, |
|
"learning_rate": 3.871235554965218e-05, |
|
"loss": 1.6182, |
|
"step": 10664 |
|
}, |
|
{ |
|
"epoch": 0.38717034409108186, |
|
"grad_norm": 1.4620449542999268, |
|
"learning_rate": 3.864205604623078e-05, |
|
"loss": 1.5848, |
|
"step": 10695 |
|
}, |
|
{ |
|
"epoch": 0.3882925769725053, |
|
"grad_norm": 1.3857917785644531, |
|
"learning_rate": 3.857160259406107e-05, |
|
"loss": 1.6048, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 0.38941480985392873, |
|
"grad_norm": 1.4226957559585571, |
|
"learning_rate": 3.8500995988200674e-05, |
|
"loss": 1.6052, |
|
"step": 10757 |
|
}, |
|
{ |
|
"epoch": 0.39053704273535217, |
|
"grad_norm": 1.478182077407837, |
|
"learning_rate": 3.843023702543556e-05, |
|
"loss": 1.6268, |
|
"step": 10788 |
|
}, |
|
{ |
|
"epoch": 0.39165927561677555, |
|
"grad_norm": 1.431401014328003, |
|
"learning_rate": 3.8359326504270984e-05, |
|
"loss": 1.6176, |
|
"step": 10819 |
|
}, |
|
{ |
|
"epoch": 0.392781508498199, |
|
"grad_norm": 1.339880108833313, |
|
"learning_rate": 3.828826522492255e-05, |
|
"loss": 1.5902, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.3939037413796224, |
|
"grad_norm": 1.4537174701690674, |
|
"learning_rate": 3.821705398930713e-05, |
|
"loss": 1.6107, |
|
"step": 10881 |
|
}, |
|
{ |
|
"epoch": 0.39502597426104585, |
|
"grad_norm": 1.3559256792068481, |
|
"learning_rate": 3.814569360103385e-05, |
|
"loss": 1.5879, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 0.3961482071424693, |
|
"grad_norm": 1.3561891317367554, |
|
"learning_rate": 3.807418486539499e-05, |
|
"loss": 1.6162, |
|
"step": 10943 |
|
}, |
|
{ |
|
"epoch": 0.3972704400238927, |
|
"grad_norm": 1.471112847328186, |
|
"learning_rate": 3.80025285893569e-05, |
|
"loss": 1.5968, |
|
"step": 10974 |
|
}, |
|
{ |
|
"epoch": 0.3983926729053161, |
|
"grad_norm": 1.3438925743103027, |
|
"learning_rate": 3.793072558155093e-05, |
|
"loss": 1.5876, |
|
"step": 11005 |
|
}, |
|
{ |
|
"epoch": 0.39951490578673954, |
|
"grad_norm": 1.4102482795715332, |
|
"learning_rate": 3.785877665226426e-05, |
|
"loss": 1.5886, |
|
"step": 11036 |
|
}, |
|
{ |
|
"epoch": 0.400637138668163, |
|
"grad_norm": 1.4435259103775024, |
|
"learning_rate": 3.778668261343079e-05, |
|
"loss": 1.5999, |
|
"step": 11067 |
|
}, |
|
{ |
|
"epoch": 0.4017593715495864, |
|
"grad_norm": 1.4556541442871094, |
|
"learning_rate": 3.771444427862192e-05, |
|
"loss": 1.6185, |
|
"step": 11098 |
|
}, |
|
{ |
|
"epoch": 0.40288160443100984, |
|
"grad_norm": 1.370553970336914, |
|
"learning_rate": 3.7642062463037465e-05, |
|
"loss": 1.6005, |
|
"step": 11129 |
|
}, |
|
{ |
|
"epoch": 0.4040038373124333, |
|
"grad_norm": 1.368855595588684, |
|
"learning_rate": 3.7569537983496373e-05, |
|
"loss": 1.6024, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.40512607019385666, |
|
"grad_norm": 1.4200265407562256, |
|
"learning_rate": 3.749687165842753e-05, |
|
"loss": 1.6082, |
|
"step": 11191 |
|
}, |
|
{ |
|
"epoch": 0.4062483030752801, |
|
"grad_norm": 1.4704499244689941, |
|
"learning_rate": 3.7424064307860536e-05, |
|
"loss": 1.6227, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 0.40737053595670353, |
|
"grad_norm": 1.3868876695632935, |
|
"learning_rate": 3.735111675341645e-05, |
|
"loss": 1.6008, |
|
"step": 11253 |
|
}, |
|
{ |
|
"epoch": 0.40849276883812696, |
|
"grad_norm": 1.473650574684143, |
|
"learning_rate": 3.7278029818298524e-05, |
|
"loss": 1.5825, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 0.4096150017195504, |
|
"grad_norm": 1.412559986114502, |
|
"learning_rate": 3.720480432728287e-05, |
|
"loss": 1.5971, |
|
"step": 11315 |
|
}, |
|
{ |
|
"epoch": 0.41073723460097383, |
|
"grad_norm": 1.4288370609283447, |
|
"learning_rate": 3.71314411067092e-05, |
|
"loss": 1.6079, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 0.4118594674823972, |
|
"grad_norm": 1.4781348705291748, |
|
"learning_rate": 3.70579409844715e-05, |
|
"loss": 1.5904, |
|
"step": 11377 |
|
}, |
|
{ |
|
"epoch": 0.41298170036382065, |
|
"grad_norm": 1.377030611038208, |
|
"learning_rate": 3.698430479000865e-05, |
|
"loss": 1.5804, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 0.4141039332452441, |
|
"grad_norm": 1.4176589250564575, |
|
"learning_rate": 3.691053335429509e-05, |
|
"loss": 1.6046, |
|
"step": 11439 |
|
}, |
|
{ |
|
"epoch": 0.4152261661266675, |
|
"grad_norm": 1.4933243989944458, |
|
"learning_rate": 3.683662750983147e-05, |
|
"loss": 1.6018, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.41634839900809095, |
|
"grad_norm": 1.4382365942001343, |
|
"learning_rate": 3.676258809063518e-05, |
|
"loss": 1.5962, |
|
"step": 11501 |
|
}, |
|
{ |
|
"epoch": 0.4174706318895144, |
|
"grad_norm": 1.468005657196045, |
|
"learning_rate": 3.6688415932231004e-05, |
|
"loss": 1.6044, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 0.41859286477093777, |
|
"grad_norm": 1.4858007431030273, |
|
"learning_rate": 3.661411187164166e-05, |
|
"loss": 1.5973, |
|
"step": 11563 |
|
}, |
|
{ |
|
"epoch": 0.4197150976523612, |
|
"grad_norm": 1.457524061203003, |
|
"learning_rate": 3.65396767473784e-05, |
|
"loss": 1.5872, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 0.42083733053378464, |
|
"grad_norm": 1.4685806035995483, |
|
"learning_rate": 3.6465111399431465e-05, |
|
"loss": 1.6072, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.42195956341520807, |
|
"grad_norm": 1.4355812072753906, |
|
"learning_rate": 3.6390416669260674e-05, |
|
"loss": 1.6005, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 0.4230817962966315, |
|
"grad_norm": 1.4105843305587769, |
|
"learning_rate": 3.63155933997859e-05, |
|
"loss": 1.5999, |
|
"step": 11687 |
|
}, |
|
{ |
|
"epoch": 0.42420402917805494, |
|
"grad_norm": 1.4515639543533325, |
|
"learning_rate": 3.624064243537758e-05, |
|
"loss": 1.5903, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 0.4253262620594783, |
|
"grad_norm": 1.4507205486297607, |
|
"learning_rate": 3.616556462184716e-05, |
|
"loss": 1.6004, |
|
"step": 11749 |
|
}, |
|
{ |
|
"epoch": 0.42644849494090176, |
|
"grad_norm": 1.3846348524093628, |
|
"learning_rate": 3.609036080643755e-05, |
|
"loss": 1.5878, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.4275707278223252, |
|
"grad_norm": 1.4062190055847168, |
|
"learning_rate": 3.60150318378136e-05, |
|
"loss": 1.6049, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 0.4286929607037486, |
|
"grad_norm": 1.5231355428695679, |
|
"learning_rate": 3.5939578566052465e-05, |
|
"loss": 1.5972, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 0.42981519358517206, |
|
"grad_norm": 1.4500449895858765, |
|
"learning_rate": 3.586400184263408e-05, |
|
"loss": 1.5918, |
|
"step": 11873 |
|
}, |
|
{ |
|
"epoch": 0.4309374264665955, |
|
"grad_norm": 1.415440559387207, |
|
"learning_rate": 3.578830252043148e-05, |
|
"loss": 1.6111, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.4320596593480189, |
|
"grad_norm": 1.3857108354568481, |
|
"learning_rate": 3.571248145370125e-05, |
|
"loss": 1.5882, |
|
"step": 11935 |
|
}, |
|
{ |
|
"epoch": 0.4331818922294423, |
|
"grad_norm": 1.442830204963684, |
|
"learning_rate": 3.5636539498073794e-05, |
|
"loss": 1.587, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 0.43430412511086575, |
|
"grad_norm": 1.3706488609313965, |
|
"learning_rate": 3.556047751054378e-05, |
|
"loss": 1.5942, |
|
"step": 11997 |
|
}, |
|
{ |
|
"epoch": 0.4354263579922892, |
|
"grad_norm": 1.450567364692688, |
|
"learning_rate": 3.548429634946039e-05, |
|
"loss": 1.6011, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 0.4365485908737126, |
|
"grad_norm": 1.4172272682189941, |
|
"learning_rate": 3.540799687451768e-05, |
|
"loss": 1.5726, |
|
"step": 12059 |
|
}, |
|
{ |
|
"epoch": 0.43767082375513605, |
|
"grad_norm": 1.4156157970428467, |
|
"learning_rate": 3.533157994674485e-05, |
|
"loss": 1.5848, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.43879305663655943, |
|
"grad_norm": 1.3843419551849365, |
|
"learning_rate": 3.5255046428496546e-05, |
|
"loss": 1.5893, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 0.43991528951798287, |
|
"grad_norm": 1.43569815158844, |
|
"learning_rate": 3.517839718344311e-05, |
|
"loss": 1.5922, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 0.4410375223994063, |
|
"grad_norm": 1.4200314283370972, |
|
"learning_rate": 3.510163307656086e-05, |
|
"loss": 1.6047, |
|
"step": 12183 |
|
}, |
|
{ |
|
"epoch": 0.44215975528082974, |
|
"grad_norm": 1.4956674575805664, |
|
"learning_rate": 3.5024754974122324e-05, |
|
"loss": 1.5802, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 0.44328198816225317, |
|
"grad_norm": 1.4289231300354004, |
|
"learning_rate": 3.494776374368643e-05, |
|
"loss": 1.6193, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 0.4444042210436766, |
|
"grad_norm": 1.389282464981079, |
|
"learning_rate": 3.4870660254088724e-05, |
|
"loss": 1.5977, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 0.4455264539251, |
|
"grad_norm": 1.4207974672317505, |
|
"learning_rate": 3.479344537543164e-05, |
|
"loss": 1.5789, |
|
"step": 12307 |
|
}, |
|
{ |
|
"epoch": 0.4466486868065234, |
|
"grad_norm": 1.355353832244873, |
|
"learning_rate": 3.4716119979074565e-05, |
|
"loss": 1.5889, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 0.44777091968794686, |
|
"grad_norm": 1.3336408138275146, |
|
"learning_rate": 3.463868493762412e-05, |
|
"loss": 1.5865, |
|
"step": 12369 |
|
}, |
|
{ |
|
"epoch": 0.4488931525693703, |
|
"grad_norm": 1.5265244245529175, |
|
"learning_rate": 3.456114112492418e-05, |
|
"loss": 1.5993, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.4500153854507937, |
|
"grad_norm": 1.4629555940628052, |
|
"learning_rate": 3.4483489416046164e-05, |
|
"loss": 1.5982, |
|
"step": 12431 |
|
}, |
|
{ |
|
"epoch": 0.45113761833221716, |
|
"grad_norm": 1.43988835811615, |
|
"learning_rate": 3.440573068727905e-05, |
|
"loss": 1.5816, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 0.45225985121364054, |
|
"grad_norm": 1.4607633352279663, |
|
"learning_rate": 3.4327865816119495e-05, |
|
"loss": 1.571, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 0.453382084095064, |
|
"grad_norm": 1.3664649724960327, |
|
"learning_rate": 3.4249895681262025e-05, |
|
"loss": 1.5736, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 0.4545043169764874, |
|
"grad_norm": 1.436094880104065, |
|
"learning_rate": 3.417182116258899e-05, |
|
"loss": 1.5829, |
|
"step": 12555 |
|
}, |
|
{ |
|
"epoch": 0.45562654985791085, |
|
"grad_norm": 1.3681309223175049, |
|
"learning_rate": 3.409364314116074e-05, |
|
"loss": 1.5938, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 0.4567487827393343, |
|
"grad_norm": 1.3929277658462524, |
|
"learning_rate": 3.401536249920559e-05, |
|
"loss": 1.572, |
|
"step": 12617 |
|
}, |
|
{ |
|
"epoch": 0.4578710156207577, |
|
"grad_norm": 1.3980777263641357, |
|
"learning_rate": 3.393698012010998e-05, |
|
"loss": 1.5941, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 0.4589932485021811, |
|
"grad_norm": 1.4055850505828857, |
|
"learning_rate": 3.385849688840839e-05, |
|
"loss": 1.5818, |
|
"step": 12679 |
|
}, |
|
{ |
|
"epoch": 0.46011548138360453, |
|
"grad_norm": 1.3678046464920044, |
|
"learning_rate": 3.3779913689773414e-05, |
|
"loss": 1.5759, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.46123771426502796, |
|
"grad_norm": 1.468201994895935, |
|
"learning_rate": 3.370123141100578e-05, |
|
"loss": 1.5792, |
|
"step": 12741 |
|
}, |
|
{ |
|
"epoch": 0.4623599471464514, |
|
"grad_norm": 1.346614122390747, |
|
"learning_rate": 3.3622450940024305e-05, |
|
"loss": 1.5983, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 0.46348218002787483, |
|
"grad_norm": 1.3895704746246338, |
|
"learning_rate": 3.35435731658559e-05, |
|
"loss": 1.5809, |
|
"step": 12803 |
|
}, |
|
{ |
|
"epoch": 0.46460441290929827, |
|
"grad_norm": 1.3664804697036743, |
|
"learning_rate": 3.346459897862552e-05, |
|
"loss": 1.5788, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 0.46572664579072165, |
|
"grad_norm": 1.4561264514923096, |
|
"learning_rate": 3.338552926954613e-05, |
|
"loss": 1.5867, |
|
"step": 12865 |
|
}, |
|
{ |
|
"epoch": 0.4668488786721451, |
|
"grad_norm": 1.3407316207885742, |
|
"learning_rate": 3.330636493090868e-05, |
|
"loss": 1.5729, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 0.4679711115535685, |
|
"grad_norm": 1.3465179204940796, |
|
"learning_rate": 3.322710685607193e-05, |
|
"loss": 1.5915, |
|
"step": 12927 |
|
}, |
|
{ |
|
"epoch": 0.46909334443499195, |
|
"grad_norm": 1.553585171699524, |
|
"learning_rate": 3.314775593945251e-05, |
|
"loss": 1.5875, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 0.4702155773164154, |
|
"grad_norm": 1.3964170217514038, |
|
"learning_rate": 3.3068313076514714e-05, |
|
"loss": 1.5783, |
|
"step": 12989 |
|
}, |
|
{ |
|
"epoch": 0.47133781019783877, |
|
"grad_norm": 1.3884953260421753, |
|
"learning_rate": 3.298877916376047e-05, |
|
"loss": 1.5577, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.4724600430792622, |
|
"grad_norm": 1.3421337604522705, |
|
"learning_rate": 3.290915509871915e-05, |
|
"loss": 1.5791, |
|
"step": 13051 |
|
}, |
|
{ |
|
"epoch": 0.47358227596068564, |
|
"grad_norm": 1.297429084777832, |
|
"learning_rate": 3.282944177993753e-05, |
|
"loss": 1.5699, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 0.4747045088421091, |
|
"grad_norm": 1.3672280311584473, |
|
"learning_rate": 3.274964010696957e-05, |
|
"loss": 1.5711, |
|
"step": 13113 |
|
}, |
|
{ |
|
"epoch": 0.4758267417235325, |
|
"grad_norm": 1.4202091693878174, |
|
"learning_rate": 3.266975098036629e-05, |
|
"loss": 1.5679, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 0.47694897460495594, |
|
"grad_norm": 1.383973479270935, |
|
"learning_rate": 3.258977530166562e-05, |
|
"loss": 1.6019, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.4780712074863793, |
|
"grad_norm": 1.3134119510650635, |
|
"learning_rate": 3.250971397338227e-05, |
|
"loss": 1.5721, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 0.47919344036780276, |
|
"grad_norm": 1.3229272365570068, |
|
"learning_rate": 3.2429567898997404e-05, |
|
"loss": 1.5812, |
|
"step": 13237 |
|
}, |
|
{ |
|
"epoch": 0.4803156732492262, |
|
"grad_norm": 1.2991341352462769, |
|
"learning_rate": 3.234933798294859e-05, |
|
"loss": 1.5793, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 0.48143790613064963, |
|
"grad_norm": 1.384522795677185, |
|
"learning_rate": 3.2269025130619535e-05, |
|
"loss": 1.5592, |
|
"step": 13299 |
|
}, |
|
{ |
|
"epoch": 0.48256013901207306, |
|
"grad_norm": 1.3743617534637451, |
|
"learning_rate": 3.218863024832985e-05, |
|
"loss": 1.5785, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.4836823718934965, |
|
"grad_norm": 1.4512649774551392, |
|
"learning_rate": 3.2108154243324864e-05, |
|
"loss": 1.5703, |
|
"step": 13361 |
|
}, |
|
{ |
|
"epoch": 0.4848046047749199, |
|
"grad_norm": 1.2982932329177856, |
|
"learning_rate": 3.2027598023765345e-05, |
|
"loss": 1.5609, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 0.4859268376563433, |
|
"grad_norm": 1.3747495412826538, |
|
"learning_rate": 3.194696249871729e-05, |
|
"loss": 1.5766, |
|
"step": 13423 |
|
}, |
|
{ |
|
"epoch": 0.48704907053776675, |
|
"grad_norm": 1.3155137300491333, |
|
"learning_rate": 3.186624857814164e-05, |
|
"loss": 1.57, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 0.4881713034191902, |
|
"grad_norm": 1.4094924926757812, |
|
"learning_rate": 3.178545717288401e-05, |
|
"loss": 1.5855, |
|
"step": 13485 |
|
}, |
|
{ |
|
"epoch": 0.4892935363006136, |
|
"grad_norm": 1.3931294679641724, |
|
"learning_rate": 3.170458919466444e-05, |
|
"loss": 1.5486, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 0.49041576918203705, |
|
"grad_norm": 1.48263418674469, |
|
"learning_rate": 3.1623645556067063e-05, |
|
"loss": 1.5829, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 0.49153800206346043, |
|
"grad_norm": 1.3016873598098755, |
|
"learning_rate": 3.154262717052985e-05, |
|
"loss": 1.5808, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 0.49266023494488387, |
|
"grad_norm": 1.623724102973938, |
|
"learning_rate": 3.146153495233426e-05, |
|
"loss": 1.5582, |
|
"step": 13609 |
|
}, |
|
{ |
|
"epoch": 0.4937824678263073, |
|
"grad_norm": 1.3603851795196533, |
|
"learning_rate": 3.1380369816594944e-05, |
|
"loss": 1.5703, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.49490470070773074, |
|
"grad_norm": 1.4793063402175903, |
|
"learning_rate": 3.129913267924946e-05, |
|
"loss": 1.5739, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 0.4960269335891542, |
|
"grad_norm": 1.4615710973739624, |
|
"learning_rate": 3.121782445704782e-05, |
|
"loss": 1.5846, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 0.4971491664705776, |
|
"grad_norm": 1.419823408126831, |
|
"learning_rate": 3.11364460675423e-05, |
|
"loss": 1.5702, |
|
"step": 13733 |
|
}, |
|
{ |
|
"epoch": 0.498271399352001, |
|
"grad_norm": 1.429337501525879, |
|
"learning_rate": 3.1054998429076934e-05, |
|
"loss": 1.5825, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 0.4993936322334244, |
|
"grad_norm": 1.3171850442886353, |
|
"learning_rate": 3.097348246077728e-05, |
|
"loss": 1.5721, |
|
"step": 13795 |
|
}, |
|
{ |
|
"epoch": 0.5005158651148479, |
|
"grad_norm": 1.487111210823059, |
|
"learning_rate": 3.0891899082539924e-05, |
|
"loss": 1.5879, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 0.5016380979962712, |
|
"grad_norm": 1.4311749935150146, |
|
"learning_rate": 3.0810249215022233e-05, |
|
"loss": 1.5843, |
|
"step": 13857 |
|
}, |
|
{ |
|
"epoch": 0.5027603308776947, |
|
"grad_norm": 1.468863844871521, |
|
"learning_rate": 3.0728533779631865e-05, |
|
"loss": 1.5884, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.5038825637591181, |
|
"grad_norm": 1.3970764875411987, |
|
"learning_rate": 3.064675369851637e-05, |
|
"loss": 1.5769, |
|
"step": 13919 |
|
}, |
|
{ |
|
"epoch": 0.5050047966405415, |
|
"grad_norm": 1.3623278141021729, |
|
"learning_rate": 3.056490989455289e-05, |
|
"loss": 1.5706, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.506127029521965, |
|
"grad_norm": 1.3077219724655151, |
|
"learning_rate": 3.0483003291337596e-05, |
|
"loss": 1.5761, |
|
"step": 13981 |
|
}, |
|
{ |
|
"epoch": 0.5072492624033884, |
|
"grad_norm": 1.3295941352844238, |
|
"learning_rate": 3.040103481317539e-05, |
|
"loss": 1.5776, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 0.5083714952848118, |
|
"grad_norm": 1.3900631666183472, |
|
"learning_rate": 3.03190053850694e-05, |
|
"loss": 1.5777, |
|
"step": 14043 |
|
}, |
|
{ |
|
"epoch": 0.5094937281662353, |
|
"grad_norm": 1.3359615802764893, |
|
"learning_rate": 3.0236915932710573e-05, |
|
"loss": 1.5569, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 0.5106159610476587, |
|
"grad_norm": 1.2790296077728271, |
|
"learning_rate": 3.0154767382467232e-05, |
|
"loss": 1.5598, |
|
"step": 14105 |
|
}, |
|
{ |
|
"epoch": 0.5117381939290822, |
|
"grad_norm": 1.5767478942871094, |
|
"learning_rate": 3.0072560661374582e-05, |
|
"loss": 1.5483, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 0.5128604268105056, |
|
"grad_norm": 1.343381404876709, |
|
"learning_rate": 2.999029669712431e-05, |
|
"loss": 1.5689, |
|
"step": 14167 |
|
}, |
|
{ |
|
"epoch": 0.513982659691929, |
|
"grad_norm": 1.4147651195526123, |
|
"learning_rate": 2.990797641805408e-05, |
|
"loss": 1.5643, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 0.5151048925733523, |
|
"grad_norm": 1.3360931873321533, |
|
"learning_rate": 2.982560075313704e-05, |
|
"loss": 1.5689, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 0.5162271254547758, |
|
"grad_norm": 1.458016037940979, |
|
"learning_rate": 2.9743170631971368e-05, |
|
"loss": 1.5633, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.5173493583361992, |
|
"grad_norm": 1.430955171585083, |
|
"learning_rate": 2.9660686984769792e-05, |
|
"loss": 1.5559, |
|
"step": 14291 |
|
}, |
|
{ |
|
"epoch": 0.5184715912176227, |
|
"grad_norm": 1.3806464672088623, |
|
"learning_rate": 2.9578150742349047e-05, |
|
"loss": 1.577, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 0.5195938240990461, |
|
"grad_norm": 1.359813928604126, |
|
"learning_rate": 2.949556283611942e-05, |
|
"loss": 1.5485, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 0.5207160569804695, |
|
"grad_norm": 1.4222601652145386, |
|
"learning_rate": 2.9412924198074206e-05, |
|
"loss": 1.575, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 0.521838289861893, |
|
"grad_norm": 1.3186180591583252, |
|
"learning_rate": 2.9330235760779208e-05, |
|
"loss": 1.5744, |
|
"step": 14415 |
|
}, |
|
{ |
|
"epoch": 0.5229605227433164, |
|
"grad_norm": 1.3309999704360962, |
|
"learning_rate": 2.9247498457362188e-05, |
|
"loss": 1.5664, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 0.5240827556247398, |
|
"grad_norm": 1.368514060974121, |
|
"learning_rate": 2.9164713221502373e-05, |
|
"loss": 1.56, |
|
"step": 14477 |
|
}, |
|
{ |
|
"epoch": 0.5252049885061633, |
|
"grad_norm": 1.3132268190383911, |
|
"learning_rate": 2.9081880987419912e-05, |
|
"loss": 1.563, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 0.5263272213875867, |
|
"grad_norm": 1.431347131729126, |
|
"learning_rate": 2.8999002689865296e-05, |
|
"loss": 1.5612, |
|
"step": 14539 |
|
}, |
|
{ |
|
"epoch": 0.5274494542690101, |
|
"grad_norm": 1.303941249847412, |
|
"learning_rate": 2.8916079264108852e-05, |
|
"loss": 1.5601, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.5285716871504335, |
|
"grad_norm": 1.4077236652374268, |
|
"learning_rate": 2.883311164593017e-05, |
|
"loss": 1.5516, |
|
"step": 14601 |
|
}, |
|
{ |
|
"epoch": 0.5296939200318569, |
|
"grad_norm": 1.3132708072662354, |
|
"learning_rate": 2.875010077160754e-05, |
|
"loss": 1.5538, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 0.5308161529132803, |
|
"grad_norm": 1.2660679817199707, |
|
"learning_rate": 2.866704757790741e-05, |
|
"loss": 1.5652, |
|
"step": 14663 |
|
}, |
|
{ |
|
"epoch": 0.5319383857947038, |
|
"grad_norm": 1.4541290998458862, |
|
"learning_rate": 2.858395300207376e-05, |
|
"loss": 1.5602, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 0.5330606186761272, |
|
"grad_norm": 1.3694487810134888, |
|
"learning_rate": 2.8500817981817607e-05, |
|
"loss": 1.5483, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.5341828515575506, |
|
"grad_norm": 1.3493553400039673, |
|
"learning_rate": 2.8417643455306336e-05, |
|
"loss": 1.5539, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 0.5353050844389741, |
|
"grad_norm": 1.4280232191085815, |
|
"learning_rate": 2.8334430361153185e-05, |
|
"loss": 1.5672, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 0.5364273173203975, |
|
"grad_norm": 1.3430079221725464, |
|
"learning_rate": 2.8251179638406612e-05, |
|
"loss": 1.5474, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 0.5375495502018209, |
|
"grad_norm": 1.3380746841430664, |
|
"learning_rate": 2.8167892226539704e-05, |
|
"loss": 1.5508, |
|
"step": 14849 |
|
}, |
|
{ |
|
"epoch": 0.5386717830832444, |
|
"grad_norm": 1.3501845598220825, |
|
"learning_rate": 2.8084569065439588e-05, |
|
"loss": 1.5656, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.5397940159646678, |
|
"grad_norm": 1.3564043045043945, |
|
"learning_rate": 2.8001211095396807e-05, |
|
"loss": 1.5726, |
|
"step": 14911 |
|
}, |
|
{ |
|
"epoch": 0.5409162488460912, |
|
"grad_norm": 1.3949267864227295, |
|
"learning_rate": 2.791781925709473e-05, |
|
"loss": 1.5635, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 0.5420384817275146, |
|
"grad_norm": 1.4317481517791748, |
|
"learning_rate": 2.7834394491598908e-05, |
|
"loss": 1.5447, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 0.543160714608938, |
|
"grad_norm": 1.396610140800476, |
|
"learning_rate": 2.7750937740346485e-05, |
|
"loss": 1.557, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 0.5442829474903614, |
|
"grad_norm": 1.369884967803955, |
|
"learning_rate": 2.7667449945135564e-05, |
|
"loss": 1.5672, |
|
"step": 15035 |
|
}, |
|
{ |
|
"epoch": 0.5454051803717849, |
|
"grad_norm": 1.4686237573623657, |
|
"learning_rate": 2.7583932048114557e-05, |
|
"loss": 1.572, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 0.5465274132532083, |
|
"grad_norm": 1.524717926979065, |
|
"learning_rate": 2.7500384991771587e-05, |
|
"loss": 1.5537, |
|
"step": 15097 |
|
}, |
|
{ |
|
"epoch": 0.5476496461346317, |
|
"grad_norm": 1.3461147546768188, |
|
"learning_rate": 2.7416809718923825e-05, |
|
"loss": 1.5321, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 0.5487718790160552, |
|
"grad_norm": 1.3704477548599243, |
|
"learning_rate": 2.7333207172706864e-05, |
|
"loss": 1.5677, |
|
"step": 15159 |
|
}, |
|
{ |
|
"epoch": 0.5498941118974786, |
|
"grad_norm": 1.3601664304733276, |
|
"learning_rate": 2.7249578296564088e-05, |
|
"loss": 1.5577, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 0.551016344778902, |
|
"grad_norm": 1.4055489301681519, |
|
"learning_rate": 2.7165924034235973e-05, |
|
"loss": 1.5453, |
|
"step": 15221 |
|
}, |
|
{ |
|
"epoch": 0.5521385776603255, |
|
"grad_norm": 1.3587946891784668, |
|
"learning_rate": 2.708224532974953e-05, |
|
"loss": 1.5401, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 0.5532608105417489, |
|
"grad_norm": 1.3209632635116577, |
|
"learning_rate": 2.6998543127407538e-05, |
|
"loss": 1.5383, |
|
"step": 15283 |
|
}, |
|
{ |
|
"epoch": 0.5543830434231724, |
|
"grad_norm": 1.294921636581421, |
|
"learning_rate": 2.6914818371777988e-05, |
|
"loss": 1.5734, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 0.5555052763045957, |
|
"grad_norm": 1.6017462015151978, |
|
"learning_rate": 2.6831072007683373e-05, |
|
"loss": 1.5702, |
|
"step": 15345 |
|
}, |
|
{ |
|
"epoch": 0.5566275091860191, |
|
"grad_norm": 1.3644670248031616, |
|
"learning_rate": 2.6747304980190018e-05, |
|
"loss": 1.571, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 0.5577497420674425, |
|
"grad_norm": 1.3694461584091187, |
|
"learning_rate": 2.6663518234597453e-05, |
|
"loss": 1.5398, |
|
"step": 15407 |
|
}, |
|
{ |
|
"epoch": 0.558871974948866, |
|
"grad_norm": 1.3380069732666016, |
|
"learning_rate": 2.6579712716427696e-05, |
|
"loss": 1.5628, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 0.5599942078302894, |
|
"grad_norm": 1.322144627571106, |
|
"learning_rate": 2.6495889371414652e-05, |
|
"loss": 1.5682, |
|
"step": 15469 |
|
}, |
|
{ |
|
"epoch": 0.5611164407117128, |
|
"grad_norm": 1.3240221738815308, |
|
"learning_rate": 2.6412049145493367e-05, |
|
"loss": 1.5506, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.5622386735931363, |
|
"grad_norm": 1.3131070137023926, |
|
"learning_rate": 2.632819298478939e-05, |
|
"loss": 1.5529, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 0.5633609064745597, |
|
"grad_norm": 1.3907220363616943, |
|
"learning_rate": 2.6244321835608105e-05, |
|
"loss": 1.547, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 0.5644831393559832, |
|
"grad_norm": 1.233981966972351, |
|
"learning_rate": 2.6160436644424024e-05, |
|
"loss": 1.5377, |
|
"step": 15593 |
|
}, |
|
{ |
|
"epoch": 0.5656053722374066, |
|
"grad_norm": 1.443326711654663, |
|
"learning_rate": 2.6076538357870133e-05, |
|
"loss": 1.5788, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 0.56672760511883, |
|
"grad_norm": 1.4688999652862549, |
|
"learning_rate": 2.5992627922727196e-05, |
|
"loss": 1.5629, |
|
"step": 15655 |
|
}, |
|
{ |
|
"epoch": 0.5678498380002535, |
|
"grad_norm": 1.3365731239318848, |
|
"learning_rate": 2.5908706285913066e-05, |
|
"loss": 1.5544, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 0.5689720708816768, |
|
"grad_norm": 1.3793649673461914, |
|
"learning_rate": 2.5824774394472008e-05, |
|
"loss": 1.5317, |
|
"step": 15717 |
|
}, |
|
{ |
|
"epoch": 0.5700943037631002, |
|
"grad_norm": 1.417433738708496, |
|
"learning_rate": 2.5740833195563996e-05, |
|
"loss": 1.5506, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 0.5712165366445237, |
|
"grad_norm": 1.346710443496704, |
|
"learning_rate": 2.5656883636454067e-05, |
|
"loss": 1.5462, |
|
"step": 15779 |
|
}, |
|
{ |
|
"epoch": 0.5723387695259471, |
|
"grad_norm": 1.4065468311309814, |
|
"learning_rate": 2.557292666450159e-05, |
|
"loss": 1.5464, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 0.5734610024073705, |
|
"grad_norm": 1.3797588348388672, |
|
"learning_rate": 2.5488963227149566e-05, |
|
"loss": 1.565, |
|
"step": 15841 |
|
}, |
|
{ |
|
"epoch": 0.574583235288794, |
|
"grad_norm": 1.2842196226119995, |
|
"learning_rate": 2.5404994271913983e-05, |
|
"loss": 1.5489, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.5757054681702174, |
|
"grad_norm": 1.368696689605713, |
|
"learning_rate": 2.5321020746373085e-05, |
|
"loss": 1.5358, |
|
"step": 15903 |
|
}, |
|
{ |
|
"epoch": 0.5768277010516408, |
|
"grad_norm": 1.3306961059570312, |
|
"learning_rate": 2.52370435981567e-05, |
|
"loss": 1.541, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 0.5779499339330643, |
|
"grad_norm": 1.286727786064148, |
|
"learning_rate": 2.5153063774935533e-05, |
|
"loss": 1.533, |
|
"step": 15965 |
|
}, |
|
{ |
|
"epoch": 0.5790721668144877, |
|
"grad_norm": 1.434964656829834, |
|
"learning_rate": 2.506908222441045e-05, |
|
"loss": 1.5404, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 0.5801943996959111, |
|
"grad_norm": 1.3955284357070923, |
|
"learning_rate": 2.498509989430187e-05, |
|
"loss": 1.5532, |
|
"step": 16027 |
|
}, |
|
{ |
|
"epoch": 0.5813166325773346, |
|
"grad_norm": 1.3676408529281616, |
|
"learning_rate": 2.4901117732338958e-05, |
|
"loss": 1.5263, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 0.5824388654587579, |
|
"grad_norm": 1.3900113105773926, |
|
"learning_rate": 2.481713668624899e-05, |
|
"loss": 1.5465, |
|
"step": 16089 |
|
}, |
|
{ |
|
"epoch": 0.5835610983401813, |
|
"grad_norm": 1.3808554410934448, |
|
"learning_rate": 2.4733157703746663e-05, |
|
"loss": 1.5332, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.5846833312216048, |
|
"grad_norm": 1.2974086999893188, |
|
"learning_rate": 2.4649181732523392e-05, |
|
"loss": 1.5562, |
|
"step": 16151 |
|
}, |
|
{ |
|
"epoch": 0.5858055641030282, |
|
"grad_norm": 1.4109300374984741, |
|
"learning_rate": 2.4565209720236582e-05, |
|
"loss": 1.5273, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 0.5869277969844516, |
|
"grad_norm": 1.3626701831817627, |
|
"learning_rate": 2.4481242614498975e-05, |
|
"loss": 1.5311, |
|
"step": 16213 |
|
}, |
|
{ |
|
"epoch": 0.5880500298658751, |
|
"grad_norm": 1.3017241954803467, |
|
"learning_rate": 2.439728136286796e-05, |
|
"loss": 1.5522, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 0.5891722627472985, |
|
"grad_norm": 1.349171757698059, |
|
"learning_rate": 2.4313326912834852e-05, |
|
"loss": 1.5262, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.5902944956287219, |
|
"grad_norm": 1.3548376560211182, |
|
"learning_rate": 2.4229380211814206e-05, |
|
"loss": 1.5455, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 0.5914167285101454, |
|
"grad_norm": 1.412003755569458, |
|
"learning_rate": 2.4145442207133124e-05, |
|
"loss": 1.5634, |
|
"step": 16337 |
|
}, |
|
{ |
|
"epoch": 0.5925389613915688, |
|
"grad_norm": 1.3400499820709229, |
|
"learning_rate": 2.406151384602059e-05, |
|
"loss": 1.5398, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 0.5936611942729922, |
|
"grad_norm": 1.3035651445388794, |
|
"learning_rate": 2.3977596075596747e-05, |
|
"loss": 1.5289, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 0.5947834271544157, |
|
"grad_norm": 1.322824478149414, |
|
"learning_rate": 2.3893689842862223e-05, |
|
"loss": 1.5509, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 0.595905660035839, |
|
"grad_norm": 1.3810386657714844, |
|
"learning_rate": 2.3809796094687475e-05, |
|
"loss": 1.5439, |
|
"step": 16461 |
|
}, |
|
{ |
|
"epoch": 0.5970278929172624, |
|
"grad_norm": 1.399760127067566, |
|
"learning_rate": 2.372591577780202e-05, |
|
"loss": 1.5459, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 0.5981501257986859, |
|
"grad_norm": 1.3253116607666016, |
|
"learning_rate": 2.3642049838783838e-05, |
|
"loss": 1.5556, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 0.5992723586801093, |
|
"grad_norm": 1.3376234769821167, |
|
"learning_rate": 2.3558199224048666e-05, |
|
"loss": 1.5322, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 0.6003945915615327, |
|
"grad_norm": 1.274533748626709, |
|
"learning_rate": 2.347436487983929e-05, |
|
"loss": 1.5288, |
|
"step": 16585 |
|
}, |
|
{ |
|
"epoch": 0.6015168244429562, |
|
"grad_norm": 1.3756400346755981, |
|
"learning_rate": 2.3390547752214888e-05, |
|
"loss": 1.5287, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 0.6026390573243796, |
|
"grad_norm": 1.391845941543579, |
|
"learning_rate": 2.330674878704035e-05, |
|
"loss": 1.5329, |
|
"step": 16647 |
|
}, |
|
{ |
|
"epoch": 0.603761290205803, |
|
"grad_norm": 1.414237380027771, |
|
"learning_rate": 2.322296892997561e-05, |
|
"loss": 1.5482, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 0.6048835230872265, |
|
"grad_norm": 1.3953816890716553, |
|
"learning_rate": 2.313920912646497e-05, |
|
"loss": 1.5372, |
|
"step": 16709 |
|
}, |
|
{ |
|
"epoch": 0.6060057559686499, |
|
"grad_norm": 1.3669557571411133, |
|
"learning_rate": 2.305547032172643e-05, |
|
"loss": 1.5522, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.6071279888500734, |
|
"grad_norm": 1.3847616910934448, |
|
"learning_rate": 2.2971753460741014e-05, |
|
"loss": 1.5314, |
|
"step": 16771 |
|
}, |
|
{ |
|
"epoch": 0.6082502217314968, |
|
"grad_norm": 1.2923661470413208, |
|
"learning_rate": 2.288805948824212e-05, |
|
"loss": 1.5434, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 0.6093724546129201, |
|
"grad_norm": 1.3146955966949463, |
|
"learning_rate": 2.2804389348704858e-05, |
|
"loss": 1.5442, |
|
"step": 16833 |
|
}, |
|
{ |
|
"epoch": 0.6104946874943435, |
|
"grad_norm": 1.362166166305542, |
|
"learning_rate": 2.2720743986335374e-05, |
|
"loss": 1.546, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 0.611616920375767, |
|
"grad_norm": 1.3853099346160889, |
|
"learning_rate": 2.2637124345060233e-05, |
|
"loss": 1.5385, |
|
"step": 16895 |
|
}, |
|
{ |
|
"epoch": 0.6127391532571904, |
|
"grad_norm": 1.3611940145492554, |
|
"learning_rate": 2.2553531368515695e-05, |
|
"loss": 1.5577, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 0.6138613861386139, |
|
"grad_norm": 1.3302477598190308, |
|
"learning_rate": 2.2469966000037144e-05, |
|
"loss": 1.5566, |
|
"step": 16957 |
|
}, |
|
{ |
|
"epoch": 0.6149836190200373, |
|
"grad_norm": 1.3969210386276245, |
|
"learning_rate": 2.2386429182648417e-05, |
|
"loss": 1.5459, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 0.6161058519014607, |
|
"grad_norm": 1.3878018856048584, |
|
"learning_rate": 2.230292185905114e-05, |
|
"loss": 1.5295, |
|
"step": 17019 |
|
}, |
|
{ |
|
"epoch": 0.6172280847828842, |
|
"grad_norm": 1.3366162776947021, |
|
"learning_rate": 2.2219444971614116e-05, |
|
"loss": 1.5485, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.6183503176643076, |
|
"grad_norm": 1.3503491878509521, |
|
"learning_rate": 2.2135999462362655e-05, |
|
"loss": 1.5266, |
|
"step": 17081 |
|
}, |
|
{ |
|
"epoch": 0.619472550545731, |
|
"grad_norm": 1.3379223346710205, |
|
"learning_rate": 2.2052586272968003e-05, |
|
"loss": 1.5366, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 0.6205947834271545, |
|
"grad_norm": 1.299849033355713, |
|
"learning_rate": 2.196920634473666e-05, |
|
"loss": 1.5315, |
|
"step": 17143 |
|
}, |
|
{ |
|
"epoch": 0.6217170163085779, |
|
"grad_norm": 1.3590292930603027, |
|
"learning_rate": 2.1885860618599787e-05, |
|
"loss": 1.5332, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 0.6228392491900012, |
|
"grad_norm": 1.3150153160095215, |
|
"learning_rate": 2.1802550035102577e-05, |
|
"loss": 1.5197, |
|
"step": 17205 |
|
}, |
|
{ |
|
"epoch": 0.6239614820714247, |
|
"grad_norm": 1.3216016292572021, |
|
"learning_rate": 2.171927553439363e-05, |
|
"loss": 1.5344, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 0.6250837149528481, |
|
"grad_norm": 1.3521660566329956, |
|
"learning_rate": 2.1636038056214376e-05, |
|
"loss": 1.5236, |
|
"step": 17267 |
|
}, |
|
{ |
|
"epoch": 0.6262059478342715, |
|
"grad_norm": 1.4077104330062866, |
|
"learning_rate": 2.155283853988844e-05, |
|
"loss": 1.5318, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 0.627328180715695, |
|
"grad_norm": 1.4986066818237305, |
|
"learning_rate": 2.146967792431106e-05, |
|
"loss": 1.5466, |
|
"step": 17329 |
|
}, |
|
{ |
|
"epoch": 0.6284504135971184, |
|
"grad_norm": 1.2227765321731567, |
|
"learning_rate": 2.138655714793849e-05, |
|
"loss": 1.5345, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.6295726464785418, |
|
"grad_norm": 1.3314886093139648, |
|
"learning_rate": 2.1303477148777367e-05, |
|
"loss": 1.5376, |
|
"step": 17391 |
|
}, |
|
{ |
|
"epoch": 0.6306948793599653, |
|
"grad_norm": 1.3682267665863037, |
|
"learning_rate": 2.122043886437421e-05, |
|
"loss": 1.5313, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 0.6318171122413887, |
|
"grad_norm": 1.3226497173309326, |
|
"learning_rate": 2.1137443231804765e-05, |
|
"loss": 1.5361, |
|
"step": 17453 |
|
}, |
|
{ |
|
"epoch": 0.6329393451228121, |
|
"grad_norm": 1.3603419065475464, |
|
"learning_rate": 2.105449118766347e-05, |
|
"loss": 1.5353, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 0.6340615780042356, |
|
"grad_norm": 1.3611435890197754, |
|
"learning_rate": 2.097158366805287e-05, |
|
"loss": 1.5449, |
|
"step": 17515 |
|
}, |
|
{ |
|
"epoch": 0.6351838108856589, |
|
"grad_norm": 1.3318766355514526, |
|
"learning_rate": 2.0888721608573047e-05, |
|
"loss": 1.5194, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 0.6363060437670823, |
|
"grad_norm": 1.3144105672836304, |
|
"learning_rate": 2.0805905944311087e-05, |
|
"loss": 1.5288, |
|
"step": 17577 |
|
}, |
|
{ |
|
"epoch": 0.6374282766485058, |
|
"grad_norm": 1.3346774578094482, |
|
"learning_rate": 2.0723137609830497e-05, |
|
"loss": 1.5278, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 0.6385505095299292, |
|
"grad_norm": 1.4217780828475952, |
|
"learning_rate": 2.0640417539160686e-05, |
|
"loss": 1.5467, |
|
"step": 17639 |
|
}, |
|
{ |
|
"epoch": 0.6396727424113526, |
|
"grad_norm": 1.3335380554199219, |
|
"learning_rate": 2.0557746665786427e-05, |
|
"loss": 1.5506, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 0.6407949752927761, |
|
"grad_norm": 1.3793307542800903, |
|
"learning_rate": 2.0475125922637256e-05, |
|
"loss": 1.5172, |
|
"step": 17701 |
|
}, |
|
{ |
|
"epoch": 0.6419172081741995, |
|
"grad_norm": 1.3435157537460327, |
|
"learning_rate": 2.0392556242077047e-05, |
|
"loss": 1.5137, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 0.6430394410556229, |
|
"grad_norm": 1.3066918849945068, |
|
"learning_rate": 2.031003855589343e-05, |
|
"loss": 1.5184, |
|
"step": 17763 |
|
}, |
|
{ |
|
"epoch": 0.6441616739370464, |
|
"grad_norm": 1.4214332103729248, |
|
"learning_rate": 2.022757379528727e-05, |
|
"loss": 1.5239, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 0.6452839068184698, |
|
"grad_norm": 1.3571085929870605, |
|
"learning_rate": 2.0145162890862184e-05, |
|
"loss": 1.5234, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.6464061396998932, |
|
"grad_norm": 1.2680344581604004, |
|
"learning_rate": 2.0062806772614022e-05, |
|
"loss": 1.5207, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.6475283725813167, |
|
"grad_norm": 1.3365403413772583, |
|
"learning_rate": 1.9980506369920392e-05, |
|
"loss": 1.5457, |
|
"step": 17887 |
|
}, |
|
{ |
|
"epoch": 0.64865060546274, |
|
"grad_norm": 1.3576997518539429, |
|
"learning_rate": 1.989826261153015e-05, |
|
"loss": 1.516, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 0.6497728383441634, |
|
"grad_norm": 1.3189170360565186, |
|
"learning_rate": 1.9816076425552923e-05, |
|
"loss": 1.5204, |
|
"step": 17949 |
|
}, |
|
{ |
|
"epoch": 0.6508950712255869, |
|
"grad_norm": 1.2855075597763062, |
|
"learning_rate": 1.9733948739448676e-05, |
|
"loss": 1.5131, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.6520173041070103, |
|
"grad_norm": 1.3004227876663208, |
|
"learning_rate": 1.9651880480017155e-05, |
|
"loss": 1.5495, |
|
"step": 18011 |
|
}, |
|
{ |
|
"epoch": 0.6531395369884337, |
|
"grad_norm": 1.3858931064605713, |
|
"learning_rate": 1.9569872573387516e-05, |
|
"loss": 1.529, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 0.6542617698698572, |
|
"grad_norm": 1.378490686416626, |
|
"learning_rate": 1.9487925945007854e-05, |
|
"loss": 1.5281, |
|
"step": 18073 |
|
}, |
|
{ |
|
"epoch": 0.6553840027512806, |
|
"grad_norm": 1.317062258720398, |
|
"learning_rate": 1.9406041519634726e-05, |
|
"loss": 1.5294, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 0.656506235632704, |
|
"grad_norm": 1.313314437866211, |
|
"learning_rate": 1.932422022132275e-05, |
|
"loss": 1.5343, |
|
"step": 18135 |
|
}, |
|
{ |
|
"epoch": 0.6576284685141275, |
|
"grad_norm": 1.3339669704437256, |
|
"learning_rate": 1.924246297341414e-05, |
|
"loss": 1.5203, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 0.6587507013955509, |
|
"grad_norm": 1.298256516456604, |
|
"learning_rate": 1.9160770698528338e-05, |
|
"loss": 1.5297, |
|
"step": 18197 |
|
}, |
|
{ |
|
"epoch": 0.6598729342769744, |
|
"grad_norm": 1.322373628616333, |
|
"learning_rate": 1.907914431855156e-05, |
|
"loss": 1.5307, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 0.6609951671583978, |
|
"grad_norm": 1.403425931930542, |
|
"learning_rate": 1.8997584754626412e-05, |
|
"loss": 1.5279, |
|
"step": 18259 |
|
}, |
|
{ |
|
"epoch": 0.6621174000398211, |
|
"grad_norm": 1.3005762100219727, |
|
"learning_rate": 1.8916092927141486e-05, |
|
"loss": 1.5325, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 0.6632396329212445, |
|
"grad_norm": 1.3655368089675903, |
|
"learning_rate": 1.883466975572098e-05, |
|
"loss": 1.54, |
|
"step": 18321 |
|
}, |
|
{ |
|
"epoch": 0.664361865802668, |
|
"grad_norm": 1.376219391822815, |
|
"learning_rate": 1.8753316159214312e-05, |
|
"loss": 1.518, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 0.6654840986840914, |
|
"grad_norm": 1.3264917135238647, |
|
"learning_rate": 1.8672033055685766e-05, |
|
"loss": 1.5108, |
|
"step": 18383 |
|
}, |
|
{ |
|
"epoch": 0.6666063315655149, |
|
"grad_norm": 1.4083831310272217, |
|
"learning_rate": 1.8590821362404116e-05, |
|
"loss": 1.5252, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 0.6677285644469383, |
|
"grad_norm": 1.302178978919983, |
|
"learning_rate": 1.8509681995832294e-05, |
|
"loss": 1.4972, |
|
"step": 18445 |
|
}, |
|
{ |
|
"epoch": 0.6688507973283617, |
|
"grad_norm": 1.3290973901748657, |
|
"learning_rate": 1.8428615871617004e-05, |
|
"loss": 1.5343, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 0.6699730302097852, |
|
"grad_norm": 1.4198294878005981, |
|
"learning_rate": 1.8347623904578448e-05, |
|
"loss": 1.5272, |
|
"step": 18507 |
|
}, |
|
{ |
|
"epoch": 0.6710952630912086, |
|
"grad_norm": 1.2832363843917847, |
|
"learning_rate": 1.8266707008699975e-05, |
|
"loss": 1.5351, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 0.672217495972632, |
|
"grad_norm": 1.367154836654663, |
|
"learning_rate": 1.818586609711774e-05, |
|
"loss": 1.5236, |
|
"step": 18569 |
|
}, |
|
{ |
|
"epoch": 0.6733397288540555, |
|
"grad_norm": 1.3867367506027222, |
|
"learning_rate": 1.8105102082110462e-05, |
|
"loss": 1.5141, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.6744619617354789, |
|
"grad_norm": 1.3272528648376465, |
|
"learning_rate": 1.8024415875089058e-05, |
|
"loss": 1.5459, |
|
"step": 18631 |
|
}, |
|
{ |
|
"epoch": 0.6755841946169022, |
|
"grad_norm": 1.4012340307235718, |
|
"learning_rate": 1.7943808386586407e-05, |
|
"loss": 1.5082, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 0.6767064274983257, |
|
"grad_norm": 1.3309136629104614, |
|
"learning_rate": 1.7863280526247073e-05, |
|
"loss": 1.5207, |
|
"step": 18693 |
|
}, |
|
{ |
|
"epoch": 0.6778286603797491, |
|
"grad_norm": 1.3469054698944092, |
|
"learning_rate": 1.7782833202817003e-05, |
|
"loss": 1.5301, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 0.6789508932611725, |
|
"grad_norm": 1.3786745071411133, |
|
"learning_rate": 1.7702467324133327e-05, |
|
"loss": 1.5236, |
|
"step": 18755 |
|
}, |
|
{ |
|
"epoch": 0.680073126142596, |
|
"grad_norm": 1.3620835542678833, |
|
"learning_rate": 1.7622183797114042e-05, |
|
"loss": 1.5288, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 0.6811953590240194, |
|
"grad_norm": 1.3298254013061523, |
|
"learning_rate": 1.7541983527747838e-05, |
|
"loss": 1.5208, |
|
"step": 18817 |
|
}, |
|
{ |
|
"epoch": 0.6823175919054428, |
|
"grad_norm": 1.2911970615386963, |
|
"learning_rate": 1.746186742108387e-05, |
|
"loss": 1.5172, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 0.6834398247868663, |
|
"grad_norm": 1.30719792842865, |
|
"learning_rate": 1.73818363812215e-05, |
|
"loss": 1.5206, |
|
"step": 18879 |
|
}, |
|
{ |
|
"epoch": 0.6845620576682897, |
|
"grad_norm": 1.3682974576950073, |
|
"learning_rate": 1.7301891311300153e-05, |
|
"loss": 1.5126, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 0.6856842905497131, |
|
"grad_norm": 1.3172578811645508, |
|
"learning_rate": 1.7222033113489055e-05, |
|
"loss": 1.506, |
|
"step": 18941 |
|
}, |
|
{ |
|
"epoch": 0.6868065234311366, |
|
"grad_norm": 1.3976131677627563, |
|
"learning_rate": 1.7142262688977127e-05, |
|
"loss": 1.5161, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 0.68792875631256, |
|
"grad_norm": 1.3834096193313599, |
|
"learning_rate": 1.7062580937962764e-05, |
|
"loss": 1.5156, |
|
"step": 19003 |
|
}, |
|
{ |
|
"epoch": 0.6890509891939833, |
|
"grad_norm": 1.2939929962158203, |
|
"learning_rate": 1.698298875964369e-05, |
|
"loss": 1.5111, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 0.6901732220754068, |
|
"grad_norm": 1.416242241859436, |
|
"learning_rate": 1.690348705220684e-05, |
|
"loss": 1.5112, |
|
"step": 19065 |
|
}, |
|
{ |
|
"epoch": 0.6912954549568302, |
|
"grad_norm": 1.4598749876022339, |
|
"learning_rate": 1.6824076712818156e-05, |
|
"loss": 1.5074, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 0.6924176878382536, |
|
"grad_norm": 1.403602123260498, |
|
"learning_rate": 1.6744758637612533e-05, |
|
"loss": 1.5049, |
|
"step": 19127 |
|
}, |
|
{ |
|
"epoch": 0.6935399207196771, |
|
"grad_norm": 1.328615665435791, |
|
"learning_rate": 1.6665533721683664e-05, |
|
"loss": 1.5182, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 0.6946621536011005, |
|
"grad_norm": 1.3603520393371582, |
|
"learning_rate": 1.6586402859073974e-05, |
|
"loss": 1.5303, |
|
"step": 19189 |
|
}, |
|
{ |
|
"epoch": 0.6957843864825239, |
|
"grad_norm": 1.4252129793167114, |
|
"learning_rate": 1.6507366942764463e-05, |
|
"loss": 1.5364, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 0.6969066193639474, |
|
"grad_norm": 1.2863671779632568, |
|
"learning_rate": 1.6428426864664732e-05, |
|
"loss": 1.5243, |
|
"step": 19251 |
|
}, |
|
{ |
|
"epoch": 0.6980288522453708, |
|
"grad_norm": 1.298772931098938, |
|
"learning_rate": 1.6349583515602816e-05, |
|
"loss": 1.5254, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 0.6991510851267942, |
|
"grad_norm": 1.3208067417144775, |
|
"learning_rate": 1.6270837785315208e-05, |
|
"loss": 1.517, |
|
"step": 19313 |
|
}, |
|
{ |
|
"epoch": 0.7002733180082177, |
|
"grad_norm": 1.4582445621490479, |
|
"learning_rate": 1.619219056243676e-05, |
|
"loss": 1.5156, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 0.7013955508896411, |
|
"grad_norm": 1.3674423694610596, |
|
"learning_rate": 1.6113642734490698e-05, |
|
"loss": 1.5056, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 0.7025177837710644, |
|
"grad_norm": 1.289265513420105, |
|
"learning_rate": 1.6035195187878577e-05, |
|
"loss": 1.5151, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 0.7036400166524879, |
|
"grad_norm": 1.3161633014678955, |
|
"learning_rate": 1.5956848807870305e-05, |
|
"loss": 1.5206, |
|
"step": 19437 |
|
}, |
|
{ |
|
"epoch": 0.7047622495339113, |
|
"grad_norm": 1.3161797523498535, |
|
"learning_rate": 1.587860447859413e-05, |
|
"loss": 1.5132, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 0.7058844824153347, |
|
"grad_norm": 1.3772165775299072, |
|
"learning_rate": 1.5800463083026686e-05, |
|
"loss": 1.5273, |
|
"step": 19499 |
|
}, |
|
{ |
|
"epoch": 0.7070067152967582, |
|
"grad_norm": 1.3191962242126465, |
|
"learning_rate": 1.572242550298298e-05, |
|
"loss": 1.5238, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 0.7081289481781816, |
|
"grad_norm": 1.3758587837219238, |
|
"learning_rate": 1.56444926191065e-05, |
|
"loss": 1.5242, |
|
"step": 19561 |
|
}, |
|
{ |
|
"epoch": 0.709251181059605, |
|
"grad_norm": 1.3456153869628906, |
|
"learning_rate": 1.5566665310859257e-05, |
|
"loss": 1.5109, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 0.7103734139410285, |
|
"grad_norm": 1.3654590845108032, |
|
"learning_rate": 1.5488944456511846e-05, |
|
"loss": 1.5092, |
|
"step": 19623 |
|
}, |
|
{ |
|
"epoch": 0.7114956468224519, |
|
"grad_norm": 1.2868263721466064, |
|
"learning_rate": 1.5411330933133546e-05, |
|
"loss": 1.534, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 0.7126178797038754, |
|
"grad_norm": 1.3140943050384521, |
|
"learning_rate": 1.533382561658241e-05, |
|
"loss": 1.5381, |
|
"step": 19685 |
|
}, |
|
{ |
|
"epoch": 0.7137401125852988, |
|
"grad_norm": 1.353061556816101, |
|
"learning_rate": 1.525642938149541e-05, |
|
"loss": 1.5133, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 0.7148623454667222, |
|
"grad_norm": 1.378933072090149, |
|
"learning_rate": 1.5179143101278536e-05, |
|
"loss": 1.514, |
|
"step": 19747 |
|
}, |
|
{ |
|
"epoch": 0.7159845783481456, |
|
"grad_norm": 1.3969671726226807, |
|
"learning_rate": 1.5101967648096955e-05, |
|
"loss": 1.5255, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 0.717106811229569, |
|
"grad_norm": 1.3627468347549438, |
|
"learning_rate": 1.5024903892865172e-05, |
|
"loss": 1.5168, |
|
"step": 19809 |
|
}, |
|
{ |
|
"epoch": 0.7182290441109924, |
|
"grad_norm": 1.3613289594650269, |
|
"learning_rate": 1.4947952705237184e-05, |
|
"loss": 1.532, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.7193512769924159, |
|
"grad_norm": 1.3214402198791504, |
|
"learning_rate": 1.4871114953596682e-05, |
|
"loss": 1.5236, |
|
"step": 19871 |
|
}, |
|
{ |
|
"epoch": 0.7204735098738393, |
|
"grad_norm": 1.3939237594604492, |
|
"learning_rate": 1.4794391505047256e-05, |
|
"loss": 1.521, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 0.7215957427552627, |
|
"grad_norm": 1.384696364402771, |
|
"learning_rate": 1.4717783225402596e-05, |
|
"loss": 1.5118, |
|
"step": 19933 |
|
}, |
|
{ |
|
"epoch": 0.7227179756366862, |
|
"grad_norm": 1.286145806312561, |
|
"learning_rate": 1.4641290979176735e-05, |
|
"loss": 1.522, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 0.7238402085181096, |
|
"grad_norm": 1.380027413368225, |
|
"learning_rate": 1.4564915629574246e-05, |
|
"loss": 1.5147, |
|
"step": 19995 |
|
}, |
|
{ |
|
"epoch": 0.724962441399533, |
|
"grad_norm": 1.372430443763733, |
|
"learning_rate": 1.4488658038480601e-05, |
|
"loss": 1.5132, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 0.7260846742809565, |
|
"grad_norm": 1.3200669288635254, |
|
"learning_rate": 1.4412519066452323e-05, |
|
"loss": 1.4935, |
|
"step": 20057 |
|
}, |
|
{ |
|
"epoch": 0.7272069071623799, |
|
"grad_norm": 1.3791152238845825, |
|
"learning_rate": 1.4336499572707373e-05, |
|
"loss": 1.5242, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 0.7283291400438033, |
|
"grad_norm": 1.287310004234314, |
|
"learning_rate": 1.4260600415115433e-05, |
|
"loss": 1.5098, |
|
"step": 20119 |
|
}, |
|
{ |
|
"epoch": 0.7294513729252267, |
|
"grad_norm": 1.307353138923645, |
|
"learning_rate": 1.4184822450188137e-05, |
|
"loss": 1.5098, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.7305736058066501, |
|
"grad_norm": 1.3487526178359985, |
|
"learning_rate": 1.410916653306954e-05, |
|
"loss": 1.5167, |
|
"step": 20181 |
|
}, |
|
{ |
|
"epoch": 0.7316958386880735, |
|
"grad_norm": 1.3626441955566406, |
|
"learning_rate": 1.403363351752639e-05, |
|
"loss": 1.5005, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 0.732818071569497, |
|
"grad_norm": 1.3192275762557983, |
|
"learning_rate": 1.3958224255938485e-05, |
|
"loss": 1.5191, |
|
"step": 20243 |
|
}, |
|
{ |
|
"epoch": 0.7339403044509204, |
|
"grad_norm": 1.336755633354187, |
|
"learning_rate": 1.388293959928911e-05, |
|
"loss": 1.5223, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 0.7350625373323438, |
|
"grad_norm": 1.3645100593566895, |
|
"learning_rate": 1.3807780397155379e-05, |
|
"loss": 1.5156, |
|
"step": 20305 |
|
}, |
|
{ |
|
"epoch": 0.7361847702137673, |
|
"grad_norm": 1.3681402206420898, |
|
"learning_rate": 1.3732747497698655e-05, |
|
"loss": 1.5065, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 0.7373070030951907, |
|
"grad_norm": 1.3669005632400513, |
|
"learning_rate": 1.3657841747655038e-05, |
|
"loss": 1.5148, |
|
"step": 20367 |
|
}, |
|
{ |
|
"epoch": 0.7384292359766141, |
|
"grad_norm": 1.349400281906128, |
|
"learning_rate": 1.3583063992325706e-05, |
|
"loss": 1.5234, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 0.7395514688580376, |
|
"grad_norm": 1.3764326572418213, |
|
"learning_rate": 1.3508415075567496e-05, |
|
"loss": 1.5019, |
|
"step": 20429 |
|
}, |
|
{ |
|
"epoch": 0.740673701739461, |
|
"grad_norm": 1.5416663885116577, |
|
"learning_rate": 1.343389583978327e-05, |
|
"loss": 1.5188, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 0.7417959346208844, |
|
"grad_norm": 1.3264429569244385, |
|
"learning_rate": 1.3359507125912468e-05, |
|
"loss": 1.5041, |
|
"step": 20491 |
|
}, |
|
{ |
|
"epoch": 0.7429181675023078, |
|
"grad_norm": 1.3554550409317017, |
|
"learning_rate": 1.3285249773421627e-05, |
|
"loss": 1.5207, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 0.7440404003837312, |
|
"grad_norm": 1.31184983253479, |
|
"learning_rate": 1.3211124620294884e-05, |
|
"loss": 1.5257, |
|
"step": 20553 |
|
}, |
|
{ |
|
"epoch": 0.7451626332651546, |
|
"grad_norm": 1.3225113153457642, |
|
"learning_rate": 1.313713250302451e-05, |
|
"loss": 1.5196, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 0.7462848661465781, |
|
"grad_norm": 1.3386696577072144, |
|
"learning_rate": 1.3063274256601479e-05, |
|
"loss": 1.5174, |
|
"step": 20615 |
|
}, |
|
{ |
|
"epoch": 0.7474070990280015, |
|
"grad_norm": 1.423807978630066, |
|
"learning_rate": 1.2989550714506086e-05, |
|
"loss": 1.4968, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 0.7485293319094249, |
|
"grad_norm": 1.2833530902862549, |
|
"learning_rate": 1.291596270869846e-05, |
|
"loss": 1.491, |
|
"step": 20677 |
|
}, |
|
{ |
|
"epoch": 0.7496515647908484, |
|
"grad_norm": 1.2796401977539062, |
|
"learning_rate": 1.284251106960927e-05, |
|
"loss": 1.5062, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 0.7507737976722718, |
|
"grad_norm": 1.3797061443328857, |
|
"learning_rate": 1.2769196626130263e-05, |
|
"loss": 1.5152, |
|
"step": 20739 |
|
}, |
|
{ |
|
"epoch": 0.7518960305536952, |
|
"grad_norm": 1.4489312171936035, |
|
"learning_rate": 1.2696020205604969e-05, |
|
"loss": 1.5122, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 0.7530182634351187, |
|
"grad_norm": 1.3305705785751343, |
|
"learning_rate": 1.2622982633819359e-05, |
|
"loss": 1.5143, |
|
"step": 20801 |
|
}, |
|
{ |
|
"epoch": 0.7541404963165421, |
|
"grad_norm": 1.3734405040740967, |
|
"learning_rate": 1.2550084734992484e-05, |
|
"loss": 1.513, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 0.7552627291979656, |
|
"grad_norm": 1.2886455059051514, |
|
"learning_rate": 1.247732733176724e-05, |
|
"loss": 1.489, |
|
"step": 20863 |
|
}, |
|
{ |
|
"epoch": 0.7563849620793889, |
|
"grad_norm": 1.4357209205627441, |
|
"learning_rate": 1.2404711245201044e-05, |
|
"loss": 1.5179, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 0.7575071949608123, |
|
"grad_norm": 1.294068455696106, |
|
"learning_rate": 1.2332237294756535e-05, |
|
"loss": 1.5151, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 0.7586294278422357, |
|
"grad_norm": 1.3966395854949951, |
|
"learning_rate": 1.225990629829241e-05, |
|
"loss": 1.5127, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 0.7597516607236592, |
|
"grad_norm": 1.3190878629684448, |
|
"learning_rate": 1.2187719072054136e-05, |
|
"loss": 1.5063, |
|
"step": 20987 |
|
}, |
|
{ |
|
"epoch": 0.7608738936050826, |
|
"grad_norm": 1.2927324771881104, |
|
"learning_rate": 1.2115676430664735e-05, |
|
"loss": 1.4923, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 0.761996126486506, |
|
"grad_norm": 1.3363546133041382, |
|
"learning_rate": 1.2043779187115647e-05, |
|
"loss": 1.4928, |
|
"step": 21049 |
|
}, |
|
{ |
|
"epoch": 0.7631183593679295, |
|
"grad_norm": 1.322825312614441, |
|
"learning_rate": 1.1972028152757476e-05, |
|
"loss": 1.5116, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 0.7642405922493529, |
|
"grad_norm": 1.396026372909546, |
|
"learning_rate": 1.1900424137290889e-05, |
|
"loss": 1.5089, |
|
"step": 21111 |
|
}, |
|
{ |
|
"epoch": 0.7653628251307764, |
|
"grad_norm": 1.358963966369629, |
|
"learning_rate": 1.1828967948757482e-05, |
|
"loss": 1.505, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 0.7664850580121998, |
|
"grad_norm": 1.3169891834259033, |
|
"learning_rate": 1.175766039353062e-05, |
|
"loss": 1.5115, |
|
"step": 21173 |
|
}, |
|
{ |
|
"epoch": 0.7676072908936232, |
|
"grad_norm": 1.3406434059143066, |
|
"learning_rate": 1.1686502276306382e-05, |
|
"loss": 1.5093, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 0.7687295237750467, |
|
"grad_norm": 1.3709667921066284, |
|
"learning_rate": 1.1615494400094445e-05, |
|
"loss": 1.5017, |
|
"step": 21235 |
|
}, |
|
{ |
|
"epoch": 0.76985175665647, |
|
"grad_norm": 1.4957972764968872, |
|
"learning_rate": 1.1544637566209029e-05, |
|
"loss": 1.5121, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 0.7709739895378934, |
|
"grad_norm": 1.3525892496109009, |
|
"learning_rate": 1.1473932574259886e-05, |
|
"loss": 1.4934, |
|
"step": 21297 |
|
}, |
|
{ |
|
"epoch": 0.7720962224193169, |
|
"grad_norm": 1.3251068592071533, |
|
"learning_rate": 1.1403380222143247e-05, |
|
"loss": 1.4858, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 0.7732184553007403, |
|
"grad_norm": 1.3417954444885254, |
|
"learning_rate": 1.1332981306032808e-05, |
|
"loss": 1.5119, |
|
"step": 21359 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 30517, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 3052, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5847615378155897e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|