|
{ |
|
"best_metric": 6.66681432723999, |
|
"best_model_checkpoint": "ModernBERT-base-dnb/checkpoint-27850", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 27850, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004488330341113106, |
|
"grad_norm": 21.10218048095703, |
|
"learning_rate": 5.3859964093357274e-08, |
|
"loss": 20.235, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008976660682226212, |
|
"grad_norm": 21.044858932495117, |
|
"learning_rate": 1.0556552962298026e-07, |
|
"loss": 20.2822, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.013464991023339317, |
|
"grad_norm": 15.749917984008789, |
|
"learning_rate": 1.5942549371633754e-07, |
|
"loss": 20.2111, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.017953321364452424, |
|
"grad_norm": 53.41726303100586, |
|
"learning_rate": 2.1113105924596052e-07, |
|
"loss": 20.219, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02244165170556553, |
|
"grad_norm": 19.08318328857422, |
|
"learning_rate": 2.649910233393178e-07, |
|
"loss": 20.2098, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.026929982046678635, |
|
"grad_norm": 17.500648498535156, |
|
"learning_rate": 3.188509874326751e-07, |
|
"loss": 20.1833, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03141831238779174, |
|
"grad_norm": 15.890292167663574, |
|
"learning_rate": 3.7271095152603236e-07, |
|
"loss": 20.1506, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03590664272890485, |
|
"grad_norm": 15.574706077575684, |
|
"learning_rate": 4.265709156193896e-07, |
|
"loss": 20.2048, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04039497307001795, |
|
"grad_norm": 16.508468627929688, |
|
"learning_rate": 4.804308797127469e-07, |
|
"loss": 20.1701, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04488330341113106, |
|
"grad_norm": 14.636826515197754, |
|
"learning_rate": 5.342908438061041e-07, |
|
"loss": 20.0568, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04937163375224417, |
|
"grad_norm": 12.061074256896973, |
|
"learning_rate": 5.881508078994614e-07, |
|
"loss": 20.0871, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05385996409335727, |
|
"grad_norm": 13.407825469970703, |
|
"learning_rate": 6.420107719928187e-07, |
|
"loss": 20.116, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05834829443447038, |
|
"grad_norm": 14.116663932800293, |
|
"learning_rate": 6.95870736086176e-07, |
|
"loss": 20.1185, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06283662477558348, |
|
"grad_norm": 13.583087921142578, |
|
"learning_rate": 7.497307001795332e-07, |
|
"loss": 20.1201, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06732495511669659, |
|
"grad_norm": 16.15342903137207, |
|
"learning_rate": 8.035906642728905e-07, |
|
"loss": 20.0418, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.0718132854578097, |
|
"grad_norm": 11.18100643157959, |
|
"learning_rate": 8.574506283662477e-07, |
|
"loss": 20.0282, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0763016157989228, |
|
"grad_norm": 11.952834129333496, |
|
"learning_rate": 9.113105924596051e-07, |
|
"loss": 20.032, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.0807899461400359, |
|
"grad_norm": 11.063940048217773, |
|
"learning_rate": 9.651705565529624e-07, |
|
"loss": 20.0012, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08527827648114901, |
|
"grad_norm": 11.023772239685059, |
|
"learning_rate": 1.0190305206463197e-06, |
|
"loss": 20.0264, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08976660682226212, |
|
"grad_norm": 11.103007316589355, |
|
"learning_rate": 1.072890484739677e-06, |
|
"loss": 19.964, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09425493716337523, |
|
"grad_norm": 12.964080810546875, |
|
"learning_rate": 1.126750448833034e-06, |
|
"loss": 20.0389, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.09874326750448834, |
|
"grad_norm": 11.313638687133789, |
|
"learning_rate": 1.1806104129263915e-06, |
|
"loss": 20.0112, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10323159784560143, |
|
"grad_norm": 9.363356590270996, |
|
"learning_rate": 1.2344703770197488e-06, |
|
"loss": 19.9996, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.10771992818671454, |
|
"grad_norm": 11.197516441345215, |
|
"learning_rate": 1.2883303411131059e-06, |
|
"loss": 19.9399, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11220825852782765, |
|
"grad_norm": 9.123501777648926, |
|
"learning_rate": 1.3421903052064631e-06, |
|
"loss": 20.0128, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.11669658886894076, |
|
"grad_norm": 14.137397766113281, |
|
"learning_rate": 1.3960502692998206e-06, |
|
"loss": 20.0196, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12118491921005387, |
|
"grad_norm": 9.076166152954102, |
|
"learning_rate": 1.449910233393178e-06, |
|
"loss": 19.9431, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.12567324955116696, |
|
"grad_norm": 11.227516174316406, |
|
"learning_rate": 1.503770197486535e-06, |
|
"loss": 19.892, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13016157989228008, |
|
"grad_norm": 9.613359451293945, |
|
"learning_rate": 1.5576301615798923e-06, |
|
"loss": 19.9289, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.13464991023339318, |
|
"grad_norm": 9.49264907836914, |
|
"learning_rate": 1.6114901256732495e-06, |
|
"loss": 19.886, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13913824057450627, |
|
"grad_norm": 11.677379608154297, |
|
"learning_rate": 1.6653500897666068e-06, |
|
"loss": 19.8904, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.1436265709156194, |
|
"grad_norm": 11.741113662719727, |
|
"learning_rate": 1.719210053859964e-06, |
|
"loss": 19.901, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1481149012567325, |
|
"grad_norm": 12.074057579040527, |
|
"learning_rate": 1.7730700179533214e-06, |
|
"loss": 19.903, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.1526032315978456, |
|
"grad_norm": 17.792566299438477, |
|
"learning_rate": 1.8269299820466787e-06, |
|
"loss": 19.8621, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1570915619389587, |
|
"grad_norm": 10.892045021057129, |
|
"learning_rate": 1.880789946140036e-06, |
|
"loss": 19.8345, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.1615798922800718, |
|
"grad_norm": 11.912057876586914, |
|
"learning_rate": 1.9346499102333932e-06, |
|
"loss": 19.8252, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16606822262118492, |
|
"grad_norm": 11.374829292297363, |
|
"learning_rate": 1.9885098743267503e-06, |
|
"loss": 19.8219, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.17055655296229802, |
|
"grad_norm": 11.979461669921875, |
|
"learning_rate": 2.0423698384201078e-06, |
|
"loss": 19.7829, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.17504488330341114, |
|
"grad_norm": 12.24255657196045, |
|
"learning_rate": 2.096229802513465e-06, |
|
"loss": 19.7962, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.17953321364452424, |
|
"grad_norm": 11.375764846801758, |
|
"learning_rate": 2.150089766606822e-06, |
|
"loss": 19.7298, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18402154398563733, |
|
"grad_norm": 12.321856498718262, |
|
"learning_rate": 2.20394973070018e-06, |
|
"loss": 19.712, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.18850987432675045, |
|
"grad_norm": 12.325765609741211, |
|
"learning_rate": 2.257809694793537e-06, |
|
"loss": 19.6841, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.19299820466786355, |
|
"grad_norm": 12.724334716796875, |
|
"learning_rate": 2.3116696588868944e-06, |
|
"loss": 19.6671, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.19748653500897667, |
|
"grad_norm": 12.078253746032715, |
|
"learning_rate": 2.3655296229802515e-06, |
|
"loss": 19.6296, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.20197486535008977, |
|
"grad_norm": 13.096288681030273, |
|
"learning_rate": 2.4193895870736085e-06, |
|
"loss": 19.5341, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.20646319569120286, |
|
"grad_norm": 13.682557106018066, |
|
"learning_rate": 2.473249551166966e-06, |
|
"loss": 19.6408, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.21095152603231598, |
|
"grad_norm": 13.981725692749023, |
|
"learning_rate": 2.527109515260323e-06, |
|
"loss": 19.6481, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.21543985637342908, |
|
"grad_norm": 13.742826461791992, |
|
"learning_rate": 2.58096947935368e-06, |
|
"loss": 19.5424, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2199281867145422, |
|
"grad_norm": 16.546175003051758, |
|
"learning_rate": 2.634829443447038e-06, |
|
"loss": 19.5218, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.2244165170556553, |
|
"grad_norm": 15.267694473266602, |
|
"learning_rate": 2.688689407540395e-06, |
|
"loss": 19.5041, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2289048473967684, |
|
"grad_norm": 15.6527681350708, |
|
"learning_rate": 2.7425493716337522e-06, |
|
"loss": 19.6472, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2333931777378815, |
|
"grad_norm": 17.371788024902344, |
|
"learning_rate": 2.7964093357271097e-06, |
|
"loss": 19.4608, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2378815080789946, |
|
"grad_norm": 15.061119079589844, |
|
"learning_rate": 2.8502692998204668e-06, |
|
"loss": 19.4584, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.24236983842010773, |
|
"grad_norm": 13.703615188598633, |
|
"learning_rate": 2.90197486535009e-06, |
|
"loss": 19.4636, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.24685816876122083, |
|
"grad_norm": 13.755026817321777, |
|
"learning_rate": 2.9558348294434473e-06, |
|
"loss": 19.391, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.2513464991023339, |
|
"grad_norm": 15.358574867248535, |
|
"learning_rate": 3.0096947935368044e-06, |
|
"loss": 19.3758, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.25583482944344704, |
|
"grad_norm": 14.703276634216309, |
|
"learning_rate": 3.063554757630162e-06, |
|
"loss": 19.357, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.26032315978456017, |
|
"grad_norm": 14.63382625579834, |
|
"learning_rate": 3.117414721723519e-06, |
|
"loss": 19.2794, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.26481149012567323, |
|
"grad_norm": 15.199682235717773, |
|
"learning_rate": 3.171274685816876e-06, |
|
"loss": 19.2949, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.26929982046678635, |
|
"grad_norm": 14.768528938293457, |
|
"learning_rate": 3.2251346499102335e-06, |
|
"loss": 19.3093, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2737881508078995, |
|
"grad_norm": 14.896871566772461, |
|
"learning_rate": 3.2789946140035906e-06, |
|
"loss": 19.2305, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.27827648114901254, |
|
"grad_norm": 15.565362930297852, |
|
"learning_rate": 3.3328545780969477e-06, |
|
"loss": 19.2403, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.28276481149012567, |
|
"grad_norm": 16.003311157226562, |
|
"learning_rate": 3.3867145421903056e-06, |
|
"loss": 19.324, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.2872531418312388, |
|
"grad_norm": 15.933990478515625, |
|
"learning_rate": 3.4405745062836626e-06, |
|
"loss": 19.194, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2917414721723519, |
|
"grad_norm": 16.488842010498047, |
|
"learning_rate": 3.49443447037702e-06, |
|
"loss": 19.3091, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.296229802513465, |
|
"grad_norm": 15.880293846130371, |
|
"learning_rate": 3.548294434470377e-06, |
|
"loss": 19.1561, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3007181328545781, |
|
"grad_norm": 15.783681869506836, |
|
"learning_rate": 3.6021543985637343e-06, |
|
"loss": 19.0354, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.3052064631956912, |
|
"grad_norm": 17.726228713989258, |
|
"learning_rate": 3.6560143626570918e-06, |
|
"loss": 19.0885, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3096947935368043, |
|
"grad_norm": 15.17348575592041, |
|
"learning_rate": 3.709874326750449e-06, |
|
"loss": 19.2063, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.3141831238779174, |
|
"grad_norm": 16.843894958496094, |
|
"learning_rate": 3.763734290843806e-06, |
|
"loss": 18.9388, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.31867145421903054, |
|
"grad_norm": 23.23088264465332, |
|
"learning_rate": 3.817594254937163e-06, |
|
"loss": 19.1281, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.3231597845601436, |
|
"grad_norm": 15.54453182220459, |
|
"learning_rate": 3.8714542190305205e-06, |
|
"loss": 19.1134, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3276481149012567, |
|
"grad_norm": 16.050777435302734, |
|
"learning_rate": 3.925314183123878e-06, |
|
"loss": 18.9336, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.33213644524236985, |
|
"grad_norm": 15.230576515197754, |
|
"learning_rate": 3.979174147217235e-06, |
|
"loss": 19.063, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.33662477558348297, |
|
"grad_norm": 16.179035186767578, |
|
"learning_rate": 4.0330341113105925e-06, |
|
"loss": 18.9994, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.34111310592459604, |
|
"grad_norm": 16.51089096069336, |
|
"learning_rate": 4.0868940754039504e-06, |
|
"loss": 18.9635, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.34560143626570916, |
|
"grad_norm": 15.68458080291748, |
|
"learning_rate": 4.140754039497307e-06, |
|
"loss": 18.9526, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.3500897666068223, |
|
"grad_norm": 16.986204147338867, |
|
"learning_rate": 4.1946140035906646e-06, |
|
"loss": 19.042, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.35457809694793535, |
|
"grad_norm": 15.371708869934082, |
|
"learning_rate": 4.248473967684022e-06, |
|
"loss": 18.8415, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3590664272890485, |
|
"grad_norm": 15.591832160949707, |
|
"learning_rate": 4.302333931777379e-06, |
|
"loss": 18.8101, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3635547576301616, |
|
"grad_norm": 15.236952781677246, |
|
"learning_rate": 4.356193895870736e-06, |
|
"loss": 18.8519, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.36804308797127466, |
|
"grad_norm": 16.580678939819336, |
|
"learning_rate": 4.410053859964094e-06, |
|
"loss": 18.8845, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.3725314183123878, |
|
"grad_norm": 16.18427085876465, |
|
"learning_rate": 4.463913824057451e-06, |
|
"loss": 18.7093, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.3770197486535009, |
|
"grad_norm": 18.2146053314209, |
|
"learning_rate": 4.517773788150808e-06, |
|
"loss": 18.777, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.38150807899461403, |
|
"grad_norm": 16.10597038269043, |
|
"learning_rate": 4.571633752244166e-06, |
|
"loss": 18.7469, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.3859964093357271, |
|
"grad_norm": 15.864044189453125, |
|
"learning_rate": 4.625493716337523e-06, |
|
"loss": 18.8742, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.3904847396768402, |
|
"grad_norm": 18.99787139892578, |
|
"learning_rate": 4.67935368043088e-06, |
|
"loss": 18.7297, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.39497307001795334, |
|
"grad_norm": 16.34602928161621, |
|
"learning_rate": 4.733213644524237e-06, |
|
"loss": 18.6701, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3994614003590664, |
|
"grad_norm": 16.323881149291992, |
|
"learning_rate": 4.787073608617595e-06, |
|
"loss": 18.7605, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.40394973070017953, |
|
"grad_norm": 16.251663208007812, |
|
"learning_rate": 4.840933572710951e-06, |
|
"loss": 18.736, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.40843806104129265, |
|
"grad_norm": 16.65889549255371, |
|
"learning_rate": 4.894793536804309e-06, |
|
"loss": 18.7178, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.4129263913824057, |
|
"grad_norm": 16.258752822875977, |
|
"learning_rate": 4.948653500897667e-06, |
|
"loss": 18.5482, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.41741472172351884, |
|
"grad_norm": 15.998190879821777, |
|
"learning_rate": 5.002513464991023e-06, |
|
"loss": 18.7417, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.42190305206463197, |
|
"grad_norm": 14.841017723083496, |
|
"learning_rate": 5.056373429084381e-06, |
|
"loss": 18.6716, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4263913824057451, |
|
"grad_norm": 15.91247272491455, |
|
"learning_rate": 5.110233393177738e-06, |
|
"loss": 18.688, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.43087971274685816, |
|
"grad_norm": 17.436525344848633, |
|
"learning_rate": 5.164093357271095e-06, |
|
"loss": 18.5867, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4353680430879713, |
|
"grad_norm": 16.675212860107422, |
|
"learning_rate": 5.217953321364452e-06, |
|
"loss": 18.7864, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.4398563734290844, |
|
"grad_norm": 16.56376075744629, |
|
"learning_rate": 5.27181328545781e-06, |
|
"loss": 18.469, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.44434470377019747, |
|
"grad_norm": 16.11998176574707, |
|
"learning_rate": 5.325673249551166e-06, |
|
"loss": 18.528, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.4488330341113106, |
|
"grad_norm": 16.21501922607422, |
|
"learning_rate": 5.379533213644524e-06, |
|
"loss": 18.5634, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4533213644524237, |
|
"grad_norm": 17.891937255859375, |
|
"learning_rate": 5.433393177737882e-06, |
|
"loss": 18.5037, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.4578096947935368, |
|
"grad_norm": 18.845378875732422, |
|
"learning_rate": 5.4872531418312385e-06, |
|
"loss": 18.4701, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4622980251346499, |
|
"grad_norm": 14.12865924835205, |
|
"learning_rate": 5.541113105924596e-06, |
|
"loss": 18.5924, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.466786355475763, |
|
"grad_norm": 19.014360427856445, |
|
"learning_rate": 5.5949730700179534e-06, |
|
"loss": 18.4597, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.47127468581687615, |
|
"grad_norm": 16.36025047302246, |
|
"learning_rate": 5.6488330341113105e-06, |
|
"loss": 18.6834, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.4757630161579892, |
|
"grad_norm": 16.406009674072266, |
|
"learning_rate": 5.7026929982046676e-06, |
|
"loss": 18.4796, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.48025134649910234, |
|
"grad_norm": 16.516311645507812, |
|
"learning_rate": 5.7565529622980255e-06, |
|
"loss": 18.408, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.48473967684021546, |
|
"grad_norm": 15.522378921508789, |
|
"learning_rate": 5.8104129263913826e-06, |
|
"loss": 18.664, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.48922800718132853, |
|
"grad_norm": 25.351648330688477, |
|
"learning_rate": 5.86427289048474e-06, |
|
"loss": 18.4765, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.49371633752244165, |
|
"grad_norm": 15.928828239440918, |
|
"learning_rate": 5.9181328545780975e-06, |
|
"loss": 18.5569, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.4982046678635548, |
|
"grad_norm": 14.958181381225586, |
|
"learning_rate": 5.971992818671455e-06, |
|
"loss": 18.4793, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.5026929982046678, |
|
"grad_norm": 16.40975570678711, |
|
"learning_rate": 6.025852782764812e-06, |
|
"loss": 18.3546, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.507181328545781, |
|
"grad_norm": 15.906976699829102, |
|
"learning_rate": 6.079712746858169e-06, |
|
"loss": 18.4363, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.5116696588868941, |
|
"grad_norm": 15.144857406616211, |
|
"learning_rate": 6.133572710951527e-06, |
|
"loss": 18.3984, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5161579892280072, |
|
"grad_norm": 16.463176727294922, |
|
"learning_rate": 6.187432675044883e-06, |
|
"loss": 18.2991, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.5206463195691203, |
|
"grad_norm": 15.463627815246582, |
|
"learning_rate": 6.241292639138241e-06, |
|
"loss": 18.694, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5251346499102334, |
|
"grad_norm": 17.69695281982422, |
|
"learning_rate": 6.295152603231599e-06, |
|
"loss": 18.2619, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.5296229802513465, |
|
"grad_norm": 17.653112411499023, |
|
"learning_rate": 6.349012567324955e-06, |
|
"loss": 18.19, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5341113105924596, |
|
"grad_norm": 16.84458351135254, |
|
"learning_rate": 6.402872531418313e-06, |
|
"loss": 18.3045, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.5385996409335727, |
|
"grad_norm": 16.69748878479004, |
|
"learning_rate": 6.45673249551167e-06, |
|
"loss": 18.2229, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5430879712746858, |
|
"grad_norm": 20.153520584106445, |
|
"learning_rate": 6.510592459605027e-06, |
|
"loss": 18.3158, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.547576301615799, |
|
"grad_norm": 18.79121208190918, |
|
"learning_rate": 6.564452423698384e-06, |
|
"loss": 18.3017, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.552064631956912, |
|
"grad_norm": 16.678478240966797, |
|
"learning_rate": 6.618312387791742e-06, |
|
"loss": 18.39, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.5565529622980251, |
|
"grad_norm": 22.23566246032715, |
|
"learning_rate": 6.672172351885098e-06, |
|
"loss": 18.3635, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5610412926391383, |
|
"grad_norm": 17.27984619140625, |
|
"learning_rate": 6.726032315978456e-06, |
|
"loss": 18.1439, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.5655296229802513, |
|
"grad_norm": 16.337604522705078, |
|
"learning_rate": 6.779892280071814e-06, |
|
"loss": 18.378, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5700179533213644, |
|
"grad_norm": 16.56863784790039, |
|
"learning_rate": 6.83375224416517e-06, |
|
"loss": 18.192, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.5745062836624776, |
|
"grad_norm": 17.132043838500977, |
|
"learning_rate": 6.887612208258528e-06, |
|
"loss": 18.3463, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5789946140035906, |
|
"grad_norm": 17.860429763793945, |
|
"learning_rate": 6.941472172351885e-06, |
|
"loss": 18.2454, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.5834829443447038, |
|
"grad_norm": 14.994222640991211, |
|
"learning_rate": 6.995332136445242e-06, |
|
"loss": 18.3204, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5879712746858169, |
|
"grad_norm": 16.782873153686523, |
|
"learning_rate": 7.049192100538599e-06, |
|
"loss": 18.2429, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.59245960502693, |
|
"grad_norm": 15.746573448181152, |
|
"learning_rate": 7.103052064631957e-06, |
|
"loss": 18.0945, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5969479353680431, |
|
"grad_norm": 17.83595848083496, |
|
"learning_rate": 7.156912028725314e-06, |
|
"loss": 18.3612, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.6014362657091562, |
|
"grad_norm": 17.314441680908203, |
|
"learning_rate": 7.2107719928186714e-06, |
|
"loss": 18.1783, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.6059245960502693, |
|
"grad_norm": 16.076663970947266, |
|
"learning_rate": 7.264631956912029e-06, |
|
"loss": 18.3625, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.6104129263913824, |
|
"grad_norm": 16.62413215637207, |
|
"learning_rate": 7.318491921005386e-06, |
|
"loss": 18.3717, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6149012567324955, |
|
"grad_norm": 19.139238357543945, |
|
"learning_rate": 7.3723518850987435e-06, |
|
"loss": 18.0023, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.6193895870736086, |
|
"grad_norm": 15.575067520141602, |
|
"learning_rate": 7.4262118491921005e-06, |
|
"loss": 18.3083, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6238779174147218, |
|
"grad_norm": 18.650287628173828, |
|
"learning_rate": 7.4800718132854585e-06, |
|
"loss": 18.2143, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.6283662477558348, |
|
"grad_norm": 18.52598762512207, |
|
"learning_rate": 7.5339317773788155e-06, |
|
"loss": 18.0261, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6328545780969479, |
|
"grad_norm": 17.653348922729492, |
|
"learning_rate": 7.587791741472173e-06, |
|
"loss": 17.919, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.6373429084380611, |
|
"grad_norm": 17.140901565551758, |
|
"learning_rate": 7.641651705565529e-06, |
|
"loss": 18.0068, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6418312387791741, |
|
"grad_norm": 16.913959503173828, |
|
"learning_rate": 7.695511669658888e-06, |
|
"loss": 18.1295, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6463195691202872, |
|
"grad_norm": 18.763505935668945, |
|
"learning_rate": 7.749371633752245e-06, |
|
"loss": 17.8775, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6508078994614004, |
|
"grad_norm": 16.92535400390625, |
|
"learning_rate": 7.803231597845602e-06, |
|
"loss": 18.1007, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.6552962298025135, |
|
"grad_norm": 19.66353988647461, |
|
"learning_rate": 7.857091561938959e-06, |
|
"loss": 17.9173, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6597845601436265, |
|
"grad_norm": 17.924484252929688, |
|
"learning_rate": 7.910951526032318e-06, |
|
"loss": 17.9126, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6642728904847397, |
|
"grad_norm": 18.674997329711914, |
|
"learning_rate": 7.964811490125673e-06, |
|
"loss": 18.0945, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6687612208258528, |
|
"grad_norm": 17.291763305664062, |
|
"learning_rate": 8.01867145421903e-06, |
|
"loss": 18.1315, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.6732495511669659, |
|
"grad_norm": 18.881961822509766, |
|
"learning_rate": 8.072531418312387e-06, |
|
"loss": 17.8323, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.677737881508079, |
|
"grad_norm": 18.964895248413086, |
|
"learning_rate": 8.126391382405746e-06, |
|
"loss": 17.8395, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.6822262118491921, |
|
"grad_norm": 16.793581008911133, |
|
"learning_rate": 8.180251346499103e-06, |
|
"loss": 18.0268, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6867145421903053, |
|
"grad_norm": 16.940654754638672, |
|
"learning_rate": 8.23411131059246e-06, |
|
"loss": 17.9372, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.6912028725314183, |
|
"grad_norm": 16.80086898803711, |
|
"learning_rate": 8.287971274685817e-06, |
|
"loss": 17.7917, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6956912028725314, |
|
"grad_norm": 21.22157859802246, |
|
"learning_rate": 8.341831238779174e-06, |
|
"loss": 18.1762, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.7001795332136446, |
|
"grad_norm": 18.476032257080078, |
|
"learning_rate": 8.395691202872531e-06, |
|
"loss": 17.7716, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7046678635547576, |
|
"grad_norm": 15.498908996582031, |
|
"learning_rate": 8.447396768402154e-06, |
|
"loss": 18.0372, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.7091561938958707, |
|
"grad_norm": 18.895370483398438, |
|
"learning_rate": 8.501256732495513e-06, |
|
"loss": 18.0482, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7136445242369839, |
|
"grad_norm": 21.163408279418945, |
|
"learning_rate": 8.55511669658887e-06, |
|
"loss": 17.745, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.718132854578097, |
|
"grad_norm": 15.291622161865234, |
|
"learning_rate": 8.608976660682225e-06, |
|
"loss": 17.9657, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.72262118491921, |
|
"grad_norm": 19.777557373046875, |
|
"learning_rate": 8.662836624775583e-06, |
|
"loss": 17.7042, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.7271095152603232, |
|
"grad_norm": 18.978593826293945, |
|
"learning_rate": 8.716696588868941e-06, |
|
"loss": 17.7297, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7315978456014363, |
|
"grad_norm": 21.442642211914062, |
|
"learning_rate": 8.770556552962298e-06, |
|
"loss": 17.8813, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.7360861759425493, |
|
"grad_norm": 17.907501220703125, |
|
"learning_rate": 8.824416517055655e-06, |
|
"loss": 17.7841, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7405745062836625, |
|
"grad_norm": 18.066967010498047, |
|
"learning_rate": 8.878276481149014e-06, |
|
"loss": 17.8453, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.7450628366247756, |
|
"grad_norm": 20.10198974609375, |
|
"learning_rate": 8.93213644524237e-06, |
|
"loss": 17.8353, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7495511669658886, |
|
"grad_norm": 16.845745086669922, |
|
"learning_rate": 8.985996409335727e-06, |
|
"loss": 17.8432, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7540394973070018, |
|
"grad_norm": 18.617088317871094, |
|
"learning_rate": 9.039856373429084e-06, |
|
"loss": 17.8202, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7585278276481149, |
|
"grad_norm": 19.578916549682617, |
|
"learning_rate": 9.093716337522442e-06, |
|
"loss": 17.7985, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.7630161579892281, |
|
"grad_norm": 16.22163200378418, |
|
"learning_rate": 9.1475763016158e-06, |
|
"loss": 17.9493, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7675044883303411, |
|
"grad_norm": 18.09850311279297, |
|
"learning_rate": 9.201436265709157e-06, |
|
"loss": 17.7751, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.7719928186714542, |
|
"grad_norm": 18.09538459777832, |
|
"learning_rate": 9.255296229802514e-06, |
|
"loss": 17.7249, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7764811490125674, |
|
"grad_norm": 15.956615447998047, |
|
"learning_rate": 9.30915619389587e-06, |
|
"loss": 17.9555, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.7809694793536804, |
|
"grad_norm": 17.795026779174805, |
|
"learning_rate": 9.363016157989228e-06, |
|
"loss": 17.7529, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7854578096947935, |
|
"grad_norm": 18.07413101196289, |
|
"learning_rate": 9.416876122082585e-06, |
|
"loss": 17.5449, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.7899461400359067, |
|
"grad_norm": 19.664108276367188, |
|
"learning_rate": 9.470736086175944e-06, |
|
"loss": 17.6976, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7944344703770198, |
|
"grad_norm": 21.336183547973633, |
|
"learning_rate": 9.5245960502693e-06, |
|
"loss": 17.7112, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.7989228007181328, |
|
"grad_norm": 16.216899871826172, |
|
"learning_rate": 9.578456014362658e-06, |
|
"loss": 17.6466, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.803411131059246, |
|
"grad_norm": 17.492589950561523, |
|
"learning_rate": 9.632315978456013e-06, |
|
"loss": 17.823, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.8078994614003591, |
|
"grad_norm": 19.598114013671875, |
|
"learning_rate": 9.686175942549372e-06, |
|
"loss": 17.6314, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8123877917414721, |
|
"grad_norm": 18.45696258544922, |
|
"learning_rate": 9.740035906642729e-06, |
|
"loss": 17.6494, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.8168761220825853, |
|
"grad_norm": 17.067623138427734, |
|
"learning_rate": 9.793895870736086e-06, |
|
"loss": 17.5437, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.8213644524236984, |
|
"grad_norm": 22.662578582763672, |
|
"learning_rate": 9.847755834829445e-06, |
|
"loss": 17.6709, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.8258527827648114, |
|
"grad_norm": 17.072893142700195, |
|
"learning_rate": 9.901615798922802e-06, |
|
"loss": 17.7612, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8303411131059246, |
|
"grad_norm": 18.79728889465332, |
|
"learning_rate": 9.955475763016157e-06, |
|
"loss": 17.7251, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.8348294434470377, |
|
"grad_norm": 18.312700271606445, |
|
"learning_rate": 1.0009335727109514e-05, |
|
"loss": 17.5846, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8393177737881508, |
|
"grad_norm": 18.2193660736084, |
|
"learning_rate": 1.0063195691202873e-05, |
|
"loss": 17.463, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.8438061041292639, |
|
"grad_norm": 20.095277786254883, |
|
"learning_rate": 1.011705565529623e-05, |
|
"loss": 17.1827, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.848294434470377, |
|
"grad_norm": 20.12647247314453, |
|
"learning_rate": 1.0170915619389587e-05, |
|
"loss": 17.4883, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.8527827648114902, |
|
"grad_norm": 18.37622833251953, |
|
"learning_rate": 1.0224775583482946e-05, |
|
"loss": 17.5654, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.8572710951526032, |
|
"grad_norm": 18.199060440063477, |
|
"learning_rate": 1.0278635547576303e-05, |
|
"loss": 17.526, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.8617594254937163, |
|
"grad_norm": 20.442859649658203, |
|
"learning_rate": 1.0332495511669658e-05, |
|
"loss": 17.6952, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8662477558348295, |
|
"grad_norm": 18.540332794189453, |
|
"learning_rate": 1.0386355475763015e-05, |
|
"loss": 17.5526, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.8707360861759426, |
|
"grad_norm": 19.37755012512207, |
|
"learning_rate": 1.0440215439856374e-05, |
|
"loss": 17.463, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.8752244165170556, |
|
"grad_norm": 19.95784568786621, |
|
"learning_rate": 1.0494075403949731e-05, |
|
"loss": 17.671, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.8797127468581688, |
|
"grad_norm": 17.712932586669922, |
|
"learning_rate": 1.0547935368043088e-05, |
|
"loss": 17.4896, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.8842010771992819, |
|
"grad_norm": 19.43058967590332, |
|
"learning_rate": 1.0601795332136445e-05, |
|
"loss": 17.5935, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.8886894075403949, |
|
"grad_norm": 20.811893463134766, |
|
"learning_rate": 1.0655655296229803e-05, |
|
"loss": 17.5003, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8931777378815081, |
|
"grad_norm": 21.070432662963867, |
|
"learning_rate": 1.070951526032316e-05, |
|
"loss": 17.479, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.8976660682226212, |
|
"grad_norm": 17.394657135009766, |
|
"learning_rate": 1.0763375224416517e-05, |
|
"loss": 17.5536, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9021543985637342, |
|
"grad_norm": 19.602218627929688, |
|
"learning_rate": 1.0817235188509875e-05, |
|
"loss": 17.4437, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.9066427289048474, |
|
"grad_norm": 21.4727783203125, |
|
"learning_rate": 1.0871095152603232e-05, |
|
"loss": 17.3718, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.9111310592459605, |
|
"grad_norm": 18.7939510345459, |
|
"learning_rate": 1.092495511669659e-05, |
|
"loss": 17.1782, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.9156193895870736, |
|
"grad_norm": 21.373146057128906, |
|
"learning_rate": 1.0978815080789945e-05, |
|
"loss": 17.0867, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9201077199281867, |
|
"grad_norm": 22.122276306152344, |
|
"learning_rate": 1.1032675044883304e-05, |
|
"loss": 17.5058, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.9245960502692998, |
|
"grad_norm": 20.753555297851562, |
|
"learning_rate": 1.108653500897666e-05, |
|
"loss": 16.9196, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.9290843806104129, |
|
"grad_norm": 20.26563262939453, |
|
"learning_rate": 1.1140394973070018e-05, |
|
"loss": 17.3307, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.933572710951526, |
|
"grad_norm": 19.536109924316406, |
|
"learning_rate": 1.1194254937163377e-05, |
|
"loss": 17.2833, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9380610412926391, |
|
"grad_norm": 21.98984146118164, |
|
"learning_rate": 1.1248114901256734e-05, |
|
"loss": 17.3079, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.9425493716337523, |
|
"grad_norm": 20.069507598876953, |
|
"learning_rate": 1.1301974865350089e-05, |
|
"loss": 17.3164, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9470377019748654, |
|
"grad_norm": 19.031282424926758, |
|
"learning_rate": 1.1355834829443446e-05, |
|
"loss": 17.4307, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.9515260323159784, |
|
"grad_norm": 21.127609252929688, |
|
"learning_rate": 1.1409694793536805e-05, |
|
"loss": 17.0626, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.9560143626570916, |
|
"grad_norm": 22.808317184448242, |
|
"learning_rate": 1.1463554757630162e-05, |
|
"loss": 17.2784, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.9605026929982047, |
|
"grad_norm": 20.546794891357422, |
|
"learning_rate": 1.1517414721723519e-05, |
|
"loss": 17.1925, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.9649910233393177, |
|
"grad_norm": 18.644824981689453, |
|
"learning_rate": 1.1571274685816878e-05, |
|
"loss": 17.1864, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.9694793536804309, |
|
"grad_norm": 19.968189239501953, |
|
"learning_rate": 1.1625134649910235e-05, |
|
"loss": 17.1675, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.973967684021544, |
|
"grad_norm": 19.67746925354004, |
|
"learning_rate": 1.167899461400359e-05, |
|
"loss": 17.1256, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.9784560143626571, |
|
"grad_norm": 21.387107849121094, |
|
"learning_rate": 1.1732854578096947e-05, |
|
"loss": 17.411, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.9829443447037702, |
|
"grad_norm": 17.536405563354492, |
|
"learning_rate": 1.1786714542190306e-05, |
|
"loss": 17.1895, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.9874326750448833, |
|
"grad_norm": 21.705129623413086, |
|
"learning_rate": 1.1840574506283663e-05, |
|
"loss": 17.3247, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9919210053859964, |
|
"grad_norm": 20.925796508789062, |
|
"learning_rate": 1.189443447037702e-05, |
|
"loss": 17.2336, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.9964093357271095, |
|
"grad_norm": 19.501977920532227, |
|
"learning_rate": 1.1948294434470377e-05, |
|
"loss": 16.939, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.024535543123365092, |
|
"eval_f1_macro": 0.00018617887832660077, |
|
"eval_f1_micro": 0.024535543123365092, |
|
"eval_f1_weighted": 0.005391520089441958, |
|
"eval_loss": 8.812005996704102, |
|
"eval_precision_macro": 0.0002191599995913754, |
|
"eval_precision_micro": 0.024535543123365092, |
|
"eval_precision_weighted": 0.004721591271214293, |
|
"eval_recall_macro": 0.000766160477322559, |
|
"eval_recall_micro": 0.024535543123365092, |
|
"eval_recall_weighted": 0.024535543123365092, |
|
"eval_runtime": 128.9376, |
|
"eval_samples_per_second": 406.189, |
|
"eval_steps_per_second": 12.696, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.0008976660682227, |
|
"grad_norm": 19.746585845947266, |
|
"learning_rate": 1.2002154398563734e-05, |
|
"loss": 16.8127, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.0053859964093357, |
|
"grad_norm": 20.92815589904785, |
|
"learning_rate": 1.2056014362657091e-05, |
|
"loss": 16.6227, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.0098743267504489, |
|
"grad_norm": 23.12251091003418, |
|
"learning_rate": 1.2109874326750448e-05, |
|
"loss": 16.8028, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.014362657091562, |
|
"grad_norm": 21.773548126220703, |
|
"learning_rate": 1.2163734290843807e-05, |
|
"loss": 16.9285, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.018850987432675, |
|
"grad_norm": 18.216033935546875, |
|
"learning_rate": 1.2217594254937164e-05, |
|
"loss": 16.8515, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.0233393177737882, |
|
"grad_norm": 20.353927612304688, |
|
"learning_rate": 1.2271454219030521e-05, |
|
"loss": 16.9787, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.0278276481149013, |
|
"grad_norm": 22.886110305786133, |
|
"learning_rate": 1.2325314183123877e-05, |
|
"loss": 16.7614, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.0323159784560143, |
|
"grad_norm": 21.366548538208008, |
|
"learning_rate": 1.2379174147217235e-05, |
|
"loss": 16.3866, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.0368043087971275, |
|
"grad_norm": 23.675683975219727, |
|
"learning_rate": 1.2433034111310593e-05, |
|
"loss": 16.3334, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.0412926391382407, |
|
"grad_norm": 22.998641967773438, |
|
"learning_rate": 1.248689407540395e-05, |
|
"loss": 16.7228, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.0457809694793536, |
|
"grad_norm": 20.504121780395508, |
|
"learning_rate": 1.2540754039497308e-05, |
|
"loss": 16.5914, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.0502692998204668, |
|
"grad_norm": 22.66668128967285, |
|
"learning_rate": 1.2594614003590665e-05, |
|
"loss": 16.5995, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.05475763016158, |
|
"grad_norm": 20.194726943969727, |
|
"learning_rate": 1.264847396768402e-05, |
|
"loss": 16.7589, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.059245960502693, |
|
"grad_norm": 21.407981872558594, |
|
"learning_rate": 1.2702333931777378e-05, |
|
"loss": 16.6117, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.063734290843806, |
|
"grad_norm": 20.662927627563477, |
|
"learning_rate": 1.2756193895870737e-05, |
|
"loss": 16.5679, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.0682226211849193, |
|
"grad_norm": 24.050336837768555, |
|
"learning_rate": 1.2810053859964094e-05, |
|
"loss": 16.8247, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.0727109515260322, |
|
"grad_norm": 20.72054100036621, |
|
"learning_rate": 1.286391382405745e-05, |
|
"loss": 16.623, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.0771992818671454, |
|
"grad_norm": 23.1834659576416, |
|
"learning_rate": 1.291777378815081e-05, |
|
"loss": 16.4884, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0816876122082586, |
|
"grad_norm": 21.00957679748535, |
|
"learning_rate": 1.2971633752244167e-05, |
|
"loss": 16.3869, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.0861759425493716, |
|
"grad_norm": 22.43168067932129, |
|
"learning_rate": 1.3025493716337522e-05, |
|
"loss": 16.3575, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.0906642728904847, |
|
"grad_norm": 21.6562557220459, |
|
"learning_rate": 1.3079353680430879e-05, |
|
"loss": 16.4085, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.095152603231598, |
|
"grad_norm": 23.325424194335938, |
|
"learning_rate": 1.3133213644524238e-05, |
|
"loss": 16.5846, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.0996409335727109, |
|
"grad_norm": 24.215314865112305, |
|
"learning_rate": 1.3187073608617595e-05, |
|
"loss": 16.4518, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.104129263913824, |
|
"grad_norm": 24.384559631347656, |
|
"learning_rate": 1.3240933572710952e-05, |
|
"loss": 16.5581, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.1086175942549372, |
|
"grad_norm": 24.343595504760742, |
|
"learning_rate": 1.3294793536804309e-05, |
|
"loss": 16.2876, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.1131059245960502, |
|
"grad_norm": 21.597131729125977, |
|
"learning_rate": 1.3348653500897666e-05, |
|
"loss": 16.4586, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.1175942549371634, |
|
"grad_norm": 22.356834411621094, |
|
"learning_rate": 1.3402513464991023e-05, |
|
"loss": 16.2934, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.1220825852782765, |
|
"grad_norm": 22.678932189941406, |
|
"learning_rate": 1.345637342908438e-05, |
|
"loss": 16.5327, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.1265709156193895, |
|
"grad_norm": 19.975004196166992, |
|
"learning_rate": 1.3510233393177739e-05, |
|
"loss": 16.4558, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.1310592459605027, |
|
"grad_norm": 23.107633590698242, |
|
"learning_rate": 1.3564093357271096e-05, |
|
"loss": 16.4774, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.1355475763016158, |
|
"grad_norm": 23.048038482666016, |
|
"learning_rate": 1.3617953321364453e-05, |
|
"loss": 16.8229, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.140035906642729, |
|
"grad_norm": 22.7868595123291, |
|
"learning_rate": 1.3671813285457809e-05, |
|
"loss": 16.5661, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.144524236983842, |
|
"grad_norm": 24.512683868408203, |
|
"learning_rate": 1.3725673249551167e-05, |
|
"loss": 16.4614, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.1490125673249552, |
|
"grad_norm": 22.137468338012695, |
|
"learning_rate": 1.3779533213644524e-05, |
|
"loss": 16.6419, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.1535008976660683, |
|
"grad_norm": 25.348499298095703, |
|
"learning_rate": 1.3833393177737881e-05, |
|
"loss": 16.2941, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.1579892280071813, |
|
"grad_norm": 20.657936096191406, |
|
"learning_rate": 1.388725314183124e-05, |
|
"loss": 16.4759, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.1624775583482945, |
|
"grad_norm": 21.39447021484375, |
|
"learning_rate": 1.3941113105924597e-05, |
|
"loss": 16.6561, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.1669658886894076, |
|
"grad_norm": 23.087963104248047, |
|
"learning_rate": 1.3994973070017953e-05, |
|
"loss": 15.8167, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.1714542190305206, |
|
"grad_norm": 23.6542911529541, |
|
"learning_rate": 1.404883303411131e-05, |
|
"loss": 16.531, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.1759425493716338, |
|
"grad_norm": 23.05323028564453, |
|
"learning_rate": 1.4102692998204668e-05, |
|
"loss": 16.3313, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.180430879712747, |
|
"grad_norm": 22.49639320373535, |
|
"learning_rate": 1.4156552962298026e-05, |
|
"loss": 16.2067, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.18491921005386, |
|
"grad_norm": 27.224279403686523, |
|
"learning_rate": 1.4210412926391383e-05, |
|
"loss": 16.502, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.189407540394973, |
|
"grad_norm": 21.412261962890625, |
|
"learning_rate": 1.4264272890484741e-05, |
|
"loss": 16.6798, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.1938958707360863, |
|
"grad_norm": 23.425609588623047, |
|
"learning_rate": 1.4318132854578098e-05, |
|
"loss": 16.2533, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.1983842010771992, |
|
"grad_norm": 23.98543357849121, |
|
"learning_rate": 1.4371992818671454e-05, |
|
"loss": 16.2683, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.2028725314183124, |
|
"grad_norm": 24.748369216918945, |
|
"learning_rate": 1.4425852782764811e-05, |
|
"loss": 16.4797, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.2073608617594256, |
|
"grad_norm": 20.175334930419922, |
|
"learning_rate": 1.447971274685817e-05, |
|
"loss": 16.493, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.2118491921005385, |
|
"grad_norm": 23.000167846679688, |
|
"learning_rate": 1.4533572710951527e-05, |
|
"loss": 16.1375, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.2163375224416517, |
|
"grad_norm": 21.749601364135742, |
|
"learning_rate": 1.4587432675044884e-05, |
|
"loss": 16.4148, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.220825852782765, |
|
"grad_norm": 23.57693099975586, |
|
"learning_rate": 1.464129263913824e-05, |
|
"loss": 15.9781, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.2253141831238779, |
|
"grad_norm": 22.823196411132812, |
|
"learning_rate": 1.4695152603231598e-05, |
|
"loss": 16.6314, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.229802513464991, |
|
"grad_norm": 22.367694854736328, |
|
"learning_rate": 1.4749012567324955e-05, |
|
"loss": 16.5215, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.2342908438061042, |
|
"grad_norm": 33.2826042175293, |
|
"learning_rate": 1.4802872531418312e-05, |
|
"loss": 16.4813, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.2387791741472172, |
|
"grad_norm": 23.414485931396484, |
|
"learning_rate": 1.485673249551167e-05, |
|
"loss": 15.8688, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.2432675044883303, |
|
"grad_norm": 24.831056594848633, |
|
"learning_rate": 1.4910592459605028e-05, |
|
"loss": 16.1143, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.2477558348294435, |
|
"grad_norm": 23.415950775146484, |
|
"learning_rate": 1.4964452423698385e-05, |
|
"loss": 15.7436, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.2522441651705565, |
|
"grad_norm": 22.253082275390625, |
|
"learning_rate": 1.5018312387791742e-05, |
|
"loss": 16.0852, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.2567324955116697, |
|
"grad_norm": 22.159162521362305, |
|
"learning_rate": 1.5072172351885099e-05, |
|
"loss": 16.3732, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2612208258527828, |
|
"grad_norm": 22.717971801757812, |
|
"learning_rate": 1.5126032315978456e-05, |
|
"loss": 16.1097, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.2657091561938958, |
|
"grad_norm": 22.539794921875, |
|
"learning_rate": 1.5179892280071813e-05, |
|
"loss": 16.131, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.270197486535009, |
|
"grad_norm": 25.072383880615234, |
|
"learning_rate": 1.523375224416517e-05, |
|
"loss": 15.9651, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.2746858168761221, |
|
"grad_norm": 22.601781845092773, |
|
"learning_rate": 1.5287612208258526e-05, |
|
"loss": 16.0353, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.279174147217235, |
|
"grad_norm": 21.910064697265625, |
|
"learning_rate": 1.5341472172351888e-05, |
|
"loss": 16.1811, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.2836624775583483, |
|
"grad_norm": 23.791175842285156, |
|
"learning_rate": 1.5395332136445243e-05, |
|
"loss": 16.0497, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.2881508078994615, |
|
"grad_norm": 24.051387786865234, |
|
"learning_rate": 1.5449192100538602e-05, |
|
"loss": 16.0254, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.2926391382405744, |
|
"grad_norm": 20.40333366394043, |
|
"learning_rate": 1.5503052064631957e-05, |
|
"loss": 16.1743, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.2971274685816876, |
|
"grad_norm": 21.65686798095703, |
|
"learning_rate": 1.5556912028725313e-05, |
|
"loss": 16.0881, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.3016157989228008, |
|
"grad_norm": 23.0731201171875, |
|
"learning_rate": 1.561077199281867e-05, |
|
"loss": 15.7084, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.3061041292639137, |
|
"grad_norm": 23.546977996826172, |
|
"learning_rate": 1.5664631956912027e-05, |
|
"loss": 15.8303, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.310592459605027, |
|
"grad_norm": 24.602670669555664, |
|
"learning_rate": 1.571849192100539e-05, |
|
"loss": 16.0991, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.31508078994614, |
|
"grad_norm": 23.653459548950195, |
|
"learning_rate": 1.5772351885098744e-05, |
|
"loss": 16.011, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.319569120287253, |
|
"grad_norm": 23.47325325012207, |
|
"learning_rate": 1.5826211849192103e-05, |
|
"loss": 16.1551, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.3240574506283662, |
|
"grad_norm": 26.003053665161133, |
|
"learning_rate": 1.588007181328546e-05, |
|
"loss": 15.7585, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.3285457809694794, |
|
"grad_norm": 24.2227840423584, |
|
"learning_rate": 1.5933931777378814e-05, |
|
"loss": 16.1166, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.3330341113105924, |
|
"grad_norm": 23.852928161621094, |
|
"learning_rate": 1.5987791741472173e-05, |
|
"loss": 16.1642, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.3375224416517055, |
|
"grad_norm": 20.22197914123535, |
|
"learning_rate": 1.6041651705565528e-05, |
|
"loss": 16.2129, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.3420107719928187, |
|
"grad_norm": 23.04417610168457, |
|
"learning_rate": 1.609551166965889e-05, |
|
"loss": 15.9037, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.3464991023339317, |
|
"grad_norm": 22.43314552307129, |
|
"learning_rate": 1.6149371633752246e-05, |
|
"loss": 15.9117, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3509874326750448, |
|
"grad_norm": 23.691787719726562, |
|
"learning_rate": 1.62032315978456e-05, |
|
"loss": 16.1126, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.355475763016158, |
|
"grad_norm": 22.891239166259766, |
|
"learning_rate": 1.625709156193896e-05, |
|
"loss": 15.8261, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.359964093357271, |
|
"grad_norm": 22.000856399536133, |
|
"learning_rate": 1.6310951526032315e-05, |
|
"loss": 16.0308, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.3644524236983842, |
|
"grad_norm": 23.784557342529297, |
|
"learning_rate": 1.6364811490125674e-05, |
|
"loss": 15.7405, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.3689407540394973, |
|
"grad_norm": 23.533695220947266, |
|
"learning_rate": 1.641867145421903e-05, |
|
"loss": 15.9525, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.3734290843806103, |
|
"grad_norm": 22.5810604095459, |
|
"learning_rate": 1.647253141831239e-05, |
|
"loss": 15.9572, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.3779174147217235, |
|
"grad_norm": 22.96384048461914, |
|
"learning_rate": 1.6526391382405747e-05, |
|
"loss": 16.0959, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.3824057450628366, |
|
"grad_norm": 22.757946014404297, |
|
"learning_rate": 1.6580251346499102e-05, |
|
"loss": 16.0045, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.3868940754039496, |
|
"grad_norm": 24.446062088012695, |
|
"learning_rate": 1.663411131059246e-05, |
|
"loss": 16.0387, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.3913824057450628, |
|
"grad_norm": 24.733076095581055, |
|
"learning_rate": 1.6687971274685816e-05, |
|
"loss": 15.6993, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.395870736086176, |
|
"grad_norm": 22.095415115356445, |
|
"learning_rate": 1.6741831238779175e-05, |
|
"loss": 16.108, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.400359066427289, |
|
"grad_norm": 22.528247833251953, |
|
"learning_rate": 1.679569120287253e-05, |
|
"loss": 15.9896, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.404847396768402, |
|
"grad_norm": 21.990081787109375, |
|
"learning_rate": 1.684955116696589e-05, |
|
"loss": 15.8753, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.4093357271095153, |
|
"grad_norm": 24.167387008666992, |
|
"learning_rate": 1.6903411131059248e-05, |
|
"loss": 15.7562, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.4138240574506284, |
|
"grad_norm": 23.88982391357422, |
|
"learning_rate": 1.6957271095152603e-05, |
|
"loss": 15.8713, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.4183123877917414, |
|
"grad_norm": 26.82301902770996, |
|
"learning_rate": 1.7011131059245962e-05, |
|
"loss": 15.5179, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.4228007181328546, |
|
"grad_norm": 25.797740936279297, |
|
"learning_rate": 1.7064991023339317e-05, |
|
"loss": 15.8012, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.4272890484739678, |
|
"grad_norm": 24.005008697509766, |
|
"learning_rate": 1.7118850987432676e-05, |
|
"loss": 16.0349, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.4317773788150807, |
|
"grad_norm": 21.801897048950195, |
|
"learning_rate": 1.717271095152603e-05, |
|
"loss": 16.2094, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.436265709156194, |
|
"grad_norm": 22.728696823120117, |
|
"learning_rate": 1.7226570915619387e-05, |
|
"loss": 15.8239, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.440754039497307, |
|
"grad_norm": 23.855932235717773, |
|
"learning_rate": 1.728043087971275e-05, |
|
"loss": 15.7778, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.44524236983842, |
|
"grad_norm": 24.114036560058594, |
|
"learning_rate": 1.7334290843806104e-05, |
|
"loss": 15.9458, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.4497307001795332, |
|
"grad_norm": 23.884950637817383, |
|
"learning_rate": 1.7388150807899463e-05, |
|
"loss": 15.7334, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.4542190305206464, |
|
"grad_norm": 26.62238311767578, |
|
"learning_rate": 1.744201077199282e-05, |
|
"loss": 15.4899, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.4587073608617596, |
|
"grad_norm": 25.22521209716797, |
|
"learning_rate": 1.7495870736086177e-05, |
|
"loss": 15.6142, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.4631956912028725, |
|
"grad_norm": 24.73517417907715, |
|
"learning_rate": 1.7549730700179533e-05, |
|
"loss": 16.046, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.4676840215439857, |
|
"grad_norm": 25.76804542541504, |
|
"learning_rate": 1.7603590664272888e-05, |
|
"loss": 15.5229, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.4721723518850989, |
|
"grad_norm": 24.096242904663086, |
|
"learning_rate": 1.765745062836625e-05, |
|
"loss": 15.485, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.4766606822262118, |
|
"grad_norm": 23.282133102416992, |
|
"learning_rate": 1.7711310592459606e-05, |
|
"loss": 15.4915, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.481149012567325, |
|
"grad_norm": 23.70339012145996, |
|
"learning_rate": 1.7765170556552964e-05, |
|
"loss": 15.778, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.4856373429084382, |
|
"grad_norm": 24.140331268310547, |
|
"learning_rate": 1.781903052064632e-05, |
|
"loss": 15.9428, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.4901256732495511, |
|
"grad_norm": 22.932546615600586, |
|
"learning_rate": 1.7872890484739675e-05, |
|
"loss": 15.6304, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.4946140035906643, |
|
"grad_norm": 24.020971298217773, |
|
"learning_rate": 1.7926750448833034e-05, |
|
"loss": 15.4193, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.4991023339317775, |
|
"grad_norm": 24.903371810913086, |
|
"learning_rate": 1.798061041292639e-05, |
|
"loss": 15.3212, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.5035906642728905, |
|
"grad_norm": 24.483036041259766, |
|
"learning_rate": 1.803447037701975e-05, |
|
"loss": 15.5371, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.5080789946140036, |
|
"grad_norm": 24.4531192779541, |
|
"learning_rate": 1.8088330341113107e-05, |
|
"loss": 15.6667, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.5125673249551168, |
|
"grad_norm": 23.508136749267578, |
|
"learning_rate": 1.8142190305206466e-05, |
|
"loss": 15.545, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.5170556552962298, |
|
"grad_norm": 25.5224666595459, |
|
"learning_rate": 1.819605026929982e-05, |
|
"loss": 15.8223, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.521543985637343, |
|
"grad_norm": 23.785808563232422, |
|
"learning_rate": 1.8249910233393176e-05, |
|
"loss": 15.736, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.5260323159784561, |
|
"grad_norm": 22.968332290649414, |
|
"learning_rate": 1.8303770197486535e-05, |
|
"loss": 15.5985, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.530520646319569, |
|
"grad_norm": 24.91457176208496, |
|
"learning_rate": 1.835763016157989e-05, |
|
"loss": 15.5723, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.5350089766606823, |
|
"grad_norm": 27.051095962524414, |
|
"learning_rate": 1.8411490125673253e-05, |
|
"loss": 15.5436, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.5394973070017954, |
|
"grad_norm": 26.1645565032959, |
|
"learning_rate": 1.8465350089766608e-05, |
|
"loss": 15.7189, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.5439856373429084, |
|
"grad_norm": 25.47484016418457, |
|
"learning_rate": 1.8519210053859967e-05, |
|
"loss": 15.4637, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.5484739676840216, |
|
"grad_norm": 21.521570205688477, |
|
"learning_rate": 1.8573070017953322e-05, |
|
"loss": 15.7744, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.5529622980251347, |
|
"grad_norm": 22.680315017700195, |
|
"learning_rate": 1.8626929982046677e-05, |
|
"loss": 15.516, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.5574506283662477, |
|
"grad_norm": 22.149436950683594, |
|
"learning_rate": 1.8680789946140036e-05, |
|
"loss": 16.09, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.5619389587073609, |
|
"grad_norm": 24.99411392211914, |
|
"learning_rate": 1.873464991023339e-05, |
|
"loss": 15.5241, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.566427289048474, |
|
"grad_norm": 24.49349021911621, |
|
"learning_rate": 1.8788509874326754e-05, |
|
"loss": 15.5149, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.570915619389587, |
|
"grad_norm": 24.748638153076172, |
|
"learning_rate": 1.884236983842011e-05, |
|
"loss": 15.4012, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.5754039497307002, |
|
"grad_norm": 23.619789123535156, |
|
"learning_rate": 1.8896229802513465e-05, |
|
"loss": 15.4603, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.5798922800718134, |
|
"grad_norm": 24.02398681640625, |
|
"learning_rate": 1.8950089766606823e-05, |
|
"loss": 15.7808, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.5843806104129263, |
|
"grad_norm": 24.2972354888916, |
|
"learning_rate": 1.900394973070018e-05, |
|
"loss": 15.1804, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.5888689407540395, |
|
"grad_norm": 24.516998291015625, |
|
"learning_rate": 1.9057809694793537e-05, |
|
"loss": 15.318, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.5933572710951527, |
|
"grad_norm": 24.47681999206543, |
|
"learning_rate": 1.9111669658886893e-05, |
|
"loss": 15.5455, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.5978456014362656, |
|
"grad_norm": 26.061948776245117, |
|
"learning_rate": 1.9165529622980255e-05, |
|
"loss": 15.204, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.6023339317773788, |
|
"grad_norm": 25.155284881591797, |
|
"learning_rate": 1.9217235188509875e-05, |
|
"loss": 15.7975, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.606822262118492, |
|
"grad_norm": 26.721513748168945, |
|
"learning_rate": 1.9271095152603233e-05, |
|
"loss": 14.8189, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.611310592459605, |
|
"grad_norm": 24.048892974853516, |
|
"learning_rate": 1.932495511669659e-05, |
|
"loss": 15.4657, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 1.6157989228007181, |
|
"grad_norm": 21.87297248840332, |
|
"learning_rate": 1.9378815080789948e-05, |
|
"loss": 15.2708, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.6202872531418313, |
|
"grad_norm": 23.78717613220215, |
|
"learning_rate": 1.9432675044883303e-05, |
|
"loss": 15.2762, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 1.6247755834829443, |
|
"grad_norm": 25.389694213867188, |
|
"learning_rate": 1.948653500897666e-05, |
|
"loss": 15.4709, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.6292639138240574, |
|
"grad_norm": 25.06108283996582, |
|
"learning_rate": 1.9540394973070017e-05, |
|
"loss": 15.4012, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 1.6337522441651706, |
|
"grad_norm": 22.665700912475586, |
|
"learning_rate": 1.9594254937163376e-05, |
|
"loss": 15.7225, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.6382405745062836, |
|
"grad_norm": 24.0644474029541, |
|
"learning_rate": 1.9648114901256735e-05, |
|
"loss": 15.5653, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 1.6427289048473968, |
|
"grad_norm": 25.258146286010742, |
|
"learning_rate": 1.970197486535009e-05, |
|
"loss": 15.5544, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.64721723518851, |
|
"grad_norm": 26.202850341796875, |
|
"learning_rate": 1.975583482944345e-05, |
|
"loss": 15.1562, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 1.6517055655296229, |
|
"grad_norm": 25.502126693725586, |
|
"learning_rate": 1.9809694793536804e-05, |
|
"loss": 15.5075, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.656193895870736, |
|
"grad_norm": 22.884952545166016, |
|
"learning_rate": 1.9863554757630163e-05, |
|
"loss": 15.386, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 1.6606822262118492, |
|
"grad_norm": 22.87488555908203, |
|
"learning_rate": 1.9917414721723518e-05, |
|
"loss": 15.1854, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.6651705565529622, |
|
"grad_norm": 25.1315975189209, |
|
"learning_rate": 1.9971274685816877e-05, |
|
"loss": 15.0536, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 1.6696588868940754, |
|
"grad_norm": 23.088226318359375, |
|
"learning_rate": 2.0025134649910236e-05, |
|
"loss": 15.6098, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.6741472172351886, |
|
"grad_norm": 24.46171760559082, |
|
"learning_rate": 2.007899461400359e-05, |
|
"loss": 15.2449, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 1.6786355475763015, |
|
"grad_norm": 25.243085861206055, |
|
"learning_rate": 2.013285457809695e-05, |
|
"loss": 15.3234, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.6831238779174147, |
|
"grad_norm": 24.72486686706543, |
|
"learning_rate": 2.0186714542190305e-05, |
|
"loss": 15.641, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 1.6876122082585279, |
|
"grad_norm": 23.12143898010254, |
|
"learning_rate": 2.024057450628366e-05, |
|
"loss": 15.2531, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.6921005385996408, |
|
"grad_norm": 24.512834548950195, |
|
"learning_rate": 2.029443447037702e-05, |
|
"loss": 15.5208, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 1.696588868940754, |
|
"grad_norm": 25.56024742126465, |
|
"learning_rate": 2.0348294434470378e-05, |
|
"loss": 15.0794, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.7010771992818672, |
|
"grad_norm": 25.564701080322266, |
|
"learning_rate": 2.0402154398563737e-05, |
|
"loss": 15.0259, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 1.7055655296229801, |
|
"grad_norm": 25.182714462280273, |
|
"learning_rate": 2.0456014362657092e-05, |
|
"loss": 15.3297, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.7100538599640933, |
|
"grad_norm": 25.756427764892578, |
|
"learning_rate": 2.050987432675045e-05, |
|
"loss": 15.0274, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 1.7145421903052065, |
|
"grad_norm": 24.414350509643555, |
|
"learning_rate": 2.0563734290843806e-05, |
|
"loss": 15.004, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.7190305206463194, |
|
"grad_norm": 26.023277282714844, |
|
"learning_rate": 2.0617594254937162e-05, |
|
"loss": 14.8821, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 1.7235188509874326, |
|
"grad_norm": 24.01046371459961, |
|
"learning_rate": 2.067145421903052e-05, |
|
"loss": 15.2589, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.7280071813285458, |
|
"grad_norm": 24.23836898803711, |
|
"learning_rate": 2.0725314183123876e-05, |
|
"loss": 15.1177, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 1.7324955116696588, |
|
"grad_norm": 23.774337768554688, |
|
"learning_rate": 2.0779174147217238e-05, |
|
"loss": 15.1478, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.736983842010772, |
|
"grad_norm": 28.614397048950195, |
|
"learning_rate": 2.0833034111310593e-05, |
|
"loss": 15.1796, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 1.7414721723518851, |
|
"grad_norm": 26.42593765258789, |
|
"learning_rate": 2.0886894075403952e-05, |
|
"loss": 15.0701, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.745960502692998, |
|
"grad_norm": 23.472248077392578, |
|
"learning_rate": 2.0940754039497308e-05, |
|
"loss": 15.3539, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 1.7504488330341115, |
|
"grad_norm": 23.4112491607666, |
|
"learning_rate": 2.0994614003590663e-05, |
|
"loss": 15.2332, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.7549371633752244, |
|
"grad_norm": 21.964303970336914, |
|
"learning_rate": 2.1048473967684022e-05, |
|
"loss": 15.1311, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 1.7594254937163374, |
|
"grad_norm": 25.997272491455078, |
|
"learning_rate": 2.1102333931777377e-05, |
|
"loss": 15.4253, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.7639138240574508, |
|
"grad_norm": 24.534364700317383, |
|
"learning_rate": 2.115619389587074e-05, |
|
"loss": 14.8151, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 1.7684021543985637, |
|
"grad_norm": 25.785430908203125, |
|
"learning_rate": 2.1210053859964095e-05, |
|
"loss": 15.1881, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.7728904847396767, |
|
"grad_norm": 24.27193832397461, |
|
"learning_rate": 2.126391382405745e-05, |
|
"loss": 15.0705, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 1.77737881508079, |
|
"grad_norm": 24.99488067626953, |
|
"learning_rate": 2.131777378815081e-05, |
|
"loss": 15.1269, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.781867145421903, |
|
"grad_norm": 25.080209732055664, |
|
"learning_rate": 2.1371633752244164e-05, |
|
"loss": 15.12, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 1.786355475763016, |
|
"grad_norm": 25.579904556274414, |
|
"learning_rate": 2.1425493716337523e-05, |
|
"loss": 14.9893, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.7908438061041294, |
|
"grad_norm": 25.11918067932129, |
|
"learning_rate": 2.1479353680430878e-05, |
|
"loss": 15.1663, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 1.7953321364452424, |
|
"grad_norm": 27.383655548095703, |
|
"learning_rate": 2.153321364452424e-05, |
|
"loss": 15.1548, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7998204667863553, |
|
"grad_norm": 24.2135009765625, |
|
"learning_rate": 2.1587073608617596e-05, |
|
"loss": 15.0468, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 1.8043087971274687, |
|
"grad_norm": 26.53235626220703, |
|
"learning_rate": 2.164093357271095e-05, |
|
"loss": 14.9113, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.8087971274685817, |
|
"grad_norm": 25.139854431152344, |
|
"learning_rate": 2.169479353680431e-05, |
|
"loss": 15.2685, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 1.8132854578096946, |
|
"grad_norm": 26.078100204467773, |
|
"learning_rate": 2.1748653500897665e-05, |
|
"loss": 15.0769, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.817773788150808, |
|
"grad_norm": 32.14773941040039, |
|
"learning_rate": 2.1802513464991024e-05, |
|
"loss": 15.0157, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 1.822262118491921, |
|
"grad_norm": 25.352624893188477, |
|
"learning_rate": 2.185637342908438e-05, |
|
"loss": 15.2303, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.826750448833034, |
|
"grad_norm": 24.74574851989746, |
|
"learning_rate": 2.1910233393177738e-05, |
|
"loss": 14.5637, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 1.8312387791741473, |
|
"grad_norm": 26.362592697143555, |
|
"learning_rate": 2.1964093357271097e-05, |
|
"loss": 14.9059, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.8357271095152603, |
|
"grad_norm": 24.987171173095703, |
|
"learning_rate": 2.2017953321364452e-05, |
|
"loss": 15.069, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 1.8402154398563735, |
|
"grad_norm": 24.836288452148438, |
|
"learning_rate": 2.207181328545781e-05, |
|
"loss": 15.0462, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.8447037701974867, |
|
"grad_norm": 24.79768180847168, |
|
"learning_rate": 2.2125673249551166e-05, |
|
"loss": 14.8612, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 1.8491921005385996, |
|
"grad_norm": 25.61474609375, |
|
"learning_rate": 2.2179533213644525e-05, |
|
"loss": 14.7445, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.8536804308797128, |
|
"grad_norm": 25.009479522705078, |
|
"learning_rate": 2.223339317773788e-05, |
|
"loss": 14.9331, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 1.858168761220826, |
|
"grad_norm": 25.85749053955078, |
|
"learning_rate": 2.228725314183124e-05, |
|
"loss": 14.8491, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.862657091561939, |
|
"grad_norm": 24.728235244750977, |
|
"learning_rate": 2.2341113105924598e-05, |
|
"loss": 14.8084, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 1.867145421903052, |
|
"grad_norm": 23.449575424194336, |
|
"learning_rate": 2.2394973070017954e-05, |
|
"loss": 14.8635, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.8716337522441653, |
|
"grad_norm": 23.53273582458496, |
|
"learning_rate": 2.2448833034111312e-05, |
|
"loss": 15.0607, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 1.8761220825852782, |
|
"grad_norm": 26.236675262451172, |
|
"learning_rate": 2.2502692998204668e-05, |
|
"loss": 14.7338, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.8806104129263914, |
|
"grad_norm": 24.960784912109375, |
|
"learning_rate": 2.2556552962298026e-05, |
|
"loss": 14.831, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 1.8850987432675046, |
|
"grad_norm": 23.77855682373047, |
|
"learning_rate": 2.2610412926391382e-05, |
|
"loss": 14.9616, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.8895870736086176, |
|
"grad_norm": 25.975210189819336, |
|
"learning_rate": 2.266427289048474e-05, |
|
"loss": 14.5186, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 1.8940754039497307, |
|
"grad_norm": 26.122711181640625, |
|
"learning_rate": 2.27181328545781e-05, |
|
"loss": 14.3817, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.898563734290844, |
|
"grad_norm": 25.613475799560547, |
|
"learning_rate": 2.2771992818671455e-05, |
|
"loss": 14.5428, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 1.9030520646319569, |
|
"grad_norm": 24.9304141998291, |
|
"learning_rate": 2.2825852782764813e-05, |
|
"loss": 14.8628, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.90754039497307, |
|
"grad_norm": 25.525495529174805, |
|
"learning_rate": 2.287971274685817e-05, |
|
"loss": 14.7881, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 1.9120287253141832, |
|
"grad_norm": 24.550325393676758, |
|
"learning_rate": 2.2933572710951524e-05, |
|
"loss": 14.5237, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.9165170556552962, |
|
"grad_norm": 26.814821243286133, |
|
"learning_rate": 2.2987432675044883e-05, |
|
"loss": 14.9076, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 1.9210053859964094, |
|
"grad_norm": 25.589099884033203, |
|
"learning_rate": 2.3041292639138242e-05, |
|
"loss": 14.9983, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.9254937163375225, |
|
"grad_norm": 26.260356903076172, |
|
"learning_rate": 2.30951526032316e-05, |
|
"loss": 14.4078, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 1.9299820466786355, |
|
"grad_norm": 40.02426528930664, |
|
"learning_rate": 2.3149012567324956e-05, |
|
"loss": 14.6382, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.9344703770197487, |
|
"grad_norm": 24.463035583496094, |
|
"learning_rate": 2.3202872531418315e-05, |
|
"loss": 14.251, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 1.9389587073608618, |
|
"grad_norm": 26.021873474121094, |
|
"learning_rate": 2.325673249551167e-05, |
|
"loss": 14.783, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.9434470377019748, |
|
"grad_norm": 25.914993286132812, |
|
"learning_rate": 2.3310592459605025e-05, |
|
"loss": 14.6037, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 1.947935368043088, |
|
"grad_norm": 24.850980758666992, |
|
"learning_rate": 2.3364452423698384e-05, |
|
"loss": 14.692, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.9524236983842012, |
|
"grad_norm": 23.075193405151367, |
|
"learning_rate": 2.341831238779174e-05, |
|
"loss": 14.4149, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 1.9569120287253141, |
|
"grad_norm": 26.311481475830078, |
|
"learning_rate": 2.34721723518851e-05, |
|
"loss": 14.5615, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.9614003590664273, |
|
"grad_norm": 24.902671813964844, |
|
"learning_rate": 2.3526032315978457e-05, |
|
"loss": 14.267, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 1.9658886894075405, |
|
"grad_norm": 24.723201751708984, |
|
"learning_rate": 2.3579892280071816e-05, |
|
"loss": 14.5657, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.9703770197486534, |
|
"grad_norm": 26.1663818359375, |
|
"learning_rate": 2.363375224416517e-05, |
|
"loss": 14.6454, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 1.9748653500897666, |
|
"grad_norm": 24.78443145751953, |
|
"learning_rate": 2.3687612208258527e-05, |
|
"loss": 14.4444, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.9793536804308798, |
|
"grad_norm": 24.568164825439453, |
|
"learning_rate": 2.3741472172351885e-05, |
|
"loss": 14.6314, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.9838420107719927, |
|
"grad_norm": 26.20634651184082, |
|
"learning_rate": 2.379533213644524e-05, |
|
"loss": 14.6824, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.988330341113106, |
|
"grad_norm": 24.453754425048828, |
|
"learning_rate": 2.3849192100538603e-05, |
|
"loss": 14.7177, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 1.992818671454219, |
|
"grad_norm": 25.506359100341797, |
|
"learning_rate": 2.3903052064631958e-05, |
|
"loss": 14.5953, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.997307001795332, |
|
"grad_norm": 24.724069595336914, |
|
"learning_rate": 2.3956912028725314e-05, |
|
"loss": 14.5515, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.059114429190613486, |
|
"eval_f1_macro": 0.0023152878504535197, |
|
"eval_f1_micro": 0.059114429190613486, |
|
"eval_f1_weighted": 0.026248109088727177, |
|
"eval_loss": 7.744897365570068, |
|
"eval_precision_macro": 0.002216938480351738, |
|
"eval_precision_micro": 0.059114429190613486, |
|
"eval_precision_weighted": 0.02310532518025774, |
|
"eval_recall_macro": 0.005032399335473846, |
|
"eval_recall_micro": 0.059114429190613486, |
|
"eval_recall_weighted": 0.059114429190613486, |
|
"eval_runtime": 86.2961, |
|
"eval_samples_per_second": 606.899, |
|
"eval_steps_per_second": 18.97, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.0017953321364454, |
|
"grad_norm": 25.532371520996094, |
|
"learning_rate": 2.4010771992818672e-05, |
|
"loss": 14.3659, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.0062836624775584, |
|
"grad_norm": 25.71830177307129, |
|
"learning_rate": 2.4064631956912028e-05, |
|
"loss": 13.8419, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 2.0107719928186714, |
|
"grad_norm": 27.925411224365234, |
|
"learning_rate": 2.4118491921005386e-05, |
|
"loss": 13.9401, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.0152603231597848, |
|
"grad_norm": 26.441532135009766, |
|
"learning_rate": 2.4172351885098742e-05, |
|
"loss": 14.0487, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 2.0197486535008977, |
|
"grad_norm": 25.631881713867188, |
|
"learning_rate": 2.4226211849192104e-05, |
|
"loss": 13.4916, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.0242369838420107, |
|
"grad_norm": 25.339025497436523, |
|
"learning_rate": 2.428007181328546e-05, |
|
"loss": 13.5516, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 2.028725314183124, |
|
"grad_norm": 26.991966247558594, |
|
"learning_rate": 2.4333931777378815e-05, |
|
"loss": 13.4598, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.033213644524237, |
|
"grad_norm": 25.9316463470459, |
|
"learning_rate": 2.4387791741472174e-05, |
|
"loss": 13.7771, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 2.03770197486535, |
|
"grad_norm": 27.35523796081543, |
|
"learning_rate": 2.444165170556553e-05, |
|
"loss": 13.721, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.0421903052064634, |
|
"grad_norm": 27.451637268066406, |
|
"learning_rate": 2.4495511669658888e-05, |
|
"loss": 13.7572, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 2.0466786355475763, |
|
"grad_norm": 27.497739791870117, |
|
"learning_rate": 2.4549371633752243e-05, |
|
"loss": 13.7687, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.0511669658886893, |
|
"grad_norm": 26.42055892944336, |
|
"learning_rate": 2.4603231597845602e-05, |
|
"loss": 13.8483, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 2.0556552962298027, |
|
"grad_norm": 26.251361846923828, |
|
"learning_rate": 2.465709156193896e-05, |
|
"loss": 13.4564, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.0601436265709157, |
|
"grad_norm": 27.7249813079834, |
|
"learning_rate": 2.4710951526032316e-05, |
|
"loss": 13.6193, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 2.0646319569120286, |
|
"grad_norm": 29.7418155670166, |
|
"learning_rate": 2.4764811490125675e-05, |
|
"loss": 13.6154, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.069120287253142, |
|
"grad_norm": 28.159162521362305, |
|
"learning_rate": 2.481867145421903e-05, |
|
"loss": 13.505, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 2.073608617594255, |
|
"grad_norm": 27.0701904296875, |
|
"learning_rate": 2.487253141831239e-05, |
|
"loss": 13.6783, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.078096947935368, |
|
"grad_norm": 28.18494987487793, |
|
"learning_rate": 2.4924236983842012e-05, |
|
"loss": 13.6024, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 2.0825852782764813, |
|
"grad_norm": 25.40494155883789, |
|
"learning_rate": 2.4978096947935367e-05, |
|
"loss": 13.7101, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.0870736086175943, |
|
"grad_norm": 28.17936897277832, |
|
"learning_rate": 2.5031956912028726e-05, |
|
"loss": 13.8797, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 2.0915619389587072, |
|
"grad_norm": 28.881277084350586, |
|
"learning_rate": 2.5085816876122085e-05, |
|
"loss": 13.6777, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.0960502692998206, |
|
"grad_norm": 25.790342330932617, |
|
"learning_rate": 2.513967684021544e-05, |
|
"loss": 13.5791, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 2.1005385996409336, |
|
"grad_norm": 28.37506866455078, |
|
"learning_rate": 2.51935368043088e-05, |
|
"loss": 13.4982, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.1050269299820465, |
|
"grad_norm": 33.875404357910156, |
|
"learning_rate": 2.5247396768402154e-05, |
|
"loss": 13.5064, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 2.10951526032316, |
|
"grad_norm": 28.881078720092773, |
|
"learning_rate": 2.530125673249551e-05, |
|
"loss": 13.6496, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.114003590664273, |
|
"grad_norm": 26.983850479125977, |
|
"learning_rate": 2.535511669658887e-05, |
|
"loss": 13.3992, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 2.118491921005386, |
|
"grad_norm": 26.257688522338867, |
|
"learning_rate": 2.5408976660682227e-05, |
|
"loss": 13.7699, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.1229802513464993, |
|
"grad_norm": 28.320302963256836, |
|
"learning_rate": 2.5462836624775586e-05, |
|
"loss": 13.3924, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 2.127468581687612, |
|
"grad_norm": 28.05795669555664, |
|
"learning_rate": 2.551669658886894e-05, |
|
"loss": 13.5392, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.131956912028725, |
|
"grad_norm": 29.30341911315918, |
|
"learning_rate": 2.55705565529623e-05, |
|
"loss": 13.4195, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 2.1364452423698386, |
|
"grad_norm": 27.965492248535156, |
|
"learning_rate": 2.5624416517055655e-05, |
|
"loss": 13.6041, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.1409335727109515, |
|
"grad_norm": 29.342981338500977, |
|
"learning_rate": 2.567827648114901e-05, |
|
"loss": 13.4952, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 2.1454219030520645, |
|
"grad_norm": 29.504013061523438, |
|
"learning_rate": 2.573213644524237e-05, |
|
"loss": 13.3822, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.149910233393178, |
|
"grad_norm": 25.68410301208496, |
|
"learning_rate": 2.578599640933573e-05, |
|
"loss": 13.6285, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 2.154398563734291, |
|
"grad_norm": 27.036991119384766, |
|
"learning_rate": 2.5839856373429087e-05, |
|
"loss": 13.9489, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.158886894075404, |
|
"grad_norm": 28.70158576965332, |
|
"learning_rate": 2.5893716337522443e-05, |
|
"loss": 13.6128, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 2.163375224416517, |
|
"grad_norm": 27.817323684692383, |
|
"learning_rate": 2.59475763016158e-05, |
|
"loss": 13.8509, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.16786355475763, |
|
"grad_norm": 26.909086227416992, |
|
"learning_rate": 2.6001436265709157e-05, |
|
"loss": 13.4432, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 2.172351885098743, |
|
"grad_norm": 27.109466552734375, |
|
"learning_rate": 2.6055296229802512e-05, |
|
"loss": 13.3693, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.1768402154398565, |
|
"grad_norm": 29.08690643310547, |
|
"learning_rate": 2.610915619389587e-05, |
|
"loss": 13.7364, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 2.1813285457809695, |
|
"grad_norm": 28.68939971923828, |
|
"learning_rate": 2.616301615798923e-05, |
|
"loss": 13.7631, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.1858168761220824, |
|
"grad_norm": 28.95443344116211, |
|
"learning_rate": 2.621687612208259e-05, |
|
"loss": 14.0335, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 2.190305206463196, |
|
"grad_norm": 29.304248809814453, |
|
"learning_rate": 2.6270736086175944e-05, |
|
"loss": 13.6967, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.1947935368043088, |
|
"grad_norm": 27.95583152770996, |
|
"learning_rate": 2.63245960502693e-05, |
|
"loss": 13.5719, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 2.1992818671454217, |
|
"grad_norm": 27.92197608947754, |
|
"learning_rate": 2.6378456014362658e-05, |
|
"loss": 13.6523, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.203770197486535, |
|
"grad_norm": 29.322330474853516, |
|
"learning_rate": 2.6432315978456013e-05, |
|
"loss": 13.5422, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 2.208258527827648, |
|
"grad_norm": 29.324125289916992, |
|
"learning_rate": 2.6486175942549372e-05, |
|
"loss": 13.639, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.212746858168761, |
|
"grad_norm": 27.53671646118164, |
|
"learning_rate": 2.654003590664273e-05, |
|
"loss": 13.7083, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 2.2172351885098744, |
|
"grad_norm": 28.272226333618164, |
|
"learning_rate": 2.659389587073609e-05, |
|
"loss": 13.7521, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.2217235188509874, |
|
"grad_norm": 28.756206512451172, |
|
"learning_rate": 2.6647755834829445e-05, |
|
"loss": 13.3446, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 2.2262118491921004, |
|
"grad_norm": 27.521116256713867, |
|
"learning_rate": 2.67016157989228e-05, |
|
"loss": 13.3676, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.2307001795332138, |
|
"grad_norm": 28.232725143432617, |
|
"learning_rate": 2.675547576301616e-05, |
|
"loss": 13.7248, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 2.2351885098743267, |
|
"grad_norm": 27.95871353149414, |
|
"learning_rate": 2.6809335727109514e-05, |
|
"loss": 13.3986, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.2396768402154397, |
|
"grad_norm": 26.93558692932129, |
|
"learning_rate": 2.6863195691202873e-05, |
|
"loss": 13.5971, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 2.244165170556553, |
|
"grad_norm": 27.357070922851562, |
|
"learning_rate": 2.6914901256732496e-05, |
|
"loss": 13.8007, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.248653500897666, |
|
"grad_norm": 34.84161376953125, |
|
"learning_rate": 2.6968761220825855e-05, |
|
"loss": 13.3402, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 2.253141831238779, |
|
"grad_norm": 29.713102340698242, |
|
"learning_rate": 2.702262118491921e-05, |
|
"loss": 13.4515, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.2576301615798924, |
|
"grad_norm": 31.844457626342773, |
|
"learning_rate": 2.707648114901257e-05, |
|
"loss": 13.4538, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 2.2621184919210053, |
|
"grad_norm": 31.339860916137695, |
|
"learning_rate": 2.7130341113105924e-05, |
|
"loss": 13.7536, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.2666068222621183, |
|
"grad_norm": 27.18288803100586, |
|
"learning_rate": 2.7184201077199283e-05, |
|
"loss": 13.361, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 2.2710951526032317, |
|
"grad_norm": 25.645360946655273, |
|
"learning_rate": 2.723806104129264e-05, |
|
"loss": 13.7802, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.2755834829443446, |
|
"grad_norm": 28.508298873901367, |
|
"learning_rate": 2.7291921005385997e-05, |
|
"loss": 13.4987, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 2.280071813285458, |
|
"grad_norm": 26.898292541503906, |
|
"learning_rate": 2.7345780969479356e-05, |
|
"loss": 13.6794, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.284560143626571, |
|
"grad_norm": 40.84425354003906, |
|
"learning_rate": 2.739964093357271e-05, |
|
"loss": 13.3866, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 2.289048473967684, |
|
"grad_norm": 27.576169967651367, |
|
"learning_rate": 2.745350089766607e-05, |
|
"loss": 13.7029, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.293536804308797, |
|
"grad_norm": 27.815526962280273, |
|
"learning_rate": 2.7507360861759426e-05, |
|
"loss": 13.6855, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 2.2980251346499103, |
|
"grad_norm": 26.595399856567383, |
|
"learning_rate": 2.7561220825852784e-05, |
|
"loss": 13.3868, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.3025134649910233, |
|
"grad_norm": 27.15950584411621, |
|
"learning_rate": 2.761508078994614e-05, |
|
"loss": 13.3696, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 2.3070017953321367, |
|
"grad_norm": 28.6210994720459, |
|
"learning_rate": 2.7668940754039495e-05, |
|
"loss": 13.5515, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.3114901256732496, |
|
"grad_norm": 27.74658203125, |
|
"learning_rate": 2.7722800718132857e-05, |
|
"loss": 13.0361, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 2.3159784560143626, |
|
"grad_norm": 26.844989776611328, |
|
"learning_rate": 2.7776660682226213e-05, |
|
"loss": 13.5466, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.3204667863554755, |
|
"grad_norm": 27.64177703857422, |
|
"learning_rate": 2.783052064631957e-05, |
|
"loss": 13.8139, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 2.324955116696589, |
|
"grad_norm": 28.158784866333008, |
|
"learning_rate": 2.7884380610412927e-05, |
|
"loss": 13.7636, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.329443447037702, |
|
"grad_norm": 28.323238372802734, |
|
"learning_rate": 2.7938240574506286e-05, |
|
"loss": 13.3848, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 2.3339317773788153, |
|
"grad_norm": 28.48469352722168, |
|
"learning_rate": 2.799210053859964e-05, |
|
"loss": 13.6163, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.3384201077199283, |
|
"grad_norm": 26.27099609375, |
|
"learning_rate": 2.8045960502692996e-05, |
|
"loss": 13.3823, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 2.342908438061041, |
|
"grad_norm": 27.050186157226562, |
|
"learning_rate": 2.8099820466786355e-05, |
|
"loss": 13.6565, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.347396768402154, |
|
"grad_norm": 26.83416748046875, |
|
"learning_rate": 2.8153680430879714e-05, |
|
"loss": 13.5032, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 2.3518850987432676, |
|
"grad_norm": 25.751502990722656, |
|
"learning_rate": 2.8207540394973073e-05, |
|
"loss": 13.5242, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.3563734290843805, |
|
"grad_norm": 27.54896354675293, |
|
"learning_rate": 2.8261400359066428e-05, |
|
"loss": 13.6656, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 2.360861759425494, |
|
"grad_norm": 29.93552017211914, |
|
"learning_rate": 2.8315260323159787e-05, |
|
"loss": 13.7181, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.365350089766607, |
|
"grad_norm": 34.247100830078125, |
|
"learning_rate": 2.8369120287253142e-05, |
|
"loss": 13.3388, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 2.36983842010772, |
|
"grad_norm": 27.253677368164062, |
|
"learning_rate": 2.8422980251346498e-05, |
|
"loss": 12.8927, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.374326750448833, |
|
"grad_norm": 26.714345932006836, |
|
"learning_rate": 2.8476840215439856e-05, |
|
"loss": 13.1986, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 2.378815080789946, |
|
"grad_norm": 28.791046142578125, |
|
"learning_rate": 2.8530700179533215e-05, |
|
"loss": 13.3162, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.383303411131059, |
|
"grad_norm": 27.82441520690918, |
|
"learning_rate": 2.8584560143626574e-05, |
|
"loss": 13.6409, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 2.3877917414721725, |
|
"grad_norm": 27.760778427124023, |
|
"learning_rate": 2.863842010771993e-05, |
|
"loss": 13.4591, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.3922800718132855, |
|
"grad_norm": 35.298912048339844, |
|
"learning_rate": 2.8692280071813285e-05, |
|
"loss": 13.5868, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 2.3967684021543985, |
|
"grad_norm": 29.174081802368164, |
|
"learning_rate": 2.8746140035906643e-05, |
|
"loss": 12.9569, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.401256732495512, |
|
"grad_norm": 28.78097152709961, |
|
"learning_rate": 2.88e-05, |
|
"loss": 13.405, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 2.405745062836625, |
|
"grad_norm": 28.48590660095215, |
|
"learning_rate": 2.8853859964093357e-05, |
|
"loss": 13.8227, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.4102333931777378, |
|
"grad_norm": 27.466550827026367, |
|
"learning_rate": 2.8907719928186716e-05, |
|
"loss": 13.3373, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 2.414721723518851, |
|
"grad_norm": 26.298185348510742, |
|
"learning_rate": 2.8961579892280075e-05, |
|
"loss": 13.3942, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.419210053859964, |
|
"grad_norm": 27.673166275024414, |
|
"learning_rate": 2.901543985637343e-05, |
|
"loss": 13.3092, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 2.423698384201077, |
|
"grad_norm": 27.58799171447754, |
|
"learning_rate": 2.9069299820466786e-05, |
|
"loss": 13.2923, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.4281867145421905, |
|
"grad_norm": 28.616209030151367, |
|
"learning_rate": 2.9123159784560144e-05, |
|
"loss": 13.7892, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 2.4326750448833034, |
|
"grad_norm": 27.34395980834961, |
|
"learning_rate": 2.91770197486535e-05, |
|
"loss": 13.4703, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.4371633752244164, |
|
"grad_norm": 27.241291046142578, |
|
"learning_rate": 2.923087971274686e-05, |
|
"loss": 13.6906, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 2.44165170556553, |
|
"grad_norm": 31.22068214416504, |
|
"learning_rate": 2.9284739676840217e-05, |
|
"loss": 13.0538, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.4461400359066428, |
|
"grad_norm": 27.56983184814453, |
|
"learning_rate": 2.9338599640933573e-05, |
|
"loss": 13.4391, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 2.4506283662477557, |
|
"grad_norm": 27.46451187133789, |
|
"learning_rate": 2.939245960502693e-05, |
|
"loss": 13.4247, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.455116696588869, |
|
"grad_norm": 27.22041893005371, |
|
"learning_rate": 2.9446319569120287e-05, |
|
"loss": 13.2423, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 2.459605026929982, |
|
"grad_norm": 44.078704833984375, |
|
"learning_rate": 2.9500179533213646e-05, |
|
"loss": 12.9909, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.464093357271095, |
|
"grad_norm": 28.11593246459961, |
|
"learning_rate": 2.9554039497307e-05, |
|
"loss": 13.2222, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 2.4685816876122084, |
|
"grad_norm": 28.899824142456055, |
|
"learning_rate": 2.960789946140036e-05, |
|
"loss": 13.4138, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.4730700179533214, |
|
"grad_norm": 27.567039489746094, |
|
"learning_rate": 2.966175942549372e-05, |
|
"loss": 13.5078, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 2.4775583482944343, |
|
"grad_norm": 26.155046463012695, |
|
"learning_rate": 2.9715619389587074e-05, |
|
"loss": 13.2964, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.4820466786355477, |
|
"grad_norm": 26.821226119995117, |
|
"learning_rate": 2.9769479353680433e-05, |
|
"loss": 13.7765, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.4865350089766607, |
|
"grad_norm": 28.220781326293945, |
|
"learning_rate": 2.9823339317773788e-05, |
|
"loss": 13.6587, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.4910233393177736, |
|
"grad_norm": 29.53750228881836, |
|
"learning_rate": 2.9877199281867147e-05, |
|
"loss": 13.3947, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 2.495511669658887, |
|
"grad_norm": 26.887174606323242, |
|
"learning_rate": 2.9931059245960502e-05, |
|
"loss": 13.1447, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 27.31348419189453, |
|
"learning_rate": 2.998491921005386e-05, |
|
"loss": 13.2417, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 2.504488330341113, |
|
"grad_norm": 30.33110809326172, |
|
"learning_rate": 2.9995691202872533e-05, |
|
"loss": 13.3828, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.5089766606822264, |
|
"grad_norm": 27.644296646118164, |
|
"learning_rate": 2.9989706762417715e-05, |
|
"loss": 13.1784, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 2.5134649910233393, |
|
"grad_norm": 29.11074447631836, |
|
"learning_rate": 2.9983722321962897e-05, |
|
"loss": 13.2607, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.5179533213644523, |
|
"grad_norm": 27.747520446777344, |
|
"learning_rate": 2.997773788150808e-05, |
|
"loss": 13.5471, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 2.5224416517055657, |
|
"grad_norm": 26.138446807861328, |
|
"learning_rate": 2.9971753441053262e-05, |
|
"loss": 13.6022, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.5269299820466786, |
|
"grad_norm": 27.328989028930664, |
|
"learning_rate": 2.9965769000598445e-05, |
|
"loss": 13.1897, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 2.5314183123877916, |
|
"grad_norm": 26.998350143432617, |
|
"learning_rate": 2.9959784560143627e-05, |
|
"loss": 13.4755, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.535906642728905, |
|
"grad_norm": 27.311878204345703, |
|
"learning_rate": 2.995380011968881e-05, |
|
"loss": 12.9735, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 2.540394973070018, |
|
"grad_norm": 25.830198287963867, |
|
"learning_rate": 2.994781567923399e-05, |
|
"loss": 13.352, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.5448833034111313, |
|
"grad_norm": 26.87948989868164, |
|
"learning_rate": 2.9941831238779177e-05, |
|
"loss": 13.4053, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 2.5493716337522443, |
|
"grad_norm": 28.717430114746094, |
|
"learning_rate": 2.993584679832436e-05, |
|
"loss": 13.0502, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.5538599640933572, |
|
"grad_norm": 26.697654724121094, |
|
"learning_rate": 2.992986235786954e-05, |
|
"loss": 13.109, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 2.55834829443447, |
|
"grad_norm": 27.923320770263672, |
|
"learning_rate": 2.992387791741472e-05, |
|
"loss": 13.0734, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.5628366247755836, |
|
"grad_norm": 28.750234603881836, |
|
"learning_rate": 2.9917893476959903e-05, |
|
"loss": 13.2513, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 2.5673249551166966, |
|
"grad_norm": 28.91152000427246, |
|
"learning_rate": 2.9911909036505086e-05, |
|
"loss": 13.0385, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.57181328545781, |
|
"grad_norm": 26.92072105407715, |
|
"learning_rate": 2.990592459605027e-05, |
|
"loss": 13.289, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 2.576301615798923, |
|
"grad_norm": 26.42039680480957, |
|
"learning_rate": 2.9899940155595454e-05, |
|
"loss": 13.3365, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.580789946140036, |
|
"grad_norm": 26.49629783630371, |
|
"learning_rate": 2.9893955715140636e-05, |
|
"loss": 13.0466, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 2.585278276481149, |
|
"grad_norm": 26.710182189941406, |
|
"learning_rate": 2.988797127468582e-05, |
|
"loss": 13.196, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.5897666068222622, |
|
"grad_norm": 28.95528793334961, |
|
"learning_rate": 2.9881986834230998e-05, |
|
"loss": 13.2839, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 2.594254937163375, |
|
"grad_norm": 27.436601638793945, |
|
"learning_rate": 2.9876002393776183e-05, |
|
"loss": 13.195, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.5987432675044886, |
|
"grad_norm": 27.884984970092773, |
|
"learning_rate": 2.9870017953321366e-05, |
|
"loss": 13.3211, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 2.6032315978456015, |
|
"grad_norm": 28.28389549255371, |
|
"learning_rate": 2.9864033512866548e-05, |
|
"loss": 13.0379, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.6077199281867145, |
|
"grad_norm": 33.531089782714844, |
|
"learning_rate": 2.985804907241173e-05, |
|
"loss": 13.297, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 2.6122082585278275, |
|
"grad_norm": 27.326101303100586, |
|
"learning_rate": 2.9852064631956913e-05, |
|
"loss": 13.3307, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.616696588868941, |
|
"grad_norm": 26.402788162231445, |
|
"learning_rate": 2.9846080191502095e-05, |
|
"loss": 13.2087, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 2.621184919210054, |
|
"grad_norm": 28.52970314025879, |
|
"learning_rate": 2.984009575104728e-05, |
|
"loss": 13.0077, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.625673249551167, |
|
"grad_norm": 26.127384185791016, |
|
"learning_rate": 2.983411131059246e-05, |
|
"loss": 12.8619, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 2.63016157989228, |
|
"grad_norm": 26.900188446044922, |
|
"learning_rate": 2.9828126870137642e-05, |
|
"loss": 13.0219, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.634649910233393, |
|
"grad_norm": 28.075593948364258, |
|
"learning_rate": 2.9822142429682825e-05, |
|
"loss": 13.0576, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 2.639138240574506, |
|
"grad_norm": 27.4871883392334, |
|
"learning_rate": 2.9816157989228007e-05, |
|
"loss": 13.3393, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.6436265709156195, |
|
"grad_norm": 26.82506561279297, |
|
"learning_rate": 2.981017354877319e-05, |
|
"loss": 13.2037, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 2.6481149012567324, |
|
"grad_norm": 27.90208625793457, |
|
"learning_rate": 2.9804189108318375e-05, |
|
"loss": 13.2741, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.652603231597846, |
|
"grad_norm": 27.409181594848633, |
|
"learning_rate": 2.9798204667863557e-05, |
|
"loss": 13.0042, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 2.657091561938959, |
|
"grad_norm": 26.863079071044922, |
|
"learning_rate": 2.979222022740874e-05, |
|
"loss": 12.9851, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.6615798922800717, |
|
"grad_norm": 27.66518211364746, |
|
"learning_rate": 2.978623578695392e-05, |
|
"loss": 13.1473, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 2.6660682226211847, |
|
"grad_norm": 31.207706451416016, |
|
"learning_rate": 2.97802513464991e-05, |
|
"loss": 13.6109, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.670556552962298, |
|
"grad_norm": 26.27522087097168, |
|
"learning_rate": 2.9774266906044287e-05, |
|
"loss": 12.7777, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 2.675044883303411, |
|
"grad_norm": 28.05002784729004, |
|
"learning_rate": 2.976828246558947e-05, |
|
"loss": 13.4278, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.6795332136445245, |
|
"grad_norm": 27.554943084716797, |
|
"learning_rate": 2.976229802513465e-05, |
|
"loss": 13.4759, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 2.6840215439856374, |
|
"grad_norm": 28.544275283813477, |
|
"learning_rate": 2.9756313584679834e-05, |
|
"loss": 13.3619, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.6885098743267504, |
|
"grad_norm": 26.328645706176758, |
|
"learning_rate": 2.9750329144225016e-05, |
|
"loss": 13.3435, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 2.6929982046678633, |
|
"grad_norm": 28.869354248046875, |
|
"learning_rate": 2.97443447037702e-05, |
|
"loss": 12.8979, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.6974865350089767, |
|
"grad_norm": 26.356056213378906, |
|
"learning_rate": 2.973836026331538e-05, |
|
"loss": 13.2308, |
|
"step": 15025 |
|
}, |
|
{ |
|
"epoch": 2.7019748653500897, |
|
"grad_norm": 26.76312828063965, |
|
"learning_rate": 2.9732375822860563e-05, |
|
"loss": 13.4595, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.706463195691203, |
|
"grad_norm": 26.2951717376709, |
|
"learning_rate": 2.9726391382405746e-05, |
|
"loss": 12.9706, |
|
"step": 15075 |
|
}, |
|
{ |
|
"epoch": 2.710951526032316, |
|
"grad_norm": 26.80390739440918, |
|
"learning_rate": 2.9720406941950928e-05, |
|
"loss": 13.0039, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.715439856373429, |
|
"grad_norm": 27.611963272094727, |
|
"learning_rate": 2.971442250149611e-05, |
|
"loss": 12.9145, |
|
"step": 15125 |
|
}, |
|
{ |
|
"epoch": 2.719928186714542, |
|
"grad_norm": 28.494123458862305, |
|
"learning_rate": 2.9708438061041293e-05, |
|
"loss": 13.2181, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.7244165170556554, |
|
"grad_norm": 26.697126388549805, |
|
"learning_rate": 2.970245362058648e-05, |
|
"loss": 13.3677, |
|
"step": 15175 |
|
}, |
|
{ |
|
"epoch": 2.7289048473967683, |
|
"grad_norm": 27.672060012817383, |
|
"learning_rate": 2.9696469180131657e-05, |
|
"loss": 12.834, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.7333931777378817, |
|
"grad_norm": 28.86951446533203, |
|
"learning_rate": 2.969048473967684e-05, |
|
"loss": 13.2779, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 2.7378815080789947, |
|
"grad_norm": 26.693321228027344, |
|
"learning_rate": 2.9684500299222022e-05, |
|
"loss": 13.1227, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.7423698384201076, |
|
"grad_norm": 27.52298927307129, |
|
"learning_rate": 2.9678515858767205e-05, |
|
"loss": 12.6953, |
|
"step": 15275 |
|
}, |
|
{ |
|
"epoch": 2.7468581687612206, |
|
"grad_norm": 26.291044235229492, |
|
"learning_rate": 2.9672531418312387e-05, |
|
"loss": 13.1016, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.751346499102334, |
|
"grad_norm": 27.562095642089844, |
|
"learning_rate": 2.9666546977857573e-05, |
|
"loss": 13.1168, |
|
"step": 15325 |
|
}, |
|
{ |
|
"epoch": 2.755834829443447, |
|
"grad_norm": 26.268095016479492, |
|
"learning_rate": 2.9660562537402755e-05, |
|
"loss": 13.0068, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.7603231597845603, |
|
"grad_norm": 27.220062255859375, |
|
"learning_rate": 2.9654578096947937e-05, |
|
"loss": 12.8645, |
|
"step": 15375 |
|
}, |
|
{ |
|
"epoch": 2.7648114901256733, |
|
"grad_norm": 27.46253776550293, |
|
"learning_rate": 2.9648593656493116e-05, |
|
"loss": 13.3971, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.7692998204667862, |
|
"grad_norm": 26.30550193786621, |
|
"learning_rate": 2.96426092160383e-05, |
|
"loss": 12.8522, |
|
"step": 15425 |
|
}, |
|
{ |
|
"epoch": 2.773788150807899, |
|
"grad_norm": 27.612834930419922, |
|
"learning_rate": 2.9636624775583484e-05, |
|
"loss": 13.5025, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.7782764811490126, |
|
"grad_norm": 26.21208953857422, |
|
"learning_rate": 2.9630640335128667e-05, |
|
"loss": 12.552, |
|
"step": 15475 |
|
}, |
|
{ |
|
"epoch": 2.7827648114901256, |
|
"grad_norm": 27.3443546295166, |
|
"learning_rate": 2.962465589467385e-05, |
|
"loss": 13.1858, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.787253141831239, |
|
"grad_norm": 27.457931518554688, |
|
"learning_rate": 2.961867145421903e-05, |
|
"loss": 13.2157, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 2.791741472172352, |
|
"grad_norm": 28.71920394897461, |
|
"learning_rate": 2.9612687013764214e-05, |
|
"loss": 13.213, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.796229802513465, |
|
"grad_norm": 25.985244750976562, |
|
"learning_rate": 2.9606702573309396e-05, |
|
"loss": 12.9935, |
|
"step": 15575 |
|
}, |
|
{ |
|
"epoch": 2.800718132854578, |
|
"grad_norm": 25.949575424194336, |
|
"learning_rate": 2.960071813285458e-05, |
|
"loss": 12.9924, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.8052064631956912, |
|
"grad_norm": 26.65997314453125, |
|
"learning_rate": 2.959473369239976e-05, |
|
"loss": 13.1186, |
|
"step": 15625 |
|
}, |
|
{ |
|
"epoch": 2.809694793536804, |
|
"grad_norm": 26.854761123657227, |
|
"learning_rate": 2.9588749251944943e-05, |
|
"loss": 13.0178, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.8141831238779176, |
|
"grad_norm": 26.749004364013672, |
|
"learning_rate": 2.9582764811490126e-05, |
|
"loss": 13.0751, |
|
"step": 15675 |
|
}, |
|
{ |
|
"epoch": 2.8186714542190305, |
|
"grad_norm": 26.282302856445312, |
|
"learning_rate": 2.9576780371035308e-05, |
|
"loss": 12.8906, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.8231597845601435, |
|
"grad_norm": 26.395767211914062, |
|
"learning_rate": 2.957079593058049e-05, |
|
"loss": 13.3278, |
|
"step": 15725 |
|
}, |
|
{ |
|
"epoch": 2.827648114901257, |
|
"grad_norm": 26.95943832397461, |
|
"learning_rate": 2.9564811490125676e-05, |
|
"loss": 12.9831, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.83213644524237, |
|
"grad_norm": 28.028095245361328, |
|
"learning_rate": 2.955882704967086e-05, |
|
"loss": 12.932, |
|
"step": 15775 |
|
}, |
|
{ |
|
"epoch": 2.836624775583483, |
|
"grad_norm": 27.706012725830078, |
|
"learning_rate": 2.9552842609216038e-05, |
|
"loss": 12.7754, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.841113105924596, |
|
"grad_norm": 27.81289291381836, |
|
"learning_rate": 2.954685816876122e-05, |
|
"loss": 12.8489, |
|
"step": 15825 |
|
}, |
|
{ |
|
"epoch": 2.845601436265709, |
|
"grad_norm": 26.71699333190918, |
|
"learning_rate": 2.9540873728306402e-05, |
|
"loss": 12.6091, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.850089766606822, |
|
"grad_norm": 65.94219207763672, |
|
"learning_rate": 2.9534889287851588e-05, |
|
"loss": 13.0053, |
|
"step": 15875 |
|
}, |
|
{ |
|
"epoch": 2.8545780969479355, |
|
"grad_norm": 32.33103561401367, |
|
"learning_rate": 2.952890484739677e-05, |
|
"loss": 12.4834, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.8590664272890485, |
|
"grad_norm": 25.59375, |
|
"learning_rate": 2.9522920406941953e-05, |
|
"loss": 13.0441, |
|
"step": 15925 |
|
}, |
|
{ |
|
"epoch": 2.8635547576301614, |
|
"grad_norm": 26.32273292541504, |
|
"learning_rate": 2.9516935966487135e-05, |
|
"loss": 12.701, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.868043087971275, |
|
"grad_norm": 27.84789276123047, |
|
"learning_rate": 2.9510951526032317e-05, |
|
"loss": 13.1712, |
|
"step": 15975 |
|
}, |
|
{ |
|
"epoch": 2.872531418312388, |
|
"grad_norm": 27.111125946044922, |
|
"learning_rate": 2.9504967085577496e-05, |
|
"loss": 12.7789, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.8770197486535007, |
|
"grad_norm": 26.045406341552734, |
|
"learning_rate": 2.9498982645122682e-05, |
|
"loss": 12.7077, |
|
"step": 16025 |
|
}, |
|
{ |
|
"epoch": 2.881508078994614, |
|
"grad_norm": 26.169029235839844, |
|
"learning_rate": 2.9492998204667865e-05, |
|
"loss": 13.0239, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.885996409335727, |
|
"grad_norm": 26.920217514038086, |
|
"learning_rate": 2.9487013764213047e-05, |
|
"loss": 12.978, |
|
"step": 16075 |
|
}, |
|
{ |
|
"epoch": 2.89048473967684, |
|
"grad_norm": 26.622011184692383, |
|
"learning_rate": 2.948102932375823e-05, |
|
"loss": 12.825, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.8949730700179535, |
|
"grad_norm": 26.462886810302734, |
|
"learning_rate": 2.947504488330341e-05, |
|
"loss": 12.8208, |
|
"step": 16125 |
|
}, |
|
{ |
|
"epoch": 2.8994614003590664, |
|
"grad_norm": 26.220985412597656, |
|
"learning_rate": 2.9469060442848594e-05, |
|
"loss": 13.3692, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.9039497307001794, |
|
"grad_norm": 27.57528305053711, |
|
"learning_rate": 2.946307600239378e-05, |
|
"loss": 12.7889, |
|
"step": 16175 |
|
}, |
|
{ |
|
"epoch": 2.9084380610412928, |
|
"grad_norm": 27.193159103393555, |
|
"learning_rate": 2.945709156193896e-05, |
|
"loss": 12.9411, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.9129263913824057, |
|
"grad_norm": 28.573688507080078, |
|
"learning_rate": 2.945110712148414e-05, |
|
"loss": 13.0207, |
|
"step": 16225 |
|
}, |
|
{ |
|
"epoch": 2.917414721723519, |
|
"grad_norm": 27.21942710876465, |
|
"learning_rate": 2.9445122681029323e-05, |
|
"loss": 12.8121, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.921903052064632, |
|
"grad_norm": 25.42641258239746, |
|
"learning_rate": 2.9439138240574506e-05, |
|
"loss": 13.0534, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 2.926391382405745, |
|
"grad_norm": 26.955564498901367, |
|
"learning_rate": 2.943315380011969e-05, |
|
"loss": 12.6557, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.930879712746858, |
|
"grad_norm": 26.791296005249023, |
|
"learning_rate": 2.9427169359664874e-05, |
|
"loss": 12.6982, |
|
"step": 16325 |
|
}, |
|
{ |
|
"epoch": 2.9353680430879714, |
|
"grad_norm": 27.43919563293457, |
|
"learning_rate": 2.9421184919210056e-05, |
|
"loss": 13.1458, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.9398563734290843, |
|
"grad_norm": 26.005870819091797, |
|
"learning_rate": 2.941520047875524e-05, |
|
"loss": 13.3099, |
|
"step": 16375 |
|
}, |
|
{ |
|
"epoch": 2.9443447037701977, |
|
"grad_norm": 26.166765213012695, |
|
"learning_rate": 2.9409216038300418e-05, |
|
"loss": 12.7118, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.9488330341113107, |
|
"grad_norm": 26.198945999145508, |
|
"learning_rate": 2.94032315978456e-05, |
|
"loss": 13.0126, |
|
"step": 16425 |
|
}, |
|
{ |
|
"epoch": 2.9533213644524237, |
|
"grad_norm": 27.599916458129883, |
|
"learning_rate": 2.9397247157390786e-05, |
|
"loss": 12.4839, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.9578096947935366, |
|
"grad_norm": 26.379606246948242, |
|
"learning_rate": 2.9391262716935968e-05, |
|
"loss": 13.1094, |
|
"step": 16475 |
|
}, |
|
{ |
|
"epoch": 2.96229802513465, |
|
"grad_norm": 26.30647850036621, |
|
"learning_rate": 2.938527827648115e-05, |
|
"loss": 13.0026, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.966786355475763, |
|
"grad_norm": 27.161256790161133, |
|
"learning_rate": 2.9379293836026333e-05, |
|
"loss": 12.795, |
|
"step": 16525 |
|
}, |
|
{ |
|
"epoch": 2.9712746858168764, |
|
"grad_norm": 27.510034561157227, |
|
"learning_rate": 2.9373309395571515e-05, |
|
"loss": 12.4387, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.9757630161579893, |
|
"grad_norm": 28.14108657836914, |
|
"learning_rate": 2.9367324955116697e-05, |
|
"loss": 13.1286, |
|
"step": 16575 |
|
}, |
|
{ |
|
"epoch": 2.9802513464991023, |
|
"grad_norm": 28.018766403198242, |
|
"learning_rate": 2.936134051466188e-05, |
|
"loss": 12.9119, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.9847396768402152, |
|
"grad_norm": 27.52519416809082, |
|
"learning_rate": 2.9355356074207062e-05, |
|
"loss": 13.2577, |
|
"step": 16625 |
|
}, |
|
{ |
|
"epoch": 2.9892280071813286, |
|
"grad_norm": 26.498538970947266, |
|
"learning_rate": 2.9349371633752245e-05, |
|
"loss": 12.8444, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.9937163375224416, |
|
"grad_norm": 27.386394500732422, |
|
"learning_rate": 2.9343387193297427e-05, |
|
"loss": 12.9318, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 2.998204667863555, |
|
"grad_norm": 29.109481811523438, |
|
"learning_rate": 2.933740275284261e-05, |
|
"loss": 13.3042, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.07543963492639337, |
|
"eval_f1_macro": 0.005658449303781104, |
|
"eval_f1_micro": 0.07543963492639337, |
|
"eval_f1_weighted": 0.04143719976295692, |
|
"eval_loss": 7.06182861328125, |
|
"eval_precision_macro": 0.005690112768572151, |
|
"eval_precision_micro": 0.07543963492639337, |
|
"eval_precision_weighted": 0.0367941063687332, |
|
"eval_recall_macro": 0.009608965585832425, |
|
"eval_recall_micro": 0.07543963492639337, |
|
"eval_recall_weighted": 0.07543963492639337, |
|
"eval_runtime": 86.5339, |
|
"eval_samples_per_second": 605.231, |
|
"eval_steps_per_second": 18.917, |
|
"step": 16710 |
|
}, |
|
{ |
|
"epoch": 3.002692998204668, |
|
"grad_norm": 26.483478546142578, |
|
"learning_rate": 2.933141831238779e-05, |
|
"loss": 11.5651, |
|
"step": 16725 |
|
}, |
|
{ |
|
"epoch": 3.007181328545781, |
|
"grad_norm": 28.335594177246094, |
|
"learning_rate": 2.9325433871932977e-05, |
|
"loss": 11.4552, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 3.011669658886894, |
|
"grad_norm": 26.723102569580078, |
|
"learning_rate": 2.931944943147816e-05, |
|
"loss": 11.1541, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 3.0161579892280073, |
|
"grad_norm": 28.930675506591797, |
|
"learning_rate": 2.931346499102334e-05, |
|
"loss": 11.2597, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.02064631956912, |
|
"grad_norm": 30.39067268371582, |
|
"learning_rate": 2.930748055056852e-05, |
|
"loss": 11.1364, |
|
"step": 16825 |
|
}, |
|
{ |
|
"epoch": 3.025134649910233, |
|
"grad_norm": 29.515583038330078, |
|
"learning_rate": 2.9301496110113703e-05, |
|
"loss": 11.1398, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 3.0296229802513466, |
|
"grad_norm": 29.533111572265625, |
|
"learning_rate": 2.929551166965889e-05, |
|
"loss": 11.3146, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 3.0341113105924595, |
|
"grad_norm": 28.315011978149414, |
|
"learning_rate": 2.928952722920407e-05, |
|
"loss": 10.9237, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.0385996409335725, |
|
"grad_norm": 27.643081665039062, |
|
"learning_rate": 2.9283542788749254e-05, |
|
"loss": 11.0213, |
|
"step": 16925 |
|
}, |
|
{ |
|
"epoch": 3.043087971274686, |
|
"grad_norm": 30.351112365722656, |
|
"learning_rate": 2.9277558348294436e-05, |
|
"loss": 11.2419, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 3.047576301615799, |
|
"grad_norm": 31.334726333618164, |
|
"learning_rate": 2.927157390783962e-05, |
|
"loss": 11.1816, |
|
"step": 16975 |
|
}, |
|
{ |
|
"epoch": 3.0520646319569122, |
|
"grad_norm": 28.574382781982422, |
|
"learning_rate": 2.9265589467384798e-05, |
|
"loss": 10.8629, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.056552962298025, |
|
"grad_norm": 30.646869659423828, |
|
"learning_rate": 2.9259605026929983e-05, |
|
"loss": 11.0935, |
|
"step": 17025 |
|
}, |
|
{ |
|
"epoch": 3.061041292639138, |
|
"grad_norm": 38.04651641845703, |
|
"learning_rate": 2.9253620586475166e-05, |
|
"loss": 11.3436, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 3.0655296229802516, |
|
"grad_norm": 29.01982307434082, |
|
"learning_rate": 2.9247636146020348e-05, |
|
"loss": 11.0168, |
|
"step": 17075 |
|
}, |
|
{ |
|
"epoch": 3.0700179533213645, |
|
"grad_norm": 31.702123641967773, |
|
"learning_rate": 2.924165170556553e-05, |
|
"loss": 11.1176, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.0745062836624775, |
|
"grad_norm": 31.976844787597656, |
|
"learning_rate": 2.9235667265110713e-05, |
|
"loss": 11.5311, |
|
"step": 17125 |
|
}, |
|
{ |
|
"epoch": 3.078994614003591, |
|
"grad_norm": 29.563053131103516, |
|
"learning_rate": 2.9229682824655895e-05, |
|
"loss": 10.9299, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 3.083482944344704, |
|
"grad_norm": 31.436248779296875, |
|
"learning_rate": 2.9223698384201077e-05, |
|
"loss": 11.1627, |
|
"step": 17175 |
|
}, |
|
{ |
|
"epoch": 3.087971274685817, |
|
"grad_norm": 30.475858688354492, |
|
"learning_rate": 2.921771394374626e-05, |
|
"loss": 11.3677, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 3.09245960502693, |
|
"grad_norm": 29.236719131469727, |
|
"learning_rate": 2.9211729503291442e-05, |
|
"loss": 11.2419, |
|
"step": 17225 |
|
}, |
|
{ |
|
"epoch": 3.096947935368043, |
|
"grad_norm": 32.13743209838867, |
|
"learning_rate": 2.9205745062836625e-05, |
|
"loss": 11.1695, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 3.101436265709156, |
|
"grad_norm": 31.184057235717773, |
|
"learning_rate": 2.9199760622381807e-05, |
|
"loss": 10.8847, |
|
"step": 17275 |
|
}, |
|
{ |
|
"epoch": 3.1059245960502695, |
|
"grad_norm": 35.40129852294922, |
|
"learning_rate": 2.9193776181926993e-05, |
|
"loss": 11.4488, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 3.1104129263913824, |
|
"grad_norm": 31.04747772216797, |
|
"learning_rate": 2.9187791741472175e-05, |
|
"loss": 11.3415, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 3.1149012567324954, |
|
"grad_norm": 30.742427825927734, |
|
"learning_rate": 2.9181807301017357e-05, |
|
"loss": 11.0896, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 3.119389587073609, |
|
"grad_norm": 29.326475143432617, |
|
"learning_rate": 2.9175822860562536e-05, |
|
"loss": 11.2907, |
|
"step": 17375 |
|
}, |
|
{ |
|
"epoch": 3.1238779174147218, |
|
"grad_norm": 33.5991325378418, |
|
"learning_rate": 2.916983842010772e-05, |
|
"loss": 11.0482, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.1283662477558347, |
|
"grad_norm": 32.41011428833008, |
|
"learning_rate": 2.91638539796529e-05, |
|
"loss": 10.9863, |
|
"step": 17425 |
|
}, |
|
{ |
|
"epoch": 3.132854578096948, |
|
"grad_norm": 29.50647735595703, |
|
"learning_rate": 2.9157869539198087e-05, |
|
"loss": 11.4484, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 3.137342908438061, |
|
"grad_norm": 30.07097625732422, |
|
"learning_rate": 2.915188509874327e-05, |
|
"loss": 10.8256, |
|
"step": 17475 |
|
}, |
|
{ |
|
"epoch": 3.141831238779174, |
|
"grad_norm": 28.87929344177246, |
|
"learning_rate": 2.914590065828845e-05, |
|
"loss": 10.7541, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.1463195691202874, |
|
"grad_norm": 34.39598083496094, |
|
"learning_rate": 2.9139916217833634e-05, |
|
"loss": 11.3925, |
|
"step": 17525 |
|
}, |
|
{ |
|
"epoch": 3.1508078994614004, |
|
"grad_norm": 30.967477798461914, |
|
"learning_rate": 2.9133931777378816e-05, |
|
"loss": 11.0977, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 3.1552962298025133, |
|
"grad_norm": 32.6968879699707, |
|
"learning_rate": 2.9127947336923995e-05, |
|
"loss": 10.9783, |
|
"step": 17575 |
|
}, |
|
{ |
|
"epoch": 3.1597845601436267, |
|
"grad_norm": 30.194917678833008, |
|
"learning_rate": 2.9122202274087372e-05, |
|
"loss": 11.0674, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 3.1642728904847397, |
|
"grad_norm": 29.895421981811523, |
|
"learning_rate": 2.9116217833632558e-05, |
|
"loss": 11.3506, |
|
"step": 17625 |
|
}, |
|
{ |
|
"epoch": 3.1687612208258527, |
|
"grad_norm": 30.785797119140625, |
|
"learning_rate": 2.911023339317774e-05, |
|
"loss": 11.044, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 3.173249551166966, |
|
"grad_norm": 31.407691955566406, |
|
"learning_rate": 2.9104248952722923e-05, |
|
"loss": 10.9295, |
|
"step": 17675 |
|
}, |
|
{ |
|
"epoch": 3.177737881508079, |
|
"grad_norm": 29.658754348754883, |
|
"learning_rate": 2.9098264512268102e-05, |
|
"loss": 11.2406, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.182226211849192, |
|
"grad_norm": 30.043371200561523, |
|
"learning_rate": 2.9092280071813284e-05, |
|
"loss": 10.9478, |
|
"step": 17725 |
|
}, |
|
{ |
|
"epoch": 3.1867145421903054, |
|
"grad_norm": 31.360021591186523, |
|
"learning_rate": 2.908629563135847e-05, |
|
"loss": 10.8657, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 3.1912028725314183, |
|
"grad_norm": 31.64422035217285, |
|
"learning_rate": 2.9080311190903652e-05, |
|
"loss": 10.8207, |
|
"step": 17775 |
|
}, |
|
{ |
|
"epoch": 3.1956912028725313, |
|
"grad_norm": 30.953533172607422, |
|
"learning_rate": 2.9074326750448835e-05, |
|
"loss": 11.0474, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 3.2001795332136447, |
|
"grad_norm": 29.29545783996582, |
|
"learning_rate": 2.9068342309994017e-05, |
|
"loss": 11.4172, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 3.2046678635547576, |
|
"grad_norm": 28.73203468322754, |
|
"learning_rate": 2.90623578695392e-05, |
|
"loss": 11.0947, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 3.2091561938958706, |
|
"grad_norm": 29.092605590820312, |
|
"learning_rate": 2.905637342908438e-05, |
|
"loss": 10.7874, |
|
"step": 17875 |
|
}, |
|
{ |
|
"epoch": 3.213644524236984, |
|
"grad_norm": 30.759441375732422, |
|
"learning_rate": 2.9050388988629564e-05, |
|
"loss": 11.1644, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 3.218132854578097, |
|
"grad_norm": 31.628297805786133, |
|
"learning_rate": 2.9044404548174746e-05, |
|
"loss": 11.6035, |
|
"step": 17925 |
|
}, |
|
{ |
|
"epoch": 3.22262118491921, |
|
"grad_norm": 32.346553802490234, |
|
"learning_rate": 2.903842010771993e-05, |
|
"loss": 10.9969, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 3.2271095152603233, |
|
"grad_norm": 29.345993041992188, |
|
"learning_rate": 2.903243566726511e-05, |
|
"loss": 11.2449, |
|
"step": 17975 |
|
}, |
|
{ |
|
"epoch": 3.2315978456014363, |
|
"grad_norm": 36.96156311035156, |
|
"learning_rate": 2.9026451226810293e-05, |
|
"loss": 11.157, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.236086175942549, |
|
"grad_norm": 31.43854522705078, |
|
"learning_rate": 2.9020466786355476e-05, |
|
"loss": 11.2116, |
|
"step": 18025 |
|
}, |
|
{ |
|
"epoch": 3.2405745062836626, |
|
"grad_norm": 31.491018295288086, |
|
"learning_rate": 2.901448234590066e-05, |
|
"loss": 10.9847, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 3.2450628366247756, |
|
"grad_norm": 31.342721939086914, |
|
"learning_rate": 2.900849790544584e-05, |
|
"loss": 11.0106, |
|
"step": 18075 |
|
}, |
|
{ |
|
"epoch": 3.2495511669658885, |
|
"grad_norm": 31.182981491088867, |
|
"learning_rate": 2.9002513464991023e-05, |
|
"loss": 11.2681, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 3.254039497307002, |
|
"grad_norm": 31.756725311279297, |
|
"learning_rate": 2.8996529024536205e-05, |
|
"loss": 11.1072, |
|
"step": 18125 |
|
}, |
|
{ |
|
"epoch": 3.258527827648115, |
|
"grad_norm": 28.509653091430664, |
|
"learning_rate": 2.8990544584081388e-05, |
|
"loss": 11.184, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 3.263016157989228, |
|
"grad_norm": 31.49736785888672, |
|
"learning_rate": 2.8984560143626573e-05, |
|
"loss": 11.2929, |
|
"step": 18175 |
|
}, |
|
{ |
|
"epoch": 3.2675044883303412, |
|
"grad_norm": 29.734132766723633, |
|
"learning_rate": 2.8978575703171756e-05, |
|
"loss": 11.1909, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 3.271992818671454, |
|
"grad_norm": 31.356077194213867, |
|
"learning_rate": 2.8972591262716938e-05, |
|
"loss": 11.468, |
|
"step": 18225 |
|
}, |
|
{ |
|
"epoch": 3.276481149012567, |
|
"grad_norm": 28.490388870239258, |
|
"learning_rate": 2.896660682226212e-05, |
|
"loss": 11.3047, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 3.2809694793536806, |
|
"grad_norm": 30.780508041381836, |
|
"learning_rate": 2.89606223818073e-05, |
|
"loss": 11.3076, |
|
"step": 18275 |
|
}, |
|
{ |
|
"epoch": 3.2854578096947935, |
|
"grad_norm": 29.654769897460938, |
|
"learning_rate": 2.8954637941352482e-05, |
|
"loss": 11.0864, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.2899461400359065, |
|
"grad_norm": 31.67804718017578, |
|
"learning_rate": 2.8948653500897668e-05, |
|
"loss": 11.2724, |
|
"step": 18325 |
|
}, |
|
{ |
|
"epoch": 3.29443447037702, |
|
"grad_norm": 29.71087646484375, |
|
"learning_rate": 2.894266906044285e-05, |
|
"loss": 11.3839, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 3.298922800718133, |
|
"grad_norm": 30.625585556030273, |
|
"learning_rate": 2.8936684619988032e-05, |
|
"loss": 11.1948, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 3.3034111310592458, |
|
"grad_norm": 33.840885162353516, |
|
"learning_rate": 2.8930700179533215e-05, |
|
"loss": 11.5731, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 3.307899461400359, |
|
"grad_norm": 31.30687713623047, |
|
"learning_rate": 2.8924715739078397e-05, |
|
"loss": 11.3931, |
|
"step": 18425 |
|
}, |
|
{ |
|
"epoch": 3.312387791741472, |
|
"grad_norm": 30.306846618652344, |
|
"learning_rate": 2.891873129862358e-05, |
|
"loss": 11.3634, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 3.316876122082585, |
|
"grad_norm": 31.10429573059082, |
|
"learning_rate": 2.891274685816876e-05, |
|
"loss": 11.383, |
|
"step": 18475 |
|
}, |
|
{ |
|
"epoch": 3.3213644524236985, |
|
"grad_norm": 31.466232299804688, |
|
"learning_rate": 2.8906762417713944e-05, |
|
"loss": 11.3213, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.3258527827648114, |
|
"grad_norm": 31.928709030151367, |
|
"learning_rate": 2.8900777977259126e-05, |
|
"loss": 11.4474, |
|
"step": 18525 |
|
}, |
|
{ |
|
"epoch": 3.3303411131059244, |
|
"grad_norm": 31.45096778869629, |
|
"learning_rate": 2.889479353680431e-05, |
|
"loss": 11.3677, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 3.334829443447038, |
|
"grad_norm": 31.321134567260742, |
|
"learning_rate": 2.888880909634949e-05, |
|
"loss": 11.0314, |
|
"step": 18575 |
|
}, |
|
{ |
|
"epoch": 3.3393177737881508, |
|
"grad_norm": 30.80310821533203, |
|
"learning_rate": 2.8882824655894673e-05, |
|
"loss": 11.3432, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.343806104129264, |
|
"grad_norm": 33.093849182128906, |
|
"learning_rate": 2.887684021543986e-05, |
|
"loss": 11.4452, |
|
"step": 18625 |
|
}, |
|
{ |
|
"epoch": 3.348294434470377, |
|
"grad_norm": 30.316701889038086, |
|
"learning_rate": 2.887085577498504e-05, |
|
"loss": 11.1295, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 3.35278276481149, |
|
"grad_norm": 30.940135955810547, |
|
"learning_rate": 2.886487133453022e-05, |
|
"loss": 11.2617, |
|
"step": 18675 |
|
}, |
|
{ |
|
"epoch": 3.357271095152603, |
|
"grad_norm": 28.96495246887207, |
|
"learning_rate": 2.8858886894075403e-05, |
|
"loss": 11.4463, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 3.3617594254937164, |
|
"grad_norm": 30.461139678955078, |
|
"learning_rate": 2.8852902453620585e-05, |
|
"loss": 11.3384, |
|
"step": 18725 |
|
}, |
|
{ |
|
"epoch": 3.3662477558348294, |
|
"grad_norm": 30.79012680053711, |
|
"learning_rate": 2.884691801316577e-05, |
|
"loss": 11.3678, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 3.370736086175943, |
|
"grad_norm": 31.620222091674805, |
|
"learning_rate": 2.8840933572710953e-05, |
|
"loss": 11.0789, |
|
"step": 18775 |
|
}, |
|
{ |
|
"epoch": 3.3752244165170557, |
|
"grad_norm": 29.551908493041992, |
|
"learning_rate": 2.8834949132256136e-05, |
|
"loss": 11.2116, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 3.3797127468581687, |
|
"grad_norm": 31.130882263183594, |
|
"learning_rate": 2.8828964691801318e-05, |
|
"loss": 11.4108, |
|
"step": 18825 |
|
}, |
|
{ |
|
"epoch": 3.3842010771992816, |
|
"grad_norm": 30.52980613708496, |
|
"learning_rate": 2.88229802513465e-05, |
|
"loss": 11.2848, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 3.388689407540395, |
|
"grad_norm": 31.423954010009766, |
|
"learning_rate": 2.881699581089168e-05, |
|
"loss": 11.1228, |
|
"step": 18875 |
|
}, |
|
{ |
|
"epoch": 3.393177737881508, |
|
"grad_norm": 30.197856903076172, |
|
"learning_rate": 2.8811011370436865e-05, |
|
"loss": 10.955, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.3976660682226214, |
|
"grad_norm": 29.411909103393555, |
|
"learning_rate": 2.8805026929982048e-05, |
|
"loss": 11.4046, |
|
"step": 18925 |
|
}, |
|
{ |
|
"epoch": 3.4021543985637344, |
|
"grad_norm": 30.500823974609375, |
|
"learning_rate": 2.879904248952723e-05, |
|
"loss": 11.3617, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 3.4066427289048473, |
|
"grad_norm": 31.399059295654297, |
|
"learning_rate": 2.8793058049072412e-05, |
|
"loss": 11.4427, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 3.4111310592459603, |
|
"grad_norm": 30.890851974487305, |
|
"learning_rate": 2.8787073608617595e-05, |
|
"loss": 11.4184, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.4156193895870737, |
|
"grad_norm": 31.299579620361328, |
|
"learning_rate": 2.8781089168162777e-05, |
|
"loss": 11.3553, |
|
"step": 19025 |
|
}, |
|
{ |
|
"epoch": 3.4201077199281866, |
|
"grad_norm": 30.379802703857422, |
|
"learning_rate": 2.8775104727707963e-05, |
|
"loss": 11.2289, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 3.4245960502693, |
|
"grad_norm": 30.748916625976562, |
|
"learning_rate": 2.8769120287253142e-05, |
|
"loss": 10.9974, |
|
"step": 19075 |
|
}, |
|
{ |
|
"epoch": 3.429084380610413, |
|
"grad_norm": 30.164533615112305, |
|
"learning_rate": 2.8763135846798324e-05, |
|
"loss": 11.2552, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 3.433572710951526, |
|
"grad_norm": 30.67738914489746, |
|
"learning_rate": 2.8757151406343506e-05, |
|
"loss": 11.514, |
|
"step": 19125 |
|
}, |
|
{ |
|
"epoch": 3.438061041292639, |
|
"grad_norm": 31.51483154296875, |
|
"learning_rate": 2.875116696588869e-05, |
|
"loss": 11.4153, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 3.4425493716337523, |
|
"grad_norm": 32.316654205322266, |
|
"learning_rate": 2.8745182525433875e-05, |
|
"loss": 11.5824, |
|
"step": 19175 |
|
}, |
|
{ |
|
"epoch": 3.4470377019748653, |
|
"grad_norm": 31.41953468322754, |
|
"learning_rate": 2.8739198084979057e-05, |
|
"loss": 11.259, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.4515260323159787, |
|
"grad_norm": 32.805870056152344, |
|
"learning_rate": 2.873321364452424e-05, |
|
"loss": 11.2999, |
|
"step": 19225 |
|
}, |
|
{ |
|
"epoch": 3.4560143626570916, |
|
"grad_norm": 32.010826110839844, |
|
"learning_rate": 2.872722920406942e-05, |
|
"loss": 10.9906, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 3.4605026929982046, |
|
"grad_norm": 33.595767974853516, |
|
"learning_rate": 2.87212447636146e-05, |
|
"loss": 11.2258, |
|
"step": 19275 |
|
}, |
|
{ |
|
"epoch": 3.464991023339318, |
|
"grad_norm": 31.329288482666016, |
|
"learning_rate": 2.8715260323159783e-05, |
|
"loss": 11.2326, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 3.469479353680431, |
|
"grad_norm": 31.369930267333984, |
|
"learning_rate": 2.870927588270497e-05, |
|
"loss": 11.2166, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 3.473967684021544, |
|
"grad_norm": 30.896013259887695, |
|
"learning_rate": 2.870329144225015e-05, |
|
"loss": 11.1363, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 3.4784560143626573, |
|
"grad_norm": 31.08727264404297, |
|
"learning_rate": 2.8697307001795333e-05, |
|
"loss": 11.4698, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 3.4829443447037702, |
|
"grad_norm": 28.412425994873047, |
|
"learning_rate": 2.8691322561340516e-05, |
|
"loss": 11.0108, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 3.487432675044883, |
|
"grad_norm": 31.50676155090332, |
|
"learning_rate": 2.8685338120885698e-05, |
|
"loss": 10.8741, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 3.4919210053859966, |
|
"grad_norm": 28.88292694091797, |
|
"learning_rate": 2.867935368043088e-05, |
|
"loss": 11.1386, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 3.4964093357271095, |
|
"grad_norm": 30.06254005432129, |
|
"learning_rate": 2.8673369239976063e-05, |
|
"loss": 11.1929, |
|
"step": 19475 |
|
}, |
|
{ |
|
"epoch": 3.5008976660682225, |
|
"grad_norm": 34.148311614990234, |
|
"learning_rate": 2.8667384799521245e-05, |
|
"loss": 11.339, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.505385996409336, |
|
"grad_norm": 33.28491973876953, |
|
"learning_rate": 2.8661400359066428e-05, |
|
"loss": 11.1095, |
|
"step": 19525 |
|
}, |
|
{ |
|
"epoch": 3.509874326750449, |
|
"grad_norm": 33.306026458740234, |
|
"learning_rate": 2.865541591861161e-05, |
|
"loss": 11.0814, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 3.514362657091562, |
|
"grad_norm": 31.115873336791992, |
|
"learning_rate": 2.8649431478156792e-05, |
|
"loss": 11.2325, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 3.5188509874326748, |
|
"grad_norm": 31.66822052001953, |
|
"learning_rate": 2.8643447037701978e-05, |
|
"loss": 11.1676, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 3.523339317773788, |
|
"grad_norm": 29.544313430786133, |
|
"learning_rate": 2.863746259724716e-05, |
|
"loss": 11.1635, |
|
"step": 19625 |
|
}, |
|
{ |
|
"epoch": 3.527827648114901, |
|
"grad_norm": 34.17205810546875, |
|
"learning_rate": 2.8631478156792343e-05, |
|
"loss": 11.7224, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 3.5323159784560145, |
|
"grad_norm": 32.336727142333984, |
|
"learning_rate": 2.8625493716337522e-05, |
|
"loss": 11.3425, |
|
"step": 19675 |
|
}, |
|
{ |
|
"epoch": 3.5368043087971275, |
|
"grad_norm": 32.560447692871094, |
|
"learning_rate": 2.8619509275882704e-05, |
|
"loss": 10.9515, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 3.5412926391382404, |
|
"grad_norm": 30.652273178100586, |
|
"learning_rate": 2.8613524835427886e-05, |
|
"loss": 11.2001, |
|
"step": 19725 |
|
}, |
|
{ |
|
"epoch": 3.545780969479354, |
|
"grad_norm": 30.610469818115234, |
|
"learning_rate": 2.8607540394973072e-05, |
|
"loss": 11.5682, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 3.550269299820467, |
|
"grad_norm": 30.808074951171875, |
|
"learning_rate": 2.8601555954518255e-05, |
|
"loss": 11.453, |
|
"step": 19775 |
|
}, |
|
{ |
|
"epoch": 3.5547576301615798, |
|
"grad_norm": 31.674997329711914, |
|
"learning_rate": 2.8595571514063437e-05, |
|
"loss": 11.2786, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.559245960502693, |
|
"grad_norm": 30.62029457092285, |
|
"learning_rate": 2.858958707360862e-05, |
|
"loss": 11.7177, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 3.563734290843806, |
|
"grad_norm": 31.6383113861084, |
|
"learning_rate": 2.8583602633153798e-05, |
|
"loss": 11.2354, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 3.568222621184919, |
|
"grad_norm": 32.37112045288086, |
|
"learning_rate": 2.857761819269898e-05, |
|
"loss": 11.2081, |
|
"step": 19875 |
|
}, |
|
{ |
|
"epoch": 3.5727109515260325, |
|
"grad_norm": 29.828189849853516, |
|
"learning_rate": 2.8571873129862358e-05, |
|
"loss": 11.0898, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 3.5771992818671454, |
|
"grad_norm": 30.648529052734375, |
|
"learning_rate": 2.8565888689407543e-05, |
|
"loss": 10.8965, |
|
"step": 19925 |
|
}, |
|
{ |
|
"epoch": 3.5816876122082584, |
|
"grad_norm": 41.849483489990234, |
|
"learning_rate": 2.8559904248952726e-05, |
|
"loss": 11.0246, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 3.5861759425493718, |
|
"grad_norm": 31.274961471557617, |
|
"learning_rate": 2.8553919808497905e-05, |
|
"loss": 11.4183, |
|
"step": 19975 |
|
}, |
|
{ |
|
"epoch": 3.5906642728904847, |
|
"grad_norm": 30.798633575439453, |
|
"learning_rate": 2.8547935368043087e-05, |
|
"loss": 11.1237, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.5951526032315977, |
|
"grad_norm": 28.889543533325195, |
|
"learning_rate": 2.854195092758827e-05, |
|
"loss": 11.3662, |
|
"step": 20025 |
|
}, |
|
{ |
|
"epoch": 3.599640933572711, |
|
"grad_norm": 29.90560531616211, |
|
"learning_rate": 2.8535966487133455e-05, |
|
"loss": 11.0125, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 3.604129263913824, |
|
"grad_norm": 30.499813079833984, |
|
"learning_rate": 2.8529982046678638e-05, |
|
"loss": 11.4072, |
|
"step": 20075 |
|
}, |
|
{ |
|
"epoch": 3.608617594254937, |
|
"grad_norm": 30.493555068969727, |
|
"learning_rate": 2.852399760622382e-05, |
|
"loss": 11.2318, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.6131059245960504, |
|
"grad_norm": 31.599319458007812, |
|
"learning_rate": 2.8518013165769002e-05, |
|
"loss": 11.062, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 3.6175942549371634, |
|
"grad_norm": 30.47945213317871, |
|
"learning_rate": 2.8512028725314185e-05, |
|
"loss": 11.4558, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 3.6220825852782763, |
|
"grad_norm": 32.14970779418945, |
|
"learning_rate": 2.8506044284859364e-05, |
|
"loss": 10.5179, |
|
"step": 20175 |
|
}, |
|
{ |
|
"epoch": 3.6265709156193897, |
|
"grad_norm": 31.17921257019043, |
|
"learning_rate": 2.850005984440455e-05, |
|
"loss": 10.968, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 3.6310592459605027, |
|
"grad_norm": 31.30777931213379, |
|
"learning_rate": 2.8494075403949732e-05, |
|
"loss": 11.336, |
|
"step": 20225 |
|
}, |
|
{ |
|
"epoch": 3.635547576301616, |
|
"grad_norm": 30.182174682617188, |
|
"learning_rate": 2.8488090963494914e-05, |
|
"loss": 11.2265, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 3.640035906642729, |
|
"grad_norm": 31.07087516784668, |
|
"learning_rate": 2.8482106523040096e-05, |
|
"loss": 11.2847, |
|
"step": 20275 |
|
}, |
|
{ |
|
"epoch": 3.644524236983842, |
|
"grad_norm": 28.739133834838867, |
|
"learning_rate": 2.847612208258528e-05, |
|
"loss": 11.427, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 3.649012567324955, |
|
"grad_norm": 31.23784637451172, |
|
"learning_rate": 2.847013764213046e-05, |
|
"loss": 11.0143, |
|
"step": 20325 |
|
}, |
|
{ |
|
"epoch": 3.6535008976660683, |
|
"grad_norm": 30.830699920654297, |
|
"learning_rate": 2.8464153201675647e-05, |
|
"loss": 11.2841, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 3.6579892280071813, |
|
"grad_norm": 30.827341079711914, |
|
"learning_rate": 2.8458168761220826e-05, |
|
"loss": 10.9722, |
|
"step": 20375 |
|
}, |
|
{ |
|
"epoch": 3.6624775583482947, |
|
"grad_norm": 29.842851638793945, |
|
"learning_rate": 2.8452184320766008e-05, |
|
"loss": 11.2758, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.6669658886894076, |
|
"grad_norm": 32.061363220214844, |
|
"learning_rate": 2.844619988031119e-05, |
|
"loss": 11.0502, |
|
"step": 20425 |
|
}, |
|
{ |
|
"epoch": 3.6714542190305206, |
|
"grad_norm": 31.67589569091797, |
|
"learning_rate": 2.8440215439856373e-05, |
|
"loss": 11.2309, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 3.6759425493716336, |
|
"grad_norm": 29.2219295501709, |
|
"learning_rate": 2.8434230999401555e-05, |
|
"loss": 11.1079, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 3.680430879712747, |
|
"grad_norm": 29.494009017944336, |
|
"learning_rate": 2.842824655894674e-05, |
|
"loss": 11.095, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.68491921005386, |
|
"grad_norm": 30.72727394104004, |
|
"learning_rate": 2.8422262118491923e-05, |
|
"loss": 10.9651, |
|
"step": 20525 |
|
}, |
|
{ |
|
"epoch": 3.6894075403949733, |
|
"grad_norm": 32.62581253051758, |
|
"learning_rate": 2.8416277678037106e-05, |
|
"loss": 11.1485, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 3.6938958707360863, |
|
"grad_norm": 32.17450714111328, |
|
"learning_rate": 2.8410293237582285e-05, |
|
"loss": 11.2383, |
|
"step": 20575 |
|
}, |
|
{ |
|
"epoch": 3.6983842010771992, |
|
"grad_norm": 31.298063278198242, |
|
"learning_rate": 2.8404308797127467e-05, |
|
"loss": 11.4215, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 3.702872531418312, |
|
"grad_norm": 31.1262149810791, |
|
"learning_rate": 2.8398324356672653e-05, |
|
"loss": 11.0779, |
|
"step": 20625 |
|
}, |
|
{ |
|
"epoch": 3.7073608617594256, |
|
"grad_norm": 31.340126037597656, |
|
"learning_rate": 2.8392339916217835e-05, |
|
"loss": 11.0714, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 3.7118491921005385, |
|
"grad_norm": 33.29624557495117, |
|
"learning_rate": 2.8386355475763018e-05, |
|
"loss": 11.0074, |
|
"step": 20675 |
|
}, |
|
{ |
|
"epoch": 3.716337522441652, |
|
"grad_norm": 30.880542755126953, |
|
"learning_rate": 2.83803710353082e-05, |
|
"loss": 10.8339, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 3.720825852782765, |
|
"grad_norm": 29.898832321166992, |
|
"learning_rate": 2.8374386594853382e-05, |
|
"loss": 10.8654, |
|
"step": 20725 |
|
}, |
|
{ |
|
"epoch": 3.725314183123878, |
|
"grad_norm": 29.32884979248047, |
|
"learning_rate": 2.8368402154398565e-05, |
|
"loss": 11.3662, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 3.729802513464991, |
|
"grad_norm": 32.064762115478516, |
|
"learning_rate": 2.8362417713943747e-05, |
|
"loss": 11.4581, |
|
"step": 20775 |
|
}, |
|
{ |
|
"epoch": 3.734290843806104, |
|
"grad_norm": 32.138267517089844, |
|
"learning_rate": 2.835643327348893e-05, |
|
"loss": 10.9124, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 3.738779174147217, |
|
"grad_norm": 33.86062240600586, |
|
"learning_rate": 2.8350448833034112e-05, |
|
"loss": 11.0097, |
|
"step": 20825 |
|
}, |
|
{ |
|
"epoch": 3.7432675044883306, |
|
"grad_norm": 30.490970611572266, |
|
"learning_rate": 2.8344464392579294e-05, |
|
"loss": 11.4534, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 3.7477558348294435, |
|
"grad_norm": 27.865781784057617, |
|
"learning_rate": 2.8338479952124477e-05, |
|
"loss": 11.4486, |
|
"step": 20875 |
|
}, |
|
{ |
|
"epoch": 3.7522441651705565, |
|
"grad_norm": 31.9267520904541, |
|
"learning_rate": 2.833249551166966e-05, |
|
"loss": 11.5519, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 3.7567324955116694, |
|
"grad_norm": 29.056507110595703, |
|
"learning_rate": 2.8326511071214845e-05, |
|
"loss": 11.4834, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 3.761220825852783, |
|
"grad_norm": 30.026002883911133, |
|
"learning_rate": 2.8320526630760024e-05, |
|
"loss": 11.3338, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 3.765709156193896, |
|
"grad_norm": 30.737932205200195, |
|
"learning_rate": 2.8314542190305206e-05, |
|
"loss": 10.9653, |
|
"step": 20975 |
|
}, |
|
{ |
|
"epoch": 3.770197486535009, |
|
"grad_norm": 30.978910446166992, |
|
"learning_rate": 2.8308557749850388e-05, |
|
"loss": 11.4722, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.774685816876122, |
|
"grad_norm": 30.23752212524414, |
|
"learning_rate": 2.830257330939557e-05, |
|
"loss": 11.3043, |
|
"step": 21025 |
|
}, |
|
{ |
|
"epoch": 3.779174147217235, |
|
"grad_norm": 32.29151153564453, |
|
"learning_rate": 2.8296588868940756e-05, |
|
"loss": 11.0943, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 3.783662477558348, |
|
"grad_norm": 30.46995735168457, |
|
"learning_rate": 2.829060442848594e-05, |
|
"loss": 11.345, |
|
"step": 21075 |
|
}, |
|
{ |
|
"epoch": 3.7881508078994615, |
|
"grad_norm": 32.500823974609375, |
|
"learning_rate": 2.828461998803112e-05, |
|
"loss": 11.2675, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 3.7926391382405744, |
|
"grad_norm": 31.643070220947266, |
|
"learning_rate": 2.8278635547576303e-05, |
|
"loss": 11.2081, |
|
"step": 21125 |
|
}, |
|
{ |
|
"epoch": 3.797127468581688, |
|
"grad_norm": 31.303314208984375, |
|
"learning_rate": 2.8272651107121482e-05, |
|
"loss": 11.3969, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 3.8016157989228008, |
|
"grad_norm": 32.96514129638672, |
|
"learning_rate": 2.8266666666666665e-05, |
|
"loss": 11.1525, |
|
"step": 21175 |
|
}, |
|
{ |
|
"epoch": 3.8061041292639137, |
|
"grad_norm": 30.002351760864258, |
|
"learning_rate": 2.826068222621185e-05, |
|
"loss": 10.8658, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 3.8105924596050267, |
|
"grad_norm": 31.169191360473633, |
|
"learning_rate": 2.8254697785757033e-05, |
|
"loss": 11.2565, |
|
"step": 21225 |
|
}, |
|
{ |
|
"epoch": 3.81508078994614, |
|
"grad_norm": 31.06591033935547, |
|
"learning_rate": 2.8248713345302215e-05, |
|
"loss": 11.2624, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 3.819569120287253, |
|
"grad_norm": 28.5202579498291, |
|
"learning_rate": 2.8242728904847398e-05, |
|
"loss": 10.9322, |
|
"step": 21275 |
|
}, |
|
{ |
|
"epoch": 3.8240574506283664, |
|
"grad_norm": 30.786962509155273, |
|
"learning_rate": 2.823674446439258e-05, |
|
"loss": 11.0254, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 3.8285457809694794, |
|
"grad_norm": 30.801992416381836, |
|
"learning_rate": 2.8230760023937762e-05, |
|
"loss": 11.156, |
|
"step": 21325 |
|
}, |
|
{ |
|
"epoch": 3.8330341113105924, |
|
"grad_norm": 28.441688537597656, |
|
"learning_rate": 2.8224775583482945e-05, |
|
"loss": 11.1657, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 3.8375224416517053, |
|
"grad_norm": 29.77831268310547, |
|
"learning_rate": 2.8218791143028127e-05, |
|
"loss": 11.3975, |
|
"step": 21375 |
|
}, |
|
{ |
|
"epoch": 3.8420107719928187, |
|
"grad_norm": 31.247785568237305, |
|
"learning_rate": 2.821280670257331e-05, |
|
"loss": 11.0109, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 3.8464991023339317, |
|
"grad_norm": 32.04808807373047, |
|
"learning_rate": 2.8206822262118492e-05, |
|
"loss": 11.0839, |
|
"step": 21425 |
|
}, |
|
{ |
|
"epoch": 3.850987432675045, |
|
"grad_norm": 30.55583953857422, |
|
"learning_rate": 2.8200837821663674e-05, |
|
"loss": 10.8381, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 3.855475763016158, |
|
"grad_norm": 29.084171295166016, |
|
"learning_rate": 2.819485338120886e-05, |
|
"loss": 11.187, |
|
"step": 21475 |
|
}, |
|
{ |
|
"epoch": 3.859964093357271, |
|
"grad_norm": 31.084972381591797, |
|
"learning_rate": 2.8188868940754042e-05, |
|
"loss": 11.52, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.864452423698384, |
|
"grad_norm": 31.979738235473633, |
|
"learning_rate": 2.8182884500299225e-05, |
|
"loss": 11.3922, |
|
"step": 21525 |
|
}, |
|
{ |
|
"epoch": 3.8689407540394973, |
|
"grad_norm": 28.92717742919922, |
|
"learning_rate": 2.8176900059844404e-05, |
|
"loss": 11.064, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 3.8734290843806103, |
|
"grad_norm": 29.832292556762695, |
|
"learning_rate": 2.8170915619389586e-05, |
|
"loss": 11.2098, |
|
"step": 21575 |
|
}, |
|
{ |
|
"epoch": 3.8779174147217237, |
|
"grad_norm": 31.74751091003418, |
|
"learning_rate": 2.816493117893477e-05, |
|
"loss": 10.9546, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 3.8824057450628366, |
|
"grad_norm": 32.31631088256836, |
|
"learning_rate": 2.8158946738479954e-05, |
|
"loss": 11.1283, |
|
"step": 21625 |
|
}, |
|
{ |
|
"epoch": 3.8868940754039496, |
|
"grad_norm": 30.267370223999023, |
|
"learning_rate": 2.8152962298025136e-05, |
|
"loss": 10.9434, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 3.891382405745063, |
|
"grad_norm": 31.132080078125, |
|
"learning_rate": 2.814697785757032e-05, |
|
"loss": 11.404, |
|
"step": 21675 |
|
}, |
|
{ |
|
"epoch": 3.895870736086176, |
|
"grad_norm": 31.34539794921875, |
|
"learning_rate": 2.81409934171155e-05, |
|
"loss": 11.3978, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 3.900359066427289, |
|
"grad_norm": 31.39044952392578, |
|
"learning_rate": 2.8135008976660684e-05, |
|
"loss": 11.187, |
|
"step": 21725 |
|
}, |
|
{ |
|
"epoch": 3.9048473967684023, |
|
"grad_norm": 32.7244873046875, |
|
"learning_rate": 2.8129024536205862e-05, |
|
"loss": 11.2598, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 3.9093357271095153, |
|
"grad_norm": 28.18239974975586, |
|
"learning_rate": 2.8123040095751048e-05, |
|
"loss": 11.3187, |
|
"step": 21775 |
|
}, |
|
{ |
|
"epoch": 3.9138240574506282, |
|
"grad_norm": 31.796775817871094, |
|
"learning_rate": 2.811705565529623e-05, |
|
"loss": 11.0664, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 3.9183123877917416, |
|
"grad_norm": 30.6005859375, |
|
"learning_rate": 2.8111071214841413e-05, |
|
"loss": 10.987, |
|
"step": 21825 |
|
}, |
|
{ |
|
"epoch": 3.9228007181328546, |
|
"grad_norm": 30.108829498291016, |
|
"learning_rate": 2.8105086774386595e-05, |
|
"loss": 11.0541, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 3.9272890484739675, |
|
"grad_norm": 31.7265682220459, |
|
"learning_rate": 2.8099102333931778e-05, |
|
"loss": 11.1046, |
|
"step": 21875 |
|
}, |
|
{ |
|
"epoch": 3.931777378815081, |
|
"grad_norm": 32.628074645996094, |
|
"learning_rate": 2.809311789347696e-05, |
|
"loss": 11.2953, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 3.936265709156194, |
|
"grad_norm": 28.80093765258789, |
|
"learning_rate": 2.8087133453022146e-05, |
|
"loss": 11.5525, |
|
"step": 21925 |
|
}, |
|
{ |
|
"epoch": 3.940754039497307, |
|
"grad_norm": 29.523881912231445, |
|
"learning_rate": 2.8081149012567325e-05, |
|
"loss": 11.2298, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 3.9452423698384202, |
|
"grad_norm": 30.06547737121582, |
|
"learning_rate": 2.8075164572112507e-05, |
|
"loss": 10.7736, |
|
"step": 21975 |
|
}, |
|
{ |
|
"epoch": 3.949730700179533, |
|
"grad_norm": 29.540449142456055, |
|
"learning_rate": 2.806918013165769e-05, |
|
"loss": 11.0709, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.954219030520646, |
|
"grad_norm": 33.31890869140625, |
|
"learning_rate": 2.8063195691202872e-05, |
|
"loss": 11.0698, |
|
"step": 22025 |
|
}, |
|
{ |
|
"epoch": 3.9587073608617596, |
|
"grad_norm": 30.68980598449707, |
|
"learning_rate": 2.8057211250748058e-05, |
|
"loss": 10.8253, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 3.9631956912028725, |
|
"grad_norm": 31.10498809814453, |
|
"learning_rate": 2.805122681029324e-05, |
|
"loss": 11.4169, |
|
"step": 22075 |
|
}, |
|
{ |
|
"epoch": 3.9676840215439855, |
|
"grad_norm": 30.547962188720703, |
|
"learning_rate": 2.8045242369838422e-05, |
|
"loss": 11.2333, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 3.972172351885099, |
|
"grad_norm": 30.325082778930664, |
|
"learning_rate": 2.8039257929383605e-05, |
|
"loss": 11.3395, |
|
"step": 22125 |
|
}, |
|
{ |
|
"epoch": 3.976660682226212, |
|
"grad_norm": 30.00259780883789, |
|
"learning_rate": 2.8033273488928784e-05, |
|
"loss": 11.2044, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 3.9811490125673252, |
|
"grad_norm": 27.535524368286133, |
|
"learning_rate": 2.8027289048473966e-05, |
|
"loss": 10.92, |
|
"step": 22175 |
|
}, |
|
{ |
|
"epoch": 3.985637342908438, |
|
"grad_norm": 31.112247467041016, |
|
"learning_rate": 2.8021304608019152e-05, |
|
"loss": 11.1473, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 3.990125673249551, |
|
"grad_norm": 30.036909103393555, |
|
"learning_rate": 2.8015320167564334e-05, |
|
"loss": 11.5283, |
|
"step": 22225 |
|
}, |
|
{ |
|
"epoch": 3.994614003590664, |
|
"grad_norm": 30.063087463378906, |
|
"learning_rate": 2.8009335727109516e-05, |
|
"loss": 11.1425, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 3.9991023339317775, |
|
"grad_norm": 32.6578483581543, |
|
"learning_rate": 2.80033512866547e-05, |
|
"loss": 10.6965, |
|
"step": 22275 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.07753995379298494, |
|
"eval_f1_macro": 0.009406764821620481, |
|
"eval_f1_micro": 0.07753995379298494, |
|
"eval_f1_weighted": 0.04766626516109554, |
|
"eval_loss": 6.724180221557617, |
|
"eval_precision_macro": 0.00894345687991437, |
|
"eval_precision_micro": 0.07753995379298494, |
|
"eval_precision_weighted": 0.04157378236856482, |
|
"eval_recall_macro": 0.014579264668079467, |
|
"eval_recall_micro": 0.07753995379298494, |
|
"eval_recall_weighted": 0.07753995379298494, |
|
"eval_runtime": 86.416, |
|
"eval_samples_per_second": 606.057, |
|
"eval_steps_per_second": 18.943, |
|
"step": 22280 |
|
}, |
|
{ |
|
"epoch": 4.003590664272891, |
|
"grad_norm": 30.3936824798584, |
|
"learning_rate": 2.799736684619988e-05, |
|
"loss": 9.4773, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 4.008078994614004, |
|
"grad_norm": 30.763669967651367, |
|
"learning_rate": 2.7991382405745064e-05, |
|
"loss": 9.1388, |
|
"step": 22325 |
|
}, |
|
{ |
|
"epoch": 4.012567324955117, |
|
"grad_norm": 30.83111572265625, |
|
"learning_rate": 2.7985397965290246e-05, |
|
"loss": 9.1059, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 4.01705565529623, |
|
"grad_norm": 32.58699035644531, |
|
"learning_rate": 2.7979413524835428e-05, |
|
"loss": 9.1599, |
|
"step": 22375 |
|
}, |
|
{ |
|
"epoch": 4.021543985637343, |
|
"grad_norm": 32.16946792602539, |
|
"learning_rate": 2.797342908438061e-05, |
|
"loss": 8.8678, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 4.026032315978456, |
|
"grad_norm": 32.695838928222656, |
|
"learning_rate": 2.7967444643925793e-05, |
|
"loss": 8.5109, |
|
"step": 22425 |
|
}, |
|
{ |
|
"epoch": 4.0305206463195695, |
|
"grad_norm": 32.195003509521484, |
|
"learning_rate": 2.7961460203470975e-05, |
|
"loss": 8.8435, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 4.0350089766606825, |
|
"grad_norm": 33.23640060424805, |
|
"learning_rate": 2.795547576301616e-05, |
|
"loss": 8.9362, |
|
"step": 22475 |
|
}, |
|
{ |
|
"epoch": 4.039497307001795, |
|
"grad_norm": 36.865997314453125, |
|
"learning_rate": 2.7949491322561343e-05, |
|
"loss": 9.0649, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.043985637342908, |
|
"grad_norm": 35.41594696044922, |
|
"learning_rate": 2.7943506882106526e-05, |
|
"loss": 8.9695, |
|
"step": 22525 |
|
}, |
|
{ |
|
"epoch": 4.048473967684021, |
|
"grad_norm": 35.198551177978516, |
|
"learning_rate": 2.7937522441651705e-05, |
|
"loss": 9.3542, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 4.052962298025134, |
|
"grad_norm": 49.4534912109375, |
|
"learning_rate": 2.7931538001196887e-05, |
|
"loss": 8.6442, |
|
"step": 22575 |
|
}, |
|
{ |
|
"epoch": 4.057450628366248, |
|
"grad_norm": 35.323726654052734, |
|
"learning_rate": 2.792555356074207e-05, |
|
"loss": 8.5507, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 4.061938958707361, |
|
"grad_norm": null, |
|
"learning_rate": 2.7919808497905447e-05, |
|
"loss": 9.0441, |
|
"step": 22625 |
|
}, |
|
{ |
|
"epoch": 4.066427289048474, |
|
"grad_norm": 35.7750244140625, |
|
"learning_rate": 2.791382405745063e-05, |
|
"loss": 9.1965, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 4.070915619389587, |
|
"grad_norm": 31.913360595703125, |
|
"learning_rate": 2.790783961699581e-05, |
|
"loss": 9.1348, |
|
"step": 22675 |
|
}, |
|
{ |
|
"epoch": 4.0754039497307, |
|
"grad_norm": 33.979190826416016, |
|
"learning_rate": 2.7901855176540994e-05, |
|
"loss": 9.1416, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 4.079892280071813, |
|
"grad_norm": 33.557029724121094, |
|
"learning_rate": 2.7895870736086176e-05, |
|
"loss": 8.8399, |
|
"step": 22725 |
|
}, |
|
{ |
|
"epoch": 4.084380610412927, |
|
"grad_norm": 35.37779998779297, |
|
"learning_rate": 2.788988629563136e-05, |
|
"loss": 9.2904, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 4.08886894075404, |
|
"grad_norm": 33.334224700927734, |
|
"learning_rate": 2.788390185517654e-05, |
|
"loss": 9.0412, |
|
"step": 22775 |
|
}, |
|
{ |
|
"epoch": 4.093357271095153, |
|
"grad_norm": 38.393653869628906, |
|
"learning_rate": 2.7877917414721726e-05, |
|
"loss": 8.8758, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 4.097845601436266, |
|
"grad_norm": 34.724517822265625, |
|
"learning_rate": 2.787193297426691e-05, |
|
"loss": 8.9632, |
|
"step": 22825 |
|
}, |
|
{ |
|
"epoch": 4.102333931777379, |
|
"grad_norm": 35.026126861572266, |
|
"learning_rate": 2.7865948533812088e-05, |
|
"loss": 8.81, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 4.1068222621184916, |
|
"grad_norm": 33.23841094970703, |
|
"learning_rate": 2.785996409335727e-05, |
|
"loss": 8.965, |
|
"step": 22875 |
|
}, |
|
{ |
|
"epoch": 4.111310592459605, |
|
"grad_norm": 33.344581604003906, |
|
"learning_rate": 2.7853979652902453e-05, |
|
"loss": 8.6121, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 4.115798922800718, |
|
"grad_norm": 33.311065673828125, |
|
"learning_rate": 2.7847995212447638e-05, |
|
"loss": 8.863, |
|
"step": 22925 |
|
}, |
|
{ |
|
"epoch": 4.120287253141831, |
|
"grad_norm": 31.99666404724121, |
|
"learning_rate": 2.784201077199282e-05, |
|
"loss": 9.2711, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 4.124775583482944, |
|
"grad_norm": 35.421077728271484, |
|
"learning_rate": 2.7836026331538003e-05, |
|
"loss": 8.8117, |
|
"step": 22975 |
|
}, |
|
{ |
|
"epoch": 4.129263913824057, |
|
"grad_norm": 35.499202728271484, |
|
"learning_rate": 2.7830041891083185e-05, |
|
"loss": 9.0647, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.13375224416517, |
|
"grad_norm": 39.84804916381836, |
|
"learning_rate": 2.7824057450628368e-05, |
|
"loss": 8.9921, |
|
"step": 23025 |
|
}, |
|
{ |
|
"epoch": 4.138240574506284, |
|
"grad_norm": 35.68635559082031, |
|
"learning_rate": 2.7818073010173547e-05, |
|
"loss": 9.0054, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 4.142728904847397, |
|
"grad_norm": 34.515098571777344, |
|
"learning_rate": 2.7812088569718732e-05, |
|
"loss": 9.2403, |
|
"step": 23075 |
|
}, |
|
{ |
|
"epoch": 4.14721723518851, |
|
"grad_norm": 35.22542190551758, |
|
"learning_rate": 2.7806104129263915e-05, |
|
"loss": 9.1803, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 4.151705565529623, |
|
"grad_norm": 31.101097106933594, |
|
"learning_rate": 2.7800119688809097e-05, |
|
"loss": 9.1061, |
|
"step": 23125 |
|
}, |
|
{ |
|
"epoch": 4.156193895870736, |
|
"grad_norm": 35.81389236450195, |
|
"learning_rate": 2.779413524835428e-05, |
|
"loss": 9.2282, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 4.160682226211849, |
|
"grad_norm": 33.05430603027344, |
|
"learning_rate": 2.7788150807899462e-05, |
|
"loss": 8.9339, |
|
"step": 23175 |
|
}, |
|
{ |
|
"epoch": 4.165170556552963, |
|
"grad_norm": 32.21403884887695, |
|
"learning_rate": 2.7782166367444644e-05, |
|
"loss": 9.0769, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 4.169658886894076, |
|
"grad_norm": 38.616085052490234, |
|
"learning_rate": 2.777618192698983e-05, |
|
"loss": 9.2644, |
|
"step": 23225 |
|
}, |
|
{ |
|
"epoch": 4.174147217235189, |
|
"grad_norm": 34.82571029663086, |
|
"learning_rate": 2.777019748653501e-05, |
|
"loss": 9.2723, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 4.1786355475763015, |
|
"grad_norm": 37.125797271728516, |
|
"learning_rate": 2.776421304608019e-05, |
|
"loss": 9.2647, |
|
"step": 23275 |
|
}, |
|
{ |
|
"epoch": 4.1831238779174145, |
|
"grad_norm": 36.201927185058594, |
|
"learning_rate": 2.7758228605625374e-05, |
|
"loss": 9.5391, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 4.187612208258527, |
|
"grad_norm": 34.90190505981445, |
|
"learning_rate": 2.7752244165170556e-05, |
|
"loss": 9.2669, |
|
"step": 23325 |
|
}, |
|
{ |
|
"epoch": 4.192100538599641, |
|
"grad_norm": 36.72137451171875, |
|
"learning_rate": 2.7746259724715742e-05, |
|
"loss": 9.3072, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 4.196588868940754, |
|
"grad_norm": 34.933372497558594, |
|
"learning_rate": 2.7740275284260924e-05, |
|
"loss": 9.2713, |
|
"step": 23375 |
|
}, |
|
{ |
|
"epoch": 4.201077199281867, |
|
"grad_norm": 37.9987907409668, |
|
"learning_rate": 2.7734290843806107e-05, |
|
"loss": 9.0352, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 4.20556552962298, |
|
"grad_norm": 33.95653533935547, |
|
"learning_rate": 2.772830640335129e-05, |
|
"loss": 9.1703, |
|
"step": 23425 |
|
}, |
|
{ |
|
"epoch": 4.210053859964093, |
|
"grad_norm": 32.79034423828125, |
|
"learning_rate": 2.7722321962896468e-05, |
|
"loss": 9.0717, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 4.214542190305206, |
|
"grad_norm": 41.263702392578125, |
|
"learning_rate": 2.771633752244165e-05, |
|
"loss": 9.0279, |
|
"step": 23475 |
|
}, |
|
{ |
|
"epoch": 4.21903052064632, |
|
"grad_norm": 34.632225036621094, |
|
"learning_rate": 2.7710353081986836e-05, |
|
"loss": 9.0236, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.223518850987433, |
|
"grad_norm": 34.72397232055664, |
|
"learning_rate": 2.770436864153202e-05, |
|
"loss": 9.2389, |
|
"step": 23525 |
|
}, |
|
{ |
|
"epoch": 4.228007181328546, |
|
"grad_norm": 34.320003509521484, |
|
"learning_rate": 2.76983842010772e-05, |
|
"loss": 9.0942, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 4.232495511669659, |
|
"grad_norm": 35.2785758972168, |
|
"learning_rate": 2.7692399760622383e-05, |
|
"loss": 8.7822, |
|
"step": 23575 |
|
}, |
|
{ |
|
"epoch": 4.236983842010772, |
|
"grad_norm": 40.83307647705078, |
|
"learning_rate": 2.7686415320167565e-05, |
|
"loss": 9.2271, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 4.241472172351885, |
|
"grad_norm": 34.236122131347656, |
|
"learning_rate": 2.7680430879712748e-05, |
|
"loss": 9.0967, |
|
"step": 23625 |
|
}, |
|
{ |
|
"epoch": 4.2459605026929985, |
|
"grad_norm": 34.03813171386719, |
|
"learning_rate": 2.767444643925793e-05, |
|
"loss": 9.1916, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 4.2504488330341115, |
|
"grad_norm": 32.90471267700195, |
|
"learning_rate": 2.7668461998803112e-05, |
|
"loss": 8.9725, |
|
"step": 23675 |
|
}, |
|
{ |
|
"epoch": 4.254937163375224, |
|
"grad_norm": 37.31569290161133, |
|
"learning_rate": 2.7662477558348295e-05, |
|
"loss": 9.5137, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 4.259425493716337, |
|
"grad_norm": 33.96034240722656, |
|
"learning_rate": 2.7656493117893477e-05, |
|
"loss": 9.1435, |
|
"step": 23725 |
|
}, |
|
{ |
|
"epoch": 4.26391382405745, |
|
"grad_norm": 37.626258850097656, |
|
"learning_rate": 2.765050867743866e-05, |
|
"loss": 9.1109, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 4.268402154398563, |
|
"grad_norm": 37.14412307739258, |
|
"learning_rate": 2.7644524236983842e-05, |
|
"loss": 8.9028, |
|
"step": 23775 |
|
}, |
|
{ |
|
"epoch": 4.272890484739677, |
|
"grad_norm": 33.2732048034668, |
|
"learning_rate": 2.7638539796529028e-05, |
|
"loss": 8.9961, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 4.27737881508079, |
|
"grad_norm": 35.71903991699219, |
|
"learning_rate": 2.7632555356074207e-05, |
|
"loss": 9.3034, |
|
"step": 23825 |
|
}, |
|
{ |
|
"epoch": 4.281867145421903, |
|
"grad_norm": 34.583213806152344, |
|
"learning_rate": 2.762657091561939e-05, |
|
"loss": 9.2041, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 4.286355475763016, |
|
"grad_norm": 36.03817367553711, |
|
"learning_rate": 2.762058647516457e-05, |
|
"loss": 9.2503, |
|
"step": 23875 |
|
}, |
|
{ |
|
"epoch": 4.290843806104129, |
|
"grad_norm": 34.202823638916016, |
|
"learning_rate": 2.7614602034709754e-05, |
|
"loss": 9.2398, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 4.295332136445243, |
|
"grad_norm": 35.64631652832031, |
|
"learning_rate": 2.760861759425494e-05, |
|
"loss": 9.139, |
|
"step": 23925 |
|
}, |
|
{ |
|
"epoch": 4.299820466786356, |
|
"grad_norm": 34.361637115478516, |
|
"learning_rate": 2.7602633153800122e-05, |
|
"loss": 9.0898, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 4.304308797127469, |
|
"grad_norm": 32.614646911621094, |
|
"learning_rate": 2.7596648713345304e-05, |
|
"loss": 9.4455, |
|
"step": 23975 |
|
}, |
|
{ |
|
"epoch": 4.308797127468582, |
|
"grad_norm": 36.456077575683594, |
|
"learning_rate": 2.7590664272890487e-05, |
|
"loss": 9.2413, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.313285457809695, |
|
"grad_norm": 33.43761444091797, |
|
"learning_rate": 2.7584679832435666e-05, |
|
"loss": 9.1153, |
|
"step": 24025 |
|
}, |
|
{ |
|
"epoch": 4.317773788150808, |
|
"grad_norm": 34.84223556518555, |
|
"learning_rate": 2.7578695391980848e-05, |
|
"loss": 9.2057, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 4.3222621184919205, |
|
"grad_norm": 31.044452667236328, |
|
"learning_rate": 2.7572710951526034e-05, |
|
"loss": 9.3385, |
|
"step": 24075 |
|
}, |
|
{ |
|
"epoch": 4.326750448833034, |
|
"grad_norm": 38.18600845336914, |
|
"learning_rate": 2.7566726511071216e-05, |
|
"loss": 9.0842, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 4.331238779174147, |
|
"grad_norm": 34.68734359741211, |
|
"learning_rate": 2.75607420706164e-05, |
|
"loss": 9.4446, |
|
"step": 24125 |
|
}, |
|
{ |
|
"epoch": 4.33572710951526, |
|
"grad_norm": 38.530601501464844, |
|
"learning_rate": 2.755475763016158e-05, |
|
"loss": 9.0487, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 4.340215439856373, |
|
"grad_norm": 35.827022552490234, |
|
"learning_rate": 2.7548773189706763e-05, |
|
"loss": 9.1242, |
|
"step": 24175 |
|
}, |
|
{ |
|
"epoch": 4.344703770197486, |
|
"grad_norm": 37.25276184082031, |
|
"learning_rate": 2.7542788749251945e-05, |
|
"loss": 9.1769, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 4.3491921005386, |
|
"grad_norm": 35.8741340637207, |
|
"learning_rate": 2.7536804308797128e-05, |
|
"loss": 9.256, |
|
"step": 24225 |
|
}, |
|
{ |
|
"epoch": 4.353680430879713, |
|
"grad_norm": 34.161651611328125, |
|
"learning_rate": 2.753081986834231e-05, |
|
"loss": 9.3959, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 4.358168761220826, |
|
"grad_norm": 36.703941345214844, |
|
"learning_rate": 2.7524835427887492e-05, |
|
"loss": 9.6069, |
|
"step": 24275 |
|
}, |
|
{ |
|
"epoch": 4.362657091561939, |
|
"grad_norm": 33.90925216674805, |
|
"learning_rate": 2.7518850987432675e-05, |
|
"loss": 9.2081, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 4.367145421903052, |
|
"grad_norm": 36.48859786987305, |
|
"learning_rate": 2.7512866546977857e-05, |
|
"loss": 9.2767, |
|
"step": 24325 |
|
}, |
|
{ |
|
"epoch": 4.371633752244165, |
|
"grad_norm": 36.00957489013672, |
|
"learning_rate": 2.7506882106523043e-05, |
|
"loss": 9.2949, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 4.376122082585279, |
|
"grad_norm": 33.388736724853516, |
|
"learning_rate": 2.7500897666068225e-05, |
|
"loss": 9.4621, |
|
"step": 24375 |
|
}, |
|
{ |
|
"epoch": 4.380610412926392, |
|
"grad_norm": 32.6502571105957, |
|
"learning_rate": 2.7494913225613408e-05, |
|
"loss": 9.2408, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 4.385098743267505, |
|
"grad_norm": 36.0883903503418, |
|
"learning_rate": 2.7488928785158587e-05, |
|
"loss": 9.3558, |
|
"step": 24425 |
|
}, |
|
{ |
|
"epoch": 4.3895870736086176, |
|
"grad_norm": 33.08795928955078, |
|
"learning_rate": 2.748294434470377e-05, |
|
"loss": 9.1737, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 4.3940754039497305, |
|
"grad_norm": 37.87990188598633, |
|
"learning_rate": 2.747695990424895e-05, |
|
"loss": 9.2635, |
|
"step": 24475 |
|
}, |
|
{ |
|
"epoch": 4.3985637342908435, |
|
"grad_norm": 32.306396484375, |
|
"learning_rate": 2.7470975463794137e-05, |
|
"loss": 9.4797, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.403052064631957, |
|
"grad_norm": 34.42149353027344, |
|
"learning_rate": 2.746499102333932e-05, |
|
"loss": 8.8528, |
|
"step": 24525 |
|
}, |
|
{ |
|
"epoch": 4.40754039497307, |
|
"grad_norm": 33.147850036621094, |
|
"learning_rate": 2.7459006582884502e-05, |
|
"loss": 9.3153, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 4.412028725314183, |
|
"grad_norm": 36.34206771850586, |
|
"learning_rate": 2.7453022142429684e-05, |
|
"loss": 9.1607, |
|
"step": 24575 |
|
}, |
|
{ |
|
"epoch": 4.416517055655296, |
|
"grad_norm": 36.275413513183594, |
|
"learning_rate": 2.7447037701974867e-05, |
|
"loss": 9.2555, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 4.421005385996409, |
|
"grad_norm": 34.83110427856445, |
|
"learning_rate": 2.7441053261520046e-05, |
|
"loss": 9.4131, |
|
"step": 24625 |
|
}, |
|
{ |
|
"epoch": 4.425493716337522, |
|
"grad_norm": 35.73281478881836, |
|
"learning_rate": 2.743506882106523e-05, |
|
"loss": 9.7517, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 4.429982046678636, |
|
"grad_norm": 32.646751403808594, |
|
"learning_rate": 2.7429084380610414e-05, |
|
"loss": 9.452, |
|
"step": 24675 |
|
}, |
|
{ |
|
"epoch": 4.434470377019749, |
|
"grad_norm": 42.54426956176758, |
|
"learning_rate": 2.7423099940155596e-05, |
|
"loss": 9.3777, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 4.438958707360862, |
|
"grad_norm": 35.09437942504883, |
|
"learning_rate": 2.741711549970078e-05, |
|
"loss": 9.3665, |
|
"step": 24725 |
|
}, |
|
{ |
|
"epoch": 4.443447037701975, |
|
"grad_norm": 36.45936965942383, |
|
"learning_rate": 2.741113105924596e-05, |
|
"loss": 9.4285, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 4.447935368043088, |
|
"grad_norm": 34.06489181518555, |
|
"learning_rate": 2.7405146618791146e-05, |
|
"loss": 9.1473, |
|
"step": 24775 |
|
}, |
|
{ |
|
"epoch": 4.452423698384201, |
|
"grad_norm": 38.4737663269043, |
|
"learning_rate": 2.739916217833633e-05, |
|
"loss": 9.5141, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 4.456912028725315, |
|
"grad_norm": 35.27596664428711, |
|
"learning_rate": 2.7393177737881508e-05, |
|
"loss": 9.3386, |
|
"step": 24825 |
|
}, |
|
{ |
|
"epoch": 4.4614003590664275, |
|
"grad_norm": 39.01841735839844, |
|
"learning_rate": 2.738719329742669e-05, |
|
"loss": 9.2959, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 4.4658886894075405, |
|
"grad_norm": 40.175697326660156, |
|
"learning_rate": 2.7381208856971873e-05, |
|
"loss": 9.3482, |
|
"step": 24875 |
|
}, |
|
{ |
|
"epoch": 4.470377019748653, |
|
"grad_norm": 37.285396575927734, |
|
"learning_rate": 2.7375224416517055e-05, |
|
"loss": 9.0562, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 4.474865350089766, |
|
"grad_norm": 37.979305267333984, |
|
"learning_rate": 2.736923997606224e-05, |
|
"loss": 9.4161, |
|
"step": 24925 |
|
}, |
|
{ |
|
"epoch": 4.479353680430879, |
|
"grad_norm": 34.52471160888672, |
|
"learning_rate": 2.7363255535607423e-05, |
|
"loss": 9.1926, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 4.483842010771993, |
|
"grad_norm": 32.52268600463867, |
|
"learning_rate": 2.7357271095152605e-05, |
|
"loss": 9.5568, |
|
"step": 24975 |
|
}, |
|
{ |
|
"epoch": 4.488330341113106, |
|
"grad_norm": 34.64008712768555, |
|
"learning_rate": 2.7351286654697788e-05, |
|
"loss": 9.34, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.492818671454219, |
|
"grad_norm": 35.43095397949219, |
|
"learning_rate": 2.7345302214242967e-05, |
|
"loss": 9.6012, |
|
"step": 25025 |
|
}, |
|
{ |
|
"epoch": 4.497307001795332, |
|
"grad_norm": 34.24216079711914, |
|
"learning_rate": 2.733931777378815e-05, |
|
"loss": 9.545, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 4.501795332136445, |
|
"grad_norm": 36.410186767578125, |
|
"learning_rate": 2.7333333333333335e-05, |
|
"loss": 9.5178, |
|
"step": 25075 |
|
}, |
|
{ |
|
"epoch": 4.506283662477558, |
|
"grad_norm": 33.58375549316406, |
|
"learning_rate": 2.7327348892878517e-05, |
|
"loss": 9.0259, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 4.510771992818672, |
|
"grad_norm": 33.377079010009766, |
|
"learning_rate": 2.73213644524237e-05, |
|
"loss": 9.1557, |
|
"step": 25125 |
|
}, |
|
{ |
|
"epoch": 4.515260323159785, |
|
"grad_norm": 37.322166442871094, |
|
"learning_rate": 2.7315380011968882e-05, |
|
"loss": 9.2679, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 4.519748653500898, |
|
"grad_norm": 35.399192810058594, |
|
"learning_rate": 2.7309395571514064e-05, |
|
"loss": 9.3599, |
|
"step": 25175 |
|
}, |
|
{ |
|
"epoch": 4.524236983842011, |
|
"grad_norm": 34.6229362487793, |
|
"learning_rate": 2.7303411131059247e-05, |
|
"loss": 9.2008, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 4.528725314183124, |
|
"grad_norm": 38.43641662597656, |
|
"learning_rate": 2.729742669060443e-05, |
|
"loss": 9.5365, |
|
"step": 25225 |
|
}, |
|
{ |
|
"epoch": 4.533213644524237, |
|
"grad_norm": 36.315940856933594, |
|
"learning_rate": 2.729144225014961e-05, |
|
"loss": 9.2328, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 4.53770197486535, |
|
"grad_norm": 36.93431091308594, |
|
"learning_rate": 2.7285457809694794e-05, |
|
"loss": 9.1937, |
|
"step": 25275 |
|
}, |
|
{ |
|
"epoch": 4.542190305206463, |
|
"grad_norm": 34.52630615234375, |
|
"learning_rate": 2.7279473369239976e-05, |
|
"loss": 9.3224, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 4.546678635547576, |
|
"grad_norm": 37.09843826293945, |
|
"learning_rate": 2.727348892878516e-05, |
|
"loss": 9.5531, |
|
"step": 25325 |
|
}, |
|
{ |
|
"epoch": 4.551166965888689, |
|
"grad_norm": 35.45225143432617, |
|
"learning_rate": 2.7267504488330344e-05, |
|
"loss": 9.3112, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 4.555655296229802, |
|
"grad_norm": 36.52423858642578, |
|
"learning_rate": 2.7261520047875526e-05, |
|
"loss": 9.2307, |
|
"step": 25375 |
|
}, |
|
{ |
|
"epoch": 4.560143626570916, |
|
"grad_norm": 31.571231842041016, |
|
"learning_rate": 2.725553560742071e-05, |
|
"loss": 9.0741, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 4.564631956912029, |
|
"grad_norm": 35.70735549926758, |
|
"learning_rate": 2.7249551166965888e-05, |
|
"loss": 9.4122, |
|
"step": 25425 |
|
}, |
|
{ |
|
"epoch": 4.569120287253142, |
|
"grad_norm": 35.43241882324219, |
|
"learning_rate": 2.724356672651107e-05, |
|
"loss": 9.4357, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 4.573608617594255, |
|
"grad_norm": 35.756832122802734, |
|
"learning_rate": 2.7237582286056253e-05, |
|
"loss": 9.3782, |
|
"step": 25475 |
|
}, |
|
{ |
|
"epoch": 4.578096947935368, |
|
"grad_norm": 33.91000747680664, |
|
"learning_rate": 2.7231597845601438e-05, |
|
"loss": 9.7165, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.582585278276481, |
|
"grad_norm": 34.89963912963867, |
|
"learning_rate": 2.722561340514662e-05, |
|
"loss": 9.5026, |
|
"step": 25525 |
|
}, |
|
{ |
|
"epoch": 4.587073608617594, |
|
"grad_norm": 35.42002868652344, |
|
"learning_rate": 2.7219628964691803e-05, |
|
"loss": 9.6544, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 4.591561938958708, |
|
"grad_norm": 35.01460647583008, |
|
"learning_rate": 2.7213644524236985e-05, |
|
"loss": 9.3751, |
|
"step": 25575 |
|
}, |
|
{ |
|
"epoch": 4.596050269299821, |
|
"grad_norm": 32.873260498046875, |
|
"learning_rate": 2.7207660083782164e-05, |
|
"loss": 9.1189, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 4.600538599640934, |
|
"grad_norm": 38.0374641418457, |
|
"learning_rate": 2.7201675643327347e-05, |
|
"loss": 9.4421, |
|
"step": 25625 |
|
}, |
|
{ |
|
"epoch": 4.6050269299820465, |
|
"grad_norm": 32.76029586791992, |
|
"learning_rate": 2.7195691202872532e-05, |
|
"loss": 9.2211, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 4.6095152603231595, |
|
"grad_norm": 35.879295349121094, |
|
"learning_rate": 2.7189706762417715e-05, |
|
"loss": 9.4768, |
|
"step": 25675 |
|
}, |
|
{ |
|
"epoch": 4.614003590664273, |
|
"grad_norm": 34.31226348876953, |
|
"learning_rate": 2.7183722321962897e-05, |
|
"loss": 9.1382, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 4.618491921005386, |
|
"grad_norm": 33.70473861694336, |
|
"learning_rate": 2.717773788150808e-05, |
|
"loss": 9.1704, |
|
"step": 25725 |
|
}, |
|
{ |
|
"epoch": 4.622980251346499, |
|
"grad_norm": 36.1688232421875, |
|
"learning_rate": 2.7171753441053262e-05, |
|
"loss": 9.4746, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 4.627468581687612, |
|
"grad_norm": 35.33478927612305, |
|
"learning_rate": 2.7165769000598448e-05, |
|
"loss": 9.2567, |
|
"step": 25775 |
|
}, |
|
{ |
|
"epoch": 4.631956912028725, |
|
"grad_norm": 35.50520324707031, |
|
"learning_rate": 2.7159784560143627e-05, |
|
"loss": 9.5426, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 4.636445242369838, |
|
"grad_norm": 34.68144989013672, |
|
"learning_rate": 2.715380011968881e-05, |
|
"loss": 9.4237, |
|
"step": 25825 |
|
}, |
|
{ |
|
"epoch": 4.640933572710951, |
|
"grad_norm": 32.5733528137207, |
|
"learning_rate": 2.714781567923399e-05, |
|
"loss": 9.2993, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 4.645421903052065, |
|
"grad_norm": 34.17429733276367, |
|
"learning_rate": 2.714207061639737e-05, |
|
"loss": 9.4056, |
|
"step": 25875 |
|
}, |
|
{ |
|
"epoch": 4.649910233393178, |
|
"grad_norm": 49.32793045043945, |
|
"learning_rate": 2.713608617594255e-05, |
|
"loss": 9.3828, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 4.654398563734291, |
|
"grad_norm": 35.83115768432617, |
|
"learning_rate": 2.713010173548773e-05, |
|
"loss": 9.4779, |
|
"step": 25925 |
|
}, |
|
{ |
|
"epoch": 4.658886894075404, |
|
"grad_norm": 35.35591125488281, |
|
"learning_rate": 2.7124117295032915e-05, |
|
"loss": 9.4844, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 4.663375224416517, |
|
"grad_norm": 35.725494384765625, |
|
"learning_rate": 2.7118132854578098e-05, |
|
"loss": 9.1772, |
|
"step": 25975 |
|
}, |
|
{ |
|
"epoch": 4.667863554757631, |
|
"grad_norm": 34.3475227355957, |
|
"learning_rate": 2.711214841412328e-05, |
|
"loss": 9.5834, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.6723518850987436, |
|
"grad_norm": 35.19342041015625, |
|
"learning_rate": 2.7106163973668463e-05, |
|
"loss": 9.1603, |
|
"step": 26025 |
|
}, |
|
{ |
|
"epoch": 4.6768402154398565, |
|
"grad_norm": 37.154518127441406, |
|
"learning_rate": 2.7100179533213645e-05, |
|
"loss": 9.5956, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 4.6813285457809695, |
|
"grad_norm": 36.49668884277344, |
|
"learning_rate": 2.7094195092758827e-05, |
|
"loss": 9.5274, |
|
"step": 26075 |
|
}, |
|
{ |
|
"epoch": 4.685816876122082, |
|
"grad_norm": 34.92998504638672, |
|
"learning_rate": 2.7088210652304013e-05, |
|
"loss": 9.5255, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 4.690305206463195, |
|
"grad_norm": 32.61775207519531, |
|
"learning_rate": 2.7082226211849192e-05, |
|
"loss": 9.4236, |
|
"step": 26125 |
|
}, |
|
{ |
|
"epoch": 4.694793536804308, |
|
"grad_norm": 35.2857666015625, |
|
"learning_rate": 2.7076241771394374e-05, |
|
"loss": 9.4578, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 4.699281867145422, |
|
"grad_norm": 37.08427429199219, |
|
"learning_rate": 2.7070257330939557e-05, |
|
"loss": 9.5587, |
|
"step": 26175 |
|
}, |
|
{ |
|
"epoch": 4.703770197486535, |
|
"grad_norm": 33.42496109008789, |
|
"learning_rate": 2.706427289048474e-05, |
|
"loss": 9.6455, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 4.708258527827648, |
|
"grad_norm": 38.109561920166016, |
|
"learning_rate": 2.7058288450029925e-05, |
|
"loss": 9.4372, |
|
"step": 26225 |
|
}, |
|
{ |
|
"epoch": 4.712746858168761, |
|
"grad_norm": 34.73807907104492, |
|
"learning_rate": 2.7052304009575107e-05, |
|
"loss": 9.2668, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 4.717235188509874, |
|
"grad_norm": 35.39613723754883, |
|
"learning_rate": 2.704631956912029e-05, |
|
"loss": 9.3931, |
|
"step": 26275 |
|
}, |
|
{ |
|
"epoch": 4.721723518850988, |
|
"grad_norm": 35.8447380065918, |
|
"learning_rate": 2.7040335128665472e-05, |
|
"loss": 9.5386, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 4.726211849192101, |
|
"grad_norm": 38.25541305541992, |
|
"learning_rate": 2.703435068821065e-05, |
|
"loss": 9.2727, |
|
"step": 26325 |
|
}, |
|
{ |
|
"epoch": 4.730700179533214, |
|
"grad_norm": 35.903507232666016, |
|
"learning_rate": 2.7028366247755833e-05, |
|
"loss": 9.7039, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 4.735188509874327, |
|
"grad_norm": 35.6234130859375, |
|
"learning_rate": 2.702238180730102e-05, |
|
"loss": 9.6165, |
|
"step": 26375 |
|
}, |
|
{ |
|
"epoch": 4.73967684021544, |
|
"grad_norm": 37.08405303955078, |
|
"learning_rate": 2.70163973668462e-05, |
|
"loss": 9.6926, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 4.744165170556553, |
|
"grad_norm": 32.01731491088867, |
|
"learning_rate": 2.7010412926391384e-05, |
|
"loss": 9.3151, |
|
"step": 26425 |
|
}, |
|
{ |
|
"epoch": 4.748653500897666, |
|
"grad_norm": 37.30953598022461, |
|
"learning_rate": 2.7004428485936566e-05, |
|
"loss": 9.4753, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 4.753141831238779, |
|
"grad_norm": 37.31596755981445, |
|
"learning_rate": 2.699844404548175e-05, |
|
"loss": 9.4824, |
|
"step": 26475 |
|
}, |
|
{ |
|
"epoch": 4.757630161579892, |
|
"grad_norm": 35.827213287353516, |
|
"learning_rate": 2.699245960502693e-05, |
|
"loss": 9.5776, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.762118491921005, |
|
"grad_norm": 39.54668045043945, |
|
"learning_rate": 2.6986475164572113e-05, |
|
"loss": 9.563, |
|
"step": 26525 |
|
}, |
|
{ |
|
"epoch": 4.766606822262118, |
|
"grad_norm": 32.41488265991211, |
|
"learning_rate": 2.6980490724117296e-05, |
|
"loss": 9.3826, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 4.771095152603231, |
|
"grad_norm": 36.029666900634766, |
|
"learning_rate": 2.6974506283662478e-05, |
|
"loss": 9.512, |
|
"step": 26575 |
|
}, |
|
{ |
|
"epoch": 4.775583482944345, |
|
"grad_norm": 32.85836410522461, |
|
"learning_rate": 2.696852184320766e-05, |
|
"loss": 9.4177, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 4.780071813285458, |
|
"grad_norm": 32.541988372802734, |
|
"learning_rate": 2.6962537402752843e-05, |
|
"loss": 9.3833, |
|
"step": 26625 |
|
}, |
|
{ |
|
"epoch": 4.784560143626571, |
|
"grad_norm": 35.42625045776367, |
|
"learning_rate": 2.695655296229803e-05, |
|
"loss": 9.5967, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 4.789048473967684, |
|
"grad_norm": 39.130592346191406, |
|
"learning_rate": 2.695056852184321e-05, |
|
"loss": 9.4185, |
|
"step": 26675 |
|
}, |
|
{ |
|
"epoch": 4.793536804308797, |
|
"grad_norm": 37.135032653808594, |
|
"learning_rate": 2.694458408138839e-05, |
|
"loss": 9.4166, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 4.79802513464991, |
|
"grad_norm": 32.95221710205078, |
|
"learning_rate": 2.6938599640933572e-05, |
|
"loss": 9.5325, |
|
"step": 26725 |
|
}, |
|
{ |
|
"epoch": 4.802513464991024, |
|
"grad_norm": 34.23844528198242, |
|
"learning_rate": 2.6932615200478754e-05, |
|
"loss": 9.5948, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 4.807001795332137, |
|
"grad_norm": 36.37213134765625, |
|
"learning_rate": 2.6926630760023937e-05, |
|
"loss": 9.1881, |
|
"step": 26775 |
|
}, |
|
{ |
|
"epoch": 4.81149012567325, |
|
"grad_norm": 37.12689971923828, |
|
"learning_rate": 2.6920646319569123e-05, |
|
"loss": 9.4443, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 4.815978456014363, |
|
"grad_norm": 30.90703582763672, |
|
"learning_rate": 2.6914661879114305e-05, |
|
"loss": 9.1284, |
|
"step": 26825 |
|
}, |
|
{ |
|
"epoch": 4.8204667863554755, |
|
"grad_norm": 34.2583122253418, |
|
"learning_rate": 2.6908677438659487e-05, |
|
"loss": 9.1908, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 4.8249551166965885, |
|
"grad_norm": 36.532203674316406, |
|
"learning_rate": 2.690269299820467e-05, |
|
"loss": 9.3999, |
|
"step": 26875 |
|
}, |
|
{ |
|
"epoch": 4.829443447037702, |
|
"grad_norm": 36.42616271972656, |
|
"learning_rate": 2.689670855774985e-05, |
|
"loss": 9.448, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 4.833931777378815, |
|
"grad_norm": 37.477928161621094, |
|
"learning_rate": 2.689072411729503e-05, |
|
"loss": 9.5414, |
|
"step": 26925 |
|
}, |
|
{ |
|
"epoch": 4.838420107719928, |
|
"grad_norm": 36.44997024536133, |
|
"learning_rate": 2.6884739676840217e-05, |
|
"loss": 9.406, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 4.842908438061041, |
|
"grad_norm": 34.89653396606445, |
|
"learning_rate": 2.68787552363854e-05, |
|
"loss": 9.8373, |
|
"step": 26975 |
|
}, |
|
{ |
|
"epoch": 4.847396768402154, |
|
"grad_norm": 34.84752655029297, |
|
"learning_rate": 2.687277079593058e-05, |
|
"loss": 9.3907, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.851885098743267, |
|
"grad_norm": 33.79581069946289, |
|
"learning_rate": 2.6866786355475764e-05, |
|
"loss": 9.4444, |
|
"step": 27025 |
|
}, |
|
{ |
|
"epoch": 4.856373429084381, |
|
"grad_norm": 34.37635040283203, |
|
"learning_rate": 2.6860801915020946e-05, |
|
"loss": 9.5671, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 4.860861759425494, |
|
"grad_norm": 35.371822357177734, |
|
"learning_rate": 2.685481747456613e-05, |
|
"loss": 9.5015, |
|
"step": 27075 |
|
}, |
|
{ |
|
"epoch": 4.865350089766607, |
|
"grad_norm": 38.23295211791992, |
|
"learning_rate": 2.684883303411131e-05, |
|
"loss": 9.3564, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 4.86983842010772, |
|
"grad_norm": 36.58891296386719, |
|
"learning_rate": 2.6842848593656493e-05, |
|
"loss": 9.6106, |
|
"step": 27125 |
|
}, |
|
{ |
|
"epoch": 4.874326750448833, |
|
"grad_norm": 38.73398208618164, |
|
"learning_rate": 2.6836864153201676e-05, |
|
"loss": 9.5011, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 4.878815080789947, |
|
"grad_norm": 34.134403228759766, |
|
"learning_rate": 2.6830879712746858e-05, |
|
"loss": 9.5416, |
|
"step": 27175 |
|
}, |
|
{ |
|
"epoch": 4.88330341113106, |
|
"grad_norm": 34.43739318847656, |
|
"learning_rate": 2.682489527229204e-05, |
|
"loss": 9.4023, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 4.8877917414721725, |
|
"grad_norm": 33.59444808959961, |
|
"learning_rate": 2.6818910831837226e-05, |
|
"loss": 9.7006, |
|
"step": 27225 |
|
}, |
|
{ |
|
"epoch": 4.8922800718132855, |
|
"grad_norm": 37.26764678955078, |
|
"learning_rate": 2.681292639138241e-05, |
|
"loss": 9.5618, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 4.8967684021543985, |
|
"grad_norm": 34.99287033081055, |
|
"learning_rate": 2.680694195092759e-05, |
|
"loss": 9.181, |
|
"step": 27275 |
|
}, |
|
{ |
|
"epoch": 4.901256732495511, |
|
"grad_norm": 37.341121673583984, |
|
"learning_rate": 2.680095751047277e-05, |
|
"loss": 9.0808, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 4.905745062836624, |
|
"grad_norm": 31.948301315307617, |
|
"learning_rate": 2.6794973070017952e-05, |
|
"loss": 9.2991, |
|
"step": 27325 |
|
}, |
|
{ |
|
"epoch": 4.910233393177738, |
|
"grad_norm": 31.787092208862305, |
|
"learning_rate": 2.6788988629563134e-05, |
|
"loss": 9.7063, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 4.914721723518851, |
|
"grad_norm": 33.72126007080078, |
|
"learning_rate": 2.678300418910832e-05, |
|
"loss": 9.445, |
|
"step": 27375 |
|
}, |
|
{ |
|
"epoch": 4.919210053859964, |
|
"grad_norm": 35.92157745361328, |
|
"learning_rate": 2.6777019748653503e-05, |
|
"loss": 9.8848, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 4.923698384201077, |
|
"grad_norm": 35.00507354736328, |
|
"learning_rate": 2.6771035308198685e-05, |
|
"loss": 9.4444, |
|
"step": 27425 |
|
}, |
|
{ |
|
"epoch": 4.92818671454219, |
|
"grad_norm": 35.75861358642578, |
|
"learning_rate": 2.6765050867743867e-05, |
|
"loss": 9.8597, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 4.932675044883304, |
|
"grad_norm": 37.223167419433594, |
|
"learning_rate": 2.675906642728905e-05, |
|
"loss": 9.5199, |
|
"step": 27475 |
|
}, |
|
{ |
|
"epoch": 4.937163375224417, |
|
"grad_norm": 34.89140319824219, |
|
"learning_rate": 2.675308198683423e-05, |
|
"loss": 9.5326, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.94165170556553, |
|
"grad_norm": 38.68606948852539, |
|
"learning_rate": 2.6747097546379414e-05, |
|
"loss": 9.6149, |
|
"step": 27525 |
|
}, |
|
{ |
|
"epoch": 4.946140035906643, |
|
"grad_norm": 35.76506805419922, |
|
"learning_rate": 2.6741113105924597e-05, |
|
"loss": 9.2212, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 4.950628366247756, |
|
"grad_norm": 34.05699920654297, |
|
"learning_rate": 2.673512866546978e-05, |
|
"loss": 9.3154, |
|
"step": 27575 |
|
}, |
|
{ |
|
"epoch": 4.955116696588869, |
|
"grad_norm": 35.53427505493164, |
|
"learning_rate": 2.672914422501496e-05, |
|
"loss": 9.6563, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 4.959605026929982, |
|
"grad_norm": 33.76486587524414, |
|
"learning_rate": 2.6723159784560144e-05, |
|
"loss": 9.5415, |
|
"step": 27625 |
|
}, |
|
{ |
|
"epoch": 4.9640933572710955, |
|
"grad_norm": 33.02145767211914, |
|
"learning_rate": 2.671717534410533e-05, |
|
"loss": 9.3559, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 4.968581687612208, |
|
"grad_norm": 38.21705627441406, |
|
"learning_rate": 2.6711190903650512e-05, |
|
"loss": 9.6168, |
|
"step": 27675 |
|
}, |
|
{ |
|
"epoch": 4.973070017953321, |
|
"grad_norm": 37.642417907714844, |
|
"learning_rate": 2.670520646319569e-05, |
|
"loss": 9.5134, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 4.977558348294434, |
|
"grad_norm": 33.83686828613281, |
|
"learning_rate": 2.6699222022740873e-05, |
|
"loss": 9.4817, |
|
"step": 27725 |
|
}, |
|
{ |
|
"epoch": 4.982046678635547, |
|
"grad_norm": 34.98296356201172, |
|
"learning_rate": 2.6693237582286056e-05, |
|
"loss": 9.6689, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 4.986535008976661, |
|
"grad_norm": 35.06865692138672, |
|
"learning_rate": 2.6687253141831238e-05, |
|
"loss": 9.4976, |
|
"step": 27775 |
|
}, |
|
{ |
|
"epoch": 4.991023339317774, |
|
"grad_norm": 34.293113708496094, |
|
"learning_rate": 2.6681268701376424e-05, |
|
"loss": 9.3905, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 4.995511669658887, |
|
"grad_norm": 34.20943832397461, |
|
"learning_rate": 2.6675284260921606e-05, |
|
"loss": 9.6572, |
|
"step": 27825 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 61.015777587890625, |
|
"learning_rate": 2.666929982046679e-05, |
|
"loss": 9.5296, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.06898592786359384, |
|
"eval_f1_macro": 0.011309476538890082, |
|
"eval_f1_micro": 0.06898592786359384, |
|
"eval_f1_weighted": 0.047429091612135786, |
|
"eval_loss": 6.66681432723999, |
|
"eval_precision_macro": 0.010315927310867975, |
|
"eval_precision_micro": 0.06898592786359384, |
|
"eval_precision_weighted": 0.041722656687824905, |
|
"eval_recall_macro": 0.016669465188724426, |
|
"eval_recall_micro": 0.06898592786359384, |
|
"eval_recall_weighted": 0.06898592786359384, |
|
"eval_runtime": 83.3116, |
|
"eval_samples_per_second": 628.64, |
|
"eval_steps_per_second": 19.649, |
|
"step": 27850 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 139250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.8218004536284e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|