{
  "best_metric": 0.8567568925713644,
  "best_model_checkpoint": "NoCurbRamp/dinov2/checkpoint-4598",
  "epoch": 22.0,
  "eval_steps": 500,
  "global_step": 4598,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.04790419161676647, "grad_norm": 102.78497314453125, "learning_rate": 1.9230769230769234e-07, "loss": 3.3285, "step": 10 },
    { "epoch": 0.09580838323353294, "grad_norm": 108.06490325927734, "learning_rate": 3.846153846153847e-07, "loss": 3.3413, "step": 20 },
    { "epoch": 0.1437125748502994, "grad_norm": 168.06533813476562, "learning_rate": 5.76923076923077e-07, "loss": 3.1682, "step": 30 },
    { "epoch": 0.19161676646706588, "grad_norm": 107.77788543701172, "learning_rate": 7.692307692307694e-07, "loss": 2.9011, "step": 40 },
    { "epoch": 0.23952095808383234, "grad_norm": 73.95760345458984, "learning_rate": 9.615384615384617e-07, "loss": 2.5857, "step": 50 },
    { "epoch": 0.2874251497005988, "grad_norm": 105.05018615722656, "learning_rate": 1.153846153846154e-06, "loss": 2.5002, "step": 60 },
    { "epoch": 0.33532934131736525, "grad_norm": 674.4759521484375, "learning_rate": 1.3461538461538462e-06, "loss": 2.4689, "step": 70 },
    { "epoch": 0.38323353293413176, "grad_norm": 141.63890075683594, "learning_rate": 1.5384615384615387e-06, "loss": 2.3887, "step": 80 },
    { "epoch": 0.4311377245508982, "grad_norm": 160.6332550048828, "learning_rate": 1.7307692307692308e-06, "loss": 2.2373, "step": 90 },
    { "epoch": 0.47904191616766467, "grad_norm": 702.2935180664062, "learning_rate": 1.9230769230769234e-06, "loss": 2.1801, "step": 100 },
    { "epoch": 0.5269461077844312, "grad_norm": 86.12798309326172, "learning_rate": 2.1153846153846155e-06, "loss": 2.0948, "step": 110 },
    { "epoch": 0.5748502994011976, "grad_norm": 112.54278564453125, "learning_rate": 2.307692307692308e-06, "loss": 2.0035, "step": 120 },
    { "epoch": 0.6227544910179641, "grad_norm": 187.13577270507812, "learning_rate": 2.5e-06, "loss": 2.13, "step": 130 },
    { "epoch": 0.6706586826347305, "grad_norm": 74.73124694824219, "learning_rate": 2.6923076923076923e-06, "loss": 2.0507, "step": 140 },
    { "epoch": 0.718562874251497, "grad_norm": 111.90137481689453, "learning_rate": 2.8846153846153845e-06, "loss": 1.9312, "step": 150 },
    { "epoch": 0.7664670658682635, "grad_norm": 162.74754333496094, "learning_rate": 3.0769230769230774e-06, "loss": 2.3011, "step": 160 },
    { "epoch": 0.8143712574850299, "grad_norm": 104.16117095947266, "learning_rate": 3.2692307692307696e-06, "loss": 1.8005, "step": 170 },
    { "epoch": 0.8622754491017964, "grad_norm": 97.45256805419922, "learning_rate": 3.4615384615384617e-06, "loss": 1.8074, "step": 180 },
    { "epoch": 0.9101796407185628, "grad_norm": 253.20359802246094, "learning_rate": 3.653846153846154e-06, "loss": 1.985, "step": 190 },
    { "epoch": 0.9580838323353293, "grad_norm": 157.43246459960938, "learning_rate": 3.846153846153847e-06, "loss": 1.7699, "step": 200 },
    { "epoch": 1.0, "eval_loss": 0.4299589693546295, "eval_macro_f1": 0.7858166164735507, "eval_runtime": 81.1893, "eval_samples_per_second": 20.582, "eval_steps_per_second": 2.574, "step": 209 },
    { "epoch": 1.0047904191616766, "grad_norm": 79.56900787353516, "learning_rate": 4.0384615384615385e-06, "loss": 2.0621, "step": 210 },
    { "epoch": 1.0526946107784432, "grad_norm": 80.78365325927734, "learning_rate": 4.230769230769231e-06, "loss": 1.9002, "step": 220 },
    { "epoch": 1.1005988023952096, "grad_norm": 131.77723693847656, "learning_rate": 4.423076923076924e-06, "loss": 1.8801, "step": 230 },
    { "epoch": 1.148502994011976, "grad_norm": 59.51331329345703, "learning_rate": 4.615384615384616e-06, "loss": 1.6218, "step": 240 },
    { "epoch": 1.1964071856287426, "grad_norm": 59.175621032714844, "learning_rate": 4.807692307692308e-06, "loss": 1.6202, "step": 250 },
    { "epoch": 1.244311377245509, "grad_norm": 175.007568359375, "learning_rate": 5e-06, "loss": 1.5911, "step": 260 },
    { "epoch": 1.2922155688622754, "grad_norm": 112.08436584472656, "learning_rate": 5.192307692307693e-06, "loss": 1.8472, "step": 270 },
    { "epoch": 1.340119760479042, "grad_norm": 49.89094924926758, "learning_rate": 5.384615384615385e-06, "loss": 1.7157, "step": 280 },
    { "epoch": 1.3880239520958084, "grad_norm": 105.93860626220703, "learning_rate": 5.576923076923077e-06, "loss": 1.7682, "step": 290 },
    { "epoch": 1.4359281437125748, "grad_norm": 73.6728515625, "learning_rate": 5.769230769230769e-06, "loss": 1.6694, "step": 300 },
    { "epoch": 1.4838323353293412, "grad_norm": 88.21549224853516, "learning_rate": 5.961538461538462e-06, "loss": 1.437, "step": 310 },
    { "epoch": 1.5317365269461076, "grad_norm": 95.41169738769531, "learning_rate": 6.153846153846155e-06, "loss": 1.6035, "step": 320 },
    { "epoch": 1.5796407185628742, "grad_norm": 55.19960021972656, "learning_rate": 6.3461538461538466e-06, "loss": 1.6455, "step": 330 },
    { "epoch": 1.6275449101796409, "grad_norm": 57.696773529052734, "learning_rate": 6.538461538461539e-06, "loss": 1.7315, "step": 340 },
    { "epoch": 1.6754491017964073, "grad_norm": 241.8914337158203, "learning_rate": 6.730769230769232e-06, "loss": 1.8734, "step": 350 },
    { "epoch": 1.7233532934131737, "grad_norm": 129.99835205078125, "learning_rate": 6.923076923076923e-06, "loss": 1.7628, "step": 360 },
    { "epoch": 1.77125748502994, "grad_norm": 125.50336456298828, "learning_rate": 7.115384615384616e-06, "loss": 1.5134, "step": 370 },
    { "epoch": 1.8191616766467065, "grad_norm": 66.20431518554688, "learning_rate": 7.307692307692308e-06, "loss": 1.7175, "step": 380 },
    { "epoch": 1.867065868263473, "grad_norm": 45.693199157714844, "learning_rate": 7.500000000000001e-06, "loss": 1.5907, "step": 390 },
    { "epoch": 1.9149700598802395, "grad_norm": 57.71452713012695, "learning_rate": 7.692307692307694e-06, "loss": 1.6598, "step": 400 },
    { "epoch": 1.9628742514970061, "grad_norm": 87.35894012451172, "learning_rate": 7.884615384615384e-06, "loss": 1.6557, "step": 410 },
    { "epoch": 2.0, "eval_loss": 0.41893911361694336, "eval_macro_f1": 0.8138112665329834, "eval_runtime": 41.195, "eval_samples_per_second": 40.563, "eval_steps_per_second": 5.073, "step": 418 },
    { "epoch": 2.009580838323353, "grad_norm": 71.44452667236328, "learning_rate": 8.076923076923077e-06, "loss": 1.7133, "step": 420 },
    { "epoch": 2.05748502994012, "grad_norm": 53.76054000854492, "learning_rate": 8.26923076923077e-06, "loss": 1.648, "step": 430 },
    { "epoch": 2.1053892215568863, "grad_norm": 1027.304931640625, "learning_rate": 8.461538461538462e-06, "loss": 1.7138, "step": 440 },
    { "epoch": 2.1532934131736527, "grad_norm": 226.78587341308594, "learning_rate": 8.653846153846155e-06, "loss": 1.589, "step": 450 },
    { "epoch": 2.201197604790419, "grad_norm": 83.36427307128906, "learning_rate": 8.846153846153847e-06, "loss": 1.4567, "step": 460 },
    { "epoch": 2.2491017964071855, "grad_norm": 56.77524185180664, "learning_rate": 9.03846153846154e-06, "loss": 1.6047, "step": 470 },
    { "epoch": 2.297005988023952, "grad_norm": 108.68399047851562, "learning_rate": 9.230769230769232e-06, "loss": 1.9924, "step": 480 },
    { "epoch": 2.344910179640719, "grad_norm": 82.6905288696289, "learning_rate": 9.423076923076923e-06, "loss": 2.0933, "step": 490 },
    { "epoch": 2.392814371257485, "grad_norm": 29.161964416503906, "learning_rate": 9.615384615384616e-06, "loss": 1.605, "step": 500 },
    { "epoch": 2.4407185628742516, "grad_norm": 37.83173370361328, "learning_rate": 9.807692307692308e-06, "loss": 1.7725, "step": 510 },
    { "epoch": 2.488622754491018, "grad_norm": 49.54322052001953, "learning_rate": 1e-05, "loss": 1.4123, "step": 520 },
    { "epoch": 2.5365269461077844, "grad_norm": 67.74500274658203, "learning_rate": 9.97863247863248e-06, "loss": 1.5403, "step": 530 },
    { "epoch": 2.584431137724551, "grad_norm": 55.931190490722656, "learning_rate": 9.957264957264958e-06, "loss": 1.4283, "step": 540 },
    { "epoch": 2.632335329341317, "grad_norm": 47.531734466552734, "learning_rate": 9.935897435897437e-06, "loss": 1.4491, "step": 550 },
    { "epoch": 2.680239520958084, "grad_norm": 30.019596099853516, "learning_rate": 9.914529914529915e-06, "loss": 1.4842, "step": 560 },
    { "epoch": 2.7281437125748504, "grad_norm": 67.44615173339844, "learning_rate": 9.893162393162394e-06, "loss": 1.6565, "step": 570 },
    { "epoch": 2.776047904191617, "grad_norm": 54.778350830078125, "learning_rate": 9.871794871794872e-06, "loss": 1.4563, "step": 580 },
    { "epoch": 2.8239520958083832, "grad_norm": 54.04096221923828, "learning_rate": 9.850427350427351e-06, "loss": 1.5794, "step": 590 },
    { "epoch": 2.8718562874251496, "grad_norm": 35.025691986083984, "learning_rate": 9.82905982905983e-06, "loss": 1.475, "step": 600 },
    { "epoch": 2.919760479041916, "grad_norm": 94.27930450439453, "learning_rate": 9.807692307692308e-06, "loss": 1.7062, "step": 610 },
    { "epoch": 2.9676646706586824, "grad_norm": 83.62496185302734, "learning_rate": 9.786324786324787e-06, "loss": 1.3654, "step": 620 },
    { "epoch": 3.0, "eval_loss": 0.4371185898780823, "eval_macro_f1": 0.7854326010045019, "eval_runtime": 41.3851, "eval_samples_per_second": 40.377, "eval_steps_per_second": 5.05, "step": 627 },
    { "epoch": 3.01437125748503, "grad_norm": 270.2382507324219, "learning_rate": 9.764957264957265e-06, "loss": 1.4214, "step": 630 },
    { "epoch": 3.0622754491017963, "grad_norm": 39.00271224975586, "learning_rate": 9.743589743589744e-06, "loss": 1.4744, "step": 640 },
    { "epoch": 3.1101796407185627, "grad_norm": 32.14289474487305, "learning_rate": 9.722222222222223e-06, "loss": 1.6232, "step": 650 },
    { "epoch": 3.1580838323353295, "grad_norm": 39.31702423095703, "learning_rate": 9.700854700854701e-06, "loss": 1.4988, "step": 660 },
    { "epoch": 3.205988023952096, "grad_norm": 65.2395248413086, "learning_rate": 9.67948717948718e-06, "loss": 1.26, "step": 670 },
    { "epoch": 3.2538922155688623, "grad_norm": 57.717525482177734, "learning_rate": 9.658119658119659e-06, "loss": 1.331, "step": 680 },
    { "epoch": 3.3017964071856287, "grad_norm": 69.02164459228516, "learning_rate": 9.636752136752137e-06, "loss": 1.3799, "step": 690 },
    { "epoch": 3.349700598802395, "grad_norm": 60.07762145996094, "learning_rate": 9.615384615384616e-06, "loss": 1.6609, "step": 700 },
    { "epoch": 3.3976047904191615, "grad_norm": 44.97191619873047, "learning_rate": 9.594017094017094e-06, "loss": 1.4233, "step": 710 },
    { "epoch": 3.4455089820359284, "grad_norm": 65.7463150024414, "learning_rate": 9.572649572649575e-06, "loss": 1.3724, "step": 720 },
    { "epoch": 3.4934131736526948, "grad_norm": 57.15474319458008, "learning_rate": 9.551282051282053e-06, "loss": 1.6036, "step": 730 },
    { "epoch": 3.541317365269461, "grad_norm": 64.17628479003906, "learning_rate": 9.52991452991453e-06, "loss": 1.3152, "step": 740 },
    { "epoch": 3.5892215568862276, "grad_norm": 69.88690948486328, "learning_rate": 9.508547008547009e-06, "loss": 1.4094, "step": 750 },
    { "epoch": 3.637125748502994, "grad_norm": 98.0072021484375, "learning_rate": 9.487179487179487e-06, "loss": 1.441, "step": 760 },
    { "epoch": 3.6850299401197604, "grad_norm": 47.96156692504883, "learning_rate": 9.465811965811966e-06, "loss": 1.7361, "step": 770 },
    { "epoch": 3.7329341317365268, "grad_norm": 52.17365646362305, "learning_rate": 9.444444444444445e-06, "loss": 1.1775, "step": 780 },
    { "epoch": 3.7808383233532936, "grad_norm": 552.5225219726562, "learning_rate": 9.423076923076923e-06, "loss": 1.2409, "step": 790 },
    { "epoch": 3.82874251497006, "grad_norm": 34.344207763671875, "learning_rate": 9.401709401709402e-06, "loss": 1.4177, "step": 800 },
    { "epoch": 3.8766467065868264, "grad_norm": 112.69720458984375, "learning_rate": 9.38034188034188e-06, "loss": 1.4948, "step": 810 },
    { "epoch": 3.924550898203593, "grad_norm": 94.27716064453125, "learning_rate": 9.358974358974359e-06, "loss": 1.5927, "step": 820 },
    { "epoch": 3.972455089820359, "grad_norm": 48.06044387817383, "learning_rate": 9.33760683760684e-06, "loss": 1.4711, "step": 830 },
    { "epoch": 4.0, "eval_loss": 0.39647647738456726, "eval_macro_f1": 0.8267623951048628, "eval_runtime": 44.3505, "eval_samples_per_second": 37.677, "eval_steps_per_second": 4.712, "step": 836 },
    { "epoch": 4.019161676646706, "grad_norm": 130.36834716796875, "learning_rate": 9.316239316239318e-06, "loss": 1.8227, "step": 840 },
    { "epoch": 4.067065868263473, "grad_norm": 52.67673110961914, "learning_rate": 9.294871794871796e-06, "loss": 1.2007, "step": 850 },
    { "epoch": 4.11497005988024, "grad_norm": 38.394187927246094, "learning_rate": 9.273504273504275e-06, "loss": 1.2584, "step": 860 },
    { "epoch": 4.162874251497006, "grad_norm": 293.8218994140625, "learning_rate": 9.252136752136754e-06, "loss": 1.1846, "step": 870 },
    { "epoch": 4.210778443113773, "grad_norm": 85.88053894042969, "learning_rate": 9.230769230769232e-06, "loss": 1.3184, "step": 880 },
    { "epoch": 4.258682634730539, "grad_norm": 45.364723205566406, "learning_rate": 9.20940170940171e-06, "loss": 1.2298, "step": 890 },
    { "epoch": 4.3065868263473055, "grad_norm": 43.60586929321289, "learning_rate": 9.188034188034188e-06, "loss": 1.1389, "step": 900 },
    { "epoch": 4.3544910179640715, "grad_norm": 85.17373657226562, "learning_rate": 9.166666666666666e-06, "loss": 1.1531, "step": 910 },
    { "epoch": 4.402395209580838, "grad_norm": 38.943946838378906, "learning_rate": 9.145299145299145e-06, "loss": 1.2874, "step": 920 },
    { "epoch": 4.450299401197605, "grad_norm": 29.44234275817871, "learning_rate": 9.123931623931624e-06, "loss": 1.3225, "step": 930 },
    { "epoch": 4.498203592814371, "grad_norm": 119.2593002319336, "learning_rate": 9.102564102564104e-06, "loss": 1.5807, "step": 940 },
    { "epoch": 4.546107784431138, "grad_norm": 30.237037658691406, "learning_rate": 9.081196581196583e-06, "loss": 0.9587, "step": 950 },
    { "epoch": 4.594011976047904, "grad_norm": 100.8051986694336, "learning_rate": 9.059829059829061e-06, "loss": 1.1353, "step": 960 },
    { "epoch": 4.641916167664671, "grad_norm": 64.73004150390625, "learning_rate": 9.03846153846154e-06, "loss": 1.271, "step": 970 },
    { "epoch": 4.689820359281438, "grad_norm": 55.64018249511719, "learning_rate": 9.017094017094018e-06, "loss": 1.2463, "step": 980 },
    { "epoch": 4.7377245508982035, "grad_norm": 32.8985710144043, "learning_rate": 8.995726495726497e-06, "loss": 1.2084, "step": 990 },
    { "epoch": 4.78562874251497, "grad_norm": 71.72270202636719, "learning_rate": 8.974358974358976e-06, "loss": 0.9659, "step": 1000 },
    { "epoch": 4.833532934131736, "grad_norm": 60.666831970214844, "learning_rate": 8.952991452991454e-06, "loss": 1.037, "step": 1010 },
    { "epoch": 4.881437125748503, "grad_norm": 65.69544219970703, "learning_rate": 8.931623931623933e-06, "loss": 1.4125, "step": 1020 },
    { "epoch": 4.929341317365269, "grad_norm": 78.90387725830078, "learning_rate": 8.910256410256411e-06, "loss": 1.1816, "step": 1030 },
    { "epoch": 4.977245508982036, "grad_norm": 51.65810775756836, "learning_rate": 8.888888888888888e-06, "loss": 1.1849, "step": 1040 },
    { "epoch": 5.0, "eval_loss": 0.4067809581756592, "eval_macro_f1": 0.838654650788542, "eval_runtime": 53.3705, "eval_samples_per_second": 31.309, "eval_steps_per_second": 3.916, "step": 1045 },
    { "epoch": 5.023952095808383, "grad_norm": 61.138328552246094, "learning_rate": 8.867521367521369e-06, "loss": 1.1483, "step": 1050 },
    { "epoch": 5.07185628742515, "grad_norm": 61.026058197021484, "learning_rate": 8.846153846153847e-06, "loss": 0.7873, "step": 1060 },
    { "epoch": 5.119760479041916, "grad_norm": 100.50831604003906, "learning_rate": 8.824786324786326e-06, "loss": 0.9951, "step": 1070 },
    { "epoch": 5.167664670658683, "grad_norm": 70.29264068603516, "learning_rate": 8.803418803418804e-06, "loss": 1.1178, "step": 1080 },
    { "epoch": 5.2155688622754495, "grad_norm": 62.474613189697266, "learning_rate": 8.782051282051283e-06, "loss": 1.0041, "step": 1090 },
    { "epoch": 5.263473053892215, "grad_norm": 32.0721321105957, "learning_rate": 8.760683760683762e-06, "loss": 1.0825, "step": 1100 },
    { "epoch": 5.311377245508982, "grad_norm": 78.72416687011719, "learning_rate": 8.73931623931624e-06, "loss": 1.2565, "step": 1110 },
    { "epoch": 5.359281437125748, "grad_norm": 119.30426788330078, "learning_rate": 8.717948717948719e-06, "loss": 1.3718, "step": 1120 },
    { "epoch": 5.407185628742515, "grad_norm": 45.799991607666016, "learning_rate": 8.696581196581197e-06, "loss": 1.2321, "step": 1130 },
    { "epoch": 5.455089820359281, "grad_norm": 25.331722259521484, "learning_rate": 8.675213675213676e-06, "loss": 0.9521, "step": 1140 },
    { "epoch": 5.502994011976048, "grad_norm": 96.21393585205078, "learning_rate": 8.653846153846155e-06, "loss": 0.9443, "step": 1150 },
    { "epoch": 5.550898203592815, "grad_norm": 45.29869079589844, "learning_rate": 8.632478632478633e-06, "loss": 0.8862, "step": 1160 },
    { "epoch": 5.598802395209581, "grad_norm": 56.898780822753906, "learning_rate": 8.611111111111112e-06, "loss": 1.0529, "step": 1170 },
    { "epoch": 5.6467065868263475, "grad_norm": 63.27139663696289, "learning_rate": 8.58974358974359e-06, "loss": 1.0086, "step": 1180 },
    { "epoch": 5.6946107784431135, "grad_norm": 64.9346923828125, "learning_rate": 8.568376068376069e-06, "loss": 1.1777, "step": 1190 },
    { "epoch": 5.74251497005988, "grad_norm": 34.74249267578125, "learning_rate": 8.547008547008548e-06, "loss": 1.1921, "step": 1200 },
    { "epoch": 5.790419161676647, "grad_norm": 85.9405517578125, "learning_rate": 8.525641025641026e-06, "loss": 1.0274, "step": 1210 },
    { "epoch": 5.838323353293413, "grad_norm": 51.7358512878418, "learning_rate": 8.504273504273505e-06, "loss": 1.088, "step": 1220 },
    { "epoch": 5.88622754491018, "grad_norm": 43.08283615112305, "learning_rate": 8.482905982905983e-06, "loss": 0.8124, "step": 1230 },
    { "epoch": 5.934131736526946, "grad_norm": 90.1866455078125, "learning_rate": 8.461538461538462e-06, "loss": 1.0978, "step": 1240 },
    { "epoch": 5.982035928143713, "grad_norm": 31.511568069458008, "learning_rate": 8.44017094017094e-06, "loss": 1.2058, "step": 1250 },
    { "epoch": 6.0, "eval_loss": 0.40154314041137695, "eval_macro_f1": 0.828210643143795, "eval_runtime": 65.0826, "eval_samples_per_second": 25.675, "eval_steps_per_second": 3.211, "step": 1254 },
    { "epoch": 6.02874251497006, "grad_norm": 24.663558959960938, "learning_rate": 8.41880341880342e-06, "loss": 0.7289, "step": 1260 },
    { "epoch": 6.076646706586827, "grad_norm": 32.12767028808594, "learning_rate": 8.397435897435898e-06, "loss": 0.9549, "step": 1270 },
    { "epoch": 6.1245508982035926, "grad_norm": 45.44089126586914, "learning_rate": 8.376068376068377e-06, "loss": 0.8573, "step": 1280 },
    { "epoch": 6.172455089820359, "grad_norm": 95.96544647216797, "learning_rate": 8.354700854700855e-06, "loss": 0.7697, "step": 1290 },
    { "epoch": 6.220359281437125, "grad_norm": 88.79412841796875, "learning_rate": 8.333333333333334e-06, "loss": 0.9044, "step": 1300 },
    { "epoch": 6.268263473053892, "grad_norm": 57.126304626464844, "learning_rate": 8.311965811965812e-06, "loss": 0.7112, "step": 1310 },
    { "epoch": 6.316167664670659, "grad_norm": 76.69857788085938, "learning_rate": 8.290598290598293e-06, "loss": 0.7156, "step": 1320 },
    { "epoch": 6.364071856287425, "grad_norm": 54.02803039550781, "learning_rate": 8.26923076923077e-06, "loss": 0.8275, "step": 1330 },
    { "epoch": 6.411976047904192, "grad_norm": 60.05768966674805, "learning_rate": 8.247863247863248e-06, "loss": 0.8879, "step": 1340 },
    { "epoch": 6.459880239520958, "grad_norm": 80.0739517211914, "learning_rate": 8.226495726495727e-06, "loss": 0.9576, "step": 1350 },
    { "epoch": 6.507784431137725, "grad_norm": 44.03201675415039, "learning_rate": 8.205128205128205e-06, "loss": 0.996, "step": 1360 },
    { "epoch": 6.5556886227544915, "grad_norm": 54.739044189453125, "learning_rate": 8.183760683760684e-06, "loss": 0.8699, "step": 1370 },
    { "epoch": 6.6035928143712574, "grad_norm": 44.991119384765625, "learning_rate": 8.162393162393163e-06, "loss": 0.894, "step": 1380 },
    { "epoch": 6.651497005988024, "grad_norm": 58.03591537475586, "learning_rate": 8.141025641025641e-06, "loss": 0.9078, "step": 1390 },
    { "epoch": 6.69940119760479, "grad_norm": 51.95082473754883, "learning_rate": 8.11965811965812e-06, "loss": 0.8032, "step": 1400 },
    { "epoch": 6.747305389221557, "grad_norm": 47.06685256958008, "learning_rate": 8.098290598290598e-06, "loss": 0.8379, "step": 1410 },
    { "epoch": 6.795209580838323, "grad_norm": 32.85672378540039, "learning_rate": 8.076923076923077e-06, "loss": 0.6956, "step": 1420 },
    { "epoch": 6.84311377245509, "grad_norm": 72.61941528320312, "learning_rate": 8.055555555555557e-06, "loss": 0.8305, "step": 1430 },
    { "epoch": 6.891017964071857, "grad_norm": 50.65372085571289, "learning_rate": 8.034188034188036e-06, "loss": 1.294, "step": 1440 },
    { "epoch": 6.938922155688623, "grad_norm": 30.090587615966797, "learning_rate": 8.012820512820515e-06, "loss": 0.7265, "step": 1450 },
    { "epoch": 6.9868263473053895, "grad_norm": 61.50442886352539, "learning_rate": 7.991452991452993e-06, "loss": 0.9642, "step": 1460 },
    { "epoch": 7.0, "eval_loss": 0.4302707612514496, "eval_macro_f1": 0.8384768009768009, "eval_runtime": 44.1392, "eval_samples_per_second": 37.857, "eval_steps_per_second": 4.735, "step": 1463 },
    { "epoch": 7.0335329341317365, "grad_norm": 51.26984786987305, "learning_rate": 7.970085470085472e-06, "loss": 0.5057, "step": 1470 },
    { "epoch": 7.081437125748503, "grad_norm": 40.46345138549805, "learning_rate": 7.948717948717949e-06, "loss": 0.6524, "step": 1480 },
    { "epoch": 7.129341317365269, "grad_norm": 86.76705169677734, "learning_rate": 7.927350427350427e-06, "loss": 0.7067, "step": 1490 },
    { "epoch": 7.177245508982036, "grad_norm": 47.121002197265625, "learning_rate": 7.905982905982906e-06, "loss": 0.6437, "step": 1500 },
    { "epoch": 7.225149700598802, "grad_norm": 63.37303924560547, "learning_rate": 7.884615384615384e-06, "loss": 0.7471, "step": 1510 },
    { "epoch": 7.273053892215569, "grad_norm": 47.41765594482422, "learning_rate": 7.863247863247863e-06, "loss": 0.8865, "step": 1520 },
    { "epoch": 7.320958083832335, "grad_norm": 49.318092346191406, "learning_rate": 7.841880341880342e-06, "loss": 0.804, "step": 1530 },
    { "epoch": 7.368862275449102, "grad_norm": 45.841331481933594, "learning_rate": 7.820512820512822e-06, "loss": 0.8108, "step": 1540 },
    { "epoch": 7.416766467065869, "grad_norm": 50.48727035522461, "learning_rate": 7.7991452991453e-06, "loss": 0.8056, "step": 1550 },
    { "epoch": 7.464670658682635, "grad_norm": 32.53761291503906, "learning_rate": 7.77777777777778e-06, "loss": 0.652, "step": 1560 },
    { "epoch": 7.512574850299401, "grad_norm": 62.68672561645508, "learning_rate": 7.756410256410258e-06, "loss": 1.0458, "step": 1570 },
    { "epoch": 7.560479041916167, "grad_norm": 45.77931594848633, "learning_rate": 7.735042735042736e-06, "loss": 0.712, "step": 1580 },
    { "epoch": 7.608383233532934, "grad_norm": 46.453208923339844, "learning_rate": 7.713675213675215e-06, "loss": 0.8081, "step": 1590 },
    { "epoch": 7.656287425149701, "grad_norm": 97.16567993164062, "learning_rate": 7.692307692307694e-06, "loss": 0.7669, "step": 1600 },
    { "epoch": 7.704191616766467, "grad_norm": 40.78181076049805, "learning_rate": 7.670940170940172e-06, "loss": 0.7844, "step": 1610 },
    { "epoch": 7.752095808383234, "grad_norm": 72.332763671875, "learning_rate": 7.649572649572649e-06, "loss": 0.7251, "step": 1620 },
    { "epoch": 7.8, "grad_norm": 61.692543029785156, "learning_rate": 7.6282051282051286e-06, "loss": 0.8466, "step": 1630 },
    { "epoch": 7.847904191616767, "grad_norm": 76.77676391601562, "learning_rate": 7.606837606837607e-06, "loss": 0.8042, "step": 1640 },
    { "epoch": 7.895808383233533, "grad_norm": 29.141651153564453, "learning_rate": 7.585470085470086e-06, "loss": 0.7552, "step": 1650 },
    { "epoch": 7.9437125748502995, "grad_norm": 58.72688293457031, "learning_rate": 7.564102564102564e-06, "loss": 0.5293, "step": 1660 },
    { "epoch": 7.991616766467066, "grad_norm": 52.79533386230469, "learning_rate": 7.542735042735043e-06, "loss": 0.7987, "step": 1670 },
    { "epoch": 8.0, "eval_loss": 0.4819260835647583, "eval_macro_f1": 0.8445069931839906, "eval_runtime": 49.8575, "eval_samples_per_second": 33.516, "eval_steps_per_second": 4.192, "step": 1672 },
    { "epoch": 8.038323353293412, "grad_norm": 15.822467803955078, "learning_rate": 7.521367521367522e-06, "loss": 0.4624, "step": 1680 },
    { "epoch": 8.08622754491018, "grad_norm": 17.05672264099121, "learning_rate": 7.500000000000001e-06, "loss": 0.518, "step": 1690 },
    { "epoch": 8.134131736526946, "grad_norm": 404.2013244628906, "learning_rate": 7.47863247863248e-06, "loss": 0.6137, "step": 1700 },
    { "epoch": 8.182035928143712, "grad_norm": 72.656982421875, "learning_rate": 7.457264957264958e-06, "loss": 0.7263, "step": 1710 },
    { "epoch": 8.22994011976048, "grad_norm": 42.37464141845703, "learning_rate": 7.435897435897437e-06, "loss": 0.5209, "step": 1720 },
    { "epoch": 8.277844311377246, "grad_norm": 41.12651062011719, "learning_rate": 7.4145299145299155e-06, "loss": 0.6419, "step": 1730 },
    { "epoch": 8.325748502994012, "grad_norm": 33.06435012817383, "learning_rate": 7.393162393162394e-06, "loss": 0.6121, "step": 1740 },
    { "epoch": 8.373652694610778, "grad_norm": 54.51243209838867, "learning_rate": 7.371794871794873e-06, "loss": 0.554, "step": 1750 },
    { "epoch": 8.421556886227545, "grad_norm": 23.176687240600586, "learning_rate": 7.350427350427351e-06, "loss": 0.8463, "step": 1760 },
    { "epoch": 8.469461077844311, "grad_norm": 27.295547485351562, "learning_rate": 7.329059829059829e-06, "loss": 0.5512, "step": 1770 },
    { "epoch": 8.517365269461077, "grad_norm": 19.80516242980957, "learning_rate": 7.307692307692308e-06, "loss": 0.7343, "step": 1780 },
    { "epoch": 8.565269461077845, "grad_norm": 100.30529022216797, "learning_rate": 7.286324786324786e-06, "loss": 0.5751, "step": 1790 },
    { "epoch": 8.613173652694611, "grad_norm": 40.754974365234375, "learning_rate": 7.264957264957266e-06, "loss": 0.8093, "step": 1800 },
    { "epoch": 8.661077844311377, "grad_norm": 76.589111328125, "learning_rate": 7.243589743589744e-06, "loss": 0.7388, "step": 1810 },
    { "epoch": 8.708982035928143, "grad_norm": 54.932838439941406, "learning_rate": 7.222222222222223e-06, "loss": 0.6432, "step": 1820 },
    { "epoch": 8.75688622754491, "grad_norm": 54.24689483642578, "learning_rate": 7.2008547008547015e-06, "loss": 0.8261, "step": 1830 },
    { "epoch": 8.804790419161677, "grad_norm": 26.55350112915039, "learning_rate": 7.17948717948718e-06, "loss": 0.701, "step": 1840 },
    { "epoch": 8.852694610778443, "grad_norm": 28.47566032409668, "learning_rate": 7.158119658119659e-06, "loss": 0.8115, "step": 1850 },
    { "epoch": 8.90059880239521, "grad_norm": 60.1458740234375, "learning_rate": 7.136752136752137e-06, "loss": 0.7178, "step": 1860 },
    { "epoch": 8.948502994011976, "grad_norm": 47.09638214111328, "learning_rate": 7.115384615384616e-06, "loss": 0.8113, "step": 1870 },
    { "epoch": 8.996407185628742, "grad_norm": 33.51266098022461, "learning_rate": 7.0940170940170945e-06, "loss": 0.7928, "step": 1880 },
    { "epoch": 9.0, "eval_loss": 0.41668668389320374, "eval_macro_f1": 0.8441824722148172, "eval_runtime": 44.5976, "eval_samples_per_second": 37.468, "eval_steps_per_second": 4.686, "step": 1881 },
    { "epoch": 9.04311377245509, "grad_norm": 81.31795501708984, "learning_rate": 7.072649572649574e-06, "loss": 0.6389, "step": 1890 },
    { "epoch": 9.091017964071856, "grad_norm": 13.542675971984863, "learning_rate": 7.051282051282053e-06, "loss": 0.5636, "step": 1900 },
    { "epoch": 9.138922155688622, "grad_norm": 82.46122741699219, "learning_rate": 7.02991452991453e-06, "loss": 0.6297, "step": 1910 },
    { "epoch": 9.18682634730539, "grad_norm": 47.003292083740234, "learning_rate": 7.008547008547009e-06, "loss": 0.5565, "step": 1920 },
    { "epoch": 9.234730538922156, "grad_norm": 26.577322006225586, "learning_rate": 6.9871794871794876e-06, "loss": 0.9967, "step": 1930 },
    { "epoch": 9.282634730538922, "grad_norm": 47.41004180908203, "learning_rate": 6.965811965811966e-06, "loss": 0.5435, "step": 1940 },
    { "epoch": 9.33053892215569, "grad_norm": 64.1046371459961, "learning_rate": 6.944444444444445e-06, "loss": 0.6405, "step": 1950 },
    { "epoch": 9.378443113772455, "grad_norm": 48.981136322021484, "learning_rate": 6.923076923076923e-06, "loss": 0.5692, "step": 1960 },
    { "epoch": 9.426347305389221, "grad_norm": 113.02758026123047, "learning_rate": 6.901709401709402e-06, "loss": 0.5958, "step": 1970 },
    { "epoch": 9.474251497005987, "grad_norm": 87.0408935546875, "learning_rate": 6.880341880341881e-06, "loss": 0.7034, "step": 1980 },
    { "epoch": 9.522155688622755, "grad_norm": 59.14445495605469, "learning_rate": 6.858974358974359e-06, "loss": 0.7209, "step": 1990 },
    { "epoch": 9.570059880239521, "grad_norm": 53.770408630371094, "learning_rate": 6.837606837606839e-06, "loss": 0.8953, "step": 2000 },
    { "epoch": 9.617964071856287, "grad_norm": 46.554481506347656, "learning_rate": 6.816239316239317e-06, "loss": 0.7084, "step": 2010 },
    { "epoch": 9.665868263473055, "grad_norm": 41.41537857055664, "learning_rate": 6.794871794871796e-06, "loss": 0.5916, "step": 2020 },
    { "epoch": 9.71377245508982, "grad_norm": 46.17745590209961, "learning_rate": 6.7735042735042745e-06, "loss": 0.5613, "step": 2030 },
    { "epoch": 9.761676646706587, "grad_norm": 61.96057891845703, "learning_rate": 6.752136752136753e-06, "loss": 0.7686, "step": 2040 },
    { "epoch": 9.809580838323352, "grad_norm": 17.55687713623047, "learning_rate": 6.730769230769232e-06, "loss": 0.7887, "step": 2050 },
    { "epoch": 9.85748502994012, "grad_norm": 45.83679962158203, "learning_rate": 6.7094017094017094e-06, "loss": 0.4342, "step": 2060 },
    { "epoch": 9.905389221556886, "grad_norm": 65.41094970703125, "learning_rate": 6.688034188034188e-06, "loss": 0.7364, "step": 2070 },
    { "epoch": 9.953293413173652, "grad_norm": 63.39105987548828, "learning_rate": 6.666666666666667e-06, "loss": 0.5624, "step": 2080 },
    { "epoch": 10.0, "grad_norm": 35.72822189331055, "learning_rate": 6.645299145299145e-06, "loss": 0.5399, "step": 2090 },
    { "epoch": 10.0, "eval_loss": 0.5035926103591919, "eval_macro_f1": 0.8252431536013625, "eval_runtime": 43.3409, "eval_samples_per_second": 38.555, "eval_steps_per_second": 4.822, "step": 2090 },
    { "epoch": 10.047904191616766, "grad_norm": 43.547481536865234, "learning_rate": 6.623931623931624e-06, "loss": 0.6161, "step": 2100 },
    { "epoch": 10.095808383233534, "grad_norm": 9.250253677368164, "learning_rate": 6.602564102564103e-06, "loss": 0.6646, "step": 2110 },
    { "epoch": 10.1437125748503, "grad_norm": 37.02254867553711, "learning_rate": 6.581196581196582e-06, "loss": 0.4968, "step": 2120 },
    { "epoch": 10.191616766467066, "grad_norm": 54.5052490234375, "learning_rate": 6.5598290598290605e-06, "loss": 0.3927, "step": 2130 },
    { "epoch": 10.239520958083832, "grad_norm": 64.98939514160156, "learning_rate": 6.538461538461539e-06, "loss": 0.5233, "step": 2140 },
    { "epoch": 10.2874251497006, "grad_norm": 41.95616912841797, "learning_rate": 6.517094017094018e-06, "loss": 0.7305, "step": 2150 },
    { "epoch": 10.335329341317365, "grad_norm": 26.417930603027344, "learning_rate": 6.495726495726496e-06, "loss": 0.5581, "step": 2160 },
    { "epoch": 10.383233532934131, "grad_norm": 60.45222473144531, "learning_rate": 6.474358974358975e-06, "loss": 0.5982, "step": 2170 },
    { "epoch": 10.431137724550899, "grad_norm": 47.69218826293945, "learning_rate": 6.4529914529914535e-06, "loss": 0.3558, "step": 2180 },
    { "epoch": 10.479041916167665, "grad_norm": 33.18900680541992, "learning_rate": 6.431623931623933e-06, "loss": 0.6521, "step": 2190 },
    { "epoch": 10.52694610778443, "grad_norm": 39.501426696777344, "learning_rate": 6.410256410256412e-06, "loss": 0.6646, "step": 2200 },
    { "epoch": 10.574850299401197, "grad_norm": 61.5817985534668, "learning_rate": 6.3888888888888885e-06, "loss": 0.6448, "step": 2210 },
    { "epoch": 10.622754491017965, "grad_norm": 37.70832061767578, "learning_rate": 6.367521367521368e-06, "loss": 0.4272, "step": 2220 },
    { "epoch": 10.67065868263473, "grad_norm": 56.18067169189453, "learning_rate": 6.3461538461538466e-06, "loss": 0.5429, "step": 2230 },
    { "epoch": 10.718562874251496, "grad_norm": 38.271278381347656, "learning_rate": 6.324786324786325e-06, "loss": 0.503, "step": 2240 },
    { "epoch": 10.766467065868264, "grad_norm": 53.65212631225586, "learning_rate": 6.303418803418804e-06, "loss": 0.5207, "step": 2250 },
    { "epoch": 10.81437125748503, "grad_norm": 15.48988151550293, "learning_rate": 6.282051282051282e-06, "loss": 0.6707, "step": 2260 },
    { "epoch": 10.862275449101796, "grad_norm": 44.31208038330078, "learning_rate": 6.260683760683761e-06, "loss": 0.3833, "step": 2270 },
    { "epoch": 10.910179640718562, "grad_norm": 48.97806167602539, "learning_rate": 6.23931623931624e-06, "loss": 0.5623, "step": 2280 },
    { "epoch": 10.95808383233533, "grad_norm": 73.36396789550781, "learning_rate": 6.217948717948718e-06, "loss": 0.6053, "step": 2290 },
    { "epoch": 11.0, "eval_loss": 0.4976274073123932, "eval_macro_f1": 0.8447303083464661, "eval_runtime": 44.1148, "eval_samples_per_second": 37.878, "eval_steps_per_second": 4.738, "step": 2299 },
    { "epoch": 11.004790419161676, "grad_norm": 52.19700622558594, "learning_rate": 6.196581196581198e-06, "loss": 0.4362, "step": 2300 },
    { "epoch": 11.052694610778444, "grad_norm": 12.376163482666016, "learning_rate": 6.175213675213676e-06, "loss": 0.2856, "step": 2310 },
    { "epoch": 11.10059880239521, "grad_norm": 55.741554260253906, "learning_rate": 6.153846153846155e-06, "loss": 0.4785, "step": 2320 },
    { "epoch": 11.148502994011976, "grad_norm": 23.254854202270508, "learning_rate": 6.1324786324786335e-06, "loss": 0.5646, "step": 2330 },
    { "epoch": 11.196407185628743, "grad_norm": 54.249332427978516, "learning_rate": 6.111111111111112e-06, "loss": 0.4413, "step": 2340 },
    { "epoch": 11.24431137724551, "grad_norm": 91.06517791748047, "learning_rate": 6.08974358974359e-06, "loss": 0.4921, "step": 2350 },
    { "epoch": 11.292215568862275, "grad_norm": 12.940735816955566, "learning_rate": 6.0683760683760684e-06, "loss": 0.5419, "step": 2360 },
    { "epoch": 11.340119760479041, "grad_norm": 88.24459075927734, "learning_rate": 6.047008547008547e-06, "loss": 0.582, "step": 2370 },
    { "epoch": 11.388023952095809, "grad_norm": 28.848173141479492, "learning_rate": 6.025641025641026e-06, "loss": 0.5232, "step": 2380 },
    { "epoch": 11.435928143712575, "grad_norm": 18.56818389892578, "learning_rate": 6.004273504273504e-06, "loss": 0.4061, "step": 2390 },
    { "epoch": 11.48383233532934, "grad_norm": 88.79745483398438, "learning_rate": 5.982905982905983e-06, "loss": 0.4603, "step": 2400 },
    { "epoch": 11.531736526946109, "grad_norm": 59.039039611816406, "learning_rate": 5.961538461538462e-06, "loss": 0.4677, "step": 2410 },
    { "epoch": 11.579640718562874, "grad_norm": 60.00382614135742, "learning_rate": 5.940170940170941e-06, "loss": 0.3458, "step": 2420 },
    { "epoch": 11.62754491017964, "grad_norm": 37.48514938354492, "learning_rate": 5.9188034188034195e-06, "loss": 0.6475, "step": 2430 },
    { "epoch": 11.675449101796406, "grad_norm": 53.149925231933594, "learning_rate": 5.897435897435898e-06, "loss": 0.4033, "step": 2440 },
    { "epoch": 11.723353293413174, "grad_norm": 57.217735290527344, "learning_rate": 5.876068376068377e-06, "loss": 0.5827, "step": 2450 },
    { "epoch": 11.77125748502994, "grad_norm": 32.07596206665039, "learning_rate": 5.854700854700855e-06, "loss": 0.4473, "step": 2460 },
    { "epoch": 11.819161676646706, "grad_norm": 39.394474029541016, "learning_rate": 5.833333333333334e-06, "loss": 0.6156, "step": 2470 },
    { "epoch": 11.867065868263474, "grad_norm": 42.22713088989258, "learning_rate": 5.8119658119658126e-06, "loss": 0.5249, "step": 2480 },
    { "epoch": 11.91497005988024, "grad_norm": 44.544944763183594, "learning_rate": 5.790598290598292e-06, "loss": 0.5426, "step": 2490 },
    { "epoch": 11.962874251497006, "grad_norm": 37.793052673339844, "learning_rate": 5.769230769230769e-06, "loss": 0.7017, "step": 2500 },
    { "epoch": 12.0, "eval_loss": 0.5270896553993225, "eval_macro_f1": 0.8419574495547526, "eval_runtime": 52.4362, "eval_samples_per_second": 31.867, "eval_steps_per_second": 3.986, "step": 2508 },
    { "epoch": 12.009580838323354, "grad_norm": 33.670814514160156, "learning_rate": 5.7478632478632475e-06, "loss": 0.348, "step": 2510 },
    { "epoch": 12.05748502994012, "grad_norm": 16.13005256652832, "learning_rate": 5.726495726495727e-06, "loss": 0.3518, "step": 2520 },
    { "epoch": 12.105389221556885, "grad_norm": 117.29934692382812, "learning_rate": 5.705128205128206e-06, "loss": 0.4044, "step": 2530 },
    { "epoch": 12.153293413173653, "grad_norm": 66.2587661743164, "learning_rate": 5.683760683760684e-06, "loss": 0.3835, "step": 2540 },
    { "epoch": 12.20119760479042, "grad_norm": 39.18204879760742, "learning_rate": 5.662393162393163e-06, "loss": 0.5428, "step": 2550 },
    { "epoch": 12.249101796407185, "grad_norm": 29.139354705810547, "learning_rate": 5.641025641025641e-06, "loss": 0.2677, "step": 2560 },
    { "epoch": 12.297005988023953, "grad_norm": 49.968666076660156, "learning_rate": 5.61965811965812e-06, "loss": 0.4607, "step": 2570 },
    { "epoch": 12.344910179640719, "grad_norm": 30.069171905517578, "learning_rate": 5.598290598290599e-06, "loss": 0.4371, "step": 2580 },
    { "epoch": 12.392814371257485, "grad_norm": 21.969675064086914, "learning_rate": 5.576923076923077e-06, "loss": 0.473, "step": 2590 },
    { "epoch": 12.44071856287425, "grad_norm": 76.71914672851562, "learning_rate": 5.555555555555557e-06, "loss": 0.3471, "step": 2600 },
    { "epoch": 12.488622754491018, "grad_norm": 50.17422866821289, "learning_rate": 5.534188034188035e-06, "loss": 0.7815, "step": 2610 },
    { "epoch": 12.536526946107784, "grad_norm": 44.04591751098633, "learning_rate": 5.512820512820514e-06, "loss": 0.5847, "step": 2620 },
    { "epoch": 12.58443113772455, "grad_norm": 17.07801055908203, "learning_rate": 5.4914529914529925e-06, "loss": 0.5836, "step": 2630 },
    { "epoch": 12.632335329341318, "grad_norm": 37.61981201171875, "learning_rate": 5.470085470085471e-06, "loss": 0.6164, "step": 2640 },
    { "epoch": 12.680239520958084, "grad_norm": 45.67870330810547, "learning_rate": 5.448717948717949e-06, "loss": 0.4233, "step": 2650 },
    { "epoch": 12.72814371257485, "grad_norm": 6.8784918785095215, "learning_rate": 5.4273504273504275e-06, "loss": 0.4341, "step": 2660 },
    { "epoch": 12.776047904191616, "grad_norm": 106.60427856445312, "learning_rate": 5.405982905982906e-06, "loss": 0.4442, "step": 2670 },
    { "epoch": 12.823952095808384, "grad_norm": 10.980769157409668, "learning_rate": 5.384615384615385e-06, "loss": 0.3069, "step": 2680 },
    { "epoch": 12.87185628742515, "grad_norm": 108.9623794555664, "learning_rate": 5.363247863247863e-06, "loss": 0.461, "step": 2690 },
    { "epoch": 12.919760479041916, "grad_norm": 49.222408294677734, "learning_rate": 5.341880341880342e-06, "loss": 0.5779, "step": 2700 },
    { "epoch": 12.967664670658683, "grad_norm": 77.78028869628906, "learning_rate": 5.320512820512821e-06, "loss": 0.5748, "step": 2710 },
    { "epoch": 13.0, "eval_loss": 0.6038709878921509, "eval_macro_f1": 0.82953734826579, "eval_runtime": 45.778, "eval_samples_per_second": 36.502, "eval_steps_per_second": 4.566, "step": 2717 },
    { "epoch": 13.01437125748503, "grad_norm": 64.35317993164062, "learning_rate": 5.2991452991453e-06, "loss": 0.4684, "step": 2720 },
    { "epoch": 13.062275449101797, "grad_norm": 27.278182983398438, "learning_rate": 5.2777777777777785e-06, "loss": 0.2145, "step": 2730 },
    { "epoch": 13.110179640718563, "grad_norm": 12.438913345336914, "learning_rate": 5.256410256410257e-06, "loss": 0.3516, "step": 2740 },
    { "epoch": 13.158083832335329, "grad_norm": 111.6015396118164, "learning_rate": 5.235042735042736e-06, "loss": 0.4645, "step": 2750 },
    { "epoch": 13.205988023952095, "grad_norm": 29.416948318481445, "learning_rate": 5.213675213675214e-06, "loss": 0.4283, "step": 2760 },
    { "epoch": 13.253892215568863, "grad_norm": 42.40279006958008, "learning_rate": 5.192307692307693e-06, "loss": 0.3558, "step": 2770 },
    { "epoch": 13.301796407185629, "grad_norm": 90.29447937011719, "learning_rate": 5.1709401709401716e-06, "loss": 0.4245, "step": 2780 },
    { "epoch": 13.349700598802395, "grad_norm": 51.8712272644043, "learning_rate": 5.149572649572649e-06, "loss": 0.3301, "step": 2790 },
    { "epoch": 13.397604790419162, "grad_norm": 32.75868606567383, "learning_rate": 5.128205128205128e-06, "loss": 0.401, "step": 2800 },
    { "epoch": 13.445508982035928, "grad_norm": 14.076770782470703, "learning_rate": 5.1068376068376065e-06, "loss": 0.2116, "step": 2810 },
    { "epoch": 13.493413173652694, "grad_norm": 158.29722595214844, "learning_rate": 5.085470085470086e-06, "loss": 0.4145, "step": 2820 },
    { "epoch": 13.54131736526946, "grad_norm": 43.535980224609375, "learning_rate": 5.064102564102565e-06, "loss": 0.5061, "step": 2830 },
    { "epoch": 13.589221556886228, "grad_norm": 129.22494506835938, "learning_rate": 5.042735042735043e-06, "loss": 0.2653, "step": 2840 },
    { "epoch": 13.637125748502994, "grad_norm": 19.907264709472656, "learning_rate": 5.021367521367522e-06, "loss": 0.5019, "step": 2850 },
    { "epoch": 13.68502994011976, "grad_norm": 101.03581237792969, "learning_rate": 5e-06, "loss": 0.6201, "step": 2860 },
    { "epoch": 13.732934131736528, "grad_norm": 54.207130432128906, "learning_rate": 4.978632478632479e-06, "loss": 0.4591, "step": 2870 },
    { "epoch": 13.780838323353294, "grad_norm": 57.07173538208008, "learning_rate": 4.957264957264958e-06, "loss": 0.5418, "step": 2880 },
    { "epoch": 13.82874251497006, "grad_norm": 54.304054260253906, "learning_rate": 4.935897435897436e-06, "loss": 0.6254, "step": 2890 },
    { "epoch": 13.876646706586826, "grad_norm": 74.52729797363281, "learning_rate": 4.914529914529915e-06, "loss": 0.302, "step": 2900 },
    { "epoch": 13.924550898203593, "grad_norm": 46.62977600097656, "learning_rate": 4.8931623931623934e-06, "loss": 0.3676, "step": 2910 },
    { "epoch": 13.97245508982036, "grad_norm": 73.47917938232422, "learning_rate": 4.871794871794872e-06, "loss": 0.478, "step": 2920 },
    { "epoch": 14.0, "eval_loss": 0.5613604784011841, "eval_macro_f1": 0.833819031115799, "eval_runtime": 44.8642, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.659, "step": 2926 },
    { "epoch": 14.019161676646707, "grad_norm": 106.49620056152344, "learning_rate": 4.850427350427351e-06, "loss": 0.4661, "step": 2930 },
    { "epoch": 14.067065868263473, "grad_norm": 23.546146392822266, "learning_rate": 4.829059829059829e-06, "loss": 0.3035, "step": 2940 },
    { "epoch": 14.114970059880239, "grad_norm": 23.96906280517578, "learning_rate": 4.807692307692308e-06, "loss": 0.3686, "step": 2950 },
    { "epoch": 14.162874251497007, "grad_norm": 82.27886199951172, "learning_rate": 4.786324786324787e-06, "loss": 0.4799, "step": 2960 },
    { "epoch": 14.210778443113773, "grad_norm": 10.91873550415039, "learning_rate": 4.764957264957265e-06, "loss": 0.2748, "step": 2970 },
    { "epoch": 14.258682634730539, "grad_norm": 38.029109954833984, "learning_rate": 4.743589743589744e-06, "loss": 0.3756, "step": 2980 },
    { "epoch": 14.306586826347305, "grad_norm": 73.956787109375, "learning_rate": 4.722222222222222e-06, "loss": 0.5632, "step": 2990 },
    { "epoch": 14.354491017964072, "grad_norm": 19.53325843811035, "learning_rate": 4.700854700854701e-06, "loss": 0.3685, "step": 3000 },
    { "epoch": 14.402395209580838, "grad_norm": 21.934566497802734, "learning_rate": 4.6794871794871795e-06, "loss": 0.3186, "step": 3010 },
    { "epoch": 14.450299401197604, "grad_norm": 48.01268768310547, "learning_rate": 4.658119658119659e-06, "loss": 0.4358, "step": 3020 },
    { "epoch": 14.498203592814372, "grad_norm": 67.98200225830078, "learning_rate": 4.6367521367521375e-06, "loss": 0.2328, "step": 3030 },
    { "epoch": 14.546107784431138, "grad_norm": 20.185400009155273, "learning_rate": 4.615384615384616e-06, "loss": 0.4095, "step": 3040 },
    { "epoch": 14.594011976047904, "grad_norm": 35.661773681640625, "learning_rate": 4.594017094017094e-06, "loss": 0.6548, "step": 3050 },
    { "epoch": 14.64191616766467, "grad_norm": 36.68177032470703, "learning_rate": 4.5726495726495725e-06, "loss": 0.3479, "step": 3060 },
    { "epoch": 14.689820359281438, "grad_norm": 19.040000915527344, "learning_rate": 4.551282051282052e-06, "loss": 0.3784, "step": 3070 },
    { "epoch": 14.737724550898204, "grad_norm": 77.58363342285156, "learning_rate": 4.5299145299145306e-06, "loss": 0.4044, "step": 3080 },
    { "epoch": 14.78562874251497, "grad_norm": 98.52961730957031, "learning_rate": 4.508547008547009e-06, "loss": 0.42, "step": 3090 },
    { "epoch": 14.833532934131737, "grad_norm": 167.6805419921875, "learning_rate": 4.487179487179488e-06, "loss": 0.3661, "step": 3100 },
    { "epoch": 14.881437125748503, "grad_norm": 81.3060302734375, "learning_rate": 4.465811965811966e-06, "loss": 0.6895, "step": 3110 },
    { "epoch": 14.92934131736527, "grad_norm": 31.589746475219727, "learning_rate": 4.444444444444444e-06, "loss": 0.3983, "step": 3120 },
    { "epoch": 14.977245508982035, "grad_norm": 33.01588821411133, "learning_rate": 4.423076923076924e-06, "loss": 0.3302, "step": 3130 },
    { "epoch": 15.0, "eval_loss": 0.6139780282974243, "eval_macro_f1": 0.850078934734469, "eval_runtime": 61.1653, "eval_samples_per_second": 27.319, "eval_steps_per_second": 3.417, "step": 3135 },
    { "epoch": 15.023952095808383, "grad_norm": 63.095176696777344, "learning_rate": 4.401709401709402e-06, "loss": 0.492, "step": 3140 },
    { "epoch": 15.071856287425149, "grad_norm": 14.580053329467773, "learning_rate": 4.380341880341881e-06, "loss": 0.5926, "step": 3150 },
    { "epoch": 15.119760479041917, "grad_norm": 9.320775032043457, "learning_rate": 4.358974358974359e-06, "loss": 0.455, "step": 3160 },
    { "epoch": 15.167664670658683, "grad_norm": 57.686344146728516, "learning_rate": 4.337606837606838e-06, "loss": 0.3759, "step": 3170 },
    { "epoch": 15.215568862275449, "grad_norm": 4.791827201843262, "learning_rate": 4.316239316239317e-06, "loss": 0.2594, "step": 3180 },
    { "epoch": 15.263473053892216, "grad_norm": 181.0989532470703, "learning_rate": 4.294871794871795e-06, "loss": 0.4456, "step": 3190 },
    { "epoch": 15.311377245508982, "grad_norm": 15.999568939208984, "learning_rate": 4.273504273504274e-06, "loss": 0.3199, "step": 3200 },
    { "epoch": 15.359281437125748, "grad_norm": 34.29719543457031, "learning_rate": 4.2521367521367524e-06, "loss": 0.3138, "step": 3210 },
    { "epoch": 15.407185628742514, "grad_norm": 108.82241821289062, "learning_rate": 4.230769230769231e-06, "loss": 0.381, "step": 3220 },
    { "epoch": 15.455089820359282, "grad_norm": 7.426156044006348, "learning_rate": 4.20940170940171e-06, "loss": 0.3027, "step": 3230 },
    { "epoch": 15.502994011976048, "grad_norm": 33.00428009033203, "learning_rate": 4.188034188034188e-06, "loss": 0.2268, "step": 3240 },
    { "epoch": 15.550898203592814, "grad_norm": 64.04177856445312, "learning_rate": 4.166666666666667e-06, "loss": 0.4501, "step": 3250 },
    { "epoch": 15.598802395209582, "grad_norm": 28.86948585510254, "learning_rate": 4.145299145299146e-06, "loss": 0.4704, "step": 3260 },
    { "epoch": 15.646706586826348, "grad_norm": 33.580509185791016, "learning_rate": 4.123931623931624e-06, "loss": 0.3373, "step": 3270 },
    { "epoch": 15.694610778443113, "grad_norm": 27.37198257446289, "learning_rate": 4.102564102564103e-06, "loss": 0.3751, "step": 3280 },
    { "epoch": 15.74251497005988, "grad_norm": 29.447359085083008, "learning_rate": 4.081196581196581e-06, "loss": 0.2583, "step": 3290 },
    { "epoch": 15.790419161676647, "grad_norm": 53.31045913696289, "learning_rate": 4.05982905982906e-06, "loss": 0.2685, "step": 3300 },
    { "epoch": 15.838323353293413, "grad_norm": 135.5189971923828, "learning_rate": 4.0384615384615385e-06, "loss": 0.6179, "step": 3310 },
    { "epoch": 15.886227544910179, "grad_norm": 24.811853408813477, "learning_rate": 4.017094017094018e-06, "loss": 0.2871, "step": 3320 },
    { "epoch": 15.934131736526947, "grad_norm": 44.590511322021484, "learning_rate": 3.9957264957264966e-06, "loss": 0.3918, "step": 3330 },
    { "epoch": 15.982035928143713, "grad_norm": 34.29424285888672, "learning_rate": 3.974358974358974e-06, "loss": 0.4284, "step": 3340 },
    { "epoch": 16.0, "eval_loss": 0.6892519593238831, "eval_macro_f1": 0.8416606209561599, "eval_runtime": 68.7411, "eval_samples_per_second": 24.309, "eval_steps_per_second": 3.04, "step": 3344 },
{ |
|
"epoch": 16.02874251497006, |
|
"grad_norm": 76.20410919189453, |
|
"learning_rate": 3.952991452991453e-06, |
|
"loss": 0.2715, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 16.076646706586825, |
|
"grad_norm": 97.97017669677734, |
|
"learning_rate": 3.9316239316239315e-06, |
|
"loss": 0.4799, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 16.124550898203594, |
|
"grad_norm": 42.71623229980469, |
|
"learning_rate": 3.910256410256411e-06, |
|
"loss": 0.4008, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 16.17245508982036, |
|
"grad_norm": 48.501312255859375, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.4609, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 16.220359281437126, |
|
"grad_norm": 15.1089448928833, |
|
"learning_rate": 3.867521367521368e-06, |
|
"loss": 0.2344, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 16.268263473053892, |
|
"grad_norm": 87.63355255126953, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 0.4965, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 16.316167664670658, |
|
"grad_norm": 54.828556060791016, |
|
"learning_rate": 3.8247863247863246e-06, |
|
"loss": 0.3707, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 16.364071856287424, |
|
"grad_norm": 74.80049133300781, |
|
"learning_rate": 3.8034188034188036e-06, |
|
"loss": 0.3693, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 16.41197604790419, |
|
"grad_norm": 18.754371643066406, |
|
"learning_rate": 3.782051282051282e-06, |
|
"loss": 0.3593, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 16.45988023952096, |
|
"grad_norm": 24.015533447265625, |
|
"learning_rate": 3.760683760683761e-06, |
|
"loss": 0.215, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 16.507784431137726, |
|
"grad_norm": 41.305519104003906, |
|
"learning_rate": 3.73931623931624e-06, |
|
"loss": 0.3713, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 16.55568862275449, |
|
"grad_norm": 54.54863739013672, |
|
"learning_rate": 3.7179487179487184e-06, |
|
"loss": 0.4386, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 16.603592814371257, |
|
"grad_norm": 71.85780334472656, |
|
"learning_rate": 3.696581196581197e-06, |
|
"loss": 0.3638, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 16.651497005988023, |
|
"grad_norm": 92.95706939697266, |
|
"learning_rate": 3.6752136752136756e-06, |
|
"loss": 0.3444, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 16.69940119760479, |
|
"grad_norm": 67.28424072265625, |
|
"learning_rate": 3.653846153846154e-06, |
|
"loss": 0.5401, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 16.747305389221555, |
|
"grad_norm": 29.402442932128906, |
|
"learning_rate": 3.632478632478633e-06, |
|
"loss": 0.5194, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.795209580838325, |
|
"grad_norm": 38.80514144897461, |
|
"learning_rate": 3.6111111111111115e-06, |
|
"loss": 0.4502, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 16.84311377245509, |
|
"grad_norm": 10.038030624389648, |
|
"learning_rate": 3.58974358974359e-06, |
|
"loss": 0.1391, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 16.891017964071857, |
|
"grad_norm": 53.43631362915039, |
|
"learning_rate": 3.5683760683760687e-06, |
|
"loss": 0.2745, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 16.938922155688623, |
|
"grad_norm": 82.64073181152344, |
|
"learning_rate": 3.5470085470085473e-06, |
|
"loss": 0.2056, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 16.98682634730539, |
|
"grad_norm": 57.446128845214844, |
|
"learning_rate": 3.5256410256410263e-06, |
|
"loss": 0.5293, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.7794991731643677, |
|
"eval_macro_f1": 0.8272296780330446, |
|
"eval_runtime": 42.3227, |
|
"eval_samples_per_second": 39.482, |
|
"eval_steps_per_second": 4.938, |
|
"step": 3553 |
|
}, |
|
{ |
|
"epoch": 17.033532934131735, |
|
"grad_norm": 47.826210021972656, |
|
"learning_rate": 3.5042735042735045e-06, |
|
"loss": 0.5691, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 17.081437125748504, |
|
"grad_norm": 51.37601852416992, |
|
"learning_rate": 3.482905982905983e-06, |
|
"loss": 0.3646, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 17.12934131736527, |
|
"grad_norm": 5.729410171508789, |
|
"learning_rate": 3.4615384615384617e-06, |
|
"loss": 0.2953, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 17.177245508982036, |
|
"grad_norm": 74.28618621826172, |
|
"learning_rate": 3.4401709401709403e-06, |
|
"loss": 0.4689, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 17.225149700598802, |
|
"grad_norm": 1.512823224067688, |
|
"learning_rate": 3.4188034188034193e-06, |
|
"loss": 0.2678, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.273053892215568, |
|
"grad_norm": 80.0973129272461, |
|
"learning_rate": 3.397435897435898e-06, |
|
"loss": 0.3399, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 17.320958083832334, |
|
"grad_norm": 46.759395599365234, |
|
"learning_rate": 3.3760683760683765e-06, |
|
"loss": 0.3441, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 17.368862275449104, |
|
"grad_norm": 41.87525939941406, |
|
"learning_rate": 3.3547008547008547e-06, |
|
"loss": 0.2421, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 17.41676646706587, |
|
"grad_norm": 31.34505844116211, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.2768, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 17.464670658682635, |
|
"grad_norm": 71.10737609863281, |
|
"learning_rate": 3.311965811965812e-06, |
|
"loss": 0.4662, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 17.5125748502994, |
|
"grad_norm": 70.69206237792969, |
|
"learning_rate": 3.290598290598291e-06, |
|
"loss": 0.3731, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 17.560479041916167, |
|
"grad_norm": 58.422847747802734, |
|
"learning_rate": 3.2692307692307696e-06, |
|
"loss": 0.5217, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 17.608383233532933, |
|
"grad_norm": 11.82396125793457, |
|
"learning_rate": 3.247863247863248e-06, |
|
"loss": 0.2558, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 17.6562874251497, |
|
"grad_norm": 144.99168395996094, |
|
"learning_rate": 3.2264957264957268e-06, |
|
"loss": 0.3243, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 17.704191616766465, |
|
"grad_norm": 59.86516571044922, |
|
"learning_rate": 3.205128205128206e-06, |
|
"loss": 0.4804, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 17.752095808383235, |
|
"grad_norm": 62.745296478271484, |
|
"learning_rate": 3.183760683760684e-06, |
|
"loss": 0.4333, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"grad_norm": 30.099609375, |
|
"learning_rate": 3.1623931623931626e-06, |
|
"loss": 0.2316, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 17.847904191616767, |
|
"grad_norm": 53.383583068847656, |
|
"learning_rate": 3.141025641025641e-06, |
|
"loss": 0.5045, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 17.895808383233533, |
|
"grad_norm": 9.510607719421387, |
|
"learning_rate": 3.11965811965812e-06, |
|
"loss": 0.2016, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 17.9437125748503, |
|
"grad_norm": 41.073726654052734, |
|
"learning_rate": 3.098290598290599e-06, |
|
"loss": 0.4775, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 17.991616766467065, |
|
"grad_norm": 18.685773849487305, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 0.3327, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.6723179817199707, |
|
"eval_macro_f1": 0.8484331568013577, |
|
"eval_runtime": 38.5624, |
|
"eval_samples_per_second": 43.332, |
|
"eval_steps_per_second": 5.42, |
|
"step": 3762 |
|
}, |
|
{ |
|
"epoch": 18.038323353293414, |
|
"grad_norm": 28.627132415771484, |
|
"learning_rate": 3.055555555555556e-06, |
|
"loss": 0.3131, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 18.08622754491018, |
|
"grad_norm": 22.602691650390625, |
|
"learning_rate": 3.0341880341880342e-06, |
|
"loss": 0.1872, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 18.134131736526946, |
|
"grad_norm": 70.41796112060547, |
|
"learning_rate": 3.012820512820513e-06, |
|
"loss": 0.2575, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 18.182035928143712, |
|
"grad_norm": 52.98373031616211, |
|
"learning_rate": 2.9914529914529914e-06, |
|
"loss": 0.366, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 18.229940119760478, |
|
"grad_norm": 76.51351928710938, |
|
"learning_rate": 2.9700854700854705e-06, |
|
"loss": 0.3965, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 18.277844311377244, |
|
"grad_norm": 57.413387298583984, |
|
"learning_rate": 2.948717948717949e-06, |
|
"loss": 0.1353, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 18.325748502994013, |
|
"grad_norm": 38.45537185668945, |
|
"learning_rate": 2.9273504273504277e-06, |
|
"loss": 0.2277, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 18.37365269461078, |
|
"grad_norm": 76.71987915039062, |
|
"learning_rate": 2.9059829059829063e-06, |
|
"loss": 0.4852, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 18.421556886227545, |
|
"grad_norm": 13.536568641662598, |
|
"learning_rate": 2.8846153846153845e-06, |
|
"loss": 0.2461, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 18.46946107784431, |
|
"grad_norm": 63.02874755859375, |
|
"learning_rate": 2.8632478632478635e-06, |
|
"loss": 0.3511, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 18.517365269461077, |
|
"grad_norm": 98.7140884399414, |
|
"learning_rate": 2.841880341880342e-06, |
|
"loss": 0.2256, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 18.565269461077843, |
|
"grad_norm": 35.94133758544922, |
|
"learning_rate": 2.8205128205128207e-06, |
|
"loss": 0.1746, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 18.61317365269461, |
|
"grad_norm": 87.98931121826172, |
|
"learning_rate": 2.7991452991452993e-06, |
|
"loss": 0.5013, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 18.66107784431138, |
|
"grad_norm": 1.2747302055358887, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.3938, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 18.708982035928145, |
|
"grad_norm": 0.7769640684127808, |
|
"learning_rate": 2.756410256410257e-06, |
|
"loss": 0.5851, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 18.75688622754491, |
|
"grad_norm": 65.76081085205078, |
|
"learning_rate": 2.7350427350427355e-06, |
|
"loss": 0.4233, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 18.804790419161677, |
|
"grad_norm": 4.068265438079834, |
|
"learning_rate": 2.7136752136752137e-06, |
|
"loss": 0.3455, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 18.852694610778443, |
|
"grad_norm": 1912.1357421875, |
|
"learning_rate": 2.6923076923076923e-06, |
|
"loss": 0.6044, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 18.90059880239521, |
|
"grad_norm": 20.534839630126953, |
|
"learning_rate": 2.670940170940171e-06, |
|
"loss": 0.1716, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 18.948502994011974, |
|
"grad_norm": 77.07267761230469, |
|
"learning_rate": 2.64957264957265e-06, |
|
"loss": 0.2453, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 18.996407185628744, |
|
"grad_norm": 89.15924072265625, |
|
"learning_rate": 2.6282051282051286e-06, |
|
"loss": 0.5155, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.7802789807319641, |
|
"eval_macro_f1": 0.8321765860916699, |
|
"eval_runtime": 41.5052, |
|
"eval_samples_per_second": 40.26, |
|
"eval_steps_per_second": 5.036, |
|
"step": 3971 |
|
}, |
|
{ |
|
"epoch": 19.04311377245509, |
|
"grad_norm": 119.42920684814453, |
|
"learning_rate": 2.606837606837607e-06, |
|
"loss": 0.4741, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 19.091017964071856, |
|
"grad_norm": 48.56228256225586, |
|
"learning_rate": 2.5854700854700858e-06, |
|
"loss": 0.3726, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 19.138922155688622, |
|
"grad_norm": 22.457124710083008, |
|
"learning_rate": 2.564102564102564e-06, |
|
"loss": 0.3174, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.186826347305388, |
|
"grad_norm": 56.32841873168945, |
|
"learning_rate": 2.542735042735043e-06, |
|
"loss": 0.2022, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 19.234730538922157, |
|
"grad_norm": 28.47975730895996, |
|
"learning_rate": 2.5213675213675216e-06, |
|
"loss": 0.2525, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 19.282634730538923, |
|
"grad_norm": 17.601634979248047, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2316, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 19.33053892215569, |
|
"grad_norm": 47.431663513183594, |
|
"learning_rate": 2.478632478632479e-06, |
|
"loss": 0.3101, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 19.378443113772455, |
|
"grad_norm": 77.91453552246094, |
|
"learning_rate": 2.4572649572649574e-06, |
|
"loss": 0.4441, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 19.42634730538922, |
|
"grad_norm": 78.02772521972656, |
|
"learning_rate": 2.435897435897436e-06, |
|
"loss": 0.3817, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 19.474251497005987, |
|
"grad_norm": 20.506948471069336, |
|
"learning_rate": 2.4145299145299146e-06, |
|
"loss": 0.2292, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 19.522155688622753, |
|
"grad_norm": 16.479249954223633, |
|
"learning_rate": 2.3931623931623937e-06, |
|
"loss": 0.5324, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 19.57005988023952, |
|
"grad_norm": 30.338481903076172, |
|
"learning_rate": 2.371794871794872e-06, |
|
"loss": 0.2052, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 19.61796407185629, |
|
"grad_norm": 57.33430480957031, |
|
"learning_rate": 2.3504273504273504e-06, |
|
"loss": 0.2965, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 19.665868263473055, |
|
"grad_norm": 52.898719787597656, |
|
"learning_rate": 2.3290598290598295e-06, |
|
"loss": 0.2153, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 19.71377245508982, |
|
"grad_norm": 13.877717971801758, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 0.1302, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 19.761676646706587, |
|
"grad_norm": 82.69320678710938, |
|
"learning_rate": 2.2863247863247863e-06, |
|
"loss": 0.1914, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 19.809580838323352, |
|
"grad_norm": 69.18468475341797, |
|
"learning_rate": 2.2649572649572653e-06, |
|
"loss": 0.2875, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 19.85748502994012, |
|
"grad_norm": 60.78540802001953, |
|
"learning_rate": 2.243589743589744e-06, |
|
"loss": 0.1801, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 19.905389221556888, |
|
"grad_norm": 32.22920227050781, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.3202, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 19.953293413173654, |
|
"grad_norm": 65.0261459350586, |
|
"learning_rate": 2.200854700854701e-06, |
|
"loss": 0.1849, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.3435074985027313, |
|
"learning_rate": 2.1794871794871797e-06, |
|
"loss": 0.3044, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.7713411450386047, |
|
"eval_macro_f1": 0.844977734683617, |
|
"eval_runtime": 50.3873, |
|
"eval_samples_per_second": 33.163, |
|
"eval_steps_per_second": 4.148, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 20.047904191616766, |
|
"grad_norm": 40.881629943847656, |
|
"learning_rate": 2.1581196581196583e-06, |
|
"loss": 0.3328, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 20.095808383233532, |
|
"grad_norm": 19.17068862915039, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.3443, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 20.143712574850298, |
|
"grad_norm": 33.69413375854492, |
|
"learning_rate": 2.1153846153846155e-06, |
|
"loss": 0.3359, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 20.191616766467067, |
|
"grad_norm": 17.18857765197754, |
|
"learning_rate": 2.094017094017094e-06, |
|
"loss": 0.3028, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 20.239520958083833, |
|
"grad_norm": 57.64948272705078, |
|
"learning_rate": 2.072649572649573e-06, |
|
"loss": 0.3474, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 20.2874251497006, |
|
"grad_norm": 0.19221803545951843, |
|
"learning_rate": 2.0512820512820513e-06, |
|
"loss": 0.3006, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 20.335329341317365, |
|
"grad_norm": 7.332968235015869, |
|
"learning_rate": 2.02991452991453e-06, |
|
"loss": 0.3067, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 20.38323353293413, |
|
"grad_norm": 6.971332550048828, |
|
"learning_rate": 2.008547008547009e-06, |
|
"loss": 0.2523, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 20.431137724550897, |
|
"grad_norm": 16.12879180908203, |
|
"learning_rate": 1.987179487179487e-06, |
|
"loss": 0.1617, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 20.479041916167663, |
|
"grad_norm": 4.716946125030518, |
|
"learning_rate": 1.9658119658119658e-06, |
|
"loss": 0.2161, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 20.526946107784433, |
|
"grad_norm": 29.626829147338867, |
|
"learning_rate": 1.944444444444445e-06, |
|
"loss": 0.2236, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 20.5748502994012, |
|
"grad_norm": 5.962003231048584, |
|
"learning_rate": 1.9230769230769234e-06, |
|
"loss": 0.1348, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 20.622754491017965, |
|
"grad_norm": 22.97109603881836, |
|
"learning_rate": 1.9017094017094018e-06, |
|
"loss": 0.3083, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 20.67065868263473, |
|
"grad_norm": 45.447818756103516, |
|
"learning_rate": 1.8803418803418804e-06, |
|
"loss": 0.3335, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 20.718562874251496, |
|
"grad_norm": 205.6171417236328, |
|
"learning_rate": 1.8589743589743592e-06, |
|
"loss": 0.3472, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 20.766467065868262, |
|
"grad_norm": 41.59774398803711, |
|
"learning_rate": 1.8376068376068378e-06, |
|
"loss": 0.2514, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 20.81437125748503, |
|
"grad_norm": 37.663997650146484, |
|
"learning_rate": 1.8162393162393164e-06, |
|
"loss": 0.3328, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 20.862275449101798, |
|
"grad_norm": 5.474306106567383, |
|
"learning_rate": 1.794871794871795e-06, |
|
"loss": 0.3874, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 20.910179640718564, |
|
"grad_norm": 17.301982879638672, |
|
"learning_rate": 1.7735042735042736e-06, |
|
"loss": 0.2811, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 20.95808383233533, |
|
"grad_norm": 51.472537994384766, |
|
"learning_rate": 1.7521367521367522e-06, |
|
"loss": 0.4697, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.7450286746025085, |
|
"eval_macro_f1": 0.827007153949272, |
|
"eval_runtime": 57.1204, |
|
"eval_samples_per_second": 29.254, |
|
"eval_steps_per_second": 3.659, |
|
"step": 4389 |
|
}, |
|
{ |
|
"epoch": 21.004790419161676, |
|
"grad_norm": 86.52445983886719, |
|
"learning_rate": 1.7307692307692308e-06, |
|
"loss": 0.4239, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 21.052694610778442, |
|
"grad_norm": 68.6434326171875, |
|
"learning_rate": 1.7094017094017097e-06, |
|
"loss": 0.3987, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 21.10059880239521, |
|
"grad_norm": 45.44715881347656, |
|
"learning_rate": 1.6880341880341883e-06, |
|
"loss": 0.2822, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 21.148502994011977, |
|
"grad_norm": 104.67868041992188, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.1772, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 21.196407185628743, |
|
"grad_norm": 35.29579162597656, |
|
"learning_rate": 1.6452991452991455e-06, |
|
"loss": 0.2434, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 21.24431137724551, |
|
"grad_norm": 77.53510284423828, |
|
"learning_rate": 1.623931623931624e-06, |
|
"loss": 0.2378, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 21.292215568862275, |
|
"grad_norm": 67.75172424316406, |
|
"learning_rate": 1.602564102564103e-06, |
|
"loss": 0.3755, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 21.34011976047904, |
|
"grad_norm": 8.150789260864258, |
|
"learning_rate": 1.5811965811965813e-06, |
|
"loss": 0.2303, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 21.388023952095807, |
|
"grad_norm": 57.804100036621094, |
|
"learning_rate": 1.55982905982906e-06, |
|
"loss": 0.3705, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 21.435928143712573, |
|
"grad_norm": 12.233683586120605, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 0.1344, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 21.483832335329343, |
|
"grad_norm": 48.35832595825195, |
|
"learning_rate": 1.5170940170940171e-06, |
|
"loss": 0.1988, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 21.53173652694611, |
|
"grad_norm": 112.48056030273438, |
|
"learning_rate": 1.4957264957264957e-06, |
|
"loss": 0.2977, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.579640718562874, |
|
"grad_norm": 97.60857391357422, |
|
"learning_rate": 1.4743589743589745e-06, |
|
"loss": 0.2382, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 21.62754491017964, |
|
"grad_norm": 4.879365921020508, |
|
"learning_rate": 1.4529914529914531e-06, |
|
"loss": 0.2374, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 21.675449101796406, |
|
"grad_norm": 60.12843704223633, |
|
"learning_rate": 1.4316239316239317e-06, |
|
"loss": 0.1913, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 21.723353293413172, |
|
"grad_norm": 39.34722900390625, |
|
"learning_rate": 1.4102564102564104e-06, |
|
"loss": 0.1366, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 21.771257485029942, |
|
"grad_norm": 0.2388223111629486, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.1015, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 21.819161676646708, |
|
"grad_norm": 63.66411209106445, |
|
"learning_rate": 1.3675213675213678e-06, |
|
"loss": 0.3127, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 21.867065868263474, |
|
"grad_norm": 4.219015598297119, |
|
"learning_rate": 1.3461538461538462e-06, |
|
"loss": 0.3047, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 21.91497005988024, |
|
"grad_norm": 55.83110427856445, |
|
"learning_rate": 1.324786324786325e-06, |
|
"loss": 0.2811, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 21.962874251497006, |
|
"grad_norm": 34.60869216918945, |
|
"learning_rate": 1.3034188034188036e-06, |
|
"loss": 0.1733, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.745612621307373, |
|
"eval_macro_f1": 0.8567568925713644, |
|
"eval_runtime": 53.3616, |
|
"eval_samples_per_second": 31.315, |
|
"eval_steps_per_second": 3.917, |
|
"step": 4598 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.273478692599549e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|