|
{ |
|
"best_global_step": 8815, |
|
"best_metric": 0.545, |
|
"best_model_checkpoint": "results_indot5-dapt-finetune-1/checkpoint-8815", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 10250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04878048780487805, |
|
"grad_norm": 17.200429916381836, |
|
"learning_rate": 4.4009779951100247e-07, |
|
"loss": 7.0618, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0975609756097561, |
|
"grad_norm": 25.410673141479492, |
|
"learning_rate": 9.290953545232274e-07, |
|
"loss": 7.0349, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14634146341463414, |
|
"grad_norm": 19.62042999267578, |
|
"learning_rate": 1.4180929095354523e-06, |
|
"loss": 7.0469, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1951219512195122, |
|
"grad_norm": 56.81166076660156, |
|
"learning_rate": 1.9070904645476774e-06, |
|
"loss": 6.9333, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 107.1426773071289, |
|
"learning_rate": 2.3960880195599024e-06, |
|
"loss": 6.7783, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2926829268292683, |
|
"grad_norm": 12.458768844604492, |
|
"learning_rate": 2.8850855745721272e-06, |
|
"loss": 6.445, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34146341463414637, |
|
"grad_norm": 8.825554847717285, |
|
"learning_rate": 3.3740831295843525e-06, |
|
"loss": 6.2858, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3902439024390244, |
|
"grad_norm": 35.15165710449219, |
|
"learning_rate": 3.863080684596577e-06, |
|
"loss": 6.1864, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43902439024390244, |
|
"grad_norm": 7.6371073722839355, |
|
"learning_rate": 4.352078239608802e-06, |
|
"loss": 5.9375, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 5.905430793762207, |
|
"learning_rate": 4.841075794621027e-06, |
|
"loss": 5.8115, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5365853658536586, |
|
"grad_norm": 4.986270427703857, |
|
"learning_rate": 5.330073349633253e-06, |
|
"loss": 5.6514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5853658536585366, |
|
"grad_norm": 4.87542724609375, |
|
"learning_rate": 5.8190709046454775e-06, |
|
"loss": 5.4786, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6341463414634146, |
|
"grad_norm": 4.698776721954346, |
|
"learning_rate": 6.308068459657702e-06, |
|
"loss": 5.4382, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6829268292682927, |
|
"grad_norm": 3.5507872104644775, |
|
"learning_rate": 6.797066014669927e-06, |
|
"loss": 5.3708, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 3.769075393676758, |
|
"learning_rate": 7.286063569682151e-06, |
|
"loss": 5.1883, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7804878048780488, |
|
"grad_norm": 3.371607780456543, |
|
"learning_rate": 7.775061124694378e-06, |
|
"loss": 5.16, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8292682926829268, |
|
"grad_norm": 7.1413116455078125, |
|
"learning_rate": 8.264058679706602e-06, |
|
"loss": 5.0679, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8780487804878049, |
|
"grad_norm": 7.872459411621094, |
|
"learning_rate": 8.753056234718827e-06, |
|
"loss": 5.0494, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.926829268292683, |
|
"grad_norm": 4.166257858276367, |
|
"learning_rate": 9.242053789731052e-06, |
|
"loss": 4.9679, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 23.25122833251953, |
|
"learning_rate": 9.731051344743277e-06, |
|
"loss": 4.9343, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2759, |
|
"eval_gen_len": 211.6429, |
|
"eval_loss": 4.457103729248047, |
|
"eval_rouge1": 0.3508, |
|
"eval_rouge2": 0.0603, |
|
"eval_rougeL": 0.2235, |
|
"eval_runtime": 26.6238, |
|
"eval_samples_per_second": 6.836, |
|
"eval_steps_per_second": 1.728, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.024390243902439, |
|
"grad_norm": 15.634258270263672, |
|
"learning_rate": 1.0220048899755502e-05, |
|
"loss": 4.9, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.0731707317073171, |
|
"grad_norm": 3.670888900756836, |
|
"learning_rate": 1.0709046454767727e-05, |
|
"loss": 4.7805, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.1219512195121952, |
|
"grad_norm": 4.5713210105896, |
|
"learning_rate": 1.1198044009779951e-05, |
|
"loss": 4.7084, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.170731707317073, |
|
"grad_norm": 2.022193193435669, |
|
"learning_rate": 1.1687041564792176e-05, |
|
"loss": 4.6972, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 1.8646818399429321, |
|
"learning_rate": 1.2176039119804401e-05, |
|
"loss": 4.6286, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2682926829268293, |
|
"grad_norm": 3.063166618347168, |
|
"learning_rate": 1.2665036674816628e-05, |
|
"loss": 4.6438, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.3170731707317074, |
|
"grad_norm": 4.173657417297363, |
|
"learning_rate": 1.3154034229828852e-05, |
|
"loss": 4.6001, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3658536585365852, |
|
"grad_norm": 7.44461727142334, |
|
"learning_rate": 1.3643031784841076e-05, |
|
"loss": 4.5358, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.4146341463414633, |
|
"grad_norm": 1.7125922441482544, |
|
"learning_rate": 1.41320293398533e-05, |
|
"loss": 4.4905, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 9.653430938720703, |
|
"learning_rate": 1.4621026894865527e-05, |
|
"loss": 4.4497, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5121951219512195, |
|
"grad_norm": 2.094055652618408, |
|
"learning_rate": 1.5110024449877752e-05, |
|
"loss": 4.3886, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5609756097560976, |
|
"grad_norm": 1.2126773595809937, |
|
"learning_rate": 1.5599022004889977e-05, |
|
"loss": 4.3007, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.6097560975609757, |
|
"grad_norm": 2.478563070297241, |
|
"learning_rate": 1.60880195599022e-05, |
|
"loss": 4.2896, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.6585365853658538, |
|
"grad_norm": 3.717766523361206, |
|
"learning_rate": 1.6577017114914426e-05, |
|
"loss": 4.2184, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 1.308283805847168, |
|
"learning_rate": 1.706601466992665e-05, |
|
"loss": 4.2381, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.7560975609756098, |
|
"grad_norm": 1.2101848125457764, |
|
"learning_rate": 1.7555012224938876e-05, |
|
"loss": 4.135, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.8048780487804879, |
|
"grad_norm": 1.7718238830566406, |
|
"learning_rate": 1.8044009779951102e-05, |
|
"loss": 4.1122, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.8536585365853657, |
|
"grad_norm": 1.8747498989105225, |
|
"learning_rate": 1.8533007334963325e-05, |
|
"loss": 4.0616, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.9024390243902438, |
|
"grad_norm": 1.3740488290786743, |
|
"learning_rate": 1.9022004889975552e-05, |
|
"loss": 3.952, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 4.758138179779053, |
|
"learning_rate": 1.9511002444987775e-05, |
|
"loss": 3.9793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1578041315078735, |
|
"learning_rate": 2e-05, |
|
"loss": 3.9215, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3934, |
|
"eval_gen_len": 189.9505, |
|
"eval_loss": 3.538588523864746, |
|
"eval_rouge1": 0.4765, |
|
"eval_rouge2": 0.1303, |
|
"eval_rougeL": 0.3377, |
|
"eval_runtime": 26.1399, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 1.76, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.048780487804878, |
|
"grad_norm": 2.019075393676758, |
|
"learning_rate": 2.0488997555012228e-05, |
|
"loss": 3.8844, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.097560975609756, |
|
"grad_norm": 1.2126365900039673, |
|
"learning_rate": 2.097799511002445e-05, |
|
"loss": 3.8611, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.1463414634146343, |
|
"grad_norm": 11.292882919311523, |
|
"learning_rate": 2.1466992665036674e-05, |
|
"loss": 3.7623, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": 1.2274696826934814, |
|
"learning_rate": 2.19559902200489e-05, |
|
"loss": 3.7778, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.2439024390243905, |
|
"grad_norm": 9.684100151062012, |
|
"learning_rate": 2.2444987775061127e-05, |
|
"loss": 3.7204, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.292682926829268, |
|
"grad_norm": 1.1520313024520874, |
|
"learning_rate": 2.293398533007335e-05, |
|
"loss": 3.7227, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.341463414634146, |
|
"grad_norm": 1.2632983922958374, |
|
"learning_rate": 2.3422982885085577e-05, |
|
"loss": 3.7362, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.3902439024390243, |
|
"grad_norm": 0.9752365946769714, |
|
"learning_rate": 2.39119804400978e-05, |
|
"loss": 3.6642, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 1.1547846794128418, |
|
"learning_rate": 2.4400977995110023e-05, |
|
"loss": 3.5886, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4878048780487805, |
|
"grad_norm": 1.2422934770584106, |
|
"learning_rate": 2.488997555012225e-05, |
|
"loss": 3.6133, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.5365853658536586, |
|
"grad_norm": 1.253853440284729, |
|
"learning_rate": 2.5378973105134473e-05, |
|
"loss": 3.5837, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.5853658536585367, |
|
"grad_norm": 1.0211130380630493, |
|
"learning_rate": 2.58679706601467e-05, |
|
"loss": 3.6163, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.6341463414634148, |
|
"grad_norm": 1.131115436553955, |
|
"learning_rate": 2.635696821515893e-05, |
|
"loss": 3.4982, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": 0.9331684708595276, |
|
"learning_rate": 2.684596577017115e-05, |
|
"loss": 3.5171, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.7317073170731705, |
|
"grad_norm": 1.1529852151870728, |
|
"learning_rate": 2.7334963325183376e-05, |
|
"loss": 3.4674, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.7804878048780486, |
|
"grad_norm": 1.1946120262145996, |
|
"learning_rate": 2.78239608801956e-05, |
|
"loss": 3.4866, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.8292682926829267, |
|
"grad_norm": 1.292737364768982, |
|
"learning_rate": 2.8312958435207825e-05, |
|
"loss": 3.4434, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.8780487804878048, |
|
"grad_norm": 1.0269521474838257, |
|
"learning_rate": 2.880195599022005e-05, |
|
"loss": 3.3643, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 1.1100542545318604, |
|
"learning_rate": 2.9290953545232275e-05, |
|
"loss": 3.4197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.975609756097561, |
|
"grad_norm": 0.9210990071296692, |
|
"learning_rate": 2.97799511002445e-05, |
|
"loss": 3.3427, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.4734, |
|
"eval_gen_len": 181.8681, |
|
"eval_loss": 3.0026919841766357, |
|
"eval_rouge1": 0.5525, |
|
"eval_rouge2": 0.1927, |
|
"eval_rougeL": 0.4214, |
|
"eval_runtime": 25.9002, |
|
"eval_samples_per_second": 7.027, |
|
"eval_steps_per_second": 1.776, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 3.024390243902439, |
|
"grad_norm": 0.8961557745933533, |
|
"learning_rate": 3.0268948655256725e-05, |
|
"loss": 3.3786, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.073170731707317, |
|
"grad_norm": 1.2599140405654907, |
|
"learning_rate": 3.075794621026895e-05, |
|
"loss": 3.3689, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.1219512195121952, |
|
"grad_norm": 1.0294504165649414, |
|
"learning_rate": 3.1246943765281174e-05, |
|
"loss": 3.354, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.1707317073170733, |
|
"grad_norm": 0.9114963412284851, |
|
"learning_rate": 3.17359413202934e-05, |
|
"loss": 3.2847, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.2195121951219514, |
|
"grad_norm": 1.0025136470794678, |
|
"learning_rate": 3.222493887530563e-05, |
|
"loss": 3.2635, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.2682926829268295, |
|
"grad_norm": 1.1256715059280396, |
|
"learning_rate": 3.271393643031785e-05, |
|
"loss": 3.2903, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.317073170731707, |
|
"grad_norm": 1.0486799478530884, |
|
"learning_rate": 3.3202933985330074e-05, |
|
"loss": 3.2502, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.3658536585365852, |
|
"grad_norm": 0.9213652014732361, |
|
"learning_rate": 3.36919315403423e-05, |
|
"loss": 3.2612, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.4146341463414633, |
|
"grad_norm": 0.9584261178970337, |
|
"learning_rate": 3.418092909535453e-05, |
|
"loss": 3.1981, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.4634146341463414, |
|
"grad_norm": 0.9650068283081055, |
|
"learning_rate": 3.466992665036675e-05, |
|
"loss": 3.2118, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.5121951219512195, |
|
"grad_norm": 1.228827953338623, |
|
"learning_rate": 3.515892420537897e-05, |
|
"loss": 3.2021, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.5609756097560976, |
|
"grad_norm": 1.1250883340835571, |
|
"learning_rate": 3.56479217603912e-05, |
|
"loss": 3.1994, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.6097560975609757, |
|
"grad_norm": 0.938084602355957, |
|
"learning_rate": 3.613691931540342e-05, |
|
"loss": 3.1803, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"grad_norm": 0.9992406964302063, |
|
"learning_rate": 3.662591687041565e-05, |
|
"loss": 3.1305, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.7073170731707314, |
|
"grad_norm": 15.40438175201416, |
|
"learning_rate": 3.711491442542788e-05, |
|
"loss": 3.1497, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.7560975609756095, |
|
"grad_norm": 1.1189182996749878, |
|
"learning_rate": 3.76039119804401e-05, |
|
"loss": 3.1439, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.8048780487804876, |
|
"grad_norm": 0.8453640341758728, |
|
"learning_rate": 3.8092909535452325e-05, |
|
"loss": 3.1512, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.8536585365853657, |
|
"grad_norm": 0.8329293131828308, |
|
"learning_rate": 3.8581907090464545e-05, |
|
"loss": 3.0875, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.902439024390244, |
|
"grad_norm": 1.1350609064102173, |
|
"learning_rate": 3.907090464547677e-05, |
|
"loss": 3.1501, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.951219512195122, |
|
"grad_norm": 0.9956341981887817, |
|
"learning_rate": 3.9559902200489005e-05, |
|
"loss": 3.0673, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.0671465396881104, |
|
"learning_rate": 4.0048899755501225e-05, |
|
"loss": 3.0923, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5152, |
|
"eval_gen_len": 177.4011, |
|
"eval_loss": 2.7666826248168945, |
|
"eval_rouge1": 0.582, |
|
"eval_rouge2": 0.233, |
|
"eval_rougeL": 0.4623, |
|
"eval_runtime": 26.0386, |
|
"eval_samples_per_second": 6.99, |
|
"eval_steps_per_second": 1.767, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.048780487804878, |
|
"grad_norm": 0.8541052937507629, |
|
"learning_rate": 4.053789731051345e-05, |
|
"loss": 3.1169, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.097560975609756, |
|
"grad_norm": 0.9787116050720215, |
|
"learning_rate": 4.102689486552567e-05, |
|
"loss": 3.0454, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.146341463414634, |
|
"grad_norm": 0.9616861343383789, |
|
"learning_rate": 4.15158924205379e-05, |
|
"loss": 3.0472, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.195121951219512, |
|
"grad_norm": 1.127516269683838, |
|
"learning_rate": 4.2004889975550124e-05, |
|
"loss": 3.0318, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.2439024390243905, |
|
"grad_norm": 1.0883930921554565, |
|
"learning_rate": 4.249388753056235e-05, |
|
"loss": 3.0661, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.2926829268292686, |
|
"grad_norm": 1.149770736694336, |
|
"learning_rate": 4.298288508557458e-05, |
|
"loss": 3.0137, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.341463414634147, |
|
"grad_norm": 0.9378666281700134, |
|
"learning_rate": 4.34718826405868e-05, |
|
"loss": 3.0059, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.390243902439025, |
|
"grad_norm": 0.9855746030807495, |
|
"learning_rate": 4.396088019559902e-05, |
|
"loss": 3.0009, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.439024390243903, |
|
"grad_norm": 1.0716888904571533, |
|
"learning_rate": 4.444987775061125e-05, |
|
"loss": 3.0053, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.487804878048781, |
|
"grad_norm": 0.9853790998458862, |
|
"learning_rate": 4.4938875305623476e-05, |
|
"loss": 2.9294, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.536585365853659, |
|
"grad_norm": 0.9836393594741821, |
|
"learning_rate": 4.54278728606357e-05, |
|
"loss": 2.955, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.585365853658536, |
|
"grad_norm": 1.4114398956298828, |
|
"learning_rate": 4.591687041564792e-05, |
|
"loss": 2.9944, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.634146341463414, |
|
"grad_norm": 0.8007514476776123, |
|
"learning_rate": 4.640586797066015e-05, |
|
"loss": 2.9658, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.682926829268292, |
|
"grad_norm": 0.98356693983078, |
|
"learning_rate": 4.689486552567237e-05, |
|
"loss": 2.9314, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.7317073170731705, |
|
"grad_norm": 1.2858251333236694, |
|
"learning_rate": 4.7383863080684595e-05, |
|
"loss": 2.9415, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.780487804878049, |
|
"grad_norm": 1.033654808998108, |
|
"learning_rate": 4.787286063569683e-05, |
|
"loss": 2.9866, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.829268292682927, |
|
"grad_norm": 0.8810921907424927, |
|
"learning_rate": 4.836185819070905e-05, |
|
"loss": 2.9511, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"grad_norm": 1.1121643781661987, |
|
"learning_rate": 4.8850855745721275e-05, |
|
"loss": 2.9617, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.926829268292683, |
|
"grad_norm": 1.039544701576233, |
|
"learning_rate": 4.9339853300733495e-05, |
|
"loss": 2.9242, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.975609756097561, |
|
"grad_norm": 0.8879916667938232, |
|
"learning_rate": 4.982885085574572e-05, |
|
"loss": 2.8744, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5424, |
|
"eval_gen_len": 179.2088, |
|
"eval_loss": 2.6143970489501953, |
|
"eval_rouge1": 0.6042, |
|
"eval_rouge2": 0.2613, |
|
"eval_rougeL": 0.4883, |
|
"eval_runtime": 25.9288, |
|
"eval_samples_per_second": 7.019, |
|
"eval_steps_per_second": 1.774, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 5.024390243902439, |
|
"grad_norm": 1.1612334251403809, |
|
"learning_rate": 5.031784841075795e-05, |
|
"loss": 2.9611, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.073170731707317, |
|
"grad_norm": 1.5247349739074707, |
|
"learning_rate": 5.080684596577018e-05, |
|
"loss": 2.9112, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.121951219512195, |
|
"grad_norm": 0.9210406541824341, |
|
"learning_rate": 5.1295843520782394e-05, |
|
"loss": 2.8527, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.170731707317073, |
|
"grad_norm": 1.1976507902145386, |
|
"learning_rate": 5.178484107579462e-05, |
|
"loss": 2.9057, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.219512195121951, |
|
"grad_norm": 1.2032570838928223, |
|
"learning_rate": 5.227383863080685e-05, |
|
"loss": 2.8742, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.2682926829268295, |
|
"grad_norm": 0.9623976945877075, |
|
"learning_rate": 5.2762836185819073e-05, |
|
"loss": 2.829, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.317073170731708, |
|
"grad_norm": 0.8499730229377747, |
|
"learning_rate": 5.32518337408313e-05, |
|
"loss": 2.8517, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.365853658536586, |
|
"grad_norm": 1.1849853992462158, |
|
"learning_rate": 5.374083129584352e-05, |
|
"loss": 2.8572, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.414634146341464, |
|
"grad_norm": 1.0028434991836548, |
|
"learning_rate": 5.4229828850855746e-05, |
|
"loss": 2.835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 5.463414634146342, |
|
"grad_norm": 0.8181015253067017, |
|
"learning_rate": 5.471882640586797e-05, |
|
"loss": 2.8557, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 5.512195121951219, |
|
"grad_norm": 1.0572930574417114, |
|
"learning_rate": 5.52078239608802e-05, |
|
"loss": 2.8222, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 5.560975609756097, |
|
"grad_norm": 0.8895127177238464, |
|
"learning_rate": 5.5696821515892426e-05, |
|
"loss": 2.7869, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.609756097560975, |
|
"grad_norm": 1.0056649446487427, |
|
"learning_rate": 5.6185819070904646e-05, |
|
"loss": 2.8131, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.658536585365853, |
|
"grad_norm": 0.8315693736076355, |
|
"learning_rate": 5.667481662591687e-05, |
|
"loss": 2.8587, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 5.7073170731707314, |
|
"grad_norm": 0.8721175789833069, |
|
"learning_rate": 5.71638141809291e-05, |
|
"loss": 2.8064, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 5.7560975609756095, |
|
"grad_norm": 0.9493646025657654, |
|
"learning_rate": 5.7652811735941325e-05, |
|
"loss": 2.8169, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.804878048780488, |
|
"grad_norm": 0.9617358446121216, |
|
"learning_rate": 5.814180929095355e-05, |
|
"loss": 2.8168, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 5.853658536585366, |
|
"grad_norm": 0.9313523173332214, |
|
"learning_rate": 5.863080684596577e-05, |
|
"loss": 2.771, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.902439024390244, |
|
"grad_norm": 0.8505017161369324, |
|
"learning_rate": 5.9119804400978e-05, |
|
"loss": 2.7744, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 5.951219512195122, |
|
"grad_norm": 0.9459558129310608, |
|
"learning_rate": 5.9608801955990224e-05, |
|
"loss": 2.7718, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.208365797996521, |
|
"learning_rate": 6.009779951100245e-05, |
|
"loss": 2.7677, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5575, |
|
"eval_gen_len": 186.0879, |
|
"eval_loss": 2.5141470432281494, |
|
"eval_rouge1": 0.6141, |
|
"eval_rouge2": 0.2737, |
|
"eval_rougeL": 0.5028, |
|
"eval_runtime": 26.0185, |
|
"eval_samples_per_second": 6.995, |
|
"eval_steps_per_second": 1.768, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.048780487804878, |
|
"grad_norm": 1.1182864904403687, |
|
"learning_rate": 6.058679706601468e-05, |
|
"loss": 2.7941, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.097560975609756, |
|
"grad_norm": 0.906001627445221, |
|
"learning_rate": 6.107579462102689e-05, |
|
"loss": 2.7356, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.146341463414634, |
|
"grad_norm": 0.9255037903785706, |
|
"learning_rate": 6.156479217603912e-05, |
|
"loss": 2.7293, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.195121951219512, |
|
"grad_norm": 0.8442258834838867, |
|
"learning_rate": 6.205378973105134e-05, |
|
"loss": 2.7488, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.2439024390243905, |
|
"grad_norm": 0.9159016013145447, |
|
"learning_rate": 6.254278728606357e-05, |
|
"loss": 2.7394, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 6.2926829268292686, |
|
"grad_norm": 0.909702718257904, |
|
"learning_rate": 6.30317848410758e-05, |
|
"loss": 2.7518, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 6.341463414634147, |
|
"grad_norm": 0.8735110759735107, |
|
"learning_rate": 6.352078239608802e-05, |
|
"loss": 2.6913, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.390243902439025, |
|
"grad_norm": 1.0950109958648682, |
|
"learning_rate": 6.400977995110025e-05, |
|
"loss": 2.6841, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 6.439024390243903, |
|
"grad_norm": 0.8980112671852112, |
|
"learning_rate": 6.449877750611248e-05, |
|
"loss": 2.7115, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.487804878048781, |
|
"grad_norm": 1.0420292615890503, |
|
"learning_rate": 6.49877750611247e-05, |
|
"loss": 2.7501, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 6.536585365853659, |
|
"grad_norm": 0.9553576111793518, |
|
"learning_rate": 6.547677261613693e-05, |
|
"loss": 2.652, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 6.585365853658536, |
|
"grad_norm": 1.0302420854568481, |
|
"learning_rate": 6.596577017114914e-05, |
|
"loss": 2.6845, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.634146341463414, |
|
"grad_norm": 1.077214241027832, |
|
"learning_rate": 6.645476772616137e-05, |
|
"loss": 2.6935, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 6.682926829268292, |
|
"grad_norm": 0.9967917203903198, |
|
"learning_rate": 6.69437652811736e-05, |
|
"loss": 2.7126, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 6.7317073170731705, |
|
"grad_norm": 0.8878841996192932, |
|
"learning_rate": 6.743276283618582e-05, |
|
"loss": 2.6744, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.780487804878049, |
|
"grad_norm": 0.9185668230056763, |
|
"learning_rate": 6.792176039119805e-05, |
|
"loss": 2.6995, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 6.829268292682927, |
|
"grad_norm": 0.9542802572250366, |
|
"learning_rate": 6.841075794621027e-05, |
|
"loss": 2.7088, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.878048780487805, |
|
"grad_norm": 0.9949384331703186, |
|
"learning_rate": 6.88997555012225e-05, |
|
"loss": 2.6895, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 6.926829268292683, |
|
"grad_norm": 0.9762834310531616, |
|
"learning_rate": 6.938875305623473e-05, |
|
"loss": 2.6164, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 6.975609756097561, |
|
"grad_norm": 0.9873429536819458, |
|
"learning_rate": 6.987775061124695e-05, |
|
"loss": 2.7168, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.57, |
|
"eval_gen_len": 190.6484, |
|
"eval_loss": 2.4311938285827637, |
|
"eval_rouge1": 0.6205, |
|
"eval_rouge2": 0.2863, |
|
"eval_rougeL": 0.5136, |
|
"eval_runtime": 26.1579, |
|
"eval_samples_per_second": 6.958, |
|
"eval_steps_per_second": 1.759, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 7.024390243902439, |
|
"grad_norm": 0.8859543204307556, |
|
"learning_rate": 7.036674816625917e-05, |
|
"loss": 2.6648, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.073170731707317, |
|
"grad_norm": 0.9650115966796875, |
|
"learning_rate": 7.08557457212714e-05, |
|
"loss": 2.6306, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.121951219512195, |
|
"grad_norm": 0.9108964800834656, |
|
"learning_rate": 7.134474327628362e-05, |
|
"loss": 2.6047, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 7.170731707317073, |
|
"grad_norm": 0.8837963342666626, |
|
"learning_rate": 7.183374083129585e-05, |
|
"loss": 2.6432, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 7.219512195121951, |
|
"grad_norm": 1.0622878074645996, |
|
"learning_rate": 7.232273838630807e-05, |
|
"loss": 2.6387, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 7.2682926829268295, |
|
"grad_norm": 0.9728686213493347, |
|
"learning_rate": 7.281173594132029e-05, |
|
"loss": 2.6206, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 7.317073170731708, |
|
"grad_norm": 0.7860050797462463, |
|
"learning_rate": 7.330073349633251e-05, |
|
"loss": 2.6196, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.365853658536586, |
|
"grad_norm": 0.9671594500541687, |
|
"learning_rate": 7.378973105134474e-05, |
|
"loss": 2.6267, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 7.414634146341464, |
|
"grad_norm": 1.1058562994003296, |
|
"learning_rate": 7.427872860635698e-05, |
|
"loss": 2.6606, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 7.463414634146342, |
|
"grad_norm": 0.9301013946533203, |
|
"learning_rate": 7.47677261613692e-05, |
|
"loss": 2.6526, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 7.512195121951219, |
|
"grad_norm": 1.050396203994751, |
|
"learning_rate": 7.525672371638142e-05, |
|
"loss": 2.572, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 7.560975609756097, |
|
"grad_norm": 0.9724476933479309, |
|
"learning_rate": 7.574572127139365e-05, |
|
"loss": 2.566, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.609756097560975, |
|
"grad_norm": 0.9213927388191223, |
|
"learning_rate": 7.623471882640587e-05, |
|
"loss": 2.6139, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 7.658536585365853, |
|
"grad_norm": 1.0309463739395142, |
|
"learning_rate": 7.67237163814181e-05, |
|
"loss": 2.6379, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 7.7073170731707314, |
|
"grad_norm": 0.8563573956489563, |
|
"learning_rate": 7.721271393643032e-05, |
|
"loss": 2.6191, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 7.7560975609756095, |
|
"grad_norm": 0.9945729970932007, |
|
"learning_rate": 7.770171149144254e-05, |
|
"loss": 2.5668, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 7.804878048780488, |
|
"grad_norm": 0.7494526505470276, |
|
"learning_rate": 7.819070904645476e-05, |
|
"loss": 2.5047, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.853658536585366, |
|
"grad_norm": 0.9642314910888672, |
|
"learning_rate": 7.867970660146699e-05, |
|
"loss": 2.592, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 7.902439024390244, |
|
"grad_norm": 0.8688525557518005, |
|
"learning_rate": 7.916870415647922e-05, |
|
"loss": 2.5551, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 7.951219512195122, |
|
"grad_norm": 0.8314557075500488, |
|
"learning_rate": 7.965770171149144e-05, |
|
"loss": 2.5851, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.000834584236145, |
|
"learning_rate": 8.014669926650367e-05, |
|
"loss": 2.5717, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5839, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.3604416847229004, |
|
"eval_rouge1": 0.5641, |
|
"eval_rouge2": 0.2668, |
|
"eval_rougeL": 0.4705, |
|
"eval_runtime": 26.4515, |
|
"eval_samples_per_second": 6.881, |
|
"eval_steps_per_second": 1.739, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.048780487804878, |
|
"grad_norm": 1.0702135562896729, |
|
"learning_rate": 8.06356968215159e-05, |
|
"loss": 2.5839, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.097560975609756, |
|
"grad_norm": 1.0597014427185059, |
|
"learning_rate": 8.112469437652812e-05, |
|
"loss": 2.5569, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 8.146341463414634, |
|
"grad_norm": 1.0889874696731567, |
|
"learning_rate": 8.161369193154035e-05, |
|
"loss": 2.551, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 8.195121951219512, |
|
"grad_norm": 0.9655137658119202, |
|
"learning_rate": 8.210268948655258e-05, |
|
"loss": 2.5546, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 8.24390243902439, |
|
"grad_norm": 0.9861440062522888, |
|
"learning_rate": 8.259168704156479e-05, |
|
"loss": 2.4937, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 8.292682926829269, |
|
"grad_norm": 0.9669071435928345, |
|
"learning_rate": 8.308068459657702e-05, |
|
"loss": 2.5275, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.341463414634147, |
|
"grad_norm": 0.9090121984481812, |
|
"learning_rate": 8.356968215158924e-05, |
|
"loss": 2.4987, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 8.390243902439025, |
|
"grad_norm": 1.0994421243667603, |
|
"learning_rate": 8.405867970660147e-05, |
|
"loss": 2.5473, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 8.439024390243903, |
|
"grad_norm": 0.8295562267303467, |
|
"learning_rate": 8.45476772616137e-05, |
|
"loss": 2.5134, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 8.487804878048781, |
|
"grad_norm": 0.7855349183082581, |
|
"learning_rate": 8.503667481662592e-05, |
|
"loss": 2.4448, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 8.536585365853659, |
|
"grad_norm": 1.008330225944519, |
|
"learning_rate": 8.552567237163815e-05, |
|
"loss": 2.5522, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.585365853658537, |
|
"grad_norm": 1.0320262908935547, |
|
"learning_rate": 8.601466992665038e-05, |
|
"loss": 2.5141, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 8.634146341463415, |
|
"grad_norm": 1.0223716497421265, |
|
"learning_rate": 8.65036674816626e-05, |
|
"loss": 2.5079, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 8.682926829268293, |
|
"grad_norm": 0.882462203502655, |
|
"learning_rate": 8.699266503667483e-05, |
|
"loss": 2.5387, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 8.731707317073171, |
|
"grad_norm": 1.1398268938064575, |
|
"learning_rate": 8.748166259168704e-05, |
|
"loss": 2.4915, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 8.78048780487805, |
|
"grad_norm": 1.1037157773971558, |
|
"learning_rate": 8.797066014669927e-05, |
|
"loss": 2.5326, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.829268292682928, |
|
"grad_norm": 0.7518386840820312, |
|
"learning_rate": 8.84596577017115e-05, |
|
"loss": 2.4366, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 8.878048780487806, |
|
"grad_norm": 0.9014624953269958, |
|
"learning_rate": 8.894865525672372e-05, |
|
"loss": 2.4906, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 8.926829268292684, |
|
"grad_norm": 1.0596896409988403, |
|
"learning_rate": 8.943765281173595e-05, |
|
"loss": 2.541, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 8.975609756097562, |
|
"grad_norm": 0.99559485912323, |
|
"learning_rate": 8.992665036674816e-05, |
|
"loss": 2.4663, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5932, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.3033788204193115, |
|
"eval_rouge1": 0.5701, |
|
"eval_rouge2": 0.2773, |
|
"eval_rougeL": 0.4791, |
|
"eval_runtime": 26.6033, |
|
"eval_samples_per_second": 6.841, |
|
"eval_steps_per_second": 1.729, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 9.024390243902438, |
|
"grad_norm": 1.1031367778778076, |
|
"learning_rate": 9.04156479217604e-05, |
|
"loss": 2.4692, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.073170731707316, |
|
"grad_norm": 1.0542200803756714, |
|
"learning_rate": 9.090464547677263e-05, |
|
"loss": 2.4629, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 9.121951219512194, |
|
"grad_norm": 0.8666772246360779, |
|
"learning_rate": 9.139364303178485e-05, |
|
"loss": 2.4193, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 9.170731707317072, |
|
"grad_norm": 1.041645884513855, |
|
"learning_rate": 9.188264058679708e-05, |
|
"loss": 2.4512, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 9.21951219512195, |
|
"grad_norm": 0.8610559701919556, |
|
"learning_rate": 9.237163814180929e-05, |
|
"loss": 2.441, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 9.268292682926829, |
|
"grad_norm": 0.9617129564285278, |
|
"learning_rate": 9.286063569682152e-05, |
|
"loss": 2.4903, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.317073170731707, |
|
"grad_norm": 0.9784387946128845, |
|
"learning_rate": 9.334963325183375e-05, |
|
"loss": 2.4764, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 9.365853658536585, |
|
"grad_norm": 0.8843352794647217, |
|
"learning_rate": 9.383863080684597e-05, |
|
"loss": 2.4541, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 9.414634146341463, |
|
"grad_norm": 1.1032359600067139, |
|
"learning_rate": 9.43276283618582e-05, |
|
"loss": 2.4355, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 9.463414634146341, |
|
"grad_norm": 1.2064437866210938, |
|
"learning_rate": 9.481662591687041e-05, |
|
"loss": 2.4135, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 9.512195121951219, |
|
"grad_norm": 0.810530424118042, |
|
"learning_rate": 9.530562347188264e-05, |
|
"loss": 2.3846, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.560975609756097, |
|
"grad_norm": 0.7688414454460144, |
|
"learning_rate": 9.579462102689486e-05, |
|
"loss": 2.4259, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 9.609756097560975, |
|
"grad_norm": 0.9564357399940491, |
|
"learning_rate": 9.628361858190709e-05, |
|
"loss": 2.4354, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 9.658536585365853, |
|
"grad_norm": 0.8629823923110962, |
|
"learning_rate": 9.677261613691933e-05, |
|
"loss": 2.4389, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.707317073170731, |
|
"grad_norm": 0.8377267122268677, |
|
"learning_rate": 9.726161369193154e-05, |
|
"loss": 2.4162, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 9.75609756097561, |
|
"grad_norm": 0.9582370519638062, |
|
"learning_rate": 9.775061124694377e-05, |
|
"loss": 2.4044, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.804878048780488, |
|
"grad_norm": 0.901448667049408, |
|
"learning_rate": 9.8239608801956e-05, |
|
"loss": 2.4627, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 9.853658536585366, |
|
"grad_norm": 1.0475609302520752, |
|
"learning_rate": 9.872860635696822e-05, |
|
"loss": 2.4079, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 9.902439024390244, |
|
"grad_norm": 1.0225861072540283, |
|
"learning_rate": 9.921760391198045e-05, |
|
"loss": 2.4705, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 9.951219512195122, |
|
"grad_norm": 0.8137247562408447, |
|
"learning_rate": 9.970660146699266e-05, |
|
"loss": 2.4109, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.1472225189208984, |
|
"learning_rate": 9.997826677533279e-05, |
|
"loss": 2.4236, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6014, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.2605769634246826, |
|
"eval_rouge1": 0.5749, |
|
"eval_rouge2": 0.285, |
|
"eval_rougeL": 0.4872, |
|
"eval_runtime": 26.4591, |
|
"eval_samples_per_second": 6.879, |
|
"eval_steps_per_second": 1.739, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.048780487804878, |
|
"grad_norm": 1.0116795301437378, |
|
"learning_rate": 9.992393371366477e-05, |
|
"loss": 2.3413, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 10.097560975609756, |
|
"grad_norm": 0.7885662913322449, |
|
"learning_rate": 9.986960065199674e-05, |
|
"loss": 2.3437, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 10.146341463414634, |
|
"grad_norm": 0.9145445227622986, |
|
"learning_rate": 9.981526759032873e-05, |
|
"loss": 2.3793, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 10.195121951219512, |
|
"grad_norm": 0.8369097113609314, |
|
"learning_rate": 9.97609345286607e-05, |
|
"loss": 2.3435, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 10.24390243902439, |
|
"grad_norm": 0.9109391570091248, |
|
"learning_rate": 9.970660146699266e-05, |
|
"loss": 2.4217, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.292682926829269, |
|
"grad_norm": 0.8721039891242981, |
|
"learning_rate": 9.965226840532465e-05, |
|
"loss": 2.3891, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 10.341463414634147, |
|
"grad_norm": 1.0082284212112427, |
|
"learning_rate": 9.959793534365663e-05, |
|
"loss": 2.3427, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 10.390243902439025, |
|
"grad_norm": 0.9386345148086548, |
|
"learning_rate": 9.95436022819886e-05, |
|
"loss": 2.3686, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 10.439024390243903, |
|
"grad_norm": 0.9664391279220581, |
|
"learning_rate": 9.948926922032057e-05, |
|
"loss": 2.4001, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.487804878048781, |
|
"grad_norm": 0.9189599752426147, |
|
"learning_rate": 9.943493615865255e-05, |
|
"loss": 2.3468, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 10.536585365853659, |
|
"grad_norm": 0.8793349266052246, |
|
"learning_rate": 9.938060309698452e-05, |
|
"loss": 2.33, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 10.585365853658537, |
|
"grad_norm": 0.7458771467208862, |
|
"learning_rate": 9.93262700353165e-05, |
|
"loss": 2.3779, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 10.634146341463415, |
|
"grad_norm": 1.412410855293274, |
|
"learning_rate": 9.927193697364847e-05, |
|
"loss": 2.3591, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 10.682926829268293, |
|
"grad_norm": 0.97608882188797, |
|
"learning_rate": 9.921760391198045e-05, |
|
"loss": 2.4116, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 10.731707317073171, |
|
"grad_norm": 0.8573082089424133, |
|
"learning_rate": 9.916327085031242e-05, |
|
"loss": 2.3711, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.78048780487805, |
|
"grad_norm": 0.9795628786087036, |
|
"learning_rate": 9.91089377886444e-05, |
|
"loss": 2.3187, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 10.829268292682928, |
|
"grad_norm": 0.9566768407821655, |
|
"learning_rate": 9.905460472697637e-05, |
|
"loss": 2.3801, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 10.878048780487806, |
|
"grad_norm": 0.931747317314148, |
|
"learning_rate": 9.900027166530834e-05, |
|
"loss": 2.4118, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 10.926829268292684, |
|
"grad_norm": 1.0201259851455688, |
|
"learning_rate": 9.894593860364032e-05, |
|
"loss": 2.395, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 10.975609756097562, |
|
"grad_norm": 0.9366726875305176, |
|
"learning_rate": 9.88916055419723e-05, |
|
"loss": 2.3715, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6101, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.2207939624786377, |
|
"eval_rouge1": 0.5796, |
|
"eval_rouge2": 0.2939, |
|
"eval_rougeL": 0.4951, |
|
"eval_runtime": 26.5531, |
|
"eval_samples_per_second": 6.854, |
|
"eval_steps_per_second": 1.732, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 11.024390243902438, |
|
"grad_norm": 0.8451365828514099, |
|
"learning_rate": 9.883727248030426e-05, |
|
"loss": 2.3, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 11.073170731707316, |
|
"grad_norm": 0.9864517450332642, |
|
"learning_rate": 9.878293941863624e-05, |
|
"loss": 2.3132, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 11.121951219512194, |
|
"grad_norm": 0.9425224661827087, |
|
"learning_rate": 9.872860635696822e-05, |
|
"loss": 2.3193, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 11.170731707317072, |
|
"grad_norm": 0.8904969692230225, |
|
"learning_rate": 9.867427329530019e-05, |
|
"loss": 2.2679, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 11.21951219512195, |
|
"grad_norm": 0.8168789744377136, |
|
"learning_rate": 9.861994023363218e-05, |
|
"loss": 2.3595, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 11.268292682926829, |
|
"grad_norm": 0.8249573707580566, |
|
"learning_rate": 9.856560717196414e-05, |
|
"loss": 2.2372, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 11.317073170731707, |
|
"grad_norm": 1.2351031303405762, |
|
"learning_rate": 9.851127411029611e-05, |
|
"loss": 2.3489, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 11.365853658536585, |
|
"grad_norm": 0.9290939569473267, |
|
"learning_rate": 9.84569410486281e-05, |
|
"loss": 2.2802, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 11.414634146341463, |
|
"grad_norm": 1.1329292058944702, |
|
"learning_rate": 9.840260798696008e-05, |
|
"loss": 2.3173, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 11.463414634146341, |
|
"grad_norm": 1.0866820812225342, |
|
"learning_rate": 9.834827492529205e-05, |
|
"loss": 2.2682, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 11.512195121951219, |
|
"grad_norm": 0.8205491304397583, |
|
"learning_rate": 9.829394186362402e-05, |
|
"loss": 2.3116, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 11.560975609756097, |
|
"grad_norm": 0.8546379804611206, |
|
"learning_rate": 9.8239608801956e-05, |
|
"loss": 2.3475, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 11.609756097560975, |
|
"grad_norm": 0.8734157085418701, |
|
"learning_rate": 9.818527574028798e-05, |
|
"loss": 2.3206, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 11.658536585365853, |
|
"grad_norm": 0.799018919467926, |
|
"learning_rate": 9.813094267861994e-05, |
|
"loss": 2.3096, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 11.707317073170731, |
|
"grad_norm": 0.8234296441078186, |
|
"learning_rate": 9.807660961695192e-05, |
|
"loss": 2.32, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.75609756097561, |
|
"grad_norm": 0.908701479434967, |
|
"learning_rate": 9.80222765552839e-05, |
|
"loss": 2.3284, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 11.804878048780488, |
|
"grad_norm": 0.8176116347312927, |
|
"learning_rate": 9.796794349361587e-05, |
|
"loss": 2.3131, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 11.853658536585366, |
|
"grad_norm": 1.1658778190612793, |
|
"learning_rate": 9.791361043194785e-05, |
|
"loss": 2.3174, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 11.902439024390244, |
|
"grad_norm": 0.8987638354301453, |
|
"learning_rate": 9.785927737027982e-05, |
|
"loss": 2.2697, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 11.951219512195122, |
|
"grad_norm": 1.0862481594085693, |
|
"learning_rate": 9.780494430861179e-05, |
|
"loss": 2.3026, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.0809197425842285, |
|
"learning_rate": 9.775061124694377e-05, |
|
"loss": 2.2779, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6144, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.1923000812530518, |
|
"eval_rouge1": 0.584, |
|
"eval_rouge2": 0.298, |
|
"eval_rougeL": 0.4986, |
|
"eval_runtime": 26.6759, |
|
"eval_samples_per_second": 6.823, |
|
"eval_steps_per_second": 1.724, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 12.048780487804878, |
|
"grad_norm": 0.9038712978363037, |
|
"learning_rate": 9.769627818527575e-05, |
|
"loss": 2.279, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 12.097560975609756, |
|
"grad_norm": 0.9469591975212097, |
|
"learning_rate": 9.764194512360772e-05, |
|
"loss": 2.2548, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 12.146341463414634, |
|
"grad_norm": 1.0490235090255737, |
|
"learning_rate": 9.758761206193969e-05, |
|
"loss": 2.2762, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 12.195121951219512, |
|
"grad_norm": 0.9397657513618469, |
|
"learning_rate": 9.753327900027167e-05, |
|
"loss": 2.2562, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.24390243902439, |
|
"grad_norm": 0.903236985206604, |
|
"learning_rate": 9.747894593860366e-05, |
|
"loss": 2.2559, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 12.292682926829269, |
|
"grad_norm": 0.8761768937110901, |
|
"learning_rate": 9.742461287693561e-05, |
|
"loss": 2.2569, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 12.341463414634147, |
|
"grad_norm": 0.7565495371818542, |
|
"learning_rate": 9.73702798152676e-05, |
|
"loss": 2.2202, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 12.390243902439025, |
|
"grad_norm": 0.9713935852050781, |
|
"learning_rate": 9.731594675359958e-05, |
|
"loss": 2.2509, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 12.439024390243903, |
|
"grad_norm": 0.8867313861846924, |
|
"learning_rate": 9.726161369193154e-05, |
|
"loss": 2.2889, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 12.487804878048781, |
|
"grad_norm": 1.0842514038085938, |
|
"learning_rate": 9.720728063026351e-05, |
|
"loss": 2.2381, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 12.536585365853659, |
|
"grad_norm": 0.8155404329299927, |
|
"learning_rate": 9.71529475685955e-05, |
|
"loss": 2.2468, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 12.585365853658537, |
|
"grad_norm": 0.7613242864608765, |
|
"learning_rate": 9.709861450692746e-05, |
|
"loss": 2.2128, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 12.634146341463415, |
|
"grad_norm": 0.9132261276245117, |
|
"learning_rate": 9.704428144525945e-05, |
|
"loss": 2.2659, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 12.682926829268293, |
|
"grad_norm": 1.1883471012115479, |
|
"learning_rate": 9.698994838359143e-05, |
|
"loss": 2.2473, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.731707317073171, |
|
"grad_norm": 0.7840389013290405, |
|
"learning_rate": 9.693561532192338e-05, |
|
"loss": 2.2435, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 12.78048780487805, |
|
"grad_norm": 0.9382815957069397, |
|
"learning_rate": 9.688128226025537e-05, |
|
"loss": 2.2278, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 12.829268292682928, |
|
"grad_norm": 0.8595882654190063, |
|
"learning_rate": 9.682694919858735e-05, |
|
"loss": 2.27, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 12.878048780487806, |
|
"grad_norm": 0.9498746395111084, |
|
"learning_rate": 9.677261613691933e-05, |
|
"loss": 2.2706, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 12.926829268292684, |
|
"grad_norm": 1.005918025970459, |
|
"learning_rate": 9.671828307525129e-05, |
|
"loss": 2.2543, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 12.975609756097562, |
|
"grad_norm": 0.8443641662597656, |
|
"learning_rate": 9.666395001358327e-05, |
|
"loss": 2.2018, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6211, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.1619982719421387, |
|
"eval_rouge1": 0.5881, |
|
"eval_rouge2": 0.3053, |
|
"eval_rougeL": 0.5057, |
|
"eval_runtime": 26.4837, |
|
"eval_samples_per_second": 6.872, |
|
"eval_steps_per_second": 1.737, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 13.024390243902438, |
|
"grad_norm": 0.9114805459976196, |
|
"learning_rate": 9.660961695191525e-05, |
|
"loss": 2.243, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 13.073170731707316, |
|
"grad_norm": 1.0895026922225952, |
|
"learning_rate": 9.655528389024722e-05, |
|
"loss": 2.2126, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 13.121951219512194, |
|
"grad_norm": 0.9829836487770081, |
|
"learning_rate": 9.650095082857919e-05, |
|
"loss": 2.1898, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 13.170731707317072, |
|
"grad_norm": 0.9027822017669678, |
|
"learning_rate": 9.644661776691117e-05, |
|
"loss": 2.2044, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 13.21951219512195, |
|
"grad_norm": 0.8191620111465454, |
|
"learning_rate": 9.639228470524314e-05, |
|
"loss": 2.1748, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 13.268292682926829, |
|
"grad_norm": 0.8903685808181763, |
|
"learning_rate": 9.633795164357512e-05, |
|
"loss": 2.2767, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 13.317073170731707, |
|
"grad_norm": 0.9027280211448669, |
|
"learning_rate": 9.628361858190709e-05, |
|
"loss": 2.1899, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 13.365853658536585, |
|
"grad_norm": 0.8687236309051514, |
|
"learning_rate": 9.622928552023906e-05, |
|
"loss": 2.1784, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 13.414634146341463, |
|
"grad_norm": 0.828973650932312, |
|
"learning_rate": 9.617495245857104e-05, |
|
"loss": 2.2028, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 13.463414634146341, |
|
"grad_norm": 0.8230794072151184, |
|
"learning_rate": 9.612061939690302e-05, |
|
"loss": 2.1924, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 13.512195121951219, |
|
"grad_norm": 0.9190805554389954, |
|
"learning_rate": 9.6066286335235e-05, |
|
"loss": 2.2352, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 13.560975609756097, |
|
"grad_norm": 0.7604323029518127, |
|
"learning_rate": 9.601195327356696e-05, |
|
"loss": 2.1649, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 13.609756097560975, |
|
"grad_norm": 1.0770535469055176, |
|
"learning_rate": 9.595762021189894e-05, |
|
"loss": 2.2197, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 13.658536585365853, |
|
"grad_norm": 0.94279944896698, |
|
"learning_rate": 9.590328715023093e-05, |
|
"loss": 2.2216, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.707317073170731, |
|
"grad_norm": 0.9158275723457336, |
|
"learning_rate": 9.58489540885629e-05, |
|
"loss": 2.215, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 13.75609756097561, |
|
"grad_norm": 0.8379354476928711, |
|
"learning_rate": 9.579462102689486e-05, |
|
"loss": 2.1912, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 13.804878048780488, |
|
"grad_norm": 1.0317034721374512, |
|
"learning_rate": 9.574028796522685e-05, |
|
"loss": 2.2501, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 13.853658536585366, |
|
"grad_norm": 0.8205541372299194, |
|
"learning_rate": 9.568595490355882e-05, |
|
"loss": 2.1945, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 13.902439024390244, |
|
"grad_norm": 0.8240370750427246, |
|
"learning_rate": 9.56316218418908e-05, |
|
"loss": 2.1658, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 13.951219512195122, |
|
"grad_norm": 0.8294575214385986, |
|
"learning_rate": 9.557728878022277e-05, |
|
"loss": 2.2301, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 1.1440404653549194, |
|
"learning_rate": 9.552295571855474e-05, |
|
"loss": 2.1942, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.625, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.1468594074249268, |
|
"eval_rouge1": 0.5886, |
|
"eval_rouge2": 0.3073, |
|
"eval_rougeL": 0.5089, |
|
"eval_runtime": 26.4113, |
|
"eval_samples_per_second": 6.891, |
|
"eval_steps_per_second": 1.742, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 14.048780487804878, |
|
"grad_norm": 0.8380372524261475, |
|
"learning_rate": 9.546862265688672e-05, |
|
"loss": 2.169, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 14.097560975609756, |
|
"grad_norm": 0.8634399771690369, |
|
"learning_rate": 9.54142895952187e-05, |
|
"loss": 2.1461, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 14.146341463414634, |
|
"grad_norm": 0.8449454307556152, |
|
"learning_rate": 9.535995653355067e-05, |
|
"loss": 2.1429, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 14.195121951219512, |
|
"grad_norm": 1.0804766416549683, |
|
"learning_rate": 9.530562347188264e-05, |
|
"loss": 2.1806, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 14.24390243902439, |
|
"grad_norm": 0.8354528546333313, |
|
"learning_rate": 9.525129041021462e-05, |
|
"loss": 2.1485, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 14.292682926829269, |
|
"grad_norm": 0.905103325843811, |
|
"learning_rate": 9.51969573485466e-05, |
|
"loss": 2.1903, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 14.341463414634147, |
|
"grad_norm": 0.8975040316581726, |
|
"learning_rate": 9.514262428687857e-05, |
|
"loss": 2.1938, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 14.390243902439025, |
|
"grad_norm": 0.8443695306777954, |
|
"learning_rate": 9.508829122521054e-05, |
|
"loss": 2.1604, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 14.439024390243903, |
|
"grad_norm": 0.8442232608795166, |
|
"learning_rate": 9.503395816354252e-05, |
|
"loss": 2.127, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 14.487804878048781, |
|
"grad_norm": 0.7876951098442078, |
|
"learning_rate": 9.497962510187449e-05, |
|
"loss": 2.1824, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 14.536585365853659, |
|
"grad_norm": 1.0184301137924194, |
|
"learning_rate": 9.492529204020647e-05, |
|
"loss": 2.2021, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 14.585365853658537, |
|
"grad_norm": 0.9558939933776855, |
|
"learning_rate": 9.487095897853844e-05, |
|
"loss": 2.1671, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 14.634146341463415, |
|
"grad_norm": 0.8477892875671387, |
|
"learning_rate": 9.481662591687041e-05, |
|
"loss": 2.1123, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.682926829268293, |
|
"grad_norm": 0.9942978024482727, |
|
"learning_rate": 9.47622928552024e-05, |
|
"loss": 2.1388, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 14.731707317073171, |
|
"grad_norm": 0.9383358955383301, |
|
"learning_rate": 9.470795979353438e-05, |
|
"loss": 2.1496, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 14.78048780487805, |
|
"grad_norm": 1.062231421470642, |
|
"learning_rate": 9.465362673186635e-05, |
|
"loss": 2.1736, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 14.829268292682928, |
|
"grad_norm": 0.9161679148674011, |
|
"learning_rate": 9.459929367019831e-05, |
|
"loss": 2.1688, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 14.878048780487806, |
|
"grad_norm": 0.9697257876396179, |
|
"learning_rate": 9.45449606085303e-05, |
|
"loss": 2.154, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 14.926829268292684, |
|
"grad_norm": 1.0235304832458496, |
|
"learning_rate": 9.449062754686227e-05, |
|
"loss": 2.1676, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 14.975609756097562, |
|
"grad_norm": 0.8928753137588501, |
|
"learning_rate": 9.443629448519425e-05, |
|
"loss": 2.1642, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6293, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.1218607425689697, |
|
"eval_rouge1": 0.5932, |
|
"eval_rouge2": 0.3125, |
|
"eval_rougeL": 0.5127, |
|
"eval_runtime": 26.5151, |
|
"eval_samples_per_second": 6.864, |
|
"eval_steps_per_second": 1.735, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 15.024390243902438, |
|
"grad_norm": 0.8534289598464966, |
|
"learning_rate": 9.438196142352622e-05, |
|
"loss": 2.1589, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 15.073170731707316, |
|
"grad_norm": 0.846277117729187, |
|
"learning_rate": 9.43276283618582e-05, |
|
"loss": 2.0645, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 15.121951219512194, |
|
"grad_norm": 1.1414495706558228, |
|
"learning_rate": 9.427329530019017e-05, |
|
"loss": 2.0968, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 15.170731707317072, |
|
"grad_norm": 0.8166375160217285, |
|
"learning_rate": 9.421896223852215e-05, |
|
"loss": 2.1028, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 15.21951219512195, |
|
"grad_norm": 0.7828956246376038, |
|
"learning_rate": 9.416462917685412e-05, |
|
"loss": 2.1178, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 15.268292682926829, |
|
"grad_norm": 0.8929072022438049, |
|
"learning_rate": 9.411029611518609e-05, |
|
"loss": 2.1018, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 15.317073170731707, |
|
"grad_norm": 0.7981455326080322, |
|
"learning_rate": 9.405596305351807e-05, |
|
"loss": 2.127, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 15.365853658536585, |
|
"grad_norm": 0.8577933311462402, |
|
"learning_rate": 9.400162999185005e-05, |
|
"loss": 2.1227, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 15.414634146341463, |
|
"grad_norm": 0.8780116438865662, |
|
"learning_rate": 9.394729693018202e-05, |
|
"loss": 2.1043, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 15.463414634146341, |
|
"grad_norm": 0.8575468063354492, |
|
"learning_rate": 9.389296386851399e-05, |
|
"loss": 2.0918, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 15.512195121951219, |
|
"grad_norm": 1.4090367555618286, |
|
"learning_rate": 9.383863080684597e-05, |
|
"loss": 2.1102, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 15.560975609756097, |
|
"grad_norm": 1.0460201501846313, |
|
"learning_rate": 9.378429774517794e-05, |
|
"loss": 2.1424, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 15.609756097560975, |
|
"grad_norm": 0.9865913391113281, |
|
"learning_rate": 9.372996468350992e-05, |
|
"loss": 2.0499, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 15.658536585365853, |
|
"grad_norm": 1.0010690689086914, |
|
"learning_rate": 9.367563162184189e-05, |
|
"loss": 2.176, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 15.707317073170731, |
|
"grad_norm": 0.8663599491119385, |
|
"learning_rate": 9.362129856017387e-05, |
|
"loss": 2.1513, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 15.75609756097561, |
|
"grad_norm": 0.8186299204826355, |
|
"learning_rate": 9.356696549850584e-05, |
|
"loss": 2.1712, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 15.804878048780488, |
|
"grad_norm": 0.8568254709243774, |
|
"learning_rate": 9.351263243683783e-05, |
|
"loss": 2.1232, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 15.853658536585366, |
|
"grad_norm": 1.000522255897522, |
|
"learning_rate": 9.34582993751698e-05, |
|
"loss": 2.1435, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 15.902439024390244, |
|
"grad_norm": 0.8970864415168762, |
|
"learning_rate": 9.340396631350176e-05, |
|
"loss": 2.1366, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 15.951219512195122, |
|
"grad_norm": 0.9909244179725647, |
|
"learning_rate": 9.334963325183375e-05, |
|
"loss": 2.1536, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.0475345849990845, |
|
"learning_rate": 9.329530019016573e-05, |
|
"loss": 2.0952, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6317, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.1110589504241943, |
|
"eval_rouge1": 0.5938, |
|
"eval_rouge2": 0.3169, |
|
"eval_rougeL": 0.516, |
|
"eval_runtime": 26.4003, |
|
"eval_samples_per_second": 6.894, |
|
"eval_steps_per_second": 1.742, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 16.048780487804876, |
|
"grad_norm": 0.8568356037139893, |
|
"learning_rate": 9.32409671284977e-05, |
|
"loss": 2.0625, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 16.097560975609756, |
|
"grad_norm": 0.7888637781143188, |
|
"learning_rate": 9.318663406682967e-05, |
|
"loss": 2.0771, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 16.146341463414632, |
|
"grad_norm": 0.8220765590667725, |
|
"learning_rate": 9.313230100516165e-05, |
|
"loss": 2.1645, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 16.195121951219512, |
|
"grad_norm": 0.8520556688308716, |
|
"learning_rate": 9.307796794349362e-05, |
|
"loss": 2.0747, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 16.24390243902439, |
|
"grad_norm": 0.910944938659668, |
|
"learning_rate": 9.30236348818256e-05, |
|
"loss": 2.0778, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 16.29268292682927, |
|
"grad_norm": 0.9317068457603455, |
|
"learning_rate": 9.296930182015757e-05, |
|
"loss": 2.1142, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 16.341463414634145, |
|
"grad_norm": 0.8472641706466675, |
|
"learning_rate": 9.291496875848954e-05, |
|
"loss": 2.103, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 16.390243902439025, |
|
"grad_norm": 0.9557019472122192, |
|
"learning_rate": 9.286063569682152e-05, |
|
"loss": 2.0788, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 16.4390243902439, |
|
"grad_norm": 0.8406639695167542, |
|
"learning_rate": 9.28063026351535e-05, |
|
"loss": 2.1061, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 16.48780487804878, |
|
"grad_norm": 1.0637025833129883, |
|
"learning_rate": 9.275196957348547e-05, |
|
"loss": 2.039, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 16.536585365853657, |
|
"grad_norm": 0.9092559218406677, |
|
"learning_rate": 9.269763651181744e-05, |
|
"loss": 2.0324, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 16.585365853658537, |
|
"grad_norm": 1.031977653503418, |
|
"learning_rate": 9.264330345014942e-05, |
|
"loss": 2.1002, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 16.634146341463413, |
|
"grad_norm": 1.0254329442977905, |
|
"learning_rate": 9.25889703884814e-05, |
|
"loss": 2.0954, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 16.682926829268293, |
|
"grad_norm": 0.9532445073127747, |
|
"learning_rate": 9.253463732681337e-05, |
|
"loss": 2.0529, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 16.73170731707317, |
|
"grad_norm": 1.1140599250793457, |
|
"learning_rate": 9.248030426514534e-05, |
|
"loss": 2.0982, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 16.78048780487805, |
|
"grad_norm": 0.8425986766815186, |
|
"learning_rate": 9.242597120347732e-05, |
|
"loss": 2.09, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 16.829268292682926, |
|
"grad_norm": 0.9490983486175537, |
|
"learning_rate": 9.237163814180929e-05, |
|
"loss": 2.0732, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 16.878048780487806, |
|
"grad_norm": 0.9101831912994385, |
|
"learning_rate": 9.231730508014128e-05, |
|
"loss": 2.1161, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 16.926829268292682, |
|
"grad_norm": 0.961280345916748, |
|
"learning_rate": 9.226297201847324e-05, |
|
"loss": 2.1085, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 16.975609756097562, |
|
"grad_norm": 1.1503115892410278, |
|
"learning_rate": 9.220863895680521e-05, |
|
"loss": 2.0838, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6334, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.098520040512085, |
|
"eval_rouge1": 0.5944, |
|
"eval_rouge2": 0.3188, |
|
"eval_rougeL": 0.5175, |
|
"eval_runtime": 26.5163, |
|
"eval_samples_per_second": 6.864, |
|
"eval_steps_per_second": 1.735, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 17.024390243902438, |
|
"grad_norm": 0.9509657621383667, |
|
"learning_rate": 9.21543058951372e-05, |
|
"loss": 2.0309, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 17.073170731707318, |
|
"grad_norm": 0.9084831476211548, |
|
"learning_rate": 9.209997283346918e-05, |
|
"loss": 2.0358, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.121951219512194, |
|
"grad_norm": 0.8318648338317871, |
|
"learning_rate": 9.204563977180115e-05, |
|
"loss": 2.026, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 17.170731707317074, |
|
"grad_norm": 0.9886439442634583, |
|
"learning_rate": 9.199130671013312e-05, |
|
"loss": 2.0421, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 17.21951219512195, |
|
"grad_norm": 0.998199462890625, |
|
"learning_rate": 9.19369736484651e-05, |
|
"loss": 2.0552, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 17.26829268292683, |
|
"grad_norm": 0.9013775587081909, |
|
"learning_rate": 9.188264058679708e-05, |
|
"loss": 2.0296, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 17.317073170731707, |
|
"grad_norm": 0.8572256565093994, |
|
"learning_rate": 9.182830752512905e-05, |
|
"loss": 2.0409, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 17.365853658536587, |
|
"grad_norm": 0.9061347246170044, |
|
"learning_rate": 9.177397446346102e-05, |
|
"loss": 2.0467, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 17.414634146341463, |
|
"grad_norm": 0.7994931936264038, |
|
"learning_rate": 9.1719641401793e-05, |
|
"loss": 2.0645, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 17.463414634146343, |
|
"grad_norm": 0.8628284931182861, |
|
"learning_rate": 9.166530834012497e-05, |
|
"loss": 2.0127, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 17.51219512195122, |
|
"grad_norm": 1.0142345428466797, |
|
"learning_rate": 9.161097527845695e-05, |
|
"loss": 2.059, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 17.5609756097561, |
|
"grad_norm": 0.9873341917991638, |
|
"learning_rate": 9.155664221678892e-05, |
|
"loss": 2.079, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.609756097560975, |
|
"grad_norm": 0.9785902500152588, |
|
"learning_rate": 9.150230915512089e-05, |
|
"loss": 2.0932, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 17.658536585365855, |
|
"grad_norm": 0.8768711686134338, |
|
"learning_rate": 9.144797609345287e-05, |
|
"loss": 2.0169, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 17.70731707317073, |
|
"grad_norm": 0.9447925686836243, |
|
"learning_rate": 9.139364303178485e-05, |
|
"loss": 2.0419, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 17.75609756097561, |
|
"grad_norm": 0.8872796297073364, |
|
"learning_rate": 9.133930997011682e-05, |
|
"loss": 2.0768, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 17.804878048780488, |
|
"grad_norm": 0.9131309986114502, |
|
"learning_rate": 9.128497690844879e-05, |
|
"loss": 2.0619, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 17.853658536585368, |
|
"grad_norm": 0.9417833685874939, |
|
"learning_rate": 9.123064384678077e-05, |
|
"loss": 2.0971, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 17.902439024390244, |
|
"grad_norm": 0.9056317806243896, |
|
"learning_rate": 9.117631078511276e-05, |
|
"loss": 2.0509, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 17.951219512195124, |
|
"grad_norm": 1.052677869796753, |
|
"learning_rate": 9.112197772344472e-05, |
|
"loss": 2.059, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.1331645250320435, |
|
"learning_rate": 9.10676446617767e-05, |
|
"loss": 2.0219, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.637, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.086111307144165, |
|
"eval_rouge1": 0.5969, |
|
"eval_rouge2": 0.3226, |
|
"eval_rougeL": 0.521, |
|
"eval_runtime": 26.4745, |
|
"eval_samples_per_second": 6.875, |
|
"eval_steps_per_second": 1.738, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 18.048780487804876, |
|
"grad_norm": 0.8600811958312988, |
|
"learning_rate": 9.101331160010868e-05, |
|
"loss": 2.0092, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 18.097560975609756, |
|
"grad_norm": 0.8948452472686768, |
|
"learning_rate": 9.095897853844064e-05, |
|
"loss": 2.0124, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 18.146341463414632, |
|
"grad_norm": 0.9096921682357788, |
|
"learning_rate": 9.090464547677263e-05, |
|
"loss": 2.016, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 18.195121951219512, |
|
"grad_norm": 0.948293924331665, |
|
"learning_rate": 9.08503124151046e-05, |
|
"loss": 1.9863, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 18.24390243902439, |
|
"grad_norm": 0.9195007085800171, |
|
"learning_rate": 9.079597935343656e-05, |
|
"loss": 2.0405, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 18.29268292682927, |
|
"grad_norm": 0.9686821699142456, |
|
"learning_rate": 9.074164629176855e-05, |
|
"loss": 2.0113, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 18.341463414634145, |
|
"grad_norm": 0.8149988055229187, |
|
"learning_rate": 9.068731323010053e-05, |
|
"loss": 1.994, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 18.390243902439025, |
|
"grad_norm": 1.002677083015442, |
|
"learning_rate": 9.063298016843248e-05, |
|
"loss": 2.0301, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 18.4390243902439, |
|
"grad_norm": 0.7668618559837341, |
|
"learning_rate": 9.057864710676447e-05, |
|
"loss": 1.9885, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 18.48780487804878, |
|
"grad_norm": 1.0023763179779053, |
|
"learning_rate": 9.052431404509645e-05, |
|
"loss": 2.0015, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 18.536585365853657, |
|
"grad_norm": 0.8811227679252625, |
|
"learning_rate": 9.046998098342842e-05, |
|
"loss": 2.0479, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 18.585365853658537, |
|
"grad_norm": 0.8438715934753418, |
|
"learning_rate": 9.04156479217604e-05, |
|
"loss": 2.0093, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 18.634146341463413, |
|
"grad_norm": 0.9621524810791016, |
|
"learning_rate": 9.036131486009237e-05, |
|
"loss": 2.0786, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 18.682926829268293, |
|
"grad_norm": 1.0231949090957642, |
|
"learning_rate": 9.030698179842435e-05, |
|
"loss": 2.0356, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 18.73170731707317, |
|
"grad_norm": 1.0325591564178467, |
|
"learning_rate": 9.025264873675632e-05, |
|
"loss": 2.0383, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 18.78048780487805, |
|
"grad_norm": 1.2348233461380005, |
|
"learning_rate": 9.01983156750883e-05, |
|
"loss": 2.0033, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 18.829268292682926, |
|
"grad_norm": 0.9654771685600281, |
|
"learning_rate": 9.014398261342027e-05, |
|
"loss": 1.9976, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 18.878048780487806, |
|
"grad_norm": 0.9214688539505005, |
|
"learning_rate": 9.008964955175224e-05, |
|
"loss": 2.0259, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 18.926829268292682, |
|
"grad_norm": 0.8998551368713379, |
|
"learning_rate": 9.003531649008422e-05, |
|
"loss": 2.015, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 18.975609756097562, |
|
"grad_norm": 0.9731389880180359, |
|
"learning_rate": 8.99809834284162e-05, |
|
"loss": 2.0727, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6386, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0800883769989014, |
|
"eval_rouge1": 0.5985, |
|
"eval_rouge2": 0.3256, |
|
"eval_rougeL": 0.5236, |
|
"eval_runtime": 26.467, |
|
"eval_samples_per_second": 6.876, |
|
"eval_steps_per_second": 1.738, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 19.024390243902438, |
|
"grad_norm": 0.9533644318580627, |
|
"learning_rate": 8.992665036674816e-05, |
|
"loss": 1.9601, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 19.073170731707318, |
|
"grad_norm": 0.8680891990661621, |
|
"learning_rate": 8.987231730508014e-05, |
|
"loss": 1.9617, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 19.121951219512194, |
|
"grad_norm": 0.9162204265594482, |
|
"learning_rate": 8.981798424341212e-05, |
|
"loss": 1.9745, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 19.170731707317074, |
|
"grad_norm": 0.9257919788360596, |
|
"learning_rate": 8.97636511817441e-05, |
|
"loss": 1.9737, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 19.21951219512195, |
|
"grad_norm": 0.8662911653518677, |
|
"learning_rate": 8.970931812007606e-05, |
|
"loss": 1.9627, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 19.26829268292683, |
|
"grad_norm": 0.7983854413032532, |
|
"learning_rate": 8.965498505840804e-05, |
|
"loss": 1.9995, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 19.317073170731707, |
|
"grad_norm": 0.9230179190635681, |
|
"learning_rate": 8.960065199674003e-05, |
|
"loss": 1.9865, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 19.365853658536587, |
|
"grad_norm": 0.8826920390129089, |
|
"learning_rate": 8.9546318935072e-05, |
|
"loss": 1.9957, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 19.414634146341463, |
|
"grad_norm": 0.915490984916687, |
|
"learning_rate": 8.949198587340398e-05, |
|
"loss": 1.995, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 19.463414634146343, |
|
"grad_norm": 0.9427376985549927, |
|
"learning_rate": 8.943765281173595e-05, |
|
"loss": 1.947, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 19.51219512195122, |
|
"grad_norm": 0.9200916290283203, |
|
"learning_rate": 8.938331975006792e-05, |
|
"loss": 1.9974, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.5609756097561, |
|
"grad_norm": 0.9528909921646118, |
|
"learning_rate": 8.93289866883999e-05, |
|
"loss": 1.9867, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 19.609756097560975, |
|
"grad_norm": 1.02826726436615, |
|
"learning_rate": 8.927465362673188e-05, |
|
"loss": 1.9863, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 19.658536585365855, |
|
"grad_norm": 0.8667694926261902, |
|
"learning_rate": 8.922032056506384e-05, |
|
"loss": 2.0085, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 19.70731707317073, |
|
"grad_norm": 0.9008286595344543, |
|
"learning_rate": 8.916598750339582e-05, |
|
"loss": 1.983, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 19.75609756097561, |
|
"grad_norm": 0.8765814900398254, |
|
"learning_rate": 8.91116544417278e-05, |
|
"loss": 1.9919, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 19.804878048780488, |
|
"grad_norm": 0.9815711379051208, |
|
"learning_rate": 8.905732138005977e-05, |
|
"loss": 1.9724, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 19.853658536585368, |
|
"grad_norm": 0.8371131420135498, |
|
"learning_rate": 8.900298831839174e-05, |
|
"loss": 1.9957, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 19.902439024390244, |
|
"grad_norm": 0.9084226489067078, |
|
"learning_rate": 8.894865525672372e-05, |
|
"loss": 2.0002, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 19.951219512195124, |
|
"grad_norm": 1.1448745727539062, |
|
"learning_rate": 8.889432219505569e-05, |
|
"loss": 2.0231, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.0898938179016113, |
|
"learning_rate": 8.883998913338767e-05, |
|
"loss": 2.0165, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.642, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0671799182891846, |
|
"eval_rouge1": 0.6005, |
|
"eval_rouge2": 0.3291, |
|
"eval_rougeL": 0.5255, |
|
"eval_runtime": 26.6176, |
|
"eval_samples_per_second": 6.838, |
|
"eval_steps_per_second": 1.728, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 20.048780487804876, |
|
"grad_norm": 0.8856220841407776, |
|
"learning_rate": 8.878565607171964e-05, |
|
"loss": 1.948, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 20.097560975609756, |
|
"grad_norm": 0.8878317475318909, |
|
"learning_rate": 8.873132301005162e-05, |
|
"loss": 1.9741, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 20.146341463414632, |
|
"grad_norm": 0.8628621697425842, |
|
"learning_rate": 8.867698994838359e-05, |
|
"loss": 1.9349, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 20.195121951219512, |
|
"grad_norm": 0.8303452134132385, |
|
"learning_rate": 8.862265688671557e-05, |
|
"loss": 1.9748, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 20.24390243902439, |
|
"grad_norm": 0.9785374999046326, |
|
"learning_rate": 8.856832382504756e-05, |
|
"loss": 1.9411, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 20.29268292682927, |
|
"grad_norm": 1.0943126678466797, |
|
"learning_rate": 8.851399076337951e-05, |
|
"loss": 1.9892, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 20.341463414634145, |
|
"grad_norm": 0.8537071347236633, |
|
"learning_rate": 8.84596577017115e-05, |
|
"loss": 1.9492, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 20.390243902439025, |
|
"grad_norm": 0.9113747477531433, |
|
"learning_rate": 8.840532464004348e-05, |
|
"loss": 2.0174, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 20.4390243902439, |
|
"grad_norm": 0.9659337997436523, |
|
"learning_rate": 8.835099157837545e-05, |
|
"loss": 1.9281, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 20.48780487804878, |
|
"grad_norm": 1.0948446989059448, |
|
"learning_rate": 8.829665851670741e-05, |
|
"loss": 1.9777, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 20.536585365853657, |
|
"grad_norm": 1.1053972244262695, |
|
"learning_rate": 8.82423254550394e-05, |
|
"loss": 1.9583, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 20.585365853658537, |
|
"grad_norm": 0.9370738863945007, |
|
"learning_rate": 8.818799239337137e-05, |
|
"loss": 1.9746, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 20.634146341463413, |
|
"grad_norm": 0.8967805504798889, |
|
"learning_rate": 8.813365933170335e-05, |
|
"loss": 1.9868, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 20.682926829268293, |
|
"grad_norm": 0.8011646270751953, |
|
"learning_rate": 8.807932627003532e-05, |
|
"loss": 1.9357, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 20.73170731707317, |
|
"grad_norm": 0.8785592317581177, |
|
"learning_rate": 8.80249932083673e-05, |
|
"loss": 1.9788, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 20.78048780487805, |
|
"grad_norm": 0.8387641310691833, |
|
"learning_rate": 8.797066014669927e-05, |
|
"loss": 1.9253, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 20.829268292682926, |
|
"grad_norm": 0.8664023280143738, |
|
"learning_rate": 8.791632708503125e-05, |
|
"loss": 1.9868, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 20.878048780487806, |
|
"grad_norm": 0.8770254254341125, |
|
"learning_rate": 8.786199402336322e-05, |
|
"loss": 1.9192, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 20.926829268292682, |
|
"grad_norm": 0.8913759589195251, |
|
"learning_rate": 8.780766096169519e-05, |
|
"loss": 1.9655, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 20.975609756097562, |
|
"grad_norm": 0.7817836999893188, |
|
"learning_rate": 8.775332790002717e-05, |
|
"loss": 1.9613, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6431, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.057805061340332, |
|
"eval_rouge1": 0.6017, |
|
"eval_rouge2": 0.3311, |
|
"eval_rougeL": 0.5274, |
|
"eval_runtime": 26.5642, |
|
"eval_samples_per_second": 6.851, |
|
"eval_steps_per_second": 1.732, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 21.024390243902438, |
|
"grad_norm": 1.0928252935409546, |
|
"learning_rate": 8.769899483835915e-05, |
|
"loss": 1.9218, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 21.073170731707318, |
|
"grad_norm": 1.0891989469528198, |
|
"learning_rate": 8.764466177669112e-05, |
|
"loss": 1.9555, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 21.121951219512194, |
|
"grad_norm": 0.8458701372146606, |
|
"learning_rate": 8.759032871502309e-05, |
|
"loss": 1.9781, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 21.170731707317074, |
|
"grad_norm": 0.9393008351325989, |
|
"learning_rate": 8.753599565335507e-05, |
|
"loss": 1.9089, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 21.21951219512195, |
|
"grad_norm": 0.9594295620918274, |
|
"learning_rate": 8.748166259168704e-05, |
|
"loss": 1.9764, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 21.26829268292683, |
|
"grad_norm": 0.9915680289268494, |
|
"learning_rate": 8.742732953001902e-05, |
|
"loss": 1.9328, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 21.317073170731707, |
|
"grad_norm": 1.0338279008865356, |
|
"learning_rate": 8.737299646835099e-05, |
|
"loss": 1.9124, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 21.365853658536587, |
|
"grad_norm": 0.8570368885993958, |
|
"learning_rate": 8.731866340668296e-05, |
|
"loss": 1.9258, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 21.414634146341463, |
|
"grad_norm": 0.9843132495880127, |
|
"learning_rate": 8.726433034501494e-05, |
|
"loss": 1.9368, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 21.463414634146343, |
|
"grad_norm": 0.7749292850494385, |
|
"learning_rate": 8.720999728334693e-05, |
|
"loss": 1.8731, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 21.51219512195122, |
|
"grad_norm": 0.9112648367881775, |
|
"learning_rate": 8.71556642216789e-05, |
|
"loss": 1.8874, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 21.5609756097561, |
|
"grad_norm": 0.9614556431770325, |
|
"learning_rate": 8.710133116001086e-05, |
|
"loss": 1.9299, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 21.609756097560975, |
|
"grad_norm": 0.8870623111724854, |
|
"learning_rate": 8.704699809834285e-05, |
|
"loss": 1.9401, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 21.658536585365855, |
|
"grad_norm": 0.9557124376296997, |
|
"learning_rate": 8.699266503667483e-05, |
|
"loss": 1.9403, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 21.70731707317073, |
|
"grad_norm": 0.8714508414268494, |
|
"learning_rate": 8.69383319750068e-05, |
|
"loss": 1.9282, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 21.75609756097561, |
|
"grad_norm": 1.0693939924240112, |
|
"learning_rate": 8.688399891333877e-05, |
|
"loss": 1.9715, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 21.804878048780488, |
|
"grad_norm": 0.8669313192367554, |
|
"learning_rate": 8.682966585167075e-05, |
|
"loss": 1.9108, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 21.853658536585368, |
|
"grad_norm": 1.013626217842102, |
|
"learning_rate": 8.677533279000272e-05, |
|
"loss": 1.92, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 21.902439024390244, |
|
"grad_norm": 1.2305961847305298, |
|
"learning_rate": 8.67209997283347e-05, |
|
"loss": 1.9894, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 21.951219512195124, |
|
"grad_norm": 0.8011859655380249, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 1.9103, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 1.023545503616333, |
|
"learning_rate": 8.661233360499864e-05, |
|
"loss": 1.9716, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6435, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.056776762008667, |
|
"eval_rouge1": 0.6021, |
|
"eval_rouge2": 0.3317, |
|
"eval_rougeL": 0.5279, |
|
"eval_runtime": 26.5956, |
|
"eval_samples_per_second": 6.843, |
|
"eval_steps_per_second": 1.73, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 22.048780487804876, |
|
"grad_norm": 0.9685917496681213, |
|
"learning_rate": 8.655800054333062e-05, |
|
"loss": 1.9143, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 22.097560975609756, |
|
"grad_norm": 0.8598119616508484, |
|
"learning_rate": 8.65036674816626e-05, |
|
"loss": 1.883, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 22.146341463414632, |
|
"grad_norm": 0.9003345966339111, |
|
"learning_rate": 8.644933441999457e-05, |
|
"loss": 1.8913, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 22.195121951219512, |
|
"grad_norm": 0.8545246720314026, |
|
"learning_rate": 8.639500135832654e-05, |
|
"loss": 1.8511, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 22.24390243902439, |
|
"grad_norm": 0.8255957365036011, |
|
"learning_rate": 8.634066829665852e-05, |
|
"loss": 1.909, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 22.29268292682927, |
|
"grad_norm": 0.8835028409957886, |
|
"learning_rate": 8.62863352349905e-05, |
|
"loss": 1.9223, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 22.341463414634145, |
|
"grad_norm": 0.8599757552146912, |
|
"learning_rate": 8.623200217332247e-05, |
|
"loss": 1.8848, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 22.390243902439025, |
|
"grad_norm": 0.8880361318588257, |
|
"learning_rate": 8.617766911165444e-05, |
|
"loss": 1.9087, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 22.4390243902439, |
|
"grad_norm": 0.9443928003311157, |
|
"learning_rate": 8.612333604998642e-05, |
|
"loss": 1.9419, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 22.48780487804878, |
|
"grad_norm": 0.9929980635643005, |
|
"learning_rate": 8.606900298831839e-05, |
|
"loss": 1.9057, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 22.536585365853657, |
|
"grad_norm": 1.2470592260360718, |
|
"learning_rate": 8.601466992665038e-05, |
|
"loss": 1.9128, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 22.585365853658537, |
|
"grad_norm": 0.8578996062278748, |
|
"learning_rate": 8.596033686498234e-05, |
|
"loss": 1.8991, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 22.634146341463413, |
|
"grad_norm": 1.0579174757003784, |
|
"learning_rate": 8.590600380331431e-05, |
|
"loss": 1.9208, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 22.682926829268293, |
|
"grad_norm": 1.0299713611602783, |
|
"learning_rate": 8.58516707416463e-05, |
|
"loss": 1.9141, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 22.73170731707317, |
|
"grad_norm": 0.853769838809967, |
|
"learning_rate": 8.579733767997828e-05, |
|
"loss": 1.9069, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 22.78048780487805, |
|
"grad_norm": 0.9463319182395935, |
|
"learning_rate": 8.574300461831025e-05, |
|
"loss": 1.9162, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 22.829268292682926, |
|
"grad_norm": 0.9902511835098267, |
|
"learning_rate": 8.568867155664222e-05, |
|
"loss": 1.9439, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 22.878048780487806, |
|
"grad_norm": 0.9432972073554993, |
|
"learning_rate": 8.56343384949742e-05, |
|
"loss": 1.8906, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 22.926829268292682, |
|
"grad_norm": 0.9613229632377625, |
|
"learning_rate": 8.558000543330618e-05, |
|
"loss": 1.9144, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 22.975609756097562, |
|
"grad_norm": 0.8384121060371399, |
|
"learning_rate": 8.552567237163815e-05, |
|
"loss": 1.9618, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6467, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.055529832839966, |
|
"eval_rouge1": 0.603, |
|
"eval_rouge2": 0.336, |
|
"eval_rougeL": 0.531, |
|
"eval_runtime": 26.4562, |
|
"eval_samples_per_second": 6.879, |
|
"eval_steps_per_second": 1.739, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 23.024390243902438, |
|
"grad_norm": 0.836834728717804, |
|
"learning_rate": 8.547133930997012e-05, |
|
"loss": 1.8823, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 23.073170731707318, |
|
"grad_norm": 1.070541501045227, |
|
"learning_rate": 8.54170062483021e-05, |
|
"loss": 1.9168, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 23.121951219512194, |
|
"grad_norm": 0.9768303036689758, |
|
"learning_rate": 8.536267318663407e-05, |
|
"loss": 1.8677, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 23.170731707317074, |
|
"grad_norm": 0.9395390748977661, |
|
"learning_rate": 8.530834012496605e-05, |
|
"loss": 1.8669, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 23.21951219512195, |
|
"grad_norm": 0.9083912968635559, |
|
"learning_rate": 8.525400706329802e-05, |
|
"loss": 1.8952, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 23.26829268292683, |
|
"grad_norm": 0.8809327483177185, |
|
"learning_rate": 8.519967400162999e-05, |
|
"loss": 1.884, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 23.317073170731707, |
|
"grad_norm": 1.0371663570404053, |
|
"learning_rate": 8.514534093996197e-05, |
|
"loss": 1.889, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 23.365853658536587, |
|
"grad_norm": 0.8325390219688416, |
|
"learning_rate": 8.509100787829395e-05, |
|
"loss": 1.9055, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 23.414634146341463, |
|
"grad_norm": 0.9794867038726807, |
|
"learning_rate": 8.503667481662592e-05, |
|
"loss": 1.8662, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 23.463414634146343, |
|
"grad_norm": 0.8732627034187317, |
|
"learning_rate": 8.498234175495789e-05, |
|
"loss": 1.8775, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 23.51219512195122, |
|
"grad_norm": 0.99794602394104, |
|
"learning_rate": 8.492800869328987e-05, |
|
"loss": 1.8718, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 23.5609756097561, |
|
"grad_norm": 0.9115703701972961, |
|
"learning_rate": 8.487367563162184e-05, |
|
"loss": 1.9097, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 23.609756097560975, |
|
"grad_norm": 0.835806131362915, |
|
"learning_rate": 8.481934256995382e-05, |
|
"loss": 1.8978, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 23.658536585365855, |
|
"grad_norm": 1.145419955253601, |
|
"learning_rate": 8.476500950828579e-05, |
|
"loss": 1.8654, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 23.70731707317073, |
|
"grad_norm": 0.9367853999137878, |
|
"learning_rate": 8.471067644661778e-05, |
|
"loss": 1.8677, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 23.75609756097561, |
|
"grad_norm": 0.9627723097801208, |
|
"learning_rate": 8.465634338494974e-05, |
|
"loss": 1.8869, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 23.804878048780488, |
|
"grad_norm": 0.8921970129013062, |
|
"learning_rate": 8.460201032328173e-05, |
|
"loss": 1.8935, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 23.853658536585368, |
|
"grad_norm": 0.8427318334579468, |
|
"learning_rate": 8.45476772616137e-05, |
|
"loss": 1.8612, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 23.902439024390244, |
|
"grad_norm": 0.8289673924446106, |
|
"learning_rate": 8.449334419994566e-05, |
|
"loss": 1.9018, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 23.951219512195124, |
|
"grad_norm": 0.8206982016563416, |
|
"learning_rate": 8.443901113827765e-05, |
|
"loss": 1.8747, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 1.0877290964126587, |
|
"learning_rate": 8.438467807660963e-05, |
|
"loss": 1.916, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6477, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0489752292633057, |
|
"eval_rouge1": 0.6045, |
|
"eval_rouge2": 0.3365, |
|
"eval_rougeL": 0.5323, |
|
"eval_runtime": 26.6799, |
|
"eval_samples_per_second": 6.822, |
|
"eval_steps_per_second": 1.724, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 24.048780487804876, |
|
"grad_norm": 0.9822722673416138, |
|
"learning_rate": 8.43303450149416e-05, |
|
"loss": 1.8604, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 24.097560975609756, |
|
"grad_norm": 0.8830099701881409, |
|
"learning_rate": 8.427601195327357e-05, |
|
"loss": 1.8229, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 24.146341463414632, |
|
"grad_norm": 0.931056022644043, |
|
"learning_rate": 8.422167889160555e-05, |
|
"loss": 1.853, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 24.195121951219512, |
|
"grad_norm": 1.1673840284347534, |
|
"learning_rate": 8.416734582993752e-05, |
|
"loss": 1.8008, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 24.24390243902439, |
|
"grad_norm": 0.8437151908874512, |
|
"learning_rate": 8.41130127682695e-05, |
|
"loss": 1.8747, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 24.29268292682927, |
|
"grad_norm": 0.8535563945770264, |
|
"learning_rate": 8.405867970660147e-05, |
|
"loss": 1.8898, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 24.341463414634145, |
|
"grad_norm": 0.9258223176002502, |
|
"learning_rate": 8.400434664493345e-05, |
|
"loss": 1.86, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 24.390243902439025, |
|
"grad_norm": 1.0053988695144653, |
|
"learning_rate": 8.395001358326542e-05, |
|
"loss": 1.8101, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 24.4390243902439, |
|
"grad_norm": 0.8608886003494263, |
|
"learning_rate": 8.38956805215974e-05, |
|
"loss": 1.8998, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 24.48780487804878, |
|
"grad_norm": 1.111860752105713, |
|
"learning_rate": 8.384134745992937e-05, |
|
"loss": 1.9247, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 24.536585365853657, |
|
"grad_norm": 0.9506413340568542, |
|
"learning_rate": 8.378701439826134e-05, |
|
"loss": 1.8309, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 24.585365853658537, |
|
"grad_norm": 0.9532819390296936, |
|
"learning_rate": 8.373268133659332e-05, |
|
"loss": 1.8863, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 24.634146341463413, |
|
"grad_norm": 1.0726252794265747, |
|
"learning_rate": 8.36783482749253e-05, |
|
"loss": 1.8353, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 24.682926829268293, |
|
"grad_norm": 0.8491644263267517, |
|
"learning_rate": 8.362401521325727e-05, |
|
"loss": 1.8833, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 24.73170731707317, |
|
"grad_norm": 0.8814260959625244, |
|
"learning_rate": 8.356968215158924e-05, |
|
"loss": 1.8765, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 24.78048780487805, |
|
"grad_norm": 1.0011340379714966, |
|
"learning_rate": 8.351534908992122e-05, |
|
"loss": 1.8507, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 24.829268292682926, |
|
"grad_norm": 0.819486141204834, |
|
"learning_rate": 8.34610160282532e-05, |
|
"loss": 1.8754, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 24.878048780487806, |
|
"grad_norm": 0.797431230545044, |
|
"learning_rate": 8.340668296658518e-05, |
|
"loss": 1.8754, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 24.926829268292682, |
|
"grad_norm": 0.8899915814399719, |
|
"learning_rate": 8.335234990491714e-05, |
|
"loss": 1.8484, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 24.975609756097562, |
|
"grad_norm": 0.9835275411605835, |
|
"learning_rate": 8.329801684324911e-05, |
|
"loss": 1.864, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.649, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0439529418945312, |
|
"eval_rouge1": 0.6046, |
|
"eval_rouge2": 0.3378, |
|
"eval_rougeL": 0.533, |
|
"eval_runtime": 26.5855, |
|
"eval_samples_per_second": 6.846, |
|
"eval_steps_per_second": 1.73, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 25.024390243902438, |
|
"grad_norm": 1.0006405115127563, |
|
"learning_rate": 8.32436837815811e-05, |
|
"loss": 1.8371, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 25.073170731707318, |
|
"grad_norm": 0.9687209129333496, |
|
"learning_rate": 8.318935071991308e-05, |
|
"loss": 1.8286, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 25.121951219512194, |
|
"grad_norm": 0.8135698437690735, |
|
"learning_rate": 8.313501765824505e-05, |
|
"loss": 1.8417, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 25.170731707317074, |
|
"grad_norm": 0.9199495911598206, |
|
"learning_rate": 8.308068459657702e-05, |
|
"loss": 1.8202, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 25.21951219512195, |
|
"grad_norm": 0.7822583317756653, |
|
"learning_rate": 8.3026351534909e-05, |
|
"loss": 1.8535, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 25.26829268292683, |
|
"grad_norm": 0.8207578063011169, |
|
"learning_rate": 8.297201847324098e-05, |
|
"loss": 1.8064, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 25.317073170731707, |
|
"grad_norm": 0.8425044417381287, |
|
"learning_rate": 8.291768541157295e-05, |
|
"loss": 1.8554, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 25.365853658536587, |
|
"grad_norm": 0.9588025212287903, |
|
"learning_rate": 8.286335234990492e-05, |
|
"loss": 1.8297, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 25.414634146341463, |
|
"grad_norm": 0.8000227212905884, |
|
"learning_rate": 8.28090192882369e-05, |
|
"loss": 1.856, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 25.463414634146343, |
|
"grad_norm": 0.9651764035224915, |
|
"learning_rate": 8.275468622656887e-05, |
|
"loss": 1.8476, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 25.51219512195122, |
|
"grad_norm": 1.183806300163269, |
|
"learning_rate": 8.270035316490085e-05, |
|
"loss": 1.8133, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 25.5609756097561, |
|
"grad_norm": 0.9591472744941711, |
|
"learning_rate": 8.264602010323282e-05, |
|
"loss": 1.8158, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 25.609756097560975, |
|
"grad_norm": 1.062137484550476, |
|
"learning_rate": 8.259168704156479e-05, |
|
"loss": 1.8258, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 25.658536585365855, |
|
"grad_norm": 1.1689926385879517, |
|
"learning_rate": 8.253735397989677e-05, |
|
"loss": 1.8838, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 25.70731707317073, |
|
"grad_norm": 1.1229288578033447, |
|
"learning_rate": 8.248302091822875e-05, |
|
"loss": 1.8473, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 25.75609756097561, |
|
"grad_norm": 0.8929149508476257, |
|
"learning_rate": 8.242868785656071e-05, |
|
"loss": 1.7918, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 25.804878048780488, |
|
"grad_norm": 0.9470261931419373, |
|
"learning_rate": 8.237435479489269e-05, |
|
"loss": 1.8462, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 25.853658536585368, |
|
"grad_norm": 0.8769372701644897, |
|
"learning_rate": 8.232002173322467e-05, |
|
"loss": 1.8893, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 25.902439024390244, |
|
"grad_norm": 0.9617382287979126, |
|
"learning_rate": 8.226568867155666e-05, |
|
"loss": 1.8635, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 25.951219512195124, |
|
"grad_norm": 0.9043958187103271, |
|
"learning_rate": 8.221135560988861e-05, |
|
"loss": 1.8432, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 1.0180726051330566, |
|
"learning_rate": 8.21570225482206e-05, |
|
"loss": 1.8356, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.65, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.047593832015991, |
|
"eval_rouge1": 0.6044, |
|
"eval_rouge2": 0.3385, |
|
"eval_rougeL": 0.534, |
|
"eval_runtime": 26.7369, |
|
"eval_samples_per_second": 6.807, |
|
"eval_steps_per_second": 1.72, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 26.048780487804876, |
|
"grad_norm": 2.064523696899414, |
|
"learning_rate": 8.210268948655258e-05, |
|
"loss": 1.8343, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 26.097560975609756, |
|
"grad_norm": 0.8723379969596863, |
|
"learning_rate": 8.204835642488455e-05, |
|
"loss": 1.83, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 26.146341463414632, |
|
"grad_norm": 0.9224991798400879, |
|
"learning_rate": 8.199402336321653e-05, |
|
"loss": 1.8117, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 26.195121951219512, |
|
"grad_norm": 1.009584903717041, |
|
"learning_rate": 8.19396903015485e-05, |
|
"loss": 1.7873, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 26.24390243902439, |
|
"grad_norm": 1.431678295135498, |
|
"learning_rate": 8.188535723988047e-05, |
|
"loss": 1.8388, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 26.29268292682927, |
|
"grad_norm": 1.0047580003738403, |
|
"learning_rate": 8.183102417821245e-05, |
|
"loss": 1.8312, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 26.341463414634145, |
|
"grad_norm": 0.9655563235282898, |
|
"learning_rate": 8.177669111654443e-05, |
|
"loss": 1.7934, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 26.390243902439025, |
|
"grad_norm": 0.8792780637741089, |
|
"learning_rate": 8.172235805487639e-05, |
|
"loss": 1.82, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 26.4390243902439, |
|
"grad_norm": 1.029196858406067, |
|
"learning_rate": 8.166802499320837e-05, |
|
"loss": 1.8188, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 26.48780487804878, |
|
"grad_norm": 0.8266960382461548, |
|
"learning_rate": 8.161369193154035e-05, |
|
"loss": 1.816, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 26.536585365853657, |
|
"grad_norm": 0.9967761039733887, |
|
"learning_rate": 8.155935886987233e-05, |
|
"loss": 1.8426, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 26.585365853658537, |
|
"grad_norm": 1.188904881477356, |
|
"learning_rate": 8.150502580820429e-05, |
|
"loss": 1.8239, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 26.634146341463413, |
|
"grad_norm": 0.8830356001853943, |
|
"learning_rate": 8.145069274653627e-05, |
|
"loss": 1.8242, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 26.682926829268293, |
|
"grad_norm": 0.9936667084693909, |
|
"learning_rate": 8.139635968486825e-05, |
|
"loss": 1.8068, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 26.73170731707317, |
|
"grad_norm": 1.1022076606750488, |
|
"learning_rate": 8.134202662320022e-05, |
|
"loss": 1.7986, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 26.78048780487805, |
|
"grad_norm": 1.1271874904632568, |
|
"learning_rate": 8.128769356153219e-05, |
|
"loss": 1.7993, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 26.829268292682926, |
|
"grad_norm": 0.9428896903991699, |
|
"learning_rate": 8.123336049986417e-05, |
|
"loss": 1.8519, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 26.878048780487806, |
|
"grad_norm": 0.874235212802887, |
|
"learning_rate": 8.117902743819614e-05, |
|
"loss": 1.7874, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 26.926829268292682, |
|
"grad_norm": 0.8641676902770996, |
|
"learning_rate": 8.112469437652812e-05, |
|
"loss": 1.8449, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 26.975609756097562, |
|
"grad_norm": 1.411012887954712, |
|
"learning_rate": 8.10703613148601e-05, |
|
"loss": 1.8203, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6519, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.039573907852173, |
|
"eval_rouge1": 0.6066, |
|
"eval_rouge2": 0.3403, |
|
"eval_rougeL": 0.5359, |
|
"eval_runtime": 26.6606, |
|
"eval_samples_per_second": 6.827, |
|
"eval_steps_per_second": 1.725, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 27.024390243902438, |
|
"grad_norm": 0.8869510293006897, |
|
"learning_rate": 8.101602825319206e-05, |
|
"loss": 1.8272, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 27.073170731707318, |
|
"grad_norm": 0.8785194158554077, |
|
"learning_rate": 8.096169519152404e-05, |
|
"loss": 1.8282, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 27.121951219512194, |
|
"grad_norm": 0.9073692560195923, |
|
"learning_rate": 8.090736212985603e-05, |
|
"loss": 1.7835, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 27.170731707317074, |
|
"grad_norm": 0.9559656977653503, |
|
"learning_rate": 8.0853029068188e-05, |
|
"loss": 1.8084, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 27.21951219512195, |
|
"grad_norm": 0.9089655876159668, |
|
"learning_rate": 8.079869600651996e-05, |
|
"loss": 1.8016, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 27.26829268292683, |
|
"grad_norm": 0.9023314714431763, |
|
"learning_rate": 8.074436294485195e-05, |
|
"loss": 1.8039, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 27.317073170731707, |
|
"grad_norm": 0.9001786708831787, |
|
"learning_rate": 8.069002988318393e-05, |
|
"loss": 1.8007, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 27.365853658536587, |
|
"grad_norm": 0.9051687121391296, |
|
"learning_rate": 8.06356968215159e-05, |
|
"loss": 1.8289, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 27.414634146341463, |
|
"grad_norm": 1.1516135931015015, |
|
"learning_rate": 8.058136375984787e-05, |
|
"loss": 1.8031, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 27.463414634146343, |
|
"grad_norm": 0.8888604640960693, |
|
"learning_rate": 8.052703069817985e-05, |
|
"loss": 1.7895, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 27.51219512195122, |
|
"grad_norm": 0.905109703540802, |
|
"learning_rate": 8.047269763651182e-05, |
|
"loss": 1.7975, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 27.5609756097561, |
|
"grad_norm": 0.9893642067909241, |
|
"learning_rate": 8.04183645748438e-05, |
|
"loss": 1.8248, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 27.609756097560975, |
|
"grad_norm": 0.8948488831520081, |
|
"learning_rate": 8.036403151317577e-05, |
|
"loss": 1.8219, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 27.658536585365855, |
|
"grad_norm": 0.871070921421051, |
|
"learning_rate": 8.030969845150774e-05, |
|
"loss": 1.7988, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 27.70731707317073, |
|
"grad_norm": 0.9689252376556396, |
|
"learning_rate": 8.025536538983972e-05, |
|
"loss": 1.7638, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 27.75609756097561, |
|
"grad_norm": 0.9327222108840942, |
|
"learning_rate": 8.02010323281717e-05, |
|
"loss": 1.7726, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 27.804878048780488, |
|
"grad_norm": 0.9302574396133423, |
|
"learning_rate": 8.014669926650367e-05, |
|
"loss": 1.7918, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 27.853658536585368, |
|
"grad_norm": 0.840606153011322, |
|
"learning_rate": 8.009236620483564e-05, |
|
"loss": 1.8006, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 27.902439024390244, |
|
"grad_norm": 0.894504725933075, |
|
"learning_rate": 8.003803314316762e-05, |
|
"loss": 1.7624, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 27.951219512195124, |
|
"grad_norm": 0.861795961856842, |
|
"learning_rate": 7.99837000814996e-05, |
|
"loss": 1.8129, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 1.6097631454467773, |
|
"learning_rate": 7.992936701983157e-05, |
|
"loss": 1.7592, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6507, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.048104763031006, |
|
"eval_rouge1": 0.6057, |
|
"eval_rouge2": 0.3405, |
|
"eval_rougeL": 0.5354, |
|
"eval_runtime": 26.6142, |
|
"eval_samples_per_second": 6.838, |
|
"eval_steps_per_second": 1.728, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 28.048780487804876, |
|
"grad_norm": 0.8586742877960205, |
|
"learning_rate": 7.987503395816354e-05, |
|
"loss": 1.76, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 28.097560975609756, |
|
"grad_norm": 1.0001939535140991, |
|
"learning_rate": 7.982070089649552e-05, |
|
"loss": 1.8019, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 28.146341463414632, |
|
"grad_norm": 0.9395818710327148, |
|
"learning_rate": 7.976636783482749e-05, |
|
"loss": 1.7472, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 28.195121951219512, |
|
"grad_norm": 0.9348780512809753, |
|
"learning_rate": 7.971203477315948e-05, |
|
"loss": 1.7761, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 28.24390243902439, |
|
"grad_norm": 0.9028440713882446, |
|
"learning_rate": 7.965770171149144e-05, |
|
"loss": 1.7789, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 28.29268292682927, |
|
"grad_norm": 0.8583874702453613, |
|
"learning_rate": 7.960336864982341e-05, |
|
"loss": 1.7558, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 28.341463414634145, |
|
"grad_norm": 0.9990294575691223, |
|
"learning_rate": 7.95490355881554e-05, |
|
"loss": 1.8019, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 28.390243902439025, |
|
"grad_norm": 1.0208673477172852, |
|
"learning_rate": 7.949470252648738e-05, |
|
"loss": 1.783, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 28.4390243902439, |
|
"grad_norm": 0.9775688052177429, |
|
"learning_rate": 7.944036946481935e-05, |
|
"loss": 1.7554, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 28.48780487804878, |
|
"grad_norm": 0.9302669763565063, |
|
"learning_rate": 7.938603640315132e-05, |
|
"loss": 1.7615, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 28.536585365853657, |
|
"grad_norm": 0.8069590926170349, |
|
"learning_rate": 7.93317033414833e-05, |
|
"loss": 1.7985, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 28.585365853658537, |
|
"grad_norm": 0.979270339012146, |
|
"learning_rate": 7.927737027981527e-05, |
|
"loss": 1.7564, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 28.634146341463413, |
|
"grad_norm": 0.9459352493286133, |
|
"learning_rate": 7.922303721814725e-05, |
|
"loss": 1.7706, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 28.682926829268293, |
|
"grad_norm": 0.9745155572891235, |
|
"learning_rate": 7.916870415647922e-05, |
|
"loss": 1.7843, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 28.73170731707317, |
|
"grad_norm": 0.8070019483566284, |
|
"learning_rate": 7.91143710948112e-05, |
|
"loss": 1.7472, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 28.78048780487805, |
|
"grad_norm": 0.7862805128097534, |
|
"learning_rate": 7.906003803314317e-05, |
|
"loss": 1.7924, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 28.829268292682926, |
|
"grad_norm": 0.8524170517921448, |
|
"learning_rate": 7.900570497147515e-05, |
|
"loss": 1.7843, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 28.878048780487806, |
|
"grad_norm": 0.989010751247406, |
|
"learning_rate": 7.895137190980712e-05, |
|
"loss": 1.8014, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 28.926829268292682, |
|
"grad_norm": 0.9778967499732971, |
|
"learning_rate": 7.889703884813909e-05, |
|
"loss": 1.7958, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 28.975609756097562, |
|
"grad_norm": 0.9401681423187256, |
|
"learning_rate": 7.884270578647107e-05, |
|
"loss": 1.8091, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6501, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.045945405960083, |
|
"eval_rouge1": 0.6045, |
|
"eval_rouge2": 0.3397, |
|
"eval_rougeL": 0.5346, |
|
"eval_runtime": 26.6264, |
|
"eval_samples_per_second": 6.835, |
|
"eval_steps_per_second": 1.728, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 29.024390243902438, |
|
"grad_norm": 0.9820492267608643, |
|
"learning_rate": 7.878837272480305e-05, |
|
"loss": 1.7651, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 29.073170731707318, |
|
"grad_norm": 1.0031334161758423, |
|
"learning_rate": 7.873403966313502e-05, |
|
"loss": 1.7359, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 29.121951219512194, |
|
"grad_norm": 0.8762280344963074, |
|
"learning_rate": 7.867970660146699e-05, |
|
"loss": 1.7908, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 29.170731707317074, |
|
"grad_norm": 0.8959635496139526, |
|
"learning_rate": 7.862537353979897e-05, |
|
"loss": 1.7316, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 29.21951219512195, |
|
"grad_norm": 1.0365235805511475, |
|
"learning_rate": 7.857104047813094e-05, |
|
"loss": 1.7899, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 29.26829268292683, |
|
"grad_norm": 0.9004618525505066, |
|
"learning_rate": 7.851670741646292e-05, |
|
"loss": 1.7315, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 29.317073170731707, |
|
"grad_norm": 1.1213397979736328, |
|
"learning_rate": 7.846237435479489e-05, |
|
"loss": 1.7577, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 29.365853658536587, |
|
"grad_norm": 0.8381146788597107, |
|
"learning_rate": 7.840804129312686e-05, |
|
"loss": 1.7488, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 29.414634146341463, |
|
"grad_norm": 1.0437678098678589, |
|
"learning_rate": 7.835370823145884e-05, |
|
"loss": 1.7583, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 29.463414634146343, |
|
"grad_norm": 1.0147404670715332, |
|
"learning_rate": 7.829937516979083e-05, |
|
"loss": 1.7358, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 29.51219512195122, |
|
"grad_norm": 1.1309133768081665, |
|
"learning_rate": 7.82450421081228e-05, |
|
"loss": 1.7431, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 29.5609756097561, |
|
"grad_norm": 1.2266108989715576, |
|
"learning_rate": 7.819070904645476e-05, |
|
"loss": 1.7526, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 29.609756097560975, |
|
"grad_norm": 0.9693206548690796, |
|
"learning_rate": 7.813637598478675e-05, |
|
"loss": 1.7633, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 29.658536585365855, |
|
"grad_norm": 1.009680151939392, |
|
"learning_rate": 7.808204292311873e-05, |
|
"loss": 1.7504, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 29.70731707317073, |
|
"grad_norm": 0.9043843746185303, |
|
"learning_rate": 7.80277098614507e-05, |
|
"loss": 1.7796, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 29.75609756097561, |
|
"grad_norm": 0.7989161610603333, |
|
"learning_rate": 7.797337679978267e-05, |
|
"loss": 1.7718, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 29.804878048780488, |
|
"grad_norm": 0.9764060974121094, |
|
"learning_rate": 7.791904373811465e-05, |
|
"loss": 1.7213, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 29.853658536585368, |
|
"grad_norm": 1.0457733869552612, |
|
"learning_rate": 7.786471067644662e-05, |
|
"loss": 1.7723, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 29.902439024390244, |
|
"grad_norm": 1.0509238243103027, |
|
"learning_rate": 7.78103776147786e-05, |
|
"loss": 1.7622, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 29.951219512195124, |
|
"grad_norm": 1.1585285663604736, |
|
"learning_rate": 7.775604455311057e-05, |
|
"loss": 1.765, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.9969220757484436, |
|
"learning_rate": 7.770171149144254e-05, |
|
"loss": 1.7445, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6529, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.041071653366089, |
|
"eval_rouge1": 0.6081, |
|
"eval_rouge2": 0.3424, |
|
"eval_rougeL": 0.5373, |
|
"eval_runtime": 26.8722, |
|
"eval_samples_per_second": 6.773, |
|
"eval_steps_per_second": 1.712, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 30.048780487804876, |
|
"grad_norm": 0.8964574933052063, |
|
"learning_rate": 7.764737842977452e-05, |
|
"loss": 1.7597, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 30.097560975609756, |
|
"grad_norm": 1.152288794517517, |
|
"learning_rate": 7.75930453681065e-05, |
|
"loss": 1.7119, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 30.146341463414632, |
|
"grad_norm": 0.9494566917419434, |
|
"learning_rate": 7.753871230643847e-05, |
|
"loss": 1.7019, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 30.195121951219512, |
|
"grad_norm": 0.9420191049575806, |
|
"learning_rate": 7.748437924477044e-05, |
|
"loss": 1.759, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 30.24390243902439, |
|
"grad_norm": 1.0232353210449219, |
|
"learning_rate": 7.743004618310242e-05, |
|
"loss": 1.7417, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 30.29268292682927, |
|
"grad_norm": 0.9363576769828796, |
|
"learning_rate": 7.73757131214344e-05, |
|
"loss": 1.7557, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 30.341463414634145, |
|
"grad_norm": 0.8861675262451172, |
|
"learning_rate": 7.732138005976637e-05, |
|
"loss": 1.7179, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 30.390243902439025, |
|
"grad_norm": 1.0272518396377563, |
|
"learning_rate": 7.726704699809834e-05, |
|
"loss": 1.7583, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 30.4390243902439, |
|
"grad_norm": 0.9605531096458435, |
|
"learning_rate": 7.721271393643032e-05, |
|
"loss": 1.7494, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 30.48780487804878, |
|
"grad_norm": 0.9756708741188049, |
|
"learning_rate": 7.71583808747623e-05, |
|
"loss": 1.7332, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 30.536585365853657, |
|
"grad_norm": 1.0306313037872314, |
|
"learning_rate": 7.710404781309428e-05, |
|
"loss": 1.726, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 30.585365853658537, |
|
"grad_norm": 1.0111984014511108, |
|
"learning_rate": 7.704971475142624e-05, |
|
"loss": 1.7343, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 30.634146341463413, |
|
"grad_norm": 0.9273544549942017, |
|
"learning_rate": 7.699538168975821e-05, |
|
"loss": 1.747, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 30.682926829268293, |
|
"grad_norm": 0.8815616369247437, |
|
"learning_rate": 7.69410486280902e-05, |
|
"loss": 1.7264, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 30.73170731707317, |
|
"grad_norm": 0.8531066179275513, |
|
"learning_rate": 7.688671556642218e-05, |
|
"loss": 1.7417, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 30.78048780487805, |
|
"grad_norm": 0.964625358581543, |
|
"learning_rate": 7.683238250475415e-05, |
|
"loss": 1.7882, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 30.829268292682926, |
|
"grad_norm": 1.0142056941986084, |
|
"learning_rate": 7.677804944308612e-05, |
|
"loss": 1.7422, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 30.878048780487806, |
|
"grad_norm": 0.9176868200302124, |
|
"learning_rate": 7.67237163814181e-05, |
|
"loss": 1.7422, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 30.926829268292682, |
|
"grad_norm": 1.126758337020874, |
|
"learning_rate": 7.666938331975008e-05, |
|
"loss": 1.7626, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 30.975609756097562, |
|
"grad_norm": 1.0797853469848633, |
|
"learning_rate": 7.661505025808205e-05, |
|
"loss": 1.7303, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6559, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0429515838623047, |
|
"eval_rouge1": 0.6091, |
|
"eval_rouge2": 0.3452, |
|
"eval_rougeL": 0.5398, |
|
"eval_runtime": 26.8129, |
|
"eval_samples_per_second": 6.788, |
|
"eval_steps_per_second": 1.716, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 31.024390243902438, |
|
"grad_norm": 1.0031147003173828, |
|
"learning_rate": 7.656071719641402e-05, |
|
"loss": 1.6963, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 31.073170731707318, |
|
"grad_norm": 1.0235823392868042, |
|
"learning_rate": 7.6506384134746e-05, |
|
"loss": 1.6978, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 31.121951219512194, |
|
"grad_norm": 0.9943031072616577, |
|
"learning_rate": 7.645205107307797e-05, |
|
"loss": 1.7137, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 31.170731707317074, |
|
"grad_norm": 0.9972837567329407, |
|
"learning_rate": 7.639771801140995e-05, |
|
"loss": 1.7383, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 31.21951219512195, |
|
"grad_norm": 0.9345105290412903, |
|
"learning_rate": 7.634338494974192e-05, |
|
"loss": 1.6891, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 31.26829268292683, |
|
"grad_norm": 0.9205601215362549, |
|
"learning_rate": 7.628905188807389e-05, |
|
"loss": 1.7028, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 31.317073170731707, |
|
"grad_norm": 0.8768946528434753, |
|
"learning_rate": 7.623471882640587e-05, |
|
"loss": 1.7453, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 31.365853658536587, |
|
"grad_norm": 0.7679451107978821, |
|
"learning_rate": 7.618038576473785e-05, |
|
"loss": 1.739, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 31.414634146341463, |
|
"grad_norm": 0.8392273783683777, |
|
"learning_rate": 7.612605270306982e-05, |
|
"loss": 1.7096, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 31.463414634146343, |
|
"grad_norm": 1.02446448802948, |
|
"learning_rate": 7.607171964140179e-05, |
|
"loss": 1.7051, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 31.51219512195122, |
|
"grad_norm": 0.9734623432159424, |
|
"learning_rate": 7.601738657973377e-05, |
|
"loss": 1.7371, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 31.5609756097561, |
|
"grad_norm": 0.8957915306091309, |
|
"learning_rate": 7.596305351806576e-05, |
|
"loss": 1.7308, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 31.609756097560975, |
|
"grad_norm": 0.969406008720398, |
|
"learning_rate": 7.590872045639773e-05, |
|
"loss": 1.7053, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 31.658536585365855, |
|
"grad_norm": 0.9478543996810913, |
|
"learning_rate": 7.58543873947297e-05, |
|
"loss": 1.7717, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 31.70731707317073, |
|
"grad_norm": 0.8509706854820251, |
|
"learning_rate": 7.580005433306168e-05, |
|
"loss": 1.7209, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 31.75609756097561, |
|
"grad_norm": 0.9139954447746277, |
|
"learning_rate": 7.574572127139365e-05, |
|
"loss": 1.7463, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 31.804878048780488, |
|
"grad_norm": 1.0609540939331055, |
|
"learning_rate": 7.569138820972563e-05, |
|
"loss": 1.7234, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 31.853658536585368, |
|
"grad_norm": 0.9388116002082825, |
|
"learning_rate": 7.56370551480576e-05, |
|
"loss": 1.7286, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 31.902439024390244, |
|
"grad_norm": 0.9279087781906128, |
|
"learning_rate": 7.558272208638957e-05, |
|
"loss": 1.6966, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 31.951219512195124, |
|
"grad_norm": 0.9830589294433594, |
|
"learning_rate": 7.552838902472155e-05, |
|
"loss": 1.7369, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 1.2547836303710938, |
|
"learning_rate": 7.547405596305353e-05, |
|
"loss": 1.7659, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6549, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0383822917938232, |
|
"eval_rouge1": 0.6098, |
|
"eval_rouge2": 0.3453, |
|
"eval_rougeL": 0.5391, |
|
"eval_runtime": 26.6337, |
|
"eval_samples_per_second": 6.833, |
|
"eval_steps_per_second": 1.727, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 32.048780487804876, |
|
"grad_norm": 1.1577900648117065, |
|
"learning_rate": 7.54197229013855e-05, |
|
"loss": 1.693, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 32.09756097560975, |
|
"grad_norm": 1.0532095432281494, |
|
"learning_rate": 7.536538983971747e-05, |
|
"loss": 1.6854, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 32.146341463414636, |
|
"grad_norm": 1.2690882682800293, |
|
"learning_rate": 7.531105677804945e-05, |
|
"loss": 1.7024, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 32.19512195121951, |
|
"grad_norm": 0.9884124994277954, |
|
"learning_rate": 7.525672371638142e-05, |
|
"loss": 1.7099, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 32.24390243902439, |
|
"grad_norm": 0.9226039052009583, |
|
"learning_rate": 7.52023906547134e-05, |
|
"loss": 1.6841, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 32.292682926829265, |
|
"grad_norm": 0.8764020800590515, |
|
"learning_rate": 7.514805759304537e-05, |
|
"loss": 1.7114, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 32.34146341463415, |
|
"grad_norm": 0.9021767973899841, |
|
"learning_rate": 7.509372453137735e-05, |
|
"loss": 1.7112, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 32.390243902439025, |
|
"grad_norm": 0.9591324925422668, |
|
"learning_rate": 7.503939146970932e-05, |
|
"loss": 1.7037, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 32.4390243902439, |
|
"grad_norm": 0.8968470096588135, |
|
"learning_rate": 7.49850584080413e-05, |
|
"loss": 1.7042, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 32.48780487804878, |
|
"grad_norm": 0.8518949151039124, |
|
"learning_rate": 7.493072534637327e-05, |
|
"loss": 1.7031, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 32.53658536585366, |
|
"grad_norm": 1.0725502967834473, |
|
"learning_rate": 7.487639228470524e-05, |
|
"loss": 1.6743, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 32.58536585365854, |
|
"grad_norm": 0.92808997631073, |
|
"learning_rate": 7.482205922303722e-05, |
|
"loss": 1.7228, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 32.63414634146341, |
|
"grad_norm": 0.856819748878479, |
|
"learning_rate": 7.47677261613692e-05, |
|
"loss": 1.7069, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 32.68292682926829, |
|
"grad_norm": 1.0233670473098755, |
|
"learning_rate": 7.471339309970116e-05, |
|
"loss": 1.7207, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 32.73170731707317, |
|
"grad_norm": 0.8869268298149109, |
|
"learning_rate": 7.465906003803314e-05, |
|
"loss": 1.6835, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 32.78048780487805, |
|
"grad_norm": 0.9873183369636536, |
|
"learning_rate": 7.460472697636513e-05, |
|
"loss": 1.6548, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 32.829268292682926, |
|
"grad_norm": 1.2351232767105103, |
|
"learning_rate": 7.45503939146971e-05, |
|
"loss": 1.7437, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 32.8780487804878, |
|
"grad_norm": 0.9190348982810974, |
|
"learning_rate": 7.449606085302908e-05, |
|
"loss": 1.7203, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 32.926829268292686, |
|
"grad_norm": 1.0856812000274658, |
|
"learning_rate": 7.444172779136105e-05, |
|
"loss": 1.7515, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 32.97560975609756, |
|
"grad_norm": 0.9336557388305664, |
|
"learning_rate": 7.438739472969301e-05, |
|
"loss": 1.7392, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6555, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0462191104888916, |
|
"eval_rouge1": 0.6078, |
|
"eval_rouge2": 0.3444, |
|
"eval_rougeL": 0.5387, |
|
"eval_runtime": 26.6235, |
|
"eval_samples_per_second": 6.836, |
|
"eval_steps_per_second": 1.728, |
|
"step": 6765 |
|
}, |
|
{ |
|
"epoch": 33.02439024390244, |
|
"grad_norm": 0.9959226250648499, |
|
"learning_rate": 7.4333061668025e-05, |
|
"loss": 1.6647, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 33.073170731707314, |
|
"grad_norm": 0.9365916848182678, |
|
"learning_rate": 7.427872860635698e-05, |
|
"loss": 1.6814, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 33.1219512195122, |
|
"grad_norm": 0.9217053651809692, |
|
"learning_rate": 7.422439554468895e-05, |
|
"loss": 1.6695, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 33.170731707317074, |
|
"grad_norm": 1.186452031135559, |
|
"learning_rate": 7.417006248302092e-05, |
|
"loss": 1.6631, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 33.21951219512195, |
|
"grad_norm": 0.8331051468849182, |
|
"learning_rate": 7.41157294213529e-05, |
|
"loss": 1.6808, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 33.26829268292683, |
|
"grad_norm": 0.9306808114051819, |
|
"learning_rate": 7.406139635968488e-05, |
|
"loss": 1.7146, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 33.31707317073171, |
|
"grad_norm": 1.0423853397369385, |
|
"learning_rate": 7.400706329801684e-05, |
|
"loss": 1.7025, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 33.36585365853659, |
|
"grad_norm": 0.974290132522583, |
|
"learning_rate": 7.395273023634882e-05, |
|
"loss": 1.7215, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 33.41463414634146, |
|
"grad_norm": 0.9810996055603027, |
|
"learning_rate": 7.38983971746808e-05, |
|
"loss": 1.6833, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 33.46341463414634, |
|
"grad_norm": 1.0704784393310547, |
|
"learning_rate": 7.384406411301277e-05, |
|
"loss": 1.6692, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 33.51219512195122, |
|
"grad_norm": 0.9080216884613037, |
|
"learning_rate": 7.378973105134474e-05, |
|
"loss": 1.7025, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 33.5609756097561, |
|
"grad_norm": 0.9012085795402527, |
|
"learning_rate": 7.373539798967672e-05, |
|
"loss": 1.7092, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 33.609756097560975, |
|
"grad_norm": 1.106162667274475, |
|
"learning_rate": 7.368106492800869e-05, |
|
"loss": 1.673, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 33.65853658536585, |
|
"grad_norm": 0.9788945317268372, |
|
"learning_rate": 7.362673186634067e-05, |
|
"loss": 1.6947, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 33.707317073170735, |
|
"grad_norm": 0.968518853187561, |
|
"learning_rate": 7.357239880467266e-05, |
|
"loss": 1.6818, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 33.75609756097561, |
|
"grad_norm": 0.9239228963851929, |
|
"learning_rate": 7.351806574300462e-05, |
|
"loss": 1.6944, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 33.80487804878049, |
|
"grad_norm": 1.005051851272583, |
|
"learning_rate": 7.346373268133659e-05, |
|
"loss": 1.7169, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 33.853658536585364, |
|
"grad_norm": 0.8998588919639587, |
|
"learning_rate": 7.340939961966858e-05, |
|
"loss": 1.6913, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 33.90243902439025, |
|
"grad_norm": 0.918029248714447, |
|
"learning_rate": 7.335506655800056e-05, |
|
"loss": 1.6925, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 33.951219512195124, |
|
"grad_norm": 0.8755961656570435, |
|
"learning_rate": 7.330073349633251e-05, |
|
"loss": 1.6859, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 1.1943050622940063, |
|
"learning_rate": 7.32464004346645e-05, |
|
"loss": 1.7102, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6567, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.041721820831299, |
|
"eval_rouge1": 0.6102, |
|
"eval_rouge2": 0.3486, |
|
"eval_rougeL": 0.5411, |
|
"eval_runtime": 26.6556, |
|
"eval_samples_per_second": 6.828, |
|
"eval_steps_per_second": 1.726, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 34.048780487804876, |
|
"grad_norm": 0.9968337416648865, |
|
"learning_rate": 7.319206737299648e-05, |
|
"loss": 1.6624, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 34.09756097560975, |
|
"grad_norm": 0.837787389755249, |
|
"learning_rate": 7.313773431132845e-05, |
|
"loss": 1.6608, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 34.146341463414636, |
|
"grad_norm": 0.9373610615730286, |
|
"learning_rate": 7.308340124966042e-05, |
|
"loss": 1.6851, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 34.19512195121951, |
|
"grad_norm": 0.8893011808395386, |
|
"learning_rate": 7.30290681879924e-05, |
|
"loss": 1.6436, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 34.24390243902439, |
|
"grad_norm": 1.1527223587036133, |
|
"learning_rate": 7.297473512632437e-05, |
|
"loss": 1.6833, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 34.292682926829265, |
|
"grad_norm": 0.9689236879348755, |
|
"learning_rate": 7.292040206465635e-05, |
|
"loss": 1.6533, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 34.34146341463415, |
|
"grad_norm": 1.0654863119125366, |
|
"learning_rate": 7.286606900298833e-05, |
|
"loss": 1.6665, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 34.390243902439025, |
|
"grad_norm": 1.0611974000930786, |
|
"learning_rate": 7.281173594132029e-05, |
|
"loss": 1.6584, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 34.4390243902439, |
|
"grad_norm": 0.9801427721977234, |
|
"learning_rate": 7.275740287965227e-05, |
|
"loss": 1.7052, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 34.48780487804878, |
|
"grad_norm": 1.125786542892456, |
|
"learning_rate": 7.270306981798425e-05, |
|
"loss": 1.6948, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 34.53658536585366, |
|
"grad_norm": 0.8822893500328064, |
|
"learning_rate": 7.264873675631623e-05, |
|
"loss": 1.6732, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 34.58536585365854, |
|
"grad_norm": 0.8989709615707397, |
|
"learning_rate": 7.259440369464819e-05, |
|
"loss": 1.6392, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 34.63414634146341, |
|
"grad_norm": 0.8959782719612122, |
|
"learning_rate": 7.254007063298017e-05, |
|
"loss": 1.6664, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 34.68292682926829, |
|
"grad_norm": 0.9004738330841064, |
|
"learning_rate": 7.248573757131215e-05, |
|
"loss": 1.6846, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 34.73170731707317, |
|
"grad_norm": 1.095853567123413, |
|
"learning_rate": 7.243140450964412e-05, |
|
"loss": 1.6757, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 34.78048780487805, |
|
"grad_norm": 1.0721943378448486, |
|
"learning_rate": 7.237707144797609e-05, |
|
"loss": 1.6562, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 34.829268292682926, |
|
"grad_norm": 0.9506831169128418, |
|
"learning_rate": 7.232273838630807e-05, |
|
"loss": 1.6834, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 34.8780487804878, |
|
"grad_norm": 1.006168007850647, |
|
"learning_rate": 7.226840532464004e-05, |
|
"loss": 1.6787, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 34.926829268292686, |
|
"grad_norm": 0.990227460861206, |
|
"learning_rate": 7.221407226297202e-05, |
|
"loss": 1.6531, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 34.97560975609756, |
|
"grad_norm": 0.9547024965286255, |
|
"learning_rate": 7.215973920130399e-05, |
|
"loss": 1.6717, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6541, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0457675457000732, |
|
"eval_rouge1": 0.608, |
|
"eval_rouge2": 0.3446, |
|
"eval_rougeL": 0.5386, |
|
"eval_runtime": 26.6803, |
|
"eval_samples_per_second": 6.822, |
|
"eval_steps_per_second": 1.724, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 35.02439024390244, |
|
"grad_norm": 0.9323550462722778, |
|
"learning_rate": 7.210540613963596e-05, |
|
"loss": 1.6773, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 35.073170731707314, |
|
"grad_norm": 0.9128536581993103, |
|
"learning_rate": 7.205107307796794e-05, |
|
"loss": 1.6005, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 35.1219512195122, |
|
"grad_norm": 0.9445728659629822, |
|
"learning_rate": 7.199674001629993e-05, |
|
"loss": 1.6501, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 35.170731707317074, |
|
"grad_norm": 0.8394743204116821, |
|
"learning_rate": 7.194240695463191e-05, |
|
"loss": 1.6709, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 35.21951219512195, |
|
"grad_norm": 0.8431241512298584, |
|
"learning_rate": 7.188807389296386e-05, |
|
"loss": 1.6454, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 35.26829268292683, |
|
"grad_norm": 1.0416409969329834, |
|
"learning_rate": 7.183374083129585e-05, |
|
"loss": 1.6869, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 35.31707317073171, |
|
"grad_norm": 1.0885652303695679, |
|
"learning_rate": 7.177940776962783e-05, |
|
"loss": 1.6383, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 35.36585365853659, |
|
"grad_norm": 1.0008647441864014, |
|
"learning_rate": 7.17250747079598e-05, |
|
"loss": 1.6154, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 35.41463414634146, |
|
"grad_norm": 1.1045302152633667, |
|
"learning_rate": 7.167074164629177e-05, |
|
"loss": 1.65, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 35.46341463414634, |
|
"grad_norm": 0.8903409838676453, |
|
"learning_rate": 7.161640858462375e-05, |
|
"loss": 1.6746, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 35.51219512195122, |
|
"grad_norm": 1.0800362825393677, |
|
"learning_rate": 7.156207552295572e-05, |
|
"loss": 1.6472, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 35.5609756097561, |
|
"grad_norm": 0.8663403391838074, |
|
"learning_rate": 7.15077424612877e-05, |
|
"loss": 1.6474, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 35.609756097560975, |
|
"grad_norm": 1.0188908576965332, |
|
"learning_rate": 7.145340939961967e-05, |
|
"loss": 1.6916, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 35.65853658536585, |
|
"grad_norm": 0.9418292045593262, |
|
"learning_rate": 7.139907633795164e-05, |
|
"loss": 1.6662, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 35.707317073170735, |
|
"grad_norm": 0.963869035243988, |
|
"learning_rate": 7.134474327628362e-05, |
|
"loss": 1.6341, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 35.75609756097561, |
|
"grad_norm": 0.908099353313446, |
|
"learning_rate": 7.12904102146156e-05, |
|
"loss": 1.6545, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 35.80487804878049, |
|
"grad_norm": 0.9877302646636963, |
|
"learning_rate": 7.123607715294757e-05, |
|
"loss": 1.6664, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 35.853658536585364, |
|
"grad_norm": 0.8455460667610168, |
|
"learning_rate": 7.118174409127954e-05, |
|
"loss": 1.6903, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 35.90243902439025, |
|
"grad_norm": 1.126727819442749, |
|
"learning_rate": 7.112741102961152e-05, |
|
"loss": 1.7045, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 35.951219512195124, |
|
"grad_norm": 0.9532201290130615, |
|
"learning_rate": 7.10730779679435e-05, |
|
"loss": 1.6644, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 1.0697003602981567, |
|
"learning_rate": 7.101874490627547e-05, |
|
"loss": 1.6716, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6569, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0504531860351562, |
|
"eval_rouge1": 0.6102, |
|
"eval_rouge2": 0.347, |
|
"eval_rougeL": 0.5419, |
|
"eval_runtime": 26.7626, |
|
"eval_samples_per_second": 6.801, |
|
"eval_steps_per_second": 1.719, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 36.048780487804876, |
|
"grad_norm": 1.00981605052948, |
|
"learning_rate": 7.096441184460744e-05, |
|
"loss": 1.6188, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 36.09756097560975, |
|
"grad_norm": 0.9147917032241821, |
|
"learning_rate": 7.091007878293942e-05, |
|
"loss": 1.6757, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 36.146341463414636, |
|
"grad_norm": 0.8071935176849365, |
|
"learning_rate": 7.08557457212714e-05, |
|
"loss": 1.6582, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 36.19512195121951, |
|
"grad_norm": 1.0185075998306274, |
|
"learning_rate": 7.080141265960338e-05, |
|
"loss": 1.643, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 36.24390243902439, |
|
"grad_norm": 1.0392359495162964, |
|
"learning_rate": 7.074707959793534e-05, |
|
"loss": 1.6333, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 36.292682926829265, |
|
"grad_norm": 1.3190245628356934, |
|
"learning_rate": 7.069274653626731e-05, |
|
"loss": 1.6432, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 36.34146341463415, |
|
"grad_norm": 0.9551993608474731, |
|
"learning_rate": 7.06384134745993e-05, |
|
"loss": 1.6192, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 36.390243902439025, |
|
"grad_norm": 0.9931862354278564, |
|
"learning_rate": 7.058408041293128e-05, |
|
"loss": 1.6457, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 36.4390243902439, |
|
"grad_norm": 1.0751144886016846, |
|
"learning_rate": 7.052974735126325e-05, |
|
"loss": 1.6484, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 36.48780487804878, |
|
"grad_norm": 0.9190940260887146, |
|
"learning_rate": 7.047541428959522e-05, |
|
"loss": 1.6115, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 36.53658536585366, |
|
"grad_norm": 0.8716026544570923, |
|
"learning_rate": 7.04210812279272e-05, |
|
"loss": 1.6337, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 36.58536585365854, |
|
"grad_norm": 1.0738297700881958, |
|
"learning_rate": 7.036674816625917e-05, |
|
"loss": 1.6441, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 36.63414634146341, |
|
"grad_norm": 0.9632681608200073, |
|
"learning_rate": 7.031241510459115e-05, |
|
"loss": 1.639, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 36.68292682926829, |
|
"grad_norm": 1.0181065797805786, |
|
"learning_rate": 7.025808204292312e-05, |
|
"loss": 1.6603, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 36.73170731707317, |
|
"grad_norm": 0.9177279472351074, |
|
"learning_rate": 7.02037489812551e-05, |
|
"loss": 1.6373, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 36.78048780487805, |
|
"grad_norm": 0.9398072361946106, |
|
"learning_rate": 7.014941591958707e-05, |
|
"loss": 1.646, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 36.829268292682926, |
|
"grad_norm": 0.8873163461685181, |
|
"learning_rate": 7.009508285791905e-05, |
|
"loss": 1.6427, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 36.8780487804878, |
|
"grad_norm": 1.026986837387085, |
|
"learning_rate": 7.004074979625102e-05, |
|
"loss": 1.6429, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 36.926829268292686, |
|
"grad_norm": 0.9548583626747131, |
|
"learning_rate": 6.998641673458299e-05, |
|
"loss": 1.6611, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 36.97560975609756, |
|
"grad_norm": 0.9584435224533081, |
|
"learning_rate": 6.993208367291497e-05, |
|
"loss": 1.649, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6581, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.046879768371582, |
|
"eval_rouge1": 0.6126, |
|
"eval_rouge2": 0.3496, |
|
"eval_rougeL": 0.5429, |
|
"eval_runtime": 26.6906, |
|
"eval_samples_per_second": 6.819, |
|
"eval_steps_per_second": 1.723, |
|
"step": 7585 |
|
}, |
|
{ |
|
"epoch": 37.02439024390244, |
|
"grad_norm": 0.9119429588317871, |
|
"learning_rate": 6.987775061124695e-05, |
|
"loss": 1.6352, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 37.073170731707314, |
|
"grad_norm": 1.1885426044464111, |
|
"learning_rate": 6.982341754957892e-05, |
|
"loss": 1.6066, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 37.1219512195122, |
|
"grad_norm": 0.9667913317680359, |
|
"learning_rate": 6.976908448791089e-05, |
|
"loss": 1.6495, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 37.170731707317074, |
|
"grad_norm": 0.9809781312942505, |
|
"learning_rate": 6.971475142624287e-05, |
|
"loss": 1.6541, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 37.21951219512195, |
|
"grad_norm": 0.8620357513427734, |
|
"learning_rate": 6.966041836457484e-05, |
|
"loss": 1.6428, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 37.26829268292683, |
|
"grad_norm": 0.8469148278236389, |
|
"learning_rate": 6.960608530290683e-05, |
|
"loss": 1.5949, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 37.31707317073171, |
|
"grad_norm": 0.930397629737854, |
|
"learning_rate": 6.95517522412388e-05, |
|
"loss": 1.6256, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 37.36585365853659, |
|
"grad_norm": 0.9291563630104065, |
|
"learning_rate": 6.949741917957078e-05, |
|
"loss": 1.6236, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 37.41463414634146, |
|
"grad_norm": 0.9306030869483948, |
|
"learning_rate": 6.944308611790275e-05, |
|
"loss": 1.6182, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 37.46341463414634, |
|
"grad_norm": 0.8049061298370361, |
|
"learning_rate": 6.938875305623473e-05, |
|
"loss": 1.638, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 37.51219512195122, |
|
"grad_norm": 0.8694387674331665, |
|
"learning_rate": 6.93344199945667e-05, |
|
"loss": 1.6102, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 37.5609756097561, |
|
"grad_norm": 1.0936464071273804, |
|
"learning_rate": 6.928008693289867e-05, |
|
"loss": 1.6482, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 37.609756097560975, |
|
"grad_norm": 0.9273491501808167, |
|
"learning_rate": 6.922575387123065e-05, |
|
"loss": 1.6184, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 37.65853658536585, |
|
"grad_norm": 0.8881824016571045, |
|
"learning_rate": 6.917142080956263e-05, |
|
"loss": 1.633, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 37.707317073170735, |
|
"grad_norm": 1.1889209747314453, |
|
"learning_rate": 6.91170877478946e-05, |
|
"loss": 1.6263, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 37.75609756097561, |
|
"grad_norm": 0.9249302744865417, |
|
"learning_rate": 6.906275468622657e-05, |
|
"loss": 1.5986, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 37.80487804878049, |
|
"grad_norm": 0.8693262338638306, |
|
"learning_rate": 6.900842162455855e-05, |
|
"loss": 1.6218, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 37.853658536585364, |
|
"grad_norm": 0.9223681688308716, |
|
"learning_rate": 6.895408856289052e-05, |
|
"loss": 1.6129, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 37.90243902439025, |
|
"grad_norm": 0.9245777130126953, |
|
"learning_rate": 6.88997555012225e-05, |
|
"loss": 1.6467, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 37.951219512195124, |
|
"grad_norm": 1.1191091537475586, |
|
"learning_rate": 6.884542243955447e-05, |
|
"loss": 1.6185, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 1.2376272678375244, |
|
"learning_rate": 6.879108937788644e-05, |
|
"loss": 1.6635, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6581, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.046464681625366, |
|
"eval_rouge1": 0.6116, |
|
"eval_rouge2": 0.3489, |
|
"eval_rougeL": 0.5422, |
|
"eval_runtime": 26.6682, |
|
"eval_samples_per_second": 6.825, |
|
"eval_steps_per_second": 1.725, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 38.048780487804876, |
|
"grad_norm": 1.0961906909942627, |
|
"learning_rate": 6.873675631621842e-05, |
|
"loss": 1.6007, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 38.09756097560975, |
|
"grad_norm": 0.9302559494972229, |
|
"learning_rate": 6.86824232545504e-05, |
|
"loss": 1.5941, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 38.146341463414636, |
|
"grad_norm": 0.8833104968070984, |
|
"learning_rate": 6.862809019288237e-05, |
|
"loss": 1.587, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 38.19512195121951, |
|
"grad_norm": 0.9067671895027161, |
|
"learning_rate": 6.857375713121434e-05, |
|
"loss": 1.5975, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 38.24390243902439, |
|
"grad_norm": 1.0321314334869385, |
|
"learning_rate": 6.851942406954632e-05, |
|
"loss": 1.6184, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 38.292682926829265, |
|
"grad_norm": 1.0822004079818726, |
|
"learning_rate": 6.84650910078783e-05, |
|
"loss": 1.608, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 38.34146341463415, |
|
"grad_norm": 1.154619812965393, |
|
"learning_rate": 6.841075794621027e-05, |
|
"loss": 1.6111, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 38.390243902439025, |
|
"grad_norm": 0.8337762951850891, |
|
"learning_rate": 6.835642488454224e-05, |
|
"loss": 1.6151, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 38.4390243902439, |
|
"grad_norm": 16.136899948120117, |
|
"learning_rate": 6.830209182287423e-05, |
|
"loss": 1.638, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 38.48780487804878, |
|
"grad_norm": 1.3192740678787231, |
|
"learning_rate": 6.82477587612062e-05, |
|
"loss": 1.6336, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 38.53658536585366, |
|
"grad_norm": 0.9884423613548279, |
|
"learning_rate": 6.819342569953818e-05, |
|
"loss": 1.5949, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 38.58536585365854, |
|
"grad_norm": 0.9335576891899109, |
|
"learning_rate": 6.813909263787015e-05, |
|
"loss": 1.627, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 38.63414634146341, |
|
"grad_norm": 0.7936056852340698, |
|
"learning_rate": 6.808475957620211e-05, |
|
"loss": 1.6181, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 38.68292682926829, |
|
"grad_norm": 0.8893793225288391, |
|
"learning_rate": 6.80304265145341e-05, |
|
"loss": 1.6484, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 38.73170731707317, |
|
"grad_norm": 0.9549230933189392, |
|
"learning_rate": 6.797609345286608e-05, |
|
"loss": 1.6268, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 38.78048780487805, |
|
"grad_norm": 0.8394344449043274, |
|
"learning_rate": 6.792176039119805e-05, |
|
"loss": 1.6217, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 38.829268292682926, |
|
"grad_norm": 0.9312503337860107, |
|
"learning_rate": 6.786742732953002e-05, |
|
"loss": 1.6401, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 38.8780487804878, |
|
"grad_norm": 1.1489685773849487, |
|
"learning_rate": 6.7813094267862e-05, |
|
"loss": 1.6342, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 38.926829268292686, |
|
"grad_norm": 0.9154052138328552, |
|
"learning_rate": 6.775876120619398e-05, |
|
"loss": 1.6165, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 38.97560975609756, |
|
"grad_norm": 0.8178635239601135, |
|
"learning_rate": 6.770442814452595e-05, |
|
"loss": 1.6019, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6571, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.057765483856201, |
|
"eval_rouge1": 0.6095, |
|
"eval_rouge2": 0.3467, |
|
"eval_rougeL": 0.5406, |
|
"eval_runtime": 26.6397, |
|
"eval_samples_per_second": 6.832, |
|
"eval_steps_per_second": 1.727, |
|
"step": 7995 |
|
}, |
|
{ |
|
"epoch": 39.02439024390244, |
|
"grad_norm": 1.0003865957260132, |
|
"learning_rate": 6.765009508285792e-05, |
|
"loss": 1.6608, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 39.073170731707314, |
|
"grad_norm": 0.8248067498207092, |
|
"learning_rate": 6.75957620211899e-05, |
|
"loss": 1.5807, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 39.1219512195122, |
|
"grad_norm": 0.9207936525344849, |
|
"learning_rate": 6.754142895952187e-05, |
|
"loss": 1.5651, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 39.170731707317074, |
|
"grad_norm": 0.9343249797821045, |
|
"learning_rate": 6.748709589785385e-05, |
|
"loss": 1.5866, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 39.21951219512195, |
|
"grad_norm": 0.9275141954421997, |
|
"learning_rate": 6.743276283618582e-05, |
|
"loss": 1.6036, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 39.26829268292683, |
|
"grad_norm": 0.859588086605072, |
|
"learning_rate": 6.737842977451779e-05, |
|
"loss": 1.591, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 39.31707317073171, |
|
"grad_norm": 1.0168163776397705, |
|
"learning_rate": 6.732409671284977e-05, |
|
"loss": 1.6141, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 39.36585365853659, |
|
"grad_norm": 0.9833230972290039, |
|
"learning_rate": 6.726976365118176e-05, |
|
"loss": 1.5999, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 39.41463414634146, |
|
"grad_norm": 0.9076850414276123, |
|
"learning_rate": 6.721543058951371e-05, |
|
"loss": 1.5879, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 39.46341463414634, |
|
"grad_norm": 0.9242377877235413, |
|
"learning_rate": 6.716109752784569e-05, |
|
"loss": 1.6272, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 39.51219512195122, |
|
"grad_norm": 1.1659233570098877, |
|
"learning_rate": 6.710676446617768e-05, |
|
"loss": 1.6131, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 39.5609756097561, |
|
"grad_norm": 1.0663799047470093, |
|
"learning_rate": 6.705243140450966e-05, |
|
"loss": 1.626, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 39.609756097560975, |
|
"grad_norm": 1.089972734451294, |
|
"learning_rate": 6.699809834284163e-05, |
|
"loss": 1.609, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 39.65853658536585, |
|
"grad_norm": 0.8596948981285095, |
|
"learning_rate": 6.69437652811736e-05, |
|
"loss": 1.6103, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 39.707317073170735, |
|
"grad_norm": 0.9379211664199829, |
|
"learning_rate": 6.688943221950558e-05, |
|
"loss": 1.6289, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 39.75609756097561, |
|
"grad_norm": 0.9608719944953918, |
|
"learning_rate": 6.683509915783755e-05, |
|
"loss": 1.5971, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 39.80487804878049, |
|
"grad_norm": 0.9829626083374023, |
|
"learning_rate": 6.678076609616953e-05, |
|
"loss": 1.6082, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 39.853658536585364, |
|
"grad_norm": 1.1987886428833008, |
|
"learning_rate": 6.67264330345015e-05, |
|
"loss": 1.5899, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 39.90243902439025, |
|
"grad_norm": 0.9772175550460815, |
|
"learning_rate": 6.667209997283347e-05, |
|
"loss": 1.6133, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 39.951219512195124, |
|
"grad_norm": 0.9270687103271484, |
|
"learning_rate": 6.661776691116545e-05, |
|
"loss": 1.5937, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 1.2685972452163696, |
|
"learning_rate": 6.656343384949743e-05, |
|
"loss": 1.5987, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6576, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0566084384918213, |
|
"eval_rouge1": 0.6098, |
|
"eval_rouge2": 0.3483, |
|
"eval_rougeL": 0.5419, |
|
"eval_runtime": 26.6474, |
|
"eval_samples_per_second": 6.83, |
|
"eval_steps_per_second": 1.726, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 40.048780487804876, |
|
"grad_norm": 0.9441468119621277, |
|
"learning_rate": 6.650910078782939e-05, |
|
"loss": 1.5862, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 40.09756097560975, |
|
"grad_norm": 0.9096714854240417, |
|
"learning_rate": 6.645476772616137e-05, |
|
"loss": 1.552, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 40.146341463414636, |
|
"grad_norm": 0.980735719203949, |
|
"learning_rate": 6.640043466449335e-05, |
|
"loss": 1.5896, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 40.19512195121951, |
|
"grad_norm": 1.0142563581466675, |
|
"learning_rate": 6.634610160282532e-05, |
|
"loss": 1.6244, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 40.24390243902439, |
|
"grad_norm": 0.9127105474472046, |
|
"learning_rate": 6.62917685411573e-05, |
|
"loss": 1.5819, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 40.292682926829265, |
|
"grad_norm": 0.9310650825500488, |
|
"learning_rate": 6.623743547948927e-05, |
|
"loss": 1.6063, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 40.34146341463415, |
|
"grad_norm": 1.0724411010742188, |
|
"learning_rate": 6.618310241782125e-05, |
|
"loss": 1.5862, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 40.390243902439025, |
|
"grad_norm": 0.8463386297225952, |
|
"learning_rate": 6.612876935615322e-05, |
|
"loss": 1.5425, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 40.4390243902439, |
|
"grad_norm": 0.9898768067359924, |
|
"learning_rate": 6.60744362944852e-05, |
|
"loss": 1.5725, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 40.48780487804878, |
|
"grad_norm": 0.9412344694137573, |
|
"learning_rate": 6.602010323281717e-05, |
|
"loss": 1.5738, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 40.53658536585366, |
|
"grad_norm": 1.1098183393478394, |
|
"learning_rate": 6.596577017114914e-05, |
|
"loss": 1.6052, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 40.58536585365854, |
|
"grad_norm": 1.029123067855835, |
|
"learning_rate": 6.591143710948112e-05, |
|
"loss": 1.6101, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 40.63414634146341, |
|
"grad_norm": 1.137770175933838, |
|
"learning_rate": 6.58571040478131e-05, |
|
"loss": 1.6024, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 40.68292682926829, |
|
"grad_norm": 1.009940505027771, |
|
"learning_rate": 6.580277098614506e-05, |
|
"loss": 1.5886, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 40.73170731707317, |
|
"grad_norm": 0.953379213809967, |
|
"learning_rate": 6.574843792447704e-05, |
|
"loss": 1.6252, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 40.78048780487805, |
|
"grad_norm": 0.9754422903060913, |
|
"learning_rate": 6.569410486280903e-05, |
|
"loss": 1.5888, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 40.829268292682926, |
|
"grad_norm": 1.1647788286209106, |
|
"learning_rate": 6.5639771801141e-05, |
|
"loss": 1.5829, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 40.8780487804878, |
|
"grad_norm": 1.0146311521530151, |
|
"learning_rate": 6.558543873947296e-05, |
|
"loss": 1.606, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 40.926829268292686, |
|
"grad_norm": 0.9312621355056763, |
|
"learning_rate": 6.553110567780495e-05, |
|
"loss": 1.5791, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 40.97560975609756, |
|
"grad_norm": 0.8976138830184937, |
|
"learning_rate": 6.547677261613693e-05, |
|
"loss": 1.5977, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6604, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.050206184387207, |
|
"eval_rouge1": 0.6125, |
|
"eval_rouge2": 0.3508, |
|
"eval_rougeL": 0.5448, |
|
"eval_runtime": 26.7993, |
|
"eval_samples_per_second": 6.791, |
|
"eval_steps_per_second": 1.716, |
|
"step": 8405 |
|
}, |
|
{ |
|
"epoch": 41.02439024390244, |
|
"grad_norm": 0.9545244574546814, |
|
"learning_rate": 6.54224395544689e-05, |
|
"loss": 1.5558, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 41.073170731707314, |
|
"grad_norm": 0.8932915925979614, |
|
"learning_rate": 6.536810649280088e-05, |
|
"loss": 1.5799, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 41.1219512195122, |
|
"grad_norm": 0.8935472965240479, |
|
"learning_rate": 6.531377343113285e-05, |
|
"loss": 1.5513, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 41.170731707317074, |
|
"grad_norm": 0.9940924048423767, |
|
"learning_rate": 6.525944036946482e-05, |
|
"loss": 1.5354, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 41.21951219512195, |
|
"grad_norm": 0.9507209062576294, |
|
"learning_rate": 6.52051073077968e-05, |
|
"loss": 1.5698, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 41.26829268292683, |
|
"grad_norm": 0.9492302536964417, |
|
"learning_rate": 6.515077424612878e-05, |
|
"loss": 1.5804, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 41.31707317073171, |
|
"grad_norm": 0.840816855430603, |
|
"learning_rate": 6.509644118446074e-05, |
|
"loss": 1.5849, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 41.36585365853659, |
|
"grad_norm": 0.9821751713752747, |
|
"learning_rate": 6.504210812279272e-05, |
|
"loss": 1.5754, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 41.41463414634146, |
|
"grad_norm": 0.9300991892814636, |
|
"learning_rate": 6.49877750611247e-05, |
|
"loss": 1.5818, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 41.46341463414634, |
|
"grad_norm": 1.0535385608673096, |
|
"learning_rate": 6.493344199945667e-05, |
|
"loss": 1.5699, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 41.51219512195122, |
|
"grad_norm": 0.8814225196838379, |
|
"learning_rate": 6.487910893778864e-05, |
|
"loss": 1.5778, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 41.5609756097561, |
|
"grad_norm": 1.0759248733520508, |
|
"learning_rate": 6.482477587612062e-05, |
|
"loss": 1.5656, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 41.609756097560975, |
|
"grad_norm": 1.0118745565414429, |
|
"learning_rate": 6.477044281445259e-05, |
|
"loss": 1.5969, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 41.65853658536585, |
|
"grad_norm": 0.9958849549293518, |
|
"learning_rate": 6.471610975278457e-05, |
|
"loss": 1.5702, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 41.707317073170735, |
|
"grad_norm": 0.8765369057655334, |
|
"learning_rate": 6.466177669111654e-05, |
|
"loss": 1.5907, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 41.75609756097561, |
|
"grad_norm": 0.9264476299285889, |
|
"learning_rate": 6.460744362944852e-05, |
|
"loss": 1.5897, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 41.80487804878049, |
|
"grad_norm": 1.0101897716522217, |
|
"learning_rate": 6.45531105677805e-05, |
|
"loss": 1.5912, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 41.853658536585364, |
|
"grad_norm": 1.125571608543396, |
|
"learning_rate": 6.449877750611248e-05, |
|
"loss": 1.5849, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 41.90243902439025, |
|
"grad_norm": 0.9828618764877319, |
|
"learning_rate": 6.444444444444446e-05, |
|
"loss": 1.6028, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 41.951219512195124, |
|
"grad_norm": 1.0148931741714478, |
|
"learning_rate": 6.439011138277641e-05, |
|
"loss": 1.5818, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 1.1765875816345215, |
|
"learning_rate": 6.43357783211084e-05, |
|
"loss": 1.6187, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6561, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0694971084594727, |
|
"eval_rouge1": 0.61, |
|
"eval_rouge2": 0.3467, |
|
"eval_rougeL": 0.5411, |
|
"eval_runtime": 26.635, |
|
"eval_samples_per_second": 6.833, |
|
"eval_steps_per_second": 1.727, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 42.048780487804876, |
|
"grad_norm": 0.9942975640296936, |
|
"learning_rate": 6.428144525944038e-05, |
|
"loss": 1.5533, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 42.09756097560975, |
|
"grad_norm": 0.9553746581077576, |
|
"learning_rate": 6.422711219777235e-05, |
|
"loss": 1.5592, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 42.146341463414636, |
|
"grad_norm": 0.9394342303276062, |
|
"learning_rate": 6.417277913610432e-05, |
|
"loss": 1.5817, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 42.19512195121951, |
|
"grad_norm": 0.9168606400489807, |
|
"learning_rate": 6.41184460744363e-05, |
|
"loss": 1.5574, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 42.24390243902439, |
|
"grad_norm": 0.9688276052474976, |
|
"learning_rate": 6.406411301276827e-05, |
|
"loss": 1.5462, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 42.292682926829265, |
|
"grad_norm": 1.0756962299346924, |
|
"learning_rate": 6.400977995110025e-05, |
|
"loss": 1.5723, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 42.34146341463415, |
|
"grad_norm": 0.9371064901351929, |
|
"learning_rate": 6.395544688943222e-05, |
|
"loss": 1.5391, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 42.390243902439025, |
|
"grad_norm": 1.1569350957870483, |
|
"learning_rate": 6.39011138277642e-05, |
|
"loss": 1.5705, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 42.4390243902439, |
|
"grad_norm": 0.8656958341598511, |
|
"learning_rate": 6.384678076609617e-05, |
|
"loss": 1.5384, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 42.48780487804878, |
|
"grad_norm": 0.8878985643386841, |
|
"learning_rate": 6.379244770442815e-05, |
|
"loss": 1.5634, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 42.53658536585366, |
|
"grad_norm": 0.9635202288627625, |
|
"learning_rate": 6.373811464276012e-05, |
|
"loss": 1.5564, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 42.58536585365854, |
|
"grad_norm": 0.9100601077079773, |
|
"learning_rate": 6.368378158109209e-05, |
|
"loss": 1.5908, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 42.63414634146341, |
|
"grad_norm": 0.8707578778266907, |
|
"learning_rate": 6.362944851942407e-05, |
|
"loss": 1.5661, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 42.68292682926829, |
|
"grad_norm": 1.0029586553573608, |
|
"learning_rate": 6.357511545775605e-05, |
|
"loss": 1.5594, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 42.73170731707317, |
|
"grad_norm": 0.9894896745681763, |
|
"learning_rate": 6.352078239608802e-05, |
|
"loss": 1.5755, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 42.78048780487805, |
|
"grad_norm": 1.100540280342102, |
|
"learning_rate": 6.346644933441999e-05, |
|
"loss": 1.5841, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 42.829268292682926, |
|
"grad_norm": 0.9192934632301331, |
|
"learning_rate": 6.341211627275197e-05, |
|
"loss": 1.5381, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 42.8780487804878, |
|
"grad_norm": 0.964634120464325, |
|
"learning_rate": 6.335778321108394e-05, |
|
"loss": 1.5509, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 42.926829268292686, |
|
"grad_norm": 0.9472265839576721, |
|
"learning_rate": 6.330345014941593e-05, |
|
"loss": 1.5761, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 42.97560975609756, |
|
"grad_norm": 1.0623375177383423, |
|
"learning_rate": 6.32491170877479e-05, |
|
"loss": 1.564, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6605, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0630738735198975, |
|
"eval_rouge1": 0.6122, |
|
"eval_rouge2": 0.3516, |
|
"eval_rougeL": 0.545, |
|
"eval_runtime": 26.7171, |
|
"eval_samples_per_second": 6.812, |
|
"eval_steps_per_second": 1.722, |
|
"step": 8815 |
|
}, |
|
{ |
|
"epoch": 43.02439024390244, |
|
"grad_norm": 0.9268796443939209, |
|
"learning_rate": 6.319478402607986e-05, |
|
"loss": 1.5838, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 43.073170731707314, |
|
"grad_norm": 0.9908514022827148, |
|
"learning_rate": 6.314045096441185e-05, |
|
"loss": 1.5476, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 43.1219512195122, |
|
"grad_norm": 0.9008921980857849, |
|
"learning_rate": 6.308611790274383e-05, |
|
"loss": 1.5401, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 43.170731707317074, |
|
"grad_norm": 0.9314485788345337, |
|
"learning_rate": 6.30317848410758e-05, |
|
"loss": 1.5596, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 43.21951219512195, |
|
"grad_norm": 1.072072982788086, |
|
"learning_rate": 6.297745177940777e-05, |
|
"loss": 1.5731, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 43.26829268292683, |
|
"grad_norm": 0.9419031739234924, |
|
"learning_rate": 6.292311871773975e-05, |
|
"loss": 1.5651, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 43.31707317073171, |
|
"grad_norm": 0.9719817042350769, |
|
"learning_rate": 6.286878565607173e-05, |
|
"loss": 1.5481, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 43.36585365853659, |
|
"grad_norm": 0.9506922960281372, |
|
"learning_rate": 6.28144525944037e-05, |
|
"loss": 1.5288, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 43.41463414634146, |
|
"grad_norm": 0.8473609089851379, |
|
"learning_rate": 6.276011953273567e-05, |
|
"loss": 1.5502, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 43.46341463414634, |
|
"grad_norm": 0.8621916770935059, |
|
"learning_rate": 6.270578647106765e-05, |
|
"loss": 1.532, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 43.51219512195122, |
|
"grad_norm": 0.94191974401474, |
|
"learning_rate": 6.265145340939962e-05, |
|
"loss": 1.5396, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 43.5609756097561, |
|
"grad_norm": 0.983315110206604, |
|
"learning_rate": 6.25971203477316e-05, |
|
"loss": 1.602, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 43.609756097560975, |
|
"grad_norm": 0.9320430755615234, |
|
"learning_rate": 6.254278728606357e-05, |
|
"loss": 1.5246, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 43.65853658536585, |
|
"grad_norm": 0.859078049659729, |
|
"learning_rate": 6.248845422439554e-05, |
|
"loss": 1.5346, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 43.707317073170735, |
|
"grad_norm": 1.0229722261428833, |
|
"learning_rate": 6.243412116272752e-05, |
|
"loss": 1.5397, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 43.75609756097561, |
|
"grad_norm": 0.9220072031021118, |
|
"learning_rate": 6.23797881010595e-05, |
|
"loss": 1.5728, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 43.80487804878049, |
|
"grad_norm": 0.9612592458724976, |
|
"learning_rate": 6.232545503939147e-05, |
|
"loss": 1.5375, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 43.853658536585364, |
|
"grad_norm": 0.9555723071098328, |
|
"learning_rate": 6.227112197772344e-05, |
|
"loss": 1.5689, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 43.90243902439025, |
|
"grad_norm": 0.9540090560913086, |
|
"learning_rate": 6.221678891605542e-05, |
|
"loss": 1.5672, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 43.951219512195124, |
|
"grad_norm": 1.0556234121322632, |
|
"learning_rate": 6.21624558543874e-05, |
|
"loss": 1.555, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 1.2409921884536743, |
|
"learning_rate": 6.210812279271937e-05, |
|
"loss": 1.5469, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6587, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.0725717544555664, |
|
"eval_rouge1": 0.6119, |
|
"eval_rouge2": 0.3496, |
|
"eval_rougeL": 0.5434, |
|
"eval_runtime": 26.7294, |
|
"eval_samples_per_second": 6.809, |
|
"eval_steps_per_second": 1.721, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 44.048780487804876, |
|
"grad_norm": 0.9860386252403259, |
|
"learning_rate": 6.205378973105134e-05, |
|
"loss": 1.5527, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 44.09756097560975, |
|
"grad_norm": 0.9774211049079895, |
|
"learning_rate": 6.199945666938333e-05, |
|
"loss": 1.5325, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 44.146341463414636, |
|
"grad_norm": 1.153135895729065, |
|
"learning_rate": 6.19451236077153e-05, |
|
"loss": 1.5533, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 44.19512195121951, |
|
"grad_norm": 0.8926750421524048, |
|
"learning_rate": 6.189079054604728e-05, |
|
"loss": 1.534, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 44.24390243902439, |
|
"grad_norm": 0.9813703894615173, |
|
"learning_rate": 6.183645748437925e-05, |
|
"loss": 1.555, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 44.292682926829265, |
|
"grad_norm": 0.9816197156906128, |
|
"learning_rate": 6.178212442271121e-05, |
|
"loss": 1.5252, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 44.34146341463415, |
|
"grad_norm": 0.9646711349487305, |
|
"learning_rate": 6.17277913610432e-05, |
|
"loss": 1.5246, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 44.390243902439025, |
|
"grad_norm": 0.9579485654830933, |
|
"learning_rate": 6.167345829937518e-05, |
|
"loss": 1.5301, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 44.4390243902439, |
|
"grad_norm": 0.9191059470176697, |
|
"learning_rate": 6.161912523770715e-05, |
|
"loss": 1.5144, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 44.48780487804878, |
|
"grad_norm": 0.9516372680664062, |
|
"learning_rate": 6.156479217603912e-05, |
|
"loss": 1.5256, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 44.53658536585366, |
|
"grad_norm": 0.9884464740753174, |
|
"learning_rate": 6.15104591143711e-05, |
|
"loss": 1.5442, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 44.58536585365854, |
|
"grad_norm": 0.9394478797912598, |
|
"learning_rate": 6.145612605270308e-05, |
|
"loss": 1.5154, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 44.63414634146341, |
|
"grad_norm": 1.0759013891220093, |
|
"learning_rate": 6.140179299103505e-05, |
|
"loss": 1.5659, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 44.68292682926829, |
|
"grad_norm": 1.0106854438781738, |
|
"learning_rate": 6.134745992936702e-05, |
|
"loss": 1.5532, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 44.73170731707317, |
|
"grad_norm": 0.9932792782783508, |
|
"learning_rate": 6.1293126867699e-05, |
|
"loss": 1.5544, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 44.78048780487805, |
|
"grad_norm": 0.9034538865089417, |
|
"learning_rate": 6.123879380603097e-05, |
|
"loss": 1.5535, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 44.829268292682926, |
|
"grad_norm": 0.8972671031951904, |
|
"learning_rate": 6.118446074436295e-05, |
|
"loss": 1.503, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 44.8780487804878, |
|
"grad_norm": 1.041559100151062, |
|
"learning_rate": 6.113012768269492e-05, |
|
"loss": 1.5716, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 44.926829268292686, |
|
"grad_norm": 1.0077598094940186, |
|
"learning_rate": 6.107579462102689e-05, |
|
"loss": 1.5548, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 44.97560975609756, |
|
"grad_norm": 0.9414793848991394, |
|
"learning_rate": 6.102146155935887e-05, |
|
"loss": 1.5317, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6567, |
|
"eval_gen_len": 211.6593, |
|
"eval_loss": 2.076894521713257, |
|
"eval_rouge1": 0.6108, |
|
"eval_rouge2": 0.3478, |
|
"eval_rougeL": 0.5417, |
|
"eval_runtime": 26.7192, |
|
"eval_samples_per_second": 6.812, |
|
"eval_steps_per_second": 1.722, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 45.02439024390244, |
|
"grad_norm": 1.0775814056396484, |
|
"learning_rate": 6.096712849769085e-05, |
|
"loss": 1.5471, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 45.073170731707314, |
|
"grad_norm": 0.9766963124275208, |
|
"learning_rate": 6.091279543602282e-05, |
|
"loss": 1.4864, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 45.1219512195122, |
|
"grad_norm": 0.9466918706893921, |
|
"learning_rate": 6.08584623743548e-05, |
|
"loss": 1.5288, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 45.170731707317074, |
|
"grad_norm": 1.0558148622512817, |
|
"learning_rate": 6.0804129312686775e-05, |
|
"loss": 1.5422, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 45.21951219512195, |
|
"grad_norm": 1.0389299392700195, |
|
"learning_rate": 6.0749796251018744e-05, |
|
"loss": 1.5205, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 45.26829268292683, |
|
"grad_norm": 0.779005765914917, |
|
"learning_rate": 6.069546318935072e-05, |
|
"loss": 1.5206, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 45.31707317073171, |
|
"grad_norm": 0.9916539192199707, |
|
"learning_rate": 6.06411301276827e-05, |
|
"loss": 1.5342, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 45.36585365853659, |
|
"grad_norm": 0.9707236886024475, |
|
"learning_rate": 6.058679706601468e-05, |
|
"loss": 1.516, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 45.41463414634146, |
|
"grad_norm": 1.099131464958191, |
|
"learning_rate": 6.0532464004346646e-05, |
|
"loss": 1.5047, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 45.46341463414634, |
|
"grad_norm": 0.967131495475769, |
|
"learning_rate": 6.047813094267862e-05, |
|
"loss": 1.5228, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 45.51219512195122, |
|
"grad_norm": 1.0093587636947632, |
|
"learning_rate": 6.0423797881010604e-05, |
|
"loss": 1.5409, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 45.5609756097561, |
|
"grad_norm": 0.8259481191635132, |
|
"learning_rate": 6.0369464819342566e-05, |
|
"loss": 1.5176, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 45.609756097560975, |
|
"grad_norm": 0.9468622803688049, |
|
"learning_rate": 6.031513175767455e-05, |
|
"loss": 1.5416, |
|
"step": 9350 |
|
    },
    { "epoch": 45.65853658536585, "grad_norm": 0.9278303980827332, "learning_rate": 6.0260798696006524e-05, "loss": 1.5471, "step": 9360 },
    { "epoch": 45.707317073170735, "grad_norm": 0.8681600689888, "learning_rate": 6.020646563433849e-05, "loss": 1.5393, "step": 9370 },
    { "epoch": 45.75609756097561, "grad_norm": 0.863831639289856, "learning_rate": 6.0152132572670475e-05, "loss": 1.5301, "step": 9380 },
    { "epoch": 45.80487804878049, "grad_norm": 0.9817381501197815, "learning_rate": 6.009779951100245e-05, "loss": 1.5353, "step": 9390 },
    { "epoch": 45.853658536585364, "grad_norm": 0.9569991827011108, "learning_rate": 6.004346644933442e-05, "loss": 1.5303, "step": 9400 },
    { "epoch": 45.90243902439025, "grad_norm": 0.8829732537269592, "learning_rate": 5.9989133387666395e-05, "loss": 1.544, "step": 9410 },
    { "epoch": 45.951219512195124, "grad_norm": 0.989835798740387, "learning_rate": 5.993480032599838e-05, "loss": 1.5373, "step": 9420 },
    { "epoch": 46.0, "grad_norm": 1.108426570892334, "learning_rate": 5.988046726433035e-05, "loss": 1.51, "step": 9430 },
    { "epoch": 46.0, "eval_accuracy": 0.6558, "eval_gen_len": 211.6593, "eval_loss": 2.081815004348755, "eval_rouge1": 0.6101, "eval_rouge2": 0.347, "eval_rougeL": 0.5414, "eval_runtime": 26.6895, "eval_samples_per_second": 6.819, "eval_steps_per_second": 1.724, "step": 9430 },
    { "epoch": 46.048780487804876, "grad_norm": 0.9329161643981934, "learning_rate": 5.982613420266232e-05, "loss": 1.5037, "step": 9440 },
    { "epoch": 46.09756097560975, "grad_norm": 0.9613210558891296, "learning_rate": 5.97718011409943e-05, "loss": 1.5273, "step": 9450 },
    { "epoch": 46.146341463414636, "grad_norm": 1.005125880241394, "learning_rate": 5.971746807932628e-05, "loss": 1.512, "step": 9460 },
    { "epoch": 46.19512195121951, "grad_norm": 0.9762298464775085, "learning_rate": 5.966313501765824e-05, "loss": 1.4876, "step": 9470 },
    { "epoch": 46.24390243902439, "grad_norm": 1.0202457904815674, "learning_rate": 5.9608801955990224e-05, "loss": 1.5143, "step": 9480 },
    { "epoch": 46.292682926829265, "grad_norm": 0.8822451829910278, "learning_rate": 5.95544688943222e-05, "loss": 1.5268, "step": 9490 },
    { "epoch": 46.34146341463415, "grad_norm": 0.9687830805778503, "learning_rate": 5.950013583265417e-05, "loss": 1.5161, "step": 9500 },
    { "epoch": 46.390243902439025, "grad_norm": 0.8915550708770752, "learning_rate": 5.9445802770986144e-05, "loss": 1.5014, "step": 9510 },
    { "epoch": 46.4390243902439, "grad_norm": 0.9499079585075378, "learning_rate": 5.939146970931813e-05, "loss": 1.5192, "step": 9520 },
    { "epoch": 46.48780487804878, "grad_norm": 0.9280877113342285, "learning_rate": 5.9337136647650096e-05, "loss": 1.5136, "step": 9530 },
    { "epoch": 46.53658536585366, "grad_norm": 0.9028041958808899, "learning_rate": 5.928280358598207e-05, "loss": 1.5314, "step": 9540 },
    { "epoch": 46.58536585365854, "grad_norm": 1.0268795490264893, "learning_rate": 5.9228470524314053e-05, "loss": 1.5047, "step": 9550 },
    { "epoch": 46.63414634146341, "grad_norm": 1.0093835592269897, "learning_rate": 5.9174137462646015e-05, "loss": 1.5284, "step": 9560 },
    { "epoch": 46.68292682926829, "grad_norm": 1.0665066242218018, "learning_rate": 5.9119804400978e-05, "loss": 1.5319, "step": 9570 },
    { "epoch": 46.73170731707317, "grad_norm": 0.9907817244529724, "learning_rate": 5.9065471339309973e-05, "loss": 1.5348, "step": 9580 },
    { "epoch": 46.78048780487805, "grad_norm": 0.8871275186538696, "learning_rate": 5.9011138277641956e-05, "loss": 1.5202, "step": 9590 },
    { "epoch": 46.829268292682926, "grad_norm": 0.8575066328048706, "learning_rate": 5.895680521597392e-05, "loss": 1.5259, "step": 9600 },
    { "epoch": 46.8780487804878, "grad_norm": 0.9315159320831299, "learning_rate": 5.89024721543059e-05, "loss": 1.5551, "step": 9610 },
    { "epoch": 46.926829268292686, "grad_norm": 0.9398794770240784, "learning_rate": 5.8848139092637876e-05, "loss": 1.4994, "step": 9620 },
    { "epoch": 46.97560975609756, "grad_norm": 1.1729868650436401, "learning_rate": 5.8793806030969845e-05, "loss": 1.5174, "step": 9630 },
    { "epoch": 47.0, "eval_accuracy": 0.6592, "eval_gen_len": 211.6593, "eval_loss": 2.078704595565796, "eval_rouge1": 0.6124, "eval_rouge2": 0.3514, "eval_rougeL": 0.5439, "eval_runtime": 26.7177, "eval_samples_per_second": 6.812, "eval_steps_per_second": 1.722, "step": 9635 },
    { "epoch": 47.02439024390244, "grad_norm": 0.9085572957992554, "learning_rate": 5.873947296930182e-05, "loss": 1.5013, "step": 9640 },
    { "epoch": 47.073170731707314, "grad_norm": 1.0357688665390015, "learning_rate": 5.86851399076338e-05, "loss": 1.4971, "step": 9650 },
    { "epoch": 47.1219512195122, "grad_norm": 0.9050779342651367, "learning_rate": 5.863080684596577e-05, "loss": 1.5163, "step": 9660 },
    { "epoch": 47.170731707317074, "grad_norm": 0.9679107666015625, "learning_rate": 5.857647378429775e-05, "loss": 1.4896, "step": 9670 },
    { "epoch": 47.21951219512195, "grad_norm": 0.9459844827651978, "learning_rate": 5.852214072262972e-05, "loss": 1.51, "step": 9680 },
    { "epoch": 47.26829268292683, "grad_norm": 0.8712072372436523, "learning_rate": 5.846780766096169e-05, "loss": 1.5141, "step": 9690 },
    { "epoch": 47.31707317073171, "grad_norm": 1.0053471326828003, "learning_rate": 5.8413474599293674e-05, "loss": 1.5439, "step": 9700 },
    { "epoch": 47.36585365853659, "grad_norm": 1.0147236585617065, "learning_rate": 5.835914153762565e-05, "loss": 1.5161, "step": 9710 },
    { "epoch": 47.41463414634146, "grad_norm": 0.9281237125396729, "learning_rate": 5.830480847595763e-05, "loss": 1.4859, "step": 9720 },
    { "epoch": 47.46341463414634, "grad_norm": 0.9387296438217163, "learning_rate": 5.8250475414289594e-05, "loss": 1.5253, "step": 9730 },
    { "epoch": 47.51219512195122, "grad_norm": 1.0199874639511108, "learning_rate": 5.8196142352621576e-05, "loss": 1.5278, "step": 9740 },
    { "epoch": 47.5609756097561, "grad_norm": 0.9802501797676086, "learning_rate": 5.814180929095355e-05, "loss": 1.5018, "step": 9750 },
    { "epoch": 47.609756097560975, "grad_norm": 0.8037899732589722, "learning_rate": 5.808747622928552e-05, "loss": 1.5121, "step": 9760 },
    { "epoch": 47.65853658536585, "grad_norm": 1.0279395580291748, "learning_rate": 5.8033143167617496e-05, "loss": 1.4819, "step": 9770 },
    { "epoch": 47.707317073170735, "grad_norm": 1.021676778793335, "learning_rate": 5.797881010594948e-05, "loss": 1.5011, "step": 9780 },
    { "epoch": 47.75609756097561, "grad_norm": 1.1812975406646729, "learning_rate": 5.792447704428145e-05, "loss": 1.4819, "step": 9790 },
    { "epoch": 47.80487804878049, "grad_norm": 0.9932900071144104, "learning_rate": 5.787014398261342e-05, "loss": 1.5001, "step": 9800 },
    { "epoch": 47.853658536585364, "grad_norm": 1.0043509006500244, "learning_rate": 5.78158109209454e-05, "loss": 1.5159, "step": 9810 },
    { "epoch": 47.90243902439025, "grad_norm": 0.9138625860214233, "learning_rate": 5.776147785927737e-05, "loss": 1.5267, "step": 9820 },
    { "epoch": 47.951219512195124, "grad_norm": 0.9451781511306763, "learning_rate": 5.770714479760935e-05, "loss": 1.5051, "step": 9830 },
    { "epoch": 48.0, "grad_norm": 1.0806002616882324, "learning_rate": 5.7652811735941325e-05, "loss": 1.5357, "step": 9840 },
    { "epoch": 48.0, "eval_accuracy": 0.6595, "eval_gen_len": 211.6593, "eval_loss": 2.079742908477783, "eval_rouge1": 0.6128, "eval_rouge2": 0.351, "eval_rougeL": 0.5445, "eval_runtime": 26.732, "eval_samples_per_second": 6.808, "eval_steps_per_second": 1.721, "step": 9840 },
    { "epoch": 48.048780487804876, "grad_norm": 0.8942461609840393, "learning_rate": 5.7598478674273294e-05, "loss": 1.5143, "step": 9850 },
    { "epoch": 48.09756097560975, "grad_norm": 0.9439877867698669, "learning_rate": 5.754414561260527e-05, "loss": 1.4611, "step": 9860 },
    { "epoch": 48.146341463414636, "grad_norm": 0.9229232668876648, "learning_rate": 5.748981255093725e-05, "loss": 1.4988, "step": 9870 },
    { "epoch": 48.19512195121951, "grad_norm": 0.9288315176963806, "learning_rate": 5.743547948926923e-05, "loss": 1.4753, "step": 9880 },
    { "epoch": 48.24390243902439, "grad_norm": 1.0723158121109009, "learning_rate": 5.7381146427601196e-05, "loss": 1.5116, "step": 9890 },
    { "epoch": 48.292682926829265, "grad_norm": 0.9865169525146484, "learning_rate": 5.732681336593317e-05, "loss": 1.4924, "step": 9900 },
    { "epoch": 48.34146341463415, "grad_norm": 1.02310311794281, "learning_rate": 5.7272480304265154e-05, "loss": 1.4992, "step": 9910 },
    { "epoch": 48.390243902439025, "grad_norm": 1.032989263534546, "learning_rate": 5.721814724259712e-05, "loss": 1.5065, "step": 9920 },
    { "epoch": 48.4390243902439, "grad_norm": 0.9401241540908813, "learning_rate": 5.71638141809291e-05, "loss": 1.4799, "step": 9930 },
    { "epoch": 48.48780487804878, "grad_norm": 1.178525447845459, "learning_rate": 5.7109481119261074e-05, "loss": 1.5085, "step": 9940 },
    { "epoch": 48.53658536585366, "grad_norm": 0.8683713674545288, "learning_rate": 5.705514805759304e-05, "loss": 1.5123, "step": 9950 },
    { "epoch": 48.58536585365854, "grad_norm": 1.0342472791671753, "learning_rate": 5.7000814995925025e-05, "loss": 1.5292, "step": 9960 },
    { "epoch": 48.63414634146341, "grad_norm": 0.9925521016120911, "learning_rate": 5.6946481934257e-05, "loss": 1.4806, "step": 9970 },
    { "epoch": 48.68292682926829, "grad_norm": 0.9937178492546082, "learning_rate": 5.689214887258897e-05, "loss": 1.4674, "step": 9980 },
    { "epoch": 48.73170731707317, "grad_norm": 0.8783002495765686, "learning_rate": 5.6837815810920945e-05, "loss": 1.4889, "step": 9990 },
    { "epoch": 48.78048780487805, "grad_norm": 0.9060630798339844, "learning_rate": 5.678348274925293e-05, "loss": 1.4731, "step": 10000 },
    { "epoch": 48.829268292682926, "grad_norm": 0.9229355454444885, "learning_rate": 5.672914968758489e-05, "loss": 1.5158, "step": 10010 },
    { "epoch": 48.8780487804878, "grad_norm": 1.007168173789978, "learning_rate": 5.667481662591687e-05, "loss": 1.5212, "step": 10020 },
    { "epoch": 48.926829268292686, "grad_norm": 0.9485666155815125, "learning_rate": 5.662048356424885e-05, "loss": 1.5356, "step": 10030 },
    { "epoch": 48.97560975609756, "grad_norm": 1.065011978149414, "learning_rate": 5.656615050258083e-05, "loss": 1.4857, "step": 10040 },
    { "epoch": 49.0, "eval_accuracy": 0.6598, "eval_gen_len": 211.6593, "eval_loss": 2.087491273880005, "eval_rouge1": 0.6112, "eval_rouge2": 0.3522, "eval_rougeL": 0.5444, "eval_runtime": 26.6917, "eval_samples_per_second": 6.819, "eval_steps_per_second": 1.723, "step": 10045 },
    { "epoch": 49.02439024390244, "grad_norm": 0.9155561327934265, "learning_rate": 5.65118174409128e-05, "loss": 1.5131, "step": 10050 },
    { "epoch": 49.073170731707314, "grad_norm": 0.9302152991294861, "learning_rate": 5.6457484379244774e-05, "loss": 1.5032, "step": 10060 },
    { "epoch": 49.1219512195122, "grad_norm": 0.9523373246192932, "learning_rate": 5.640315131757675e-05, "loss": 1.4814, "step": 10070 },
    { "epoch": 49.170731707317074, "grad_norm": 1.1650649309158325, "learning_rate": 5.634881825590872e-05, "loss": 1.4777, "step": 10080 },
    { "epoch": 49.21951219512195, "grad_norm": 0.9776322245597839, "learning_rate": 5.62944851942407e-05, "loss": 1.4791, "step": 10090 },
    { "epoch": 49.26829268292683, "grad_norm": 1.0491796731948853, "learning_rate": 5.624015213257268e-05, "loss": 1.5079, "step": 10100 },
    { "epoch": 49.31707317073171, "grad_norm": 0.9312990307807922, "learning_rate": 5.6185819070904646e-05, "loss": 1.486, "step": 10110 },
    { "epoch": 49.36585365853659, "grad_norm": 1.1731209754943848, "learning_rate": 5.613148600923662e-05, "loss": 1.4841, "step": 10120 },
    { "epoch": 49.41463414634146, "grad_norm": 0.9213290810585022, "learning_rate": 5.6077152947568603e-05, "loss": 1.4767, "step": 10130 },
    { "epoch": 49.46341463414634, "grad_norm": 1.035176157951355, "learning_rate": 5.6022819885900565e-05, "loss": 1.4872, "step": 10140 },
    { "epoch": 49.51219512195122, "grad_norm": 1.0663214921951294, "learning_rate": 5.596848682423255e-05, "loss": 1.4926, "step": 10150 },
    { "epoch": 49.5609756097561, "grad_norm": 0.9391820430755615, "learning_rate": 5.5914153762564523e-05, "loss": 1.4914, "step": 10160 },
    { "epoch": 49.609756097560975, "grad_norm": 0.9385082721710205, "learning_rate": 5.5859820700896506e-05, "loss": 1.4833, "step": 10170 },
    { "epoch": 49.65853658536585, "grad_norm": 0.8373975157737732, "learning_rate": 5.580548763922847e-05, "loss": 1.4758, "step": 10180 },
    { "epoch": 49.707317073170735, "grad_norm": 0.9798086881637573, "learning_rate": 5.575115457756045e-05, "loss": 1.5062, "step": 10190 },
    { "epoch": 49.75609756097561, "grad_norm": 0.9505453109741211, "learning_rate": 5.5696821515892426e-05, "loss": 1.4867, "step": 10200 },
    { "epoch": 49.80487804878049, "grad_norm": 0.8706372976303101, "learning_rate": 5.5642488454224395e-05, "loss": 1.4573, "step": 10210 },
    { "epoch": 49.853658536585364, "grad_norm": 1.0630508661270142, "learning_rate": 5.558815539255638e-05, "loss": 1.4822, "step": 10220 },
    { "epoch": 49.90243902439025, "grad_norm": 0.9813632369041443, "learning_rate": 5.553382233088835e-05, "loss": 1.5155, "step": 10230 },
    { "epoch": 49.951219512195124, "grad_norm": 1.0495411157608032, "learning_rate": 5.547948926922032e-05, "loss": 1.4857, "step": 10240 },
    { "epoch": 50.0, "grad_norm": 1.2372345924377441, "learning_rate": 5.54251562075523e-05, "loss": 1.5093, "step": 10250 },
    { "epoch": 50.0, "eval_accuracy": 0.66, "eval_gen_len": 211.6593, "eval_loss": 2.0857725143432617, "eval_rouge1": 0.6128, "eval_rouge2": 0.3524, "eval_rougeL": 0.545, "eval_runtime": 26.6761, "eval_samples_per_second": 6.823, "eval_steps_per_second": 1.724, "step": 10250 }
  ],
"logging_steps": 10, |
|
"max_steps": 10250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 7 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3138571725673472e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|