|
{ |
|
"best_global_step": 2072, |
|
"best_metric": 89.937106918239, |
|
"best_model_checkpoint": "/data/hungnm/unisentiment/roberta-base-sentiment/checkpoint-2072", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 2800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08928571428571429, |
|
"grad_norm": 59.24269104003906, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 2.85, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 29.214595794677734, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 2.3363, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26785714285714285, |
|
"grad_norm": 22.542577743530273, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 2.4922, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 142.14141845703125, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 2.0449, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 7.237235069274902, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 1.827, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 7.419255256652832, |
|
"learning_rate": 4.999993577810563e-05, |
|
"loss": 1.6313, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 6.396734714508057, |
|
"learning_rate": 4.999921328558333e-05, |
|
"loss": 1.6582, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 10.179349899291992, |
|
"learning_rate": 4.999768804644796e-05, |
|
"loss": 1.766, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8035714285714286, |
|
"grad_norm": 4.080478191375732, |
|
"learning_rate": 4.9995360109676296e-05, |
|
"loss": 1.6039, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 46.95652389526367, |
|
"learning_rate": 4.999222955002041e-05, |
|
"loss": 1.7658, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9821428571428571, |
|
"grad_norm": 13.342621803283691, |
|
"learning_rate": 4.998829646800533e-05, |
|
"loss": 1.541, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.34518229961395264, |
|
"eval_macro_f1": 78.41773492091933, |
|
"eval_macro_precision": 86.07313432835821, |
|
"eval_macro_recall": 75.09860202167894, |
|
"eval_micro_f1": 85.84905660377359, |
|
"eval_micro_precision": 85.84905660377359, |
|
"eval_micro_recall": 85.84905660377359, |
|
"eval_runtime": 10.6756, |
|
"eval_samples_per_second": 148.938, |
|
"eval_steps_per_second": 2.342, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 20.603862762451172, |
|
"learning_rate": 4.9983560989925736e-05, |
|
"loss": 1.3594, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.1607142857142858, |
|
"grad_norm": 8.545742988586426, |
|
"learning_rate": 4.9978023267841994e-05, |
|
"loss": 1.3447, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 7.969589710235596, |
|
"learning_rate": 4.99716834795752e-05, |
|
"loss": 1.3035, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"grad_norm": 37.12427520751953, |
|
"learning_rate": 4.9964541828701506e-05, |
|
"loss": 1.2727, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 61.47677993774414, |
|
"learning_rate": 4.9956598544545566e-05, |
|
"loss": 1.4631, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5178571428571428, |
|
"grad_norm": 20.555511474609375, |
|
"learning_rate": 4.994785388217318e-05, |
|
"loss": 1.7768, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 19.720369338989258, |
|
"learning_rate": 4.993830812238311e-05, |
|
"loss": 1.4105, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6964285714285714, |
|
"grad_norm": 11.87168025970459, |
|
"learning_rate": 4.9927961571698064e-05, |
|
"loss": 1.2576, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 7.716609001159668, |
|
"learning_rate": 4.991681456235483e-05, |
|
"loss": 1.3186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 4.707287788391113, |
|
"learning_rate": 4.990486745229364e-05, |
|
"loss": 1.2502, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 7.120730400085449, |
|
"learning_rate": 4.989212062514664e-05, |
|
"loss": 1.0652, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.3161654770374298, |
|
"eval_macro_f1": 82.51917393751759, |
|
"eval_macro_precision": 84.85169367165287, |
|
"eval_macro_recall": 80.82915005991929, |
|
"eval_micro_f1": 87.42138364779875, |
|
"eval_micro_precision": 87.42138364779875, |
|
"eval_micro_recall": 87.42138364779875, |
|
"eval_runtime": 1.9934, |
|
"eval_samples_per_second": 797.637, |
|
"eval_steps_per_second": 12.541, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.0535714285714284, |
|
"grad_norm": 9.230934143066406, |
|
"learning_rate": 4.987857449022561e-05, |
|
"loss": 1.0412, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 4.535208225250244, |
|
"learning_rate": 4.9864229482508804e-05, |
|
"loss": 1.0646, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 39.12550354003906, |
|
"learning_rate": 4.984908606262696e-05, |
|
"loss": 1.0901, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 18.9006404876709, |
|
"learning_rate": 4.983314471684853e-05, |
|
"loss": 1.165, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.4107142857142856, |
|
"grad_norm": 5.734167098999023, |
|
"learning_rate": 4.9816405957064106e-05, |
|
"loss": 1.0594, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 16.50884437561035, |
|
"learning_rate": 4.9798870320769886e-05, |
|
"loss": 1.0566, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.5892857142857144, |
|
"grad_norm": 48.42763900756836, |
|
"learning_rate": 4.97805383710505e-05, |
|
"loss": 1.383, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 19.594017028808594, |
|
"learning_rate": 4.976141069656091e-05, |
|
"loss": 1.2805, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.767857142857143, |
|
"grad_norm": 4.824181079864502, |
|
"learning_rate": 4.974148791150746e-05, |
|
"loss": 1.0623, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 11.474513053894043, |
|
"learning_rate": 4.972077065562821e-05, |
|
"loss": 1.0732, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.946428571428571, |
|
"grad_norm": 17.615800857543945, |
|
"learning_rate": 4.96992595941724e-05, |
|
"loss": 1.0885, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.2910524904727936, |
|
"eval_macro_f1": 83.94523203683508, |
|
"eval_macro_precision": 84.81357128694967, |
|
"eval_macro_recall": 83.18583703199087, |
|
"eval_micro_f1": 88.0503144654088, |
|
"eval_micro_precision": 88.0503144654088, |
|
"eval_micro_recall": 88.0503144654088, |
|
"eval_runtime": 1.8143, |
|
"eval_samples_per_second": 876.376, |
|
"eval_steps_per_second": 13.78, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.0357142857142856, |
|
"grad_norm": 9.219614028930664, |
|
"learning_rate": 4.967695541787901e-05, |
|
"loss": 1.0449, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 11.528852462768555, |
|
"learning_rate": 4.965385884295467e-05, |
|
"loss": 0.8327, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.2142857142857144, |
|
"grad_norm": 14.702798843383789, |
|
"learning_rate": 4.96299706110506e-05, |
|
"loss": 0.8543, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.3035714285714284, |
|
"grad_norm": 9.77267837524414, |
|
"learning_rate": 4.960529148923884e-05, |
|
"loss": 1.0777, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 3.392857142857143, |
|
"grad_norm": 11.903849601745605, |
|
"learning_rate": 4.9579822269987574e-05, |
|
"loss": 1.111, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.482142857142857, |
|
"grad_norm": 15.278186798095703, |
|
"learning_rate": 4.955356377113574e-05, |
|
"loss": 0.8274, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 11.262117385864258, |
|
"learning_rate": 4.952651683586668e-05, |
|
"loss": 0.8345, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.6607142857142856, |
|
"grad_norm": 13.382967948913574, |
|
"learning_rate": 4.9498682332681174e-05, |
|
"loss": 0.6874, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 6.932016849517822, |
|
"learning_rate": 4.947006115536947e-05, |
|
"loss": 0.7483, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.8392857142857144, |
|
"grad_norm": 14.735459327697754, |
|
"learning_rate": 4.944065422298262e-05, |
|
"loss": 0.8449, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.928571428571429, |
|
"grad_norm": 7.518039703369141, |
|
"learning_rate": 4.9410462479802945e-05, |
|
"loss": 0.8368, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.28605297207832336, |
|
"eval_macro_f1": 83.79635460918196, |
|
"eval_macro_precision": 88.60881482037983, |
|
"eval_macro_recall": 80.95314249160404, |
|
"eval_micro_f1": 88.80503144654088, |
|
"eval_micro_precision": 88.80503144654088, |
|
"eval_micro_recall": 88.80503144654088, |
|
"eval_runtime": 1.9349, |
|
"eval_samples_per_second": 821.746, |
|
"eval_steps_per_second": 12.921, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 4.017857142857143, |
|
"grad_norm": 16.081928253173828, |
|
"learning_rate": 4.937948689531373e-05, |
|
"loss": 0.7979, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.107142857142857, |
|
"grad_norm": 7.138861179351807, |
|
"learning_rate": 4.934772846416812e-05, |
|
"loss": 0.5874, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.196428571428571, |
|
"grad_norm": 18.04113006591797, |
|
"learning_rate": 4.931518820615711e-05, |
|
"loss": 0.5545, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 13.751228332519531, |
|
"learning_rate": 4.928186716617686e-05, |
|
"loss": 0.5696, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.375, |
|
"grad_norm": 17.97528839111328, |
|
"learning_rate": 4.924776641419513e-05, |
|
"loss": 0.625, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 4.464285714285714, |
|
"grad_norm": 6.758862495422363, |
|
"learning_rate": 4.921288704521689e-05, |
|
"loss": 0.6494, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.553571428571429, |
|
"grad_norm": 39.63971710205078, |
|
"learning_rate": 4.917723017924921e-05, |
|
"loss": 0.7084, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.642857142857143, |
|
"grad_norm": 22.54784393310547, |
|
"learning_rate": 4.914079696126526e-05, |
|
"loss": 0.6685, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.732142857142857, |
|
"grad_norm": 17.557443618774414, |
|
"learning_rate": 4.910358856116752e-05, |
|
"loss": 0.6967, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.821428571428571, |
|
"grad_norm": 12.355552673339844, |
|
"learning_rate": 4.90656061737503e-05, |
|
"loss": 0.7881, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.910714285714286, |
|
"grad_norm": 14.7780179977417, |
|
"learning_rate": 4.90268510186613e-05, |
|
"loss": 0.6595, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 16.71040153503418, |
|
"learning_rate": 4.898732434036244e-05, |
|
"loss": 0.7777, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.2805473804473877, |
|
"eval_macro_f1": 85.35613362920841, |
|
"eval_macro_precision": 87.13597361085554, |
|
"eval_macro_recall": 83.9505608736378, |
|
"eval_micro_f1": 89.30817610062893, |
|
"eval_micro_precision": 89.30817610062893, |
|
"eval_micro_recall": 89.30817610062893, |
|
"eval_runtime": 1.8728, |
|
"eval_samples_per_second": 849.0, |
|
"eval_steps_per_second": 13.349, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.089285714285714, |
|
"grad_norm": 12.743489265441895, |
|
"learning_rate": 4.894702740808995e-05, |
|
"loss": 0.4128, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 5.178571428571429, |
|
"grad_norm": 19.04743766784668, |
|
"learning_rate": 4.8905961515813604e-05, |
|
"loss": 0.477, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.267857142857143, |
|
"grad_norm": 24.844810485839844, |
|
"learning_rate": 4.886412798219512e-05, |
|
"loss": 0.4719, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.357142857142857, |
|
"grad_norm": 9.876107215881348, |
|
"learning_rate": 4.882152815054587e-05, |
|
"loss": 0.4332, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.446428571428571, |
|
"grad_norm": 25.508865356445312, |
|
"learning_rate": 4.8778163388783724e-05, |
|
"loss": 0.4225, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 5.535714285714286, |
|
"grad_norm": 12.033214569091797, |
|
"learning_rate": 4.8734035089389115e-05, |
|
"loss": 0.5101, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.625, |
|
"grad_norm": 11.438920974731445, |
|
"learning_rate": 4.8689144669360375e-05, |
|
"loss": 0.4257, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 11.853082656860352, |
|
"learning_rate": 4.864349357016815e-05, |
|
"loss": 0.4271, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.803571428571429, |
|
"grad_norm": 12.522577285766602, |
|
"learning_rate": 4.8597083257709194e-05, |
|
"loss": 0.538, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 5.892857142857143, |
|
"grad_norm": 6.630044937133789, |
|
"learning_rate": 4.854991522225923e-05, |
|
"loss": 0.4855, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.982142857142857, |
|
"grad_norm": 8.849501609802246, |
|
"learning_rate": 4.850199097842517e-05, |
|
"loss": 0.4158, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.35284000635147095, |
|
"eval_macro_f1": 84.31737482203201, |
|
"eval_macro_precision": 85.56294653855629, |
|
"eval_macro_recall": 83.27826020133713, |
|
"eval_micro_f1": 88.42767295597484, |
|
"eval_micro_precision": 88.42767295597484, |
|
"eval_micro_recall": 88.42767295597484, |
|
"eval_runtime": 1.8262, |
|
"eval_samples_per_second": 870.648, |
|
"eval_steps_per_second": 13.689, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 6.071428571428571, |
|
"grad_norm": 10.551375389099121, |
|
"learning_rate": 4.84533120650964e-05, |
|
"loss": 0.2718, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.160714285714286, |
|
"grad_norm": 11.759309768676758, |
|
"learning_rate": 4.8403880045395434e-05, |
|
"loss": 0.2064, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 11.094610214233398, |
|
"learning_rate": 4.835369650662767e-05, |
|
"loss": 0.2482, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.339285714285714, |
|
"grad_norm": 18.329065322875977, |
|
"learning_rate": 4.8302763060230446e-05, |
|
"loss": 0.2556, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 6.428571428571429, |
|
"grad_norm": 10.95065975189209, |
|
"learning_rate": 4.825108134172131e-05, |
|
"loss": 0.318, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.517857142857143, |
|
"grad_norm": 17.075756072998047, |
|
"learning_rate": 4.819865301064545e-05, |
|
"loss": 0.2354, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 6.607142857142857, |
|
"grad_norm": 10.705339431762695, |
|
"learning_rate": 4.814547975052245e-05, |
|
"loss": 0.2294, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.696428571428571, |
|
"grad_norm": 31.16196632385254, |
|
"learning_rate": 4.8091563268792236e-05, |
|
"loss": 0.2385, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.785714285714286, |
|
"grad_norm": 15.710704803466797, |
|
"learning_rate": 4.803690529676019e-05, |
|
"loss": 0.3026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.875, |
|
"grad_norm": 22.431447982788086, |
|
"learning_rate": 4.798150758954164e-05, |
|
"loss": 0.3048, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 6.964285714285714, |
|
"grad_norm": 10.632715225219727, |
|
"learning_rate": 4.7925371926005435e-05, |
|
"loss": 0.3086, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.41216832399368286, |
|
"eval_macro_f1": 85.17808273905835, |
|
"eval_macro_precision": 88.20624434584586, |
|
"eval_macro_recall": 83.06807537576768, |
|
"eval_micro_f1": 89.43396226415095, |
|
"eval_micro_precision": 89.43396226415095, |
|
"eval_micro_recall": 89.43396226415095, |
|
"eval_runtime": 1.8458, |
|
"eval_samples_per_second": 861.394, |
|
"eval_steps_per_second": 13.544, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 7.053571428571429, |
|
"grad_norm": 11.026453971862793, |
|
"learning_rate": 4.786850010871684e-05, |
|
"loss": 0.221, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 19.100629806518555, |
|
"learning_rate": 4.781089396387968e-05, |
|
"loss": 0.1621, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.232142857142857, |
|
"grad_norm": 17.89957618713379, |
|
"learning_rate": 4.775255534127766e-05, |
|
"loss": 0.2228, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 7.321428571428571, |
|
"grad_norm": 11.095701217651367, |
|
"learning_rate": 4.7693486114215015e-05, |
|
"loss": 0.1461, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.410714285714286, |
|
"grad_norm": 56.87965393066406, |
|
"learning_rate": 4.76336881794563e-05, |
|
"loss": 0.3093, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 18.552824020385742, |
|
"learning_rate": 4.7573163457165534e-05, |
|
"loss": 0.3726, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.589285714285714, |
|
"grad_norm": 28.140094757080078, |
|
"learning_rate": 4.75119138908445e-05, |
|
"loss": 0.2765, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 7.678571428571429, |
|
"grad_norm": 10.527276039123535, |
|
"learning_rate": 4.744994144727036e-05, |
|
"loss": 0.1934, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.767857142857143, |
|
"grad_norm": 5.746723651885986, |
|
"learning_rate": 4.738724811643252e-05, |
|
"loss": 0.1292, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 7.857142857142857, |
|
"grad_norm": 12.251644134521484, |
|
"learning_rate": 4.732383591146869e-05, |
|
"loss": 0.1795, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.946428571428571, |
|
"grad_norm": 8.05550765991211, |
|
"learning_rate": 4.725970686860025e-05, |
|
"loss": 0.191, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.49135711789131165, |
|
"eval_macro_f1": 84.5839261475176, |
|
"eval_macro_precision": 86.58899167373744, |
|
"eval_macro_recall": 83.04834458680612, |
|
"eval_micro_f1": 88.80503144654088, |
|
"eval_micro_precision": 88.80503144654088, |
|
"eval_micro_recall": 88.80503144654088, |
|
"eval_runtime": 1.8149, |
|
"eval_samples_per_second": 876.068, |
|
"eval_steps_per_second": 13.775, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 8.035714285714286, |
|
"grad_norm": 10.807100296020508, |
|
"learning_rate": 4.719486304706687e-05, |
|
"loss": 0.1643, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.125, |
|
"grad_norm": 8.784672737121582, |
|
"learning_rate": 4.712930652906041e-05, |
|
"loss": 0.1144, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 8.214285714285714, |
|
"grad_norm": 18.46906280517578, |
|
"learning_rate": 4.7063039419658035e-05, |
|
"loss": 0.0868, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.303571428571429, |
|
"grad_norm": 6.650496959686279, |
|
"learning_rate": 4.699606384675459e-05, |
|
"loss": 0.1557, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 8.392857142857142, |
|
"grad_norm": 27.389806747436523, |
|
"learning_rate": 4.6928381960994336e-05, |
|
"loss": 0.1858, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.482142857142858, |
|
"grad_norm": 11.773507118225098, |
|
"learning_rate": 4.6859995935701855e-05, |
|
"loss": 0.1233, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 16.25447654724121, |
|
"learning_rate": 4.679090796681225e-05, |
|
"loss": 0.1306, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.660714285714286, |
|
"grad_norm": 14.601356506347656, |
|
"learning_rate": 4.6721120272800646e-05, |
|
"loss": 0.0961, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 9.302750587463379, |
|
"learning_rate": 4.665063509461097e-05, |
|
"loss": 0.1043, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.839285714285714, |
|
"grad_norm": 52.55154800415039, |
|
"learning_rate": 4.657945469558397e-05, |
|
"loss": 0.1102, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"grad_norm": 24.64861488342285, |
|
"learning_rate": 4.6507581361384537e-05, |
|
"loss": 0.1652, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.5782527327537537, |
|
"eval_macro_f1": 83.94912174439733, |
|
"eval_macro_precision": 85.74556651650795, |
|
"eval_macro_recall": 82.54905177982101, |
|
"eval_micro_f1": 88.30188679245283, |
|
"eval_micro_precision": 88.30188679245283, |
|
"eval_micro_recall": 88.30188679245283, |
|
"eval_runtime": 1.916, |
|
"eval_samples_per_second": 829.87, |
|
"eval_steps_per_second": 13.048, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 9.017857142857142, |
|
"grad_norm": 2.140636920928955, |
|
"learning_rate": 4.643501739992833e-05, |
|
"loss": 0.1599, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 9.107142857142858, |
|
"grad_norm": 14.48595905303955, |
|
"learning_rate": 4.6361765141307645e-05, |
|
"loss": 0.1669, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.196428571428571, |
|
"grad_norm": 18.363910675048828, |
|
"learning_rate": 4.628782693771659e-05, |
|
"loss": 0.1088, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 9.285714285714286, |
|
"grad_norm": 3.3701069355010986, |
|
"learning_rate": 4.6213205163375586e-05, |
|
"loss": 0.0675, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 14.012438774108887, |
|
"learning_rate": 4.613790221445511e-05, |
|
"loss": 0.0949, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 9.464285714285714, |
|
"grad_norm": 7.062801361083984, |
|
"learning_rate": 4.6061920508998735e-05, |
|
"loss": 0.182, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.553571428571429, |
|
"grad_norm": 18.400386810302734, |
|
"learning_rate": 4.59852624868455e-05, |
|
"loss": 0.2805, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 9.642857142857142, |
|
"grad_norm": 11.67214298248291, |
|
"learning_rate": 4.5907930609551584e-05, |
|
"loss": 0.089, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.732142857142858, |
|
"grad_norm": 18.16691017150879, |
|
"learning_rate": 4.582992736031123e-05, |
|
"loss": 0.1596, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 9.821428571428571, |
|
"grad_norm": 6.478634834289551, |
|
"learning_rate": 4.5751255243877015e-05, |
|
"loss": 0.1941, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.910714285714286, |
|
"grad_norm": 5.8572096824646, |
|
"learning_rate": 4.567191678647945e-05, |
|
"loss": 0.152, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 28.061464309692383, |
|
"learning_rate": 4.559191453574582e-05, |
|
"loss": 0.1177, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.5562991499900818, |
|
"eval_macro_f1": 83.77790670583238, |
|
"eval_macro_precision": 83.0857567614838, |
|
"eval_macro_recall": 84.57436534359611, |
|
"eval_micro_f1": 87.35849056603774, |
|
"eval_micro_precision": 87.35849056603774, |
|
"eval_micro_recall": 87.35849056603774, |
|
"eval_runtime": 2.3477, |
|
"eval_samples_per_second": 677.264, |
|
"eval_steps_per_second": 10.649, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.089285714285714, |
|
"grad_norm": 7.564888954162598, |
|
"learning_rate": 4.55112510606184e-05, |
|
"loss": 0.0341, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 10.178571428571429, |
|
"grad_norm": 8.534261703491211, |
|
"learning_rate": 4.542992895127195e-05, |
|
"loss": 0.0521, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.267857142857142, |
|
"grad_norm": 13.397907257080078, |
|
"learning_rate": 4.534795081903056e-05, |
|
"loss": 0.0723, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 10.357142857142858, |
|
"grad_norm": 22.610706329345703, |
|
"learning_rate": 4.526531929628379e-05, |
|
"loss": 0.1207, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.446428571428571, |
|
"grad_norm": 7.134080410003662, |
|
"learning_rate": 4.518203703640214e-05, |
|
"loss": 0.056, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 10.535714285714286, |
|
"grad_norm": 12.124205589294434, |
|
"learning_rate": 4.5098106713651846e-05, |
|
"loss": 0.1325, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.625, |
|
"grad_norm": 4.9503583908081055, |
|
"learning_rate": 4.5013531023109014e-05, |
|
"loss": 0.1044, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 10.714285714285714, |
|
"grad_norm": 19.115802764892578, |
|
"learning_rate": 4.4928312680573064e-05, |
|
"loss": 0.0675, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.803571428571429, |
|
"grad_norm": 18.239246368408203, |
|
"learning_rate": 4.484245442247955e-05, |
|
"loss": 0.1275, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 10.892857142857142, |
|
"grad_norm": 12.322056770324707, |
|
"learning_rate": 4.4755959005812256e-05, |
|
"loss": 0.1087, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.982142857142858, |
|
"grad_norm": 10.249615669250488, |
|
"learning_rate": 4.4668829208014705e-05, |
|
"loss": 0.1236, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.7119177579879761, |
|
"eval_macro_f1": 82.11289781379863, |
|
"eval_macro_precision": 80.6222110582464, |
|
"eval_macro_recall": 84.43458828074213, |
|
"eval_micro_f1": 85.47169811320755, |
|
"eval_micro_precision": 85.47169811320755, |
|
"eval_micro_recall": 85.47169811320755, |
|
"eval_runtime": 2.1826, |
|
"eval_samples_per_second": 728.484, |
|
"eval_steps_per_second": 11.454, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 11.071428571428571, |
|
"grad_norm": 7.2919440269470215, |
|
"learning_rate": 4.458106782690094e-05, |
|
"loss": 0.3132, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.160714285714286, |
|
"grad_norm": 4.609331130981445, |
|
"learning_rate": 4.4492677680565696e-05, |
|
"loss": 0.0392, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"grad_norm": 11.323241233825684, |
|
"learning_rate": 4.440366160729392e-05, |
|
"loss": 0.0863, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.339285714285714, |
|
"grad_norm": 7.759965896606445, |
|
"learning_rate": 4.431402246546962e-05, |
|
"loss": 0.0227, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 10.826987266540527, |
|
"learning_rate": 4.422376313348405e-05, |
|
"loss": 0.0385, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.517857142857142, |
|
"grad_norm": 6.147857189178467, |
|
"learning_rate": 4.413288650964337e-05, |
|
"loss": 0.0684, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 11.607142857142858, |
|
"grad_norm": 6.45582914352417, |
|
"learning_rate": 4.4041395512075464e-05, |
|
"loss": 0.0503, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.696428571428571, |
|
"grad_norm": 23.845369338989258, |
|
"learning_rate": 4.394929307863633e-05, |
|
"loss": 0.0553, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 11.785714285714286, |
|
"grad_norm": 11.343393325805664, |
|
"learning_rate": 4.385658216681569e-05, |
|
"loss": 0.0788, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.875, |
|
"grad_norm": 9.691651344299316, |
|
"learning_rate": 4.3763265753642055e-05, |
|
"loss": 0.1661, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 11.964285714285714, |
|
"grad_norm": 33.286651611328125, |
|
"learning_rate": 4.36693468355871e-05, |
|
"loss": 0.058, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.6721820831298828, |
|
"eval_macro_f1": 84.28322715184908, |
|
"eval_macro_precision": 85.15999991284815, |
|
"eval_macro_recall": 83.51606813145274, |
|
"eval_micro_f1": 88.30188679245283, |
|
"eval_micro_precision": 88.30188679245283, |
|
"eval_micro_recall": 88.30188679245283, |
|
"eval_runtime": 2.0425, |
|
"eval_samples_per_second": 778.444, |
|
"eval_steps_per_second": 12.24, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 12.053571428571429, |
|
"grad_norm": 1.1854312419891357, |
|
"learning_rate": 4.357482842846946e-05, |
|
"loss": 0.0744, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 12.142857142857142, |
|
"grad_norm": 13.661476135253906, |
|
"learning_rate": 4.3479713567357886e-05, |
|
"loss": 0.0436, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.232142857142858, |
|
"grad_norm": 9.265774726867676, |
|
"learning_rate": 4.338400530647382e-05, |
|
"loss": 0.077, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 12.321428571428571, |
|
"grad_norm": 1.9117738008499146, |
|
"learning_rate": 4.328770671909323e-05, |
|
"loss": 0.0637, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.410714285714286, |
|
"grad_norm": 10.00926399230957, |
|
"learning_rate": 4.319082089744804e-05, |
|
"loss": 0.0254, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 9.133126258850098, |
|
"learning_rate": 4.309335095262676e-05, |
|
"loss": 0.0579, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.589285714285714, |
|
"grad_norm": 12.192875862121582, |
|
"learning_rate": 4.299530001447459e-05, |
|
"loss": 0.0787, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 12.678571428571429, |
|
"grad_norm": 9.46296501159668, |
|
"learning_rate": 4.2896671231492966e-05, |
|
"loss": 0.0822, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.767857142857142, |
|
"grad_norm": 20.78971290588379, |
|
"learning_rate": 4.27974677707384e-05, |
|
"loss": 0.0967, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"grad_norm": 4.571549415588379, |
|
"learning_rate": 4.269769281772082e-05, |
|
"loss": 0.1071, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.946428571428571, |
|
"grad_norm": 14.227160453796387, |
|
"learning_rate": 4.259734957630127e-05, |
|
"loss": 0.0767, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.663281261920929, |
|
"eval_macro_f1": 84.36653757838053, |
|
"eval_macro_precision": 86.22744226866327, |
|
"eval_macro_recall": 82.9215483061637, |
|
"eval_micro_f1": 88.61635220125787, |
|
"eval_micro_precision": 88.61635220125787, |
|
"eval_micro_recall": 88.61635220125787, |
|
"eval_runtime": 1.9979, |
|
"eval_samples_per_second": 795.816, |
|
"eval_steps_per_second": 12.513, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 13.035714285714286, |
|
"grad_norm": 9.426419258117676, |
|
"learning_rate": 4.2496441268589046e-05, |
|
"loss": 0.0781, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 13.125, |
|
"grad_norm": 19.891582489013672, |
|
"learning_rate": 4.239497113483819e-05, |
|
"loss": 0.0603, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 13.214285714285714, |
|
"grad_norm": 6.893115043640137, |
|
"learning_rate": 4.22929424333435e-05, |
|
"loss": 0.0334, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.303571428571429, |
|
"grad_norm": 3.4693875312805176, |
|
"learning_rate": 4.219035844033583e-05, |
|
"loss": 0.0515, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 13.392857142857142, |
|
"grad_norm": 9.117530822753906, |
|
"learning_rate": 4.208722244987698e-05, |
|
"loss": 0.0438, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.482142857142858, |
|
"grad_norm": 7.665452480316162, |
|
"learning_rate": 4.198353777375384e-05, |
|
"loss": 0.0323, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 13.571428571428571, |
|
"grad_norm": 9.480864524841309, |
|
"learning_rate": 4.187930774137209e-05, |
|
"loss": 0.04, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.660714285714286, |
|
"grad_norm": 8.460432052612305, |
|
"learning_rate": 4.1774535699649255e-05, |
|
"loss": 0.035, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"grad_norm": 0.8143876791000366, |
|
"learning_rate": 4.166922501290729e-05, |
|
"loss": 0.0417, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.839285714285714, |
|
"grad_norm": 18.344676971435547, |
|
"learning_rate": 4.156337906276449e-05, |
|
"loss": 0.1389, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 13.928571428571429, |
|
"grad_norm": 15.893628120422363, |
|
"learning_rate": 4.145700124802693e-05, |
|
"loss": 0.0607, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.6969339847564697, |
|
"eval_macro_f1": 85.3983643196325, |
|
"eval_macro_precision": 85.17815944629582, |
|
"eval_macro_recall": 85.62705485782409, |
|
"eval_micro_f1": 88.80503144654088, |
|
"eval_micro_precision": 88.80503144654088, |
|
"eval_micro_recall": 88.80503144654088, |
|
"eval_runtime": 2.0363, |
|
"eval_samples_per_second": 780.832, |
|
"eval_steps_per_second": 12.277, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 14.017857142857142, |
|
"grad_norm": 3.4685308933258057, |
|
"learning_rate": 4.135009498457931e-05, |
|
"loss": 0.0951, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 14.107142857142858, |
|
"grad_norm": 5.312774658203125, |
|
"learning_rate": 4.124266370527531e-05, |
|
"loss": 0.017, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 14.196428571428571, |
|
"grad_norm": 16.61371421813965, |
|
"learning_rate": 4.11347108598273e-05, |
|
"loss": 0.0694, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.9555211663246155, |
|
"learning_rate": 4.1026239914695617e-05, |
|
"loss": 0.016, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.375, |
|
"grad_norm": 11.234779357910156, |
|
"learning_rate": 4.0917254352977206e-05, |
|
"loss": 0.0538, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 14.464285714285714, |
|
"grad_norm": 21.127065658569336, |
|
"learning_rate": 4.0807757674293834e-05, |
|
"loss": 0.1221, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.553571428571429, |
|
"grad_norm": 19.199129104614258, |
|
"learning_rate": 4.069775339467966e-05, |
|
"loss": 0.1065, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 14.642857142857142, |
|
"grad_norm": 20.038087844848633, |
|
"learning_rate": 4.058724504646834e-05, |
|
"loss": 0.0733, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.732142857142858, |
|
"grad_norm": 9.910551071166992, |
|
"learning_rate": 4.047623617817965e-05, |
|
"loss": 0.0645, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 14.821428571428571, |
|
"grad_norm": 13.347238540649414, |
|
"learning_rate": 4.0364730354405475e-05, |
|
"loss": 0.1127, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.910714285714286, |
|
"grad_norm": 39.92618942260742, |
|
"learning_rate": 4.0252731155695396e-05, |
|
"loss": 0.0883, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 8.375712394714355, |
|
"learning_rate": 4.014024217844167e-05, |
|
"loss": 0.066, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.9945361614227295, |
|
"eval_macro_f1": 83.19661865450335, |
|
"eval_macro_precision": 89.30070883315157, |
|
"eval_macro_recall": 79.93991455529917, |
|
"eval_micro_f1": 88.61635220125787, |
|
"eval_micro_precision": 88.61635220125787, |
|
"eval_micro_recall": 88.61635220125787, |
|
"eval_runtime": 1.747, |
|
"eval_samples_per_second": 910.121, |
|
"eval_steps_per_second": 14.31, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 15.089285714285714, |
|
"grad_norm": 1.6275001764297485, |
|
"learning_rate": 4.0027267034763796e-05, |
|
"loss": 0.0499, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 15.178571428571429, |
|
"grad_norm": 11.117130279541016, |
|
"learning_rate": 3.9913809352392474e-05, |
|
"loss": 0.0465, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.267857142857142, |
|
"grad_norm": 1.5368372201919556, |
|
"learning_rate": 3.979987277455317e-05, |
|
"loss": 0.031, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 15.357142857142858, |
|
"grad_norm": 2.8059964179992676, |
|
"learning_rate": 3.9685460959849105e-05, |
|
"loss": 0.0134, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.446428571428571, |
|
"grad_norm": 0.37871724367141724, |
|
"learning_rate": 3.9570577582143756e-05, |
|
"loss": 0.026, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 15.535714285714286, |
|
"grad_norm": 4.849483489990234, |
|
"learning_rate": 3.945522633044289e-05, |
|
"loss": 0.0582, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 4.785881996154785, |
|
"learning_rate": 3.933941090877615e-05, |
|
"loss": 0.0239, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 15.714285714285714, |
|
"grad_norm": 5.867705821990967, |
|
"learning_rate": 3.9223135036078064e-05, |
|
"loss": 0.0506, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.803571428571429, |
|
"grad_norm": 5.988280296325684, |
|
"learning_rate": 3.910640244606863e-05, |
|
"loss": 0.0406, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 15.892857142857142, |
|
"grad_norm": 10.76251220703125, |
|
"learning_rate": 3.898921688713346e-05, |
|
"loss": 0.033, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.982142857142858, |
|
"grad_norm": 10.54697322845459, |
|
"learning_rate": 3.88715821222034e-05, |
|
"loss": 0.0474, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.8277662992477417, |
|
"eval_macro_f1": 84.62665166292602, |
|
"eval_macro_precision": 84.3093535297127, |
|
"eval_macro_recall": 84.96264650110804, |
|
"eval_micro_f1": 88.17610062893083, |
|
"eval_micro_precision": 88.17610062893083, |
|
"eval_micro_recall": 88.17610062893083, |
|
"eval_runtime": 1.7038, |
|
"eval_samples_per_second": 933.188, |
|
"eval_steps_per_second": 14.673, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 16.071428571428573, |
|
"grad_norm": 0.2526906728744507, |
|
"learning_rate": 3.875350192863368e-05, |
|
"loss": 0.028, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.160714285714285, |
|
"grad_norm": 4.583995819091797, |
|
"learning_rate": 3.863498009808263e-05, |
|
"loss": 0.0262, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"grad_norm": 2.2302212715148926, |
|
"learning_rate": 3.851602043638994e-05, |
|
"loss": 0.0297, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 16.339285714285715, |
|
"grad_norm": 4.950682163238525, |
|
"learning_rate": 3.839662676345445e-05, |
|
"loss": 0.0802, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 16.428571428571427, |
|
"grad_norm": 1.306373953819275, |
|
"learning_rate": 3.827680291311143e-05, |
|
"loss": 0.0683, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.517857142857142, |
|
"grad_norm": 3.978598117828369, |
|
"learning_rate": 3.81565527330096e-05, |
|
"loss": 0.0467, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 16.607142857142858, |
|
"grad_norm": 31.76022720336914, |
|
"learning_rate": 3.803588008448745e-05, |
|
"loss": 0.0599, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.696428571428573, |
|
"grad_norm": 10.791604042053223, |
|
"learning_rate": 3.791478884244931e-05, |
|
"loss": 0.0811, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 16.785714285714285, |
|
"grad_norm": 7.506629467010498, |
|
"learning_rate": 3.7793282895240926e-05, |
|
"loss": 0.2063, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.875, |
|
"grad_norm": 2.9035871028900146, |
|
"learning_rate": 3.767136614452458e-05, |
|
"loss": 0.1391, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 16.964285714285715, |
|
"grad_norm": 7.189354419708252, |
|
"learning_rate": 3.75490425051538e-05, |
|
"loss": 0.0634, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.7015231847763062, |
|
"eval_macro_f1": 83.68481902838367, |
|
"eval_macro_precision": 83.01537542916853, |
|
"eval_macro_recall": 84.45151522074599, |
|
"eval_micro_f1": 87.29559748427673, |
|
"eval_micro_precision": 87.29559748427673, |
|
"eval_micro_recall": 87.29559748427673, |
|
"eval_runtime": 1.6913, |
|
"eval_samples_per_second": 940.118, |
|
"eval_steps_per_second": 14.782, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 17.053571428571427, |
|
"grad_norm": 3.729951858520508, |
|
"learning_rate": 3.7426315905047696e-05, |
|
"loss": 0.0609, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 2.013429880142212, |
|
"learning_rate": 3.7303190285064776e-05, |
|
"loss": 0.0077, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.232142857142858, |
|
"grad_norm": 1.032761573791504, |
|
"learning_rate": 3.717966959887643e-05, |
|
"loss": 0.0287, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 17.321428571428573, |
|
"grad_norm": 10.677305221557617, |
|
"learning_rate": 3.705575781283999e-05, |
|
"loss": 0.0242, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 17.410714285714285, |
|
"grad_norm": 3.170926809310913, |
|
"learning_rate": 3.6931458905871314e-05, |
|
"loss": 0.0576, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 1.3387705087661743, |
|
"learning_rate": 3.680677686931707e-05, |
|
"loss": 0.0022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.589285714285715, |
|
"grad_norm": 8.100290298461914, |
|
"learning_rate": 3.668171570682655e-05, |
|
"loss": 0.0199, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 17.678571428571427, |
|
"grad_norm": 4.04311990737915, |
|
"learning_rate": 3.6556279434223116e-05, |
|
"loss": 0.0149, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.767857142857142, |
|
"grad_norm": 0.5880358815193176, |
|
"learning_rate": 3.6430472079375234e-05, |
|
"loss": 0.0169, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 17.857142857142858, |
|
"grad_norm": 1.5214190483093262, |
|
"learning_rate": 3.6304297682067144e-05, |
|
"loss": 0.0209, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.946428571428573, |
|
"grad_norm": 8.436260223388672, |
|
"learning_rate": 3.617776029386916e-05, |
|
"loss": 0.0188, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.9059441089630127, |
|
"eval_macro_f1": 85.13226593607345, |
|
"eval_macro_precision": 85.78507737593169, |
|
"eval_macro_recall": 84.54113454113454, |
|
"eval_micro_f1": 88.86792452830188, |
|
"eval_micro_precision": 88.86792452830188, |
|
"eval_micro_recall": 88.86792452830188, |
|
"eval_runtime": 1.7254, |
|
"eval_samples_per_second": 921.533, |
|
"eval_steps_per_second": 14.49, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 18.035714285714285, |
|
"grad_norm": 0.06204601749777794, |
|
"learning_rate": 3.605086397800753e-05, |
|
"loss": 0.0242, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 18.125, |
|
"grad_norm": 0.5178263783454895, |
|
"learning_rate": 3.592361280923399e-05, |
|
"loss": 0.0073, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 18.214285714285715, |
|
"grad_norm": 2.0144951343536377, |
|
"learning_rate": 3.579601087369492e-05, |
|
"loss": 0.0149, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.303571428571427, |
|
"grad_norm": 1.788545846939087, |
|
"learning_rate": 3.566806226880012e-05, |
|
"loss": 0.0193, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 18.392857142857142, |
|
"grad_norm": 5.27187442779541, |
|
"learning_rate": 3.553977110309125e-05, |
|
"loss": 0.0089, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 18.482142857142858, |
|
"grad_norm": 0.5820537209510803, |
|
"learning_rate": 3.5411141496109904e-05, |
|
"loss": 0.0248, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 18.571428571428573, |
|
"grad_norm": 5.2609710693359375, |
|
"learning_rate": 3.5282177578265296e-05, |
|
"loss": 0.0329, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.660714285714285, |
|
"grad_norm": 9.395613670349121, |
|
"learning_rate": 3.5152883490701684e-05, |
|
"loss": 0.0277, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 16.66202735900879, |
|
"learning_rate": 3.502326338516534e-05, |
|
"loss": 0.035, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.839285714285715, |
|
"grad_norm": 4.464576721191406, |
|
"learning_rate": 3.48933214238713e-05, |
|
"loss": 0.0427, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 18.928571428571427, |
|
"grad_norm": 2.8455142974853516, |
|
"learning_rate": 3.476306177936961e-05, |
|
"loss": 0.028, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.9811861515045166, |
|
"eval_macro_f1": 85.30339277946933, |
|
"eval_macro_precision": 87.2576209004239, |
|
"eval_macro_recall": 83.78939148169917, |
|
"eval_micro_f1": 89.30817610062893, |
|
"eval_micro_precision": 89.30817610062893, |
|
"eval_micro_recall": 89.30817610062893, |
|
"eval_runtime": 1.8833, |
|
"eval_samples_per_second": 844.274, |
|
"eval_steps_per_second": 13.275, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 19.017857142857142, |
|
"grad_norm": 0.4936154782772064, |
|
"learning_rate": 3.463248863441145e-05, |
|
"loss": 0.0573, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 19.107142857142858, |
|
"grad_norm": 7.516551971435547, |
|
"learning_rate": 3.450160618181476e-05, |
|
"loss": 0.0142, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 19.196428571428573, |
|
"grad_norm": 0.28197282552719116, |
|
"learning_rate": 3.43704186243296e-05, |
|
"loss": 0.0059, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 19.285714285714285, |
|
"grad_norm": 0.0721740797162056, |
|
"learning_rate": 3.4238930174503245e-05, |
|
"loss": 0.0043, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 19.375, |
|
"grad_norm": 11.249062538146973, |
|
"learning_rate": 3.4107145054544857e-05, |
|
"loss": 0.0968, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 19.464285714285715, |
|
"grad_norm": 1.9606690406799316, |
|
"learning_rate": 3.3975067496189965e-05, |
|
"loss": 0.0169, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 19.553571428571427, |
|
"grad_norm": 16.80199432373047, |
|
"learning_rate": 3.3842701740564534e-05, |
|
"loss": 0.0422, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 19.642857142857142, |
|
"grad_norm": 14.884848594665527, |
|
"learning_rate": 3.37100520380488e-05, |
|
"loss": 0.0665, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.732142857142858, |
|
"grad_norm": 8.680991172790527, |
|
"learning_rate": 3.357712264814077e-05, |
|
"loss": 0.0675, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 19.821428571428573, |
|
"grad_norm": 4.685244560241699, |
|
"learning_rate": 3.344391783931947e-05, |
|
"loss": 0.0494, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.910714285714285, |
|
"grad_norm": 10.966636657714844, |
|
"learning_rate": 3.331044188890788e-05, |
|
"loss": 0.0193, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 18.55583381652832, |
|
"learning_rate": 3.3176699082935545e-05, |
|
"loss": 0.0704, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.9311222434043884, |
|
"eval_macro_f1": 84.58923756150028, |
|
"eval_macro_precision": 84.91129891883661, |
|
"eval_macro_recall": 84.28359582205735, |
|
"eval_micro_f1": 88.36477987421384, |
|
"eval_micro_precision": 88.36477987421384, |
|
"eval_micro_recall": 88.36477987421384, |
|
"eval_runtime": 1.7297, |
|
"eval_samples_per_second": 919.235, |
|
"eval_steps_per_second": 14.453, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 20.089285714285715, |
|
"grad_norm": 0.6181861758232117, |
|
"learning_rate": 3.304269371600099e-05, |
|
"loss": 0.0264, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 20.178571428571427, |
|
"grad_norm": 0.6055905818939209, |
|
"learning_rate": 3.290843009113382e-05, |
|
"loss": 0.0312, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 20.267857142857142, |
|
"grad_norm": 4.4057111740112305, |
|
"learning_rate": 3.277391251965649e-05, |
|
"loss": 0.0124, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 20.357142857142858, |
|
"grad_norm": 3.0049655437469482, |
|
"learning_rate": 3.263914532104593e-05, |
|
"loss": 0.0175, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 20.446428571428573, |
|
"grad_norm": 10.01473331451416, |
|
"learning_rate": 3.250413282279482e-05, |
|
"loss": 0.0172, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 20.535714285714285, |
|
"grad_norm": 3.3975746631622314, |
|
"learning_rate": 3.2368879360272606e-05, |
|
"loss": 0.0223, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.625, |
|
"grad_norm": 3.1504733562469482, |
|
"learning_rate": 3.223338927658632e-05, |
|
"loss": 0.0046, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 20.714285714285715, |
|
"grad_norm": 7.759596347808838, |
|
"learning_rate": 3.20976669224411e-05, |
|
"loss": 0.0194, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.803571428571427, |
|
"grad_norm": 2.1500484943389893, |
|
"learning_rate": 3.196171665600051e-05, |
|
"loss": 0.0087, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 20.892857142857142, |
|
"grad_norm": 3.8775603771209717, |
|
"learning_rate": 3.182554284274654e-05, |
|
"loss": 0.0191, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.982142857142858, |
|
"grad_norm": 5.29668664932251, |
|
"learning_rate": 3.1689149855339496e-05, |
|
"loss": 0.0363, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.9204599261283875, |
|
"eval_macro_f1": 85.419711590922, |
|
"eval_macro_precision": 84.96998284734134, |
|
"eval_macro_recall": 85.90712821482052, |
|
"eval_micro_f1": 88.74213836477988, |
|
"eval_micro_precision": 88.74213836477988, |
|
"eval_micro_recall": 88.74213836477988, |
|
"eval_runtime": 1.7455, |
|
"eval_samples_per_second": 910.893, |
|
"eval_steps_per_second": 14.322, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 21.071428571428573, |
|
"grad_norm": 1.5591216087341309, |
|
"learning_rate": 3.1552542073477555e-05, |
|
"loss": 0.0155, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 21.160714285714285, |
|
"grad_norm": 11.346221923828125, |
|
"learning_rate": 3.141572388375612e-05, |
|
"loss": 0.0071, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"grad_norm": 0.09788035601377487, |
|
"learning_rate": 3.127869967952698e-05, |
|
"loss": 0.0172, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 21.339285714285715, |
|
"grad_norm": 0.4548446238040924, |
|
"learning_rate": 3.114147386075724e-05, |
|
"loss": 0.0103, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 21.428571428571427, |
|
"grad_norm": 16.57025718688965, |
|
"learning_rate": 3.1004050833887985e-05, |
|
"loss": 0.0392, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.517857142857142, |
|
"grad_norm": 1.1993194818496704, |
|
"learning_rate": 3.0866435011692885e-05, |
|
"loss": 0.025, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 21.607142857142858, |
|
"grad_norm": 1.881464958190918, |
|
"learning_rate": 3.072863081313639e-05, |
|
"loss": 0.0096, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.696428571428573, |
|
"grad_norm": 13.144051551818848, |
|
"learning_rate": 3.05906426632319e-05, |
|
"loss": 0.0171, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 21.785714285714285, |
|
"grad_norm": 0.2325822114944458, |
|
"learning_rate": 3.0452474992899643e-05, |
|
"loss": 0.0099, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"grad_norm": 1.384522557258606, |
|
"learning_rate": 3.0314132238824415e-05, |
|
"loss": 0.0126, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 21.964285714285715, |
|
"grad_norm": 0.3896070718765259, |
|
"learning_rate": 3.017561884331311e-05, |
|
"loss": 0.0025, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.9775845408439636, |
|
"eval_macro_f1": 85.79642633816226, |
|
"eval_macro_precision": 87.86862854659465, |
|
"eval_macro_recall": 84.20415343492267, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 1.7115, |
|
"eval_samples_per_second": 929.005, |
|
"eval_steps_per_second": 14.607, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 22.053571428571427, |
|
"grad_norm": 15.109649658203125, |
|
"learning_rate": 3.003693925415204e-05, |
|
"loss": 0.0147, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 22.142857142857142, |
|
"grad_norm": 0.29477667808532715, |
|
"learning_rate": 2.989809792446417e-05, |
|
"loss": 0.0515, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 22.232142857142858, |
|
"grad_norm": 0.05692288279533386, |
|
"learning_rate": 2.9759099312566076e-05, |
|
"loss": 0.0004, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 22.321428571428573, |
|
"grad_norm": 2.0338664054870605, |
|
"learning_rate": 2.9619947881824818e-05, |
|
"loss": 0.0109, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.410714285714285, |
|
"grad_norm": 0.07057174295186996, |
|
"learning_rate": 2.9480648100514586e-05, |
|
"loss": 0.0127, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"grad_norm": 0.08349260687828064, |
|
"learning_rate": 2.9341204441673266e-05, |
|
"loss": 0.0258, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 22.589285714285715, |
|
"grad_norm": 0.5570873022079468, |
|
"learning_rate": 2.9201621382958733e-05, |
|
"loss": 0.002, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 22.678571428571427, |
|
"grad_norm": 0.06609360128641129, |
|
"learning_rate": 2.9061903406505154e-05, |
|
"loss": 0.0055, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.767857142857142, |
|
"grad_norm": 0.501964807510376, |
|
"learning_rate": 2.8922054998778998e-05, |
|
"loss": 0.0068, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 0.03342385217547417, |
|
"learning_rate": 2.8782080650435006e-05, |
|
"loss": 0.0181, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.946428571428573, |
|
"grad_norm": 6.850861072540283, |
|
"learning_rate": 2.864198485617199e-05, |
|
"loss": 0.0188, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 1.1122395992279053, |
|
"eval_macro_f1": 84.66160439893609, |
|
"eval_macro_precision": 87.28755884076602, |
|
"eval_macro_recall": 82.772217387602, |
|
"eval_micro_f1": 88.9937106918239, |
|
"eval_micro_precision": 88.9937106918239, |
|
"eval_micro_recall": 88.9937106918239, |
|
"eval_runtime": 1.7968, |
|
"eval_samples_per_second": 884.902, |
|
"eval_steps_per_second": 13.914, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 23.035714285714285, |
|
"grad_norm": 12.33535099029541, |
|
"learning_rate": 2.8501772114588476e-05, |
|
"loss": 0.0167, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 23.125, |
|
"grad_norm": 0.05313009023666382, |
|
"learning_rate": 2.8361446928038298e-05, |
|
"loss": 0.004, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 23.214285714285715, |
|
"grad_norm": 0.664737343788147, |
|
"learning_rate": 2.8221013802485975e-05, |
|
"loss": 0.0042, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.303571428571427, |
|
"grad_norm": 10.9341459274292, |
|
"learning_rate": 2.808047724736204e-05, |
|
"loss": 0.0077, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 23.392857142857142, |
|
"grad_norm": 8.750741004943848, |
|
"learning_rate": 2.793984177541827e-05, |
|
"loss": 0.0064, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 23.482142857142858, |
|
"grad_norm": 0.8044894933700562, |
|
"learning_rate": 2.7799111902582696e-05, |
|
"loss": 0.0068, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 23.571428571428573, |
|
"grad_norm": 8.937823295593262, |
|
"learning_rate": 2.76582921478147e-05, |
|
"loss": 0.0121, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 23.660714285714285, |
|
"grad_norm": 0.01974612846970558, |
|
"learning_rate": 2.7517387032959813e-05, |
|
"loss": 0.0043, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"grad_norm": 1.4588861465454102, |
|
"learning_rate": 2.7376401082604564e-05, |
|
"loss": 0.0066, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.839285714285715, |
|
"grad_norm": 0.37790974974632263, |
|
"learning_rate": 2.72353388239312e-05, |
|
"loss": 0.0006, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 23.928571428571427, |
|
"grad_norm": 1.2444077730178833, |
|
"learning_rate": 2.7094204786572254e-05, |
|
"loss": 0.0282, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.0914798974990845, |
|
"eval_macro_f1": 85.26953769339522, |
|
"eval_macro_precision": 87.64161596177536, |
|
"eval_macro_recall": 83.50931812470273, |
|
"eval_micro_f1": 89.37106918238995, |
|
"eval_micro_precision": 89.37106918238995, |
|
"eval_micro_recall": 89.37106918238995, |
|
"eval_runtime": 1.7496, |
|
"eval_samples_per_second": 908.769, |
|
"eval_steps_per_second": 14.289, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 24.017857142857142, |
|
"grad_norm": 0.005385238211601973, |
|
"learning_rate": 2.6953003502465168e-05, |
|
"loss": 0.0042, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 24.107142857142858, |
|
"grad_norm": 0.1486300677061081, |
|
"learning_rate": 2.681173950570674e-05, |
|
"loss": 0.0042, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 24.196428571428573, |
|
"grad_norm": 0.11711076647043228, |
|
"learning_rate": 2.6670417332407487e-05, |
|
"loss": 0.0022, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 24.285714285714285, |
|
"grad_norm": 0.18978235125541687, |
|
"learning_rate": 2.652904152054607e-05, |
|
"loss": 0.003, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 24.375, |
|
"grad_norm": 8.865602493286133, |
|
"learning_rate": 2.6387616609823507e-05, |
|
"loss": 0.005, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 24.464285714285715, |
|
"grad_norm": 0.7902134656906128, |
|
"learning_rate": 2.624614714151743e-05, |
|
"loss": 0.0006, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 24.553571428571427, |
|
"grad_norm": 0.005069936625659466, |
|
"learning_rate": 2.610463765833625e-05, |
|
"loss": 0.0032, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 24.642857142857142, |
|
"grad_norm": 0.02278885804116726, |
|
"learning_rate": 2.59630927042733e-05, |
|
"loss": 0.0009, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 24.732142857142858, |
|
"grad_norm": 0.06174265593290329, |
|
"learning_rate": 2.5821516824460905e-05, |
|
"loss": 0.0033, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 24.821428571428573, |
|
"grad_norm": 0.04255477339029312, |
|
"learning_rate": 2.5679914565024443e-05, |
|
"loss": 0.0065, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.910714285714285, |
|
"grad_norm": 0.4989578127861023, |
|
"learning_rate": 2.5538290472936372e-05, |
|
"loss": 0.0077, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.17359009385108948, |
|
"learning_rate": 2.5396649095870202e-05, |
|
"loss": 0.0136, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 1.1381731033325195, |
|
"eval_macro_f1": 84.72942532348473, |
|
"eval_macro_precision": 86.83333756629393, |
|
"eval_macro_recall": 83.13287544056776, |
|
"eval_micro_f1": 88.93081761006289, |
|
"eval_micro_precision": 88.93081761006289, |
|
"eval_micro_recall": 88.93081761006289, |
|
"eval_runtime": 1.7399, |
|
"eval_samples_per_second": 913.828, |
|
"eval_steps_per_second": 14.368, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.089285714285715, |
|
"grad_norm": 0.8178830742835999, |
|
"learning_rate": 2.5254994982054493e-05, |
|
"loss": 0.0003, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 25.178571428571427, |
|
"grad_norm": 2.3602683544158936, |
|
"learning_rate": 2.5113332680126795e-05, |
|
"loss": 0.001, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 25.267857142857142, |
|
"grad_norm": 0.004060968291014433, |
|
"learning_rate": 2.4971666738987563e-05, |
|
"loss": 0.0002, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 25.357142857142858, |
|
"grad_norm": 0.6710391044616699, |
|
"learning_rate": 2.4830001707654134e-05, |
|
"loss": 0.0003, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 25.446428571428573, |
|
"grad_norm": 0.008804717101156712, |
|
"learning_rate": 2.4688342135114627e-05, |
|
"loss": 0.0054, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 25.535714285714285, |
|
"grad_norm": 0.4956241250038147, |
|
"learning_rate": 2.4546692570181863e-05, |
|
"loss": 0.0035, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 25.625, |
|
"grad_norm": 0.04511274769902229, |
|
"learning_rate": 2.4405057561347315e-05, |
|
"loss": 0.0004, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.032900311052799225, |
|
"learning_rate": 2.4263441656635053e-05, |
|
"loss": 0.0038, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.803571428571427, |
|
"grad_norm": 0.15933604538440704, |
|
"learning_rate": 2.4121849403455688e-05, |
|
"loss": 0.001, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 25.892857142857142, |
|
"grad_norm": 0.1360047459602356, |
|
"learning_rate": 2.3980285348460363e-05, |
|
"loss": 0.002, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.982142857142858, |
|
"grad_norm": 0.02792578749358654, |
|
"learning_rate": 2.3838754037394757e-05, |
|
"loss": 0.0, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.164141058921814, |
|
"eval_macro_f1": 85.84761272086648, |
|
"eval_macro_precision": 87.74020642071049, |
|
"eval_macro_recall": 84.36532282686129, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 1.9014, |
|
"eval_samples_per_second": 836.217, |
|
"eval_steps_per_second": 13.148, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 26.071428571428573, |
|
"grad_norm": 0.0013366724597290158, |
|
"learning_rate": 2.3697260014953108e-05, |
|
"loss": 0.0001, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 26.160714285714285, |
|
"grad_norm": 0.5680537223815918, |
|
"learning_rate": 2.3555807824632285e-05, |
|
"loss": 0.0053, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"grad_norm": 0.0030330184381455183, |
|
"learning_rate": 2.3414402008585888e-05, |
|
"loss": 0.0008, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 26.339285714285715, |
|
"grad_norm": 0.0012838690308853984, |
|
"learning_rate": 2.327304710747841e-05, |
|
"loss": 0.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 26.428571428571427, |
|
"grad_norm": 0.006956954021006823, |
|
"learning_rate": 2.3131747660339394e-05, |
|
"loss": 0.0014, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 26.517857142857142, |
|
"grad_norm": 0.06738751381635666, |
|
"learning_rate": 2.2990508204417742e-05, |
|
"loss": 0.0004, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 26.607142857142858, |
|
"grad_norm": 0.01422626618295908, |
|
"learning_rate": 2.2849333275035964e-05, |
|
"loss": 0.0, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 26.696428571428573, |
|
"grad_norm": 0.004991587717086077, |
|
"learning_rate": 2.270822740544457e-05, |
|
"loss": 0.0, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 26.785714285714285, |
|
"grad_norm": 0.001760053331963718, |
|
"learning_rate": 2.2567195126676507e-05, |
|
"loss": 0.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.875, |
|
"grad_norm": 0.0031189576257020235, |
|
"learning_rate": 2.242624096740164e-05, |
|
"loss": 0.0, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 26.964285714285715, |
|
"grad_norm": 0.001600801246240735, |
|
"learning_rate": 2.2285369453781364e-05, |
|
"loss": 0.0, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 1.1644015312194824, |
|
"eval_macro_f1": 85.87021885704534, |
|
"eval_macro_precision": 87.99308755760369, |
|
"eval_macro_recall": 84.24641886180348, |
|
"eval_micro_f1": 89.74842767295598, |
|
"eval_micro_precision": 89.74842767295598, |
|
"eval_micro_recall": 89.74842767295598, |
|
"eval_runtime": 1.7672, |
|
"eval_samples_per_second": 899.738, |
|
"eval_steps_per_second": 14.147, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 27.053571428571427, |
|
"grad_norm": 0.0008946519810706377, |
|
"learning_rate": 2.214458510932325e-05, |
|
"loss": 0.0011, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 27.142857142857142, |
|
"grad_norm": 0.002819470362737775, |
|
"learning_rate": 2.2003892454735786e-05, |
|
"loss": 0.0001, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 27.232142857142858, |
|
"grad_norm": 0.002619238570332527, |
|
"learning_rate": 2.1863296007783206e-05, |
|
"loss": 0.0008, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 27.321428571428573, |
|
"grad_norm": 0.0019296056125313044, |
|
"learning_rate": 2.172280028314045e-05, |
|
"loss": 0.0059, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 27.410714285714285, |
|
"grad_norm": 0.0006752462941221893, |
|
"learning_rate": 2.158240979224817e-05, |
|
"loss": 0.0, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"grad_norm": 0.002963811159133911, |
|
"learning_rate": 2.1442129043167874e-05, |
|
"loss": 0.0, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 27.589285714285715, |
|
"grad_norm": 0.0020487557630985975, |
|
"learning_rate": 2.1301962540437164e-05, |
|
"loss": 0.0, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 27.678571428571427, |
|
"grad_norm": 0.004336291924118996, |
|
"learning_rate": 2.1161914784925083e-05, |
|
"loss": 0.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.767857142857142, |
|
"grad_norm": 0.002049487316980958, |
|
"learning_rate": 2.102199027368761e-05, |
|
"loss": 0.0, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 27.857142857142858, |
|
"grad_norm": 0.008441206067800522, |
|
"learning_rate": 2.088219349982323e-05, |
|
"loss": 0.0, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.946428571428573, |
|
"grad_norm": 0.0020169492345303297, |
|
"learning_rate": 2.0742528952328673e-05, |
|
"loss": 0.0, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 1.1838983297348022, |
|
"eval_macro_f1": 85.74461897087475, |
|
"eval_macro_precision": 88.00197532696066, |
|
"eval_macro_recall": 84.04298404298405, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 2.4777, |
|
"eval_samples_per_second": 641.733, |
|
"eval_steps_per_second": 10.09, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 28.035714285714285, |
|
"grad_norm": 0.21369871497154236, |
|
"learning_rate": 2.0603001115954774e-05, |
|
"loss": 0.0026, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"grad_norm": 0.001929258112795651, |
|
"learning_rate": 2.0463614471062435e-05, |
|
"loss": 0.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 28.214285714285715, |
|
"grad_norm": 0.0026586749590933323, |
|
"learning_rate": 2.0324373493478804e-05, |
|
"loss": 0.005, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 28.303571428571427, |
|
"grad_norm": 0.021981006488204002, |
|
"learning_rate": 2.0185282654353493e-05, |
|
"loss": 0.0, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 28.392857142857142, |
|
"grad_norm": 0.005900249350816011, |
|
"learning_rate": 2.0046346420015067e-05, |
|
"loss": 0.0, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 28.482142857142858, |
|
"grad_norm": 0.0033512930385768414, |
|
"learning_rate": 1.990756925182756e-05, |
|
"loss": 0.0, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 0.0007393535925075412, |
|
"learning_rate": 1.976895560604729e-05, |
|
"loss": 0.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.660714285714285, |
|
"grad_norm": 0.2156071811914444, |
|
"learning_rate": 1.9630509933679704e-05, |
|
"loss": 0.0028, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"grad_norm": 0.0010669779730960727, |
|
"learning_rate": 1.9492236680336485e-05, |
|
"loss": 0.0, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 28.839285714285715, |
|
"grad_norm": 0.0025355510879307985, |
|
"learning_rate": 1.9354140286092785e-05, |
|
"loss": 0.0, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 28.928571428571427, |
|
"grad_norm": 0.004663623869419098, |
|
"learning_rate": 1.9216225185344662e-05, |
|
"loss": 0.0, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 1.169285535812378, |
|
"eval_macro_f1": 85.77409578612829, |
|
"eval_macro_precision": 87.61836905650758, |
|
"eval_macro_recall": 84.32305739998047, |
|
"eval_micro_f1": 89.62264150943396, |
|
"eval_micro_precision": 89.62264150943396, |
|
"eval_micro_recall": 89.62264150943396, |
|
"eval_runtime": 1.9141, |
|
"eval_samples_per_second": 830.659, |
|
"eval_steps_per_second": 13.061, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 29.017857142857142, |
|
"grad_norm": 0.0022395530249923468, |
|
"learning_rate": 1.907849580666668e-05, |
|
"loss": 0.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 29.107142857142858, |
|
"grad_norm": 0.0007931589498184621, |
|
"learning_rate": 1.8940956572669692e-05, |
|
"loss": 0.0006, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 29.196428571428573, |
|
"grad_norm": 0.0019468627870082855, |
|
"learning_rate": 1.880361189985886e-05, |
|
"loss": 0.0, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 29.285714285714285, |
|
"grad_norm": 0.0028856031130999327, |
|
"learning_rate": 1.8666466198491795e-05, |
|
"loss": 0.0, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 29.375, |
|
"grad_norm": 0.0021576446015387774, |
|
"learning_rate": 1.852952387243698e-05, |
|
"loss": 0.0, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 29.464285714285715, |
|
"grad_norm": 0.0026545205619186163, |
|
"learning_rate": 1.8392789319032328e-05, |
|
"loss": 0.0009, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.553571428571427, |
|
"grad_norm": 0.0022205617278814316, |
|
"learning_rate": 1.8256266928943988e-05, |
|
"loss": 0.0066, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 29.642857142857142, |
|
"grad_norm": 0.001808985136449337, |
|
"learning_rate": 1.8119961086025374e-05, |
|
"loss": 0.0, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 29.732142857142858, |
|
"grad_norm": 0.0015430036000907421, |
|
"learning_rate": 1.7983876167176362e-05, |
|
"loss": 0.0, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 29.821428571428573, |
|
"grad_norm": 0.002092926762998104, |
|
"learning_rate": 1.7848016542202767e-05, |
|
"loss": 0.0, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 29.910714285714285, |
|
"grad_norm": 0.001246055937372148, |
|
"learning_rate": 1.7712386573676044e-05, |
|
"loss": 0.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.001110477140173316, |
|
"learning_rate": 1.7576990616793137e-05, |
|
"loss": 0.0, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 1.193253755569458, |
|
"eval_macro_f1": 85.4909143681396, |
|
"eval_macro_precision": 88.02490672890218, |
|
"eval_macro_recall": 83.63611440534517, |
|
"eval_micro_f1": 89.55974842767296, |
|
"eval_micro_precision": 89.55974842767296, |
|
"eval_micro_recall": 89.55974842767296, |
|
"eval_runtime": 2.0785, |
|
"eval_samples_per_second": 764.987, |
|
"eval_steps_per_second": 12.028, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 30.089285714285715, |
|
"grad_norm": 0.015624803490936756, |
|
"learning_rate": 1.7441833019236704e-05, |
|
"loss": 0.0011, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 30.178571428571427, |
|
"grad_norm": 0.0003042487951461226, |
|
"learning_rate": 1.730691812103546e-05, |
|
"loss": 0.0, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 30.267857142857142, |
|
"grad_norm": 0.0016463997308164835, |
|
"learning_rate": 1.717225025442485e-05, |
|
"loss": 0.0, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 30.357142857142858, |
|
"grad_norm": 0.0009225396788679063, |
|
"learning_rate": 1.7037833743707892e-05, |
|
"loss": 0.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.446428571428573, |
|
"grad_norm": 0.14133678376674652, |
|
"learning_rate": 1.690367290511637e-05, |
|
"loss": 0.0008, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 30.535714285714285, |
|
"grad_norm": 0.0003841827274300158, |
|
"learning_rate": 1.676977204667221e-05, |
|
"loss": 0.0, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 30.625, |
|
"grad_norm": 0.0009803869761526585, |
|
"learning_rate": 1.6636135468049123e-05, |
|
"loss": 0.0, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 30.714285714285715, |
|
"grad_norm": 0.002163276541978121, |
|
"learning_rate": 1.6502767460434588e-05, |
|
"loss": 0.0, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 30.803571428571427, |
|
"grad_norm": 0.002792706247419119, |
|
"learning_rate": 1.6369672306392027e-05, |
|
"loss": 0.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 30.892857142857142, |
|
"grad_norm": 0.0011888825101777911, |
|
"learning_rate": 1.62368542797233e-05, |
|
"loss": 0.0, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 30.982142857142858, |
|
"grad_norm": 0.0003651406441349536, |
|
"learning_rate": 1.6104317645331456e-05, |
|
"loss": 0.0063, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 1.1838295459747314, |
|
"eval_macro_f1": 85.54969445546462, |
|
"eval_macro_precision": 87.56241738875019, |
|
"eval_macro_recall": 83.99677245831091, |
|
"eval_micro_f1": 89.49685534591195, |
|
"eval_micro_precision": 89.49685534591195, |
|
"eval_micro_recall": 89.49685534591195, |
|
"eval_runtime": 2.059, |
|
"eval_samples_per_second": 772.237, |
|
"eval_steps_per_second": 12.142, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 31.071428571428573, |
|
"grad_norm": 0.0007483928930014372, |
|
"learning_rate": 1.5972066659083796e-05, |
|
"loss": 0.0, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 31.160714285714285, |
|
"grad_norm": 0.004502744879573584, |
|
"learning_rate": 1.5840105567675218e-05, |
|
"loss": 0.0, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"grad_norm": 0.009936104528605938, |
|
"learning_rate": 1.5708438608491814e-05, |
|
"loss": 0.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 31.339285714285715, |
|
"grad_norm": 0.0025622285902500153, |
|
"learning_rate": 1.557707000947487e-05, |
|
"loss": 0.0, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 0.0010868199169635773, |
|
"learning_rate": 1.5446003988985043e-05, |
|
"loss": 0.0, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 31.517857142857142, |
|
"grad_norm": 0.0007128150318749249, |
|
"learning_rate": 1.531524475566693e-05, |
|
"loss": 0.0012, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 31.607142857142858, |
|
"grad_norm": 0.0021832745987921953, |
|
"learning_rate": 1.5184796508313934e-05, |
|
"loss": 0.0038, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 31.696428571428573, |
|
"grad_norm": 0.001526080071926117, |
|
"learning_rate": 1.5054663435733418e-05, |
|
"loss": 0.0014, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 31.785714285714285, |
|
"grad_norm": 0.00137015909422189, |
|
"learning_rate": 1.492484971661221e-05, |
|
"loss": 0.0, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 31.875, |
|
"grad_norm": 0.0007851460832171142, |
|
"learning_rate": 1.479535951938243e-05, |
|
"loss": 0.0, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 31.964285714285715, |
|
"grad_norm": 0.0010572908213362098, |
|
"learning_rate": 1.4666197002087594e-05, |
|
"loss": 0.0013, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 1.1904088258743286, |
|
"eval_macro_f1": 85.89264432682533, |
|
"eval_macro_precision": 88.25563122053431, |
|
"eval_macro_recall": 84.12751489674567, |
|
"eval_micro_f1": 89.81132075471699, |
|
"eval_micro_precision": 89.81132075471699, |
|
"eval_micro_recall": 89.81132075471699, |
|
"eval_runtime": 1.7315, |
|
"eval_samples_per_second": 918.253, |
|
"eval_steps_per_second": 14.438, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 32.05357142857143, |
|
"grad_norm": 0.0002830619050655514, |
|
"learning_rate": 1.4537366312249165e-05, |
|
"loss": 0.0, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 32.142857142857146, |
|
"grad_norm": 0.0003966302901972085, |
|
"learning_rate": 1.4408871586733318e-05, |
|
"loss": 0.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.232142857142854, |
|
"grad_norm": 0.0010989709990099072, |
|
"learning_rate": 1.428071695161812e-05, |
|
"loss": 0.0, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 32.32142857142857, |
|
"grad_norm": 0.0009420845308341086, |
|
"learning_rate": 1.4152906522061048e-05, |
|
"loss": 0.0042, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 32.410714285714285, |
|
"grad_norm": 0.0009583772043697536, |
|
"learning_rate": 1.402544440216682e-05, |
|
"loss": 0.0, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"grad_norm": 0.000329616479575634, |
|
"learning_rate": 1.3898334684855647e-05, |
|
"loss": 0.0, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 32.589285714285715, |
|
"grad_norm": 0.0010914219310507178, |
|
"learning_rate": 1.3771581451731768e-05, |
|
"loss": 0.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 32.67857142857143, |
|
"grad_norm": 0.001109420321881771, |
|
"learning_rate": 1.3645188772952411e-05, |
|
"loss": 0.0017, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 32.767857142857146, |
|
"grad_norm": 0.003983737900853157, |
|
"learning_rate": 1.3519160707097073e-05, |
|
"loss": 0.0016, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"grad_norm": 0.0013640534598380327, |
|
"learning_rate": 1.3393501301037245e-05, |
|
"loss": 0.0013, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 32.94642857142857, |
|
"grad_norm": 0.00043303275015205145, |
|
"learning_rate": 1.3268214589806388e-05, |
|
"loss": 0.0, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 1.1757960319519043, |
|
"eval_macro_f1": 85.82209656372336, |
|
"eval_macro_precision": 87.80381119449642, |
|
"eval_macro_recall": 84.28473813089197, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 2.0676, |
|
"eval_samples_per_second": 769.018, |
|
"eval_steps_per_second": 12.091, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 33.035714285714285, |
|
"grad_norm": 0.0008297289023175836, |
|
"learning_rate": 1.3143304596470443e-05, |
|
"loss": 0.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 33.125, |
|
"grad_norm": 0.0008214128902181983, |
|
"learning_rate": 1.301877533199859e-05, |
|
"loss": 0.0, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 33.214285714285715, |
|
"grad_norm": 0.0019036834128201008, |
|
"learning_rate": 1.2894630795134455e-05, |
|
"loss": 0.0, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 33.30357142857143, |
|
"grad_norm": 0.0015944598708301783, |
|
"learning_rate": 1.2770874972267777e-05, |
|
"loss": 0.0, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 33.392857142857146, |
|
"grad_norm": 0.0004286083276383579, |
|
"learning_rate": 1.2647511837306284e-05, |
|
"loss": 0.0, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 33.482142857142854, |
|
"grad_norm": 0.0017838689964264631, |
|
"learning_rate": 1.2524545351548206e-05, |
|
"loss": 0.0052, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 33.57142857142857, |
|
"grad_norm": 0.0007197365048341453, |
|
"learning_rate": 1.2401979463554982e-05, |
|
"loss": 0.0008, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 33.660714285714285, |
|
"grad_norm": 0.0011250259121879935, |
|
"learning_rate": 1.2279818109024538e-05, |
|
"loss": 0.0, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 33.75, |
|
"grad_norm": 0.0006792208878323436, |
|
"learning_rate": 1.2158065210664848e-05, |
|
"loss": 0.001, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 33.839285714285715, |
|
"grad_norm": 0.0010428227251395583, |
|
"learning_rate": 1.2036724678068006e-05, |
|
"loss": 0.0, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 33.92857142857143, |
|
"grad_norm": 0.0009357041562907398, |
|
"learning_rate": 1.1915800407584704e-05, |
|
"loss": 0.0009, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 1.1771963834762573, |
|
"eval_macro_f1": 85.57575566624061, |
|
"eval_macro_precision": 87.49931435467062, |
|
"eval_macro_recall": 84.07735715428024, |
|
"eval_micro_f1": 89.49685534591195, |
|
"eval_micro_precision": 89.49685534591195, |
|
"eval_micro_recall": 89.49685534591195, |
|
"eval_runtime": 2.4523, |
|
"eval_samples_per_second": 648.379, |
|
"eval_steps_per_second": 10.195, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 34.017857142857146, |
|
"grad_norm": 0.0015970384702086449, |
|
"learning_rate": 1.1795296282199061e-05, |
|
"loss": 0.0, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 34.107142857142854, |
|
"grad_norm": 0.0010594812920317054, |
|
"learning_rate": 1.1675216171404002e-05, |
|
"loss": 0.0048, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 34.19642857142857, |
|
"grad_norm": 0.0008670884999446571, |
|
"learning_rate": 1.1555563931076934e-05, |
|
"loss": 0.0, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 0.000477910740301013, |
|
"learning_rate": 1.1436343403356017e-05, |
|
"loss": 0.0, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 34.375, |
|
"grad_norm": 0.00853039976209402, |
|
"learning_rate": 1.1317558416516697e-05, |
|
"loss": 0.0012, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 34.464285714285715, |
|
"grad_norm": 0.001123997732065618, |
|
"learning_rate": 1.1199212784848834e-05, |
|
"loss": 0.0, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 34.55357142857143, |
|
"grad_norm": 0.0008179740980267525, |
|
"learning_rate": 1.1081310308534229e-05, |
|
"loss": 0.0011, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 34.642857142857146, |
|
"grad_norm": 0.0008750974084250629, |
|
"learning_rate": 1.096385477352455e-05, |
|
"loss": 0.0, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 34.732142857142854, |
|
"grad_norm": 0.0006880298024043441, |
|
"learning_rate": 1.0846849951419814e-05, |
|
"loss": 0.0009, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 34.82142857142857, |
|
"grad_norm": 0.0012920346343889832, |
|
"learning_rate": 1.0730299599347219e-05, |
|
"loss": 0.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 34.910714285714285, |
|
"grad_norm": 0.00165931461378932, |
|
"learning_rate": 1.0614207459840572e-05, |
|
"loss": 0.0, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 0.003719399683177471, |
|
"learning_rate": 1.049857726072005e-05, |
|
"loss": 0.0, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 1.1785622835159302, |
|
"eval_macro_f1": 86.0688671097593, |
|
"eval_macro_precision": 88.10971691878396, |
|
"eval_macro_recall": 84.49211910750371, |
|
"eval_micro_f1": 89.87421383647799, |
|
"eval_micro_precision": 89.87421383647799, |
|
"eval_micro_recall": 89.87421383647799, |
|
"eval_runtime": 1.6934, |
|
"eval_samples_per_second": 938.939, |
|
"eval_steps_per_second": 14.763, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 35.089285714285715, |
|
"grad_norm": 0.001190517912618816, |
|
"learning_rate": 1.0383412714972507e-05, |
|
"loss": 0.0007, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 35.17857142857143, |
|
"grad_norm": 0.0001941876980708912, |
|
"learning_rate": 1.0268717520632298e-05, |
|
"loss": 0.0, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 35.267857142857146, |
|
"grad_norm": 0.0013438657624647021, |
|
"learning_rate": 1.0154495360662464e-05, |
|
"loss": 0.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 35.357142857142854, |
|
"grad_norm": 0.0008899585227482021, |
|
"learning_rate": 1.0040749902836507e-05, |
|
"loss": 0.0, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 35.44642857142857, |
|
"grad_norm": 0.0008040536195039749, |
|
"learning_rate": 9.927484799620595e-06, |
|
"loss": 0.0, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 35.535714285714285, |
|
"grad_norm": 0.0008036054205149412, |
|
"learning_rate": 9.814703688056321e-06, |
|
"loss": 0.0, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 35.625, |
|
"grad_norm": 0.000511976657435298, |
|
"learning_rate": 9.702410189643837e-06, |
|
"loss": 0.0, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 35.714285714285715, |
|
"grad_norm": 0.000789080688264221, |
|
"learning_rate": 9.59060791022566e-06, |
|
"loss": 0.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.80357142857143, |
|
"grad_norm": 0.0002290449192514643, |
|
"learning_rate": 9.479300439870787e-06, |
|
"loss": 0.0, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 35.892857142857146, |
|
"grad_norm": 0.0005157162086106837, |
|
"learning_rate": 9.368491352759506e-06, |
|
"loss": 0.0, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 35.982142857142854, |
|
"grad_norm": 0.5052797794342041, |
|
"learning_rate": 9.258184207068551e-06, |
|
"loss": 0.0069, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 1.1818641424179077, |
|
"eval_macro_f1": 85.82209656372336, |
|
"eval_macro_precision": 87.80381119449642, |
|
"eval_macro_recall": 84.28473813089197, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 1.9269, |
|
"eval_samples_per_second": 825.175, |
|
"eval_steps_per_second": 12.974, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 36.07142857142857, |
|
"grad_norm": 0.001218083780258894, |
|
"learning_rate": 9.148382544856884e-06, |
|
"loss": 0.0, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 36.160714285714285, |
|
"grad_norm": 0.0006271243910305202, |
|
"learning_rate": 9.039089891951975e-06, |
|
"loss": 0.0051, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 36.25, |
|
"grad_norm": 0.001310994615778327, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 0.0, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 36.339285714285715, |
|
"grad_norm": 0.0016614202177152038, |
|
"learning_rate": 8.822045635535823e-06, |
|
"loss": 0.0, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 36.42857142857143, |
|
"grad_norm": 0.00039496883982792497, |
|
"learning_rate": 8.714301001505567e-06, |
|
"loss": 0.0012, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 36.517857142857146, |
|
"grad_norm": 0.0006432042573578656, |
|
"learning_rate": 8.607079315520252e-06, |
|
"loss": 0.0, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 36.607142857142854, |
|
"grad_norm": 0.00702462624758482, |
|
"learning_rate": 8.500384020562018e-06, |
|
"loss": 0.0, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 36.69642857142857, |
|
"grad_norm": 0.17590132355690002, |
|
"learning_rate": 8.394218542710141e-06, |
|
"loss": 0.0012, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 36.785714285714285, |
|
"grad_norm": 0.00369036803022027, |
|
"learning_rate": 8.288586291031026e-06, |
|
"loss": 0.0, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 36.875, |
|
"grad_norm": 0.0006468078936450183, |
|
"learning_rate": 8.183490657468688e-06, |
|
"loss": 0.0, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 36.964285714285715, |
|
"grad_norm": 0.15709273517131805, |
|
"learning_rate": 8.078935016735891e-06, |
|
"loss": 0.001, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 1.1875933408737183, |
|
"eval_macro_f1": 86.06680921167936, |
|
"eval_macro_precision": 88.44071939933647, |
|
"eval_macro_recall": 84.29263044647661, |
|
"eval_micro_f1": 89.937106918239, |
|
"eval_micro_precision": 89.937106918239, |
|
"eval_micro_recall": 89.937106918239, |
|
"eval_runtime": 1.7266, |
|
"eval_samples_per_second": 920.898, |
|
"eval_steps_per_second": 14.48, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 37.05357142857143, |
|
"grad_norm": 0.009659999050199986, |
|
"learning_rate": 7.974922726205736e-06, |
|
"loss": 0.0, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 0.0007702059228904545, |
|
"learning_rate": 7.871457125803896e-06, |
|
"loss": 0.0, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 37.232142857142854, |
|
"grad_norm": 0.0009207057883031666, |
|
"learning_rate": 7.768541537901325e-06, |
|
"loss": 0.0009, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 37.32142857142857, |
|
"grad_norm": 0.00031363347079604864, |
|
"learning_rate": 7.666179267207596e-06, |
|
"loss": 0.0, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 37.410714285714285, |
|
"grad_norm": 0.0014384811511263251, |
|
"learning_rate": 7.564373600664804e-06, |
|
"loss": 0.0056, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"grad_norm": 0.0012792075285688043, |
|
"learning_rate": 7.463127807341966e-06, |
|
"loss": 0.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 37.589285714285715, |
|
"grad_norm": 0.000563352950848639, |
|
"learning_rate": 7.3624451383301125e-06, |
|
"loss": 0.0, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 37.67857142857143, |
|
"grad_norm": 0.0017736536683514714, |
|
"learning_rate": 7.262328826637826e-06, |
|
"loss": 0.0009, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 37.767857142857146, |
|
"grad_norm": 0.000779169553425163, |
|
"learning_rate": 7.162782087087494e-06, |
|
"loss": 0.0, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 37.857142857142854, |
|
"grad_norm": 0.001163293025456369, |
|
"learning_rate": 7.06380811621202e-06, |
|
"loss": 0.0, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 37.94642857142857, |
|
"grad_norm": 0.00028616635245271027, |
|
"learning_rate": 6.965410092152211e-06, |
|
"loss": 0.0, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 1.1880896091461182, |
|
"eval_macro_f1": 85.5970716119231, |
|
"eval_macro_precision": 87.7511203877084, |
|
"eval_macro_recall": 83.95845318922241, |
|
"eval_micro_f1": 89.55974842767296, |
|
"eval_micro_precision": 89.55974842767296, |
|
"eval_micro_recall": 89.55974842767296, |
|
"eval_runtime": 2.01, |
|
"eval_samples_per_second": 791.054, |
|
"eval_steps_per_second": 12.438, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 38.035714285714285, |
|
"grad_norm": 0.0016029111575335264, |
|
"learning_rate": 6.867591174554713e-06, |
|
"loss": 0.0, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 38.125, |
|
"grad_norm": 0.0014079079264774919, |
|
"learning_rate": 6.770354504470575e-06, |
|
"loss": 0.0, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 38.214285714285715, |
|
"grad_norm": 0.0005063859280198812, |
|
"learning_rate": 6.673703204254347e-06, |
|
"loss": 0.0, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 38.30357142857143, |
|
"grad_norm": 0.0009960135212168097, |
|
"learning_rate": 6.577640377463848e-06, |
|
"loss": 0.0009, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 38.392857142857146, |
|
"grad_norm": 0.0003499105223454535, |
|
"learning_rate": 6.482169108760511e-06, |
|
"loss": 0.0, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 38.482142857142854, |
|
"grad_norm": 0.0017842509550973773, |
|
"learning_rate": 6.387292463810299e-06, |
|
"loss": 0.0008, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 38.57142857142857, |
|
"grad_norm": 0.0008073888020589948, |
|
"learning_rate": 6.2930134891853146e-06, |
|
"loss": 0.0, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 38.660714285714285, |
|
"grad_norm": 0.0010807816870510578, |
|
"learning_rate": 6.199335212265911e-06, |
|
"loss": 0.0, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"grad_norm": 0.0004026450333185494, |
|
"learning_rate": 6.106260641143546e-06, |
|
"loss": 0.0051, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 38.839285714285715, |
|
"grad_norm": 0.13531385362148285, |
|
"learning_rate": 6.013792764524129e-06, |
|
"loss": 0.0008, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 38.92857142857143, |
|
"grad_norm": 0.00043903145706281066, |
|
"learning_rate": 5.921934551632085e-06, |
|
"loss": 0.0, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 1.1880995035171509, |
|
"eval_macro_f1": 85.5970716119231, |
|
"eval_macro_precision": 87.7511203877084, |
|
"eval_macro_recall": 83.95845318922241, |
|
"eval_micro_f1": 89.55974842767296, |
|
"eval_micro_precision": 89.55974842767296, |
|
"eval_micro_recall": 89.55974842767296, |
|
"eval_runtime": 2.1641, |
|
"eval_samples_per_second": 734.708, |
|
"eval_steps_per_second": 11.552, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 39.017857142857146, |
|
"grad_norm": 0.0007070303545333445, |
|
"learning_rate": 5.830688952115018e-06, |
|
"loss": 0.0, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 39.107142857142854, |
|
"grad_norm": 0.0003032834501937032, |
|
"learning_rate": 5.740058895948955e-06, |
|
"loss": 0.0, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 39.19642857142857, |
|
"grad_norm": 0.1616564691066742, |
|
"learning_rate": 5.650047293344315e-06, |
|
"loss": 0.0071, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 39.285714285714285, |
|
"grad_norm": 0.00030247235554270446, |
|
"learning_rate": 5.560657034652406e-06, |
|
"loss": 0.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 39.375, |
|
"grad_norm": 0.0009000123827718198, |
|
"learning_rate": 5.471890990272666e-06, |
|
"loss": 0.0, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 39.464285714285715, |
|
"grad_norm": 0.0009570589754730463, |
|
"learning_rate": 5.383752010560441e-06, |
|
"loss": 0.0, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 39.55357142857143, |
|
"grad_norm": 0.0008730028057470918, |
|
"learning_rate": 5.296242925735487e-06, |
|
"loss": 0.0, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 39.642857142857146, |
|
"grad_norm": 0.0006854677340015769, |
|
"learning_rate": 5.2093665457911e-06, |
|
"loss": 0.0, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 39.732142857142854, |
|
"grad_norm": 0.0013594292104244232, |
|
"learning_rate": 5.123125660403849e-06, |
|
"loss": 0.0, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 39.82142857142857, |
|
"grad_norm": 0.0011872885515913367, |
|
"learning_rate": 5.037523038844033e-06, |
|
"loss": 0.0, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 39.910714285714285, |
|
"grad_norm": 0.0007189795724116266, |
|
"learning_rate": 4.952561429886721e-06, |
|
"loss": 0.0, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 8.409917063545436e-05, |
|
"learning_rate": 4.868243561723535e-06, |
|
"loss": 0.0, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 1.1868513822555542, |
|
"eval_macro_f1": 85.77060086961077, |
|
"eval_macro_precision": 87.93467695199129, |
|
"eval_macro_recall": 84.12356873895335, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 2.0444, |
|
"eval_samples_per_second": 777.736, |
|
"eval_steps_per_second": 12.229, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 40.089285714285715, |
|
"grad_norm": 0.0010488256812095642, |
|
"learning_rate": 4.7845721418749905e-06, |
|
"loss": 0.0, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 40.17857142857143, |
|
"grad_norm": 0.00023322908964473754, |
|
"learning_rate": 4.701549857103588e-06, |
|
"loss": 0.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 40.267857142857146, |
|
"grad_norm": 0.0007012597052380443, |
|
"learning_rate": 4.619179373327545e-06, |
|
"loss": 0.0, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 40.357142857142854, |
|
"grad_norm": 0.0006945223431102931, |
|
"learning_rate": 4.537463335535161e-06, |
|
"loss": 0.0, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 40.44642857142857, |
|
"grad_norm": 0.003948695491999388, |
|
"learning_rate": 4.456404367699923e-06, |
|
"loss": 0.0, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 40.535714285714285, |
|
"grad_norm": 0.0009591460693627596, |
|
"learning_rate": 4.376005072696204e-06, |
|
"loss": 0.0, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 40.625, |
|
"grad_norm": 0.0007004874059930444, |
|
"learning_rate": 4.296268032215733e-06, |
|
"loss": 0.0, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 40.714285714285715, |
|
"grad_norm": 0.00040511120459996164, |
|
"learning_rate": 4.217195806684629e-06, |
|
"loss": 0.0053, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 40.80357142857143, |
|
"grad_norm": 0.0005234309355728328, |
|
"learning_rate": 4.138790935181258e-06, |
|
"loss": 0.0, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 40.892857142857146, |
|
"grad_norm": 0.0015812547644600272, |
|
"learning_rate": 4.061055935354643e-06, |
|
"loss": 0.0, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 40.982142857142854, |
|
"grad_norm": 0.0006628704722970724, |
|
"learning_rate": 3.983993303343639e-06, |
|
"loss": 0.001, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 1.1929736137390137, |
|
"eval_macro_f1": 85.96683768424042, |
|
"eval_macro_precision": 88.38352495427227, |
|
"eval_macro_recall": 84.16978032362647, |
|
"eval_micro_f1": 89.87421383647799, |
|
"eval_micro_precision": 89.87421383647799, |
|
"eval_micro_recall": 89.87421383647799, |
|
"eval_runtime": 2.071, |
|
"eval_samples_per_second": 767.76, |
|
"eval_steps_per_second": 12.072, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 41.07142857142857, |
|
"grad_norm": 0.0007947610574774444, |
|
"learning_rate": 3.907605513696808e-06, |
|
"loss": 0.001, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 41.160714285714285, |
|
"grad_norm": 0.006750487256795168, |
|
"learning_rate": 3.831895019292897e-06, |
|
"loss": 0.0, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 41.25, |
|
"grad_norm": 0.0010204770369455218, |
|
"learning_rate": 3.756864251262143e-06, |
|
"loss": 0.0, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 41.339285714285715, |
|
"grad_norm": 0.1501074582338333, |
|
"learning_rate": 3.68251561890815e-06, |
|
"loss": 0.0062, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 41.42857142857143, |
|
"grad_norm": 0.0004478511691559106, |
|
"learning_rate": 3.6088515096305674e-06, |
|
"loss": 0.0, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 41.517857142857146, |
|
"grad_norm": 0.0007376694120466709, |
|
"learning_rate": 3.535874288848398e-06, |
|
"loss": 0.0, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 41.607142857142854, |
|
"grad_norm": 0.0005087918252684176, |
|
"learning_rate": 3.4635862999240457e-06, |
|
"loss": 0.0, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 41.69642857142857, |
|
"grad_norm": 0.0007002074271440506, |
|
"learning_rate": 3.391989864088102e-06, |
|
"loss": 0.0009, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 41.785714285714285, |
|
"grad_norm": 0.0004804203344974667, |
|
"learning_rate": 3.321087280364757e-06, |
|
"loss": 0.0, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 41.875, |
|
"grad_norm": 0.00030447664903476834, |
|
"learning_rate": 3.250880825498026e-06, |
|
"loss": 0.0, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 41.964285714285715, |
|
"grad_norm": 0.0008117399993352592, |
|
"learning_rate": 3.181372753878595e-06, |
|
"loss": 0.0, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 1.1892344951629639, |
|
"eval_macro_f1": 85.91846508098604, |
|
"eval_macro_precision": 88.18629280744503, |
|
"eval_macro_recall": 84.20809959271497, |
|
"eval_micro_f1": 89.81132075471699, |
|
"eval_micro_precision": 89.81132075471699, |
|
"eval_micro_recall": 89.81132075471699, |
|
"eval_runtime": 2.1894, |
|
"eval_samples_per_second": 726.234, |
|
"eval_steps_per_second": 11.419, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 42.05357142857143, |
|
"grad_norm": 0.0006969855749048293, |
|
"learning_rate": 3.1125652974714758e-06, |
|
"loss": 0.0, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 42.142857142857146, |
|
"grad_norm": 0.0007993881008587778, |
|
"learning_rate": 3.044460665744284e-06, |
|
"loss": 0.0, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 42.232142857142854, |
|
"grad_norm": 0.14198355376720428, |
|
"learning_rate": 2.9770610455963547e-06, |
|
"loss": 0.0009, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 42.32142857142857, |
|
"grad_norm": 0.001101199653930962, |
|
"learning_rate": 2.9103686012884546e-06, |
|
"loss": 0.0009, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 42.410714285714285, |
|
"grad_norm": 0.0004274248203728348, |
|
"learning_rate": 2.8443854743733233e-06, |
|
"loss": 0.0, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"grad_norm": 0.00032507788273505867, |
|
"learning_rate": 2.779113783626916e-06, |
|
"loss": 0.0, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 42.589285714285715, |
|
"grad_norm": 0.4506078064441681, |
|
"learning_rate": 2.7145556249803193e-06, |
|
"loss": 0.0055, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 42.67857142857143, |
|
"grad_norm": 0.00020234609837643802, |
|
"learning_rate": 2.6507130714525095e-06, |
|
"loss": 0.0, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 42.767857142857146, |
|
"grad_norm": 0.0005826003616675735, |
|
"learning_rate": 2.5875881730837324e-06, |
|
"loss": 0.0, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 0.000374118477338925, |
|
"learning_rate": 2.5251829568697207e-06, |
|
"loss": 0.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 42.94642857142857, |
|
"grad_norm": 0.0006962314946576953, |
|
"learning_rate": 2.463499426696564e-06, |
|
"loss": 0.0, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 1.1839672327041626, |
|
"eval_macro_f1": 85.74461897087475, |
|
"eval_macro_precision": 88.00197532696066, |
|
"eval_macro_recall": 84.04298404298405, |
|
"eval_micro_f1": 89.68553459119497, |
|
"eval_micro_precision": 89.68553459119497, |
|
"eval_micro_recall": 89.68553459119497, |
|
"eval_runtime": 2.6258, |
|
"eval_samples_per_second": 605.527, |
|
"eval_steps_per_second": 9.521, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 43.035714285714285, |
|
"grad_norm": 0.0006290263263508677, |
|
"learning_rate": 2.4025395632763846e-06, |
|
"loss": 0.0, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 43.125, |
|
"grad_norm": 0.00028139716596342623, |
|
"learning_rate": 2.3423053240837515e-06, |
|
"loss": 0.0, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 43.214285714285715, |
|
"grad_norm": 0.0004424660000950098, |
|
"learning_rate": 2.282798643292777e-06, |
|
"loss": 0.0009, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 43.30357142857143, |
|
"grad_norm": 0.0006072869873605669, |
|
"learning_rate": 2.224021431715065e-06, |
|
"loss": 0.0009, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 43.392857142857146, |
|
"grad_norm": 0.0006662249797955155, |
|
"learning_rate": 2.165975576738294e-06, |
|
"loss": 0.0, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 43.482142857142854, |
|
"grad_norm": 0.4406328797340393, |
|
"learning_rate": 2.108662942265666e-06, |
|
"loss": 0.0045, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 43.57142857142857, |
|
"grad_norm": 0.0005156341940164566, |
|
"learning_rate": 2.0520853686560178e-06, |
|
"loss": 0.0008, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 43.660714285714285, |
|
"grad_norm": 0.0010501693468540907, |
|
"learning_rate": 1.996244672664749e-06, |
|
"loss": 0.0, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 43.75, |
|
"grad_norm": 0.0007498673512600362, |
|
"learning_rate": 1.9411426473854688e-06, |
|
"loss": 0.0, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 43.839285714285715, |
|
"grad_norm": 0.000809444987680763, |
|
"learning_rate": 1.8867810621924165e-06, |
|
"loss": 0.0, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 43.92857142857143, |
|
"grad_norm": 0.0011670913081616163, |
|
"learning_rate": 1.8331616626836718e-06, |
|
"loss": 0.0, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 1.1925363540649414, |
|
"eval_macro_f1": 85.62321202521304, |
|
"eval_macro_precision": 87.68582327904362, |
|
"eval_macro_recall": 84.03903788519173, |
|
"eval_micro_f1": 89.55974842767296, |
|
"eval_micro_precision": 89.55974842767296, |
|
"eval_micro_recall": 89.55974842767296, |
|
"eval_runtime": 2.1265, |
|
"eval_samples_per_second": 747.691, |
|
"eval_steps_per_second": 11.756, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 44.017857142857146, |
|
"grad_norm": 0.0004898426122963428, |
|
"learning_rate": 1.7802861706250563e-06, |
|
"loss": 0.0, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 44.107142857142854, |
|
"grad_norm": 0.0005355161265470088, |
|
"learning_rate": 1.7281562838948966e-06, |
|
"loss": 0.0044, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 44.19642857142857, |
|
"grad_norm": 0.00022611931490246207, |
|
"learning_rate": 1.6767736764294605e-06, |
|
"loss": 0.0, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 44.285714285714285, |
|
"grad_norm": 0.0004798888403456658, |
|
"learning_rate": 1.626139998169246e-06, |
|
"loss": 0.0, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 44.375, |
|
"grad_norm": 0.002094451105222106, |
|
"learning_rate": 1.5762568750059604e-06, |
|
"loss": 0.0, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 44.464285714285715, |
|
"grad_norm": 0.0004955387557856739, |
|
"learning_rate": 1.5271259087303314e-06, |
|
"loss": 0.0, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 44.55357142857143, |
|
"grad_norm": 0.0005791817093268037, |
|
"learning_rate": 1.4787486769806847e-06, |
|
"loss": 0.002, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 44.642857142857146, |
|
"grad_norm": 0.0011661151656880975, |
|
"learning_rate": 1.4311267331922534e-06, |
|
"loss": 0.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 44.732142857142854, |
|
"grad_norm": 0.14946410059928894, |
|
"learning_rate": 1.3842616065473297e-06, |
|
"loss": 0.0009, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 44.82142857142857, |
|
"grad_norm": 0.0009950968669727445, |
|
"learning_rate": 1.3381548019261335e-06, |
|
"loss": 0.0, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 44.910714285714285, |
|
"grad_norm": 0.0006654797471128404, |
|
"learning_rate": 1.2928077998585087e-06, |
|
"loss": 0.0, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 0.000741883646696806, |
|
"learning_rate": 1.248222056476367e-06, |
|
"loss": 0.0, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 1.1892344951629639, |
|
"eval_macro_f1": 85.69684730927904, |
|
"eval_macro_precision": 87.80990783410138, |
|
"eval_macro_recall": 84.08130331207254, |
|
"eval_micro_f1": 89.62264150943396, |
|
"eval_micro_precision": 89.62264150943396, |
|
"eval_micro_recall": 89.62264150943396, |
|
"eval_runtime": 2.0693, |
|
"eval_samples_per_second": 768.362, |
|
"eval_steps_per_second": 12.081, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 45.089285714285715, |
|
"grad_norm": 0.0008588531636632979, |
|
"learning_rate": 1.204399003466941e-06, |
|
"loss": 0.0043, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 45.17857142857143, |
|
"grad_norm": 0.0007599690579809248, |
|
"learning_rate": 1.1613400480268099e-06, |
|
"loss": 0.0, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 45.267857142857146, |
|
"grad_norm": 0.0005483416607603431, |
|
"learning_rate": 1.1190465728167066e-06, |
|
"loss": 0.0, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 45.357142857142854, |
|
"grad_norm": 0.0006434289389289916, |
|
"learning_rate": 1.0775199359171345e-06, |
|
"loss": 0.001, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 45.44642857142857, |
|
"grad_norm": 0.0026349611580371857, |
|
"learning_rate": 1.0367614707847334e-06, |
|
"loss": 0.0, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 45.535714285714285, |
|
"grad_norm": 0.00044675698154605925, |
|
"learning_rate": 9.96772486209485e-07, |
|
"loss": 0.0, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 45.625, |
|
"grad_norm": 0.0010068505071103573, |
|
"learning_rate": 9.575542662726754e-07, |
|
"loss": 0.001, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 0.00023187148326542228, |
|
"learning_rate": 9.191080703056604e-07, |
|
"loss": 0.0, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 45.80357142857143, |
|
"grad_norm": 0.006482269149273634, |
|
"learning_rate": 8.814351328494369e-07, |
|
"loss": 0.0, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 45.892857142857146, |
|
"grad_norm": 0.0037991167046129704, |
|
"learning_rate": 8.445366636149865e-07, |
|
"loss": 0.0, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 45.982142857142854, |
|
"grad_norm": 0.0010641113622114062, |
|
"learning_rate": 8.084138474444503e-07, |
|
"loss": 0.0009, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 1.1895390748977661, |
|
"eval_macro_f1": 85.81857161383309, |
|
"eval_macro_precision": 88.1284500864264, |
|
"eval_macro_recall": 84.08524946986486, |
|
"eval_micro_f1": 89.74842767295598, |
|
"eval_micro_precision": 89.74842767295598, |
|
"eval_micro_recall": 89.74842767295598, |
|
"eval_runtime": 2.0545, |
|
"eval_samples_per_second": 773.904, |
|
"eval_steps_per_second": 12.168, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 46.07142857142857, |
|
"grad_norm": 0.0006879018619656563, |
|
"learning_rate": 7.730678442730538e-07, |
|
"loss": 0.0, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 46.160714285714285, |
|
"grad_norm": 0.0003646935510914773, |
|
"learning_rate": 7.384997890918899e-07, |
|
"loss": 0.0, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 46.25, |
|
"grad_norm": 0.0005362197407521307, |
|
"learning_rate": 7.047107919114588e-07, |
|
"loss": 0.0009, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 46.339285714285715, |
|
"grad_norm": 0.0005088266334496439, |
|
"learning_rate": 6.71701937726027e-07, |
|
"loss": 0.0009, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 46.42857142857143, |
|
"grad_norm": 0.000821845605969429, |
|
"learning_rate": 6.394742864787806e-07, |
|
"loss": 0.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 46.517857142857146, |
|
"grad_norm": 0.0003609760315157473, |
|
"learning_rate": 6.080288730278077e-07, |
|
"loss": 0.0, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 46.607142857142854, |
|
"grad_norm": 0.0006496753776445985, |
|
"learning_rate": 5.773667071128447e-07, |
|
"loss": 0.0, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 46.69642857142857, |
|
"grad_norm": 0.14351055026054382, |
|
"learning_rate": 5.474887733228656e-07, |
|
"loss": 0.0009, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 46.785714285714285, |
|
"grad_norm": 0.0012996145524084568, |
|
"learning_rate": 5.183960310644748e-07, |
|
"loss": 0.0, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 46.875, |
|
"grad_norm": 0.43366459012031555, |
|
"learning_rate": 4.900894145310753e-07, |
|
"loss": 0.0044, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 46.964285714285715, |
|
"grad_norm": 0.0005488655297085643, |
|
"learning_rate": 4.6256983267289887e-07, |
|
"loss": 0.0, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 1.1887431144714355, |
|
"eval_macro_f1": 86.01816071550488, |
|
"eval_macro_precision": 88.24407240824033, |
|
"eval_macro_recall": 84.3309497155651, |
|
"eval_micro_f1": 89.87421383647799, |
|
"eval_micro_precision": 89.87421383647799, |
|
"eval_micro_recall": 89.87421383647799, |
|
"eval_runtime": 2.1006, |
|
"eval_samples_per_second": 756.93, |
|
"eval_steps_per_second": 11.901, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 47.05357142857143, |
|
"grad_norm": 0.4307861626148224, |
|
"learning_rate": 4.358381691677932e-07, |
|
"loss": 0.0044, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 47.142857142857146, |
|
"grad_norm": 0.0007851451518945396, |
|
"learning_rate": 4.098952823928692e-07, |
|
"loss": 0.0, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 47.232142857142854, |
|
"grad_norm": 0.0006281957612372935, |
|
"learning_rate": 3.8474200539692087e-07, |
|
"loss": 0.0, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 47.32142857142857, |
|
"grad_norm": 0.000335185817675665, |
|
"learning_rate": 3.603791458736766e-07, |
|
"loss": 0.0, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 47.410714285714285, |
|
"grad_norm": 0.0007661879062652588, |
|
"learning_rate": 3.3680748613587885e-07, |
|
"loss": 0.0, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"grad_norm": 0.00047480862122029066, |
|
"learning_rate": 3.140277830901428e-07, |
|
"loss": 0.0, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 47.589285714285715, |
|
"grad_norm": 0.0009058488649316132, |
|
"learning_rate": 2.9204076821266747e-07, |
|
"loss": 0.0, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 47.67857142857143, |
|
"grad_norm": 0.0015131317777559161, |
|
"learning_rate": 2.708471475257407e-07, |
|
"loss": 0.0, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 47.767857142857146, |
|
"grad_norm": 0.001217082142829895, |
|
"learning_rate": 2.5044760157506565e-07, |
|
"loss": 0.0018, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 47.857142857142854, |
|
"grad_norm": 0.0005450554890558124, |
|
"learning_rate": 2.3084278540791427e-07, |
|
"loss": 0.0, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 47.94642857142857, |
|
"grad_norm": 0.0010108886053785682, |
|
"learning_rate": 2.1203332855208313e-07, |
|
"loss": 0.0, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 1.1932783126831055, |
|
"eval_macro_f1": 85.84447327097699, |
|
"eval_macro_precision": 88.06013659836749, |
|
"eval_macro_recall": 84.16583416583417, |
|
"eval_micro_f1": 89.74842767295598, |
|
"eval_micro_precision": 89.74842767295598, |
|
"eval_micro_recall": 89.74842767295598, |
|
"eval_runtime": 2.0801, |
|
"eval_samples_per_second": 764.369, |
|
"eval_steps_per_second": 12.018, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 48.035714285714285, |
|
"grad_norm": 0.0012142349733039737, |
|
"learning_rate": 1.9401983499569842e-07, |
|
"loss": 0.0009, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 48.125, |
|
"grad_norm": 0.0010546569246798754, |
|
"learning_rate": 1.768028831677926e-07, |
|
"loss": 0.0, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 48.214285714285715, |
|
"grad_norm": 0.0007233622018247843, |
|
"learning_rate": 1.6038302591975806e-07, |
|
"loss": 0.0043, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 48.30357142857143, |
|
"grad_norm": 0.0009546867804601789, |
|
"learning_rate": 1.4476079050757818e-07, |
|
"loss": 0.0, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 48.392857142857146, |
|
"grad_norm": 0.0010814859997481108, |
|
"learning_rate": 1.29936678574899e-07, |
|
"loss": 0.0009, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 48.482142857142854, |
|
"grad_norm": 0.00036580185405910015, |
|
"learning_rate": 1.1591116613692832e-07, |
|
"loss": 0.0, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 48.57142857142857, |
|
"grad_norm": 0.0009721943642944098, |
|
"learning_rate": 1.0268470356514237e-07, |
|
"loss": 0.0009, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 48.660714285714285, |
|
"grad_norm": 0.000539219006896019, |
|
"learning_rate": 9.025771557282792e-08, |
|
"loss": 0.0, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 48.75, |
|
"grad_norm": 0.0017280342290177941, |
|
"learning_rate": 7.863060120144317e-08, |
|
"loss": 0.0009, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 48.839285714285715, |
|
"grad_norm": 0.0009316341020166874, |
|
"learning_rate": 6.780373380780025e-08, |
|
"loss": 0.0, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 48.92857142857143, |
|
"grad_norm": 0.0016215546056628227, |
|
"learning_rate": 5.7777461052091474e-08, |
|
"loss": 0.0, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 1.190055012702942, |
|
"eval_macro_f1": 85.81857161383309, |
|
"eval_macro_precision": 88.1284500864264, |
|
"eval_macro_recall": 84.08524946986486, |
|
"eval_micro_f1": 89.74842767295598, |
|
"eval_micro_precision": 89.74842767295598, |
|
"eval_micro_recall": 89.74842767295598, |
|
"eval_runtime": 2.1039, |
|
"eval_samples_per_second": 755.753, |
|
"eval_steps_per_second": 11.883, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 49.017857142857146, |
|
"grad_norm": 0.0018325834535062313, |
|
"learning_rate": 4.855210488670381e-08, |
|
"loss": 0.0, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 49.107142857142854, |
|
"grad_norm": 0.00028368146740831435, |
|
"learning_rate": 4.01279615458966e-08, |
|
"loss": 0.0, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 49.19642857142857, |
|
"grad_norm": 0.0009773739147931337, |
|
"learning_rate": 3.250530153628417e-08, |
|
"loss": 0.0009, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 49.285714285714285, |
|
"grad_norm": 0.0006530345417559147, |
|
"learning_rate": 2.5684369628148353e-08, |
|
"loss": 0.0009, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 49.375, |
|
"grad_norm": 0.001267548301257193, |
|
"learning_rate": 1.9665384847583622e-08, |
|
"loss": 0.0, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 49.464285714285715, |
|
"grad_norm": 0.00035301086609251797, |
|
"learning_rate": 1.4448540469458316e-08, |
|
"loss": 0.0, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 49.55357142857143, |
|
"grad_norm": 0.0017323438078165054, |
|
"learning_rate": 1.0034004011202913e-08, |
|
"loss": 0.0, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 49.642857142857146, |
|
"grad_norm": 0.0008427600259892642, |
|
"learning_rate": 6.421917227455998e-09, |
|
"loss": 0.0, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 49.732142857142854, |
|
"grad_norm": 0.14544406533241272, |
|
"learning_rate": 3.6123961054762567e-09, |
|
"loss": 0.0009, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 49.82142857142857, |
|
"grad_norm": 0.0004677158431150019, |
|
"learning_rate": 1.605530861450988e-09, |
|
"loss": 0.0, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 49.910714285714285, |
|
"grad_norm": 0.0009074215777218342, |
|
"learning_rate": 4.0138593757621523e-10, |
|
"loss": 0.0, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.0004363281768746674, |
|
"learning_rate": 0.0, |
|
"loss": 0.0043, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.1904431581497192, |
|
"eval_macro_f1": 85.6707858264491, |
|
"eval_macro_precision": 87.87620078849466, |
|
"eval_macro_recall": 84.00071861610323, |
|
"eval_micro_f1": 89.62264150943396, |
|
"eval_micro_precision": 89.62264150943396, |
|
"eval_micro_recall": 89.62264150943396, |
|
"eval_runtime": 2.6821, |
|
"eval_samples_per_second": 592.81, |
|
"eval_steps_per_second": 9.321, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 2800, |
|
"total_flos": 1.502828979688571e+17, |
|
"train_loss": 0.1562508400436075, |
|
"train_runtime": 2658.12, |
|
"train_samples_per_second": 269.1, |
|
"train_steps_per_second": 1.053 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500.0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.502828979688571e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|