|
{
  "best_metric": 0.8691893001382458,
  "best_model_checkpoint": "CurbRamp/dinov2/checkpoint-2875",
  "epoch": 24.786177105831534,
  "eval_steps": 500,
  "global_step": 2875,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08639308855291576,
      "grad_norm": 381.287109375,
      "learning_rate": 3.472222222222223e-07,
      "loss": 3.0906,
      "step": 10
    },
    {
      "epoch": 0.17278617710583152,
      "grad_norm": 161.04339599609375,
      "learning_rate": 6.944444444444446e-07,
      "loss": 2.6994,
      "step": 20
    },
    {
      "epoch": 0.2591792656587473,
      "grad_norm": 111.33332824707031,
      "learning_rate": 1.0416666666666667e-06,
      "loss": 2.6304,
      "step": 30
    },
    {
      "epoch": 0.34557235421166305,
      "grad_norm": 74.54171752929688,
      "learning_rate": 1.3888888888888892e-06,
      "loss": 2.4279,
      "step": 40
    },
    {
      "epoch": 0.4319654427645788,
      "grad_norm": 62.23763656616211,
      "learning_rate": 1.7361111111111112e-06,
      "loss": 2.0601,
      "step": 50
    },
    {
      "epoch": 0.5183585313174947,
      "grad_norm": 109.49163055419922,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 2.2144,
      "step": 60
    },
    {
      "epoch": 0.6047516198704104,
      "grad_norm": 140.09844970703125,
      "learning_rate": 2.4305555555555557e-06,
      "loss": 1.9144,
      "step": 70
    },
    {
      "epoch": 0.6911447084233261,
      "grad_norm": 86.56855010986328,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 1.9794,
      "step": 80
    },
    {
      "epoch": 0.7775377969762419,
      "grad_norm": 87.58937072753906,
      "learning_rate": 3.125e-06,
      "loss": 1.99,
      "step": 90
    },
    {
      "epoch": 0.8639308855291576,
      "grad_norm": 95.44474029541016,
      "learning_rate": 3.4722222222222224e-06,
      "loss": 1.8081,
      "step": 100
    },
    {
      "epoch": 0.9503239740820735,
      "grad_norm": 135.51611328125,
      "learning_rate": 3.819444444444444e-06,
      "loss": 1.723,
      "step": 110
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5266662836074829,
      "eval_macro_f1": 0.7558472709624431,
      "eval_runtime": 52.7672,
      "eval_samples_per_second": 17.549,
      "eval_steps_per_second": 2.198,
      "step": 116
    },
    {
      "epoch": 1.0345572354211663,
      "grad_norm": 67.76123809814453,
      "learning_rate": 4.166666666666667e-06,
      "loss": 1.7629,
      "step": 120
    },
    {
      "epoch": 1.1209503239740821,
      "grad_norm": 164.42552185058594,
      "learning_rate": 4.5138888888888895e-06,
      "loss": 1.7207,
      "step": 130
    },
    {
      "epoch": 1.2073434125269977,
      "grad_norm": 63.25754165649414,
      "learning_rate": 4.861111111111111e-06,
      "loss": 1.8562,
      "step": 140
    },
    {
      "epoch": 1.2937365010799136,
      "grad_norm": 101.76183319091797,
      "learning_rate": 5.208333333333334e-06,
      "loss": 1.5125,
      "step": 150
    },
    {
      "epoch": 1.3801295896328294,
      "grad_norm": 104.06229400634766,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.7283,
      "step": 160
    },
    {
      "epoch": 1.4665226781857452,
      "grad_norm": 100.93425750732422,
      "learning_rate": 5.9027777777777785e-06,
      "loss": 1.8195,
      "step": 170
    },
    {
      "epoch": 1.552915766738661,
      "grad_norm": 73.55767822265625,
      "learning_rate": 6.25e-06,
      "loss": 1.6554,
      "step": 180
    },
    {
      "epoch": 1.6393088552915767,
      "grad_norm": 80.74930572509766,
      "learning_rate": 6.597222222222223e-06,
      "loss": 1.4719,
      "step": 190
    },
    {
      "epoch": 1.7257019438444925,
      "grad_norm": 70.96980285644531,
      "learning_rate": 6.944444444444445e-06,
      "loss": 1.8123,
      "step": 200
    },
    {
      "epoch": 1.812095032397408,
      "grad_norm": 59.61140823364258,
      "learning_rate": 7.291666666666667e-06,
      "loss": 1.4448,
      "step": 210
    },
    {
      "epoch": 1.898488120950324,
      "grad_norm": 278.9685974121094,
      "learning_rate": 7.638888888888888e-06,
      "loss": 1.7311,
      "step": 220
    },
    {
      "epoch": 1.9848812095032398,
      "grad_norm": 149.49319458007812,
      "learning_rate": 7.986111111111112e-06,
      "loss": 1.79,
      "step": 230
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.5045331716537476,
      "eval_macro_f1": 0.7580948382431834,
      "eval_runtime": 16.9579,
      "eval_samples_per_second": 54.606,
      "eval_steps_per_second": 6.84,
      "step": 232
    },
    {
      "epoch": 2.0691144708423326,
      "grad_norm": 39.244441986083984,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.4816,
      "step": 240
    },
    {
      "epoch": 2.1555075593952484,
      "grad_norm": 62.760921478271484,
      "learning_rate": 8.680555555555557e-06,
      "loss": 1.5412,
      "step": 250
    },
    {
      "epoch": 2.2419006479481642,
      "grad_norm": 58.07555389404297,
      "learning_rate": 9.027777777777779e-06,
      "loss": 1.5415,
      "step": 260
    },
    {
      "epoch": 2.32829373650108,
      "grad_norm": 83.29940032958984,
      "learning_rate": 9.375000000000001e-06,
      "loss": 1.3842,
      "step": 270
    },
    {
      "epoch": 2.4146868250539955,
      "grad_norm": 91.45130157470703,
      "learning_rate": 9.722222222222223e-06,
      "loss": 1.3325,
      "step": 280
    },
    {
      "epoch": 2.5010799136069113,
      "grad_norm": 63.806114196777344,
      "learning_rate": 9.99226903749517e-06,
      "loss": 1.4369,
      "step": 290
    },
    {
      "epoch": 2.587473002159827,
      "grad_norm": 70.26383972167969,
      "learning_rate": 9.95361422497101e-06,
      "loss": 1.8433,
      "step": 300
    },
    {
      "epoch": 2.673866090712743,
      "grad_norm": 136.43692016601562,
      "learning_rate": 9.914959412446851e-06,
      "loss": 1.9714,
      "step": 310
    },
    {
      "epoch": 2.760259179265659,
      "grad_norm": 66.07856750488281,
      "learning_rate": 9.87630459992269e-06,
      "loss": 1.5597,
      "step": 320
    },
    {
      "epoch": 2.8466522678185746,
      "grad_norm": 43.0218620300293,
      "learning_rate": 9.837649787398532e-06,
      "loss": 1.7732,
      "step": 330
    },
    {
      "epoch": 2.9330453563714904,
      "grad_norm": 85.20895385742188,
      "learning_rate": 9.798994974874372e-06,
      "loss": 1.8472,
      "step": 340
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.5161793231964111,
      "eval_macro_f1": 0.7796335673223247,
      "eval_runtime": 17.216,
      "eval_samples_per_second": 53.787,
      "eval_steps_per_second": 6.738,
      "step": 348
    },
    {
      "epoch": 3.0172786177105833,
      "grad_norm": 42.26615905761719,
      "learning_rate": 9.760340162350214e-06,
      "loss": 1.261,
      "step": 350
    },
    {
      "epoch": 3.103671706263499,
      "grad_norm": 152.00830078125,
      "learning_rate": 9.721685349826055e-06,
      "loss": 1.1856,
      "step": 360
    },
    {
      "epoch": 3.190064794816415,
      "grad_norm": 70.4239273071289,
      "learning_rate": 9.683030537301895e-06,
      "loss": 1.2873,
      "step": 370
    },
    {
      "epoch": 3.2764578833693303,
      "grad_norm": 48.222984313964844,
      "learning_rate": 9.644375724777736e-06,
      "loss": 1.0901,
      "step": 380
    },
    {
      "epoch": 3.362850971922246,
      "grad_norm": 56.9564323425293,
      "learning_rate": 9.605720912253576e-06,
      "loss": 1.1631,
      "step": 390
    },
    {
      "epoch": 3.449244060475162,
      "grad_norm": 48.229427337646484,
      "learning_rate": 9.567066099729418e-06,
      "loss": 1.3436,
      "step": 400
    },
    {
      "epoch": 3.535637149028078,
      "grad_norm": 69.0005111694336,
      "learning_rate": 9.528411287205257e-06,
      "loss": 1.0393,
      "step": 410
    },
    {
      "epoch": 3.6220302375809936,
      "grad_norm": 69.22911071777344,
      "learning_rate": 9.489756474681099e-06,
      "loss": 1.3696,
      "step": 420
    },
    {
      "epoch": 3.708423326133909,
      "grad_norm": 89.3969497680664,
      "learning_rate": 9.45110166215694e-06,
      "loss": 1.2418,
      "step": 430
    },
    {
      "epoch": 3.7948164146868253,
      "grad_norm": 76.02294921875,
      "learning_rate": 9.41244684963278e-06,
      "loss": 1.3444,
      "step": 440
    },
    {
      "epoch": 3.8812095032397407,
      "grad_norm": 79.65322875976562,
      "learning_rate": 9.373792037108622e-06,
      "loss": 1.5226,
      "step": 450
    },
    {
      "epoch": 3.9676025917926565,
      "grad_norm": 118.36911010742188,
      "learning_rate": 9.335137224584461e-06,
      "loss": 1.4474,
      "step": 460
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.39091792702674866,
      "eval_macro_f1": 0.8356080726022923,
      "eval_runtime": 17.0101,
      "eval_samples_per_second": 54.438,
      "eval_steps_per_second": 6.819,
      "step": 464
    },
    {
      "epoch": 4.05183585313175,
      "grad_norm": 34.40155029296875,
      "learning_rate": 9.296482412060303e-06,
      "loss": 0.9722,
      "step": 470
    },
    {
      "epoch": 4.138228941684665,
      "grad_norm": 52.04656219482422,
      "learning_rate": 9.257827599536143e-06,
      "loss": 0.9749,
      "step": 480
    },
    {
      "epoch": 4.224622030237581,
      "grad_norm": 65.06253814697266,
      "learning_rate": 9.219172787011984e-06,
      "loss": 1.1242,
      "step": 490
    },
    {
      "epoch": 4.311015118790497,
      "grad_norm": 66.4505844116211,
      "learning_rate": 9.180517974487826e-06,
      "loss": 1.1458,
      "step": 500
    },
    {
      "epoch": 4.397408207343412,
      "grad_norm": 158.91885375976562,
      "learning_rate": 9.141863161963665e-06,
      "loss": 1.3353,
      "step": 510
    },
    {
      "epoch": 4.4838012958963285,
      "grad_norm": 60.72650909423828,
      "learning_rate": 9.103208349439507e-06,
      "loss": 1.1444,
      "step": 520
    },
    {
      "epoch": 4.570194384449244,
      "grad_norm": 46.68295669555664,
      "learning_rate": 9.064553536915347e-06,
      "loss": 1.1094,
      "step": 530
    },
    {
      "epoch": 4.65658747300216,
      "grad_norm": 121.51728820800781,
      "learning_rate": 9.025898724391188e-06,
      "loss": 1.398,
      "step": 540
    },
    {
      "epoch": 4.7429805615550755,
      "grad_norm": 46.293556213378906,
      "learning_rate": 8.987243911867028e-06,
      "loss": 1.1777,
      "step": 550
    },
    {
      "epoch": 4.829373650107991,
      "grad_norm": 67.82626342773438,
      "learning_rate": 8.94858909934287e-06,
      "loss": 1.0463,
      "step": 560
    },
    {
      "epoch": 4.915766738660907,
      "grad_norm": 54.6425895690918,
      "learning_rate": 8.90993428681871e-06,
      "loss": 1.0591,
      "step": 570
    },
    {
      "epoch": 5.0,
      "grad_norm": 46.120140075683594,
      "learning_rate": 8.87127947429455e-06,
      "loss": 1.0474,
      "step": 580
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.5121903419494629,
      "eval_macro_f1": 0.7977332272718247,
      "eval_runtime": 17.2455,
      "eval_samples_per_second": 53.695,
      "eval_steps_per_second": 6.726,
      "step": 580
    },
    {
      "epoch": 5.086393088552915,
      "grad_norm": 59.340538024902344,
      "learning_rate": 8.832624661770392e-06,
      "loss": 0.9155,
      "step": 590
    },
    {
      "epoch": 5.172786177105832,
      "grad_norm": 47.96815872192383,
      "learning_rate": 8.793969849246232e-06,
      "loss": 0.8381,
      "step": 600
    },
    {
      "epoch": 5.259179265658747,
      "grad_norm": 50.32111358642578,
      "learning_rate": 8.755315036722073e-06,
      "loss": 0.9608,
      "step": 610
    },
    {
      "epoch": 5.345572354211663,
      "grad_norm": 76.34898376464844,
      "learning_rate": 8.716660224197913e-06,
      "loss": 0.9766,
      "step": 620
    },
    {
      "epoch": 5.431965442764579,
      "grad_norm": 40.53537368774414,
      "learning_rate": 8.678005411673755e-06,
      "loss": 1.1101,
      "step": 630
    },
    {
      "epoch": 5.518358531317495,
      "grad_norm": 62.64771270751953,
      "learning_rate": 8.639350599149594e-06,
      "loss": 1.0492,
      "step": 640
    },
    {
      "epoch": 5.60475161987041,
      "grad_norm": 34.409095764160156,
      "learning_rate": 8.600695786625436e-06,
      "loss": 1.0763,
      "step": 650
    },
    {
      "epoch": 5.691144708423326,
      "grad_norm": 77.56722259521484,
      "learning_rate": 8.562040974101277e-06,
      "loss": 0.9917,
      "step": 660
    },
    {
      "epoch": 5.777537796976242,
      "grad_norm": 37.74449920654297,
      "learning_rate": 8.523386161577117e-06,
      "loss": 1.1072,
      "step": 670
    },
    {
      "epoch": 5.863930885529157,
      "grad_norm": 55.60712814331055,
      "learning_rate": 8.484731349052959e-06,
      "loss": 0.9095,
      "step": 680
    },
    {
      "epoch": 5.950323974082074,
      "grad_norm": 45.91484451293945,
      "learning_rate": 8.446076536528798e-06,
      "loss": 0.931,
      "step": 690
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.41715672612190247,
      "eval_macro_f1": 0.8296228150873965,
      "eval_runtime": 16.9093,
      "eval_samples_per_second": 54.763,
      "eval_steps_per_second": 6.86,
      "step": 696
    },
    {
      "epoch": 6.0345572354211665,
      "grad_norm": 38.476654052734375,
      "learning_rate": 8.40742172400464e-06,
      "loss": 0.7875,
      "step": 700
    },
    {
      "epoch": 6.120950323974082,
      "grad_norm": 23.64281463623047,
      "learning_rate": 8.36876691148048e-06,
      "loss": 0.7492,
      "step": 710
    },
    {
      "epoch": 6.207343412526998,
      "grad_norm": 102.30036926269531,
      "learning_rate": 8.330112098956321e-06,
      "loss": 0.6453,
      "step": 720
    },
    {
      "epoch": 6.293736501079914,
      "grad_norm": 27.228309631347656,
      "learning_rate": 8.291457286432163e-06,
      "loss": 0.9595,
      "step": 730
    },
    {
      "epoch": 6.38012958963283,
      "grad_norm": 54.58305358886719,
      "learning_rate": 8.252802473908002e-06,
      "loss": 0.5922,
      "step": 740
    },
    {
      "epoch": 6.466522678185745,
      "grad_norm": 99.91376495361328,
      "learning_rate": 8.214147661383844e-06,
      "loss": 0.7878,
      "step": 750
    },
    {
      "epoch": 6.552915766738661,
      "grad_norm": 91.14471435546875,
      "learning_rate": 8.175492848859684e-06,
      "loss": 0.7622,
      "step": 760
    },
    {
      "epoch": 6.639308855291577,
      "grad_norm": 79.27790069580078,
      "learning_rate": 8.136838036335525e-06,
      "loss": 0.9503,
      "step": 770
    },
    {
      "epoch": 6.725701943844492,
      "grad_norm": 70.4741439819336,
      "learning_rate": 8.098183223811365e-06,
      "loss": 0.8432,
      "step": 780
    },
    {
      "epoch": 6.8120950323974085,
      "grad_norm": 62.995697021484375,
      "learning_rate": 8.059528411287206e-06,
      "loss": 0.8593,
      "step": 790
    },
    {
      "epoch": 6.898488120950324,
      "grad_norm": 34.43967056274414,
      "learning_rate": 8.020873598763048e-06,
      "loss": 0.577,
      "step": 800
    },
    {
      "epoch": 6.984881209503239,
      "grad_norm": 101.01258087158203,
      "learning_rate": 7.982218786238888e-06,
      "loss": 0.9042,
      "step": 810
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.4327438175678253,
      "eval_macro_f1": 0.8414019539605782,
      "eval_runtime": 16.9563,
      "eval_samples_per_second": 54.611,
      "eval_steps_per_second": 6.841,
      "step": 812
    },
    {
      "epoch": 7.069114470842333,
      "grad_norm": 60.159706115722656,
      "learning_rate": 7.943563973714729e-06,
      "loss": 0.7565,
      "step": 820
    },
    {
      "epoch": 7.155507559395248,
      "grad_norm": 94.81135559082031,
      "learning_rate": 7.904909161190569e-06,
      "loss": 0.5015,
      "step": 830
    },
    {
      "epoch": 7.241900647948164,
      "grad_norm": 148.54061889648438,
      "learning_rate": 7.86625434866641e-06,
      "loss": 0.9681,
      "step": 840
    },
    {
      "epoch": 7.32829373650108,
      "grad_norm": 89.39724731445312,
      "learning_rate": 7.82759953614225e-06,
      "loss": 1.3573,
      "step": 850
    },
    {
      "epoch": 7.4146868250539955,
      "grad_norm": 32.64114761352539,
      "learning_rate": 7.788944723618092e-06,
      "loss": 0.5587,
      "step": 860
    },
    {
      "epoch": 7.501079913606912,
      "grad_norm": 49.56209182739258,
      "learning_rate": 7.750289911093933e-06,
      "loss": 0.7781,
      "step": 870
    },
    {
      "epoch": 7.587473002159827,
      "grad_norm": 65.53961181640625,
      "learning_rate": 7.711635098569773e-06,
      "loss": 0.6911,
      "step": 880
    },
    {
      "epoch": 7.6738660907127425,
      "grad_norm": 52.27827835083008,
      "learning_rate": 7.672980286045614e-06,
      "loss": 0.8156,
      "step": 890
    },
    {
      "epoch": 7.760259179265659,
      "grad_norm": 58.8625602722168,
      "learning_rate": 7.634325473521454e-06,
      "loss": 0.6504,
      "step": 900
    },
    {
      "epoch": 7.846652267818574,
      "grad_norm": 58.183353424072266,
      "learning_rate": 7.595670660997296e-06,
      "loss": 0.8262,
      "step": 910
    },
    {
      "epoch": 7.93304535637149,
      "grad_norm": 66.90734100341797,
      "learning_rate": 7.557015848473136e-06,
      "loss": 0.7893,
      "step": 920
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.4484424591064453,
      "eval_macro_f1": 0.832676482797353,
      "eval_runtime": 17.3598,
      "eval_samples_per_second": 53.342,
      "eval_steps_per_second": 6.682,
      "step": 928
    },
    {
      "epoch": 8.017278617710582,
      "grad_norm": 56.62957000732422,
      "learning_rate": 7.518361035948977e-06,
      "loss": 0.6286,
      "step": 930
    },
    {
      "epoch": 8.1036717062635,
      "grad_norm": 83.84600067138672,
      "learning_rate": 7.4797062234248175e-06,
      "loss": 0.809,
      "step": 940
    },
    {
      "epoch": 8.190064794816415,
      "grad_norm": 63.44249725341797,
      "learning_rate": 7.441051410900658e-06,
      "loss": 0.7384,
      "step": 950
    },
    {
      "epoch": 8.27645788336933,
      "grad_norm": 30.475244522094727,
      "learning_rate": 7.402396598376499e-06,
      "loss": 0.561,
      "step": 960
    },
    {
      "epoch": 8.362850971922246,
      "grad_norm": 67.37047576904297,
      "learning_rate": 7.363741785852339e-06,
      "loss": 0.7086,
      "step": 970
    },
    {
      "epoch": 8.449244060475163,
      "grad_norm": 61.570945739746094,
      "learning_rate": 7.325086973328181e-06,
      "loss": 0.5365,
      "step": 980
    },
    {
      "epoch": 8.535637149028078,
      "grad_norm": 52.1601676940918,
      "learning_rate": 7.2864321608040215e-06,
      "loss": 0.4852,
      "step": 990
    },
    {
      "epoch": 8.622030237580994,
      "grad_norm": 34.1832275390625,
      "learning_rate": 7.247777348279862e-06,
      "loss": 0.6972,
      "step": 1000
    },
    {
      "epoch": 8.708423326133909,
      "grad_norm": 63.52924346923828,
      "learning_rate": 7.209122535755703e-06,
      "loss": 0.7469,
      "step": 1010
    },
    {
      "epoch": 8.794816414686824,
      "grad_norm": 62.66010284423828,
      "learning_rate": 7.170467723231543e-06,
      "loss": 0.9771,
      "step": 1020
    },
    {
      "epoch": 8.881209503239742,
      "grad_norm": 76.71471405029297,
      "learning_rate": 7.131812910707384e-06,
      "loss": 0.7411,
      "step": 1030
    },
    {
      "epoch": 8.967602591792657,
      "grad_norm": 27.406539916992188,
      "learning_rate": 7.093158098183225e-06,
      "loss": 0.7943,
      "step": 1040
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.46906793117523193,
      "eval_macro_f1": 0.8359894804603093,
      "eval_runtime": 17.671,
      "eval_samples_per_second": 52.402,
      "eval_steps_per_second": 6.564,
      "step": 1044
    },
    {
      "epoch": 9.051835853131749,
      "grad_norm": 16.15360450744629,
      "learning_rate": 7.054503285659065e-06,
      "loss": 0.4819,
      "step": 1050
    },
    {
      "epoch": 9.138228941684666,
      "grad_norm": 86.55136108398438,
      "learning_rate": 7.015848473134907e-06,
      "loss": 0.8165,
      "step": 1060
    },
    {
      "epoch": 9.224622030237581,
      "grad_norm": 58.48967361450195,
      "learning_rate": 6.977193660610747e-06,
      "loss": 0.6899,
      "step": 1070
    },
    {
      "epoch": 9.311015118790497,
      "grad_norm": 74.65888214111328,
      "learning_rate": 6.938538848086588e-06,
      "loss": 0.4518,
      "step": 1080
    },
    {
      "epoch": 9.397408207343412,
      "grad_norm": 32.85594940185547,
      "learning_rate": 6.899884035562429e-06,
      "loss": 0.5768,
      "step": 1090
    },
    {
      "epoch": 9.483801295896328,
      "grad_norm": 97.70096588134766,
      "learning_rate": 6.861229223038269e-06,
      "loss": 0.5102,
      "step": 1100
    },
    {
      "epoch": 9.570194384449245,
      "grad_norm": 46.2236213684082,
      "learning_rate": 6.82257441051411e-06,
      "loss": 0.5204,
      "step": 1110
    },
    {
      "epoch": 9.65658747300216,
      "grad_norm": 67.02069091796875,
      "learning_rate": 6.7839195979899505e-06,
      "loss": 0.7371,
      "step": 1120
    },
    {
      "epoch": 9.742980561555076,
      "grad_norm": 47.46559143066406,
      "learning_rate": 6.745264785465792e-06,
      "loss": 0.5447,
      "step": 1130
    },
    {
      "epoch": 9.829373650107991,
      "grad_norm": 31.422773361206055,
      "learning_rate": 6.706609972941633e-06,
      "loss": 0.6734,
      "step": 1140
    },
    {
      "epoch": 9.915766738660906,
      "grad_norm": 35.7156867980957,
      "learning_rate": 6.667955160417473e-06,
      "loss": 0.6255,
      "step": 1150
    },
    {
      "epoch": 10.0,
      "grad_norm": 71.64591217041016,
      "learning_rate": 6.629300347893314e-06,
      "loss": 0.482,
      "step": 1160
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.44722601771354675,
      "eval_macro_f1": 0.846851581651592,
      "eval_runtime": 16.8938,
      "eval_samples_per_second": 54.813,
      "eval_steps_per_second": 6.866,
      "step": 1160
    },
    {
      "epoch": 10.086393088552915,
      "grad_norm": 53.67696762084961,
      "learning_rate": 6.5906455353691545e-06,
      "loss": 0.5785,
      "step": 1170
    },
    {
      "epoch": 10.17278617710583,
      "grad_norm": 26.457019805908203,
      "learning_rate": 6.551990722844995e-06,
      "loss": 0.528,
      "step": 1180
    },
    {
      "epoch": 10.259179265658748,
      "grad_norm": 87.2643051147461,
      "learning_rate": 6.513335910320836e-06,
      "loss": 0.5065,
      "step": 1190
    },
    {
      "epoch": 10.345572354211663,
      "grad_norm": 63.67802810668945,
      "learning_rate": 6.474681097796676e-06,
      "loss": 0.4843,
      "step": 1200
    },
    {
      "epoch": 10.431965442764579,
      "grad_norm": 92.45316314697266,
      "learning_rate": 6.436026285272518e-06,
      "loss": 0.3929,
      "step": 1210
    },
    {
      "epoch": 10.518358531317494,
      "grad_norm": 110.85811614990234,
      "learning_rate": 6.3973714727483585e-06,
      "loss": 0.5893,
      "step": 1220
    },
    {
      "epoch": 10.60475161987041,
      "grad_norm": 84.82708740234375,
      "learning_rate": 6.358716660224199e-06,
      "loss": 0.5031,
      "step": 1230
    },
    {
      "epoch": 10.691144708423327,
      "grad_norm": 75.6590576171875,
      "learning_rate": 6.32006184770004e-06,
      "loss": 0.5282,
      "step": 1240
    },
    {
      "epoch": 10.777537796976242,
      "grad_norm": 55.62372589111328,
      "learning_rate": 6.28140703517588e-06,
      "loss": 0.4551,
      "step": 1250
    },
    {
      "epoch": 10.863930885529157,
      "grad_norm": 61.89540100097656,
      "learning_rate": 6.24275222265172e-06,
      "loss": 0.5981,
      "step": 1260
    },
    {
      "epoch": 10.950323974082073,
      "grad_norm": 51.5389404296875,
      "learning_rate": 6.204097410127561e-06,
      "loss": 0.5145,
      "step": 1270
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.45396292209625244,
      "eval_macro_f1": 0.8514981354090372,
      "eval_runtime": 16.9232,
      "eval_samples_per_second": 54.718,
      "eval_steps_per_second": 6.855,
      "step": 1276
    },
    {
      "epoch": 11.034557235421167,
      "grad_norm": 73.23987579345703,
      "learning_rate": 6.165442597603401e-06,
      "loss": 0.5773,
      "step": 1280
    },
    {
      "epoch": 11.120950323974082,
      "grad_norm": 26.071277618408203,
      "learning_rate": 6.126787785079242e-06,
      "loss": 0.5628,
      "step": 1290
    },
    {
      "epoch": 11.207343412526997,
      "grad_norm": 49.137691497802734,
      "learning_rate": 6.088132972555083e-06,
      "loss": 0.4894,
      "step": 1300
    },
    {
      "epoch": 11.293736501079914,
      "grad_norm": 63.3178825378418,
      "learning_rate": 6.049478160030924e-06,
      "loss": 0.6504,
      "step": 1310
    },
    {
      "epoch": 11.38012958963283,
      "grad_norm": 20.981409072875977,
      "learning_rate": 6.010823347506765e-06,
      "loss": 0.4835,
      "step": 1320
    },
    {
      "epoch": 11.466522678185745,
      "grad_norm": 35.6384162902832,
      "learning_rate": 5.972168534982605e-06,
      "loss": 0.568,
      "step": 1330
    },
    {
      "epoch": 11.55291576673866,
      "grad_norm": 20.20071029663086,
      "learning_rate": 5.933513722458446e-06,
      "loss": 0.2126,
      "step": 1340
    },
    {
      "epoch": 11.639308855291576,
      "grad_norm": 37.92521667480469,
      "learning_rate": 5.894858909934287e-06,
      "loss": 0.4932,
      "step": 1350
    },
    {
      "epoch": 11.725701943844493,
      "grad_norm": 20.3985538482666,
      "learning_rate": 5.856204097410127e-06,
      "loss": 0.5133,
      "step": 1360
    },
    {
      "epoch": 11.812095032397409,
      "grad_norm": 70.6824951171875,
      "learning_rate": 5.817549284885968e-06,
      "loss": 0.7987,
      "step": 1370
    },
    {
      "epoch": 11.898488120950324,
      "grad_norm": 133.84133911132812,
      "learning_rate": 5.778894472361809e-06,
      "loss": 0.5215,
      "step": 1380
    },
    {
      "epoch": 11.98488120950324,
      "grad_norm": 30.232711791992188,
      "learning_rate": 5.74023965983765e-06,
      "loss": 0.4581,
      "step": 1390
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.4816704988479614,
      "eval_macro_f1": 0.8516025641025642,
      "eval_runtime": 17.3032,
      "eval_samples_per_second": 53.516,
      "eval_steps_per_second": 6.704,
      "step": 1392
    },
    {
      "epoch": 12.069114470842333,
      "grad_norm": 65.2093276977539,
      "learning_rate": 5.701584847313491e-06,
      "loss": 0.285,
      "step": 1400
    },
    {
      "epoch": 12.155507559395248,
      "grad_norm": 73.72334289550781,
      "learning_rate": 5.662930034789331e-06,
      "loss": 0.5098,
      "step": 1410
    },
    {
      "epoch": 12.241900647948164,
      "grad_norm": 49.37625503540039,
      "learning_rate": 5.624275222265172e-06,
      "loss": 0.623,
      "step": 1420
    },
    {
      "epoch": 12.32829373650108,
      "grad_norm": 49.2904052734375,
      "learning_rate": 5.5856204097410125e-06,
      "loss": 0.4641,
      "step": 1430
    },
    {
      "epoch": 12.414686825053996,
      "grad_norm": 29.282928466796875,
      "learning_rate": 5.546965597216853e-06,
      "loss": 0.4904,
      "step": 1440
    },
    {
      "epoch": 12.501079913606912,
      "grad_norm": 51.97225570678711,
      "learning_rate": 5.508310784692694e-06,
      "loss": 0.4044,
      "step": 1450
    },
    {
      "epoch": 12.587473002159827,
      "grad_norm": 44.74934768676758,
      "learning_rate": 5.469655972168535e-06,
      "loss": 0.6768,
      "step": 1460
    },
    {
      "epoch": 12.673866090712743,
      "grad_norm": 40.65571975708008,
      "learning_rate": 5.431001159644376e-06,
      "loss": 0.3807,
      "step": 1470
    },
    {
      "epoch": 12.76025917926566,
      "grad_norm": 204.83670043945312,
      "learning_rate": 5.3923463471202165e-06,
      "loss": 0.3421,
      "step": 1480
    },
    {
      "epoch": 12.846652267818575,
      "grad_norm": 45.30833053588867,
      "learning_rate": 5.353691534596057e-06,
      "loss": 0.4806,
      "step": 1490
    },
    {
      "epoch": 12.93304535637149,
      "grad_norm": 30.75054168701172,
      "learning_rate": 5.315036722071898e-06,
      "loss": 0.3743,
      "step": 1500
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.4918636083602905,
      "eval_macro_f1": 0.8659523584493705,
      "eval_runtime": 17.5008,
      "eval_samples_per_second": 52.912,
      "eval_steps_per_second": 6.628,
      "step": 1508
    },
    {
      "epoch": 13.017278617710582,
      "grad_norm": 76.16972351074219,
      "learning_rate": 5.2763819095477384e-06,
      "loss": 0.2202,
      "step": 1510
    },
    {
      "epoch": 13.1036717062635,
      "grad_norm": 2.941861152648926,
      "learning_rate": 5.237727097023579e-06,
      "loss": 0.5201,
      "step": 1520
    },
    {
      "epoch": 13.190064794816415,
      "grad_norm": 102.31110382080078,
      "learning_rate": 5.1990722844994205e-06,
      "loss": 0.3936,
      "step": 1530
    },
    {
      "epoch": 13.27645788336933,
      "grad_norm": 118.30928802490234,
      "learning_rate": 5.160417471975261e-06,
      "loss": 0.4541,
      "step": 1540
    },
    {
      "epoch": 13.362850971922246,
      "grad_norm": 27.03119659423828,
      "learning_rate": 5.121762659451102e-06,
      "loss": 0.5705,
      "step": 1550
    },
    {
      "epoch": 13.449244060475163,
      "grad_norm": 21.778711318969727,
      "learning_rate": 5.083107846926942e-06,
      "loss": 0.3093,
      "step": 1560
    },
    {
      "epoch": 13.535637149028078,
      "grad_norm": 7.353912830352783,
      "learning_rate": 5.044453034402783e-06,
      "loss": 0.2553,
      "step": 1570
    },
    {
      "epoch": 13.622030237580994,
      "grad_norm": 41.609153747558594,
      "learning_rate": 5.005798221878624e-06,
      "loss": 0.4612,
      "step": 1580
    },
    {
      "epoch": 13.708423326133909,
      "grad_norm": 167.5371856689453,
      "learning_rate": 4.967143409354465e-06,
      "loss": 0.8447,
      "step": 1590
    },
    {
      "epoch": 13.794816414686824,
      "grad_norm": 63.91857147216797,
      "learning_rate": 4.928488596830306e-06,
      "loss": 0.2376,
      "step": 1600
    },
    {
      "epoch": 13.881209503239742,
      "grad_norm": 5.819667816162109,
      "learning_rate": 4.889833784306146e-06,
      "loss": 0.2624,
      "step": 1610
    },
    {
      "epoch": 13.967602591792657,
      "grad_norm": 61.421180725097656,
      "learning_rate": 4.851178971781987e-06,
      "loss": 0.5579,
      "step": 1620
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.5265308022499084,
      "eval_macro_f1": 0.8497394016895295,
      "eval_runtime": 16.9332,
      "eval_samples_per_second": 54.685,
      "eval_steps_per_second": 6.85,
      "step": 1624
    },
    {
      "epoch": 14.051835853131749,
      "grad_norm": 55.67881774902344,
      "learning_rate": 4.8125241592578285e-06,
      "loss": 0.3435,
      "step": 1630
    },
    {
      "epoch": 14.138228941684666,
      "grad_norm": 68.73692321777344,
      "learning_rate": 4.773869346733669e-06,
      "loss": 0.4087,
      "step": 1640
    },
    {
      "epoch": 14.224622030237581,
      "grad_norm": 17.55417823791504,
      "learning_rate": 4.73521453420951e-06,
      "loss": 0.4633,
      "step": 1650
    },
    {
      "epoch": 14.311015118790497,
      "grad_norm": 71.73014831542969,
      "learning_rate": 4.69655972168535e-06,
      "loss": 0.3692,
      "step": 1660
    },
    {
      "epoch": 14.397408207343412,
      "grad_norm": 56.00436782836914,
      "learning_rate": 4.657904909161191e-06,
      "loss": 0.3288,
      "step": 1670
    },
    {
      "epoch": 14.483801295896328,
      "grad_norm": 25.29050636291504,
      "learning_rate": 4.619250096637032e-06,
      "loss": 0.3702,
      "step": 1680
    },
    {
      "epoch": 14.570194384449245,
      "grad_norm": 56.083961486816406,
      "learning_rate": 4.580595284112872e-06,
      "loss": 0.3286,
      "step": 1690
    },
    {
      "epoch": 14.65658747300216,
      "grad_norm": 75.87052154541016,
      "learning_rate": 4.541940471588713e-06,
      "loss": 0.4873,
      "step": 1700
    },
    {
      "epoch": 14.742980561555076,
      "grad_norm": 44.087547302246094,
      "learning_rate": 4.503285659064554e-06,
      "loss": 0.2683,
      "step": 1710
    },
    {
      "epoch": 14.829373650107991,
      "grad_norm": 22.894262313842773,
      "learning_rate": 4.464630846540395e-06,
      "loss": 0.3559,
      "step": 1720
    },
    {
      "epoch": 14.915766738660906,
      "grad_norm": 52.25741195678711,
      "learning_rate": 4.425976034016236e-06,
      "loss": 0.3136,
      "step": 1730
    },
    {
      "epoch": 15.0,
      "grad_norm": 29.12067222595215,
      "learning_rate": 4.387321221492076e-06,
      "loss": 0.5044,
      "step": 1740
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.638742983341217,
      "eval_macro_f1": 0.8443575406474457,
      "eval_runtime": 17.2989,
      "eval_samples_per_second": 53.53,
      "eval_steps_per_second": 6.706,
      "step": 1740
    },
    {
      "epoch": 15.086393088552915,
      "grad_norm": 47.597984313964844,
      "learning_rate": 4.348666408967917e-06,
      "loss": 0.4503,
      "step": 1750
    },
    {
      "epoch": 15.17278617710583,
      "grad_norm": 13.964532852172852,
      "learning_rate": 4.3100115964437575e-06,
      "loss": 0.2143,
      "step": 1760
    },
    {
      "epoch": 15.259179265658748,
      "grad_norm": 82.6571273803711,
      "learning_rate": 4.271356783919598e-06,
      "loss": 0.5836,
      "step": 1770
    },
    {
      "epoch": 15.345572354211663,
      "grad_norm": 38.710899353027344,
      "learning_rate": 4.23270197139544e-06,
      "loss": 0.2878,
      "step": 1780
    },
    {
      "epoch": 15.431965442764579,
      "grad_norm": 60.6817626953125,
      "learning_rate": 4.19404715887128e-06,
      "loss": 0.4409,
      "step": 1790
    },
    {
      "epoch": 15.518358531317494,
      "grad_norm": 43.522804260253906,
      "learning_rate": 4.155392346347121e-06,
      "loss": 0.7443,
      "step": 1800
    },
    {
      "epoch": 15.60475161987041,
      "grad_norm": 39.97816848754883,
      "learning_rate": 4.1167375338229615e-06,
      "loss": 0.3902,
      "step": 1810
    },
    {
      "epoch": 15.691144708423327,
      "grad_norm": 4.281501293182373,
      "learning_rate": 4.078082721298802e-06,
      "loss": 0.2109,
      "step": 1820
    },
    {
      "epoch": 15.777537796976242,
      "grad_norm": 53.80859375,
      "learning_rate": 4.039427908774643e-06,
      "loss": 0.427,
      "step": 1830
    },
    {
      "epoch": 15.863930885529157,
      "grad_norm": 43.7281494140625,
      "learning_rate": 4.0007730962504834e-06,
      "loss": 0.4247,
      "step": 1840
    },
    {
      "epoch": 15.950323974082073,
      "grad_norm": 62.55876541137695,
      "learning_rate": 3.962118283726324e-06,
      "loss": 0.5326,
      "step": 1850
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.5283326506614685,
      "eval_macro_f1": 0.8416865323027283,
      "eval_runtime": 17.0638,
      "eval_samples_per_second": 54.267,
      "eval_steps_per_second": 6.798,
      "step": 1856
    },
    {
      "epoch": 16.034557235421165,
      "grad_norm": 24.753618240356445,
      "learning_rate": 3.9234634712021655e-06,
      "loss": 0.2712,
      "step": 1860
    },
    {
      "epoch": 16.120950323974082,
      "grad_norm": 81.78247833251953,
      "learning_rate": 3.884808658678006e-06,
      "loss": 0.372,
      "step": 1870
    },
    {
      "epoch": 16.207343412527,
      "grad_norm": 43.68366241455078,
      "learning_rate": 3.846153846153847e-06,
      "loss": 0.3139,
      "step": 1880
    },
    {
      "epoch": 16.293736501079913,
      "grad_norm": 83.01486206054688,
      "learning_rate": 3.8074990336296874e-06,
      "loss": 0.3837,
      "step": 1890
    },
    {
      "epoch": 16.38012958963283,
      "grad_norm": 14.622414588928223,
      "learning_rate": 3.768844221105528e-06,
      "loss": 0.2072,
      "step": 1900
    },
    {
      "epoch": 16.466522678185743,
      "grad_norm": 53.248390197753906,
      "learning_rate": 3.730189408581369e-06,
      "loss": 0.3522,
      "step": 1910
    },
    {
      "epoch": 16.55291576673866,
      "grad_norm": 53.421539306640625,
      "learning_rate": 3.6915345960572097e-06,
      "loss": 0.2473,
      "step": 1920
    },
    {
      "epoch": 16.639308855291578,
      "grad_norm": 53.37113571166992,
      "learning_rate": 3.6528797835330504e-06,
      "loss": 0.6206,
      "step": 1930
    },
    {
      "epoch": 16.72570194384449,
      "grad_norm": 152.26011657714844,
      "learning_rate": 3.614224971008891e-06,
      "loss": 0.4216,
      "step": 1940
    },
    {
      "epoch": 16.81209503239741,
      "grad_norm": 25.104888916015625,
      "learning_rate": 3.575570158484732e-06,
      "loss": 0.333,
      "step": 1950
    },
    {
      "epoch": 16.898488120950326,
      "grad_norm": 50.0928955078125,
      "learning_rate": 3.5369153459605727e-06,
      "loss": 0.3633,
      "step": 1960
    },
    {
      "epoch": 16.98488120950324,
      "grad_norm": 4.997694969177246,
      "learning_rate": 3.4982605334364133e-06,
      "loss": 0.3026,
      "step": 1970
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.4961493909358978,
      "eval_macro_f1": 0.8517054282094797,
      "eval_runtime": 16.6125,
      "eval_samples_per_second": 55.741,
      "eval_steps_per_second": 6.983,
      "step": 1972
    },
    {
      "epoch": 17.069114470842333,
      "grad_norm": 19.699106216430664,
      "learning_rate": 3.459605720912254e-06,
      "loss": 0.1953,
      "step": 1980
    },
    {
      "epoch": 17.155507559395247,
      "grad_norm": 39.93479919433594,
      "learning_rate": 3.420950908388095e-06,
      "loss": 0.3818,
      "step": 1990
    },
    {
      "epoch": 17.241900647948164,
      "grad_norm": 31.303096771240234,
      "learning_rate": 3.3822960958639356e-06,
      "loss": 0.281,
      "step": 2000
    },
    {
      "epoch": 17.32829373650108,
      "grad_norm": 15.34146785736084,
      "learning_rate": 3.3436412833397762e-06,
      "loss": 0.4427,
      "step": 2010
    },
    {
      "epoch": 17.414686825053995,
      "grad_norm": 69.86293029785156,
      "learning_rate": 3.304986470815617e-06,
      "loss": 0.2839,
      "step": 2020
    },
    {
      "epoch": 17.50107991360691,
      "grad_norm": 6.422046661376953,
      "learning_rate": 3.266331658291458e-06,
      "loss": 0.3117,
      "step": 2030
    },
    {
      "epoch": 17.58747300215983,
      "grad_norm": 88.27076721191406,
      "learning_rate": 3.2276768457672986e-06,
      "loss": 0.2722,
      "step": 2040
    },
    {
      "epoch": 17.673866090712743,
      "grad_norm": 26.126628875732422,
      "learning_rate": 3.189022033243139e-06,
      "loss": 0.375,
      "step": 2050
    },
    {
      "epoch": 17.76025917926566,
      "grad_norm": 22.49889373779297,
      "learning_rate": 3.1503672207189802e-06,
      "loss": 0.4536,
      "step": 2060
    },
    {
      "epoch": 17.846652267818573,
      "grad_norm": 137.3453826904297,
      "learning_rate": 3.11171240819482e-06,
      "loss": 0.591,
      "step": 2070
    },
    {
      "epoch": 17.93304535637149,
      "grad_norm": 18.906539916992188,
      "learning_rate": 3.073057595670661e-06,
      "loss": 0.4155,
      "step": 2080
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.546431303024292,
      "eval_macro_f1": 0.8590125396622391,
      "eval_runtime": 17.1502,
      "eval_samples_per_second": 53.993,
      "eval_steps_per_second": 6.764,
      "step": 2088
    },
    {
      "epoch": 18.017278617710584,
      "grad_norm": 20.98369789123535,
      "learning_rate": 3.0344027831465017e-06,
      "loss": 0.3026,
      "step": 2090
    },
    {
      "epoch": 18.103671706263498,
      "grad_norm": 27.89241600036621,
      "learning_rate": 2.9957479706223423e-06,
      "loss": 0.2171,
      "step": 2100
    },
    {
      "epoch": 18.190064794816415,
      "grad_norm": 61.913612365722656,
      "learning_rate": 2.9570931580981834e-06,
      "loss": 0.4255,
      "step": 2110
    },
    {
      "epoch": 18.276457883369332,
      "grad_norm": 917.5242309570312,
      "learning_rate": 2.918438345574024e-06,
      "loss": 0.1092,
      "step": 2120
    },
    {
      "epoch": 18.362850971922246,
      "grad_norm": 18.6544189453125,
      "learning_rate": 2.8797835330498646e-06,
      "loss": 0.3306,
      "step": 2130
    },
    {
      "epoch": 18.449244060475163,
      "grad_norm": 13.983207702636719,
      "learning_rate": 2.8411287205257053e-06,
      "loss": 0.3184,
      "step": 2140
    },
    {
      "epoch": 18.535637149028076,
      "grad_norm": 19.48896026611328,
      "learning_rate": 2.8024739080015463e-06,
      "loss": 0.1137,
      "step": 2150
    },
    {
      "epoch": 18.622030237580994,
      "grad_norm": 56.50817108154297,
      "learning_rate": 2.763819095477387e-06,
      "loss": 0.3258,
      "step": 2160
    },
    {
      "epoch": 18.70842332613391,
      "grad_norm": 54.56320571899414,
      "learning_rate": 2.7251642829532276e-06,
      "loss": 0.2,
      "step": 2170
    },
    {
      "epoch": 18.794816414686824,
      "grad_norm": 129.62660217285156,
      "learning_rate": 2.6865094704290682e-06,
      "loss": 0.3135,
      "step": 2180
    },
    {
      "epoch": 18.88120950323974,
      "grad_norm": 107.59940338134766,
      "learning_rate": 2.6478546579049093e-06,
      "loss": 0.3521,
      "step": 2190
    },
    {
      "epoch": 18.967602591792655,
      "grad_norm": 148.64234924316406,
      "learning_rate": 2.60919984538075e-06,
      "loss": 0.2763,
      "step": 2200
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.5828408598899841,
      "eval_macro_f1": 0.8685469279334771,
      "eval_runtime": 17.8662,
      "eval_samples_per_second": 51.83,
      "eval_steps_per_second": 6.493,
      "step": 2204
    },
    {
      "epoch": 19.05183585313175,
      "grad_norm": 104.04680633544922,
      "learning_rate": 2.5705450328565905e-06,
      "loss": 0.4048,
      "step": 2210
    },
    {
      "epoch": 19.138228941684666,
      "grad_norm": 123.32837677001953,
      "learning_rate": 2.5318902203324316e-06,
      "loss": 0.5209,
      "step": 2220
    },
    {
      "epoch": 19.22462203023758,
      "grad_norm": 46.73143005371094,
      "learning_rate": 2.493235407808272e-06,
      "loss": 0.5217,
      "step": 2230
    },
    {
      "epoch": 19.311015118790497,
      "grad_norm": 8.207721710205078,
      "learning_rate": 2.4545805952841133e-06,
      "loss": 0.3255,
      "step": 2240
    },
    {
      "epoch": 19.397408207343414,
      "grad_norm": 63.93031311035156,
      "learning_rate": 2.415925782759954e-06,
      "loss": 0.2312,
      "step": 2250
    },
    {
      "epoch": 19.483801295896328,
      "grad_norm": 43.651065826416016,
      "learning_rate": 2.3772709702357945e-06,
      "loss": 0.2947,
      "step": 2260
    },
    {
      "epoch": 19.570194384449245,
      "grad_norm": 5.546581745147705,
      "learning_rate": 2.338616157711635e-06,
      "loss": 0.2463,
      "step": 2270
    },
    {
      "epoch": 19.65658747300216,
      "grad_norm": 63.26512145996094,
      "learning_rate": 2.299961345187476e-06,
      "loss": 0.4906,
      "step": 2280
    },
    {
      "epoch": 19.742980561555076,
      "grad_norm": 27.615131378173828,
      "learning_rate": 2.261306532663317e-06,
      "loss": 0.3544,
      "step": 2290
    },
    {
      "epoch": 19.829373650107993,
      "grad_norm": 42.514869689941406,
      "learning_rate": 2.2226517201391575e-06,
      "loss": 0.2251,
      "step": 2300
    },
    {
      "epoch": 19.915766738660906,
      "grad_norm": 47.34213638305664,
      "learning_rate": 2.1839969076149985e-06,
      "loss": 0.222,
      "step": 2310
    },
    {
      "epoch": 20.0,
      "grad_norm": 103.04148864746094,
      "learning_rate": 2.145342095090839e-06,
      "loss": 0.1733,
      "step": 2320
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.6083136200904846,
      "eval_macro_f1": 0.8617720910645434,
      "eval_runtime": 16.82,
      "eval_samples_per_second": 55.054,
      "eval_steps_per_second": 6.897,
      "step": 2320
    },
    {
      "epoch": 20.086393088552917,
      "grad_norm": 11.250152587890625,
      "learning_rate": 2.1066872825666798e-06,
      "loss": 0.2448,
      "step": 2330
    },
    {
      "epoch": 20.17278617710583,
      "grad_norm": 128.7437744140625,
      "learning_rate": 2.0680324700425204e-06,
      "loss": 0.2663,
      "step": 2340
    },
    {
      "epoch": 20.259179265658748,
      "grad_norm": 47.49317932128906,
      "learning_rate": 2.0293776575183615e-06,
      "loss": 0.2796,
      "step": 2350
    },
    {
      "epoch": 20.34557235421166,
      "grad_norm": 49.71406555175781,
      "learning_rate": 1.990722844994202e-06,
      "loss": 0.3162,
      "step": 2360
    },
    {
      "epoch": 20.43196544276458,
      "grad_norm": 72.845458984375,
      "learning_rate": 1.9520680324700427e-06,
      "loss": 0.3388,
      "step": 2370
    },
    {
      "epoch": 20.518358531317496,
      "grad_norm": 45.771018981933594,
      "learning_rate": 1.9134132199458833e-06,
      "loss": 0.3733,
      "step": 2380
    },
    {
      "epoch": 20.60475161987041,
      "grad_norm": 15.408432960510254,
      "learning_rate": 1.8747584074217242e-06,
      "loss": 0.3589,
      "step": 2390
    },
    {
      "epoch": 20.691144708423327,
      "grad_norm": 53.17451095581055,
      "learning_rate": 1.836103594897565e-06,
      "loss": 0.2963,
      "step": 2400
    },
    {
      "epoch": 20.77753779697624,
      "grad_norm": 118.89311218261719,
      "learning_rate": 1.7974487823734057e-06,
      "loss": 0.2782,
      "step": 2410
    },
    {
      "epoch": 20.863930885529157,
      "grad_norm": 42.53224182128906,
      "learning_rate": 1.7587939698492465e-06,
      "loss": 0.2777,
      "step": 2420
    },
    {
      "epoch": 20.950323974082075,
      "grad_norm": 453.1598205566406,
      "learning_rate": 1.7201391573250873e-06,
      "loss": 0.4015,
      "step": 2430
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.6018757820129395,
      "eval_macro_f1": 0.8631354430151588,
      "eval_runtime": 17.8334,
      "eval_samples_per_second": 51.925,
      "eval_steps_per_second": 6.505,
      "step": 2436
    },
    {
      "epoch": 21.034557235421165,
      "grad_norm": 1.9662721157073975,
      "learning_rate": 1.681484344800928e-06,
      "loss": 0.1209,
      "step": 2440
    },
    {
      "epoch": 21.120950323974082,
      "grad_norm": 42.46015167236328,
      "learning_rate": 1.6428295322767688e-06,
      "loss": 0.278,
      "step": 2450
    },
    {
      "epoch": 21.207343412527,
      "grad_norm": 9.549817085266113,
      "learning_rate": 1.6041747197526094e-06,
      "loss": 0.1866,
      "step": 2460
    },
    {
      "epoch": 21.293736501079913,
      "grad_norm": 3.735248565673828,
      "learning_rate": 1.5655199072284503e-06,
      "loss": 0.1963,
      "step": 2470
    },
    {
      "epoch": 21.38012958963283,
      "grad_norm": 16.802431106567383,
      "learning_rate": 1.5268650947042907e-06,
      "loss": 0.2157,
      "step": 2480
    },
    {
      "epoch": 21.466522678185743,
      "grad_norm": 19.582170486450195,
      "learning_rate": 1.4882102821801313e-06,
      "loss": 0.3382,
      "step": 2490
    },
    {
      "epoch": 21.55291576673866,
      "grad_norm": 0.9627342820167542,
      "learning_rate": 1.4495554696559722e-06,
      "loss": 0.307,
      "step": 2500
    },
    {
      "epoch": 21.639308855291578,
      "grad_norm": 41.96005630493164,
      "learning_rate": 1.410900657131813e-06,
      "loss": 0.2993,
      "step": 2510
    },
    {
      "epoch": 21.72570194384449,
      "grad_norm": 81.29263305664062,
      "learning_rate": 1.3722458446076536e-06,
      "loss": 0.3624,
      "step": 2520
    },
    {
      "epoch": 21.81209503239741,
      "grad_norm": 1.3688504695892334,
      "learning_rate": 1.3335910320834945e-06,
      "loss": 0.1005,
      "step": 2530
    },
    {
      "epoch": 21.898488120950326,
      "grad_norm": 51.99349594116211,
      "learning_rate": 1.2949362195593351e-06,
      "loss": 0.3422,
      "step": 2540
    },
    {
      "epoch": 21.98488120950324,
      "grad_norm": 10.257471084594727,
      "learning_rate": 1.256281407035176e-06,
      "loss": 0.2649,
      "step": 2550
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.6120893955230713,
      "eval_macro_f1": 0.8630768559502398,
      "eval_runtime": 17.5342,
      "eval_samples_per_second": 52.811,
      "eval_steps_per_second": 6.616,
      "step": 2552
    },
    {
      "epoch": 22.069114470842333,
      "grad_norm": 31.507051467895508,
      "learning_rate": 1.2176265945110168e-06,
      "loss": 0.268,
      "step": 2560
    },
    {
      "epoch": 22.155507559395247,
      "grad_norm": 91.0459976196289,
      "learning_rate": 1.1789717819868574e-06,
      "loss": 0.3532,
      "step": 2570
    },
    {
      "epoch": 22.241900647948164,
      "grad_norm": 47.774391174316406,
      "learning_rate": 1.1403169694626983e-06,
      "loss": 0.2976,
      "step": 2580
    },
    {
      "epoch": 22.32829373650108,
      "grad_norm": 49.88670349121094,
      "learning_rate": 1.1016621569385389e-06,
      "loss": 0.065,
      "step": 2590
    },
    {
      "epoch": 22.414686825053995,
      "grad_norm": 187.57847595214844,
      "learning_rate": 1.0630073444143797e-06,
      "loss": 0.3762,
      "step": 2600
    },
    {
      "epoch": 22.50107991360691,
      "grad_norm": 281.0491638183594,
      "learning_rate": 1.0243525318902204e-06,
      "loss": 0.2553,
      "step": 2610
    },
    {
      "epoch": 22.58747300215983,
      "grad_norm": 8.582008361816406,
      "learning_rate": 9.856977193660612e-07,
      "loss": 0.2701,
      "step": 2620
    },
    {
      "epoch": 22.673866090712743,
      "grad_norm": 40.54225540161133,
      "learning_rate": 9.470429068419019e-07,
      "loss": 0.2672,
      "step": 2630
    },
    {
      "epoch": 22.76025917926566,
      "grad_norm": 45.52231216430664,
      "learning_rate": 9.083880943177427e-07,
      "loss": 0.3036,
      "step": 2640
    },
    {
      "epoch": 22.846652267818573,
      "grad_norm": 108.5732421875,
      "learning_rate": 8.697332817935834e-07,
      "loss": 0.3182,
      "step": 2650
    },
    {
      "epoch": 22.93304535637149,
      "grad_norm": 29.52652931213379,
      "learning_rate": 8.310784692694241e-07,
      "loss": 0.1196,
      "step": 2660
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.6888664960861206,
      "eval_macro_f1": 0.8538403632743256,
      "eval_runtime": 17.2141,
      "eval_samples_per_second": 53.793,
      "eval_steps_per_second": 6.739,
      "step": 2668
    },
    {
      "epoch": 23.017278617710584,
      "grad_norm": 68.86863708496094,
      "learning_rate": 7.924236567452649e-07,
      "loss": 0.2634,
      "step": 2670
    },
    {
      "epoch": 23.103671706263498,
      "grad_norm": 58.899391174316406,
      "learning_rate": 7.537688442211055e-07,
      "loss": 0.3309,
      "step": 2680
    },
    {
      "epoch": 23.190064794816415,
      "grad_norm": 71.05393981933594,
      "learning_rate": 7.151140316969462e-07,
      "loss": 0.2153,
      "step": 2690
    },
    {
      "epoch": 23.276457883369332,
      "grad_norm": 2.041644334793091,
      "learning_rate": 6.76459219172787e-07,
      "loss": 0.3616,
      "step": 2700
    },
    {
      "epoch": 23.362850971922246,
      "grad_norm": 82.25381469726562,
      "learning_rate": 6.378044066486277e-07,
      "loss": 0.2538,
      "step": 2710
    },
    {
      "epoch": 23.449244060475163,
      "grad_norm": 8.824419021606445,
      "learning_rate": 5.991495941244686e-07,
      "loss": 0.0927,
      "step": 2720
    },
    {
      "epoch": 23.535637149028076,
      "grad_norm": 17.117977142333984,
      "learning_rate": 5.604947816003093e-07,
      "loss": 0.1538,
      "step": 2730
    },
    {
      "epoch": 23.622030237580994,
      "grad_norm": 1.6735382080078125,
      "learning_rate": 5.2183996907615e-07,
      "loss": 0.1548,
      "step": 2740
    },
    {
      "epoch": 23.70842332613391,
      "grad_norm": 6.845789432525635,
      "learning_rate": 4.831851565519908e-07,
      "loss": 0.1832,
      "step": 2750
    },
    {
      "epoch": 23.794816414686824,
      "grad_norm": 19.85407066345215,
      "learning_rate": 4.4453034402783155e-07,
      "loss": 0.2808,
      "step": 2760
    },
    {
      "epoch": 23.88120950323974,
      "grad_norm": 5.298573017120361,
      "learning_rate": 4.058755315036723e-07,
      "loss": 0.2098,
      "step": 2770
    },
    {
      "epoch": 23.967602591792655,
      "grad_norm": 0.8520543575286865,
      "learning_rate": 3.6722071897951296e-07,
      "loss": 0.0521,
      "step": 2780
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.6725981831550598,
      "eval_macro_f1": 0.8610719903206292,
      "eval_runtime": 16.9666,
      "eval_samples_per_second": 54.578,
      "eval_steps_per_second": 6.837,
      "step": 2784
    },
    {
      "epoch": 24.05183585313175,
      "grad_norm": 1.0969666242599487,
      "learning_rate": 3.285659064553537e-07,
      "loss": 0.1449,
      "step": 2790
    },
    {
      "epoch": 24.138228941684666,
      "grad_norm": 4.448155403137207,
      "learning_rate": 2.899110939311945e-07,
      "loss": 0.1756,
      "step": 2800
    },
    {
      "epoch": 24.22462203023758,
      "grad_norm": 2.900632381439209,
      "learning_rate": 2.512562814070352e-07,
      "loss": 0.1385,
      "step": 2810
    },
    {
      "epoch": 24.311015118790497,
      "grad_norm": 24.39512062072754,
      "learning_rate": 2.1260146888287596e-07,
      "loss": 0.2804,
      "step": 2820
    },
    {
      "epoch": 24.397408207343414,
      "grad_norm": 0.170551598072052,
      "learning_rate": 1.7394665635871667e-07,
      "loss": 0.3555,
      "step": 2830
    },
    {
      "epoch": 24.483801295896328,
      "grad_norm": 0.42594221234321594,
      "learning_rate": 1.3529184383455743e-07,
      "loss": 0.1433,
      "step": 2840
    },
    {
      "epoch": 24.570194384449245,
      "grad_norm": 26.60655975341797,
      "learning_rate": 9.663703131039815e-08,
      "loss": 0.3081,
      "step": 2850
    },
    {
      "epoch": 24.65658747300216,
      "grad_norm": 113.71473693847656,
      "learning_rate": 5.79822187862389e-08,
      "loss": 0.4043,
      "step": 2860
    },
    {
      "epoch": 24.742980561555076,
      "grad_norm": 34.953372955322266,
      "learning_rate": 1.9327406262079632e-08,
      "loss": 0.1929,
      "step": 2870
    },
    {
      "epoch": 24.786177105831534,
      "eval_loss": 0.6091281175613403,
      "eval_macro_f1": 0.8691893001382458,
      "eval_runtime": 16.5742,
      "eval_samples_per_second": 55.87,
      "eval_steps_per_second": 6.999,
      "step": 2875
    }
  ],
  "logging_steps": 10,
  "max_steps": 2875,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.2955292249139184e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}