|
{ |
|
"best_metric": 0.031979888677597046, |
|
"best_model_checkpoint": "./results/answerdotai/ModernBERT-base/trial-2/checkpoint-6022", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 6022, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016605778811026237, |
|
"grad_norm": 21.788597106933594, |
|
"learning_rate": 5.429575351871404e-06, |
|
"loss": 0.5789, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.033211557622052475, |
|
"grad_norm": 20.038349151611328, |
|
"learning_rate": 5.416664391316233e-06, |
|
"loss": 0.37, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04981733643307871, |
|
"grad_norm": 23.927526473999023, |
|
"learning_rate": 5.403753430761063e-06, |
|
"loss": 0.25, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06642311524410495, |
|
"grad_norm": 4.1712799072265625, |
|
"learning_rate": 5.390842470205893e-06, |
|
"loss": 0.1921, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08302889405513118, |
|
"grad_norm": 6.138601303100586, |
|
"learning_rate": 5.3779315096507225e-06, |
|
"loss": 0.1365, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09963467286615742, |
|
"grad_norm": 0.9431160092353821, |
|
"learning_rate": 5.3650205490955514e-06, |
|
"loss": 0.1473, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11624045167718366, |
|
"grad_norm": 25.303245544433594, |
|
"learning_rate": 5.352109588540381e-06, |
|
"loss": 0.0875, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1328462304882099, |
|
"grad_norm": 14.83379077911377, |
|
"learning_rate": 5.33919862798521e-06, |
|
"loss": 0.111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14945200929923613, |
|
"grad_norm": 0.2346535325050354, |
|
"learning_rate": 5.32628766743004e-06, |
|
"loss": 0.0722, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16605778811026237, |
|
"grad_norm": 19.045169830322266, |
|
"learning_rate": 5.31337670687487e-06, |
|
"loss": 0.1236, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1826635669212886, |
|
"grad_norm": 10.871609687805176, |
|
"learning_rate": 5.300465746319699e-06, |
|
"loss": 0.1018, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.19926934573231483, |
|
"grad_norm": 8.278830528259277, |
|
"learning_rate": 5.287554785764528e-06, |
|
"loss": 0.0608, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2158751245433411, |
|
"grad_norm": 3.4486818313598633, |
|
"learning_rate": 5.274643825209358e-06, |
|
"loss": 0.0684, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.23248090335436733, |
|
"grad_norm": 9.789453506469727, |
|
"learning_rate": 5.261732864654187e-06, |
|
"loss": 0.0826, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24908668216539356, |
|
"grad_norm": 0.013454285450279713, |
|
"learning_rate": 5.248821904099017e-06, |
|
"loss": 0.0672, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2656924609764198, |
|
"grad_norm": 0.8878294825553894, |
|
"learning_rate": 5.2359109435438465e-06, |
|
"loss": 0.0472, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.282298239787446, |
|
"grad_norm": 15.41006088256836, |
|
"learning_rate": 5.222999982988676e-06, |
|
"loss": 0.0616, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.29890401859847227, |
|
"grad_norm": 0.04324938729405403, |
|
"learning_rate": 5.210089022433506e-06, |
|
"loss": 0.0215, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3155097974094985, |
|
"grad_norm": 0.011849366128444672, |
|
"learning_rate": 5.197178061878335e-06, |
|
"loss": 0.0398, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.33211557622052473, |
|
"grad_norm": 0.0020897299982607365, |
|
"learning_rate": 5.184267101323165e-06, |
|
"loss": 0.0294, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.348721355031551, |
|
"grad_norm": 0.00038467388367280364, |
|
"learning_rate": 5.171356140767994e-06, |
|
"loss": 0.0328, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3653271338425772, |
|
"grad_norm": 0.0022064056247472763, |
|
"learning_rate": 5.158445180212823e-06, |
|
"loss": 0.0216, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.38193291265360346, |
|
"grad_norm": 0.012603014707565308, |
|
"learning_rate": 5.145534219657653e-06, |
|
"loss": 0.0293, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.39853869146462967, |
|
"grad_norm": 0.002970542525872588, |
|
"learning_rate": 5.132623259102483e-06, |
|
"loss": 0.0133, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.41514447027565593, |
|
"grad_norm": 0.09289965778589249, |
|
"learning_rate": 5.119712298547312e-06, |
|
"loss": 0.0189, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4317502490866822, |
|
"grad_norm": 0.030116688460111618, |
|
"learning_rate": 5.106801337992142e-06, |
|
"loss": 0.0266, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4483560278977084, |
|
"grad_norm": 23.291847229003906, |
|
"learning_rate": 5.0938903774369705e-06, |
|
"loss": 0.0378, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.46496180670873466, |
|
"grad_norm": 0.00580954784527421, |
|
"learning_rate": 5.0809794168818e-06, |
|
"loss": 0.0002, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.48156758551976087, |
|
"grad_norm": 0.0036250711418688297, |
|
"learning_rate": 5.06806845632663e-06, |
|
"loss": 0.0297, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.4981733643307871, |
|
"grad_norm": 0.0013630707981064916, |
|
"learning_rate": 5.05515749577146e-06, |
|
"loss": 0.0114, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5147791431418134, |
|
"grad_norm": 0.025447094812989235, |
|
"learning_rate": 5.042246535216289e-06, |
|
"loss": 0.0019, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5313849219528396, |
|
"grad_norm": 18.81841468811035, |
|
"learning_rate": 5.0293355746611185e-06, |
|
"loss": 0.0286, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5479907007638658, |
|
"grad_norm": 0.0033424277789890766, |
|
"learning_rate": 5.016424614105948e-06, |
|
"loss": 0.0393, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.564596479574892, |
|
"grad_norm": 0.039123374968767166, |
|
"learning_rate": 5.003513653550777e-06, |
|
"loss": 0.0186, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5812022583859183, |
|
"grad_norm": 0.0005275913863442838, |
|
"learning_rate": 4.990602692995607e-06, |
|
"loss": 0.0003, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5978080371969445, |
|
"grad_norm": 0.005070064682513475, |
|
"learning_rate": 4.977691732440437e-06, |
|
"loss": 0.01, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6144138160079707, |
|
"grad_norm": 0.003932475112378597, |
|
"learning_rate": 4.9647807718852664e-06, |
|
"loss": 0.0222, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.631019594818997, |
|
"grad_norm": 0.6544032692909241, |
|
"learning_rate": 4.951869811330095e-06, |
|
"loss": 0.0138, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6476253736300233, |
|
"grad_norm": 0.008768323808908463, |
|
"learning_rate": 4.938958850774925e-06, |
|
"loss": 0.0056, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6642311524410495, |
|
"grad_norm": 0.0021180976182222366, |
|
"learning_rate": 4.926047890219754e-06, |
|
"loss": 0.0049, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6808369312520757, |
|
"grad_norm": 0.002039346843957901, |
|
"learning_rate": 4.913136929664584e-06, |
|
"loss": 0.0142, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.697442710063102, |
|
"grad_norm": 0.012900142930448055, |
|
"learning_rate": 4.9002259691094136e-06, |
|
"loss": 0.0105, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7140484888741282, |
|
"grad_norm": 0.0022153747268021107, |
|
"learning_rate": 4.887315008554243e-06, |
|
"loss": 0.0142, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7306542676851544, |
|
"grad_norm": 0.001426122267730534, |
|
"learning_rate": 4.874404047999072e-06, |
|
"loss": 0.0068, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7472600464961807, |
|
"grad_norm": 0.0008603449095971882, |
|
"learning_rate": 4.861493087443902e-06, |
|
"loss": 0.0119, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7638658253072069, |
|
"grad_norm": 0.0006780526018701494, |
|
"learning_rate": 4.848582126888731e-06, |
|
"loss": 0.0108, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7804716041182331, |
|
"grad_norm": 0.014527379535138607, |
|
"learning_rate": 4.835671166333561e-06, |
|
"loss": 0.0002, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7970773829292593, |
|
"grad_norm": 0.00022624376288149506, |
|
"learning_rate": 4.8227602057783904e-06, |
|
"loss": 0.0092, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8136831617402857, |
|
"grad_norm": 0.0044932495802640915, |
|
"learning_rate": 4.80984924522322e-06, |
|
"loss": 0.0001, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8302889405513119, |
|
"grad_norm": 0.0009355309884995222, |
|
"learning_rate": 4.79693828466805e-06, |
|
"loss": 0.0002, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8468947193623381, |
|
"grad_norm": 0.12550997734069824, |
|
"learning_rate": 4.784027324112879e-06, |
|
"loss": 0.0024, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8635004981733644, |
|
"grad_norm": 0.02399071305990219, |
|
"learning_rate": 4.771116363557709e-06, |
|
"loss": 0.0099, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8801062769843906, |
|
"grad_norm": 0.008470265194773674, |
|
"learning_rate": 4.7582054030025375e-06, |
|
"loss": 0.0157, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8967120557954168, |
|
"grad_norm": 3.967735028709285e-05, |
|
"learning_rate": 4.745294442447367e-06, |
|
"loss": 0.0013, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.913317834606443, |
|
"grad_norm": 0.0005532742943614721, |
|
"learning_rate": 4.732383481892197e-06, |
|
"loss": 0.0025, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.9299236134174693, |
|
"grad_norm": 9.227233022102155e-06, |
|
"learning_rate": 4.719472521337027e-06, |
|
"loss": 0.0028, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.9465293922284955, |
|
"grad_norm": 0.280258446931839, |
|
"learning_rate": 4.706561560781856e-06, |
|
"loss": 0.0004, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.9631351710395217, |
|
"grad_norm": 27.427757263183594, |
|
"learning_rate": 4.6936506002266855e-06, |
|
"loss": 0.0127, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9797409498505479, |
|
"grad_norm": 176.85423278808594, |
|
"learning_rate": 4.680739639671514e-06, |
|
"loss": 0.0298, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9963467286615743, |
|
"grad_norm": 0.00011263355554547161, |
|
"learning_rate": 4.667828679116344e-06, |
|
"loss": 0.001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9963024809160306, |
|
"eval_f1": 0.9962431632227496, |
|
"eval_loss": 0.04071500524878502, |
|
"eval_precision": 0.9962693439313673, |
|
"eval_recall": 0.9963024809160306, |
|
"eval_runtime": 38.0003, |
|
"eval_samples_per_second": 220.63, |
|
"eval_steps_per_second": 13.789, |
|
"step": 3011 |
|
}, |
|
{ |
|
"epoch": 1.0129525074726005, |
|
"grad_norm": 0.05092976614832878, |
|
"learning_rate": 4.654917718561174e-06, |
|
"loss": 0.018, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.0295582862836268, |
|
"grad_norm": 3.4633874747669324e-05, |
|
"learning_rate": 4.642006758006004e-06, |
|
"loss": 0.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.0461640650946529, |
|
"grad_norm": 8.058391540544108e-05, |
|
"learning_rate": 4.629095797450833e-06, |
|
"loss": 0.0, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.0627698439056792, |
|
"grad_norm": 0.00043129033292643726, |
|
"learning_rate": 4.616184836895662e-06, |
|
"loss": 0.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.0793756227167055, |
|
"grad_norm": 0.012417804449796677, |
|
"learning_rate": 4.603273876340492e-06, |
|
"loss": 0.0204, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.0959814015277316, |
|
"grad_norm": 0.07707448303699493, |
|
"learning_rate": 4.590362915785321e-06, |
|
"loss": 0.0089, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.112587180338758, |
|
"grad_norm": 0.0019856118597090244, |
|
"learning_rate": 4.577451955230151e-06, |
|
"loss": 0.0003, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.1291929591497842, |
|
"grad_norm": 0.0003844090970233083, |
|
"learning_rate": 4.564540994674981e-06, |
|
"loss": 0.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.1457987379608103, |
|
"grad_norm": 0.004796341527253389, |
|
"learning_rate": 4.55163003411981e-06, |
|
"loss": 0.0054, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.1624045167718366, |
|
"grad_norm": 0.0021394495852291584, |
|
"learning_rate": 4.538719073564639e-06, |
|
"loss": 0.0001, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.1790102955828627, |
|
"grad_norm": 0.00016287445032503456, |
|
"learning_rate": 4.525808113009469e-06, |
|
"loss": 0.0017, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.195616074393889, |
|
"grad_norm": 0.005753168836236, |
|
"learning_rate": 4.512897152454298e-06, |
|
"loss": 0.0132, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.2122218532049154, |
|
"grad_norm": 0.00012519631127361208, |
|
"learning_rate": 4.499986191899128e-06, |
|
"loss": 0.0, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.2288276320159415, |
|
"grad_norm": 0.0009526669164188206, |
|
"learning_rate": 4.487075231343957e-06, |
|
"loss": 0.0083, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.2454334108269678, |
|
"grad_norm": 6.90124070388265e-05, |
|
"learning_rate": 4.474164270788787e-06, |
|
"loss": 0.0114, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.2620391896379939, |
|
"grad_norm": 0.0029422417283058167, |
|
"learning_rate": 4.461253310233616e-06, |
|
"loss": 0.0001, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.2786449684490202, |
|
"grad_norm": 1.6564589738845825, |
|
"learning_rate": 4.448342349678446e-06, |
|
"loss": 0.0065, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.2952507472600465, |
|
"grad_norm": 4.6906425268389285e-05, |
|
"learning_rate": 4.435431389123275e-06, |
|
"loss": 0.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.3118565260710726, |
|
"grad_norm": 1.4456440112553537e-05, |
|
"learning_rate": 4.4225204285681046e-06, |
|
"loss": 0.0, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.328462304882099, |
|
"grad_norm": 4.6707005822099745e-05, |
|
"learning_rate": 4.409609468012934e-06, |
|
"loss": 0.0227, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.3450680836931252, |
|
"grad_norm": 4.7155015636235476e-05, |
|
"learning_rate": 4.396698507457763e-06, |
|
"loss": 0.0002, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.3616738625041513, |
|
"grad_norm": 0.01696430891752243, |
|
"learning_rate": 4.383787546902593e-06, |
|
"loss": 0.0188, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.3782796413151777, |
|
"grad_norm": 0.0008329456904903054, |
|
"learning_rate": 4.370876586347423e-06, |
|
"loss": 0.0178, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.394885420126204, |
|
"grad_norm": 9.179511835100129e-05, |
|
"learning_rate": 4.3579656257922525e-06, |
|
"loss": 0.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.41149119893723, |
|
"grad_norm": 2.924172622442711e-05, |
|
"learning_rate": 4.3450546652370814e-06, |
|
"loss": 0.0013, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.4280969777482564, |
|
"grad_norm": 0.015076125971972942, |
|
"learning_rate": 4.332143704681911e-06, |
|
"loss": 0.0104, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.4447027565592827, |
|
"grad_norm": 5.385762415244244e-05, |
|
"learning_rate": 4.31923274412674e-06, |
|
"loss": 0.014, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.4613085353703088, |
|
"grad_norm": 0.0007110639126040041, |
|
"learning_rate": 4.30632178357157e-06, |
|
"loss": 0.0126, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.4779143141813351, |
|
"grad_norm": 0.00014339391782414168, |
|
"learning_rate": 4.2934108230164e-06, |
|
"loss": 0.0003, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.4945200929923614, |
|
"grad_norm": 0.0006024091853760183, |
|
"learning_rate": 4.280499862461229e-06, |
|
"loss": 0.0118, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5111258718033875, |
|
"grad_norm": 0.0002353072923142463, |
|
"learning_rate": 4.267588901906058e-06, |
|
"loss": 0.0086, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.5277316506144138, |
|
"grad_norm": 0.0008946498855948448, |
|
"learning_rate": 4.254677941350888e-06, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.5443374294254402, |
|
"grad_norm": 7.315174298128113e-05, |
|
"learning_rate": 4.241766980795717e-06, |
|
"loss": 0.0003, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.5609432082364663, |
|
"grad_norm": 9.232313459506258e-05, |
|
"learning_rate": 4.228856020240547e-06, |
|
"loss": 0.0001, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.5775489870474926, |
|
"grad_norm": 1.4020029084349517e-05, |
|
"learning_rate": 4.2159450596853765e-06, |
|
"loss": 0.0, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.594154765858519, |
|
"grad_norm": 4.0607475966680795e-05, |
|
"learning_rate": 4.203034099130206e-06, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.610760544669545, |
|
"grad_norm": 4.69290571345482e-05, |
|
"learning_rate": 4.190123138575036e-06, |
|
"loss": 0.0177, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.627366323480571, |
|
"grad_norm": 0.14096687734127045, |
|
"learning_rate": 4.177212178019865e-06, |
|
"loss": 0.0115, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.6439721022915976, |
|
"grad_norm": 0.00020342542848084122, |
|
"learning_rate": 4.164301217464695e-06, |
|
"loss": 0.0001, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.6605778811026237, |
|
"grad_norm": 0.0002786288969218731, |
|
"learning_rate": 4.151390256909524e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6771836599136498, |
|
"grad_norm": 2.8438846129574813e-05, |
|
"learning_rate": 4.138479296354353e-06, |
|
"loss": 0.0032, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.6937894387246761, |
|
"grad_norm": 5.944320037087891e-06, |
|
"learning_rate": 4.125568335799183e-06, |
|
"loss": 0.0001, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.7103952175357025, |
|
"grad_norm": 0.005958211608231068, |
|
"learning_rate": 4.112657375244013e-06, |
|
"loss": 0.0, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.7270009963467285, |
|
"grad_norm": 0.002004456939175725, |
|
"learning_rate": 4.099746414688842e-06, |
|
"loss": 0.0106, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.7436067751577549, |
|
"grad_norm": 0.0008562383009120822, |
|
"learning_rate": 4.086835454133672e-06, |
|
"loss": 0.0081, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.7602125539687812, |
|
"grad_norm": 0.03570560738444328, |
|
"learning_rate": 4.0739244935785005e-06, |
|
"loss": 0.025, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.7768183327798073, |
|
"grad_norm": 0.001486024702899158, |
|
"learning_rate": 4.06101353302333e-06, |
|
"loss": 0.0145, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.7934241115908336, |
|
"grad_norm": 0.0015331929316744208, |
|
"learning_rate": 4.04810257246816e-06, |
|
"loss": 0.0001, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.81002989040186, |
|
"grad_norm": 0.004162834957242012, |
|
"learning_rate": 4.03519161191299e-06, |
|
"loss": 0.0005, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.826635669212886, |
|
"grad_norm": 0.0003064811462536454, |
|
"learning_rate": 4.022280651357819e-06, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.8432414480239123, |
|
"grad_norm": 0.000830256671179086, |
|
"learning_rate": 4.0093696908026485e-06, |
|
"loss": 0.0034, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.8598472268349386, |
|
"grad_norm": 0.001540405093692243, |
|
"learning_rate": 3.996458730247478e-06, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.8764530056459647, |
|
"grad_norm": 0.011221639811992645, |
|
"learning_rate": 3.983547769692307e-06, |
|
"loss": 0.0116, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.893058784456991, |
|
"grad_norm": 0.0031693174969404936, |
|
"learning_rate": 3.970636809137137e-06, |
|
"loss": 0.0061, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.9096645632680174, |
|
"grad_norm": 7.828649540897459e-05, |
|
"learning_rate": 3.957725848581967e-06, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.9262703420790435, |
|
"grad_norm": 0.00892726145684719, |
|
"learning_rate": 3.9448148880267964e-06, |
|
"loss": 0.0003, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.9428761208900698, |
|
"grad_norm": 0.0033830904867500067, |
|
"learning_rate": 3.931903927471625e-06, |
|
"loss": 0.0007, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.959481899701096, |
|
"grad_norm": 0.017441514879465103, |
|
"learning_rate": 3.918992966916455e-06, |
|
"loss": 0.0109, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.9760876785121222, |
|
"grad_norm": 0.006790176033973694, |
|
"learning_rate": 3.906082006361284e-06, |
|
"loss": 0.0101, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.9926934573231485, |
|
"grad_norm": 0.0004248483164701611, |
|
"learning_rate": 3.893171045806114e-06, |
|
"loss": 0.0103, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9959446564885496, |
|
"eval_f1": 0.9958827988724177, |
|
"eval_loss": 0.031979888677597046, |
|
"eval_precision": 0.9958978797187497, |
|
"eval_recall": 0.9959446564885496, |
|
"eval_runtime": 37.4063, |
|
"eval_samples_per_second": 224.134, |
|
"eval_steps_per_second": 14.008, |
|
"step": 6022 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 21077, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.282861088518144e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|