|
{ |
|
"best_metric": 0.8181082820112143, |
|
"best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196", |
|
"epoch": 25.0, |
|
"eval_steps": 500, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.5993395707209687, |
|
"eval_loss": 0.6179381608963013, |
|
"eval_runtime": 7.1172, |
|
"eval_samples_per_second": 12.927, |
|
"eval_steps_per_second": 3.232, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 3.5607712268829346, |
|
"learning_rate": 4.782608695652174e-05, |
|
"loss": 0.7932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.5993395707209687, |
|
"eval_loss": 0.6113200187683105, |
|
"eval_runtime": 7.1729, |
|
"eval_samples_per_second": 12.826, |
|
"eval_steps_per_second": 3.206, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 1.3359662294387817, |
|
"learning_rate": 4.565217391304348e-05, |
|
"loss": 0.6589, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.6304912478825522, |
|
"eval_loss": 0.6631842851638794, |
|
"eval_runtime": 7.1467, |
|
"eval_samples_per_second": 12.873, |
|
"eval_steps_per_second": 3.218, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.260869565217391, |
|
"grad_norm": 3.4564907550811768, |
|
"learning_rate": 4.347826086956522e-05, |
|
"loss": 0.6418, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.5993395707209687, |
|
"eval_loss": 0.6268433928489685, |
|
"eval_runtime": 7.1588, |
|
"eval_samples_per_second": 12.851, |
|
"eval_steps_per_second": 3.213, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 4.3478260869565215, |
|
"grad_norm": 2.798123836517334, |
|
"learning_rate": 4.130434782608696e-05, |
|
"loss": 0.6315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.25077639751552794, |
|
"eval_loss": 0.9374740719795227, |
|
"eval_runtime": 7.2024, |
|
"eval_samples_per_second": 12.773, |
|
"eval_steps_per_second": 3.193, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.434782608695652, |
|
"grad_norm": 3.6863114833831787, |
|
"learning_rate": 3.91304347826087e-05, |
|
"loss": 0.6522, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.588628762541806, |
|
"eval_loss": 0.582027792930603, |
|
"eval_runtime": 7.2495, |
|
"eval_samples_per_second": 12.691, |
|
"eval_steps_per_second": 3.173, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 6.521739130434782, |
|
"grad_norm": 5.186334133148193, |
|
"learning_rate": 3.695652173913043e-05, |
|
"loss": 0.6183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.6857289002557545, |
|
"eval_loss": 0.5537915825843811, |
|
"eval_runtime": 7.1555, |
|
"eval_samples_per_second": 12.857, |
|
"eval_steps_per_second": 3.214, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 7.608695652173913, |
|
"grad_norm": 11.184236526489258, |
|
"learning_rate": 3.478260869565218e-05, |
|
"loss": 0.6136, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.6735976418570376, |
|
"eval_loss": 0.5223021507263184, |
|
"eval_runtime": 7.2029, |
|
"eval_samples_per_second": 12.773, |
|
"eval_steps_per_second": 3.193, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 8.695652173913043, |
|
"grad_norm": 7.914676666259766, |
|
"learning_rate": 3.260869565217392e-05, |
|
"loss": 0.496, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.7776872198363687, |
|
"eval_loss": 0.7307997345924377, |
|
"eval_runtime": 7.1943, |
|
"eval_samples_per_second": 12.788, |
|
"eval_steps_per_second": 3.197, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 9.782608695652174, |
|
"grad_norm": 1.3677582740783691, |
|
"learning_rate": 3.0434782608695656e-05, |
|
"loss": 0.4858, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.7831521739130435, |
|
"eval_loss": 0.7452064156532288, |
|
"eval_runtime": 7.2905, |
|
"eval_samples_per_second": 12.619, |
|
"eval_steps_per_second": 3.155, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.869565217391305, |
|
"grad_norm": 12.851914405822754, |
|
"learning_rate": 2.826086956521739e-05, |
|
"loss": 0.4181, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.7798831927319922, |
|
"eval_loss": 0.7523320913314819, |
|
"eval_runtime": 7.2341, |
|
"eval_samples_per_second": 12.718, |
|
"eval_steps_per_second": 3.179, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 11.956521739130435, |
|
"grad_norm": 12.312176704406738, |
|
"learning_rate": 2.608695652173913e-05, |
|
"loss": 0.3395, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.741650913673598, |
|
"eval_loss": 1.184059500694275, |
|
"eval_runtime": 7.2136, |
|
"eval_samples_per_second": 12.754, |
|
"eval_steps_per_second": 3.188, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.8181082820112143, |
|
"eval_loss": 0.7770065069198608, |
|
"eval_runtime": 7.1902, |
|
"eval_samples_per_second": 12.795, |
|
"eval_steps_per_second": 3.199, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 13.043478260869565, |
|
"grad_norm": 0.024596206843852997, |
|
"learning_rate": 2.391304347826087e-05, |
|
"loss": 0.2375, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.77725724787834, |
|
"eval_loss": 1.149732232093811, |
|
"eval_runtime": 7.2423, |
|
"eval_samples_per_second": 12.703, |
|
"eval_steps_per_second": 3.176, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 14.130434782608695, |
|
"grad_norm": 0.04395654425024986, |
|
"learning_rate": 2.173913043478261e-05, |
|
"loss": 0.1728, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.518917202949524, |
|
"eval_runtime": 7.2313, |
|
"eval_samples_per_second": 12.722, |
|
"eval_steps_per_second": 3.181, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.217391304347826, |
|
"grad_norm": 0.007132470607757568, |
|
"learning_rate": 1.956521739130435e-05, |
|
"loss": 0.0324, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.7419546636937941, |
|
"eval_loss": 1.6500256061553955, |
|
"eval_runtime": 7.2475, |
|
"eval_samples_per_second": 12.694, |
|
"eval_steps_per_second": 3.174, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 16.304347826086957, |
|
"grad_norm": 0.004443590063601732, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.0764, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.5010850429534912, |
|
"eval_runtime": 7.2861, |
|
"eval_samples_per_second": 12.627, |
|
"eval_steps_per_second": 3.157, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 17.391304347826086, |
|
"grad_norm": 8.947516441345215, |
|
"learning_rate": 1.5217391304347828e-05, |
|
"loss": 0.0244, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.75442242114237, |
|
"eval_loss": 1.5790338516235352, |
|
"eval_runtime": 7.2916, |
|
"eval_samples_per_second": 12.617, |
|
"eval_steps_per_second": 3.154, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 18.47826086956522, |
|
"grad_norm": 0.016934270039200783, |
|
"learning_rate": 1.3043478260869566e-05, |
|
"loss": 0.0002, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.7539112050739958, |
|
"eval_loss": 1.929887056350708, |
|
"eval_runtime": 7.2258, |
|
"eval_samples_per_second": 12.732, |
|
"eval_steps_per_second": 3.183, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 19.565217391304348, |
|
"grad_norm": 0.02545306272804737, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"loss": 0.0004, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.7885226011276245, |
|
"eval_runtime": 7.2042, |
|
"eval_samples_per_second": 12.77, |
|
"eval_steps_per_second": 3.193, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.652173913043477, |
|
"grad_norm": 0.0008984901360236108, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.0001, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.82301926612854, |
|
"eval_runtime": 7.2146, |
|
"eval_samples_per_second": 12.752, |
|
"eval_steps_per_second": 3.188, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 21.73913043478261, |
|
"grad_norm": 0.0009025917970575392, |
|
"learning_rate": 6.521739130434783e-06, |
|
"loss": 0.0001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.8578201532363892, |
|
"eval_runtime": 7.1765, |
|
"eval_samples_per_second": 12.82, |
|
"eval_steps_per_second": 3.205, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 22.82608695652174, |
|
"grad_norm": 0.001884501543827355, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.87649405002594, |
|
"eval_runtime": 7.1744, |
|
"eval_samples_per_second": 12.823, |
|
"eval_steps_per_second": 3.206, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 23.91304347826087, |
|
"grad_norm": 0.003843324724584818, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 0.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.8845328092575073, |
|
"eval_runtime": 7.1501, |
|
"eval_samples_per_second": 12.867, |
|
"eval_steps_per_second": 3.217, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.0016703982837498188, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.7634584417193113, |
|
"eval_loss": 1.8875935077667236, |
|
"eval_runtime": 7.3972, |
|
"eval_samples_per_second": 12.437, |
|
"eval_steps_per_second": 3.109, |
|
"step": 2300 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.57899997983e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|