|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 35.08849557522124, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4424778761061947, |
|
"grad_norm": 70.23552703857422, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.0554, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 19.844127655029297, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7061, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3185840707964602, |
|
"grad_norm": 7.727048397064209, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.3636, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.7610619469026547, |
|
"grad_norm": 4.834627628326416, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.1976, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.1946902654867255, |
|
"grad_norm": 4.151981830596924, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1237, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.6371681415929205, |
|
"grad_norm": 3.742462635040283, |
|
"learning_rate": 3e-06, |
|
"loss": 0.1117, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.0707964601769913, |
|
"grad_norm": 4.222021102905273, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.0927, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.5132743362831858, |
|
"grad_norm": 3.9585046768188477, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.9557522123893807, |
|
"grad_norm": 2.4528427124023438, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0605, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.389380530973451, |
|
"grad_norm": 3.324159622192383, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0598, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.831858407079646, |
|
"grad_norm": 3.3953816890716553, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0543, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.265486725663717, |
|
"grad_norm": 4.006282329559326, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0372, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.707964601769912, |
|
"grad_norm": 0.8992710113525391, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.0382, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 6.1415929203539825, |
|
"grad_norm": 1.6717145442962646, |
|
"learning_rate": 7e-06, |
|
"loss": 0.0379, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.584070796460177, |
|
"grad_norm": 1.683933973312378, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0354, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 7.017699115044247, |
|
"grad_norm": 4.067246437072754, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0267, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.460176991150442, |
|
"grad_norm": 0.14960134029388428, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.0215, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 7.902654867256637, |
|
"grad_norm": 2.5055603981018066, |
|
"learning_rate": 9e-06, |
|
"loss": 0.0294, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.336283185840708, |
|
"grad_norm": 2.1897315979003906, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.0202, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 8.778761061946902, |
|
"grad_norm": 2.6399195194244385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0195, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.778761061946902, |
|
"eval_loss": 3.6442341804504395, |
|
"eval_runtime": 98.5424, |
|
"eval_samples_per_second": 1.005, |
|
"eval_steps_per_second": 0.071, |
|
"eval_wer": 1.2195824334053276, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.212389380530974, |
|
"grad_norm": 1.9207507371902466, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 0.0136, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 9.654867256637168, |
|
"grad_norm": 1.3496884107589722, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 0.0184, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.08849557522124, |
|
"grad_norm": 1.9232096672058105, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.0194, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 10.530973451327434, |
|
"grad_norm": 2.705425977706909, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.0139, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.973451327433628, |
|
"grad_norm": 0.18757027387619019, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 0.0075, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 11.4070796460177, |
|
"grad_norm": 2.2813608646392822, |
|
"learning_rate": 9e-06, |
|
"loss": 0.0088, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.849557522123893, |
|
"grad_norm": 0.2083648443222046, |
|
"learning_rate": 8.833333333333334e-06, |
|
"loss": 0.0108, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 12.283185840707965, |
|
"grad_norm": 3.0644192695617676, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.0088, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.725663716814159, |
|
"grad_norm": 0.33310991525650024, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.007, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 13.15929203539823, |
|
"grad_norm": 0.6558440923690796, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0071, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.601769911504425, |
|
"grad_norm": 0.04615064710378647, |
|
"learning_rate": 8.166666666666668e-06, |
|
"loss": 0.0066, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 14.035398230088495, |
|
"grad_norm": 2.2992634773254395, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0051, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.47787610619469, |
|
"grad_norm": 1.2088443040847778, |
|
"learning_rate": 7.833333333333333e-06, |
|
"loss": 0.0035, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 14.920353982300885, |
|
"grad_norm": 1.7774810791015625, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 0.0059, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.353982300884956, |
|
"grad_norm": 0.07690909504890442, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0047, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 15.79646017699115, |
|
"grad_norm": 1.2860054969787598, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.0053, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.23008849557522, |
|
"grad_norm": 0.6857427358627319, |
|
"learning_rate": 7.166666666666667e-06, |
|
"loss": 0.0023, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 16.672566371681416, |
|
"grad_norm": 0.7444378733634949, |
|
"learning_rate": 7e-06, |
|
"loss": 0.0043, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.106194690265486, |
|
"grad_norm": 0.0285570677369833, |
|
"learning_rate": 6.833333333333334e-06, |
|
"loss": 0.0033, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 17.548672566371682, |
|
"grad_norm": 1.755213737487793, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0044, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.548672566371682, |
|
"eval_loss": 4.271320343017578, |
|
"eval_runtime": 87.4509, |
|
"eval_samples_per_second": 1.132, |
|
"eval_steps_per_second": 0.08, |
|
"eval_wer": 1.2203023758099352, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.991150442477878, |
|
"grad_norm": 3.525615930557251, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.004, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 18.424778761061948, |
|
"grad_norm": 1.9330180883407593, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 0.0026, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.86725663716814, |
|
"grad_norm": 0.3188052475452423, |
|
"learning_rate": 6.166666666666667e-06, |
|
"loss": 0.0046, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 19.300884955752213, |
|
"grad_norm": 1.5139490365982056, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0042, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.743362831858406, |
|
"grad_norm": 5.1269354820251465, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.0032, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 20.17699115044248, |
|
"grad_norm": 0.024498550221323967, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.0029, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.61946902654867, |
|
"grad_norm": 0.017963914200663567, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0011, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 21.053097345132745, |
|
"grad_norm": 0.10410400480031967, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.0008, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.495575221238937, |
|
"grad_norm": 0.05556880682706833, |
|
"learning_rate": 5.1666666666666675e-06, |
|
"loss": 0.0003, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 21.938053097345133, |
|
"grad_norm": 0.005813132505863905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0004, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.371681415929203, |
|
"grad_norm": 0.008638879284262657, |
|
"learning_rate": 4.833333333333333e-06, |
|
"loss": 0.0003, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 22.8141592920354, |
|
"grad_norm": 0.033254146575927734, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.0003, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.24778761061947, |
|
"grad_norm": 0.018534550443291664, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0002, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 23.690265486725664, |
|
"grad_norm": 0.00659082131460309, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.0003, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 24.123893805309734, |
|
"grad_norm": 0.009523949585855007, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 24.56637168141593, |
|
"grad_norm": 0.028162825852632523, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0002, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.012596463784575462, |
|
"learning_rate": 3.833333333333334e-06, |
|
"loss": 0.0001, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 25.442477876106196, |
|
"grad_norm": 0.006055805366486311, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"loss": 0.0002, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.884955752212388, |
|
"grad_norm": 0.01260603778064251, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.0002, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 26.31858407079646, |
|
"grad_norm": 0.02524421364068985, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0002, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.31858407079646, |
|
"eval_loss": 4.261180877685547, |
|
"eval_runtime": 92.5387, |
|
"eval_samples_per_second": 1.07, |
|
"eval_steps_per_second": 0.076, |
|
"eval_wer": 1.2203023758099352, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.761061946902654, |
|
"grad_norm": 0.04416137561202049, |
|
"learning_rate": 3.1666666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 27.194690265486727, |
|
"grad_norm": 0.002192295156419277, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0002, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.63716814159292, |
|
"grad_norm": 0.005449674092233181, |
|
"learning_rate": 2.8333333333333335e-06, |
|
"loss": 0.0002, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 28.07079646017699, |
|
"grad_norm": 0.02437576837837696, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.513274336283185, |
|
"grad_norm": 0.042830660939216614, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0002, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 28.95575221238938, |
|
"grad_norm": 0.0039032117929309607, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"loss": 0.0001, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.38938053097345, |
|
"grad_norm": 0.008059768006205559, |
|
"learning_rate": 2.166666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 29.831858407079647, |
|
"grad_norm": 0.012063885107636452, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0002, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.265486725663717, |
|
"grad_norm": 0.04550086334347725, |
|
"learning_rate": 1.8333333333333333e-06, |
|
"loss": 0.0001, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 30.707964601769913, |
|
"grad_norm": 0.0046273404732346535, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 31.141592920353983, |
|
"grad_norm": 0.00888855941593647, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0002, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 31.58407079646018, |
|
"grad_norm": 0.011799165047705173, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0002, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.017699115044245, |
|
"grad_norm": 0.033783987164497375, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"loss": 0.0001, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 32.46017699115044, |
|
"grad_norm": 0.002975311130285263, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0001, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 32.902654867256636, |
|
"grad_norm": 0.009084297344088554, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.0002, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 33.336283185840706, |
|
"grad_norm": 0.011445912532508373, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.0002, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 33.7787610619469, |
|
"grad_norm": 0.025164591148495674, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.0001, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 34.21238938053097, |
|
"grad_norm": 0.00487129669636488, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"loss": 0.0001, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 34.65486725663717, |
|
"grad_norm": 0.014068867079913616, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"loss": 0.0002, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 35.08849557522124, |
|
"grad_norm": 0.009848229587078094, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.08849557522124, |
|
"eval_loss": 4.253291606903076, |
|
"eval_runtime": 97.0898, |
|
"eval_samples_per_second": 1.02, |
|
"eval_steps_per_second": 0.072, |
|
"eval_wer": 1.2203023758099352, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.08849557522124, |
|
"step": 2000, |
|
"total_flos": 1.83078577963008e+19, |
|
"train_loss": 0.0418594888363732, |
|
"train_runtime": 32879.5132, |
|
"train_samples_per_second": 1.947, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 36, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.83078577963008e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|