|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.015873015873016, |
|
"eval_steps": 126, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0015873015873015873, |
|
"grad_norm": 1.802138090133667, |
|
"learning_rate": 1e-05, |
|
"loss": 2.7056, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.0015873015873016, |
|
"grad_norm": 1.3845086097717285, |
|
"learning_rate": 0.00063, |
|
"loss": 1.2986, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.003174603174603, |
|
"grad_norm": 0.29827240109443665, |
|
"learning_rate": 0.0009509433962264152, |
|
"loss": 0.369, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.003174603174603, |
|
"eval_loss": 0.2990573048591614, |
|
"eval_runtime": 76.7071, |
|
"eval_samples_per_second": 6.479, |
|
"eval_steps_per_second": 0.821, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.0047619047619047, |
|
"grad_norm": 0.28911444544792175, |
|
"learning_rate": 0.0008320754716981132, |
|
"loss": 0.2589, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.006349206349206, |
|
"grad_norm": 0.2769618332386017, |
|
"learning_rate": 0.0007132075471698113, |
|
"loss": 0.2183, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.006349206349206, |
|
"eval_loss": 0.24794502556324005, |
|
"eval_runtime": 75.6398, |
|
"eval_samples_per_second": 6.571, |
|
"eval_steps_per_second": 0.833, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 5.007936507936508, |
|
"grad_norm": 0.2538459897041321, |
|
"learning_rate": 0.0005943396226415095, |
|
"loss": 0.1887, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 6.0095238095238095, |
|
"grad_norm": 0.28301894664764404, |
|
"learning_rate": 0.0004754716981132076, |
|
"loss": 0.1622, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 6.0095238095238095, |
|
"eval_loss": 0.253131628036499, |
|
"eval_runtime": 75.5929, |
|
"eval_samples_per_second": 6.575, |
|
"eval_steps_per_second": 0.833, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 7.011111111111111, |
|
"grad_norm": 0.29330751299858093, |
|
"learning_rate": 0.00035660377358490565, |
|
"loss": 0.138, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 8.012698412698413, |
|
"grad_norm": 0.2697054147720337, |
|
"learning_rate": 0.0002377358490566038, |
|
"loss": 0.1124, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 8.012698412698413, |
|
"eval_loss": 0.2732747197151184, |
|
"eval_runtime": 75.8397, |
|
"eval_samples_per_second": 6.553, |
|
"eval_steps_per_second": 0.831, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 9.014285714285714, |
|
"grad_norm": 0.21484734117984772, |
|
"learning_rate": 0.0001188679245283019, |
|
"loss": 0.0883, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 10.015873015873016, |
|
"grad_norm": 0.20847243070602417, |
|
"learning_rate": 0.0, |
|
"loss": 0.0692, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.015873015873016, |
|
"eval_loss": 0.2962268590927124, |
|
"eval_runtime": 75.5859, |
|
"eval_samples_per_second": 6.575, |
|
"eval_steps_per_second": 0.833, |
|
"step": 630 |
|
} |
|
], |
|
"logging_steps": 63, |
|
"max_steps": 630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 126, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1425470562304e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|