|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 2.234375,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.3998,
      "step": 5
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 1.203125,
      "learning_rate": 4.993398052663416e-05,
      "loss": 1.2457,
      "step": 10
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.75390625,
      "learning_rate": 4.953193036870676e-05,
      "loss": 1.0969,
      "step": 15
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.66796875,
      "learning_rate": 4.877104772313846e-05,
      "loss": 1.0383,
      "step": 20
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 0.5859375,
      "learning_rate": 4.766372503162375e-05,
      "loss": 0.9794,
      "step": 25
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.57421875,
      "learning_rate": 4.622799718031961e-05,
      "loss": 0.9258,
      "step": 30
    },
    {
      "epoch": 2.6923076923076925,
      "grad_norm": 0.578125,
      "learning_rate": 4.448724776693342e-05,
      "loss": 0.9013,
      "step": 35
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 0.609375,
      "learning_rate": 4.246982825372522e-05,
      "loss": 0.8851,
      "step": 40
    },
    {
      "epoch": 3.4615384615384617,
      "grad_norm": 0.53515625,
      "learning_rate": 4.020859620925235e-05,
      "loss": 0.8483,
      "step": 45
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 0.5625,
      "learning_rate": 3.77403801594802e-05,
      "loss": 0.8315,
      "step": 50
    },
    {
      "epoch": 4.230769230769231,
      "grad_norm": 0.5390625,
      "learning_rate": 3.510537976419131e-05,
      "loss": 0.8181,
      "step": 55
    },
    {
      "epoch": 4.615384615384615,
      "grad_norm": 0.5234375,
      "learning_rate": 3.234651108797708e-05,
      "loss": 0.7815,
      "step": 60
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.81640625,
      "learning_rate": 2.9508707629336874e-05,
      "loss": 0.8063,
      "step": 65
    },
    {
      "epoch": 5.384615384615385,
      "grad_norm": 0.52734375,
      "learning_rate": 2.6638188491974207e-05,
      "loss": 0.7603,
      "step": 70
    },
    {
      "epoch": 5.769230769230769,
      "grad_norm": 0.53125,
      "learning_rate": 2.378170561753209e-05,
      "loss": 0.7657,
      "step": 75
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 0.55078125,
      "learning_rate": 2.098578234003466e-05,
      "loss": 0.749,
      "step": 80
    },
    {
      "epoch": 6.538461538461538,
      "grad_norm": 0.50390625,
      "learning_rate": 1.8295955663644855e-05,
      "loss": 0.7504,
      "step": 85
    },
    {
      "epoch": 6.923076923076923,
      "grad_norm": 0.447265625,
      "learning_rate": 1.575603460470665e-05,
      "loss": 0.7376,
      "step": 90
    },
    {
      "epoch": 7.3076923076923075,
      "grad_norm": 0.474609375,
      "learning_rate": 1.3407386677402312e-05,
      "loss": 0.7354,
      "step": 95
    },
    {
      "epoch": 7.6923076923076925,
      "grad_norm": 0.447265625,
      "learning_rate": 1.1288264143982296e-05,
      "loss": 0.7365,
      "step": 100
    },
    {
      "epoch": 8.076923076923077,
      "grad_norm": 0.4609375,
      "learning_rate": 9.433181002882383e-06,
      "loss": 0.731,
      "step": 105
    },
    {
      "epoch": 8.461538461538462,
      "grad_norm": 0.443359375,
      "learning_rate": 7.872350861678565e-06,
      "loss": 0.7102,
      "step": 110
    },
    {
      "epoch": 8.846153846153847,
      "grad_norm": 0.443359375,
      "learning_rate": 6.631194850202872e-06,
      "loss": 0.7296,
      "step": 115
    },
    {
      "epoch": 9.23076923076923,
      "grad_norm": 0.416015625,
      "learning_rate": 5.729927588404395e-06,
      "loss": 0.7268,
      "step": 120
    },
    {
      "epoch": 9.615384615384615,
      "grad_norm": 0.41796875,
      "learning_rate": 5.1832279522675925e-06,
      "loss": 0.7262,
      "step": 125
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.77734375,
      "learning_rate": 5e-06,
      "loss": 0.7346,
      "step": 130
    },
    {
      "epoch": 10.0,
      "step": 130,
      "total_flos": 2.5375909456379904e+17,
      "train_loss": 0.0,
      "train_runtime": 2.0428,
      "train_samples_per_second": 964.355,
      "train_steps_per_second": 63.638
    }
  ],
  "logging_steps": 5,
  "max_steps": 130,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5375909456379904e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}