review_evaluation_actionability / trainer_state.json
boda's picture
Model save
1379dfc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.7058823529411766,
"eval_steps": 500,
"global_step": 24,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11764705882352941,
"grad_norm": 0.5514968283268096,
"learning_rate": 6.666666666666667e-05,
"loss": 2.103,
"mean_token_accuracy": 0.6410358250141144,
"step": 1
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.5564948741796076,
"learning_rate": 0.0001955572805786141,
"loss": 1.95,
"mean_token_accuracy": 0.6562966406345367,
"step": 5
},
{
"epoch": 1.0,
"eval_loss": 2.2298309803009033,
"eval_mean_token_accuracy": 0.6953375414013863,
"eval_runtime": 1.1412,
"eval_samples_per_second": 4.381,
"eval_steps_per_second": 0.876,
"step": 9
},
{
"epoch": 1.1176470588235294,
"grad_norm": 0.3550428109424067,
"learning_rate": 0.00015000000000000001,
"loss": 1.5318,
"mean_token_accuracy": 0.6940737068653107,
"step": 10
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.15172839418805506,
"learning_rate": 7.774790660436858e-05,
"loss": 1.4025,
"mean_token_accuracy": 0.7238408386707306,
"step": 15
},
{
"epoch": 2.0,
"eval_loss": 2.040649652481079,
"eval_mean_token_accuracy": 0.6979705393314362,
"eval_runtime": 1.278,
"eval_samples_per_second": 3.912,
"eval_steps_per_second": 0.782,
"step": 18
},
{
"epoch": 2.235294117647059,
"grad_norm": 0.16155482865148485,
"learning_rate": 1.7376122568400532e-05,
"loss": 1.4263,
"mean_token_accuracy": 0.7188252657651901,
"step": 20
},
{
"epoch": 2.7058823529411766,
"eval_loss": 2.0348782539367676,
"eval_mean_token_accuracy": 0.710096988413069,
"eval_runtime": 1.278,
"eval_samples_per_second": 3.912,
"eval_steps_per_second": 0.782,
"step": 24
},
{
"epoch": 2.7058823529411766,
"step": 24,
"total_flos": 7336072979677184.0,
"train_loss": 1.5557562708854675,
"train_runtime": 102.3381,
"train_samples_per_second": 1.935,
"train_steps_per_second": 0.235
}
],
"logging_steps": 5,
"max_steps": 24,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7336072979677184.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}