wangyichen25's picture
Training in progress, step 100, checkpoint
f534bc3 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9230769230769231,
"eval_steps": 10,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.19230769230769232,
"grad_norm": 21.498493194580078,
"learning_rate": 0.00019470198675496689,
"loss": 14.4551,
"mean_token_accuracy": 0.6612499989569187,
"num_tokens": 49120.0,
"step": 10
},
{
"epoch": 0.19230769230769232,
"eval_loss": 1.0248602628707886,
"eval_mean_token_accuracy": 0.8969230560156015,
"eval_num_tokens": 49120.0,
"eval_runtime": 17.9265,
"eval_samples_per_second": 2.789,
"eval_steps_per_second": 0.725,
"step": 10
},
{
"epoch": 0.38461538461538464,
"grad_norm": 6.777862071990967,
"learning_rate": 0.00018145695364238411,
"loss": 2.185,
"mean_token_accuracy": 0.9445000052452087,
"num_tokens": 98240.0,
"step": 20
},
{
"epoch": 0.38461538461538464,
"eval_loss": 0.13720029592514038,
"eval_mean_token_accuracy": 0.9761538688953106,
"eval_num_tokens": 98240.0,
"eval_runtime": 17.8731,
"eval_samples_per_second": 2.797,
"eval_steps_per_second": 0.727,
"step": 20
},
{
"epoch": 0.5769230769230769,
"grad_norm": 4.832788467407227,
"learning_rate": 0.00016821192052980132,
"loss": 0.2858,
"mean_token_accuracy": 0.9777500212192536,
"num_tokens": 147360.0,
"step": 30
},
{
"epoch": 0.5769230769230769,
"eval_loss": 0.03728929162025452,
"eval_mean_token_accuracy": 0.9765384839131281,
"eval_num_tokens": 147360.0,
"eval_runtime": 18.2308,
"eval_samples_per_second": 2.743,
"eval_steps_per_second": 0.713,
"step": 30
},
{
"epoch": 0.7692307692307693,
"grad_norm": 6.547402858734131,
"learning_rate": 0.00015496688741721855,
"loss": 0.1824,
"mean_token_accuracy": 0.9781250208616257,
"num_tokens": 196480.0,
"step": 40
},
{
"epoch": 0.7692307692307693,
"eval_loss": 0.05031127855181694,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 196480.0,
"eval_runtime": 17.8889,
"eval_samples_per_second": 2.795,
"eval_steps_per_second": 0.727,
"step": 40
},
{
"epoch": 0.9615384615384616,
"grad_norm": 4.728590965270996,
"learning_rate": 0.00014172185430463575,
"loss": 0.1721,
"mean_token_accuracy": 0.9783750206232071,
"num_tokens": 245600.0,
"step": 50
},
{
"epoch": 0.9615384615384616,
"eval_loss": 0.04732300713658333,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 245600.0,
"eval_runtime": 18.0023,
"eval_samples_per_second": 2.777,
"eval_steps_per_second": 0.722,
"step": 50
},
{
"epoch": 1.1538461538461537,
"grad_norm": 1.990704894065857,
"learning_rate": 0.00012847682119205298,
"loss": 0.1396,
"mean_token_accuracy": 0.9791250199079513,
"num_tokens": 294413.0,
"step": 60
},
{
"epoch": 1.1538461538461537,
"eval_loss": 0.040356263518333435,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 294413.0,
"eval_runtime": 17.8824,
"eval_samples_per_second": 2.796,
"eval_steps_per_second": 0.727,
"step": 60
},
{
"epoch": 1.3461538461538463,
"grad_norm": 0.9954031109809875,
"learning_rate": 0.0001152317880794702,
"loss": 0.1391,
"mean_token_accuracy": 0.9780000209808349,
"num_tokens": 343533.0,
"step": 70
},
{
"epoch": 1.3461538461538463,
"eval_loss": 0.03649154305458069,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 343533.0,
"eval_runtime": 18.0872,
"eval_samples_per_second": 2.764,
"eval_steps_per_second": 0.719,
"step": 70
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.5908142328262329,
"learning_rate": 0.00010198675496688744,
"loss": 0.1376,
"mean_token_accuracy": 0.9785000190138817,
"num_tokens": 392653.0,
"step": 80
},
{
"epoch": 1.5384615384615383,
"eval_loss": 0.03940116986632347,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 392653.0,
"eval_runtime": 17.8649,
"eval_samples_per_second": 2.799,
"eval_steps_per_second": 0.728,
"step": 80
},
{
"epoch": 1.7307692307692308,
"grad_norm": 1.166898250579834,
"learning_rate": 8.874172185430464e-05,
"loss": 0.1379,
"mean_token_accuracy": 0.9782500207424164,
"num_tokens": 441773.0,
"step": 90
},
{
"epoch": 1.7307692307692308,
"eval_loss": 0.03707318380475044,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 441773.0,
"eval_runtime": 18.0365,
"eval_samples_per_second": 2.772,
"eval_steps_per_second": 0.721,
"step": 90
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.38323092460632324,
"learning_rate": 7.549668874172185e-05,
"loss": 0.1415,
"mean_token_accuracy": 0.9765000149607659,
"num_tokens": 490893.0,
"step": 100
},
{
"epoch": 1.9230769230769231,
"eval_loss": 0.036779891699552536,
"eval_mean_token_accuracy": 0.9769230989309458,
"eval_num_tokens": 490893.0,
"eval_runtime": 17.8928,
"eval_samples_per_second": 2.794,
"eval_steps_per_second": 0.727,
"step": 100
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2764765310444576e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}