|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 9590, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.22007166537097106, |
|
"eval_loss": 4.9203200340271, |
|
"eval_runtime": 6.672, |
|
"eval_samples_per_second": 32.824, |
|
"eval_steps_per_second": 1.049, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.0427528675703859, |
|
"grad_norm": 14641.076171875, |
|
"learning_rate": 0.0005993999999999999, |
|
"loss": 1.251, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.26578738081834347, |
|
"eval_loss": 4.47660493850708, |
|
"eval_runtime": 5.7576, |
|
"eval_samples_per_second": 38.037, |
|
"eval_steps_per_second": 1.216, |
|
"step": 1918 |
|
}, |
|
{ |
|
"epoch": 2.0855057351407718, |
|
"grad_norm": 10230.9931640625, |
|
"learning_rate": 0.0005302211874272409, |
|
"loss": 1.0398, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.27962898426400024, |
|
"eval_loss": 4.3035736083984375, |
|
"eval_runtime": 5.7523, |
|
"eval_samples_per_second": 38.071, |
|
"eval_steps_per_second": 1.217, |
|
"step": 2877 |
|
}, |
|
{ |
|
"epoch": 3.1282586027111576, |
|
"grad_norm": 13416.8994140625, |
|
"learning_rate": 0.00046037252619324795, |
|
"loss": 0.978, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2879125003350937, |
|
"eval_loss": 4.201538562774658, |
|
"eval_runtime": 5.7517, |
|
"eval_samples_per_second": 38.076, |
|
"eval_steps_per_second": 1.217, |
|
"step": 3836 |
|
}, |
|
{ |
|
"epoch": 4.1710114702815435, |
|
"grad_norm": 12218.20703125, |
|
"learning_rate": 0.00039052386495925487, |
|
"loss": 0.9464, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2940960959350901, |
|
"eval_loss": 4.131160259246826, |
|
"eval_runtime": 5.7355, |
|
"eval_samples_per_second": 38.183, |
|
"eval_steps_per_second": 1.22, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 5.213764337851929, |
|
"grad_norm": 11805.5654296875, |
|
"learning_rate": 0.0003206752037252619, |
|
"loss": 0.9242, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3038450884200556, |
|
"eval_loss": 4.05396842956543, |
|
"eval_runtime": 5.7181, |
|
"eval_samples_per_second": 38.3, |
|
"eval_steps_per_second": 1.224, |
|
"step": 5754 |
|
}, |
|
{ |
|
"epoch": 6.256517205422315, |
|
"grad_norm": 13648.1845703125, |
|
"learning_rate": 0.0002508265424912689, |
|
"loss": 0.9027, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.30757133027727884, |
|
"eval_loss": 3.9987871646881104, |
|
"eval_runtime": 5.7301, |
|
"eval_samples_per_second": 38.219, |
|
"eval_steps_per_second": 1.222, |
|
"step": 6713 |
|
}, |
|
{ |
|
"epoch": 7.299270072992701, |
|
"grad_norm": 12588.6240234375, |
|
"learning_rate": 0.0001809778812572759, |
|
"loss": 0.8825, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.31707905530386293, |
|
"eval_loss": 3.932004690170288, |
|
"eval_runtime": 5.7156, |
|
"eval_samples_per_second": 38.316, |
|
"eval_steps_per_second": 1.225, |
|
"step": 7672 |
|
}, |
|
{ |
|
"epoch": 8.342022940563087, |
|
"grad_norm": 10292.294921875, |
|
"learning_rate": 0.00011112922002328288, |
|
"loss": 0.8632, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.32071593884316724, |
|
"eval_loss": 3.891136884689331, |
|
"eval_runtime": 5.8599, |
|
"eval_samples_per_second": 37.372, |
|
"eval_steps_per_second": 1.195, |
|
"step": 8631 |
|
}, |
|
{ |
|
"epoch": 9.384775808133472, |
|
"grad_norm": 10414.537109375, |
|
"learning_rate": 4.128055878928987e-05, |
|
"loss": 0.8454, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.32392390245646013, |
|
"eval_loss": 3.869386911392212, |
|
"eval_runtime": 5.7187, |
|
"eval_samples_per_second": 38.295, |
|
"eval_steps_per_second": 1.224, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 9590, |
|
"total_flos": 8.010952409088e+16, |
|
"train_loss": 0.9517836235611232, |
|
"train_runtime": 10856.3074, |
|
"train_samples_per_second": 28.241, |
|
"train_steps_per_second": 0.883 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 9590, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.010952409088e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|