{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6823027718550106,
  "eval_steps": 100,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.042643923240938165,
      "grad_norm": 6.837185310983226,
      "learning_rate": 9.8e-05,
      "loss": 3.4374,
      "step": 50
    },
    {
      "epoch": 0.08528784648187633,
      "grad_norm": 5.856628309511881,
      "learning_rate": 9.999608504824794e-05,
      "loss": 2.3516,
      "step": 100
    },
    {
      "epoch": 0.08528784648187633,
      "eval_loss": 2.009383201599121,
      "eval_runtime": 12.4493,
      "eval_samples_per_second": 77.193,
      "eval_steps_per_second": 2.49,
      "step": 100
    },
    {
      "epoch": 0.1279317697228145,
      "grad_norm": 2.5524801484518003,
      "learning_rate": 9.998401968872145e-05,
      "loss": 2.112,
      "step": 150
    },
    {
      "epoch": 0.17057569296375266,
      "grad_norm": 2.5064021178510165,
      "learning_rate": 9.996380447675372e-05,
      "loss": 1.966,
      "step": 200
    },
    {
      "epoch": 0.17057569296375266,
      "eval_loss": 1.7050557136535645,
      "eval_runtime": 12.4422,
      "eval_samples_per_second": 77.237,
      "eval_steps_per_second": 2.492,
      "step": 200
    },
    {
      "epoch": 0.21321961620469082,
      "grad_norm": 1.578271992628694,
      "learning_rate": 9.993544307477859e-05,
      "loss": 1.858,
      "step": 250
    },
    {
      "epoch": 0.255863539445629,
      "grad_norm": 1.429198717254802,
      "learning_rate": 9.989894062109277e-05,
      "loss": 1.8137,
      "step": 300
    },
    {
      "epoch": 0.255863539445629,
      "eval_loss": 1.606706976890564,
      "eval_runtime": 12.4342,
      "eval_samples_per_second": 77.287,
      "eval_steps_per_second": 2.493,
      "step": 300
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 1.2571840549107522,
      "learning_rate": 9.985430372892501e-05,
      "loss": 1.76,
      "step": 350
    },
    {
      "epoch": 0.3411513859275053,
      "grad_norm": 1.049587851608184,
      "learning_rate": 9.980154048523787e-05,
      "loss": 1.7241,
      "step": 400
    },
    {
      "epoch": 0.3411513859275053,
      "eval_loss": 1.537925124168396,
      "eval_runtime": 12.4508,
      "eval_samples_per_second": 77.184,
      "eval_steps_per_second": 2.49,
      "step": 400
    },
    {
      "epoch": 0.3837953091684435,
      "grad_norm": 1.9775958103062996,
      "learning_rate": 9.97406604492627e-05,
      "loss": 1.6688,
      "step": 450
    },
    {
      "epoch": 0.42643923240938164,
      "grad_norm": 3.376659544027156,
      "learning_rate": 9.96716746507677e-05,
      "loss": 1.6328,
      "step": 500
    },
    {
      "epoch": 0.42643923240938164,
      "eval_loss": 1.5163637399673462,
      "eval_runtime": 12.4602,
      "eval_samples_per_second": 77.126,
      "eval_steps_per_second": 2.488,
      "step": 500
    },
    {
      "epoch": 0.4690831556503198,
      "grad_norm": 1.9970506581035867,
      "learning_rate": 9.959459558805966e-05,
      "loss": 1.5897,
      "step": 550
    },
    {
      "epoch": 0.511727078891258,
      "grad_norm": 1.0149632306217509,
      "learning_rate": 9.950943722571958e-05,
      "loss": 1.5678,
      "step": 600
    },
    {
      "epoch": 0.511727078891258,
      "eval_loss": 1.4665403366088867,
      "eval_runtime": 12.4707,
      "eval_samples_per_second": 77.06,
      "eval_steps_per_second": 2.486,
      "step": 600
    },
    {
      "epoch": 0.5543710021321961,
      "grad_norm": 1.4865013565611145,
      "learning_rate": 9.941621499207273e-05,
      "loss": 1.5246,
      "step": 650
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 1.2665911254417623,
      "learning_rate": 9.931494577639339e-05,
      "loss": 1.4724,
      "step": 700
    },
    {
      "epoch": 0.5970149253731343,
      "eval_loss": 1.4630547761917114,
      "eval_runtime": 12.5024,
      "eval_samples_per_second": 76.865,
      "eval_steps_per_second": 2.48,
      "step": 700
    },
    {
      "epoch": 0.6396588486140725,
      "grad_norm": 1.3103343588206324,
      "learning_rate": 9.920564792584512e-05,
      "loss": 1.4582,
      "step": 750
    },
    {
      "epoch": 0.6823027718550106,
      "grad_norm": 1.3932403498894013,
      "learning_rate": 9.908834124215664e-05,
      "loss": 1.4046,
      "step": 800
    },
    {
      "epoch": 0.6823027718550106,
      "eval_loss": 1.4460753202438354,
      "eval_runtime": 12.4356,
      "eval_samples_per_second": 77.278,
      "eval_steps_per_second": 2.493,
      "step": 800
    }
  ],
  "logging_steps": 50,
  "max_steps": 11720,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 700830323834880.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}