|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9926617745163442, |
|
"eval_steps": 5, |
|
"global_step": 93, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0533689126084056, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.66, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1067378252168112, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.6231, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1601067378252168, |
|
"grad_norm": 0.2781398892402649, |
|
"learning_rate": 0.001414213562373095, |
|
"loss": 1.6169, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2134756504336224, |
|
"grad_norm": 2.027355194091797, |
|
"learning_rate": 0.001, |
|
"loss": 1.5934, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.266844563042028, |
|
"grad_norm": 0.6781024932861328, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 1.4104, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3202134756504336, |
|
"grad_norm": 0.8801289796829224, |
|
"learning_rate": 0.0005345224838248488, |
|
"loss": 1.3078, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3735823882588392, |
|
"grad_norm": 0.37911781668663025, |
|
"learning_rate": 0.0004588314677411235, |
|
"loss": 1.2235, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4269513008672448, |
|
"grad_norm": 0.3639412224292755, |
|
"learning_rate": 0.0004082482904638631, |
|
"loss": 1.2058, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48032021347565046, |
|
"grad_norm": 0.3324061930179596, |
|
"learning_rate": 0.0003713906763541037, |
|
"loss": 1.2022, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.533689126084056, |
|
"grad_norm": 0.3669029772281647, |
|
"learning_rate": 0.00034299717028501764, |
|
"loss": 1.1901, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5870580386924616, |
|
"grad_norm": 0.3316662013530731, |
|
"learning_rate": 0.00032025630761017425, |
|
"loss": 1.1541, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.6404269513008672, |
|
"grad_norm": 0.36560526490211487, |
|
"learning_rate": 0.00030151134457776364, |
|
"loss": 1.1021, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6937958639092728, |
|
"grad_norm": 0.36231526732444763, |
|
"learning_rate": 0.0002857142857142857, |
|
"loss": 1.1178, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.7471647765176784, |
|
"grad_norm": 0.3893248438835144, |
|
"learning_rate": 0.0002721655269759087, |
|
"loss": 1.1032, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.800533689126084, |
|
"grad_norm": 0.4164714813232422, |
|
"learning_rate": 0.0002603778219616478, |
|
"loss": 1.0781, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.8539026017344896, |
|
"grad_norm": 0.3659443259239197, |
|
"learning_rate": 0.00025, |
|
"loss": 1.0644, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9072715143428952, |
|
"grad_norm": 0.3580448031425476, |
|
"learning_rate": 0.0002407717061715384, |
|
"loss": 1.0597, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.9606404269513009, |
|
"grad_norm": 0.4161628186702728, |
|
"learning_rate": 0.00023249527748763857, |
|
"loss": 1.0817, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9926617745163442, |
|
"step": 93, |
|
"total_flos": 9.445511318816686e+17, |
|
"train_loss": 1.2599219224786247, |
|
"train_runtime": 872.6845, |
|
"train_samples_per_second": 13.737, |
|
"train_steps_per_second": 0.107 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 93, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.445511318816686e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|