|
{ |
|
"best_metric": 12.990397453308105, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-500", |
|
"epoch": 0.24551927326295114, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004910385465259023, |
|
"eval_loss": 8.885814666748047, |
|
"eval_runtime": 238.0058, |
|
"eval_samples_per_second": 14.411, |
|
"eval_steps_per_second": 3.605, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.024551927326295114, |
|
"grad_norm": 42008307564544.0, |
|
"learning_rate": 0.000106, |
|
"loss": 8.5743, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04910385465259023, |
|
"grad_norm": 557210992640.0, |
|
"learning_rate": 0.000212, |
|
"loss": 7.9517, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07365578197888534, |
|
"grad_norm": 6631273791488.0, |
|
"learning_rate": 0.0002039312304461964, |
|
"loss": 10.2486, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09820770930518045, |
|
"grad_norm": 4286375424.0, |
|
"learning_rate": 0.00018095331880577404, |
|
"loss": 15.7423, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12275963663147557, |
|
"grad_norm": 14081196032000.0, |
|
"learning_rate": 0.00014656444383069953, |
|
"loss": 13.472, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14731156395777067, |
|
"grad_norm": 2926234566656.0, |
|
"learning_rate": 0.000106, |
|
"loss": 13.7329, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1718634912840658, |
|
"grad_norm": 20068786176.0, |
|
"learning_rate": 6.543555616930049e-05, |
|
"loss": 13.4434, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1964154186103609, |
|
"grad_norm": 5454654464.0, |
|
"learning_rate": 3.104668119422597e-05, |
|
"loss": 12.9573, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22096734593665604, |
|
"grad_norm": 4763621888.0, |
|
"learning_rate": 8.068769553803605e-06, |
|
"loss": 13.2059, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24551927326295114, |
|
"grad_norm": 1581244481536.0, |
|
"learning_rate": 0.0, |
|
"loss": 13.1652, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24551927326295114, |
|
"eval_loss": 12.990397453308105, |
|
"eval_runtime": 238.0138, |
|
"eval_samples_per_second": 14.411, |
|
"eval_steps_per_second": 3.605, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.77695431032832e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|