|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 36, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 1.1256082074272271, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.4398, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 1.088771511094631, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.4412, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 1.8376444921378117, |
|
"learning_rate": 2e-05, |
|
"loss": 0.47, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 0.529094126656205, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.2967, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 0.6045407978315229, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2834, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.0370791330907858, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2388, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.3888888888888889, |
|
"grad_norm": 0.9943285807402519, |
|
"learning_rate": 3.995717846477207e-05, |
|
"loss": 0.2102, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.734159963431325, |
|
"learning_rate": 3.982889722747621e-05, |
|
"loss": 0.1507, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.6269810975525937, |
|
"learning_rate": 3.961570560806461e-05, |
|
"loss": 0.1478, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 0.5651528327820644, |
|
"learning_rate": 3.931851652578137e-05, |
|
"loss": 0.0943, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6111111111111112, |
|
"grad_norm": 0.4737837967269508, |
|
"learning_rate": 3.893860258990212e-05, |
|
"loss": 0.1186, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.22977095830410557, |
|
"learning_rate": 3.8477590650225735e-05, |
|
"loss": 0.1061, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.7222222222222222, |
|
"grad_norm": 0.4134357432752201, |
|
"learning_rate": 3.793745483065377e-05, |
|
"loss": 0.0879, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.3218674795732037, |
|
"learning_rate": 3.732050807568878e-05, |
|
"loss": 0.0936, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.1669034849615332, |
|
"learning_rate": 3.662939224605091e-05, |
|
"loss": 0.09, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.16802071996279108, |
|
"learning_rate": 3.586706680582471e-05, |
|
"loss": 0.0758, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.9444444444444444, |
|
"grad_norm": 0.1369439456224394, |
|
"learning_rate": 3.503679614957955e-05, |
|
"loss": 0.08, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.20649754108360613, |
|
"learning_rate": 3.4142135623730954e-05, |
|
"loss": 0.0895, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.0555555555555556, |
|
"grad_norm": 0.09880137297267558, |
|
"learning_rate": 3.318691630200138e-05, |
|
"loss": 0.0564, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.1260825954366184, |
|
"learning_rate": 3.217522858017442e-05, |
|
"loss": 0.0557, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.0911954618176465, |
|
"learning_rate": 3.111140466039205e-05, |
|
"loss": 0.0538, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 0.10435877731045032, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 0.0552, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.2777777777777777, |
|
"grad_norm": 0.09814153580724687, |
|
"learning_rate": 2.8845773804380028e-05, |
|
"loss": 0.0536, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.45047741750772974, |
|
"learning_rate": 2.7653668647301797e-05, |
|
"loss": 0.0511, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 0.30626691874091017, |
|
"learning_rate": 2.6428789306063233e-05, |
|
"loss": 0.0462, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.6599213438303501, |
|
"learning_rate": 2.5176380902050418e-05, |
|
"loss": 0.0513, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.2529588579336132, |
|
"learning_rate": 2.390180644032257e-05, |
|
"loss": 0.0513, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 2.00761533609209, |
|
"learning_rate": 2.261052384440104e-05, |
|
"loss": 0.0564, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.6111111111111112, |
|
"grad_norm": 0.34872709425136345, |
|
"learning_rate": 2.1308062584602865e-05, |
|
"loss": 0.0561, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.4904440153660744, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0492, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.7222222222222223, |
|
"grad_norm": 0.12493803487246685, |
|
"learning_rate": 1.869193741539714e-05, |
|
"loss": 0.0425, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.27834780304111784, |
|
"learning_rate": 1.7389476155598974e-05, |
|
"loss": 0.0504, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.12529358778962452, |
|
"learning_rate": 1.609819355967744e-05, |
|
"loss": 0.0416, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 0.17096633068642128, |
|
"learning_rate": 1.4823619097949584e-05, |
|
"loss": 0.0427, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 0.2813740448600146, |
|
"learning_rate": 1.3571210693936774e-05, |
|
"loss": 0.044, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.10020685560024836, |
|
"learning_rate": 1.2346331352698206e-05, |
|
"loss": 0.0399, |
|
"step": 36 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 54, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 126748779872256.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|