|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.272408527964712, |
|
"eval_steps": 500, |
|
"global_step": 22500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028205562136853386, |
|
"grad_norm": 0.20581459999084473, |
|
"learning_rate": 4.929486094657867e-05, |
|
"loss": 2.15, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05641112427370677, |
|
"grad_norm": 0.14948038756847382, |
|
"learning_rate": 4.858972189315733e-05, |
|
"loss": 0.0702, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08461668641056017, |
|
"grad_norm": 0.18362277746200562, |
|
"learning_rate": 4.7884582839736e-05, |
|
"loss": 0.0657, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11282224854741355, |
|
"grad_norm": 0.13676001131534576, |
|
"learning_rate": 4.7179443786314666e-05, |
|
"loss": 0.0647, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14102781068426692, |
|
"grad_norm": 0.1710374653339386, |
|
"learning_rate": 4.6474304732893334e-05, |
|
"loss": 0.0641, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16923337282112033, |
|
"grad_norm": 0.10585477948188782, |
|
"learning_rate": 4.5769165679471995e-05, |
|
"loss": 0.0645, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1974389349579737, |
|
"grad_norm": 0.10310818254947662, |
|
"learning_rate": 4.506402662605066e-05, |
|
"loss": 0.063, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2256444970948271, |
|
"grad_norm": 0.12138450145721436, |
|
"learning_rate": 4.4358887572629323e-05, |
|
"loss": 0.063, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25385005923168047, |
|
"grad_norm": 0.11205188930034637, |
|
"learning_rate": 4.3653748519207984e-05, |
|
"loss": 0.0616, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.28205562136853385, |
|
"grad_norm": 0.11399635672569275, |
|
"learning_rate": 4.294860946578665e-05, |
|
"loss": 0.0624, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3102611835053873, |
|
"grad_norm": 0.11061274260282516, |
|
"learning_rate": 4.224347041236532e-05, |
|
"loss": 0.0631, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.33846674564224066, |
|
"grad_norm": 0.08492901921272278, |
|
"learning_rate": 4.153833135894399e-05, |
|
"loss": 0.0615, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.36667230777909404, |
|
"grad_norm": 0.09080592542886734, |
|
"learning_rate": 4.083319230552265e-05, |
|
"loss": 0.0616, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.3948778699159474, |
|
"grad_norm": 0.09771362692117691, |
|
"learning_rate": 4.0128053252101316e-05, |
|
"loss": 0.0616, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.4230834320528008, |
|
"grad_norm": 0.09805440902709961, |
|
"learning_rate": 3.9422914198679984e-05, |
|
"loss": 0.0621, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4512889941896542, |
|
"grad_norm": 0.11397408694028854, |
|
"learning_rate": 3.8717775145258645e-05, |
|
"loss": 0.0605, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4794945563265076, |
|
"grad_norm": 0.10463790595531464, |
|
"learning_rate": 3.801263609183731e-05, |
|
"loss": 0.0618, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.5077001184633609, |
|
"grad_norm": 0.09112311899662018, |
|
"learning_rate": 3.730749703841598e-05, |
|
"loss": 0.061, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5359056806002144, |
|
"grad_norm": 0.09545386582612991, |
|
"learning_rate": 3.660235798499465e-05, |
|
"loss": 0.0604, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5641112427370677, |
|
"grad_norm": 0.08686262369155884, |
|
"learning_rate": 3.589721893157331e-05, |
|
"loss": 0.0605, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5923168048739211, |
|
"grad_norm": 0.09380181133747101, |
|
"learning_rate": 3.5192079878151976e-05, |
|
"loss": 0.0612, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6205223670107746, |
|
"grad_norm": 0.10441367328166962, |
|
"learning_rate": 3.448694082473064e-05, |
|
"loss": 0.0611, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.6487279291476279, |
|
"grad_norm": 0.11171282082796097, |
|
"learning_rate": 3.3781801771309305e-05, |
|
"loss": 0.0611, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.6769334912844813, |
|
"grad_norm": 0.08861543238162994, |
|
"learning_rate": 3.3076662717887966e-05, |
|
"loss": 0.0602, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7051390534213346, |
|
"grad_norm": 0.09776312112808228, |
|
"learning_rate": 3.237152366446663e-05, |
|
"loss": 0.0597, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.7333446155581881, |
|
"grad_norm": 0.10567035526037216, |
|
"learning_rate": 3.16663846110453e-05, |
|
"loss": 0.0607, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.7615501776950414, |
|
"grad_norm": 0.08531802147626877, |
|
"learning_rate": 3.096124555762396e-05, |
|
"loss": 0.0602, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.7897557398318948, |
|
"grad_norm": 0.08937925100326538, |
|
"learning_rate": 3.025610650420263e-05, |
|
"loss": 0.0606, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.8179613019687483, |
|
"grad_norm": 0.0823800340294838, |
|
"learning_rate": 2.9550967450781297e-05, |
|
"loss": 0.0602, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.8461668641056016, |
|
"grad_norm": 0.09639054536819458, |
|
"learning_rate": 2.8845828397359958e-05, |
|
"loss": 0.0603, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.874372426242455, |
|
"grad_norm": 0.08088058978319168, |
|
"learning_rate": 2.8140689343938626e-05, |
|
"loss": 0.0609, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.9025779883793084, |
|
"grad_norm": 0.06688382476568222, |
|
"learning_rate": 2.7435550290517293e-05, |
|
"loss": 0.06, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.9307835505161618, |
|
"grad_norm": 0.07398771494626999, |
|
"learning_rate": 2.6730411237095958e-05, |
|
"loss": 0.0603, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.9613753322400045, |
|
"grad_norm": 0.08878236263990402, |
|
"learning_rate": 2.596561669399989e-05, |
|
"loss": 0.0579, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.989651077305887, |
|
"grad_norm": 0.0783604308962822, |
|
"learning_rate": 2.5258723067352823e-05, |
|
"loss": 0.0593, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.0179268223717695, |
|
"grad_norm": 0.10440826416015625, |
|
"learning_rate": 2.4551829440705764e-05, |
|
"loss": 0.0583, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.046202567437652, |
|
"grad_norm": 0.11020490527153015, |
|
"learning_rate": 2.3844935814058702e-05, |
|
"loss": 0.0596, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.0744783125035344, |
|
"grad_norm": 0.09160757064819336, |
|
"learning_rate": 2.313804218741164e-05, |
|
"loss": 0.0587, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.102754057569417, |
|
"grad_norm": 0.10222485661506653, |
|
"learning_rate": 2.2431148560764577e-05, |
|
"loss": 0.0592, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.1310298026352994, |
|
"grad_norm": 0.09362111240625381, |
|
"learning_rate": 2.1724254934117515e-05, |
|
"loss": 0.0594, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.159305547701182, |
|
"grad_norm": 0.0931045264005661, |
|
"learning_rate": 2.1017361307470452e-05, |
|
"loss": 0.0596, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.1875812927670644, |
|
"grad_norm": 0.0950915515422821, |
|
"learning_rate": 2.0310467680823393e-05, |
|
"loss": 0.0592, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.215857037832947, |
|
"grad_norm": 0.09537334740161896, |
|
"learning_rate": 1.960357405417633e-05, |
|
"loss": 0.0591, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.2441327828988293, |
|
"grad_norm": 0.08932538330554962, |
|
"learning_rate": 1.889668042752927e-05, |
|
"loss": 0.059, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.272408527964712, |
|
"grad_norm": 0.07329034805297852, |
|
"learning_rate": 1.8189786800882206e-05, |
|
"loss": 0.0592, |
|
"step": 22500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 35366, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|