|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9959721713658001, |
|
"eval_steps": 500, |
|
"global_step": 170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02929329915781765, |
|
"grad_norm": 1.6078954935073853, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.9551, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0585865983156353, |
|
"grad_norm": 1.1429349184036255, |
|
"learning_rate": 4.9995716618706634e-05, |
|
"loss": 0.9029, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08787989747345294, |
|
"grad_norm": 0.7431623935699463, |
|
"learning_rate": 4.9845969445888354e-05, |
|
"loss": 0.7873, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.1171731966312706, |
|
"grad_norm": 0.5352614521980286, |
|
"learning_rate": 4.948368129547296e-05, |
|
"loss": 0.7782, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14646649578908824, |
|
"grad_norm": 0.599611759185791, |
|
"learning_rate": 4.891229802725401e-05, |
|
"loss": 0.7671, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17575979494690588, |
|
"grad_norm": 0.5661498308181763, |
|
"learning_rate": 4.8137254283872696e-05, |
|
"loss": 0.7371, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20505309410472355, |
|
"grad_norm": 0.4830688238143921, |
|
"learning_rate": 4.7165921799873716e-05, |
|
"loss": 0.7424, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.2343463932625412, |
|
"grad_norm": 0.4806412160396576, |
|
"learning_rate": 4.6007539286346375e-05, |
|
"loss": 0.7218, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26363969242035884, |
|
"grad_norm": 0.43145492672920227, |
|
"learning_rate": 4.467312455804482e-05, |
|
"loss": 0.729, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2929329915781765, |
|
"grad_norm": 0.4414162337779999, |
|
"learning_rate": 4.317536973877955e-05, |
|
"loss": 0.7322, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3222262907359941, |
|
"grad_norm": 0.6285475492477417, |
|
"learning_rate": 4.1528520541821506e-05, |
|
"loss": 0.7152, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.35151958989381177, |
|
"grad_norm": 0.5875662565231323, |
|
"learning_rate": 3.974824077352845e-05, |
|
"loss": 0.744, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38081288905162947, |
|
"grad_norm": 0.5579010844230652, |
|
"learning_rate": 3.785146334895093e-05, |
|
"loss": 0.715, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4101061882094471, |
|
"grad_norm": 0.5400099754333496, |
|
"learning_rate": 3.58562292364649e-05, |
|
"loss": 0.7322, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.43939948736726475, |
|
"grad_norm": 0.43588295578956604, |
|
"learning_rate": 3.378151586328963e-05, |
|
"loss": 0.7098, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4686927865250824, |
|
"grad_norm": 0.42549237608909607, |
|
"learning_rate": 3.164705661399079e-05, |
|
"loss": 0.7227, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49798608568290004, |
|
"grad_norm": 0.4135156273841858, |
|
"learning_rate": 2.947315313878701e-05, |
|
"loss": 0.6879, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5272793848407177, |
|
"grad_norm": 0.4909493029117584, |
|
"learning_rate": 2.7280482256866697e-05, |
|
"loss": 0.6863, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5565726839985353, |
|
"grad_norm": 0.44555437564849854, |
|
"learning_rate": 2.508989929133051e-05, |
|
"loss": 0.6977, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.585865983156353, |
|
"grad_norm": 0.4100770056247711, |
|
"learning_rate": 2.2922239706315745e-05, |
|
"loss": 0.7202, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6151592823141706, |
|
"grad_norm": 0.44192320108413696, |
|
"learning_rate": 2.079812093300668e-05, |
|
"loss": 0.6974, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6444525814719883, |
|
"grad_norm": 0.4109411835670471, |
|
"learning_rate": 1.8737746269439006e-05, |
|
"loss": 0.6709, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6737458806298059, |
|
"grad_norm": 0.3698427975177765, |
|
"learning_rate": 1.6760712719281375e-05, |
|
"loss": 0.6868, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7030391797876235, |
|
"grad_norm": 0.3567708730697632, |
|
"learning_rate": 1.4885824597312362e-05, |
|
"loss": 0.6767, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7323324789454412, |
|
"grad_norm": 0.37050729990005493, |
|
"learning_rate": 1.313091467446158e-05, |
|
"loss": 0.6788, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7616257781032589, |
|
"grad_norm": 0.35710033774375916, |
|
"learning_rate": 1.1512674563572253e-05, |
|
"loss": 0.6797, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7909190772610766, |
|
"grad_norm": 0.328642874956131, |
|
"learning_rate": 1.0046495959150554e-05, |
|
"loss": 0.6554, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8202123764188942, |
|
"grad_norm": 0.3246113359928131, |
|
"learning_rate": 8.746324241130455e-06, |
|
"loss": 0.6895, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8495056755767119, |
|
"grad_norm": 0.3387470543384552, |
|
"learning_rate": 7.624525835084185e-06, |
|
"loss": 0.6864, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8787989747345295, |
|
"grad_norm": 0.34192147850990295, |
|
"learning_rate": 6.691770590465606e-06, |
|
"loss": 0.6718, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9080922738923471, |
|
"grad_norm": 0.3072430491447449, |
|
"learning_rate": 5.95693029563144e-06, |
|
"loss": 0.673, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9373855730501648, |
|
"grad_norm": 0.32493317127227783, |
|
"learning_rate": 5.426994294902611e-06, |
|
"loss": 0.6767, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9666788722079824, |
|
"grad_norm": 0.32937249541282654, |
|
"learning_rate": 5.10700301026355e-06, |
|
"loss": 0.6815, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9959721713658001, |
|
"grad_norm": 0.3123301863670349, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6732, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9959721713658001, |
|
"step": 170, |
|
"total_flos": 77096571633664.0, |
|
"train_loss": 0.7200509982950547, |
|
"train_runtime": 16045.7979, |
|
"train_samples_per_second": 0.34, |
|
"train_steps_per_second": 0.011 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 77096571633664.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|