{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9004832647216753,
  "eval_steps": 1677,
  "global_step": 15093,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 1.9403376886820598e-05,
      "loss": 3.0319,
      "step": 500
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.8806753773641194e-05,
      "loss": 2.8898,
      "step": 1000
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.8210130660461786e-05,
      "loss": 2.8471,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "eval_f1": 0.09119675351283209,
      "eval_loss": 2.799565315246582,
      "eval_runtime": 565.0093,
      "eval_samples_per_second": 210.945,
      "eval_steps_per_second": 3.297,
      "step": 1677
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.7613507547282383e-05,
      "loss": 2.826,
      "step": 2000
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.701688443410298e-05,
      "loss": 2.8064,
      "step": 2500
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.6420261320923575e-05,
      "loss": 2.7912,
      "step": 3000
    },
    {
      "epoch": 0.2,
      "eval_f1": 0.09886291342405676,
      "eval_loss": 2.7552475929260254,
      "eval_runtime": 565.8136,
      "eval_samples_per_second": 210.645,
      "eval_steps_per_second": 3.293,
      "step": 3354
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.582363820774417e-05,
      "loss": 2.769,
      "step": 3500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.5227015094564765e-05,
      "loss": 2.7626,
      "step": 4000
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.463039198138536e-05,
      "loss": 2.7493,
      "step": 4500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.4033768868205956e-05,
      "loss": 2.7485,
      "step": 5000
    },
    {
      "epoch": 0.3,
      "eval_f1": 0.10934945582171454,
      "eval_loss": 2.701012134552002,
      "eval_runtime": 563.4598,
      "eval_samples_per_second": 211.525,
      "eval_steps_per_second": 3.306,
      "step": 5031
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.343714575502655e-05,
      "loss": 2.7355,
      "step": 5500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.2840522641847146e-05,
      "loss": 2.7254,
      "step": 6000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.2243899528667742e-05,
      "loss": 2.7163,
      "step": 6500
    },
    {
      "epoch": 0.4,
      "eval_f1": 0.11683918795621939,
      "eval_loss": 2.6925740242004395,
      "eval_runtime": 561.7517,
      "eval_samples_per_second": 212.168,
      "eval_steps_per_second": 3.316,
      "step": 6708
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.1647276415488338e-05,
      "loss": 2.7101,
      "step": 7000
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.1050653302308931e-05,
      "loss": 2.7063,
      "step": 7500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.0454030189129527e-05,
      "loss": 2.7037,
      "step": 8000
    },
    {
      "epoch": 0.5,
      "eval_f1": 0.1264069487053307,
      "eval_loss": 2.657057762145996,
      "eval_runtime": 557.0113,
      "eval_samples_per_second": 213.974,
      "eval_steps_per_second": 3.345,
      "step": 8385
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.857407075950123e-06,
      "loss": 2.6922,
      "step": 8500
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.260783962770719e-06,
      "loss": 2.6857,
      "step": 9000
    },
    {
      "epoch": 0.57,
      "learning_rate": 8.664160849591315e-06,
      "loss": 2.6796,
      "step": 9500
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.06753773641191e-06,
      "loss": 2.6686,
      "step": 10000
    },
    {
      "epoch": 0.6,
      "eval_f1": 0.1275429879642049,
      "eval_loss": 2.6493475437164307,
      "eval_runtime": 556.9366,
      "eval_samples_per_second": 214.003,
      "eval_steps_per_second": 3.345,
      "step": 10062
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.470914623232505e-06,
      "loss": 2.6798,
      "step": 10500
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.874291510053101e-06,
      "loss": 2.6528,
      "step": 11000
    },
    {
      "epoch": 0.69,
      "learning_rate": 6.277668396873695e-06,
      "loss": 2.6469,
      "step": 11500
    },
    {
      "epoch": 0.7,
      "eval_f1": 0.1396037444667752,
      "eval_loss": 2.620657444000244,
      "eval_runtime": 557.3053,
      "eval_samples_per_second": 213.861,
      "eval_steps_per_second": 3.343,
      "step": 11739
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.681045283694291e-06,
      "loss": 2.6569,
      "step": 12000
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.0844221705148865e-06,
      "loss": 2.6472,
      "step": 12500
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.487799057335481e-06,
      "loss": 2.6507,
      "step": 13000
    },
    {
      "epoch": 0.8,
      "eval_f1": 0.1391354358365679,
      "eval_loss": 2.6126439571380615,
      "eval_runtime": 556.9611,
      "eval_samples_per_second": 213.993,
      "eval_steps_per_second": 3.345,
      "step": 13416
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.891175944156077e-06,
      "loss": 2.6297,
      "step": 13500
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.294552830976672e-06,
      "loss": 2.6364,
      "step": 14000
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.6979297177972674e-06,
      "loss": 2.6287,
      "step": 14500
    },
    {
      "epoch": 0.89,
      "learning_rate": 2.101306604617863e-06,
      "loss": 2.6269,
      "step": 15000
    },
    {
      "epoch": 0.9,
      "eval_f1": 0.14388305284850214,
      "eval_loss": 2.6065549850463867,
      "eval_runtime": 556.552,
      "eval_samples_per_second": 214.151,
      "eval_steps_per_second": 3.347,
      "step": 15093
    }
  ],
  "logging_steps": 500,
  "max_steps": 16761,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1677,
  "total_flos": 4.1849662176682214e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}