|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9221902017291066, |
|
"eval_steps": 500, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 0.49082234501838684, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.10258625447750092, |
|
"logits/rejected": 0.2278534471988678, |
|
"logps/chosen": -134.6281280517578, |
|
"logps/ref_chosen": -124.05789947509766, |
|
"logps/ref_rejected": -154.88946533203125, |
|
"logps/rejected": -165.65171813964844, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -0.1057022362947464, |
|
"rewards/margins": 0.0019202901748940349, |
|
"rewards/rejected": -0.10762252658605576, |
|
"sft_loss": 0.47003409266471863, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1844380403458213, |
|
"grad_norm": 0.457568883895874, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": 0.06952403485774994, |
|
"logits/rejected": 0.3078790605068207, |
|
"logps/chosen": -133.73080444335938, |
|
"logps/ref_chosen": -123.33631896972656, |
|
"logps/ref_rejected": -168.01480102539062, |
|
"logps/rejected": -178.5548553466797, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -0.10394492000341415, |
|
"rewards/margins": 0.001455550198443234, |
|
"rewards/rejected": -0.10540048032999039, |
|
"sft_loss": 0.4669448435306549, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.276657060518732, |
|
"grad_norm": 0.3315995931625366, |
|
"learning_rate": 4.415111107797445e-07, |
|
"logits/chosen": 0.09175828099250793, |
|
"logits/rejected": 0.22343572974205017, |
|
"logps/chosen": -133.59866333007812, |
|
"logps/ref_chosen": -124.51382446289062, |
|
"logps/ref_rejected": -155.95262145996094, |
|
"logps/rejected": -165.19468688964844, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.6328125, |
|
"rewards/chosen": -0.0908483937382698, |
|
"rewards/margins": 0.0015722049865871668, |
|
"rewards/rejected": -0.09242061525583267, |
|
"sft_loss": 0.48662421107292175, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.3688760806916426, |
|
"grad_norm": 0.45795556902885437, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 0.0821409523487091, |
|
"logits/rejected": 0.24085786938667297, |
|
"logps/chosen": -132.02798461914062, |
|
"logps/ref_chosen": -123.37024688720703, |
|
"logps/ref_rejected": -160.60987854003906, |
|
"logps/rejected": -169.43934631347656, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08657727390527725, |
|
"rewards/margins": 0.0017174197128042579, |
|
"rewards/rejected": -0.08829469233751297, |
|
"sft_loss": 0.45568379759788513, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 0.5167257785797119, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits/chosen": 0.1414952129125595, |
|
"logits/rejected": 0.23190416395664215, |
|
"logps/chosen": -129.588134765625, |
|
"logps/ref_chosen": -122.73413848876953, |
|
"logps/ref_rejected": -143.36245727539062, |
|
"logps/rejected": -150.25531005859375, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.06853996217250824, |
|
"rewards/margins": 0.00038862242945469916, |
|
"rewards/rejected": -0.06892858445644379, |
|
"sft_loss": 0.4531819820404053, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.553314121037464, |
|
"grad_norm": 0.7184420824050903, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": 0.14788731932640076, |
|
"logits/rejected": 0.29836705327033997, |
|
"logps/chosen": -130.35125732421875, |
|
"logps/ref_chosen": -124.05830383300781, |
|
"logps/ref_rejected": -150.84971618652344, |
|
"logps/rejected": -157.2368621826172, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.5859375, |
|
"rewards/chosen": -0.0629296600818634, |
|
"rewards/margins": 0.0009420262649655342, |
|
"rewards/rejected": -0.06387168914079666, |
|
"sft_loss": 0.4549233317375183, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.6455331412103746, |
|
"grad_norm": 0.5709189772605896, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": 0.16531895101070404, |
|
"logits/rejected": 0.24730782210826874, |
|
"logps/chosen": -128.30152893066406, |
|
"logps/ref_chosen": -126.53475189208984, |
|
"logps/ref_rejected": -153.44937133789062, |
|
"logps/rejected": -155.2527313232422, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -0.017667775973677635, |
|
"rewards/margins": 0.0003658741479739547, |
|
"rewards/rejected": -0.018033649772405624, |
|
"sft_loss": 0.4390857517719269, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.7377521613832853, |
|
"grad_norm": 0.3785640299320221, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": 0.05871865525841713, |
|
"logits/rejected": 0.2406335175037384, |
|
"logps/chosen": -120.95695495605469, |
|
"logps/ref_chosen": -120.56550598144531, |
|
"logps/ref_rejected": -159.84185791015625, |
|
"logps/rejected": -160.2302703857422, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.003914527129381895, |
|
"rewards/margins": -3.0469876946881413e-05, |
|
"rewards/rejected": -0.0038840575143694878, |
|
"sft_loss": 0.42091870307922363, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.829971181556196, |
|
"grad_norm": 0.39315882325172424, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": 0.14951348304748535, |
|
"logits/rejected": 0.21872764825820923, |
|
"logps/chosen": -124.28387451171875, |
|
"logps/ref_chosen": -124.57730102539062, |
|
"logps/ref_rejected": -146.5771484375, |
|
"logps/rejected": -146.2667236328125, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.4453125, |
|
"rewards/chosen": 0.002934188349172473, |
|
"rewards/margins": -0.00017014730838127434, |
|
"rewards/rejected": 0.003104335628449917, |
|
"sft_loss": 0.41538387537002563, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 0.4166741371154785, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.12583141028881073, |
|
"logits/rejected": 0.25348690152168274, |
|
"logps/chosen": -124.92916107177734, |
|
"logps/ref_chosen": -125.44955444335938, |
|
"logps/ref_rejected": -160.47593688964844, |
|
"logps/rejected": -159.9407958984375, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.0052038198336958885, |
|
"rewards/margins": -0.00014770496636629105, |
|
"rewards/rejected": 0.0053515248000621796, |
|
"sft_loss": 0.4205903112888336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"step": 10, |
|
"total_flos": 0.0, |
|
"train_loss": 0.48907283544540403, |
|
"train_runtime": 630.6427, |
|
"train_samples_per_second": 2.199, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|