{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9221902017291066,
  "eval_steps": 500,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 0.48872771859169006,
      "learning_rate": 5e-07,
      "logits/chosen": 0.12278681248426437,
      "logits/rejected": 0.26080453395843506,
      "logps/chosen": -132.40695190429688,
      "logps/ref_chosen": -121.57450103759766,
      "logps/ref_rejected": -166.17074584960938,
      "logps/rejected": -177.05752563476562,
      "loss": 0.5381,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": -0.10832449048757553,
      "rewards/margins": 0.000543450063560158,
      "rewards/rejected": -0.10886794328689575,
      "sft_loss": 0.4607769250869751,
      "step": 1
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 0.7124770283699036,
      "learning_rate": 4.849231551964771e-07,
      "logits/chosen": 0.1629990041255951,
      "logits/rejected": 0.3135248124599457,
      "logps/chosen": -134.13861083984375,
      "logps/ref_chosen": -123.470703125,
      "logps/ref_rejected": -159.41018676757812,
      "logps/rejected": -170.13693237304688,
      "loss": 0.5337,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": -0.10667898505926132,
      "rewards/margins": 0.000588476425036788,
      "rewards/rejected": -0.10726746171712875,
      "sft_loss": 0.4541751444339752,
      "step": 2
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 0.5838692784309387,
      "learning_rate": 4.415111107797445e-07,
      "logits/chosen": 0.09741576015949249,
      "logits/rejected": 0.2005864381790161,
      "logps/chosen": -134.69981384277344,
      "logps/ref_chosen": -124.5863037109375,
      "logps/ref_rejected": -152.92868041992188,
      "logps/rejected": -163.17599487304688,
      "loss": 0.5444,
      "rewards/accuracies": 0.5390625,
      "rewards/chosen": -0.10113508999347687,
      "rewards/margins": 0.0013378520961850882,
      "rewards/rejected": -0.1024729460477829,
      "sft_loss": 0.4703802764415741,
      "step": 3
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 0.5232725739479065,
      "learning_rate": 3.75e-07,
      "logits/chosen": 0.11469627916812897,
      "logits/rejected": 0.34779858589172363,
      "logps/chosen": -131.96142578125,
      "logps/ref_chosen": -123.3237533569336,
      "logps/ref_rejected": -160.29180908203125,
      "logps/rejected": -169.1221466064453,
      "loss": 0.5541,
      "rewards/accuracies": 0.6015625,
      "rewards/chosen": -0.08637689054012299,
      "rewards/margins": 0.00192636763677001,
      "rewards/rejected": -0.08830326050519943,
      "sft_loss": 0.48508843779563904,
      "step": 4
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 0.4373687505722046,
      "learning_rate": 2.934120444167326e-07,
      "logits/chosen": 0.13797219097614288,
      "logits/rejected": 0.2943338453769684,
      "logps/chosen": -129.5037841796875,
      "logps/ref_chosen": -122.18938446044922,
      "logps/ref_rejected": -166.5259246826172,
      "logps/rejected": -173.7913055419922,
      "loss": 0.5453,
      "rewards/accuracies": 0.4921875,
      "rewards/chosen": -0.07314412295818329,
      "rewards/margins": -0.0004904267261736095,
      "rewards/rejected": -0.07265370339155197,
      "sft_loss": 0.4711988866329193,
      "step": 5
    },
    {
      "epoch": 0.553314121037464,
      "grad_norm": 0.41881853342056274,
      "learning_rate": 2.065879555832674e-07,
      "logits/chosen": 0.1323821097612381,
      "logits/rejected": 0.31871122121810913,
      "logps/chosen": -132.2285614013672,
      "logps/ref_chosen": -125.81028747558594,
      "logps/ref_rejected": -156.31320190429688,
      "logps/rejected": -162.79730224609375,
      "loss": 0.5281,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.06418267637491226,
      "rewards/margins": 0.0006581920897588134,
      "rewards/rejected": -0.06484086811542511,
      "sft_loss": 0.44570034742355347,
      "step": 6
    },
    {
      "epoch": 0.6455331412103746,
      "grad_norm": 0.3571808934211731,
      "learning_rate": 1.2500000000000005e-07,
      "logits/chosen": 0.18207934498786926,
      "logits/rejected": 0.28170838952064514,
      "logps/chosen": -130.76219177246094,
      "logps/ref_chosen": -128.7080535888672,
      "logps/ref_rejected": -158.98272705078125,
      "logps/rejected": -161.1389923095703,
      "loss": 0.5175,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.02054133079946041,
      "rewards/margins": 0.0010213707573711872,
      "rewards/rejected": -0.02156270295381546,
      "sft_loss": 0.4298672676086426,
      "step": 7
    },
    {
      "epoch": 0.7377521613832853,
      "grad_norm": 0.4075394570827484,
      "learning_rate": 5.848888922025552e-08,
      "logits/chosen": 0.10367591679096222,
      "logits/rejected": 0.28194165229797363,
      "logps/chosen": -124.07286834716797,
      "logps/ref_chosen": -123.2734375,
      "logps/ref_rejected": -156.3369903564453,
      "logps/rejected": -157.23104858398438,
      "loss": 0.5234,
      "rewards/accuracies": 0.6015625,
      "rewards/chosen": -0.007994325831532478,
      "rewards/margins": 0.0009461576119065285,
      "rewards/rejected": -0.008940483443439007,
      "sft_loss": 0.4388274848461151,
      "step": 8
    },
    {
      "epoch": 0.829971181556196,
      "grad_norm": 0.230843648314476,
      "learning_rate": 1.507684480352292e-08,
      "logits/chosen": 0.06345280259847641,
      "logits/rejected": 0.206377774477005,
      "logps/chosen": -127.7571792602539,
      "logps/ref_chosen": -127.38499450683594,
      "logps/ref_rejected": -157.14312744140625,
      "logps/rejected": -157.56546020507812,
      "loss": 0.5129,
      "rewards/accuracies": 0.5859375,
      "rewards/chosen": -0.00372200645506382,
      "rewards/margins": 0.0005012964247725904,
      "rewards/rejected": -0.004223302938044071,
      "sft_loss": 0.42283880710601807,
      "step": 9
    },
    {
      "epoch": 0.9221902017291066,
      "grad_norm": 0.43585899472236633,
      "learning_rate": 0.0,
      "logits/chosen": 0.08928333222866058,
      "logits/rejected": 0.238207146525383,
      "logps/chosen": -124.22206115722656,
      "logps/ref_chosen": -123.99081420898438,
      "logps/ref_rejected": -154.0594940185547,
      "logps/rejected": -154.32521057128906,
      "loss": 0.5223,
      "rewards/accuracies": 0.5234375,
      "rewards/chosen": -0.0023124003782868385,
      "rewards/margins": 0.00034469965612515807,
      "rewards/rejected": -0.002657099859789014,
      "sft_loss": 0.4369175434112549,
      "step": 10
    },
    {
      "epoch": 0.9221902017291066,
      "step": 10,
      "total_flos": 0.0,
      "train_loss": 0.5319805860519409,
      "train_runtime": 668.7287,
      "train_samples_per_second": 2.074,
      "train_steps_per_second": 0.015
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}