{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.15808974886784685,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013174145738987238,
      "grad_norm": 1.0125823020935059,
      "learning_rate": 6.25e-08,
      "logits/chosen": 9.990612030029297,
      "logits/rejected": 10.698101997375488,
      "logps/chosen": -102.88545989990234,
      "logps/ref_chosen": -102.88545989990234,
      "logps/ref_rejected": -121.84871673583984,
      "logps/rejected": -121.84871673583984,
      "loss": 0.4327,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.36753880977630615,
      "step": 1
    },
    {
      "epoch": 0.026348291477974475,
      "grad_norm": 0.3579196035861969,
      "learning_rate": 1.25e-07,
      "logits/chosen": 10.211905479431152,
      "logits/rejected": 11.06594467163086,
      "logps/chosen": -107.70349884033203,
      "logps/ref_chosen": -107.70349884033203,
      "logps/ref_rejected": -121.89966583251953,
      "logps/rejected": -121.89966583251953,
      "loss": 0.4667,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "sft_loss": 0.41013145446777344,
      "step": 2
    },
    {
      "epoch": 0.03952243721696171,
      "grad_norm": 0.49040451645851135,
      "learning_rate": 1.875e-07,
      "logits/chosen": 10.035531044006348,
      "logits/rejected": 11.027185440063477,
      "logps/chosen": -108.23310852050781,
      "logps/ref_chosen": -107.98188781738281,
      "logps/ref_rejected": -124.51527404785156,
      "logps/rejected": -124.64785766601562,
      "loss": 0.4683,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0025122263468801975,
      "rewards/margins": -0.0011863748077303171,
      "rewards/rejected": -0.0013258515391498804,
      "sft_loss": 0.41194257140159607,
      "step": 3
    },
    {
      "epoch": 0.05269658295594895,
      "grad_norm": 0.8740162253379822,
      "learning_rate": 2.5e-07,
      "logits/chosen": 9.860024452209473,
      "logits/rejected": 10.876106262207031,
      "logps/chosen": -109.94369506835938,
      "logps/ref_chosen": -109.20836639404297,
      "logps/ref_rejected": -119.23908996582031,
      "logps/rejected": -119.73454284667969,
      "loss": 0.4633,
      "rewards/accuracies": 0.3828125,
      "rewards/chosen": -0.007353362161666155,
      "rewards/margins": -0.002398767275735736,
      "rewards/rejected": -0.004954595118761063,
      "sft_loss": 0.40552011132240295,
      "step": 4
    },
    {
      "epoch": 0.06587072869493618,
      "grad_norm": 1.1980141401290894,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": 10.19467830657959,
      "logits/rejected": 10.95050048828125,
      "logps/chosen": -104.02793884277344,
      "logps/ref_chosen": -103.87680053710938,
      "logps/ref_rejected": -118.41618347167969,
      "logps/rejected": -118.46170806884766,
      "loss": 0.4351,
      "rewards/accuracies": 0.5078125,
      "rewards/chosen": -0.001511452835984528,
      "rewards/margins": -0.001056289067491889,
      "rewards/rejected": -0.0004551640013232827,
      "sft_loss": 0.3704559803009033,
      "step": 5
    },
    {
      "epoch": 0.07904487443392343,
      "grad_norm": 0.928102433681488,
      "learning_rate": 3.75e-07,
      "logits/chosen": 10.701957702636719,
      "logits/rejected": 11.477033615112305,
      "logps/chosen": -107.61714935302734,
      "logps/ref_chosen": -107.58968353271484,
      "logps/ref_rejected": -122.07303619384766,
      "logps/rejected": -122.0443115234375,
      "loss": 0.4515,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": -0.0002746534300968051,
      "rewards/margins": -0.0005618570139631629,
      "rewards/rejected": 0.0002872035256586969,
      "sft_loss": 0.3909577429294586,
      "step": 6
    },
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 1.0250380039215088,
      "learning_rate": 4.375e-07,
      "logits/chosen": 10.025421142578125,
      "logits/rejected": 10.72871208190918,
      "logps/chosen": -107.13175201416016,
      "logps/ref_chosen": -107.42727661132812,
      "logps/ref_rejected": -116.87063598632812,
      "logps/rejected": -116.28421020507812,
      "loss": 0.4392,
      "rewards/accuracies": 0.3984375,
      "rewards/chosen": 0.002955180360004306,
      "rewards/margins": -0.0029091311153024435,
      "rewards/rejected": 0.005864311475306749,
      "sft_loss": 0.3753029406070709,
      "step": 7
    },
    {
      "epoch": 0.1053931659118979,
      "grad_norm": 0.5661666393280029,
      "learning_rate": 5e-07,
      "logits/chosen": 10.203546524047852,
      "logits/rejected": 11.103278160095215,
      "logps/chosen": -104.93194580078125,
      "logps/ref_chosen": -105.60282135009766,
      "logps/ref_rejected": -119.53916931152344,
      "logps/rejected": -118.93331909179688,
      "loss": 0.4416,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.006708861328661442,
      "rewards/margins": 0.0006504050688818097,
      "rewards/rejected": 0.006058456376194954,
      "sft_loss": 0.3787955939769745,
      "step": 8
    },
    {
      "epoch": 0.11856731165088513,
      "grad_norm": 0.820360541343689,
      "learning_rate": 4.997252228714278e-07,
      "logits/chosen": 10.184520721435547,
      "logits/rejected": 11.154094696044922,
      "logps/chosen": -104.26238250732422,
      "logps/ref_chosen": -105.46086120605469,
      "logps/ref_rejected": -119.00373840332031,
      "logps/rejected": -117.88744354248047,
      "loss": 0.4437,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.011985024437308311,
      "rewards/margins": 0.0008220230811275542,
      "rewards/rejected": 0.011163001880049706,
      "sft_loss": 0.38146448135375977,
      "step": 9
    },
    {
      "epoch": 0.13174145738987236,
      "grad_norm": 0.4781506061553955,
      "learning_rate": 4.989014955054745e-07,
      "logits/chosen": 10.042634963989258,
      "logits/rejected": 10.866905212402344,
      "logps/chosen": -101.11405944824219,
      "logps/ref_chosen": -104.21009826660156,
      "logps/ref_rejected": -118.9209213256836,
      "logps/rejected": -115.99314880371094,
      "loss": 0.4088,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.030960241332650185,
      "rewards/margins": 0.0016824830090627074,
      "rewards/rejected": 0.029277760535478592,
      "sft_loss": 0.3378788232803345,
      "step": 10
    },
    {
      "epoch": 0.14491560312885962,
      "grad_norm": 0.8178320527076721,
      "learning_rate": 4.975306286336627e-07,
      "logits/chosen": 9.987105369567871,
      "logits/rejected": 11.181533813476562,
      "logps/chosen": -101.77717590332031,
      "logps/ref_chosen": -105.94319152832031,
      "logps/ref_rejected": -122.76007843017578,
      "logps/rejected": -119.00365447998047,
      "loss": 0.4478,
      "rewards/accuracies": 0.578125,
      "rewards/chosen": 0.04166024178266525,
      "rewards/margins": 0.0040960111655294895,
      "rewards/rejected": 0.0375642292201519,
      "sft_loss": 0.3869646191596985,
      "step": 11
    },
    {
      "epoch": 0.15808974886784685,
      "grad_norm": 0.7931886315345764,
      "learning_rate": 4.956156357188939e-07,
      "logits/chosen": 9.913724899291992,
      "logits/rejected": 10.605714797973633,
      "logps/chosen": -104.08253479003906,
      "logps/ref_chosen": -109.08442687988281,
      "logps/ref_rejected": -121.41947174072266,
      "logps/rejected": -116.61964416503906,
      "loss": 0.4233,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": 0.05001899227499962,
      "rewards/margins": 0.002020882908254862,
      "rewards/rejected": 0.04799811542034149,
      "sft_loss": 0.3560585379600525,
      "step": 12
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 12,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}