|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9182209469153515, |
|
"eval_steps": 500, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09182209469153516, |
|
"grad_norm": 0.48370492458343506, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 0.16483092308044434, |
|
"logits/rejected": 0.3757104277610779, |
|
"logps/chosen": -129.87265014648438, |
|
"logps/ref_chosen": -130.11143493652344, |
|
"logps/ref_rejected": -170.61505126953125, |
|
"logps/rejected": -170.22250366210938, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.3359375, |
|
"rewards/chosen": 0.0023880640510469675, |
|
"rewards/margins": -0.0015374040231108665, |
|
"rewards/rejected": 0.003925468306988478, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1836441893830703, |
|
"grad_norm": 0.2921633720397949, |
|
"learning_rate": 4.849231551964771e-07, |
|
"logits/chosen": 0.21723544597625732, |
|
"logits/rejected": 0.33153384923934937, |
|
"logps/chosen": -136.4618377685547, |
|
"logps/ref_chosen": -136.66879272460938, |
|
"logps/ref_rejected": -170.20431518554688, |
|
"logps/rejected": -169.85755920410156, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": 0.0020694590639322996, |
|
"rewards/margins": -0.0013981539523229003, |
|
"rewards/rejected": 0.003467612899839878, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.27546628407460544, |
|
"grad_norm": 0.3820483684539795, |
|
"learning_rate": 4.415111107797445e-07, |
|
"logits/chosen": 0.278810977935791, |
|
"logits/rejected": 0.3778008222579956, |
|
"logps/chosen": -140.81326293945312, |
|
"logps/ref_chosen": -141.2205047607422, |
|
"logps/ref_rejected": -163.39132690429688, |
|
"logps/rejected": -162.9018096923828, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": 0.004072446841746569, |
|
"rewards/margins": -0.000822849222458899, |
|
"rewards/rejected": 0.004895296413451433, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.3672883787661406, |
|
"grad_norm": 0.34646955132484436, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 0.14010563492774963, |
|
"logits/rejected": 0.28288155794143677, |
|
"logps/chosen": -140.85638427734375, |
|
"logps/ref_chosen": -141.0355987548828, |
|
"logps/ref_rejected": -166.15609741210938, |
|
"logps/rejected": -165.95912170410156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0017920829122886062, |
|
"rewards/margins": -0.00017783755902200937, |
|
"rewards/rejected": 0.001969920238479972, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.45911047345767575, |
|
"grad_norm": 0.3133969306945801, |
|
"learning_rate": 2.934120444167326e-07, |
|
"logits/chosen": 0.20951783657073975, |
|
"logits/rejected": 0.33099910616874695, |
|
"logps/chosen": -136.3818817138672, |
|
"logps/ref_chosen": -136.40235900878906, |
|
"logps/ref_rejected": -158.99447631835938, |
|
"logps/rejected": -158.85215759277344, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": 0.00020450774172786623, |
|
"rewards/margins": -0.0012185449013486505, |
|
"rewards/rejected": 0.0014230526285246015, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.5509325681492109, |
|
"grad_norm": 0.413125604391098, |
|
"learning_rate": 2.065879555832674e-07, |
|
"logits/chosen": 0.1270689070224762, |
|
"logits/rejected": 0.35084813833236694, |
|
"logps/chosen": -134.93136596679688, |
|
"logps/ref_chosen": -134.92478942871094, |
|
"logps/ref_rejected": -171.18968200683594, |
|
"logps/rejected": -171.1276092529297, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": -6.584226503036916e-05, |
|
"rewards/margins": -0.0006865662289783359, |
|
"rewards/rejected": 0.0006207239348441362, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.642754662840746, |
|
"grad_norm": 0.3633699417114258, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": 0.160421222448349, |
|
"logits/rejected": 0.31845855712890625, |
|
"logps/chosen": -131.51528930664062, |
|
"logps/ref_chosen": -131.45591735839844, |
|
"logps/ref_rejected": -161.3601531982422, |
|
"logps/rejected": -161.34597778320312, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.0005936581292189658, |
|
"rewards/margins": -0.0007353991386480629, |
|
"rewards/rejected": 0.00014174105308484286, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.7345767575322812, |
|
"grad_norm": 0.31316861510276794, |
|
"learning_rate": 5.848888922025552e-08, |
|
"logits/chosen": 0.20960983633995056, |
|
"logits/rejected": 0.41680708527565, |
|
"logps/chosen": -136.77139282226562, |
|
"logps/ref_chosen": -136.7902069091797, |
|
"logps/ref_rejected": -181.39559936523438, |
|
"logps/rejected": -181.3594512939453, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": 0.00018815101066138595, |
|
"rewards/margins": -0.0001733488024910912, |
|
"rewards/rejected": 0.0003614998422563076, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.8263988522238164, |
|
"grad_norm": 0.33593055605888367, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": 0.21831227838993073, |
|
"logits/rejected": 0.32834041118621826, |
|
"logps/chosen": -137.8075408935547, |
|
"logps/ref_chosen": -137.73573303222656, |
|
"logps/ref_rejected": -171.2981719970703, |
|
"logps/rejected": -171.36036682128906, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0007180434186011553, |
|
"rewards/margins": -9.611125278752297e-05, |
|
"rewards/rejected": -0.0006219320930540562, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.9182209469153515, |
|
"grad_norm": 0.3731705844402313, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.15047258138656616, |
|
"logits/rejected": 0.27864834666252136, |
|
"logps/chosen": -146.82630920410156, |
|
"logps/ref_chosen": -146.76913452148438, |
|
"logps/ref_rejected": -181.7820281982422, |
|
"logps/rejected": -181.79539489746094, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.000571864191442728, |
|
"rewards/margins": -0.0004380465252324939, |
|
"rewards/rejected": -0.000133817782625556, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9182209469153515, |
|
"step": 10, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6935156106948852, |
|
"train_runtime": 611.1413, |
|
"train_samples_per_second": 2.279, |
|
"train_steps_per_second": 0.016 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 10, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|