|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.15808974886784685, |
|
"eval_steps": 500, |
|
"global_step": 12, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013174145738987238, |
|
"grad_norm": 0.5102696418762207, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 10.088521957397461, |
|
"logits/rejected": 10.263787269592285, |
|
"logps/chosen": -0.9118157029151917, |
|
"logps/rejected": -0.9621729850769043, |
|
"loss": 1.3897, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": -1.8236314058303833, |
|
"rewards/margins": 0.10071463882923126, |
|
"rewards/rejected": -1.9243459701538086, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026348291477974475, |
|
"grad_norm": 0.9815747141838074, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 10.592972755432129, |
|
"logits/rejected": 10.720216751098633, |
|
"logps/chosen": -0.945902943611145, |
|
"logps/rejected": -1.0317902565002441, |
|
"loss": 1.3077, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": -1.89180588722229, |
|
"rewards/margins": 0.1717745065689087, |
|
"rewards/rejected": -2.0635805130004883, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03952243721696171, |
|
"grad_norm": 0.9049758315086365, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 10.041976928710938, |
|
"logits/rejected": 10.399316787719727, |
|
"logps/chosen": -1.0869810581207275, |
|
"logps/rejected": -1.1895216703414917, |
|
"loss": 1.346, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -2.173962116241455, |
|
"rewards/margins": 0.20508113503456116, |
|
"rewards/rejected": -2.3790433406829834, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05269658295594895, |
|
"grad_norm": 1.8911848068237305, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": 10.243470191955566, |
|
"logits/rejected": 10.443375587463379, |
|
"logps/chosen": -0.966098427772522, |
|
"logps/rejected": -1.0040662288665771, |
|
"loss": 1.4032, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -1.932196855545044, |
|
"rewards/margins": 0.07593552023172379, |
|
"rewards/rejected": -2.0081324577331543, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.6135074496269226, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 10.439040184020996, |
|
"logits/rejected": 10.739177703857422, |
|
"logps/chosen": -0.9262609481811523, |
|
"logps/rejected": -0.9657196998596191, |
|
"loss": 1.3727, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": -1.8525218963623047, |
|
"rewards/margins": 0.07891744375228882, |
|
"rewards/rejected": -1.9314393997192383, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.07904487443392343, |
|
"grad_norm": 0.5990542769432068, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": 10.910269737243652, |
|
"logits/rejected": 11.204473495483398, |
|
"logps/chosen": -0.9439595341682434, |
|
"logps/rejected": -1.0420396327972412, |
|
"loss": 1.3491, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8879190683364868, |
|
"rewards/margins": 0.196160227060318, |
|
"rewards/rejected": -2.0840792655944824, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09221902017291066, |
|
"grad_norm": 1.3676807880401611, |
|
"learning_rate": 8.75e-07, |
|
"logits/chosen": 9.873465538024902, |
|
"logits/rejected": 10.022269248962402, |
|
"logps/chosen": -0.8941428661346436, |
|
"logps/rejected": -1.0010743141174316, |
|
"loss": 1.3507, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.788285732269287, |
|
"rewards/margins": 0.21386288106441498, |
|
"rewards/rejected": -2.0021486282348633, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1053931659118979, |
|
"grad_norm": 1.7690002918243408, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": 10.597719192504883, |
|
"logits/rejected": 10.780376434326172, |
|
"logps/chosen": -0.9080270528793335, |
|
"logps/rejected": -0.9909782409667969, |
|
"loss": 1.3305, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.816054105758667, |
|
"rewards/margins": 0.16590236127376556, |
|
"rewards/rejected": -1.9819564819335938, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.11856731165088513, |
|
"grad_norm": 1.056247353553772, |
|
"learning_rate": 9.994504457428556e-07, |
|
"logits/chosen": 10.446786880493164, |
|
"logits/rejected": 10.839168548583984, |
|
"logps/chosen": -1.1091859340667725, |
|
"logps/rejected": -1.0694739818572998, |
|
"loss": 1.5127, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": -2.218371868133545, |
|
"rewards/margins": -0.07942387461662292, |
|
"rewards/rejected": -2.1389479637145996, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 2.076240062713623, |
|
"learning_rate": 9.97802991010949e-07, |
|
"logits/chosen": 10.343971252441406, |
|
"logits/rejected": 10.492179870605469, |
|
"logps/chosen": -0.9705042839050293, |
|
"logps/rejected": -0.9916192889213562, |
|
"loss": 1.4611, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.9410085678100586, |
|
"rewards/margins": 0.04223020374774933, |
|
"rewards/rejected": -1.9832385778427124, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14491560312885962, |
|
"grad_norm": 1.13358736038208, |
|
"learning_rate": 9.950612572673255e-07, |
|
"logits/chosen": 10.49313735961914, |
|
"logits/rejected": 10.680143356323242, |
|
"logps/chosen": -1.1081148386001587, |
|
"logps/rejected": -1.223841667175293, |
|
"loss": 1.3449, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.2162296772003174, |
|
"rewards/margins": 0.23145350813865662, |
|
"rewards/rejected": -2.447683334350586, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.15808974886784685, |
|
"grad_norm": 1.1638388633728027, |
|
"learning_rate": 9.912312714377879e-07, |
|
"logits/chosen": 10.328557014465332, |
|
"logits/rejected": 10.365793228149414, |
|
"logps/chosen": -0.9249637722969055, |
|
"logps/rejected": -0.9842618703842163, |
|
"loss": 1.351, |
|
"rewards/accuracies": 0.6015625, |
|
"rewards/chosen": -1.849927544593811, |
|
"rewards/margins": 0.11859625577926636, |
|
"rewards/rejected": -1.9685237407684326, |
|
"step": 12 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|