{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15808974886784685, "eval_steps": 500, "global_step": 12, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013174145738987238, "grad_norm": 1.194186806678772, "learning_rate": 6.25e-08, "logits/chosen": 9.990612030029297, "logits/rejected": 10.698101997375488, "logps/chosen": -102.88545989990234, "logps/ref_chosen": -102.88545989990234, "logps/ref_rejected": -121.84871673583984, "logps/rejected": -121.84871673583984, "loss": 0.3675, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "sft_loss": 0.36753880977630615, "step": 1 }, { "epoch": 0.026348291477974475, "grad_norm": 0.5353251099586487, "learning_rate": 1.25e-07, "logits/chosen": 10.211905479431152, "logits/rejected": 11.06594467163086, "logps/chosen": -107.70349884033203, "logps/ref_chosen": -107.70349884033203, "logps/ref_rejected": -121.89966583251953, "logps/rejected": -121.89966583251953, "loss": 0.4101, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "sft_loss": 0.41013145446777344, "step": 2 }, { "epoch": 0.03952243721696171, "grad_norm": 0.7126303315162659, "learning_rate": 1.875e-07, "logits/chosen": 10.032384872436523, "logits/rejected": 11.023520469665527, "logps/chosen": -108.3123779296875, "logps/ref_chosen": -107.98188781738281, "logps/ref_rejected": -124.51527404785156, "logps/rejected": -124.87130737304688, "loss": 0.412, "rewards/accuracies": 0.5234375, "rewards/chosen": -0.003304910147562623, "rewards/margins": 0.0002554532838985324, "rewards/rejected": -0.003560363780707121, "sft_loss": 0.41195932030677795, "step": 3 }, { "epoch": 0.05269658295594895, "grad_norm": 1.2344533205032349, "learning_rate": 2.5e-07, "logits/chosen": 9.836658477783203, "logits/rejected": 10.855621337890625, "logps/chosen": -109.55919647216797, "logps/ref_chosen": -109.20836639404297, "logps/ref_rejected": -119.23908996582031, "logps/rejected": -119.48279571533203, "loss": 0.4039, "rewards/accuracies": 0.4921875, "rewards/chosen": -0.003508324269205332, "rewards/margins": -0.0010712125804275274, "rewards/rejected": -0.0024371116887778044, "sft_loss": 0.4038863480091095, "step": 4 }, { "epoch": 0.06587072869493618, "grad_norm": 1.426048994064331, "learning_rate": 3.1249999999999997e-07, "logits/chosen": 10.212320327758789, "logits/rejected": 10.966379165649414, "logps/chosen": -103.76991271972656, "logps/ref_chosen": -103.87680053710938, "logps/ref_rejected": -118.41618347167969, "logps/rejected": -118.23270416259766, "loss": 0.3697, "rewards/accuracies": 0.453125, "rewards/chosen": 0.0010687037138268352, "rewards/margins": -0.000766113749705255, "rewards/rejected": 0.0018348174635320902, "sft_loss": 0.3697226345539093, "step": 5 }, { "epoch": 0.07904487443392343, "grad_norm": 1.413549780845642, "learning_rate": 3.75e-07, "logits/chosen": 10.700042724609375, "logits/rejected": 11.478326797485352, "logps/chosen": -107.56877899169922, "logps/ref_chosen": -107.58968353271484, "logps/ref_rejected": -122.07303619384766, "logps/rejected": -121.85940551757812, "loss": 0.3909, "rewards/accuracies": 0.4609375, "rewards/chosen": 0.0002090137859340757, "rewards/margins": -0.0019273017533123493, "rewards/rejected": 0.00213631521910429, "sft_loss": 0.390906423330307, "step": 6 }, { "epoch": 0.09221902017291066, "grad_norm": 1.2342580556869507, "learning_rate": 4.375e-07, "logits/chosen": 10.01632308959961, "logits/rejected": 10.7178955078125, "logps/chosen": -107.01339721679688, "logps/ref_chosen": -107.42727661132812, "logps/ref_rejected": -116.87063598632812, "logps/rejected": -116.37357330322266, "loss": 0.3747, "rewards/accuracies": 0.4453125, "rewards/chosen": 0.00413867924362421, "rewards/margins": -0.0008318667532876134, "rewards/rejected": 0.004970546346157789, "sft_loss": 0.3746669888496399, "step": 7 }, { "epoch": 0.1053931659118979, "grad_norm": 0.6644937992095947, "learning_rate": 5e-07, "logits/chosen": 10.211028099060059, "logits/rejected": 11.11027717590332, "logps/chosen": -104.41184997558594, "logps/ref_chosen": -105.60282135009766, "logps/ref_rejected": -119.53916931152344, "logps/rejected": -118.27430725097656, "loss": 0.3773, "rewards/accuracies": 0.4921875, "rewards/chosen": 0.01190974935889244, "rewards/margins": -0.0007388982339762151, "rewards/rejected": 0.012648648582398891, "sft_loss": 0.37729793787002563, "step": 8 }, { "epoch": 0.11856731165088513, "grad_norm": 0.9437576532363892, "learning_rate": 4.997252228714278e-07, "logits/chosen": 10.179821014404297, "logits/rejected": 11.147579193115234, "logps/chosen": -104.13174438476562, "logps/ref_chosen": -105.46086120605469, "logps/ref_rejected": -119.00373840332031, "logps/rejected": -117.734130859375, "loss": 0.3807, "rewards/accuracies": 0.5234375, "rewards/chosen": 0.013291322626173496, "rewards/margins": 0.0005952615174464881, "rewards/rejected": 0.012696062214672565, "sft_loss": 0.38070446252822876, "step": 9 }, { "epoch": 0.13174145738987236, "grad_norm": 0.700039803981781, "learning_rate": 4.989014955054745e-07, "logits/chosen": 10.076737403869629, "logits/rejected": 10.897785186767578, "logps/chosen": -100.81087493896484, "logps/ref_chosen": -104.21009826660156, "logps/ref_rejected": -118.9209213256836, "logps/rejected": -115.75495910644531, "loss": 0.3367, "rewards/accuracies": 0.5546875, "rewards/chosen": 0.033992186188697815, "rewards/margins": 0.0023326175287365913, "rewards/rejected": 0.0316595658659935, "sft_loss": 0.33672136068344116, "step": 10 }, { "epoch": 0.14491560312885962, "grad_norm": 0.9160856008529663, "learning_rate": 4.975306286336627e-07, "logits/chosen": 9.973880767822266, "logits/rejected": 11.158487319946289, "logps/chosen": -101.3505630493164, "logps/ref_chosen": -105.94319152832031, "logps/ref_rejected": -122.76007843017578, "logps/rejected": -118.6338119506836, "loss": 0.3851, "rewards/accuracies": 0.578125, "rewards/chosen": 0.045926500111818314, "rewards/margins": 0.004663803614675999, "rewards/rejected": 0.04126270115375519, "sft_loss": 0.3850533962249756, "step": 11 }, { "epoch": 0.15808974886784685, "grad_norm": 0.9421964883804321, "learning_rate": 4.956156357188939e-07, "logits/chosen": 9.908226013183594, "logits/rejected": 10.598045349121094, "logps/chosen": -103.32762908935547, "logps/ref_chosen": -109.08442687988281, "logps/ref_rejected": -121.41947174072266, "logps/rejected": -115.84996795654297, "loss": 0.3532, "rewards/accuracies": 0.4921875, "rewards/chosen": 0.05756799131631851, "rewards/margins": 0.0018730255542322993, "rewards/rejected": 0.05569496005773544, "sft_loss": 0.3532242476940155, "step": 12 } ], "logging_steps": 1, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 12, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }