{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9221902017291066,
  "eval_steps": 500,
  "global_step": 10,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09221902017291066,
      "grad_norm": 0.37403297424316406,
      "learning_rate": 5e-07,
      "logits/chosen": 0.10453951358795166,
      "logits/rejected": 0.2759632468223572,
      "logps/chosen": -132.67144775390625,
      "logps/ref_chosen": -133.03013610839844,
      "logps/ref_rejected": -164.177734375,
      "logps/rejected": -163.5892333984375,
      "loss": 0.6943,
      "rewards/accuracies": 0.3984375,
      "rewards/chosen": 0.003586653620004654,
      "rewards/margins": -0.002298325300216675,
      "rewards/rejected": 0.005884978454560041,
      "step": 1
    },
    {
      "epoch": 0.1844380403458213,
      "grad_norm": 0.358889102935791,
      "learning_rate": 4.849231551964771e-07,
      "logits/chosen": 0.08250421285629272,
      "logits/rejected": 0.33024948835372925,
      "logps/chosen": -134.9761962890625,
      "logps/ref_chosen": -135.27749633789062,
      "logps/ref_rejected": -188.41795349121094,
      "logps/rejected": -187.88470458984375,
      "loss": 0.6943,
      "rewards/accuracies": 0.3515625,
      "rewards/chosen": 0.003012962406501174,
      "rewards/margins": -0.0023194574750959873,
      "rewards/rejected": 0.005332420114427805,
      "step": 2
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 0.3239206075668335,
      "learning_rate": 4.415111107797445e-07,
      "logits/chosen": 0.08983182162046432,
      "logits/rejected": 0.2510722875595093,
      "logps/chosen": -134.5572052001953,
      "logps/ref_chosen": -134.77491760253906,
      "logps/ref_rejected": -161.10980224609375,
      "logps/rejected": -160.8002471923828,
      "loss": 0.6936,
      "rewards/accuracies": 0.453125,
      "rewards/chosen": 0.002177180489525199,
      "rewards/margins": -0.0009184688096866012,
      "rewards/rejected": 0.003095649415627122,
      "step": 3
    },
    {
      "epoch": 0.3688760806916426,
      "grad_norm": 0.3113608658313751,
      "learning_rate": 3.75e-07,
      "logits/chosen": 0.18616041541099548,
      "logits/rejected": 0.3378028869628906,
      "logps/chosen": -141.66685485839844,
      "logps/ref_chosen": -142.0138702392578,
      "logps/ref_rejected": -173.76629638671875,
      "logps/rejected": -173.24481201171875,
      "loss": 0.694,
      "rewards/accuracies": 0.390625,
      "rewards/chosen": 0.0034699777606874704,
      "rewards/margins": -0.0017446475103497505,
      "rewards/rejected": 0.005214625503867865,
      "step": 4
    },
    {
      "epoch": 0.4610951008645533,
      "grad_norm": 0.3619195818901062,
      "learning_rate": 2.934120444167326e-07,
      "logits/chosen": 0.1221667155623436,
      "logits/rejected": 0.268534779548645,
      "logps/chosen": -134.57679748535156,
      "logps/ref_chosen": -134.8294677734375,
      "logps/ref_rejected": -177.42715454101562,
      "logps/rejected": -177.0855712890625,
      "loss": 0.6936,
      "rewards/accuracies": 0.4609375,
      "rewards/chosen": 0.002526558004319668,
      "rewards/margins": -0.0008892094483599067,
      "rewards/rejected": 0.00341576780192554,
      "step": 5
    },
    {
      "epoch": 0.553314121037464,
      "grad_norm": 0.3865343928337097,
      "learning_rate": 2.065879555832674e-07,
      "logits/chosen": 0.19299811124801636,
      "logits/rejected": 0.30019116401672363,
      "logps/chosen": -135.14425659179688,
      "logps/ref_chosen": -135.45623779296875,
      "logps/ref_rejected": -159.72341918945312,
      "logps/rejected": -159.44705200195312,
      "loss": 0.693,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.003119847271591425,
      "rewards/margins": 0.00035619616392068565,
      "rewards/rejected": 0.002763650845736265,
      "step": 6
    },
    {
      "epoch": 0.6455331412103746,
      "grad_norm": 0.40479081869125366,
      "learning_rate": 1.2500000000000005e-07,
      "logits/chosen": 0.15088775753974915,
      "logits/rejected": 0.3000352084636688,
      "logps/chosen": -139.46456909179688,
      "logps/ref_chosen": -139.45156860351562,
      "logps/ref_rejected": -172.6890869140625,
      "logps/rejected": -172.62013244628906,
      "loss": 0.6936,
      "rewards/accuracies": 0.4765625,
      "rewards/chosen": -0.0001300960429944098,
      "rewards/margins": -0.000819505425170064,
      "rewards/rejected": 0.0006894093239679933,
      "step": 7
    },
    {
      "epoch": 0.7377521613832853,
      "grad_norm": 0.3541257679462433,
      "learning_rate": 5.848888922025552e-08,
      "logits/chosen": 0.15152569115161896,
      "logits/rejected": 0.2909863293170929,
      "logps/chosen": -133.0579376220703,
      "logps/ref_chosen": -133.19911193847656,
      "logps/ref_rejected": -167.17926025390625,
      "logps/rejected": -167.0704803466797,
      "loss": 0.693,
      "rewards/accuracies": 0.4609375,
      "rewards/chosen": 0.0014116661623120308,
      "rewards/margins": 0.00032381698838435113,
      "rewards/rejected": 0.0010878491448238492,
      "step": 8
    },
    {
      "epoch": 0.829971181556196,
      "grad_norm": 0.36632558703422546,
      "learning_rate": 1.507684480352292e-08,
      "logits/chosen": 0.0733647495508194,
      "logits/rejected": 0.21200355887413025,
      "logps/chosen": -137.79318237304688,
      "logps/ref_chosen": -137.95736694335938,
      "logps/ref_rejected": -172.7656707763672,
      "logps/rejected": -172.64659118652344,
      "loss": 0.6929,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.0016417349688708782,
      "rewards/margins": 0.00045086207683198154,
      "rewards/rejected": 0.0011908727465197444,
      "step": 9
    },
    {
      "epoch": 0.9221902017291066,
      "grad_norm": 0.33898451924324036,
      "learning_rate": 0.0,
      "logits/chosen": 0.10225574672222137,
      "logits/rejected": 0.29343536496162415,
      "logps/chosen": -135.6978759765625,
      "logps/ref_chosen": -135.66470336914062,
      "logps/ref_rejected": -178.8431854248047,
      "logps/rejected": -178.8609619140625,
      "loss": 0.6932,
      "rewards/accuracies": 0.4453125,
      "rewards/chosen": -0.0003318000235594809,
      "rewards/margins": -0.00015407620230689645,
      "rewards/rejected": -0.00017772376304492354,
      "step": 10
    },
    {
      "epoch": 0.9221902017291066,
      "step": 10,
      "total_flos": 0.0,
      "train_loss": 0.6935525000095367,
      "train_runtime": 687.8249,
      "train_samples_per_second": 2.017,
      "train_steps_per_second": 0.015
    }
  ],
  "logging_steps": 1,
  "max_steps": 10,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}