lzc0525's picture
Upload folder using huggingface_hub
b077538 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9221902017291066,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09221902017291066,
"grad_norm": 0.37403297424316406,
"learning_rate": 5e-07,
"logits/chosen": 0.10453951358795166,
"logits/rejected": 0.2759632468223572,
"logps/chosen": -132.67144775390625,
"logps/ref_chosen": -133.03013610839844,
"logps/ref_rejected": -164.177734375,
"logps/rejected": -163.5892333984375,
"loss": 0.6943,
"rewards/accuracies": 0.3984375,
"rewards/chosen": 0.003586653620004654,
"rewards/margins": -0.002298325300216675,
"rewards/rejected": 0.005884978454560041,
"step": 1
},
{
"epoch": 0.1844380403458213,
"grad_norm": 0.358889102935791,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 0.08250421285629272,
"logits/rejected": 0.33024948835372925,
"logps/chosen": -134.9761962890625,
"logps/ref_chosen": -135.27749633789062,
"logps/ref_rejected": -188.41795349121094,
"logps/rejected": -187.88470458984375,
"loss": 0.6943,
"rewards/accuracies": 0.3515625,
"rewards/chosen": 0.003012962406501174,
"rewards/margins": -0.0023194574750959873,
"rewards/rejected": 0.005332420114427805,
"step": 2
},
{
"epoch": 0.276657060518732,
"grad_norm": 0.3239206075668335,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.08983182162046432,
"logits/rejected": 0.2510722875595093,
"logps/chosen": -134.5572052001953,
"logps/ref_chosen": -134.77491760253906,
"logps/ref_rejected": -161.10980224609375,
"logps/rejected": -160.8002471923828,
"loss": 0.6936,
"rewards/accuracies": 0.453125,
"rewards/chosen": 0.002177180489525199,
"rewards/margins": -0.0009184688096866012,
"rewards/rejected": 0.003095649415627122,
"step": 3
},
{
"epoch": 0.3688760806916426,
"grad_norm": 0.3113608658313751,
"learning_rate": 3.75e-07,
"logits/chosen": 0.18616041541099548,
"logits/rejected": 0.3378028869628906,
"logps/chosen": -141.66685485839844,
"logps/ref_chosen": -142.0138702392578,
"logps/ref_rejected": -173.76629638671875,
"logps/rejected": -173.24481201171875,
"loss": 0.694,
"rewards/accuracies": 0.390625,
"rewards/chosen": 0.0034699777606874704,
"rewards/margins": -0.0017446475103497505,
"rewards/rejected": 0.005214625503867865,
"step": 4
},
{
"epoch": 0.4610951008645533,
"grad_norm": 0.3619195818901062,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 0.1221667155623436,
"logits/rejected": 0.268534779548645,
"logps/chosen": -134.57679748535156,
"logps/ref_chosen": -134.8294677734375,
"logps/ref_rejected": -177.42715454101562,
"logps/rejected": -177.0855712890625,
"loss": 0.6936,
"rewards/accuracies": 0.4609375,
"rewards/chosen": 0.002526558004319668,
"rewards/margins": -0.0008892094483599067,
"rewards/rejected": 0.00341576780192554,
"step": 5
},
{
"epoch": 0.553314121037464,
"grad_norm": 0.3865343928337097,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 0.19299811124801636,
"logits/rejected": 0.30019116401672363,
"logps/chosen": -135.14425659179688,
"logps/ref_chosen": -135.45623779296875,
"logps/ref_rejected": -159.72341918945312,
"logps/rejected": -159.44705200195312,
"loss": 0.693,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.003119847271591425,
"rewards/margins": 0.00035619616392068565,
"rewards/rejected": 0.002763650845736265,
"step": 6
},
{
"epoch": 0.6455331412103746,
"grad_norm": 0.40479081869125366,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 0.15088775753974915,
"logits/rejected": 0.3000352084636688,
"logps/chosen": -139.46456909179688,
"logps/ref_chosen": -139.45156860351562,
"logps/ref_rejected": -172.6890869140625,
"logps/rejected": -172.62013244628906,
"loss": 0.6936,
"rewards/accuracies": 0.4765625,
"rewards/chosen": -0.0001300960429944098,
"rewards/margins": -0.000819505425170064,
"rewards/rejected": 0.0006894093239679933,
"step": 7
},
{
"epoch": 0.7377521613832853,
"grad_norm": 0.3541257679462433,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.15152569115161896,
"logits/rejected": 0.2909863293170929,
"logps/chosen": -133.0579376220703,
"logps/ref_chosen": -133.19911193847656,
"logps/ref_rejected": -167.17926025390625,
"logps/rejected": -167.0704803466797,
"loss": 0.693,
"rewards/accuracies": 0.4609375,
"rewards/chosen": 0.0014116661623120308,
"rewards/margins": 0.00032381698838435113,
"rewards/rejected": 0.0010878491448238492,
"step": 8
},
{
"epoch": 0.829971181556196,
"grad_norm": 0.36632558703422546,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 0.0733647495508194,
"logits/rejected": 0.21200355887413025,
"logps/chosen": -137.79318237304688,
"logps/ref_chosen": -137.95736694335938,
"logps/ref_rejected": -172.7656707763672,
"logps/rejected": -172.64659118652344,
"loss": 0.6929,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.0016417349688708782,
"rewards/margins": 0.00045086207683198154,
"rewards/rejected": 0.0011908727465197444,
"step": 9
},
{
"epoch": 0.9221902017291066,
"grad_norm": 0.33898451924324036,
"learning_rate": 0.0,
"logits/chosen": 0.10225574672222137,
"logits/rejected": 0.29343536496162415,
"logps/chosen": -135.6978759765625,
"logps/ref_chosen": -135.66470336914062,
"logps/ref_rejected": -178.8431854248047,
"logps/rejected": -178.8609619140625,
"loss": 0.6932,
"rewards/accuracies": 0.4453125,
"rewards/chosen": -0.0003318000235594809,
"rewards/margins": -0.00015407620230689645,
"rewards/rejected": -0.00017772376304492354,
"step": 10
},
{
"epoch": 0.9221902017291066,
"step": 10,
"total_flos": 0.0,
"train_loss": 0.6935525000095367,
"train_runtime": 687.8249,
"train_samples_per_second": 2.017,
"train_steps_per_second": 0.015
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}