File size: 7,573 Bytes
8da0bf5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9221902017291066,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09221902017291066,
"grad_norm": 0.49082234501838684,
"learning_rate": 5e-07,
"logits/chosen": 0.10258625447750092,
"logits/rejected": 0.2278534471988678,
"logps/chosen": -134.6281280517578,
"logps/ref_chosen": -124.05789947509766,
"logps/ref_rejected": -154.88946533203125,
"logps/rejected": -165.65171813964844,
"loss": 0.5071,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.1057022362947464,
"rewards/margins": 0.0019202901748940349,
"rewards/rejected": -0.10762252658605576,
"sft_loss": 0.47003409266471863,
"step": 1
},
{
"epoch": 0.1844380403458213,
"grad_norm": 0.457568883895874,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 0.06952403485774994,
"logits/rejected": 0.3078790605068207,
"logps/chosen": -133.73080444335938,
"logps/ref_chosen": -123.33631896972656,
"logps/ref_rejected": -168.01480102539062,
"logps/rejected": -178.5548553466797,
"loss": 0.5045,
"rewards/accuracies": 0.6015625,
"rewards/chosen": -0.10394492000341415,
"rewards/margins": 0.001455550198443234,
"rewards/rejected": -0.10540048032999039,
"sft_loss": 0.4669448435306549,
"step": 2
},
{
"epoch": 0.276657060518732,
"grad_norm": 0.3315995931625366,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.09175828099250793,
"logits/rejected": 0.22343572974205017,
"logps/chosen": -133.59866333007812,
"logps/ref_chosen": -124.51382446289062,
"logps/ref_rejected": -155.95262145996094,
"logps/rejected": -165.19468688964844,
"loss": 0.5209,
"rewards/accuracies": 0.6328125,
"rewards/chosen": -0.0908483937382698,
"rewards/margins": 0.0015722049865871668,
"rewards/rejected": -0.09242061525583267,
"sft_loss": 0.48662421107292175,
"step": 3
},
{
"epoch": 0.3688760806916426,
"grad_norm": 0.45795556902885437,
"learning_rate": 3.75e-07,
"logits/chosen": 0.0821409523487091,
"logits/rejected": 0.24085786938667297,
"logps/chosen": -132.02798461914062,
"logps/ref_chosen": -123.37024688720703,
"logps/ref_rejected": -160.60987854003906,
"logps/rejected": -169.43934631347656,
"loss": 0.4951,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.08657727390527725,
"rewards/margins": 0.0017174197128042579,
"rewards/rejected": -0.08829469233751297,
"sft_loss": 0.45568379759788513,
"step": 4
},
{
"epoch": 0.4610951008645533,
"grad_norm": 0.5167257785797119,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 0.1414952129125595,
"logits/rejected": 0.23190416395664215,
"logps/chosen": -129.588134765625,
"logps/ref_chosen": -122.73413848876953,
"logps/ref_rejected": -143.36245727539062,
"logps/rejected": -150.25531005859375,
"loss": 0.4931,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.06853996217250824,
"rewards/margins": 0.00038862242945469916,
"rewards/rejected": -0.06892858445644379,
"sft_loss": 0.4531819820404053,
"step": 5
},
{
"epoch": 0.553314121037464,
"grad_norm": 0.7184420824050903,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 0.14788731932640076,
"logits/rejected": 0.29836705327033997,
"logps/chosen": -130.35125732421875,
"logps/ref_chosen": -124.05830383300781,
"logps/ref_rejected": -150.84971618652344,
"logps/rejected": -157.2368621826172,
"loss": 0.4945,
"rewards/accuracies": 0.5859375,
"rewards/chosen": -0.0629296600818634,
"rewards/margins": 0.0009420262649655342,
"rewards/rejected": -0.06387168914079666,
"sft_loss": 0.4549233317375183,
"step": 6
},
{
"epoch": 0.6455331412103746,
"grad_norm": 0.5709189772605896,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 0.16531895101070404,
"logits/rejected": 0.24730782210826874,
"logps/chosen": -128.30152893066406,
"logps/ref_chosen": -126.53475189208984,
"logps/ref_rejected": -153.44937133789062,
"logps/rejected": -155.2527313232422,
"loss": 0.4814,
"rewards/accuracies": 0.5546875,
"rewards/chosen": -0.017667775973677635,
"rewards/margins": 0.0003658741479739547,
"rewards/rejected": -0.018033649772405624,
"sft_loss": 0.4390857517719269,
"step": 7
},
{
"epoch": 0.7377521613832853,
"grad_norm": 0.3785640299320221,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.05871865525841713,
"logits/rejected": 0.2406335175037384,
"logps/chosen": -120.95695495605469,
"logps/ref_chosen": -120.56550598144531,
"logps/ref_rejected": -159.84185791015625,
"logps/rejected": -160.2302703857422,
"loss": 0.4663,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.003914527129381895,
"rewards/margins": -3.0469876946881413e-05,
"rewards/rejected": -0.0038840575143694878,
"sft_loss": 0.42091870307922363,
"step": 8
},
{
"epoch": 0.829971181556196,
"grad_norm": 0.39315882325172424,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 0.14951348304748535,
"logits/rejected": 0.21872764825820923,
"logps/chosen": -124.28387451171875,
"logps/ref_chosen": -124.57730102539062,
"logps/ref_rejected": -146.5771484375,
"logps/rejected": -146.2667236328125,
"loss": 0.4617,
"rewards/accuracies": 0.4453125,
"rewards/chosen": 0.002934188349172473,
"rewards/margins": -0.00017014730838127434,
"rewards/rejected": 0.003104335628449917,
"sft_loss": 0.41538387537002563,
"step": 9
},
{
"epoch": 0.9221902017291066,
"grad_norm": 0.4166741371154785,
"learning_rate": 0.0,
"logits/chosen": 0.12583141028881073,
"logits/rejected": 0.25348690152168274,
"logps/chosen": -124.92916107177734,
"logps/ref_chosen": -125.44955444335938,
"logps/ref_rejected": -160.47593688964844,
"logps/rejected": -159.9407958984375,
"loss": 0.466,
"rewards/accuracies": 0.5390625,
"rewards/chosen": 0.0052038198336958885,
"rewards/margins": -0.00014770496636629105,
"rewards/rejected": 0.0053515248000621796,
"sft_loss": 0.4205903112888336,
"step": 10
},
{
"epoch": 0.9221902017291066,
"step": 10,
"total_flos": 0.0,
"train_loss": 0.48907283544540403,
"train_runtime": 630.6427,
"train_samples_per_second": 2.199,
"train_steps_per_second": 0.016
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|