File size: 7,190 Bytes
efe9069 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9221902017291066,
"eval_steps": 500,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09221902017291066,
"grad_norm": 0.3019522428512573,
"learning_rate": 5e-07,
"logits/chosen": 0.06190446391701698,
"logits/rejected": 0.24964340031147003,
"logps/chosen": -133.9302215576172,
"logps/ref_chosen": -134.30517578125,
"logps/ref_rejected": -173.56590270996094,
"logps/rejected": -173.0438232421875,
"loss": 0.6939,
"rewards/accuracies": 0.4453125,
"rewards/chosen": 0.0037494890857487917,
"rewards/margins": -0.0014713926939293742,
"rewards/rejected": 0.005220881663262844,
"step": 1
},
{
"epoch": 0.1844380403458213,
"grad_norm": 0.41573917865753174,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 0.1268736720085144,
"logits/rejected": 0.2718392312526703,
"logps/chosen": -138.44927978515625,
"logps/ref_chosen": -138.6737518310547,
"logps/ref_rejected": -170.81944274902344,
"logps/rejected": -170.447265625,
"loss": 0.6939,
"rewards/accuracies": 0.34375,
"rewards/chosen": 0.0022446608636528254,
"rewards/margins": -0.0014773242874071002,
"rewards/rejected": 0.0037219852674752474,
"step": 2
},
{
"epoch": 0.276657060518732,
"grad_norm": 0.3277066946029663,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.1311892718076706,
"logits/rejected": 0.26672443747520447,
"logps/chosen": -132.68569946289062,
"logps/ref_chosen": -132.93014526367188,
"logps/ref_rejected": -165.84507751464844,
"logps/rejected": -165.56008911132812,
"loss": 0.6934,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.0024443636648356915,
"rewards/margins": -0.00040555946179665625,
"rewards/rejected": 0.0028499234467744827,
"step": 3
},
{
"epoch": 0.3688760806916426,
"grad_norm": 0.41292324662208557,
"learning_rate": 3.75e-07,
"logits/chosen": 0.08853106200695038,
"logits/rejected": 0.23616701364517212,
"logps/chosen": -137.2488250732422,
"logps/ref_chosen": -137.4427032470703,
"logps/ref_rejected": -177.97886657714844,
"logps/rejected": -177.62860107421875,
"loss": 0.6939,
"rewards/accuracies": 0.421875,
"rewards/chosen": 0.001938714412972331,
"rewards/margins": -0.0015639358898624778,
"rewards/rejected": 0.003502650186419487,
"step": 4
},
{
"epoch": 0.4610951008645533,
"grad_norm": 0.31257063150405884,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 0.12685821950435638,
"logits/rejected": 0.23144984245300293,
"logps/chosen": -131.0224609375,
"logps/ref_chosen": -131.1569061279297,
"logps/ref_rejected": -164.11549377441406,
"logps/rejected": -163.87142944335938,
"loss": 0.6937,
"rewards/accuracies": 0.4453125,
"rewards/chosen": 0.0013444966170936823,
"rewards/margins": -0.001095889019779861,
"rewards/rejected": 0.0024403855204582214,
"step": 5
},
{
"epoch": 0.553314121037464,
"grad_norm": 0.36982661485671997,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 0.16632890701293945,
"logits/rejected": 0.27399736642837524,
"logps/chosen": -130.6382598876953,
"logps/ref_chosen": -130.83815002441406,
"logps/ref_rejected": -160.47244262695312,
"logps/rejected": -160.15945434570312,
"loss": 0.6937,
"rewards/accuracies": 0.3671875,
"rewards/chosen": 0.001998710911720991,
"rewards/margins": -0.0011310731060802937,
"rewards/rejected": 0.003129784483462572,
"step": 6
},
{
"epoch": 0.6455331412103746,
"grad_norm": 0.27344921231269836,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 0.11610936373472214,
"logits/rejected": 0.24762782454490662,
"logps/chosen": -131.18833923339844,
"logps/ref_chosen": -131.2764892578125,
"logps/ref_rejected": -175.32669067382812,
"logps/rejected": -175.1273193359375,
"loss": 0.6937,
"rewards/accuracies": 0.4296875,
"rewards/chosen": 0.0008815132896415889,
"rewards/margins": -0.0011121523566544056,
"rewards/rejected": 0.0019936657045036554,
"step": 7
},
{
"epoch": 0.7377521613832853,
"grad_norm": 0.3876575529575348,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.0759858638048172,
"logits/rejected": 0.2628093659877777,
"logps/chosen": -131.0194549560547,
"logps/ref_chosen": -131.0524139404297,
"logps/ref_rejected": -162.96224975585938,
"logps/rejected": -162.89967346191406,
"loss": 0.6933,
"rewards/accuracies": 0.484375,
"rewards/chosen": 0.00032957797520793974,
"rewards/margins": -0.00029635371174663305,
"rewards/rejected": 0.0006259315996430814,
"step": 8
},
{
"epoch": 0.829971181556196,
"grad_norm": 0.3749904930591583,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 0.12389053404331207,
"logits/rejected": 0.27916383743286133,
"logps/chosen": -136.4560546875,
"logps/ref_chosen": -136.45892333984375,
"logps/ref_rejected": -167.01512145996094,
"logps/rejected": -166.9549560546875,
"loss": 0.6934,
"rewards/accuracies": 0.4609375,
"rewards/chosen": 2.849580778274685e-05,
"rewards/margins": -0.0005732894060201943,
"rewards/rejected": 0.0006017851992510259,
"step": 9
},
{
"epoch": 0.9221902017291066,
"grad_norm": 0.32978877425193787,
"learning_rate": 0.0,
"logits/chosen": 0.1598304808139801,
"logits/rejected": 0.3594120740890503,
"logps/chosen": -140.08253479003906,
"logps/ref_chosen": -140.051513671875,
"logps/ref_rejected": -173.16290283203125,
"logps/rejected": -173.14581298828125,
"loss": 0.6934,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.0003102564951404929,
"rewards/margins": -0.0004811614053323865,
"rewards/rejected": 0.00017090495384763926,
"step": 10
},
{
"epoch": 0.9221902017291066,
"step": 10,
"total_flos": 0.0,
"train_loss": 0.6936326384544372,
"train_runtime": 616.2156,
"train_samples_per_second": 2.251,
"train_steps_per_second": 0.016
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|