Training in progress, step 20, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83115256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0067bf041abcc7f640b160e1e89e396eecd076d1fa103ca8aced3343ea31020e
|
3 |
size 83115256
|
last-checkpoint/optimizer.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 166458326
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:196bef52b600c621ba1bc1b0c23bba01a60fd669d7849244954d82a09fccbec5
|
3 |
size 166458326
|
last-checkpoint/pytorch_model_fsdp.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83202410
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:063ad7778a473e9ff3680c3427add129f7c79526e1b6f0c5ee9ff29d1378eaa0
|
3 |
size 83202410
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e2ed9259304616a8ecebc61c5d000777b2978635f7a705b8d7081c480ce0bde
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
-
"epoch": 0.
|
6 |
"eval_steps": 500,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -278,6 +278,36 @@
|
|
278 |
"rewards/margins": 7.247048854827881,
|
279 |
"rewards/rejected": -8.813165664672852,
|
280 |
"step": 18
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
}
|
282 |
],
|
283 |
"logging_steps": 1,
|
@@ -292,7 +322,7 @@
|
|
292 |
"should_evaluate": false,
|
293 |
"should_log": false,
|
294 |
"should_save": true,
|
295 |
-
"should_training_stop":
|
296 |
},
|
297 |
"attributes": {}
|
298 |
}
|
|
|
2 |
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
+
"epoch": 0.0896358543417367,
|
6 |
"eval_steps": 500,
|
7 |
+
"global_step": 20,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
278 |
"rewards/margins": 7.247048854827881,
|
279 |
"rewards/rejected": -8.813165664672852,
|
280 |
"step": 18
|
281 |
+
},
|
282 |
+
{
|
283 |
+
"epoch": 0.08515406162464986,
|
284 |
+
"grad_norm": 1.9467198848724365,
|
285 |
+
"learning_rate": 8.645454235739903e-06,
|
286 |
+
"logits/chosen": -6.274759769439697,
|
287 |
+
"logits/rejected": -6.058895111083984,
|
288 |
+
"logps/chosen": -253.76348876953125,
|
289 |
+
"logps/rejected": -375.3273010253906,
|
290 |
+
"loss": 0.2131,
|
291 |
+
"rewards/accuracies": 0.921875,
|
292 |
+
"rewards/chosen": -0.9127716422080994,
|
293 |
+
"rewards/margins": 7.889276504516602,
|
294 |
+
"rewards/rejected": -8.802047729492188,
|
295 |
+
"step": 19
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"epoch": 0.0896358543417367,
|
299 |
+
"grad_norm": 2.9522013664245605,
|
300 |
+
"learning_rate": 2.1852399266194314e-06,
|
301 |
+
"logits/chosen": -6.096031665802002,
|
302 |
+
"logits/rejected": -6.0123748779296875,
|
303 |
+
"logps/chosen": -293.298095703125,
|
304 |
+
"logps/rejected": -377.3089599609375,
|
305 |
+
"loss": 0.256,
|
306 |
+
"rewards/accuracies": 0.890625,
|
307 |
+
"rewards/chosen": -1.636772871017456,
|
308 |
+
"rewards/margins": 7.670954704284668,
|
309 |
+
"rewards/rejected": -9.307727813720703,
|
310 |
+
"step": 20
|
311 |
}
|
312 |
],
|
313 |
"logging_steps": 1,
|
|
|
322 |
"should_evaluate": false,
|
323 |
"should_log": false,
|
324 |
"should_save": true,
|
325 |
+
"should_training_stop": true
|
326 |
},
|
327 |
"attributes": {}
|
328 |
}
|