PhoenixB committed on
Commit
781f062
·
verified ·
1 Parent(s): 46c8d85

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f96ce207444d7a10e17049c94dc5648bd8f200d8da34ca1d6852ec281d5c6819
3
  size 18516456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:804a607f5bc7875b5255290292365e3c6ac847585ff434cabc2f3d5d24a00d2a
3
  size 18516456
last-checkpoint/global_step30/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05eb3a4f136a1958f3c5c416988ec786fd4c8d5d0643b737fc49f2dcf009470a
3
+ size 13857360
last-checkpoint/global_step30/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91ac6728dd0a4faffb6c1e6a3fdd75b56a85a6268fd94a7680afa248b3da8e75
3
+ size 13856656
last-checkpoint/global_step30/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d52dd5892669f2296b0f1cd15f59f9b778781cd4d8fa46c605089268e3b4856
3
+ size 13857360
last-checkpoint/global_step30/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73128a6936313c0b5c6441c7e184e66232d0f1e5d280a967a1f2869234377055
3
+ size 13856656
last-checkpoint/global_step30/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d068e0becce39bb30539549ecb05d3b848255e1180e7e11dd1532e210ded51de
3
+ size 13857424
last-checkpoint/global_step30/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8bf5e83c4a91c075d1a231f4d410dd31390a15ee5ade745981888f1b0ccfa59
3
+ size 13856656
last-checkpoint/global_step30/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d528ba595355b52cd6de62323f9d86bbebd02f2ccaa1e00133bd6e6d0053de16
3
+ size 13857424
last-checkpoint/global_step30/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82543bbd7216ed705c94af8b6cfce2860bafd6d698a86c7293702a9ad967ff86
3
+ size 13856656
last-checkpoint/global_step30/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8414e911efad872d067efb4e057790ccd411733f208e2dfd83b1bcf8dfe342a
3
+ size 485512632
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step24
 
1
+ global_step30
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9527d2431b9400e39f4784d5d63151434952de4dfec19feb2ab624b5f1922cb0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61694caed75a4de60717c7522cd2bcddf9803313b4abf292318bb17a445487e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08613728129205922,
6
  "eval_steps": 500,
7
- "global_step": 24,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -368,6 +368,96 @@
368
  "rewards/margins": 0.2939453125,
369
  "rewards/rejected": 0.05828094482421875,
370
  "step": 24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  }
372
  ],
373
  "logging_steps": 1,
@@ -382,7 +472,7 @@
382
  "should_evaluate": false,
383
  "should_log": false,
384
  "should_save": true,
385
- "should_training_stop": false
386
  },
387
  "attributes": {}
388
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.10767160161507403,
6
  "eval_steps": 500,
7
+ "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
368
  "rewards/margins": 0.2939453125,
369
  "rewards/rejected": 0.05828094482421875,
370
  "step": 24
371
+ },
372
+ {
373
+ "epoch": 0.08972633467922836,
374
+ "grad_norm": 0.5010308027267456,
375
+ "learning_rate": 4.06547058867883e-06,
376
+ "logits/chosen": -0.87158203125,
377
+ "logits/rejected": -1.21484375,
378
+ "logps/chosen": -532.75,
379
+ "logps/rejected": -346.25,
380
+ "loss": 0.5499,
381
+ "rewards/accuracies": 0.765625,
382
+ "rewards/chosen": 0.373779296875,
383
+ "rewards/margins": 0.337646484375,
384
+ "rewards/rejected": 0.03619384765625,
385
+ "step": 25
386
+ },
387
+ {
388
+ "epoch": 0.09331538806639748,
389
+ "grad_norm": 0.5847342014312744,
390
+ "learning_rate": 2.86474508437579e-06,
391
+ "logits/chosen": -0.861328125,
392
+ "logits/rejected": -1.162109375,
393
+ "logps/chosen": -524.25,
394
+ "logps/rejected": -364.0,
395
+ "loss": 0.5534,
396
+ "rewards/accuracies": 0.8046875,
397
+ "rewards/chosen": 0.3896484375,
398
+ "rewards/margins": 0.3223876953125,
399
+ "rewards/rejected": 0.0676116943359375,
400
+ "step": 26
401
+ },
402
+ {
403
+ "epoch": 0.09690444145356662,
404
+ "grad_norm": 0.5329582095146179,
405
+ "learning_rate": 1.8553997993420495e-06,
406
+ "logits/chosen": -0.9013671875,
407
+ "logits/rejected": -1.12451171875,
408
+ "logps/chosen": -544.25,
409
+ "logps/rejected": -378.75,
410
+ "loss": 0.583,
411
+ "rewards/accuracies": 0.7109375,
412
+ "rewards/chosen": 0.317138671875,
413
+ "rewards/margins": 0.24462890625,
414
+ "rewards/rejected": 0.07248687744140625,
415
+ "step": 27
416
+ },
417
+ {
418
+ "epoch": 0.10049349484073576,
419
+ "grad_norm": 0.5788964629173279,
420
+ "learning_rate": 1.0533527116762298e-06,
421
+ "logits/chosen": -0.830078125,
422
+ "logits/rejected": -1.13916015625,
423
+ "logps/chosen": -501.25,
424
+ "logps/rejected": -367.75,
425
+ "loss": 0.5667,
426
+ "rewards/accuracies": 0.8359375,
427
+ "rewards/chosen": 0.368896484375,
428
+ "rewards/margins": 0.321533203125,
429
+ "rewards/rejected": 0.047332763671875,
430
+ "step": 28
431
+ },
432
+ {
433
+ "epoch": 0.10408254822790489,
434
+ "grad_norm": 0.5081790685653687,
435
+ "learning_rate": 4.7125258307053385e-07,
436
+ "logits/chosen": -0.89208984375,
437
+ "logits/rejected": -1.15283203125,
438
+ "logps/chosen": -516.0,
439
+ "logps/rejected": -350.75,
440
+ "loss": 0.5557,
441
+ "rewards/accuracies": 0.8046875,
442
+ "rewards/chosen": 0.3759765625,
443
+ "rewards/margins": 0.31689453125,
444
+ "rewards/rejected": 0.059051513671875,
445
+ "step": 29
446
+ },
447
+ {
448
+ "epoch": 0.10767160161507403,
449
+ "grad_norm": 0.6344053745269775,
450
+ "learning_rate": 1.1827948028283353e-07,
451
+ "logits/chosen": -0.79736328125,
452
+ "logits/rejected": -1.0771484375,
453
+ "logps/chosen": -549.75,
454
+ "logps/rejected": -358.0,
455
+ "loss": 0.5547,
456
+ "rewards/accuracies": 0.8203125,
457
+ "rewards/chosen": 0.364013671875,
458
+ "rewards/margins": 0.3228759765625,
459
+ "rewards/rejected": 0.0411224365234375,
460
+ "step": 30
461
  }
462
  ],
463
  "logging_steps": 1,
 
472
  "should_evaluate": false,
473
  "should_log": false,
474
  "should_save": true,
475
+ "should_training_stop": true
476
  },
477
  "attributes": {}
478
  }