radna commited on
Commit
cce5155
·
verified ·
1 Parent(s): ecdfec9

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-10/adapter_config.json +4 -4
  2. checkpoint-10/global_step10/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  3. checkpoint-10/global_step10/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  4. checkpoint-10/global_step10/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  5. checkpoint-10/global_step10/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
  6. checkpoint-10/global_step10/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
  7. checkpoint-10/global_step10/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
  8. checkpoint-10/global_step10/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
  9. checkpoint-10/global_step10/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
  10. checkpoint-10/rng_state_10.pth +3 -0
  11. checkpoint-10/rng_state_11.pth +3 -0
  12. checkpoint-10/rng_state_12.pth +3 -0
  13. checkpoint-10/rng_state_13.pth +3 -0
  14. checkpoint-10/rng_state_14.pth +3 -0
  15. checkpoint-10/rng_state_15.pth +3 -0
  16. checkpoint-10/rng_state_8.pth +3 -0
  17. checkpoint-10/rng_state_9.pth +3 -0
  18. checkpoint-10/trainer_state.json +11 -11
  19. checkpoint-10/training_args.bin +1 -1
  20. checkpoint-12/adapter_config.json +4 -4
  21. checkpoint-12/global_step12/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  22. checkpoint-12/global_step12/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  23. checkpoint-12/global_step12/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  24. checkpoint-12/global_step12/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
  25. checkpoint-12/global_step12/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
  26. checkpoint-12/global_step12/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
  27. checkpoint-12/global_step12/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
  28. checkpoint-12/global_step12/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
  29. checkpoint-12/rng_state_10.pth +3 -0
  30. checkpoint-12/rng_state_11.pth +3 -0
  31. checkpoint-12/rng_state_12.pth +3 -0
  32. checkpoint-12/rng_state_13.pth +3 -0
  33. checkpoint-12/rng_state_14.pth +3 -0
  34. checkpoint-12/rng_state_15.pth +3 -0
  35. checkpoint-12/rng_state_8.pth +3 -0
  36. checkpoint-12/rng_state_9.pth +3 -0
  37. checkpoint-12/trainer_state.json +14 -14
  38. checkpoint-12/training_args.bin +1 -1
  39. checkpoint-14/adapter_config.json +4 -4
  40. checkpoint-14/global_step14/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
  41. checkpoint-14/global_step14/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
  42. checkpoint-14/global_step14/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
  43. checkpoint-14/global_step14/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
  44. checkpoint-14/global_step14/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
  45. checkpoint-14/global_step14/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
  46. checkpoint-14/global_step14/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
  47. checkpoint-14/global_step14/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
  48. checkpoint-14/rng_state_10.pth +3 -0
  49. checkpoint-14/rng_state_11.pth +3 -0
  50. checkpoint-14/rng_state_12.pth +3 -0
checkpoint-10/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "v_proj",
27
  "up_proj",
28
- "k_proj",
29
  "gate_proj",
30
- "o_proj",
31
- "q_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
27
+ "o_proj",
28
+ "q_proj",
29
  "v_proj",
30
  "up_proj",
 
31
  "gate_proj",
32
+ "k_proj"
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-10/global_step10/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31423c63186358d053bcc5b3ed94db995a32802fe97e6a76ad418e9b8025b7d0
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02838e0d64424b6cb6300aaccd975292676ce439f342a7ab768ba8ebc037a939
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fea295c285de348f7df4ab14315adf1b2ac4fa15552e7ec830878b7e80ebf6b4
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851e813be505c91be02a733f58ca8e7976bca03e4443b8b9ac5570889fd9f32a
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79209a0ac3cae6f89ff32baaf91ef125ad45ac56f6091dffbd133f4a302fce6c
3
+ size 51616527
checkpoint-10/global_step10/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc4aadd9cff23e98e096a247e97fd7aa3aa946711ad93723c038d1b3255f3b3
3
+ size 51616015
checkpoint-10/global_step10/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30830f2e29749d79058d73b1c3b637fa4196ca48d37a656ac9d7961dfe36dc9
3
+ size 51616517
checkpoint-10/global_step10/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102fbcf4358a000e60cb5b6a88dc1033a27fcfc2266002b06a639958d018dd92
3
+ size 51616005
checkpoint-10/rng_state_10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02cbc5312caa4527a932bd01244e717ac64c19550759d78c25b6d4897bb86349
3
+ size 16404
checkpoint-10/rng_state_11.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7363fb6f7233fff5f8146194e1eecf2d869fada6009ad153c85df1707d10ed6f
3
+ size 16468
checkpoint-10/rng_state_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d30743415391c1069ba3ab70ed1da0efc223c976e184639053dcf7380d6cdd
3
+ size 16340
checkpoint-10/rng_state_13.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee60b08b1cf23d4f9f0f117d77ed7e8fa1ac312a38abe56e4d466b6a4227e9d
3
+ size 16468
checkpoint-10/rng_state_14.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8560f8334413a01597d0a3ccad9d05395750a6239553f076da5e3cccf31c41e
3
+ size 16404
checkpoint-10/rng_state_15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96ea81b40f32da094d53f444b97db883b2fa2452c60a9c74fab28de8b9c8bdae
3
+ size 16404
checkpoint-10/rng_state_8.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51356d5c061931ce8276759999e3ec37a77c677e630b0feb20a2f2405f50de2d
3
+ size 16389
checkpoint-10/rng_state_9.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a7bc6da1ba40ef613e242a724fa971e15ef8c90312b8b1472bab1f3749faf50
3
+ size 16389
checkpoint-10/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
- "memory(GiB)": 176.98,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
- "memory(GiB)": 176.98,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
@@ -45,7 +45,7 @@
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
- "memory(GiB)": 176.98,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
- "memory(GiB)": 176.98,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
@@ -73,7 +73,7 @@
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
- "memory(GiB)": 176.98,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
@@ -87,7 +87,7 @@
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
- "memory(GiB)": 176.98,
91
  "step": 6,
92
  "train_speed(iter/s)": 0.000458
93
  },
@@ -102,7 +102,7 @@
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
- "eval_runtime": 1030.1126,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
@@ -115,7 +115,7 @@
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
- "memory(GiB)": 176.98,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
- "memory(GiB)": 176.98,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
@@ -143,7 +143,7 @@
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
- "memory(GiB)": 176.98,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
- "memory(GiB)": 176.98,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  }
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 180.29,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 180.29,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 180.29,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 180.29,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 180.29,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 180.29,
91
  "step": 6,
92
  "train_speed(iter/s)": 0.000458
93
  },
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1223,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 180.29,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 180.29,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 180.29,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 180.29,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  }
checkpoint-10/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
3
  size 9809
checkpoint-12/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "v_proj",
27
  "up_proj",
28
- "k_proj",
29
  "gate_proj",
30
- "o_proj",
31
- "q_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
27
+ "o_proj",
28
+ "q_proj",
29
  "v_proj",
30
  "up_proj",
 
31
  "gate_proj",
32
+ "k_proj"
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-12/global_step12/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:822b7229a1bea103b4ea6123a0c441dc6d1439fc94f949416b706af2b11393b8
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b8394d158f867db49d932934b759fd14a26b6b60201392edfd7da2c3a8f8198
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35fa216219b835333a072810153c4ef7c296a66ae453a41979a7bc9f3f685f9e
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b24e4ac8ec1a02119157b197ca7535fb49b61d68a81f533af277df6a02d427b
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca51772cf3c9bcb49a0c691ab973063956f6f9960873dd39c9f35e8a9c7baceb
3
+ size 51616527
checkpoint-12/global_step12/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c62d9674c47f55a1fffbb308c52366659ae361c546685d79943f5f55c75e6ec
3
+ size 51616015
checkpoint-12/global_step12/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600f4192e6e52b5af553a3fcf8be2f1849e04450bbc353b6ef46ad3a8946d588
3
+ size 51616517
checkpoint-12/global_step12/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9c29401936327e19cc04b3d65152447c7b168640500d3729231fa3ffb6625b
3
+ size 51616005
checkpoint-12/rng_state_10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbfcbd5b3ad1ea91d306ea93b3bf1db721a10749da638dc58e5a7eb8ef5c060
3
+ size 16404
checkpoint-12/rng_state_11.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5bb91559c98a1fac8c39253e100b4949629a45a4563ba169f5be99716d52b80
3
+ size 16468
checkpoint-12/rng_state_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef25e80c3d47579c059f0446460239ed32d09d212f17101821e5c560ec261c4
3
+ size 16340
checkpoint-12/rng_state_13.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e31897e9d43cb146f65d037920c8c11d77396d3a557de0f4a0b1d73953a305
3
+ size 16468
checkpoint-12/rng_state_14.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:606a7fa2a7952eb8cdc167df71045befac645e37f12f2c3fcce14b5f1e5da01c
3
+ size 16404
checkpoint-12/rng_state_15.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa68c84a8b51fa41541e27a6dd21bfd8b666e0bfce19c554e6fb078dbebe8983
3
+ size 16404
checkpoint-12/rng_state_8.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9fb6cdfd982e8c84e600915f2279236e8f10f62341c522ac88160fe782aa164
3
+ size 16389
checkpoint-12/rng_state_9.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96d2fee6f99950abccdc5d627af4a0c87c49e72162451c70e23b8b58a39c5ee4
3
+ size 16389
checkpoint-12/trainer_state.json CHANGED
@@ -17,7 +17,7 @@
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
- "memory(GiB)": 176.98,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
@@ -33,7 +33,7 @@
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
- "memory(GiB)": 176.98,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
@@ -45,7 +45,7 @@
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
- "memory(GiB)": 176.98,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
@@ -61,7 +61,7 @@
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
- "memory(GiB)": 176.98,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
@@ -73,7 +73,7 @@
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
- "memory(GiB)": 176.98,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
@@ -87,7 +87,7 @@
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
- "memory(GiB)": 176.98,
91
  "step": 6,
92
  "train_speed(iter/s)": 0.000458
93
  },
@@ -102,7 +102,7 @@
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
- "eval_runtime": 1030.1126,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
@@ -115,7 +115,7 @@
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
- "memory(GiB)": 176.98,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
@@ -131,7 +131,7 @@
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
- "memory(GiB)": 176.98,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
@@ -143,7 +143,7 @@
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
- "memory(GiB)": 176.98,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
@@ -159,7 +159,7 @@
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
- "memory(GiB)": 176.98,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
@@ -171,7 +171,7 @@
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
- "memory(GiB)": 187.02,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
@@ -185,7 +185,7 @@
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
- "memory(GiB)": 187.02,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
@@ -200,7 +200,7 @@
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
- "eval_runtime": 1025.9048,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
 
17
  "kl": 0.0,
18
  "learning_rate": 1.6666666666666667e-05,
19
  "loss": -0.11016345024108887,
20
+ "memory(GiB)": 180.29,
21
  "response_clip_ratio": 0.11328125,
22
  "reward": -0.002658387296833098,
23
  "reward_std": 0.06134121119976044,
 
33
  "kl": 0.0,
34
  "learning_rate": 3.3333333333333335e-05,
35
  "loss": -0.11016345024108887,
36
+ "memory(GiB)": 180.29,
37
  "step": 2,
38
  "train_speed(iter/s)": 0.000466
39
  },
 
45
  "kl": 9.50181856751442e-07,
46
  "learning_rate": 5e-05,
47
  "loss": -0.06604708731174469,
48
+ "memory(GiB)": 180.29,
49
  "response_clip_ratio": 0.13671875,
50
  "reward": 0.0006296975770965219,
51
  "reward_std": 0.07172460854053497,
 
61
  "kl": 1.1101365089416504e-05,
62
  "learning_rate": 6.666666666666667e-05,
63
  "loss": -0.06727766245603561,
64
+ "memory(GiB)": 180.29,
65
  "step": 4,
66
  "train_speed(iter/s)": 0.000458
67
  },
 
73
  "kl": 0.00017762184143066406,
74
  "learning_rate": 8.333333333333334e-05,
75
  "loss": -0.09315311908721924,
76
+ "memory(GiB)": 180.29,
77
  "response_clip_ratio": 0.119140625,
78
  "reward": -0.005135859013535082,
79
  "reward_std": 0.07994875870645046,
 
87
  "grad_norm": 0.18263348937034607,
88
  "learning_rate": 0.0001,
89
  "loss": -0.1041698157787323,
90
+ "memory(GiB)": 180.29,
91
  "step": 6,
92
  "train_speed(iter/s)": 0.000458
93
  },
 
102
  "eval_reward_std": 0.08769983053207397,
103
  "eval_rewards/CosineReward": 0.012996694073081017,
104
  "eval_rewards/RepetitionPenalty": 0.0,
105
+ "eval_runtime": 1030.1223,
106
  "eval_samples_per_second": 0.001,
107
  "eval_steps_per_second": 0.001,
108
  "step": 6
 
115
  "kl": 0.017406463623046875,
116
  "learning_rate": 9.991540791356342e-05,
117
  "loss": -0.051375165581703186,
118
+ "memory(GiB)": 180.29,
119
  "response_clip_ratio": 0.1484375,
120
  "reward": 0.004909618757665157,
121
  "reward_std": 0.08167182095348835,
 
131
  "kl": 0.089599609375,
132
  "learning_rate": 9.966191788709716e-05,
133
  "loss": -0.05105742812156677,
134
+ "memory(GiB)": 180.29,
135
  "step": 8,
136
  "train_speed(iter/s)": 0.000433
137
  },
 
143
  "kl": 0.0963134765625,
144
  "learning_rate": 9.924038765061042e-05,
145
  "loss": -0.05842069163918495,
146
+ "memory(GiB)": 180.29,
147
  "response_clip_ratio": 0.255859375,
148
  "reward": 0.03643610421568155,
149
  "reward_std": 0.11898956261575222,
 
159
  "kl": 0.1185302734375,
160
  "learning_rate": 9.865224352899119e-05,
161
  "loss": -0.06491819024085999,
162
+ "memory(GiB)": 180.29,
163
  "step": 10,
164
  "train_speed(iter/s)": 0.000436
165
  },
 
171
  "kl": 0.1275634765625,
172
  "learning_rate": 9.789947561577445e-05,
173
  "loss": -0.04600231721997261,
174
+ "memory(GiB)": 180.29,
175
  "response_clip_ratio": 0.361328125,
176
  "reward": 0.023204635945148766,
177
  "reward_std": 0.10593634657561779,
 
185
  "grad_norm": 0.05781339108943939,
186
  "learning_rate": 9.698463103929542e-05,
187
  "loss": -0.05069056898355484,
188
+ "memory(GiB)": 180.29,
189
  "step": 12,
190
  "train_speed(iter/s)": 0.000439
191
  },
 
200
  "eval_reward_std": 0.10685288906097412,
201
  "eval_rewards/CosineReward": 0.03234308212995529,
202
  "eval_rewards/RepetitionPenalty": 0.0,
203
+ "eval_runtime": 1025.9045,
204
  "eval_samples_per_second": 0.001,
205
  "eval_steps_per_second": 0.001,
206
  "step": 12
checkpoint-12/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09cdf21dfd9faa218b7fd99e3f3dc0ef681c4e3fd3b905e7348f5467b0198044
3
  size 9809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
3
  size 9809
checkpoint-14/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "v_proj",
27
  "up_proj",
28
- "k_proj",
29
  "gate_proj",
30
- "o_proj",
31
- "q_proj",
32
- "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
27
+ "o_proj",
28
+ "q_proj",
29
  "v_proj",
30
  "up_proj",
 
31
  "gate_proj",
32
+ "k_proj"
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-14/global_step14/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987ae005efaff2991e8c298a69bdab1a97208f36e35344eb8b1fb65762731dfc
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05cac762c3f44b1d98241aec390dc65daa33e8f7398ed433eee1e495e4d0b560
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf818cfdb7066dbde4d3dfb089b34e214a613704021039d4caf381186baed5ab
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:707d84406d88c3919d30ec9dcf83ae4ef21c9b29cc38086d36157d7edd31b501
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5de8c330c50df7e2adf5dde811bad3c52e5e2c9c084ddb35bca14a6bb83ba67
3
+ size 51616527
checkpoint-14/global_step14/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c27e1d29ea2feb71f051b4ab6f11352d55cf6bec46aba95811f25bb3d69081c
3
+ size 51616015
checkpoint-14/global_step14/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da1a503819351e6774b2525c5e49b1456f017e729f3ca8a78a5f3e2ac5d89f4
3
+ size 51616517
checkpoint-14/global_step14/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d142701af8386a993512a2032632171cbb114e8945d01d85fd4f2c0b2412226
3
+ size 51616005
checkpoint-14/rng_state_10.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7934873a8dc658a1c742fe7fcb215347facbe475dccfaceac1ffba3ca9bd8421
3
+ size 16404
checkpoint-14/rng_state_11.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9735aadee20166cb6297399822e43964b7f9ba374abf785504ea0cc3685b3460
3
+ size 16468
checkpoint-14/rng_state_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee83e76542218def27dfde611cc946dd25555fdb9fc54c1720d827f457d8d116
3
+ size 16340