Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint-10/adapter_config.json +5 -5
- checkpoint-10/trainer_state.json +16 -15
- checkpoint-10/training_args.bin +1 -1
- checkpoint-12/adapter_config.json +5 -5
- checkpoint-12/trainer_state.json +19 -18
- checkpoint-12/training_args.bin +1 -1
- checkpoint-14/adapter_config.json +5 -5
- checkpoint-14/trainer_state.json +21 -20
- checkpoint-14/training_args.bin +1 -1
- checkpoint-16/adapter_config.json +5 -5
- checkpoint-16/trainer_state.json +23 -22
- checkpoint-16/training_args.bin +1 -1
- checkpoint-18/adapter_config.json +5 -5
- checkpoint-18/trainer_state.json +26 -25
- checkpoint-18/training_args.bin +1 -1
- checkpoint-2/adapter_config.json +5 -5
- checkpoint-2/trainer_state.json +6 -5
- checkpoint-2/training_args.bin +1 -1
- checkpoint-20/adapter_config.json +5 -5
- checkpoint-20/trainer_state.json +28 -27
- checkpoint-20/training_args.bin +1 -1
- checkpoint-22/adapter_config.json +5 -5
- checkpoint-22/trainer_state.json +31 -30
- checkpoint-22/training_args.bin +1 -1
- checkpoint-24/adapter_config.json +5 -5
- checkpoint-24/trainer_state.json +34 -33
- checkpoint-24/training_args.bin +1 -1
- checkpoint-26/adapter_config.json +5 -5
- checkpoint-26/trainer_state.json +36 -35
- checkpoint-26/training_args.bin +1 -1
- checkpoint-28/adapter_config.json +5 -5
- checkpoint-28/trainer_state.json +38 -37
- checkpoint-28/training_args.bin +1 -1
- checkpoint-30/adapter_config.json +5 -5
- checkpoint-30/global_step30/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/global_step30/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
- checkpoint-30/rng_state_10.pth +3 -0
- checkpoint-30/rng_state_11.pth +3 -0
- checkpoint-30/rng_state_12.pth +3 -0
- checkpoint-30/rng_state_13.pth +3 -0
- checkpoint-30/rng_state_14.pth +3 -0
- checkpoint-30/rng_state_15.pth +3 -0
- checkpoint-30/rng_state_8.pth +3 -0
- checkpoint-30/rng_state_9.pth +3 -0
checkpoint-10/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-10/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.012996690347790718,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6",
|
4 |
"epoch": 2.4210526315789473,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 10,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 6,
|
3 |
"best_metric": 0.012996690347790718,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-6",
|
5 |
"epoch": 2.4210526315789473,
|
|
|
7 |
"global_step": 10,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
}
|
checkpoint-10/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-12/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-12/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03234308212995529,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
4 |
"epoch": 2.8421052631578947,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 12,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
|
|
1 |
{
|
2 |
+
"best_global_step": 12,
|
3 |
"best_metric": 0.03234308212995529,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
5 |
"epoch": 2.8421052631578947,
|
|
|
7 |
"global_step": 12,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
checkpoint-12/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-14/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-14/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03234308212995529,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
4 |
"epoch": 3.4210526315789473,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 14,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 12,
|
3 |
"best_metric": 0.03234308212995529,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
5 |
"epoch": 3.4210526315789473,
|
|
|
7 |
"global_step": 14,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
}
|
checkpoint-14/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-16/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-16/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03234308212995529,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
4 |
"epoch": 3.8421052631578947,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 16,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 12,
|
3 |
"best_metric": 0.03234308212995529,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-12",
|
5 |
"epoch": 3.8421052631578947,
|
|
|
7 |
"global_step": 16,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
}
|
checkpoint-16/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-18/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-18/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03729328140616417,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
4 |
"epoch": 4.421052631578947,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 18,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
|
|
1 |
{
|
2 |
+
"best_global_step": 18,
|
3 |
"best_metric": 0.03729328140616417,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
5 |
"epoch": 4.421052631578947,
|
|
|
7 |
"global_step": 18,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
checkpoint-18/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-2/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-2/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 0.42105263157894735,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 2,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
}
|
39 |
],
|
40 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
+
"best_global_step": null,
|
3 |
"best_metric": null,
|
4 |
"best_model_checkpoint": null,
|
5 |
"epoch": 0.42105263157894735,
|
|
|
7 |
"global_step": 2,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
}
|
40 |
],
|
41 |
"logging_steps": 1,
|
checkpoint-2/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-20/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-20/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03729328140616417,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
4 |
"epoch": 4.842105263157895,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 20,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
@@ -310,7 +311,7 @@
|
|
310 |
"kl": 0.1820068359375,
|
311 |
"learning_rate": 8.636868207865244e-05,
|
312 |
"loss": -0.03466903418302536,
|
313 |
-
"memory(GiB)":
|
314 |
"response_clip_ratio": 0.466796875,
|
315 |
"reward": 0.04069916973821819,
|
316 |
"reward_std": 0.11991005763411522,
|
@@ -326,7 +327,7 @@
|
|
326 |
"kl": 0.19287109375,
|
327 |
"learning_rate": 8.43120818934367e-05,
|
328 |
"loss": -0.03502114117145538,
|
329 |
-
"memory(GiB)":
|
330 |
"step": 20,
|
331 |
"train_speed(iter/s)": 0.000424
|
332 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 18,
|
3 |
"best_metric": 0.03729328140616417,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
5 |
"epoch": 4.842105263157895,
|
|
|
7 |
"global_step": 20,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
|
|
311 |
"kl": 0.1820068359375,
|
312 |
"learning_rate": 8.636868207865244e-05,
|
313 |
"loss": -0.03466903418302536,
|
314 |
+
"memory(GiB)": 180.29,
|
315 |
"response_clip_ratio": 0.466796875,
|
316 |
"reward": 0.04069916973821819,
|
317 |
"reward_std": 0.11991005763411522,
|
|
|
327 |
"kl": 0.19287109375,
|
328 |
"learning_rate": 8.43120818934367e-05,
|
329 |
"loss": -0.03502114117145538,
|
330 |
+
"memory(GiB)": 180.29,
|
331 |
"step": 20,
|
332 |
"train_speed(iter/s)": 0.000424
|
333 |
}
|
checkpoint-20/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-22/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-22/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.03729328140616417,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
4 |
"epoch": 5.421052631578947,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 22,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
@@ -310,7 +311,7 @@
|
|
310 |
"kl": 0.1820068359375,
|
311 |
"learning_rate": 8.636868207865244e-05,
|
312 |
"loss": -0.03466903418302536,
|
313 |
-
"memory(GiB)":
|
314 |
"response_clip_ratio": 0.466796875,
|
315 |
"reward": 0.04069916973821819,
|
316 |
"reward_std": 0.11991005763411522,
|
@@ -326,7 +327,7 @@
|
|
326 |
"kl": 0.19287109375,
|
327 |
"learning_rate": 8.43120818934367e-05,
|
328 |
"loss": -0.03502114117145538,
|
329 |
-
"memory(GiB)":
|
330 |
"step": 20,
|
331 |
"train_speed(iter/s)": 0.000424
|
332 |
},
|
@@ -338,14 +339,14 @@
|
|
338 |
"kl": 0.17626953125,
|
339 |
"learning_rate": 8.213938048432697e-05,
|
340 |
"loss": -0.008662773296236992,
|
341 |
-
"memory(GiB)":
|
342 |
"response_clip_ratio": 0.5625,
|
343 |
"reward": 0.04996980866417289,
|
344 |
"reward_std": 0.13849420100450516,
|
345 |
"rewards/CosineReward": 0.049969930201768875,
|
346 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
347 |
"step": 21,
|
348 |
-
"train_speed(iter/s)": 0.
|
349 |
},
|
350 |
{
|
351 |
"clip_ratio": 5.869188044016482e-05,
|
@@ -354,7 +355,7 @@
|
|
354 |
"kl": 0.178955078125,
|
355 |
"learning_rate": 7.985792958513931e-05,
|
356 |
"loss": -0.008743642829358578,
|
357 |
-
"memory(GiB)":
|
358 |
"step": 22,
|
359 |
"train_speed(iter/s)": 0.000426
|
360 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 18,
|
3 |
"best_metric": 0.03729328140616417,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-18",
|
5 |
"epoch": 5.421052631578947,
|
|
|
7 |
"global_step": 22,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
|
|
311 |
"kl": 0.1820068359375,
|
312 |
"learning_rate": 8.636868207865244e-05,
|
313 |
"loss": -0.03466903418302536,
|
314 |
+
"memory(GiB)": 180.29,
|
315 |
"response_clip_ratio": 0.466796875,
|
316 |
"reward": 0.04069916973821819,
|
317 |
"reward_std": 0.11991005763411522,
|
|
|
327 |
"kl": 0.19287109375,
|
328 |
"learning_rate": 8.43120818934367e-05,
|
329 |
"loss": -0.03502114117145538,
|
330 |
+
"memory(GiB)": 180.29,
|
331 |
"step": 20,
|
332 |
"train_speed(iter/s)": 0.000424
|
333 |
},
|
|
|
339 |
"kl": 0.17626953125,
|
340 |
"learning_rate": 8.213938048432697e-05,
|
341 |
"loss": -0.008662773296236992,
|
342 |
+
"memory(GiB)": 180.29,
|
343 |
"response_clip_ratio": 0.5625,
|
344 |
"reward": 0.04996980866417289,
|
345 |
"reward_std": 0.13849420100450516,
|
346 |
"rewards/CosineReward": 0.049969930201768875,
|
347 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
348 |
"step": 21,
|
349 |
+
"train_speed(iter/s)": 0.000407
|
350 |
},
|
351 |
{
|
352 |
"clip_ratio": 5.869188044016482e-05,
|
|
|
355 |
"kl": 0.178955078125,
|
356 |
"learning_rate": 7.985792958513931e-05,
|
357 |
"loss": -0.008743642829358578,
|
358 |
+
"memory(GiB)": 180.29,
|
359 |
"step": 22,
|
360 |
"train_speed(iter/s)": 0.000426
|
361 |
}
|
checkpoint-22/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-24/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-24/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.04339282959699631,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
4 |
"epoch": 5.842105263157895,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 24,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
@@ -310,7 +311,7 @@
|
|
310 |
"kl": 0.1820068359375,
|
311 |
"learning_rate": 8.636868207865244e-05,
|
312 |
"loss": -0.03466903418302536,
|
313 |
-
"memory(GiB)":
|
314 |
"response_clip_ratio": 0.466796875,
|
315 |
"reward": 0.04069916973821819,
|
316 |
"reward_std": 0.11991005763411522,
|
@@ -326,7 +327,7 @@
|
|
326 |
"kl": 0.19287109375,
|
327 |
"learning_rate": 8.43120818934367e-05,
|
328 |
"loss": -0.03502114117145538,
|
329 |
-
"memory(GiB)":
|
330 |
"step": 20,
|
331 |
"train_speed(iter/s)": 0.000424
|
332 |
},
|
@@ -338,14 +339,14 @@
|
|
338 |
"kl": 0.17626953125,
|
339 |
"learning_rate": 8.213938048432697e-05,
|
340 |
"loss": -0.008662773296236992,
|
341 |
-
"memory(GiB)":
|
342 |
"response_clip_ratio": 0.5625,
|
343 |
"reward": 0.04996980866417289,
|
344 |
"reward_std": 0.13849420100450516,
|
345 |
"rewards/CosineReward": 0.049969930201768875,
|
346 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
347 |
"step": 21,
|
348 |
-
"train_speed(iter/s)": 0.
|
349 |
},
|
350 |
{
|
351 |
"clip_ratio": 5.869188044016482e-05,
|
@@ -354,7 +355,7 @@
|
|
354 |
"kl": 0.178955078125,
|
355 |
"learning_rate": 7.985792958513931e-05,
|
356 |
"loss": -0.008743642829358578,
|
357 |
-
"memory(GiB)":
|
358 |
"step": 22,
|
359 |
"train_speed(iter/s)": 0.000426
|
360 |
},
|
@@ -366,7 +367,7 @@
|
|
366 |
"kl": 0.1796875,
|
367 |
"learning_rate": 7.74754489035403e-05,
|
368 |
"loss": -0.03423420712351799,
|
369 |
-
"memory(GiB)":
|
370 |
"response_clip_ratio": 0.583984375,
|
371 |
"reward": 0.034468831261619925,
|
372 |
"reward_std": 0.11841745302081108,
|
@@ -380,7 +381,7 @@
|
|
380 |
"grad_norm": 0.014131724834442139,
|
381 |
"learning_rate": 7.500000000000001e-05,
|
382 |
"loss": -0.03426633030176163,
|
383 |
-
"memory(GiB)":
|
384 |
"step": 24,
|
385 |
"train_speed(iter/s)": 0.000427
|
386 |
},
|
@@ -395,7 +396,7 @@
|
|
395 |
"eval_reward_std": 0.10456253588199615,
|
396 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
397 |
"eval_rewards/RepetitionPenalty": 0.0,
|
398 |
-
"eval_runtime": 1045.
|
399 |
"eval_samples_per_second": 0.001,
|
400 |
"eval_steps_per_second": 0.001,
|
401 |
"step": 24
|
|
|
1 |
{
|
2 |
+
"best_global_step": 24,
|
3 |
"best_metric": 0.04339282959699631,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
5 |
"epoch": 5.842105263157895,
|
|
|
7 |
"global_step": 24,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
|
|
311 |
"kl": 0.1820068359375,
|
312 |
"learning_rate": 8.636868207865244e-05,
|
313 |
"loss": -0.03466903418302536,
|
314 |
+
"memory(GiB)": 180.29,
|
315 |
"response_clip_ratio": 0.466796875,
|
316 |
"reward": 0.04069916973821819,
|
317 |
"reward_std": 0.11991005763411522,
|
|
|
327 |
"kl": 0.19287109375,
|
328 |
"learning_rate": 8.43120818934367e-05,
|
329 |
"loss": -0.03502114117145538,
|
330 |
+
"memory(GiB)": 180.29,
|
331 |
"step": 20,
|
332 |
"train_speed(iter/s)": 0.000424
|
333 |
},
|
|
|
339 |
"kl": 0.17626953125,
|
340 |
"learning_rate": 8.213938048432697e-05,
|
341 |
"loss": -0.008662773296236992,
|
342 |
+
"memory(GiB)": 180.29,
|
343 |
"response_clip_ratio": 0.5625,
|
344 |
"reward": 0.04996980866417289,
|
345 |
"reward_std": 0.13849420100450516,
|
346 |
"rewards/CosineReward": 0.049969930201768875,
|
347 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
348 |
"step": 21,
|
349 |
+
"train_speed(iter/s)": 0.000407
|
350 |
},
|
351 |
{
|
352 |
"clip_ratio": 5.869188044016482e-05,
|
|
|
355 |
"kl": 0.178955078125,
|
356 |
"learning_rate": 7.985792958513931e-05,
|
357 |
"loss": -0.008743642829358578,
|
358 |
+
"memory(GiB)": 180.29,
|
359 |
"step": 22,
|
360 |
"train_speed(iter/s)": 0.000426
|
361 |
},
|
|
|
367 |
"kl": 0.1796875,
|
368 |
"learning_rate": 7.74754489035403e-05,
|
369 |
"loss": -0.03423420712351799,
|
370 |
+
"memory(GiB)": 180.29,
|
371 |
"response_clip_ratio": 0.583984375,
|
372 |
"reward": 0.034468831261619925,
|
373 |
"reward_std": 0.11841745302081108,
|
|
|
381 |
"grad_norm": 0.014131724834442139,
|
382 |
"learning_rate": 7.500000000000001e-05,
|
383 |
"loss": -0.03426633030176163,
|
384 |
+
"memory(GiB)": 180.29,
|
385 |
"step": 24,
|
386 |
"train_speed(iter/s)": 0.000427
|
387 |
},
|
|
|
396 |
"eval_reward_std": 0.10456253588199615,
|
397 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
398 |
"eval_rewards/RepetitionPenalty": 0.0,
|
399 |
+
"eval_runtime": 1045.0616,
|
400 |
"eval_samples_per_second": 0.001,
|
401 |
"eval_steps_per_second": 0.001,
|
402 |
"step": 24
|
checkpoint-24/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-26/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-26/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.04339282959699631,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
4 |
"epoch": 6.421052631578947,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 26,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
@@ -310,7 +311,7 @@
|
|
310 |
"kl": 0.1820068359375,
|
311 |
"learning_rate": 8.636868207865244e-05,
|
312 |
"loss": -0.03466903418302536,
|
313 |
-
"memory(GiB)":
|
314 |
"response_clip_ratio": 0.466796875,
|
315 |
"reward": 0.04069916973821819,
|
316 |
"reward_std": 0.11991005763411522,
|
@@ -326,7 +327,7 @@
|
|
326 |
"kl": 0.19287109375,
|
327 |
"learning_rate": 8.43120818934367e-05,
|
328 |
"loss": -0.03502114117145538,
|
329 |
-
"memory(GiB)":
|
330 |
"step": 20,
|
331 |
"train_speed(iter/s)": 0.000424
|
332 |
},
|
@@ -338,14 +339,14 @@
|
|
338 |
"kl": 0.17626953125,
|
339 |
"learning_rate": 8.213938048432697e-05,
|
340 |
"loss": -0.008662773296236992,
|
341 |
-
"memory(GiB)":
|
342 |
"response_clip_ratio": 0.5625,
|
343 |
"reward": 0.04996980866417289,
|
344 |
"reward_std": 0.13849420100450516,
|
345 |
"rewards/CosineReward": 0.049969930201768875,
|
346 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
347 |
"step": 21,
|
348 |
-
"train_speed(iter/s)": 0.
|
349 |
},
|
350 |
{
|
351 |
"clip_ratio": 5.869188044016482e-05,
|
@@ -354,7 +355,7 @@
|
|
354 |
"kl": 0.178955078125,
|
355 |
"learning_rate": 7.985792958513931e-05,
|
356 |
"loss": -0.008743642829358578,
|
357 |
-
"memory(GiB)":
|
358 |
"step": 22,
|
359 |
"train_speed(iter/s)": 0.000426
|
360 |
},
|
@@ -366,7 +367,7 @@
|
|
366 |
"kl": 0.1796875,
|
367 |
"learning_rate": 7.74754489035403e-05,
|
368 |
"loss": -0.03423420712351799,
|
369 |
-
"memory(GiB)":
|
370 |
"response_clip_ratio": 0.583984375,
|
371 |
"reward": 0.034468831261619925,
|
372 |
"reward_std": 0.11841745302081108,
|
@@ -380,7 +381,7 @@
|
|
380 |
"grad_norm": 0.014131724834442139,
|
381 |
"learning_rate": 7.500000000000001e-05,
|
382 |
"loss": -0.03426633030176163,
|
383 |
-
"memory(GiB)":
|
384 |
"step": 24,
|
385 |
"train_speed(iter/s)": 0.000427
|
386 |
},
|
@@ -395,7 +396,7 @@
|
|
395 |
"eval_reward_std": 0.10456253588199615,
|
396 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
397 |
"eval_rewards/RepetitionPenalty": 0.0,
|
398 |
-
"eval_runtime": 1045.
|
399 |
"eval_samples_per_second": 0.001,
|
400 |
"eval_steps_per_second": 0.001,
|
401 |
"step": 24
|
@@ -408,7 +409,7 @@
|
|
408 |
"kl": 0.1800537109375,
|
409 |
"learning_rate": 7.243995901002312e-05,
|
410 |
"loss": -0.02097315341234207,
|
411 |
-
"memory(GiB)":
|
412 |
"response_clip_ratio": 0.6171875,
|
413 |
"reward": 0.03010205877944827,
|
414 |
"reward_std": 0.10742511600255966,
|
@@ -424,7 +425,7 @@
|
|
424 |
"kl": 0.18408203125,
|
425 |
"learning_rate": 6.980398830195785e-05,
|
426 |
"loss": -0.02103913575410843,
|
427 |
-
"memory(GiB)":
|
428 |
"step": 26,
|
429 |
"train_speed(iter/s)": 0.000421
|
430 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 24,
|
3 |
"best_metric": 0.04339282959699631,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
5 |
"epoch": 6.421052631578947,
|
|
|
7 |
"global_step": 26,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
|
|
311 |
"kl": 0.1820068359375,
|
312 |
"learning_rate": 8.636868207865244e-05,
|
313 |
"loss": -0.03466903418302536,
|
314 |
+
"memory(GiB)": 180.29,
|
315 |
"response_clip_ratio": 0.466796875,
|
316 |
"reward": 0.04069916973821819,
|
317 |
"reward_std": 0.11991005763411522,
|
|
|
327 |
"kl": 0.19287109375,
|
328 |
"learning_rate": 8.43120818934367e-05,
|
329 |
"loss": -0.03502114117145538,
|
330 |
+
"memory(GiB)": 180.29,
|
331 |
"step": 20,
|
332 |
"train_speed(iter/s)": 0.000424
|
333 |
},
|
|
|
339 |
"kl": 0.17626953125,
|
340 |
"learning_rate": 8.213938048432697e-05,
|
341 |
"loss": -0.008662773296236992,
|
342 |
+
"memory(GiB)": 180.29,
|
343 |
"response_clip_ratio": 0.5625,
|
344 |
"reward": 0.04996980866417289,
|
345 |
"reward_std": 0.13849420100450516,
|
346 |
"rewards/CosineReward": 0.049969930201768875,
|
347 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
348 |
"step": 21,
|
349 |
+
"train_speed(iter/s)": 0.000407
|
350 |
},
|
351 |
{
|
352 |
"clip_ratio": 5.869188044016482e-05,
|
|
|
355 |
"kl": 0.178955078125,
|
356 |
"learning_rate": 7.985792958513931e-05,
|
357 |
"loss": -0.008743642829358578,
|
358 |
+
"memory(GiB)": 180.29,
|
359 |
"step": 22,
|
360 |
"train_speed(iter/s)": 0.000426
|
361 |
},
|
|
|
367 |
"kl": 0.1796875,
|
368 |
"learning_rate": 7.74754489035403e-05,
|
369 |
"loss": -0.03423420712351799,
|
370 |
+
"memory(GiB)": 180.29,
|
371 |
"response_clip_ratio": 0.583984375,
|
372 |
"reward": 0.034468831261619925,
|
373 |
"reward_std": 0.11841745302081108,
|
|
|
381 |
"grad_norm": 0.014131724834442139,
|
382 |
"learning_rate": 7.500000000000001e-05,
|
383 |
"loss": -0.03426633030176163,
|
384 |
+
"memory(GiB)": 180.29,
|
385 |
"step": 24,
|
386 |
"train_speed(iter/s)": 0.000427
|
387 |
},
|
|
|
396 |
"eval_reward_std": 0.10456253588199615,
|
397 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
398 |
"eval_rewards/RepetitionPenalty": 0.0,
|
399 |
+
"eval_runtime": 1045.0616,
|
400 |
"eval_samples_per_second": 0.001,
|
401 |
"eval_steps_per_second": 0.001,
|
402 |
"step": 24
|
|
|
409 |
"kl": 0.1800537109375,
|
410 |
"learning_rate": 7.243995901002312e-05,
|
411 |
"loss": -0.02097315341234207,
|
412 |
+
"memory(GiB)": 180.29,
|
413 |
"response_clip_ratio": 0.6171875,
|
414 |
"reward": 0.03010205877944827,
|
415 |
"reward_std": 0.10742511600255966,
|
|
|
425 |
"kl": 0.18408203125,
|
426 |
"learning_rate": 6.980398830195785e-05,
|
427 |
"loss": -0.02103913575410843,
|
428 |
+
"memory(GiB)": 180.29,
|
429 |
"step": 26,
|
430 |
"train_speed(iter/s)": 0.000421
|
431 |
}
|
checkpoint-26/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-28/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-28/trainer_state.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"best_metric": 0.04339282959699631,
|
3 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
4 |
"epoch": 6.842105263157895,
|
@@ -6,7 +7,7 @@
|
|
6 |
"global_step": 28,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
-
"is_world_process_zero":
|
10 |
"log_history": [
|
11 |
{
|
12 |
"clip_ratio": 0.0,
|
@@ -16,14 +17,14 @@
|
|
16 |
"kl": 0.0,
|
17 |
"learning_rate": 1.6666666666666667e-05,
|
18 |
"loss": -0.11016345024108887,
|
19 |
-
"memory(GiB)":
|
20 |
"response_clip_ratio": 0.11328125,
|
21 |
"reward": -0.002658387296833098,
|
22 |
"reward_std": 0.06134121119976044,
|
23 |
"rewards/CosineReward": -0.0026579967816360295,
|
24 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
25 |
"step": 1,
|
26 |
-
"train_speed(iter/s)": 0.
|
27 |
},
|
28 |
{
|
29 |
"clip_ratio": 0.0,
|
@@ -32,9 +33,9 @@
|
|
32 |
"kl": 0.0,
|
33 |
"learning_rate": 3.3333333333333335e-05,
|
34 |
"loss": -0.11016345024108887,
|
35 |
-
"memory(GiB)":
|
36 |
"step": 2,
|
37 |
-
"train_speed(iter/s)": 0.
|
38 |
},
|
39 |
{
|
40 |
"clip_ratio": 1.3441811461234465e-05,
|
@@ -44,7 +45,7 @@
|
|
44 |
"kl": 9.50181856751442e-07,
|
45 |
"learning_rate": 5e-05,
|
46 |
"loss": -0.06604708731174469,
|
47 |
-
"memory(GiB)":
|
48 |
"response_clip_ratio": 0.13671875,
|
49 |
"reward": 0.0006296975770965219,
|
50 |
"reward_std": 0.07172460854053497,
|
@@ -60,7 +61,7 @@
|
|
60 |
"kl": 1.1101365089416504e-05,
|
61 |
"learning_rate": 6.666666666666667e-05,
|
62 |
"loss": -0.06727766245603561,
|
63 |
-
"memory(GiB)":
|
64 |
"step": 4,
|
65 |
"train_speed(iter/s)": 0.000458
|
66 |
},
|
@@ -72,7 +73,7 @@
|
|
72 |
"kl": 0.00017762184143066406,
|
73 |
"learning_rate": 8.333333333333334e-05,
|
74 |
"loss": -0.09315311908721924,
|
75 |
-
"memory(GiB)":
|
76 |
"response_clip_ratio": 0.119140625,
|
77 |
"reward": -0.005135859013535082,
|
78 |
"reward_std": 0.07994875870645046,
|
@@ -86,9 +87,9 @@
|
|
86 |
"grad_norm": 0.18263348937034607,
|
87 |
"learning_rate": 0.0001,
|
88 |
"loss": -0.1041698157787323,
|
89 |
-
"memory(GiB)":
|
90 |
"step": 6,
|
91 |
-
"train_speed(iter/s)": 0.
|
92 |
},
|
93 |
{
|
94 |
"epoch": 1.4210526315789473,
|
@@ -101,7 +102,7 @@
|
|
101 |
"eval_reward_std": 0.08769983053207397,
|
102 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
103 |
"eval_rewards/RepetitionPenalty": 0.0,
|
104 |
-
"eval_runtime": 1030.
|
105 |
"eval_samples_per_second": 0.001,
|
106 |
"eval_steps_per_second": 0.001,
|
107 |
"step": 6
|
@@ -114,7 +115,7 @@
|
|
114 |
"kl": 0.017406463623046875,
|
115 |
"learning_rate": 9.991540791356342e-05,
|
116 |
"loss": -0.051375165581703186,
|
117 |
-
"memory(GiB)":
|
118 |
"response_clip_ratio": 0.1484375,
|
119 |
"reward": 0.004909618757665157,
|
120 |
"reward_std": 0.08167182095348835,
|
@@ -130,7 +131,7 @@
|
|
130 |
"kl": 0.089599609375,
|
131 |
"learning_rate": 9.966191788709716e-05,
|
132 |
"loss": -0.05105742812156677,
|
133 |
-
"memory(GiB)":
|
134 |
"step": 8,
|
135 |
"train_speed(iter/s)": 0.000433
|
136 |
},
|
@@ -142,7 +143,7 @@
|
|
142 |
"kl": 0.0963134765625,
|
143 |
"learning_rate": 9.924038765061042e-05,
|
144 |
"loss": -0.05842069163918495,
|
145 |
-
"memory(GiB)":
|
146 |
"response_clip_ratio": 0.255859375,
|
147 |
"reward": 0.03643610421568155,
|
148 |
"reward_std": 0.11898956261575222,
|
@@ -158,7 +159,7 @@
|
|
158 |
"kl": 0.1185302734375,
|
159 |
"learning_rate": 9.865224352899119e-05,
|
160 |
"loss": -0.06491819024085999,
|
161 |
-
"memory(GiB)":
|
162 |
"step": 10,
|
163 |
"train_speed(iter/s)": 0.000436
|
164 |
},
|
@@ -170,7 +171,7 @@
|
|
170 |
"kl": 0.1275634765625,
|
171 |
"learning_rate": 9.789947561577445e-05,
|
172 |
"loss": -0.04600231721997261,
|
173 |
-
"memory(GiB)":
|
174 |
"response_clip_ratio": 0.361328125,
|
175 |
"reward": 0.023204635945148766,
|
176 |
"reward_std": 0.10593634657561779,
|
@@ -184,7 +185,7 @@
|
|
184 |
"grad_norm": 0.05781339108943939,
|
185 |
"learning_rate": 9.698463103929542e-05,
|
186 |
"loss": -0.05069056898355484,
|
187 |
-
"memory(GiB)":
|
188 |
"step": 12,
|
189 |
"train_speed(iter/s)": 0.000439
|
190 |
},
|
@@ -199,7 +200,7 @@
|
|
199 |
"eval_reward_std": 0.10685288906097412,
|
200 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
201 |
"eval_rewards/RepetitionPenalty": 0.0,
|
202 |
-
"eval_runtime": 1025.
|
203 |
"eval_samples_per_second": 0.001,
|
204 |
"eval_steps_per_second": 0.001,
|
205 |
"step": 12
|
@@ -212,7 +213,7 @@
|
|
212 |
"kl": 0.151123046875,
|
213 |
"learning_rate": 9.591080534401371e-05,
|
214 |
"loss": -0.02191038429737091,
|
215 |
-
"memory(GiB)":
|
216 |
"response_clip_ratio": 0.419921875,
|
217 |
"reward": 0.035983758978545666,
|
218 |
"reward_std": 0.11553369648754597,
|
@@ -228,7 +229,7 @@
|
|
228 |
"kl": 0.169189453125,
|
229 |
"learning_rate": 9.468163201617062e-05,
|
230 |
"loss": -0.022672578692436218,
|
231 |
-
"memory(GiB)":
|
232 |
"step": 14,
|
233 |
"train_speed(iter/s)": 0.000427
|
234 |
},
|
@@ -240,7 +241,7 @@
|
|
240 |
"kl": 0.166748046875,
|
241 |
"learning_rate": 9.330127018922194e-05,
|
242 |
"loss": -0.059799157083034515,
|
243 |
-
"memory(GiB)":
|
244 |
"response_clip_ratio": 0.4765625,
|
245 |
"reward": 0.03584331553429365,
|
246 |
"reward_std": 0.11829411797225475,
|
@@ -256,7 +257,7 @@
|
|
256 |
"kl": 0.16748046875,
|
257 |
"learning_rate": 9.177439057064683e-05,
|
258 |
"loss": -0.06071458384394646,
|
259 |
-
"memory(GiB)":
|
260 |
"step": 16,
|
261 |
"train_speed(iter/s)": 0.000431
|
262 |
},
|
@@ -268,7 +269,7 @@
|
|
268 |
"kl": 0.1787109375,
|
269 |
"learning_rate": 9.01061596377522e-05,
|
270 |
"loss": -0.04504441097378731,
|
271 |
-
"memory(GiB)":
|
272 |
"response_clip_ratio": 0.5625,
|
273 |
"reward": 0.027318883687257767,
|
274 |
"reward_std": 0.10441224090754986,
|
@@ -282,7 +283,7 @@
|
|
282 |
"grad_norm": 0.005998397711664438,
|
283 |
"learning_rate": 8.83022221559489e-05,
|
284 |
"loss": -0.045487549155950546,
|
285 |
-
"memory(GiB)":
|
286 |
"step": 18,
|
287 |
"train_speed(iter/s)": 0.000432
|
288 |
},
|
@@ -297,7 +298,7 @@
|
|
297 |
"eval_reward_std": 0.10691346973180771,
|
298 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
299 |
"eval_rewards/RepetitionPenalty": 0.0,
|
300 |
-
"eval_runtime": 1041.
|
301 |
"eval_samples_per_second": 0.001,
|
302 |
"eval_steps_per_second": 0.001,
|
303 |
"step": 18
|
@@ -310,7 +311,7 @@
|
|
310 |
"kl": 0.1820068359375,
|
311 |
"learning_rate": 8.636868207865244e-05,
|
312 |
"loss": -0.03466903418302536,
|
313 |
-
"memory(GiB)":
|
314 |
"response_clip_ratio": 0.466796875,
|
315 |
"reward": 0.04069916973821819,
|
316 |
"reward_std": 0.11991005763411522,
|
@@ -326,7 +327,7 @@
|
|
326 |
"kl": 0.19287109375,
|
327 |
"learning_rate": 8.43120818934367e-05,
|
328 |
"loss": -0.03502114117145538,
|
329 |
-
"memory(GiB)":
|
330 |
"step": 20,
|
331 |
"train_speed(iter/s)": 0.000424
|
332 |
},
|
@@ -338,14 +339,14 @@
|
|
338 |
"kl": 0.17626953125,
|
339 |
"learning_rate": 8.213938048432697e-05,
|
340 |
"loss": -0.008662773296236992,
|
341 |
-
"memory(GiB)":
|
342 |
"response_clip_ratio": 0.5625,
|
343 |
"reward": 0.04996980866417289,
|
344 |
"reward_std": 0.13849420100450516,
|
345 |
"rewards/CosineReward": 0.049969930201768875,
|
346 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
347 |
"step": 21,
|
348 |
-
"train_speed(iter/s)": 0.
|
349 |
},
|
350 |
{
|
351 |
"clip_ratio": 5.869188044016482e-05,
|
@@ -354,7 +355,7 @@
|
|
354 |
"kl": 0.178955078125,
|
355 |
"learning_rate": 7.985792958513931e-05,
|
356 |
"loss": -0.008743642829358578,
|
357 |
-
"memory(GiB)":
|
358 |
"step": 22,
|
359 |
"train_speed(iter/s)": 0.000426
|
360 |
},
|
@@ -366,7 +367,7 @@
|
|
366 |
"kl": 0.1796875,
|
367 |
"learning_rate": 7.74754489035403e-05,
|
368 |
"loss": -0.03423420712351799,
|
369 |
-
"memory(GiB)":
|
370 |
"response_clip_ratio": 0.583984375,
|
371 |
"reward": 0.034468831261619925,
|
372 |
"reward_std": 0.11841745302081108,
|
@@ -380,7 +381,7 @@
|
|
380 |
"grad_norm": 0.014131724834442139,
|
381 |
"learning_rate": 7.500000000000001e-05,
|
382 |
"loss": -0.03426633030176163,
|
383 |
-
"memory(GiB)":
|
384 |
"step": 24,
|
385 |
"train_speed(iter/s)": 0.000427
|
386 |
},
|
@@ -395,7 +396,7 @@
|
|
395 |
"eval_reward_std": 0.10456253588199615,
|
396 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
397 |
"eval_rewards/RepetitionPenalty": 0.0,
|
398 |
-
"eval_runtime": 1045.
|
399 |
"eval_samples_per_second": 0.001,
|
400 |
"eval_steps_per_second": 0.001,
|
401 |
"step": 24
|
@@ -408,7 +409,7 @@
|
|
408 |
"kl": 0.1800537109375,
|
409 |
"learning_rate": 7.243995901002312e-05,
|
410 |
"loss": -0.02097315341234207,
|
411 |
-
"memory(GiB)":
|
412 |
"response_clip_ratio": 0.6171875,
|
413 |
"reward": 0.03010205877944827,
|
414 |
"reward_std": 0.10742511600255966,
|
@@ -424,7 +425,7 @@
|
|
424 |
"kl": 0.18408203125,
|
425 |
"learning_rate": 6.980398830195785e-05,
|
426 |
"loss": -0.02103913575410843,
|
427 |
-
"memory(GiB)":
|
428 |
"step": 26,
|
429 |
"train_speed(iter/s)": 0.000421
|
430 |
},
|
@@ -436,7 +437,7 @@
|
|
436 |
"kl": 0.174560546875,
|
437 |
"learning_rate": 6.710100716628344e-05,
|
438 |
"loss": -0.03593946248292923,
|
439 |
-
"memory(GiB)":
|
440 |
"response_clip_ratio": 0.513671875,
|
441 |
"reward": 0.04752760287374258,
|
442 |
"reward_std": 0.14935147762298584,
|
@@ -452,7 +453,7 @@
|
|
452 |
"kl": 0.182373046875,
|
453 |
"learning_rate": 6.434016163555452e-05,
|
454 |
"loss": -0.03595500811934471,
|
455 |
-
"memory(GiB)":
|
456 |
"step": 28,
|
457 |
"train_speed(iter/s)": 0.000422
|
458 |
}
|
|
|
1 |
{
|
2 |
+
"best_global_step": 24,
|
3 |
"best_metric": 0.04339282959699631,
|
4 |
"best_model_checkpoint": "/mnt/nvme5n1p1/trained_grpo_distill_14b_rl_70_s3/v3-20250330-200345/checkpoint-24",
|
5 |
"epoch": 6.842105263157895,
|
|
|
7 |
"global_step": 28,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": false,
|
11 |
"log_history": [
|
12 |
{
|
13 |
"clip_ratio": 0.0,
|
|
|
17 |
"kl": 0.0,
|
18 |
"learning_rate": 1.6666666666666667e-05,
|
19 |
"loss": -0.11016345024108887,
|
20 |
+
"memory(GiB)": 180.29,
|
21 |
"response_clip_ratio": 0.11328125,
|
22 |
"reward": -0.002658387296833098,
|
23 |
"reward_std": 0.06134121119976044,
|
24 |
"rewards/CosineReward": -0.0026579967816360295,
|
25 |
"rewards/RepetitionPenalty": -3.8975886695880035e-07,
|
26 |
"step": 1,
|
27 |
+
"train_speed(iter/s)": 0.000241
|
28 |
},
|
29 |
{
|
30 |
"clip_ratio": 0.0,
|
|
|
33 |
"kl": 0.0,
|
34 |
"learning_rate": 3.3333333333333335e-05,
|
35 |
"loss": -0.11016345024108887,
|
36 |
+
"memory(GiB)": 180.29,
|
37 |
"step": 2,
|
38 |
+
"train_speed(iter/s)": 0.000466
|
39 |
},
|
40 |
{
|
41 |
"clip_ratio": 1.3441811461234465e-05,
|
|
|
45 |
"kl": 9.50181856751442e-07,
|
46 |
"learning_rate": 5e-05,
|
47 |
"loss": -0.06604708731174469,
|
48 |
+
"memory(GiB)": 180.29,
|
49 |
"response_clip_ratio": 0.13671875,
|
50 |
"reward": 0.0006296975770965219,
|
51 |
"reward_std": 0.07172460854053497,
|
|
|
61 |
"kl": 1.1101365089416504e-05,
|
62 |
"learning_rate": 6.666666666666667e-05,
|
63 |
"loss": -0.06727766245603561,
|
64 |
+
"memory(GiB)": 180.29,
|
65 |
"step": 4,
|
66 |
"train_speed(iter/s)": 0.000458
|
67 |
},
|
|
|
73 |
"kl": 0.00017762184143066406,
|
74 |
"learning_rate": 8.333333333333334e-05,
|
75 |
"loss": -0.09315311908721924,
|
76 |
+
"memory(GiB)": 180.29,
|
77 |
"response_clip_ratio": 0.119140625,
|
78 |
"reward": -0.005135859013535082,
|
79 |
"reward_std": 0.07994875870645046,
|
|
|
87 |
"grad_norm": 0.18263348937034607,
|
88 |
"learning_rate": 0.0001,
|
89 |
"loss": -0.1041698157787323,
|
90 |
+
"memory(GiB)": 180.29,
|
91 |
"step": 6,
|
92 |
+
"train_speed(iter/s)": 0.000458
|
93 |
},
|
94 |
{
|
95 |
"epoch": 1.4210526315789473,
|
|
|
102 |
"eval_reward_std": 0.08769983053207397,
|
103 |
"eval_rewards/CosineReward": 0.012996694073081017,
|
104 |
"eval_rewards/RepetitionPenalty": 0.0,
|
105 |
+
"eval_runtime": 1030.1223,
|
106 |
"eval_samples_per_second": 0.001,
|
107 |
"eval_steps_per_second": 0.001,
|
108 |
"step": 6
|
|
|
115 |
"kl": 0.017406463623046875,
|
116 |
"learning_rate": 9.991540791356342e-05,
|
117 |
"loss": -0.051375165581703186,
|
118 |
+
"memory(GiB)": 180.29,
|
119 |
"response_clip_ratio": 0.1484375,
|
120 |
"reward": 0.004909618757665157,
|
121 |
"reward_std": 0.08167182095348835,
|
|
|
131 |
"kl": 0.089599609375,
|
132 |
"learning_rate": 9.966191788709716e-05,
|
133 |
"loss": -0.05105742812156677,
|
134 |
+
"memory(GiB)": 180.29,
|
135 |
"step": 8,
|
136 |
"train_speed(iter/s)": 0.000433
|
137 |
},
|
|
|
143 |
"kl": 0.0963134765625,
|
144 |
"learning_rate": 9.924038765061042e-05,
|
145 |
"loss": -0.05842069163918495,
|
146 |
+
"memory(GiB)": 180.29,
|
147 |
"response_clip_ratio": 0.255859375,
|
148 |
"reward": 0.03643610421568155,
|
149 |
"reward_std": 0.11898956261575222,
|
|
|
159 |
"kl": 0.1185302734375,
|
160 |
"learning_rate": 9.865224352899119e-05,
|
161 |
"loss": -0.06491819024085999,
|
162 |
+
"memory(GiB)": 180.29,
|
163 |
"step": 10,
|
164 |
"train_speed(iter/s)": 0.000436
|
165 |
},
|
|
|
171 |
"kl": 0.1275634765625,
|
172 |
"learning_rate": 9.789947561577445e-05,
|
173 |
"loss": -0.04600231721997261,
|
174 |
+
"memory(GiB)": 180.29,
|
175 |
"response_clip_ratio": 0.361328125,
|
176 |
"reward": 0.023204635945148766,
|
177 |
"reward_std": 0.10593634657561779,
|
|
|
185 |
"grad_norm": 0.05781339108943939,
|
186 |
"learning_rate": 9.698463103929542e-05,
|
187 |
"loss": -0.05069056898355484,
|
188 |
+
"memory(GiB)": 180.29,
|
189 |
"step": 12,
|
190 |
"train_speed(iter/s)": 0.000439
|
191 |
},
|
|
|
200 |
"eval_reward_std": 0.10685288906097412,
|
201 |
"eval_rewards/CosineReward": 0.03234308212995529,
|
202 |
"eval_rewards/RepetitionPenalty": 0.0,
|
203 |
+
"eval_runtime": 1025.9045,
|
204 |
"eval_samples_per_second": 0.001,
|
205 |
"eval_steps_per_second": 0.001,
|
206 |
"step": 12
|
|
|
213 |
"kl": 0.151123046875,
|
214 |
"learning_rate": 9.591080534401371e-05,
|
215 |
"loss": -0.02191038429737091,
|
216 |
+
"memory(GiB)": 180.29,
|
217 |
"response_clip_ratio": 0.419921875,
|
218 |
"reward": 0.035983758978545666,
|
219 |
"reward_std": 0.11553369648754597,
|
|
|
229 |
"kl": 0.169189453125,
|
230 |
"learning_rate": 9.468163201617062e-05,
|
231 |
"loss": -0.022672578692436218,
|
232 |
+
"memory(GiB)": 180.29,
|
233 |
"step": 14,
|
234 |
"train_speed(iter/s)": 0.000427
|
235 |
},
|
|
|
241 |
"kl": 0.166748046875,
|
242 |
"learning_rate": 9.330127018922194e-05,
|
243 |
"loss": -0.059799157083034515,
|
244 |
+
"memory(GiB)": 180.29,
|
245 |
"response_clip_ratio": 0.4765625,
|
246 |
"reward": 0.03584331553429365,
|
247 |
"reward_std": 0.11829411797225475,
|
|
|
257 |
"kl": 0.16748046875,
|
258 |
"learning_rate": 9.177439057064683e-05,
|
259 |
"loss": -0.06071458384394646,
|
260 |
+
"memory(GiB)": 180.29,
|
261 |
"step": 16,
|
262 |
"train_speed(iter/s)": 0.000431
|
263 |
},
|
|
|
269 |
"kl": 0.1787109375,
|
270 |
"learning_rate": 9.01061596377522e-05,
|
271 |
"loss": -0.04504441097378731,
|
272 |
+
"memory(GiB)": 180.29,
|
273 |
"response_clip_ratio": 0.5625,
|
274 |
"reward": 0.027318883687257767,
|
275 |
"reward_std": 0.10441224090754986,
|
|
|
283 |
"grad_norm": 0.005998397711664438,
|
284 |
"learning_rate": 8.83022221559489e-05,
|
285 |
"loss": -0.045487549155950546,
|
286 |
+
"memory(GiB)": 180.29,
|
287 |
"step": 18,
|
288 |
"train_speed(iter/s)": 0.000432
|
289 |
},
|
|
|
298 |
"eval_reward_std": 0.10691346973180771,
|
299 |
"eval_rewards/CosineReward": 0.03729327768087387,
|
300 |
"eval_rewards/RepetitionPenalty": 0.0,
|
301 |
+
"eval_runtime": 1041.2321,
|
302 |
"eval_samples_per_second": 0.001,
|
303 |
"eval_steps_per_second": 0.001,
|
304 |
"step": 18
|
|
|
311 |
"kl": 0.1820068359375,
|
312 |
"learning_rate": 8.636868207865244e-05,
|
313 |
"loss": -0.03466903418302536,
|
314 |
+
"memory(GiB)": 180.29,
|
315 |
"response_clip_ratio": 0.466796875,
|
316 |
"reward": 0.04069916973821819,
|
317 |
"reward_std": 0.11991005763411522,
|
|
|
327 |
"kl": 0.19287109375,
|
328 |
"learning_rate": 8.43120818934367e-05,
|
329 |
"loss": -0.03502114117145538,
|
330 |
+
"memory(GiB)": 180.29,
|
331 |
"step": 20,
|
332 |
"train_speed(iter/s)": 0.000424
|
333 |
},
|
|
|
339 |
"kl": 0.17626953125,
|
340 |
"learning_rate": 8.213938048432697e-05,
|
341 |
"loss": -0.008662773296236992,
|
342 |
+
"memory(GiB)": 180.29,
|
343 |
"response_clip_ratio": 0.5625,
|
344 |
"reward": 0.04996980866417289,
|
345 |
"reward_std": 0.13849420100450516,
|
346 |
"rewards/CosineReward": 0.049969930201768875,
|
347 |
"rewards/RepetitionPenalty": -1.1864573679076784e-07,
|
348 |
"step": 21,
|
349 |
+
"train_speed(iter/s)": 0.000407
|
350 |
},
|
351 |
{
|
352 |
"clip_ratio": 5.869188044016482e-05,
|
|
|
355 |
"kl": 0.178955078125,
|
356 |
"learning_rate": 7.985792958513931e-05,
|
357 |
"loss": -0.008743642829358578,
|
358 |
+
"memory(GiB)": 180.29,
|
359 |
"step": 22,
|
360 |
"train_speed(iter/s)": 0.000426
|
361 |
},
|
|
|
367 |
"kl": 0.1796875,
|
368 |
"learning_rate": 7.74754489035403e-05,
|
369 |
"loss": -0.03423420712351799,
|
370 |
+
"memory(GiB)": 180.29,
|
371 |
"response_clip_ratio": 0.583984375,
|
372 |
"reward": 0.034468831261619925,
|
373 |
"reward_std": 0.11841745302081108,
|
|
|
381 |
"grad_norm": 0.014131724834442139,
|
382 |
"learning_rate": 7.500000000000001e-05,
|
383 |
"loss": -0.03426633030176163,
|
384 |
+
"memory(GiB)": 180.29,
|
385 |
"step": 24,
|
386 |
"train_speed(iter/s)": 0.000427
|
387 |
},
|
|
|
396 |
"eval_reward_std": 0.10456253588199615,
|
397 |
"eval_rewards/CosineReward": 0.04339282959699631,
|
398 |
"eval_rewards/RepetitionPenalty": 0.0,
|
399 |
+
"eval_runtime": 1045.0616,
|
400 |
"eval_samples_per_second": 0.001,
|
401 |
"eval_steps_per_second": 0.001,
|
402 |
"step": 24
|
|
|
409 |
"kl": 0.1800537109375,
|
410 |
"learning_rate": 7.243995901002312e-05,
|
411 |
"loss": -0.02097315341234207,
|
412 |
+
"memory(GiB)": 180.29,
|
413 |
"response_clip_ratio": 0.6171875,
|
414 |
"reward": 0.03010205877944827,
|
415 |
"reward_std": 0.10742511600255966,
|
|
|
425 |
"kl": 0.18408203125,
|
426 |
"learning_rate": 6.980398830195785e-05,
|
427 |
"loss": -0.02103913575410843,
|
428 |
+
"memory(GiB)": 180.29,
|
429 |
"step": 26,
|
430 |
"train_speed(iter/s)": 0.000421
|
431 |
},
|
|
|
437 |
"kl": 0.174560546875,
|
438 |
"learning_rate": 6.710100716628344e-05,
|
439 |
"loss": -0.03593946248292923,
|
440 |
+
"memory(GiB)": 180.29,
|
441 |
"response_clip_ratio": 0.513671875,
|
442 |
"reward": 0.04752760287374258,
|
443 |
"reward_std": 0.14935147762298584,
|
|
|
453 |
"kl": 0.182373046875,
|
454 |
"learning_rate": 6.434016163555452e-05,
|
455 |
"loss": -0.03595500811934471,
|
456 |
+
"memory(GiB)": 180.29,
|
457 |
"step": 28,
|
458 |
"train_speed(iter/s)": 0.000422
|
459 |
}
|
checkpoint-28/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7039ba231031019d68d36961179e879852b9f8a6de15562e5b792330bcbb4412
|
3 |
size 9809
|
checkpoint-30/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"
|
27 |
-
"gate_proj",
|
28 |
-
"k_proj",
|
29 |
"o_proj",
|
30 |
-
"up_proj",
|
31 |
"q_proj",
|
32 |
-
"
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"down_proj",
|
|
|
|
|
27 |
"o_proj",
|
|
|
28 |
"q_proj",
|
29 |
+
"v_proj",
|
30 |
+
"up_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
checkpoint-30/global_step30/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e55637d10a606b1c4548d11222cf2203be3732556568beb057466cdc664ad4d
|
3 |
+
size 51616527
|
checkpoint-30/global_step30/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2f918a8d78d02a6bb0c3b78aa535ba635edd803e57b1d29a398f8044ee1f23d
|
3 |
+
size 51616015
|
checkpoint-30/global_step30/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43e9a17698868042b6c5d413425d6800e585ce6747e333ca3c1f237999bee440
|
3 |
+
size 51616527
|
checkpoint-30/global_step30/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5466499266f4b34f807e3c8219f5d1dc961145b81040242cdc2604123afb29f
|
3 |
+
size 51616015
|
checkpoint-30/global_step30/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cf7bbc813d8c4a9ddd84f99f272e1a3597a0aedd614f42be8b26292f36bd0d4
|
3 |
+
size 51616527
|
checkpoint-30/global_step30/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0b43f39939991ea04c843fb1af82098f35e53f66d1ed284bfe74b2b156e3c90
|
3 |
+
size 51616015
|
checkpoint-30/global_step30/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c6064fa63ee43597a575223e5d27d1a9f78c46ca5d2249d2c286a8e11546906
|
3 |
+
size 51616517
|
checkpoint-30/global_step30/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:476e35fb26447a6dab0120c7bb123daf6c56d682fa095ba3520478075d361f49
|
3 |
+
size 51616005
|
checkpoint-30/rng_state_10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e2c4ae8a7a72d5fa3fffa5b1790f7c05f343652ff279b267cb02c1ea1450019
|
3 |
+
size 16404
|
checkpoint-30/rng_state_11.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12a2477af9f9146885ae739aae7fce8caed34fdcc7b6ba670110eaf5af9bbd4f
|
3 |
+
size 16468
|
checkpoint-30/rng_state_12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6e02e4e933e0565bcac02de812b35f312d7d3c1fa524e7ec565c0b58168b2c5
|
3 |
+
size 16340
|
checkpoint-30/rng_state_13.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83f60cc7fc25a6bf581d13962550c94c3f732a7fcebe112c13d88d7be34ff3ee
|
3 |
+
size 16468
|
checkpoint-30/rng_state_14.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e89c5c47e96e9a670905d6a024b717057f7e744cb15d841066c2543d38d493ca
|
3 |
+
size 16404
|
checkpoint-30/rng_state_15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62a09586fa73abe0c5bd3addd127d400057b84dc07190df99c680834ca075d87
|
3 |
+
size 16404
|
checkpoint-30/rng_state_8.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:806ea740b8aa5b01e84b7e8a2de7b547cd7ef04618091a007b4e2b7e2edcc6cb
|
3 |
+
size 16389
|
checkpoint-30/rng_state_9.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c96fddfdb910bf5037b513e22fd76243525edf1f04813a9013acd71733a822f
|
3 |
+
size 16389
|