tranbaninh committed on
Commit
02638e1
·
verified ·
1 Parent(s): fdab27f

End of training

Browse files
adapter_config.json CHANGED
@@ -24,13 +24,13 @@
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
27
- "gate_proj",
28
- "down_proj",
29
  "v_proj",
30
- "q_proj",
31
  "o_proj",
32
  "k_proj",
33
- "up_proj"
 
 
 
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
 
24
  "rank_pattern": {},
25
  "revision": null,
26
  "target_modules": [
 
 
27
  "v_proj",
 
28
  "o_proj",
29
  "k_proj",
30
+ "gate_proj",
31
+ "up_proj",
32
+ "q_proj",
33
+ "down_proj"
34
  ],
35
  "task_type": "CAUSAL_LM",
36
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:232e7e8a88bc8d6a7ad1c9a02dd8ae4c482b7dd03897fe558551bb106f1ff028
3
  size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bad1efde990dbc16a8a08a683016a42d4e555d1877ccfe293dd47291f130a9c
3
  size 35237104
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0013348124106414617,
4
- "train_runtime": 223.6386,
5
  "train_samples": 14,
6
- "train_samples_per_second": 1.431,
7
- "train_steps_per_second": 0.089
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 2.7043432623941042e-06,
4
+ "train_runtime": 967.2338,
5
  "train_samples": 14,
6
+ "train_samples_per_second": 0.331,
7
+ "train_steps_per_second": 0.021
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0013348124106414617,
4
- "train_runtime": 223.6386,
5
  "train_samples": 14,
6
- "train_samples_per_second": 1.431,
7
- "train_steps_per_second": 0.089
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 2.7043432623941042e-06,
4
+ "train_runtime": 967.2338,
5
  "train_samples": 14,
6
+ "train_samples_per_second": 0.331,
7
+ "train_steps_per_second": 0.021
8
  }
trainer_state.json CHANGED
@@ -9,203 +9,203 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 224.375,
13
  "epoch": 0.5714285714285714,
14
- "grad_norm": 4.6907958984375,
15
- "kl": 0.7289290763437748,
16
  "learning_rate": 4.965903258506806e-07,
17
- "loss": 0.0007,
18
- "reward": 2.210346505045891,
19
- "reward_std": 1.094197740778327,
20
- "rewards/concensus_correctness_reward_func": 0.23987499997019768,
21
- "rewards/consensus_reward_func": 0.25,
22
  "rewards/cumulative_reward_2": 0.0,
23
- "rewards/final_correctness_reward_func": 0.1875,
24
- "rewards/question_recreation_reward_func": 0.37503394670784473,
25
  "rewards/soft_format_reward_func": 0.0,
26
- "rewards/strict_format_reward_func": 0.21875,
27
- "rewards/xmlcount_reward_func": 0.9391875043511391,
28
  "step": 2
29
  },
30
  {
31
- "completion_length": 106.0,
32
  "epoch": 1.0,
33
- "grad_norm": 3.8641750812530518,
34
- "kl": 0.3693273241321246,
35
  "learning_rate": 4.698684378016222e-07,
36
- "loss": 0.0003,
37
- "reward": 3.373856246471405,
38
- "reward_std": 2.5780710726976395,
39
- "rewards/concensus_correctness_reward_func": 0.992999995748202,
40
- "rewards/consensus_reward_func": 0.16666666666666666,
41
  "rewards/cumulative_reward_2": 0.0,
42
- "rewards/final_correctness_reward_func": 0.3333333333333333,
43
- "rewards/question_recreation_reward_func": 0.5558145766456922,
44
  "rewards/soft_format_reward_func": 0.0,
45
- "rewards/strict_format_reward_func": 0.25,
46
- "rewards/xmlcount_reward_func": 1.075041671593984,
47
  "step": 4
48
  },
49
  {
50
- "completion_length": 190.5,
51
  "epoch": 1.5714285714285714,
52
- "grad_norm": 3.4989094734191895,
53
- "kl": 1.3430762365460396,
54
  "learning_rate": 4.193203929064353e-07,
55
- "loss": 0.0013,
56
- "reward": 1.9955407828092575,
57
- "reward_std": 1.9218887463212013,
58
- "rewards/concensus_correctness_reward_func": 0.3619999960064888,
59
- "rewards/consensus_reward_func": 0.3125,
60
  "rewards/cumulative_reward_2": 0.0,
61
- "rewards/final_correctness_reward_func": 0.25,
62
- "rewards/question_recreation_reward_func": 0.22154081566259265,
63
- "rewards/soft_format_reward_func": 0.0,
64
- "rewards/strict_format_reward_func": 0.203125,
65
- "rewards/xmlcount_reward_func": 0.6463750079274178,
66
  "step": 6
67
  },
68
  {
69
- "completion_length": 197.29166666666666,
70
  "epoch": 2.0,
71
- "grad_norm": 1.6900211572647095,
72
- "kl": 10.67092294494311,
73
  "learning_rate": 3.5042385616324236e-07,
74
- "loss": 0.008,
75
- "reward": 2.3348856767018638,
76
- "reward_std": 0.928510844707489,
77
- "rewards/concensus_correctness_reward_func": 0.15933332840601602,
78
- "rewards/consensus_reward_func": 0.16666666666666666,
79
  "rewards/cumulative_reward_2": 0.0,
80
- "rewards/final_correctness_reward_func": 0.3333333333333333,
81
- "rewards/question_recreation_reward_func": 0.5513440370559692,
82
  "rewards/soft_format_reward_func": 0.0,
83
- "rewards/strict_format_reward_func": 0.1875,
84
- "rewards/xmlcount_reward_func": 0.9367083311080933,
85
  "step": 8
86
  },
87
  {
88
- "completion_length": 176.40625,
89
  "epoch": 2.571428571428571,
90
- "grad_norm": 3.9968183040618896,
91
- "kl": 0.845702612772584,
92
  "learning_rate": 2.706448363680831e-07,
93
- "loss": 0.0008,
94
- "reward": 2.57753723859787,
95
- "reward_std": 1.3181768357753754,
96
- "rewards/concensus_correctness_reward_func": 0.3593749888241291,
97
- "rewards/consensus_reward_func": 0.375,
98
  "rewards/cumulative_reward_2": 0.0,
99
  "rewards/final_correctness_reward_func": 0.25,
100
- "rewards/question_recreation_reward_func": 0.40291227400302887,
101
  "rewards/soft_format_reward_func": 0.0,
102
- "rewards/strict_format_reward_func": 0.1875,
103
- "rewards/xmlcount_reward_func": 1.0027499943971634,
104
  "step": 10
105
  },
106
  {
107
- "completion_length": 157.41666666666666,
108
  "epoch": 3.0,
109
- "grad_norm": 2.858097553253174,
110
- "kl": 0.4163093989094098,
111
  "learning_rate": 1.886286282148002e-07,
112
- "loss": 0.0003,
113
- "reward": 2.235823631286621,
114
- "reward_std": 1.7302586088577907,
115
- "rewards/concensus_correctness_reward_func": 0.23999999463558197,
116
- "rewards/consensus_reward_func": 0.25,
117
  "rewards/cumulative_reward_2": 0.0,
118
- "rewards/final_correctness_reward_func": 0.3333333333333333,
119
- "rewards/question_recreation_reward_func": 0.4821569509804249,
120
  "rewards/soft_format_reward_func": 0.0,
121
- "rewards/strict_format_reward_func": 0.16666666666666666,
122
- "rewards/xmlcount_reward_func": 0.7636666595935822,
123
  "step": 12
124
  },
125
  {
126
- "completion_length": 119.46875,
127
  "epoch": 3.571428571428571,
128
- "grad_norm": 3.6504130363464355,
129
- "kl": 0.5300018824636936,
130
  "learning_rate": 1.1326296046939333e-07,
131
- "loss": 0.0005,
132
- "reward": 2.6175443530082703,
133
- "reward_std": 1.588691521435976,
134
- "rewards/concensus_correctness_reward_func": 0.3619999960064888,
135
- "rewards/consensus_reward_func": 0.3125,
136
  "rewards/cumulative_reward_2": 0.0,
137
- "rewards/final_correctness_reward_func": 0.1875,
138
- "rewards/question_recreation_reward_func": 0.49226302560418844,
139
  "rewards/soft_format_reward_func": 0.0,
140
- "rewards/strict_format_reward_func": 0.265625,
141
- "rewards/xmlcount_reward_func": 0.9976562485098839,
142
  "step": 14
143
  },
144
  {
145
- "completion_length": 152.16666666666666,
146
  "epoch": 4.0,
147
- "grad_norm": 1.7039144039154053,
148
- "kl": 0.3265587662657102,
149
  "learning_rate": 5.271487265090163e-08,
150
- "loss": 0.0002,
151
- "reward": 2.018751641114553,
152
- "reward_std": 1.0230716715256374,
153
- "rewards/concensus_correctness_reward_func": 0.07999999821186066,
154
- "rewards/consensus_reward_func": 0.08333333333333333,
155
  "rewards/cumulative_reward_2": 0.0,
156
- "rewards/final_correctness_reward_func": 0.3333333333333333,
157
- "rewards/question_recreation_reward_func": 0.2857933019598325,
158
  "rewards/soft_format_reward_func": 0.0,
159
- "rewards/strict_format_reward_func": 0.25,
160
- "rewards/xmlcount_reward_func": 0.9862916668256124,
161
  "step": 16
162
  },
163
  {
164
- "completion_length": 166.28125,
165
  "epoch": 4.571428571428571,
166
- "grad_norm": 3.133758544921875,
167
- "kl": 0.742509638890624,
168
  "learning_rate": 1.3545689574841341e-08,
169
- "loss": 0.0007,
170
- "reward": 2.7243216931819916,
171
- "reward_std": 2.6493867076933384,
172
- "rewards/concensus_correctness_reward_func": 0.804749995470047,
173
- "rewards/consensus_reward_func": 0.1875,
174
  "rewards/cumulative_reward_2": 0.0,
175
- "rewards/final_correctness_reward_func": 0.25,
176
- "rewards/question_recreation_reward_func": 0.4959466829895973,
177
  "rewards/soft_format_reward_func": 0.0,
178
- "rewards/strict_format_reward_func": 0.25,
179
- "rewards/xmlcount_reward_func": 0.7361249998211861,
180
  "step": 18
181
  },
182
  {
183
- "completion_length": 182.70833333333334,
184
  "epoch": 5.0,
185
- "grad_norm": 1.9381123781204224,
186
- "kl": 0.42725201696157455,
187
  "learning_rate": 0.0,
188
- "loss": 0.0003,
189
- "reward": 2.9027516742547355,
190
- "reward_std": 2.645435392856598,
191
- "rewards/concensus_correctness_reward_func": 0.9129999975363413,
192
- "rewards/consensus_reward_func": 0.08333333333333333,
193
  "rewards/cumulative_reward_2": 0.0,
194
- "rewards/final_correctness_reward_func": 0.3333333333333333,
195
- "rewards/question_recreation_reward_func": 0.3405850703517596,
196
  "rewards/soft_format_reward_func": 0.0,
197
- "rewards/strict_format_reward_func": 0.25,
198
- "rewards/xmlcount_reward_func": 0.9824999968210856,
199
  "step": 20
200
  },
201
  {
202
  "epoch": 5.0,
203
  "step": 20,
204
  "total_flos": 0.0,
205
- "train_loss": 0.0013348124106414617,
206
- "train_runtime": 223.6386,
207
- "train_samples_per_second": 1.431,
208
- "train_steps_per_second": 0.089
209
  }
210
  ],
211
  "logging_steps": 2,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "completion_length": 357.59375,
13
  "epoch": 0.5714285714285714,
14
+ "grad_norm": 1.550470232963562,
15
+ "kl": 0.0067819759860867634,
16
  "learning_rate": 4.965903258506806e-07,
17
+ "loss": 0.0,
18
+ "reward": 0.359556189738214,
19
+ "reward_std": 0.5430519804358482,
20
+ "rewards/concensus_correctness_reward_func": 0.015124999918043613,
21
+ "rewards/consensus_reward_func": 0.0,
22
  "rewards/cumulative_reward_2": 0.0,
23
+ "rewards/final_correctness_reward_func": 0.0,
24
+ "rewards/question_recreation_reward_func": 0.20552494376897812,
25
  "rewards/soft_format_reward_func": 0.0,
26
+ "rewards/strict_format_reward_func": 0.0,
27
+ "rewards/xmlcount_reward_func": 0.13890625163912773,
28
  "step": 2
29
  },
30
  {
31
+ "completion_length": 385.2083333333333,
32
  "epoch": 1.0,
33
+ "grad_norm": 1.0296989679336548,
34
+ "kl": 0.0013993951433803886,
35
  "learning_rate": 4.698684378016222e-07,
36
+ "loss": 0.0,
37
+ "reward": 0.7451420103510221,
38
+ "reward_std": 0.9848504811525345,
39
+ "rewards/concensus_correctness_reward_func": 0.010416666666666666,
40
+ "rewards/consensus_reward_func": 0.0,
41
  "rewards/cumulative_reward_2": 0.0,
42
+ "rewards/final_correctness_reward_func": 0.4166666666666667,
43
+ "rewards/question_recreation_reward_func": 0.3348920363932848,
44
  "rewards/soft_format_reward_func": 0.0,
45
+ "rewards/strict_format_reward_func": 0.0,
46
+ "rewards/xmlcount_reward_func": -0.01683332274357478,
47
  "step": 4
48
  },
49
  {
50
+ "completion_length": 387.0,
51
  "epoch": 1.5714285714285714,
52
+ "grad_norm": 2.4776289463043213,
53
+ "kl": 0.004400491248816252,
54
  "learning_rate": 4.193203929064353e-07,
55
+ "loss": 0.0,
56
+ "reward": 0.3268889347091317,
57
+ "reward_std": 0.5764277145499364,
58
+ "rewards/concensus_correctness_reward_func": 0.0,
59
+ "rewards/consensus_reward_func": 0.0,
60
  "rewards/cumulative_reward_2": 0.0,
61
+ "rewards/final_correctness_reward_func": 0.0,
62
+ "rewards/question_recreation_reward_func": 0.24620142811909318,
63
+ "rewards/soft_format_reward_func": 0.015625,
64
+ "rewards/strict_format_reward_func": 0.0,
65
+ "rewards/xmlcount_reward_func": 0.06506249727681279,
66
  "step": 6
67
  },
68
  {
69
+ "completion_length": 301.625,
70
  "epoch": 2.0,
71
+ "grad_norm": 4.630437850952148,
72
+ "kl": 0.008956898062024266,
73
  "learning_rate": 3.5042385616324236e-07,
74
+ "loss": 0.0,
75
+ "reward": 0.669816846648852,
76
+ "reward_std": 0.9645149211088816,
77
+ "rewards/concensus_correctness_reward_func": 0.0,
78
+ "rewards/consensus_reward_func": 0.0,
79
  "rewards/cumulative_reward_2": 0.0,
80
+ "rewards/final_correctness_reward_func": 0.16666666666666666,
81
+ "rewards/question_recreation_reward_func": 0.28952519533534843,
82
  "rewards/soft_format_reward_func": 0.0,
83
+ "rewards/strict_format_reward_func": 0.0,
84
+ "rewards/xmlcount_reward_func": 0.21362500016887984,
85
  "step": 8
86
  },
87
  {
88
+ "completion_length": 360.84375,
89
  "epoch": 2.571428571428571,
90
+ "grad_norm": 3.974666118621826,
91
+ "kl": 0.0014987692411523312,
92
  "learning_rate": 2.706448363680831e-07,
93
+ "loss": 0.0,
94
+ "reward": 1.445620215497911,
95
+ "reward_std": 1.892067939043045,
96
+ "rewards/concensus_correctness_reward_func": 0.625,
97
+ "rewards/consensus_reward_func": 0.0625,
98
  "rewards/cumulative_reward_2": 0.0,
99
  "rewards/final_correctness_reward_func": 0.25,
100
+ "rewards/question_recreation_reward_func": 0.2774638633709401,
101
  "rewards/soft_format_reward_func": 0.0,
102
+ "rewards/strict_format_reward_func": 0.0,
103
+ "rewards/xmlcount_reward_func": 0.23065625689923763,
104
  "step": 10
105
  },
106
  {
107
+ "completion_length": 430.0833333333333,
108
  "epoch": 3.0,
109
+ "grad_norm": 1.2759487628936768,
110
+ "kl": 0.0015422078043532868,
111
  "learning_rate": 1.886286282148002e-07,
112
+ "loss": 0.0,
113
+ "reward": 0.15630086387197176,
114
+ "reward_std": 0.9415244202439984,
115
+ "rewards/concensus_correctness_reward_func": 0.0,
116
+ "rewards/consensus_reward_func": 0.0,
117
  "rewards/cumulative_reward_2": 0.0,
118
+ "rewards/final_correctness_reward_func": 0.08333333333333333,
119
+ "rewards/question_recreation_reward_func": 0.2368425317108631,
120
  "rewards/soft_format_reward_func": 0.0,
121
+ "rewards/strict_format_reward_func": 0.0,
122
+ "rewards/xmlcount_reward_func": -0.16387500117222467,
123
  "step": 12
124
  },
125
  {
126
+ "completion_length": 353.65625,
127
  "epoch": 3.571428571428571,
128
+ "grad_norm": 3.5632550716400146,
129
+ "kl": 0.0016433208002126776,
130
  "learning_rate": 1.1326296046939333e-07,
131
+ "loss": 0.0,
132
+ "reward": 0.6132600959390402,
133
+ "reward_std": 0.8666674289852381,
134
+ "rewards/concensus_correctness_reward_func": 0.0,
135
+ "rewards/consensus_reward_func": 0.0,
136
  "rewards/cumulative_reward_2": 0.0,
137
+ "rewards/final_correctness_reward_func": 0.25,
138
+ "rewards/question_recreation_reward_func": 0.2559788469225168,
139
  "rewards/soft_format_reward_func": 0.0,
140
+ "rewards/strict_format_reward_func": 0.0,
141
+ "rewards/xmlcount_reward_func": 0.10728124715387821,
142
  "step": 14
143
  },
144
  {
145
+ "completion_length": 477.875,
146
  "epoch": 4.0,
147
+ "grad_norm": 2.0089707374572754,
148
+ "kl": 0.0014073919446673244,
149
  "learning_rate": 5.271487265090163e-08,
150
+ "loss": 0.0,
151
+ "reward": 0.03820791778465112,
152
+ "reward_std": 0.6295135350277027,
153
+ "rewards/concensus_correctness_reward_func": 0.020166666557391483,
154
+ "rewards/consensus_reward_func": 0.0,
155
  "rewards/cumulative_reward_2": 0.0,
156
+ "rewards/final_correctness_reward_func": 0.0,
157
+ "rewards/question_recreation_reward_func": 0.166707926740249,
158
  "rewards/soft_format_reward_func": 0.0,
159
+ "rewards/strict_format_reward_func": 0.0,
160
+ "rewards/xmlcount_reward_func": -0.14866666992505392,
161
  "step": 16
162
  },
163
  {
164
+ "completion_length": 394.34375,
165
  "epoch": 4.571428571428571,
166
+ "grad_norm": 2.704482316970825,
167
+ "kl": 0.0014581629366148263,
168
  "learning_rate": 1.3545689574841341e-08,
169
+ "loss": 0.0,
170
+ "reward": 0.3895467920228839,
171
+ "reward_std": 0.7705125007778406,
172
+ "rewards/concensus_correctness_reward_func": 0.014937499538064003,
173
+ "rewards/consensus_reward_func": 0.0,
174
  "rewards/cumulative_reward_2": 0.0,
175
+ "rewards/final_correctness_reward_func": 0.125,
176
+ "rewards/question_recreation_reward_func": 0.30085928039625287,
177
  "rewards/soft_format_reward_func": 0.0,
178
+ "rewards/strict_format_reward_func": 0.0,
179
+ "rewards/xmlcount_reward_func": -0.051249995827674866,
180
  "step": 18
181
  },
182
  {
183
+ "completion_length": 338.4583333333333,
184
  "epoch": 5.0,
185
+ "grad_norm": 1.2185847759246826,
186
+ "kl": 0.0017583037454945345,
187
  "learning_rate": 0.0,
188
+ "loss": 0.0,
189
+ "reward": 2.36764890452226,
190
+ "reward_std": 2.5429103871186576,
191
+ "rewards/concensus_correctness_reward_func": 1.6666666666666667,
192
+ "rewards/consensus_reward_func": 0.16666666666666666,
193
  "rewards/cumulative_reward_2": 0.0,
194
+ "rewards/final_correctness_reward_func": 0.16666666666666666,
195
+ "rewards/question_recreation_reward_func": 0.21544045334060988,
196
  "rewards/soft_format_reward_func": 0.0,
197
+ "rewards/strict_format_reward_func": 0.0,
198
+ "rewards/xmlcount_reward_func": 0.15220833321412405,
199
  "step": 20
200
  },
201
  {
202
  "epoch": 5.0,
203
  "step": 20,
204
  "total_flos": 0.0,
205
+ "train_loss": 2.7043432623941042e-06,
206
+ "train_runtime": 967.2338,
207
+ "train_samples_per_second": 0.331,
208
+ "train_steps_per_second": 0.021
209
  }
210
  ],
211
  "logging_steps": 2,