xaobai commited on
Commit
8e33675
·
verified ·
1 Parent(s): b975eed

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +4 -4
  2. model.safetensors +1 -1
  3. train_results.json +4 -4
  4. trainer_state.json +117 -117
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 34.63017091080546,
4
- "train_runtime": 149.3925,
5
  "train_samples": 160,
6
- "train_samples_per_second": 2.142,
7
- "train_steps_per_second": 0.134
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 6.360948879132048,
4
+ "train_runtime": 156.959,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 2.039,
7
+ "train_steps_per_second": 0.127
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc07ae4a8fbf0c1414e320168f24997972793a8b2fedef346b971fadc159d16e
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e66757d33a4ef759c72a4e60b1818a0deada2eb3bd3ac7a0577ce18a0ead3b75
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 34.63017091080546,
4
- "train_runtime": 149.3925,
5
  "train_samples": 160,
6
- "train_samples_per_second": 2.142,
7
- "train_steps_per_second": 0.134
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 6.360948879132048,
4
+ "train_runtime": 156.959,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 2.039,
7
+ "train_steps_per_second": 0.127
8
  }
trainer_state.json CHANGED
@@ -10,203 +10,203 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 205.625,
14
  "epoch": 0.1,
15
- "grad_norm": 99.96902465820312,
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
  "loss": -0.0,
19
- "reward": 4.236496409517713,
20
- "reward_std": 1.159384369617328,
21
- "rewards/concensus_correctness_reward_func": 1.258937492966652,
22
- "rewards/consensus_reward_func": 1.25,
23
  "rewards/cumulative_reward_2": 0.0,
24
- "rewards/final_correctness_reward_func": 0.3125,
25
- "rewards/question_recreation_reward_func": 0.6489027393981814,
26
  "rewards/soft_format_reward_func": 0.0,
27
- "rewards/strict_format_reward_func": 0.140625,
28
- "rewards/xmlcount_reward_func": 0.6255312513094395,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 143.25,
33
  "epoch": 0.2,
34
- "grad_norm": 484.384521484375,
35
- "kl": 36.32083459291607,
36
  "learning_rate": 4.698684378016222e-07,
37
- "loss": 0.0363,
38
- "reward": 5.732543356716633,
39
- "reward_std": 0.5354217339772731,
40
- "rewards/concensus_correctness_reward_func": 1.6823749765753746,
41
- "rewards/consensus_reward_func": 1.5625,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.25,
44
- "rewards/question_recreation_reward_func": 0.8452621176838875,
45
  "rewards/soft_format_reward_func": 0.0,
46
- "rewards/strict_format_reward_func": 0.3125,
47
- "rewards/xmlcount_reward_func": 1.0799062475562096,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 155.875,
52
  "epoch": 0.3,
53
- "grad_norm": 169.1754913330078,
54
- "kl": 67.9221153233666,
55
  "learning_rate": 4.193203929064353e-07,
56
- "loss": 0.0679,
57
- "reward": 6.084514006972313,
58
- "reward_std": 1.0455920902313665,
59
- "rewards/concensus_correctness_reward_func": 1.7961874902248383,
60
- "rewards/consensus_reward_func": 1.6875,
61
  "rewards/cumulative_reward_2": 0.0,
62
- "rewards/final_correctness_reward_func": 0.375,
63
- "rewards/question_recreation_reward_func": 0.8235765150748193,
64
  "rewards/soft_format_reward_func": 0.0,
65
- "rewards/strict_format_reward_func": 0.3125,
66
- "rewards/xmlcount_reward_func": 1.089749999344349,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 136.0625,
71
  "epoch": 0.4,
72
- "grad_norm": 203.0240478515625,
73
- "kl": 148.1647337176837,
74
  "learning_rate": 3.5042385616324236e-07,
75
- "loss": 0.1482,
76
- "reward": 5.774978786706924,
77
- "reward_std": 0.8584180986508727,
78
- "rewards/concensus_correctness_reward_func": 1.6950000068172812,
79
- "rewards/consensus_reward_func": 1.75,
80
  "rewards/cumulative_reward_2": 0.0,
81
- "rewards/final_correctness_reward_func": 0.125,
82
- "rewards/question_recreation_reward_func": 0.8123538055224344,
83
  "rewards/soft_format_reward_func": 0.0,
84
- "rewards/strict_format_reward_func": 0.3125,
85
- "rewards/xmlcount_reward_func": 1.0801249966025352,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 148.25,
90
  "epoch": 0.5,
91
- "grad_norm": 22671.439453125,
92
- "kl": 1653.8566977125593,
93
  "learning_rate": 2.706448363680831e-07,
94
- "loss": 1.6539,
95
- "reward": 6.164374992251396,
96
- "reward_std": 1.5881682708859444,
97
- "rewards/concensus_correctness_reward_func": 1.933499988168478,
98
- "rewards/consensus_reward_func": 1.625,
99
  "rewards/cumulative_reward_2": 0.0,
100
- "rewards/final_correctness_reward_func": 0.375,
101
- "rewards/question_recreation_reward_func": 0.8398749940097332,
102
  "rewards/soft_format_reward_func": 0.0,
103
- "rewards/strict_format_reward_func": 0.3125,
104
- "rewards/xmlcount_reward_func": 1.078499998897314,
105
  "step": 10
106
  },
107
  {
108
- "completion_length": 138.3125,
109
  "epoch": 0.6,
110
- "grad_norm": 17988.767578125,
111
- "kl": 1401.7687317871023,
112
  "learning_rate": 1.886286282148002e-07,
113
- "loss": 1.4018,
114
- "reward": 4.654909428209066,
115
- "reward_std": 1.6939838130492717,
116
- "rewards/concensus_correctness_reward_func": 1.2355624809861183,
117
- "rewards/consensus_reward_func": 1.375,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
120
- "rewards/question_recreation_reward_func": 0.7417220007628202,
121
  "rewards/soft_format_reward_func": 0.0,
122
- "rewards/strict_format_reward_func": 0.296875,
123
- "rewards/xmlcount_reward_func": 1.0057500004768372,
124
  "step": 12
125
  },
126
  {
127
- "completion_length": 122.75,
128
  "epoch": 0.7,
129
- "grad_norm": 112.23609924316406,
130
- "kl": 70.25626161438413,
131
  "learning_rate": 1.1326296046939333e-07,
132
- "loss": 0.0703,
133
- "reward": 5.5448384545743465,
134
- "reward_std": 0.9999404510381282,
135
- "rewards/concensus_correctness_reward_func": 1.8659375123679638,
136
  "rewards/consensus_reward_func": 1.5625,
137
  "rewards/cumulative_reward_2": 0.0,
138
- "rewards/final_correctness_reward_func": 0.1875,
139
- "rewards/question_recreation_reward_func": 0.750369711255189,
140
  "rewards/soft_format_reward_func": 0.0,
141
- "rewards/strict_format_reward_func": 0.25,
142
- "rewards/xmlcount_reward_func": 0.928531251847744,
143
  "step": 14
144
  },
145
  {
146
- "completion_length": 134.4375,
147
  "epoch": 0.8,
148
- "grad_norm": 18387000.0,
149
- "kl": 342814.56765106344,
150
  "learning_rate": 5.271487265090163e-08,
151
- "loss": 342.8146,
152
- "reward": 6.2455944791436195,
153
- "reward_std": 0.6316794383637898,
154
- "rewards/concensus_correctness_reward_func": 1.9734999909996986,
155
- "rewards/consensus_reward_func": 1.75,
156
  "rewards/cumulative_reward_2": 0.0,
157
- "rewards/final_correctness_reward_func": 0.375,
158
- "rewards/question_recreation_reward_func": 0.8281257301568985,
159
  "rewards/soft_format_reward_func": 0.0,
160
- "rewards/strict_format_reward_func": 0.265625,
161
- "rewards/xmlcount_reward_func": 1.0533437505364418,
162
  "step": 16
163
  },
164
  {
165
- "completion_length": 121.125,
166
  "epoch": 0.9,
167
- "grad_norm": 5167.2353515625,
168
- "kl": 102.93470847699791,
169
  "learning_rate": 1.3545689574841341e-08,
170
- "loss": 0.1029,
171
- "reward": 5.336302071809769,
172
- "reward_std": 1.0969595974311233,
173
- "rewards/concensus_correctness_reward_func": 1.5333124920725822,
174
- "rewards/consensus_reward_func": 1.5625,
175
  "rewards/cumulative_reward_2": 0.0,
176
- "rewards/final_correctness_reward_func": 0.0625,
177
- "rewards/question_recreation_reward_func": 0.8231771271675825,
178
  "rewards/soft_format_reward_func": 0.0,
179
- "rewards/strict_format_reward_func": 0.28125,
180
- "rewards/xmlcount_reward_func": 1.0735625009983778,
181
  "step": 18
182
  },
183
  {
184
- "completion_length": 124.8125,
185
  "epoch": 1.0,
186
- "grad_norm": 82.51132202148438,
187
- "kl": 5.90467467578128,
188
  "learning_rate": 0.0,
189
- "loss": 0.0059,
190
- "reward": 5.849171329289675,
191
- "reward_std": 0.5187715581487282,
192
- "rewards/concensus_correctness_reward_func": 1.7366249822080135,
193
- "rewards/consensus_reward_func": 1.8125,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
196
- "rewards/question_recreation_reward_func": 0.8312963852658868,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.296875,
199
- "rewards/xmlcount_reward_func": 1.171875,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 34.63017091080546,
207
- "train_runtime": 149.3925,
208
- "train_samples_per_second": 2.142,
209
- "train_steps_per_second": 0.134
210
  }
211
  ],
212
  "logging_steps": 2,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 223.9375,
14
  "epoch": 0.1,
15
+ "grad_norm": 203.7086944580078,
16
  "kl": 0.0,
17
  "learning_rate": 4.965903258506806e-07,
18
  "loss": -0.0,
19
+ "reward": 3.9444722017506137,
20
+ "reward_std": 1.790060242288746,
21
+ "rewards/concensus_correctness_reward_func": 1.4638749994337559,
22
+ "rewards/consensus_reward_func": 0.875,
23
  "rewards/cumulative_reward_2": 0.0,
24
+ "rewards/final_correctness_reward_func": 0.0625,
25
+ "rewards/question_recreation_reward_func": 0.6139097640989348,
26
  "rewards/soft_format_reward_func": 0.0,
27
+ "rewards/strict_format_reward_func": 0.1875,
28
+ "rewards/xmlcount_reward_func": 0.741687492467463,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 158.875,
33
  "epoch": 0.2,
34
+ "grad_norm": 39.68674850463867,
35
+ "kl": 1.1318362320307642,
36
  "learning_rate": 4.698684378016222e-07,
37
+ "loss": 0.0011,
38
+ "reward": 6.102113731205463,
39
+ "reward_std": 0.8478451119735837,
40
+ "rewards/concensus_correctness_reward_func": 1.8132499977946281,
41
+ "rewards/consensus_reward_func": 1.6875,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.25,
44
+ "rewards/question_recreation_reward_func": 0.9185200277715921,
45
  "rewards/soft_format_reward_func": 0.0,
46
+ "rewards/strict_format_reward_func": 0.296875,
47
+ "rewards/xmlcount_reward_func": 1.1359687484800816,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 188.875,
52
  "epoch": 0.3,
53
+ "grad_norm": 1694169.5,
54
+ "kl": 61022.59591866261,
55
  "learning_rate": 4.193203929064353e-07,
56
+ "loss": 61.0226,
57
+ "reward": 4.908621296286583,
58
+ "reward_std": 1.312034587652306,
59
+ "rewards/concensus_correctness_reward_func": 1.364124983549118,
60
+ "rewards/consensus_reward_func": 1.3125,
61
  "rewards/cumulative_reward_2": 0.0,
62
+ "rewards/final_correctness_reward_func": 0.125,
63
+ "rewards/question_recreation_reward_func": 0.7794337533414364,
64
  "rewards/soft_format_reward_func": 0.0,
65
+ "rewards/strict_format_reward_func": 0.265625,
66
+ "rewards/xmlcount_reward_func": 1.0619374997913837,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 151.875,
71
  "epoch": 0.4,
72
+ "grad_norm": 492.3763122558594,
73
+ "kl": 210.9650375645142,
74
  "learning_rate": 3.5042385616324236e-07,
75
+ "loss": 0.211,
76
+ "reward": 5.924358628690243,
77
+ "reward_std": 0.5515469368910999,
78
+ "rewards/concensus_correctness_reward_func": 1.8797499937936664,
79
+ "rewards/consensus_reward_func": 1.6875,
80
  "rewards/cumulative_reward_2": 0.0,
81
+ "rewards/final_correctness_reward_func": 0.1875,
82
+ "rewards/question_recreation_reward_func": 0.8393273764522746,
83
  "rewards/soft_format_reward_func": 0.0,
84
+ "rewards/strict_format_reward_func": 0.25,
85
+ "rewards/xmlcount_reward_func": 1.080281250178814,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 133.8125,
90
  "epoch": 0.5,
91
+ "grad_norm": 24558.71875,
92
+ "kl": 721.3866671086289,
93
  "learning_rate": 2.706448363680831e-07,
94
+ "loss": 0.7214,
95
+ "reward": 6.406012073159218,
96
+ "reward_std": 0.7330344214569777,
97
+ "rewards/concensus_correctness_reward_func": 2.0498749911785126,
98
+ "rewards/consensus_reward_func": 1.6875,
99
  "rewards/cumulative_reward_2": 0.0,
100
+ "rewards/final_correctness_reward_func": 0.4375,
101
+ "rewards/question_recreation_reward_func": 0.8526683263480663,
102
  "rewards/soft_format_reward_func": 0.0,
103
+ "rewards/strict_format_reward_func": 0.28125,
104
+ "rewards/xmlcount_reward_func": 1.0972187519073486,
105
  "step": 10
106
  },
107
  {
108
+ "completion_length": 135.65625,
109
  "epoch": 0.6,
110
+ "grad_norm": 64.38353729248047,
111
+ "kl": 263.8177743591368,
112
  "learning_rate": 1.886286282148002e-07,
113
+ "loss": 0.2638,
114
+ "reward": 5.379809558391571,
115
+ "reward_std": 0.5953573631122708,
116
+ "rewards/concensus_correctness_reward_func": 1.4174999743700027,
117
+ "rewards/consensus_reward_func": 1.5625,
118
  "rewards/cumulative_reward_2": 0.0,
119
  "rewards/final_correctness_reward_func": 0.0,
120
+ "rewards/question_recreation_reward_func": 0.8784658461809158,
121
  "rewards/soft_format_reward_func": 0.0,
122
+ "rewards/strict_format_reward_func": 0.375,
123
+ "rewards/xmlcount_reward_func": 1.1463437527418137,
124
  "step": 12
125
  },
126
  {
127
+ "completion_length": 162.0,
128
  "epoch": 0.7,
129
+ "grad_norm": 4812.80224609375,
130
+ "kl": 210.1550904882606,
131
  "learning_rate": 1.1326296046939333e-07,
132
+ "loss": 0.2102,
133
+ "reward": 5.389181062579155,
134
+ "reward_std": 1.1157905644795392,
135
+ "rewards/concensus_correctness_reward_func": 1.6220000125467777,
136
  "rewards/consensus_reward_func": 1.5625,
137
  "rewards/cumulative_reward_2": 0.0,
138
+ "rewards/final_correctness_reward_func": 0.125,
139
+ "rewards/question_recreation_reward_func": 0.8566497433930635,
140
  "rewards/soft_format_reward_func": 0.0,
141
+ "rewards/strict_format_reward_func": 0.21875,
142
+ "rewards/xmlcount_reward_func": 1.0042812526226044,
143
  "step": 14
144
  },
145
  {
146
+ "completion_length": 159.125,
147
  "epoch": 0.8,
148
+ "grad_norm": 692.3638305664062,
149
+ "kl": 134.42648913431913,
150
  "learning_rate": 5.271487265090163e-08,
151
+ "loss": 0.1344,
152
+ "reward": 6.275528252124786,
153
+ "reward_std": 0.8024359941482544,
154
+ "rewards/concensus_correctness_reward_func": 1.9732499942183495,
155
+ "rewards/consensus_reward_func": 1.875,
156
  "rewards/cumulative_reward_2": 0.0,
157
+ "rewards/final_correctness_reward_func": 0.25,
158
+ "rewards/question_recreation_reward_func": 0.8014658335596323,
159
  "rewards/soft_format_reward_func": 0.0,
160
+ "rewards/strict_format_reward_func": 0.328125,
161
+ "rewards/xmlcount_reward_func": 1.0476875007152557,
162
  "step": 16
163
  },
164
  {
165
+ "completion_length": 123.9375,
166
  "epoch": 0.9,
167
+ "grad_norm": 817.1598510742188,
168
+ "kl": 399.1941711329855,
169
  "learning_rate": 1.3545689574841341e-08,
170
+ "loss": 0.3992,
171
+ "reward": 4.676305454224348,
172
+ "reward_std": 0.88509076932678,
173
+ "rewards/concensus_correctness_reward_func": 1.2936249822378159,
174
+ "rewards/consensus_reward_func": 1.1875,
175
  "rewards/cumulative_reward_2": 0.0,
176
+ "rewards/final_correctness_reward_func": 0.25,
177
+ "rewards/question_recreation_reward_func": 0.7137429475260433,
178
  "rewards/soft_format_reward_func": 0.0,
179
+ "rewards/strict_format_reward_func": 0.234375,
180
+ "rewards/xmlcount_reward_func": 0.9970625024288893,
181
  "step": 18
182
  },
183
  {
184
+ "completion_length": 147.625,
185
  "epoch": 1.0,
186
+ "grad_norm": 1609.4205322265625,
187
+ "kl": 645.8300444511697,
188
  "learning_rate": 0.0,
189
+ "loss": 0.6458,
190
+ "reward": 4.875687658786774,
191
+ "reward_std": 1.68803179403767,
192
+ "rewards/concensus_correctness_reward_func": 1.3708749897778034,
193
+ "rewards/consensus_reward_func": 1.3125,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.0,
196
+ "rewards/question_recreation_reward_func": 0.8263127245008945,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.296875,
199
+ "rewards/xmlcount_reward_func": 1.0691250041127205,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": 6.360948879132048,
207
+ "train_runtime": 156.959,
208
+ "train_samples_per_second": 2.039,
209
+ "train_steps_per_second": 0.127
210
  }
211
  ],
212
  "logging_steps": 2,