hazentr commited on
Commit
ada236a
·
verified ·
1 Parent(s): 3cb4fd1

End of training

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44f3a685eb17c3e67b2edbe09a1dd10c49e344c3e8bf21b07081f4b556d340eb
3
  size 73911112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c7b5c4caaadadc9c09f859e0a310752d019928bf81db375200e537ba03898a3
3
  size 73911112
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.815025074047299e-07,
4
- "train_runtime": 1180.0416,
5
  "train_samples": 11,
6
- "train_samples_per_second": 0.271,
7
- "train_steps_per_second": 0.017
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 8.676245840888441e-07,
4
+ "train_runtime": 1079.3,
5
  "train_samples": 11,
6
+ "train_samples_per_second": 0.296,
7
+ "train_steps_per_second": 0.019
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.815025074047299e-07,
4
- "train_runtime": 1180.0416,
5
  "train_samples": 11,
6
- "train_samples_per_second": 0.271,
7
- "train_steps_per_second": 0.017
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 8.676245840888441e-07,
4
+ "train_runtime": 1079.3,
5
  "train_samples": 11,
6
+ "train_samples_per_second": 0.296,
7
+ "train_steps_per_second": 0.019
8
  }
trainer_state.json CHANGED
@@ -9,203 +9,203 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 508.21875,
13
  "epoch": 0.7272727272727273,
14
- "grad_norm": 0.4091319143772125,
15
- "kl": 0.0007149651682993863,
16
  "learning_rate": 4.965903258506806e-07,
17
  "loss": 0.0,
18
- "reward": 0.46734179742634296,
19
- "reward_std": 0.7034913524985313,
20
  "rewards/concensus_correctness_reward_func": 0.0,
21
  "rewards/consensus_reward_func": 0.125,
22
  "rewards/cumulative_reward_2": 0.0,
23
- "rewards/final_correctness_reward_func": 0.0,
24
- "rewards/question_recreation_reward_func": 0.4399667978286743,
25
  "rewards/soft_format_reward_func": 0.0,
26
  "rewards/strict_format_reward_func": 0.015625,
27
- "rewards/xmlcount_reward_func": -0.1132500059902668,
28
  "step": 2
29
  },
30
  {
31
- "completion_length": 532.5,
32
  "epoch": 1.3636363636363638,
33
- "grad_norm": 0.36167478561401367,
34
- "kl": 0.0010431851842440665,
35
  "learning_rate": 4.698684378016222e-07,
36
  "loss": 0.0,
37
- "reward": 1.2440873043877738,
38
- "reward_std": 1.0159797157560075,
39
- "rewards/concensus_correctness_reward_func": 0.0,
40
- "rewards/consensus_reward_func": 0.42857142857142855,
41
  "rewards/cumulative_reward_2": 0.0,
42
- "rewards/final_correctness_reward_func": 0.0,
43
- "rewards/question_recreation_reward_func": 0.38823013433388304,
44
  "rewards/soft_format_reward_func": 0.0,
45
- "rewards/strict_format_reward_func": 0.0,
46
- "rewards/xmlcount_reward_func": 0.42728572871003834,
47
  "step": 4
48
  },
49
  {
50
- "completion_length": 627.9285714285714,
51
  "epoch": 2.0,
52
- "grad_norm": 0.24820180237293243,
53
- "kl": 0.0006503500252230359,
54
  "learning_rate": 4.193203929064353e-07,
55
  "loss": 0.0,
56
- "reward": 0.3999569181885038,
57
- "reward_std": 1.3144908802849906,
58
- "rewards/concensus_correctness_reward_func": 0.0,
59
- "rewards/consensus_reward_func": 0.07142857142857142,
60
  "rewards/cumulative_reward_2": 0.0,
61
  "rewards/final_correctness_reward_func": 0.0,
62
- "rewards/question_recreation_reward_func": 0.4129926615527698,
63
  "rewards/soft_format_reward_func": 0.0,
64
  "rewards/strict_format_reward_func": 0.0,
65
- "rewards/xmlcount_reward_func": -0.08446427966867175,
66
  "step": 6
67
  },
68
  {
69
- "completion_length": 590.4375,
70
  "epoch": 2.7272727272727275,
71
- "grad_norm": 0.3695061206817627,
72
- "kl": 0.000727474907762371,
73
  "learning_rate": 3.5042385616324236e-07,
74
  "loss": 0.0,
75
- "reward": 0.5934450421482325,
76
- "reward_std": 0.9755645971745253,
77
- "rewards/concensus_correctness_reward_func": 0.0036249998956918716,
78
- "rewards/consensus_reward_func": 0.1875,
79
  "rewards/cumulative_reward_2": 0.0,
80
  "rewards/final_correctness_reward_func": 0.0,
81
- "rewards/question_recreation_reward_func": 0.5026637725532055,
82
  "rewards/soft_format_reward_func": 0.0,
83
- "rewards/strict_format_reward_func": 0.0,
84
- "rewards/xmlcount_reward_func": -0.10034374333918095,
85
  "step": 8
86
  },
87
  {
88
- "completion_length": 571.9285714285714,
89
  "epoch": 3.3636363636363638,
90
- "grad_norm": 0.3335147500038147,
91
- "kl": 0.0006040313164703548,
92
  "learning_rate": 2.706448363680831e-07,
93
  "loss": 0.0,
94
- "reward": 0.7774789035320282,
95
- "reward_std": 1.0534876585006714,
96
- "rewards/concensus_correctness_reward_func": 0.0,
97
- "rewards/consensus_reward_func": 0.14285714285714285,
98
  "rewards/cumulative_reward_2": 0.0,
99
  "rewards/final_correctness_reward_func": 0.0,
100
- "rewards/question_recreation_reward_func": 0.4371217553104673,
101
  "rewards/soft_format_reward_func": 0.0,
102
  "rewards/strict_format_reward_func": 0.0,
103
- "rewards/xmlcount_reward_func": 0.19750000536441803,
104
  "step": 10
105
  },
106
  {
107
- "completion_length": 544.1428571428571,
108
  "epoch": 4.0,
109
- "grad_norm": 0.41132229566574097,
110
- "kl": 0.0010735353820824198,
111
  "learning_rate": 1.886286282148002e-07,
112
  "loss": 0.0,
113
- "reward": 1.068406390292304,
114
- "reward_std": 0.9618105930941445,
115
- "rewards/concensus_correctness_reward_func": 0.0,
116
  "rewards/consensus_reward_func": 0.2857142857142857,
117
  "rewards/cumulative_reward_2": 0.0,
118
  "rewards/final_correctness_reward_func": 0.0,
119
- "rewards/question_recreation_reward_func": 0.41076353405203136,
120
- "rewards/soft_format_reward_func": 0.0,
121
- "rewards/strict_format_reward_func": 0.017857142857142856,
122
- "rewards/xmlcount_reward_func": 0.3540714383125305,
123
  "step": 12
124
  },
125
  {
126
- "completion_length": 576.3125,
127
  "epoch": 4.7272727272727275,
128
- "grad_norm": 0.3107761740684509,
129
- "kl": 0.0006248628851608373,
130
  "learning_rate": 1.1326296046939333e-07,
131
  "loss": 0.0,
132
- "reward": 0.7426850730553269,
133
- "reward_std": 0.7855152189731598,
134
- "rewards/concensus_correctness_reward_func": 0.0,
135
- "rewards/consensus_reward_func": 0.1875,
136
  "rewards/cumulative_reward_2": 0.0,
137
  "rewards/final_correctness_reward_func": 0.0,
138
- "rewards/question_recreation_reward_func": 0.42909131571650505,
139
- "rewards/soft_format_reward_func": 0.015625,
140
- "rewards/strict_format_reward_func": 0.015625,
141
- "rewards/xmlcount_reward_func": 0.09484375361353159,
142
  "step": 14
143
  },
144
  {
145
- "completion_length": 581.9642857142857,
146
  "epoch": 5.363636363636363,
147
- "grad_norm": 0.5137450695037842,
148
- "kl": 0.0006109345704317093,
149
  "learning_rate": 5.271487265090163e-08,
150
  "loss": 0.0,
151
- "reward": 0.569231852889061,
152
- "reward_std": 1.3280495234898158,
153
- "rewards/concensus_correctness_reward_func": 0.0069285716329302105,
154
- "rewards/consensus_reward_func": 0.2857142857142857,
155
  "rewards/cumulative_reward_2": 0.0,
156
  "rewards/final_correctness_reward_func": 0.0,
157
- "rewards/question_recreation_reward_func": 0.5462318648185048,
158
  "rewards/soft_format_reward_func": 0.0,
159
- "rewards/strict_format_reward_func": 0.0,
160
- "rewards/xmlcount_reward_func": -0.2696428426674434,
161
  "step": 16
162
  },
163
  {
164
- "completion_length": 565.7857142857143,
165
  "epoch": 6.0,
166
- "grad_norm": 0.2902137041091919,
167
- "kl": 0.0006822941941209137,
168
  "learning_rate": 1.3545689574841341e-08,
169
  "loss": 0.0,
170
- "reward": 0.3149234822818211,
171
- "reward_std": 1.0513127403599876,
172
- "rewards/concensus_correctness_reward_func": 0.0,
173
  "rewards/consensus_reward_func": 0.21428571428571427,
174
  "rewards/cumulative_reward_2": 0.0,
175
  "rewards/final_correctness_reward_func": 0.0,
176
- "rewards/question_recreation_reward_func": 0.4591734771217619,
177
  "rewards/soft_format_reward_func": 0.0,
178
- "rewards/strict_format_reward_func": 0.0,
179
- "rewards/xmlcount_reward_func": -0.3585357069969177,
180
  "step": 18
181
  },
182
  {
183
- "completion_length": 595.5625,
184
  "epoch": 6.7272727272727275,
185
- "grad_norm": 0.2633510231971741,
186
- "kl": 0.0006653657765127718,
187
  "learning_rate": 0.0,
188
  "loss": 0.0,
189
- "reward": 0.6152886636555195,
190
- "reward_std": 1.1535739079117775,
191
- "rewards/concensus_correctness_reward_func": 0.0,
192
- "rewards/consensus_reward_func": 0.25,
193
  "rewards/cumulative_reward_2": 0.0,
194
  "rewards/final_correctness_reward_func": 0.0,
195
- "rewards/question_recreation_reward_func": 0.3791011737193912,
196
  "rewards/soft_format_reward_func": 0.0,
197
  "rewards/strict_format_reward_func": 0.015625,
198
- "rewards/xmlcount_reward_func": -0.029437489807605743,
199
  "step": 20
200
  },
201
  {
202
  "epoch": 6.7272727272727275,
203
  "step": 20,
204
  "total_flos": 0.0,
205
- "train_loss": 6.815025074047299e-07,
206
- "train_runtime": 1180.0416,
207
- "train_samples_per_second": 0.271,
208
- "train_steps_per_second": 0.017
209
  }
210
  ],
211
  "logging_steps": 2,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "completion_length": 409.5,
13
  "epoch": 0.7272727272727273,
14
+ "grad_norm": 0.4343891441822052,
15
+ "kl": 0.0007821403632988222,
16
  "learning_rate": 4.965903258506806e-07,
17
  "loss": 0.0,
18
+ "reward": 0.758910913951695,
19
+ "reward_std": 0.9026924595236778,
20
  "rewards/concensus_correctness_reward_func": 0.0,
21
  "rewards/consensus_reward_func": 0.125,
22
  "rewards/cumulative_reward_2": 0.0,
23
+ "rewards/final_correctness_reward_func": 0.0625,
24
+ "rewards/question_recreation_reward_func": 0.35931719094514847,
25
  "rewards/soft_format_reward_func": 0.0,
26
  "rewards/strict_format_reward_func": 0.015625,
27
+ "rewards/xmlcount_reward_func": 0.19646874582394958,
28
  "step": 2
29
  },
30
  {
31
+ "completion_length": 466.92857142857144,
32
  "epoch": 1.3636363636363638,
33
+ "grad_norm": 0.4033401906490326,
34
+ "kl": 0.0007768731780483254,
35
  "learning_rate": 4.698684378016222e-07,
36
  "loss": 0.0,
37
+ "reward": 1.1353230103850365,
38
+ "reward_std": 1.2951693896736418,
39
+ "rewards/concensus_correctness_reward_func": 0.029428571462631226,
40
+ "rewards/consensus_reward_func": 0.5,
41
  "rewards/cumulative_reward_2": 0.0,
42
+ "rewards/final_correctness_reward_func": 0.07142857142857142,
43
+ "rewards/question_recreation_reward_func": 0.34993019487176624,
44
  "rewards/soft_format_reward_func": 0.0,
45
+ "rewards/strict_format_reward_func": 0.017857142857142856,
46
+ "rewards/xmlcount_reward_func": 0.1666785856442792,
47
  "step": 4
48
  },
49
  {
50
+ "completion_length": 426.7142857142857,
51
  "epoch": 2.0,
52
+ "grad_norm": 0.38663896918296814,
53
+ "kl": 0.0008058734404455338,
54
  "learning_rate": 4.193203929064353e-07,
55
  "loss": 0.0,
56
+ "reward": 0.9899007028767041,
57
+ "reward_std": 1.3681557519095284,
58
+ "rewards/concensus_correctness_reward_func": 0.052214286157063076,
59
+ "rewards/consensus_reward_func": 0.5,
60
  "rewards/cumulative_reward_2": 0.0,
61
  "rewards/final_correctness_reward_func": 0.0,
62
+ "rewards/question_recreation_reward_func": 0.3863649751458849,
63
  "rewards/soft_format_reward_func": 0.0,
64
  "rewards/strict_format_reward_func": 0.0,
65
+ "rewards/xmlcount_reward_func": 0.05132143199443817,
66
  "step": 6
67
  },
68
  {
69
+ "completion_length": 383.96875,
70
  "epoch": 2.7272727272727275,
71
+ "grad_norm": 0.4773660898208618,
72
+ "kl": 0.0008817740672384389,
73
  "learning_rate": 3.5042385616324236e-07,
74
  "loss": 0.0,
75
+ "reward": 0.8929834757000208,
76
+ "reward_std": 0.8532077670097351,
77
+ "rewards/concensus_correctness_reward_func": 0.0,
78
+ "rewards/consensus_reward_func": 0.25,
79
  "rewards/cumulative_reward_2": 0.0,
80
  "rewards/final_correctness_reward_func": 0.0,
81
+ "rewards/question_recreation_reward_func": 0.3532647155225277,
82
  "rewards/soft_format_reward_func": 0.0,
83
+ "rewards/strict_format_reward_func": 0.015625,
84
+ "rewards/xmlcount_reward_func": 0.27409375831484795,
85
  "step": 8
86
  },
87
  {
88
+ "completion_length": 529.7857142857143,
89
  "epoch": 3.3636363636363638,
90
+ "grad_norm": 1.429701566696167,
91
+ "kl": 0.0015694323062364543,
92
  "learning_rate": 2.706448363680831e-07,
93
  "loss": 0.0,
94
+ "reward": 1.2543298218931471,
95
+ "reward_std": 1.171411571758134,
96
+ "rewards/concensus_correctness_reward_func": 0.05885714292526245,
97
+ "rewards/consensus_reward_func": 0.5,
98
  "rewards/cumulative_reward_2": 0.0,
99
  "rewards/final_correctness_reward_func": 0.0,
100
+ "rewards/question_recreation_reward_func": 0.3772940848554884,
101
  "rewards/soft_format_reward_func": 0.0,
102
  "rewards/strict_format_reward_func": 0.0,
103
+ "rewards/xmlcount_reward_func": 0.3181785898549216,
104
  "step": 10
105
  },
106
  {
107
+ "completion_length": 459.2857142857143,
108
  "epoch": 4.0,
109
+ "grad_norm": 0.29276183247566223,
110
+ "kl": 0.0007172501812289868,
111
  "learning_rate": 1.886286282148002e-07,
112
  "loss": 0.0,
113
+ "reward": 0.7020246578114373,
114
+ "reward_std": 0.665808230638504,
115
+ "rewards/concensus_correctness_reward_func": 0.02278571469443185,
116
  "rewards/consensus_reward_func": 0.2857142857142857,
117
  "rewards/cumulative_reward_2": 0.0,
118
  "rewards/final_correctness_reward_func": 0.0,
119
+ "rewards/question_recreation_reward_func": 0.44752465401376995,
120
+ "rewards/soft_format_reward_func": 0.017857142857142856,
121
+ "rewards/strict_format_reward_func": 0.0,
122
+ "rewards/xmlcount_reward_func": -0.07185715011187963,
123
  "step": 12
124
  },
125
  {
126
+ "completion_length": 447.0625,
127
  "epoch": 4.7272727272727275,
128
+ "grad_norm": 0.4309176802635193,
129
+ "kl": 0.0008279064204543829,
130
  "learning_rate": 1.1326296046939333e-07,
131
  "loss": 0.0,
132
+ "reward": 1.3554431721568108,
133
+ "reward_std": 1.0588620342314243,
134
+ "rewards/concensus_correctness_reward_func": 0.025750000029802322,
135
+ "rewards/consensus_reward_func": 0.5,
136
  "rewards/cumulative_reward_2": 0.0,
137
  "rewards/final_correctness_reward_func": 0.0,
138
+ "rewards/question_recreation_reward_func": 0.4104431765154004,
139
+ "rewards/soft_format_reward_func": 0.0,
140
+ "rewards/strict_format_reward_func": 0.03125,
141
+ "rewards/xmlcount_reward_func": 0.38800000213086605,
142
  "step": 14
143
  },
144
  {
145
+ "completion_length": 485.85714285714283,
146
  "epoch": 5.363636363636363,
147
+ "grad_norm": 0.48923608660697937,
148
+ "kl": 0.0015958319917055114,
149
  "learning_rate": 5.271487265090163e-08,
150
  "loss": 0.0,
151
+ "reward": 1.2559186049870081,
152
+ "reward_std": 1.0251037265573228,
153
+ "rewards/concensus_correctness_reward_func": 0.029428571462631226,
154
+ "rewards/consensus_reward_func": 0.6428571428571429,
155
  "rewards/cumulative_reward_2": 0.0,
156
  "rewards/final_correctness_reward_func": 0.0,
157
+ "rewards/question_recreation_reward_func": 0.4494900501200131,
158
  "rewards/soft_format_reward_func": 0.0,
159
+ "rewards/strict_format_reward_func": 0.03571428571428571,
160
+ "rewards/xmlcount_reward_func": 0.0984285729272025,
161
  "step": 16
162
  },
163
  {
164
+ "completion_length": 415.5357142857143,
165
  "epoch": 6.0,
166
+ "grad_norm": 0.3563084900379181,
167
+ "kl": 0.0007512768747151963,
168
  "learning_rate": 1.3545689574841341e-08,
169
  "loss": 0.0,
170
+ "reward": 0.8440483425344739,
171
+ "reward_std": 0.7744771710463932,
172
+ "rewards/concensus_correctness_reward_func": 0.02278571469443185,
173
  "rewards/consensus_reward_func": 0.21428571428571427,
174
  "rewards/cumulative_reward_2": 0.0,
175
  "rewards/final_correctness_reward_func": 0.0,
176
+ "rewards/question_recreation_reward_func": 0.4731554814747402,
177
  "rewards/soft_format_reward_func": 0.0,
178
+ "rewards/strict_format_reward_func": 0.017857142857142856,
179
+ "rewards/xmlcount_reward_func": 0.11596427751438958,
180
  "step": 18
181
  },
182
  {
183
+ "completion_length": 445.71875,
184
  "epoch": 6.7272727272727275,
185
+ "grad_norm": 0.42769959568977356,
186
+ "kl": 0.0008261563343694434,
187
  "learning_rate": 0.0,
188
  "loss": 0.0,
189
+ "reward": 1.1794010773301125,
190
+ "reward_std": 0.8905268348753452,
191
+ "rewards/concensus_correctness_reward_func": 0.0950000025331974,
192
+ "rewards/consensus_reward_func": 0.4375,
193
  "rewards/cumulative_reward_2": 0.0,
194
  "rewards/final_correctness_reward_func": 0.0,
195
+ "rewards/question_recreation_reward_func": 0.3941511157900095,
196
  "rewards/soft_format_reward_func": 0.0,
197
  "rewards/strict_format_reward_func": 0.015625,
198
+ "rewards/xmlcount_reward_func": 0.2371250055730343,
199
  "step": 20
200
  },
201
  {
202
  "epoch": 6.7272727272727275,
203
  "step": 20,
204
  "total_flos": 0.0,
205
+ "train_loss": 8.676245840888441e-07,
206
+ "train_runtime": 1079.3,
207
+ "train_samples_per_second": 0.296,
208
+ "train_steps_per_second": 0.019
209
  }
210
  ],
211
  "logging_steps": 2,