hamedkharazmi commited on
Commit
803f928
·
verified ·
1 Parent(s): 99ed55b

End of training

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a467da6dfef24340866ff5a1d37beebfcda14ce6c55d69b6b79a1962c0a5919
3
  size 35237104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4c704ffdbac9452e06e97669259701f79604811b80818b66077ed79c2fefc7
3
  size 35237104
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00020942270639352502,
4
- "train_runtime": 1080.3296,
5
- "train_samples": 45,
6
- "train_samples_per_second": 0.296,
7
- "train_steps_per_second": 0.019
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.00019092126749455928,
4
+ "train_runtime": 976.6916,
5
+ "train_samples": 32,
6
+ "train_samples_per_second": 0.328,
7
+ "train_steps_per_second": 0.02
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.00020942270639352502,
4
- "train_runtime": 1080.3296,
5
- "train_samples": 45,
6
- "train_samples_per_second": 0.296,
7
- "train_steps_per_second": 0.019
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.00019092126749455928,
4
+ "train_runtime": 976.6916,
5
+ "train_samples": 32,
6
+ "train_samples_per_second": 0.328,
7
+ "train_steps_per_second": 0.02
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7111111111111112,
5
  "eval_steps": 500,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
@@ -9,209 +9,209 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 530.96875,
13
- "epoch": 0.17777777777777778,
14
- "grad_norm": 2.4989676475524902,
15
- "kl": 0.21733939740806818,
16
  "learning_rate": 4.965903258506806e-07,
17
  "loss": 0.0002,
18
- "reward": 1.9155707880854607,
19
- "reward_std": 0.9734031483530998,
20
- "rewards/concensus_correctness_reward_func": 0.0,
21
- "rewards/consensus_reward_func": 0.0,
22
  "rewards/cumulative_reward_2": 0.0,
23
- "rewards/final_correctness_reward_func": 1.0,
24
- "rewards/question_recreation_reward_func": 0.5404457412660122,
25
  "rewards/soft_format_reward_func": 0.0,
26
- "rewards/strict_format_reward_func": 0.0,
27
- "rewards/xmlcount_reward_func": 0.3751249983906746,
28
  "step": 2
29
  },
30
  {
31
- "completion_length": 341.71875,
32
- "epoch": 0.35555555555555557,
33
- "grad_norm": 2.766711711883545,
34
- "kl": 0.23056768160313368,
35
  "learning_rate": 4.698684378016222e-07,
36
  "loss": 0.0002,
37
- "reward": 2.9589971601963043,
38
- "reward_std": 2.357203096151352,
39
- "rewards/concensus_correctness_reward_func": 0.7106874957680702,
40
- "rewards/consensus_reward_func": 0.25,
41
  "rewards/cumulative_reward_2": 0.0,
42
- "rewards/final_correctness_reward_func": 0.875,
43
- "rewards/question_recreation_reward_func": 0.44834090769290924,
44
- "rewards/soft_format_reward_func": 0.0,
45
- "rewards/strict_format_reward_func": 0.0,
46
- "rewards/xmlcount_reward_func": 0.674968745559454,
47
  "step": 4
48
  },
49
  {
50
- "completion_length": 357.6875,
51
- "epoch": 0.5333333333333333,
52
- "grad_norm": 2.8552253246307373,
53
- "kl": 0.2545619700103998,
54
  "learning_rate": 4.193203929064353e-07,
55
- "loss": 0.0003,
56
- "reward": 1.4644720628857613,
57
- "reward_std": 2.1972636580467224,
58
- "rewards/concensus_correctness_reward_func": 0.1769999973475933,
59
- "rewards/consensus_reward_func": 0.1875,
60
  "rewards/cumulative_reward_2": 0.0,
61
- "rewards/final_correctness_reward_func": 0.4375,
62
- "rewards/question_recreation_reward_func": 0.3716908162459731,
63
  "rewards/soft_format_reward_func": 0.0,
64
- "rewards/strict_format_reward_func": 0.0625,
65
- "rewards/xmlcount_reward_func": 0.22828125581145287,
66
  "step": 6
67
  },
68
  {
69
- "completion_length": 532.40625,
70
- "epoch": 0.7111111111111111,
71
- "grad_norm": 1.9201836585998535,
72
- "kl": 0.15701038390398026,
73
  "learning_rate": 3.5042385616324236e-07,
74
  "loss": 0.0002,
75
- "reward": 1.613306075334549,
76
- "reward_std": 1.291415523737669,
77
- "rewards/concensus_correctness_reward_func": 0.09756249934434891,
78
- "rewards/consensus_reward_func": 0.0,
79
  "rewards/cumulative_reward_2": 0.0,
80
- "rewards/final_correctness_reward_func": 0.6875,
81
- "rewards/question_recreation_reward_func": 0.46139981178566813,
82
  "rewards/soft_format_reward_func": 0.0,
83
- "rewards/strict_format_reward_func": 0.015625,
84
- "rewards/xmlcount_reward_func": 0.3512187674641609,
85
  "step": 8
86
  },
87
  {
88
- "completion_length": 456.0,
89
- "epoch": 0.8888888888888888,
90
- "grad_norm": 2.7403085231781006,
91
- "kl": 0.21460377983748913,
92
  "learning_rate": 2.706448363680831e-07,
93
  "loss": 0.0002,
94
- "reward": 2.472989559173584,
95
- "reward_std": 1.5739564411342144,
96
- "rewards/concensus_correctness_reward_func": 0.12181250005960464,
97
  "rewards/consensus_reward_func": 0.0625,
98
  "rewards/cumulative_reward_2": 0.0,
99
- "rewards/final_correctness_reward_func": 1.125,
100
- "rewards/question_recreation_reward_func": 0.5545520260930061,
101
  "rewards/soft_format_reward_func": 0.0,
102
  "rewards/strict_format_reward_func": 0.015625,
103
- "rewards/xmlcount_reward_func": 0.5935000069439411,
104
  "step": 10
105
  },
106
  {
107
- "completion_length": 489.05,
108
- "epoch": 1.0,
109
- "grad_norm": 0.374750018119812,
110
- "kl": 0.2088455319404602,
111
  "learning_rate": 1.886286282148002e-07,
112
- "loss": 0.0001,
113
- "reward": 2.262187898159027,
114
- "reward_std": 1.726797068119049,
115
- "rewards/concensus_correctness_reward_func": 0.19279999732971193,
116
- "rewards/consensus_reward_func": 0.0,
117
  "rewards/cumulative_reward_2": 0.0,
118
- "rewards/final_correctness_reward_func": 1.3,
119
- "rewards/question_recreation_reward_func": 0.43668795824050904,
120
  "rewards/soft_format_reward_func": 0.0,
121
- "rewards/strict_format_reward_func": 0.025,
122
- "rewards/xmlcount_reward_func": 0.307699990272522,
123
  "step": 12
124
  },
125
  {
126
- "completion_length": 406.125,
127
- "epoch": 1.1777777777777778,
128
- "grad_norm": 2.49137544631958,
129
- "kl": 0.217071239836514,
130
  "learning_rate": 1.1326296046939333e-07,
131
  "loss": 0.0002,
132
- "reward": 2.1967150270938873,
133
- "reward_std": 1.0636013373732567,
134
- "rewards/concensus_correctness_reward_func": 0.10824999958276749,
135
- "rewards/consensus_reward_func": 0.1875,
136
  "rewards/cumulative_reward_2": 0.0,
137
- "rewards/final_correctness_reward_func": 0.5,
138
- "rewards/question_recreation_reward_func": 0.5827462673187256,
139
  "rewards/soft_format_reward_func": 0.0,
140
- "rewards/strict_format_reward_func": 0.046875,
141
- "rewards/xmlcount_reward_func": 0.7713437601923943,
142
  "step": 14
143
  },
144
  {
145
- "completion_length": 442.46875,
146
- "epoch": 1.3555555555555556,
147
- "grad_norm": 3.027392864227295,
148
- "kl": 0.22122804075479507,
149
  "learning_rate": 5.271487265090163e-08,
150
  "loss": 0.0002,
151
- "reward": 3.0656663402915,
152
- "reward_std": 2.4597536213696003,
153
- "rewards/concensus_correctness_reward_func": 0.8694375231862068,
154
- "rewards/consensus_reward_func": 0.0625,
155
  "rewards/cumulative_reward_2": 0.0,
156
- "rewards/final_correctness_reward_func": 1.125,
157
- "rewards/question_recreation_reward_func": 0.5497601740062237,
158
  "rewards/soft_format_reward_func": 0.0,
159
- "rewards/strict_format_reward_func": 0.0,
160
- "rewards/xmlcount_reward_func": 0.45896876603364944,
161
  "step": 16
162
  },
163
  {
164
- "completion_length": 501.03125,
165
- "epoch": 1.5333333333333332,
166
- "grad_norm": 1.7962393760681152,
167
- "kl": 0.22798582073301077,
168
  "learning_rate": 1.3545689574841341e-08,
169
  "loss": 0.0002,
170
- "reward": 2.4092493802309036,
171
- "reward_std": 1.1413909941911697,
172
- "rewards/concensus_correctness_reward_func": 0.18249999731779099,
173
- "rewards/consensus_reward_func": 0.0625,
174
  "rewards/cumulative_reward_2": 0.0,
175
- "rewards/final_correctness_reward_func": 1.125,
176
- "rewards/question_recreation_reward_func": 0.4226243682205677,
177
  "rewards/soft_format_reward_func": 0.0,
178
- "rewards/strict_format_reward_func": 0.015625,
179
- "rewards/xmlcount_reward_func": 0.6009999997913837,
180
  "step": 18
181
  },
182
  {
183
- "completion_length": 466.53125,
184
- "epoch": 1.7111111111111112,
185
- "grad_norm": 3.0079150199890137,
186
- "kl": 0.22341299941763282,
187
  "learning_rate": 0.0,
188
  "loss": 0.0002,
189
- "reward": 2.485578492283821,
190
- "reward_std": 1.303988529369235,
191
- "rewards/concensus_correctness_reward_func": 0.12087500095367432,
192
- "rewards/consensus_reward_func": 0.0625,
193
  "rewards/cumulative_reward_2": 0.0,
194
- "rewards/final_correctness_reward_func": 1.25,
195
- "rewards/question_recreation_reward_func": 0.5260785156860948,
196
  "rewards/soft_format_reward_func": 0.0,
197
- "rewards/strict_format_reward_func": 0.0,
198
- "rewards/xmlcount_reward_func": 0.5261249858886003,
199
  "step": 20
200
  },
201
  {
202
- "epoch": 1.7111111111111112,
203
  "step": 20,
204
  "total_flos": 0.0,
205
- "train_loss": 0.00020942270639352502,
206
- "train_runtime": 1080.3296,
207
- "train_samples_per_second": 0.296,
208
- "train_steps_per_second": 0.019
209
  }
210
  ],
211
  "logging_steps": 2,
212
  "max_steps": 20,
213
  "num_input_tokens_seen": 0,
214
- "num_train_epochs": 2,
215
  "save_steps": 25,
216
  "stateful_callbacks": {
217
  "TrainerControl": {
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5,
5
  "eval_steps": 500,
6
  "global_step": 20,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "completion_length": 469.4375,
13
+ "epoch": 0.25,
14
+ "grad_norm": 3.423996686935425,
15
+ "kl": 0.24222635943442583,
16
  "learning_rate": 4.965903258506806e-07,
17
  "loss": 0.0002,
18
+ "reward": 2.1027765348553658,
19
+ "reward_std": 2.2172846607863903,
20
+ "rewards/concensus_correctness_reward_func": 0.625,
21
+ "rewards/consensus_reward_func": 0.0625,
22
  "rewards/cumulative_reward_2": 0.0,
23
+ "rewards/final_correctness_reward_func": 0.5625,
24
+ "rewards/question_recreation_reward_func": 0.23833905532956123,
25
  "rewards/soft_format_reward_func": 0.0,
26
+ "rewards/strict_format_reward_func": 0.0625,
27
+ "rewards/xmlcount_reward_func": 0.551937498152256,
28
  "step": 2
29
  },
30
  {
31
+ "completion_length": 552.65625,
32
+ "epoch": 0.5,
33
+ "grad_norm": 2.1609866619110107,
34
+ "kl": 0.15485292114317417,
35
  "learning_rate": 4.698684378016222e-07,
36
  "loss": 0.0002,
37
+ "reward": 2.4315611086785793,
38
+ "reward_std": 2.3908147886395454,
39
+ "rewards/concensus_correctness_reward_func": 0.625,
40
+ "rewards/consensus_reward_func": 0.1875,
41
  "rewards/cumulative_reward_2": 0.0,
42
+ "rewards/final_correctness_reward_func": 0.75,
43
+ "rewards/question_recreation_reward_func": 0.6140298657119274,
44
+ "rewards/soft_format_reward_func": 0.015625,
45
+ "rewards/strict_format_reward_func": 0.015625,
46
+ "rewards/xmlcount_reward_func": 0.22378122806549072,
47
  "step": 4
48
  },
49
  {
50
+ "completion_length": 437.8125,
51
+ "epoch": 0.75,
52
+ "grad_norm": 3.9912779331207275,
53
+ "kl": 0.16812472557649016,
54
  "learning_rate": 4.193203929064353e-07,
55
+ "loss": 0.0002,
56
+ "reward": 1.896638199687004,
57
+ "reward_std": 1.4613052681088448,
58
+ "rewards/concensus_correctness_reward_func": 0.125,
59
+ "rewards/consensus_reward_func": 0.0625,
60
  "rewards/cumulative_reward_2": 0.0,
61
+ "rewards/final_correctness_reward_func": 0.5625,
62
+ "rewards/question_recreation_reward_func": 0.6531069651246071,
63
  "rewards/soft_format_reward_func": 0.0,
64
+ "rewards/strict_format_reward_func": 0.015625,
65
+ "rewards/xmlcount_reward_func": 0.4779062494635582,
66
  "step": 6
67
  },
68
  {
69
+ "completion_length": 411.84375,
70
+ "epoch": 1.0,
71
+ "grad_norm": 2.287323474884033,
72
+ "kl": 0.1949609499424696,
73
  "learning_rate": 3.5042385616324236e-07,
74
  "loss": 0.0002,
75
+ "reward": 1.4827010408043861,
76
+ "reward_std": 2.01706550270319,
77
+ "rewards/concensus_correctness_reward_func": 0.16625000163912773,
78
+ "rewards/consensus_reward_func": 0.1875,
79
  "rewards/cumulative_reward_2": 0.0,
80
+ "rewards/final_correctness_reward_func": 0.3125,
81
+ "rewards/question_recreation_reward_func": 0.4630447644740343,
82
  "rewards/soft_format_reward_func": 0.0,
83
+ "rewards/strict_format_reward_func": 0.0625,
84
+ "rewards/xmlcount_reward_func": 0.29090625420212746,
85
  "step": 8
86
  },
87
  {
88
+ "completion_length": 463.125,
89
+ "epoch": 1.25,
90
+ "grad_norm": 2.7352001667022705,
91
+ "kl": 0.18110767006874084,
92
  "learning_rate": 2.706448363680831e-07,
93
  "loss": 0.0002,
94
+ "reward": 1.263205625116825,
95
+ "reward_std": 1.7421698048710823,
96
+ "rewards/concensus_correctness_reward_func": 0.12274999916553497,
97
  "rewards/consensus_reward_func": 0.0625,
98
  "rewards/cumulative_reward_2": 0.0,
99
+ "rewards/final_correctness_reward_func": 0.5625,
100
+ "rewards/question_recreation_reward_func": 0.5095805916935205,
101
  "rewards/soft_format_reward_func": 0.0,
102
  "rewards/strict_format_reward_func": 0.015625,
103
+ "rewards/xmlcount_reward_func": -0.009749993681907654,
104
  "step": 10
105
  },
106
  {
107
+ "completion_length": 351.71875,
108
+ "epoch": 1.5,
109
+ "grad_norm": 2.7147586345672607,
110
+ "kl": 0.23088416643440723,
111
  "learning_rate": 1.886286282148002e-07,
112
+ "loss": 0.0002,
113
+ "reward": 1.9199633374810219,
114
+ "reward_std": 1.0638368101790547,
115
+ "rewards/concensus_correctness_reward_func": 0.1172500029206276,
116
+ "rewards/consensus_reward_func": 0.0625,
117
  "rewards/cumulative_reward_2": 0.0,
118
+ "rewards/final_correctness_reward_func": 0.75,
119
+ "rewards/question_recreation_reward_func": 0.42355713434517384,
120
  "rewards/soft_format_reward_func": 0.0,
121
+ "rewards/strict_format_reward_func": 0.0,
122
+ "rewards/xmlcount_reward_func": 0.5666562579572201,
123
  "step": 12
124
  },
125
  {
126
+ "completion_length": 437.03125,
127
+ "epoch": 1.75,
128
+ "grad_norm": 3.333895206451416,
129
+ "kl": 0.17828004295006394,
130
  "learning_rate": 1.1326296046939333e-07,
131
  "loss": 0.0002,
132
+ "reward": 1.6909483969211578,
133
+ "reward_std": 1.299113318324089,
134
+ "rewards/concensus_correctness_reward_func": 0.11999999731779099,
135
+ "rewards/consensus_reward_func": 0.125,
136
  "rewards/cumulative_reward_2": 0.0,
137
+ "rewards/final_correctness_reward_func": 0.1875,
138
+ "rewards/question_recreation_reward_func": 0.7196984179317951,
139
  "rewards/soft_format_reward_func": 0.0,
140
+ "rewards/strict_format_reward_func": 0.078125,
141
+ "rewards/xmlcount_reward_func": 0.46062498819082975,
142
  "step": 14
143
  },
144
  {
145
+ "completion_length": 535.40625,
146
+ "epoch": 2.0,
147
+ "grad_norm": 2.2899842262268066,
148
+ "kl": 0.16378580778837204,
149
  "learning_rate": 5.271487265090163e-08,
150
  "loss": 0.0002,
151
+ "reward": 3.338853247463703,
152
+ "reward_std": 4.333399765193462,
153
+ "rewards/concensus_correctness_reward_func": 1.875,
154
+ "rewards/consensus_reward_func": 0.0,
155
  "rewards/cumulative_reward_2": 0.0,
156
+ "rewards/final_correctness_reward_func": 0.9375,
157
+ "rewards/question_recreation_reward_func": 0.46935323998332024,
158
  "rewards/soft_format_reward_func": 0.0,
159
+ "rewards/strict_format_reward_func": 0.015625,
160
+ "rewards/xmlcount_reward_func": 0.04137500189244747,
161
  "step": 16
162
  },
163
  {
164
+ "completion_length": 453.0625,
165
+ "epoch": 2.25,
166
+ "grad_norm": 3.0789365768432617,
167
+ "kl": 0.19641543040052056,
168
  "learning_rate": 1.3545689574841341e-08,
169
  "loss": 0.0002,
170
+ "reward": 2.0624579712748528,
171
+ "reward_std": 1.581074796617031,
172
+ "rewards/concensus_correctness_reward_func": 0.3570624999701977,
173
+ "rewards/consensus_reward_func": 0.1875,
174
  "rewards/cumulative_reward_2": 0.0,
175
+ "rewards/final_correctness_reward_func": 0.6875,
176
+ "rewards/question_recreation_reward_func": 0.5363016966730356,
177
  "rewards/soft_format_reward_func": 0.0,
178
+ "rewards/strict_format_reward_func": 0.046875,
179
+ "rewards/xmlcount_reward_func": 0.2472187504172325,
180
  "step": 18
181
  },
182
  {
183
+ "completion_length": 435.53125,
184
+ "epoch": 2.5,
185
+ "grad_norm": 2.3260388374328613,
186
+ "kl": 0.1985931508243084,
187
  "learning_rate": 0.0,
188
  "loss": 0.0002,
189
+ "reward": 0.8565310798585415,
190
+ "reward_std": 1.408127337694168,
191
+ "rewards/concensus_correctness_reward_func": 0.0,
192
+ "rewards/consensus_reward_func": 0.125,
193
  "rewards/cumulative_reward_2": 0.0,
194
+ "rewards/final_correctness_reward_func": 0.25,
195
+ "rewards/question_recreation_reward_func": 0.33818733133375645,
196
  "rewards/soft_format_reward_func": 0.0,
197
+ "rewards/strict_format_reward_func": 0.03125,
198
+ "rewards/xmlcount_reward_func": 0.11209375411272049,
199
  "step": 20
200
  },
201
  {
202
+ "epoch": 2.5,
203
  "step": 20,
204
  "total_flos": 0.0,
205
+ "train_loss": 0.00019092126749455928,
206
+ "train_runtime": 976.6916,
207
+ "train_samples_per_second": 0.328,
208
+ "train_steps_per_second": 0.02
209
  }
210
  ],
211
  "logging_steps": 2,
212
  "max_steps": 20,
213
  "num_input_tokens_seen": 0,
214
+ "num_train_epochs": 3,
215
  "save_steps": 25,
216
  "stateful_callbacks": {
217
  "TrainerControl": {