chloeli committed on
Commit
28c454a
·
verified ·
1 Parent(s): a7762a5

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. all_results.json +4 -9
  3. train_results.json +4 -4
  4. trainer_state.json +87 -87
README.md CHANGED
@@ -1,19 +1,17 @@
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
3
- datasets: MelinaLaimon/stream-of-search
4
  library_name: transformers
5
- model_name: Qwen/Qwen2.5-0.5B-Instruct
6
  tags:
7
  - generated_from_trainer
8
- - alignment-handbook
9
  - trl
10
  - sft
11
  licence: license
12
  ---
13
 
14
- # Model Card for Qwen/Qwen2.5-0.5B-Instruct
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the [MelinaLaimon/stream-of-search](https://huggingface.co/datasets/MelinaLaimon/stream-of-search) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chloeli/huggingface/runs/g1ot5omk)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
2
  base_model: Qwen/Qwen2.5-0.5B-Instruct
 
3
  library_name: transformers
4
+ model_name: qwen-2.5-0.5B-instruct-sft-lora-countdown-search-1k
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
10
  ---
11
 
12
+ # Model Card for qwen-2.5-0.5B-instruct-sft-lora-countdown-search-1k
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chloeli/huggingface/runs/fi2orchz)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "eval_loss": 0.042979925870895386,
3
- "eval_runtime": 84.0981,
4
- "eval_samples": 1000,
5
- "eval_samples_per_second": 11.891,
6
- "eval_steps_per_second": 1.486,
7
  "total_flos": 4505964279496704.0,
8
- "train_loss": 0.10702498173713684,
9
- "train_runtime": 369.1598,
10
  "train_samples": 1000,
11
- "train_samples_per_second": 2.709,
12
- "train_steps_per_second": 0.339
13
  }
 
1
  {
 
 
 
 
 
2
  "total_flos": 4505964279496704.0,
3
+ "train_loss": 0.1011103401184082,
4
+ "train_runtime": 450.8333,
5
  "train_samples": 1000,
6
+ "train_samples_per_second": 2.218,
7
+ "train_steps_per_second": 0.277
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 4505964279496704.0,
3
- "train_loss": 0.10702498173713684,
4
- "train_runtime": 369.1598,
5
  "train_samples": 1000,
6
- "train_samples_per_second": 2.709,
7
- "train_steps_per_second": 0.339
8
  }
 
1
  {
2
  "total_flos": 4505964279496704.0,
3
+ "train_loss": 0.1011103401184082,
4
+ "train_runtime": 450.8333,
5
  "train_samples": 1000,
6
+ "train_samples_per_second": 2.218,
7
+ "train_steps_per_second": 0.277
8
  }
trainer_state.json CHANGED
@@ -10,229 +10,229 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.008,
13
- "grad_norm": 1.0538341999053955,
14
  "learning_rate": 1.5384615384615387e-05,
15
- "loss": 0.5183,
16
- "mean_token_accuracy": 0.8873351514339447,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.04,
21
- "grad_norm": 0.8182336091995239,
22
  "learning_rate": 7.692307692307693e-05,
23
- "loss": 0.5607,
24
- "mean_token_accuracy": 0.8784972950816154,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 0.08,
29
- "grad_norm": 0.36211511492729187,
30
  "learning_rate": 0.00015384615384615385,
31
- "loss": 0.4512,
32
- "mean_token_accuracy": 0.8926618993282318,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.12,
37
- "grad_norm": 0.2841012179851532,
38
  "learning_rate": 0.00019984268150178167,
39
- "loss": 0.3501,
40
- "mean_token_accuracy": 0.9094601750373841,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.16,
45
- "grad_norm": 0.21819071471691132,
46
  "learning_rate": 0.00019807852804032305,
47
- "loss": 0.2307,
48
- "mean_token_accuracy": 0.9370548307895661,
49
  "step": 20
50
  },
51
  {
52
  "epoch": 0.2,
53
- "grad_norm": 0.1616477072238922,
54
  "learning_rate": 0.00019438833303083678,
55
- "loss": 0.1308,
56
- "mean_token_accuracy": 0.9601739048957825,
57
  "step": 25
58
  },
59
  {
60
  "epoch": 0.24,
61
- "grad_norm": 0.15014736354351044,
62
  "learning_rate": 0.00018884456359788724,
63
- "loss": 0.0893,
64
- "mean_token_accuracy": 0.9702124178409577,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.28,
69
- "grad_norm": 0.11667617410421371,
70
  "learning_rate": 0.00018155608689592604,
71
- "loss": 0.07,
72
- "mean_token_accuracy": 0.9737478196620941,
73
  "step": 35
74
  },
75
  {
76
  "epoch": 0.32,
77
- "grad_norm": 0.1287766844034195,
78
  "learning_rate": 0.0001726660322034027,
79
- "loss": 0.0599,
80
- "mean_token_accuracy": 0.9765061557292938,
81
  "step": 40
82
  },
83
  {
84
  "epoch": 0.36,
85
- "grad_norm": 0.15566693246364594,
86
  "learning_rate": 0.00016234898018587337,
87
- "loss": 0.0552,
88
- "mean_token_accuracy": 0.9771042406558991,
89
  "step": 45
90
  },
91
  {
92
  "epoch": 0.4,
93
- "grad_norm": 0.10337762534618378,
94
  "learning_rate": 0.00015080753452465296,
95
- "loss": 0.0524,
96
- "mean_token_accuracy": 0.977786386013031,
97
  "step": 50
98
  },
99
  {
100
  "epoch": 0.44,
101
- "grad_norm": 0.10235860198736191,
102
  "learning_rate": 0.000138268343236509,
103
- "loss": 0.046,
104
- "mean_token_accuracy": 0.9806892037391662,
105
  "step": 55
106
  },
107
  {
108
  "epoch": 0.48,
109
- "grad_norm": 0.08626584708690643,
110
  "learning_rate": 0.0001249776478167227,
111
- "loss": 0.0448,
112
- "mean_token_accuracy": 0.9810592472553253,
113
  "step": 60
114
  },
115
  {
116
  "epoch": 0.52,
117
- "grad_norm": 0.09696436673402786,
118
  "learning_rate": 0.00011119644761033078,
119
- "loss": 0.0404,
120
- "mean_token_accuracy": 0.9841841220855713,
121
  "step": 65
122
  },
123
  {
124
  "epoch": 0.56,
125
- "grad_norm": 0.10338141024112701,
126
  "learning_rate": 9.719537437241312e-05,
127
- "loss": 0.0444,
128
- "mean_token_accuracy": 0.9812480449676514,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.6,
133
- "grad_norm": 0.08246627449989319,
134
  "learning_rate": 8.324937766952638e-05,
135
- "loss": 0.0419,
136
- "mean_token_accuracy": 0.9823121666908264,
137
  "step": 75
138
  },
139
  {
140
  "epoch": 0.64,
141
- "grad_norm": 0.1194208487868309,
142
  "learning_rate": 6.963232548903853e-05,
143
- "loss": 0.0423,
144
- "mean_token_accuracy": 0.9828369557857514,
145
  "step": 80
146
  },
147
  {
148
  "epoch": 0.68,
149
- "grad_norm": 0.08132021129131317,
150
  "learning_rate": 5.6611626088244194e-05,
151
- "loss": 0.0414,
152
- "mean_token_accuracy": 0.9829596102237701,
153
  "step": 85
154
  },
155
  {
156
  "epoch": 0.72,
157
- "grad_norm": 0.07309089601039886,
158
  "learning_rate": 4.444297669803981e-05,
159
- "loss": 0.0439,
160
- "mean_token_accuracy": 0.9817502200603485,
161
  "step": 90
162
  },
163
  {
164
  "epoch": 0.76,
165
- "grad_norm": 0.07271327078342438,
166
  "learning_rate": 3.336534220479961e-05,
167
- "loss": 0.0389,
168
- "mean_token_accuracy": 0.9838515996932984,
169
  "step": 95
170
  },
171
  {
172
  "epoch": 0.8,
173
- "grad_norm": 0.08715524524450302,
174
  "learning_rate": 2.3596262417839255e-05,
175
- "loss": 0.0416,
176
- "mean_token_accuracy": 0.9820100963115692,
177
  "step": 100
178
  },
179
  {
180
  "epoch": 0.84,
181
- "grad_norm": 0.07210730016231537,
182
  "learning_rate": 1.5327580077171587e-05,
183
- "loss": 0.0436,
184
- "mean_token_accuracy": 0.9812193930149078,
185
  "step": 105
186
  },
187
  {
188
  "epoch": 0.88,
189
- "grad_norm": 0.07659150660037994,
190
  "learning_rate": 8.72167349386811e-06,
191
- "loss": 0.0415,
192
- "mean_token_accuracy": 0.9827143549919128,
193
  "step": 110
194
  },
195
  {
196
  "epoch": 0.92,
197
- "grad_norm": 0.06774768978357315,
198
  "learning_rate": 3.908267805490051e-06,
199
- "loss": 0.0417,
200
- "mean_token_accuracy": 0.9826048791408539,
201
  "step": 115
202
  },
203
  {
204
  "epoch": 0.96,
205
- "grad_norm": 0.0939616933465004,
206
  "learning_rate": 9.818874663554357e-07,
207
- "loss": 0.0411,
208
- "mean_token_accuracy": 0.98243528008461,
209
  "step": 120
210
  },
211
  {
212
  "epoch": 1.0,
213
- "grad_norm": 0.07217985391616821,
214
  "learning_rate": 0.0,
215
- "loss": 0.0404,
216
- "mean_token_accuracy": 0.9828806817531586,
217
  "step": 125
218
  },
219
  {
220
  "epoch": 1.0,
221
- "eval_loss": 0.04066629707813263,
222
- "eval_mean_token_accuracy": 0.9829456944465638,
223
- "eval_runtime": 84.041,
224
- "eval_samples_per_second": 11.899,
225
- "eval_steps_per_second": 1.487,
226
  "step": 125
227
  },
228
  {
229
  "epoch": 1.0,
230
  "step": 125,
231
  "total_flos": 4505964279496704.0,
232
- "train_loss": 0.10702498173713684,
233
- "train_runtime": 369.1598,
234
- "train_samples_per_second": 2.709,
235
- "train_steps_per_second": 0.339
236
  }
237
  ],
238
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.008,
13
+ "grad_norm": 1.0864604711532593,
14
  "learning_rate": 1.5384615384615387e-05,
15
+ "loss": 0.4927,
16
+ "mean_token_accuracy": 0.8949072062969208,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.04,
21
+ "grad_norm": 0.8317855596542358,
22
  "learning_rate": 7.692307692307693e-05,
23
+ "loss": 0.5321,
24
+ "mean_token_accuracy": 0.8870031237602234,
25
  "step": 5
26
  },
27
  {
28
  "epoch": 0.08,
29
+ "grad_norm": 0.35360249876976013,
30
  "learning_rate": 0.00015384615384615385,
31
+ "loss": 0.4318,
32
+ "mean_token_accuracy": 0.8946158409118652,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.12,
37
+ "grad_norm": 0.26475989818573,
38
  "learning_rate": 0.00019984268150178167,
39
+ "loss": 0.3243,
40
+ "mean_token_accuracy": 0.9152041494846344,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.16,
45
+ "grad_norm": 0.21459443867206573,
46
  "learning_rate": 0.00019807852804032305,
47
+ "loss": 0.2103,
48
+ "mean_token_accuracy": 0.941685950756073,
49
  "step": 20
50
  },
51
  {
52
  "epoch": 0.2,
53
+ "grad_norm": 0.15868614614009857,
54
  "learning_rate": 0.00019438833303083678,
55
+ "loss": 0.1155,
56
+ "mean_token_accuracy": 0.9637313485145569,
57
  "step": 25
58
  },
59
  {
60
  "epoch": 0.24,
61
+ "grad_norm": 0.15498687326908112,
62
  "learning_rate": 0.00018884456359788724,
63
+ "loss": 0.0816,
64
+ "mean_token_accuracy": 0.9711011052131653,
65
  "step": 30
66
  },
67
  {
68
  "epoch": 0.28,
69
+ "grad_norm": 0.11740818619728088,
70
  "learning_rate": 0.00018155608689592604,
71
+ "loss": 0.0622,
72
+ "mean_token_accuracy": 0.9760455787181854,
73
  "step": 35
74
  },
75
  {
76
  "epoch": 0.32,
77
+ "grad_norm": 0.10971173644065857,
78
  "learning_rate": 0.0001726660322034027,
79
+ "loss": 0.0545,
80
+ "mean_token_accuracy": 0.9776363372802734,
81
  "step": 40
82
  },
83
  {
84
  "epoch": 0.36,
85
+ "grad_norm": 0.08847042918205261,
86
  "learning_rate": 0.00016234898018587337,
87
+ "loss": 0.0519,
88
+ "mean_token_accuracy": 0.977908480167389,
89
  "step": 45
90
  },
91
  {
92
  "epoch": 0.4,
93
+ "grad_norm": 0.09057971835136414,
94
  "learning_rate": 0.00015080753452465296,
95
+ "loss": 0.0497,
96
+ "mean_token_accuracy": 0.9789802670478821,
97
  "step": 50
98
  },
99
  {
100
  "epoch": 0.44,
101
+ "grad_norm": 0.08262317627668381,
102
  "learning_rate": 0.000138268343236509,
103
+ "loss": 0.0434,
104
+ "mean_token_accuracy": 0.9816033959388732,
105
  "step": 55
106
  },
107
  {
108
  "epoch": 0.48,
109
+ "grad_norm": 0.08489084988832474,
110
  "learning_rate": 0.0001249776478167227,
111
+ "loss": 0.043,
112
+ "mean_token_accuracy": 0.9817807137966156,
113
  "step": 60
114
  },
115
  {
116
  "epoch": 0.52,
117
+ "grad_norm": 0.07146206498146057,
118
  "learning_rate": 0.00011119644761033078,
119
+ "loss": 0.0397,
120
+ "mean_token_accuracy": 0.9835689246654511,
121
  "step": 65
122
  },
123
  {
124
  "epoch": 0.56,
125
+ "grad_norm": 0.08493078500032425,
126
  "learning_rate": 9.719537437241312e-05,
127
+ "loss": 0.0432,
128
+ "mean_token_accuracy": 0.981769073009491,
129
  "step": 70
130
  },
131
  {
132
  "epoch": 0.6,
133
+ "grad_norm": 0.06853792816400528,
134
  "learning_rate": 8.324937766952638e-05,
135
+ "loss": 0.0416,
136
+ "mean_token_accuracy": 0.9825034320354462,
137
  "step": 75
138
  },
139
  {
140
  "epoch": 0.64,
141
+ "grad_norm": 0.07850378751754761,
142
  "learning_rate": 6.963232548903853e-05,
143
+ "loss": 0.0416,
144
+ "mean_token_accuracy": 0.9826254367828369,
145
  "step": 80
146
  },
147
  {
148
  "epoch": 0.68,
149
+ "grad_norm": 0.10064064711332321,
150
  "learning_rate": 5.6611626088244194e-05,
151
+ "loss": 0.0411,
152
+ "mean_token_accuracy": 0.9827791035175324,
153
  "step": 85
154
  },
155
  {
156
  "epoch": 0.72,
157
+ "grad_norm": 0.07430274784564972,
158
  "learning_rate": 4.444297669803981e-05,
159
+ "loss": 0.0432,
160
+ "mean_token_accuracy": 0.9819111526012421,
161
  "step": 90
162
  },
163
  {
164
  "epoch": 0.76,
165
+ "grad_norm": 0.05630122497677803,
166
  "learning_rate": 3.336534220479961e-05,
167
+ "loss": 0.0381,
168
+ "mean_token_accuracy": 0.984023529291153,
169
  "step": 95
170
  },
171
  {
172
  "epoch": 0.8,
173
+ "grad_norm": 0.07422107458114624,
174
  "learning_rate": 2.3596262417839255e-05,
175
+ "loss": 0.041,
176
+ "mean_token_accuracy": 0.9828759372234345,
177
  "step": 100
178
  },
179
  {
180
  "epoch": 0.84,
181
+ "grad_norm": 0.06742699444293976,
182
  "learning_rate": 1.5327580077171587e-05,
183
+ "loss": 0.0435,
184
+ "mean_token_accuracy": 0.9813436925411224,
185
  "step": 105
186
  },
187
  {
188
  "epoch": 0.88,
189
+ "grad_norm": 0.07175164669752121,
190
  "learning_rate": 8.72167349386811e-06,
191
+ "loss": 0.0406,
192
+ "mean_token_accuracy": 0.9831586062908173,
193
  "step": 110
194
  },
195
  {
196
  "epoch": 0.92,
197
+ "grad_norm": 0.06535231322050095,
198
  "learning_rate": 3.908267805490051e-06,
199
+ "loss": 0.0411,
200
+ "mean_token_accuracy": 0.9826524317264557,
201
  "step": 115
202
  },
203
  {
204
  "epoch": 0.96,
205
+ "grad_norm": 0.07355163991451263,
206
  "learning_rate": 9.818874663554357e-07,
207
+ "loss": 0.0407,
208
+ "mean_token_accuracy": 0.9827761054039001,
209
  "step": 120
210
  },
211
  {
212
  "epoch": 1.0,
213
+ "grad_norm": 0.07261276245117188,
214
  "learning_rate": 0.0,
215
+ "loss": 0.04,
216
+ "mean_token_accuracy": 0.9830402076244354,
217
  "step": 125
218
  },
219
  {
220
  "epoch": 1.0,
221
+ "eval_loss": 0.0398690365254879,
222
+ "eval_mean_token_accuracy": 0.9831665050983429,
223
+ "eval_runtime": 167.2529,
224
+ "eval_samples_per_second": 11.958,
225
+ "eval_steps_per_second": 1.495,
226
  "step": 125
227
  },
228
  {
229
  "epoch": 1.0,
230
  "step": 125,
231
  "total_flos": 4505964279496704.0,
232
+ "train_loss": 0.1011103401184082,
233
+ "train_runtime": 450.8333,
234
+ "train_samples_per_second": 2.218,
235
+ "train_steps_per_second": 0.277
236
  }
237
  ],
238
  "logging_steps": 5,