wangrongsheng commited on
Commit
dbd399b
·
1 Parent(s): ef83107

commit from root

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +3 -0
  2. adapter_config.json +19 -0
  3. adapter_model.bin +3 -0
  4. all_results.json +7 -0
  5. checkpoint-1000/README.md +3 -0
  6. checkpoint-1000/adapter_config.json +19 -0
  7. checkpoint-1000/adapter_model.bin +3 -0
  8. checkpoint-1000/finetuning_args.json +13 -0
  9. checkpoint-1000/optimizer.pt +3 -0
  10. checkpoint-1000/rng_state_0.pth +3 -0
  11. checkpoint-1000/rng_state_1.pth +3 -0
  12. checkpoint-1000/rng_state_2.pth +3 -0
  13. checkpoint-1000/rng_state_3.pth +3 -0
  14. checkpoint-1000/scheduler.pt +3 -0
  15. checkpoint-1000/trainer_state.json +616 -0
  16. checkpoint-1000/training_args.bin +3 -0
  17. checkpoint-10000/README.md +3 -0
  18. checkpoint-10000/adapter_config.json +19 -0
  19. checkpoint-10000/adapter_model.bin +3 -0
  20. checkpoint-10000/finetuning_args.json +13 -0
  21. checkpoint-10000/optimizer.pt +3 -0
  22. checkpoint-10000/rng_state_0.pth +3 -0
  23. checkpoint-10000/rng_state_1.pth +3 -0
  24. checkpoint-10000/rng_state_2.pth +3 -0
  25. checkpoint-10000/rng_state_3.pth +3 -0
  26. checkpoint-10000/scheduler.pt +3 -0
  27. checkpoint-10000/trainer_state.json +0 -0
  28. checkpoint-10000/training_args.bin +3 -0
  29. checkpoint-2000/README.md +3 -0
  30. checkpoint-2000/adapter_config.json +19 -0
  31. checkpoint-2000/adapter_model.bin +3 -0
  32. checkpoint-2000/finetuning_args.json +13 -0
  33. checkpoint-2000/optimizer.pt +3 -0
  34. checkpoint-2000/rng_state_0.pth +3 -0
  35. checkpoint-2000/rng_state_1.pth +3 -0
  36. checkpoint-2000/rng_state_2.pth +3 -0
  37. checkpoint-2000/rng_state_3.pth +3 -0
  38. checkpoint-2000/scheduler.pt +3 -0
  39. checkpoint-2000/trainer_state.json +1216 -0
  40. checkpoint-2000/training_args.bin +3 -0
  41. checkpoint-3000/README.md +3 -0
  42. checkpoint-3000/adapter_config.json +19 -0
  43. checkpoint-3000/adapter_model.bin +3 -0
  44. checkpoint-3000/finetuning_args.json +13 -0
  45. checkpoint-3000/optimizer.pt +3 -0
  46. checkpoint-3000/rng_state_0.pth +3 -0
  47. checkpoint-3000/rng_state_1.pth +3 -0
  48. checkpoint-3000/rng_state_2.pth +3 -0
  49. checkpoint-3000/rng_state_3.pth +3 -0
  50. checkpoint-3000/scheduler.pt +3 -0
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "chatglm2-6b",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 32.0,
10
+ "lora_dropout": 0.1,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "query_key_value"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c6758d97d365c10604d10e27b6be873c3ad2e83a9785463a6ba10bfe630d8e
3
+ size 7819417
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "train_loss": 1.978195481530868,
4
+ "train_runtime": 26343.521,
5
+ "train_samples_per_second": 26.232,
6
+ "train_steps_per_second": 0.41
7
+ }
checkpoint-1000/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
checkpoint-1000/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "chatglm2-6b",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 32.0,
10
+ "lora_dropout": 0.1,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "query_key_value"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
checkpoint-1000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d168ef0b419dc788a731c7120dada236c6fbae4d8e809d70c45f93f345fa67ab
3
+ size 7819417
checkpoint-1000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "query_key_value"
8
+ ],
9
+ "name_module_trainable": "mlp",
10
+ "num_layer_trainable": 3,
11
+ "pre_seq_len": 64,
12
+ "prefix_projection": false
13
+ }
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:207da355524f4452d88bdbb9d13f0ed7b4aa02f827dc0adeaa53dd3c4a1d13a1
3
+ size 15644485
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2684ce62e8d7c210c23c57d059a952c18a26260da209f1d6ebe314693e4974
3
+ size 18679
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de719cf6845a3b19b96284a78513c960a388c70f13766d1ea991700b32e79d53
3
+ size 18679
checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e0772c04da6662579787b6678d4d2795a96e4da23118d565f8dd6c2e25617b
3
+ size 18679
checkpoint-1000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f40b16d80d11c0cf20f8dc79b69affdfca686fdae7995161a0db1c59c9ceed
3
+ size 18679
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f01619e7f8d789aa01ef80c209d2e88c5d1080670973e05d35c732aa495fa39
3
+ size 627
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,616 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.18521948508983144,
5
+ "global_step": 1000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 0.0009999978838190456,
13
+ "loss": 2.9794,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 0.0009999915352940948,
19
+ "loss": 2.3885,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.01,
24
+ "learning_rate": 0.000999980954478887,
25
+ "loss": 2.3057,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.01,
30
+ "learning_rate": 0.000999966141462985,
31
+ "loss": 2.2692,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.01,
36
+ "learning_rate": 0.000999947096371777,
37
+ "loss": 2.2576,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.01,
42
+ "learning_rate": 0.0009999238193664748,
43
+ "loss": 2.2388,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 0.0009998963106441117,
49
+ "loss": 2.2523,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 0.0009998645704375414,
55
+ "loss": 2.218,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.02,
60
+ "learning_rate": 0.000999828599015436,
61
+ "loss": 2.2457,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.02,
66
+ "learning_rate": 0.0009997883966822835,
67
+ "loss": 2.198,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.02,
72
+ "learning_rate": 0.0009997439637783859,
73
+ "loss": 2.2013,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.02,
78
+ "learning_rate": 0.000999695300679855,
79
+ "loss": 2.1765,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.02,
84
+ "learning_rate": 0.0009996424077986109,
85
+ "loss": 2.1741,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.03,
90
+ "learning_rate": 0.000999585285582377,
91
+ "loss": 2.1898,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.03,
96
+ "learning_rate": 0.0009995239345146772,
97
+ "loss": 2.1466,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.03,
102
+ "learning_rate": 0.0009994583551148314,
103
+ "loss": 2.1423,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.03,
108
+ "learning_rate": 0.0009993885479379506,
109
+ "loss": 2.1451,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 0.03,
114
+ "learning_rate": 0.000999314513574934,
115
+ "loss": 2.202,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.04,
120
+ "learning_rate": 0.0009992362526524616,
121
+ "loss": 2.1208,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 0.04,
126
+ "learning_rate": 0.0009991537658329906,
127
+ "loss": 2.1591,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.04,
132
+ "learning_rate": 0.000999067053814749,
133
+ "loss": 2.1788,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.04,
138
+ "learning_rate": 0.0009989761173317304,
139
+ "loss": 2.147,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.04,
144
+ "learning_rate": 0.000998880957153687,
145
+ "loss": 2.1249,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.04,
150
+ "learning_rate": 0.000998781574086123,
151
+ "loss": 2.165,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.05,
156
+ "learning_rate": 0.000998677968970289,
157
+ "loss": 2.1428,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.05,
162
+ "learning_rate": 0.0009985701426831735,
163
+ "loss": 2.1384,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.05,
168
+ "learning_rate": 0.0009984580961374964,
169
+ "loss": 2.1585,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.05,
174
+ "learning_rate": 0.0009983418302817008,
175
+ "loss": 2.1156,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.05,
180
+ "learning_rate": 0.0009982213460999448,
181
+ "loss": 2.0811,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.06,
186
+ "learning_rate": 0.000998096644612094,
187
+ "loss": 2.1081,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.06,
192
+ "learning_rate": 0.0009979677268737118,
193
+ "loss": 2.1246,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 0.06,
198
+ "learning_rate": 0.0009978345939760515,
199
+ "loss": 2.1229,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 0.06,
204
+ "learning_rate": 0.000997697247046046,
205
+ "loss": 2.1033,
206
+ "step": 330
207
+ },
208
+ {
209
+ "epoch": 0.06,
210
+ "learning_rate": 0.0009975556872462994,
211
+ "loss": 2.0931,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 0.06,
216
+ "learning_rate": 0.000997409915775076,
217
+ "loss": 2.1206,
218
+ "step": 350
219
+ },
220
+ {
221
+ "epoch": 0.07,
222
+ "learning_rate": 0.0009972599338662915,
223
+ "loss": 2.0537,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 0.07,
228
+ "learning_rate": 0.0009971057427895012,
229
+ "loss": 2.0762,
230
+ "step": 370
231
+ },
232
+ {
233
+ "epoch": 0.07,
234
+ "learning_rate": 0.0009969473438498897,
235
+ "loss": 2.0883,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 0.07,
240
+ "learning_rate": 0.0009967847383882604,
241
+ "loss": 2.1174,
242
+ "step": 390
243
+ },
244
+ {
245
+ "epoch": 0.07,
246
+ "learning_rate": 0.0009966179277810239,
247
+ "loss": 2.1111,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 0.08,
252
+ "learning_rate": 0.0009964469134401855,
253
+ "loss": 2.1288,
254
+ "step": 410
255
+ },
256
+ {
257
+ "epoch": 0.08,
258
+ "learning_rate": 0.0009962716968133346,
259
+ "loss": 2.0967,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 0.08,
264
+ "learning_rate": 0.0009960922793836318,
265
+ "loss": 2.1216,
266
+ "step": 430
267
+ },
268
+ {
269
+ "epoch": 0.08,
270
+ "learning_rate": 0.0009959086626697955,
271
+ "loss": 2.0924,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 0.08,
276
+ "learning_rate": 0.0009957208482260908,
277
+ "loss": 2.0809,
278
+ "step": 450
279
+ },
280
+ {
281
+ "epoch": 0.09,
282
+ "learning_rate": 0.0009955288376423152,
283
+ "loss": 2.1082,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 0.09,
288
+ "learning_rate": 0.0009953326325437852,
289
+ "loss": 2.0885,
290
+ "step": 470
291
+ },
292
+ {
293
+ "epoch": 0.09,
294
+ "learning_rate": 0.0009951322345913224,
295
+ "loss": 2.1133,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 0.09,
300
+ "learning_rate": 0.0009949276454812408,
301
+ "loss": 2.0844,
302
+ "step": 490
303
+ },
304
+ {
305
+ "epoch": 0.09,
306
+ "learning_rate": 0.00099471886694533,
307
+ "loss": 2.0796,
308
+ "step": 500
309
+ },
310
+ {
311
+ "epoch": 0.09,
312
+ "learning_rate": 0.0009945059007508434,
313
+ "loss": 2.1255,
314
+ "step": 510
315
+ },
316
+ {
317
+ "epoch": 0.1,
318
+ "learning_rate": 0.0009942887487004804,
319
+ "loss": 2.0913,
320
+ "step": 520
321
+ },
322
+ {
323
+ "epoch": 0.1,
324
+ "learning_rate": 0.0009940674126323733,
325
+ "loss": 2.1003,
326
+ "step": 530
327
+ },
328
+ {
329
+ "epoch": 0.1,
330
+ "learning_rate": 0.0009938418944200709,
331
+ "loss": 2.0541,
332
+ "step": 540
333
+ },
334
+ {
335
+ "epoch": 0.1,
336
+ "learning_rate": 0.0009936121959725223,
337
+ "loss": 2.0523,
338
+ "step": 550
339
+ },
340
+ {
341
+ "epoch": 0.1,
342
+ "learning_rate": 0.0009933783192340618,
343
+ "loss": 2.1225,
344
+ "step": 560
345
+ },
346
+ {
347
+ "epoch": 0.11,
348
+ "learning_rate": 0.0009931402661843911,
349
+ "loss": 2.0446,
350
+ "step": 570
351
+ },
352
+ {
353
+ "epoch": 0.11,
354
+ "learning_rate": 0.000992898038838564,
355
+ "loss": 2.0921,
356
+ "step": 580
357
+ },
358
+ {
359
+ "epoch": 0.11,
360
+ "learning_rate": 0.0009926516392469674,
361
+ "loss": 2.1081,
362
+ "step": 590
363
+ },
364
+ {
365
+ "epoch": 0.11,
366
+ "learning_rate": 0.0009924010694953064,
367
+ "loss": 2.0734,
368
+ "step": 600
369
+ },
370
+ {
371
+ "epoch": 0.11,
372
+ "learning_rate": 0.0009921463317045843,
373
+ "loss": 2.0652,
374
+ "step": 610
375
+ },
376
+ {
377
+ "epoch": 0.11,
378
+ "learning_rate": 0.0009918874280310862,
379
+ "loss": 2.0818,
380
+ "step": 620
381
+ },
382
+ {
383
+ "epoch": 0.12,
384
+ "learning_rate": 0.0009916243606663605,
385
+ "loss": 2.0776,
386
+ "step": 630
387
+ },
388
+ {
389
+ "epoch": 0.12,
390
+ "learning_rate": 0.0009913571318371994,
391
+ "loss": 2.1025,
392
+ "step": 640
393
+ },
394
+ {
395
+ "epoch": 0.12,
396
+ "learning_rate": 0.0009910857438056215,
397
+ "loss": 2.066,
398
+ "step": 650
399
+ },
400
+ {
401
+ "epoch": 0.12,
402
+ "learning_rate": 0.0009908101988688512,
403
+ "loss": 2.0575,
404
+ "step": 660
405
+ },
406
+ {
407
+ "epoch": 0.12,
408
+ "learning_rate": 0.0009905304993593008,
409
+ "loss": 2.1269,
410
+ "step": 670
411
+ },
412
+ {
413
+ "epoch": 0.13,
414
+ "learning_rate": 0.0009902466476445486,
415
+ "loss": 2.0518,
416
+ "step": 680
417
+ },
418
+ {
419
+ "epoch": 0.13,
420
+ "learning_rate": 0.0009899586461273218,
421
+ "loss": 2.0698,
422
+ "step": 690
423
+ },
424
+ {
425
+ "epoch": 0.13,
426
+ "learning_rate": 0.000989666497245473,
427
+ "loss": 2.0988,
428
+ "step": 700
429
+ },
430
+ {
431
+ "epoch": 0.13,
432
+ "learning_rate": 0.0009893702034719624,
433
+ "loss": 2.0986,
434
+ "step": 710
435
+ },
436
+ {
437
+ "epoch": 0.13,
438
+ "learning_rate": 0.0009890697673148345,
439
+ "loss": 2.0237,
440
+ "step": 720
441
+ },
442
+ {
443
+ "epoch": 0.14,
444
+ "learning_rate": 0.0009887651913171986,
445
+ "loss": 2.0027,
446
+ "step": 730
447
+ },
448
+ {
449
+ "epoch": 0.14,
450
+ "learning_rate": 0.0009884564780572064,
451
+ "loss": 2.0563,
452
+ "step": 740
453
+ },
454
+ {
455
+ "epoch": 0.14,
456
+ "learning_rate": 0.0009881436301480305,
457
+ "loss": 2.0624,
458
+ "step": 750
459
+ },
460
+ {
461
+ "epoch": 0.14,
462
+ "learning_rate": 0.000987826650237842,
463
+ "loss": 2.0926,
464
+ "step": 760
465
+ },
466
+ {
467
+ "epoch": 0.14,
468
+ "learning_rate": 0.000987505541009788,
469
+ "loss": 2.0585,
470
+ "step": 770
471
+ },
472
+ {
473
+ "epoch": 0.14,
474
+ "learning_rate": 0.0009871803051819696,
475
+ "loss": 2.0494,
476
+ "step": 780
477
+ },
478
+ {
479
+ "epoch": 0.15,
480
+ "learning_rate": 0.0009868509455074183,
481
+ "loss": 2.0106,
482
+ "step": 790
483
+ },
484
+ {
485
+ "epoch": 0.15,
486
+ "learning_rate": 0.0009865174647740729,
487
+ "loss": 2.0861,
488
+ "step": 800
489
+ },
490
+ {
491
+ "epoch": 0.15,
492
+ "learning_rate": 0.0009861798658047556,
493
+ "loss": 2.0478,
494
+ "step": 810
495
+ },
496
+ {
497
+ "epoch": 0.15,
498
+ "learning_rate": 0.0009858381514571484,
499
+ "loss": 2.0469,
500
+ "step": 820
501
+ },
502
+ {
503
+ "epoch": 0.15,
504
+ "learning_rate": 0.000985492324623769,
505
+ "loss": 2.0671,
506
+ "step": 830
507
+ },
508
+ {
509
+ "epoch": 0.16,
510
+ "learning_rate": 0.0009851423882319458,
511
+ "loss": 2.0808,
512
+ "step": 840
513
+ },
514
+ {
515
+ "epoch": 0.16,
516
+ "learning_rate": 0.0009847883452437937,
517
+ "loss": 2.0331,
518
+ "step": 850
519
+ },
520
+ {
521
+ "epoch": 0.16,
522
+ "learning_rate": 0.0009844301986561893,
523
+ "loss": 2.0295,
524
+ "step": 860
525
+ },
526
+ {
527
+ "epoch": 0.16,
528
+ "learning_rate": 0.000984067951500744,
529
+ "loss": 2.0873,
530
+ "step": 870
531
+ },
532
+ {
533
+ "epoch": 0.16,
534
+ "learning_rate": 0.00098370160684378,
535
+ "loss": 2.1038,
536
+ "step": 880
537
+ },
538
+ {
539
+ "epoch": 0.16,
540
+ "learning_rate": 0.0009833311677863042,
541
+ "loss": 2.0337,
542
+ "step": 890
543
+ },
544
+ {
545
+ "epoch": 0.17,
546
+ "learning_rate": 0.0009829566374639801,
547
+ "loss": 2.0407,
548
+ "step": 900
549
+ },
550
+ {
551
+ "epoch": 0.17,
552
+ "learning_rate": 0.0009825780190471042,
553
+ "loss": 2.1049,
554
+ "step": 910
555
+ },
556
+ {
557
+ "epoch": 0.17,
558
+ "learning_rate": 0.000982195315740576,
559
+ "loss": 2.0475,
560
+ "step": 920
561
+ },
562
+ {
563
+ "epoch": 0.17,
564
+ "learning_rate": 0.0009818085307838741,
565
+ "loss": 2.0624,
566
+ "step": 930
567
+ },
568
+ {
569
+ "epoch": 0.17,
570
+ "learning_rate": 0.000981417667451026,
571
+ "loss": 2.0714,
572
+ "step": 940
573
+ },
574
+ {
575
+ "epoch": 0.18,
576
+ "learning_rate": 0.0009810227290505816,
577
+ "loss": 2.0947,
578
+ "step": 950
579
+ },
580
+ {
581
+ "epoch": 0.18,
582
+ "learning_rate": 0.0009806237189255859,
583
+ "loss": 2.0591,
584
+ "step": 960
585
+ },
586
+ {
587
+ "epoch": 0.18,
588
+ "learning_rate": 0.0009802206404535489,
589
+ "loss": 2.0301,
590
+ "step": 970
591
+ },
592
+ {
593
+ "epoch": 0.18,
594
+ "learning_rate": 0.000979813497046419,
595
+ "loss": 2.0556,
596
+ "step": 980
597
+ },
598
+ {
599
+ "epoch": 0.18,
600
+ "learning_rate": 0.0009794022921505523,
601
+ "loss": 2.0753,
602
+ "step": 990
603
+ },
604
+ {
605
+ "epoch": 0.19,
606
+ "learning_rate": 0.000978987029246685,
607
+ "loss": 2.0898,
608
+ "step": 1000
609
+ }
610
+ ],
611
+ "max_steps": 10798,
612
+ "num_train_epochs": 2,
613
+ "total_flos": 9.498667895656284e+17,
614
+ "trial_name": null,
615
+ "trial_params": null
616
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8813832b030b0a678cb3ad0abd28db5000bdb8bb1708e45ba10cfda908fcc38
3
+ size 3305
checkpoint-10000/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
checkpoint-10000/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "chatglm2-6b",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 32.0,
10
+ "lora_dropout": 0.1,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "query_key_value"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
checkpoint-10000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9903abff597676a5d85a3f42ba3f950256d00e4aa93564c8ebb5fd5f4a0ebbbe
3
+ size 7819417
checkpoint-10000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "query_key_value"
8
+ ],
9
+ "name_module_trainable": "mlp",
10
+ "num_layer_trainable": 3,
11
+ "pre_seq_len": 64,
12
+ "prefix_projection": false
13
+ }
checkpoint-10000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9986fb97ca3fa48e2015b70aa583802fa73acc98d578bb5ac096548c91c4f424
3
+ size 15644485
checkpoint-10000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37fd716397629e3be922b426ba94f12d00c46352e66dbe11b85822a76d3e35e1
3
+ size 18679
checkpoint-10000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74bfb7ea10ec3e65f8eca160d3bf18eef130d179b66b4138138d46fceab4daca
3
+ size 18679
checkpoint-10000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488534d9a95b06aeefe2b45dfed265600bde02bf26605bfacb8ea0bca14920a4
3
+ size 18679
checkpoint-10000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9edad9c0f7873d58b81e886e6d846f91293545f6932489450d12b86cf6e02700
3
+ size 18679
checkpoint-10000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31ab5731d5270c6c302ae9dc8c05030cf9f832744fe07067f6bed6ea7d85df0
3
+ size 627
checkpoint-10000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-10000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8813832b030b0a678cb3ad0abd28db5000bdb8bb1708e45ba10cfda908fcc38
3
+ size 3305
checkpoint-2000/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
checkpoint-2000/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "chatglm2-6b",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 32.0,
10
+ "lora_dropout": 0.1,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "query_key_value"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
checkpoint-2000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f081bbdfbb5e387ee0824fff7f0f47949ffa6e68e7943ffa59134934c4cc853
3
+ size 7819417
checkpoint-2000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "query_key_value"
8
+ ],
9
+ "name_module_trainable": "mlp",
10
+ "num_layer_trainable": 3,
11
+ "pre_seq_len": 64,
12
+ "prefix_projection": false
13
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f9fe37c01b31ebef944ca17d10d8d5aaacb5798001799c604caf3846715c32d
3
+ size 15644485
checkpoint-2000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d8cb970e4ced27f56008fda1204003f4d92bee7bbcb658b1d95feeb1ee2d9c9
3
+ size 18679
checkpoint-2000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc409f2fff3209bb2720c94bd3a28d16c2aea229945134be8dfd24bd233b555a
3
+ size 18679
checkpoint-2000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994276963fc670cf27a5b4003c9939fb3694b73e3468dcf68c44d82388fb664e
3
+ size 18679
checkpoint-2000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19db02794e617e7615491a559802b701bc18a3aebaf0b383447ebdfcb7a6c4ca
3
+ size 18679
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74b5152c9a31debe910da86a00a4ef326dfa0b3a55c019894c4686f1176be2b3
3
+ size 627
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,1216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.3704389701796629,
5
+ "global_step": 2000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 0.0009999978838190456,
13
+ "loss": 2.9794,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 0.0009999915352940948,
19
+ "loss": 2.3885,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.01,
24
+ "learning_rate": 0.000999980954478887,
25
+ "loss": 2.3057,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.01,
30
+ "learning_rate": 0.000999966141462985,
31
+ "loss": 2.2692,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.01,
36
+ "learning_rate": 0.000999947096371777,
37
+ "loss": 2.2576,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.01,
42
+ "learning_rate": 0.0009999238193664748,
43
+ "loss": 2.2388,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 0.0009998963106441117,
49
+ "loss": 2.2523,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 0.0009998645704375414,
55
+ "loss": 2.218,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.02,
60
+ "learning_rate": 0.000999828599015436,
61
+ "loss": 2.2457,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.02,
66
+ "learning_rate": 0.0009997883966822835,
67
+ "loss": 2.198,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.02,
72
+ "learning_rate": 0.0009997439637783859,
73
+ "loss": 2.2013,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.02,
78
+ "learning_rate": 0.000999695300679855,
79
+ "loss": 2.1765,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 0.02,
84
+ "learning_rate": 0.0009996424077986109,
85
+ "loss": 2.1741,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 0.03,
90
+ "learning_rate": 0.000999585285582377,
91
+ "loss": 2.1898,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 0.03,
96
+ "learning_rate": 0.0009995239345146772,
97
+ "loss": 2.1466,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 0.03,
102
+ "learning_rate": 0.0009994583551148314,
103
+ "loss": 2.1423,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 0.03,
108
+ "learning_rate": 0.0009993885479379506,
109
+ "loss": 2.1451,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 0.03,
114
+ "learning_rate": 0.000999314513574934,
115
+ "loss": 2.202,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 0.04,
120
+ "learning_rate": 0.0009992362526524616,
121
+ "loss": 2.1208,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 0.04,
126
+ "learning_rate": 0.0009991537658329906,
127
+ "loss": 2.1591,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.04,
132
+ "learning_rate": 0.000999067053814749,
133
+ "loss": 2.1788,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.04,
138
+ "learning_rate": 0.0009989761173317304,
139
+ "loss": 2.147,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.04,
144
+ "learning_rate": 0.000998880957153687,
145
+ "loss": 2.1249,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.04,
150
+ "learning_rate": 0.000998781574086123,
151
+ "loss": 2.165,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.05,
156
+ "learning_rate": 0.000998677968970289,
157
+ "loss": 2.1428,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.05,
162
+ "learning_rate": 0.0009985701426831735,
163
+ "loss": 2.1384,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.05,
168
+ "learning_rate": 0.0009984580961374964,
169
+ "loss": 2.1585,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.05,
174
+ "learning_rate": 0.0009983418302817008,
175
+ "loss": 2.1156,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.05,
180
+ "learning_rate": 0.0009982213460999448,
181
+ "loss": 2.0811,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.06,
186
+ "learning_rate": 0.000998096644612094,
187
+ "loss": 2.1081,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.06,
192
+ "learning_rate": 0.0009979677268737118,
193
+ "loss": 2.1246,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 0.06,
198
+ "learning_rate": 0.0009978345939760515,
199
+ "loss": 2.1229,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 0.06,
204
+ "learning_rate": 0.000997697247046046,
205
+ "loss": 2.1033,
206
+ "step": 330
207
+ },
208
+ {
209
+ "epoch": 0.06,
210
+ "learning_rate": 0.0009975556872462994,
211
+ "loss": 2.0931,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 0.06,
216
+ "learning_rate": 0.000997409915775076,
217
+ "loss": 2.1206,
218
+ "step": 350
219
+ },
220
+ {
221
+ "epoch": 0.07,
222
+ "learning_rate": 0.0009972599338662915,
223
+ "loss": 2.0537,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 0.07,
228
+ "learning_rate": 0.0009971057427895012,
229
+ "loss": 2.0762,
230
+ "step": 370
231
+ },
232
+ {
233
+ "epoch": 0.07,
234
+ "learning_rate": 0.0009969473438498897,
235
+ "loss": 2.0883,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 0.07,
240
+ "learning_rate": 0.0009967847383882604,
241
+ "loss": 2.1174,
242
+ "step": 390
243
+ },
244
+ {
245
+ "epoch": 0.07,
246
+ "learning_rate": 0.0009966179277810239,
247
+ "loss": 2.1111,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 0.08,
252
+ "learning_rate": 0.0009964469134401855,
253
+ "loss": 2.1288,
254
+ "step": 410
255
+ },
256
+ {
257
+ "epoch": 0.08,
258
+ "learning_rate": 0.0009962716968133346,
259
+ "loss": 2.0967,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 0.08,
264
+ "learning_rate": 0.0009960922793836318,
265
+ "loss": 2.1216,
266
+ "step": 430
267
+ },
268
+ {
269
+ "epoch": 0.08,
270
+ "learning_rate": 0.0009959086626697955,
271
+ "loss": 2.0924,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 0.08,
276
+ "learning_rate": 0.0009957208482260908,
277
+ "loss": 2.0809,
278
+ "step": 450
279
+ },
280
+ {
281
+ "epoch": 0.09,
282
+ "learning_rate": 0.0009955288376423152,
283
+ "loss": 2.1082,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 0.09,
288
+ "learning_rate": 0.0009953326325437852,
289
+ "loss": 2.0885,
290
+ "step": 470
291
+ },
292
+ {
293
+ "epoch": 0.09,
294
+ "learning_rate": 0.0009951322345913224,
295
+ "loss": 2.1133,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 0.09,
300
+ "learning_rate": 0.0009949276454812408,
301
+ "loss": 2.0844,
302
+ "step": 490
303
+ },
304
+ {
305
+ "epoch": 0.09,
306
+ "learning_rate": 0.00099471886694533,
307
+ "loss": 2.0796,
308
+ "step": 500
309
+ },
310
+ {
311
+ "epoch": 0.09,
312
+ "learning_rate": 0.0009945059007508434,
313
+ "loss": 2.1255,
314
+ "step": 510
315
+ },
316
+ {
317
+ "epoch": 0.1,
318
+ "learning_rate": 0.0009942887487004804,
319
+ "loss": 2.0913,
320
+ "step": 520
321
+ },
322
+ {
323
+ "epoch": 0.1,
324
+ "learning_rate": 0.0009940674126323733,
325
+ "loss": 2.1003,
326
+ "step": 530
327
+ },
328
+ {
329
+ "epoch": 0.1,
330
+ "learning_rate": 0.0009938418944200709,
331
+ "loss": 2.0541,
332
+ "step": 540
333
+ },
334
+ {
335
+ "epoch": 0.1,
336
+ "learning_rate": 0.0009936121959725223,
337
+ "loss": 2.0523,
338
+ "step": 550
339
+ },
340
+ {
341
+ "epoch": 0.1,
342
+ "learning_rate": 0.0009933783192340618,
343
+ "loss": 2.1225,
344
+ "step": 560
345
+ },
346
+ {
347
+ "epoch": 0.11,
348
+ "learning_rate": 0.0009931402661843911,
349
+ "loss": 2.0446,
350
+ "step": 570
351
+ },
352
+ {
353
+ "epoch": 0.11,
354
+ "learning_rate": 0.000992898038838564,
355
+ "loss": 2.0921,
356
+ "step": 580
357
+ },
358
+ {
359
+ "epoch": 0.11,
360
+ "learning_rate": 0.0009926516392469674,
361
+ "loss": 2.1081,
362
+ "step": 590
363
+ },
364
+ {
365
+ "epoch": 0.11,
366
+ "learning_rate": 0.0009924010694953064,
367
+ "loss": 2.0734,
368
+ "step": 600
369
+ },
370
+ {
371
+ "epoch": 0.11,
372
+ "learning_rate": 0.0009921463317045843,
373
+ "loss": 2.0652,
374
+ "step": 610
375
+ },
376
+ {
377
+ "epoch": 0.11,
378
+ "learning_rate": 0.0009918874280310862,
379
+ "loss": 2.0818,
380
+ "step": 620
381
+ },
382
+ {
383
+ "epoch": 0.12,
384
+ "learning_rate": 0.0009916243606663605,
385
+ "loss": 2.0776,
386
+ "step": 630
387
+ },
388
+ {
389
+ "epoch": 0.12,
390
+ "learning_rate": 0.0009913571318371994,
391
+ "loss": 2.1025,
392
+ "step": 640
393
+ },
394
+ {
395
+ "epoch": 0.12,
396
+ "learning_rate": 0.0009910857438056215,
397
+ "loss": 2.066,
398
+ "step": 650
399
+ },
400
+ {
401
+ "epoch": 0.12,
402
+ "learning_rate": 0.0009908101988688512,
403
+ "loss": 2.0575,
404
+ "step": 660
405
+ },
406
+ {
407
+ "epoch": 0.12,
408
+ "learning_rate": 0.0009905304993593008,
409
+ "loss": 2.1269,
410
+ "step": 670
411
+ },
412
+ {
413
+ "epoch": 0.13,
414
+ "learning_rate": 0.0009902466476445486,
415
+ "loss": 2.0518,
416
+ "step": 680
417
+ },
418
+ {
419
+ "epoch": 0.13,
420
+ "learning_rate": 0.0009899586461273218,
421
+ "loss": 2.0698,
422
+ "step": 690
423
+ },
424
+ {
425
+ "epoch": 0.13,
426
+ "learning_rate": 0.000989666497245473,
427
+ "loss": 2.0988,
428
+ "step": 700
429
+ },
430
+ {
431
+ "epoch": 0.13,
432
+ "learning_rate": 0.0009893702034719624,
433
+ "loss": 2.0986,
434
+ "step": 710
435
+ },
436
+ {
437
+ "epoch": 0.13,
438
+ "learning_rate": 0.0009890697673148345,
439
+ "loss": 2.0237,
440
+ "step": 720
441
+ },
442
+ {
443
+ "epoch": 0.14,
444
+ "learning_rate": 0.0009887651913171986,
445
+ "loss": 2.0027,
446
+ "step": 730
447
+ },
448
+ {
449
+ "epoch": 0.14,
450
+ "learning_rate": 0.0009884564780572064,
451
+ "loss": 2.0563,
452
+ "step": 740
453
+ },
454
+ {
455
+ "epoch": 0.14,
456
+ "learning_rate": 0.0009881436301480305,
457
+ "loss": 2.0624,
458
+ "step": 750
459
+ },
460
+ {
461
+ "epoch": 0.14,
462
+ "learning_rate": 0.000987826650237842,
463
+ "loss": 2.0926,
464
+ "step": 760
465
+ },
466
+ {
467
+ "epoch": 0.14,
468
+ "learning_rate": 0.000987505541009788,
469
+ "loss": 2.0585,
470
+ "step": 770
471
+ },
472
+ {
473
+ "epoch": 0.14,
474
+ "learning_rate": 0.0009871803051819696,
475
+ "loss": 2.0494,
476
+ "step": 780
477
+ },
478
+ {
479
+ "epoch": 0.15,
480
+ "learning_rate": 0.0009868509455074183,
481
+ "loss": 2.0106,
482
+ "step": 790
483
+ },
484
+ {
485
+ "epoch": 0.15,
486
+ "learning_rate": 0.0009865174647740729,
487
+ "loss": 2.0861,
488
+ "step": 800
489
+ },
490
+ {
491
+ "epoch": 0.15,
492
+ "learning_rate": 0.0009861798658047556,
493
+ "loss": 2.0478,
494
+ "step": 810
495
+ },
496
+ {
497
+ "epoch": 0.15,
498
+ "learning_rate": 0.0009858381514571484,
499
+ "loss": 2.0469,
500
+ "step": 820
501
+ },
502
+ {
503
+ "epoch": 0.15,
504
+ "learning_rate": 0.000985492324623769,
505
+ "loss": 2.0671,
506
+ "step": 830
507
+ },
508
+ {
509
+ "epoch": 0.16,
510
+ "learning_rate": 0.0009851423882319458,
511
+ "loss": 2.0808,
512
+ "step": 840
513
+ },
514
+ {
515
+ "epoch": 0.16,
516
+ "learning_rate": 0.0009847883452437937,
517
+ "loss": 2.0331,
518
+ "step": 850
519
+ },
520
+ {
521
+ "epoch": 0.16,
522
+ "learning_rate": 0.0009844301986561893,
523
+ "loss": 2.0295,
524
+ "step": 860
525
+ },
526
+ {
527
+ "epoch": 0.16,
528
+ "learning_rate": 0.000984067951500744,
529
+ "loss": 2.0873,
530
+ "step": 870
531
+ },
532
+ {
533
+ "epoch": 0.16,
534
+ "learning_rate": 0.00098370160684378,
535
+ "loss": 2.1038,
536
+ "step": 880
537
+ },
538
+ {
539
+ "epoch": 0.16,
540
+ "learning_rate": 0.0009833311677863042,
541
+ "loss": 2.0337,
542
+ "step": 890
543
+ },
544
+ {
545
+ "epoch": 0.17,
546
+ "learning_rate": 0.0009829566374639801,
547
+ "loss": 2.0407,
548
+ "step": 900
549
+ },
550
+ {
551
+ "epoch": 0.17,
552
+ "learning_rate": 0.0009825780190471042,
553
+ "loss": 2.1049,
554
+ "step": 910
555
+ },
556
+ {
557
+ "epoch": 0.17,
558
+ "learning_rate": 0.000982195315740576,
559
+ "loss": 2.0475,
560
+ "step": 920
561
+ },
562
+ {
563
+ "epoch": 0.17,
564
+ "learning_rate": 0.0009818085307838741,
565
+ "loss": 2.0624,
566
+ "step": 930
567
+ },
568
+ {
569
+ "epoch": 0.17,
570
+ "learning_rate": 0.000981417667451026,
571
+ "loss": 2.0714,
572
+ "step": 940
573
+ },
574
+ {
575
+ "epoch": 0.18,
576
+ "learning_rate": 0.0009810227290505816,
577
+ "loss": 2.0947,
578
+ "step": 950
579
+ },
580
+ {
581
+ "epoch": 0.18,
582
+ "learning_rate": 0.0009806237189255859,
583
+ "loss": 2.0591,
584
+ "step": 960
585
+ },
586
+ {
587
+ "epoch": 0.18,
588
+ "learning_rate": 0.0009802206404535489,
589
+ "loss": 2.0301,
590
+ "step": 970
591
+ },
592
+ {
593
+ "epoch": 0.18,
594
+ "learning_rate": 0.000979813497046419,
595
+ "loss": 2.0556,
596
+ "step": 980
597
+ },
598
+ {
599
+ "epoch": 0.18,
600
+ "learning_rate": 0.0009794022921505523,
601
+ "loss": 2.0753,
602
+ "step": 990
603
+ },
604
+ {
605
+ "epoch": 0.19,
606
+ "learning_rate": 0.000978987029246685,
607
+ "loss": 2.0898,
608
+ "step": 1000
609
+ },
610
+ {
611
+ "epoch": 0.19,
612
+ "learning_rate": 0.0009785677118499029,
613
+ "loss": 2.0464,
614
+ "step": 1010
615
+ },
616
+ {
617
+ "epoch": 0.19,
618
+ "learning_rate": 0.0009781443435096116,
619
+ "loss": 2.0828,
620
+ "step": 1020
621
+ },
622
+ {
623
+ "epoch": 0.19,
624
+ "learning_rate": 0.0009777169278095074,
625
+ "loss": 2.1137,
626
+ "step": 1030
627
+ },
628
+ {
629
+ "epoch": 0.19,
630
+ "learning_rate": 0.0009772854683675462,
631
+ "loss": 2.0167,
632
+ "step": 1040
633
+ },
634
+ {
635
+ "epoch": 0.19,
636
+ "learning_rate": 0.000976849968835913,
637
+ "loss": 2.07,
638
+ "step": 1050
639
+ },
640
+ {
641
+ "epoch": 0.2,
642
+ "learning_rate": 0.0009764104329009909,
643
+ "loss": 2.0409,
644
+ "step": 1060
645
+ },
646
+ {
647
+ "epoch": 0.2,
648
+ "learning_rate": 0.0009759668642833304,
649
+ "loss": 2.015,
650
+ "step": 1070
651
+ },
652
+ {
653
+ "epoch": 0.2,
654
+ "learning_rate": 0.0009755192667376173,
655
+ "loss": 2.0175,
656
+ "step": 1080
657
+ },
658
+ {
659
+ "epoch": 0.2,
660
+ "learning_rate": 0.0009750676440526411,
661
+ "loss": 2.0773,
662
+ "step": 1090
663
+ },
664
+ {
665
+ "epoch": 0.2,
666
+ "learning_rate": 0.0009746120000512632,
667
+ "loss": 2.0245,
668
+ "step": 1100
669
+ },
670
+ {
671
+ "epoch": 0.21,
672
+ "learning_rate": 0.0009741523385903841,
673
+ "loss": 2.094,
674
+ "step": 1110
675
+ },
676
+ {
677
+ "epoch": 0.21,
678
+ "learning_rate": 0.0009736886635609112,
679
+ "loss": 2.0506,
680
+ "step": 1120
681
+ },
682
+ {
683
+ "epoch": 0.21,
684
+ "learning_rate": 0.0009732209788877258,
685
+ "loss": 2.0287,
686
+ "step": 1130
687
+ },
688
+ {
689
+ "epoch": 0.21,
690
+ "learning_rate": 0.0009727492885296489,
691
+ "loss": 2.1162,
692
+ "step": 1140
693
+ },
694
+ {
695
+ "epoch": 0.21,
696
+ "learning_rate": 0.0009722735964794099,
697
+ "loss": 2.1096,
698
+ "step": 1150
699
+ },
700
+ {
701
+ "epoch": 0.21,
702
+ "learning_rate": 0.0009717939067636099,
703
+ "loss": 2.0621,
704
+ "step": 1160
705
+ },
706
+ {
707
+ "epoch": 0.22,
708
+ "learning_rate": 0.0009713102234426903,
709
+ "loss": 2.0796,
710
+ "step": 1170
711
+ },
712
+ {
713
+ "epoch": 0.22,
714
+ "learning_rate": 0.0009708225506108965,
715
+ "loss": 2.0565,
716
+ "step": 1180
717
+ },
718
+ {
719
+ "epoch": 0.22,
720
+ "learning_rate": 0.0009703308923962447,
721
+ "loss": 2.0669,
722
+ "step": 1190
723
+ },
724
+ {
725
+ "epoch": 0.22,
726
+ "learning_rate": 0.0009698352529604857,
727
+ "loss": 2.0638,
728
+ "step": 1200
729
+ },
730
+ {
731
+ "epoch": 0.22,
732
+ "learning_rate": 0.0009693356364990705,
733
+ "loss": 2.0358,
734
+ "step": 1210
735
+ },
736
+ {
737
+ "epoch": 0.23,
738
+ "learning_rate": 0.0009688320472411143,
739
+ "loss": 2.0859,
740
+ "step": 1220
741
+ },
742
+ {
743
+ "epoch": 0.23,
744
+ "learning_rate": 0.0009683244894493613,
745
+ "loss": 2.0932,
746
+ "step": 1230
747
+ },
748
+ {
749
+ "epoch": 0.23,
750
+ "learning_rate": 0.0009678129674201479,
751
+ "loss": 2.0129,
752
+ "step": 1240
753
+ },
754
+ {
755
+ "epoch": 0.23,
756
+ "learning_rate": 0.0009672974854833669,
757
+ "loss": 2.055,
758
+ "step": 1250
759
+ },
760
+ {
761
+ "epoch": 0.23,
762
+ "learning_rate": 0.0009667780480024304,
763
+ "loss": 2.0665,
764
+ "step": 1260
765
+ },
766
+ {
767
+ "epoch": 0.24,
768
+ "learning_rate": 0.0009662546593742334,
769
+ "loss": 2.0488,
770
+ "step": 1270
771
+ },
772
+ {
773
+ "epoch": 0.24,
774
+ "learning_rate": 0.0009657273240291159,
775
+ "loss": 2.0543,
776
+ "step": 1280
777
+ },
778
+ {
779
+ "epoch": 0.24,
780
+ "learning_rate": 0.0009651960464308261,
781
+ "loss": 2.0418,
782
+ "step": 1290
783
+ },
784
+ {
785
+ "epoch": 0.24,
786
+ "learning_rate": 0.0009646608310764819,
787
+ "loss": 2.033,
788
+ "step": 1300
789
+ },
790
+ {
791
+ "epoch": 0.24,
792
+ "learning_rate": 0.0009641216824965338,
793
+ "loss": 2.1034,
794
+ "step": 1310
795
+ },
796
+ {
797
+ "epoch": 0.24,
798
+ "learning_rate": 0.0009635786052547253,
799
+ "loss": 2.0866,
800
+ "step": 1320
801
+ },
802
+ {
803
+ "epoch": 0.25,
804
+ "learning_rate": 0.0009630316039480556,
805
+ "loss": 2.0607,
806
+ "step": 1330
807
+ },
808
+ {
809
+ "epoch": 0.25,
810
+ "learning_rate": 0.0009624806832067394,
811
+ "loss": 2.0457,
812
+ "step": 1340
813
+ },
814
+ {
815
+ "epoch": 0.25,
816
+ "learning_rate": 0.0009619258476941686,
817
+ "loss": 2.032,
818
+ "step": 1350
819
+ },
820
+ {
821
+ "epoch": 0.25,
822
+ "learning_rate": 0.000961367102106873,
823
+ "loss": 2.0519,
824
+ "step": 1360
825
+ },
826
+ {
827
+ "epoch": 0.25,
828
+ "learning_rate": 0.0009608044511744791,
829
+ "loss": 2.0449,
830
+ "step": 1370
831
+ },
832
+ {
833
+ "epoch": 0.26,
834
+ "learning_rate": 0.0009602378996596721,
835
+ "loss": 1.9949,
836
+ "step": 1380
837
+ },
838
+ {
839
+ "epoch": 0.26,
840
+ "learning_rate": 0.0009596674523581539,
841
+ "loss": 2.0394,
842
+ "step": 1390
843
+ },
844
+ {
845
+ "epoch": 0.26,
846
+ "learning_rate": 0.0009590931140986035,
847
+ "loss": 2.0386,
848
+ "step": 1400
849
+ },
850
+ {
851
+ "epoch": 0.26,
852
+ "learning_rate": 0.0009585148897426354,
853
+ "loss": 2.0254,
854
+ "step": 1410
855
+ },
856
+ {
857
+ "epoch": 0.26,
858
+ "learning_rate": 0.0009579327841847593,
859
+ "loss": 2.0238,
860
+ "step": 1420
861
+ },
862
+ {
863
+ "epoch": 0.26,
864
+ "learning_rate": 0.000957346802352338,
865
+ "loss": 2.0509,
866
+ "step": 1430
867
+ },
868
+ {
869
+ "epoch": 0.27,
870
+ "learning_rate": 0.0009567569492055456,
871
+ "loss": 2.0004,
872
+ "step": 1440
873
+ },
874
+ {
875
+ "epoch": 0.27,
876
+ "learning_rate": 0.0009561632297373263,
877
+ "loss": 2.0203,
878
+ "step": 1450
879
+ },
880
+ {
881
+ "epoch": 0.27,
882
+ "learning_rate": 0.0009555656489733513,
883
+ "loss": 2.0182,
884
+ "step": 1460
885
+ },
886
+ {
887
+ "epoch": 0.27,
888
+ "learning_rate": 0.000954964211971977,
889
+ "loss": 1.9754,
890
+ "step": 1470
891
+ },
892
+ {
893
+ "epoch": 0.27,
894
+ "learning_rate": 0.0009543589238242012,
895
+ "loss": 2.0374,
896
+ "step": 1480
897
+ },
898
+ {
899
+ "epoch": 0.28,
900
+ "learning_rate": 0.000953749789653621,
901
+ "loss": 2.0367,
902
+ "step": 1490
903
+ },
904
+ {
905
+ "epoch": 0.28,
906
+ "learning_rate": 0.000953136814616389,
907
+ "loss": 2.0866,
908
+ "step": 1500
909
+ },
910
+ {
911
+ "epoch": 0.28,
912
+ "learning_rate": 0.0009525200039011694,
913
+ "loss": 2.0083,
914
+ "step": 1510
915
+ },
916
+ {
917
+ "epoch": 0.28,
918
+ "learning_rate": 0.0009518993627290948,
919
+ "loss": 2.0525,
920
+ "step": 1520
921
+ },
922
+ {
923
+ "epoch": 0.28,
924
+ "learning_rate": 0.0009512748963537212,
925
+ "loss": 2.0636,
926
+ "step": 1530
927
+ },
928
+ {
929
+ "epoch": 0.29,
930
+ "learning_rate": 0.000950646610060984,
931
+ "loss": 2.0522,
932
+ "step": 1540
933
+ },
934
+ {
935
+ "epoch": 0.29,
936
+ "learning_rate": 0.0009500145091691532,
937
+ "loss": 2.05,
938
+ "step": 1550
939
+ },
940
+ {
941
+ "epoch": 0.29,
942
+ "learning_rate": 0.0009493785990287882,
943
+ "loss": 1.9887,
944
+ "step": 1560
945
+ },
946
+ {
947
+ "epoch": 0.29,
948
+ "learning_rate": 0.0009487388850226926,
949
+ "loss": 2.0309,
950
+ "step": 1570
951
+ },
952
+ {
953
+ "epoch": 0.29,
954
+ "learning_rate": 0.000948095372565869,
955
+ "loss": 1.9954,
956
+ "step": 1580
957
+ },
958
+ {
959
+ "epoch": 0.29,
960
+ "learning_rate": 0.0009474480671054726,
961
+ "loss": 2.0078,
962
+ "step": 1590
963
+ },
964
+ {
965
+ "epoch": 0.3,
966
+ "learning_rate": 0.0009467969741207652,
967
+ "loss": 2.0395,
968
+ "step": 1600
969
+ },
970
+ {
971
+ "epoch": 0.3,
972
+ "learning_rate": 0.0009461420991230693,
973
+ "loss": 2.0415,
974
+ "step": 1610
975
+ },
976
+ {
977
+ "epoch": 0.3,
978
+ "learning_rate": 0.0009454834476557207,
979
+ "loss": 2.0308,
980
+ "step": 1620
981
+ },
982
+ {
983
+ "epoch": 0.3,
984
+ "learning_rate": 0.0009448210252940223,
985
+ "loss": 2.0826,
986
+ "step": 1630
987
+ },
988
+ {
989
+ "epoch": 0.3,
990
+ "learning_rate": 0.0009441548376451963,
991
+ "loss": 2.0424,
992
+ "step": 1640
993
+ },
994
+ {
995
+ "epoch": 0.31,
996
+ "learning_rate": 0.0009434848903483373,
997
+ "loss": 2.0125,
998
+ "step": 1650
999
+ },
1000
+ {
1001
+ "epoch": 0.31,
1002
+ "learning_rate": 0.0009428111890743639,
1003
+ "loss": 2.0139,
1004
+ "step": 1660
1005
+ },
1006
+ {
1007
+ "epoch": 0.31,
1008
+ "learning_rate": 0.0009421337395259717,
1009
+ "loss": 2.0682,
1010
+ "step": 1670
1011
+ },
1012
+ {
1013
+ "epoch": 0.31,
1014
+ "learning_rate": 0.0009414525474375837,
1015
+ "loss": 2.0577,
1016
+ "step": 1680
1017
+ },
1018
+ {
1019
+ "epoch": 0.31,
1020
+ "learning_rate": 0.0009407676185753029,
1021
+ "loss": 2.0262,
1022
+ "step": 1690
1023
+ },
1024
+ {
1025
+ "epoch": 0.31,
1026
+ "learning_rate": 0.0009400789587368632,
1027
+ "loss": 2.0515,
1028
+ "step": 1700
1029
+ },
1030
+ {
1031
+ "epoch": 0.32,
1032
+ "learning_rate": 0.0009393865737515794,
1033
+ "loss": 2.0398,
1034
+ "step": 1710
1035
+ },
1036
+ {
1037
+ "epoch": 0.32,
1038
+ "learning_rate": 0.0009386904694802997,
1039
+ "loss": 2.0146,
1040
+ "step": 1720
1041
+ },
1042
+ {
1043
+ "epoch": 0.32,
1044
+ "learning_rate": 0.0009379906518153543,
1045
+ "loss": 2.0438,
1046
+ "step": 1730
1047
+ },
1048
+ {
1049
+ "epoch": 0.32,
1050
+ "learning_rate": 0.0009372871266805063,
1051
+ "loss": 2.0377,
1052
+ "step": 1740
1053
+ },
1054
+ {
1055
+ "epoch": 0.32,
1056
+ "learning_rate": 0.000936579900030902,
1057
+ "loss": 2.0789,
1058
+ "step": 1750
1059
+ },
1060
+ {
1061
+ "epoch": 0.33,
1062
+ "learning_rate": 0.0009358689778530193,
1063
+ "loss": 2.0201,
1064
+ "step": 1760
1065
+ },
1066
+ {
1067
+ "epoch": 0.33,
1068
+ "learning_rate": 0.0009351543661646185,
1069
+ "loss": 2.0114,
1070
+ "step": 1770
1071
+ },
1072
+ {
1073
+ "epoch": 0.33,
1074
+ "learning_rate": 0.0009344360710146898,
1075
+ "loss": 2.0242,
1076
+ "step": 1780
1077
+ },
1078
+ {
1079
+ "epoch": 0.33,
1080
+ "learning_rate": 0.0009337140984834034,
1081
+ "loss": 2.0436,
1082
+ "step": 1790
1083
+ },
1084
+ {
1085
+ "epoch": 0.33,
1086
+ "learning_rate": 0.0009329884546820572,
1087
+ "loss": 2.0452,
1088
+ "step": 1800
1089
+ },
1090
+ {
1091
+ "epoch": 0.34,
1092
+ "learning_rate": 0.000932259145753026,
1093
+ "loss": 2.0254,
1094
+ "step": 1810
1095
+ },
1096
+ {
1097
+ "epoch": 0.34,
1098
+ "learning_rate": 0.0009315261778697083,
1099
+ "loss": 2.0409,
1100
+ "step": 1820
1101
+ },
1102
+ {
1103
+ "epoch": 0.34,
1104
+ "learning_rate": 0.0009307895572364746,
1105
+ "loss": 2.0301,
1106
+ "step": 1830
1107
+ },
1108
+ {
1109
+ "epoch": 0.34,
1110
+ "learning_rate": 0.0009300492900886154,
1111
+ "loss": 2.0078,
1112
+ "step": 1840
1113
+ },
1114
+ {
1115
+ "epoch": 0.34,
1116
+ "learning_rate": 0.0009293053826922873,
1117
+ "loss": 1.9851,
1118
+ "step": 1850
1119
+ },
1120
+ {
1121
+ "epoch": 0.34,
1122
+ "learning_rate": 0.0009285578413444613,
1123
+ "loss": 1.9947,
1124
+ "step": 1860
1125
+ },
1126
+ {
1127
+ "epoch": 0.35,
1128
+ "learning_rate": 0.0009278066723728682,
1129
+ "loss": 2.0331,
1130
+ "step": 1870
1131
+ },
1132
+ {
1133
+ "epoch": 0.35,
1134
+ "learning_rate": 0.0009270518821359461,
1135
+ "loss": 2.0058,
1136
+ "step": 1880
1137
+ },
1138
+ {
1139
+ "epoch": 0.35,
1140
+ "learning_rate": 0.0009262934770227858,
1141
+ "loss": 2.05,
1142
+ "step": 1890
1143
+ },
1144
+ {
1145
+ "epoch": 0.35,
1146
+ "learning_rate": 0.0009255314634530771,
1147
+ "loss": 2.0444,
1148
+ "step": 1900
1149
+ },
1150
+ {
1151
+ "epoch": 0.35,
1152
+ "learning_rate": 0.0009247658478770543,
1153
+ "loss": 2.0045,
1154
+ "step": 1910
1155
+ },
1156
+ {
1157
+ "epoch": 0.36,
1158
+ "learning_rate": 0.000923996636775442,
1159
+ "loss": 2.0211,
1160
+ "step": 1920
1161
+ },
1162
+ {
1163
+ "epoch": 0.36,
1164
+ "learning_rate": 0.0009232238366593997,
1165
+ "loss": 2.0124,
1166
+ "step": 1930
1167
+ },
1168
+ {
1169
+ "epoch": 0.36,
1170
+ "learning_rate": 0.0009224474540704671,
1171
+ "loss": 2.0067,
1172
+ "step": 1940
1173
+ },
1174
+ {
1175
+ "epoch": 0.36,
1176
+ "learning_rate": 0.0009216674955805079,
1177
+ "loss": 2.0247,
1178
+ "step": 1950
1179
+ },
1180
+ {
1181
+ "epoch": 0.36,
1182
+ "learning_rate": 0.0009208839677916557,
1183
+ "loss": 2.0314,
1184
+ "step": 1960
1185
+ },
1186
+ {
1187
+ "epoch": 0.36,
1188
+ "learning_rate": 0.0009200968773362568,
1189
+ "loss": 2.067,
1190
+ "step": 1970
1191
+ },
1192
+ {
1193
+ "epoch": 0.37,
1194
+ "learning_rate": 0.0009193062308768145,
1195
+ "loss": 2.0168,
1196
+ "step": 1980
1197
+ },
1198
+ {
1199
+ "epoch": 0.37,
1200
+ "learning_rate": 0.0009185120351059326,
1201
+ "loss": 2.0649,
1202
+ "step": 1990
1203
+ },
1204
+ {
1205
+ "epoch": 0.37,
1206
+ "learning_rate": 0.0009177142967462591,
1207
+ "loss": 2.0208,
1208
+ "step": 2000
1209
+ }
1210
+ ],
1211
+ "max_steps": 10798,
1212
+ "num_train_epochs": 2,
1213
+ "total_flos": 1.8983090426321306e+18,
1214
+ "trial_name": null,
1215
+ "trial_params": null
1216
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8813832b030b0a678cb3ad0abd28db5000bdb8bb1708e45ba10cfda908fcc38
3
+ size 3305
checkpoint-3000/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
checkpoint-3000/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "chatglm2-6b",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "layers_pattern": null,
8
+ "layers_to_transform": null,
9
+ "lora_alpha": 32.0,
10
+ "lora_dropout": 0.1,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "revision": null,
15
+ "target_modules": [
16
+ "query_key_value"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
checkpoint-3000/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3cd0c710c3e6e522341f9bc18338a3a5d364603e39d5a7138ce0ece693cada4
3
+ size 7819417
checkpoint-3000/finetuning_args.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "finetuning_type": "lora",
3
+ "lora_alpha": 32.0,
4
+ "lora_dropout": 0.1,
5
+ "lora_rank": 8,
6
+ "lora_target": [
7
+ "query_key_value"
8
+ ],
9
+ "name_module_trainable": "mlp",
10
+ "num_layer_trainable": 3,
11
+ "pre_seq_len": 64,
12
+ "prefix_projection": false
13
+ }
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2286a7eb891d1c724b052fc718f53e053668dd120909c7d17063e0b11f1046c7
3
+ size 15644485
checkpoint-3000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28df0ebee892c2cd596ed2d9c3c1b1bce8aacce528339850151675cb3e979e11
3
+ size 18679
checkpoint-3000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff24f859606279e5f5564c5772b357bdf34d74d1fb9f644a200359edb9d5a2bb
3
+ size 18679
checkpoint-3000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf19f9e07dfb1c6a4f9b043721d057abeb2a702aa4992a0d2dab15224d64805
3
+ size 18679
checkpoint-3000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723718e2b5b73a3d4bfee55008ffbae341c90ae5f2480575806ffc1134c69a8e
3
+ size 18679
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf9b363bf12cc68bc5c1a58285c6f89fc38fb83c9837ee26212120079972ec5
3
+ size 627