fpadovani commited on
Commit
b67fa96
·
verified ·
1 Parent(s): a73b7d6

Training in progress, step 56000, checkpoint

Browse files
Files changed (1) hide show
  1. checkpoint-56000/trainer_state.json +84 -84
checkpoint-56000/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
- "eval_runtime": 1.6226,
15
- "eval_samples_per_second": 1272.646,
16
- "eval_steps_per_second": 80.118,
17
  "step": 2000
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
- "eval_runtime": 1.6382,
30
- "eval_samples_per_second": 1260.5,
31
- "eval_steps_per_second": 79.353,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
- "eval_runtime": 3.0373,
38
- "eval_samples_per_second": 679.889,
39
- "eval_steps_per_second": 42.802,
40
  "step": 6000
41
  },
42
  {
@@ -49,17 +49,17 @@
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
- "eval_runtime": 3.0316,
53
- "eval_samples_per_second": 681.153,
54
- "eval_steps_per_second": 42.881,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
- "eval_runtime": 3.1272,
61
- "eval_samples_per_second": 660.333,
62
- "eval_steps_per_second": 41.571,
63
  "step": 10000
64
  },
65
  {
@@ -72,17 +72,17 @@
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
- "eval_runtime": 3.0078,
76
- "eval_samples_per_second": 686.558,
77
- "eval_steps_per_second": 43.222,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
- "eval_runtime": 3.0343,
84
- "eval_samples_per_second": 680.548,
85
- "eval_steps_per_second": 42.843,
86
  "step": 14000
87
  },
88
  {
@@ -95,17 +95,17 @@
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
- "eval_runtime": 3.0113,
99
- "eval_samples_per_second": 685.757,
100
- "eval_steps_per_second": 43.171,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
- "eval_runtime": 3.0569,
107
- "eval_samples_per_second": 675.527,
108
- "eval_steps_per_second": 42.527,
109
  "step": 18000
110
  },
111
  {
@@ -118,17 +118,17 @@
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
- "eval_runtime": 2.8975,
122
- "eval_samples_per_second": 712.671,
123
- "eval_steps_per_second": 44.865,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
- "eval_runtime": 3.0834,
130
- "eval_samples_per_second": 669.706,
131
- "eval_steps_per_second": 42.161,
132
  "step": 22000
133
  },
134
  {
@@ -141,17 +141,17 @@
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
- "eval_runtime": 2.6664,
145
- "eval_samples_per_second": 774.461,
146
- "eval_steps_per_second": 48.755,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
- "eval_runtime": 2.5685,
153
- "eval_samples_per_second": 803.957,
154
- "eval_steps_per_second": 50.612,
155
  "step": 26000
156
  },
157
  {
@@ -164,17 +164,17 @@
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
- "eval_runtime": 2.8723,
168
- "eval_samples_per_second": 718.927,
169
- "eval_steps_per_second": 45.259,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
- "eval_runtime": 2.9312,
176
- "eval_samples_per_second": 704.488,
177
- "eval_steps_per_second": 44.35,
178
  "step": 30000
179
  },
180
  {
@@ -187,17 +187,17 @@
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
- "eval_runtime": 3.0495,
191
- "eval_samples_per_second": 677.15,
192
- "eval_steps_per_second": 42.629,
193
  "step": 32000
194
  },
195
  {
196
  "epoch": 42.5,
197
  "eval_loss": 2.733860731124878,
198
- "eval_runtime": 2.9214,
199
- "eval_samples_per_second": 706.846,
200
- "eval_steps_per_second": 44.499,
201
  "step": 34000
202
  },
203
  {
@@ -210,17 +210,17 @@
210
  {
211
  "epoch": 45.0,
212
  "eval_loss": 2.6182146072387695,
213
- "eval_runtime": 3.223,
214
- "eval_samples_per_second": 640.711,
215
- "eval_steps_per_second": 40.335,
216
  "step": 36000
217
  },
218
  {
219
  "epoch": 47.5,
220
  "eval_loss": 2.561305046081543,
221
- "eval_runtime": 3.1317,
222
- "eval_samples_per_second": 659.384,
223
- "eval_steps_per_second": 41.511,
224
  "step": 38000
225
  },
226
  {
@@ -233,17 +233,17 @@
233
  {
234
  "epoch": 50.0,
235
  "eval_loss": 2.4945614337921143,
236
- "eval_runtime": 3.1343,
237
- "eval_samples_per_second": 658.831,
238
- "eval_steps_per_second": 41.476,
239
  "step": 40000
240
  },
241
  {
242
  "epoch": 52.5,
243
  "eval_loss": 2.4196503162384033,
244
- "eval_runtime": 3.0349,
245
- "eval_samples_per_second": 680.411,
246
- "eval_steps_per_second": 42.835,
247
  "step": 42000
248
  },
249
  {
@@ -256,17 +256,17 @@
256
  {
257
  "epoch": 55.0,
258
  "eval_loss": 2.3687477111816406,
259
- "eval_runtime": 3.0804,
260
- "eval_samples_per_second": 670.376,
261
- "eval_steps_per_second": 42.203,
262
  "step": 44000
263
  },
264
  {
265
  "epoch": 57.5,
266
  "eval_loss": 2.2801854610443115,
267
- "eval_runtime": 3.039,
268
- "eval_samples_per_second": 679.507,
269
- "eval_steps_per_second": 42.778,
270
  "step": 46000
271
  },
272
  {
@@ -279,17 +279,17 @@
279
  {
280
  "epoch": 60.0,
281
  "eval_loss": 2.262115478515625,
282
- "eval_runtime": 3.1233,
283
- "eval_samples_per_second": 661.165,
284
- "eval_steps_per_second": 41.623,
285
  "step": 48000
286
  },
287
  {
288
  "epoch": 62.5,
289
  "eval_loss": 2.2170372009277344,
290
- "eval_runtime": 3.1043,
291
- "eval_samples_per_second": 665.2,
292
- "eval_steps_per_second": 41.877,
293
  "step": 50000
294
  },
295
  {
@@ -302,17 +302,17 @@
302
  {
303
  "epoch": 65.0,
304
  "eval_loss": 2.1907379627227783,
305
- "eval_runtime": 3.0605,
306
- "eval_samples_per_second": 674.72,
307
- "eval_steps_per_second": 42.476,
308
  "step": 52000
309
  },
310
  {
311
  "epoch": 67.5,
312
  "eval_loss": 2.1659305095672607,
313
- "eval_runtime": 3.11,
314
- "eval_samples_per_second": 663.989,
315
- "eval_steps_per_second": 41.801,
316
  "step": 54000
317
  },
318
  {
@@ -325,9 +325,9 @@
325
  {
326
  "epoch": 70.0,
327
  "eval_loss": 2.127293825149536,
328
- "eval_runtime": 3.0456,
329
- "eval_samples_per_second": 678.031,
330
- "eval_steps_per_second": 42.685,
331
  "step": 56000
332
  }
333
  ],
 
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
+ "eval_runtime": 3.1848,
15
+ "eval_samples_per_second": 648.395,
16
+ "eval_steps_per_second": 40.819,
17
  "step": 2000
18
  },
19
  {
 
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
+ "eval_runtime": 3.0375,
30
+ "eval_samples_per_second": 679.836,
31
+ "eval_steps_per_second": 42.798,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
+ "eval_runtime": 2.9574,
38
+ "eval_samples_per_second": 698.238,
39
+ "eval_steps_per_second": 43.957,
40
  "step": 6000
41
  },
42
  {
 
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
+ "eval_runtime": 2.5747,
53
+ "eval_samples_per_second": 802.048,
54
+ "eval_steps_per_second": 50.492,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
+ "eval_runtime": 1.618,
61
+ "eval_samples_per_second": 1276.247,
62
+ "eval_steps_per_second": 80.345,
63
  "step": 10000
64
  },
65
  {
 
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
+ "eval_runtime": 2.872,
76
+ "eval_samples_per_second": 719.021,
77
+ "eval_steps_per_second": 45.265,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
+ "eval_runtime": 3.0079,
84
+ "eval_samples_per_second": 686.536,
85
+ "eval_steps_per_second": 43.22,
86
  "step": 14000
87
  },
88
  {
 
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
+ "eval_runtime": 2.9966,
99
+ "eval_samples_per_second": 689.104,
100
+ "eval_steps_per_second": 43.382,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
+ "eval_runtime": 3.0264,
107
+ "eval_samples_per_second": 682.332,
108
+ "eval_steps_per_second": 42.955,
109
  "step": 18000
110
  },
111
  {
 
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
+ "eval_runtime": 3.0403,
122
+ "eval_samples_per_second": 679.22,
123
+ "eval_steps_per_second": 42.76,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
+ "eval_runtime": 2.9951,
130
+ "eval_samples_per_second": 689.451,
131
+ "eval_steps_per_second": 43.404,
132
  "step": 22000
133
  },
134
  {
 
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
+ "eval_runtime": 2.4346,
145
+ "eval_samples_per_second": 848.205,
146
+ "eval_steps_per_second": 53.398,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
+ "eval_runtime": 2.8198,
153
+ "eval_samples_per_second": 732.313,
154
+ "eval_steps_per_second": 46.102,
155
  "step": 26000
156
  },
157
  {
 
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
+ "eval_runtime": 2.8452,
168
+ "eval_samples_per_second": 725.772,
169
+ "eval_steps_per_second": 45.69,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
+ "eval_runtime": 3.0576,
176
+ "eval_samples_per_second": 675.356,
177
+ "eval_steps_per_second": 42.516,
178
  "step": 30000
179
  },
180
  {
 
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
+ "eval_runtime": 3.0048,
191
+ "eval_samples_per_second": 687.232,
192
+ "eval_steps_per_second": 43.264,
193
  "step": 32000
194
  },
195
  {
196
  "epoch": 42.5,
197
  "eval_loss": 2.733860731124878,
198
+ "eval_runtime": 3.0203,
199
+ "eval_samples_per_second": 683.699,
200
+ "eval_steps_per_second": 43.042,
201
  "step": 34000
202
  },
203
  {
 
210
  {
211
  "epoch": 45.0,
212
  "eval_loss": 2.6182146072387695,
213
+ "eval_runtime": 3.0392,
214
+ "eval_samples_per_second": 679.464,
215
+ "eval_steps_per_second": 42.775,
216
  "step": 36000
217
  },
218
  {
219
  "epoch": 47.5,
220
  "eval_loss": 2.561305046081543,
221
+ "eval_runtime": 2.9856,
222
+ "eval_samples_per_second": 691.642,
223
+ "eval_steps_per_second": 43.542,
224
  "step": 38000
225
  },
226
  {
 
233
  {
234
  "epoch": 50.0,
235
  "eval_loss": 2.4945614337921143,
236
+ "eval_runtime": 3.0785,
237
+ "eval_samples_per_second": 670.787,
238
+ "eval_steps_per_second": 42.229,
239
  "step": 40000
240
  },
241
  {
242
  "epoch": 52.5,
243
  "eval_loss": 2.4196503162384033,
244
+ "eval_runtime": 3.1293,
245
+ "eval_samples_per_second": 659.901,
246
+ "eval_steps_per_second": 41.543,
247
  "step": 42000
248
  },
249
  {
 
256
  {
257
  "epoch": 55.0,
258
  "eval_loss": 2.3687477111816406,
259
+ "eval_runtime": 3.0418,
260
+ "eval_samples_per_second": 678.883,
261
+ "eval_steps_per_second": 42.738,
262
  "step": 44000
263
  },
264
  {
265
  "epoch": 57.5,
266
  "eval_loss": 2.2801854610443115,
267
+ "eval_runtime": 2.562,
268
+ "eval_samples_per_second": 805.998,
269
+ "eval_steps_per_second": 50.741,
270
  "step": 46000
271
  },
272
  {
 
279
  {
280
  "epoch": 60.0,
281
  "eval_loss": 2.262115478515625,
282
+ "eval_runtime": 1.9284,
283
+ "eval_samples_per_second": 1070.809,
284
+ "eval_steps_per_second": 67.412,
285
  "step": 48000
286
  },
287
  {
288
  "epoch": 62.5,
289
  "eval_loss": 2.2170372009277344,
290
+ "eval_runtime": 1.6322,
291
+ "eval_samples_per_second": 1265.182,
292
+ "eval_steps_per_second": 79.648,
293
  "step": 50000
294
  },
295
  {
 
302
  {
303
  "epoch": 65.0,
304
  "eval_loss": 2.1907379627227783,
305
+ "eval_runtime": 1.7,
306
+ "eval_samples_per_second": 1214.702,
307
+ "eval_steps_per_second": 76.47,
308
  "step": 52000
309
  },
310
  {
311
  "epoch": 67.5,
312
  "eval_loss": 2.1659305095672607,
313
+ "eval_runtime": 1.8055,
314
+ "eval_samples_per_second": 1143.712,
315
+ "eval_steps_per_second": 72.001,
316
  "step": 54000
317
  },
318
  {
 
325
  {
326
  "epoch": 70.0,
327
  "eval_loss": 2.127293825149536,
328
+ "eval_runtime": 1.7543,
329
+ "eval_samples_per_second": 1177.082,
330
+ "eval_steps_per_second": 74.102,
331
  "step": 56000
332
  }
333
  ],