fpadovani commited on
Commit
90dd085
·
verified ·
1 Parent(s): d99481c

Training in progress, step 88000, checkpoint

Browse files
Files changed (1) hide show
  1. checkpoint-88000/trainer_state.json +132 -132
checkpoint-88000/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
- "eval_runtime": 1.6226,
15
- "eval_samples_per_second": 1272.646,
16
- "eval_steps_per_second": 80.118,
17
  "step": 2000
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
- "eval_runtime": 1.6382,
30
- "eval_samples_per_second": 1260.5,
31
- "eval_steps_per_second": 79.353,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
- "eval_runtime": 3.0373,
38
- "eval_samples_per_second": 679.889,
39
- "eval_steps_per_second": 42.802,
40
  "step": 6000
41
  },
42
  {
@@ -49,17 +49,17 @@
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
- "eval_runtime": 3.0316,
53
- "eval_samples_per_second": 681.153,
54
- "eval_steps_per_second": 42.881,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
- "eval_runtime": 3.1272,
61
- "eval_samples_per_second": 660.333,
62
- "eval_steps_per_second": 41.571,
63
  "step": 10000
64
  },
65
  {
@@ -72,17 +72,17 @@
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
- "eval_runtime": 3.0078,
76
- "eval_samples_per_second": 686.558,
77
- "eval_steps_per_second": 43.222,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
- "eval_runtime": 3.0343,
84
- "eval_samples_per_second": 680.548,
85
- "eval_steps_per_second": 42.843,
86
  "step": 14000
87
  },
88
  {
@@ -95,17 +95,17 @@
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
- "eval_runtime": 3.0113,
99
- "eval_samples_per_second": 685.757,
100
- "eval_steps_per_second": 43.171,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
- "eval_runtime": 3.0569,
107
- "eval_samples_per_second": 675.527,
108
- "eval_steps_per_second": 42.527,
109
  "step": 18000
110
  },
111
  {
@@ -118,17 +118,17 @@
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
- "eval_runtime": 2.8975,
122
- "eval_samples_per_second": 712.671,
123
- "eval_steps_per_second": 44.865,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
- "eval_runtime": 3.0834,
130
- "eval_samples_per_second": 669.706,
131
- "eval_steps_per_second": 42.161,
132
  "step": 22000
133
  },
134
  {
@@ -141,17 +141,17 @@
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
- "eval_runtime": 2.6664,
145
- "eval_samples_per_second": 774.461,
146
- "eval_steps_per_second": 48.755,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
- "eval_runtime": 2.5685,
153
- "eval_samples_per_second": 803.957,
154
- "eval_steps_per_second": 50.612,
155
  "step": 26000
156
  },
157
  {
@@ -164,17 +164,17 @@
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
- "eval_runtime": 2.8723,
168
- "eval_samples_per_second": 718.927,
169
- "eval_steps_per_second": 45.259,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
- "eval_runtime": 2.9312,
176
- "eval_samples_per_second": 704.488,
177
- "eval_steps_per_second": 44.35,
178
  "step": 30000
179
  },
180
  {
@@ -187,17 +187,17 @@
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
- "eval_runtime": 3.0495,
191
- "eval_samples_per_second": 677.15,
192
- "eval_steps_per_second": 42.629,
193
  "step": 32000
194
  },
195
  {
196
  "epoch": 42.5,
197
  "eval_loss": 2.733860731124878,
198
- "eval_runtime": 2.9214,
199
- "eval_samples_per_second": 706.846,
200
- "eval_steps_per_second": 44.499,
201
  "step": 34000
202
  },
203
  {
@@ -210,17 +210,17 @@
210
  {
211
  "epoch": 45.0,
212
  "eval_loss": 2.6182146072387695,
213
- "eval_runtime": 3.223,
214
- "eval_samples_per_second": 640.711,
215
- "eval_steps_per_second": 40.335,
216
  "step": 36000
217
  },
218
  {
219
  "epoch": 47.5,
220
  "eval_loss": 2.561305046081543,
221
- "eval_runtime": 3.1317,
222
- "eval_samples_per_second": 659.384,
223
- "eval_steps_per_second": 41.511,
224
  "step": 38000
225
  },
226
  {
@@ -233,17 +233,17 @@
233
  {
234
  "epoch": 50.0,
235
  "eval_loss": 2.4945614337921143,
236
- "eval_runtime": 3.1343,
237
- "eval_samples_per_second": 658.831,
238
- "eval_steps_per_second": 41.476,
239
  "step": 40000
240
  },
241
  {
242
  "epoch": 52.5,
243
  "eval_loss": 2.4196503162384033,
244
- "eval_runtime": 3.0349,
245
- "eval_samples_per_second": 680.411,
246
- "eval_steps_per_second": 42.835,
247
  "step": 42000
248
  },
249
  {
@@ -256,17 +256,17 @@
256
  {
257
  "epoch": 55.0,
258
  "eval_loss": 2.3687477111816406,
259
- "eval_runtime": 3.0804,
260
- "eval_samples_per_second": 670.376,
261
- "eval_steps_per_second": 42.203,
262
  "step": 44000
263
  },
264
  {
265
  "epoch": 57.5,
266
  "eval_loss": 2.2801854610443115,
267
- "eval_runtime": 3.039,
268
- "eval_samples_per_second": 679.507,
269
- "eval_steps_per_second": 42.778,
270
  "step": 46000
271
  },
272
  {
@@ -279,17 +279,17 @@
279
  {
280
  "epoch": 60.0,
281
  "eval_loss": 2.262115478515625,
282
- "eval_runtime": 3.1233,
283
- "eval_samples_per_second": 661.165,
284
- "eval_steps_per_second": 41.623,
285
  "step": 48000
286
  },
287
  {
288
  "epoch": 62.5,
289
  "eval_loss": 2.2170372009277344,
290
- "eval_runtime": 3.1043,
291
- "eval_samples_per_second": 665.2,
292
- "eval_steps_per_second": 41.877,
293
  "step": 50000
294
  },
295
  {
@@ -302,17 +302,17 @@
302
  {
303
  "epoch": 65.0,
304
  "eval_loss": 2.1907379627227783,
305
- "eval_runtime": 3.0605,
306
- "eval_samples_per_second": 674.72,
307
- "eval_steps_per_second": 42.476,
308
  "step": 52000
309
  },
310
  {
311
  "epoch": 67.5,
312
  "eval_loss": 2.1659305095672607,
313
- "eval_runtime": 3.11,
314
- "eval_samples_per_second": 663.989,
315
- "eval_steps_per_second": 41.801,
316
  "step": 54000
317
  },
318
  {
@@ -325,17 +325,17 @@
325
  {
326
  "epoch": 70.0,
327
  "eval_loss": 2.127293825149536,
328
- "eval_runtime": 3.0456,
329
- "eval_samples_per_second": 678.031,
330
- "eval_steps_per_second": 42.685,
331
  "step": 56000
332
  },
333
  {
334
  "epoch": 72.5,
335
  "eval_loss": 2.087448835372925,
336
- "eval_runtime": 3.052,
337
- "eval_samples_per_second": 676.616,
338
- "eval_steps_per_second": 42.596,
339
  "step": 58000
340
  },
341
  {
@@ -348,17 +348,17 @@
348
  {
349
  "epoch": 75.0,
350
  "eval_loss": 2.0742504596710205,
351
- "eval_runtime": 3.3514,
352
- "eval_samples_per_second": 616.163,
353
- "eval_steps_per_second": 38.79,
354
  "step": 60000
355
  },
356
  {
357
  "epoch": 77.5,
358
  "eval_loss": 2.0582902431488037,
359
- "eval_runtime": 2.6814,
360
- "eval_samples_per_second": 770.12,
361
- "eval_steps_per_second": 48.482,
362
  "step": 62000
363
  },
364
  {
@@ -371,17 +371,17 @@
371
  {
372
  "epoch": 80.0,
373
  "eval_loss": 2.037095785140991,
374
- "eval_runtime": 1.6551,
375
- "eval_samples_per_second": 1247.667,
376
- "eval_steps_per_second": 78.546,
377
  "step": 64000
378
  },
379
  {
380
  "epoch": 82.5,
381
  "eval_loss": 2.0038888454437256,
382
- "eval_runtime": 1.7042,
383
- "eval_samples_per_second": 1211.685,
384
- "eval_steps_per_second": 76.28,
385
  "step": 66000
386
  },
387
  {
@@ -394,17 +394,17 @@
394
  {
395
  "epoch": 85.0,
396
  "eval_loss": 1.9900726079940796,
397
- "eval_runtime": 1.872,
398
- "eval_samples_per_second": 1103.097,
399
- "eval_steps_per_second": 69.444,
400
  "step": 68000
401
  },
402
  {
403
  "epoch": 87.5,
404
  "eval_loss": 1.9754005670547485,
405
- "eval_runtime": 1.6651,
406
- "eval_samples_per_second": 1240.19,
407
- "eval_steps_per_second": 78.075,
408
  "step": 70000
409
  },
410
  {
@@ -417,17 +417,17 @@
417
  {
418
  "epoch": 90.0,
419
  "eval_loss": 1.9565609693527222,
420
- "eval_runtime": 1.6688,
421
- "eval_samples_per_second": 1237.389,
422
- "eval_steps_per_second": 77.899,
423
  "step": 72000
424
  },
425
  {
426
  "epoch": 92.5,
427
  "eval_loss": 1.940429449081421,
428
- "eval_runtime": 1.67,
429
- "eval_samples_per_second": 1236.495,
430
- "eval_steps_per_second": 77.842,
431
  "step": 74000
432
  },
433
  {
@@ -440,17 +440,17 @@
440
  {
441
  "epoch": 95.0,
442
  "eval_loss": 1.9128267765045166,
443
- "eval_runtime": 1.638,
444
- "eval_samples_per_second": 1260.669,
445
- "eval_steps_per_second": 79.364,
446
  "step": 76000
447
  },
448
  {
449
  "epoch": 97.5,
450
  "eval_loss": 1.939635157585144,
451
- "eval_runtime": 1.7065,
452
- "eval_samples_per_second": 1210.111,
453
- "eval_steps_per_second": 76.181,
454
  "step": 78000
455
  },
456
  {
@@ -463,17 +463,17 @@
463
  {
464
  "epoch": 100.0,
465
  "eval_loss": 1.959123134613037,
466
- "eval_runtime": 1.7328,
467
- "eval_samples_per_second": 1191.745,
468
- "eval_steps_per_second": 75.025,
469
  "step": 80000
470
  },
471
  {
472
  "epoch": 102.5,
473
  "eval_loss": 1.9078463315963745,
474
- "eval_runtime": 1.6402,
475
- "eval_samples_per_second": 1258.997,
476
- "eval_steps_per_second": 79.259,
477
  "step": 82000
478
  },
479
  {
@@ -486,17 +486,17 @@
486
  {
487
  "epoch": 105.0,
488
  "eval_loss": 1.9178215265274048,
489
- "eval_runtime": 1.5958,
490
- "eval_samples_per_second": 1293.982,
491
- "eval_steps_per_second": 81.461,
492
  "step": 84000
493
  },
494
  {
495
  "epoch": 107.5,
496
  "eval_loss": 1.9046084880828857,
497
- "eval_runtime": 3.0454,
498
- "eval_samples_per_second": 678.079,
499
- "eval_steps_per_second": 42.688,
500
  "step": 86000
501
  },
502
  {
@@ -509,9 +509,9 @@
509
  {
510
  "epoch": 110.0,
511
  "eval_loss": 1.8918195962905884,
512
- "eval_runtime": 3.034,
513
- "eval_samples_per_second": 680.628,
514
- "eval_steps_per_second": 42.848,
515
  "step": 88000
516
  }
517
  ],
 
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
+ "eval_runtime": 3.1848,
15
+ "eval_samples_per_second": 648.395,
16
+ "eval_steps_per_second": 40.819,
17
  "step": 2000
18
  },
19
  {
 
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
+ "eval_runtime": 3.0375,
30
+ "eval_samples_per_second": 679.836,
31
+ "eval_steps_per_second": 42.798,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
+ "eval_runtime": 2.9574,
38
+ "eval_samples_per_second": 698.238,
39
+ "eval_steps_per_second": 43.957,
40
  "step": 6000
41
  },
42
  {
 
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
+ "eval_runtime": 2.5747,
53
+ "eval_samples_per_second": 802.048,
54
+ "eval_steps_per_second": 50.492,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
+ "eval_runtime": 1.618,
61
+ "eval_samples_per_second": 1276.247,
62
+ "eval_steps_per_second": 80.345,
63
  "step": 10000
64
  },
65
  {
 
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
+ "eval_runtime": 2.872,
76
+ "eval_samples_per_second": 719.021,
77
+ "eval_steps_per_second": 45.265,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
+ "eval_runtime": 3.0079,
84
+ "eval_samples_per_second": 686.536,
85
+ "eval_steps_per_second": 43.22,
86
  "step": 14000
87
  },
88
  {
 
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
+ "eval_runtime": 2.9966,
99
+ "eval_samples_per_second": 689.104,
100
+ "eval_steps_per_second": 43.382,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
+ "eval_runtime": 3.0264,
107
+ "eval_samples_per_second": 682.332,
108
+ "eval_steps_per_second": 42.955,
109
  "step": 18000
110
  },
111
  {
 
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
+ "eval_runtime": 3.0403,
122
+ "eval_samples_per_second": 679.22,
123
+ "eval_steps_per_second": 42.76,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
+ "eval_runtime": 2.9951,
130
+ "eval_samples_per_second": 689.451,
131
+ "eval_steps_per_second": 43.404,
132
  "step": 22000
133
  },
134
  {
 
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
+ "eval_runtime": 2.4346,
145
+ "eval_samples_per_second": 848.205,
146
+ "eval_steps_per_second": 53.398,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
+ "eval_runtime": 2.8198,
153
+ "eval_samples_per_second": 732.313,
154
+ "eval_steps_per_second": 46.102,
155
  "step": 26000
156
  },
157
  {
 
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
+ "eval_runtime": 2.8452,
168
+ "eval_samples_per_second": 725.772,
169
+ "eval_steps_per_second": 45.69,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
+ "eval_runtime": 3.0576,
176
+ "eval_samples_per_second": 675.356,
177
+ "eval_steps_per_second": 42.516,
178
  "step": 30000
179
  },
180
  {
 
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
+ "eval_runtime": 3.0048,
191
+ "eval_samples_per_second": 687.232,
192
+ "eval_steps_per_second": 43.264,
193
  "step": 32000
194
  },
195
  {
196
  "epoch": 42.5,
197
  "eval_loss": 2.733860731124878,
198
+ "eval_runtime": 3.0203,
199
+ "eval_samples_per_second": 683.699,
200
+ "eval_steps_per_second": 43.042,
201
  "step": 34000
202
  },
203
  {
 
210
  {
211
  "epoch": 45.0,
212
  "eval_loss": 2.6182146072387695,
213
+ "eval_runtime": 3.0392,
214
+ "eval_samples_per_second": 679.464,
215
+ "eval_steps_per_second": 42.775,
216
  "step": 36000
217
  },
218
  {
219
  "epoch": 47.5,
220
  "eval_loss": 2.561305046081543,
221
+ "eval_runtime": 2.9856,
222
+ "eval_samples_per_second": 691.642,
223
+ "eval_steps_per_second": 43.542,
224
  "step": 38000
225
  },
226
  {
 
233
  {
234
  "epoch": 50.0,
235
  "eval_loss": 2.4945614337921143,
236
+ "eval_runtime": 3.0785,
237
+ "eval_samples_per_second": 670.787,
238
+ "eval_steps_per_second": 42.229,
239
  "step": 40000
240
  },
241
  {
242
  "epoch": 52.5,
243
  "eval_loss": 2.4196503162384033,
244
+ "eval_runtime": 3.1293,
245
+ "eval_samples_per_second": 659.901,
246
+ "eval_steps_per_second": 41.543,
247
  "step": 42000
248
  },
249
  {
 
256
  {
257
  "epoch": 55.0,
258
  "eval_loss": 2.3687477111816406,
259
+ "eval_runtime": 3.0418,
260
+ "eval_samples_per_second": 678.883,
261
+ "eval_steps_per_second": 42.738,
262
  "step": 44000
263
  },
264
  {
265
  "epoch": 57.5,
266
  "eval_loss": 2.2801854610443115,
267
+ "eval_runtime": 2.562,
268
+ "eval_samples_per_second": 805.998,
269
+ "eval_steps_per_second": 50.741,
270
  "step": 46000
271
  },
272
  {
 
279
  {
280
  "epoch": 60.0,
281
  "eval_loss": 2.262115478515625,
282
+ "eval_runtime": 1.9284,
283
+ "eval_samples_per_second": 1070.809,
284
+ "eval_steps_per_second": 67.412,
285
  "step": 48000
286
  },
287
  {
288
  "epoch": 62.5,
289
  "eval_loss": 2.2170372009277344,
290
+ "eval_runtime": 1.6322,
291
+ "eval_samples_per_second": 1265.182,
292
+ "eval_steps_per_second": 79.648,
293
  "step": 50000
294
  },
295
  {
 
302
  {
303
  "epoch": 65.0,
304
  "eval_loss": 2.1907379627227783,
305
+ "eval_runtime": 1.7,
306
+ "eval_samples_per_second": 1214.702,
307
+ "eval_steps_per_second": 76.47,
308
  "step": 52000
309
  },
310
  {
311
  "epoch": 67.5,
312
  "eval_loss": 2.1659305095672607,
313
+ "eval_runtime": 1.8055,
314
+ "eval_samples_per_second": 1143.712,
315
+ "eval_steps_per_second": 72.001,
316
  "step": 54000
317
  },
318
  {
 
325
  {
326
  "epoch": 70.0,
327
  "eval_loss": 2.127293825149536,
328
+ "eval_runtime": 1.7543,
329
+ "eval_samples_per_second": 1177.082,
330
+ "eval_steps_per_second": 74.102,
331
  "step": 56000
332
  },
333
  {
334
  "epoch": 72.5,
335
  "eval_loss": 2.087448835372925,
336
+ "eval_runtime": 1.7243,
337
+ "eval_samples_per_second": 1197.568,
338
+ "eval_steps_per_second": 75.392,
339
  "step": 58000
340
  },
341
  {
 
348
  {
349
  "epoch": 75.0,
350
  "eval_loss": 2.0742504596710205,
351
+ "eval_runtime": 1.7269,
352
+ "eval_samples_per_second": 1195.792,
353
+ "eval_steps_per_second": 75.28,
354
  "step": 60000
355
  },
356
  {
357
  "epoch": 77.5,
358
  "eval_loss": 2.0582902431488037,
359
+ "eval_runtime": 1.7674,
360
+ "eval_samples_per_second": 1168.411,
361
+ "eval_steps_per_second": 73.556,
362
  "step": 62000
363
  },
364
  {
 
371
  {
372
  "epoch": 80.0,
373
  "eval_loss": 2.037095785140991,
374
+ "eval_runtime": 1.6339,
375
+ "eval_samples_per_second": 1263.872,
376
+ "eval_steps_per_second": 79.566,
377
  "step": 64000
378
  },
379
  {
380
  "epoch": 82.5,
381
  "eval_loss": 2.0038888454437256,
382
+ "eval_runtime": 1.6907,
383
+ "eval_samples_per_second": 1221.38,
384
+ "eval_steps_per_second": 76.891,
385
  "step": 66000
386
  },
387
  {
 
394
  {
395
  "epoch": 85.0,
396
  "eval_loss": 1.9900726079940796,
397
+ "eval_runtime": 1.6879,
398
+ "eval_samples_per_second": 1223.403,
399
+ "eval_steps_per_second": 77.018,
400
  "step": 68000
401
  },
402
  {
403
  "epoch": 87.5,
404
  "eval_loss": 1.9754005670547485,
405
+ "eval_runtime": 1.7314,
406
+ "eval_samples_per_second": 1192.693,
407
+ "eval_steps_per_second": 75.085,
408
  "step": 70000
409
  },
410
  {
 
417
  {
418
  "epoch": 90.0,
419
  "eval_loss": 1.9565609693527222,
420
+ "eval_runtime": 1.8855,
421
+ "eval_samples_per_second": 1095.221,
422
+ "eval_steps_per_second": 68.949,
423
  "step": 72000
424
  },
425
  {
426
  "epoch": 92.5,
427
  "eval_loss": 1.940429449081421,
428
+ "eval_runtime": 1.7766,
429
+ "eval_samples_per_second": 1162.312,
430
+ "eval_steps_per_second": 73.172,
431
  "step": 74000
432
  },
433
  {
 
440
  {
441
  "epoch": 95.0,
442
  "eval_loss": 1.9128267765045166,
443
+ "eval_runtime": 1.8509,
444
+ "eval_samples_per_second": 1115.674,
445
+ "eval_steps_per_second": 70.236,
446
  "step": 76000
447
  },
448
  {
449
  "epoch": 97.5,
450
  "eval_loss": 1.939635157585144,
451
+ "eval_runtime": 1.7798,
452
+ "eval_samples_per_second": 1160.26,
453
+ "eval_steps_per_second": 73.043,
454
  "step": 78000
455
  },
456
  {
 
463
  {
464
  "epoch": 100.0,
465
  "eval_loss": 1.959123134613037,
466
+ "eval_runtime": 1.6637,
467
+ "eval_samples_per_second": 1241.231,
468
+ "eval_steps_per_second": 78.14,
469
  "step": 80000
470
  },
471
  {
472
  "epoch": 102.5,
473
  "eval_loss": 1.9078463315963745,
474
+ "eval_runtime": 1.6559,
475
+ "eval_samples_per_second": 1247.034,
476
+ "eval_steps_per_second": 78.506,
477
  "step": 82000
478
  },
479
  {
 
486
  {
487
  "epoch": 105.0,
488
  "eval_loss": 1.9178215265274048,
489
+ "eval_runtime": 1.7131,
490
+ "eval_samples_per_second": 1205.385,
491
+ "eval_steps_per_second": 75.884,
492
  "step": 84000
493
  },
494
  {
495
  "epoch": 107.5,
496
  "eval_loss": 1.9046084880828857,
497
+ "eval_runtime": 1.7051,
498
+ "eval_samples_per_second": 1211.05,
499
+ "eval_steps_per_second": 76.24,
500
  "step": 86000
501
  },
502
  {
 
509
  {
510
  "epoch": 110.0,
511
  "eval_loss": 1.8918195962905884,
512
+ "eval_runtime": 1.7296,
513
+ "eval_samples_per_second": 1193.906,
514
+ "eval_steps_per_second": 75.161,
515
  "step": 88000
516
  }
517
  ],