alinerodrigues commited on
Commit
2e1cb14
·
1 Parent(s): ad8bf35

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +16 -0
  2. eval_results.json +10 -0
  3. train_results.json +9 -0
  4. trainer_state.json +649 -0
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.0,
3
+ "eval_cer": 0.02656322252479063,
4
+ "eval_loss": 0.1773720234632492,
5
+ "eval_runtime": 20.504,
6
+ "eval_samples": 312,
7
+ "eval_samples_per_second": 15.217,
8
+ "eval_steps_per_second": 1.902,
9
+ "eval_wer": 0.08439275087909115,
10
+ "total_flos": 8.489873279367732e+19,
11
+ "train_loss": 0.24954385395291484,
12
+ "train_runtime": 14687.2421,
13
+ "train_samples": 2517,
14
+ "train_samples_per_second": 17.137,
15
+ "train_steps_per_second": 0.538
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.0,
3
+ "eval_cer": 0.02656322252479063,
4
+ "eval_loss": 0.1773720234632492,
5
+ "eval_runtime": 20.504,
6
+ "eval_samples": 312,
7
+ "eval_samples_per_second": 15.217,
8
+ "eval_steps_per_second": 1.902,
9
+ "eval_wer": 0.08439275087909115
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.0,
3
+ "total_flos": 8.489873279367732e+19,
4
+ "train_loss": 0.24954385395291484,
5
+ "train_runtime": 14687.2421,
6
+ "train_samples": 2517,
7
+ "train_samples_per_second": 17.137,
8
+ "train_steps_per_second": 0.538
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.1773720234632492,
3
+ "best_model_checkpoint": "wav2vec2-xlsr-1b-mecita-portuguese-all-text-a_coisa-os_morcegos/checkpoint-1738",
4
+ "epoch": 42.0,
5
+ "global_step": 3318,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 3e-05,
13
+ "loss": 25.5905,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_cer": 0.0734213636130093,
19
+ "eval_loss": 0.44948670268058777,
20
+ "eval_runtime": 19.9817,
21
+ "eval_samples_per_second": 15.614,
22
+ "eval_steps_per_second": 1.952,
23
+ "eval_wer": 0.25804706518799025,
24
+ "step": 79
25
+ },
26
+ {
27
+ "epoch": 1.27,
28
+ "learning_rate": 2.9631645569620254e-05,
29
+ "loss": 3.1482,
30
+ "step": 100
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_cer": 0.038020860093510765,
35
+ "eval_loss": 0.2479224056005478,
36
+ "eval_runtime": 19.8794,
37
+ "eval_samples_per_second": 15.695,
38
+ "eval_steps_per_second": 1.962,
39
+ "eval_wer": 0.12036786583716527,
40
+ "step": 158
41
+ },
42
+ {
43
+ "epoch": 2.53,
44
+ "learning_rate": 2.9251898734177216e-05,
45
+ "loss": 0.4247,
46
+ "step": 200
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_cer": 0.03447567178749422,
51
+ "eval_loss": 0.23473307490348816,
52
+ "eval_runtime": 19.9054,
53
+ "eval_samples_per_second": 15.674,
54
+ "eval_steps_per_second": 1.959,
55
+ "eval_wer": 0.10251555315120368,
56
+ "step": 237
57
+ },
58
+ {
59
+ "epoch": 3.8,
60
+ "learning_rate": 2.887215189873418e-05,
61
+ "loss": 0.3136,
62
+ "step": 300
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_cer": 0.032214971998150335,
67
+ "eval_loss": 0.2043968290090561,
68
+ "eval_runtime": 20.064,
69
+ "eval_samples_per_second": 15.55,
70
+ "eval_steps_per_second": 1.944,
71
+ "eval_wer": 0.10170408439275087,
72
+ "step": 316
73
+ },
74
+ {
75
+ "epoch": 5.0,
76
+ "eval_cer": 0.029594615424138108,
77
+ "eval_loss": 0.19060839712619781,
78
+ "eval_runtime": 19.9461,
79
+ "eval_samples_per_second": 15.642,
80
+ "eval_steps_per_second": 1.955,
81
+ "eval_wer": 0.09304841763592102,
82
+ "step": 395
83
+ },
84
+ {
85
+ "epoch": 5.06,
86
+ "learning_rate": 2.849240506329114e-05,
87
+ "loss": 0.2985,
88
+ "step": 400
89
+ },
90
+ {
91
+ "epoch": 6.0,
92
+ "eval_cer": 0.031084622103478395,
93
+ "eval_loss": 0.2050233781337738,
94
+ "eval_runtime": 20.0744,
95
+ "eval_samples_per_second": 15.542,
96
+ "eval_steps_per_second": 1.943,
97
+ "eval_wer": 0.09629429266973222,
98
+ "step": 474
99
+ },
100
+ {
101
+ "epoch": 6.33,
102
+ "learning_rate": 2.8112658227848103e-05,
103
+ "loss": 0.2413,
104
+ "step": 500
105
+ },
106
+ {
107
+ "epoch": 7.0,
108
+ "eval_cer": 0.030879103940810768,
109
+ "eval_loss": 0.20249603688716888,
110
+ "eval_runtime": 20.1379,
111
+ "eval_samples_per_second": 15.493,
112
+ "eval_steps_per_second": 1.937,
113
+ "eval_wer": 0.09710576142818501,
114
+ "step": 553
115
+ },
116
+ {
117
+ "epoch": 7.59,
118
+ "learning_rate": 2.7732911392405062e-05,
119
+ "loss": 0.2267,
120
+ "step": 600
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "eval_cer": 0.02913219955813595,
125
+ "eval_loss": 0.20055869221687317,
126
+ "eval_runtime": 19.8715,
127
+ "eval_samples_per_second": 15.701,
128
+ "eval_steps_per_second": 1.963,
129
+ "eval_wer": 0.08845009467135516,
130
+ "step": 632
131
+ },
132
+ {
133
+ "epoch": 8.86,
134
+ "learning_rate": 2.7353164556962024e-05,
135
+ "loss": 0.224,
136
+ "step": 700
137
+ },
138
+ {
139
+ "epoch": 9.0,
140
+ "eval_cer": 0.029080820017469044,
141
+ "eval_loss": 0.1990896761417389,
142
+ "eval_runtime": 19.8842,
143
+ "eval_samples_per_second": 15.691,
144
+ "eval_steps_per_second": 1.961,
145
+ "eval_wer": 0.09169596970516636,
146
+ "step": 711
147
+ },
148
+ {
149
+ "epoch": 10.0,
150
+ "eval_cer": 0.02810460874479782,
151
+ "eval_loss": 0.1880808174610138,
152
+ "eval_runtime": 19.8781,
153
+ "eval_samples_per_second": 15.696,
154
+ "eval_steps_per_second": 1.962,
155
+ "eval_wer": 0.08845009467135516,
156
+ "step": 790
157
+ },
158
+ {
159
+ "epoch": 10.13,
160
+ "learning_rate": 2.697341772151899e-05,
161
+ "loss": 0.1864,
162
+ "step": 800
163
+ },
164
+ {
165
+ "epoch": 11.0,
166
+ "eval_cer": 0.02784771104146329,
167
+ "eval_loss": 0.18414445221424103,
168
+ "eval_runtime": 20.0151,
169
+ "eval_samples_per_second": 15.588,
170
+ "eval_steps_per_second": 1.949,
171
+ "eval_wer": 0.08926156342980796,
172
+ "step": 869
173
+ },
174
+ {
175
+ "epoch": 11.39,
176
+ "learning_rate": 2.659367088607595e-05,
177
+ "loss": 0.1951,
178
+ "step": 900
179
+ },
180
+ {
181
+ "epoch": 12.0,
182
+ "eval_cer": 0.028155988285464728,
183
+ "eval_loss": 0.18085478246212006,
184
+ "eval_runtime": 20.0956,
185
+ "eval_samples_per_second": 15.526,
186
+ "eval_steps_per_second": 1.941,
187
+ "eval_wer": 0.08953205301595889,
188
+ "step": 948
189
+ },
190
+ {
191
+ "epoch": 12.66,
192
+ "learning_rate": 2.621392405063291e-05,
193
+ "loss": 0.1794,
194
+ "step": 1000
195
+ },
196
+ {
197
+ "epoch": 13.0,
198
+ "eval_cer": 0.028001849663464008,
199
+ "eval_loss": 0.19231769442558289,
200
+ "eval_runtime": 20.0602,
201
+ "eval_samples_per_second": 15.553,
202
+ "eval_steps_per_second": 1.944,
203
+ "eval_wer": 0.08331079253448742,
204
+ "step": 1027
205
+ },
206
+ {
207
+ "epoch": 13.92,
208
+ "learning_rate": 2.5834177215189873e-05,
209
+ "loss": 0.1621,
210
+ "step": 1100
211
+ },
212
+ {
213
+ "epoch": 14.0,
214
+ "eval_cer": 0.027744951960129476,
215
+ "eval_loss": 0.19489158689975739,
216
+ "eval_runtime": 20.0375,
217
+ "eval_samples_per_second": 15.571,
218
+ "eval_steps_per_second": 1.946,
219
+ "eval_wer": 0.08574519880984582,
220
+ "step": 1106
221
+ },
222
+ {
223
+ "epoch": 15.0,
224
+ "eval_cer": 0.02656322252479063,
225
+ "eval_loss": 0.19287976622581482,
226
+ "eval_runtime": 20.0101,
227
+ "eval_samples_per_second": 15.592,
228
+ "eval_steps_per_second": 1.949,
229
+ "eval_wer": 0.08168785501758183,
230
+ "step": 1185
231
+ },
232
+ {
233
+ "epoch": 15.19,
234
+ "learning_rate": 2.5454430379746836e-05,
235
+ "loss": 0.1695,
236
+ "step": 1200
237
+ },
238
+ {
239
+ "epoch": 16.0,
240
+ "eval_cer": 0.02697425885012588,
241
+ "eval_loss": 0.19071535766124725,
242
+ "eval_runtime": 20.1621,
243
+ "eval_samples_per_second": 15.475,
244
+ "eval_steps_per_second": 1.934,
245
+ "eval_wer": 0.08385177170678929,
246
+ "step": 1264
247
+ },
248
+ {
249
+ "epoch": 16.46,
250
+ "learning_rate": 2.5074683544303798e-05,
251
+ "loss": 0.1528,
252
+ "step": 1300
253
+ },
254
+ {
255
+ "epoch": 17.0,
256
+ "eval_cer": 0.02856702461079998,
257
+ "eval_loss": 0.18394820392131805,
258
+ "eval_runtime": 19.8761,
259
+ "eval_samples_per_second": 15.697,
260
+ "eval_steps_per_second": 1.962,
261
+ "eval_wer": 0.09061401136056262,
262
+ "step": 1343
263
+ },
264
+ {
265
+ "epoch": 17.72,
266
+ "learning_rate": 2.469493670886076e-05,
267
+ "loss": 0.1592,
268
+ "step": 1400
269
+ },
270
+ {
271
+ "epoch": 18.0,
272
+ "eval_cer": 0.02810460874479782,
273
+ "eval_loss": 0.18656496703624725,
274
+ "eval_runtime": 20.0039,
275
+ "eval_samples_per_second": 15.597,
276
+ "eval_steps_per_second": 1.95,
277
+ "eval_wer": 0.09034352177441168,
278
+ "step": 1422
279
+ },
280
+ {
281
+ "epoch": 18.99,
282
+ "learning_rate": 2.4315189873417722e-05,
283
+ "loss": 0.1519,
284
+ "step": 1500
285
+ },
286
+ {
287
+ "epoch": 19.0,
288
+ "eval_cer": 0.027488054256794944,
289
+ "eval_loss": 0.20305851101875305,
290
+ "eval_runtime": 20.3338,
291
+ "eval_samples_per_second": 15.344,
292
+ "eval_steps_per_second": 1.918,
293
+ "eval_wer": 0.08574519880984582,
294
+ "step": 1501
295
+ },
296
+ {
297
+ "epoch": 20.0,
298
+ "eval_cer": 0.027796331500796384,
299
+ "eval_loss": 0.19475489854812622,
300
+ "eval_runtime": 19.9135,
301
+ "eval_samples_per_second": 15.668,
302
+ "eval_steps_per_second": 1.958,
303
+ "eval_wer": 0.08601568839599676,
304
+ "step": 1580
305
+ },
306
+ {
307
+ "epoch": 20.25,
308
+ "learning_rate": 2.3935443037974685e-05,
309
+ "loss": 0.1257,
310
+ "step": 1600
311
+ },
312
+ {
313
+ "epoch": 21.0,
314
+ "eval_cer": 0.026152186199455377,
315
+ "eval_loss": 0.18495479226112366,
316
+ "eval_runtime": 19.994,
317
+ "eval_samples_per_second": 15.605,
318
+ "eval_steps_per_second": 1.951,
319
+ "eval_wer": 0.08601568839599676,
320
+ "step": 1659
321
+ },
322
+ {
323
+ "epoch": 21.52,
324
+ "learning_rate": 2.3555696202531647e-05,
325
+ "loss": 0.1288,
326
+ "step": 1700
327
+ },
328
+ {
329
+ "epoch": 22.0,
330
+ "eval_cer": 0.02656322252479063,
331
+ "eval_loss": 0.1773720234632492,
332
+ "eval_runtime": 19.9911,
333
+ "eval_samples_per_second": 15.607,
334
+ "eval_steps_per_second": 1.951,
335
+ "eval_wer": 0.08439275087909115,
336
+ "step": 1738
337
+ },
338
+ {
339
+ "epoch": 22.78,
340
+ "learning_rate": 2.317594936708861e-05,
341
+ "loss": 0.115,
342
+ "step": 1800
343
+ },
344
+ {
345
+ "epoch": 23.0,
346
+ "eval_cer": 0.02651184298412372,
347
+ "eval_loss": 0.19595499336719513,
348
+ "eval_runtime": 19.9245,
349
+ "eval_samples_per_second": 15.659,
350
+ "eval_steps_per_second": 1.957,
351
+ "eval_wer": 0.08439275087909115,
352
+ "step": 1817
353
+ },
354
+ {
355
+ "epoch": 24.0,
356
+ "eval_cer": 0.025843908955453937,
357
+ "eval_loss": 0.18322429060935974,
358
+ "eval_runtime": 19.9298,
359
+ "eval_samples_per_second": 15.655,
360
+ "eval_steps_per_second": 1.957,
361
+ "eval_wer": 0.08249932377603462,
362
+ "step": 1896
363
+ },
364
+ {
365
+ "epoch": 24.05,
366
+ "learning_rate": 2.2796202531645568e-05,
367
+ "loss": 0.1223,
368
+ "step": 1900
369
+ },
370
+ {
371
+ "epoch": 25.0,
372
+ "eval_cer": 0.02610080665878847,
373
+ "eval_loss": 0.19195543229579926,
374
+ "eval_runtime": 20.0272,
375
+ "eval_samples_per_second": 15.579,
376
+ "eval_steps_per_second": 1.947,
377
+ "eval_wer": 0.08276981336218556,
378
+ "step": 1975
379
+ },
380
+ {
381
+ "epoch": 25.32,
382
+ "learning_rate": 2.2416455696202534e-05,
383
+ "loss": 0.1175,
384
+ "step": 2000
385
+ },
386
+ {
387
+ "epoch": 26.0,
388
+ "eval_cer": 0.026049427118121565,
389
+ "eval_loss": 0.1951374113559723,
390
+ "eval_runtime": 20.0587,
391
+ "eval_samples_per_second": 15.554,
392
+ "eval_steps_per_second": 1.944,
393
+ "eval_wer": 0.08033540708682715,
394
+ "step": 2054
395
+ },
396
+ {
397
+ "epoch": 26.58,
398
+ "learning_rate": 2.2036708860759496e-05,
399
+ "loss": 0.1051,
400
+ "step": 2100
401
+ },
402
+ {
403
+ "epoch": 27.0,
404
+ "eval_cer": 0.02656322252479063,
405
+ "eval_loss": 0.1995711475610733,
406
+ "eval_runtime": 20.0348,
407
+ "eval_samples_per_second": 15.573,
408
+ "eval_steps_per_second": 1.947,
409
+ "eval_wer": 0.08249932377603462,
410
+ "step": 2133
411
+ },
412
+ {
413
+ "epoch": 27.85,
414
+ "learning_rate": 2.1656962025316455e-05,
415
+ "loss": 0.1033,
416
+ "step": 2200
417
+ },
418
+ {
419
+ "epoch": 28.0,
420
+ "eval_cer": 0.027385295175461132,
421
+ "eval_loss": 0.21518437564373016,
422
+ "eval_runtime": 20.185,
423
+ "eval_samples_per_second": 15.457,
424
+ "eval_steps_per_second": 1.932,
425
+ "eval_wer": 0.08466324046524208,
426
+ "step": 2212
427
+ },
428
+ {
429
+ "epoch": 29.0,
430
+ "eval_cer": 0.02769357241946257,
431
+ "eval_loss": 0.2082461565732956,
432
+ "eval_runtime": 20.1321,
433
+ "eval_samples_per_second": 15.498,
434
+ "eval_steps_per_second": 1.937,
435
+ "eval_wer": 0.08790911549905328,
436
+ "step": 2291
437
+ },
438
+ {
439
+ "epoch": 29.11,
440
+ "learning_rate": 2.127721518987342e-05,
441
+ "loss": 0.0961,
442
+ "step": 2300
443
+ },
444
+ {
445
+ "epoch": 30.0,
446
+ "eval_cer": 0.027385295175461132,
447
+ "eval_loss": 0.2153148353099823,
448
+ "eval_runtime": 20.1045,
449
+ "eval_samples_per_second": 15.519,
450
+ "eval_steps_per_second": 1.94,
451
+ "eval_wer": 0.08547470922369489,
452
+ "step": 2370
453
+ },
454
+ {
455
+ "epoch": 30.38,
456
+ "learning_rate": 2.089746835443038e-05,
457
+ "loss": 0.1003,
458
+ "step": 2400
459
+ },
460
+ {
461
+ "epoch": 31.0,
462
+ "eval_cer": 0.028772542773467604,
463
+ "eval_loss": 0.2043757140636444,
464
+ "eval_runtime": 20.1868,
465
+ "eval_samples_per_second": 15.456,
466
+ "eval_steps_per_second": 1.932,
467
+ "eval_wer": 0.09034352177441168,
468
+ "step": 2449
469
+ },
470
+ {
471
+ "epoch": 31.65,
472
+ "learning_rate": 2.0517721518987342e-05,
473
+ "loss": 0.1129,
474
+ "step": 2500
475
+ },
476
+ {
477
+ "epoch": 32.0,
478
+ "eval_cer": 0.026768740687458253,
479
+ "eval_loss": 0.20504404604434967,
480
+ "eval_runtime": 20.2156,
481
+ "eval_samples_per_second": 15.434,
482
+ "eval_steps_per_second": 1.929,
483
+ "eval_wer": 0.08547470922369489,
484
+ "step": 2528
485
+ },
486
+ {
487
+ "epoch": 32.91,
488
+ "learning_rate": 2.0137974683544304e-05,
489
+ "loss": 0.0939,
490
+ "step": 2600
491
+ },
492
+ {
493
+ "epoch": 33.0,
494
+ "eval_cer": 0.0271283974721266,
495
+ "eval_loss": 0.20279404520988464,
496
+ "eval_runtime": 20.1612,
497
+ "eval_samples_per_second": 15.475,
498
+ "eval_steps_per_second": 1.934,
499
+ "eval_wer": 0.08601568839599676,
500
+ "step": 2607
501
+ },
502
+ {
503
+ "epoch": 34.0,
504
+ "eval_cer": 0.027436674716128037,
505
+ "eval_loss": 0.20313425362110138,
506
+ "eval_runtime": 20.5421,
507
+ "eval_samples_per_second": 15.188,
508
+ "eval_steps_per_second": 1.899,
509
+ "eval_wer": 0.08466324046524208,
510
+ "step": 2686
511
+ },
512
+ {
513
+ "epoch": 34.18,
514
+ "learning_rate": 1.9758227848101266e-05,
515
+ "loss": 0.0846,
516
+ "step": 2700
517
+ },
518
+ {
519
+ "epoch": 35.0,
520
+ "eval_cer": 0.026922879309458973,
521
+ "eval_loss": 0.20462077856063843,
522
+ "eval_runtime": 20.1988,
523
+ "eval_samples_per_second": 15.446,
524
+ "eval_steps_per_second": 1.931,
525
+ "eval_wer": 0.08222883418988369,
526
+ "step": 2765
527
+ },
528
+ {
529
+ "epoch": 35.44,
530
+ "learning_rate": 1.937848101265823e-05,
531
+ "loss": 0.083,
532
+ "step": 2800
533
+ },
534
+ {
535
+ "epoch": 36.0,
536
+ "eval_cer": 0.02651184298412372,
537
+ "eval_loss": 0.20938025414943695,
538
+ "eval_runtime": 20.4858,
539
+ "eval_samples_per_second": 15.23,
540
+ "eval_steps_per_second": 1.904,
541
+ "eval_wer": 0.08249932377603462,
542
+ "step": 2844
543
+ },
544
+ {
545
+ "epoch": 36.71,
546
+ "learning_rate": 1.8998734177215188e-05,
547
+ "loss": 0.0844,
548
+ "step": 2900
549
+ },
550
+ {
551
+ "epoch": 37.0,
552
+ "eval_cer": 0.026768740687458253,
553
+ "eval_loss": 0.21756267547607422,
554
+ "eval_runtime": 20.1565,
555
+ "eval_samples_per_second": 15.479,
556
+ "eval_steps_per_second": 1.935,
557
+ "eval_wer": 0.08195834460373276,
558
+ "step": 2923
559
+ },
560
+ {
561
+ "epoch": 37.97,
562
+ "learning_rate": 1.8618987341772153e-05,
563
+ "loss": 0.0829,
564
+ "step": 3000
565
+ },
566
+ {
567
+ "epoch": 38.0,
568
+ "eval_cer": 0.02666598160612444,
569
+ "eval_loss": 0.20818965137004852,
570
+ "eval_runtime": 20.1541,
571
+ "eval_samples_per_second": 15.481,
572
+ "eval_steps_per_second": 1.935,
573
+ "eval_wer": 0.08168785501758183,
574
+ "step": 3002
575
+ },
576
+ {
577
+ "epoch": 39.0,
578
+ "eval_cer": 0.028618404151466884,
579
+ "eval_loss": 0.2200096845626831,
580
+ "eval_runtime": 20.1838,
581
+ "eval_samples_per_second": 15.458,
582
+ "eval_steps_per_second": 1.932,
583
+ "eval_wer": 0.08926156342980796,
584
+ "step": 3081
585
+ },
586
+ {
587
+ "epoch": 39.24,
588
+ "learning_rate": 1.8239240506329115e-05,
589
+ "loss": 0.103,
590
+ "step": 3100
591
+ },
592
+ {
593
+ "epoch": 40.0,
594
+ "eval_cer": 0.027642192878795664,
595
+ "eval_loss": 0.2102428525686264,
596
+ "eval_runtime": 20.0863,
597
+ "eval_samples_per_second": 15.533,
598
+ "eval_steps_per_second": 1.942,
599
+ "eval_wer": 0.08412226129294022,
600
+ "step": 3160
601
+ },
602
+ {
603
+ "epoch": 40.51,
604
+ "learning_rate": 1.7859493670886074e-05,
605
+ "loss": 0.0728,
606
+ "step": 3200
607
+ },
608
+ {
609
+ "epoch": 41.0,
610
+ "eval_cer": 0.027077017931459692,
611
+ "eval_loss": 0.2143275886774063,
612
+ "eval_runtime": 20.2051,
613
+ "eval_samples_per_second": 15.442,
614
+ "eval_steps_per_second": 1.93,
615
+ "eval_wer": 0.08168785501758183,
616
+ "step": 3239
617
+ },
618
+ {
619
+ "epoch": 41.77,
620
+ "learning_rate": 1.747974683544304e-05,
621
+ "loss": 0.079,
622
+ "step": 3300
623
+ },
624
+ {
625
+ "epoch": 42.0,
626
+ "eval_cer": 0.02651184298412372,
627
+ "eval_loss": 0.2131078988313675,
628
+ "eval_runtime": 20.1578,
629
+ "eval_samples_per_second": 15.478,
630
+ "eval_steps_per_second": 1.935,
631
+ "eval_wer": 0.08249932377603462,
632
+ "step": 3318
633
+ },
634
+ {
635
+ "epoch": 42.0,
636
+ "step": 3318,
637
+ "total_flos": 8.489873279367732e+19,
638
+ "train_loss": 0.24954385395291484,
639
+ "train_runtime": 14687.2421,
640
+ "train_samples_per_second": 17.137,
641
+ "train_steps_per_second": 0.538
642
+ }
643
+ ],
644
+ "max_steps": 7900,
645
+ "num_train_epochs": 100,
646
+ "total_flos": 8.489873279367732e+19,
647
+ "trial_name": null,
648
+ "trial_params": null
649
+ }