Priyanship commited on
Commit
0d1e472
·
verified ·
1 Parent(s): 04f14ae

large_sami_22k_finetuned_ft_pseudolabels

Browse files
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 1.1908
20
- - Wer: 0.5762
21
- - Cer: 0.2198
22
 
23
  ## Model description
24
 
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.4804
20
+ - Wer: 0.3867
21
+ - Cer: 0.1484
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_cer": 0.12414167860494986,
4
- "eval_loss": 0.4442386329174042,
5
- "eval_runtime": 33.9749,
6
  "eval_samples": 890,
7
- "eval_samples_per_second": 26.196,
8
- "eval_steps_per_second": 3.297,
9
- "eval_wer": 0.3508522727272727,
10
  "total_flos": 1.8440987587856836e+20,
11
- "train_loss": 0.08842882802456985,
12
- "train_runtime": 67339.4697,
13
  "train_samples": 17267,
14
- "train_samples_per_second": 15.385,
15
- "train_steps_per_second": 0.962
16
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_cer": 0.14837227683643728,
4
+ "eval_loss": 0.480378657579422,
5
+ "eval_runtime": 50.3698,
6
  "eval_samples": 890,
7
+ "eval_samples_per_second": 17.669,
8
+ "eval_steps_per_second": 2.224,
9
+ "eval_wer": 0.38667929292929293,
10
  "total_flos": 1.8440987587856836e+20,
11
+ "train_loss": 0.1667554270485301,
12
+ "train_runtime": 81955.137,
13
  "train_samples": 17267,
14
+ "train_samples_per_second": 12.641,
15
+ "train_steps_per_second": 0.791
16
  }
eval--1hr test set_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_cer": 0.12414167860494986,
4
- "eval_loss": 0.4442386329174042,
5
- "eval_runtime": 33.9749,
6
  "eval_samples": 890,
7
- "eval_samples_per_second": 26.196,
8
- "eval_steps_per_second": 3.297,
9
- "eval_wer": 0.3508522727272727
10
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_cer": 0.14837227683643728,
4
+ "eval_loss": 0.480378657579422,
5
+ "eval_runtime": 50.3698,
6
  "eval_samples": 890,
7
+ "eval_samples_per_second": 17.669,
8
+ "eval_steps_per_second": 2.224,
9
+ "eval_wer": 0.38667929292929293
10
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.8440987587856836e+20,
4
- "train_loss": 0.08842882802456985,
5
- "train_runtime": 67339.4697,
6
  "train_samples": 17267,
7
- "train_samples_per_second": 15.385,
8
- "train_steps_per_second": 0.962
9
  }
 
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.8440987587856836e+20,
4
+ "train_loss": 0.1667554270485301,
5
+ "train_runtime": 81955.137,
6
  "train_samples": 17267,
7
+ "train_samples_per_second": 12.641,
8
+ "train_steps_per_second": 0.791
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.35132575757575757,
3
- "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-cont-pt-22k-finetuned/outputs/checkpoint-1080",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
  "global_step": 64800,
@@ -10,1032 +10,1032 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.6521588563919067,
14
- "learning_rate": 3.3302469135802465e-05,
15
- "loss": 0.1355,
16
  "step": 1080
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_cer": 0.12424047818999161,
21
- "eval_loss": 0.4442259669303894,
22
- "eval_runtime": 33.5884,
23
- "eval_samples_per_second": 26.497,
24
- "eval_steps_per_second": 3.334,
25
- "eval_wer": 0.35132575757575757,
26
  "step": 1080
27
  },
28
  {
29
  "epoch": 2.0,
30
- "grad_norm": 4.438964366912842,
31
- "learning_rate": 6.66358024691358e-05,
32
- "loss": 0.1271,
33
  "step": 2160
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_cer": 0.13202094551202884,
38
- "eval_loss": 0.4743848443031311,
39
- "eval_runtime": 33.2763,
40
- "eval_samples_per_second": 26.746,
41
- "eval_steps_per_second": 3.366,
42
- "eval_wer": 0.37247474747474746,
43
  "step": 2160
44
  },
45
  {
46
  "epoch": 3.0,
47
- "grad_norm": 4.780850887298584,
48
- "learning_rate": 9.993827160493828e-05,
49
- "loss": 0.1255,
50
  "step": 3240
51
  },
52
  {
53
  "epoch": 3.0,
54
- "eval_cer": 0.15150916366151262,
55
- "eval_loss": 0.5318964123725891,
56
- "eval_runtime": 34.1253,
57
- "eval_samples_per_second": 26.08,
58
- "eval_steps_per_second": 3.282,
59
- "eval_wer": 0.38620580808080807,
60
  "step": 3240
61
  },
62
  {
63
  "epoch": 4.0,
64
- "grad_norm": 1.0735939741134644,
65
- "learning_rate": 0.0001332716049382716,
66
- "loss": 0.1335,
67
  "step": 4320
68
  },
69
  {
70
  "epoch": 4.0,
71
- "eval_cer": 0.13908511584251346,
72
- "eval_loss": 0.5112303495407104,
73
- "eval_runtime": 34.1035,
74
- "eval_samples_per_second": 26.097,
75
- "eval_steps_per_second": 3.284,
76
- "eval_wer": 0.39662247474747475,
77
  "step": 4320
78
  },
79
  {
80
  "epoch": 5.0,
81
- "grad_norm": 1.963199257850647,
82
- "learning_rate": 0.00016660493827160493,
83
- "loss": 0.1387,
84
  "step": 5400
85
  },
86
  {
87
  "epoch": 5.0,
88
- "eval_cer": 0.1588944326433829,
89
- "eval_loss": 0.5254867672920227,
90
- "eval_runtime": 34.9852,
91
- "eval_samples_per_second": 25.439,
92
- "eval_steps_per_second": 3.201,
93
- "eval_wer": 0.41745580808080807,
94
  "step": 5400
95
  },
96
  {
97
  "epoch": 6.0,
98
- "grad_norm": 7.2430219650268555,
99
- "learning_rate": 0.0001999074074074074,
100
- "loss": 0.1441,
101
  "step": 6480
102
  },
103
  {
104
  "epoch": 6.0,
105
- "eval_cer": 0.1554364471669219,
106
- "eval_loss": 0.6283074021339417,
107
- "eval_runtime": 34.01,
108
- "eval_samples_per_second": 26.169,
109
- "eval_steps_per_second": 3.293,
110
- "eval_wer": 0.43513257575757575,
111
  "step": 6480
112
  },
113
  {
114
  "epoch": 7.0,
115
- "grad_norm": 7.408224105834961,
116
- "learning_rate": 0.00023324074074074076,
117
- "loss": 0.1513,
118
  "step": 7560
119
  },
120
  {
121
  "epoch": 7.0,
122
- "eval_cer": 0.15200316158672134,
123
- "eval_loss": 0.5582847595214844,
124
- "eval_runtime": 36.0318,
125
- "eval_samples_per_second": 24.7,
126
- "eval_steps_per_second": 3.108,
127
- "eval_wer": 0.43308080808080807,
128
  "step": 7560
129
  },
130
  {
131
  "epoch": 8.0,
132
- "grad_norm": 0.40320536494255066,
133
- "learning_rate": 0.0002665740740740741,
134
- "loss": 0.1557,
135
  "step": 8640
136
  },
137
  {
138
  "epoch": 8.0,
139
- "eval_cer": 0.18389072765894382,
140
- "eval_loss": 0.7035284638404846,
141
- "eval_runtime": 34.1655,
142
- "eval_samples_per_second": 26.05,
143
- "eval_steps_per_second": 3.278,
144
- "eval_wer": 0.5014204545454546,
145
  "step": 8640
146
  },
147
  {
148
  "epoch": 9.0,
149
- "grad_norm": 0.3960403501987457,
150
- "learning_rate": 0.0002998765432098766,
151
- "loss": 0.1634,
152
  "step": 9720
153
  },
154
  {
155
  "epoch": 9.0,
156
- "eval_cer": 0.18984340265770883,
157
- "eval_loss": 0.6695614457130432,
158
- "eval_runtime": 35.0021,
159
- "eval_samples_per_second": 25.427,
160
- "eval_steps_per_second": 3.2,
161
- "eval_wer": 0.47111742424242425,
162
  "step": 9720
163
  },
164
  {
165
  "epoch": 10.0,
166
- "grad_norm": 12.15442943572998,
167
- "learning_rate": 0.00033320987654320987,
168
- "loss": 0.1696,
169
  "step": 10800
170
  },
171
  {
172
  "epoch": 10.0,
173
- "eval_cer": 0.16003062787136293,
174
- "eval_loss": 0.6147846579551697,
175
- "eval_runtime": 34.8188,
176
- "eval_samples_per_second": 25.561,
177
- "eval_steps_per_second": 3.217,
178
- "eval_wer": 0.4605429292929293,
179
  "step": 10800
180
  },
181
  {
182
  "epoch": 11.0,
183
- "grad_norm": 5.4118757247924805,
184
- "learning_rate": 0.0003665432098765432,
185
- "loss": 0.1774,
186
  "step": 11880
187
  },
188
  {
189
  "epoch": 11.0,
190
- "eval_cer": 0.18080324062638936,
191
- "eval_loss": 0.7655736207962036,
192
- "eval_runtime": 34.3969,
193
- "eval_samples_per_second": 25.874,
194
- "eval_steps_per_second": 3.256,
195
- "eval_wer": 0.4981060606060606,
196
  "step": 11880
197
  },
198
  {
199
  "epoch": 12.0,
200
- "grad_norm": NaN,
201
- "learning_rate": 0.0003998456790123457,
202
- "loss": 0.1687,
203
  "step": 12960
204
  },
205
  {
206
  "epoch": 12.0,
207
- "eval_cer": 0.19537617942004643,
208
- "eval_loss": 0.7117329239845276,
209
- "eval_runtime": 33.6673,
210
- "eval_samples_per_second": 26.435,
211
- "eval_steps_per_second": 3.327,
212
- "eval_wer": 0.5232007575757576,
213
  "step": 12960
214
  },
215
  {
216
  "epoch": 13.0,
217
- "grad_norm": 0.43450596928596497,
218
- "learning_rate": 0.000433179012345679,
219
- "loss": 0.1825,
220
  "step": 14040
221
  },
222
  {
223
  "epoch": 13.0,
224
- "eval_cer": 0.18658301635133132,
225
- "eval_loss": 0.6913736462593079,
226
- "eval_runtime": 34.2044,
227
- "eval_samples_per_second": 26.02,
228
- "eval_steps_per_second": 3.274,
229
- "eval_wer": 0.5255681818181818,
230
  "step": 14040
231
  },
232
  {
233
  "epoch": 14.0,
234
- "grad_norm": 2.9280543327331543,
235
- "learning_rate": 0.00046651234567901236,
236
- "loss": 0.1781,
237
  "step": 15120
238
  },
239
  {
240
  "epoch": 14.0,
241
- "eval_cer": 0.20901052215580695,
242
- "eval_loss": 0.7841426730155945,
243
- "eval_runtime": 34.3961,
244
- "eval_samples_per_second": 25.875,
245
- "eval_steps_per_second": 3.256,
246
- "eval_wer": 0.5473484848484849,
247
  "step": 15120
248
  },
249
  {
250
  "epoch": 15.0,
251
- "grad_norm": 0.16209368407726288,
252
- "learning_rate": 0.0004998148148148148,
253
- "loss": 0.186,
254
  "step": 16200
255
  },
256
  {
257
  "epoch": 15.0,
258
- "eval_cer": 0.1943140838808477,
259
- "eval_loss": 0.6776570677757263,
260
- "eval_runtime": 34.0657,
261
- "eval_samples_per_second": 26.126,
262
- "eval_steps_per_second": 3.288,
263
- "eval_wer": 0.538510101010101,
264
  "step": 16200
265
  },
266
  {
267
  "epoch": 16.0,
268
- "grad_norm": 3.6948094367980957,
269
- "learning_rate": 0.0004889506172839507,
270
- "loss": 0.1784,
271
  "step": 17280
272
  },
273
  {
274
  "epoch": 16.0,
275
- "eval_cer": 0.19367188657807638,
276
- "eval_loss": 0.8023106455802917,
277
- "eval_runtime": 33.7998,
278
- "eval_samples_per_second": 26.332,
279
- "eval_steps_per_second": 3.314,
280
- "eval_wer": 0.5694444444444444,
281
  "step": 17280
282
  },
283
  {
284
  "epoch": 17.0,
285
- "grad_norm": 0.0647168681025505,
286
- "learning_rate": 0.00047784979423868315,
287
- "loss": 0.1666,
288
  "step": 18360
289
  },
290
  {
291
  "epoch": 17.0,
292
- "eval_cer": 0.20740502889887863,
293
- "eval_loss": 0.6939805746078491,
294
- "eval_runtime": 42.0792,
295
- "eval_samples_per_second": 21.151,
296
- "eval_steps_per_second": 2.662,
297
- "eval_wer": 0.5599747474747475,
298
  "step": 18360
299
  },
300
  {
301
  "epoch": 18.0,
302
- "grad_norm": 0.016156639903783798,
303
- "learning_rate": 0.000466738683127572,
304
- "loss": 0.1583,
305
  "step": 19440
306
  },
307
  {
308
  "epoch": 18.0,
309
- "eval_cer": 0.24267648075878082,
310
- "eval_loss": 0.8497462868690491,
311
- "eval_runtime": 34.5186,
312
- "eval_samples_per_second": 25.783,
313
- "eval_steps_per_second": 3.245,
314
- "eval_wer": 0.6180555555555556,
315
  "step": 19440
316
  },
317
  {
318
  "epoch": 19.0,
319
- "grad_norm": 0.07191939651966095,
320
- "learning_rate": 0.00045562757201646095,
321
- "loss": 0.1532,
322
  "step": 20520
323
  },
324
  {
325
  "epoch": 19.0,
326
- "eval_cer": 0.19596897693029688,
327
- "eval_loss": 0.6177073121070862,
328
- "eval_runtime": 37.0842,
329
- "eval_samples_per_second": 23.999,
330
- "eval_steps_per_second": 3.02,
331
- "eval_wer": 0.5247790404040404,
332
  "step": 20520
333
  },
334
  {
335
  "epoch": 20.0,
336
- "grad_norm": 0.5676774382591248,
337
- "learning_rate": 0.0004445164609053498,
338
- "loss": 0.138,
339
  "step": 21600
340
  },
341
  {
342
  "epoch": 20.0,
343
- "eval_cer": 0.21503729684335326,
344
- "eval_loss": 0.7171837091445923,
345
- "eval_runtime": 33.7172,
346
- "eval_samples_per_second": 26.396,
347
- "eval_steps_per_second": 3.322,
348
- "eval_wer": 0.5287247474747475,
349
  "step": 21600
350
  },
351
  {
352
  "epoch": 21.0,
353
- "grad_norm": 1.0357455015182495,
354
  "learning_rate": 0.0004334362139917696,
355
- "loss": 0.1342,
356
  "step": 22680
357
  },
358
  {
359
  "epoch": 21.0,
360
- "eval_cer": 0.2018475522402806,
361
- "eval_loss": 0.7149861454963684,
362
- "eval_runtime": 34.7823,
363
- "eval_samples_per_second": 25.588,
364
- "eval_steps_per_second": 3.22,
365
- "eval_wer": 0.5334595959595959,
366
  "step": 22680
367
  },
368
  {
369
  "epoch": 22.0,
370
- "grad_norm": 14.927299499511719,
371
- "learning_rate": 0.00042232510288065845,
372
- "loss": 0.1258,
373
  "step": 23760
374
  },
375
  {
376
  "epoch": 22.0,
377
- "eval_cer": 0.2112582127155066,
378
- "eval_loss": 0.7246299386024475,
379
- "eval_runtime": 34.2959,
380
- "eval_samples_per_second": 25.951,
381
- "eval_steps_per_second": 3.266,
382
- "eval_wer": 0.5443497474747475,
383
  "step": 23760
384
  },
385
  {
386
  "epoch": 23.0,
387
- "grad_norm": 2.454263925552368,
388
  "learning_rate": 0.00041122427983539094,
389
- "loss": 0.1228,
390
  "step": 24840
391
  },
392
  {
393
  "epoch": 23.0,
394
- "eval_cer": 0.21503729684335326,
395
- "eval_loss": 0.7502724528312683,
396
- "eval_runtime": 34.0741,
397
- "eval_samples_per_second": 26.12,
398
- "eval_steps_per_second": 3.287,
399
- "eval_wer": 0.5336174242424242,
400
  "step": 24840
401
  },
402
  {
403
  "epoch": 24.0,
404
- "grad_norm": 0.7376189827919006,
405
  "learning_rate": 0.00040011316872427984,
406
- "loss": 0.1153,
407
  "step": 25920
408
  },
409
  {
410
  "epoch": 24.0,
411
- "eval_cer": 0.19337548782295114,
412
- "eval_loss": 0.6936143040657043,
413
- "eval_runtime": 33.9341,
414
- "eval_samples_per_second": 26.227,
415
- "eval_steps_per_second": 3.301,
416
- "eval_wer": 0.5146780303030303,
417
  "step": 25920
418
  },
419
  {
420
  "epoch": 25.0,
421
- "grad_norm": 19.62969207763672,
422
- "learning_rate": 0.00038900205761316874,
423
- "loss": 0.1097,
424
  "step": 27000
425
  },
426
  {
427
  "epoch": 25.0,
428
- "eval_cer": 0.19305438917156548,
429
- "eval_loss": 0.7183641791343689,
430
- "eval_runtime": 34.3195,
431
- "eval_samples_per_second": 25.933,
432
- "eval_steps_per_second": 3.263,
433
- "eval_wer": 0.5124684343434344,
434
  "step": 27000
435
  },
436
  {
437
  "epoch": 26.0,
438
- "grad_norm": 0.0018467491026967764,
439
- "learning_rate": 0.00037789094650205764,
440
- "loss": 0.1018,
441
  "step": 28080
442
  },
443
  {
444
  "epoch": 26.0,
445
- "eval_cer": 0.1954255792125673,
446
- "eval_loss": 0.742782473564148,
447
- "eval_runtime": 34.148,
448
- "eval_samples_per_second": 26.063,
449
- "eval_steps_per_second": 3.28,
450
- "eval_wer": 0.5290404040404041,
451
  "step": 28080
452
  },
453
  {
454
  "epoch": 27.0,
455
- "grad_norm": 0.7026299834251404,
456
- "learning_rate": 0.0003667798353909465,
457
- "loss": 0.0951,
458
  "step": 29160
459
  },
460
  {
461
  "epoch": 27.0,
462
- "eval_cer": 0.2779726325149434,
463
- "eval_loss": 0.9667614698410034,
464
- "eval_runtime": 34.0235,
465
- "eval_samples_per_second": 26.158,
466
- "eval_steps_per_second": 3.292,
467
- "eval_wer": 0.5683396464646465,
468
  "step": 29160
469
  },
470
  {
471
  "epoch": 28.0,
472
- "grad_norm": 0.02587762102484703,
473
- "learning_rate": 0.0003556687242798354,
474
- "loss": 0.0883,
475
  "step": 30240
476
  },
477
  {
478
  "epoch": 28.0,
479
- "eval_cer": 0.19475868201353555,
480
- "eval_loss": 0.8088436722755432,
481
- "eval_runtime": 33.7463,
482
- "eval_samples_per_second": 26.373,
483
- "eval_steps_per_second": 3.319,
484
- "eval_wer": 0.5249368686868687,
485
  "step": 30240
486
  },
487
  {
488
  "epoch": 29.0,
489
- "grad_norm": 0.006352627649903297,
490
- "learning_rate": 0.00034456790123456787,
491
- "loss": 0.0888,
492
  "step": 31320
493
  },
494
  {
495
  "epoch": 29.0,
496
- "eval_cer": 0.18826260929704094,
497
- "eval_loss": 0.7689303159713745,
498
- "eval_runtime": 34.1947,
499
- "eval_samples_per_second": 26.027,
500
- "eval_steps_per_second": 3.275,
501
- "eval_wer": 0.5063131313131313,
502
  "step": 31320
503
  },
504
  {
505
  "epoch": 30.0,
506
- "grad_norm": 0.6352426409721375,
507
- "learning_rate": 0.00033345679012345683,
508
- "loss": 0.0779,
509
  "step": 32400
510
  },
511
  {
512
  "epoch": 30.0,
513
- "eval_cer": 0.20224275058044755,
514
- "eval_loss": 0.7801844477653503,
515
- "eval_runtime": 35.6742,
516
- "eval_samples_per_second": 24.948,
517
- "eval_steps_per_second": 3.14,
518
- "eval_wer": 0.52114898989899,
519
  "step": 32400
520
  },
521
  {
522
  "epoch": 31.0,
523
- "grad_norm": 3.922788143157959,
524
- "learning_rate": 0.0003223456790123457,
525
- "loss": 0.0781,
526
  "step": 33480
527
  },
528
  {
529
  "epoch": 31.0,
530
- "eval_cer": 0.19910586375537223,
531
- "eval_loss": 0.7932357788085938,
532
- "eval_runtime": 33.9958,
533
- "eval_samples_per_second": 26.18,
534
- "eval_steps_per_second": 3.295,
535
- "eval_wer": 0.5290404040404041,
536
  "step": 33480
537
  },
538
  {
539
  "epoch": 32.0,
540
- "grad_norm": 0.4319939911365509,
541
  "learning_rate": 0.0003112448559670782,
542
- "loss": 0.0729,
543
  "step": 34560
544
  },
545
  {
546
  "epoch": 32.0,
547
- "eval_cer": 0.1849034234056217,
548
- "eval_loss": 0.7396170496940613,
549
- "eval_runtime": 33.5836,
550
- "eval_samples_per_second": 26.501,
551
- "eval_steps_per_second": 3.335,
552
- "eval_wer": 0.5034722222222222,
553
  "step": 34560
554
  },
555
  {
556
  "epoch": 33.0,
557
- "grad_norm": 4.486342906951904,
558
- "learning_rate": 0.00030013374485596706,
559
- "loss": 0.0662,
560
  "step": 35640
561
  },
562
  {
563
  "epoch": 33.0,
564
- "eval_cer": 0.20654053252976337,
565
- "eval_loss": 0.734420895576477,
566
- "eval_runtime": 34.1026,
567
- "eval_samples_per_second": 26.098,
568
- "eval_steps_per_second": 3.284,
569
- "eval_wer": 0.5157828282828283,
570
  "step": 35640
571
  },
572
  {
573
  "epoch": 34.0,
574
- "grad_norm": 0.5255006551742554,
575
- "learning_rate": 0.00028902263374485596,
576
- "loss": 0.0653,
577
  "step": 36720
578
  },
579
  {
580
  "epoch": 34.0,
581
- "eval_cer": 0.19868596551894482,
582
- "eval_loss": 0.7865384817123413,
583
- "eval_runtime": 34.0866,
584
- "eval_samples_per_second": 26.11,
585
- "eval_steps_per_second": 3.286,
586
- "eval_wer": 0.5190972222222222,
587
  "step": 36720
588
  },
589
  {
590
  "epoch": 35.0,
591
- "grad_norm": 7.736340045928955,
592
- "learning_rate": 0.0002779218106995885,
593
- "loss": 0.0636,
594
  "step": 37800
595
  },
596
  {
597
  "epoch": 35.0,
598
- "eval_cer": 0.18742281282418613,
599
- "eval_loss": 0.7694341540336609,
600
- "eval_runtime": 34.057,
601
- "eval_samples_per_second": 26.133,
602
- "eval_steps_per_second": 3.289,
603
- "eval_wer": 0.4952651515151515,
604
  "step": 37800
605
  },
606
  {
607
  "epoch": 36.0,
608
- "grad_norm": 0.1871989518404007,
609
  "learning_rate": 0.000266820987654321,
610
- "loss": 0.0618,
611
  "step": 38880
612
  },
613
  {
614
  "epoch": 36.0,
615
- "eval_cer": 0.1889295064960727,
616
- "eval_loss": 0.7620729207992554,
617
- "eval_runtime": 34.1669,
618
- "eval_samples_per_second": 26.049,
619
- "eval_steps_per_second": 3.278,
620
- "eval_wer": 0.5104166666666666,
621
  "step": 38880
622
  },
623
  {
624
  "epoch": 37.0,
625
- "grad_norm": 0.010062905959784985,
626
  "learning_rate": 0.0002557098765432099,
627
- "loss": 0.0549,
628
  "step": 39960
629
  },
630
  {
631
  "epoch": 37.0,
632
- "eval_cer": 0.188435508570864,
633
- "eval_loss": 0.7830834984779358,
634
- "eval_runtime": 34.0922,
635
- "eval_samples_per_second": 26.106,
636
- "eval_steps_per_second": 3.285,
637
- "eval_wer": 0.5058396464646465,
638
  "step": 39960
639
  },
640
  {
641
  "epoch": 38.0,
642
- "grad_norm": 0.012355574406683445,
643
  "learning_rate": 0.0002445987654320988,
644
- "loss": 0.0548,
645
  "step": 41040
646
  },
647
  {
648
  "epoch": 38.0,
649
- "eval_cer": 0.19144889591463715,
650
- "eval_loss": 0.8387961387634277,
651
- "eval_runtime": 34.0941,
652
- "eval_samples_per_second": 26.104,
653
- "eval_steps_per_second": 3.285,
654
- "eval_wer": 0.5074179292929293,
655
  "step": 41040
656
  },
657
  {
658
  "epoch": 39.0,
659
- "grad_norm": 5.520277976989746,
660
- "learning_rate": 0.00023348765432098766,
661
- "loss": 0.0479,
662
  "step": 42120
663
  },
664
  {
665
  "epoch": 39.0,
666
- "eval_cer": 0.19283209010522157,
667
- "eval_loss": 0.8141205310821533,
668
- "eval_runtime": 33.7176,
669
- "eval_samples_per_second": 26.396,
670
- "eval_steps_per_second": 3.322,
671
- "eval_wer": 0.5115214646464646,
672
  "step": 42120
673
  },
674
  {
675
  "epoch": 40.0,
676
- "grad_norm": 0.06226100027561188,
677
  "learning_rate": 0.00022238683127572017,
678
- "loss": 0.0467,
679
  "step": 43200
680
  },
681
  {
682
  "epoch": 40.0,
683
- "eval_cer": 0.1860890184261226,
684
- "eval_loss": 0.8152617812156677,
685
- "eval_runtime": 34.0751,
686
- "eval_samples_per_second": 26.119,
687
- "eval_steps_per_second": 3.287,
688
- "eval_wer": 0.4971590909090909,
689
  "step": 43200
690
  },
691
  {
692
  "epoch": 41.0,
693
- "grad_norm": 0.9956463575363159,
694
- "learning_rate": 0.00021127572016460904,
695
- "loss": 0.0425,
696
  "step": 44280
697
  },
698
  {
699
  "epoch": 41.0,
700
- "eval_cer": 0.17685125722471964,
701
- "eval_loss": 0.7244598865509033,
702
- "eval_runtime": 34.7079,
703
- "eval_samples_per_second": 25.643,
704
- "eval_steps_per_second": 3.227,
705
- "eval_wer": 0.4878472222222222,
706
  "step": 44280
707
  },
708
  {
709
  "epoch": 42.0,
710
- "grad_norm": 0.3334626257419586,
711
  "learning_rate": 0.00020017489711934155,
712
- "loss": 0.0402,
713
  "step": 45360
714
  },
715
  {
716
  "epoch": 42.0,
717
- "eval_cer": 0.18507632267944474,
718
- "eval_loss": 0.8202406167984009,
719
- "eval_runtime": 34.196,
720
- "eval_samples_per_second": 26.026,
721
- "eval_steps_per_second": 3.275,
722
- "eval_wer": 0.49936868686868685,
723
  "step": 45360
724
  },
725
  {
726
  "epoch": 43.0,
727
- "grad_norm": 0.09065477550029755,
728
  "learning_rate": 0.00018907407407407406,
729
- "loss": 0.0408,
730
  "step": 46440
731
  },
732
  {
733
  "epoch": 43.0,
734
- "eval_cer": 0.19241219186879416,
735
- "eval_loss": 0.7523993849754333,
736
- "eval_runtime": 34.1394,
737
- "eval_samples_per_second": 26.07,
738
- "eval_steps_per_second": 3.281,
739
- "eval_wer": 0.5041035353535354,
740
  "step": 46440
741
  },
742
  {
743
  "epoch": 44.0,
744
- "grad_norm": 0.03374771401286125,
745
- "learning_rate": 0.00017796296296296296,
746
- "loss": 0.039,
747
  "step": 47520
748
  },
749
  {
750
  "epoch": 44.0,
751
- "eval_cer": 0.18695351479523786,
752
- "eval_loss": 0.7909926772117615,
753
- "eval_runtime": 34.3484,
754
- "eval_samples_per_second": 25.911,
755
- "eval_steps_per_second": 3.261,
756
- "eval_wer": 0.4936868686868687,
757
  "step": 47520
758
  },
759
  {
760
  "epoch": 45.0,
761
- "grad_norm": 0.044817935675382614,
762
- "learning_rate": 0.00016685185185185187,
763
- "loss": 0.0319,
764
  "step": 48600
765
  },
766
  {
767
  "epoch": 45.0,
768
- "eval_cer": 0.19157239539593934,
769
- "eval_loss": 0.8348931670188904,
770
- "eval_runtime": 33.9046,
771
- "eval_samples_per_second": 26.25,
772
- "eval_steps_per_second": 3.303,
773
- "eval_wer": 0.5026830808080808,
774
  "step": 48600
775
  },
776
  {
777
  "epoch": 46.0,
778
- "grad_norm": 0.09955661743879318,
779
  "learning_rate": 0.00015575102880658438,
780
- "loss": 0.0315,
781
  "step": 49680
782
  },
783
  {
784
  "epoch": 46.0,
785
- "eval_cer": 0.18660771624759176,
786
- "eval_loss": 0.8406158089637756,
787
- "eval_runtime": 34.5537,
788
- "eval_samples_per_second": 25.757,
789
- "eval_steps_per_second": 3.241,
790
- "eval_wer": 0.489425505050505,
791
  "step": 49680
792
  },
793
  {
794
  "epoch": 47.0,
795
- "grad_norm": 1.9601191282272339,
796
  "learning_rate": 0.00014463991769547325,
797
- "loss": 0.0324,
798
  "step": 50760
799
  },
800
  {
801
  "epoch": 47.0,
802
- "eval_cer": 0.1777157535938349,
803
- "eval_loss": 0.8840826749801636,
804
- "eval_runtime": 33.7677,
805
- "eval_samples_per_second": 26.357,
806
- "eval_steps_per_second": 3.317,
807
- "eval_wer": 0.4876893939393939,
808
  "step": 50760
809
  },
810
  {
811
  "epoch": 48.0,
812
- "grad_norm": 0.08828981965780258,
813
- "learning_rate": 0.00013352880658436215,
814
- "loss": 0.0267,
815
  "step": 51840
816
  },
817
  {
818
  "epoch": 48.0,
819
- "eval_cer": 0.18964580348762536,
820
- "eval_loss": 0.8589528203010559,
821
- "eval_runtime": 34.2131,
822
- "eval_samples_per_second": 26.013,
823
- "eval_steps_per_second": 3.274,
824
- "eval_wer": 0.5039457070707071,
825
  "step": 51840
826
  },
827
  {
828
  "epoch": 49.0,
829
- "grad_norm": 0.02356182597577572,
830
- "learning_rate": 0.00012241769547325103,
831
- "loss": 0.0263,
832
  "step": 52920
833
  },
834
  {
835
  "epoch": 49.0,
836
- "eval_cer": 0.18045744207874326,
837
- "eval_loss": 0.7702302932739258,
838
- "eval_runtime": 33.9692,
839
- "eval_samples_per_second": 26.2,
840
- "eval_steps_per_second": 3.297,
841
- "eval_wer": 0.48327020202020204,
842
  "step": 52920
843
  },
844
  {
845
  "epoch": 50.0,
846
- "grad_norm": 0.005286052357405424,
847
  "learning_rate": 0.00011131687242798354,
848
- "loss": 0.0242,
849
  "step": 54000
850
  },
851
  {
852
  "epoch": 50.0,
853
- "eval_cer": 0.18334732994121425,
854
- "eval_loss": 0.8420283794403076,
855
- "eval_runtime": 34.5458,
856
- "eval_samples_per_second": 25.763,
857
- "eval_steps_per_second": 3.242,
858
- "eval_wer": 0.48011363636363635,
859
  "step": 54000
860
  },
861
  {
862
  "epoch": 51.0,
863
- "grad_norm": 0.002476485911756754,
864
- "learning_rate": 0.00010021604938271606,
865
- "loss": 0.0237,
866
  "step": 55080
867
  },
868
  {
869
  "epoch": 51.0,
870
- "eval_cer": 0.1833967297337351,
871
- "eval_loss": 0.8621743321418762,
872
- "eval_runtime": 33.473,
873
- "eval_samples_per_second": 26.589,
874
- "eval_steps_per_second": 3.346,
875
- "eval_wer": 0.48137626262626265,
876
  "step": 55080
877
  },
878
  {
879
  "epoch": 52.0,
880
- "grad_norm": 0.01960768923163414,
881
- "learning_rate": 8.910493827160495e-05,
882
- "loss": 0.0214,
883
  "step": 56160
884
  },
885
  {
886
  "epoch": 52.0,
887
- "eval_cer": 0.18495282319814257,
888
- "eval_loss": 0.8834346532821655,
889
- "eval_runtime": 33.7718,
890
- "eval_samples_per_second": 26.353,
891
- "eval_steps_per_second": 3.316,
892
- "eval_wer": 0.47679924242424243,
893
  "step": 56160
894
  },
895
  {
896
  "epoch": 53.0,
897
- "grad_norm": 0.0076888990588486195,
898
  "learning_rate": 7.799382716049382e-05,
899
- "loss": 0.0188,
900
  "step": 57240
901
  },
902
  {
903
  "epoch": 53.0,
904
- "eval_cer": 0.18396482734772515,
905
- "eval_loss": 0.9398559331893921,
906
- "eval_runtime": 33.7324,
907
- "eval_samples_per_second": 26.384,
908
- "eval_steps_per_second": 3.32,
909
- "eval_wer": 0.47664141414141414,
910
  "step": 57240
911
  },
912
  {
913
  "epoch": 54.0,
914
- "grad_norm": 0.044491663575172424,
915
- "learning_rate": 6.689300411522633e-05,
916
- "loss": 0.0194,
917
  "step": 58320
918
  },
919
  {
920
  "epoch": 54.0,
921
- "eval_cer": 0.18218643481697377,
922
- "eval_loss": 0.880720853805542,
923
- "eval_runtime": 35.0466,
924
- "eval_samples_per_second": 25.395,
925
- "eval_steps_per_second": 3.196,
926
- "eval_wer": 0.4734848484848485,
927
  "step": 58320
928
  },
929
  {
930
  "epoch": 55.0,
931
- "grad_norm": 0.2765955924987793,
932
- "learning_rate": 5.578189300411523e-05,
933
- "loss": 0.0165,
934
  "step": 59400
935
  },
936
  {
937
  "epoch": 55.0,
938
- "eval_cer": 0.1782591513115645,
939
- "eval_loss": 0.8844161629676819,
940
- "eval_runtime": 33.9045,
941
- "eval_samples_per_second": 26.25,
942
- "eval_steps_per_second": 3.303,
943
- "eval_wer": 0.46464646464646464,
944
  "step": 59400
945
  },
946
  {
947
  "epoch": 56.0,
948
- "grad_norm": 0.0016414269339293242,
949
- "learning_rate": 4.4670781893004116e-05,
950
- "loss": 0.0145,
951
  "step": 60480
952
  },
953
  {
954
  "epoch": 56.0,
955
- "eval_cer": 0.17428246801363434,
956
- "eval_loss": 0.9099779725074768,
957
- "eval_runtime": 35.1941,
958
- "eval_samples_per_second": 25.288,
959
- "eval_steps_per_second": 3.182,
960
- "eval_wer": 0.46622474747474746,
961
  "step": 60480
962
  },
963
  {
964
  "epoch": 57.0,
965
- "grad_norm": 0.13948026299476624,
966
- "learning_rate": 3.3559670781893004e-05,
967
- "loss": 0.0143,
968
  "step": 61560
969
  },
970
  {
971
  "epoch": 57.0,
972
- "eval_cer": 0.17682655732845923,
973
- "eval_loss": 0.9427079558372498,
974
- "eval_runtime": 34.2545,
975
- "eval_samples_per_second": 25.982,
976
- "eval_steps_per_second": 3.27,
977
- "eval_wer": 0.461489898989899,
978
  "step": 61560
979
  },
980
  {
981
  "epoch": 58.0,
982
- "grad_norm": 0.01065619383007288,
983
- "learning_rate": 2.2448559670781893e-05,
984
- "loss": 0.0134,
985
  "step": 62640
986
  },
987
  {
988
  "epoch": 58.0,
989
- "eval_cer": 0.1746776663538013,
990
- "eval_loss": 0.9330604076385498,
991
- "eval_runtime": 34.438,
992
- "eval_samples_per_second": 25.844,
993
- "eval_steps_per_second": 3.252,
994
- "eval_wer": 0.4586489898989899,
995
  "step": 62640
996
  },
997
  {
998
  "epoch": 59.0,
999
- "grad_norm": 0.003782533574849367,
1000
- "learning_rate": 1.1337448559670783e-05,
1001
- "loss": 0.0139,
1002
  "step": 63720
1003
  },
1004
  {
1005
  "epoch": 59.0,
1006
- "eval_cer": 0.17136788025490293,
1007
- "eval_loss": 0.9063072204589844,
1008
- "eval_runtime": 36.0426,
1009
- "eval_samples_per_second": 24.693,
1010
- "eval_steps_per_second": 3.107,
1011
- "eval_wer": 0.45533459595959597,
1012
  "step": 63720
1013
  },
1014
  {
1015
  "epoch": 60.0,
1016
- "grad_norm": 5.7463275879854336e-05,
1017
- "learning_rate": 2.366255144032922e-07,
1018
- "loss": 0.011,
1019
  "step": 64800
1020
  },
1021
  {
1022
  "epoch": 60.0,
1023
- "eval_cer": 0.1716148792175073,
1024
- "eval_loss": 0.9166492819786072,
1025
- "eval_runtime": 34.3521,
1026
- "eval_samples_per_second": 25.908,
1027
- "eval_steps_per_second": 3.26,
1028
- "eval_wer": 0.4569128787878788,
1029
  "step": 64800
1030
  },
1031
  {
1032
  "epoch": 60.0,
1033
  "step": 64800,
1034
  "total_flos": 1.8440987587856836e+20,
1035
- "train_loss": 0.08842882802456985,
1036
- "train_runtime": 67339.4697,
1037
- "train_samples_per_second": 15.385,
1038
- "train_steps_per_second": 0.962
1039
  }
1040
  ],
1041
  "logging_steps": 500,
 
1
  {
2
+ "best_metric": 0.38667929292929293,
3
+ "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-22k-finetuned/outputs/checkpoint-1080",
4
  "epoch": 60.0,
5
  "eval_steps": 500,
6
  "global_step": 64800,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.5049565434455872,
14
+ "learning_rate": 3.32716049382716e-05,
15
+ "loss": 0.126,
16
  "step": 1080
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_cer": 0.1485204762139999,
21
+ "eval_loss": 0.4803544282913208,
22
+ "eval_runtime": 50.5555,
23
+ "eval_samples_per_second": 17.604,
24
+ "eval_steps_per_second": 2.215,
25
+ "eval_wer": 0.38667929292929293,
26
  "step": 1080
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "grad_norm": 2.3093390464782715,
31
+ "learning_rate": 6.660493827160493e-05,
32
+ "loss": 0.1441,
33
  "step": 2160
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_cer": 0.19503038087240035,
38
+ "eval_loss": 0.6097356677055359,
39
+ "eval_runtime": 49.9713,
40
+ "eval_samples_per_second": 17.81,
41
+ "eval_steps_per_second": 2.241,
42
+ "eval_wer": 0.44239267676767674,
43
  "step": 2160
44
  },
45
  {
46
  "epoch": 3.0,
47
+ "grad_norm": 3.067934513092041,
48
+ "learning_rate": 9.99074074074074e-05,
49
+ "loss": 0.1675,
50
  "step": 3240
51
  },
52
  {
53
  "epoch": 3.0,
54
+ "eval_cer": 0.16756409623079582,
55
+ "eval_loss": 0.5237330198287964,
56
+ "eval_runtime": 52.1692,
57
+ "eval_samples_per_second": 17.06,
58
+ "eval_steps_per_second": 2.147,
59
+ "eval_wer": 0.444760101010101,
60
  "step": 3240
61
  },
62
  {
63
  "epoch": 4.0,
64
+ "grad_norm": 6.190335750579834,
65
+ "learning_rate": 0.00013324074074074074,
66
+ "loss": 0.1919,
67
  "step": 4320
68
  },
69
  {
70
  "epoch": 4.0,
71
+ "eval_cer": 0.188435508570864,
72
+ "eval_loss": 0.6256272196769714,
73
+ "eval_runtime": 49.8649,
74
+ "eval_samples_per_second": 17.848,
75
+ "eval_steps_per_second": 2.246,
76
+ "eval_wer": 0.484375,
77
  "step": 4320
78
  },
79
  {
80
  "epoch": 5.0,
81
+ "grad_norm": 10.29676342010498,
82
+ "learning_rate": 0.0001665432098765432,
83
+ "loss": 0.2168,
84
  "step": 5400
85
  },
86
  {
87
  "epoch": 5.0,
88
+ "eval_cer": 0.1991552635478931,
89
+ "eval_loss": 0.6817235946655273,
90
+ "eval_runtime": 49.9056,
91
+ "eval_samples_per_second": 17.834,
92
+ "eval_steps_per_second": 2.244,
93
+ "eval_wer": 0.5130997474747475,
94
  "step": 5400
95
  },
96
  {
97
  "epoch": 6.0,
98
+ "grad_norm": 16.29789924621582,
99
+ "learning_rate": 0.00019987654320987656,
100
+ "loss": 0.2411,
101
  "step": 6480
102
  },
103
  {
104
  "epoch": 6.0,
105
+ "eval_cer": 0.20411994269624067,
106
+ "eval_loss": 0.6815704703330994,
107
+ "eval_runtime": 47.2826,
108
+ "eval_samples_per_second": 18.823,
109
+ "eval_steps_per_second": 2.369,
110
+ "eval_wer": 0.5233585858585859,
111
  "step": 6480
112
  },
113
  {
114
  "epoch": 7.0,
115
+ "grad_norm": 13.96838665008545,
116
+ "learning_rate": 0.000233179012345679,
117
+ "loss": 0.2493,
118
  "step": 7560
119
  },
120
  {
121
  "epoch": 7.0,
122
+ "eval_cer": 0.2558662253618535,
123
+ "eval_loss": 0.8295482993125916,
124
+ "eval_runtime": 47.3326,
125
+ "eval_samples_per_second": 18.803,
126
+ "eval_steps_per_second": 2.366,
127
+ "eval_wer": 0.6788194444444444,
128
  "step": 7560
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "grad_norm": 12.302577018737793,
133
+ "learning_rate": 0.0002665123456790123,
134
+ "loss": 0.2718,
135
  "step": 8640
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "eval_cer": 0.2669070789902682,
140
+ "eval_loss": 0.8849073648452759,
141
+ "eval_runtime": 47.1385,
142
+ "eval_samples_per_second": 18.881,
143
+ "eval_steps_per_second": 2.376,
144
+ "eval_wer": 0.6756628787878788,
145
  "step": 8640
146
  },
147
  {
148
  "epoch": 9.0,
149
+ "grad_norm": 0.21664512157440186,
150
+ "learning_rate": 0.0002998148148148148,
151
+ "loss": 0.2922,
152
  "step": 9720
153
  },
154
  {
155
  "epoch": 9.0,
156
+ "eval_cer": 0.3401422714024601,
157
+ "eval_loss": 1.0527104139328003,
158
+ "eval_runtime": 49.2164,
159
+ "eval_samples_per_second": 18.083,
160
+ "eval_steps_per_second": 2.276,
161
+ "eval_wer": 0.6721906565656566,
162
  "step": 9720
163
  },
164
  {
165
  "epoch": 10.0,
166
+ "grad_norm": 7.9486260414123535,
167
+ "learning_rate": 0.0003331172839506173,
168
+ "loss": 0.3156,
169
  "step": 10800
170
  },
171
  {
172
  "epoch": 10.0,
173
+ "eval_cer": 0.35758039816232773,
174
+ "eval_loss": 1.0661259889602661,
175
+ "eval_runtime": 48.5028,
176
+ "eval_samples_per_second": 18.349,
177
+ "eval_steps_per_second": 2.309,
178
+ "eval_wer": 0.7528409090909091,
179
  "step": 10800
180
  },
181
  {
182
  "epoch": 11.0,
183
+ "grad_norm": 8.1552095413208,
184
+ "learning_rate": 0.0003664506172839506,
185
+ "loss": 0.3273,
186
  "step": 11880
187
  },
188
  {
189
  "epoch": 11.0,
190
+ "eval_cer": 0.2929654695450279,
191
+ "eval_loss": 1.0082694292068481,
192
+ "eval_runtime": 48.9797,
193
+ "eval_samples_per_second": 18.171,
194
+ "eval_steps_per_second": 2.287,
195
+ "eval_wer": 0.7840909090909091,
196
  "step": 11880
197
  },
198
  {
199
  "epoch": 12.0,
200
+ "grad_norm": 8.20614242553711,
201
+ "learning_rate": 0.00039978395061728396,
202
+ "loss": 0.3216,
203
  "step": 12960
204
  },
205
  {
206
  "epoch": 12.0,
207
+ "eval_cer": 0.3153682754532431,
208
+ "eval_loss": 1.130453109741211,
209
+ "eval_runtime": 48.376,
210
+ "eval_samples_per_second": 18.398,
211
+ "eval_steps_per_second": 2.315,
212
+ "eval_wer": 0.728219696969697,
213
  "step": 12960
214
  },
215
  {
216
  "epoch": 13.0,
217
+ "grad_norm": 14.636846542358398,
218
+ "learning_rate": 0.00043311728395061726,
219
+ "loss": 0.3498,
220
  "step": 14040
221
  },
222
  {
223
  "epoch": 13.0,
224
+ "eval_cer": 0.3106258953712394,
225
+ "eval_loss": 1.0758916139602661,
226
+ "eval_runtime": 48.0575,
227
+ "eval_samples_per_second": 18.519,
228
+ "eval_steps_per_second": 2.331,
229
+ "eval_wer": 0.7312184343434344,
230
  "step": 14040
231
  },
232
  {
233
  "epoch": 14.0,
234
+ "grad_norm": 4.806951999664307,
235
+ "learning_rate": 0.0004664506172839506,
236
+ "loss": 0.3553,
237
  "step": 15120
238
  },
239
  {
240
  "epoch": 14.0,
241
+ "eval_cer": 0.28031912265968484,
242
+ "eval_loss": 0.8731944561004639,
243
+ "eval_runtime": 47.2505,
244
+ "eval_samples_per_second": 18.836,
245
+ "eval_steps_per_second": 2.37,
246
+ "eval_wer": 0.6756628787878788,
247
  "step": 15120
248
  },
249
  {
250
  "epoch": 15.0,
251
+ "grad_norm": 0.5450202822685242,
252
+ "learning_rate": 0.0004997530864197531,
253
+ "loss": 0.3582,
254
  "step": 16200
255
  },
256
  {
257
  "epoch": 15.0,
258
+ "eval_cer": 0.31852986217457885,
259
+ "eval_loss": 1.055077075958252,
260
+ "eval_runtime": 46.8181,
261
+ "eval_samples_per_second": 19.01,
262
+ "eval_steps_per_second": 2.392,
263
+ "eval_wer": 0.7623106060606061,
264
  "step": 16200
265
  },
266
  {
267
  "epoch": 16.0,
268
+ "grad_norm": 5.1030144691467285,
269
+ "learning_rate": 0.0004889814814814815,
270
+ "loss": 0.3607,
271
  "step": 17280
272
  },
273
  {
274
  "epoch": 16.0,
275
+ "eval_cer": 0.3101071975497703,
276
+ "eval_loss": 1.0534826517105103,
277
+ "eval_runtime": 47.5102,
278
+ "eval_samples_per_second": 18.733,
279
+ "eval_steps_per_second": 2.357,
280
+ "eval_wer": 0.7482638888888888,
281
  "step": 17280
282
  },
283
  {
284
  "epoch": 17.0,
285
+ "grad_norm": 0.22218887507915497,
286
+ "learning_rate": 0.0004778703703703704,
287
+ "loss": 0.3447,
288
  "step": 18360
289
  },
290
  {
291
  "epoch": 17.0,
292
+ "eval_cer": 0.30813120584893544,
293
+ "eval_loss": 1.064017415046692,
294
+ "eval_runtime": 48.3671,
295
+ "eval_samples_per_second": 18.401,
296
+ "eval_steps_per_second": 2.316,
297
+ "eval_wer": 0.7369002525252525,
298
  "step": 18360
299
  },
300
  {
301
  "epoch": 18.0,
302
+ "grad_norm": 0.14536279439926147,
303
+ "learning_rate": 0.00046675925925925926,
304
+ "loss": 0.325,
305
  "step": 19440
306
  },
307
  {
308
  "epoch": 18.0,
309
+ "eval_cer": 0.2905448797115052,
310
+ "eval_loss": 1.0327048301696777,
311
+ "eval_runtime": 48.9592,
312
+ "eval_samples_per_second": 18.178,
313
+ "eval_steps_per_second": 2.288,
314
+ "eval_wer": 0.7534722222222222,
315
  "step": 19440
316
  },
317
  {
318
  "epoch": 19.0,
319
+ "grad_norm": 1.5726815462112427,
320
+ "learning_rate": 0.00045564814814814817,
321
+ "loss": 0.3022,
322
  "step": 20520
323
  },
324
  {
325
  "epoch": 19.0,
326
+ "eval_cer": 0.2886923874919725,
327
+ "eval_loss": 0.9869930148124695,
328
+ "eval_runtime": 49.3541,
329
+ "eval_samples_per_second": 18.033,
330
+ "eval_steps_per_second": 2.269,
331
+ "eval_wer": 0.7231691919191919,
332
  "step": 20520
333
  },
334
  {
335
  "epoch": 20.0,
336
+ "grad_norm": 0.41919103264808655,
337
+ "learning_rate": 0.00044454732510288065,
338
+ "loss": 0.2825,
339
  "step": 21600
340
  },
341
  {
342
  "epoch": 20.0,
343
+ "eval_cer": 0.28056612162228917,
344
+ "eval_loss": 0.9183225035667419,
345
+ "eval_runtime": 49.2359,
346
+ "eval_samples_per_second": 18.076,
347
+ "eval_steps_per_second": 2.275,
348
+ "eval_wer": 0.686395202020202,
349
  "step": 21600
350
  },
351
  {
352
  "epoch": 21.0,
353
+ "grad_norm": 12.236234664916992,
354
  "learning_rate": 0.0004334362139917696,
355
+ "loss": 0.2706,
356
  "step": 22680
357
  },
358
  {
359
  "epoch": 21.0,
360
+ "eval_cer": 0.28604949859210593,
361
+ "eval_loss": 0.9366316795349121,
362
+ "eval_runtime": 49.1391,
363
+ "eval_samples_per_second": 18.112,
364
+ "eval_steps_per_second": 2.279,
365
+ "eval_wer": 0.6811868686868687,
366
  "step": 22680
367
  },
368
  {
369
  "epoch": 22.0,
370
+ "grad_norm": 4.797195911407471,
371
+ "learning_rate": 0.0004223353909465021,
372
+ "loss": 0.2507,
373
  "step": 23760
374
  },
375
  {
376
  "epoch": 22.0,
377
+ "eval_cer": 0.2608062046139406,
378
+ "eval_loss": 0.9585080146789551,
379
+ "eval_runtime": 48.7093,
380
+ "eval_samples_per_second": 18.272,
381
+ "eval_steps_per_second": 2.299,
382
+ "eval_wer": 0.6941287878787878,
383
  "step": 23760
384
  },
385
  {
386
  "epoch": 23.0,
387
+ "grad_norm": 4.625443935394287,
388
  "learning_rate": 0.00041122427983539094,
389
+ "loss": 0.237,
390
  "step": 24840
391
  },
392
  {
393
  "epoch": 23.0,
394
+ "eval_cer": 0.28024502297090353,
395
+ "eval_loss": 1.010016918182373,
396
+ "eval_runtime": 50.1358,
397
+ "eval_samples_per_second": 17.752,
398
+ "eval_steps_per_second": 2.234,
399
+ "eval_wer": 0.6797664141414141,
400
  "step": 24840
401
  },
402
  {
403
  "epoch": 24.0,
404
+ "grad_norm": 0.49481087923049927,
405
  "learning_rate": 0.00040011316872427984,
406
+ "loss": 0.2298,
407
  "step": 25920
408
  },
409
  {
410
  "epoch": 24.0,
411
+ "eval_cer": 0.24492417131848046,
412
+ "eval_loss": 0.9184597730636597,
413
+ "eval_runtime": 48.7455,
414
+ "eval_samples_per_second": 18.258,
415
+ "eval_steps_per_second": 2.298,
416
+ "eval_wer": 0.6349431818181818,
417
  "step": 25920
418
  },
419
  {
420
  "epoch": 25.0,
421
+ "grad_norm": 1.7336276769638062,
422
+ "learning_rate": 0.0003890123456790123,
423
+ "loss": 0.221,
424
  "step": 27000
425
  },
426
  {
427
  "epoch": 25.0,
428
+ "eval_cer": 0.27846663044015213,
429
+ "eval_loss": 0.9352790713310242,
430
+ "eval_runtime": 48.8906,
431
+ "eval_samples_per_second": 18.204,
432
+ "eval_steps_per_second": 2.291,
433
+ "eval_wer": 0.6579861111111112,
434
  "step": 27000
435
  },
436
  {
437
  "epoch": 26.0,
438
+ "grad_norm": 0.02212027832865715,
439
+ "learning_rate": 0.0003779012345679013,
440
+ "loss": 0.2052,
441
  "step": 28080
442
  },
443
  {
444
  "epoch": 26.0,
445
+ "eval_cer": 0.2507039470434224,
446
+ "eval_loss": 0.8651528358459473,
447
+ "eval_runtime": 49.0769,
448
+ "eval_samples_per_second": 18.135,
449
+ "eval_steps_per_second": 2.282,
450
+ "eval_wer": 0.6493055555555556,
451
  "step": 28080
452
  },
453
  {
454
  "epoch": 27.0,
455
+ "grad_norm": 2.215277910232544,
456
+ "learning_rate": 0.0003667901234567901,
457
+ "loss": 0.1928,
458
  "step": 29160
459
  },
460
  {
461
  "epoch": 27.0,
462
+ "eval_cer": 0.2630785950699007,
463
+ "eval_loss": 0.8858852386474609,
464
+ "eval_runtime": 49.657,
465
+ "eval_samples_per_second": 17.923,
466
+ "eval_steps_per_second": 2.255,
467
+ "eval_wer": 0.6775568181818182,
468
  "step": 29160
469
  },
470
  {
471
  "epoch": 28.0,
472
+ "grad_norm": 0.10988181829452515,
473
+ "learning_rate": 0.000355679012345679,
474
+ "loss": 0.1889,
475
  "step": 30240
476
  },
477
  {
478
  "epoch": 28.0,
479
+ "eval_cer": 0.2666353801314034,
480
+ "eval_loss": 0.9239539504051208,
481
+ "eval_runtime": 49.2302,
482
+ "eval_samples_per_second": 18.078,
483
+ "eval_steps_per_second": 2.275,
484
+ "eval_wer": 0.6636679292929293,
485
  "step": 30240
486
  },
487
  {
488
  "epoch": 29.0,
489
+ "grad_norm": 0.5829525589942932,
490
+ "learning_rate": 0.0003445781893004115,
491
+ "loss": 0.1771,
492
  "step": 31320
493
  },
494
  {
495
  "epoch": 29.0,
496
+ "eval_cer": 0.24934545274909845,
497
+ "eval_loss": 0.9042806625366211,
498
+ "eval_runtime": 52.6225,
499
+ "eval_samples_per_second": 16.913,
500
+ "eval_steps_per_second": 2.128,
501
+ "eval_wer": 0.6256313131313131,
502
  "step": 31320
503
  },
504
  {
505
  "epoch": 30.0,
506
+ "grad_norm": 3.2479238510131836,
507
+ "learning_rate": 0.00033346707818930046,
508
+ "loss": 0.163,
509
  "step": 32400
510
  },
511
  {
512
  "epoch": 30.0,
513
+ "eval_cer": 0.26213999901200413,
514
+ "eval_loss": 0.9130964875221252,
515
+ "eval_runtime": 50.9345,
516
+ "eval_samples_per_second": 17.473,
517
+ "eval_steps_per_second": 2.199,
518
+ "eval_wer": 0.6504103535353535,
519
  "step": 32400
520
  },
521
  {
522
  "epoch": 31.0,
523
+ "grad_norm": 2.047846555709839,
524
+ "learning_rate": 0.0003223559670781893,
525
+ "loss": 0.1603,
526
  "step": 33480
527
  },
528
  {
529
  "epoch": 31.0,
530
+ "eval_cer": 0.24055228968038334,
531
+ "eval_loss": 0.8102329969406128,
532
+ "eval_runtime": 50.6115,
533
+ "eval_samples_per_second": 17.585,
534
+ "eval_steps_per_second": 2.213,
535
+ "eval_wer": 0.6319444444444444,
536
  "step": 33480
537
  },
538
  {
539
  "epoch": 32.0,
540
+ "grad_norm": 0.3893296420574188,
541
  "learning_rate": 0.0003112448559670782,
542
+ "loss": 0.1447,
543
  "step": 34560
544
  },
545
  {
546
  "epoch": 32.0,
547
+ "eval_cer": 0.2447512720446574,
548
+ "eval_loss": 0.9245155453681946,
549
+ "eval_runtime": 51.908,
550
+ "eval_samples_per_second": 17.146,
551
+ "eval_steps_per_second": 2.158,
552
+ "eval_wer": 0.6336805555555556,
553
  "step": 34560
554
  },
555
  {
556
  "epoch": 33.0,
557
+ "grad_norm": 2.6302273273468018,
558
+ "learning_rate": 0.0003001440329218107,
559
+ "loss": 0.1418,
560
  "step": 35640
561
  },
562
  {
563
  "epoch": 33.0,
564
+ "eval_cer": 0.25300103739564295,
565
+ "eval_loss": 0.9590283632278442,
566
+ "eval_runtime": 52.0031,
567
+ "eval_samples_per_second": 17.114,
568
+ "eval_steps_per_second": 2.154,
569
+ "eval_wer": 0.6235795454545454,
570
  "step": 35640
571
  },
572
  {
573
  "epoch": 34.0,
574
+ "grad_norm": 3.61879301071167,
575
+ "learning_rate": 0.0002890432098765432,
576
+ "loss": 0.1415,
577
  "step": 36720
578
  },
579
  {
580
  "epoch": 34.0,
581
+ "eval_cer": 0.2578916168552092,
582
+ "eval_loss": 0.92754727602005,
583
+ "eval_runtime": 52.0318,
584
+ "eval_samples_per_second": 17.105,
585
+ "eval_steps_per_second": 2.153,
586
+ "eval_wer": 0.634469696969697,
587
  "step": 36720
588
  },
589
  {
590
  "epoch": 35.0,
591
+ "grad_norm": 6.908621311187744,
592
+ "learning_rate": 0.00027793209876543213,
593
+ "loss": 0.1313,
594
  "step": 37800
595
  },
596
  {
597
  "epoch": 35.0,
598
+ "eval_cer": 0.24981475077804674,
599
+ "eval_loss": 0.8644362688064575,
600
+ "eval_runtime": 53.8225,
601
+ "eval_samples_per_second": 16.536,
602
+ "eval_steps_per_second": 2.081,
603
+ "eval_wer": 0.6279987373737373,
604
  "step": 37800
605
  },
606
  {
607
  "epoch": 36.0,
608
+ "grad_norm": 2.5687201023101807,
609
  "learning_rate": 0.000266820987654321,
610
+ "loss": 0.1285,
611
  "step": 38880
612
  },
613
  {
614
  "epoch": 36.0,
615
+ "eval_cer": 0.26505458677073557,
616
+ "eval_loss": 0.9070570468902588,
617
+ "eval_runtime": 55.322,
618
+ "eval_samples_per_second": 16.088,
619
+ "eval_steps_per_second": 2.025,
620
+ "eval_wer": 0.625,
621
  "step": 38880
622
  },
623
  {
624
  "epoch": 37.0,
625
+ "grad_norm": 0.1792680323123932,
626
  "learning_rate": 0.0002557098765432099,
627
+ "loss": 0.1204,
628
  "step": 39960
629
  },
630
  {
631
  "epoch": 37.0,
632
+ "eval_cer": 0.2386503976683298,
633
+ "eval_loss": 0.8658037185668945,
634
+ "eval_runtime": 54.276,
635
+ "eval_samples_per_second": 16.398,
636
+ "eval_steps_per_second": 2.064,
637
+ "eval_wer": 0.6092171717171717,
638
  "step": 39960
639
  },
640
  {
641
  "epoch": 38.0,
642
+ "grad_norm": 0.05945800244808197,
643
  "learning_rate": 0.0002445987654320988,
644
+ "loss": 0.1116,
645
  "step": 41040
646
  },
647
  {
648
  "epoch": 38.0,
649
+ "eval_cer": 0.24588746727263747,
650
+ "eval_loss": 0.8684060573577881,
651
+ "eval_runtime": 55.9431,
652
+ "eval_samples_per_second": 15.909,
653
+ "eval_steps_per_second": 2.002,
654
+ "eval_wer": 0.6267361111111112,
655
  "step": 41040
656
  },
657
  {
658
  "epoch": 39.0,
659
+ "grad_norm": 2.164262056350708,
660
+ "learning_rate": 0.00023349794238683127,
661
+ "loss": 0.102,
662
  "step": 42120
663
  },
664
  {
665
  "epoch": 39.0,
666
+ "eval_cer": 0.24102158770933163,
667
+ "eval_loss": 0.9792320728302002,
668
+ "eval_runtime": 54.7942,
669
+ "eval_samples_per_second": 16.243,
670
+ "eval_steps_per_second": 2.044,
671
+ "eval_wer": 0.6245265151515151,
672
  "step": 42120
673
  },
674
  {
675
  "epoch": 40.0,
676
+ "grad_norm": 7.841192722320557,
677
  "learning_rate": 0.00022238683127572017,
678
+ "loss": 0.0966,
679
  "step": 43200
680
  },
681
  {
682
  "epoch": 40.0,
683
+ "eval_cer": 0.2466037642641901,
684
+ "eval_loss": 0.8880752325057983,
685
+ "eval_runtime": 57.0632,
686
+ "eval_samples_per_second": 15.597,
687
+ "eval_steps_per_second": 1.963,
688
+ "eval_wer": 0.6163194444444444,
689
  "step": 43200
690
  },
691
  {
692
  "epoch": 41.0,
693
+ "grad_norm": 0.5480403304100037,
694
+ "learning_rate": 0.00021128600823045268,
695
+ "loss": 0.0934,
696
  "step": 44280
697
  },
698
  {
699
  "epoch": 41.0,
700
+ "eval_cer": 0.23398211727510745,
701
+ "eval_loss": 0.8669174909591675,
702
+ "eval_runtime": 56.5233,
703
+ "eval_samples_per_second": 15.746,
704
+ "eval_steps_per_second": 1.981,
705
+ "eval_wer": 0.5970643939393939,
706
  "step": 44280
707
  },
708
  {
709
  "epoch": 42.0,
710
+ "grad_norm": 2.996035099029541,
711
  "learning_rate": 0.00020017489711934155,
712
+ "loss": 0.0847,
713
  "step": 45360
714
  },
715
  {
716
  "epoch": 42.0,
717
+ "eval_cer": 0.2370696043076619,
718
+ "eval_loss": 0.9717867970466614,
719
+ "eval_runtime": 55.4728,
720
+ "eval_samples_per_second": 16.044,
721
+ "eval_steps_per_second": 2.019,
722
+ "eval_wer": 0.6207386363636364,
723
  "step": 45360
724
  },
725
  {
726
  "epoch": 43.0,
727
+ "grad_norm": 0.41690441966056824,
728
  "learning_rate": 0.00018907407407407406,
729
+ "loss": 0.0828,
730
  "step": 46440
731
  },
732
  {
733
  "epoch": 43.0,
734
+ "eval_cer": 0.2392925949711011,
735
+ "eval_loss": 0.957336962223053,
736
+ "eval_runtime": 54.9772,
737
+ "eval_samples_per_second": 16.189,
738
+ "eval_steps_per_second": 2.037,
739
+ "eval_wer": 0.6223169191919192,
740
  "step": 46440
741
  },
742
  {
743
  "epoch": 44.0,
744
+ "grad_norm": 0.07533986121416092,
745
+ "learning_rate": 0.0001779732510288066,
746
+ "loss": 0.0727,
747
  "step": 47520
748
  },
749
  {
750
  "epoch": 44.0,
751
+ "eval_cer": 0.2357605098058588,
752
+ "eval_loss": 0.9871988892555237,
753
+ "eval_runtime": 57.6886,
754
+ "eval_samples_per_second": 15.428,
755
+ "eval_steps_per_second": 1.941,
756
+ "eval_wer": 0.6096906565656566,
757
  "step": 47520
758
  },
759
  {
760
  "epoch": 45.0,
761
+ "grad_norm": 0.7598063945770264,
762
+ "learning_rate": 0.00016686213991769547,
763
+ "loss": 0.0701,
764
  "step": 48600
765
  },
766
  {
767
  "epoch": 45.0,
768
+ "eval_cer": 0.24457837277083436,
769
+ "eval_loss": 0.9421331882476807,
770
+ "eval_runtime": 55.063,
771
+ "eval_samples_per_second": 16.163,
772
+ "eval_steps_per_second": 2.034,
773
+ "eval_wer": 0.6115845959595959,
774
  "step": 48600
775
  },
776
  {
777
  "epoch": 46.0,
778
+ "grad_norm": 0.43303415179252625,
779
  "learning_rate": 0.00015575102880658438,
780
+ "loss": 0.0648,
781
  "step": 49680
782
  },
783
  {
784
  "epoch": 46.0,
785
+ "eval_cer": 0.24672726374549228,
786
+ "eval_loss": 0.9590614438056946,
787
+ "eval_runtime": 57.1789,
788
+ "eval_samples_per_second": 15.565,
789
+ "eval_steps_per_second": 1.959,
790
+ "eval_wer": 0.6043244949494949,
791
  "step": 49680
792
  },
793
  {
794
  "epoch": 47.0,
795
+ "grad_norm": 6.171388626098633,
796
  "learning_rate": 0.00014463991769547325,
797
+ "loss": 0.0634,
798
  "step": 50760
799
  },
800
  {
801
  "epoch": 47.0,
802
+ "eval_cer": 0.23551351084325445,
803
+ "eval_loss": 0.9990620017051697,
804
+ "eval_runtime": 55.5622,
805
+ "eval_samples_per_second": 16.018,
806
+ "eval_steps_per_second": 2.016,
807
+ "eval_wer": 0.6109532828282829,
808
  "step": 50760
809
  },
810
  {
811
  "epoch": 48.0,
812
+ "grad_norm": 0.05001814663410187,
813
+ "learning_rate": 0.0001335390946502058,
814
+ "loss": 0.0573,
815
  "step": 51840
816
  },
817
  {
818
  "epoch": 48.0,
819
+ "eval_cer": 0.23452551499283703,
820
+ "eval_loss": 0.9873119592666626,
821
+ "eval_runtime": 55.0833,
822
+ "eval_samples_per_second": 16.157,
823
+ "eval_steps_per_second": 2.033,
824
+ "eval_wer": 0.6054292929292929,
825
  "step": 51840
826
  },
827
  {
828
  "epoch": 49.0,
829
+ "grad_norm": 3.651003360748291,
830
+ "learning_rate": 0.00012242798353909466,
831
+ "loss": 0.0527,
832
  "step": 52920
833
  },
834
  {
835
  "epoch": 49.0,
836
+ "eval_cer": 0.23247542360322088,
837
+ "eval_loss": 0.9885514974594116,
838
+ "eval_runtime": 52.5162,
839
+ "eval_samples_per_second": 16.947,
840
+ "eval_steps_per_second": 2.133,
841
+ "eval_wer": 0.5935921717171717,
842
  "step": 52920
843
  },
844
  {
845
  "epoch": 50.0,
846
+ "grad_norm": 3.5055177211761475,
847
  "learning_rate": 0.00011131687242798354,
848
+ "loss": 0.0506,
849
  "step": 54000
850
  },
851
  {
852
  "epoch": 50.0,
853
+ "eval_cer": 0.22867163957911377,
854
+ "eval_loss": 1.0199133157730103,
855
+ "eval_runtime": 51.406,
856
+ "eval_samples_per_second": 17.313,
857
+ "eval_steps_per_second": 2.179,
858
+ "eval_wer": 0.5940656565656566,
859
  "step": 54000
860
  },
861
  {
862
  "epoch": 51.0,
863
+ "grad_norm": 0.08695941418409348,
864
+ "learning_rate": 0.00010020576131687243,
865
+ "loss": 0.0486,
866
  "step": 55080
867
  },
868
  {
869
  "epoch": 51.0,
870
+ "eval_cer": 0.22634984933063282,
871
+ "eval_loss": 1.0691256523132324,
872
+ "eval_runtime": 54.2523,
873
+ "eval_samples_per_second": 16.405,
874
+ "eval_steps_per_second": 2.064,
875
+ "eval_wer": 0.5880681818181818,
876
  "step": 55080
877
  },
878
  {
879
  "epoch": 52.0,
880
+ "grad_norm": 0.4256766438484192,
881
+ "learning_rate": 8.909465020576133e-05,
882
+ "loss": 0.0447,
883
  "step": 56160
884
  },
885
  {
886
  "epoch": 52.0,
887
+ "eval_cer": 0.22963493553327077,
888
+ "eval_loss": 1.0140999555587769,
889
+ "eval_runtime": 58.925,
890
+ "eval_samples_per_second": 15.104,
891
+ "eval_steps_per_second": 1.901,
892
+ "eval_wer": 0.5893308080808081,
893
  "step": 56160
894
  },
895
  {
896
  "epoch": 53.0,
897
+ "grad_norm": 3.884925365447998,
898
  "learning_rate": 7.799382716049382e-05,
899
+ "loss": 0.0419,
900
  "step": 57240
901
  },
902
  {
903
  "epoch": 53.0,
904
+ "eval_cer": 0.2279306426913007,
905
+ "eval_loss": 1.0658098459243774,
906
+ "eval_runtime": 50.8901,
907
+ "eval_samples_per_second": 17.489,
908
+ "eval_steps_per_second": 2.201,
909
+ "eval_wer": 0.5872790404040404,
910
  "step": 57240
911
  },
912
  {
913
  "epoch": 54.0,
914
+ "grad_norm": 0.5678676962852478,
915
+ "learning_rate": 6.690329218106995e-05,
916
+ "loss": 0.0376,
917
  "step": 58320
918
  },
919
  {
920
  "epoch": 54.0,
921
+ "eval_cer": 0.2253618534802154,
922
+ "eval_loss": 1.144079327583313,
923
+ "eval_runtime": 52.3564,
924
+ "eval_samples_per_second": 16.999,
925
+ "eval_steps_per_second": 2.139,
926
+ "eval_wer": 0.5888573232323232,
927
  "step": 58320
928
  },
929
  {
930
  "epoch": 55.0,
931
+ "grad_norm": 1.0211379528045654,
932
+ "learning_rate": 5.579218106995885e-05,
933
+ "loss": 0.0355,
934
  "step": 59400
935
  },
936
  {
937
  "epoch": 55.0,
938
+ "eval_cer": 0.22486785555500666,
939
+ "eval_loss": 1.146174430847168,
940
+ "eval_runtime": 50.8316,
941
+ "eval_samples_per_second": 17.509,
942
+ "eval_steps_per_second": 2.203,
943
+ "eval_wer": 0.5880681818181818,
944
  "step": 59400
945
  },
946
  {
947
  "epoch": 56.0,
948
+ "grad_norm": 0.02778603509068489,
949
+ "learning_rate": 4.468106995884774e-05,
950
+ "loss": 0.0335,
951
  "step": 60480
952
  },
953
  {
954
  "epoch": 56.0,
955
+ "eval_cer": 0.22442325742231883,
956
+ "eval_loss": 1.1712491512298584,
957
+ "eval_runtime": 51.7561,
958
+ "eval_samples_per_second": 17.196,
959
+ "eval_steps_per_second": 2.164,
960
+ "eval_wer": 0.5860164141414141,
961
  "step": 60480
962
  },
963
  {
964
  "epoch": 57.0,
965
+ "grad_norm": 0.13397055864334106,
966
+ "learning_rate": 3.3569958847736626e-05,
967
+ "loss": 0.0296,
968
  "step": 61560
969
  },
970
  {
971
  "epoch": 57.0,
972
+ "eval_cer": 0.22180506841871264,
973
+ "eval_loss": 1.162169337272644,
974
+ "eval_runtime": 51.0452,
975
+ "eval_samples_per_second": 17.436,
976
+ "eval_steps_per_second": 2.194,
977
+ "eval_wer": 0.5785984848484849,
978
  "step": 61560
979
  },
980
  {
981
  "epoch": 58.0,
982
+ "grad_norm": 0.001944132731296122,
983
+ "learning_rate": 2.246913580246914e-05,
984
+ "loss": 0.0301,
985
  "step": 62640
986
  },
987
  {
988
  "epoch": 58.0,
989
+ "eval_cer": 0.22350936126068272,
990
+ "eval_loss": 1.170377492904663,
991
+ "eval_runtime": 50.364,
992
+ "eval_samples_per_second": 17.671,
993
+ "eval_steps_per_second": 2.224,
994
+ "eval_wer": 0.5839646464646465,
995
  "step": 62640
996
  },
997
  {
998
  "epoch": 59.0,
999
+ "grad_norm": 0.18270032107830048,
1000
+ "learning_rate": 1.1358024691358025e-05,
1001
+ "loss": 0.0283,
1002
  "step": 63720
1003
  },
1004
  {
1005
  "epoch": 59.0,
1006
+ "eval_cer": 0.22133577038976437,
1007
+ "eval_loss": 1.1973356008529663,
1008
+ "eval_runtime": 50.9914,
1009
+ "eval_samples_per_second": 17.454,
1010
+ "eval_steps_per_second": 2.196,
1011
+ "eval_wer": 0.5804924242424242,
1012
  "step": 63720
1013
  },
1014
  {
1015
  "epoch": 60.0,
1016
+ "grad_norm": 0.00017149873019661754,
1017
+ "learning_rate": 2.469135802469136e-07,
1018
+ "loss": 0.0245,
1019
  "step": 64800
1020
  },
1021
  {
1022
  "epoch": 60.0,
1023
+ "eval_cer": 0.2198290767178778,
1024
+ "eval_loss": 1.1907662153244019,
1025
+ "eval_runtime": 51.7658,
1026
+ "eval_samples_per_second": 17.193,
1027
+ "eval_steps_per_second": 2.164,
1028
+ "eval_wer": 0.5762310606060606,
1029
  "step": 64800
1030
  },
1031
  {
1032
  "epoch": 60.0,
1033
  "step": 64800,
1034
  "total_flos": 1.8440987587856836e+20,
1035
+ "train_loss": 0.1667554270485301,
1036
+ "train_runtime": 81955.137,
1037
+ "train_samples_per_second": 12.641,
1038
+ "train_steps_per_second": 0.791
1039
  }
1040
  ],
1041
  "logging_steps": 500,