alinerodrigues commited on
Commit
167ee21
·
1 Parent(s): 9878e25

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +16 -0
  2. eval_results.json +10 -0
  3. train_results.json +9 -0
  4. trainer_state.json +1413 -0
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 128.0,
3
+ "eval_cer": 0.17881710646041857,
4
+ "eval_loss": 0.5552828907966614,
5
+ "eval_runtime": 52.7828,
6
+ "eval_samples": 542,
7
+ "eval_samples_per_second": 10.269,
8
+ "eval_steps_per_second": 1.288,
9
+ "eval_wer": 0.3466070702966274,
10
+ "total_flos": 1.5995217836155625e+19,
11
+ "train_loss": 1.3990157055003303,
12
+ "train_runtime": 26333.2616,
13
+ "train_samples": 448,
14
+ "train_samples_per_second": 2.552,
15
+ "train_steps_per_second": 0.08
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 128.0,
3
+ "eval_cer": 0.17881710646041857,
4
+ "eval_loss": 0.5552828907966614,
5
+ "eval_runtime": 52.7828,
6
+ "eval_samples": 542,
7
+ "eval_samples_per_second": 10.269,
8
+ "eval_steps_per_second": 1.288,
9
+ "eval_wer": 0.3466070702966274
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 128.0,
3
+ "total_flos": 1.5995217836155625e+19,
4
+ "train_loss": 1.3990157055003303,
5
+ "train_runtime": 26333.2616,
6
+ "train_samples": 448,
7
+ "train_samples_per_second": 2.552,
8
+ "train_steps_per_second": 0.08
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5552828907966614,
3
+ "best_model_checkpoint": "wav2vec2-large-xlsr-coraa-exp-9/checkpoint-1512",
4
+ "epoch": 128.0,
5
+ "global_step": 1792,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 3e-05,
13
+ "loss": 37.5508,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_cer": 0.9618562329390354,
19
+ "eval_loss": 23.137550354003906,
20
+ "eval_runtime": 52.9452,
21
+ "eval_samples_per_second": 10.237,
22
+ "eval_steps_per_second": 1.284,
23
+ "eval_wer": 1.0,
24
+ "step": 14
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "eval_cer": 0.9618562329390354,
29
+ "eval_loss": 6.50360107421875,
30
+ "eval_runtime": 52.53,
31
+ "eval_samples_per_second": 10.318,
32
+ "eval_steps_per_second": 1.294,
33
+ "eval_wer": 1.0,
34
+ "step": 28
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_cer": 0.9618562329390354,
39
+ "eval_loss": 4.391931056976318,
40
+ "eval_runtime": 52.983,
41
+ "eval_samples_per_second": 10.23,
42
+ "eval_steps_per_second": 1.283,
43
+ "eval_wer": 1.0,
44
+ "step": 42
45
+ },
46
+ {
47
+ "epoch": 4.0,
48
+ "eval_cer": 0.9618562329390354,
49
+ "eval_loss": 3.944124221801758,
50
+ "eval_runtime": 52.4812,
51
+ "eval_samples_per_second": 10.328,
52
+ "eval_steps_per_second": 1.296,
53
+ "eval_wer": 1.0,
54
+ "step": 56
55
+ },
56
+ {
57
+ "epoch": 5.0,
58
+ "eval_cer": 0.9618562329390354,
59
+ "eval_loss": 3.730649709701538,
60
+ "eval_runtime": 52.6687,
61
+ "eval_samples_per_second": 10.291,
62
+ "eval_steps_per_second": 1.291,
63
+ "eval_wer": 1.0,
64
+ "step": 70
65
+ },
66
+ {
67
+ "epoch": 6.0,
68
+ "eval_cer": 0.9618562329390354,
69
+ "eval_loss": 3.5761733055114746,
70
+ "eval_runtime": 52.4991,
71
+ "eval_samples_per_second": 10.324,
72
+ "eval_steps_per_second": 1.295,
73
+ "eval_wer": 1.0,
74
+ "step": 84
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_cer": 0.9618562329390354,
79
+ "eval_loss": 3.4128713607788086,
80
+ "eval_runtime": 52.9491,
81
+ "eval_samples_per_second": 10.236,
82
+ "eval_steps_per_second": 1.284,
83
+ "eval_wer": 1.0,
84
+ "step": 98
85
+ },
86
+ {
87
+ "epoch": 7.14,
88
+ "learning_rate": 2.8614285714285714e-05,
89
+ "loss": 8.6902,
90
+ "step": 100
91
+ },
92
+ {
93
+ "epoch": 8.0,
94
+ "eval_cer": 0.9618562329390354,
95
+ "eval_loss": 3.285918951034546,
96
+ "eval_runtime": 52.5221,
97
+ "eval_samples_per_second": 10.319,
98
+ "eval_steps_per_second": 1.295,
99
+ "eval_wer": 1.0,
100
+ "step": 112
101
+ },
102
+ {
103
+ "epoch": 9.0,
104
+ "eval_cer": 0.9618562329390354,
105
+ "eval_loss": 3.219179391860962,
106
+ "eval_runtime": 53.0718,
107
+ "eval_samples_per_second": 10.213,
108
+ "eval_steps_per_second": 1.281,
109
+ "eval_wer": 1.0,
110
+ "step": 126
111
+ },
112
+ {
113
+ "epoch": 10.0,
114
+ "eval_cer": 0.9618562329390354,
115
+ "eval_loss": 3.1479437351226807,
116
+ "eval_runtime": 52.485,
117
+ "eval_samples_per_second": 10.327,
118
+ "eval_steps_per_second": 1.296,
119
+ "eval_wer": 1.0,
120
+ "step": 140
121
+ },
122
+ {
123
+ "epoch": 11.0,
124
+ "eval_cer": 0.9618562329390354,
125
+ "eval_loss": 3.106292963027954,
126
+ "eval_runtime": 52.586,
127
+ "eval_samples_per_second": 10.307,
128
+ "eval_steps_per_second": 1.293,
129
+ "eval_wer": 1.0,
130
+ "step": 154
131
+ },
132
+ {
133
+ "epoch": 12.0,
134
+ "eval_cer": 0.9618562329390354,
135
+ "eval_loss": 3.0896878242492676,
136
+ "eval_runtime": 52.4874,
137
+ "eval_samples_per_second": 10.326,
138
+ "eval_steps_per_second": 1.296,
139
+ "eval_wer": 1.0,
140
+ "step": 168
141
+ },
142
+ {
143
+ "epoch": 13.0,
144
+ "eval_cer": 0.9618562329390354,
145
+ "eval_loss": 3.084855794906616,
146
+ "eval_runtime": 52.6206,
147
+ "eval_samples_per_second": 10.3,
148
+ "eval_steps_per_second": 1.292,
149
+ "eval_wer": 1.0,
150
+ "step": 182
151
+ },
152
+ {
153
+ "epoch": 14.0,
154
+ "eval_cer": 0.9618562329390354,
155
+ "eval_loss": 3.048473596572876,
156
+ "eval_runtime": 52.5487,
157
+ "eval_samples_per_second": 10.314,
158
+ "eval_steps_per_second": 1.294,
159
+ "eval_wer": 1.0,
160
+ "step": 196
161
+ },
162
+ {
163
+ "epoch": 14.29,
164
+ "learning_rate": 2.7185714285714287e-05,
165
+ "loss": 3.059,
166
+ "step": 200
167
+ },
168
+ {
169
+ "epoch": 15.0,
170
+ "eval_cer": 0.9618562329390354,
171
+ "eval_loss": 3.0495524406433105,
172
+ "eval_runtime": 52.5935,
173
+ "eval_samples_per_second": 10.305,
174
+ "eval_steps_per_second": 1.293,
175
+ "eval_wer": 1.0,
176
+ "step": 210
177
+ },
178
+ {
179
+ "epoch": 16.0,
180
+ "eval_cer": 0.9618562329390354,
181
+ "eval_loss": 3.0509705543518066,
182
+ "eval_runtime": 52.5135,
183
+ "eval_samples_per_second": 10.321,
184
+ "eval_steps_per_second": 1.295,
185
+ "eval_wer": 1.0,
186
+ "step": 224
187
+ },
188
+ {
189
+ "epoch": 17.0,
190
+ "eval_cer": 0.9618562329390354,
191
+ "eval_loss": 3.042844533920288,
192
+ "eval_runtime": 52.5458,
193
+ "eval_samples_per_second": 10.315,
194
+ "eval_steps_per_second": 1.294,
195
+ "eval_wer": 1.0,
196
+ "step": 238
197
+ },
198
+ {
199
+ "epoch": 18.0,
200
+ "eval_cer": 0.9618562329390354,
201
+ "eval_loss": 3.033111572265625,
202
+ "eval_runtime": 52.5883,
203
+ "eval_samples_per_second": 10.306,
204
+ "eval_steps_per_second": 1.293,
205
+ "eval_wer": 1.0,
206
+ "step": 252
207
+ },
208
+ {
209
+ "epoch": 19.0,
210
+ "eval_cer": 0.9618562329390354,
211
+ "eval_loss": 3.035349130630493,
212
+ "eval_runtime": 52.5405,
213
+ "eval_samples_per_second": 10.316,
214
+ "eval_steps_per_second": 1.294,
215
+ "eval_wer": 1.0,
216
+ "step": 266
217
+ },
218
+ {
219
+ "epoch": 20.0,
220
+ "eval_cer": 0.9618562329390354,
221
+ "eval_loss": 3.0217459201812744,
222
+ "eval_runtime": 52.6413,
223
+ "eval_samples_per_second": 10.296,
224
+ "eval_steps_per_second": 1.292,
225
+ "eval_wer": 1.0,
226
+ "step": 280
227
+ },
228
+ {
229
+ "epoch": 21.0,
230
+ "eval_cer": 0.9618562329390354,
231
+ "eval_loss": 3.0107431411743164,
232
+ "eval_runtime": 52.5958,
233
+ "eval_samples_per_second": 10.305,
234
+ "eval_steps_per_second": 1.293,
235
+ "eval_wer": 1.0,
236
+ "step": 294
237
+ },
238
+ {
239
+ "epoch": 21.43,
240
+ "learning_rate": 2.5757142857142857e-05,
241
+ "loss": 2.9492,
242
+ "step": 300
243
+ },
244
+ {
245
+ "epoch": 22.0,
246
+ "eval_cer": 0.9618562329390354,
247
+ "eval_loss": 3.0068299770355225,
248
+ "eval_runtime": 52.8053,
249
+ "eval_samples_per_second": 10.264,
250
+ "eval_steps_per_second": 1.288,
251
+ "eval_wer": 1.0,
252
+ "step": 308
253
+ },
254
+ {
255
+ "epoch": 23.0,
256
+ "eval_cer": 0.9618562329390354,
257
+ "eval_loss": 2.9949989318847656,
258
+ "eval_runtime": 52.5717,
259
+ "eval_samples_per_second": 10.31,
260
+ "eval_steps_per_second": 1.293,
261
+ "eval_wer": 1.0,
262
+ "step": 322
263
+ },
264
+ {
265
+ "epoch": 24.0,
266
+ "eval_cer": 0.9618562329390354,
267
+ "eval_loss": 2.989611864089966,
268
+ "eval_runtime": 53.0076,
269
+ "eval_samples_per_second": 10.225,
270
+ "eval_steps_per_second": 1.283,
271
+ "eval_wer": 1.0,
272
+ "step": 336
273
+ },
274
+ {
275
+ "epoch": 25.0,
276
+ "eval_cer": 0.9618562329390354,
277
+ "eval_loss": 2.9687271118164062,
278
+ "eval_runtime": 52.6125,
279
+ "eval_samples_per_second": 10.302,
280
+ "eval_steps_per_second": 1.292,
281
+ "eval_wer": 1.0,
282
+ "step": 350
283
+ },
284
+ {
285
+ "epoch": 26.0,
286
+ "eval_cer": 0.9618562329390354,
287
+ "eval_loss": 2.947374105453491,
288
+ "eval_runtime": 52.5637,
289
+ "eval_samples_per_second": 10.311,
290
+ "eval_steps_per_second": 1.294,
291
+ "eval_wer": 1.0,
292
+ "step": 364
293
+ },
294
+ {
295
+ "epoch": 27.0,
296
+ "eval_cer": 0.9618562329390354,
297
+ "eval_loss": 2.9414024353027344,
298
+ "eval_runtime": 52.648,
299
+ "eval_samples_per_second": 10.295,
300
+ "eval_steps_per_second": 1.292,
301
+ "eval_wer": 1.0,
302
+ "step": 378
303
+ },
304
+ {
305
+ "epoch": 28.0,
306
+ "eval_cer": 0.9618562329390354,
307
+ "eval_loss": 2.84250807762146,
308
+ "eval_runtime": 52.5712,
309
+ "eval_samples_per_second": 10.31,
310
+ "eval_steps_per_second": 1.293,
311
+ "eval_wer": 1.0,
312
+ "step": 392
313
+ },
314
+ {
315
+ "epoch": 28.57,
316
+ "learning_rate": 2.432857142857143e-05,
317
+ "loss": 2.8892,
318
+ "step": 400
319
+ },
320
+ {
321
+ "epoch": 29.0,
322
+ "eval_cer": 0.9618562329390354,
323
+ "eval_loss": 2.7813332080841064,
324
+ "eval_runtime": 52.64,
325
+ "eval_samples_per_second": 10.296,
326
+ "eval_steps_per_second": 1.292,
327
+ "eval_wer": 1.0,
328
+ "step": 406
329
+ },
330
+ {
331
+ "epoch": 30.0,
332
+ "eval_cer": 0.9618562329390354,
333
+ "eval_loss": 2.7269773483276367,
334
+ "eval_runtime": 52.5989,
335
+ "eval_samples_per_second": 10.304,
336
+ "eval_steps_per_second": 1.293,
337
+ "eval_wer": 1.0,
338
+ "step": 420
339
+ },
340
+ {
341
+ "epoch": 31.0,
342
+ "eval_cer": 0.9606187443130119,
343
+ "eval_loss": 2.6644980907440186,
344
+ "eval_runtime": 52.6591,
345
+ "eval_samples_per_second": 10.293,
346
+ "eval_steps_per_second": 1.291,
347
+ "eval_wer": 1.0,
348
+ "step": 434
349
+ },
350
+ {
351
+ "epoch": 32.0,
352
+ "eval_cer": 0.9138853503184713,
353
+ "eval_loss": 2.5593061447143555,
354
+ "eval_runtime": 52.5942,
355
+ "eval_samples_per_second": 10.305,
356
+ "eval_steps_per_second": 1.293,
357
+ "eval_wer": 1.0,
358
+ "step": 448
359
+ },
360
+ {
361
+ "epoch": 33.0,
362
+ "eval_cer": 0.7002729754322111,
363
+ "eval_loss": 2.3229565620422363,
364
+ "eval_runtime": 53.1245,
365
+ "eval_samples_per_second": 10.202,
366
+ "eval_steps_per_second": 1.28,
367
+ "eval_wer": 1.0,
368
+ "step": 462
369
+ },
370
+ {
371
+ "epoch": 34.0,
372
+ "eval_cer": 0.535796178343949,
373
+ "eval_loss": 1.9706292152404785,
374
+ "eval_runtime": 52.7199,
375
+ "eval_samples_per_second": 10.281,
376
+ "eval_steps_per_second": 1.29,
377
+ "eval_wer": 1.0,
378
+ "step": 476
379
+ },
380
+ {
381
+ "epoch": 35.0,
382
+ "eval_cer": 0.45481346678798906,
383
+ "eval_loss": 1.7085232734680176,
384
+ "eval_runtime": 52.7281,
385
+ "eval_samples_per_second": 10.279,
386
+ "eval_steps_per_second": 1.29,
387
+ "eval_wer": 0.9997968305566842,
388
+ "step": 490
389
+ },
390
+ {
391
+ "epoch": 35.71,
392
+ "learning_rate": 2.29e-05,
393
+ "loss": 2.3937,
394
+ "step": 500
395
+ },
396
+ {
397
+ "epoch": 36.0,
398
+ "eval_cer": 0.4063694267515924,
399
+ "eval_loss": 1.449361801147461,
400
+ "eval_runtime": 52.6273,
401
+ "eval_samples_per_second": 10.299,
402
+ "eval_steps_per_second": 1.292,
403
+ "eval_wer": 1.0,
404
+ "step": 504
405
+ },
406
+ {
407
+ "epoch": 37.0,
408
+ "eval_cer": 0.38471337579617837,
409
+ "eval_loss": 1.286526083946228,
410
+ "eval_runtime": 52.7236,
411
+ "eval_samples_per_second": 10.28,
412
+ "eval_steps_per_second": 1.29,
413
+ "eval_wer": 1.0,
414
+ "step": 518
415
+ },
416
+ {
417
+ "epoch": 38.0,
418
+ "eval_cer": 0.3659326660600546,
419
+ "eval_loss": 1.1509252786636353,
420
+ "eval_runtime": 52.6429,
421
+ "eval_samples_per_second": 10.296,
422
+ "eval_steps_per_second": 1.292,
423
+ "eval_wer": 0.9947175944737912,
424
+ "step": 532
425
+ },
426
+ {
427
+ "epoch": 39.0,
428
+ "eval_cer": 0.3182893539581438,
429
+ "eval_loss": 1.0466662645339966,
430
+ "eval_runtime": 52.7531,
431
+ "eval_samples_per_second": 10.274,
432
+ "eval_steps_per_second": 1.289,
433
+ "eval_wer": 0.903088175538399,
434
+ "step": 546
435
+ },
436
+ {
437
+ "epoch": 40.0,
438
+ "eval_cer": 0.24043676069153777,
439
+ "eval_loss": 0.9831815361976624,
440
+ "eval_runtime": 52.6494,
441
+ "eval_samples_per_second": 10.295,
442
+ "eval_steps_per_second": 1.292,
443
+ "eval_wer": 0.5960991466883381,
444
+ "step": 560
445
+ },
446
+ {
447
+ "epoch": 41.0,
448
+ "eval_cer": 0.22227479526842583,
449
+ "eval_loss": 0.892058789730072,
450
+ "eval_runtime": 53.2461,
451
+ "eval_samples_per_second": 10.179,
452
+ "eval_steps_per_second": 1.277,
453
+ "eval_wer": 0.5048760666395774,
454
+ "step": 574
455
+ },
456
+ {
457
+ "epoch": 42.0,
458
+ "eval_cer": 0.21233848953594175,
459
+ "eval_loss": 0.8306074738502502,
460
+ "eval_runtime": 52.8348,
461
+ "eval_samples_per_second": 10.258,
462
+ "eval_steps_per_second": 1.287,
463
+ "eval_wer": 0.4687119057293783,
464
+ "step": 588
465
+ },
466
+ {
467
+ "epoch": 42.86,
468
+ "learning_rate": 2.1471428571428574e-05,
469
+ "loss": 1.0877,
470
+ "step": 600
471
+ },
472
+ {
473
+ "epoch": 43.0,
474
+ "eval_cer": 0.20880800727934487,
475
+ "eval_loss": 0.8016981482505798,
476
+ "eval_runtime": 52.6157,
477
+ "eval_samples_per_second": 10.301,
478
+ "eval_steps_per_second": 1.292,
479
+ "eval_wer": 0.45631856968711904,
480
+ "step": 602
481
+ },
482
+ {
483
+ "epoch": 44.0,
484
+ "eval_cer": 0.2045859872611465,
485
+ "eval_loss": 0.7715625166893005,
486
+ "eval_runtime": 52.6399,
487
+ "eval_samples_per_second": 10.296,
488
+ "eval_steps_per_second": 1.292,
489
+ "eval_wer": 0.4404713531084925,
490
+ "step": 616
491
+ },
492
+ {
493
+ "epoch": 45.0,
494
+ "eval_cer": 0.2053867151956324,
495
+ "eval_loss": 0.7693753838539124,
496
+ "eval_runtime": 52.708,
497
+ "eval_samples_per_second": 10.283,
498
+ "eval_steps_per_second": 1.29,
499
+ "eval_wer": 0.4406745225518082,
500
+ "step": 630
501
+ },
502
+ {
503
+ "epoch": 46.0,
504
+ "eval_cer": 0.20367606915377617,
505
+ "eval_loss": 0.7451383471488953,
506
+ "eval_runtime": 52.7002,
507
+ "eval_samples_per_second": 10.285,
508
+ "eval_steps_per_second": 1.29,
509
+ "eval_wer": 0.4315318976026006,
510
+ "step": 644
511
+ },
512
+ {
513
+ "epoch": 47.0,
514
+ "eval_cer": 0.19956323930846223,
515
+ "eval_loss": 0.7111817598342896,
516
+ "eval_runtime": 53.2448,
517
+ "eval_samples_per_second": 10.179,
518
+ "eval_steps_per_second": 1.277,
519
+ "eval_wer": 0.42503047541649736,
520
+ "step": 658
521
+ },
522
+ {
523
+ "epoch": 48.0,
524
+ "eval_cer": 0.19581437670609644,
525
+ "eval_loss": 0.7007989883422852,
526
+ "eval_runtime": 52.7775,
527
+ "eval_samples_per_second": 10.27,
528
+ "eval_steps_per_second": 1.288,
529
+ "eval_wer": 0.41162129215765947,
530
+ "step": 672
531
+ },
532
+ {
533
+ "epoch": 49.0,
534
+ "eval_cer": 0.19799818016378526,
535
+ "eval_loss": 0.7140166163444519,
536
+ "eval_runtime": 52.6495,
537
+ "eval_samples_per_second": 10.294,
538
+ "eval_steps_per_second": 1.292,
539
+ "eval_wer": 0.40572937830150346,
540
+ "step": 686
541
+ },
542
+ {
543
+ "epoch": 50.0,
544
+ "learning_rate": 2.004285714285714e-05,
545
+ "loss": 0.6292,
546
+ "step": 700
547
+ },
548
+ {
549
+ "epoch": 50.0,
550
+ "eval_cer": 0.19883530482256598,
551
+ "eval_loss": 0.7207656502723694,
552
+ "eval_runtime": 52.6919,
553
+ "eval_samples_per_second": 10.286,
554
+ "eval_steps_per_second": 1.291,
555
+ "eval_wer": 0.41141812271434375,
556
+ "step": 700
557
+ },
558
+ {
559
+ "epoch": 51.0,
560
+ "eval_cer": 0.19366696997270247,
561
+ "eval_loss": 0.6675068140029907,
562
+ "eval_runtime": 52.8778,
563
+ "eval_samples_per_second": 10.25,
564
+ "eval_steps_per_second": 1.286,
565
+ "eval_wer": 0.40329134498171476,
566
+ "step": 714
567
+ },
568
+ {
569
+ "epoch": 52.0,
570
+ "eval_cer": 0.19384895359417653,
571
+ "eval_loss": 0.6650232672691345,
572
+ "eval_runtime": 52.6583,
573
+ "eval_samples_per_second": 10.293,
574
+ "eval_steps_per_second": 1.291,
575
+ "eval_wer": 0.4014628199918732,
576
+ "step": 728
577
+ },
578
+ {
579
+ "epoch": 53.0,
580
+ "eval_cer": 0.1937761601455869,
581
+ "eval_loss": 0.6550155878067017,
582
+ "eval_runtime": 52.7667,
583
+ "eval_samples_per_second": 10.272,
584
+ "eval_steps_per_second": 1.289,
585
+ "eval_wer": 0.4012596505485575,
586
+ "step": 742
587
+ },
588
+ {
589
+ "epoch": 54.0,
590
+ "eval_cer": 0.19315741583257506,
591
+ "eval_loss": 0.6477207541465759,
592
+ "eval_runtime": 52.6665,
593
+ "eval_samples_per_second": 10.291,
594
+ "eval_steps_per_second": 1.291,
595
+ "eval_wer": 0.3990247866720845,
596
+ "step": 756
597
+ },
598
+ {
599
+ "epoch": 55.0,
600
+ "eval_cer": 0.19319381255686988,
601
+ "eval_loss": 0.6361698508262634,
602
+ "eval_runtime": 52.6325,
603
+ "eval_samples_per_second": 10.298,
604
+ "eval_steps_per_second": 1.292,
605
+ "eval_wer": 0.39597724502234866,
606
+ "step": 770
607
+ },
608
+ {
609
+ "epoch": 56.0,
610
+ "eval_cer": 0.192975432211101,
611
+ "eval_loss": 0.632267951965332,
612
+ "eval_runtime": 52.7108,
613
+ "eval_samples_per_second": 10.283,
614
+ "eval_steps_per_second": 1.29,
615
+ "eval_wer": 0.39191385615603413,
616
+ "step": 784
617
+ },
618
+ {
619
+ "epoch": 57.0,
620
+ "eval_cer": 0.19206551410373066,
621
+ "eval_loss": 0.626406192779541,
622
+ "eval_runtime": 52.675,
623
+ "eval_samples_per_second": 10.29,
624
+ "eval_steps_per_second": 1.291,
625
+ "eval_wer": 0.38703778951645673,
626
+ "step": 798
627
+ },
628
+ {
629
+ "epoch": 57.14,
630
+ "learning_rate": 1.8614285714285715e-05,
631
+ "loss": 0.4739,
632
+ "step": 800
633
+ },
634
+ {
635
+ "epoch": 58.0,
636
+ "eval_cer": 0.19210191082802547,
637
+ "eval_loss": 0.6290065050125122,
638
+ "eval_runtime": 52.661,
639
+ "eval_samples_per_second": 10.292,
640
+ "eval_steps_per_second": 1.291,
641
+ "eval_wer": 0.38724095895977245,
642
+ "step": 812
643
+ },
644
+ {
645
+ "epoch": 59.0,
646
+ "eval_cer": 0.19253867151956325,
647
+ "eval_loss": 0.6206580400466919,
648
+ "eval_runtime": 52.6443,
649
+ "eval_samples_per_second": 10.296,
650
+ "eval_steps_per_second": 1.292,
651
+ "eval_wer": 0.38642828118650957,
652
+ "step": 826
653
+ },
654
+ {
655
+ "epoch": 60.0,
656
+ "eval_cer": 0.19184713375796178,
657
+ "eval_loss": 0.6177613735198975,
658
+ "eval_runtime": 52.6855,
659
+ "eval_samples_per_second": 10.287,
660
+ "eval_steps_per_second": 1.291,
661
+ "eval_wer": 0.38581877285656235,
662
+ "step": 840
663
+ },
664
+ {
665
+ "epoch": 61.0,
666
+ "eval_cer": 0.19177434030937215,
667
+ "eval_loss": 0.6217214465141296,
668
+ "eval_runtime": 53.1515,
669
+ "eval_samples_per_second": 10.197,
670
+ "eval_steps_per_second": 1.279,
671
+ "eval_wer": 0.3860219422998781,
672
+ "step": 854
673
+ },
674
+ {
675
+ "epoch": 62.0,
676
+ "eval_cer": 0.1899545040946315,
677
+ "eval_loss": 0.6078172922134399,
678
+ "eval_runtime": 52.6994,
679
+ "eval_samples_per_second": 10.285,
680
+ "eval_steps_per_second": 1.29,
681
+ "eval_wer": 0.37992685900040635,
682
+ "step": 868
683
+ },
684
+ {
685
+ "epoch": 63.0,
686
+ "eval_cer": 0.1889353958143767,
687
+ "eval_loss": 0.6072229146957397,
688
+ "eval_runtime": 52.8038,
689
+ "eval_samples_per_second": 10.264,
690
+ "eval_steps_per_second": 1.288,
691
+ "eval_wer": 0.3780983340105648,
692
+ "step": 882
693
+ },
694
+ {
695
+ "epoch": 64.0,
696
+ "eval_cer": 0.18831665150136487,
697
+ "eval_loss": 0.6067555546760559,
698
+ "eval_runtime": 52.6766,
699
+ "eval_samples_per_second": 10.289,
700
+ "eval_steps_per_second": 1.291,
701
+ "eval_wer": 0.37606663957740755,
702
+ "step": 896
703
+ },
704
+ {
705
+ "epoch": 64.29,
706
+ "learning_rate": 1.7185714285714285e-05,
707
+ "loss": 0.3855,
708
+ "step": 900
709
+ },
710
+ {
711
+ "epoch": 65.0,
712
+ "eval_cer": 0.1870427661510464,
713
+ "eval_loss": 0.5944907665252686,
714
+ "eval_runtime": 52.7345,
715
+ "eval_samples_per_second": 10.278,
716
+ "eval_steps_per_second": 1.289,
717
+ "eval_wer": 0.3748476229175132,
718
+ "step": 910
719
+ },
720
+ {
721
+ "epoch": 66.0,
722
+ "eval_cer": 0.1899545040946315,
723
+ "eval_loss": 0.6194169521331787,
724
+ "eval_runtime": 52.7092,
725
+ "eval_samples_per_second": 10.283,
726
+ "eval_steps_per_second": 1.29,
727
+ "eval_wer": 0.37992685900040635,
728
+ "step": 924
729
+ },
730
+ {
731
+ "epoch": 67.0,
732
+ "eval_cer": 0.18846223839854412,
733
+ "eval_loss": 0.6044437289237976,
734
+ "eval_runtime": 52.6167,
735
+ "eval_samples_per_second": 10.301,
736
+ "eval_steps_per_second": 1.292,
737
+ "eval_wer": 0.3793173506704592,
738
+ "step": 938
739
+ },
740
+ {
741
+ "epoch": 68.0,
742
+ "eval_cer": 0.18798908098271155,
743
+ "eval_loss": 0.5945860147476196,
744
+ "eval_runtime": 52.6643,
745
+ "eval_samples_per_second": 10.292,
746
+ "eval_steps_per_second": 1.291,
747
+ "eval_wer": 0.37505079236082894,
748
+ "step": 952
749
+ },
750
+ {
751
+ "epoch": 69.0,
752
+ "eval_cer": 0.18798908098271155,
753
+ "eval_loss": 0.6115556955337524,
754
+ "eval_runtime": 52.6187,
755
+ "eval_samples_per_second": 10.301,
756
+ "eval_steps_per_second": 1.292,
757
+ "eval_wer": 0.37139374238114586,
758
+ "step": 966
759
+ },
760
+ {
761
+ "epoch": 70.0,
762
+ "eval_cer": 0.18609645131938127,
763
+ "eval_loss": 0.5876884460449219,
764
+ "eval_runtime": 52.6386,
765
+ "eval_samples_per_second": 10.297,
766
+ "eval_steps_per_second": 1.292,
767
+ "eval_wer": 0.36793986184477856,
768
+ "step": 980
769
+ },
770
+ {
771
+ "epoch": 71.0,
772
+ "eval_cer": 0.18627843494085533,
773
+ "eval_loss": 0.5861026048660278,
774
+ "eval_runtime": 52.6444,
775
+ "eval_samples_per_second": 10.296,
776
+ "eval_steps_per_second": 1.292,
777
+ "eval_wer": 0.36793986184477856,
778
+ "step": 994
779
+ },
780
+ {
781
+ "epoch": 71.43,
782
+ "learning_rate": 1.5757142857142858e-05,
783
+ "loss": 0.3302,
784
+ "step": 1000
785
+ },
786
+ {
787
+ "epoch": 72.0,
788
+ "eval_cer": 0.18562329390354868,
789
+ "eval_loss": 0.580510139465332,
790
+ "eval_runtime": 52.6419,
791
+ "eval_samples_per_second": 10.296,
792
+ "eval_steps_per_second": 1.292,
793
+ "eval_wer": 0.3685493701747257,
794
+ "step": 1008
795
+ },
796
+ {
797
+ "epoch": 73.0,
798
+ "eval_cer": 0.1862056414922657,
799
+ "eval_loss": 0.586155354976654,
800
+ "eval_runtime": 52.6421,
801
+ "eval_samples_per_second": 10.296,
802
+ "eval_steps_per_second": 1.292,
803
+ "eval_wer": 0.37139374238114586,
804
+ "step": 1022
805
+ },
806
+ {
807
+ "epoch": 74.0,
808
+ "eval_cer": 0.18656960873521383,
809
+ "eval_loss": 0.5920763611793518,
810
+ "eval_runtime": 52.6765,
811
+ "eval_samples_per_second": 10.289,
812
+ "eval_steps_per_second": 1.291,
813
+ "eval_wer": 0.372003250711093,
814
+ "step": 1036
815
+ },
816
+ {
817
+ "epoch": 75.0,
818
+ "eval_cer": 0.185368516833485,
819
+ "eval_loss": 0.5692393779754639,
820
+ "eval_runtime": 52.7333,
821
+ "eval_samples_per_second": 10.278,
822
+ "eval_steps_per_second": 1.29,
823
+ "eval_wer": 0.36834620073141,
824
+ "step": 1050
825
+ },
826
+ {
827
+ "epoch": 76.0,
828
+ "eval_cer": 0.18777070063694268,
829
+ "eval_loss": 0.5922245979309082,
830
+ "eval_runtime": 52.7512,
831
+ "eval_samples_per_second": 10.275,
832
+ "eval_steps_per_second": 1.289,
833
+ "eval_wer": 0.37017472572125154,
834
+ "step": 1064
835
+ },
836
+ {
837
+ "epoch": 77.0,
838
+ "eval_cer": 0.18828025477707006,
839
+ "eval_loss": 0.6105178594589233,
840
+ "eval_runtime": 52.65,
841
+ "eval_samples_per_second": 10.294,
842
+ "eval_steps_per_second": 1.292,
843
+ "eval_wer": 0.3709874034945144,
844
+ "step": 1078
845
+ },
846
+ {
847
+ "epoch": 78.0,
848
+ "eval_cer": 0.18558689717925386,
849
+ "eval_loss": 0.5873062014579773,
850
+ "eval_runtime": 53.0336,
851
+ "eval_samples_per_second": 10.22,
852
+ "eval_steps_per_second": 1.282,
853
+ "eval_wer": 0.36834620073141,
854
+ "step": 1092
855
+ },
856
+ {
857
+ "epoch": 78.57,
858
+ "learning_rate": 1.4328571428571428e-05,
859
+ "loss": 0.3046,
860
+ "step": 1100
861
+ },
862
+ {
863
+ "epoch": 79.0,
864
+ "eval_cer": 0.18587807097361236,
865
+ "eval_loss": 0.5825892090797424,
866
+ "eval_runtime": 52.7458,
867
+ "eval_samples_per_second": 10.276,
868
+ "eval_steps_per_second": 1.289,
869
+ "eval_wer": 0.3681430312880943,
870
+ "step": 1106
871
+ },
872
+ {
873
+ "epoch": 80.0,
874
+ "eval_cer": 0.18445859872611464,
875
+ "eval_loss": 0.5792315006256104,
876
+ "eval_runtime": 53.1292,
877
+ "eval_samples_per_second": 10.202,
878
+ "eval_steps_per_second": 1.28,
879
+ "eval_wer": 0.3632669646485169,
880
+ "step": 1120
881
+ },
882
+ {
883
+ "epoch": 81.0,
884
+ "eval_cer": 0.18347588717015467,
885
+ "eval_loss": 0.5737511515617371,
886
+ "eval_runtime": 52.6677,
887
+ "eval_samples_per_second": 10.291,
888
+ "eval_steps_per_second": 1.291,
889
+ "eval_wer": 0.3610321007720439,
890
+ "step": 1134
891
+ },
892
+ {
893
+ "epoch": 82.0,
894
+ "eval_cer": 0.1843130118289354,
895
+ "eval_loss": 0.579399585723877,
896
+ "eval_runtime": 53.0003,
897
+ "eval_samples_per_second": 10.226,
898
+ "eval_steps_per_second": 1.283,
899
+ "eval_wer": 0.36245428687525394,
900
+ "step": 1148
901
+ },
902
+ {
903
+ "epoch": 83.0,
904
+ "eval_cer": 0.18289353958143767,
905
+ "eval_loss": 0.5766276121139526,
906
+ "eval_runtime": 52.6724,
907
+ "eval_samples_per_second": 10.29,
908
+ "eval_steps_per_second": 1.291,
909
+ "eval_wer": 0.3563592035757822,
910
+ "step": 1162
911
+ },
912
+ {
913
+ "epoch": 84.0,
914
+ "eval_cer": 0.1830027297543221,
915
+ "eval_loss": 0.5745117664337158,
916
+ "eval_runtime": 52.7431,
917
+ "eval_samples_per_second": 10.276,
918
+ "eval_steps_per_second": 1.289,
919
+ "eval_wer": 0.35778138967899226,
920
+ "step": 1176
921
+ },
922
+ {
923
+ "epoch": 85.0,
924
+ "eval_cer": 0.18140127388535032,
925
+ "eval_loss": 0.561495840549469,
926
+ "eval_runtime": 52.6016,
927
+ "eval_samples_per_second": 10.304,
928
+ "eval_steps_per_second": 1.293,
929
+ "eval_wer": 0.3555465258025193,
930
+ "step": 1190
931
+ },
932
+ {
933
+ "epoch": 85.71,
934
+ "learning_rate": 1.29e-05,
935
+ "loss": 0.2927,
936
+ "step": 1200
937
+ },
938
+ {
939
+ "epoch": 86.0,
940
+ "eval_cer": 0.18282074613284804,
941
+ "eval_loss": 0.5853651762008667,
942
+ "eval_runtime": 52.6345,
943
+ "eval_samples_per_second": 10.297,
944
+ "eval_steps_per_second": 1.292,
945
+ "eval_wer": 0.36143843965867534,
946
+ "step": 1204
947
+ },
948
+ {
949
+ "epoch": 87.0,
950
+ "eval_cer": 0.1835122838944495,
951
+ "eval_loss": 0.5817971229553223,
952
+ "eval_runtime": 52.7194,
953
+ "eval_samples_per_second": 10.281,
954
+ "eval_steps_per_second": 1.29,
955
+ "eval_wer": 0.36245428687525394,
956
+ "step": 1218
957
+ },
958
+ {
959
+ "epoch": 88.0,
960
+ "eval_cer": 0.18154686078252957,
961
+ "eval_loss": 0.5613023638725281,
962
+ "eval_runtime": 52.6918,
963
+ "eval_samples_per_second": 10.286,
964
+ "eval_steps_per_second": 1.291,
965
+ "eval_wer": 0.35778138967899226,
966
+ "step": 1232
967
+ },
968
+ {
969
+ "epoch": 89.0,
970
+ "eval_cer": 0.18125568698817107,
971
+ "eval_loss": 0.5661023259162903,
972
+ "eval_runtime": 52.6276,
973
+ "eval_samples_per_second": 10.299,
974
+ "eval_steps_per_second": 1.292,
975
+ "eval_wer": 0.3549370174725721,
976
+ "step": 1246
977
+ },
978
+ {
979
+ "epoch": 90.0,
980
+ "eval_cer": 0.18202001819836214,
981
+ "eval_loss": 0.5794751048088074,
982
+ "eval_runtime": 52.7536,
983
+ "eval_samples_per_second": 10.274,
984
+ "eval_steps_per_second": 1.289,
985
+ "eval_wer": 0.36042259244209673,
986
+ "step": 1260
987
+ },
988
+ {
989
+ "epoch": 91.0,
990
+ "eval_cer": 0.18020018198362148,
991
+ "eval_loss": 0.5604133605957031,
992
+ "eval_runtime": 52.6351,
993
+ "eval_samples_per_second": 10.297,
994
+ "eval_steps_per_second": 1.292,
995
+ "eval_wer": 0.35209264526615197,
996
+ "step": 1274
997
+ },
998
+ {
999
+ "epoch": 92.0,
1000
+ "eval_cer": 0.1821656050955414,
1001
+ "eval_loss": 0.5738141536712646,
1002
+ "eval_runtime": 52.6638,
1003
+ "eval_samples_per_second": 10.292,
1004
+ "eval_steps_per_second": 1.291,
1005
+ "eval_wer": 0.35900040633888664,
1006
+ "step": 1288
1007
+ },
1008
+ {
1009
+ "epoch": 92.86,
1010
+ "learning_rate": 1.1471428571428572e-05,
1011
+ "loss": 0.2576,
1012
+ "step": 1300
1013
+ },
1014
+ {
1015
+ "epoch": 93.0,
1016
+ "eval_cer": 0.18140127388535032,
1017
+ "eval_loss": 0.5658465623855591,
1018
+ "eval_runtime": 52.9846,
1019
+ "eval_samples_per_second": 10.229,
1020
+ "eval_steps_per_second": 1.283,
1021
+ "eval_wer": 0.3573750507923608,
1022
+ "step": 1302
1023
+ },
1024
+ {
1025
+ "epoch": 94.0,
1026
+ "eval_cer": 0.1808189262966333,
1027
+ "eval_loss": 0.5620054006576538,
1028
+ "eval_runtime": 52.617,
1029
+ "eval_samples_per_second": 10.301,
1030
+ "eval_steps_per_second": 1.292,
1031
+ "eval_wer": 0.35107679804957337,
1032
+ "step": 1316
1033
+ },
1034
+ {
1035
+ "epoch": 95.0,
1036
+ "eval_cer": 0.18100090991810738,
1037
+ "eval_loss": 0.5709471106529236,
1038
+ "eval_runtime": 52.6962,
1039
+ "eval_samples_per_second": 10.285,
1040
+ "eval_steps_per_second": 1.29,
1041
+ "eval_wer": 0.35412433969930923,
1042
+ "step": 1330
1043
+ },
1044
+ {
1045
+ "epoch": 96.0,
1046
+ "eval_cer": 0.1799454049135578,
1047
+ "eval_loss": 0.5674740076065063,
1048
+ "eval_runtime": 52.6892,
1049
+ "eval_samples_per_second": 10.287,
1050
+ "eval_steps_per_second": 1.291,
1051
+ "eval_wer": 0.35026412027631043,
1052
+ "step": 1344
1053
+ },
1054
+ {
1055
+ "epoch": 97.0,
1056
+ "eval_cer": 0.18151046405823476,
1057
+ "eval_loss": 0.5788221955299377,
1058
+ "eval_runtime": 53.0799,
1059
+ "eval_samples_per_second": 10.211,
1060
+ "eval_steps_per_second": 1.281,
1061
+ "eval_wer": 0.3549370174725721,
1062
+ "step": 1358
1063
+ },
1064
+ {
1065
+ "epoch": 98.0,
1066
+ "eval_cer": 0.18096451319381257,
1067
+ "eval_loss": 0.5730317234992981,
1068
+ "eval_runtime": 52.5988,
1069
+ "eval_samples_per_second": 10.304,
1070
+ "eval_steps_per_second": 1.293,
1071
+ "eval_wer": 0.3524989841527834,
1072
+ "step": 1372
1073
+ },
1074
+ {
1075
+ "epoch": 99.0,
1076
+ "eval_cer": 0.1802729754322111,
1077
+ "eval_loss": 0.5693602561950684,
1078
+ "eval_runtime": 52.6503,
1079
+ "eval_samples_per_second": 10.294,
1080
+ "eval_steps_per_second": 1.292,
1081
+ "eval_wer": 0.35107679804957337,
1082
+ "step": 1386
1083
+ },
1084
+ {
1085
+ "epoch": 100.0,
1086
+ "learning_rate": 1.0042857142857144e-05,
1087
+ "loss": 0.2273,
1088
+ "step": 1400
1089
+ },
1090
+ {
1091
+ "epoch": 100.0,
1092
+ "eval_cer": 0.18070973612374885,
1093
+ "eval_loss": 0.5747684240341187,
1094
+ "eval_runtime": 52.7106,
1095
+ "eval_samples_per_second": 10.283,
1096
+ "eval_steps_per_second": 1.29,
1097
+ "eval_wer": 0.35270215359609913,
1098
+ "step": 1400
1099
+ },
1100
+ {
1101
+ "epoch": 101.0,
1102
+ "eval_cer": 0.17965423111919926,
1103
+ "eval_loss": 0.5688263773918152,
1104
+ "eval_runtime": 52.691,
1105
+ "eval_samples_per_second": 10.286,
1106
+ "eval_steps_per_second": 1.291,
1107
+ "eval_wer": 0.3512799674928891,
1108
+ "step": 1414
1109
+ },
1110
+ {
1111
+ "epoch": 102.0,
1112
+ "eval_cer": 0.18045495905368517,
1113
+ "eval_loss": 0.5767450332641602,
1114
+ "eval_runtime": 52.6967,
1115
+ "eval_samples_per_second": 10.285,
1116
+ "eval_steps_per_second": 1.29,
1117
+ "eval_wer": 0.35534335635920355,
1118
+ "step": 1428
1119
+ },
1120
+ {
1121
+ "epoch": 103.0,
1122
+ "eval_cer": 0.18118289353958145,
1123
+ "eval_loss": 0.5758454203605652,
1124
+ "eval_runtime": 52.6575,
1125
+ "eval_samples_per_second": 10.293,
1126
+ "eval_steps_per_second": 1.291,
1127
+ "eval_wer": 0.35290532303941485,
1128
+ "step": 1442
1129
+ },
1130
+ {
1131
+ "epoch": 104.0,
1132
+ "eval_cer": 0.17929026387625113,
1133
+ "eval_loss": 0.5641180872917175,
1134
+ "eval_runtime": 52.7234,
1135
+ "eval_samples_per_second": 10.28,
1136
+ "eval_steps_per_second": 1.29,
1137
+ "eval_wer": 0.35067045916294187,
1138
+ "step": 1456
1139
+ },
1140
+ {
1141
+ "epoch": 105.0,
1142
+ "eval_cer": 0.178926296633303,
1143
+ "eval_loss": 0.5628452897071838,
1144
+ "eval_runtime": 52.9721,
1145
+ "eval_samples_per_second": 10.232,
1146
+ "eval_steps_per_second": 1.284,
1147
+ "eval_wer": 0.34945144250304755,
1148
+ "step": 1470
1149
+ },
1150
+ {
1151
+ "epoch": 106.0,
1152
+ "eval_cer": 0.1788898999090082,
1153
+ "eval_loss": 0.5728613138198853,
1154
+ "eval_runtime": 52.6612,
1155
+ "eval_samples_per_second": 10.292,
1156
+ "eval_steps_per_second": 1.291,
1157
+ "eval_wer": 0.3466070702966274,
1158
+ "step": 1484
1159
+ },
1160
+ {
1161
+ "epoch": 107.0,
1162
+ "eval_cer": 0.17983621474067335,
1163
+ "eval_loss": 0.5722076892852783,
1164
+ "eval_runtime": 52.7987,
1165
+ "eval_samples_per_second": 10.265,
1166
+ "eval_steps_per_second": 1.288,
1167
+ "eval_wer": 0.34965461194636327,
1168
+ "step": 1498
1169
+ },
1170
+ {
1171
+ "epoch": 107.14,
1172
+ "learning_rate": 8.614285714285714e-06,
1173
+ "loss": 0.2181,
1174
+ "step": 1500
1175
+ },
1176
+ {
1177
+ "epoch": 108.0,
1178
+ "eval_cer": 0.17881710646041857,
1179
+ "eval_loss": 0.5552828907966614,
1180
+ "eval_runtime": 52.6793,
1181
+ "eval_samples_per_second": 10.289,
1182
+ "eval_steps_per_second": 1.291,
1183
+ "eval_wer": 0.3466070702966274,
1184
+ "step": 1512
1185
+ },
1186
+ {
1187
+ "epoch": 109.0,
1188
+ "eval_cer": 0.1792174704276615,
1189
+ "eval_loss": 0.5581598877906799,
1190
+ "eval_runtime": 52.8837,
1191
+ "eval_samples_per_second": 10.249,
1192
+ "eval_steps_per_second": 1.286,
1193
+ "eval_wer": 0.3484355952864689,
1194
+ "step": 1526
1195
+ },
1196
+ {
1197
+ "epoch": 110.0,
1198
+ "eval_cer": 0.18020018198362148,
1199
+ "eval_loss": 0.5701535940170288,
1200
+ "eval_runtime": 52.7198,
1201
+ "eval_samples_per_second": 10.281,
1202
+ "eval_steps_per_second": 1.29,
1203
+ "eval_wer": 0.35209264526615197,
1204
+ "step": 1540
1205
+ },
1206
+ {
1207
+ "epoch": 111.0,
1208
+ "eval_cer": 0.17976342129208372,
1209
+ "eval_loss": 0.5691486597061157,
1210
+ "eval_runtime": 52.7934,
1211
+ "eval_samples_per_second": 10.266,
1212
+ "eval_steps_per_second": 1.288,
1213
+ "eval_wer": 0.35046728971962615,
1214
+ "step": 1554
1215
+ },
1216
+ {
1217
+ "epoch": 112.0,
1218
+ "eval_cer": 0.17856232939035488,
1219
+ "eval_loss": 0.560352087020874,
1220
+ "eval_runtime": 52.6568,
1221
+ "eval_samples_per_second": 10.293,
1222
+ "eval_steps_per_second": 1.291,
1223
+ "eval_wer": 0.34701340918325885,
1224
+ "step": 1568
1225
+ },
1226
+ {
1227
+ "epoch": 113.0,
1228
+ "eval_cer": 0.17950864422202,
1229
+ "eval_loss": 0.5661062002182007,
1230
+ "eval_runtime": 52.8124,
1231
+ "eval_samples_per_second": 10.263,
1232
+ "eval_steps_per_second": 1.288,
1233
+ "eval_wer": 0.34823242584315317,
1234
+ "step": 1582
1235
+ },
1236
+ {
1237
+ "epoch": 114.0,
1238
+ "eval_cer": 0.17961783439490445,
1239
+ "eval_loss": 0.5682941675186157,
1240
+ "eval_runtime": 52.7057,
1241
+ "eval_samples_per_second": 10.284,
1242
+ "eval_steps_per_second": 1.29,
1243
+ "eval_wer": 0.35107679804957337,
1244
+ "step": 1596
1245
+ },
1246
+ {
1247
+ "epoch": 114.29,
1248
+ "learning_rate": 7.185714285714286e-06,
1249
+ "loss": 0.2171,
1250
+ "step": 1600
1251
+ },
1252
+ {
1253
+ "epoch": 115.0,
1254
+ "eval_cer": 0.17979981801637854,
1255
+ "eval_loss": 0.573845624923706,
1256
+ "eval_runtime": 52.7754,
1257
+ "eval_samples_per_second": 10.27,
1258
+ "eval_steps_per_second": 1.288,
1259
+ "eval_wer": 0.3508736286062576,
1260
+ "step": 1610
1261
+ },
1262
+ {
1263
+ "epoch": 116.0,
1264
+ "eval_cer": 0.17932666060054595,
1265
+ "eval_loss": 0.5730240941047668,
1266
+ "eval_runtime": 52.7098,
1267
+ "eval_samples_per_second": 10.283,
1268
+ "eval_steps_per_second": 1.29,
1269
+ "eval_wer": 0.34579439252336447,
1270
+ "step": 1624
1271
+ },
1272
+ {
1273
+ "epoch": 117.0,
1274
+ "eval_cer": 0.178926296633303,
1275
+ "eval_loss": 0.5704598426818848,
1276
+ "eval_runtime": 52.6695,
1277
+ "eval_samples_per_second": 10.291,
1278
+ "eval_steps_per_second": 1.291,
1279
+ "eval_wer": 0.34559122308004875,
1280
+ "step": 1638
1281
+ },
1282
+ {
1283
+ "epoch": 118.0,
1284
+ "eval_cer": 0.17961783439490445,
1285
+ "eval_loss": 0.5813525319099426,
1286
+ "eval_runtime": 53.1651,
1287
+ "eval_samples_per_second": 10.195,
1288
+ "eval_steps_per_second": 1.279,
1289
+ "eval_wer": 0.3466070702966274,
1290
+ "step": 1652
1291
+ },
1292
+ {
1293
+ "epoch": 119.0,
1294
+ "eval_cer": 0.17907188353048226,
1295
+ "eval_loss": 0.5714964866638184,
1296
+ "eval_runtime": 52.764,
1297
+ "eval_samples_per_second": 10.272,
1298
+ "eval_steps_per_second": 1.289,
1299
+ "eval_wer": 0.3441690369768387,
1300
+ "step": 1666
1301
+ },
1302
+ {
1303
+ "epoch": 120.0,
1304
+ "eval_cer": 0.17976342129208372,
1305
+ "eval_loss": 0.5720311403274536,
1306
+ "eval_runtime": 53.0408,
1307
+ "eval_samples_per_second": 10.219,
1308
+ "eval_steps_per_second": 1.282,
1309
+ "eval_wer": 0.34701340918325885,
1310
+ "step": 1680
1311
+ },
1312
+ {
1313
+ "epoch": 121.0,
1314
+ "eval_cer": 0.17969062784349407,
1315
+ "eval_loss": 0.5768777132034302,
1316
+ "eval_runtime": 52.7851,
1317
+ "eval_samples_per_second": 10.268,
1318
+ "eval_steps_per_second": 1.288,
1319
+ "eval_wer": 0.34701340918325885,
1320
+ "step": 1694
1321
+ },
1322
+ {
1323
+ "epoch": 121.43,
1324
+ "learning_rate": 5.7571428571428574e-06,
1325
+ "loss": 0.1986,
1326
+ "step": 1700
1327
+ },
1328
+ {
1329
+ "epoch": 122.0,
1330
+ "eval_cer": 0.1791810737033667,
1331
+ "eval_loss": 0.571117639541626,
1332
+ "eval_runtime": 52.9656,
1333
+ "eval_samples_per_second": 10.233,
1334
+ "eval_steps_per_second": 1.284,
1335
+ "eval_wer": 0.3464039008533117,
1336
+ "step": 1708
1337
+ },
1338
+ {
1339
+ "epoch": 123.0,
1340
+ "eval_cer": 0.17903548680618744,
1341
+ "eval_loss": 0.5728168487548828,
1342
+ "eval_runtime": 52.927,
1343
+ "eval_samples_per_second": 10.241,
1344
+ "eval_steps_per_second": 1.285,
1345
+ "eval_wer": 0.3441690369768387,
1346
+ "step": 1722
1347
+ },
1348
+ {
1349
+ "epoch": 124.0,
1350
+ "eval_cer": 0.17830755232029116,
1351
+ "eval_loss": 0.5667761564254761,
1352
+ "eval_runtime": 52.78,
1353
+ "eval_samples_per_second": 10.269,
1354
+ "eval_steps_per_second": 1.288,
1355
+ "eval_wer": 0.3449817147501016,
1356
+ "step": 1736
1357
+ },
1358
+ {
1359
+ "epoch": 125.0,
1360
+ "eval_cer": 0.17969062784349407,
1361
+ "eval_loss": 0.5855135321617126,
1362
+ "eval_runtime": 52.7911,
1363
+ "eval_samples_per_second": 10.267,
1364
+ "eval_steps_per_second": 1.288,
1365
+ "eval_wer": 0.3484355952864689,
1366
+ "step": 1750
1367
+ },
1368
+ {
1369
+ "epoch": 126.0,
1370
+ "eval_cer": 0.17827115559599635,
1371
+ "eval_loss": 0.5667468905448914,
1372
+ "eval_runtime": 52.8968,
1373
+ "eval_samples_per_second": 10.246,
1374
+ "eval_steps_per_second": 1.286,
1375
+ "eval_wer": 0.3427468508736286,
1376
+ "step": 1764
1377
+ },
1378
+ {
1379
+ "epoch": 127.0,
1380
+ "eval_cer": 0.1788898999090082,
1381
+ "eval_loss": 0.5710840821266174,
1382
+ "eval_runtime": 52.7705,
1383
+ "eval_samples_per_second": 10.271,
1384
+ "eval_steps_per_second": 1.289,
1385
+ "eval_wer": 0.3459975619666802,
1386
+ "step": 1778
1387
+ },
1388
+ {
1389
+ "epoch": 128.0,
1390
+ "eval_cer": 0.1781255686988171,
1391
+ "eval_loss": 0.5681577920913696,
1392
+ "eval_runtime": 52.7919,
1393
+ "eval_samples_per_second": 10.267,
1394
+ "eval_steps_per_second": 1.288,
1395
+ "eval_wer": 0.3443722064201544,
1396
+ "step": 1792
1397
+ },
1398
+ {
1399
+ "epoch": 128.0,
1400
+ "step": 1792,
1401
+ "total_flos": 1.5995217836155625e+19,
1402
+ "train_loss": 1.3990157055003303,
1403
+ "train_runtime": 26333.2616,
1404
+ "train_samples_per_second": 2.552,
1405
+ "train_steps_per_second": 0.08
1406
+ }
1407
+ ],
1408
+ "max_steps": 2100,
1409
+ "num_train_epochs": 150,
1410
+ "total_flos": 1.5995217836155625e+19,
1411
+ "trial_name": null,
1412
+ "trial_params": null
1413
+ }