csikasote commited on
Commit
e4bc8d6
·
verified ·
1 Parent(s): d19363e

End of training

Browse files
README.md CHANGED
@@ -4,11 +4,23 @@ license: apache-2.0
4
  base_model: openai/whisper-medium
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - wer
9
  model-index:
10
  - name: whisper-medium-swagen-combined-30hrs-model
11
- results: []
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # whisper-medium-swagen-combined-30hrs-model
18
 
19
- This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.3702
22
- - Wer: 0.2201
23
 
24
  ## Model description
25
 
 
4
  base_model: openai/whisper-medium
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - swagen
9
  metrics:
10
  - wer
11
  model-index:
12
  - name: whisper-medium-swagen-combined-30hrs-model
13
+ results:
14
+ - task:
15
+ name: Automatic Speech Recognition
16
+ type: automatic-speech-recognition
17
+ dataset:
18
+ name: swagen
19
+ type: swagen
20
+ metrics:
21
+ - name: Wer
22
+ type: wer
23
+ value: 0.2234172077922078
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
28
 
29
  # whisper-medium-swagen-combined-30hrs-model
30
 
31
+ This model is a fine-tuned version of [openai/whisper-medium](https://huggingface.co/openai/whisper-medium) on the swagen dataset.
32
  It achieves the following results on the evaluation set:
33
+ - Loss: 0.3610
34
+ - Wer: 0.2234
35
 
36
  ## Model description
37
 
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.2421608196212357,
3
+ "eval_loss": 0.36100757122039795,
4
+ "eval_runtime": 571.0474,
5
+ "eval_samples": 1087,
6
+ "eval_samples_per_second": 1.904,
7
+ "eval_steps_per_second": 0.953,
8
+ "eval_wer": 0.2234172077922078,
9
+ "total_flos": 2.449860020011008e+19,
10
+ "train_loss": 1.494960307121277,
11
+ "train_runtime": 14532.737,
12
+ "train_samples": 19326,
13
+ "train_samples_per_second": 39.895,
14
+ "train_steps_per_second": 4.985
15
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.2421608196212357,
3
+ "eval_loss": 0.36100757122039795,
4
+ "eval_runtime": 571.0474,
5
+ "eval_samples": 1087,
6
+ "eval_samples_per_second": 1.904,
7
+ "eval_steps_per_second": 0.953,
8
+ "eval_wer": 0.2234172077922078
9
+ }
runs/Jan05_03-06-11_srvrocgpu011.uct.ac.za/events.out.tfevents.1736054665.srvrocgpu011.uct.ac.za ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df6a290bf4dd264e9ac5ccbb93b9a786229006cdfaded18f1080b4067730cc2d
3
+ size 40
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.2421608196212357,
3
+ "total_flos": 2.449860020011008e+19,
4
+ "train_loss": 1.494960307121277,
5
+ "train_runtime": 14532.737,
6
+ "train_samples": 19326,
7
+ "train_samples_per_second": 39.895,
8
+ "train_steps_per_second": 4.985
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1026 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.36100757122039795,
3
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-30hrs-model/checkpoint-2400",
4
+ "epoch": 1.2421608196212357,
5
+ "eval_steps": 200,
6
+ "global_step": 3000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01034875297526648,
13
+ "grad_norm": 139.34642028808594,
14
+ "learning_rate": 4.0000000000000003e-07,
15
+ "loss": 10.2474,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.02069750595053296,
20
+ "grad_norm": 89.0820541381836,
21
+ "learning_rate": 9.000000000000001e-07,
22
+ "loss": 8.2652,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.03104625892579944,
27
+ "grad_norm": 85.51993560791016,
28
+ "learning_rate": 1.4000000000000001e-06,
29
+ "loss": 6.0026,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.04139501190106592,
34
+ "grad_norm": 91.87001037597656,
35
+ "learning_rate": 1.9000000000000002e-06,
36
+ "loss": 4.5811,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.0517437648763324,
41
+ "grad_norm": 79.70684814453125,
42
+ "learning_rate": 2.4000000000000003e-06,
43
+ "loss": 3.2507,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.06209251785159888,
48
+ "grad_norm": 72.15753936767578,
49
+ "learning_rate": 2.9e-06,
50
+ "loss": 3.2154,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.07244127082686536,
55
+ "grad_norm": 60.531593322753906,
56
+ "learning_rate": 3.4000000000000005e-06,
57
+ "loss": 2.8239,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.08279002380213184,
62
+ "grad_norm": 60.771507263183594,
63
+ "learning_rate": 3.900000000000001e-06,
64
+ "loss": 2.7508,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.08279002380213184,
69
+ "eval_loss": 0.8133686184883118,
70
+ "eval_runtime": 566.5007,
71
+ "eval_samples_per_second": 1.919,
72
+ "eval_steps_per_second": 0.96,
73
+ "eval_wer": 0.4877232142857143,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.09313877677739832,
78
+ "grad_norm": 51.035518646240234,
79
+ "learning_rate": 4.4e-06,
80
+ "loss": 2.2578,
81
+ "step": 225
82
+ },
83
+ {
84
+ "epoch": 0.1034875297526648,
85
+ "grad_norm": 69.9592514038086,
86
+ "learning_rate": 4.9000000000000005e-06,
87
+ "loss": 2.4931,
88
+ "step": 250
89
+ },
90
+ {
91
+ "epoch": 0.11383628272793128,
92
+ "grad_norm": 55.8157844543457,
93
+ "learning_rate": 5.400000000000001e-06,
94
+ "loss": 2.2654,
95
+ "step": 275
96
+ },
97
+ {
98
+ "epoch": 0.12418503570319776,
99
+ "grad_norm": 57.47214889526367,
100
+ "learning_rate": 5.9e-06,
101
+ "loss": 2.1863,
102
+ "step": 300
103
+ },
104
+ {
105
+ "epoch": 0.13453378867846424,
106
+ "grad_norm": 48.41241455078125,
107
+ "learning_rate": 6.4000000000000006e-06,
108
+ "loss": 2.1504,
109
+ "step": 325
110
+ },
111
+ {
112
+ "epoch": 0.1448825416537307,
113
+ "grad_norm": 61.10322570800781,
114
+ "learning_rate": 6.9e-06,
115
+ "loss": 2.1106,
116
+ "step": 350
117
+ },
118
+ {
119
+ "epoch": 0.1552312946289972,
120
+ "grad_norm": 49.85371017456055,
121
+ "learning_rate": 7.4e-06,
122
+ "loss": 1.9696,
123
+ "step": 375
124
+ },
125
+ {
126
+ "epoch": 0.16558004760426367,
127
+ "grad_norm": 78.61709594726562,
128
+ "learning_rate": 7.9e-06,
129
+ "loss": 1.8748,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 0.16558004760426367,
134
+ "eval_loss": 0.6291218996047974,
135
+ "eval_runtime": 561.4046,
136
+ "eval_samples_per_second": 1.936,
137
+ "eval_steps_per_second": 0.969,
138
+ "eval_wer": 0.3898133116883117,
139
+ "step": 400
140
+ },
141
+ {
142
+ "epoch": 0.17592880057953017,
143
+ "grad_norm": 58.551658630371094,
144
+ "learning_rate": 8.400000000000001e-06,
145
+ "loss": 1.848,
146
+ "step": 425
147
+ },
148
+ {
149
+ "epoch": 0.18627755355479664,
150
+ "grad_norm": 37.77573013305664,
151
+ "learning_rate": 8.900000000000001e-06,
152
+ "loss": 1.9659,
153
+ "step": 450
154
+ },
155
+ {
156
+ "epoch": 0.19662630653006313,
157
+ "grad_norm": 67.77395629882812,
158
+ "learning_rate": 9.4e-06,
159
+ "loss": 2.0312,
160
+ "step": 475
161
+ },
162
+ {
163
+ "epoch": 0.2069750595053296,
164
+ "grad_norm": 48.11214828491211,
165
+ "learning_rate": 9.9e-06,
166
+ "loss": 1.795,
167
+ "step": 500
168
+ },
169
+ {
170
+ "epoch": 0.2173238124805961,
171
+ "grad_norm": 54.6898307800293,
172
+ "learning_rate": 9.997220291869355e-06,
173
+ "loss": 1.8759,
174
+ "step": 525
175
+ },
176
+ {
177
+ "epoch": 0.22767256545586256,
178
+ "grad_norm": 56.6167106628418,
179
+ "learning_rate": 9.993745656706047e-06,
180
+ "loss": 1.9263,
181
+ "step": 550
182
+ },
183
+ {
184
+ "epoch": 0.23802131843112906,
185
+ "grad_norm": 43.32560729980469,
186
+ "learning_rate": 9.990271021542739e-06,
187
+ "loss": 1.889,
188
+ "step": 575
189
+ },
190
+ {
191
+ "epoch": 0.24837007140639553,
192
+ "grad_norm": 49.59733581542969,
193
+ "learning_rate": 9.986796386379432e-06,
194
+ "loss": 1.6214,
195
+ "step": 600
196
+ },
197
+ {
198
+ "epoch": 0.24837007140639553,
199
+ "eval_loss": 0.5559969544410706,
200
+ "eval_runtime": 561.0579,
201
+ "eval_samples_per_second": 1.937,
202
+ "eval_steps_per_second": 0.97,
203
+ "eval_wer": 0.3431412337662338,
204
+ "step": 600
205
+ },
206
+ {
207
+ "epoch": 0.258718824381662,
208
+ "grad_norm": 40.30105972290039,
209
+ "learning_rate": 9.983321751216123e-06,
210
+ "loss": 1.6189,
211
+ "step": 625
212
+ },
213
+ {
214
+ "epoch": 0.2690675773569285,
215
+ "grad_norm": 42.67938995361328,
216
+ "learning_rate": 9.979847116052815e-06,
217
+ "loss": 1.3983,
218
+ "step": 650
219
+ },
220
+ {
221
+ "epoch": 0.279416330332195,
222
+ "grad_norm": 36.10395431518555,
223
+ "learning_rate": 9.976372480889508e-06,
224
+ "loss": 1.6815,
225
+ "step": 675
226
+ },
227
+ {
228
+ "epoch": 0.2897650833074614,
229
+ "grad_norm": 42.29354476928711,
230
+ "learning_rate": 9.9728978457262e-06,
231
+ "loss": 1.5022,
232
+ "step": 700
233
+ },
234
+ {
235
+ "epoch": 0.3001138362827279,
236
+ "grad_norm": 50.72279357910156,
237
+ "learning_rate": 9.969423210562891e-06,
238
+ "loss": 1.5644,
239
+ "step": 725
240
+ },
241
+ {
242
+ "epoch": 0.3104625892579944,
243
+ "grad_norm": 49.18547821044922,
244
+ "learning_rate": 9.965948575399585e-06,
245
+ "loss": 1.6269,
246
+ "step": 750
247
+ },
248
+ {
249
+ "epoch": 0.3208113422332609,
250
+ "grad_norm": 42.0450325012207,
251
+ "learning_rate": 9.962473940236276e-06,
252
+ "loss": 1.4277,
253
+ "step": 775
254
+ },
255
+ {
256
+ "epoch": 0.33116009520852735,
257
+ "grad_norm": 58.25082015991211,
258
+ "learning_rate": 9.958999305072968e-06,
259
+ "loss": 1.559,
260
+ "step": 800
261
+ },
262
+ {
263
+ "epoch": 0.33116009520852735,
264
+ "eval_loss": 0.49684250354766846,
265
+ "eval_runtime": 559.6818,
266
+ "eval_samples_per_second": 1.942,
267
+ "eval_steps_per_second": 0.972,
268
+ "eval_wer": 0.29525162337662336,
269
+ "step": 800
270
+ },
271
+ {
272
+ "epoch": 0.34150884818379384,
273
+ "grad_norm": 37.699710845947266,
274
+ "learning_rate": 9.955524669909661e-06,
275
+ "loss": 1.2958,
276
+ "step": 825
277
+ },
278
+ {
279
+ "epoch": 0.35185760115906034,
280
+ "grad_norm": 30.309236526489258,
281
+ "learning_rate": 9.952050034746353e-06,
282
+ "loss": 1.4174,
283
+ "step": 850
284
+ },
285
+ {
286
+ "epoch": 0.36220635413432684,
287
+ "grad_norm": 43.847652435302734,
288
+ "learning_rate": 9.948575399583044e-06,
289
+ "loss": 1.5185,
290
+ "step": 875
291
+ },
292
+ {
293
+ "epoch": 0.3725551071095933,
294
+ "grad_norm": 57.50359344482422,
295
+ "learning_rate": 9.945100764419738e-06,
296
+ "loss": 1.5072,
297
+ "step": 900
298
+ },
299
+ {
300
+ "epoch": 0.38290386008485977,
301
+ "grad_norm": 43.106422424316406,
302
+ "learning_rate": 9.941626129256429e-06,
303
+ "loss": 1.4437,
304
+ "step": 925
305
+ },
306
+ {
307
+ "epoch": 0.39325261306012627,
308
+ "grad_norm": 38.97978591918945,
309
+ "learning_rate": 9.93815149409312e-06,
310
+ "loss": 1.4267,
311
+ "step": 950
312
+ },
313
+ {
314
+ "epoch": 0.40360136603539276,
315
+ "grad_norm": 39.729026794433594,
316
+ "learning_rate": 9.934676858929814e-06,
317
+ "loss": 1.4166,
318
+ "step": 975
319
+ },
320
+ {
321
+ "epoch": 0.4139501190106592,
322
+ "grad_norm": 45.119964599609375,
323
+ "learning_rate": 9.931202223766506e-06,
324
+ "loss": 1.3616,
325
+ "step": 1000
326
+ },
327
+ {
328
+ "epoch": 0.4139501190106592,
329
+ "eval_loss": 0.4720439910888672,
330
+ "eval_runtime": 559.355,
331
+ "eval_samples_per_second": 1.943,
332
+ "eval_steps_per_second": 0.973,
333
+ "eval_wer": 0.2872362012987013,
334
+ "step": 1000
335
+ },
336
+ {
337
+ "epoch": 0.4242988719859257,
338
+ "grad_norm": 39.95614242553711,
339
+ "learning_rate": 9.927727588603197e-06,
340
+ "loss": 1.4632,
341
+ "step": 1025
342
+ },
343
+ {
344
+ "epoch": 0.4346476249611922,
345
+ "grad_norm": 26.40887451171875,
346
+ "learning_rate": 9.92425295343989e-06,
347
+ "loss": 1.2984,
348
+ "step": 1050
349
+ },
350
+ {
351
+ "epoch": 0.44499637793645863,
352
+ "grad_norm": 42.91450500488281,
353
+ "learning_rate": 9.920778318276582e-06,
354
+ "loss": 1.5535,
355
+ "step": 1075
356
+ },
357
+ {
358
+ "epoch": 0.4553451309117251,
359
+ "grad_norm": 61.96196746826172,
360
+ "learning_rate": 9.917303683113274e-06,
361
+ "loss": 1.2644,
362
+ "step": 1100
363
+ },
364
+ {
365
+ "epoch": 0.4656938838869916,
366
+ "grad_norm": 54.67252731323242,
367
+ "learning_rate": 9.913829047949967e-06,
368
+ "loss": 1.1995,
369
+ "step": 1125
370
+ },
371
+ {
372
+ "epoch": 0.4760426368622581,
373
+ "grad_norm": 46.1182975769043,
374
+ "learning_rate": 9.910354412786658e-06,
375
+ "loss": 1.5119,
376
+ "step": 1150
377
+ },
378
+ {
379
+ "epoch": 0.48639138983752456,
380
+ "grad_norm": 43.22040557861328,
381
+ "learning_rate": 9.90687977762335e-06,
382
+ "loss": 1.244,
383
+ "step": 1175
384
+ },
385
+ {
386
+ "epoch": 0.49674014281279105,
387
+ "grad_norm": 45.609161376953125,
388
+ "learning_rate": 9.903405142460043e-06,
389
+ "loss": 1.3078,
390
+ "step": 1200
391
+ },
392
+ {
393
+ "epoch": 0.49674014281279105,
394
+ "eval_loss": 0.4577382802963257,
395
+ "eval_runtime": 550.5837,
396
+ "eval_samples_per_second": 1.974,
397
+ "eval_steps_per_second": 0.988,
398
+ "eval_wer": 0.29778814935064934,
399
+ "step": 1200
400
+ },
401
+ {
402
+ "epoch": 0.5070888957880575,
403
+ "grad_norm": 57.24745178222656,
404
+ "learning_rate": 9.899930507296735e-06,
405
+ "loss": 1.2625,
406
+ "step": 1225
407
+ },
408
+ {
409
+ "epoch": 0.517437648763324,
410
+ "grad_norm": 29.896320343017578,
411
+ "learning_rate": 9.896455872133426e-06,
412
+ "loss": 1.2438,
413
+ "step": 1250
414
+ },
415
+ {
416
+ "epoch": 0.5277864017385905,
417
+ "grad_norm": 56.462646484375,
418
+ "learning_rate": 9.89298123697012e-06,
419
+ "loss": 1.3759,
420
+ "step": 1275
421
+ },
422
+ {
423
+ "epoch": 0.538135154713857,
424
+ "grad_norm": 39.78219985961914,
425
+ "learning_rate": 9.889506601806811e-06,
426
+ "loss": 1.3728,
427
+ "step": 1300
428
+ },
429
+ {
430
+ "epoch": 0.5484839076891235,
431
+ "grad_norm": 45.62682342529297,
432
+ "learning_rate": 9.886031966643503e-06,
433
+ "loss": 1.3165,
434
+ "step": 1325
435
+ },
436
+ {
437
+ "epoch": 0.55883266066439,
438
+ "grad_norm": 41.924949645996094,
439
+ "learning_rate": 9.882557331480196e-06,
440
+ "loss": 1.3968,
441
+ "step": 1350
442
+ },
443
+ {
444
+ "epoch": 0.5691814136396565,
445
+ "grad_norm": 39.86012649536133,
446
+ "learning_rate": 9.879082696316888e-06,
447
+ "loss": 1.1669,
448
+ "step": 1375
449
+ },
450
+ {
451
+ "epoch": 0.5795301666149228,
452
+ "grad_norm": 30.276840209960938,
453
+ "learning_rate": 9.87560806115358e-06,
454
+ "loss": 1.2579,
455
+ "step": 1400
456
+ },
457
+ {
458
+ "epoch": 0.5795301666149228,
459
+ "eval_loss": 0.42178860306739807,
460
+ "eval_runtime": 565.0754,
461
+ "eval_samples_per_second": 1.924,
462
+ "eval_steps_per_second": 0.963,
463
+ "eval_wer": 0.2757711038961039,
464
+ "step": 1400
465
+ },
466
+ {
467
+ "epoch": 0.5898789195901893,
468
+ "grad_norm": 27.80983543395996,
469
+ "learning_rate": 9.872133425990272e-06,
470
+ "loss": 1.1973,
471
+ "step": 1425
472
+ },
473
+ {
474
+ "epoch": 0.6002276725654558,
475
+ "grad_norm": 54.13436508178711,
476
+ "learning_rate": 9.868658790826964e-06,
477
+ "loss": 1.3214,
478
+ "step": 1450
479
+ },
480
+ {
481
+ "epoch": 0.6105764255407223,
482
+ "grad_norm": 31.381301879882812,
483
+ "learning_rate": 9.865184155663656e-06,
484
+ "loss": 1.3413,
485
+ "step": 1475
486
+ },
487
+ {
488
+ "epoch": 0.6209251785159888,
489
+ "grad_norm": 35.9166259765625,
490
+ "learning_rate": 9.861709520500349e-06,
491
+ "loss": 1.2117,
492
+ "step": 1500
493
+ },
494
+ {
495
+ "epoch": 0.6312739314912553,
496
+ "grad_norm": 37.55325698852539,
497
+ "learning_rate": 9.85823488533704e-06,
498
+ "loss": 1.2422,
499
+ "step": 1525
500
+ },
501
+ {
502
+ "epoch": 0.6416226844665218,
503
+ "grad_norm": 32.475807189941406,
504
+ "learning_rate": 9.854760250173732e-06,
505
+ "loss": 1.2666,
506
+ "step": 1550
507
+ },
508
+ {
509
+ "epoch": 0.6519714374417883,
510
+ "grad_norm": 35.703243255615234,
511
+ "learning_rate": 9.851285615010425e-06,
512
+ "loss": 1.1171,
513
+ "step": 1575
514
+ },
515
+ {
516
+ "epoch": 0.6623201904170547,
517
+ "grad_norm": 41.8420524597168,
518
+ "learning_rate": 9.847810979847117e-06,
519
+ "loss": 1.214,
520
+ "step": 1600
521
+ },
522
+ {
523
+ "epoch": 0.6623201904170547,
524
+ "eval_loss": 0.41559898853302,
525
+ "eval_runtime": 555.4115,
526
+ "eval_samples_per_second": 1.957,
527
+ "eval_steps_per_second": 0.979,
528
+ "eval_wer": 0.26542207792207795,
529
+ "step": 1600
530
+ },
531
+ {
532
+ "epoch": 0.6726689433923212,
533
+ "grad_norm": 38.391361236572266,
534
+ "learning_rate": 9.844336344683808e-06,
535
+ "loss": 1.2246,
536
+ "step": 1625
537
+ },
538
+ {
539
+ "epoch": 0.6830176963675877,
540
+ "grad_norm": 57.34391784667969,
541
+ "learning_rate": 9.840861709520502e-06,
542
+ "loss": 1.0536,
543
+ "step": 1650
544
+ },
545
+ {
546
+ "epoch": 0.6933664493428542,
547
+ "grad_norm": 19.72743034362793,
548
+ "learning_rate": 9.837387074357193e-06,
549
+ "loss": 1.2277,
550
+ "step": 1675
551
+ },
552
+ {
553
+ "epoch": 0.7037152023181207,
554
+ "grad_norm": 38.91509246826172,
555
+ "learning_rate": 9.833912439193885e-06,
556
+ "loss": 1.2657,
557
+ "step": 1700
558
+ },
559
+ {
560
+ "epoch": 0.7140639552933872,
561
+ "grad_norm": 51.42982864379883,
562
+ "learning_rate": 9.830437804030578e-06,
563
+ "loss": 1.2865,
564
+ "step": 1725
565
+ },
566
+ {
567
+ "epoch": 0.7244127082686537,
568
+ "grad_norm": 37.256927490234375,
569
+ "learning_rate": 9.82696316886727e-06,
570
+ "loss": 1.1125,
571
+ "step": 1750
572
+ },
573
+ {
574
+ "epoch": 0.73476146124392,
575
+ "grad_norm": 37.06630325317383,
576
+ "learning_rate": 9.823488533703961e-06,
577
+ "loss": 1.2179,
578
+ "step": 1775
579
+ },
580
+ {
581
+ "epoch": 0.7451102142191866,
582
+ "grad_norm": 28.06951332092285,
583
+ "learning_rate": 9.820013898540655e-06,
584
+ "loss": 1.0719,
585
+ "step": 1800
586
+ },
587
+ {
588
+ "epoch": 0.7451102142191866,
589
+ "eval_loss": 0.40048521757125854,
590
+ "eval_runtime": 544.8903,
591
+ "eval_samples_per_second": 1.995,
592
+ "eval_steps_per_second": 0.998,
593
+ "eval_wer": 0.2315340909090909,
594
+ "step": 1800
595
+ },
596
+ {
597
+ "epoch": 0.755458967194453,
598
+ "grad_norm": 43.74811553955078,
599
+ "learning_rate": 9.816539263377346e-06,
600
+ "loss": 1.1466,
601
+ "step": 1825
602
+ },
603
+ {
604
+ "epoch": 0.7658077201697195,
605
+ "grad_norm": 67.86227416992188,
606
+ "learning_rate": 9.813064628214038e-06,
607
+ "loss": 1.2406,
608
+ "step": 1850
609
+ },
610
+ {
611
+ "epoch": 0.776156473144986,
612
+ "grad_norm": 21.888675689697266,
613
+ "learning_rate": 9.809589993050731e-06,
614
+ "loss": 1.0934,
615
+ "step": 1875
616
+ },
617
+ {
618
+ "epoch": 0.7865052261202525,
619
+ "grad_norm": 34.76344680786133,
620
+ "learning_rate": 9.806115357887423e-06,
621
+ "loss": 1.3778,
622
+ "step": 1900
623
+ },
624
+ {
625
+ "epoch": 0.796853979095519,
626
+ "grad_norm": 37.02476501464844,
627
+ "learning_rate": 9.802640722724114e-06,
628
+ "loss": 1.1266,
629
+ "step": 1925
630
+ },
631
+ {
632
+ "epoch": 0.8072027320707855,
633
+ "grad_norm": 25.008710861206055,
634
+ "learning_rate": 9.799166087560807e-06,
635
+ "loss": 0.9663,
636
+ "step": 1950
637
+ },
638
+ {
639
+ "epoch": 0.8175514850460519,
640
+ "grad_norm": 43.19198989868164,
641
+ "learning_rate": 9.795691452397499e-06,
642
+ "loss": 1.2192,
643
+ "step": 1975
644
+ },
645
+ {
646
+ "epoch": 0.8279002380213184,
647
+ "grad_norm": 42.85536193847656,
648
+ "learning_rate": 9.79221681723419e-06,
649
+ "loss": 1.0432,
650
+ "step": 2000
651
+ },
652
+ {
653
+ "epoch": 0.8279002380213184,
654
+ "eval_loss": 0.3864189684391022,
655
+ "eval_runtime": 555.4403,
656
+ "eval_samples_per_second": 1.957,
657
+ "eval_steps_per_second": 0.979,
658
+ "eval_wer": 0.24330357142857142,
659
+ "step": 2000
660
+ },
661
+ {
662
+ "epoch": 0.8382489909965849,
663
+ "grad_norm": 42.24909591674805,
664
+ "learning_rate": 9.788742182070884e-06,
665
+ "loss": 1.2698,
666
+ "step": 2025
667
+ },
668
+ {
669
+ "epoch": 0.8485977439718514,
670
+ "grad_norm": 30.07208824157715,
671
+ "learning_rate": 9.785267546907575e-06,
672
+ "loss": 1.0744,
673
+ "step": 2050
674
+ },
675
+ {
676
+ "epoch": 0.8589464969471179,
677
+ "grad_norm": 40.065677642822266,
678
+ "learning_rate": 9.781792911744267e-06,
679
+ "loss": 1.0424,
680
+ "step": 2075
681
+ },
682
+ {
683
+ "epoch": 0.8692952499223844,
684
+ "grad_norm": 33.75371170043945,
685
+ "learning_rate": 9.77831827658096e-06,
686
+ "loss": 1.1718,
687
+ "step": 2100
688
+ },
689
+ {
690
+ "epoch": 0.8796440028976509,
691
+ "grad_norm": 29.951263427734375,
692
+ "learning_rate": 9.774843641417652e-06,
693
+ "loss": 1.0589,
694
+ "step": 2125
695
+ },
696
+ {
697
+ "epoch": 0.8899927558729173,
698
+ "grad_norm": 48.64168930053711,
699
+ "learning_rate": 9.771369006254343e-06,
700
+ "loss": 1.253,
701
+ "step": 2150
702
+ },
703
+ {
704
+ "epoch": 0.9003415088481838,
705
+ "grad_norm": 50.58803939819336,
706
+ "learning_rate": 9.767894371091037e-06,
707
+ "loss": 1.1886,
708
+ "step": 2175
709
+ },
710
+ {
711
+ "epoch": 0.9106902618234503,
712
+ "grad_norm": 40.60319900512695,
713
+ "learning_rate": 9.764419735927728e-06,
714
+ "loss": 0.9825,
715
+ "step": 2200
716
+ },
717
+ {
718
+ "epoch": 0.9106902618234503,
719
+ "eval_loss": 0.3742503523826599,
720
+ "eval_runtime": 564.0293,
721
+ "eval_samples_per_second": 1.927,
722
+ "eval_steps_per_second": 0.964,
723
+ "eval_wer": 0.22067775974025974,
724
+ "step": 2200
725
+ },
726
+ {
727
+ "epoch": 0.9210390147987167,
728
+ "grad_norm": 42.79588317871094,
729
+ "learning_rate": 9.760945100764422e-06,
730
+ "loss": 1.0184,
731
+ "step": 2225
732
+ },
733
+ {
734
+ "epoch": 0.9313877677739832,
735
+ "grad_norm": 59.941490173339844,
736
+ "learning_rate": 9.757470465601113e-06,
737
+ "loss": 1.2345,
738
+ "step": 2250
739
+ },
740
+ {
741
+ "epoch": 0.9417365207492497,
742
+ "grad_norm": 45.79833984375,
743
+ "learning_rate": 9.753995830437805e-06,
744
+ "loss": 1.0212,
745
+ "step": 2275
746
+ },
747
+ {
748
+ "epoch": 0.9520852737245162,
749
+ "grad_norm": 41.137176513671875,
750
+ "learning_rate": 9.750521195274498e-06,
751
+ "loss": 1.2013,
752
+ "step": 2300
753
+ },
754
+ {
755
+ "epoch": 0.9624340266997827,
756
+ "grad_norm": 36.94132614135742,
757
+ "learning_rate": 9.74704656011119e-06,
758
+ "loss": 1.1972,
759
+ "step": 2325
760
+ },
761
+ {
762
+ "epoch": 0.9727827796750491,
763
+ "grad_norm": 27.98811912536621,
764
+ "learning_rate": 9.743571924947881e-06,
765
+ "loss": 1.088,
766
+ "step": 2350
767
+ },
768
+ {
769
+ "epoch": 0.9831315326503156,
770
+ "grad_norm": 20.012388229370117,
771
+ "learning_rate": 9.740097289784574e-06,
772
+ "loss": 0.9881,
773
+ "step": 2375
774
+ },
775
+ {
776
+ "epoch": 0.9934802856255821,
777
+ "grad_norm": 40.9871940612793,
778
+ "learning_rate": 9.736622654621266e-06,
779
+ "loss": 1.0952,
780
+ "step": 2400
781
+ },
782
+ {
783
+ "epoch": 0.9934802856255821,
784
+ "eval_loss": 0.36100757122039795,
785
+ "eval_runtime": 567.8727,
786
+ "eval_samples_per_second": 1.914,
787
+ "eval_steps_per_second": 0.958,
788
+ "eval_wer": 0.2234172077922078,
789
+ "step": 2400
790
+ },
791
+ {
792
+ "epoch": 1.0041395011901066,
793
+ "grad_norm": 36.53849411010742,
794
+ "learning_rate": 9.733148019457958e-06,
795
+ "loss": 0.8935,
796
+ "step": 2425
797
+ },
798
+ {
799
+ "epoch": 1.014488254165373,
800
+ "grad_norm": 34.85436248779297,
801
+ "learning_rate": 9.72967338429465e-06,
802
+ "loss": 0.6152,
803
+ "step": 2450
804
+ },
805
+ {
806
+ "epoch": 1.0248370071406396,
807
+ "grad_norm": 22.534700393676758,
808
+ "learning_rate": 9.726198749131342e-06,
809
+ "loss": 0.5735,
810
+ "step": 2475
811
+ },
812
+ {
813
+ "epoch": 1.035185760115906,
814
+ "grad_norm": 23.778539657592773,
815
+ "learning_rate": 9.722724113968034e-06,
816
+ "loss": 0.5496,
817
+ "step": 2500
818
+ },
819
+ {
820
+ "epoch": 1.0455345130911726,
821
+ "grad_norm": 36.943050384521484,
822
+ "learning_rate": 9.719249478804727e-06,
823
+ "loss": 0.6067,
824
+ "step": 2525
825
+ },
826
+ {
827
+ "epoch": 1.055883266066439,
828
+ "grad_norm": 21.048763275146484,
829
+ "learning_rate": 9.715774843641419e-06,
830
+ "loss": 0.5577,
831
+ "step": 2550
832
+ },
833
+ {
834
+ "epoch": 1.0662320190417054,
835
+ "grad_norm": 31.97542953491211,
836
+ "learning_rate": 9.71230020847811e-06,
837
+ "loss": 0.5266,
838
+ "step": 2575
839
+ },
840
+ {
841
+ "epoch": 1.076580772016972,
842
+ "grad_norm": 36.05984115600586,
843
+ "learning_rate": 9.708825573314804e-06,
844
+ "loss": 0.6001,
845
+ "step": 2600
846
+ },
847
+ {
848
+ "epoch": 1.076580772016972,
849
+ "eval_loss": 0.388786643743515,
850
+ "eval_runtime": 572.4362,
851
+ "eval_samples_per_second": 1.899,
852
+ "eval_steps_per_second": 0.95,
853
+ "eval_wer": 0.24228896103896103,
854
+ "step": 2600
855
+ },
856
+ {
857
+ "epoch": 1.0869295249922384,
858
+ "grad_norm": 35.275272369384766,
859
+ "learning_rate": 9.705350938151495e-06,
860
+ "loss": 0.6265,
861
+ "step": 2625
862
+ },
863
+ {
864
+ "epoch": 1.097278277967505,
865
+ "grad_norm": 23.014320373535156,
866
+ "learning_rate": 9.701876302988187e-06,
867
+ "loss": 0.5507,
868
+ "step": 2650
869
+ },
870
+ {
871
+ "epoch": 1.1076270309427714,
872
+ "grad_norm": 18.49266242980957,
873
+ "learning_rate": 9.69840166782488e-06,
874
+ "loss": 0.6563,
875
+ "step": 2675
876
+ },
877
+ {
878
+ "epoch": 1.117975783918038,
879
+ "grad_norm": 22.754758834838867,
880
+ "learning_rate": 9.694927032661572e-06,
881
+ "loss": 0.633,
882
+ "step": 2700
883
+ },
884
+ {
885
+ "epoch": 1.1283245368933044,
886
+ "grad_norm": 24.042457580566406,
887
+ "learning_rate": 9.691452397498263e-06,
888
+ "loss": 0.5714,
889
+ "step": 2725
890
+ },
891
+ {
892
+ "epoch": 1.1386732898685707,
893
+ "grad_norm": 31.35757827758789,
894
+ "learning_rate": 9.687977762334956e-06,
895
+ "loss": 0.6269,
896
+ "step": 2750
897
+ },
898
+ {
899
+ "epoch": 1.1490220428438374,
900
+ "grad_norm": 15.74731731414795,
901
+ "learning_rate": 9.684503127171648e-06,
902
+ "loss": 0.633,
903
+ "step": 2775
904
+ },
905
+ {
906
+ "epoch": 1.1593707958191037,
907
+ "grad_norm": 25.903976440429688,
908
+ "learning_rate": 9.68102849200834e-06,
909
+ "loss": 0.5491,
910
+ "step": 2800
911
+ },
912
+ {
913
+ "epoch": 1.1593707958191037,
914
+ "eval_loss": 0.37300390005111694,
915
+ "eval_runtime": 573.7758,
916
+ "eval_samples_per_second": 1.894,
917
+ "eval_steps_per_second": 0.948,
918
+ "eval_wer": 0.22646103896103897,
919
+ "step": 2800
920
+ },
921
+ {
922
+ "epoch": 1.1697195487943703,
923
+ "grad_norm": 36.39768981933594,
924
+ "learning_rate": 9.677553856845033e-06,
925
+ "loss": 0.5838,
926
+ "step": 2825
927
+ },
928
+ {
929
+ "epoch": 1.1800683017696367,
930
+ "grad_norm": 36.48637390136719,
931
+ "learning_rate": 9.674079221681724e-06,
932
+ "loss": 0.5071,
933
+ "step": 2850
934
+ },
935
+ {
936
+ "epoch": 1.1904170547449033,
937
+ "grad_norm": 43.22209167480469,
938
+ "learning_rate": 9.670604586518416e-06,
939
+ "loss": 0.5702,
940
+ "step": 2875
941
+ },
942
+ {
943
+ "epoch": 1.2007658077201697,
944
+ "grad_norm": 26.855955123901367,
945
+ "learning_rate": 9.66712995135511e-06,
946
+ "loss": 0.6014,
947
+ "step": 2900
948
+ },
949
+ {
950
+ "epoch": 1.211114560695436,
951
+ "grad_norm": 28.16461181640625,
952
+ "learning_rate": 9.663655316191801e-06,
953
+ "loss": 0.6213,
954
+ "step": 2925
955
+ },
956
+ {
957
+ "epoch": 1.2214633136707027,
958
+ "grad_norm": 19.875337600708008,
959
+ "learning_rate": 9.660180681028492e-06,
960
+ "loss": 0.5896,
961
+ "step": 2950
962
+ },
963
+ {
964
+ "epoch": 1.231812066645969,
965
+ "grad_norm": 24.210161209106445,
966
+ "learning_rate": 9.656706045865186e-06,
967
+ "loss": 0.6079,
968
+ "step": 2975
969
+ },
970
+ {
971
+ "epoch": 1.2421608196212357,
972
+ "grad_norm": 21.59490394592285,
973
+ "learning_rate": 9.653231410701877e-06,
974
+ "loss": 0.6732,
975
+ "step": 3000
976
+ },
977
+ {
978
+ "epoch": 1.2421608196212357,
979
+ "eval_loss": 0.3701952397823334,
980
+ "eval_runtime": 580.4786,
981
+ "eval_samples_per_second": 1.873,
982
+ "eval_steps_per_second": 0.937,
983
+ "eval_wer": 0.2200689935064935,
984
+ "step": 3000
985
+ },
986
+ {
987
+ "epoch": 1.2421608196212357,
988
+ "step": 3000,
989
+ "total_flos": 2.449860020011008e+19,
990
+ "train_loss": 1.494960307121277,
991
+ "train_runtime": 14532.737,
992
+ "train_samples_per_second": 39.895,
993
+ "train_steps_per_second": 4.985
994
+ }
995
+ ],
996
+ "logging_steps": 25,
997
+ "max_steps": 72450,
998
+ "num_input_tokens_seen": 0,
999
+ "num_train_epochs": 30,
1000
+ "save_steps": 200,
1001
+ "stateful_callbacks": {
1002
+ "EarlyStoppingCallback": {
1003
+ "args": {
1004
+ "early_stopping_patience": 3,
1005
+ "early_stopping_threshold": 0.0
1006
+ },
1007
+ "attributes": {
1008
+ "early_stopping_patience_counter": 3
1009
+ }
1010
+ },
1011
+ "TrainerControl": {
1012
+ "args": {
1013
+ "should_epoch_stop": false,
1014
+ "should_evaluate": false,
1015
+ "should_log": false,
1016
+ "should_save": true,
1017
+ "should_training_stop": true
1018
+ },
1019
+ "attributes": {}
1020
+ }
1021
+ },
1022
+ "total_flos": 2.449860020011008e+19,
1023
+ "train_batch_size": 2,
1024
+ "trial_name": null,
1025
+ "trial_params": null
1026
+ }