csikasote committed on
Commit
9bc0752
·
verified ·
1 Parent(s): 80a03b4

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # mms-1b-all-bemgen-combined-fusion
18
 
19
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2151
22
- - Wer: 0.4005
23
 
24
  ## Model description
25
 
 
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
6
+ - automatic-speech-recognition
7
+ - bemgen
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # mms-1b-all-bemgen-combined-fusion
21
 
22
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BEMGEN - BEM dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.2195
25
+ - Wer: 0.4013
26
 
27
  ## Model description
28
 
adapter.bem.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c65d17a6216c261a43a89fb4a5e2c2f7027cf20ea1ba75f86555c44848632f4
3
  size 8798532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26faad5fc9feb70d3adbdf3b1cf7ed72f27bdcb18da554de3e5f8fc564d08e98
3
  size 8798532
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.21571367979049683,
4
- "eval_runtime": 107.5327,
5
  "eval_samples": 1676,
6
- "eval_samples_per_second": 15.586,
7
- "eval_steps_per_second": 3.896,
8
- "eval_wer": 0.39534107595781603,
9
  "total_flos": 1.9392924694598373e+19,
10
- "train_loss": 1.307294990810646,
11
- "train_runtime": 6210.2648,
12
  "train_samples": 6299,
13
- "train_samples_per_second": 5.071,
14
- "train_steps_per_second": 0.317
15
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.21950095891952515,
4
+ "eval_runtime": 69.6984,
5
  "eval_samples": 1676,
6
+ "eval_samples_per_second": 24.046,
7
+ "eval_steps_per_second": 6.012,
8
+ "eval_wer": 0.4012815378454145,
9
  "total_flos": 1.9392924694598373e+19,
10
+ "train_loss": 1.2170672131068816,
11
+ "train_runtime": 4223.7671,
12
  "train_samples": 6299,
13
+ "train_samples_per_second": 7.457,
14
+ "train_steps_per_second": 0.466
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.21571367979049683,
4
- "eval_runtime": 107.5327,
5
  "eval_samples": 1676,
6
- "eval_samples_per_second": 15.586,
7
- "eval_steps_per_second": 3.896,
8
- "eval_wer": 0.39534107595781603
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.21950095891952515,
4
+ "eval_runtime": 69.6984,
5
  "eval_samples": 1676,
6
+ "eval_samples_per_second": 24.046,
7
+ "eval_steps_per_second": 6.012,
8
+ "eval_wer": 0.4012815378454145
9
  }
runs/Aug12_02-58-33_srvrocgpu013.uct.ac.za/events.out.tfevents.1754965037.srvrocgpu013.uct.ac.za.381040.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a89748381909fe35ad336462b15aa0268aa2eba6b0a6b7ec7174689982db6d3
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.9392924694598373e+19,
4
- "train_loss": 1.307294990810646,
5
- "train_runtime": 6210.2648,
6
  "train_samples": 6299,
7
- "train_samples_per_second": 5.071,
8
- "train_steps_per_second": 0.317
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.9392924694598373e+19,
4
+ "train_loss": 1.2170672131068816,
5
+ "train_runtime": 4223.7671,
6
  "train_samples": 6299,
7
+ "train_samples_per_second": 7.457,
8
+ "train_steps_per_second": 0.466
9
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_global_step": 1900,
3
- "best_metric": 0.21413084864616394,
4
- "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/mms-1b-all-bemgen-combined-fusion/checkpoint-1600",
5
  "epoch": 5.0,
6
  "eval_steps": 100,
7
  "global_step": 1970,
@@ -11,316 +11,316 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.25380710659898476,
14
- "grad_norm": 32.122833251953125,
15
  "learning_rate": 0.000285,
16
- "loss": 7.7684,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.25380710659898476,
21
- "eval_loss": 5.640964031219482,
22
- "eval_runtime": 109.1867,
23
- "eval_samples_per_second": 15.35,
24
- "eval_steps_per_second": 3.837,
25
- "eval_wer": 1.4059538112401548,
26
  "step": 100
27
  },
28
  {
29
  "epoch": 0.5076142131979695,
30
- "grad_norm": 12.221145629882812,
31
  "learning_rate": 0.00028475935828877006,
32
- "loss": 4.84,
33
  "step": 200
34
  },
35
  {
36
  "epoch": 0.5076142131979695,
37
- "eval_loss": 5.061452865600586,
38
- "eval_runtime": 108.0732,
39
- "eval_samples_per_second": 15.508,
40
- "eval_steps_per_second": 3.877,
41
- "eval_wer": 1.0636096649312508,
42
  "step": 200
43
  },
44
  {
45
  "epoch": 0.7614213197969543,
46
- "grad_norm": 8.020641326904297,
47
  "learning_rate": 0.00026871657754010695,
48
- "loss": 4.3769,
49
  "step": 300
50
  },
51
  {
52
  "epoch": 0.7614213197969543,
53
- "eval_loss": 4.394653797149658,
54
- "eval_runtime": 108.1582,
55
- "eval_samples_per_second": 15.496,
56
- "eval_steps_per_second": 3.874,
57
- "eval_wer": 1.1531170738219196,
58
  "step": 300
59
  },
60
  {
61
  "epoch": 1.015228426395939,
62
- "grad_norm": 0.8574035167694092,
63
- "learning_rate": 0.00025267379679144383,
64
- "loss": 2.759,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 1.015228426395939,
69
- "eval_loss": 0.3453950583934784,
70
- "eval_runtime": 107.6705,
71
- "eval_samples_per_second": 15.566,
72
- "eval_steps_per_second": 3.892,
73
- "eval_wer": 0.5700173541583233,
74
  "step": 400
75
  },
76
  {
77
  "epoch": 1.2690355329949239,
78
- "grad_norm": 0.6176797747612,
79
- "learning_rate": 0.00023663101604278074,
80
- "loss": 0.4928,
81
  "step": 500
82
  },
83
  {
84
  "epoch": 1.2690355329949239,
85
- "eval_loss": 0.25626739859580994,
86
- "eval_runtime": 108.7806,
87
- "eval_samples_per_second": 15.407,
88
- "eval_steps_per_second": 3.852,
89
- "eval_wer": 0.45568014951274866,
90
  "step": 500
91
  },
92
  {
93
  "epoch": 1.5228426395939088,
94
- "grad_norm": 1.2089505195617676,
95
- "learning_rate": 0.00022058823529411765,
96
- "loss": 0.4437,
97
  "step": 600
98
  },
99
  {
100
  "epoch": 1.5228426395939088,
101
- "eval_loss": 0.23927858471870422,
102
- "eval_runtime": 107.2813,
103
- "eval_samples_per_second": 15.622,
104
- "eval_steps_per_second": 3.906,
105
- "eval_wer": 0.4425977840074756,
106
  "step": 600
107
  },
108
  {
109
  "epoch": 1.7766497461928934,
110
- "grad_norm": 0.5251367092132568,
111
- "learning_rate": 0.0002045454545454545,
112
- "loss": 0.4061,
113
  "step": 700
114
  },
115
  {
116
  "epoch": 1.7766497461928934,
117
- "eval_loss": 0.23556636273860931,
118
- "eval_runtime": 107.5712,
119
- "eval_samples_per_second": 15.58,
120
- "eval_steps_per_second": 3.895,
121
- "eval_wer": 0.44179682285409155,
122
  "step": 700
123
  },
124
  {
125
  "epoch": 2.030456852791878,
126
- "grad_norm": 0.9855093359947205,
127
- "learning_rate": 0.00018850267379679142,
128
- "loss": 0.3903,
129
  "step": 800
130
  },
131
  {
132
  "epoch": 2.030456852791878,
133
- "eval_loss": 0.227129265666008,
134
- "eval_runtime": 107.653,
135
- "eval_samples_per_second": 15.569,
136
- "eval_steps_per_second": 3.892,
137
- "eval_wer": 0.42417567747964224,
138
  "step": 800
139
  },
140
  {
141
  "epoch": 2.284263959390863,
142
- "grad_norm": 0.7405723333358765,
143
- "learning_rate": 0.0001724598930481283,
144
- "loss": 0.3924,
145
  "step": 900
146
  },
147
  {
148
  "epoch": 2.284263959390863,
149
- "eval_loss": 0.22408504784107208,
150
- "eval_runtime": 110.2112,
151
- "eval_samples_per_second": 15.207,
152
- "eval_steps_per_second": 3.802,
153
- "eval_wer": 0.4186356961687358,
154
  "step": 900
155
  },
156
  {
157
  "epoch": 2.5380710659898478,
158
- "grad_norm": 1.1984844207763672,
159
- "learning_rate": 0.00015641711229946522,
160
- "loss": 0.3835,
161
  "step": 1000
162
  },
163
  {
164
  "epoch": 2.5380710659898478,
165
- "eval_loss": 0.2239576131105423,
166
- "eval_runtime": 108.7325,
167
- "eval_samples_per_second": 15.414,
168
- "eval_steps_per_second": 3.853,
169
- "eval_wer": 0.4171672673875317,
170
  "step": 1000
171
  },
172
  {
173
  "epoch": 2.7918781725888326,
174
- "grad_norm": 0.6451926231384277,
175
- "learning_rate": 0.00014037433155080213,
176
- "loss": 0.372,
177
  "step": 1100
178
  },
179
  {
180
  "epoch": 2.7918781725888326,
181
- "eval_loss": 0.22267985343933105,
182
- "eval_runtime": 107.5115,
183
- "eval_samples_per_second": 15.589,
184
- "eval_steps_per_second": 3.897,
185
- "eval_wer": 0.407956214123615,
186
  "step": 1100
187
  },
188
  {
189
  "epoch": 3.045685279187817,
190
- "grad_norm": 0.40041837096214294,
191
- "learning_rate": 0.00012433155080213902,
192
- "loss": 0.3645,
193
  "step": 1200
194
  },
195
  {
196
  "epoch": 3.045685279187817,
197
- "eval_loss": 0.2206098586320877,
198
- "eval_runtime": 107.6785,
199
- "eval_samples_per_second": 15.565,
200
- "eval_steps_per_second": 3.891,
201
- "eval_wer": 0.4009478040315045,
202
  "step": 1200
203
  },
204
  {
205
  "epoch": 3.299492385786802,
206
- "grad_norm": 3.658419609069824,
207
- "learning_rate": 0.00010828877005347593,
208
- "loss": 0.3657,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 3.299492385786802,
213
- "eval_loss": 0.22048015892505646,
214
- "eval_runtime": 108.5101,
215
- "eval_samples_per_second": 15.446,
216
- "eval_steps_per_second": 3.861,
217
- "eval_wer": 0.3978774529435322,
218
  "step": 1300
219
  },
220
  {
221
  "epoch": 3.553299492385787,
222
- "grad_norm": 1.8381335735321045,
223
- "learning_rate": 9.224598930481283e-05,
224
- "loss": 0.3603,
225
  "step": 1400
226
  },
227
  {
228
  "epoch": 3.553299492385787,
229
- "eval_loss": 0.2172713428735733,
230
- "eval_runtime": 107.0495,
231
- "eval_samples_per_second": 15.656,
232
- "eval_steps_per_second": 3.914,
233
- "eval_wer": 0.39947937525030036,
234
  "step": 1400
235
  },
236
  {
237
  "epoch": 3.8071065989847717,
238
- "grad_norm": 1.1935131549835205,
239
- "learning_rate": 7.620320855614973e-05,
240
- "loss": 0.3595,
241
  "step": 1500
242
  },
243
  {
244
  "epoch": 3.8071065989847717,
245
- "eval_loss": 0.21687445044517517,
246
- "eval_runtime": 106.8635,
247
- "eval_samples_per_second": 15.684,
248
- "eval_steps_per_second": 3.921,
249
- "eval_wer": 0.3987451608596983,
250
  "step": 1500
251
  },
252
  {
253
  "epoch": 4.060913705583756,
254
- "grad_norm": 0.9769509434700012,
255
- "learning_rate": 6.016042780748663e-05,
256
- "loss": 0.3641,
257
  "step": 1600
258
  },
259
  {
260
  "epoch": 4.060913705583756,
261
- "eval_loss": 0.2157205492258072,
262
- "eval_runtime": 106.6704,
263
- "eval_samples_per_second": 15.712,
264
- "eval_steps_per_second": 3.928,
265
- "eval_wer": 0.39547456948338006,
266
  "step": 1600
267
  },
268
  {
269
  "epoch": 4.314720812182741,
270
- "grad_norm": 0.6243143081665039,
271
- "learning_rate": 4.4117647058823526e-05,
272
- "loss": 0.3501,
273
  "step": 1700
274
  },
275
  {
276
  "epoch": 4.314720812182741,
277
- "eval_loss": 0.2155493199825287,
278
- "eval_runtime": 108.4911,
279
- "eval_samples_per_second": 15.448,
280
- "eval_steps_per_second": 3.862,
281
- "eval_wer": 0.39894540114804433,
282
  "step": 1700
283
  },
284
  {
285
  "epoch": 4.568527918781726,
286
- "grad_norm": 0.5864003896713257,
287
- "learning_rate": 2.8074866310160424e-05,
288
- "loss": 0.3527,
289
  "step": 1800
290
  },
291
  {
292
  "epoch": 4.568527918781726,
293
- "eval_loss": 0.21467459201812744,
294
- "eval_runtime": 107.3959,
295
- "eval_samples_per_second": 15.606,
296
- "eval_steps_per_second": 3.901,
297
- "eval_wer": 0.39714323855293016,
298
  "step": 1800
299
  },
300
  {
301
  "epoch": 4.822335025380711,
302
- "grad_norm": 0.8512787818908691,
303
- "learning_rate": 1.2032085561497326e-05,
304
- "loss": 0.3607,
305
  "step": 1900
306
  },
307
  {
308
  "epoch": 4.822335025380711,
309
- "eval_loss": 0.21413084864616394,
310
- "eval_runtime": 107.1817,
311
- "eval_samples_per_second": 15.637,
312
- "eval_steps_per_second": 3.909,
313
- "eval_wer": 0.39373915365104795,
314
  "step": 1900
315
  },
316
  {
317
  "epoch": 5.0,
318
  "step": 1970,
319
  "total_flos": 1.9392924694598373e+19,
320
- "train_loss": 1.307294990810646,
321
- "train_runtime": 6210.2648,
322
- "train_samples_per_second": 5.071,
323
- "train_steps_per_second": 0.317
324
  }
325
  ],
326
  "logging_steps": 100,
 
1
  {
2
  "best_global_step": 1900,
3
+ "best_metric": 0.2150741070508957,
4
+ "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/mms-1b-all-bemgen-combined-fusion/checkpoint-1200",
5
  "epoch": 5.0,
6
  "eval_steps": 100,
7
  "global_step": 1970,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.25380710659898476,
14
+ "grad_norm": 32.2292594909668,
15
  "learning_rate": 0.000285,
16
+ "loss": 7.7625,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.25380710659898476,
21
+ "eval_loss": 5.588669300079346,
22
+ "eval_runtime": 69.3281,
23
+ "eval_samples_per_second": 24.175,
24
+ "eval_steps_per_second": 6.044,
25
+ "eval_wer": 0.999933253237218,
26
  "step": 100
27
  },
28
  {
29
  "epoch": 0.5076142131979695,
30
+ "grad_norm": 10.083724975585938,
31
  "learning_rate": 0.00028475935828877006,
32
+ "loss": 4.8937,
33
  "step": 200
34
  },
35
  {
36
  "epoch": 0.5076142131979695,
37
+ "eval_loss": 4.956684589385986,
38
+ "eval_runtime": 68.9661,
39
+ "eval_samples_per_second": 24.302,
40
+ "eval_steps_per_second": 6.075,
41
+ "eval_wer": 1.0,
42
  "step": 200
43
  },
44
  {
45
  "epoch": 0.7614213197969543,
46
+ "grad_norm": 5.518433094024658,
47
  "learning_rate": 0.00026871657754010695,
48
+ "loss": 4.2312,
49
  "step": 300
50
  },
51
  {
52
  "epoch": 0.7614213197969543,
53
+ "eval_loss": 3.7783877849578857,
54
+ "eval_runtime": 68.6885,
55
+ "eval_samples_per_second": 24.4,
56
+ "eval_steps_per_second": 6.1,
57
+ "eval_wer": 0.99966626618609,
58
  "step": 300
59
  },
60
  {
61
  "epoch": 1.015228426395939,
62
+ "grad_norm": 0.7774800062179565,
63
+ "learning_rate": 0.00025283422459893045,
64
+ "loss": 1.194,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 1.015228426395939,
69
+ "eval_loss": 0.25593841075897217,
70
+ "eval_runtime": 68.6022,
71
+ "eval_samples_per_second": 24.431,
72
+ "eval_steps_per_second": 6.108,
73
+ "eval_wer": 0.46515818982779333,
74
  "step": 400
75
  },
76
  {
77
  "epoch": 1.2690355329949239,
78
+ "grad_norm": 0.8243533372879028,
79
+ "learning_rate": 0.00023679144385026736,
80
+ "loss": 0.4372,
81
  "step": 500
82
  },
83
  {
84
  "epoch": 1.2690355329949239,
85
+ "eval_loss": 0.2409835159778595,
86
+ "eval_runtime": 70.2102,
87
+ "eval_samples_per_second": 23.871,
88
+ "eval_steps_per_second": 5.968,
89
+ "eval_wer": 0.4499399279134962,
90
  "step": 500
91
  },
92
  {
93
  "epoch": 1.5228426395939088,
94
+ "grad_norm": 0.6686663031578064,
95
+ "learning_rate": 0.00022074866310160427,
96
+ "loss": 0.4276,
97
  "step": 600
98
  },
99
  {
100
  "epoch": 1.5228426395939088,
101
+ "eval_loss": 0.2348148673772812,
102
+ "eval_runtime": 69.4735,
103
+ "eval_samples_per_second": 24.124,
104
+ "eval_steps_per_second": 6.031,
105
+ "eval_wer": 0.4280469897209985,
106
  "step": 600
107
  },
108
  {
109
  "epoch": 1.7766497461928934,
110
+ "grad_norm": 1.5514487028121948,
111
+ "learning_rate": 0.00020470588235294116,
112
+ "loss": 0.4003,
113
  "step": 700
114
  },
115
  {
116
  "epoch": 1.7766497461928934,
117
+ "eval_loss": 0.2333621233701706,
118
+ "eval_runtime": 69.3321,
119
+ "eval_samples_per_second": 24.174,
120
+ "eval_steps_per_second": 6.043,
121
+ "eval_wer": 0.44132959551461753,
122
  "step": 700
123
  },
124
  {
125
  "epoch": 2.030456852791878,
126
+ "grad_norm": 4.034536838531494,
127
+ "learning_rate": 0.00018866310160427807,
128
+ "loss": 0.3853,
129
  "step": 800
130
  },
131
  {
132
  "epoch": 2.030456852791878,
133
+ "eval_loss": 0.22295093536376953,
134
+ "eval_runtime": 69.6283,
135
+ "eval_samples_per_second": 24.071,
136
+ "eval_steps_per_second": 6.018,
137
+ "eval_wer": 0.4092911493792551,
138
  "step": 800
139
  },
140
  {
141
  "epoch": 2.284263959390863,
142
+ "grad_norm": 0.36776381731033325,
143
+ "learning_rate": 0.00017262032085561496,
144
+ "loss": 0.3852,
145
  "step": 900
146
  },
147
  {
148
  "epoch": 2.284263959390863,
149
+ "eval_loss": 0.22227536141872406,
150
+ "eval_runtime": 70.6197,
151
+ "eval_samples_per_second": 23.733,
152
+ "eval_steps_per_second": 5.933,
153
+ "eval_wer": 0.40875717527699906,
154
  "step": 900
155
  },
156
  {
157
  "epoch": 2.5380710659898478,
158
+ "grad_norm": 0.44797080755233765,
159
+ "learning_rate": 0.00015657754010695187,
160
+ "loss": 0.3811,
161
  "step": 1000
162
  },
163
  {
164
  "epoch": 2.5380710659898478,
165
+ "eval_loss": 0.2219884991645813,
166
+ "eval_runtime": 70.6408,
167
+ "eval_samples_per_second": 23.726,
168
+ "eval_steps_per_second": 5.931,
169
+ "eval_wer": 0.408022960886397,
170
  "step": 1000
171
  },
172
  {
173
  "epoch": 2.7918781725888326,
174
+ "grad_norm": 0.8320724964141846,
175
+ "learning_rate": 0.00014053475935828875,
176
+ "loss": 0.3705,
177
  "step": 1100
178
  },
179
  {
180
  "epoch": 2.7918781725888326,
181
+ "eval_loss": 0.22171413898468018,
182
+ "eval_runtime": 69.9602,
183
+ "eval_samples_per_second": 23.956,
184
+ "eval_steps_per_second": 5.989,
185
+ "eval_wer": 0.40975837671872917,
186
  "step": 1100
187
  },
188
  {
189
  "epoch": 3.045685279187817,
190
+ "grad_norm": 0.6590794324874878,
191
+ "learning_rate": 0.00012449197860962566,
192
+ "loss": 0.3604,
193
  "step": 1200
194
  },
195
  {
196
  "epoch": 3.045685279187817,
197
+ "eval_loss": 0.21949037909507751,
198
+ "eval_runtime": 69.9308,
199
+ "eval_samples_per_second": 23.967,
200
+ "eval_steps_per_second": 5.992,
201
+ "eval_wer": 0.4010812975570685,
202
  "step": 1200
203
  },
204
  {
205
  "epoch": 3.299492385786802,
206
+ "grad_norm": 0.6556515693664551,
207
+ "learning_rate": 0.00010844919786096256,
208
+ "loss": 0.3593,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 3.299492385786802,
213
+ "eval_loss": 0.21909502148628235,
214
+ "eval_runtime": 71.1369,
215
+ "eval_samples_per_second": 23.56,
216
+ "eval_steps_per_second": 5.89,
217
+ "eval_wer": 0.3984114270457883,
218
  "step": 1300
219
  },
220
  {
221
  "epoch": 3.553299492385787,
222
+ "grad_norm": 0.5768907070159912,
223
+ "learning_rate": 9.240641711229946e-05,
224
+ "loss": 0.3595,
225
  "step": 1400
226
  },
227
  {
228
  "epoch": 3.553299492385787,
229
+ "eval_loss": 0.2161073535680771,
230
+ "eval_runtime": 69.8416,
231
+ "eval_samples_per_second": 23.997,
232
+ "eval_steps_per_second": 5.999,
233
+ "eval_wer": 0.4010812975570685,
234
  "step": 1400
235
  },
236
  {
237
  "epoch": 3.8071065989847717,
238
+ "grad_norm": 1.000182867050171,
239
+ "learning_rate": 7.636363636363635e-05,
240
+ "loss": 0.3594,
241
  "step": 1500
242
  },
243
  {
244
  "epoch": 3.8071065989847717,
245
+ "eval_loss": 0.21695125102996826,
246
+ "eval_runtime": 69.6215,
247
+ "eval_samples_per_second": 24.073,
248
+ "eval_steps_per_second": 6.018,
249
+ "eval_wer": 0.4058203177145908,
250
  "step": 1500
251
  },
252
  {
253
  "epoch": 4.060913705583756,
254
+ "grad_norm": 0.5270859003067017,
255
+ "learning_rate": 6.032085561497326e-05,
256
+ "loss": 0.3635,
257
  "step": 1600
258
  },
259
  {
260
  "epoch": 4.060913705583756,
261
+ "eval_loss": 0.216335266828537,
262
+ "eval_runtime": 69.5286,
263
+ "eval_samples_per_second": 24.105,
264
+ "eval_steps_per_second": 6.026,
265
+ "eval_wer": 0.407956214123615,
266
  "step": 1600
267
  },
268
  {
269
  "epoch": 4.314720812182741,
270
+ "grad_norm": 1.7513196468353271,
271
+ "learning_rate": 4.427807486631015e-05,
272
+ "loss": 0.3511,
273
  "step": 1700
274
  },
275
  {
276
  "epoch": 4.314720812182741,
277
+ "eval_loss": 0.21683281660079956,
278
+ "eval_runtime": 70.5268,
279
+ "eval_samples_per_second": 23.764,
280
+ "eval_steps_per_second": 5.941,
281
+ "eval_wer": 0.4115605393138433,
282
  "step": 1700
283
  },
284
  {
285
  "epoch": 4.568527918781726,
286
+ "grad_norm": 1.0023202896118164,
287
+ "learning_rate": 2.8235294117647056e-05,
288
+ "loss": 0.3455,
289
  "step": 1800
290
  },
291
  {
292
  "epoch": 4.568527918781726,
293
+ "eval_loss": 0.21529528498649597,
294
+ "eval_runtime": 69.8807,
295
+ "eval_samples_per_second": 23.984,
296
+ "eval_steps_per_second": 5.996,
297
+ "eval_wer": 0.40308370044052866,
298
  "step": 1800
299
  },
300
  {
301
  "epoch": 4.822335025380711,
302
+ "grad_norm": 2.876286506652832,
303
+ "learning_rate": 1.2192513368983957e-05,
304
+ "loss": 0.3603,
305
  "step": 1900
306
  },
307
  {
308
  "epoch": 4.822335025380711,
309
+ "eval_loss": 0.2150741070508957,
310
+ "eval_runtime": 69.8864,
311
+ "eval_samples_per_second": 23.982,
312
+ "eval_steps_per_second": 5.995,
313
+ "eval_wer": 0.4005473234548124,
314
  "step": 1900
315
  },
316
  {
317
  "epoch": 5.0,
318
  "step": 1970,
319
  "total_flos": 1.9392924694598373e+19,
320
+ "train_loss": 1.2170672131068816,
321
+ "train_runtime": 4223.7671,
322
+ "train_samples_per_second": 7.457,
323
+ "train_steps_per_second": 0.466
324
  }
325
  ],
326
  "logging_steps": 100,