csikasote committed on
Commit
2d6c694
·
verified ·
1 Parent(s): abc7467

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # mms-1b-all-bemgen-combined-fusion
18
 
19
- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.2150
22
- - Wer: 0.3920
23
 
24
  ## Model description
25
 
 
3
  license: cc-by-nc-4.0
4
  base_model: facebook/mms-1b-all
5
  tags:
6
+ - automatic-speech-recognition
7
+ - bemgen
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # mms-1b-all-bemgen-combined-fusion
21
 
22
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the BEMGEN - ADA dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.2155
25
+ - Wer: 0.3934
26
 
27
  ## Model description
28
 
adapter.ada.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce3059e642d3e94368a6a85be2615ff63d73c69951820ae4d10c4f861519095
3
+ size 8798532
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.21950095891952515,
4
- "eval_runtime": 69.6984,
5
  "eval_samples": 1676,
6
- "eval_samples_per_second": 24.046,
7
- "eval_steps_per_second": 6.012,
8
- "eval_wer": 0.4012815378454145,
9
  "total_flos": 1.9392924694598373e+19,
10
- "train_loss": 1.2170672131068816,
11
- "train_runtime": 4223.7671,
12
  "train_samples": 6299,
13
- "train_samples_per_second": 7.457,
14
- "train_steps_per_second": 0.466
15
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.21548815071582794,
4
+ "eval_runtime": 71.9815,
5
  "eval_samples": 1676,
6
+ "eval_samples_per_second": 23.284,
7
+ "eval_steps_per_second": 5.821,
8
+ "eval_wer": 0.3934054198371379,
9
  "total_flos": 1.9392924694598373e+19,
10
+ "train_loss": 1.2152001163076023,
11
+ "train_runtime": 4269.1717,
12
  "train_samples": 6299,
13
+ "train_samples_per_second": 7.377,
14
+ "train_steps_per_second": 0.461
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 0.21950095891952515,
4
- "eval_runtime": 69.6984,
5
  "eval_samples": 1676,
6
- "eval_samples_per_second": 24.046,
7
- "eval_steps_per_second": 6.012,
8
- "eval_wer": 0.4012815378454145
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 0.21548815071582794,
4
+ "eval_runtime": 71.9815,
5
  "eval_samples": 1676,
6
+ "eval_samples_per_second": 23.284,
7
+ "eval_steps_per_second": 5.821,
8
+ "eval_wer": 0.3934054198371379
9
  }
runs/Aug12_14-49-50_srvrocgpu014.uct.ac.za/events.out.tfevents.1755007778.srvrocgpu014.uct.ac.za.314165.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7779914368d0aed4bef47c1109dbe5dade3c61ac8fa246d8e23c4216616cbaf6
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.9392924694598373e+19,
4
- "train_loss": 1.2170672131068816,
5
- "train_runtime": 4223.7671,
6
  "train_samples": 6299,
7
- "train_samples_per_second": 7.457,
8
- "train_steps_per_second": 0.466
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.9392924694598373e+19,
4
+ "train_loss": 1.2152001163076023,
5
+ "train_runtime": 4269.1717,
6
  "train_samples": 6299,
7
+ "train_samples_per_second": 7.377,
8
+ "train_steps_per_second": 0.461
9
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_global_step": 1900,
3
- "best_metric": 0.2150741070508957,
4
- "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/mms-1b-all-bemgen-combined-fusion/checkpoint-1200",
5
  "epoch": 5.0,
6
  "eval_steps": 100,
7
  "global_step": 1970,
@@ -11,316 +11,316 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.25380710659898476,
14
- "grad_norm": 32.2292594909668,
15
- "learning_rate": 0.000285,
16
- "loss": 7.7625,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.25380710659898476,
21
- "eval_loss": 5.588669300079346,
22
- "eval_runtime": 69.3281,
23
- "eval_samples_per_second": 24.175,
24
- "eval_steps_per_second": 6.044,
25
- "eval_wer": 0.999933253237218,
26
  "step": 100
27
  },
28
  {
29
  "epoch": 0.5076142131979695,
30
- "grad_norm": 10.083724975585938,
31
  "learning_rate": 0.00028475935828877006,
32
- "loss": 4.8937,
33
  "step": 200
34
  },
35
  {
36
  "epoch": 0.5076142131979695,
37
- "eval_loss": 4.956684589385986,
38
- "eval_runtime": 68.9661,
39
- "eval_samples_per_second": 24.302,
40
- "eval_steps_per_second": 6.075,
41
  "eval_wer": 1.0,
42
  "step": 200
43
  },
44
  {
45
  "epoch": 0.7614213197969543,
46
- "grad_norm": 5.518433094024658,
47
  "learning_rate": 0.00026871657754010695,
48
- "loss": 4.2312,
49
  "step": 300
50
  },
51
  {
52
  "epoch": 0.7614213197969543,
53
- "eval_loss": 3.7783877849578857,
54
- "eval_runtime": 68.6885,
55
- "eval_samples_per_second": 24.4,
56
- "eval_steps_per_second": 6.1,
57
- "eval_wer": 0.99966626618609,
58
  "step": 300
59
  },
60
  {
61
  "epoch": 1.015228426395939,
62
- "grad_norm": 0.7774800062179565,
63
- "learning_rate": 0.00025283422459893045,
64
- "loss": 1.194,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 1.015228426395939,
69
- "eval_loss": 0.25593841075897217,
70
- "eval_runtime": 68.6022,
71
- "eval_samples_per_second": 24.431,
72
- "eval_steps_per_second": 6.108,
73
- "eval_wer": 0.46515818982779333,
74
  "step": 400
75
  },
76
  {
77
  "epoch": 1.2690355329949239,
78
- "grad_norm": 0.8243533372879028,
79
- "learning_rate": 0.00023679144385026736,
80
- "loss": 0.4372,
81
  "step": 500
82
  },
83
  {
84
  "epoch": 1.2690355329949239,
85
- "eval_loss": 0.2409835159778595,
86
- "eval_runtime": 70.2102,
87
- "eval_samples_per_second": 23.871,
88
- "eval_steps_per_second": 5.968,
89
- "eval_wer": 0.4499399279134962,
90
  "step": 500
91
  },
92
  {
93
  "epoch": 1.5228426395939088,
94
- "grad_norm": 0.6686663031578064,
95
- "learning_rate": 0.00022074866310160427,
96
- "loss": 0.4276,
97
  "step": 600
98
  },
99
  {
100
  "epoch": 1.5228426395939088,
101
- "eval_loss": 0.2348148673772812,
102
- "eval_runtime": 69.4735,
103
- "eval_samples_per_second": 24.124,
104
- "eval_steps_per_second": 6.031,
105
- "eval_wer": 0.4280469897209985,
106
  "step": 600
107
  },
108
  {
109
  "epoch": 1.7766497461928934,
110
- "grad_norm": 1.5514487028121948,
111
- "learning_rate": 0.00020470588235294116,
112
- "loss": 0.4003,
113
  "step": 700
114
  },
115
  {
116
  "epoch": 1.7766497461928934,
117
- "eval_loss": 0.2333621233701706,
118
- "eval_runtime": 69.3321,
119
- "eval_samples_per_second": 24.174,
120
- "eval_steps_per_second": 6.043,
121
- "eval_wer": 0.44132959551461753,
122
  "step": 700
123
  },
124
  {
125
  "epoch": 2.030456852791878,
126
- "grad_norm": 4.034536838531494,
127
- "learning_rate": 0.00018866310160427807,
128
- "loss": 0.3853,
129
  "step": 800
130
  },
131
  {
132
  "epoch": 2.030456852791878,
133
- "eval_loss": 0.22295093536376953,
134
- "eval_runtime": 69.6283,
135
- "eval_samples_per_second": 24.071,
136
- "eval_steps_per_second": 6.018,
137
- "eval_wer": 0.4092911493792551,
138
  "step": 800
139
  },
140
  {
141
  "epoch": 2.284263959390863,
142
- "grad_norm": 0.36776381731033325,
143
- "learning_rate": 0.00017262032085561496,
144
- "loss": 0.3852,
145
  "step": 900
146
  },
147
  {
148
  "epoch": 2.284263959390863,
149
- "eval_loss": 0.22227536141872406,
150
- "eval_runtime": 70.6197,
151
- "eval_samples_per_second": 23.733,
152
- "eval_steps_per_second": 5.933,
153
- "eval_wer": 0.40875717527699906,
154
  "step": 900
155
  },
156
  {
157
  "epoch": 2.5380710659898478,
158
- "grad_norm": 0.44797080755233765,
159
- "learning_rate": 0.00015657754010695187,
160
- "loss": 0.3811,
161
  "step": 1000
162
  },
163
  {
164
  "epoch": 2.5380710659898478,
165
- "eval_loss": 0.2219884991645813,
166
- "eval_runtime": 70.6408,
167
- "eval_samples_per_second": 23.726,
168
- "eval_steps_per_second": 5.931,
169
- "eval_wer": 0.408022960886397,
170
  "step": 1000
171
  },
172
  {
173
  "epoch": 2.7918781725888326,
174
- "grad_norm": 0.8320724964141846,
175
- "learning_rate": 0.00014053475935828875,
176
- "loss": 0.3705,
177
  "step": 1100
178
  },
179
  {
180
  "epoch": 2.7918781725888326,
181
- "eval_loss": 0.22171413898468018,
182
- "eval_runtime": 69.9602,
183
- "eval_samples_per_second": 23.956,
184
- "eval_steps_per_second": 5.989,
185
- "eval_wer": 0.40975837671872917,
186
  "step": 1100
187
  },
188
  {
189
  "epoch": 3.045685279187817,
190
- "grad_norm": 0.6590794324874878,
191
- "learning_rate": 0.00012449197860962566,
192
- "loss": 0.3604,
193
  "step": 1200
194
  },
195
  {
196
  "epoch": 3.045685279187817,
197
- "eval_loss": 0.21949037909507751,
198
- "eval_runtime": 69.9308,
199
- "eval_samples_per_second": 23.967,
200
- "eval_steps_per_second": 5.992,
201
- "eval_wer": 0.4010812975570685,
202
  "step": 1200
203
  },
204
  {
205
  "epoch": 3.299492385786802,
206
- "grad_norm": 0.6556515693664551,
207
- "learning_rate": 0.00010844919786096256,
208
- "loss": 0.3593,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 3.299492385786802,
213
- "eval_loss": 0.21909502148628235,
214
- "eval_runtime": 71.1369,
215
- "eval_samples_per_second": 23.56,
216
- "eval_steps_per_second": 5.89,
217
- "eval_wer": 0.3984114270457883,
218
  "step": 1300
219
  },
220
  {
221
  "epoch": 3.553299492385787,
222
- "grad_norm": 0.5768907070159912,
223
- "learning_rate": 9.240641711229946e-05,
224
- "loss": 0.3595,
225
  "step": 1400
226
  },
227
  {
228
  "epoch": 3.553299492385787,
229
- "eval_loss": 0.2161073535680771,
230
- "eval_runtime": 69.8416,
231
- "eval_samples_per_second": 23.997,
232
- "eval_steps_per_second": 5.999,
233
- "eval_wer": 0.4010812975570685,
234
  "step": 1400
235
  },
236
  {
237
  "epoch": 3.8071065989847717,
238
- "grad_norm": 1.000182867050171,
239
- "learning_rate": 7.636363636363635e-05,
240
- "loss": 0.3594,
241
  "step": 1500
242
  },
243
  {
244
  "epoch": 3.8071065989847717,
245
- "eval_loss": 0.21695125102996826,
246
- "eval_runtime": 69.6215,
247
- "eval_samples_per_second": 24.073,
248
- "eval_steps_per_second": 6.018,
249
- "eval_wer": 0.4058203177145908,
250
  "step": 1500
251
  },
252
  {
253
  "epoch": 4.060913705583756,
254
- "grad_norm": 0.5270859003067017,
255
- "learning_rate": 6.032085561497326e-05,
256
- "loss": 0.3635,
257
  "step": 1600
258
  },
259
  {
260
  "epoch": 4.060913705583756,
261
- "eval_loss": 0.216335266828537,
262
- "eval_runtime": 69.5286,
263
- "eval_samples_per_second": 24.105,
264
- "eval_steps_per_second": 6.026,
265
- "eval_wer": 0.407956214123615,
266
  "step": 1600
267
  },
268
  {
269
  "epoch": 4.314720812182741,
270
- "grad_norm": 1.7513196468353271,
271
- "learning_rate": 4.427807486631015e-05,
272
- "loss": 0.3511,
273
  "step": 1700
274
  },
275
  {
276
  "epoch": 4.314720812182741,
277
- "eval_loss": 0.21683281660079956,
278
- "eval_runtime": 70.5268,
279
- "eval_samples_per_second": 23.764,
280
- "eval_steps_per_second": 5.941,
281
- "eval_wer": 0.4115605393138433,
282
  "step": 1700
283
  },
284
  {
285
  "epoch": 4.568527918781726,
286
- "grad_norm": 1.0023202896118164,
287
- "learning_rate": 2.8235294117647056e-05,
288
- "loss": 0.3455,
289
  "step": 1800
290
  },
291
  {
292
  "epoch": 4.568527918781726,
293
- "eval_loss": 0.21529528498649597,
294
- "eval_runtime": 69.8807,
295
- "eval_samples_per_second": 23.984,
296
- "eval_steps_per_second": 5.996,
297
- "eval_wer": 0.40308370044052866,
298
  "step": 1800
299
  },
300
  {
301
  "epoch": 4.822335025380711,
302
- "grad_norm": 2.876286506652832,
303
- "learning_rate": 1.2192513368983957e-05,
304
- "loss": 0.3603,
305
  "step": 1900
306
  },
307
  {
308
  "epoch": 4.822335025380711,
309
- "eval_loss": 0.2150741070508957,
310
- "eval_runtime": 69.8864,
311
- "eval_samples_per_second": 23.982,
312
- "eval_steps_per_second": 5.995,
313
- "eval_wer": 0.4005473234548124,
314
  "step": 1900
315
  },
316
  {
317
  "epoch": 5.0,
318
  "step": 1970,
319
  "total_flos": 1.9392924694598373e+19,
320
- "train_loss": 1.2170672131068816,
321
- "train_runtime": 4223.7671,
322
- "train_samples_per_second": 7.457,
323
- "train_steps_per_second": 0.466
324
  }
325
  ],
326
  "logging_steps": 100,
 
1
  {
2
  "best_global_step": 1900,
3
+ "best_metric": 0.21497154235839844,
4
+ "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/mms-1b-all-bemgen-combined-fusion/checkpoint-1600",
5
  "epoch": 5.0,
6
  "eval_steps": 100,
7
  "global_step": 1970,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.25380710659898476,
14
+ "grad_norm": 32.639888763427734,
15
+ "learning_rate": 0.00028799999999999995,
16
+ "loss": 7.7249,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.25380710659898476,
21
+ "eval_loss": 5.611884117126465,
22
+ "eval_runtime": 69.791,
23
+ "eval_samples_per_second": 24.015,
24
+ "eval_steps_per_second": 6.004,
25
+ "eval_wer": 0.99966626618609,
26
  "step": 100
27
  },
28
  {
29
  "epoch": 0.5076142131979695,
30
+ "grad_norm": 11.450201988220215,
31
  "learning_rate": 0.00028475935828877006,
32
+ "loss": 4.8515,
33
  "step": 200
34
  },
35
  {
36
  "epoch": 0.5076142131979695,
37
+ "eval_loss": 5.008387565612793,
38
+ "eval_runtime": 70.0202,
39
+ "eval_samples_per_second": 23.936,
40
+ "eval_steps_per_second": 5.984,
41
  "eval_wer": 1.0,
42
  "step": 200
43
  },
44
  {
45
  "epoch": 0.7614213197969543,
46
+ "grad_norm": 2.9968459606170654,
47
  "learning_rate": 0.00026871657754010695,
48
+ "loss": 4.2325,
49
  "step": 300
50
  },
51
  {
52
  "epoch": 0.7614213197969543,
53
+ "eval_loss": 3.5047295093536377,
54
+ "eval_runtime": 69.8473,
55
+ "eval_samples_per_second": 23.995,
56
+ "eval_steps_per_second": 5.999,
57
+ "eval_wer": 0.999933253237218,
58
  "step": 300
59
  },
60
  {
61
  "epoch": 1.015228426395939,
62
+ "grad_norm": 0.4712130129337311,
63
+ "learning_rate": 0.00025267379679144383,
64
+ "loss": 1.2178,
65
  "step": 400
66
  },
67
  {
68
  "epoch": 1.015228426395939,
69
+ "eval_loss": 0.2545737326145172,
70
+ "eval_runtime": 70.1278,
71
+ "eval_samples_per_second": 23.899,
72
+ "eval_steps_per_second": 5.975,
73
+ "eval_wer": 0.4574823121078628,
74
  "step": 400
75
  },
76
  {
77
  "epoch": 1.2690355329949239,
78
+ "grad_norm": 0.5717469453811646,
79
+ "learning_rate": 0.00023663101604278074,
80
+ "loss": 0.4385,
81
  "step": 500
82
  },
83
  {
84
  "epoch": 1.2690355329949239,
85
+ "eval_loss": 0.23700203001499176,
86
+ "eval_runtime": 71.5732,
87
+ "eval_samples_per_second": 23.417,
88
+ "eval_steps_per_second": 5.854,
89
+ "eval_wer": 0.42170604725670807,
90
  "step": 500
91
  },
92
  {
93
  "epoch": 1.5228426395939088,
94
+ "grad_norm": 0.5493173003196716,
95
+ "learning_rate": 0.00022058823529411765,
96
+ "loss": 0.4282,
97
  "step": 600
98
  },
99
  {
100
  "epoch": 1.5228426395939088,
101
+ "eval_loss": 0.23334264755249023,
102
+ "eval_runtime": 70.7242,
103
+ "eval_samples_per_second": 23.698,
104
+ "eval_steps_per_second": 5.924,
105
+ "eval_wer": 0.42984915231611265,
106
  "step": 600
107
  },
108
  {
109
  "epoch": 1.7766497461928934,
110
+ "grad_norm": 0.7042660117149353,
111
+ "learning_rate": 0.0002045454545454545,
112
+ "loss": 0.4004,
113
  "step": 700
114
  },
115
  {
116
  "epoch": 1.7766497461928934,
117
+ "eval_loss": 0.233428955078125,
118
+ "eval_runtime": 70.5744,
119
+ "eval_samples_per_second": 23.748,
120
+ "eval_steps_per_second": 5.937,
121
+ "eval_wer": 0.4421305566680016,
122
  "step": 700
123
  },
124
  {
125
  "epoch": 2.030456852791878,
126
+ "grad_norm": 0.7014455199241638,
127
+ "learning_rate": 0.00018850267379679142,
128
+ "loss": 0.3883,
129
  "step": 800
130
  },
131
  {
132
  "epoch": 2.030456852791878,
133
+ "eval_loss": 0.2236906737089157,
134
+ "eval_runtime": 70.3928,
135
+ "eval_samples_per_second": 23.809,
136
+ "eval_steps_per_second": 5.952,
137
+ "eval_wer": 0.4049526097984248,
138
  "step": 800
139
  },
140
  {
141
  "epoch": 2.284263959390863,
142
+ "grad_norm": 0.39537644386291504,
143
+ "learning_rate": 0.0001724598930481283,
144
+ "loss": 0.3884,
145
  "step": 900
146
  },
147
  {
148
  "epoch": 2.284263959390863,
149
+ "eval_loss": 0.22256948053836823,
150
+ "eval_runtime": 71.8539,
151
+ "eval_samples_per_second": 23.325,
152
+ "eval_steps_per_second": 5.831,
153
+ "eval_wer": 0.40835669470030705,
154
  "step": 900
155
  },
156
  {
157
  "epoch": 2.5380710659898478,
158
+ "grad_norm": 0.5684704780578613,
159
+ "learning_rate": 0.00015641711229946522,
160
+ "loss": 0.3827,
161
  "step": 1000
162
  },
163
  {
164
  "epoch": 2.5380710659898478,
165
+ "eval_loss": 0.22316201031208038,
166
+ "eval_runtime": 71.0137,
167
+ "eval_samples_per_second": 23.601,
168
+ "eval_steps_per_second": 5.9,
169
+ "eval_wer": 0.4066212788679749,
170
  "step": 1000
171
  },
172
  {
173
  "epoch": 2.7918781725888326,
174
+ "grad_norm": 0.5047805905342102,
175
+ "learning_rate": 0.00014037433155080213,
176
+ "loss": 0.3718,
177
  "step": 1100
178
  },
179
  {
180
  "epoch": 2.7918781725888326,
181
+ "eval_loss": 0.22224539518356323,
182
+ "eval_runtime": 70.0209,
183
+ "eval_samples_per_second": 23.936,
184
+ "eval_steps_per_second": 5.984,
185
+ "eval_wer": 0.40421839540782273,
186
  "step": 1100
187
  },
188
  {
189
  "epoch": 3.045685279187817,
190
+ "grad_norm": 0.389790803194046,
191
+ "learning_rate": 0.00012433155080213902,
192
+ "loss": 0.3619,
193
  "step": 1200
194
  },
195
  {
196
  "epoch": 3.045685279187817,
197
+ "eval_loss": 0.21895790100097656,
198
+ "eval_runtime": 69.1851,
199
+ "eval_samples_per_second": 24.225,
200
+ "eval_steps_per_second": 6.056,
201
+ "eval_wer": 0.39587505006007206,
202
  "step": 1200
203
  },
204
  {
205
  "epoch": 3.299492385786802,
206
+ "grad_norm": 0.4273822605609894,
207
+ "learning_rate": 0.00010828877005347593,
208
+ "loss": 0.3609,
209
  "step": 1300
210
  },
211
  {
212
  "epoch": 3.299492385786802,
213
+ "eval_loss": 0.21885482966899872,
214
+ "eval_runtime": 72.5472,
215
+ "eval_samples_per_second": 23.102,
216
+ "eval_steps_per_second": 5.776,
217
+ "eval_wer": 0.3921372313442798,
218
  "step": 1300
219
  },
220
  {
221
  "epoch": 3.553299492385787,
222
+ "grad_norm": 0.4862494170665741,
223
+ "learning_rate": 9.224598930481283e-05,
224
+ "loss": 0.3579,
225
  "step": 1400
226
  },
227
  {
228
  "epoch": 3.553299492385787,
229
+ "eval_loss": 0.21627625823020935,
230
+ "eval_runtime": 71.1133,
231
+ "eval_samples_per_second": 23.568,
232
+ "eval_steps_per_second": 5.892,
233
+ "eval_wer": 0.39774395941796825,
234
  "step": 1400
235
  },
236
  {
237
  "epoch": 3.8071065989847717,
238
+ "grad_norm": 0.4581854045391083,
239
+ "learning_rate": 7.620320855614973e-05,
240
+ "loss": 0.3602,
241
  "step": 1500
242
  },
243
  {
244
  "epoch": 3.8071065989847717,
245
+ "eval_loss": 0.21612538397312164,
246
+ "eval_runtime": 71.2145,
247
+ "eval_samples_per_second": 23.535,
248
+ "eval_steps_per_second": 5.884,
249
+ "eval_wer": 0.3991456414363903,
250
  "step": 1500
251
  },
252
  {
253
  "epoch": 4.060913705583756,
254
+ "grad_norm": 1.5124748945236206,
255
+ "learning_rate": 6.016042780748663e-05,
256
+ "loss": 0.3663,
257
  "step": 1600
258
  },
259
  {
260
  "epoch": 4.060913705583756,
261
+ "eval_loss": 0.2154930680990219,
262
+ "eval_runtime": 71.5979,
263
+ "eval_samples_per_second": 23.409,
264
+ "eval_steps_per_second": 5.852,
265
+ "eval_wer": 0.3935389133627019,
266
  "step": 1600
267
  },
268
  {
269
  "epoch": 4.314720812182741,
270
+ "grad_norm": 0.6621213555335999,
271
+ "learning_rate": 4.4117647058823526e-05,
272
+ "loss": 0.3525,
273
  "step": 1700
274
  },
275
  {
276
  "epoch": 4.314720812182741,
277
+ "eval_loss": 0.2160252183675766,
278
+ "eval_runtime": 73.0283,
279
+ "eval_samples_per_second": 22.95,
280
+ "eval_steps_per_second": 5.738,
281
+ "eval_wer": 0.3961420371112001,
282
  "step": 1700
283
  },
284
  {
285
  "epoch": 4.568527918781726,
286
+ "grad_norm": 0.40791934728622437,
287
+ "learning_rate": 2.8074866310160424e-05,
288
+ "loss": 0.3435,
289
  "step": 1800
290
  },
291
  {
292
  "epoch": 4.568527918781726,
293
+ "eval_loss": 0.21518246829509735,
294
+ "eval_runtime": 71.6347,
295
+ "eval_samples_per_second": 23.396,
296
+ "eval_steps_per_second": 5.849,
297
+ "eval_wer": 0.3956080630089441,
298
  "step": 1800
299
  },
300
  {
301
  "epoch": 4.822335025380711,
302
+ "grad_norm": 0.7308972477912903,
303
+ "learning_rate": 1.2032085561497326e-05,
304
+ "loss": 0.3626,
305
  "step": 1900
306
  },
307
  {
308
  "epoch": 4.822335025380711,
309
+ "eval_loss": 0.21497154235839844,
310
+ "eval_runtime": 72.1344,
311
+ "eval_samples_per_second": 23.234,
312
+ "eval_steps_per_second": 5.809,
313
+ "eval_wer": 0.3920037378187158,
314
  "step": 1900
315
  },
316
  {
317
  "epoch": 5.0,
318
  "step": 1970,
319
  "total_flos": 1.9392924694598373e+19,
320
+ "train_loss": 1.2152001163076023,
321
+ "train_runtime": 4269.1717,
322
+ "train_samples_per_second": 7.377,
323
+ "train_steps_per_second": 0.461
324
  }
325
  ],
326
  "logging_steps": 100,