csikasote committed · Commit e6f933b · verified · 1 Parent(s): a4f6e94

End of training

README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
 license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
 tags:
+- automatic-speech-recognition
+- nyagen
+- mms
 - generated_from_trainer
 metrics:
 - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mms-1b-nyagen-balanced-model
 
-This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the NYAGEN - NYA dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1803
-- Wer: 0.2544
+- Wer: 0.2549
 
 ## Model description
 
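Note on the reported metric: WER (word error rate) is (substitutions + deletions + insertions) divided by the number of reference words, so the Wer of 0.2549 in the updated card means roughly one word in four is transcribed incorrectly on the evaluation set. A minimal sketch of how such a score is typically computed with the `evaluate` library; the transcript strings below are placeholders, not NYAGEN data:

```python
# Minimal WER sketch with the `evaluate` library; the strings are
# placeholder transcripts, not taken from the NYAGEN evaluation set.
import evaluate

wer_metric = evaluate.load("wer")  # corpus-level WER (backed by jiwer)

predictions = ["the model wrote this sentence down", "a second guess"]
references  = ["the model wrote the sentence down", "a second sentence"]

score = wer_metric.compute(predictions=predictions, references=references)
print(f"WER: {score:.4f}")  # the model card reports ~0.2549 on its eval set
```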
adapter.nya.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bb14171de66a5e31753888c936ff5d4bf731c422bb11be5f8c96b2f9926be7a
+size 8798532
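adapter.nya.safetensors is the MMS language-adapter weight file produced by this run (the "nya" suffix matches the NYA split named in the README). A minimal loading sketch following the usual MMS adapter recipe in transformers; the repo id is assumed from the model name, and the audio handling is illustrative:

```python
# Minimal sketch, assuming the repo id below and the standard MMS adapter
# layout; not taken from the commit itself.
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

model_id = "csikasote/mms-1b-nyagen-balanced-model"  # assumed repo id

processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(
    model_id,
    target_lang="nya",             # selects adapter.nya.safetensors
    ignore_mismatched_sizes=True,  # CTC head size follows the nya vocab
)

def transcribe(waveform):
    """Transcribe a 16 kHz mono waveform given as a 1-D float array."""
    inputs = processor(waveform, sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
```

`target_lang` and `ignore_mismatched_sizes` follow the transformers documentation for MMS checkpoints that ship per-language adapters and a language-specific CTC head.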
all_results.json ADDED
@@ -0,0 +1,15 @@
+{
+    "epoch": 3.314917127071823,
+    "eval_loss": 0.18031810224056244,
+    "eval_runtime": 15.2431,
+    "eval_samples": 169,
+    "eval_samples_per_second": 11.087,
+    "eval_steps_per_second": 2.821,
+    "eval_wer": 0.25486645540968766,
+    "total_flos": 4.522297158190472e+18,
+    "train_loss": 0.9028558111190796,
+    "train_runtime": 1060.5768,
+    "train_samples": 1445,
+    "train_samples_per_second": 40.874,
+    "train_steps_per_second": 10.24
+}
eval_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 3.314917127071823,
+    "eval_loss": 0.18031810224056244,
+    "eval_runtime": 15.2431,
+    "eval_samples": 169,
+    "eval_samples_per_second": 11.087,
+    "eval_steps_per_second": 2.821,
+    "eval_wer": 0.25486645540968766
+}
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 3.314917127071823,
+    "total_flos": 4.522297158190472e+18,
+    "train_loss": 0.9028558111190796,
+    "train_runtime": 1060.5768,
+    "train_samples": 1445,
+    "train_samples_per_second": 40.874,
+    "train_steps_per_second": 10.24
+}
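One note on reading these numbers: the Trainer reports throughput against the planned run length rather than the early-stopped one, so `train_samples_per_second` and `train_steps_per_second` reproduce from `train_samples`, the 30 planned epochs, and the 10860 `max_steps` recorded in trainer_state.json below. A quick check (values copied from the JSON files in this commit):

```python
# Reproducing the reported throughput from the other logged fields.
train_samples = 1445          # train_results.json
num_train_epochs = 30         # trainer_state.json (planned; stopped at ~3.31)
max_steps = 10860             # trainer_state.json
train_runtime = 1060.5768     # seconds

print(train_samples * num_train_epochs / train_runtime)  # ~40.874 samples/s
print(max_steps / train_runtime)                         # ~10.24 steps/s
```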
trainer_state.json ADDED
@@ -0,0 +1,243 @@
+{
+  "best_metric": 0.17163243889808655,
+  "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-balanced-model/checkpoint-900",
+  "epoch": 3.314917127071823,
+  "eval_steps": 100,
+  "global_step": 1200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.27624309392265195,
+      "grad_norm": 4.279232025146484,
+      "learning_rate": 0.00028799999999999995,
+      "loss": 7.181,
+      "step": 100
+    },
+    {
+      "epoch": 0.27624309392265195,
+      "eval_loss": 0.6055206656455994,
+      "eval_runtime": 16.1135,
+      "eval_samples_per_second": 10.488,
+      "eval_steps_per_second": 2.669,
+      "eval_wer": 0.5246717971933001,
+      "step": 100
+    },
+    {
+      "epoch": 0.5524861878453039,
+      "grad_norm": 2.128457546234131,
+      "learning_rate": 0.00029735130111524163,
+      "loss": 0.5071,
+      "step": 200
+    },
+    {
+      "epoch": 0.5524861878453039,
+      "eval_loss": 0.24523495137691498,
+      "eval_runtime": 15.4101,
+      "eval_samples_per_second": 10.967,
+      "eval_steps_per_second": 2.79,
+      "eval_wer": 0.3594386600271616,
+      "step": 200
+    },
+    {
+      "epoch": 0.8287292817679558,
+      "grad_norm": 1.3095550537109375,
+      "learning_rate": 0.0002945631970260223,
+      "loss": 0.3791,
+      "step": 300
+    },
+    {
+      "epoch": 0.8287292817679558,
+      "eval_loss": 0.21587276458740234,
+      "eval_runtime": 15.3962,
+      "eval_samples_per_second": 10.977,
+      "eval_steps_per_second": 2.793,
+      "eval_wer": 0.3232231779085559,
+      "step": 300
+    },
+    {
+      "epoch": 1.1049723756906078,
+      "grad_norm": 4.117229461669922,
+      "learning_rate": 0.0002917750929368029,
+      "loss": 0.3464,
+      "step": 400
+    },
+    {
+      "epoch": 1.1049723756906078,
+      "eval_loss": 0.20587413012981415,
+      "eval_runtime": 15.3374,
+      "eval_samples_per_second": 11.019,
+      "eval_steps_per_second": 2.804,
+      "eval_wer": 0.3046627433227705,
+      "step": 400
+    },
+    {
+      "epoch": 1.3812154696132597,
+      "grad_norm": 1.0849329233169556,
+      "learning_rate": 0.0002889869888475836,
+      "loss": 0.3326,
+      "step": 500
+    },
+    {
+      "epoch": 1.3812154696132597,
+      "eval_loss": 0.19188211858272552,
+      "eval_runtime": 15.5114,
+      "eval_samples_per_second": 10.895,
+      "eval_steps_per_second": 2.772,
+      "eval_wer": 0.29425079221367134,
+      "step": 500
+    },
+    {
+      "epoch": 1.6574585635359116,
+      "grad_norm": 1.2119916677474976,
+      "learning_rate": 0.00028619888475836427,
+      "loss": 0.322,
+      "step": 600
+    },
+    {
+      "epoch": 1.6574585635359116,
+      "eval_loss": 0.18680231273174286,
+      "eval_runtime": 15.4576,
+      "eval_samples_per_second": 10.933,
+      "eval_steps_per_second": 2.782,
+      "eval_wer": 0.28610230873698506,
+      "step": 600
+    },
+    {
+      "epoch": 1.9337016574585635,
+      "grad_norm": 1.408340573310852,
+      "learning_rate": 0.00028341078066914494,
+      "loss": 0.3025,
+      "step": 700
+    },
+    {
+      "epoch": 1.9337016574585635,
+      "eval_loss": 0.1849866360425949,
+      "eval_runtime": 15.3489,
+      "eval_samples_per_second": 11.011,
+      "eval_steps_per_second": 2.801,
+      "eval_wer": 0.2901765504753282,
+      "step": 700
+    },
+    {
+      "epoch": 2.2099447513812156,
+      "grad_norm": 0.6426145434379578,
+      "learning_rate": 0.0002806226765799256,
+      "loss": 0.2939,
+      "step": 800
+    },
+    {
+      "epoch": 2.2099447513812156,
+      "eval_loss": 0.17766940593719482,
+      "eval_runtime": 15.4593,
+      "eval_samples_per_second": 10.932,
+      "eval_steps_per_second": 2.781,
+      "eval_wer": 0.2698053417836125,
+      "step": 800
+    },
+    {
+      "epoch": 2.4861878453038675,
+      "grad_norm": 0.5668926239013672,
+      "learning_rate": 0.0002778345724907063,
+      "loss": 0.2971,
+      "step": 900
+    },
+    {
+      "epoch": 2.4861878453038675,
+      "eval_loss": 0.17163243889808655,
+      "eval_runtime": 15.5806,
+      "eval_samples_per_second": 10.847,
+      "eval_steps_per_second": 2.76,
+      "eval_wer": 0.2675418741511996,
+      "step": 900
+    },
+    {
+      "epoch": 2.7624309392265194,
+      "grad_norm": 0.42915207147598267,
+      "learning_rate": 0.00027504646840148696,
+      "loss": 0.2787,
+      "step": 1000
+    },
+    {
+      "epoch": 2.7624309392265194,
+      "eval_loss": 0.17503149807453156,
+      "eval_runtime": 15.5493,
+      "eval_samples_per_second": 10.869,
+      "eval_steps_per_second": 2.765,
+      "eval_wer": 0.27161611588954276,
+      "step": 1000
+    },
+    {
+      "epoch": 3.0386740331491713,
+      "grad_norm": 0.994978129863739,
+      "learning_rate": 0.00027225836431226763,
+      "loss": 0.32,
+      "step": 1100
+    },
+    {
+      "epoch": 3.0386740331491713,
+      "eval_loss": 0.1725001335144043,
+      "eval_runtime": 15.57,
+      "eval_samples_per_second": 10.854,
+      "eval_steps_per_second": 2.762,
+      "eval_wer": 0.27342688999547304,
+      "step": 1100
+    },
+    {
+      "epoch": 3.314917127071823,
+      "grad_norm": 0.6422222852706909,
+      "learning_rate": 0.0002694702602230483,
+      "loss": 0.2738,
+      "step": 1200
+    },
+    {
+      "epoch": 3.314917127071823,
+      "eval_loss": 0.18027880787849426,
+      "eval_runtime": 15.2933,
+      "eval_samples_per_second": 11.051,
+      "eval_steps_per_second": 2.812,
+      "eval_wer": 0.2544137618832051,
+      "step": 1200
+    },
+    {
+      "epoch": 3.314917127071823,
+      "step": 1200,
+      "total_flos": 4.522297158190472e+18,
+      "train_loss": 0.9028558111190796,
+      "train_runtime": 1060.5768,
+      "train_samples_per_second": 40.874,
+      "train_steps_per_second": 10.24
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 10860,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 400,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 3
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.522297158190472e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
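trainer_state.json records the schedule that produced this run: evaluation and logging every 100 steps, checkpoints every 400 steps, a per-device batch size of 4, 30 planned epochs, and an EarlyStoppingCallback with patience 3 that stopped training at step 1200 once eval_loss had failed to improve on the step-900 best (0.1716) for three evaluations. A minimal sketch of how such a setup is typically wired with transformers; values not present in the state file (the output path and the choice of metric) are assumptions, and a real run would also pass the model and datasets to Trainer:

```python
# Minimal sketch of a configuration consistent with trainer_state.json.
# Output path and metric choice are assumptions; other values come from
# the state file.
from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="mms-1b-nyagen-balanced-model",  # assumed path
    per_device_train_batch_size=4,              # "train_batch_size": 4
    num_train_epochs=30,                        # "num_train_epochs": 30
    eval_strategy="steps",       # "evaluation_strategy" on older versions
    eval_steps=100,                             # "eval_steps": 100
    save_steps=400,                             # "save_steps": 400
    logging_steps=100,                          # "logging_steps": 100
    load_best_model_at_end=True,                # required for early stopping
    metric_for_best_model="loss",  # assumed; best_metric matches eval_loss
    greater_is_better=False,
)

early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,                  # from the state file
    early_stopping_threshold=0.0,
)
# These would then be passed to Trainer(args=training_args,
# callbacks=[early_stopping], ...) together with the model, processor,
# and the NYAGEN train/eval datasets.
```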