Priyanship committed on
Commit
c02e18b
·
verified ·
1 Parent(s): 4088bb4

End of training

Browse files
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 419.5198
20
- - Wer: 0.7614
21
- - Cer: 0.3089
22
 
23
  ## Model description
24
 
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 271.5626
20
+ - Wer: 0.4858
21
+ - Cer: 0.1540
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_cer": 0.15402855308007707,
4
+ "eval_loss": 271.56256103515625,
5
+ "eval_runtime": 26.2754,
6
+ "eval_samples": 890,
7
+ "eval_samples_per_second": 33.872,
8
+ "eval_steps_per_second": 4.263,
9
+ "eval_wer": 0.48579545454545453,
10
+ "total_flos": 3.6380056389804335e+19,
11
+ "train_loss": 870.7509072951121,
12
+ "train_runtime": 30399.5697,
13
+ "train_samples": 58040,
14
+ "train_samples_per_second": 114.554,
15
+ "train_steps_per_second": 7.161
16
+ }
eval--in domain_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_cer": 0.15402855308007707,
4
+ "eval_loss": 271.56256103515625,
5
+ "eval_runtime": 26.2754,
6
+ "eval_samples": 890,
7
+ "eval_samples_per_second": 33.872,
8
+ "eval_steps_per_second": 4.263,
9
+ "eval_wer": 0.48579545454545453
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "total_flos": 3.6380056389804335e+19,
4
+ "train_loss": 870.7509072951121,
5
+ "train_runtime": 30399.5697,
6
+ "train_samples": 58040,
7
+ "train_samples_per_second": 114.554,
8
+ "train_steps_per_second": 7.161
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4881628787878788,
3
+ "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-cont-pt-22k/outputs/shuff_100_no_idv/wr20/checkpoint-7256",
4
+ "epoch": 12.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43536,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 3081.265380859375,
14
+ "learning_rate": 4.1482910694597574e-05,
15
+ "loss": 2875.4683,
16
+ "step": 3628
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_cer": 0.17331917205947736,
21
+ "eval_loss": 287.1173400878906,
22
+ "eval_runtime": 25.5716,
23
+ "eval_samples_per_second": 34.804,
24
+ "eval_steps_per_second": 4.38,
25
+ "eval_wer": 0.5699179292929293,
26
+ "step": 3628
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 591.99072265625,
31
+ "learning_rate": 8.313809261300991e-05,
32
+ "loss": 758.2716,
33
+ "step": 7256
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_cer": 0.1550906486192758,
38
+ "eval_loss": 272.05938720703125,
39
+ "eval_runtime": 24.8059,
40
+ "eval_samples_per_second": 35.879,
41
+ "eval_steps_per_second": 4.515,
42
+ "eval_wer": 0.4881628787878788,
43
+ "step": 7256
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 477.86785888671875,
48
+ "learning_rate": 0.0001248047592796766,
49
+ "loss": 644.8381,
50
+ "step": 10884
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_cer": 0.1729486736155708,
55
+ "eval_loss": 290.3243713378906,
56
+ "eval_runtime": 25.3553,
57
+ "eval_samples_per_second": 35.101,
58
+ "eval_steps_per_second": 4.417,
59
+ "eval_wer": 0.506155303030303,
60
+ "step": 10884
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 748.2718505859375,
65
+ "learning_rate": 0.00016645994119808893,
66
+ "loss": 609.5405,
67
+ "step": 14512
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_cer": 0.18673121572889395,
72
+ "eval_loss": 257.9621887207031,
73
+ "eval_runtime": 24.6972,
74
+ "eval_samples_per_second": 36.037,
75
+ "eval_steps_per_second": 4.535,
76
+ "eval_wer": 0.5173611111111112,
77
+ "step": 14512
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 547.5361938476562,
82
+ "learning_rate": 0.0002081151231165013,
83
+ "loss": 606.3339,
84
+ "step": 18140
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_cer": 0.22798004248382156,
89
+ "eval_loss": 296.2996520996094,
90
+ "eval_runtime": 26.0793,
91
+ "eval_samples_per_second": 34.127,
92
+ "eval_steps_per_second": 4.295,
93
+ "eval_wer": 0.5667613636363636,
94
+ "step": 18140
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 480.7348937988281,
99
+ "learning_rate": 0.00024977030503491363,
100
+ "loss": 619.5554,
101
+ "step": 21768
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_cer": 0.2187916810749395,
106
+ "eval_loss": 324.2591247558594,
107
+ "eval_runtime": 24.5568,
108
+ "eval_samples_per_second": 36.243,
109
+ "eval_steps_per_second": 4.561,
110
+ "eval_wer": 0.5901199494949495,
111
+ "step": 21768
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 429.8402404785156,
116
+ "learning_rate": 0.00029141400220507167,
117
+ "loss": 646.4168,
118
+ "step": 25396
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_cer": 0.24396087536432348,
123
+ "eval_loss": 341.2834167480469,
124
+ "eval_runtime": 25.9793,
125
+ "eval_samples_per_second": 34.258,
126
+ "eval_steps_per_second": 4.311,
127
+ "eval_wer": 0.5953282828282829,
128
+ "step": 25396
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "grad_norm": 477.2936096191406,
133
+ "learning_rate": 0.0003330576993752297,
134
+ "loss": 667.3437,
135
+ "step": 29024
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_cer": 0.22546065306525712,
140
+ "eval_loss": 361.0743713378906,
141
+ "eval_runtime": 25.9631,
142
+ "eval_samples_per_second": 34.279,
143
+ "eval_steps_per_second": 4.314,
144
+ "eval_wer": 0.6283143939393939,
145
+ "step": 29024
146
+ },
147
+ {
148
+ "epoch": 9.0,
149
+ "grad_norm": 485.8271179199219,
150
+ "learning_rate": 0.00037471288129364204,
151
+ "loss": 693.7482,
152
+ "step": 32652
153
+ },
154
+ {
155
+ "epoch": 9.0,
156
+ "eval_cer": 0.268537272143457,
157
+ "eval_loss": 364.4578857421875,
158
+ "eval_runtime": 24.4432,
159
+ "eval_samples_per_second": 36.411,
160
+ "eval_steps_per_second": 4.582,
161
+ "eval_wer": 0.6597222222222222,
162
+ "step": 32652
163
+ },
164
+ {
165
+ "epoch": 10.0,
166
+ "grad_norm": 167.16514587402344,
167
+ "learning_rate": 0.0004163450937155457,
168
+ "loss": 733.3208,
169
+ "step": 36280
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "eval_cer": 0.28022032307464306,
174
+ "eval_loss": 409.5367736816406,
175
+ "eval_runtime": 26.9757,
176
+ "eval_samples_per_second": 32.993,
177
+ "eval_steps_per_second": 4.152,
178
+ "eval_wer": 0.7001262626262627,
179
+ "step": 36280
180
+ },
181
+ {
182
+ "epoch": 11.0,
183
+ "grad_norm": 588.0139770507812,
184
+ "learning_rate": 0.0004580002756339581,
185
+ "loss": 780.5633,
186
+ "step": 39908
187
+ },
188
+ {
189
+ "epoch": 11.0,
190
+ "eval_cer": 0.2849380032603863,
191
+ "eval_loss": 410.3908386230469,
192
+ "eval_runtime": 24.8976,
193
+ "eval_samples_per_second": 35.746,
194
+ "eval_steps_per_second": 4.498,
195
+ "eval_wer": 0.7154356060606061,
196
+ "step": 39908
197
+ },
198
+ {
199
+ "epoch": 12.0,
200
+ "grad_norm": 167.97271728515625,
201
+ "learning_rate": 0.0004996669423006247,
202
+ "loss": 813.6104,
203
+ "step": 43536
204
+ },
205
+ {
206
+ "epoch": 12.0,
207
+ "eval_cer": 0.3088722027367485,
208
+ "eval_loss": 419.51983642578125,
209
+ "eval_runtime": 27.7709,
210
+ "eval_samples_per_second": 32.048,
211
+ "eval_steps_per_second": 4.033,
212
+ "eval_wer": 0.7613636363636364,
213
+ "step": 43536
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "step": 43536,
218
+ "total_flos": 3.6380056389804335e+19,
219
+ "train_loss": 870.7509072951121,
220
+ "train_runtime": 30399.5697,
221
+ "train_samples_per_second": 114.554,
222
+ "train_steps_per_second": 7.161
223
+ }
224
+ ],
225
+ "logging_steps": 500,
226
+ "max_steps": 217680,
227
+ "num_input_tokens_seen": 0,
228
+ "num_train_epochs": 60,
229
+ "save_steps": 500,
230
+ "stateful_callbacks": {
231
+ "EarlyStoppingCallback": {
232
+ "args": {
233
+ "early_stopping_patience": 10,
234
+ "early_stopping_threshold": 0.0
235
+ },
236
+ "attributes": {
237
+ "early_stopping_patience_counter": 10
238
+ }
239
+ },
240
+ "TrainerControl": {
241
+ "args": {
242
+ "should_epoch_stop": false,
243
+ "should_evaluate": false,
244
+ "should_log": false,
245
+ "should_save": true,
246
+ "should_training_stop": true
247
+ },
248
+ "attributes": {}
249
+ }
250
+ },
251
+ "total_flos": 3.6380056389804335e+19,
252
+ "train_batch_size": 16,
253
+ "trial_name": null,
254
+ "trial_params": null
255
+ }