Priyanship committed on
Commit
717cd93
·
verified ·
1 Parent(s): bdde839

End of training

Browse files
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 418.4753
20
- - Wer: 0.7314
21
- - Cer: 0.2984
22
 
23
  ## Model description
24
 
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 279.1175
20
+ - Wer: 0.5077
21
+ - Cer: 0.1722
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_cer": 0.1721829768314973,
4
+ "eval_loss": 279.11749267578125,
5
+ "eval_runtime": 24.6629,
6
+ "eval_samples": 890,
7
+ "eval_samples_per_second": 36.087,
8
+ "eval_steps_per_second": 4.541,
9
+ "eval_wer": 0.5077335858585859,
10
+ "total_flos": 3.6380056389804335e+19,
11
+ "train_loss": 929.5830404722528,
12
+ "train_runtime": 31654.3209,
13
+ "train_samples": 58040,
14
+ "train_samples_per_second": 110.013,
15
+ "train_steps_per_second": 6.877
16
+ }
eval--in domain_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "eval_cer": 0.1721829768314973,
4
+ "eval_loss": 279.11749267578125,
5
+ "eval_runtime": 24.6629,
6
+ "eval_samples": 890,
7
+ "eval_samples_per_second": 36.087,
8
+ "eval_steps_per_second": 4.541,
9
+ "eval_wer": 0.5077335858585859
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "total_flos": 3.6380056389804335e+19,
4
+ "train_loss": 929.5830404722528,
5
+ "train_runtime": 31654.3209,
6
+ "train_samples": 58040,
7
+ "train_samples_per_second": 110.013,
8
+ "train_steps_per_second": 6.877
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5064709595959596,
3
+ "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-cont-pt-22k/outputs/shuff_100_no_idv/wr15/checkpoint-7256",
4
+ "epoch": 12.0,
5
+ "eval_steps": 500,
6
+ "global_step": 43536,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 953.8694458007812,
14
+ "learning_rate": 5.5341173588141614e-05,
15
+ "loss": 2898.1974,
16
+ "step": 3628
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_cer": 0.18934940473250012,
21
+ "eval_loss": 315.9604187011719,
22
+ "eval_runtime": 25.3347,
23
+ "eval_samples_per_second": 35.13,
24
+ "eval_steps_per_second": 4.421,
25
+ "eval_wer": 0.6330492424242424,
26
+ "step": 3628
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "grad_norm": 666.8331909179688,
31
+ "learning_rate": 0.00011088141614602474,
32
+ "loss": 757.7022,
33
+ "step": 7256
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_cer": 0.17208417724645556,
38
+ "eval_loss": 279.52197265625,
39
+ "eval_runtime": 25.5129,
40
+ "eval_samples_per_second": 34.884,
41
+ "eval_steps_per_second": 4.39,
42
+ "eval_wer": 0.5064709595959596,
43
+ "step": 7256
44
+ },
45
+ {
46
+ "epoch": 3.0,
47
+ "grad_norm": 236.5342559814453,
48
+ "learning_rate": 0.0001664216587039079,
49
+ "loss": 662.3705,
50
+ "step": 10884
51
+ },
52
+ {
53
+ "epoch": 3.0,
54
+ "eval_cer": 0.17640665909203182,
55
+ "eval_loss": 301.2926330566406,
56
+ "eval_runtime": 25.9701,
57
+ "eval_samples_per_second": 34.27,
58
+ "eval_steps_per_second": 4.313,
59
+ "eval_wer": 0.525094696969697,
60
+ "step": 10884
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "grad_norm": 595.1166381835938,
65
+ "learning_rate": 0.000221961901261791,
66
+ "loss": 647.4321,
67
+ "step": 14512
68
+ },
69
+ {
70
+ "epoch": 4.0,
71
+ "eval_cer": 0.2026873487131354,
72
+ "eval_loss": 309.8589172363281,
73
+ "eval_runtime": 32.0472,
74
+ "eval_samples_per_second": 27.772,
75
+ "eval_steps_per_second": 3.495,
76
+ "eval_wer": 0.5494002525252525,
77
+ "step": 14512
78
+ },
79
+ {
80
+ "epoch": 5.0,
81
+ "grad_norm": 320.42572021484375,
82
+ "learning_rate": 0.00027747151782432926,
83
+ "loss": 659.7851,
84
+ "step": 18140
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "eval_cer": 0.20700983055871167,
89
+ "eval_loss": 309.31988525390625,
90
+ "eval_runtime": 25.5466,
91
+ "eval_samples_per_second": 34.838,
92
+ "eval_steps_per_second": 4.384,
93
+ "eval_wer": 0.5861742424242424,
94
+ "step": 18140
95
+ },
96
+ {
97
+ "epoch": 6.0,
98
+ "grad_norm": 1806.6942138671875,
99
+ "learning_rate": 0.00033302707337988486,
100
+ "loss": 692.9787,
101
+ "step": 21768
102
+ },
103
+ {
104
+ "epoch": 6.0,
105
+ "eval_cer": 0.24119448698315468,
106
+ "eval_loss": 373.3182678222656,
107
+ "eval_runtime": 30.9297,
108
+ "eval_samples_per_second": 28.775,
109
+ "eval_steps_per_second": 3.621,
110
+ "eval_wer": 0.6486742424242424,
111
+ "step": 21768
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "grad_norm": 305.56292724609375,
116
+ "learning_rate": 0.00038855200294009556,
117
+ "loss": 739.3389,
118
+ "step": 25396
119
+ },
120
+ {
121
+ "epoch": 7.0,
122
+ "eval_cer": 0.2555945265029887,
123
+ "eval_loss": 379.1961975097656,
124
+ "eval_runtime": 24.9788,
125
+ "eval_samples_per_second": 35.63,
126
+ "eval_steps_per_second": 4.484,
127
+ "eval_wer": 0.6764520202020202,
128
+ "step": 25396
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "grad_norm": 1148.437744140625,
133
+ "learning_rate": 0.0004440769325003063,
134
+ "loss": 786.0181,
135
+ "step": 29024
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "eval_cer": 0.2728597539890332,
140
+ "eval_loss": 414.1454772949219,
141
+ "eval_runtime": 25.8475,
142
+ "eval_samples_per_second": 34.433,
143
+ "eval_steps_per_second": 4.333,
144
+ "eval_wer": 0.7021780303030303,
145
+ "step": 29024
146
+ },
147
+ {
148
+ "epoch": 9.0,
149
+ "grad_norm": 790.4903564453125,
150
+ "learning_rate": 0.000499601862060517,
151
+ "loss": 827.0145,
152
+ "step": 32652
153
+ },
154
+ {
155
+ "epoch": 9.0,
156
+ "eval_cer": 0.34004347181741834,
157
+ "eval_loss": 441.2162170410156,
158
+ "eval_runtime": 27.3805,
159
+ "eval_samples_per_second": 32.505,
160
+ "eval_steps_per_second": 4.09,
161
+ "eval_wer": 0.7870896464646465,
162
+ "step": 32652
163
+ },
164
+ {
165
+ "epoch": 10.0,
166
+ "grad_norm": 779.6360473632812,
167
+ "learning_rate": 0.0004902690403614588,
168
+ "loss": 849.8929,
169
+ "step": 36280
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "eval_cer": 0.2962752556439263,
174
+ "eval_loss": 422.385498046875,
175
+ "eval_runtime": 28.0021,
176
+ "eval_samples_per_second": 31.783,
177
+ "eval_steps_per_second": 4.0,
178
+ "eval_wer": 0.7258522727272727,
179
+ "step": 36280
180
+ },
181
+ {
182
+ "epoch": 11.0,
183
+ "grad_norm": 399.57818603515625,
184
+ "learning_rate": 0.0004804651187928314,
185
+ "loss": 830.547,
186
+ "step": 39908
187
+ },
188
+ {
189
+ "epoch": 11.0,
190
+ "eval_cer": 0.3045744207874327,
191
+ "eval_loss": 426.62811279296875,
192
+ "eval_runtime": 24.2088,
193
+ "eval_samples_per_second": 36.763,
194
+ "eval_steps_per_second": 4.626,
195
+ "eval_wer": 0.7506313131313131,
196
+ "step": 39908
197
+ },
198
+ {
199
+ "epoch": 12.0,
200
+ "grad_norm": 1422.100341796875,
201
+ "learning_rate": 0.0004706693041053246,
202
+ "loss": 803.7191,
203
+ "step": 43536
204
+ },
205
+ {
206
+ "epoch": 12.0,
207
+ "eval_cer": 0.29839944672232377,
208
+ "eval_loss": 418.475341796875,
209
+ "eval_runtime": 26.6689,
210
+ "eval_samples_per_second": 33.372,
211
+ "eval_steps_per_second": 4.2,
212
+ "eval_wer": 0.7313762626262627,
213
+ "step": 43536
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "step": 43536,
218
+ "total_flos": 3.6380056389804335e+19,
219
+ "train_loss": 929.5830404722528,
220
+ "train_runtime": 31654.3209,
221
+ "train_samples_per_second": 110.013,
222
+ "train_steps_per_second": 6.877
223
+ }
224
+ ],
225
+ "logging_steps": 500,
226
+ "max_steps": 217680,
227
+ "num_input_tokens_seen": 0,
228
+ "num_train_epochs": 60,
229
+ "save_steps": 500,
230
+ "stateful_callbacks": {
231
+ "EarlyStoppingCallback": {
232
+ "args": {
233
+ "early_stopping_patience": 10,
234
+ "early_stopping_threshold": 0.0
235
+ },
236
+ "attributes": {
237
+ "early_stopping_patience_counter": 10
238
+ }
239
+ },
240
+ "TrainerControl": {
241
+ "args": {
242
+ "should_epoch_stop": false,
243
+ "should_evaluate": false,
244
+ "should_log": false,
245
+ "should_save": true,
246
+ "should_training_stop": true
247
+ },
248
+ "attributes": {}
249
+ }
250
+ },
251
+ "total_flos": 3.6380056389804335e+19,
252
+ "train_batch_size": 16,
253
+ "trial_name": null,
254
+ "trial_params": null
255
+ }