Priyanship commited on
Commit
4b5921e
·
verified ·
1 Parent(s): f7f9ea2

Training in progress, step 500

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 152,
3
+ "<s>": 151
4
+ }
config.json CHANGED
@@ -104,6 +104,6 @@
104
  "torch_dtype": "float32",
105
  "transformers_version": "4.43.1",
106
  "use_weighted_layer_sum": false,
107
- "vocab_size": 151,
108
  "xvector_output_dim": 512
109
  }
 
104
  "torch_dtype": "float32",
105
  "transformers_version": "4.43.1",
106
  "use_weighted_layer_sum": false,
107
+ "vocab_size": 153,
108
  "xvector_output_dim": 512
109
  }
indicwav2vec_trainwtagsv2_MUCS_warmup500_s300shuff100_2604375.out ADDED
The diff for this file is too large to render. See raw diff
 
indicwav2vec_trainwtagsv2_MUCS_warmup500_s300shuff100_2604567.out ADDED
The diff for this file is too large to render. See raw diff
 
indicwav2vec_trainwtagsv2_MUCS_warmup500_s300shuff100_2604812.out ADDED
@@ -0,0 +1,759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/5000 [00:00<?, ?it/s]/scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/models/wav2vec2/processing_wav2vec2.py:157: UserWarning: `as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.
 
 
 
 
 
 
1
  0%| | 1/5000 [00:29<40:18:00, 29.02s/it]
2
 
3
  0%| | 1/5000 [00:29<40:18:00, 29.02s/it]
4
  0%| | 2/5000 [00:37<23:19:44, 16.80s/it]
5
 
6
  0%| | 2/5000 [00:37<23:19:44, 16.80s/it]
7
  0%| | 3/5000 [00:44<17:11:59, 12.39s/it]
8
 
9
  0%| | 3/5000 [00:44<17:11:59, 12.39s/it]
10
  0%| | 4/5000 [00:50<13:57:56, 10.06s/it]
11
 
12
  0%| | 4/5000 [00:50<13:57:56, 10.06s/it]
13
  0%| | 5/5000 [00:56<11:57:47, 8.62s/it]
14
 
15
  0%| | 5/5000 [00:56<11:57:47, 8.62s/it]
16
  0%| | 6/5000 [01:02<10:40:40, 7.70s/it]
17
 
18
  0%| | 6/5000 [01:02<10:40:40, 7.70s/it]
19
  0%| | 7/5000 [01:08<9:37:29, 6.94s/it]
20
 
21
  0%| | 7/5000 [01:08<9:37:29, 6.94s/it]
22
  0%| | 8/5000 [01:13<8:55:38, 6.44s/it]
23
 
24
  0%| | 8/5000 [01:13<8:55:38, 6.44s/it]
25
  0%| | 9/5000 [01:18<8:18:48, 6.00s/it]
26
 
27
  0%| | 9/5000 [01:18<8:18:48, 6.00s/it]
28
  0%| | 10/5000 [01:23<7:45:40, 5.60s/it]
29
 
30
  0%| | 10/5000 [01:23<7:45:40, 5.60s/it]
31
  0%| | 11/5000 [01:28<7:23:07, 5.33s/it]
32
 
33
  0%| | 11/5000 [01:28<7:23:07, 5.33s/it]
34
  0%| | 12/5000 [01:32<7:04:45, 5.11s/it]
35
 
36
  0%| | 12/5000 [01:32<7:04:45, 5.11s/it]
37
  0%| | 13/5000 [01:36<6:39:23, 4.81s/it]
38
 
39
  0%| | 13/5000 [01:36<6:39:23, 4.81s/it]
40
  0%| | 14/5000 [01:40<6:21:49, 4.59s/it]
41
 
42
  0%| | 14/5000 [01:40<6:21:49, 4.59s/it]
43
  0%| | 15/5000 [01:44<6:09:14, 4.44s/it]
44
 
45
  0%| | 15/5000 [01:45<6:09:14, 4.44s/it]
46
  0%| | 16/5000 [01:49<6:00:43, 4.34s/it]
47
 
48
  0%| | 16/5000 [01:49<6:00:43, 4.34s/it]
49
  0%| | 17/5000 [01:53<5:54:11, 4.26s/it]
50
 
51
  0%| | 17/5000 [01:53<5:54:11, 4.26s/it]
52
  0%| | 18/5000 [01:57<5:46:37, 4.17s/it]
53
 
54
  0%| | 18/5000 [01:57<5:46:37, 4.17s/it]
55
  0%| | 19/5000 [02:00<5:29:26, 3.97s/it]
56
 
57
  0%| | 19/5000 [02:00<5:29:26, 3.97s/it]
58
  0%| | 20/5000 [02:04<5:17:04, 3.82s/it]
59
 
60
  0%| | 20/5000 [02:04<5:17:04, 3.82s/it]
61
  0%| | 21/5000 [02:07<5:08:53, 3.72s/it]
62
 
63
  0%| | 21/5000 [02:07<5:08:53, 3.72s/it]
64
  0%| | 22/5000 [02:11<5:02:43, 3.65s/it]
65
 
66
  0%| | 22/5000 [02:11<5:02:43, 3.65s/it]
67
  0%| | 23/5000 [02:14<4:58:09, 3.59s/it]
68
 
69
  0%| | 23/5000 [02:14<4:58:09, 3.59s/it]
70
  0%| | 24/5000 [02:17<4:53:07, 3.53s/it]
71
 
72
  0%| | 24/5000 [02:17<4:53:07, 3.53s/it]
73
  0%| | 25/5000 [02:20<4:40:47, 3.39s/it]
74
 
75
  0%| | 25/5000 [02:21<4:40:47, 3.39s/it]
76
  1%| | 26/5000 [02:23<4:28:32, 3.24s/it]
77
 
78
  1%| | 26/5000 [02:23<4:28:32, 3.24s/it]
79
  1%| | 27/5000 [02:26<4:20:26, 3.14s/it]
80
 
81
  1%| | 27/5000 [02:26<4:20:26, 3.14s/it]
82
  1%| | 28/5000 [02:29<4:14:25, 3.07s/it]
83
 
84
  1%| | 28/5000 [02:29<4:14:25, 3.07s/it]
85
  1%| | 29/5000 [02:32<4:10:13, 3.02s/it]
86
 
87
  1%| | 29/5000 [02:32<4:10:13, 3.02s/it]
88
  1%| | 30/5000 [02:35<4:07:35, 2.99s/it]
89
 
90
  1%| | 30/5000 [02:35<4:07:35, 2.99s/it]
91
  1%| | 31/5000 [02:38<4:05:17, 2.96s/it]
92
 
93
  1%| | 31/5000 [02:38<4:05:17, 2.96s/it]
94
  1%| | 32/5000 [02:41<4:02:49, 2.93s/it]
95
 
96
  1%| | 32/5000 [02:41<4:02:49, 2.93s/it]
97
  1%| | 33/5000 [02:43<3:52:45, 2.81s/it]
98
 
99
  1%| | 33/5000 [02:43<3:52:45, 2.81s/it]
100
  1%| | 34/5000 [02:46<3:40:59, 2.67s/it]
101
 
102
  1%| | 34/5000 [02:46<3:40:59, 2.67s/it]
103
  1%| | 35/5000 [02:48<3:32:36, 2.57s/it]
104
 
105
  1%| | 35/5000 [02:48<3:32:36, 2.57s/it]
106
  1%| | 36/5000 [02:50<3:27:01, 2.50s/it]
107
 
108
  1%| | 36/5000 [02:50<3:27:01, 2.50s/it]
109
  1%| | 37/5000 [02:53<3:23:48, 2.46s/it]
110
 
111
  1%| | 37/5000 [02:53<3:23:48, 2.46s/it]
112
  1%| | 38/5000 [02:55<3:20:45, 2.43s/it]
113
 
114
  1%| | 38/5000 [02:55<3:20:45, 2.43s/it]
115
  1%| | 39/5000 [02:57<3:19:09, 2.41s/it]
116
 
117
  1%| | 39/5000 [02:57<3:19:09, 2.41s/it]
118
  1%| | 40/5000 [03:00<3:16:32, 2.38s/it]
119
 
120
  1%| | 40/5000 [03:00<3:16:32, 2.38s/it]
121
  1%| | 41/5000 [03:02<3:06:39, 2.26s/it]
122
 
123
  1%| | 41/5000 [03:02<3:06:39, 2.26s/it]
124
  1%| | 42/5000 [03:03<2:54:05, 2.11s/it]
125
 
126
  1%| | 42/5000 [03:03<2:54:05, 2.11s/it]
127
  1%| | 43/5000 [03:05<2:45:18, 2.00s/it]
128
 
129
  1%| | 43/5000 [03:05<2:45:18, 2.00s/it]
130
  1%| | 44/5000 [03:07<2:39:10, 1.93s/it]
131
 
132
  1%| | 44/5000 [03:07<2:39:10, 1.93s/it]
133
  1%| | 45/5000 [03:09<2:34:53, 1.88s/it]
134
 
135
  1%| | 45/5000 [03:09<2:34:53, 1.88s/it]
136
  1%| | 46/5000 [03:10<2:31:36, 1.84s/it]
137
 
138
  1%| | 46/5000 [03:10<2:31:36, 1.84s/it]
139
  1%| | 47/5000 [03:12<2:26:21, 1.77s/it]
140
 
141
  1%| | 47/5000 [03:12<2:26:21, 1.77s/it]
142
  1%| | 48/5000 [03:13<2:12:05, 1.60s/it]
143
 
144
  1%| | 48/5000 [03:13<2:12:05, 1.60s/it]
145
  1%| | 49/5000 [03:14<2:02:16, 1.48s/it]
146
 
147
  1%| | 49/5000 [03:15<2:02:16, 1.48s/it]
148
  1%| | 50/5000 [03:18<2:44:51, 2.00s/it]
149
 
150
  1%| | 50/5000 [03:18<2:44:51, 2.00s/it]
151
  1%| | 51/5000 [03:34<8:36:31, 6.26s/it]
152
 
153
  1%| | 51/5000 [03:34<8:36:31, 6.26s/it]
154
  1%| | 52/5000 [03:43<9:46:07, 7.11s/it]
155
 
156
  1%| | 52/5000 [03:43<9:46:07, 7.11s/it]
157
  1%| | 53/5000 [03:51<10:03:31, 7.32s/it]
158
 
159
  1%| | 53/5000 [03:51<10:03:31, 7.32s/it]
160
  1%| | 54/5000 [03:58<9:52:33, 7.19s/it]
161
 
162
  1%| | 54/5000 [03:58<9:52:33, 7.19s/it]
163
  1%| | 55/5000 [04:04<9:35:40, 6.98s/it]
164
 
165
  1%| | 55/5000 [04:04<9:35:40, 6.98s/it]
166
  1%| | 56/5000 [04:10<9:09:54, 6.67s/it]
167
 
168
  1%| | 56/5000 [04:10<9:09:54, 6.67s/it]
169
  1%| | 57/5000 [04:16<8:42:59, 6.35s/it]
170
 
171
  1%| | 57/5000 [04:16<8:42:59, 6.35s/it]
172
  1%| | 58/5000 [04:21<8:19:26, 6.06s/it]
173
 
174
  1%| | 58/5000 [04:21<8:19:26, 6.06s/it]
175
  1%| | 59/5000 [04:26<7:52:24, 5.74s/it]
176
 
177
  1%| | 59/5000 [04:26<7:52:24, 5.74s/it]
178
  1%| | 60/5000 [04:31<7:27:01, 5.43s/it]
179
 
180
  1%| | 60/5000 [04:31<7:27:01, 5.43s/it]
181
  1%| | 61/5000 [04:36<7:09:13, 5.21s/it]
182
 
183
  1%| | 61/5000 [04:36<7:09:13, 5.21s/it]
184
  1%| | 62/5000 [04:40<6:47:20, 4.95s/it]
185
 
186
  1%| | 62/5000 [04:40<6:47:20, 4.95s/it]
187
  1%|▏ | 63/5000 [04:44<6:26:34, 4.70s/it]
188
 
189
  1%|▏ | 63/5000 [04:44<6:26:34, 4.70s/it]
190
  1%|▏ | 64/5000 [04:48<6:12:16, 4.53s/it]
191
 
192
  1%|▏ | 64/5000 [04:48<6:12:16, 4.53s/it]
193
  1%|▏ | 65/5000 [04:52<6:02:02, 4.40s/it]
194
 
195
  1%|▏ | 65/5000 [04:52<6:02:02, 4.40s/it]
196
  1%|▏ | 66/5000 [04:56<5:56:16, 4.33s/it]
197
 
198
  1%|▏ | 66/5000 [04:56<5:56:16, 4.33s/it]
199
  1%|▏ | 67/5000 [05:00<5:49:42, 4.25s/it]
200
 
201
  1%|▏ | 67/5000 [05:00<5:49:42, 4.25s/it]
202
  1%|▏ | 68/5000 [05:04<5:36:05, 4.09s/it]
203
 
204
  1%|▏ | 68/5000 [05:04<5:36:05, 4.09s/it]
205
  1%|▏ | 69/5000 [05:08<5:21:05, 3.91s/it]
206
 
207
  1%|▏ | 69/5000 [05:08<5:21:05, 3.91s/it]
208
  1%|▏ | 70/5000 [05:11<5:10:13, 3.78s/it]
209
 
210
  1%|▏ | 70/5000 [05:11<5:10:13, 3.78s/it]
211
  1%|▏ | 71/5000 [05:15<5:02:22, 3.68s/it]
212
 
213
  1%|▏ | 71/5000 [05:15<5:02:22, 3.68s/it]
214
  1%|▏ | 72/5000 [05:18<4:57:31, 3.62s/it]
215
 
216
  1%|▏ | 72/5000 [05:18<4:57:31, 3.62s/it]
217
  1%|▏ | 73/5000 [05:22<4:54:11, 3.58s/it]
218
 
219
  1%|▏ | 73/5000 [05:22<4:54:11, 3.58s/it]
220
  1%|▏ | 74/5000 [05:25<4:48:19, 3.51s/it]
221
 
222
  1%|▏ | 74/5000 [05:25<4:48:19, 3.51s/it]
223
  2%|▏ | 75/5000 [05:28<4:32:54, 3.32s/it]
224
 
225
  2%|▏ | 75/5000 [05:28<4:32:54, 3.32s/it]
226
  2%|▏ | 76/5000 [05:31<4:22:10, 3.19s/it]
227
 
228
  2%|▏ | 76/5000 [05:31<4:22:10, 3.19s/it]
229
  2%|▏ | 77/5000 [05:34<4:14:46, 3.11s/it]
230
 
231
  2%|▏ | 77/5000 [05:34<4:14:46, 3.11s/it]
232
  2%|▏ | 78/5000 [05:36<4:09:24, 3.04s/it]
233
 
234
  2%|▏ | 78/5000 [05:36<4:09:24, 3.04s/it]
235
  2%|▏ | 79/5000 [05:39<4:06:06, 3.00s/it]
236
 
237
  2%|▏ | 79/5000 [05:39<4:06:06, 3.00s/it]
238
  2%|▏ | 80/5000 [05:42<4:03:53, 2.97s/it]
239
 
240
  2%|▏ | 80/5000 [05:42<4:03:53, 2.97s/it]
241
  2%|▏ | 81/5000 [05:45<4:02:14, 2.95s/it]
242
 
243
  2%|▏ | 81/5000 [05:45<4:02:14, 2.95s/it]
244
  2%|▏ | 82/5000 [05:48<4:00:09, 2.93s/it]
245
 
246
  2%|▏ | 82/5000 [05:48<4:00:09, 2.93s/it]
247
  2%|▏ | 83/5000 [05:51<3:50:27, 2.81s/it]
248
 
249
  2%|▏ | 83/5000 [05:51<3:50:27, 2.81s/it]
250
  2%|▏ | 84/5000 [05:53<3:38:44, 2.67s/it]
251
 
252
  2%|▏ | 84/5000 [05:53<3:38:44, 2.67s/it]
253
  2%|▏ | 85/5000 [05:55<3:30:25, 2.57s/it]
254
 
255
  2%|▏ | 85/5000 [05:55<3:30:25, 2.57s/it]
256
  2%|▏ | 86/5000 [05:58<3:24:40, 2.50s/it]
257
 
258
  2%|▏ | 86/5000 [05:58<3:24:40, 2.50s/it]
259
  2%|▏ | 87/5000 [06:00<3:20:40, 2.45s/it]
260
 
261
  2%|▏ | 87/5000 [06:00<3:20:40, 2.45s/it]
262
  2%|▏ | 88/5000 [06:02<3:18:13, 2.42s/it]
263
 
264
  2%|▏ | 88/5000 [06:02<3:18:13, 2.42s/it]
265
  2%|▏ | 89/5000 [06:05<3:16:04, 2.40s/it]
266
 
267
  2%|▏ | 89/5000 [06:05<3:16:04, 2.40s/it]
268
  2%|▏ | 90/5000 [06:07<3:14:09, 2.37s/it]
269
 
270
  2%|▏ | 90/5000 [06:07<3:14:09, 2.37s/it]
271
  2%|▏ | 91/5000 [06:09<3:04:04, 2.25s/it]
272
 
273
  2%|▏ | 91/5000 [06:09<3:04:04, 2.25s/it]
274
  2%|▏ | 92/5000 [06:11<2:51:48, 2.10s/it]
275
 
276
  2%|▏ | 92/5000 [06:11<2:51:48, 2.10s/it]
277
  2%|▏ | 93/5000 [06:12<2:43:14, 2.00s/it]
278
 
279
  2%|▏ | 93/5000 [06:12<2:43:14, 2.00s/it]
280
  2%|▏ | 94/5000 [06:14<2:37:10, 1.92s/it]
281
 
282
  2%|▏ | 94/5000 [06:14<2:37:10, 1.92s/it]
283
  2%|▏ | 95/5000 [06:16<2:33:11, 1.87s/it]
284
 
285
  2%|▏ | 95/5000 [06:16<2:33:11, 1.87s/it]
286
  2%|▏ | 96/5000 [06:18<2:30:32, 1.84s/it]
287
 
288
  2%|▏ | 96/5000 [06:18<2:30:32, 1.84s/it]
289
  2%|▏ | 97/5000 [06:19<2:25:12, 1.78s/it]
290
 
291
  2%|▏ | 97/5000 [06:19<2:25:12, 1.78s/it]
292
  2%|▏ | 98/5000 [06:21<2:10:59, 1.60s/it]
293
 
294
  2%|▏ | 98/5000 [06:21<2:10:59, 1.60s/it]
295
  2%|▏ | 99/5000 [06:22<2:01:03, 1.48s/it]
296
 
297
  2%|▏ | 99/5000 [06:22<2:01:03, 1.48s/it]
298
  2%|▏ | 100/5000 [06:25<2:44:15, 2.01s/it]
299
 
300
  2%|▏ | 100/5000 [06:25<2:44:15, 2.01s/it]{'loss': 53.3154, 'grad_norm': 79.69747924804688, 'learning_rate': 1.2e-06, 'epoch': 0.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  0%| | 0/196 [00:00<?, ?it/s]
 
302
  1%| | 2/196 [00:00<01:35, 2.03it/s]
 
303
  2%|▏ | 3/196 [00:02<02:33, 1.26it/s]
 
304
  2%|▏ | 4/196 [00:03<03:12, 1.00s/it]
 
305
  3%|▎ | 5/196 [00:04<03:31, 1.11s/it]
 
306
  3%|▎ | 6/196 [00:06<04:02, 1.28s/it]
 
307
  4%|▎ | 7/196 [00:08<04:19, 1.37s/it]
 
308
  4%|▍ | 8/196 [00:09<04:18, 1.37s/it]
 
309
  5%|▍ | 9/196 [00:12<05:38, 1.81s/it]
 
310
  5%|▌ | 10/196 [00:15<06:51, 2.21s/it]
 
311
  6%|▌ | 11/196 [00:18<07:59, 2.59s/it]
 
312
  6%|▌ | 12/196 [00:22<08:35, 2.80s/it]
 
313
  7%|▋ | 13/196 [00:23<07:27, 2.44s/it]
 
314
  7%|▋ | 14/196 [00:24<06:11, 2.04s/it]
 
315
  8%|▊ | 15/196 [00:25<05:08, 1.70s/it]
 
316
  8%|▊ | 16/196 [00:27<04:46, 1.59s/it]
 
317
  9%|▊ | 17/196 [00:28<04:32, 1.52s/it]
 
318
  9%|▉ | 18/196 [00:30<04:55, 1.66s/it]
 
319
  10%|▉ | 19/196 [00:33<06:19, 2.15s/it]
 
320
  10%|█ | 20/196 [00:36<06:54, 2.35s/it]
 
321
  11%|█ | 21/196 [00:39<07:06, 2.44s/it]
 
322
  11%|█ | 22/196 [00:41<06:52, 2.37s/it]
 
323
  12%|█▏ | 23/196 [00:43<06:21, 2.21s/it]
 
324
  12%|█▏ | 24/196 [00:43<05:00, 1.75s/it]
 
325
  13%|█▎ | 25/196 [00:44<04:12, 1.48s/it]
 
326
  13%|█▎ | 26/196 [00:45<03:39, 1.29s/it]
 
327
  14%|█▍ | 27/196 [00:46<03:21, 1.19s/it]
 
328
  14%|█▍ | 28/196 [00:47<03:10, 1.14s/it]
 
329
  15%|█▍ | 29/196 [00:48<03:10, 1.14s/it]
 
330
  15%|█▌ | 30/196 [00:49<03:09, 1.14s/it]
 
331
  16%|█▌ | 31/196 [00:50<02:53, 1.05s/it]
 
332
  16%|█▋ | 32/196 [00:51<02:42, 1.01it/s]
 
333
  17%|█▋ | 33/196 [00:53<03:10, 1.17s/it]
 
334
  17%|█▋ | 34/196 [00:55<04:14, 1.57s/it]
 
335
  18%|█▊ | 35/196 [00:57<04:21, 1.62s/it]
 
336
  18%|█▊ | 36/196 [00:59<05:04, 1.90s/it]
 
337
  19%|█▉ | 37/196 [01:01<05:02, 1.90s/it]
 
338
  19%|█▉ | 38/196 [01:03<04:32, 1.72s/it]
 
339
  20%|█▉ | 39/196 [01:04<04:11, 1.60s/it]
 
340
  20%|██ | 40/196 [01:05<03:49, 1.47s/it]
 
341
  21%|██ | 41/196 [01:06<03:29, 1.35s/it]
 
342
  21%|██▏ | 42/196 [01:07<03:13, 1.26s/it]
 
343
  22%|██▏ | 43/196 [01:08<03:09, 1.24s/it]
 
344
  22%|██▏ | 44/196 [01:10<03:04, 1.21s/it]
 
345
  23%|██▎ | 45/196 [01:11<02:52, 1.14s/it]
 
346
  23%|██▎ | 46/196 [01:11<02:38, 1.06s/it]
 
347
  24%|██▍ | 47/196 [01:12<02:39, 1.07s/it]
 
348
  24%|██▍ | 48/196 [01:13<02:30, 1.02s/it]
 
349
  25%|██▌ | 49/196 [01:14<02:26, 1.00it/s]
 
350
  26%|██▌ | 50/196 [01:15<02:24, 1.01it/s]
 
351
  26%|██▌ | 51/196 [01:16<02:20, 1.03it/s]
 
352
  27%|██▋ | 52/196 [01:17<02:19, 1.04it/s]
 
353
  27%|██▋ | 53/196 [01:18<02:29, 1.05s/it]
 
354
  28%|██▊ | 54/196 [01:19<02:25, 1.02s/it]
 
355
  28%|██▊ | 55/196 [01:21<02:40, 1.14s/it]
 
356
  29%|██▊ | 56/196 [01:22<02:51, 1.22s/it]
 
357
  29%|██▉ | 57/196 [01:24<03:04, 1.33s/it]
 
358
  30%|██▉ | 58/196 [01:25<03:11, 1.39s/it]
 
359
  30%|███ | 59/196 [01:27<03:09, 1.38s/it]
 
360
  31%|███ | 60/196 [01:28<02:53, 1.27s/it]
 
361
  31%|███ | 61/196 [01:29<02:35, 1.15s/it]
 
362
  32%|███▏ | 62/196 [01:30<02:30, 1.12s/it]
 
363
  32%|███▏ | 63/196 [01:31<02:30, 1.13s/it]
 
364
  33%|███▎ | 64/196 [01:32<02:30, 1.14s/it]
 
365
  33%|███▎ | 65/196 [01:33<02:25, 1.11s/it]
 
366
  34%|███▎ | 66/196 [01:34<02:30, 1.16s/it]
 
367
  34%|███▍ | 67/196 [01:36<02:35, 1.20s/it]
 
368
  35%|███▍ | 68/196 [01:37<02:59, 1.40s/it]
 
369
  35%|███▌ | 69/196 [01:39<03:06, 1.47s/it]
 
370
  36%|███▌ | 70/196 [01:40<02:51, 1.36s/it]
 
371
  36%|███▌ | 71/196 [01:41<02:39, 1.27s/it]
 
372
  37%|███▋ | 72/196 [01:42<02:25, 1.17s/it]
 
373
  37%|███▋ | 73/196 [01:43<02:11, 1.07s/it]
 
374
  38%|███▊ | 74/196 [01:44<02:00, 1.01it/s]
 
375
  38%|███▊ | 75/196 [01:45<01:57, 1.03it/s]
 
376
  39%|███▉ | 76/196 [01:46<01:53, 1.05it/s]
 
377
  39%|███▉ | 77/196 [01:47<01:58, 1.00it/s]
 
378
  40%|███▉ | 78/196 [01:48<02:08, 1.09s/it]
 
379
  40%|████ | 79/196 [01:49<02:02, 1.05s/it]
 
380
  41%|████ | 80/196 [01:50<02:09, 1.12s/it]
 
381
  41%|████▏ | 81/196 [01:52<02:13, 1.16s/it]
 
382
  42%|████▏ | 82/196 [01:53<02:08, 1.13s/it]
 
383
  42%|████▏ | 83/196 [01:54<02:10, 1.15s/it]
 
384
  43%|████▎ | 84/196 [01:55<02:13, 1.19s/it]
 
385
  43%|████▎ | 85/196 [01:56<02:11, 1.18s/it]
 
386
  44%|████▍ | 86/196 [01:58<02:15, 1.23s/it]
 
387
  44%|████▍ | 87/196 [01:59<02:11, 1.20s/it]
 
388
  45%|████▍ | 88/196 [02:00<02:15, 1.25s/it]
 
389
  45%|████▌ | 89/196 [02:01<02:19, 1.30s/it]
 
390
  46%|████▌ | 90/196 [02:03<02:13, 1.26s/it]
 
391
  46%|████▋ | 91/196 [02:04<02:08, 1.22s/it]
 
392
  47%|████▋ | 92/196 [02:05<02:03, 1.18s/it]
 
393
  47%|████▋ | 93/196 [02:06<02:08, 1.25s/it]
 
394
  48%|████▊ | 94/196 [02:08<02:11, 1.29s/it]
 
395
  48%|████▊ | 95/196 [02:09<02:04, 1.23s/it]
 
396
  49%|████▉ | 96/196 [02:10<02:10, 1.30s/it]
 
397
  49%|████▉ | 97/196 [02:11<02:06, 1.28s/it]
 
398
  50%|█████ | 98/196 [02:13<02:10, 1.33s/it]
 
399
  51%|█████ | 99/196 [02:14<01:57, 1.21s/it]
 
400
  51%|█████ | 100/196 [02:15<01:43, 1.08s/it]
 
401
  52%|█████▏ | 101/196 [02:15<01:35, 1.00s/it]
 
402
  52%|█████▏ | 102/196 [02:17<01:40, 1.07s/it]
 
403
  53%|█████▎ | 103/196 [02:18<01:46, 1.15s/it]
 
404
  53%|█████▎ | 104/196 [02:20<02:03, 1.34s/it]
 
405
  54%|█████▎ | 105/196 [02:21<02:06, 1.39s/it]
 
406
  54%|█████▍ | 106/196 [02:23<02:05, 1.40s/it]
 
407
  55%|█████▍ | 107/196 [02:24<01:57, 1.32s/it]
 
408
  55%|█████▌ | 108/196 [02:25<01:42, 1.17s/it]
 
409
  56%|█████▌ | 109/196 [02:26<01:34, 1.09s/it]
 
410
  56%|█████▌ | 110/196 [02:26<01:29, 1.04s/it]
 
411
  57%|█████▋ | 111/196 [02:28<01:29, 1.06s/it]
 
412
  57%|█████▋ | 112/196 [02:29<01:31, 1.09s/it]
 
413
  58%|█████▊ | 113/196 [02:30<01:33, 1.13s/it]
 
414
  58%|█████▊ | 114/196 [02:31<01:25, 1.04s/it]
 
415
  59%|█████▊ | 115/196 [02:32<01:21, 1.00s/it]
 
416
  59%|█████▉ | 116/196 [02:33<01:18, 1.02it/s]
 
417
  60%|█████▉ | 117/196 [02:34<01:14, 1.06it/s]
 
418
  60%|██████ | 118/196 [02:34<01:06, 1.17it/s]
 
419
  61%|██████ | 119/196 [02:35<01:08, 1.13it/s]
 
420
  61%|██████ | 120/196 [02:36<01:11, 1.06it/s]
 
421
  62%|██████▏ | 121/196 [02:37<01:13, 1.02it/s]
 
422
  62%|██████▏ | 122/196 [02:38<01:14, 1.01s/it]
 
423
  63%|██████▎ | 123/196 [02:39<01:13, 1.00s/it]
 
424
  63%|██████▎ | 124/196 [02:40<01:10, 1.02it/s]
 
425
  64%|██████▍ | 125/196 [02:41<01:10, 1.01it/s]
 
426
  64%|██████▍ | 126/196 [02:43<01:18, 1.12s/it]
 
427
  65%|██████▍ | 127/196 [02:44<01:17, 1.12s/it]
 
428
  65%|██████▌ | 128/196 [02:45<01:12, 1.06s/it]
 
429
  66%|██████▌ | 129/196 [02:46<01:11, 1.06s/it]
 
430
  66%|██████▋ | 130/196 [02:47<01:09, 1.05s/it]
 
431
  67%|██████▋ | 131/196 [02:48<01:08, 1.06s/it]
 
432
  67%|██████▋ | 132/196 [02:49<01:03, 1.01it/s]
 
433
  68%|██████▊ | 133/196 [02:50<01:01, 1.02it/s]
 
434
  68%|██████▊ | 134/196 [02:51<01:06, 1.07s/it]
 
435
  69%|██████▉ | 135/196 [02:52<01:03, 1.04s/it]
 
436
  69%|██████▉ | 136/196 [02:53<00:59, 1.00it/s]
 
437
  70%|██████▉ | 137/196 [02:54<00:58, 1.00it/s]
 
438
  70%|███████ | 138/196 [02:55<00:58, 1.01s/it]
 
439
  71%|███████ | 139/196 [02:56<00:57, 1.01s/it]
 
440
  71%|███████▏ | 140/196 [02:57<00:56, 1.00s/it]
 
441
  72%|███████▏ | 141/196 [02:58<00:55, 1.01s/it]
 
442
  72%|███████▏ | 142/196 [02:59<00:56, 1.04s/it]
 
443
  73%|███████▎ | 143/196 [03:00<00:59, 1.13s/it]
 
444
  73%|███████▎ | 144/196 [03:01<00:54, 1.05s/it]
 
445
  74%|███████▍ | 145/196 [03:02<00:49, 1.04it/s]
 
446
  74%|███████▍ | 146/196 [03:03<00:44, 1.11it/s]
 
447
  75%|███████▌ | 147/196 [03:04<00:44, 1.10it/s]
 
448
  76%|███████▌ | 148/196 [03:05<00:43, 1.10it/s]
 
449
  76%|███████▌ | 149/196 [03:05<00:41, 1.14it/s]
 
450
  77%|███████▋ | 150/196 [03:06<00:42, 1.09it/s]
 
451
  77%|███████▋ | 151/196 [03:07<00:43, 1.02it/s]
 
452
  78%|███████▊ | 152/196 [03:08<00:42, 1.04it/s]
 
453
  78%|███████▊ | 153/196 [03:09<00:41, 1.05it/s]
 
454
  79%|███████▊ | 154/196 [03:10<00:41, 1.02it/s]
 
455
  79%|███████▉ | 155/196 [03:12<00:43, 1.05s/it]
 
456
  80%|███████▉ | 156/196 [03:13<00:49, 1.25s/it]
 
457
  80%|████████ | 157/196 [03:15<00:53, 1.37s/it]
 
458
  81%|████████ | 158/196 [03:16<00:46, 1.22s/it]
 
459
  81%|████████ | 159/196 [03:17<00:41, 1.11s/it]
 
460
  82%|████████▏ | 160/196 [03:18<00:37, 1.04s/it]
 
461
  82%|████████▏ | 161/196 [03:19<00:36, 1.04s/it]
 
462
  83%|████████▎ | 162/196 [03:20<00:35, 1.04s/it]
 
463
  83%|████████▎ | 163/196 [03:21<00:33, 1.01s/it]
 
464
  84%|████████▎ | 164/196 [03:22<00:31, 1.01it/s]
 
465
  84%|████████▍ | 165/196 [03:23<00:32, 1.04s/it]
 
466
  85%|████████▍ | 166/196 [03:24<00:30, 1.01s/it]
 
467
  85%|████████▌ | 167/196 [03:25<00:28, 1.02it/s]
 
468
  86%|████████▌ | 168/196 [03:25<00:26, 1.07it/s]
 
469
  86%|████████▌ | 169/196 [03:26<00:25, 1.05it/s]
 
470
  87%|████████▋ | 170/196 [03:28<00:27, 1.06s/it]
 
471
  87%|████████▋ | 171/196 [03:29<00:25, 1.02s/it]
 
472
  88%|████████▊ | 172/196 [03:30<00:25, 1.05s/it]
 
473
  88%|████████▊ | 173/196 [03:31<00:23, 1.04s/it]
 
474
  89%|████████▉ | 174/196 [03:32<00:24, 1.09s/it]
 
475
  89%|████████▉ | 175/196 [03:34<00:27, 1.32s/it]
 
476
  90%|████████▉ | 176/196 [03:38<00:41, 2.08s/it]
 
477
  90%|█████████ | 177/196 [03:41<00:46, 2.43s/it]
 
478
  91%|█████████ | 178/196 [03:44<00:47, 2.64s/it]
 
479
  91%|█████████▏| 179/196 [03:48<00:50, 2.96s/it]
 
480
  92%|█████████▏| 180/196 [03:49<00:37, 2.36s/it]
 
481
  92%|█████████▏| 181/196 [03:50<00:29, 2.00s/it]
 
482
  93%|█████████▎| 182/196 [03:51<00:23, 1.69s/it]
 
483
  93%|█████████▎| 183/196 [03:53<00:22, 1.75s/it]
 
484
  94%|█████████▍| 184/196 [03:54<00:18, 1.50s/it]
 
485
  94%|█████████▍| 185/196 [03:55<00:15, 1.43s/it]
 
486
  95%|█████████▍| 186/196 [03:57<00:15, 1.50s/it]
 
487
  95%|█████████▌| 187/196 [03:58<00:12, 1.38s/it]
 
488
  96%|█████████▌| 188/196 [03:59<00:09, 1.24s/it]
 
489
  96%|█████████▋| 189/196 [04:00<00:08, 1.19s/it]
 
490
  97%|█████████▋| 190/196 [04:01<00:06, 1.12s/it]
 
491
  97%|█████████▋| 191/196 [04:01<00:05, 1.03s/it]
 
492
  98%|█████████▊| 192/196 [04:03<00:04, 1.03s/it]
 
493
  98%|█████████▊| 193/196 [04:04<00:03, 1.04s/it]
 
494
  99%|█████████▉| 194/196 [04:05<00:02, 1.04s/it]
 
495
  99%|█████████▉| 195/196 [04:06<00:01, 1.01s/it]
 
496
 
 
497
 
498
  2%|▏ | 100/5000 [10:40<2:44:15, 2.01s/it]
 
 
499
  
500
  2%|▏ | 101/5000 [10:56<112:44:57, 82.85s/it]
501
 
502
  2%|▏ | 101/5000 [10:56<112:44:57, 82.85s/it]
503
  2%|▏ | 102/5000 [11:05<82:27:49, 60.61s/it]
504
 
505
  2%|▏ | 102/5000 [11:05<82:27:49, 60.61s/it]
506
  2%|▏ | 103/5000 [11:13<60:46:59, 44.68s/it]
507
 
508
  2%|▏ | 103/5000 [11:13<60:46:59, 44.68s/it]
509
  2%|▏ | 104/5000 [11:19<45:14:51, 33.27s/it]
510
 
511
  2%|▏ | 104/5000 [11:19<45:14:51, 33.27s/it]
512
  2%|▏ | 105/5000 [11:26<34:13:05, 25.17s/it]
513
 
514
  2%|▏ | 105/5000 [11:26<34:13:05, 25.17s/it]
515
  2%|▏ | 106/5000 [11:32<26:22:42, 19.40s/it]
516
 
517
  2%|▏ | 106/5000 [11:32<26:22:42, 19.40s/it]
518
  2%|▏ | 107/5000 [11:37<20:40:35, 15.21s/it]
519
 
520
  2%|▏ | 107/5000 [11:37<20:40:35, 15.21s/it]
521
  2%|▏ | 108/5000 [11:42<16:39:34, 12.26s/it]
522
 
523
  2%|▏ | 108/5000 [11:42<16:39:34, 12.26s/it]
524
  2%|▏ | 109/5000 [11:47<13:41:45, 10.08s/it]
525
 
526
  2%|▏ | 109/5000 [11:47<13:41:45, 10.08s/it]
527
  2%|▏ | 110/5000 [11:52<11:31:20, 8.48s/it]
528
 
529
  2%|▏ | 110/5000 [11:52<11:31:20, 8.48s/it]
530
  2%|▏ | 111/5000 [11:57<10:00:45, 7.37s/it]
531
 
532
  2%|▏ | 111/5000 [11:57<10:00:45, 7.37s/it]
533
  2%|▏ | 112/5000 [12:02<8:54:57, 6.57s/it]
534
 
535
  2%|▏ | 112/5000 [12:02<8:54:57, 6.57s/it]
536
  2%|▏ | 113/5000 [12:06<8:00:26, 5.90s/it]
537
 
538
  2%|▏ | 113/5000 [12:06<8:00:26, 5.90s/it]
539
  2%|▏ | 114/5000 [12:10<7:16:59, 5.37s/it]
540
 
541
  2%|▏ | 114/5000 [12:10<7:16:59, 5.37s/it]
542
  2%|▏ | 115/5000 [12:14<6:46:31, 4.99s/it]
543
 
544
  2%|▏ | 115/5000 [12:14<6:46:31, 4.99s/it]
545
  2%|▏ | 116/5000 [12:18<6:25:16, 4.73s/it]
546
 
547
  2%|▏ | 116/5000 [12:18<6:25:16, 4.73s/it]
548
  2%|▏ | 117/5000 [12:22<6:07:22, 4.51s/it]
549
 
550
  2%|▏ | 117/5000 [12:22<6:07:22, 4.51s/it]
551
  2%|▏ | 118/5000 [12:26<5:43:11, 4.22s/it]
552
 
553
  2%|▏ | 118/5000 [12:26<5:43:11, 4.22s/it]
554
  2%|▏ | 119/5000 [12:29<5:25:03, 4.00s/it]
555
 
556
  2%|▏ | 119/5000 [12:29<5:25:03, 4.00s/it]
557
  2%|▏ | 120/5000 [12:33<5:12:22, 3.84s/it]
558
 
559
  2%|▏ | 120/5000 [12:33<5:12:22, 3.84s/it]
560
  2%|▏ | 121/5000 [12:36<5:03:16, 3.73s/it]
561
 
562
  2%|▏ | 121/5000 [12:36<5:03:16, 3.73s/it]
563
  2%|▏ | 122/5000 [12:40<4:56:44, 3.65s/it]
564
 
565
  2%|▏ | 122/5000 [12:40<4:56:44, 3.65s/it]
566
  2%|▏ | 123/5000 [12:43<4:52:45, 3.60s/it]
567
 
568
  2%|▏ | 123/5000 [12:43<4:52:45, 3.60s/it]
569
  2%|▏ | 124/5000 [12:47<4:47:27, 3.54s/it]
570
 
571
  2%|▏ | 124/5000 [12:47<4:47:27, 3.54s/it]
572
  2%|▎ | 125/5000 [12:49<4:31:41, 3.34s/it]
573
 
574
  2%|▎ | 125/5000 [12:49<4:31:41, 3.34s/it]
575
  3%|▎ | 126/5000 [12:52<4:20:32, 3.21s/it]
576
 
577
  3%|▎ | 126/5000 [12:52<4:20:32, 3.21s/it]
578
  3%|▎ | 127/5000 [12:55<4:12:56, 3.11s/it]
579
 
580
  3%|▎ | 127/5000 [12:55<4:12:56, 3.11s/it]
581
  3%|▎ | 128/5000 [12:58<4:07:34, 3.05s/it]
582
 
583
  3%|▎ | 128/5000 [12:58<4:07:34, 3.05s/it]
584
  3%|▎ | 129/5000 [13:01<4:03:57, 3.01s/it]
585
 
586
  3%|▎ | 129/5000 [13:01<4:03:57, 3.01s/it]
587
  3%|▎ | 130/5000 [13:04<4:01:39, 2.98s/it]
588
 
589
  3%|▎ | 130/5000 [13:04<4:01:39, 2.98s/it]
590
  3%|▎ | 131/5000 [13:07<3:59:31, 2.95s/it]
591
 
592
  3%|▎ | 131/5000 [13:07<3:59:31, 2.95s/it]
593
  3%|▎ | 132/5000 [13:10<3:54:56, 2.90s/it]
594
 
595
  3%|▎ | 132/5000 [13:10<3:54:56, 2.90s/it]
596
  3%|▎ | 133/5000 [13:12<3:41:04, 2.73s/it]
597
 
598
  3%|▎ | 133/5000 [13:12<3:41:04, 2.73s/it]
599
  3%|▎ | 134/5000 [13:14<3:31:40, 2.61s/it]
600
 
601
  3%|▎ | 134/5000 [13:14<3:31:40, 2.61s/it]
602
  3%|▎ | 135/5000 [13:17<3:25:01, 2.53s/it]
603
 
604
  3%|▎ | 135/5000 [13:17<3:25:01, 2.53s/it]
605
  3%|▎ | 136/5000 [13:19<3:20:37, 2.47s/it]
606
 
607
  3%|▎ | 136/5000 [13:19<3:20:37, 2.47s/it]
608
  3%|▎ | 137/5000 [13:21<3:17:14, 2.43s/it]
609
 
610
  3%|▎ | 137/5000 [13:21<3:17:14, 2.43s/it]
611
  3%|▎ | 138/5000 [13:24<3:15:02, 2.41s/it]
612
 
613
  3%|▎ | 138/5000 [13:24<3:15:02, 2.41s/it]
614
  3%|▎ | 139/5000 [13:26<3:13:07, 2.38s/it]
615
 
616
  3%|▎ | 139/5000 [13:26<3:13:07, 2.38s/it]
617
  3%|▎ | 140/5000 [13:28<3:08:53, 2.33s/it]
618
 
619
  3%|▎ | 140/5000 [13:28<3:08:53, 2.33s/it]
620
  3%|▎ | 141/5000 [13:30<2:54:40, 2.16s/it]
621
 
622
  3%|▎ | 141/5000 [13:30<2:54:40, 2.16s/it]
623
  3%|▎ | 142/5000 [13:32<2:44:45, 2.03s/it]
624
 
625
  3%|▎ | 142/5000 [13:32<2:44:45, 2.03s/it]
626
  3%|▎ | 143/5000 [13:33<2:37:59, 1.95s/it]
627
 
628
  3%|▎ | 143/5000 [13:33<2:37:59, 1.95s/it]
629
  3%|▎ | 144/5000 [13:35<2:33:23, 1.90s/it]
630
 
631
  3%|▎ | 144/5000 [13:35<2:33:23, 1.90s/it]
632
  3%|▎ | 145/5000 [13:37<2:29:54, 1.85s/it]
633
 
634
  3%|▎ | 145/5000 [13:37<2:29:54, 1.85s/it]
635
  3%|▎ | 146/5000 [13:39<2:26:05, 1.81s/it]
636
 
637
  3%|▎ | 146/5000 [13:39<2:26:05, 1.81s/it]
638
  3%|▎ | 147/5000 [13:40<2:16:10, 1.68s/it]
639
 
640
  3%|▎ | 147/5000 [13:40<2:16:10, 1.68s/it]
641
  3%|▎ | 148/5000 [13:41<2:04:16, 1.54s/it]
642
 
643
  3%|▎ | 148/5000 [13:41<2:04:16, 1.54s/it]
644
  3%|▎ | 149/5000 [13:42<1:55:55, 1.43s/it]
645
 
646
  3%|▎ | 149/5000 [13:42<1:55:55, 1.43s/it]
647
  3%|▎ | 150/5000 [13:46<2:35:29, 1.92s/it]
648
 
649
  3%|▎ | 150/5000 [13:46<2:35:29, 1.92s/it]
650
  3%|▎ | 151/5000 [14:03<8:56:47, 6.64s/it]
651
 
652
  3%|▎ | 151/5000 [14:03<8:56:47, 6.64s/it]
653
  3%|▎ | 152/5000 [14:13<10:06:23, 7.50s/it]
654
 
655
  3%|▎ | 152/5000 [14:13<10:06:23, 7.50s/it]
656
  3%|▎ | 153/5000 [14:21<10:23:38, 7.72s/it]
657
 
658
  3%|▎ | 153/5000 [14:21<10:23:38, 7.72s/it]
659
  3%|▎ | 154/5000 [14:28<10:11:29, 7.57s/it]
660
 
661
  3%|▎ | 154/5000 [14:28<10:11:29, 7.57s/it]
662
  3%|▎ | 155/5000 [14:35<9:52:15, 7.33s/it]
663
 
664
  3%|▎ | 155/5000 [14:35<9:52:15, 7.33s/it]
665
  3%|▎ | 156/5000 [14:41<9:25:01, 7.00s/it]
666
 
667
  3%|▎ | 156/5000 [14:41<9:25:01, 7.00s/it]
668
  3%|▎ | 157/5000 [14:47<8:57:19, 6.66s/it]
669
 
670
  3%|▎ | 157/5000 [14:47<8:57:19, 6.66s/it]
671
  3%|▎ | 158/5000 [14:52<8:26:30, 6.28s/it]
672
 
673
  3%|▎ | 158/5000 [14:52<8:26:30, 6.28s/it]
674
  3%|▎ | 159/5000 [14:58<8:04:16, 6.00s/it]
675
 
676
  3%|▎ | 159/5000 [14:58<8:04:16, 6.00s/it]
677
  3%|▎ | 160/5000 [15:03<7:39:05, 5.69s/it]
678
 
679
  3%|▎ | 160/5000 [15:03<7:39:05, 5.69s/it]
680
  3%|▎ | 161/5000 [15:07<7:15:00, 5.39s/it]
681
 
682
  3%|▎ | 161/5000 [15:07<7:15:00, 5.39s/it]
683
  3%|▎ | 162/5000 [15:12<6:58:22, 5.19s/it]
684
 
685
  3%|▎ | 162/5000 [15:12<6:58:22, 5.19s/it]
686
  3%|▎ | 163/5000 [15:16<6:36:59, 4.92s/it]
687
 
688
  3%|▎ | 163/5000 [15:16<6:36:59, 4.92s/it]
689
  3%|▎ | 164/5000 [15:20<6:16:31, 4.67s/it]
690
 
691
  3%|▎ | 164/5000 [15:21<6:16:31, 4.67s/it]
692
  3%|▎ | 165/5000 [15:25<6:02:06, 4.49s/it]
693
 
694
  3%|▎ | 165/5000 [15:25<6:02:06, 4.49s/it]
695
  3%|▎ | 166/5000 [15:29<5:51:45, 4.37s/it]
696
 
697
  3%|▎ | 166/5000 [15:29<5:51:45, 4.37s/it]
698
  3%|▎ | 167/5000 [15:33<5:44:41, 4.28s/it]
699
 
700
  3%|▎ | 167/5000 [15:33<5:44:41, 4.28s/it]
701
  3%|▎ | 168/5000 [15:37<5:37:26, 4.19s/it]
702
 
703
  3%|▎ | 168/5000 [15:37<5:37:26, 4.19s/it]
704
  3%|▎ | 169/5000 [15:40<5:19:41, 3.97s/it]
705
 
706
  3%|▎ | 169/5000 [15:40<5:19:41, 3.97s/it]
707
  3%|▎ | 170/5000 [15:44<5:07:48, 3.82s/it]
708
 
709
  3%|▎ | 170/5000 [15:44<5:07:48, 3.82s/it]
710
  3%|▎ | 171/5000 [15:47<4:58:58, 3.71s/it]
711
 
712
  3%|▎ | 171/5000 [15:47<4:58:58, 3.71s/it]
713
  3%|▎ | 172/5000 [15:51<4:52:54, 3.64s/it]
714
 
715
  3%|▎ | 172/5000 [15:51<4:52:54, 3.64s/it]
716
  3%|▎ | 173/5000 [15:54<4:48:25, 3.59s/it]
717
 
718
  3%|▎ | 173/5000 [15:54<4:48:25, 3.59s/it]
719
  3%|▎ | 174/5000 [15:57<4:44:43, 3.54s/it]
720
 
721
  3%|▎ | 174/5000 [15:57<4:44:43, 3.54s/it]
722
  4%|▎ | 175/5000 [16:01<4:33:54, 3.41s/it]
723
 
724
  4%|▎ | 175/5000 [16:01<4:33:54, 3.41s/it]
725
  4%|▎ | 176/5000 [16:03<4:21:31, 3.25s/it]
726
 
727
  4%|▎ | 176/5000 [16:03<4:21:31, 3.25s/it]
728
  4%|▎ | 177/5000 [16:06<4:12:46, 3.14s/it]
729
 
730
  4%|▎ | 177/5000 [16:06<4:12:46, 3.14s/it]
731
  4%|▎ | 178/5000 [16:09<4:06:53, 3.07s/it]
732
 
733
  4%|▎ | 178/5000 [16:09<4:06:53, 3.07s/it]
734
  4%|▎ | 179/5000 [16:12<4:02:20, 3.02s/it]
735
 
736
  4%|▎ | 179/5000 [16:12<4:02:20, 3.02s/it]
737
  4%|▎ | 180/5000 [16:15<3:59:14, 2.98s/it]
738
 
739
  4%|▎ | 180/5000 [16:15<3:59:14, 2.98s/it]
740
  4%|▎ | 181/5000 [16:18<3:57:04, 2.95s/it]
741
 
742
  4%|▎ | 181/5000 [16:18<3:57:04, 2.95s/it]
743
  4%|▎ | 182/5000 [16:21<3:52:28, 2.90s/it]
744
 
745
  4%|▎ | 182/5000 [16:21<3:52:28, 2.90s/it]
746
  4%|▎ | 183/5000 [16:23<3:39:25, 2.73s/it]
747
 
748
  4%|▎ | 183/5000 [16:23<3:39:25, 2.73s/it]
749
  4%|▎ | 184/5000 [16:25<3:29:44, 2.61s/it]
750
 
751
  4%|▎ | 184/5000 [16:25<3:29:44, 2.61s/it]
752
  4%|▎ | 185/5000 [16:28<3:23:06, 2.53s/it]
753
 
754
  4%|▎ | 185/5000 [16:28<3:23:06, 2.53s/it]
755
  4%|▎ | 186/5000 [16:30<3:18:13, 2.47s/it]
756
 
757
  4%|▎ | 186/5000 [16:30<3:18:13, 2.47s/it]
758
  4%|▎ | 187/5000 [16:32<3:14:56, 2.43s/it]
759
 
760
  4%|▎ | 187/5000 [16:32<3:14:56, 2.43s/it]
761
  4%|▍ | 188/5000 [16:35<3:12:59, 2.41s/it]
762
 
763
  4%|▍ | 188/5000 [16:35<3:12:59, 2.41s/it]
764
  4%|▍ | 189/5000 [16:37<3:11:18, 2.39s/it]
765
 
766
  4%|▍ | 189/5000 [16:37<3:11:18, 2.39s/it]
767
  4%|▍ | 190/5000 [16:39<3:01:04, 2.26s/it]
768
 
769
  4%|▍ | 190/5000 [16:39<3:01:04, 2.26s/it]
770
  4%|▍ | 191/5000 [16:41<2:48:45, 2.11s/it]
771
 
772
  4%|▍ | 191/5000 [16:41<2:48:45, 2.11s/it]
773
  4%|▍ | 192/5000 [16:43<2:40:21, 2.00s/it]
774
 
775
  4%|▍ | 192/5000 [16:43<2:40:21, 2.00s/it]
776
  4%|▍ | 193/5000 [16:44<2:34:24, 1.93s/it]
777
 
778
  4%|▍ | 193/5000 [16:44<2:34:24, 1.93s/it]
779
  4%|▍ | 194/5000 [16:46<2:30:21, 1.88s/it]
780
 
781
  4%|▍ | 194/5000 [16:46<2:30:21, 1.88s/it]
782
  4%|▍ | 195/5000 [16:48<2:27:19, 1.84s/it]
783
 
784
  4%|▍ | 195/5000 [16:48<2:27:19, 1.84s/it]
785
  4%|▍ | 196/5000 [16:50<2:24:45, 1.81s/it]
786
 
787
  4%|▍ | 196/5000 [16:50<2:24:45, 1.81s/it]
788
  4%|▍ | 197/5000 [16:51<2:14:45, 1.68s/it]
789
 
790
  4%|▍ | 197/5000 [16:51<2:14:45, 1.68s/it]
791
  4%|▍ | 198/5000 [16:52<2:03:05, 1.54s/it]
792
 
793
  4%|▍ | 198/5000 [16:52<2:03:05, 1.54s/it]
794
  4%|▍ | 199/5000 [16:53<1:54:58, 1.44s/it]
795
 
796
  4%|▍ | 199/5000 [16:53<1:54:58, 1.44s/it]
797
  4%|▍ | 200/5000 [16:56<2:35:34, 1.94s/it]
798
 
799
  4%|▍ | 200/5000 [16:56<2:35:34, 1.94s/it]predictions output file at: /scratch/elec/puhe/p/palp3/MUCS/mucs_language_segregated_data/trainwithtagsv2_warmup500_s300_shuff100/predictionswtagsv2_indicw2v_ad0_3_hd_02_featd_0_2_lr6e-4_warmup500_s300_shuff100.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800
  0%| | 0/196 [00:00<?, ?it/s]
 
801
  1%| | 2/196 [00:00<01:31, 2.11it/s]
 
802
  2%|▏ | 3/196 [00:02<02:30, 1.28it/s]
 
803
  2%|▏ | 4/196 [00:03<03:07, 1.02it/s]
 
804
  3%|▎ | 5/196 [00:04<03:25, 1.07s/it]
 
805
  3%|▎ | 6/196 [00:06<03:55, 1.24s/it]
 
806
  4%|▎ | 7/196 [00:07<04:13, 1.34s/it]
 
807
  4%|▍ | 8/196 [00:09<04:12, 1.34s/it]
 
808
  5%|▍ | 9/196 [00:11<05:32, 1.78s/it]
 
809
  5%|▌ | 10/196 [00:14<06:43, 2.17s/it]
 
810
  6%|▌ | 11/196 [00:18<07:48, 2.53s/it]
 
811
  6%|▌ | 12/196 [00:21<08:27, 2.76s/it]
 
812
  7%|▋ | 13/196 [00:23<07:20, 2.41s/it]
 
813
  7%|▋ | 14/196 [00:24<06:05, 2.01s/it]
 
814
  8%|▊ | 15/196 [00:25<05:03, 1.68s/it]
 
815
  8%|▊ | 16/196 [00:26<04:41, 1.57s/it]
 
816
  9%|▊ | 17/196 [00:27<04:27, 1.49s/it]
 
817
  9%|▉ | 18/196 [00:29<04:53, 1.65s/it]
 
818
  10%|▉ | 19/196 [00:33<06:17, 2.13s/it]
 
819
  10%|█ | 20/196 [00:35<06:49, 2.33s/it]
 
820
  11%|█ | 21/196 [00:38<06:58, 2.39s/it]
 
821
  11%|█ | 22/196 [00:40<06:43, 2.32s/it]
 
822
  12%|█▏ | 23/196 [00:42<06:15, 2.17s/it]
 
823
  12%|█▏ | 24/196 [00:43<04:57, 1.73s/it]
 
824
  13%|█▎ | 25/196 [00:43<04:09, 1.46s/it]
 
825
  13%|█▎ | 26/196 [00:44<03:36, 1.28s/it]
 
826
  14%|█▍ | 27/196 [00:45<03:18, 1.17s/it]
 
827
  14%|█▍ | 28/196 [00:46<03:08, 1.12s/it]
 
828
  15%|█▍ | 29/196 [00:47<03:07, 1.12s/it]
 
829
  15%|█▌ | 30/196 [00:48<03:07, 1.13s/it]
 
830
  16%|█▌ | 31/196 [00:49<02:51, 1.04s/it]
 
831
  16%|█▋ | 32/196 [00:50<02:42, 1.01it/s]
 
832
  17%|█▋ | 33/196 [00:52<03:09, 1.16s/it]
 
833
  17%|█▋ | 34/196 [00:54<04:13, 1.56s/it]
 
834
  18%|█▊ | 35/196 [00:56<04:16, 1.59s/it]
 
835
  18%|█▊ | 36/196 [00:58<04:59, 1.87s/it]
 
836
  19%|█▉ | 37/196 [01:00<04:59, 1.88s/it]
 
837
  19%|█▉ | 38/196 [01:02<04:30, 1.71s/it]
 
838
  20%|█▉ | 39/196 [01:03<04:10, 1.59s/it]
 
839
  20%|██ | 40/196 [01:04<03:48, 1.46s/it]
 
840
  21%|██ | 41/196 [01:05<03:27, 1.34s/it]
 
841
  21%|██▏ | 42/196 [01:06<03:10, 1.24s/it]
 
842
  22%|██▏ | 43/196 [01:07<03:06, 1.22s/it]
 
843
  22%|██▏ | 44/196 [01:09<03:01, 1.20s/it]
 
844
  23%|██▎ | 45/196 [01:09<02:49, 1.12s/it]
 
845
  23%|██▎ | 46/196 [01:10<02:36, 1.05s/it]
 
846
  24%|██▍ | 47/196 [01:11<02:36, 1.05s/it]
 
847
  24%|██▍ | 48/196 [01:12<02:27, 1.00it/s]
 
848
  25%|██▌ | 49/196 [01:13<02:23, 1.02it/s]
 
849
  26%|██▌ | 50/196 [01:14<02:21, 1.03it/s]
 
850
  26%|██▌ | 51/196 [01:15<02:16, 1.06it/s]
 
851
  27%|██▋ | 52/196 [01:16<02:14, 1.07it/s]
 
852
  27%|██▋ | 53/196 [01:17<02:26, 1.02s/it]
 
853
  28%|██▊ | 54/196 [01:18<02:22, 1.00s/it]
 
854
  28%|██▊ | 55/196 [01:20<02:37, 1.12s/it]
 
855
  29%|██▊ | 56/196 [01:21<02:47, 1.20s/it]
 
856
  29%|██▉ | 57/196 [01:22<03:00, 1.30s/it]
 
857
  30%|██▉ | 58/196 [01:24<03:07, 1.36s/it]
 
858
  30%|███ | 59/196 [01:25<03:06, 1.36s/it]
 
859
  31%|███ | 60/196 [01:26<02:50, 1.25s/it]
 
860
  31%|███ | 61/196 [01:27<02:33, 1.13s/it]
 
861
  32%|███▏ | 62/196 [01:28<02:29, 1.11s/it]
 
862
  32%|███▏ | 63/196 [01:29<02:30, 1.13s/it]
 
863
  33%|███▎ | 64/196 [01:31<02:29, 1.14s/it]
 
864
  33%|███▎ | 65/196 [01:32<02:25, 1.11s/it]
 
865
  34%|███▎ | 66/196 [01:33<02:30, 1.16s/it]
 
866
  34%|███▍ | 67/196 [01:34<02:34, 1.20s/it]
 
867
  35%|███▍ | 68/196 [01:36<02:59, 1.40s/it]
 
868
  35%|███▌ | 69/196 [01:38<03:06, 1.47s/it]
 
869
  36%|███▌ | 70/196 [01:39<02:51, 1.36s/it]
 
870
  36%|███▌ | 71/196 [01:40<02:39, 1.27s/it]
 
871
  37%|███▋ | 72/196 [01:41<02:24, 1.17s/it]
 
872
  37%|███▋ | 73/196 [01:42<02:11, 1.07s/it]
 
873
  38%|███▊ | 74/196 [01:42<02:00, 1.01it/s]
 
874
  38%|███▊ | 75/196 [01:43<01:56, 1.04it/s]
 
875
  39%|███▉ | 76/196 [01:44<01:52, 1.06it/s]
 
876
  39%|███▉ | 77/196 [01:45<01:57, 1.01it/s]
 
877
  40%|███▉ | 78/196 [01:47<02:08, 1.09s/it]
 
878
  40%|████ | 79/196 [01:48<02:02, 1.04s/it]
 
879
  41%|████ | 80/196 [01:49<02:09, 1.11s/it]
 
880
  41%|████▏ | 81/196 [01:50<02:13, 1.16s/it]
 
881
  42%|████▏ | 82/196 [01:51<02:08, 1.12s/it]
 
882
  42%|████▏ | 83/196 [01:52<02:10, 1.15s/it]
 
883
  43%|████▎ | 84/196 [01:54<02:12, 1.19s/it]
 
884
  43%|████▎ | 85/196 [01:55<02:11, 1.18s/it]
 
885
  44%|████▍ | 86/196 [01:56<02:13, 1.21s/it]
 
886
  44%|████▍ | 87/196 [01:57<02:09, 1.19s/it]
 
887
  45%|████▍ | 88/196 [01:59<02:13, 1.24s/it]
 
888
  45%|████▌ | 89/196 [02:00<02:17, 1.29s/it]
 
889
  46%|████▌ | 90/196 [02:01<02:12, 1.25s/it]
 
890
  46%|████▋ | 91/196 [02:02<02:07, 1.22s/it]
 
891
  47%|████▋ | 92/196 [02:03<02:02, 1.18s/it]
 
892
  47%|████▋ | 93/196 [02:05<02:08, 1.25s/it]
 
893
  48%|████▊ | 94/196 [02:06<02:10, 1.28s/it]
 
894
  48%|████▊ | 95/196 [02:07<02:03, 1.22s/it]
 
895
  49%|████▉ | 96/196 [02:09<02:09, 1.29s/it]
 
896
  49%|████▉ | 97/196 [02:10<02:05, 1.27s/it]
 
897
  50%|█████ | 98/196 [02:11<02:09, 1.32s/it]
 
898
  51%|█████ | 99/196 [02:12<01:57, 1.21s/it]
 
899
  51%|█████ | 100/196 [02:13<01:43, 1.07s/it]
 
900
  52%|█████▏ | 101/196 [02:14<01:34, 1.00it/s]
 
901
  52%|█████▏ | 102/196 [02:15<01:39, 1.06s/it]
 
902
  53%|█████▎ | 103/196 [02:16<01:45, 1.14s/it]
 
903
  53%|█████▎ | 104/196 [02:18<02:02, 1.34s/it]
 
904
  54%|█████▎ | 105/196 [02:20<02:05, 1.38s/it]
 
905
  54%|█████▍ | 106/196 [02:21<02:05, 1.40s/it]
 
906
  55%|█████▍ | 107/196 [02:22<01:56, 1.31s/it]
 
907
  55%|█████▌ | 108/196 [02:23<01:42, 1.16s/it]
 
908
  56%|█████▌ | 109/196 [02:24<01:33, 1.08s/it]
 
909
  56%|█████▌ | 110/196 [02:25<01:28, 1.03s/it]
 
910
  57%|█████▋ | 111/196 [02:26<01:27, 1.03s/it]
 
911
  57%|█████▋ | 112/196 [02:27<01:30, 1.07s/it]
 
912
  58%|█████▊ | 113/196 [02:28<01:32, 1.11s/it]
 
913
  58%|█████▊ | 114/196 [02:29<01:25, 1.04s/it]
 
914
  59%|█████▊ | 115/196 [02:30<01:21, 1.01s/it]
 
915
  59%|█████▉ | 116/196 [02:31<01:18, 1.02it/s]
 
916
  60%|█████▉ | 117/196 [02:32<01:14, 1.06it/s]
 
917
  60%|██████ | 118/196 [02:32<01:06, 1.17it/s]
 
918
  61%|██████ | 119/196 [02:33<01:08, 1.13it/s]
 
919
  61%|██████ | 120/196 [02:34<01:11, 1.06it/s]
 
920
  62%|██████▏ | 121/196 [02:36<01:13, 1.02it/s]
 
921
  62%|██████▏ | 122/196 [02:37<01:14, 1.01s/it]
 
922
  63%|██████▎ | 123/196 [02:38<01:13, 1.00s/it]
 
923
  63%|██████▎ | 124/196 [02:39<01:10, 1.02it/s]
 
924
  64%|██████▍ | 125/196 [02:40<01:10, 1.01it/s]
 
925
  64%|██████▍ | 126/196 [02:41<01:18, 1.12s/it]
 
926
  65%|██████▍ | 127/196 [02:42<01:17, 1.12s/it]
 
927
  65%|██████▌ | 128/196 [02:43<01:12, 1.06s/it]
 
928
  66%|██████▌ | 129/196 [02:44<01:10, 1.06s/it]
 
929
  66%|██████▋ | 130/196 [02:45<01:08, 1.04s/it]
 
930
  67%|██████▋ | 131/196 [02:46<01:08, 1.05s/it]
 
931
  67%|██████▋ | 132/196 [02:47<01:03, 1.02it/s]
 
932
  68%|██████▊ | 133/196 [02:48<01:00, 1.03it/s]
 
933
  68%|██████▊ | 134/196 [02:49<01:05, 1.06s/it]
 
934
  69%|██████▉ | 135/196 [02:50<01:03, 1.03s/it]
 
935
  69%|██████▉ | 136/196 [02:51<00:59, 1.00it/s]
 
936
  70%|██████▉ | 137/196 [02:52<00:58, 1.01it/s]
 
937
  70%|███████ | 138/196 [02:53<00:58, 1.00s/it]
 
938
  71%|███████ | 139/196 [02:54<00:57, 1.00s/it]
 
939
  71%|███████▏ | 140/196 [02:55<00:55, 1.00it/s]
 
940
  72%|███████▏ | 141/196 [02:56<00:55, 1.00s/it]
 
941
  72%|███████▏ | 142/196 [02:57<00:55, 1.03s/it]
 
942
  73%|███████▎ | 143/196 [02:59<00:59, 1.13s/it]
 
943
  73%|███████▎ | 144/196 [02:59<00:55, 1.07s/it]
 
944
  74%|███████▍ | 145/196 [03:00<00:49, 1.03it/s]
 
945
  74%|███████▍ | 146/196 [03:01<00:45, 1.10it/s]
 
946
  75%|███████▌ | 147/196 [03:02<00:44, 1.10it/s]
 
947
  76%|███████▌ | 148/196 [03:03<00:43, 1.10it/s]
 
948
  76%|███████▌ | 149/196 [03:04<00:40, 1.15it/s]
 
949
  77%|███████▋ | 150/196 [03:05<00:42, 1.10it/s]
 
950
  77%|███████▋ | 151/196 [03:06<00:43, 1.03it/s]
 
951
  78%|███████▊ | 152/196 [03:07<00:42, 1.04it/s]
 
952
  78%|███████▊ | 153/196 [03:08<00:41, 1.05it/s]
 
953
  79%|███████▊ | 154/196 [03:09<00:41, 1.02it/s]
 
954
  79%|███████▉ | 155/196 [03:10<00:42, 1.05s/it]
 
955
  80%|███████▉ | 156/196 [03:12<00:49, 1.25s/it]
 
956
  80%|████████ | 157/196 [03:13<00:53, 1.36s/it]
 
957
  81%|████████ | 158/196 [03:14<00:46, 1.22s/it]
 
958
  81%|████████ | 159/196 [03:15<00:40, 1.10s/it]
 
959
  82%|████████▏ | 160/196 [03:16<00:37, 1.04s/it]
 
960
  82%|████████▏ | 161/196 [03:17<00:36, 1.04s/it]
 
961
  83%|████████▎ | 162/196 [03:18<00:35, 1.03s/it]
 
962
  83%|████████▎ | 163/196 [03:19<00:33, 1.01s/it]
 
963
  84%|████████▎ | 164/196 [03:20<00:31, 1.01it/s]
 
964
  84%|████████▍ | 165/196 [03:21<00:31, 1.03s/it]
 
965
  85%|████████▍ | 166/196 [03:22<00:30, 1.01s/it]
 
966
  85%|████████▌ | 167/196 [03:23<00:28, 1.03it/s]
 
967
  86%|████████▌ | 168/196 [03:23<00:25, 1.08it/s]
 
968
  86%|████████▌ | 169/196 [03:24<00:25, 1.06it/s]
 
969
  87%|████████▋ | 170/196 [03:26<00:27, 1.05s/it]
 
970
  87%|████████▋ | 171/196 [03:27<00:25, 1.01s/it]
 
971
  88%|████████▊ | 172/196 [03:28<00:24, 1.04s/it]
 
972
  88%|████████▊ | 173/196 [03:29<00:23, 1.03s/it]
 
973
  89%|████████▉ | 174/196 [03:30<00:24, 1.09s/it]
 
974
  89%|████████▉ | 175/196 [03:32<00:27, 1.31s/it]
 
975
  90%|████████▉ | 176/196 [03:36<00:41, 2.07s/it]
 
976
  90%|█████████ | 177/196 [03:39<00:46, 2.42s/it]
 
977
  91%|█████████ | 178/196 [03:42<00:47, 2.63s/it]
 
978
  91%|█████████▏| 179/196 [03:46<00:49, 2.89s/it]
 
979
  92%|█████████▏| 180/196 [03:47<00:37, 2.32s/it]
 
980
  92%|█████████▏| 181/196 [03:48<00:29, 1.96s/it]
 
981
  93%|█████████▎| 182/196 [03:49<00:23, 1.66s/it]
 
982
  93%|█████████▎| 183/196 [03:51<00:22, 1.73s/it]
 
983
  94%|█████████▍| 184/196 [03:51<00:17, 1.48s/it]
 
984
  94%|█████████▍| 185/196 [03:53<00:15, 1.42s/it]
 
985
  95%|█████████▍| 186/196 [03:54<00:14, 1.49s/it]
 
986
  95%|█████████▌| 187/196 [03:55<00:12, 1.36s/it]
 
987
  96%|█████████▌| 188/196 [03:56<00:09, 1.22s/it]
 
988
  96%|█████████▋| 189/196 [03:57<00:08, 1.18s/it]
 
989
  97%|█████████▋| 190/196 [03:58<00:06, 1.11s/it]
 
990
  97%|█████████▋| 191/196 [03:59<00:05, 1.03s/it]
 
991
  98%|█████████▊| 192/196 [04:00<00:04, 1.03s/it]
 
992
  98%|█████████▊| 193/196 [04:01<00:03, 1.03s/it]
 
993
  99%|█████████▉| 194/196 [04:02<00:02, 1.04s/it]
 
994
  99%|█████████▉| 195/196 [04:03<00:01, 1.01s/it]
 
995
 
 
996
 
997
  4%|▍ | 200/5000 [21:10<2:35:34, 1.94s/it]
 
 
998
  
999
  4%|▍ | 201/5000 [21:26<109:42:43, 82.30s/it]
1000
 
1001
  4%|▍ | 201/5000 [21:26<109:42:43, 82.30s/it]
1002
  4%|▍ | 202/5000 [21:35<80:16:32, 60.23s/it]
1003
 
1004
  4%|▍ | 202/5000 [21:35<80:16:32, 60.23s/it]
1005
  4%|▍ | 203/5000 [21:43<59:13:26, 44.45s/it]
1006
 
1007
  4%|▍ | 203/5000 [21:43<59:13:26, 44.45s/it]
1008
  4%|▍ | 204/5000 [21:50<44:17:23, 33.25s/it]
1009
 
1010
  4%|▍ | 204/5000 [21:50<44:17:23, 33.25s/it]
1011
  4%|▍ | 205/5000 [21:56<33:35:12, 25.22s/it]
1012
 
1013
  4%|▍ | 205/5000 [21:56<33:35:12, 25.22s/it]
1014
  4%|▍ | 206/5000 [22:02<25:54:24, 19.45s/it]
1015
 
1016
  4%|▍ | 206/5000 [22:02<25:54:24, 19.45s/it]
1017
  4%|▍ | 207/5000 [22:08<20:29:24, 15.39s/it]
1018
 
1019
  4%|▍ | 207/5000 [22:08<20:29:24, 15.39s/it]
1020
  4%|▍ | 208/5000 [22:13<16:29:27, 12.39s/it]
1021
 
1022
  4%|▍ | 208/5000 [22:14<16:29:27, 12.39s/it]
1023
  4%|▍ | 209/5000 [22:19<13:41:28, 10.29s/it]
1024
 
1025
  4%|▍ | 209/5000 [22:19<13:41:28, 10.29s/it]
1026
  4%|▍ | 210/5000 [22:24<11:33:46, 8.69s/it]
1027
 
1028
  4%|▍ | 210/5000 [22:24<11:33:46, 8.69s/it]
1029
  4%|▍ | 211/5000 [22:29<9:58:05, 7.49s/it]
1030
 
1031
  4%|▍ | 211/5000 [22:29<9:58:05, 7.49s/it]
1032
  4%|▍ | 212/5000 [22:33<8:50:15, 6.64s/it]
1033
 
1034
  4%|▍ | 212/5000 [22:33<8:50:15, 6.64s/it]
1035
  4%|▍ | 213/5000 [22:38<7:59:27, 6.01s/it]
1036
 
1037
  4%|▍ | 213/5000 [22:38<7:59:27, 6.01s/it]
1038
  4%|▍ | 214/5000 [22:42<7:14:17, 5.44s/it]
1039
 
1040
  4%|▍ | 214/5000 [22:42<7:14:17, 5.44s/it]
1041
  4%|▍ | 215/5000 [22:46<6:42:02, 5.04s/it]
1042
 
1043
  4%|▍ | 215/5000 [22:46<6:42:02, 5.04s/it]
1044
  4%|▍ | 216/5000 [22:50<6:19:32, 4.76s/it]
1045
 
1046
  4%|▍ | 216/5000 [22:50<6:19:32, 4.76s/it]
1047
  4%|▍ | 217/5000 [22:54<6:03:14, 4.56s/it]
1048
 
1049
  4%|▍ | 217/5000 [22:54<6:03:14, 4.56s/it]
1050
  4%|▍ | 218/5000 [22:58<5:43:18, 4.31s/it]
1051
 
1052
  4%|▍ | 218/5000 [22:58<5:43:18, 4.31s/it]
1053
  4%|▍ | 219/5000 [23:01<5:24:54, 4.08s/it]
1054
 
1055
  4%|▍ | 219/5000 [23:01<5:24:54, 4.08s/it]
1056
  4%|▍ | 220/5000 [23:05<5:10:19, 3.90s/it]
1057
 
1058
  4%|▍ | 220/5000 [23:05<5:10:19, 3.90s/it]
1059
  4%|▍ | 221/5000 [23:08<5:00:17, 3.77s/it]
1060
 
1061
  4%|▍ | 221/5000 [23:08<5:00:17, 3.77s/it]
1062
  4%|▍ | 222/5000 [23:12<4:53:21, 3.68s/it]
1063
 
1064
  4%|▍ | 222/5000 [23:12<4:53:21, 3.68s/it]
1065
  4%|▍ | 223/5000 [23:15<4:48:44, 3.63s/it]
1066
 
1067
  4%|▍ | 223/5000 [23:15<4:48:44, 3.63s/it]
1068
  4%|▍ | 224/5000 [23:19<4:45:03, 3.58s/it]
1069
 
1070
  4%|▍ | 224/5000 [23:19<4:45:03, 3.58s/it]
1071
  4%|▍ | 225/5000 [23:22<4:38:56, 3.50s/it]
1072
 
1073
  4%|▍ | 225/5000 [23:22<4:38:56, 3.50s/it]
1074
  5%|▍ | 226/5000 [23:25<4:24:21, 3.32s/it]
1075
 
1076
  5%|▍ | 226/5000 [23:25<4:24:21, 3.32s/it]
1077
  5%|▍ | 227/5000 [23:28<4:14:14, 3.20s/it]
1078
 
1079
  5%|▍ | 227/5000 [23:28<4:14:14, 3.20s/it]
1080
  5%|▍ | 228/5000 [23:31<4:06:55, 3.10s/it]
1081
 
1082
  5%|▍ | 228/5000 [23:31<4:06:55, 3.10s/it]
1083
  5%|▍ | 229/5000 [23:34<4:01:25, 3.04s/it]
1084
 
1085
  5%|▍ | 229/5000 [23:34<4:01:25, 3.04s/it]
1086
  5%|▍ | 230/5000 [23:37<3:57:38, 2.99s/it]
1087
 
1088
  5%|▍ | 230/5000 [23:37<3:57:38, 2.99s/it]
1089
  5%|▍ | 231/5000 [23:39<3:54:46, 2.95s/it]
1090
 
1091
  5%|▍ | 231/5000 [23:39<3:54:46, 2.95s/it]
1092
  5%|▍ | 232/5000 [23:42<3:52:48, 2.93s/it]
1093
 
1094
  5%|▍ | 232/5000 [23:42<3:52:48, 2.93s/it]
1095
  5%|▍ | 233/5000 [23:45<3:50:13, 2.90s/it]
1096
 
1097
  5%|▍ | 233/5000 [23:45<3:50:13, 2.90s/it]
1098
  5%|▍ | 234/5000 [23:48<3:41:16, 2.79s/it]
1099
 
1100
  5%|▍ | 234/5000 [23:48<3:41:16, 2.79s/it]
1101
  5%|▍ | 235/5000 [23:50<3:30:34, 2.65s/it]
1102
 
1103
  5%|▍ | 235/5000 [23:50<3:30:34, 2.65s/it]
1104
  5%|▍ | 236/5000 [23:52<3:22:52, 2.56s/it]
1105
 
1106
  5%|▍ | 236/5000 [23:52<3:22:52, 2.56s/it]
1107
  5%|▍ | 237/5000 [23:55<3:17:33, 2.49s/it]
1108
 
1109
  5%|▍ | 237/5000 [23:55<3:17:33, 2.49s/it]
1110
  5%|▍ | 238/5000 [23:57<3:13:44, 2.44s/it]
1111
 
1112
  5%|▍ | 238/5000 [23:57<3:13:44, 2.44s/it]
1113
  5%|▍ | 239/5000 [23:59<3:11:03, 2.41s/it]
1114
 
1115
  5%|▍ | 239/5000 [23:59<3:11:03, 2.41s/it]
1116
  5%|▍ | 240/5000 [24:02<3:09:01, 2.38s/it]
1117
 
1118
  5%|▍ | 240/5000 [24:02<3:09:01, 2.38s/it]
1119
  5%|▍ | 241/5000 [24:04<2:58:50, 2.25s/it]
1120
 
1121
  5%|▍ | 241/5000 [24:04<2:58:50, 2.25s/it]
1122
  5%|▍ | 242/5000 [24:05<2:46:53, 2.10s/it]
1123
 
1124
  5%|▍ | 242/5000 [24:05<2:46:53, 2.10s/it]
1125
  5%|▍ | 243/5000 [24:07<2:38:29, 2.00s/it]
1126
 
1127
  5%|▍ | 243/5000 [24:07<2:38:29, 2.00s/it]
1128
  5%|▍ | 244/5000 [24:09<2:32:26, 1.92s/it]
1129
 
1130
  5%|▍ | 244/5000 [24:09<2:32:26, 1.92s/it]
1131
  5%|▍ | 245/5000 [24:11<2:28:22, 1.87s/it]
1132
 
1133
  5%|▍ | 245/5000 [24:11<2:28:22, 1.87s/it]
1134
  5%|▍ | 246/5000 [24:12<2:25:36, 1.84s/it]
1135
 
1136
  5%|▍ | 246/5000 [24:12<2:25:36, 1.84s/it]
1137
  5%|▍ | 247/5000 [24:14<2:20:40, 1.78s/it]
1138
 
1139
  5%|▍ | 247/5000 [24:14<2:20:40, 1.78s/it]
1140
  5%|▍ | 248/5000 [24:15<2:06:52, 1.60s/it]
1141
 
1142
  5%|▍ | 248/5000 [24:15<2:06:52, 1.60s/it]
1143
  5%|▍ | 249/5000 [24:16<1:57:05, 1.48s/it]
1144
 
1145
  5%|▍ | 249/5000 [24:16<1:57:05, 1.48s/it]
1146
  5%|▌ | 250/5000 [24:19<2:31:59, 1.92s/it]
1147
 
1148
  5%|▌ | 250/5000 [24:19<2:31:59, 1.92s/it]
1149
  5%|▌ | 251/5000 [24:36<8:30:18, 6.45s/it]
1150
 
1151
  5%|▌ | 251/5000 [24:36<8:30:18, 6.45s/it]
1152
  5%|▌ | 252/5000 [24:45<9:22:20, 7.11s/it]
1153
 
1154
  5%|▌ | 252/5000 [24:45<9:22:20, 7.11s/it]
1155
  5%|▌ | 253/5000 [24:53<9:32:15, 7.23s/it]
1156
 
1157
  5%|▌ | 253/5000 [24:53<9:32:15, 7.23s/it]
1158
  5%|▌ | 254/5000 [24:59<9:22:48, 7.12s/it]
1159
 
1160
  5%|▌ | 254/5000 [24:59<9:22:48, 7.12s/it]
1161
  5%|▌ | 255/5000 [25:06<9:06:29, 6.91s/it]
1162
 
1163
  5%|▌ | 255/5000 [25:06<9:06:29, 6.91s/it]slurmstepd: error: *** JOB 2604812 ON gpu37 CANCELLED AT 2024-09-15T20:24:08 ***
 
1
+ wandb: Currently logged in as: priyanshi-pal (priyanshipal). Use `wandb login --relogin` to force relogin
2
+ wandb: wandb version 0.18.0 is available! To upgrade, please run:
3
+ wandb: $ pip install wandb --upgrade
4
+ wandb: Tracking run with wandb version 0.17.6
5
+ wandb: Run data is saved locally in /scratch/elec/t405-puhe/p/palp3/MUCS/wandb/run-20240915_194917-g83rt34t
6
+ wandb: Run `wandb offline` to turn off syncing.
7
+ wandb: Syncing run retrainwithtagsv2_indicw2v_ad0_3_hd_02_featd_0_3_lr6e-4_warmup500_s300_shuff100
8
+ wandb: ⭐️ View project at https://wandb.ai/priyanshipal/huggingface
9
+ wandb: 🚀 View run at https://wandb.ai/priyanshipal/huggingface/runs/g83rt34t
10
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
11
+ warnings.warn(
12
+ 09/15/2024 19:49:21 - WARNING - __main__ - device: cuda:0, n_gpu: 116-bits training: False
13
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py:957: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
14
+ warnings.warn(
15
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/configuration_utils.py:364: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`.
16
+ warnings.warn(
17
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/models/auto/feature_extraction_auto.py:329: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
18
+ warnings.warn(
19
+ /scratch/elec/puhe/p/palp3/MUCS/finetune_script_wtags_partdata.py:509: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
20
+ state_dict = torch.load(f"{model_args.model_name_or_path}/pytorch_model.bin")
21
+ Some weights of the model checkpoint at /m/triton/scratch/elec/puhe/p/palp3/MUCS/indicwav2vec-hindi were not used when initializing Wav2Vec2ForCTC: ['wav2vec2.encoder.pos_conv_embed.conv.weight_g', 'wav2vec2.encoder.pos_conv_embed.conv.weight_v']
22
+ - This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
23
+ - This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
24
+ Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at /m/triton/scratch/elec/puhe/p/palp3/MUCS/indicwav2vec-hindi and are newly initialized: ['lm_head.bias', 'lm_head.weight', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
25
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
26
+ max_steps is given, it will override any value given in num_train_epochs
27
+ Wav2Vec2CTCTokenizer(name_or_path='', vocab_size=151, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '[UNK]', 'pad_token': '[PAD]'}, clean_up_tokenization_spaces=True), added_tokens_decoder={
28
+ 149: AddedToken("[UNK]", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
29
+ 150: AddedToken("[PAD]", rstrip=True, lstrip=True, single_word=False, normalized=False, special=False),
30
+ 151: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
31
+ 152: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32
+ }
33
+ CHECK MODEL PARAMS Wav2Vec2ForCTC(
34
+ (wav2vec2): Wav2Vec2Model(
35
+ (feature_extractor): Wav2Vec2FeatureEncoder(
36
+ (conv_layers): ModuleList(
37
+ (0): Wav2Vec2LayerNormConvLayer(
38
+ (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
39
+ (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
40
+ (activation): GELUActivation()
41
+ )
42
+ (1-4): 4 x Wav2Vec2LayerNormConvLayer(
43
+ (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
44
+ (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
45
+ (activation): GELUActivation()
46
+ )
47
+ (5-6): 2 x Wav2Vec2LayerNormConvLayer(
48
+ (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))
49
+ (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
50
+ (activation): GELUActivation()
51
+ )
52
+ )
53
+ )
54
+ (feature_projection): Wav2Vec2FeatureProjection(
55
+ (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
56
+ (projection): Linear(in_features=512, out_features=1024, bias=True)
57
+ (dropout): Dropout(p=0.3, inplace=False)
58
+ )
59
+ (encoder): Wav2Vec2EncoderStableLayerNorm(
60
+ (pos_conv_embed): Wav2Vec2PositionalConvEmbedding(
61
+ (conv): ParametrizedConv1d(
62
+ 1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16
63
+ (parametrizations): ModuleDict(
64
+ (weight): ParametrizationList(
65
+ (0): _WeightNorm()
66
+ )
67
+ )
68
+ )
69
+ (padding): Wav2Vec2SamePadLayer()
70
+ (activation): GELUActivation()
71
+ )
72
+ (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
73
+ (dropout): Dropout(p=0.2, inplace=False)
74
+ (layers): ModuleList(
75
+ (0-23): 24 x Wav2Vec2EncoderLayerStableLayerNorm(
76
+ (attention): Wav2Vec2SdpaAttention(
77
+ (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
78
+ (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
79
+ (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
80
+ (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
81
+ )
82
+ (dropout): Dropout(p=0.2, inplace=False)
83
+ (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
84
+ (feed_forward): Wav2Vec2FeedForward(
85
+ (intermediate_dropout): Dropout(p=0.0, inplace=False)
86
+ (intermediate_dense): Linear(in_features=1024, out_features=4096, bias=True)
87
+ (intermediate_act_fn): GELUActivation()
88
+ (output_dense): Linear(in_features=4096, out_features=1024, bias=True)
89
+ (output_dropout): Dropout(p=0.2, inplace=False)
90
+ )
91
+ (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
92
+ )
93
+ )
94
+ )
95
+ )
96
+ (dropout): Dropout(p=0.0, inplace=False)
97
+ (lm_head): Linear(in_features=1024, out_features=153, bias=True)
98
+ )
99
+
100
  0%| | 0/5000 [00:00<?, ?it/s]/scratch/work/palp3/myenv/lib/python3.11/site-packages/transformers/models/wav2vec2/processing_wav2vec2.py:157: UserWarning: `as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.
101
+ warnings.warn(
102
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py:600: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
103
+ return fn(*args, **kwargs)
104
+ /scratch/work/palp3/myenv/lib/python3.11/site-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
105
+ with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
106
+
107
  0%| | 1/5000 [00:29<40:18:00, 29.02s/it]
108
 
109
  0%| | 1/5000 [00:29<40:18:00, 29.02s/it]
110
  0%| | 2/5000 [00:37<23:19:44, 16.80s/it]
111
 
112
  0%| | 2/5000 [00:37<23:19:44, 16.80s/it]
113
  0%| | 3/5000 [00:44<17:11:59, 12.39s/it]
114
 
115
  0%| | 3/5000 [00:44<17:11:59, 12.39s/it]
116
  0%| | 4/5000 [00:50<13:57:56, 10.06s/it]
117
 
118
  0%| | 4/5000 [00:50<13:57:56, 10.06s/it]
119
  0%| | 5/5000 [00:56<11:57:47, 8.62s/it]
120
 
121
  0%| | 5/5000 [00:56<11:57:47, 8.62s/it]
122
  0%| | 6/5000 [01:02<10:40:40, 7.70s/it]
123
 
124
  0%| | 6/5000 [01:02<10:40:40, 7.70s/it]
125
  0%| | 7/5000 [01:08<9:37:29, 6.94s/it]
126
 
127
  0%| | 7/5000 [01:08<9:37:29, 6.94s/it]
128
  0%| | 8/5000 [01:13<8:55:38, 6.44s/it]
129
 
130
  0%| | 8/5000 [01:13<8:55:38, 6.44s/it]
131
  0%| | 9/5000 [01:18<8:18:48, 6.00s/it]
132
 
133
  0%| | 9/5000 [01:18<8:18:48, 6.00s/it]
134
  0%| | 10/5000 [01:23<7:45:40, 5.60s/it]
135
 
136
  0%| | 10/5000 [01:23<7:45:40, 5.60s/it]
137
  0%| | 11/5000 [01:28<7:23:07, 5.33s/it]
138
 
139
  0%| | 11/5000 [01:28<7:23:07, 5.33s/it]
140
  0%| | 12/5000 [01:32<7:04:45, 5.11s/it]
141
 
142
  0%| | 12/5000 [01:32<7:04:45, 5.11s/it]
143
  0%| | 13/5000 [01:36<6:39:23, 4.81s/it]
144
 
145
  0%| | 13/5000 [01:36<6:39:23, 4.81s/it]
146
  0%| | 14/5000 [01:40<6:21:49, 4.59s/it]
147
 
148
  0%| | 14/5000 [01:40<6:21:49, 4.59s/it]
149
  0%| | 15/5000 [01:44<6:09:14, 4.44s/it]
150
 
151
  0%| | 15/5000 [01:45<6:09:14, 4.44s/it]
152
  0%| | 16/5000 [01:49<6:00:43, 4.34s/it]
153
 
154
  0%| | 16/5000 [01:49<6:00:43, 4.34s/it]
155
  0%| | 17/5000 [01:53<5:54:11, 4.26s/it]
156
 
157
  0%| | 17/5000 [01:53<5:54:11, 4.26s/it]
158
  0%| | 18/5000 [01:57<5:46:37, 4.17s/it]
159
 
160
  0%| | 18/5000 [01:57<5:46:37, 4.17s/it]
161
  0%| | 19/5000 [02:00<5:29:26, 3.97s/it]
162
 
163
  0%| | 19/5000 [02:00<5:29:26, 3.97s/it]
164
  0%| | 20/5000 [02:04<5:17:04, 3.82s/it]
165
 
166
  0%| | 20/5000 [02:04<5:17:04, 3.82s/it]
167
  0%| | 21/5000 [02:07<5:08:53, 3.72s/it]
168
 
169
  0%| | 21/5000 [02:07<5:08:53, 3.72s/it]
170
  0%| | 22/5000 [02:11<5:02:43, 3.65s/it]
171
 
172
  0%| | 22/5000 [02:11<5:02:43, 3.65s/it]
173
  0%| | 23/5000 [02:14<4:58:09, 3.59s/it]
174
 
175
  0%| | 23/5000 [02:14<4:58:09, 3.59s/it]
176
  0%| | 24/5000 [02:17<4:53:07, 3.53s/it]
177
 
178
  0%| | 24/5000 [02:17<4:53:07, 3.53s/it]
179
  0%| | 25/5000 [02:20<4:40:47, 3.39s/it]
180
 
181
  0%| | 25/5000 [02:21<4:40:47, 3.39s/it]
182
  1%| | 26/5000 [02:23<4:28:32, 3.24s/it]
183
 
184
  1%| | 26/5000 [02:23<4:28:32, 3.24s/it]
185
  1%| | 27/5000 [02:26<4:20:26, 3.14s/it]
186
 
187
  1%| | 27/5000 [02:26<4:20:26, 3.14s/it]
188
  1%| | 28/5000 [02:29<4:14:25, 3.07s/it]
189
 
190
  1%| | 28/5000 [02:29<4:14:25, 3.07s/it]
191
  1%| | 29/5000 [02:32<4:10:13, 3.02s/it]
192
 
193
  1%| | 29/5000 [02:32<4:10:13, 3.02s/it]
194
  1%| | 30/5000 [02:35<4:07:35, 2.99s/it]
195
 
196
  1%| | 30/5000 [02:35<4:07:35, 2.99s/it]
197
  1%| | 31/5000 [02:38<4:05:17, 2.96s/it]
198
 
199
  1%| | 31/5000 [02:38<4:05:17, 2.96s/it]
200
  1%| | 32/5000 [02:41<4:02:49, 2.93s/it]
201
 
202
  1%| | 32/5000 [02:41<4:02:49, 2.93s/it]
203
  1%| | 33/5000 [02:43<3:52:45, 2.81s/it]
204
 
205
  1%| | 33/5000 [02:43<3:52:45, 2.81s/it]
206
  1%| | 34/5000 [02:46<3:40:59, 2.67s/it]
207
 
208
  1%| | 34/5000 [02:46<3:40:59, 2.67s/it]
209
  1%| | 35/5000 [02:48<3:32:36, 2.57s/it]
210
 
211
  1%| | 35/5000 [02:48<3:32:36, 2.57s/it]
212
  1%| | 36/5000 [02:50<3:27:01, 2.50s/it]
213
 
214
  1%| | 36/5000 [02:50<3:27:01, 2.50s/it]
215
  1%| | 37/5000 [02:53<3:23:48, 2.46s/it]
216
 
217
  1%| | 37/5000 [02:53<3:23:48, 2.46s/it]
218
  1%| | 38/5000 [02:55<3:20:45, 2.43s/it]
219
 
220
  1%| | 38/5000 [02:55<3:20:45, 2.43s/it]
221
  1%| | 39/5000 [02:57<3:19:09, 2.41s/it]
222
 
223
  1%| | 39/5000 [02:57<3:19:09, 2.41s/it]
224
  1%| | 40/5000 [03:00<3:16:32, 2.38s/it]
225
 
226
  1%| | 40/5000 [03:00<3:16:32, 2.38s/it]
227
  1%| | 41/5000 [03:02<3:06:39, 2.26s/it]
228
 
229
  1%| | 41/5000 [03:02<3:06:39, 2.26s/it]
230
  1%| | 42/5000 [03:03<2:54:05, 2.11s/it]
231
 
232
  1%| | 42/5000 [03:03<2:54:05, 2.11s/it]
233
  1%| | 43/5000 [03:05<2:45:18, 2.00s/it]
234
 
235
  1%| | 43/5000 [03:05<2:45:18, 2.00s/it]
236
  1%| | 44/5000 [03:07<2:39:10, 1.93s/it]
237
 
238
  1%| | 44/5000 [03:07<2:39:10, 1.93s/it]
239
  1%| | 45/5000 [03:09<2:34:53, 1.88s/it]
240
 
241
  1%| | 45/5000 [03:09<2:34:53, 1.88s/it]
242
  1%| | 46/5000 [03:10<2:31:36, 1.84s/it]
243
 
244
  1%| | 46/5000 [03:10<2:31:36, 1.84s/it]
245
  1%| | 47/5000 [03:12<2:26:21, 1.77s/it]
246
 
247
  1%| | 47/5000 [03:12<2:26:21, 1.77s/it]
248
  1%| | 48/5000 [03:13<2:12:05, 1.60s/it]
249
 
250
  1%| | 48/5000 [03:13<2:12:05, 1.60s/it]
251
  1%| | 49/5000 [03:14<2:02:16, 1.48s/it]
252
 
253
  1%| | 49/5000 [03:15<2:02:16, 1.48s/it]
254
  1%| | 50/5000 [03:18<2:44:51, 2.00s/it]
255
 
256
  1%| | 50/5000 [03:18<2:44:51, 2.00s/it]
257
  1%| | 51/5000 [03:34<8:36:31, 6.26s/it]
258
 
259
  1%| | 51/5000 [03:34<8:36:31, 6.26s/it]
260
  1%| | 52/5000 [03:43<9:46:07, 7.11s/it]
261
 
262
  1%| | 52/5000 [03:43<9:46:07, 7.11s/it]
263
  1%| | 53/5000 [03:51<10:03:31, 7.32s/it]
264
 
265
  1%| | 53/5000 [03:51<10:03:31, 7.32s/it]
266
  1%| | 54/5000 [03:58<9:52:33, 7.19s/it]
267
 
268
  1%| | 54/5000 [03:58<9:52:33, 7.19s/it]
269
  1%| | 55/5000 [04:04<9:35:40, 6.98s/it]
270
 
271
  1%| | 55/5000 [04:04<9:35:40, 6.98s/it]
272
  1%| | 56/5000 [04:10<9:09:54, 6.67s/it]
273
 
274
  1%| | 56/5000 [04:10<9:09:54, 6.67s/it]
275
  1%| | 57/5000 [04:16<8:42:59, 6.35s/it]
276
 
277
  1%| | 57/5000 [04:16<8:42:59, 6.35s/it]
278
  1%| | 58/5000 [04:21<8:19:26, 6.06s/it]
279
 
280
  1%| | 58/5000 [04:21<8:19:26, 6.06s/it]
281
  1%| | 59/5000 [04:26<7:52:24, 5.74s/it]
282
 
283
  1%| | 59/5000 [04:26<7:52:24, 5.74s/it]
284
  1%| | 60/5000 [04:31<7:27:01, 5.43s/it]
285
 
286
  1%| | 60/5000 [04:31<7:27:01, 5.43s/it]
287
  1%| | 61/5000 [04:36<7:09:13, 5.21s/it]
288
 
289
  1%| | 61/5000 [04:36<7:09:13, 5.21s/it]
290
  1%| | 62/5000 [04:40<6:47:20, 4.95s/it]
291
 
292
  1%| | 62/5000 [04:40<6:47:20, 4.95s/it]
293
  1%|▏ | 63/5000 [04:44<6:26:34, 4.70s/it]
294
 
295
  1%|▏ | 63/5000 [04:44<6:26:34, 4.70s/it]
296
  1%|▏ | 64/5000 [04:48<6:12:16, 4.53s/it]
297
 
298
  1%|▏ | 64/5000 [04:48<6:12:16, 4.53s/it]
299
  1%|▏ | 65/5000 [04:52<6:02:02, 4.40s/it]
300
 
301
  1%|▏ | 65/5000 [04:52<6:02:02, 4.40s/it]
302
  1%|▏ | 66/5000 [04:56<5:56:16, 4.33s/it]
303
 
304
  1%|▏ | 66/5000 [04:56<5:56:16, 4.33s/it]
305
  1%|▏ | 67/5000 [05:00<5:49:42, 4.25s/it]
306
 
307
  1%|▏ | 67/5000 [05:00<5:49:42, 4.25s/it]
308
  1%|▏ | 68/5000 [05:04<5:36:05, 4.09s/it]
309
 
310
  1%|▏ | 68/5000 [05:04<5:36:05, 4.09s/it]
311
  1%|▏ | 69/5000 [05:08<5:21:05, 3.91s/it]
312
 
313
  1%|▏ | 69/5000 [05:08<5:21:05, 3.91s/it]
314
  1%|▏ | 70/5000 [05:11<5:10:13, 3.78s/it]
315
 
316
  1%|▏ | 70/5000 [05:11<5:10:13, 3.78s/it]
317
  1%|▏ | 71/5000 [05:15<5:02:22, 3.68s/it]
318
 
319
  1%|▏ | 71/5000 [05:15<5:02:22, 3.68s/it]
320
  1%|▏ | 72/5000 [05:18<4:57:31, 3.62s/it]
321
 
322
  1%|▏ | 72/5000 [05:18<4:57:31, 3.62s/it]
323
  1%|▏ | 73/5000 [05:22<4:54:11, 3.58s/it]
324
 
325
  1%|▏ | 73/5000 [05:22<4:54:11, 3.58s/it]
326
  1%|▏ | 74/5000 [05:25<4:48:19, 3.51s/it]
327
 
328
  1%|▏ | 74/5000 [05:25<4:48:19, 3.51s/it]
329
  2%|▏ | 75/5000 [05:28<4:32:54, 3.32s/it]
330
 
331
  2%|▏ | 75/5000 [05:28<4:32:54, 3.32s/it]
332
  2%|▏ | 76/5000 [05:31<4:22:10, 3.19s/it]
333
 
334
  2%|▏ | 76/5000 [05:31<4:22:10, 3.19s/it]
335
  2%|▏ | 77/5000 [05:34<4:14:46, 3.11s/it]
336
 
337
  2%|▏ | 77/5000 [05:34<4:14:46, 3.11s/it]
338
  2%|▏ | 78/5000 [05:36<4:09:24, 3.04s/it]
339
 
340
  2%|▏ | 78/5000 [05:36<4:09:24, 3.04s/it]
341
  2%|▏ | 79/5000 [05:39<4:06:06, 3.00s/it]
342
 
343
  2%|▏ | 79/5000 [05:39<4:06:06, 3.00s/it]
344
  2%|▏ | 80/5000 [05:42<4:03:53, 2.97s/it]
345
 
346
  2%|▏ | 80/5000 [05:42<4:03:53, 2.97s/it]
347
  2%|▏ | 81/5000 [05:45<4:02:14, 2.95s/it]
348
 
349
  2%|▏ | 81/5000 [05:45<4:02:14, 2.95s/it]
350
  2%|▏ | 82/5000 [05:48<4:00:09, 2.93s/it]
351
 
352
  2%|▏ | 82/5000 [05:48<4:00:09, 2.93s/it]
353
  2%|▏ | 83/5000 [05:51<3:50:27, 2.81s/it]
354
 
355
  2%|▏ | 83/5000 [05:51<3:50:27, 2.81s/it]
356
  2%|▏ | 84/5000 [05:53<3:38:44, 2.67s/it]
357
 
358
  2%|▏ | 84/5000 [05:53<3:38:44, 2.67s/it]
359
  2%|▏ | 85/5000 [05:55<3:30:25, 2.57s/it]
360
 
361
  2%|▏ | 85/5000 [05:55<3:30:25, 2.57s/it]
362
  2%|▏ | 86/5000 [05:58<3:24:40, 2.50s/it]
363
 
364
  2%|▏ | 86/5000 [05:58<3:24:40, 2.50s/it]
365
  2%|▏ | 87/5000 [06:00<3:20:40, 2.45s/it]
366
 
367
  2%|▏ | 87/5000 [06:00<3:20:40, 2.45s/it]
368
  2%|▏ | 88/5000 [06:02<3:18:13, 2.42s/it]
369
 
370
  2%|▏ | 88/5000 [06:02<3:18:13, 2.42s/it]
371
  2%|▏ | 89/5000 [06:05<3:16:04, 2.40s/it]
372
 
373
  2%|▏ | 89/5000 [06:05<3:16:04, 2.40s/it]
374
  2%|▏ | 90/5000 [06:07<3:14:09, 2.37s/it]
375
 
376
  2%|▏ | 90/5000 [06:07<3:14:09, 2.37s/it]
377
  2%|▏ | 91/5000 [06:09<3:04:04, 2.25s/it]
378
 
379
  2%|▏ | 91/5000 [06:09<3:04:04, 2.25s/it]
380
  2%|▏ | 92/5000 [06:11<2:51:48, 2.10s/it]
381
 
382
  2%|▏ | 92/5000 [06:11<2:51:48, 2.10s/it]
383
  2%|▏ | 93/5000 [06:12<2:43:14, 2.00s/it]
384
 
385
  2%|▏ | 93/5000 [06:12<2:43:14, 2.00s/it]
386
  2%|▏ | 94/5000 [06:14<2:37:10, 1.92s/it]
387
 
388
  2%|▏ | 94/5000 [06:14<2:37:10, 1.92s/it]
389
  2%|▏ | 95/5000 [06:16<2:33:11, 1.87s/it]
390
 
391
  2%|▏ | 95/5000 [06:16<2:33:11, 1.87s/it]
392
  2%|▏ | 96/5000 [06:18<2:30:32, 1.84s/it]
393
 
394
  2%|▏ | 96/5000 [06:18<2:30:32, 1.84s/it]
395
  2%|▏ | 97/5000 [06:19<2:25:12, 1.78s/it]
396
 
397
  2%|▏ | 97/5000 [06:19<2:25:12, 1.78s/it]
398
  2%|▏ | 98/5000 [06:21<2:10:59, 1.60s/it]
399
 
400
  2%|▏ | 98/5000 [06:21<2:10:59, 1.60s/it]
401
  2%|▏ | 99/5000 [06:22<2:01:03, 1.48s/it]
402
 
403
  2%|▏ | 99/5000 [06:22<2:01:03, 1.48s/it]
404
  2%|▏ | 100/5000 [06:25<2:44:15, 2.01s/it]
405
 
406
  2%|▏ | 100/5000 [06:25<2:44:15, 2.01s/it]{'loss': 53.3154, 'grad_norm': 79.69747924804688, 'learning_rate': 1.2e-06, 'epoch': 0.0}
407
+ {'loss': 34.361, 'grad_norm': 24.41111946105957, 'learning_rate': 2.4e-06, 'epoch': 0.0}
408
+ {'loss': 30.4549, 'grad_norm': 36.592803955078125, 'learning_rate': 3.6e-06, 'epoch': 0.0}
409
+ {'loss': 23.957, 'grad_norm': 15.393040657043457, 'learning_rate': 4.8e-06, 'epoch': 0.01}
410
+ {'loss': 23.1553, 'grad_norm': 16.698022842407227, 'learning_rate': 5.999999999999999e-06, 'epoch': 0.01}
411
+ {'loss': 23.6211, 'grad_norm': 15.845901489257812, 'learning_rate': 7.2e-06, 'epoch': 0.01}
412
+ {'loss': 23.8485, 'grad_norm': 16.488929748535156, 'learning_rate': 8.4e-06, 'epoch': 0.01}
413
+ {'loss': 22.1915, 'grad_norm': 14.807597160339355, 'learning_rate': 9.6e-06, 'epoch': 0.01}
414
+ {'loss': 19.4207, 'grad_norm': 15.219635009765625, 'learning_rate': 1.0799999999999998e-05, 'epoch': 0.01}
415
+ {'loss': 20.869, 'grad_norm': 14.109861373901367, 'learning_rate': 1.1999999999999999e-05, 'epoch': 0.02}
416
+ {'loss': 20.723, 'grad_norm': 15.45991325378418, 'learning_rate': 1.3199999999999997e-05, 'epoch': 0.02}
417
+ {'loss': 20.1225, 'grad_norm': 14.389110565185547, 'learning_rate': 1.44e-05, 'epoch': 0.02}
418
+ {'loss': 19.3834, 'grad_norm': 13.629838943481445, 'learning_rate': 1.5599999999999996e-05, 'epoch': 0.02}
419
+ {'loss': 19.8481, 'grad_norm': 14.719141006469727, 'learning_rate': 1.68e-05, 'epoch': 0.02}
420
+ {'loss': 18.9288, 'grad_norm': 13.934913635253906, 'learning_rate': 1.7999999999999997e-05, 'epoch': 0.02}
421
+ {'loss': 17.2817, 'grad_norm': 13.111680030822754, 'learning_rate': 1.92e-05, 'epoch': 0.03}
422
+ {'loss': 16.5192, 'grad_norm': 40.639015197753906, 'learning_rate': 2.04e-05, 'epoch': 0.03}
423
+ {'loss': 17.0874, 'grad_norm': 13.131857872009277, 'learning_rate': 2.1599999999999996e-05, 'epoch': 0.03}
424
+ {'loss': 18.3955, 'grad_norm': 14.969550132751465, 'learning_rate': 2.28e-05, 'epoch': 0.03}
425
+ {'loss': 16.4885, 'grad_norm': 12.997845649719238, 'learning_rate': 2.3999999999999997e-05, 'epoch': 0.03}
426
+ {'loss': 16.4608, 'grad_norm': 13.298186302185059, 'learning_rate': 2.52e-05, 'epoch': 0.03}
427
+ {'loss': 18.4918, 'grad_norm': 17.314250946044922, 'learning_rate': 2.6399999999999995e-05, 'epoch': 0.04}
428
+ {'loss': 16.9535, 'grad_norm': 14.80049991607666, 'learning_rate': 2.7599999999999997e-05, 'epoch': 0.04}
429
+ {'loss': 16.5916, 'grad_norm': 15.080041885375977, 'learning_rate': 2.88e-05, 'epoch': 0.04}
430
+ {'loss': 17.0922, 'grad_norm': 15.787080764770508, 'learning_rate': 2.9999999999999997e-05, 'epoch': 0.04}
431
+ {'loss': 17.6368, 'grad_norm': 17.13389015197754, 'learning_rate': 3.119999999999999e-05, 'epoch': 0.04}
432
+ {'loss': 14.6459, 'grad_norm': 13.813786506652832, 'learning_rate': 3.2399999999999995e-05, 'epoch': 0.04}
433
+ {'loss': 18.4586, 'grad_norm': 19.571237564086914, 'learning_rate': 3.36e-05, 'epoch': 0.04}
434
+ {'loss': 15.8893, 'grad_norm': 16.844707489013672, 'learning_rate': 3.48e-05, 'epoch': 0.05}
435
+ {'loss': 15.1497, 'grad_norm': 18.0694637298584, 'learning_rate': 3.5999999999999994e-05, 'epoch': 0.05}
436
+ {'loss': 16.198, 'grad_norm': 18.38799476623535, 'learning_rate': 3.7199999999999996e-05, 'epoch': 0.05}
437
+ {'loss': 15.5918, 'grad_norm': 18.61817741394043, 'learning_rate': 3.84e-05, 'epoch': 0.05}
438
+ {'loss': 16.1928, 'grad_norm': 20.656465530395508, 'learning_rate': 3.96e-05, 'epoch': 0.05}
439
+ {'loss': 13.5195, 'grad_norm': 16.263877868652344, 'learning_rate': 4.08e-05, 'epoch': 0.05}
440
+ {'loss': 14.3946, 'grad_norm': 18.767169952392578, 'learning_rate': 4.2e-05, 'epoch': 0.06}
441
+ {'loss': 14.2461, 'grad_norm': 19.480947494506836, 'learning_rate': 4.319999999999999e-05, 'epoch': 0.06}
442
+ {'loss': 14.2695, 'grad_norm': 20.49869155883789, 'learning_rate': 4.4399999999999995e-05, 'epoch': 0.06}
443
+ {'loss': 13.8486, 'grad_norm': 20.939620971679688, 'learning_rate': 4.56e-05, 'epoch': 0.06}
444
+ {'loss': 13.7119, 'grad_norm': 22.123334884643555, 'learning_rate': 4.68e-05, 'epoch': 0.06}
445
+ {'loss': 14.1091, 'grad_norm': 29.032527923583984, 'learning_rate': 4.7999999999999994e-05, 'epoch': 0.06}
446
+ {'loss': 14.999, 'grad_norm': 28.069717407226562, 'learning_rate': 4.9199999999999997e-05, 'epoch': 0.07}
447
+ {'loss': 14.6043, 'grad_norm': 30.723072052001953, 'learning_rate': 5.04e-05, 'epoch': 0.07}
448
+ {'loss': 14.0164, 'grad_norm': 30.142724990844727, 'learning_rate': 5.1599999999999994e-05, 'epoch': 0.07}
449
+ {'loss': 12.8172, 'grad_norm': 28.958240509033203, 'learning_rate': 5.279999999999999e-05, 'epoch': 0.07}
450
+ {'loss': 12.7462, 'grad_norm': 31.854141235351562, 'learning_rate': 5.399999999999999e-05, 'epoch': 0.07}
451
+ {'loss': 11.4412, 'grad_norm': 28.455211639404297, 'learning_rate': 5.519999999999999e-05, 'epoch': 0.07}
452
+ {'loss': 10.4074, 'grad_norm': 27.766897201538086, 'learning_rate': 5.6399999999999995e-05, 'epoch': 0.08}
453
+ {'loss': 11.4181, 'grad_norm': 35.45139694213867, 'learning_rate': 5.76e-05, 'epoch': 0.08}
454
+ {'loss': 10.5777, 'grad_norm': 35.00629806518555, 'learning_rate': 5.88e-05, 'epoch': 0.08}
455
+ {'loss': 9.2136, 'grad_norm': 30.027040481567383, 'learning_rate': 5.9999999999999995e-05, 'epoch': 0.08}
456
+ {'loss': 18.2865, 'grad_norm': 103.56065368652344, 'learning_rate': 6.12e-05, 'epoch': 0.08}
457
+ {'loss': 15.0562, 'grad_norm': 86.56218719482422, 'learning_rate': 6.239999999999999e-05, 'epoch': 0.08}
458
+ {'loss': 14.4796, 'grad_norm': 82.1945571899414, 'learning_rate': 6.359999999999999e-05, 'epoch': 0.08}
459
+ {'loss': 10.162, 'grad_norm': 52.5987434387207, 'learning_rate': 6.479999999999999e-05, 'epoch': 0.09}
460
+ {'loss': 12.2572, 'grad_norm': 263.50726318359375, 'learning_rate': 6.599999999999999e-05, 'epoch': 0.09}
461
+ {'loss': 8.1613, 'grad_norm': 40.53132247924805, 'learning_rate': 6.72e-05, 'epoch': 0.09}
462
+ {'loss': 10.1649, 'grad_norm': 64.0225601196289, 'learning_rate': 6.84e-05, 'epoch': 0.09}
463
+ {'loss': 7.0805, 'grad_norm': 35.1747932434082, 'learning_rate': 6.96e-05, 'epoch': 0.09}
464
+ {'loss': 6.6396, 'grad_norm': 32.24826431274414, 'learning_rate': 7.079999999999999e-05, 'epoch': 0.09}
465
+ {'loss': 6.4643, 'grad_norm': 31.730484008789062, 'learning_rate': 7.199999999999999e-05, 'epoch': 0.1}
466
+ {'loss': 6.4411, 'grad_norm': 32.03666687011719, 'learning_rate': 7.319999999999999e-05, 'epoch': 0.1}
467
+ {'loss': 5.66, 'grad_norm': 23.286136627197266, 'learning_rate': 7.439999999999999e-05, 'epoch': 0.1}
468
+ {'loss': 5.3729, 'grad_norm': 20.670251846313477, 'learning_rate': 7.56e-05, 'epoch': 0.1}
469
+ {'loss': 5.1286, 'grad_norm': 15.871679306030273, 'learning_rate': 7.68e-05, 'epoch': 0.1}
470
+ {'loss': 4.8803, 'grad_norm': 11.15343189239502, 'learning_rate': 7.8e-05, 'epoch': 0.1}
471
+ {'loss': 4.9008, 'grad_norm': 12.387083053588867, 'learning_rate': 7.92e-05, 'epoch': 0.11}
472
+ {'loss': 4.705, 'grad_norm': 8.835503578186035, 'learning_rate': 8.04e-05, 'epoch': 0.11}
473
+ {'loss': 4.6347, 'grad_norm': 5.96044397354126, 'learning_rate': 8.16e-05, 'epoch': 0.11}
474
+ {'loss': 4.55, 'grad_norm': 4.373923301696777, 'learning_rate': 8.28e-05, 'epoch': 0.11}
475
+ {'loss': 4.5437, 'grad_norm': 3.950120449066162, 'learning_rate': 8.4e-05, 'epoch': 0.11}
476
+ {'loss': 4.475, 'grad_norm': 4.327428817749023, 'learning_rate': 8.519999999999998e-05, 'epoch': 0.11}
477
+ {'loss': 4.4206, 'grad_norm': 5.038540840148926, 'learning_rate': 8.639999999999999e-05, 'epoch': 0.12}
478
+ {'loss': 4.3619, 'grad_norm': 5.283208847045898, 'learning_rate': 8.759999999999999e-05, 'epoch': 0.12}
479
+ {'loss': 4.3522, 'grad_norm': 5.429969787597656, 'learning_rate': 8.879999999999999e-05, 'epoch': 0.12}
480
+ {'loss': 4.2836, 'grad_norm': 4.981645107269287, 'learning_rate': 8.999999999999999e-05, 'epoch': 0.12}
481
+ {'loss': 4.2366, 'grad_norm': 4.838561058044434, 'learning_rate': 9.12e-05, 'epoch': 0.12}
482
+ {'loss': 4.1618, 'grad_norm': 4.037646293640137, 'learning_rate': 9.24e-05, 'epoch': 0.12}
483
+ {'loss': 4.1763, 'grad_norm': 2.755617380142212, 'learning_rate': 9.36e-05, 'epoch': 0.12}
484
+ {'loss': 4.0608, 'grad_norm': 2.9847090244293213, 'learning_rate': 9.479999999999999e-05, 'epoch': 0.13}
485
+ {'loss': 4.1162, 'grad_norm': 3.5956788063049316, 'learning_rate': 9.599999999999999e-05, 'epoch': 0.13}
486
+ {'loss': 3.9706, 'grad_norm': 3.1633763313293457, 'learning_rate': 9.719999999999999e-05, 'epoch': 0.13}
487
+ {'loss': 4.0539, 'grad_norm': 6.8584303855896, 'learning_rate': 9.839999999999999e-05, 'epoch': 0.13}
488
+ {'loss': 3.9233, 'grad_norm': 2.2297003269195557, 'learning_rate': 9.96e-05, 'epoch': 0.13}
489
+ {'loss': 4.0172, 'grad_norm': 6.489461898803711, 'learning_rate': 0.0001008, 'epoch': 0.13}
490
+ {'loss': 3.9076, 'grad_norm': 2.9540462493896484, 'learning_rate': 0.000102, 'epoch': 0.14}
491
+ {'loss': 3.9456, 'grad_norm': 1.7731634378433228, 'learning_rate': 0.00010319999999999999, 'epoch': 0.14}
492
+ {'loss': 3.9093, 'grad_norm': 1.2879796028137207, 'learning_rate': 0.00010439999999999999, 'epoch': 0.14}
493
+ {'loss': 3.7957, 'grad_norm': 1.4996570348739624, 'learning_rate': 0.00010559999999999998, 'epoch': 0.14}
494
+ {'loss': 3.7925, 'grad_norm': 3.2898361682891846, 'learning_rate': 0.00010679999999999998, 'epoch': 0.14}
495
+ {'loss': 3.8016, 'grad_norm': 2.096083641052246, 'learning_rate': 0.00010799999999999998, 'epoch': 0.14}
496
+ {'loss': 3.8623, 'grad_norm': 5.319262504577637, 'learning_rate': 0.00010919999999999998, 'epoch': 0.15}
497
+ {'loss': 3.8429, 'grad_norm': 1.6835908889770508, 'learning_rate': 0.00011039999999999999, 'epoch': 0.15}
498
+ {'loss': 3.7729, 'grad_norm': 1.5660072565078735, 'learning_rate': 0.00011159999999999999, 'epoch': 0.15}
499
+ {'loss': 3.8433, 'grad_norm': 0.8665121793746948, 'learning_rate': 0.00011279999999999999, 'epoch': 0.15}
500
+ {'loss': 3.8249, 'grad_norm': 5.862734317779541, 'learning_rate': 0.00011399999999999999, 'epoch': 0.15}
501
+ {'loss': 3.8095, 'grad_norm': 2.4911062717437744, 'learning_rate': 0.0001152, 'epoch': 0.15}
502
+ {'loss': 3.9602, 'grad_norm': 7.126469135284424, 'learning_rate': 0.0001164, 'epoch': 0.16}
503
+ {'loss': 3.8379, 'grad_norm': 0.8465694189071655, 'learning_rate': 0.0001176, 'epoch': 0.16}
504
+ {'loss': 3.8803, 'grad_norm': 1.3771048784255981, 'learning_rate': 0.0001188, 'epoch': 0.16}
505
+ {'loss': 3.8788, 'grad_norm': 1.4679234027862549, 'learning_rate': 0.00011999999999999999, 'epoch': 0.16}
506
+
507
+
508
  0%| | 0/196 [00:00<?, ?it/s]
509
+
510
  1%| | 2/196 [00:00<01:35, 2.03it/s]
511
+
512
  2%|▏ | 3/196 [00:02<02:33, 1.26it/s]
513
+
514
  2%|▏ | 4/196 [00:03<03:12, 1.00s/it]
515
+
516
  3%|▎ | 5/196 [00:04<03:31, 1.11s/it]
517
+
518
  3%|▎ | 6/196 [00:06<04:02, 1.28s/it]
519
+
520
  4%|▎ | 7/196 [00:08<04:19, 1.37s/it]
521
+
522
  4%|▍ | 8/196 [00:09<04:18, 1.37s/it]
523
+
524
  5%|▍ | 9/196 [00:12<05:38, 1.81s/it]
525
+
526
  5%|▌ | 10/196 [00:15<06:51, 2.21s/it]
527
+
528
  6%|▌ | 11/196 [00:18<07:59, 2.59s/it]
529
+
530
  6%|▌ | 12/196 [00:22<08:35, 2.80s/it]
531
+
532
  7%|▋ | 13/196 [00:23<07:27, 2.44s/it]
533
+
534
  7%|▋ | 14/196 [00:24<06:11, 2.04s/it]
535
+
536
  8%|▊ | 15/196 [00:25<05:08, 1.70s/it]
537
+
538
  8%|▊ | 16/196 [00:27<04:46, 1.59s/it]
539
+
540
  9%|▊ | 17/196 [00:28<04:32, 1.52s/it]
541
+
542
  9%|▉ | 18/196 [00:30<04:55, 1.66s/it]
543
+
544
  10%|▉ | 19/196 [00:33<06:19, 2.15s/it]
545
+
546
  10%|█ | 20/196 [00:36<06:54, 2.35s/it]
547
+
548
  11%|█ | 21/196 [00:39<07:06, 2.44s/it]
549
+
550
  11%|█ | 22/196 [00:41<06:52, 2.37s/it]
551
+
552
  12%|█▏ | 23/196 [00:43<06:21, 2.21s/it]
553
+
554
  12%|█▏ | 24/196 [00:43<05:00, 1.75s/it]
555
+
556
  13%|█▎ | 25/196 [00:44<04:12, 1.48s/it]
557
+
558
  13%|█▎ | 26/196 [00:45<03:39, 1.29s/it]
559
+
560
  14%|█▍ | 27/196 [00:46<03:21, 1.19s/it]
561
+
562
  14%|█▍ | 28/196 [00:47<03:10, 1.14s/it]
563
+
564
  15%|█▍ | 29/196 [00:48<03:10, 1.14s/it]
565
+
566
  15%|█▌ | 30/196 [00:49<03:09, 1.14s/it]
567
+
568
  16%|█▌ | 31/196 [00:50<02:53, 1.05s/it]
569
+
570
  16%|█▋ | 32/196 [00:51<02:42, 1.01it/s]
571
+
572
  17%|█▋ | 33/196 [00:53<03:10, 1.17s/it]
573
+
574
  17%|█▋ | 34/196 [00:55<04:14, 1.57s/it]
575
+
576
  18%|█▊ | 35/196 [00:57<04:21, 1.62s/it]
577
+
578
  18%|█▊ | 36/196 [00:59<05:04, 1.90s/it]
579
+
580
  19%|█▉ | 37/196 [01:01<05:02, 1.90s/it]
581
+
582
  19%|█▉ | 38/196 [01:03<04:32, 1.72s/it]
583
+
584
  20%|█▉ | 39/196 [01:04<04:11, 1.60s/it]
585
+
586
  20%|██ | 40/196 [01:05<03:49, 1.47s/it]
587
+
588
  21%|██ | 41/196 [01:06<03:29, 1.35s/it]
589
+
590
  21%|██▏ | 42/196 [01:07<03:13, 1.26s/it]
591
+
592
  22%|██▏ | 43/196 [01:08<03:09, 1.24s/it]
593
+
594
  22%|██▏ | 44/196 [01:10<03:04, 1.21s/it]
595
+
596
  23%|██▎ | 45/196 [01:11<02:52, 1.14s/it]
597
+
598
  23%|██▎ | 46/196 [01:11<02:38, 1.06s/it]
599
+
600
  24%|██▍ | 47/196 [01:12<02:39, 1.07s/it]
601
+
602
  24%|██▍ | 48/196 [01:13<02:30, 1.02s/it]
603
+
604
  25%|██▌ | 49/196 [01:14<02:26, 1.00it/s]
605
+
606
  26%|██▌ | 50/196 [01:15<02:24, 1.01it/s]
607
+
608
  26%|██▌ | 51/196 [01:16<02:20, 1.03it/s]
609
+
610
  27%|██▋ | 52/196 [01:17<02:19, 1.04it/s]
611
+
612
  27%|██▋ | 53/196 [01:18<02:29, 1.05s/it]
613
+
614
  28%|██▊ | 54/196 [01:19<02:25, 1.02s/it]
615
+
616
  28%|██▊ | 55/196 [01:21<02:40, 1.14s/it]
617
+
618
  29%|██▊ | 56/196 [01:22<02:51, 1.22s/it]
619
+
620
  29%|██▉ | 57/196 [01:24<03:04, 1.33s/it]
621
+
622
  30%|██▉ | 58/196 [01:25<03:11, 1.39s/it]
623
+
624
  30%|███ | 59/196 [01:27<03:09, 1.38s/it]
625
+
626
  31%|███ | 60/196 [01:28<02:53, 1.27s/it]
627
+
628
  31%|███ | 61/196 [01:29<02:35, 1.15s/it]
629
+
630
  32%|███▏ | 62/196 [01:30<02:30, 1.12s/it]
631
+
632
  32%|███▏ | 63/196 [01:31<02:30, 1.13s/it]
633
+
634
  33%|███▎ | 64/196 [01:32<02:30, 1.14s/it]
635
+
636
  33%|███▎ | 65/196 [01:33<02:25, 1.11s/it]
637
+
638
  34%|███▎ | 66/196 [01:34<02:30, 1.16s/it]
639
+
640
  34%|███▍ | 67/196 [01:36<02:35, 1.20s/it]
641
+
642
  35%|███▍ | 68/196 [01:37<02:59, 1.40s/it]
643
+
644
  35%|███▌ | 69/196 [01:39<03:06, 1.47s/it]
645
+
646
  36%|███▌ | 70/196 [01:40<02:51, 1.36s/it]
647
+
648
  36%|███▌ | 71/196 [01:41<02:39, 1.27s/it]
649
+
650
  37%|███▋ | 72/196 [01:42<02:25, 1.17s/it]
651
+
652
  37%|███▋ | 73/196 [01:43<02:11, 1.07s/it]
653
+
654
  38%|███▊ | 74/196 [01:44<02:00, 1.01it/s]
655
+
656
  38%|███▊ | 75/196 [01:45<01:57, 1.03it/s]
657
+
658
  39%|███▉ | 76/196 [01:46<01:53, 1.05it/s]
659
+
660
  39%|███▉ | 77/196 [01:47<01:58, 1.00it/s]
661
+
662
  40%|███▉ | 78/196 [01:48<02:08, 1.09s/it]
663
+
664
  40%|████ | 79/196 [01:49<02:02, 1.05s/it]
665
+
666
  41%|████ | 80/196 [01:50<02:09, 1.12s/it]
667
+
668
  41%|████▏ | 81/196 [01:52<02:13, 1.16s/it]
669
+
670
  42%|████▏ | 82/196 [01:53<02:08, 1.13s/it]
671
+
672
  42%|████▏ | 83/196 [01:54<02:10, 1.15s/it]
673
+
674
  43%|████▎ | 84/196 [01:55<02:13, 1.19s/it]
675
+
676
  43%|████▎ | 85/196 [01:56<02:11, 1.18s/it]
677
+
678
  44%|████▍ | 86/196 [01:58<02:15, 1.23s/it]
679
+
680
  44%|████▍ | 87/196 [01:59<02:11, 1.20s/it]
681
+
682
  45%|████▍ | 88/196 [02:00<02:15, 1.25s/it]
683
+
684
  45%|████▌ | 89/196 [02:01<02:19, 1.30s/it]
685
+
686
  46%|████▌ | 90/196 [02:03<02:13, 1.26s/it]
687
+
688
  46%|████▋ | 91/196 [02:04<02:08, 1.22s/it]
689
+
690
  47%|████▋ | 92/196 [02:05<02:03, 1.18s/it]
691
+
692
  47%|████▋ | 93/196 [02:06<02:08, 1.25s/it]
693
+
694
  48%|████▊ | 94/196 [02:08<02:11, 1.29s/it]
695
+
696
  48%|████▊ | 95/196 [02:09<02:04, 1.23s/it]
697
+
698
  49%|████▉ | 96/196 [02:10<02:10, 1.30s/it]
699
+
700
  49%|████▉ | 97/196 [02:11<02:06, 1.28s/it]
701
+
702
  50%|█████ | 98/196 [02:13<02:10, 1.33s/it]
703
+
704
  51%|█████ | 99/196 [02:14<01:57, 1.21s/it]
705
+
706
  51%|█████ | 100/196 [02:15<01:43, 1.08s/it]
707
+
708
  52%|█████▏ | 101/196 [02:15<01:35, 1.00s/it]
709
+
710
  52%|█████▏ | 102/196 [02:17<01:40, 1.07s/it]
711
+
712
  53%|█████▎ | 103/196 [02:18<01:46, 1.15s/it]
713
+
714
  53%|█████▎ | 104/196 [02:20<02:03, 1.34s/it]
715
+
716
  54%|█████▎ | 105/196 [02:21<02:06, 1.39s/it]
717
+
718
  54%|█████▍ | 106/196 [02:23<02:05, 1.40s/it]
719
+
720
  55%|█████▍ | 107/196 [02:24<01:57, 1.32s/it]
721
+
722
  55%|█████▌ | 108/196 [02:25<01:42, 1.17s/it]
723
+
724
  56%|█████▌ | 109/196 [02:26<01:34, 1.09s/it]
725
+
726
  56%|█████▌ | 110/196 [02:26<01:29, 1.04s/it]
727
+
728
  57%|█████▋ | 111/196 [02:28<01:29, 1.06s/it]
729
+
730
  57%|█████▋ | 112/196 [02:29<01:31, 1.09s/it]
731
+
732
  58%|█████▊ | 113/196 [02:30<01:33, 1.13s/it]
733
+
734
  58%|█████▊ | 114/196 [02:31<01:25, 1.04s/it]
735
+
736
  59%|█████▊ | 115/196 [02:32<01:21, 1.00s/it]
737
+
738
  59%|█████▉ | 116/196 [02:33<01:18, 1.02it/s]
739
+
740
  60%|█████▉ | 117/196 [02:34<01:14, 1.06it/s]
741
+
742
  60%|██████ | 118/196 [02:34<01:06, 1.17it/s]
743
+
744
  61%|██████ | 119/196 [02:35<01:08, 1.13it/s]
745
+
746
  61%|██████ | 120/196 [02:36<01:11, 1.06it/s]
747
+
748
  62%|██████▏ | 121/196 [02:37<01:13, 1.02it/s]
749
+
750
  62%|██████▏ | 122/196 [02:38<01:14, 1.01s/it]
751
+
752
  63%|██████▎ | 123/196 [02:39<01:13, 1.00s/it]
753
+
754
  63%|██████▎ | 124/196 [02:40<01:10, 1.02it/s]
755
+
756
  64%|██████▍ | 125/196 [02:41<01:10, 1.01it/s]
757
+
758
  64%|██████▍ | 126/196 [02:43<01:18, 1.12s/it]
759
+
760
  65%|██████▍ | 127/196 [02:44<01:17, 1.12s/it]
761
+
762
  65%|██████▌ | 128/196 [02:45<01:12, 1.06s/it]
763
+
764
  66%|██████▌ | 129/196 [02:46<01:11, 1.06s/it]
765
+
766
  66%|██████▋ | 130/196 [02:47<01:09, 1.05s/it]
767
+
768
  67%|██████▋ | 131/196 [02:48<01:08, 1.06s/it]
769
+
770
  67%|██████▋ | 132/196 [02:49<01:03, 1.01it/s]
771
+
772
  68%|██████▊ | 133/196 [02:50<01:01, 1.02it/s]
773
+
774
  68%|██████▊ | 134/196 [02:51<01:06, 1.07s/it]
775
+
776
  69%|██████▉ | 135/196 [02:52<01:03, 1.04s/it]
777
+
778
  69%|██████▉ | 136/196 [02:53<00:59, 1.00it/s]
779
+
780
  70%|██████▉ | 137/196 [02:54<00:58, 1.00it/s]
781
+
782
  70%|███████ | 138/196 [02:55<00:58, 1.01s/it]
783
+
784
  71%|███████ | 139/196 [02:56<00:57, 1.01s/it]
785
+
786
  71%|███████▏ | 140/196 [02:57<00:56, 1.00s/it]
787
+
788
  72%|███████▏ | 141/196 [02:58<00:55, 1.01s/it]
789
+
790
  72%|███████▏ | 142/196 [02:59<00:56, 1.04s/it]
791
+
792
  73%|███████▎ | 143/196 [03:00<00:59, 1.13s/it]
793
+
794
  73%|███████▎ | 144/196 [03:01<00:54, 1.05s/it]
795
+
796
  74%|███████▍ | 145/196 [03:02<00:49, 1.04it/s]
797
+
798
  74%|███████▍ | 146/196 [03:03<00:44, 1.11it/s]
799
+
800
  75%|███████▌ | 147/196 [03:04<00:44, 1.10it/s]
801
+
802
  76%|███████▌ | 148/196 [03:05<00:43, 1.10it/s]
803
+
804
  76%|███████▌ | 149/196 [03:05<00:41, 1.14it/s]
805
+
806
  77%|███████▋ | 150/196 [03:06<00:42, 1.09it/s]
807
+
808
  77%|███████▋ | 151/196 [03:07<00:43, 1.02it/s]
809
+
810
  78%|███████▊ | 152/196 [03:08<00:42, 1.04it/s]
811
+
812
  78%|███████▊ | 153/196 [03:09<00:41, 1.05it/s]
813
+
814
  79%|███████▊ | 154/196 [03:10<00:41, 1.02it/s]
815
+
816
  79%|███████▉ | 155/196 [03:12<00:43, 1.05s/it]
817
+
818
  80%|███████▉ | 156/196 [03:13<00:49, 1.25s/it]
819
+
820
  80%|████████ | 157/196 [03:15<00:53, 1.37s/it]
821
+
822
  81%|████████ | 158/196 [03:16<00:46, 1.22s/it]
823
+
824
  81%|████████ | 159/196 [03:17<00:41, 1.11s/it]
825
+
826
  82%|████████▏ | 160/196 [03:18<00:37, 1.04s/it]
827
+
828
  82%|████████▏ | 161/196 [03:19<00:36, 1.04s/it]
829
+
830
  83%|████████▎ | 162/196 [03:20<00:35, 1.04s/it]
831
+
832
  83%|████████▎ | 163/196 [03:21<00:33, 1.01s/it]
833
+
834
  84%|████████▎ | 164/196 [03:22<00:31, 1.01it/s]
835
+
836
  84%|████████▍ | 165/196 [03:23<00:32, 1.04s/it]
837
+
838
  85%|████████▍ | 166/196 [03:24<00:30, 1.01s/it]
839
+
840
  85%|████████▌ | 167/196 [03:25<00:28, 1.02it/s]
841
+
842
  86%|████████▌ | 168/196 [03:25<00:26, 1.07it/s]
843
+
844
  86%|████████▌ | 169/196 [03:26<00:25, 1.05it/s]
845
+
846
  87%|████████▋ | 170/196 [03:28<00:27, 1.06s/it]
847
+
848
  87%|████████▋ | 171/196 [03:29<00:25, 1.02s/it]
849
+
850
  88%|████████▊ | 172/196 [03:30<00:25, 1.05s/it]
851
+
852
  88%|████████▊ | 173/196 [03:31<00:23, 1.04s/it]
853
+
854
  89%|████████▉ | 174/196 [03:32<00:24, 1.09s/it]
855
+
856
  89%|████████▉ | 175/196 [03:34<00:27, 1.32s/it]
857
+
858
  90%|████████▉ | 176/196 [03:38<00:41, 2.08s/it]
859
+
860
  90%|█████████ | 177/196 [03:41<00:46, 2.43s/it]
861
+
862
  91%|█████████ | 178/196 [03:44<00:47, 2.64s/it]
863
+
864
  91%|█████████▏| 179/196 [03:48<00:50, 2.96s/it]
865
+
866
  92%|█████████▏| 180/196 [03:49<00:37, 2.36s/it]
867
+
868
  92%|█████████▏| 181/196 [03:50<00:29, 2.00s/it]
869
+
870
  93%|█████████▎| 182/196 [03:51<00:23, 1.69s/it]
871
+
872
  93%|█████████▎| 183/196 [03:53<00:22, 1.75s/it]
873
+
874
  94%|█████████▍| 184/196 [03:54<00:18, 1.50s/it]
875
+
876
  94%|█████████▍| 185/196 [03:55<00:15, 1.43s/it]
877
+
878
  95%|█████████▍| 186/196 [03:57<00:15, 1.50s/it]
879
+
880
  95%|█████████▌| 187/196 [03:58<00:12, 1.38s/it]
881
+
882
  96%|█████████▌| 188/196 [03:59<00:09, 1.24s/it]
883
+
884
  96%|█████████▋| 189/196 [04:00<00:08, 1.19s/it]
885
+
886
  97%|█████████▋| 190/196 [04:01<00:06, 1.12s/it]
887
+
888
  97%|█████████▋| 191/196 [04:01<00:05, 1.03s/it]
889
+
890
  98%|█████████▊| 192/196 [04:03<00:04, 1.03s/it]
891
+
892
  98%|█████████▊| 193/196 [04:04<00:03, 1.04s/it]
893
+
894
  99%|█████████▉| 194/196 [04:05<00:02, 1.04s/it]
895
+
896
  99%|█████████▉| 195/196 [04:06<00:01, 1.01s/it]
897
+
898
 
899
+
900
 
901
  2%|▏ | 100/5000 [10:40<2:44:15, 2.01s/it]
902
+
903
+
904
  
905
  2%|▏ | 101/5000 [10:56<112:44:57, 82.85s/it]
906
 
907
  2%|▏ | 101/5000 [10:56<112:44:57, 82.85s/it]
908
  2%|▏ | 102/5000 [11:05<82:27:49, 60.61s/it]
909
 
910
  2%|▏ | 102/5000 [11:05<82:27:49, 60.61s/it]
911
  2%|▏ | 103/5000 [11:13<60:46:59, 44.68s/it]
912
 
913
  2%|▏ | 103/5000 [11:13<60:46:59, 44.68s/it]
914
  2%|▏ | 104/5000 [11:19<45:14:51, 33.27s/it]
915
 
916
  2%|▏ | 104/5000 [11:19<45:14:51, 33.27s/it]
917
  2%|▏ | 105/5000 [11:26<34:13:05, 25.17s/it]
918
 
919
  2%|▏ | 105/5000 [11:26<34:13:05, 25.17s/it]
920
  2%|▏ | 106/5000 [11:32<26:22:42, 19.40s/it]
921
 
922
  2%|▏ | 106/5000 [11:32<26:22:42, 19.40s/it]
923
  2%|▏ | 107/5000 [11:37<20:40:35, 15.21s/it]
924
 
925
  2%|▏ | 107/5000 [11:37<20:40:35, 15.21s/it]
926
  2%|▏ | 108/5000 [11:42<16:39:34, 12.26s/it]
927
 
928
  2%|▏ | 108/5000 [11:42<16:39:34, 12.26s/it]
929
  2%|▏ | 109/5000 [11:47<13:41:45, 10.08s/it]
930
 
931
  2%|▏ | 109/5000 [11:47<13:41:45, 10.08s/it]
932
  2%|▏ | 110/5000 [11:52<11:31:20, 8.48s/it]
933
 
934
  2%|▏ | 110/5000 [11:52<11:31:20, 8.48s/it]
935
  2%|▏ | 111/5000 [11:57<10:00:45, 7.37s/it]
936
 
937
  2%|▏ | 111/5000 [11:57<10:00:45, 7.37s/it]
938
  2%|▏ | 112/5000 [12:02<8:54:57, 6.57s/it]
939
 
940
  2%|▏ | 112/5000 [12:02<8:54:57, 6.57s/it]
941
  2%|▏ | 113/5000 [12:06<8:00:26, 5.90s/it]
942
 
943
  2%|▏ | 113/5000 [12:06<8:00:26, 5.90s/it]
944
  2%|▏ | 114/5000 [12:10<7:16:59, 5.37s/it]
945
 
946
  2%|▏ | 114/5000 [12:10<7:16:59, 5.37s/it]
947
  2%|▏ | 115/5000 [12:14<6:46:31, 4.99s/it]
948
 
949
  2%|▏ | 115/5000 [12:14<6:46:31, 4.99s/it]
950
  2%|▏ | 116/5000 [12:18<6:25:16, 4.73s/it]
951
 
952
  2%|▏ | 116/5000 [12:18<6:25:16, 4.73s/it]
953
  2%|▏ | 117/5000 [12:22<6:07:22, 4.51s/it]
954
 
955
  2%|▏ | 117/5000 [12:22<6:07:22, 4.51s/it]
956
  2%|▏ | 118/5000 [12:26<5:43:11, 4.22s/it]
957
 
958
  2%|▏ | 118/5000 [12:26<5:43:11, 4.22s/it]
959
  2%|▏ | 119/5000 [12:29<5:25:03, 4.00s/it]
960
 
961
  2%|▏ | 119/5000 [12:29<5:25:03, 4.00s/it]
962
  2%|▏ | 120/5000 [12:33<5:12:22, 3.84s/it]
963
 
964
  2%|▏ | 120/5000 [12:33<5:12:22, 3.84s/it]
965
  2%|▏ | 121/5000 [12:36<5:03:16, 3.73s/it]
966
 
967
  2%|▏ | 121/5000 [12:36<5:03:16, 3.73s/it]
968
  2%|▏ | 122/5000 [12:40<4:56:44, 3.65s/it]
969
 
970
  2%|▏ | 122/5000 [12:40<4:56:44, 3.65s/it]
971
  2%|▏ | 123/5000 [12:43<4:52:45, 3.60s/it]
972
 
973
  2%|▏ | 123/5000 [12:43<4:52:45, 3.60s/it]
974
  2%|▏ | 124/5000 [12:47<4:47:27, 3.54s/it]
975
 
976
  2%|▏ | 124/5000 [12:47<4:47:27, 3.54s/it]
977
  2%|▎ | 125/5000 [12:49<4:31:41, 3.34s/it]
978
 
979
  2%|▎ | 125/5000 [12:49<4:31:41, 3.34s/it]
980
  3%|▎ | 126/5000 [12:52<4:20:32, 3.21s/it]
981
 
982
  3%|▎ | 126/5000 [12:52<4:20:32, 3.21s/it]
983
  3%|▎ | 127/5000 [12:55<4:12:56, 3.11s/it]
984
 
985
  3%|▎ | 127/5000 [12:55<4:12:56, 3.11s/it]
986
  3%|▎ | 128/5000 [12:58<4:07:34, 3.05s/it]
987
 
988
  3%|▎ | 128/5000 [12:58<4:07:34, 3.05s/it]
989
  3%|▎ | 129/5000 [13:01<4:03:57, 3.01s/it]
990
 
991
  3%|▎ | 129/5000 [13:01<4:03:57, 3.01s/it]
992
  3%|▎ | 130/5000 [13:04<4:01:39, 2.98s/it]
993
 
994
  3%|▎ | 130/5000 [13:04<4:01:39, 2.98s/it]
995
  3%|▎ | 131/5000 [13:07<3:59:31, 2.95s/it]
996
 
997
  3%|▎ | 131/5000 [13:07<3:59:31, 2.95s/it]
998
  3%|▎ | 132/5000 [13:10<3:54:56, 2.90s/it]
999
 
1000
  3%|▎ | 132/5000 [13:10<3:54:56, 2.90s/it]
1001
  3%|▎ | 133/5000 [13:12<3:41:04, 2.73s/it]
1002
 
1003
  3%|▎ | 133/5000 [13:12<3:41:04, 2.73s/it]
1004
  3%|▎ | 134/5000 [13:14<3:31:40, 2.61s/it]
1005
 
1006
  3%|▎ | 134/5000 [13:14<3:31:40, 2.61s/it]
1007
  3%|▎ | 135/5000 [13:17<3:25:01, 2.53s/it]
1008
 
1009
  3%|▎ | 135/5000 [13:17<3:25:01, 2.53s/it]
1010
  3%|▎ | 136/5000 [13:19<3:20:37, 2.47s/it]
1011
 
1012
  3%|▎ | 136/5000 [13:19<3:20:37, 2.47s/it]
1013
  3%|▎ | 137/5000 [13:21<3:17:14, 2.43s/it]
1014
 
1015
  3%|▎ | 137/5000 [13:21<3:17:14, 2.43s/it]
1016
  3%|▎ | 138/5000 [13:24<3:15:02, 2.41s/it]
1017
 
1018
  3%|▎ | 138/5000 [13:24<3:15:02, 2.41s/it]
1019
  3%|▎ | 139/5000 [13:26<3:13:07, 2.38s/it]
1020
 
1021
  3%|▎ | 139/5000 [13:26<3:13:07, 2.38s/it]
1022
  3%|▎ | 140/5000 [13:28<3:08:53, 2.33s/it]
1023
 
1024
  3%|▎ | 140/5000 [13:28<3:08:53, 2.33s/it]
1025
  3%|▎ | 141/5000 [13:30<2:54:40, 2.16s/it]
1026
 
1027
  3%|▎ | 141/5000 [13:30<2:54:40, 2.16s/it]
1028
  3%|▎ | 142/5000 [13:32<2:44:45, 2.03s/it]
1029
 
1030
  3%|▎ | 142/5000 [13:32<2:44:45, 2.03s/it]
1031
  3%|▎ | 143/5000 [13:33<2:37:59, 1.95s/it]
1032
 
1033
  3%|▎ | 143/5000 [13:33<2:37:59, 1.95s/it]
1034
  3%|▎ | 144/5000 [13:35<2:33:23, 1.90s/it]
1035
 
1036
  3%|▎ | 144/5000 [13:35<2:33:23, 1.90s/it]
1037
  3%|▎ | 145/5000 [13:37<2:29:54, 1.85s/it]
1038
 
1039
  3%|▎ | 145/5000 [13:37<2:29:54, 1.85s/it]
1040
  3%|▎ | 146/5000 [13:39<2:26:05, 1.81s/it]
1041
 
1042
  3%|▎ | 146/5000 [13:39<2:26:05, 1.81s/it]
1043
  3%|▎ | 147/5000 [13:40<2:16:10, 1.68s/it]
1044
 
1045
  3%|▎ | 147/5000 [13:40<2:16:10, 1.68s/it]
1046
  3%|▎ | 148/5000 [13:41<2:04:16, 1.54s/it]
1047
 
1048
  3%|▎ | 148/5000 [13:41<2:04:16, 1.54s/it]
1049
  3%|▎ | 149/5000 [13:42<1:55:55, 1.43s/it]
1050
 
1051
  3%|▎ | 149/5000 [13:42<1:55:55, 1.43s/it]
1052
  3%|▎ | 150/5000 [13:46<2:35:29, 1.92s/it]
1053
 
1054
  3%|▎ | 150/5000 [13:46<2:35:29, 1.92s/it]
1055
  3%|▎ | 151/5000 [14:03<8:56:47, 6.64s/it]
1056
 
1057
  3%|▎ | 151/5000 [14:03<8:56:47, 6.64s/it]
1058
  3%|▎ | 152/5000 [14:13<10:06:23, 7.50s/it]
1059
 
1060
  3%|▎ | 152/5000 [14:13<10:06:23, 7.50s/it]
1061
  3%|▎ | 153/5000 [14:21<10:23:38, 7.72s/it]
1062
 
1063
  3%|▎ | 153/5000 [14:21<10:23:38, 7.72s/it]
1064
  3%|▎ | 154/5000 [14:28<10:11:29, 7.57s/it]
1065
 
1066
  3%|▎ | 154/5000 [14:28<10:11:29, 7.57s/it]
1067
  3%|▎ | 155/5000 [14:35<9:52:15, 7.33s/it]
1068
 
1069
  3%|▎ | 155/5000 [14:35<9:52:15, 7.33s/it]
1070
  3%|▎ | 156/5000 [14:41<9:25:01, 7.00s/it]
1071
 
1072
  3%|▎ | 156/5000 [14:41<9:25:01, 7.00s/it]
1073
  3%|▎ | 157/5000 [14:47<8:57:19, 6.66s/it]
1074
 
1075
  3%|▎ | 157/5000 [14:47<8:57:19, 6.66s/it]
1076
  3%|▎ | 158/5000 [14:52<8:26:30, 6.28s/it]
1077
 
1078
  3%|▎ | 158/5000 [14:52<8:26:30, 6.28s/it]
1079
  3%|▎ | 159/5000 [14:58<8:04:16, 6.00s/it]
1080
 
1081
  3%|▎ | 159/5000 [14:58<8:04:16, 6.00s/it]
1082
  3%|▎ | 160/5000 [15:03<7:39:05, 5.69s/it]
1083
 
1084
  3%|▎ | 160/5000 [15:03<7:39:05, 5.69s/it]
1085
  3%|▎ | 161/5000 [15:07<7:15:00, 5.39s/it]
1086
 
1087
  3%|▎ | 161/5000 [15:07<7:15:00, 5.39s/it]
1088
  3%|▎ | 162/5000 [15:12<6:58:22, 5.19s/it]
1089
 
1090
  3%|▎ | 162/5000 [15:12<6:58:22, 5.19s/it]
1091
  3%|▎ | 163/5000 [15:16<6:36:59, 4.92s/it]
1092
 
1093
  3%|▎ | 163/5000 [15:16<6:36:59, 4.92s/it]
1094
  3%|▎ | 164/5000 [15:20<6:16:31, 4.67s/it]
1095
 
1096
  3%|▎ | 164/5000 [15:21<6:16:31, 4.67s/it]
1097
  3%|▎ | 165/5000 [15:25<6:02:06, 4.49s/it]
1098
 
1099
  3%|▎ | 165/5000 [15:25<6:02:06, 4.49s/it]
1100
  3%|▎ | 166/5000 [15:29<5:51:45, 4.37s/it]
1101
 
1102
  3%|▎ | 166/5000 [15:29<5:51:45, 4.37s/it]
1103
  3%|▎ | 167/5000 [15:33<5:44:41, 4.28s/it]
1104
 
1105
  3%|▎ | 167/5000 [15:33<5:44:41, 4.28s/it]
1106
  3%|▎ | 168/5000 [15:37<5:37:26, 4.19s/it]
1107
 
1108
  3%|▎ | 168/5000 [15:37<5:37:26, 4.19s/it]
1109
  3%|▎ | 169/5000 [15:40<5:19:41, 3.97s/it]
1110
 
1111
  3%|▎ | 169/5000 [15:40<5:19:41, 3.97s/it]
1112
  3%|▎ | 170/5000 [15:44<5:07:48, 3.82s/it]
1113
 
1114
  3%|▎ | 170/5000 [15:44<5:07:48, 3.82s/it]
1115
  3%|▎ | 171/5000 [15:47<4:58:58, 3.71s/it]
1116
 
1117
  3%|▎ | 171/5000 [15:47<4:58:58, 3.71s/it]
1118
  3%|▎ | 172/5000 [15:51<4:52:54, 3.64s/it]
1119
 
1120
  3%|▎ | 172/5000 [15:51<4:52:54, 3.64s/it]
1121
  3%|▎ | 173/5000 [15:54<4:48:25, 3.59s/it]
1122
 
1123
  3%|▎ | 173/5000 [15:54<4:48:25, 3.59s/it]
1124
  3%|▎ | 174/5000 [15:57<4:44:43, 3.54s/it]
1125
 
1126
  3%|▎ | 174/5000 [15:57<4:44:43, 3.54s/it]
1127
  4%|▎ | 175/5000 [16:01<4:33:54, 3.41s/it]
1128
 
1129
  4%|▎ | 175/5000 [16:01<4:33:54, 3.41s/it]
1130
  4%|▎ | 176/5000 [16:03<4:21:31, 3.25s/it]
1131
 
1132
  4%|▎ | 176/5000 [16:03<4:21:31, 3.25s/it]
1133
  4%|▎ | 177/5000 [16:06<4:12:46, 3.14s/it]
1134
 
1135
  4%|▎ | 177/5000 [16:06<4:12:46, 3.14s/it]
1136
  4%|▎ | 178/5000 [16:09<4:06:53, 3.07s/it]
1137
 
1138
  4%|▎ | 178/5000 [16:09<4:06:53, 3.07s/it]
1139
  4%|▎ | 179/5000 [16:12<4:02:20, 3.02s/it]
1140
 
1141
  4%|▎ | 179/5000 [16:12<4:02:20, 3.02s/it]
1142
  4%|▎ | 180/5000 [16:15<3:59:14, 2.98s/it]
1143
 
1144
  4%|▎ | 180/5000 [16:15<3:59:14, 2.98s/it]
1145
  4%|▎ | 181/5000 [16:18<3:57:04, 2.95s/it]
1146
 
1147
  4%|▎ | 181/5000 [16:18<3:57:04, 2.95s/it]
1148
  4%|▎ | 182/5000 [16:21<3:52:28, 2.90s/it]
1149
 
1150
  4%|▎ | 182/5000 [16:21<3:52:28, 2.90s/it]
1151
  4%|▎ | 183/5000 [16:23<3:39:25, 2.73s/it]
1152
 
1153
  4%|▎ | 183/5000 [16:23<3:39:25, 2.73s/it]
1154
  4%|▎ | 184/5000 [16:25<3:29:44, 2.61s/it]
1155
 
1156
  4%|▎ | 184/5000 [16:25<3:29:44, 2.61s/it]
1157
  4%|▎ | 185/5000 [16:28<3:23:06, 2.53s/it]
1158
 
1159
  4%|▎ | 185/5000 [16:28<3:23:06, 2.53s/it]
1160
  4%|▎ | 186/5000 [16:30<3:18:13, 2.47s/it]
1161
 
1162
  4%|▎ | 186/5000 [16:30<3:18:13, 2.47s/it]
1163
  4%|▎ | 187/5000 [16:32<3:14:56, 2.43s/it]
1164
 
1165
  4%|▎ | 187/5000 [16:32<3:14:56, 2.43s/it]
1166
  4%|▍ | 188/5000 [16:35<3:12:59, 2.41s/it]
1167
 
1168
  4%|▍ | 188/5000 [16:35<3:12:59, 2.41s/it]
1169
  4%|▍ | 189/5000 [16:37<3:11:18, 2.39s/it]
1170
 
1171
  4%|▍ | 189/5000 [16:37<3:11:18, 2.39s/it]
1172
  4%|▍ | 190/5000 [16:39<3:01:04, 2.26s/it]
1173
 
1174
  4%|▍ | 190/5000 [16:39<3:01:04, 2.26s/it]
1175
  4%|▍ | 191/5000 [16:41<2:48:45, 2.11s/it]
1176
 
1177
  4%|▍ | 191/5000 [16:41<2:48:45, 2.11s/it]
1178
  4%|▍ | 192/5000 [16:43<2:40:21, 2.00s/it]
1179
 
1180
  4%|▍ | 192/5000 [16:43<2:40:21, 2.00s/it]
1181
  4%|▍ | 193/5000 [16:44<2:34:24, 1.93s/it]
1182
 
1183
  4%|▍ | 193/5000 [16:44<2:34:24, 1.93s/it]
1184
  4%|▍ | 194/5000 [16:46<2:30:21, 1.88s/it]
1185
 
1186
  4%|▍ | 194/5000 [16:46<2:30:21, 1.88s/it]
1187
  4%|▍ | 195/5000 [16:48<2:27:19, 1.84s/it]
1188
 
1189
  4%|▍ | 195/5000 [16:48<2:27:19, 1.84s/it]
1190
  4%|▍ | 196/5000 [16:50<2:24:45, 1.81s/it]
1191
 
1192
  4%|▍ | 196/5000 [16:50<2:24:45, 1.81s/it]
1193
  4%|▍ | 197/5000 [16:51<2:14:45, 1.68s/it]
1194
 
1195
  4%|▍ | 197/5000 [16:51<2:14:45, 1.68s/it]
1196
  4%|▍ | 198/5000 [16:52<2:03:05, 1.54s/it]
1197
 
1198
  4%|▍ | 198/5000 [16:52<2:03:05, 1.54s/it]
1199
  4%|▍ | 199/5000 [16:53<1:54:58, 1.44s/it]
1200
 
1201
  4%|▍ | 199/5000 [16:53<1:54:58, 1.44s/it]
1202
  4%|▍ | 200/5000 [16:56<2:35:34, 1.94s/it]
1203
 
1204
  4%|▍ | 200/5000 [16:56<2:35:34, 1.94s/it]predictions output file at: /scratch/elec/puhe/p/palp3/MUCS/mucs_language_segregated_data/trainwithtagsv2_warmup500_s300_shuff100/predictionswtagsv2_indicw2v_ad0_3_hd_02_featd_0_2_lr6e-4_warmup500_s300_shuff100.txt
1205
+ Printing predictions for a few samples:
1206
+ Sample 1:
1207
+ Reference: लिबर ऑफिस ∅ impress ∇ में एक प्रस्तुति ∅ document ∇ बनाना और बुनियादी ∅ formatting ∇ के इस ∅ spoken tutorial ∇ में आपका स्वागत है
1208
+ ######
1209
+
1210
+
1211
+ Prediction:
1212
+
1213
+
1214
+
1215
+ Sample 2:
1216
+ Reference: इस ∅ tutorial ∇ में हम ∅ impress window ∇ के भागों के बारे में सीखेंगे और कैसे स्लाइड इन्सर्ट करें और कॉपी करें फॉन्ट तथा फॉन्ट को फॉर्मेट करना सीखेंगे
1217
+ ######
1218
+
1219
+
1220
+ Prediction: ee
1221
+
1222
+
1223
+
1224
+ Sample 3:
1225
+ Reference: यहाँ हम अपने ऑपरेटिंग सिस्टम के रूप में gnu/linux और लिबरऑफिस वर्जन ∅ 334 ∇ का उपयोग कर रहे हैं
1226
+ ######
1227
+
1228
+
1229
+ Prediction:
1230
+
1231
+
1232
+
1233
+ Sample 4:
1234
+ Reference: चलिए अपनी प्रस्तुति प्रेजैटेशन ∅ sample impress open ∇ करते हैं जिसे पिछले ∅ tutorial ∇ में बनाया था
1235
+ ######
1236
+
1237
+
1238
+ Prediction:
1239
+
1240
+
1241
+
1242
+ Sample 5:
1243
+ Reference: चलिए देखते हैं कि ∅ screen ∇ पर क्या क्या है
1244
+ ######
1245
+
1246
+
1247
+ Prediction:
1248
+
1249
+
1250
+
1251
+ last Reference string यह स्क्रिप्ट लता द्वारा अनुवादित है आईआईटी मुंबई की ओर से मैं रवि कुमार अब आपसे विदा ���ेता हूँहमसे जुड़ने के लिए धन्यवाद
1252
+
1253
+
1254
+ last prediction string
1255
+ cleaned cs transcription pred and ref
1256
+
1257
+
1258
+ लिबर ऑफिस impress में एक प्रस्तुति document बनाना और बुनियादी formatting के इस spoken tutorial में आपका स्वागत है
1259
+ {'eval_loss': 3.756589651107788, 'eval_cer': 0.9993136082367011, 'eval_wer': 1.0, 'eval_runtime': 255.4395, 'eval_samples_per_second': 12.277, 'eval_steps_per_second': 0.767, 'epoch': 0.16}
1260
+ {'loss': 4.8265, 'grad_norm': 38.48965835571289, 'learning_rate': 0.00012119999999999999, 'epoch': 0.16}
1261
+ {'loss': 3.995, 'grad_norm': 14.283730506896973, 'learning_rate': 0.0001224, 'epoch': 0.16}
1262
+ {'loss': 3.7879, 'grad_norm': 8.576812744140625, 'learning_rate': 0.0001236, 'epoch': 0.16}
1263
+ {'loss': 3.9799, 'grad_norm': 21.387664794921875, 'learning_rate': 0.00012479999999999997, 'epoch': 0.17}
1264
+ {'loss': 3.7848, 'grad_norm': 2.427260398864746, 'learning_rate': 0.00012599999999999997, 'epoch': 0.17}
1265
+ {'loss': 3.7703, 'grad_norm': 1.3663990497589111, 'learning_rate': 0.00012719999999999997, 'epoch': 0.17}
1266
+ {'loss': 3.8065, 'grad_norm': 1.7364495992660522, 'learning_rate': 0.00012839999999999998, 'epoch': 0.17}
1267
+ {'loss': 3.6977, 'grad_norm': 3.71764874458313, 'learning_rate': 0.00012959999999999998, 'epoch': 0.17}
1268
+ {'loss': 3.705, 'grad_norm': 2.993403434753418, 'learning_rate': 0.00013079999999999998, 'epoch': 0.17}
1269
+ {'loss': 3.695, 'grad_norm': 3.1739470958709717, 'learning_rate': 0.00013199999999999998, 'epoch': 0.18}
1270
+ {'loss': 3.6989, 'grad_norm': 2.5547573566436768, 'learning_rate': 0.00013319999999999999, 'epoch': 0.18}
1271
+ {'loss': 3.6776, 'grad_norm': 1.4511839151382446, 'learning_rate': 0.0001344, 'epoch': 0.18}
1272
+ {'loss': 3.7139, 'grad_norm': 1.6091411113739014, 'learning_rate': 0.0001356, 'epoch': 0.18}
1273
+ {'loss': 3.6809, 'grad_norm': 1.7235424518585205, 'learning_rate': 0.0001368, 'epoch': 0.18}
1274
+ {'loss': 3.6593, 'grad_norm': 1.596807837486267, 'learning_rate': 0.000138, 'epoch': 0.18}
1275
+ {'loss': 3.625, 'grad_norm': 1.3405994176864624, 'learning_rate': 0.0001392, 'epoch': 0.19}
1276
+ {'loss': 3.7073, 'grad_norm': 1.2730778455734253, 'learning_rate': 0.0001404, 'epoch': 0.19}
1277
+ {'loss': 3.6353, 'grad_norm': 5.288895130157471, 'learning_rate': 0.00014159999999999997, 'epoch': 0.19}
1278
+ {'loss': 3.7244, 'grad_norm': 1.5006777048110962, 'learning_rate': 0.00014279999999999997, 'epoch': 0.19}
1279
+ {'loss': 3.6375, 'grad_norm': 2.001701831817627, 'learning_rate': 0.00014399999999999998, 'epoch': 0.19}
1280
+ {'loss': 3.7359, 'grad_norm': 1.6458996534347534, 'learning_rate': 0.00014519999999999998, 'epoch': 0.19}
1281
+ {'loss': 3.6629, 'grad_norm': 3.332979917526245, 'learning_rate': 0.00014639999999999998, 'epoch': 0.2}
1282
+ {'loss': 3.6118, 'grad_norm': 1.4556349515914917, 'learning_rate': 0.00014759999999999998, 'epoch': 0.2}
1283
+ {'loss': 3.6565, 'grad_norm': 1.3131866455078125, 'learning_rate': 0.00014879999999999998, 'epoch': 0.2}
1284
+ {'loss': 3.6411, 'grad_norm': 3.0149691104888916, 'learning_rate': 0.00015, 'epoch': 0.2}
1285
+ {'loss': 3.7214, 'grad_norm': 7.048384189605713, 'learning_rate': 0.0001512, 'epoch': 0.2}
1286
+ {'loss': 3.6838, 'grad_norm': 4.727232456207275, 'learning_rate': 0.0001524, 'epoch': 0.2}
1287
+ {'loss': 3.6321, 'grad_norm': 4.120789051055908, 'learning_rate': 0.0001536, 'epoch': 0.2}
1288
+ {'loss': 3.6596, 'grad_norm': 2.701378345489502, 'learning_rate': 0.0001548, 'epoch': 0.21}
1289
+ {'loss': 3.6455, 'grad_norm': 1.1040818691253662, 'learning_rate': 0.000156, 'epoch': 0.21}
1290
+ {'loss': 3.6392, 'grad_norm': 2.2652924060821533, 'learning_rate': 0.0001572, 'epoch': 0.21}
1291
+ {'loss': 3.6564, 'grad_norm': 1.944316029548645, 'learning_rate': 0.0001584, 'epoch': 0.21}
1292
+ {'loss': 3.6666, 'grad_norm': 2.841169834136963, 'learning_rate': 0.0001596, 'epoch': 0.21}
1293
+ {'loss': 3.6769, 'grad_norm': 1.1487207412719727, 'learning_rate': 0.0001608, 'epoch': 0.21}
1294
+ {'loss': 3.6356, 'grad_norm': 2.734001636505127, 'learning_rate': 0.000162, 'epoch': 0.22}
1295
+ {'loss': 3.7166, 'grad_norm': 7.825503349304199, 'learning_rate': 0.0001632, 'epoch': 0.22}
1296
+ {'loss': 3.6611, 'grad_norm': 2.640146493911743, 'learning_rate': 0.0001644, 'epoch': 0.22}
1297
+ {'loss': 3.787, 'grad_norm': 6.790426254272461, 'learning_rate': 0.0001656, 'epoch': 0.22}
1298
+ {'loss': 3.7059, 'grad_norm': 5.147435665130615, 'learning_rate': 0.0001668, 'epoch': 0.22}
1299
+ {'loss': 3.6741, 'grad_norm': 1.0099550485610962, 'learning_rate': 0.000168, 'epoch': 0.22}
1300
+ {'loss': 3.5931, 'grad_norm': 0.6957960724830627, 'learning_rate': 0.00016919999999999997, 'epoch': 0.23}
1301
+ {'loss': 3.6941, 'grad_norm': 0.8608220219612122, 'learning_rate': 0.00017039999999999997, 'epoch': 0.23}
1302
+ {'loss': 3.6514, 'grad_norm': 1.106878399848938, 'learning_rate': 0.00017159999999999997, 'epoch': 0.23}
1303
+ {'loss': 3.6309, 'grad_norm': 1.4964262247085571, 'learning_rate': 0.00017279999999999997, 'epoch': 0.23}
1304
+ {'loss': 3.5667, 'grad_norm': 1.8776533603668213, 'learning_rate': 0.00017399999999999997, 'epoch': 0.23}
1305
+ {'loss': 3.6641, 'grad_norm': 1.1998003721237183, 'learning_rate': 0.00017519999999999998, 'epoch': 0.23}
1306
+ {'loss': 3.6057, 'grad_norm': 0.7275452017784119, 'learning_rate': 0.00017639999999999998, 'epoch': 0.24}
1307
+ {'loss': 3.6273, 'grad_norm': 1.6733245849609375, 'learning_rate': 0.00017759999999999998, 'epoch': 0.24}
1308
+ {'loss': 3.6672, 'grad_norm': 1.0381203889846802, 'learning_rate': 0.00017879999999999998, 'epoch': 0.24}
1309
+ {'loss': 3.7225, 'grad_norm': 2.804208993911743, 'learning_rate': 0.00017999999999999998, 'epoch': 0.24}
1310
+ {'loss': 4.8053, 'grad_norm': 37.25443649291992, 'learning_rate': 0.00018119999999999999, 'epoch': 0.24}
1311
+ {'loss': 3.8555, 'grad_norm': 12.150286674499512, 'learning_rate': 0.0001824, 'epoch': 0.24}
1312
+ {'loss': 3.8873, 'grad_norm': 9.963776588439941, 'learning_rate': 0.0001836, 'epoch': 0.24}
1313
+ {'loss': 3.715, 'grad_norm': 4.422680854797363, 'learning_rate': 0.0001848, 'epoch': 0.25}
1314
+ {'loss': 3.9109, 'grad_norm': 7.834131717681885, 'learning_rate': 0.000186, 'epoch': 0.25}
1315
+ {'loss': 3.7019, 'grad_norm': 6.25940465927124, 'learning_rate': 0.0001872, 'epoch': 0.25}
1316
+ {'loss': 3.7603, 'grad_norm': 8.240117073059082, 'learning_rate': 0.00018839999999999997, 'epoch': 0.25}
1317
+ {'loss': 3.8329, 'grad_norm': 8.491787910461426, 'learning_rate': 0.00018959999999999997, 'epoch': 0.25}
1318
+ {'loss': 3.833, 'grad_norm': 8.898347854614258, 'learning_rate': 0.00019079999999999998, 'epoch': 0.25}
1319
+ {'loss': 3.7302, 'grad_norm': 8.100727081298828, 'learning_rate': 0.00019199999999999998, 'epoch': 0.26}
1320
+ {'loss': 3.6727, 'grad_norm': 6.175380229949951, 'learning_rate': 0.00019319999999999998, 'epoch': 0.26}
1321
+ {'loss': 3.6329, 'grad_norm': 3.0743486881256104, 'learning_rate': 0.00019439999999999998, 'epoch': 0.26}
1322
+ {'loss': 3.6433, 'grad_norm': 0.708350658416748, 'learning_rate': 0.00019559999999999998, 'epoch': 0.26}
1323
+ {'loss': 3.6271, 'grad_norm': 5.063936233520508, 'learning_rate': 0.00019679999999999999, 'epoch': 0.26}
1324
+ {'loss': 3.7516, 'grad_norm': 9.512127876281738, 'learning_rate': 0.000198, 'epoch': 0.26}
1325
+ {'loss': 3.73, 'grad_norm': 9.915925979614258, 'learning_rate': 0.0001992, 'epoch': 0.27}
1326
+ {'loss': 3.7317, 'grad_norm': 10.222678184509277, 'learning_rate': 0.0002004, 'epoch': 0.27}
1327
+ {'loss': 3.6102, 'grad_norm': 3.9347116947174072, 'learning_rate': 0.0002016, 'epoch': 0.27}
1328
+ {'loss': 3.762, 'grad_norm': 7.799042701721191, 'learning_rate': 0.0002028, 'epoch': 0.27}
1329
+ {'loss': 3.5983, 'grad_norm': 1.9994298219680786, 'learning_rate': 0.000204, 'epoch': 0.27}
1330
+ {'loss': 3.5832, 'grad_norm': 1.4586771726608276, 'learning_rate': 0.0002052, 'epoch': 0.27}
1331
+ {'loss': 3.5878, 'grad_norm': 2.8306543827056885, 'learning_rate': 0.00020639999999999998, 'epoch': 0.28}
1332
+ {'loss': 3.5579, 'grad_norm': 1.621443510055542, 'learning_rate': 0.00020759999999999998, 'epoch': 0.28}
1333
+ {'loss': 3.6199, 'grad_norm': 1.133254885673523, 'learning_rate': 0.00020879999999999998, 'epoch': 0.28}
1334
+ {'loss': 3.6086, 'grad_norm': 1.8510676622390747, 'learning_rate': 0.00020999999999999998, 'epoch': 0.28}
1335
+ {'loss': 3.5462, 'grad_norm': 1.194270133972168, 'learning_rate': 0.00021119999999999996, 'epoch': 0.28}
1336
+ {'loss': 3.4962, 'grad_norm': 1.256672739982605, 'learning_rate': 0.00021239999999999996, 'epoch': 0.28}
1337
+ {'loss': 3.5698, 'grad_norm': 1.386613368988037, 'learning_rate': 0.00021359999999999996, 'epoch': 0.28}
1338
+ {'loss': 3.5602, 'grad_norm': 3.1640172004699707, 'learning_rate': 0.00021479999999999996, 'epoch': 0.29}
1339
+ {'loss': 3.539, 'grad_norm': 1.1303514242172241, 'learning_rate': 0.00021599999999999996, 'epoch': 0.29}
1340
+ {'loss': 3.4954, 'grad_norm': 0.9495574831962585, 'learning_rate': 0.00021719999999999997, 'epoch': 0.29}
1341
+ {'loss': 3.5047, 'grad_norm': 1.191848635673523, 'learning_rate': 0.00021839999999999997, 'epoch': 0.29}
1342
+ {'loss': 3.584, 'grad_norm': 3.533217191696167, 'learning_rate': 0.00021959999999999997, 'epoch': 0.29}
1343
+ {'loss': 3.5125, 'grad_norm': 1.7034047842025757, 'learning_rate': 0.00022079999999999997, 'epoch': 0.29}
1344
+ {'loss': 3.5211, 'grad_norm': 2.700714111328125, 'learning_rate': 0.00022199999999999998, 'epoch': 0.3}
1345
+ {'loss': 3.5052, 'grad_norm': 1.1122382879257202, 'learning_rate': 0.00022319999999999998, 'epoch': 0.3}
1346
+ {'loss': 3.5532, 'grad_norm': 1.1783989667892456, 'learning_rate': 0.00022439999999999998, 'epoch': 0.3}
1347
+ {'loss': 3.475, 'grad_norm': 1.685303807258606, 'learning_rate': 0.00022559999999999998, 'epoch': 0.3}
1348
+ {'loss': 3.4674, 'grad_norm': 3.506218671798706, 'learning_rate': 0.00022679999999999998, 'epoch': 0.3}
1349
+ {'loss': 3.4742, 'grad_norm': 1.5587297677993774, 'learning_rate': 0.00022799999999999999, 'epoch': 0.3}
1350
+ {'loss': 3.4753, 'grad_norm': 1.766122817993164, 'learning_rate': 0.0002292, 'epoch': 0.31}
1351
+ {'loss': 3.5442, 'grad_norm': 2.4157192707061768, 'learning_rate': 0.0002304, 'epoch': 0.31}
1352
+ {'loss': 3.5577, 'grad_norm': 1.7758774757385254, 'learning_rate': 0.0002316, 'epoch': 0.31}
1353
+ {'loss': 3.4964, 'grad_norm': 1.9026403427124023, 'learning_rate': 0.0002328, 'epoch': 0.31}
1354
+ {'loss': 3.4947, 'grad_norm': 1.9833109378814697, 'learning_rate': 0.000234, 'epoch': 0.31}
1355
+ {'loss': 3.4913, 'grad_norm': 1.6161296367645264, 'learning_rate': 0.0002352, 'epoch': 0.31}
1356
+ {'loss': 3.3045, 'grad_norm': 1.121405839920044, 'learning_rate': 0.0002364, 'epoch': 0.32}
1357
+ {'loss': 3.4437, 'grad_norm': 1.822557806968689, 'learning_rate': 0.0002376, 'epoch': 0.32}
1358
+ {'loss': 3.5102, 'grad_norm': 1.3127074241638184, 'learning_rate': 0.0002388, 'epoch': 0.32}
1359
+ {'loss': 3.7032, 'grad_norm': nan, 'learning_rate': 0.00023999999999999998, 'epoch': 0.32}
1360
+
1361
+
1362
  0%| | 0/196 [00:00<?, ?it/s]
1363
+
1364
  1%| | 2/196 [00:00<01:31, 2.11it/s]
1365
+
1366
  2%|▏ | 3/196 [00:02<02:30, 1.28it/s]
1367
+
1368
  2%|▏ | 4/196 [00:03<03:07, 1.02it/s]
1369
+
1370
  3%|▎ | 5/196 [00:04<03:25, 1.07s/it]
1371
+
1372
  3%|▎ | 6/196 [00:06<03:55, 1.24s/it]
1373
+
1374
  4%|▎ | 7/196 [00:07<04:13, 1.34s/it]
1375
+
1376
  4%|▍ | 8/196 [00:09<04:12, 1.34s/it]
1377
+
1378
  5%|▍ | 9/196 [00:11<05:32, 1.78s/it]
1379
+
1380
  5%|▌ | 10/196 [00:14<06:43, 2.17s/it]
1381
+
1382
  6%|▌ | 11/196 [00:18<07:48, 2.53s/it]
1383
+
1384
  6%|▌ | 12/196 [00:21<08:27, 2.76s/it]
1385
+
1386
  7%|▋ | 13/196 [00:23<07:20, 2.41s/it]
1387
+
1388
  7%|▋ | 14/196 [00:24<06:05, 2.01s/it]
1389
+
1390
  8%|▊ | 15/196 [00:25<05:03, 1.68s/it]
1391
+
1392
  8%|▊ | 16/196 [00:26<04:41, 1.57s/it]
1393
+
1394
  9%|▊ | 17/196 [00:27<04:27, 1.49s/it]
1395
+
1396
  9%|▉ | 18/196 [00:29<04:53, 1.65s/it]
1397
+
1398
  10%|▉ | 19/196 [00:33<06:17, 2.13s/it]
1399
+
1400
  10%|█ | 20/196 [00:35<06:49, 2.33s/it]
1401
+
1402
  11%|█ | 21/196 [00:38<06:58, 2.39s/it]
1403
+
1404
  11%|█ | 22/196 [00:40<06:43, 2.32s/it]
1405
+
1406
  12%|█▏ | 23/196 [00:42<06:15, 2.17s/it]
1407
+
1408
  12%|█▏ | 24/196 [00:43<04:57, 1.73s/it]
1409
+
1410
  13%|█▎ | 25/196 [00:43<04:09, 1.46s/it]
1411
+
1412
  13%|█▎ | 26/196 [00:44<03:36, 1.28s/it]
1413
+
1414
  14%|█▍ | 27/196 [00:45<03:18, 1.17s/it]
1415
+
1416
  14%|█▍ | 28/196 [00:46<03:08, 1.12s/it]
1417
+
1418
  15%|█▍ | 29/196 [00:47<03:07, 1.12s/it]
1419
+
1420
  15%|█▌ | 30/196 [00:48<03:07, 1.13s/it]
1421
+
1422
  16%|█▌ | 31/196 [00:49<02:51, 1.04s/it]
1423
+
1424
  16%|█▋ | 32/196 [00:50<02:42, 1.01it/s]
1425
+
1426
  17%|█▋ | 33/196 [00:52<03:09, 1.16s/it]
1427
+
1428
  17%|█▋ | 34/196 [00:54<04:13, 1.56s/it]
1429
+
1430
  18%|█▊ | 35/196 [00:56<04:16, 1.59s/it]
1431
+
1432
  18%|█▊ | 36/196 [00:58<04:59, 1.87s/it]
1433
+
1434
  19%|█▉ | 37/196 [01:00<04:59, 1.88s/it]
1435
+
1436
  19%|█▉ | 38/196 [01:02<04:30, 1.71s/it]
1437
+
1438
  20%|█▉ | 39/196 [01:03<04:10, 1.59s/it]
1439
+
1440
  20%|██ | 40/196 [01:04<03:48, 1.46s/it]
1441
+
1442
  21%|██ | 41/196 [01:05<03:27, 1.34s/it]
1443
+
1444
  21%|██▏ | 42/196 [01:06<03:10, 1.24s/it]
1445
+
1446
  22%|██▏ | 43/196 [01:07<03:06, 1.22s/it]
1447
+
1448
  22%|██▏ | 44/196 [01:09<03:01, 1.20s/it]
1449
+
1450
  23%|██▎ | 45/196 [01:09<02:49, 1.12s/it]
1451
+
1452
  23%|██▎ | 46/196 [01:10<02:36, 1.05s/it]
1453
+
1454
  24%|██▍ | 47/196 [01:11<02:36, 1.05s/it]
1455
+
1456
  24%|██▍ | 48/196 [01:12<02:27, 1.00it/s]
1457
+
1458
  25%|██▌ | 49/196 [01:13<02:23, 1.02it/s]
1459
+
1460
  26%|██▌ | 50/196 [01:14<02:21, 1.03it/s]
1461
+
1462
  26%|██▌ | 51/196 [01:15<02:16, 1.06it/s]
1463
+
1464
  27%|██▋ | 52/196 [01:16<02:14, 1.07it/s]
1465
+
1466
  27%|██▋ | 53/196 [01:17<02:26, 1.02s/it]
1467
+
1468
  28%|██▊ | 54/196 [01:18<02:22, 1.00s/it]
1469
+
1470
  28%|██▊ | 55/196 [01:20<02:37, 1.12s/it]
1471
+
1472
  29%|██▊ | 56/196 [01:21<02:47, 1.20s/it]
1473
+
1474
  29%|██▉ | 57/196 [01:22<03:00, 1.30s/it]
1475
+
1476
  30%|██▉ | 58/196 [01:24<03:07, 1.36s/it]
1477
+
1478
  30%|███ | 59/196 [01:25<03:06, 1.36s/it]
1479
+
1480
  31%|███ | 60/196 [01:26<02:50, 1.25s/it]
1481
+
1482
  31%|███ | 61/196 [01:27<02:33, 1.13s/it]
1483
+
1484
  32%|███▏ | 62/196 [01:28<02:29, 1.11s/it]
1485
+
1486
  32%|███▏ | 63/196 [01:29<02:30, 1.13s/it]
1487
+
1488
  33%|███▎ | 64/196 [01:31<02:29, 1.14s/it]
1489
+
1490
  33%|███▎ | 65/196 [01:32<02:25, 1.11s/it]
1491
+
1492
  34%|███▎ | 66/196 [01:33<02:30, 1.16s/it]
1493
+
1494
  34%|███▍ | 67/196 [01:34<02:34, 1.20s/it]
1495
+
1496
  35%|███▍ | 68/196 [01:36<02:59, 1.40s/it]
1497
+
1498
  35%|███▌ | 69/196 [01:38<03:06, 1.47s/it]
1499
+
1500
  36%|███▌ | 70/196 [01:39<02:51, 1.36s/it]
1501
+
1502
  36%|███▌ | 71/196 [01:40<02:39, 1.27s/it]
1503
+
1504
  37%|███▋ | 72/196 [01:41<02:24, 1.17s/it]
1505
+
1506
  37%|███▋ | 73/196 [01:42<02:11, 1.07s/it]
1507
+
1508
  38%|███▊ | 74/196 [01:42<02:00, 1.01it/s]
1509
+
1510
  38%|███▊ | 75/196 [01:43<01:56, 1.04it/s]
1511
+
1512
  39%|███▉ | 76/196 [01:44<01:52, 1.06it/s]
1513
+
1514
  39%|███▉ | 77/196 [01:45<01:57, 1.01it/s]
1515
+
1516
  40%|███▉ | 78/196 [01:47<02:08, 1.09s/it]
1517
+
1518
  40%|████ | 79/196 [01:48<02:02, 1.04s/it]
1519
+
1520
  41%|████ | 80/196 [01:49<02:09, 1.11s/it]
1521
+
1522
  41%|████▏ | 81/196 [01:50<02:13, 1.16s/it]
1523
+
1524
  42%|████▏ | 82/196 [01:51<02:08, 1.12s/it]
1525
+
1526
  42%|████▏ | 83/196 [01:52<02:10, 1.15s/it]
1527
+
1528
  43%|████▎ | 84/196 [01:54<02:12, 1.19s/it]
1529
+
1530
  43%|████▎ | 85/196 [01:55<02:11, 1.18s/it]
1531
+
1532
  44%|████▍ | 86/196 [01:56<02:13, 1.21s/it]
1533
+
1534
  44%|████▍ | 87/196 [01:57<02:09, 1.19s/it]
1535
+
1536
  45%|████▍ | 88/196 [01:59<02:13, 1.24s/it]
1537
+
1538
  45%|████▌ | 89/196 [02:00<02:17, 1.29s/it]
1539
+
1540
  46%|████▌ | 90/196 [02:01<02:12, 1.25s/it]
1541
+
1542
  46%|████▋ | 91/196 [02:02<02:07, 1.22s/it]
1543
+
1544
  47%|████▋ | 92/196 [02:03<02:02, 1.18s/it]
1545
+
1546
  47%|████▋ | 93/196 [02:05<02:08, 1.25s/it]
1547
+
1548
  48%|████▊ | 94/196 [02:06<02:10, 1.28s/it]
1549
+
1550
  48%|████▊ | 95/196 [02:07<02:03, 1.22s/it]
1551
+
1552
  49%|████▉ | 96/196 [02:09<02:09, 1.29s/it]
1553
+
1554
  49%|████▉ | 97/196 [02:10<02:05, 1.27s/it]
1555
+
1556
  50%|█████ | 98/196 [02:11<02:09, 1.32s/it]
1557
+
1558
  51%|█████ | 99/196 [02:12<01:57, 1.21s/it]
1559
+
1560
  51%|█████ | 100/196 [02:13<01:43, 1.07s/it]
1561
+
1562
  52%|█████▏ | 101/196 [02:14<01:34, 1.00it/s]
1563
+
1564
  52%|█████▏ | 102/196 [02:15<01:39, 1.06s/it]
1565
+
1566
  53%|█████▎ | 103/196 [02:16<01:45, 1.14s/it]
1567
+
1568
  53%|█████▎ | 104/196 [02:18<02:02, 1.34s/it]
1569
+
1570
  54%|█████▎ | 105/196 [02:20<02:05, 1.38s/it]
1571
+
1572
  54%|█████▍ | 106/196 [02:21<02:05, 1.40s/it]
1573
+
1574
  55%|█████▍ | 107/196 [02:22<01:56, 1.31s/it]
1575
+
1576
  55%|█████▌ | 108/196 [02:23<01:42, 1.16s/it]
1577
+
1578
  56%|█████▌ | 109/196 [02:24<01:33, 1.08s/it]
1579
+
1580
  56%|█████▌ | 110/196 [02:25<01:28, 1.03s/it]
1581
+
1582
  57%|█████▋ | 111/196 [02:26<01:27, 1.03s/it]
1583
+
1584
  57%|█████▋ | 112/196 [02:27<01:30, 1.07s/it]
1585
+
1586
  58%|█████▊ | 113/196 [02:28<01:32, 1.11s/it]
1587
+
1588
  58%|█████▊ | 114/196 [02:29<01:25, 1.04s/it]
1589
+
1590
  59%|█████▊ | 115/196 [02:30<01:21, 1.01s/it]
1591
+
1592
  59%|█████▉ | 116/196 [02:31<01:18, 1.02it/s]
1593
+
1594
  60%|█████▉ | 117/196 [02:32<01:14, 1.06it/s]
1595
+
1596
  60%|██████ | 118/196 [02:32<01:06, 1.17it/s]
1597
+
1598
  61%|██████ | 119/196 [02:33<01:08, 1.13it/s]
1599
+
1600
  61%|██████ | 120/196 [02:34<01:11, 1.06it/s]
1601
+
1602
  62%|██████▏ | 121/196 [02:36<01:13, 1.02it/s]
1603
+
1604
  62%|██████▏ | 122/196 [02:37<01:14, 1.01s/it]
1605
+
1606
  63%|██████▎ | 123/196 [02:38<01:13, 1.00s/it]
1607
+
1608
  63%|██████▎ | 124/196 [02:39<01:10, 1.02it/s]
1609
+
1610
  64%|██████▍ | 125/196 [02:40<01:10, 1.01it/s]
1611
+
1612
  64%|██████▍ | 126/196 [02:41<01:18, 1.12s/it]
1613
+
1614
  65%|██████▍ | 127/196 [02:42<01:17, 1.12s/it]
1615
+
1616
  65%|██████▌ | 128/196 [02:43<01:12, 1.06s/it]
1617
+
1618
  66%|██████▌ | 129/196 [02:44<01:10, 1.06s/it]
1619
+
1620
  66%|██████▋ | 130/196 [02:45<01:08, 1.04s/it]
1621
+
1622
  67%|██████▋ | 131/196 [02:46<01:08, 1.05s/it]
1623
+
1624
  67%|██████▋ | 132/196 [02:47<01:03, 1.02it/s]
1625
+
1626
  68%|██████▊ | 133/196 [02:48<01:00, 1.03it/s]
1627
+
1628
  68%|██████▊ | 134/196 [02:49<01:05, 1.06s/it]
1629
+
1630
  69%|██████▉ | 135/196 [02:50<01:03, 1.03s/it]
1631
+
1632
  69%|██████▉ | 136/196 [02:51<00:59, 1.00it/s]
1633
+
1634
  70%|██████▉ | 137/196 [02:52<00:58, 1.01it/s]
1635
+
1636
  70%|███████ | 138/196 [02:53<00:58, 1.00s/it]
1637
+
1638
  71%|███████ | 139/196 [02:54<00:57, 1.00s/it]
1639
+
1640
  71%|███████▏ | 140/196 [02:55<00:55, 1.00it/s]
1641
+
1642
  72%|███████▏ | 141/196 [02:56<00:55, 1.00s/it]
1643
+
1644
  72%|███████▏ | 142/196 [02:57<00:55, 1.03s/it]
1645
+
1646
  73%|███████▎ | 143/196 [02:59<00:59, 1.13s/it]
1647
+
1648
  73%|███████▎ | 144/196 [02:59<00:55, 1.07s/it]
1649
+
1650
  74%|███████▍ | 145/196 [03:00<00:49, 1.03it/s]
1651
+
1652
  74%|███████▍ | 146/196 [03:01<00:45, 1.10it/s]
1653
+
1654
  75%|███████▌ | 147/196 [03:02<00:44, 1.10it/s]
1655
+
1656
  76%|███████▌ | 148/196 [03:03<00:43, 1.10it/s]
1657
+
1658
  76%|███████▌ | 149/196 [03:04<00:40, 1.15it/s]
1659
+
1660
  77%|███████▋ | 150/196 [03:05<00:42, 1.10it/s]
1661
+
1662
  77%|███████▋ | 151/196 [03:06<00:43, 1.03it/s]
1663
+
1664
  78%|███████▊ | 152/196 [03:07<00:42, 1.04it/s]
1665
+
1666
  78%|███████▊ | 153/196 [03:08<00:41, 1.05it/s]
1667
+
1668
  79%|███████▊ | 154/196 [03:09<00:41, 1.02it/s]
1669
+
1670
  79%|███████▉ | 155/196 [03:10<00:42, 1.05s/it]
1671
+
1672
  80%|███████▉ | 156/196 [03:12<00:49, 1.25s/it]
1673
+
1674
  80%|████████ | 157/196 [03:13<00:53, 1.36s/it]
1675
+
1676
  81%|████████ | 158/196 [03:14<00:46, 1.22s/it]
1677
+
1678
  81%|████████ | 159/196 [03:15<00:40, 1.10s/it]
1679
+
1680
  82%|████████▏ | 160/196 [03:16<00:37, 1.04s/it]
1681
+
1682
  82%|████████▏ | 161/196 [03:17<00:36, 1.04s/it]
1683
+
1684
  83%|████████▎ | 162/196 [03:18<00:35, 1.03s/it]
1685
+
1686
  83%|████████▎ | 163/196 [03:19<00:33, 1.01s/it]
1687
+
1688
  84%|████████▎ | 164/196 [03:20<00:31, 1.01it/s]
1689
+
1690
  84%|████████▍ | 165/196 [03:21<00:31, 1.03s/it]
1691
+
1692
  85%|████████▍ | 166/196 [03:22<00:30, 1.01s/it]
1693
+
1694
  85%|████████▌ | 167/196 [03:23<00:28, 1.03it/s]
1695
+
1696
  86%|████████▌ | 168/196 [03:23<00:25, 1.08it/s]
1697
+
1698
  86%|████████▌ | 169/196 [03:24<00:25, 1.06it/s]
1699
+
1700
  87%|████████▋ | 170/196 [03:26<00:27, 1.05s/it]
1701
+
1702
  87%|████████▋ | 171/196 [03:27<00:25, 1.01s/it]
1703
+
1704
  88%|████████▊ | 172/196 [03:28<00:24, 1.04s/it]
1705
+
1706
  88%|████████▊ | 173/196 [03:29<00:23, 1.03s/it]
1707
+
1708
  89%|████████▉ | 174/196 [03:30<00:24, 1.09s/it]
1709
+
1710
  89%|████████▉ | 175/196 [03:32<00:27, 1.31s/it]
1711
+
1712
  90%|████████▉ | 176/196 [03:36<00:41, 2.07s/it]
1713
+
1714
  90%|█████████ | 177/196 [03:39<00:46, 2.42s/it]
1715
+
1716
  91%|█████████ | 178/196 [03:42<00:47, 2.63s/it]
1717
+
1718
  91%|█████████▏| 179/196 [03:46<00:49, 2.89s/it]
1719
+
1720
  92%|█████████▏| 180/196 [03:47<00:37, 2.32s/it]
1721
+
1722
  92%|█████████▏| 181/196 [03:48<00:29, 1.96s/it]
1723
+
1724
  93%|█████████▎| 182/196 [03:49<00:23, 1.66s/it]
1725
+
1726
  93%|█████████▎| 183/196 [03:51<00:22, 1.73s/it]
1727
+
1728
  94%|█████████▍| 184/196 [03:51<00:17, 1.48s/it]
1729
+
1730
  94%|█████████▍| 185/196 [03:53<00:15, 1.42s/it]
1731
+
1732
  95%|█████████▍| 186/196 [03:54<00:14, 1.49s/it]
1733
+
1734
  95%|█████████▌| 187/196 [03:55<00:12, 1.36s/it]
1735
+
1736
  96%|█████████▌| 188/196 [03:56<00:09, 1.22s/it]
1737
+
1738
  96%|█████████▋| 189/196 [03:57<00:08, 1.18s/it]
1739
+
1740
  97%|█████████▋| 190/196 [03:58<00:06, 1.11s/it]
1741
+
1742
  97%|█████████▋| 191/196 [03:59<00:05, 1.03s/it]
1743
+
1744
  98%|█████████▊| 192/196 [04:00<00:04, 1.03s/it]
1745
+
1746
  98%|█████████▊| 193/196 [04:01<00:03, 1.03s/it]
1747
+
1748
  99%|█████████▉| 194/196 [04:02<00:02, 1.04s/it]
1749
+
1750
  99%|█████████▉| 195/196 [04:03<00:01, 1.01s/it]
1751
+
1752
 
1753
+
1754
 
1755
  4%|▍ | 200/5000 [21:10<2:35:34, 1.94s/it]
1756
+
1757
+
1758
  
1759
  4%|▍ | 201/5000 [21:26<109:42:43, 82.30s/it]
1760
 
1761
  4%|▍ | 201/5000 [21:26<109:42:43, 82.30s/it]
1762
  4%|▍ | 202/5000 [21:35<80:16:32, 60.23s/it]
1763
 
1764
  4%|▍ | 202/5000 [21:35<80:16:32, 60.23s/it]
1765
  4%|▍ | 203/5000 [21:43<59:13:26, 44.45s/it]
1766
 
1767
  4%|▍ | 203/5000 [21:43<59:13:26, 44.45s/it]
1768
  4%|▍ | 204/5000 [21:50<44:17:23, 33.25s/it]
1769
 
1770
  4%|▍ | 204/5000 [21:50<44:17:23, 33.25s/it]
1771
  4%|▍ | 205/5000 [21:56<33:35:12, 25.22s/it]
1772
 
1773
  4%|▍ | 205/5000 [21:56<33:35:12, 25.22s/it]
1774
  4%|▍ | 206/5000 [22:02<25:54:24, 19.45s/it]
1775
 
1776
  4%|▍ | 206/5000 [22:02<25:54:24, 19.45s/it]
1777
  4%|▍ | 207/5000 [22:08<20:29:24, 15.39s/it]
1778
 
1779
  4%|▍ | 207/5000 [22:08<20:29:24, 15.39s/it]
1780
  4%|▍ | 208/5000 [22:13<16:29:27, 12.39s/it]
1781
 
1782
  4%|▍ | 208/5000 [22:14<16:29:27, 12.39s/it]
1783
  4%|▍ | 209/5000 [22:19<13:41:28, 10.29s/it]
1784
 
1785
  4%|▍ | 209/5000 [22:19<13:41:28, 10.29s/it]
1786
  4%|▍ | 210/5000 [22:24<11:33:46, 8.69s/it]
1787
 
1788
  4%|▍ | 210/5000 [22:24<11:33:46, 8.69s/it]
1789
  4%|▍ | 211/5000 [22:29<9:58:05, 7.49s/it]
1790
 
1791
  4%|▍ | 211/5000 [22:29<9:58:05, 7.49s/it]
1792
  4%|▍ | 212/5000 [22:33<8:50:15, 6.64s/it]
1793
 
1794
  4%|▍ | 212/5000 [22:33<8:50:15, 6.64s/it]
1795
  4%|▍ | 213/5000 [22:38<7:59:27, 6.01s/it]
1796
 
1797
  4%|▍ | 213/5000 [22:38<7:59:27, 6.01s/it]
1798
  4%|▍ | 214/5000 [22:42<7:14:17, 5.44s/it]
1799
 
1800
  4%|▍ | 214/5000 [22:42<7:14:17, 5.44s/it]
1801
  4%|▍ | 215/5000 [22:46<6:42:02, 5.04s/it]
1802
 
1803
  4%|▍ | 215/5000 [22:46<6:42:02, 5.04s/it]
1804
  4%|▍ | 216/5000 [22:50<6:19:32, 4.76s/it]
1805
 
1806
  4%|▍ | 216/5000 [22:50<6:19:32, 4.76s/it]
1807
  4%|▍ | 217/5000 [22:54<6:03:14, 4.56s/it]
1808
 
1809
  4%|▍ | 217/5000 [22:54<6:03:14, 4.56s/it]
1810
  4%|▍ | 218/5000 [22:58<5:43:18, 4.31s/it]
1811
 
1812
  4%|▍ | 218/5000 [22:58<5:43:18, 4.31s/it]
1813
  4%|▍ | 219/5000 [23:01<5:24:54, 4.08s/it]
1814
 
1815
  4%|▍ | 219/5000 [23:01<5:24:54, 4.08s/it]
1816
  4%|▍ | 220/5000 [23:05<5:10:19, 3.90s/it]
1817
 
1818
  4%|▍ | 220/5000 [23:05<5:10:19, 3.90s/it]
1819
  4%|▍ | 221/5000 [23:08<5:00:17, 3.77s/it]
1820
 
1821
  4%|▍ | 221/5000 [23:08<5:00:17, 3.77s/it]
1822
  4%|▍ | 222/5000 [23:12<4:53:21, 3.68s/it]
1823
 
1824
  4%|▍ | 222/5000 [23:12<4:53:21, 3.68s/it]
1825
  4%|▍ | 223/5000 [23:15<4:48:44, 3.63s/it]
1826
 
1827
  4%|▍ | 223/5000 [23:15<4:48:44, 3.63s/it]
1828
  4%|▍ | 224/5000 [23:19<4:45:03, 3.58s/it]
1829
 
1830
  4%|▍ | 224/5000 [23:19<4:45:03, 3.58s/it]
1831
  4%|▍ | 225/5000 [23:22<4:38:56, 3.50s/it]
1832
 
1833
  4%|▍ | 225/5000 [23:22<4:38:56, 3.50s/it]
1834
  5%|▍ | 226/5000 [23:25<4:24:21, 3.32s/it]
1835
 
1836
  5%|▍ | 226/5000 [23:25<4:24:21, 3.32s/it]
1837
  5%|▍ | 227/5000 [23:28<4:14:14, 3.20s/it]
1838
 
1839
  5%|▍ | 227/5000 [23:28<4:14:14, 3.20s/it]
1840
  5%|▍ | 228/5000 [23:31<4:06:55, 3.10s/it]
1841
 
1842
  5%|▍ | 228/5000 [23:31<4:06:55, 3.10s/it]
1843
  5%|▍ | 229/5000 [23:34<4:01:25, 3.04s/it]
1844
 
1845
  5%|▍ | 229/5000 [23:34<4:01:25, 3.04s/it]
1846
  5%|▍ | 230/5000 [23:37<3:57:38, 2.99s/it]
1847
 
1848
  5%|▍ | 230/5000 [23:37<3:57:38, 2.99s/it]
1849
  5%|▍ | 231/5000 [23:39<3:54:46, 2.95s/it]
1850
 
1851
  5%|▍ | 231/5000 [23:39<3:54:46, 2.95s/it]
1852
  5%|▍ | 232/5000 [23:42<3:52:48, 2.93s/it]
1853
 
1854
  5%|▍ | 232/5000 [23:42<3:52:48, 2.93s/it]
1855
  5%|▍ | 233/5000 [23:45<3:50:13, 2.90s/it]
1856
 
1857
  5%|▍ | 233/5000 [23:45<3:50:13, 2.90s/it]
1858
  5%|▍ | 234/5000 [23:48<3:41:16, 2.79s/it]
1859
 
1860
  5%|▍ | 234/5000 [23:48<3:41:16, 2.79s/it]
1861
  5%|▍ | 235/5000 [23:50<3:30:34, 2.65s/it]
1862
 
1863
  5%|▍ | 235/5000 [23:50<3:30:34, 2.65s/it]
1864
  5%|▍ | 236/5000 [23:52<3:22:52, 2.56s/it]
1865
 
1866
  5%|▍ | 236/5000 [23:52<3:22:52, 2.56s/it]
1867
  5%|▍ | 237/5000 [23:55<3:17:33, 2.49s/it]
1868
 
1869
  5%|▍ | 237/5000 [23:55<3:17:33, 2.49s/it]
1870
  5%|▍ | 238/5000 [23:57<3:13:44, 2.44s/it]
1871
 
1872
  5%|▍ | 238/5000 [23:57<3:13:44, 2.44s/it]
1873
  5%|▍ | 239/5000 [23:59<3:11:03, 2.41s/it]
1874
 
1875
  5%|▍ | 239/5000 [23:59<3:11:03, 2.41s/it]
1876
  5%|▍ | 240/5000 [24:02<3:09:01, 2.38s/it]
1877
 
1878
  5%|▍ | 240/5000 [24:02<3:09:01, 2.38s/it]
1879
  5%|▍ | 241/5000 [24:04<2:58:50, 2.25s/it]
1880
 
1881
  5%|▍ | 241/5000 [24:04<2:58:50, 2.25s/it]
1882
  5%|▍ | 242/5000 [24:05<2:46:53, 2.10s/it]
1883
 
1884
  5%|▍ | 242/5000 [24:05<2:46:53, 2.10s/it]
1885
  5%|▍ | 243/5000 [24:07<2:38:29, 2.00s/it]
1886
 
1887
  5%|▍ | 243/5000 [24:07<2:38:29, 2.00s/it]
1888
  5%|▍ | 244/5000 [24:09<2:32:26, 1.92s/it]
1889
 
1890
  5%|▍ | 244/5000 [24:09<2:32:26, 1.92s/it]
1891
  5%|▍ | 245/5000 [24:11<2:28:22, 1.87s/it]
1892
 
1893
  5%|▍ | 245/5000 [24:11<2:28:22, 1.87s/it]
1894
  5%|▍ | 246/5000 [24:12<2:25:36, 1.84s/it]
1895
 
1896
  5%|▍ | 246/5000 [24:12<2:25:36, 1.84s/it]
1897
  5%|▍ | 247/5000 [24:14<2:20:40, 1.78s/it]
1898
 
1899
  5%|▍ | 247/5000 [24:14<2:20:40, 1.78s/it]
1900
  5%|▍ | 248/5000 [24:15<2:06:52, 1.60s/it]
1901
 
1902
  5%|▍ | 248/5000 [24:15<2:06:52, 1.60s/it]
1903
  5%|▍ | 249/5000 [24:16<1:57:05, 1.48s/it]
1904
 
1905
  5%|▍ | 249/5000 [24:16<1:57:05, 1.48s/it]
1906
  5%|▌ | 250/5000 [24:19<2:31:59, 1.92s/it]
1907
 
1908
  5%|▌ | 250/5000 [24:19<2:31:59, 1.92s/it]
1909
  5%|▌ | 251/5000 [24:36<8:30:18, 6.45s/it]
1910
 
1911
  5%|▌ | 251/5000 [24:36<8:30:18, 6.45s/it]
1912
  5%|▌ | 252/5000 [24:45<9:22:20, 7.11s/it]
1913
 
1914
  5%|▌ | 252/5000 [24:45<9:22:20, 7.11s/it]
1915
  5%|▌ | 253/5000 [24:53<9:32:15, 7.23s/it]
1916
 
1917
  5%|▌ | 253/5000 [24:53<9:32:15, 7.23s/it]
1918
  5%|▌ | 254/5000 [24:59<9:22:48, 7.12s/it]
1919
 
1920
  5%|▌ | 254/5000 [24:59<9:22:48, 7.12s/it]
1921
  5%|▌ | 255/5000 [25:06<9:06:29, 6.91s/it]
1922
 
1923
  5%|▌ | 255/5000 [25:06<9:06:29, 6.91s/it]slurmstepd: error: *** JOB 2604812 ON gpu37 CANCELLED AT 2024-09-15T20:24:08 ***
indicwav2vec_trainwtagsv2_MUCS_warmup500_s300shuff100_2604931.out ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d6e542b15e77cd1bb8a7693945d939d7b6324a0178a2a54a151c7870a9a3a70
3
- size 1262426580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c8413b0a8098781dd4041c03523bde4a6a9d1f89399e1e1205151e76c329614
3
+ size 1262434780
tokenizer_config.json CHANGED
@@ -1,36 +1,36 @@
1
  {
2
  "added_tokens_decoder": {
3
- "147": {
4
- "content": "<s>",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
- "148": {
12
- "content": "</s>",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
- "149": {
20
- "content": "[UNK]",
21
- "lstrip": true,
22
  "normalized": false,
23
- "rstrip": true,
24
  "single_word": false,
25
- "special": false
26
  },
27
- "150": {
28
- "content": "[PAD]",
29
- "lstrip": true,
30
  "normalized": false,
31
- "rstrip": true,
32
  "single_word": false,
33
- "special": false
34
  }
35
  },
36
  "bos_token": "<s>",
 
1
  {
2
  "added_tokens_decoder": {
3
+ "149": {
4
+ "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
7
  "rstrip": true,
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "150": {
12
+ "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
15
  "rstrip": true,
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "151": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
  "normalized": false,
23
+ "rstrip": false,
24
  "single_word": false,
25
+ "special": true
26
  },
27
+ "152": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
  "normalized": false,
31
+ "rstrip": false,
32
  "single_word": false,
33
+ "special": true
34
  }
35
  },
36
  "bos_token": "<s>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:673e3429059d894b526752b0cd1eeaf19c902ddeb9ee9960f32941ecb04f2125
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d94d7876ff002d45f2ef145dcdddba9849824007cb4b40b05b98fdbdff808728
3
  size 5496
vocab.json CHANGED
@@ -20,8 +20,6 @@
20
  ":": 19,
21
  ";": 20,
22
  "<": 21,
23
- "</s>": 148,
24
- "<s>": 147,
25
  "=": 22,
26
  ">": 23,
27
  "@": 24,
@@ -149,5 +147,7 @@
149
  "›": 143,
150
  "⅓": 144,
151
  "→": 145,
 
 
152
  "−": 146
153
  }
 
20
  ":": 19,
21
  ";": 20,
22
  "<": 21,
 
 
23
  "=": 22,
24
  ">": 23,
25
  "@": 24,
 
147
  "›": 143,
148
  "⅓": 144,
149
  "→": 145,
150
+ "∅": 147,
151
+ "∇": 148,
152
  "−": 146
153
  }