bensapir committed on
Commit 9c5f93b · 1 Parent(s): ccbf683

Training in progress, step 220000

backup-290000/GoNotoCurrent.ttf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83ab5c39e2b1c34a955136275ce0db068cb20d9643ead033d6b8124a73ab4f64
+ size 15645492
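The three added lines above are a Git LFS pointer, not the font itself; the binary is fetched separately (e.g. via git lfs pull). As a sanity check, the downloaded file can be compared against the oid and size recorded in the pointer. A minimal Python sketch, assuming the font has already been pulled into a local checkout at the path shown:

import hashlib
from pathlib import Path

# Assumed local path inside the checked-out repository.
font_path = Path("backup-290000/GoNotoCurrent.ttf")
expected_oid = "83ab5c39e2b1c34a955136275ce0db068cb20d9643ead033d6b8124a73ab4f64"
expected_size = 15645492

data = font_path.read_bytes()
assert len(data) == expected_size, f"unexpected size: {len(data)}"
assert hashlib.sha256(data).hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
print("GoNotoCurrent.ttf matches its LFS pointer")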
backup-290000/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "architectures": [
+ "PIXELForPreTraining"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "cache_dir": null,
+ "decoder_hidden_size": 512,
+ "decoder_intermediate_size": 2048,
+ "decoder_num_attention_heads": 16,
+ "decoder_num_hidden_layers": 8,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "image_size": [
+ 16,
+ 8464
+ ],
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
+ "mask_ratio": 0.25,
+ "model_type": "pixel",
+ "norm_pix_loss": true,
+ "num_attention_heads": 12,
+ "num_channels": 3,
+ "num_hidden_layers": 12,
+ "patch_size": 16,
+ "qkv_bias": true,
+ "revision": "main",
+ "torch_dtype": "float32",
+ "transformers_version": "4.17.0",
+ "use_auth_token": false
+ }
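The geometry in this config fixes the sequence length the text renderer has to produce: a 16 x 8464 input image cut into 16 x 16 patches gives 8464 / 16 = 529 patches, which matches max_seq_length in text_renderer_config.json below, and mask_ratio 0.25 means roughly 132 of those patches are masked during pretraining. A minimal sketch that re-derives these numbers with the standard json module (the local path is assumed; no PIXEL code is needed for this):

import json

with open("backup-290000/config.json") as f:  # assumed local path
    cfg = json.load(f)

height, width = cfg["image_size"]                   # [16, 8464]
patch = cfg["patch_size"]                           # 16
num_patches = (height // patch) * (width // patch)  # 1 * 529 = 529
print("patches per example:", num_patches)
print("patches masked per example:", int(cfg["mask_ratio"] * num_patches))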
backup-290000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e9dcbb20629922bf85067e22faf5d25195f8e035f7d57e5f526ba804954689c
+ size 449474626
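pytorch_model.bin is a plain PyTorch state dict (about 449 MB here), so it can be inspected without instantiating PIXELForPreTraining. A minimal sketch, assuming the checkpoint has been pulled locally:

import torch

# A state dict is just a mapping from parameter names to tensors; load it on CPU.
state_dict = torch.load("backup-290000/pytorch_model.bin", map_location="cpu")

total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params / 1e6:.1f}M parameters")
for name in list(state_dict)[:5]:
    print(name, tuple(state_dict[name].shape))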
backup-290000/text_renderer_config.json ADDED
@@ -0,0 +1,168 @@
+ {
+ "background_color": "white",
+ "dpi": 120,
+ "font_color": "black",
+ "font_file": "GoNotoCurrent.ttf",
+ "font_size": 8,
+ "fonts_list": [
+ "Abyssinica SIL",
+ "Ani",
+ "AnjaliOldLipi",
+ "Bitstream Vera Sans",
+ "Bitstream Vera Sans Mono",
+ "Bitstream Vera Serif",
+ "C059",
+ "Chandas",
+ "Chilanka",
+ "D050000L",
+ "DejaVu Math TeX Gyre",
+ "DejaVu Sans",
+ "DejaVu Sans Mono",
+ "DejaVu Serif",
+ "Droid Sans Fallback",
+ "Dyuthi",
+ "FreeMono",
+ "FreeSans",
+ "FreeSerif",
+ "Gargi",
+ "Garuda",
+ "Gayathri",
+ "Go Noto Current",
+ "Gubbi",
+ "Inconsolata",
+ "Jamrul",
+ "KacstArt",
+ "KacstBook",
+ "KacstDecorative",
+ "KacstDigital",
+ "KacstFarsi",
+ "KacstLetter",
+ "KacstNaskh",
+ "KacstOffice",
+ "KacstOne",
+ "KacstPen",
+ "KacstPoster",
+ "KacstQurn",
+ "KacstScreen",
+ "KacstTitle",
+ "KacstTitleL",
+ "Kalapi",
+ "Kalimati",
+ "Karumbi",
+ "Keraleeyam",
+ "Khmer OS",
+ "Khmer OS System",
+ "Kinnari",
+ "LKLUG",
+ "Laksaman",
+ "Liberation Mono",
+ "Liberation Sans",
+ "Liberation Sans Narrow",
+ "Liberation Serif",
+ "Likhan",
+ "Lohit Assamese",
+ "Lohit Bengali",
+ "Lohit Devanagari",
+ "Lohit Gujarati",
+ "Lohit Gurmukhi",
+ "Lohit Kannada",
+ "Lohit Malayalam",
+ "Lohit Odia",
+ "Lohit Tamil",
+ "Lohit Tamil Classical",
+ "Lohit Telugu",
+ "Loma",
+ "Manjari",
+ "Meera",
+ "Mitra Mono",
+ "Monospace",
+ "Mukti Narrow",
+ "Nakula",
+ "Navilu",
+ "Nimbus Mono PS",
+ "Nimbus Roman",
+ "Nimbus Sans",
+ "Nimbus Sans Narrow",
+ "Norasi",
+ "Noto Color Emoji",
+ "Noto Mono",
+ "Noto Sans CJK HK",
+ "Noto Sans CJK JP",
+ "Noto Sans CJK KR",
+ "Noto Sans CJK SC",
+ "Noto Sans CJK TC",
+ "Noto Sans Mono CJK HK",
+ "Noto Sans Mono CJK JP",
+ "Noto Sans Mono CJK KR",
+ "Noto Sans Mono CJK SC",
+ "Noto Sans Mono CJK TC",
+ "Noto Serif CJK JP",
+ "Noto Serif CJK KR",
+ "Noto Serif CJK SC",
+ "Noto Serif CJK TC",
+ "OpenSymbol",
+ "P052",
+ "Padauk",
+ "Padauk Book",
+ "Pagul",
+ "Phetsarath OT",
+ "Pothana2000",
+ "Purisa",
+ "Rachana",
+ "RaghuMalayalamSans",
+ "Rasa",
+ "Rekha",
+ "Saab",
+ "Sahadeva",
+ "Samanata",
+ "Samyak Devanagari",
+ "Samyak Gujarati",
+ "Samyak Malayalam",
+ "Samyak Tamil",
+ "Sans",
+ "Sarai",
+ "Sawasdee",
+ "Serif",
+ "Source Code Pro",
+ "Standard Symbols PS",
+ "Suruma",
+ "System-ui",
+ "Tibetan Machine Uni",
+ "Tlwg Mono",
+ "Tlwg Typewriter",
+ "Tlwg Typist",
+ "Tlwg Typo",
+ "URW Bookman",
+ "URW Gothic",
+ "Ubuntu",
+ "Ubuntu Condensed",
+ "Ubuntu Mono",
+ "Umpush",
+ "Uroob",
+ "Vemana2000",
+ "Waree",
+ "Yrsa",
+ "Z003",
+ "aakar",
+ "cmex10",
+ "cmmi10",
+ "cmr10",
+ "cmsy10",
+ "esint10",
+ "eufm10",
+ "mry_KacstQurn",
+ "msam10",
+ "msbm10",
+ "ori1Uni",
+ "padmaa",
+ "padmaa-Bold.1.1",
+ "rsfs10",
+ "stmary10",
+ "wasy10"
+ ],
+ "max_seq_length": 529,
+ "pad_size": 3,
+ "pixels_per_patch": 16,
+ "rgb": false,
+ "text_renderer_type": "PangoCairoTextRenderer"
+ }
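fonts_list records the font families that fontconfig reported when this renderer config was created; GoNotoCurrent.ttf is the fallback font actually shipped with the checkpoint. On a machine with fontconfig installed, a minimal sketch to see how many of these families are available locally (the fc-list call and the config path are assumptions about the local environment):

import json
import subprocess

with open("backup-290000/text_renderer_config.json") as f:  # assumed local path
    renderer_cfg = json.load(f)

# `fc-list : family` prints the (possibly comma-separated) family names of every installed font.
out = subprocess.run(["fc-list", ":", "family"], capture_output=True, text=True, check=True)
installed = {name.strip() for line in out.stdout.splitlines() for name in line.split(",")}

missing = [fam for fam in renderer_cfg["fonts_list"] if fam not in installed]
print(f"{len(missing)} of {len(renderer_cfg['fonts_list'])} listed families are not installed")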
backup-290000/trainer_state.json ADDED
@@ -0,0 +1,305 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 111.9194180190263,
+ "global_step": 200000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 11.19,
+ "learning_rate": 1.8749999999999998e-06,
+ "loss": 0.8164,
+ "step": 10000
+ },
+ {
+ "epoch": 11.19,
+ "eval_loss": 0.7568955421447754,
+ "eval_runtime": 301.8174,
+ "eval_samples_per_second": 25.618,
+ "eval_steps_per_second": 1.604,
+ "step": 10000
+ },
+ {
+ "epoch": 22.37,
+ "learning_rate": 3.7499999999999997e-06,
+ "loss": 0.7702,
+ "step": 20000
+ },
+ {
+ "epoch": 22.37,
+ "eval_loss": 0.7498099207878113,
+ "eval_runtime": 249.204,
+ "eval_samples_per_second": 31.027,
+ "eval_steps_per_second": 1.942,
+ "step": 20000
+ },
+ {
+ "epoch": 33.56,
+ "learning_rate": 5.6249999999999995e-06,
+ "loss": 0.7668,
+ "step": 30000
+ },
+ {
+ "epoch": 33.56,
+ "eval_loss": 0.7477062344551086,
+ "eval_runtime": 189.0409,
+ "eval_samples_per_second": 40.901,
+ "eval_steps_per_second": 2.56,
+ "step": 30000
+ },
+ {
+ "epoch": 44.74,
+ "learning_rate": 7.499999999999999e-06,
+ "loss": 0.7655,
+ "step": 40000
+ },
+ {
+ "epoch": 44.74,
+ "eval_loss": 0.7450574040412903,
+ "eval_runtime": 66.2284,
+ "eval_samples_per_second": 116.748,
+ "eval_steps_per_second": 7.308,
+ "step": 40000
+ },
+ {
+ "epoch": 27.98,
+ "learning_rate": 9.375e-06,
+ "loss": 0.7653,
+ "step": 50000
+ },
+ {
+ "epoch": 27.98,
+ "eval_loss": 0.7478589415550232,
+ "eval_runtime": 76.5001,
+ "eval_samples_per_second": 101.072,
+ "eval_steps_per_second": 12.641,
+ "step": 50000
+ },
+ {
+ "epoch": 33.58,
+ "learning_rate": 1.1249999999999999e-05,
+ "loss": 0.7648,
+ "step": 60000
+ },
+ {
+ "epoch": 33.58,
+ "eval_loss": 0.7447686195373535,
+ "eval_runtime": 76.3539,
+ "eval_samples_per_second": 101.265,
+ "eval_steps_per_second": 12.665,
+ "step": 60000
+ },
+ {
+ "epoch": 39.17,
+ "learning_rate": 1.3124999999999999e-05,
+ "loss": 0.7645,
+ "step": 70000
+ },
+ {
+ "epoch": 39.17,
+ "eval_loss": 0.7464274764060974,
+ "eval_runtime": 76.7958,
+ "eval_samples_per_second": 100.683,
+ "eval_steps_per_second": 12.592,
+ "step": 70000
+ },
+ {
+ "epoch": 44.77,
+ "learning_rate": 1.4999999999999999e-05,
+ "loss": 0.7642,
+ "step": 80000
+ },
+ {
+ "epoch": 44.77,
+ "eval_loss": 0.7449608445167542,
+ "eval_runtime": 122.9116,
+ "eval_samples_per_second": 62.907,
+ "eval_steps_per_second": 7.867,
+ "step": 80000
+ },
+ {
+ "epoch": 50.36,
+ "learning_rate": 1.6875e-05,
+ "loss": 0.7636,
+ "step": 90000
+ },
+ {
+ "epoch": 50.36,
+ "eval_loss": 0.7427342534065247,
+ "eval_runtime": 76.4172,
+ "eval_samples_per_second": 101.181,
+ "eval_steps_per_second": 12.654,
+ "step": 90000
+ },
+ {
+ "epoch": 55.96,
+ "learning_rate": 2e-05,
+ "loss": 0.7602,
+ "step": 100000
+ },
+ {
+ "epoch": 55.96,
+ "eval_loss": 0.726163387298584,
+ "eval_runtime": 76.3938,
+ "eval_samples_per_second": 101.212,
+ "eval_steps_per_second": 12.658,
+ "step": 100000
+ },
+ {
+ "epoch": 61.56,
+ "learning_rate": 2e-05,
+ "loss": 0.7279,
+ "step": 110000
+ },
+ {
+ "epoch": 61.56,
+ "eval_loss": 0.6971690654754639,
+ "eval_runtime": 76.7625,
+ "eval_samples_per_second": 100.726,
+ "eval_steps_per_second": 12.597,
+ "step": 110000
+ },
+ {
+ "epoch": 67.15,
+ "learning_rate": 2e-05,
+ "loss": 0.6981,
+ "step": 120000
+ },
+ {
+ "epoch": 67.15,
+ "eval_loss": 0.6809367537498474,
+ "eval_runtime": 76.4831,
+ "eval_samples_per_second": 101.094,
+ "eval_steps_per_second": 12.643,
+ "step": 120000
+ },
+ {
+ "epoch": 72.75,
+ "learning_rate": 2e-05,
+ "loss": 0.6781,
+ "step": 130000
+ },
+ {
+ "epoch": 72.75,
+ "eval_loss": 0.6643149852752686,
+ "eval_runtime": 76.5075,
+ "eval_samples_per_second": 101.062,
+ "eval_steps_per_second": 12.639,
+ "step": 130000
+ },
+ {
+ "epoch": 78.34,
+ "learning_rate": 2e-05,
+ "loss": 0.6612,
+ "step": 140000
+ },
+ {
+ "epoch": 78.34,
+ "eval_loss": 0.653438150882721,
+ "eval_runtime": 76.7069,
+ "eval_samples_per_second": 100.799,
+ "eval_steps_per_second": 12.606,
+ "step": 140000
+ },
+ {
+ "epoch": 83.94,
+ "learning_rate": 2e-05,
+ "loss": 0.6483,
+ "step": 150000
+ },
+ {
+ "epoch": 83.94,
+ "eval_loss": 0.6426078081130981,
+ "eval_runtime": 76.587,
+ "eval_samples_per_second": 100.957,
+ "eval_steps_per_second": 12.626,
+ "step": 150000
+ },
+ {
+ "epoch": 89.54,
+ "learning_rate": 2e-05,
+ "loss": 0.6389,
+ "step": 160000
+ },
+ {
+ "epoch": 89.54,
+ "eval_loss": 0.6356751918792725,
+ "eval_runtime": 76.2962,
+ "eval_samples_per_second": 101.342,
+ "eval_steps_per_second": 12.674,
+ "step": 160000
+ },
+ {
+ "epoch": 95.13,
+ "learning_rate": 2e-05,
+ "loss": 0.6318,
+ "step": 170000
+ },
+ {
+ "epoch": 95.13,
+ "eval_loss": 0.6319578289985657,
+ "eval_runtime": 134.8378,
+ "eval_samples_per_second": 57.343,
+ "eval_steps_per_second": 7.172,
+ "step": 170000
+ },
+ {
+ "epoch": 100.73,
+ "learning_rate": 2e-05,
+ "loss": 0.6261,
+ "step": 180000
+ },
+ {
+ "epoch": 100.73,
+ "eval_loss": 0.6279829740524292,
+ "eval_runtime": 76.2996,
+ "eval_samples_per_second": 101.337,
+ "eval_steps_per_second": 12.674,
+ "step": 180000
+ },
+ {
+ "epoch": 106.32,
+ "learning_rate": 2e-05,
+ "loss": 0.6214,
+ "step": 190000
+ },
+ {
+ "epoch": 106.32,
+ "eval_loss": 0.6199918389320374,
+ "eval_runtime": 76.2832,
+ "eval_samples_per_second": 101.359,
+ "eval_steps_per_second": 12.676,
+ "step": 190000
+ },
+ {
+ "epoch": 111.92,
+ "learning_rate": 2e-05,
+ "loss": 0.6177,
+ "step": 200000
+ },
+ {
+ "epoch": 111.92,
+ "eval_loss": 0.6199995875358582,
+ "eval_runtime": 76.4051,
+ "eval_samples_per_second": 101.197,
+ "eval_steps_per_second": 12.656,
+ "step": 200000
+ },
+ {
+ "epoch": 111.92,
+ "step": 200000,
+ "total_flos": 1.0512018951481177e+21,
+ "train_loss": 0.5566074145507812,
+ "train_runtime": 84698.4102,
+ "train_samples_per_second": 37.781,
+ "train_steps_per_second": 2.361
+ }
+ ],
+ "max_steps": 200000,
+ "num_train_epochs": 112,
+ "total_flos": 1.0512018951481177e+21,
+ "trial_name": null,
+ "trial_params": null
+ }
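log_history interleaves training entries (loss, learning_rate) with evaluation entries (eval_loss plus throughput figures), both keyed by step. A minimal sketch that pulls the eval curve out of this file, assuming only the standard library plus matplotlib and the local path shown:

import json
import matplotlib.pyplot as plt

with open("backup-290000/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# Evaluation entries are the ones carrying an "eval_loss" key.
evals = [e for e in state["log_history"] if "eval_loss" in e]
steps = [e["step"] for e in evals]
losses = [e["eval_loss"] for e in evals]

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("eval_loss")
plt.title("PIXEL pretraining eval loss")
plt.savefig("eval_loss.png")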
backup-290000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:038e617dff220000f31c4466086e928ab769fb3dea37924e2293e04e7cc1b50e
+ size 3512
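training_args.bin is the TrainingArguments object pickled by the Hugging Face Trainer, so it can be opened for inspection rather than diffed. A minimal sketch, assuming a transformers version compatible with the one recorded in config.json (4.17.0) is installed; weights_only=False is needed on recent PyTorch because this is an arbitrary pickled object rather than a tensor file:

import torch

# transformers must be installed so the pickled TrainingArguments class can be resolved.
args = torch.load("backup-290000/training_args.bin", weights_only=False)
print(type(args).__name__)
print("learning_rate:", args.learning_rate)
print("max_steps:", args.max_steps)
print("per_device_train_batch_size:", args.per_device_train_batch_size)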
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:daa9f45bcfe7ade94da8070fb641dadb99c295d78f116fbcf194f0b4b3efe7f5
+ oid sha256:8fe03a0ad7b9683981e4caf01b17bfe2cf452df3b7b9c3116d8e49a92a0dd2a3
  size 449474626
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3eef506434d84b6048bd48859e91edb3cf72837ea07cb9913ed23969f5d79423
- size 3576
+ oid sha256:22ae2c800e5a18cc08d55905c495c8e84c79de6592e8c823e6150aa5b70f4654
+ size 3640