hqfang committed (verified)
Commit b815ebb · 1 Parent(s): 1d9e688

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,429 @@
+ {
+   "</tool_call>": 151658,
+   "<im_col>": 152067,
+   "<im_end>": 152065,
+   "<im_low>": 152069,
+   "<im_patch>": 152066,
+   "<im_start>": 152064,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|image|>": 152068,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652,
+   "|<EXTRA_TOKENS_0>|": 151665,
+   "|<EXTRA_TOKENS_100>|": 151765,
+   "|<EXTRA_TOKENS_101>|": 151766,
+   "|<EXTRA_TOKENS_102>|": 151767,
+   "|<EXTRA_TOKENS_103>|": 151768,
+   "|<EXTRA_TOKENS_104>|": 151769,
+   "|<EXTRA_TOKENS_105>|": 151770,
+   "|<EXTRA_TOKENS_106>|": 151771,
+   "|<EXTRA_TOKENS_107>|": 151772,
+   "|<EXTRA_TOKENS_108>|": 151773,
+   "|<EXTRA_TOKENS_109>|": 151774,
+   "|<EXTRA_TOKENS_10>|": 151675,
+   "|<EXTRA_TOKENS_110>|": 151775,
+   "|<EXTRA_TOKENS_111>|": 151776,
+   "|<EXTRA_TOKENS_112>|": 151777,
+   "|<EXTRA_TOKENS_113>|": 151778,
+   "|<EXTRA_TOKENS_114>|": 151779,
+   "|<EXTRA_TOKENS_115>|": 151780,
+   "|<EXTRA_TOKENS_116>|": 151781,
+   "|<EXTRA_TOKENS_117>|": 151782,
+   "|<EXTRA_TOKENS_118>|": 151783,
+   "|<EXTRA_TOKENS_119>|": 151784,
+   "|<EXTRA_TOKENS_11>|": 151676,
+   "|<EXTRA_TOKENS_120>|": 151785,
+   "|<EXTRA_TOKENS_121>|": 151786,
+   "|<EXTRA_TOKENS_122>|": 151787,
+   "|<EXTRA_TOKENS_123>|": 151788,
+   "|<EXTRA_TOKENS_124>|": 151789,
+   "|<EXTRA_TOKENS_125>|": 151790,
+   "|<EXTRA_TOKENS_126>|": 151791,
+   "|<EXTRA_TOKENS_127>|": 151792,
+   "|<EXTRA_TOKENS_128>|": 151793,
+   "|<EXTRA_TOKENS_129>|": 151794,
+   "|<EXTRA_TOKENS_12>|": 151677,
+   "|<EXTRA_TOKENS_130>|": 151795,
+   "|<EXTRA_TOKENS_131>|": 151796,
+   "|<EXTRA_TOKENS_132>|": 151797,
+   "|<EXTRA_TOKENS_133>|": 151798,
+   "|<EXTRA_TOKENS_134>|": 151799,
+   "|<EXTRA_TOKENS_135>|": 151800,
+   "|<EXTRA_TOKENS_136>|": 151801,
+   "|<EXTRA_TOKENS_137>|": 151802,
+   "|<EXTRA_TOKENS_138>|": 151803,
+   "|<EXTRA_TOKENS_139>|": 151804,
+   "|<EXTRA_TOKENS_13>|": 151678,
+   "|<EXTRA_TOKENS_140>|": 151805,
+   "|<EXTRA_TOKENS_141>|": 151806,
+   "|<EXTRA_TOKENS_142>|": 151807,
+   "|<EXTRA_TOKENS_143>|": 151808,
+   "|<EXTRA_TOKENS_144>|": 151809,
+   "|<EXTRA_TOKENS_145>|": 151810,
+   "|<EXTRA_TOKENS_146>|": 151811,
+   "|<EXTRA_TOKENS_147>|": 151812,
+   "|<EXTRA_TOKENS_148>|": 151813,
+   "|<EXTRA_TOKENS_149>|": 151814,
+   "|<EXTRA_TOKENS_14>|": 151679,
+   "|<EXTRA_TOKENS_150>|": 151815,
+   "|<EXTRA_TOKENS_151>|": 151816,
+   "|<EXTRA_TOKENS_152>|": 151817,
+   "|<EXTRA_TOKENS_153>|": 151818,
+   "|<EXTRA_TOKENS_154>|": 151819,
+   "|<EXTRA_TOKENS_155>|": 151820,
+   "|<EXTRA_TOKENS_156>|": 151821,
+   "|<EXTRA_TOKENS_157>|": 151822,
+   "|<EXTRA_TOKENS_158>|": 151823,
+   "|<EXTRA_TOKENS_159>|": 151824,
+   "|<EXTRA_TOKENS_15>|": 151680,
+   "|<EXTRA_TOKENS_160>|": 151825,
+   "|<EXTRA_TOKENS_161>|": 151826,
+   "|<EXTRA_TOKENS_162>|": 151827,
+   "|<EXTRA_TOKENS_163>|": 151828,
+   "|<EXTRA_TOKENS_164>|": 151829,
+   "|<EXTRA_TOKENS_165>|": 151830,
+   "|<EXTRA_TOKENS_166>|": 151831,
+   "|<EXTRA_TOKENS_167>|": 151832,
+   "|<EXTRA_TOKENS_168>|": 151833,
+   "|<EXTRA_TOKENS_169>|": 151834,
+   "|<EXTRA_TOKENS_16>|": 151681,
+   "|<EXTRA_TOKENS_170>|": 151835,
+   "|<EXTRA_TOKENS_171>|": 151836,
+   "|<EXTRA_TOKENS_172>|": 151837,
+   "|<EXTRA_TOKENS_173>|": 151838,
+   "|<EXTRA_TOKENS_174>|": 151839,
+   "|<EXTRA_TOKENS_175>|": 151840,
+   "|<EXTRA_TOKENS_176>|": 151841,
+   "|<EXTRA_TOKENS_177>|": 151842,
+   "|<EXTRA_TOKENS_178>|": 151843,
+   "|<EXTRA_TOKENS_179>|": 151844,
+   "|<EXTRA_TOKENS_17>|": 151682,
+   "|<EXTRA_TOKENS_180>|": 151845,
+   "|<EXTRA_TOKENS_181>|": 151846,
+   "|<EXTRA_TOKENS_182>|": 151847,
+   "|<EXTRA_TOKENS_183>|": 151848,
+   "|<EXTRA_TOKENS_184>|": 151849,
+   "|<EXTRA_TOKENS_185>|": 151850,
+   "|<EXTRA_TOKENS_186>|": 151851,
+   "|<EXTRA_TOKENS_187>|": 151852,
+   "|<EXTRA_TOKENS_188>|": 151853,
+   "|<EXTRA_TOKENS_189>|": 151854,
+   "|<EXTRA_TOKENS_18>|": 151683,
+   "|<EXTRA_TOKENS_190>|": 151855,
+   "|<EXTRA_TOKENS_191>|": 151856,
+   "|<EXTRA_TOKENS_192>|": 151857,
+   "|<EXTRA_TOKENS_193>|": 151858,
+   "|<EXTRA_TOKENS_194>|": 151859,
+   "|<EXTRA_TOKENS_195>|": 151860,
+   "|<EXTRA_TOKENS_196>|": 151861,
+   "|<EXTRA_TOKENS_197>|": 151862,
+   "|<EXTRA_TOKENS_198>|": 151863,
+   "|<EXTRA_TOKENS_199>|": 151864,
+   "|<EXTRA_TOKENS_19>|": 151684,
+   "|<EXTRA_TOKENS_1>|": 151666,
+   "|<EXTRA_TOKENS_200>|": 151865,
+   "|<EXTRA_TOKENS_201>|": 151866,
+   "|<EXTRA_TOKENS_202>|": 151867,
+   "|<EXTRA_TOKENS_203>|": 151868,
+   "|<EXTRA_TOKENS_204>|": 151869,
+   "|<EXTRA_TOKENS_205>|": 151870,
+   "|<EXTRA_TOKENS_206>|": 151871,
+   "|<EXTRA_TOKENS_207>|": 151872,
+   "|<EXTRA_TOKENS_208>|": 151873,
+   "|<EXTRA_TOKENS_209>|": 151874,
+   "|<EXTRA_TOKENS_20>|": 151685,
+   "|<EXTRA_TOKENS_210>|": 151875,
+   "|<EXTRA_TOKENS_211>|": 151876,
+   "|<EXTRA_TOKENS_212>|": 151877,
+   "|<EXTRA_TOKENS_213>|": 151878,
+   "|<EXTRA_TOKENS_214>|": 151879,
+   "|<EXTRA_TOKENS_215>|": 151880,
+   "|<EXTRA_TOKENS_216>|": 151881,
+   "|<EXTRA_TOKENS_217>|": 151882,
+   "|<EXTRA_TOKENS_218>|": 151883,
+   "|<EXTRA_TOKENS_219>|": 151884,
+   "|<EXTRA_TOKENS_21>|": 151686,
+   "|<EXTRA_TOKENS_220>|": 151885,
+   "|<EXTRA_TOKENS_221>|": 151886,
+   "|<EXTRA_TOKENS_222>|": 151887,
+   "|<EXTRA_TOKENS_223>|": 151888,
+   "|<EXTRA_TOKENS_224>|": 151889,
+   "|<EXTRA_TOKENS_225>|": 151890,
+   "|<EXTRA_TOKENS_226>|": 151891,
+   "|<EXTRA_TOKENS_227>|": 151892,
+   "|<EXTRA_TOKENS_228>|": 151893,
+   "|<EXTRA_TOKENS_229>|": 151894,
+   "|<EXTRA_TOKENS_22>|": 151687,
+   "|<EXTRA_TOKENS_230>|": 151895,
+   "|<EXTRA_TOKENS_231>|": 151896,
+   "|<EXTRA_TOKENS_232>|": 151897,
+   "|<EXTRA_TOKENS_233>|": 151898,
+   "|<EXTRA_TOKENS_234>|": 151899,
+   "|<EXTRA_TOKENS_235>|": 151900,
+   "|<EXTRA_TOKENS_236>|": 151901,
+   "|<EXTRA_TOKENS_237>|": 151902,
+   "|<EXTRA_TOKENS_238>|": 151903,
+   "|<EXTRA_TOKENS_239>|": 151904,
+   "|<EXTRA_TOKENS_23>|": 151688,
+   "|<EXTRA_TOKENS_240>|": 151905,
+   "|<EXTRA_TOKENS_241>|": 151906,
+   "|<EXTRA_TOKENS_242>|": 151907,
+   "|<EXTRA_TOKENS_243>|": 151908,
+   "|<EXTRA_TOKENS_244>|": 151909,
+   "|<EXTRA_TOKENS_245>|": 151910,
+   "|<EXTRA_TOKENS_246>|": 151911,
+   "|<EXTRA_TOKENS_247>|": 151912,
+   "|<EXTRA_TOKENS_248>|": 151913,
+   "|<EXTRA_TOKENS_249>|": 151914,
+   "|<EXTRA_TOKENS_24>|": 151689,
+   "|<EXTRA_TOKENS_250>|": 151915,
+   "|<EXTRA_TOKENS_251>|": 151916,
+   "|<EXTRA_TOKENS_252>|": 151917,
+   "|<EXTRA_TOKENS_253>|": 151918,
+   "|<EXTRA_TOKENS_254>|": 151919,
+   "|<EXTRA_TOKENS_255>|": 151920,
+   "|<EXTRA_TOKENS_256>|": 151921,
+   "|<EXTRA_TOKENS_257>|": 151922,
+   "|<EXTRA_TOKENS_258>|": 151923,
+   "|<EXTRA_TOKENS_259>|": 151924,
+   "|<EXTRA_TOKENS_25>|": 151690,
+   "|<EXTRA_TOKENS_260>|": 151925,
+   "|<EXTRA_TOKENS_261>|": 151926,
+   "|<EXTRA_TOKENS_262>|": 151927,
+   "|<EXTRA_TOKENS_263>|": 151928,
+   "|<EXTRA_TOKENS_264>|": 151929,
+   "|<EXTRA_TOKENS_265>|": 151930,
+   "|<EXTRA_TOKENS_266>|": 151931,
+   "|<EXTRA_TOKENS_267>|": 151932,
+   "|<EXTRA_TOKENS_268>|": 151933,
+   "|<EXTRA_TOKENS_269>|": 151934,
+   "|<EXTRA_TOKENS_26>|": 151691,
+   "|<EXTRA_TOKENS_270>|": 151935,
+   "|<EXTRA_TOKENS_271>|": 151936,
+   "|<EXTRA_TOKENS_272>|": 151937,
+   "|<EXTRA_TOKENS_273>|": 151938,
+   "|<EXTRA_TOKENS_274>|": 151939,
+   "|<EXTRA_TOKENS_275>|": 151940,
+   "|<EXTRA_TOKENS_276>|": 151941,
+   "|<EXTRA_TOKENS_277>|": 151942,
+   "|<EXTRA_TOKENS_278>|": 151943,
+   "|<EXTRA_TOKENS_279>|": 151944,
+   "|<EXTRA_TOKENS_27>|": 151692,
+   "|<EXTRA_TOKENS_280>|": 151945,
+   "|<EXTRA_TOKENS_281>|": 151946,
+   "|<EXTRA_TOKENS_282>|": 151947,
+   "|<EXTRA_TOKENS_283>|": 151948,
+   "|<EXTRA_TOKENS_284>|": 151949,
+   "|<EXTRA_TOKENS_285>|": 151950,
+   "|<EXTRA_TOKENS_286>|": 151951,
+   "|<EXTRA_TOKENS_287>|": 151952,
+   "|<EXTRA_TOKENS_288>|": 151953,
+   "|<EXTRA_TOKENS_289>|": 151954,
+   "|<EXTRA_TOKENS_28>|": 151693,
+   "|<EXTRA_TOKENS_290>|": 151955,
+   "|<EXTRA_TOKENS_291>|": 151956,
+   "|<EXTRA_TOKENS_292>|": 151957,
+   "|<EXTRA_TOKENS_293>|": 151958,
+   "|<EXTRA_TOKENS_294>|": 151959,
+   "|<EXTRA_TOKENS_295>|": 151960,
+   "|<EXTRA_TOKENS_296>|": 151961,
+   "|<EXTRA_TOKENS_297>|": 151962,
+   "|<EXTRA_TOKENS_298>|": 151963,
+   "|<EXTRA_TOKENS_299>|": 151964,
+   "|<EXTRA_TOKENS_29>|": 151694,
+   "|<EXTRA_TOKENS_2>|": 151667,
+   "|<EXTRA_TOKENS_300>|": 151965,
+   "|<EXTRA_TOKENS_301>|": 151966,
+   "|<EXTRA_TOKENS_302>|": 151967,
+   "|<EXTRA_TOKENS_303>|": 151968,
+   "|<EXTRA_TOKENS_304>|": 151969,
+   "|<EXTRA_TOKENS_305>|": 151970,
+   "|<EXTRA_TOKENS_306>|": 151971,
+   "|<EXTRA_TOKENS_307>|": 151972,
+   "|<EXTRA_TOKENS_308>|": 151973,
+   "|<EXTRA_TOKENS_309>|": 151974,
+   "|<EXTRA_TOKENS_30>|": 151695,
+   "|<EXTRA_TOKENS_310>|": 151975,
+   "|<EXTRA_TOKENS_311>|": 151976,
+   "|<EXTRA_TOKENS_312>|": 151977,
+   "|<EXTRA_TOKENS_313>|": 151978,
+   "|<EXTRA_TOKENS_314>|": 151979,
+   "|<EXTRA_TOKENS_315>|": 151980,
+   "|<EXTRA_TOKENS_316>|": 151981,
+   "|<EXTRA_TOKENS_317>|": 151982,
+   "|<EXTRA_TOKENS_318>|": 151983,
+   "|<EXTRA_TOKENS_319>|": 151984,
+   "|<EXTRA_TOKENS_31>|": 151696,
+   "|<EXTRA_TOKENS_320>|": 151985,
+   "|<EXTRA_TOKENS_321>|": 151986,
+   "|<EXTRA_TOKENS_322>|": 151987,
+   "|<EXTRA_TOKENS_323>|": 151988,
+   "|<EXTRA_TOKENS_324>|": 151989,
+   "|<EXTRA_TOKENS_325>|": 151990,
+   "|<EXTRA_TOKENS_326>|": 151991,
+   "|<EXTRA_TOKENS_327>|": 151992,
+   "|<EXTRA_TOKENS_328>|": 151993,
+   "|<EXTRA_TOKENS_329>|": 151994,
+   "|<EXTRA_TOKENS_32>|": 151697,
+   "|<EXTRA_TOKENS_330>|": 151995,
+   "|<EXTRA_TOKENS_331>|": 151996,
+   "|<EXTRA_TOKENS_332>|": 151997,
+   "|<EXTRA_TOKENS_333>|": 151998,
+   "|<EXTRA_TOKENS_334>|": 151999,
+   "|<EXTRA_TOKENS_335>|": 152000,
+   "|<EXTRA_TOKENS_336>|": 152001,
+   "|<EXTRA_TOKENS_337>|": 152002,
+   "|<EXTRA_TOKENS_338>|": 152003,
+   "|<EXTRA_TOKENS_339>|": 152004,
+   "|<EXTRA_TOKENS_33>|": 151698,
+   "|<EXTRA_TOKENS_340>|": 152005,
+   "|<EXTRA_TOKENS_341>|": 152006,
+   "|<EXTRA_TOKENS_342>|": 152007,
+   "|<EXTRA_TOKENS_343>|": 152008,
+   "|<EXTRA_TOKENS_344>|": 152009,
+   "|<EXTRA_TOKENS_345>|": 152010,
+   "|<EXTRA_TOKENS_346>|": 152011,
+   "|<EXTRA_TOKENS_347>|": 152012,
+   "|<EXTRA_TOKENS_348>|": 152013,
+   "|<EXTRA_TOKENS_349>|": 152014,
+   "|<EXTRA_TOKENS_34>|": 151699,
+   "|<EXTRA_TOKENS_350>|": 152015,
+   "|<EXTRA_TOKENS_351>|": 152016,
+   "|<EXTRA_TOKENS_352>|": 152017,
+   "|<EXTRA_TOKENS_353>|": 152018,
+   "|<EXTRA_TOKENS_354>|": 152019,
+   "|<EXTRA_TOKENS_355>|": 152020,
+   "|<EXTRA_TOKENS_356>|": 152021,
+   "|<EXTRA_TOKENS_357>|": 152022,
+   "|<EXTRA_TOKENS_358>|": 152023,
+   "|<EXTRA_TOKENS_359>|": 152024,
+   "|<EXTRA_TOKENS_35>|": 151700,
+   "|<EXTRA_TOKENS_360>|": 152025,
+   "|<EXTRA_TOKENS_361>|": 152026,
+   "|<EXTRA_TOKENS_362>|": 152027,
+   "|<EXTRA_TOKENS_363>|": 152028,
+   "|<EXTRA_TOKENS_364>|": 152029,
+   "|<EXTRA_TOKENS_365>|": 152030,
+   "|<EXTRA_TOKENS_366>|": 152031,
+   "|<EXTRA_TOKENS_367>|": 152032,
+   "|<EXTRA_TOKENS_368>|": 152033,
+   "|<EXTRA_TOKENS_369>|": 152034,
+   "|<EXTRA_TOKENS_36>|": 151701,
+   "|<EXTRA_TOKENS_370>|": 152035,
+   "|<EXTRA_TOKENS_371>|": 152036,
+   "|<EXTRA_TOKENS_372>|": 152037,
+   "|<EXTRA_TOKENS_373>|": 152038,
+   "|<EXTRA_TOKENS_374>|": 152039,
+   "|<EXTRA_TOKENS_375>|": 152040,
+   "|<EXTRA_TOKENS_376>|": 152041,
+   "|<EXTRA_TOKENS_377>|": 152042,
+   "|<EXTRA_TOKENS_378>|": 152043,
+   "|<EXTRA_TOKENS_379>|": 152044,
+   "|<EXTRA_TOKENS_37>|": 151702,
+   "|<EXTRA_TOKENS_380>|": 152045,
+   "|<EXTRA_TOKENS_381>|": 152046,
+   "|<EXTRA_TOKENS_382>|": 152047,
+   "|<EXTRA_TOKENS_383>|": 152048,
+   "|<EXTRA_TOKENS_384>|": 152049,
+   "|<EXTRA_TOKENS_385>|": 152050,
+   "|<EXTRA_TOKENS_386>|": 152051,
+   "|<EXTRA_TOKENS_387>|": 152052,
+   "|<EXTRA_TOKENS_388>|": 152053,
+   "|<EXTRA_TOKENS_389>|": 152054,
+   "|<EXTRA_TOKENS_38>|": 151703,
+   "|<EXTRA_TOKENS_390>|": 152055,
+   "|<EXTRA_TOKENS_391>|": 152056,
+   "|<EXTRA_TOKENS_392>|": 152057,
+   "|<EXTRA_TOKENS_393>|": 152058,
+   "|<EXTRA_TOKENS_394>|": 152059,
+   "|<EXTRA_TOKENS_395>|": 152060,
+   "|<EXTRA_TOKENS_396>|": 152061,
+   "|<EXTRA_TOKENS_397>|": 152062,
+   "|<EXTRA_TOKENS_398>|": 152063,
+   "|<EXTRA_TOKENS_39>|": 151704,
+   "|<EXTRA_TOKENS_3>|": 151668,
+   "|<EXTRA_TOKENS_40>|": 151705,
+   "|<EXTRA_TOKENS_41>|": 151706,
+   "|<EXTRA_TOKENS_42>|": 151707,
+   "|<EXTRA_TOKENS_43>|": 151708,
+   "|<EXTRA_TOKENS_44>|": 151709,
+   "|<EXTRA_TOKENS_45>|": 151710,
+   "|<EXTRA_TOKENS_46>|": 151711,
+   "|<EXTRA_TOKENS_47>|": 151712,
+   "|<EXTRA_TOKENS_48>|": 151713,
+   "|<EXTRA_TOKENS_49>|": 151714,
+   "|<EXTRA_TOKENS_4>|": 151669,
+   "|<EXTRA_TOKENS_50>|": 151715,
+   "|<EXTRA_TOKENS_51>|": 151716,
+   "|<EXTRA_TOKENS_52>|": 151717,
+   "|<EXTRA_TOKENS_53>|": 151718,
+   "|<EXTRA_TOKENS_54>|": 151719,
+   "|<EXTRA_TOKENS_55>|": 151720,
+   "|<EXTRA_TOKENS_56>|": 151721,
+   "|<EXTRA_TOKENS_57>|": 151722,
+   "|<EXTRA_TOKENS_58>|": 151723,
+   "|<EXTRA_TOKENS_59>|": 151724,
+   "|<EXTRA_TOKENS_5>|": 151670,
+   "|<EXTRA_TOKENS_60>|": 151725,
+   "|<EXTRA_TOKENS_61>|": 151726,
+   "|<EXTRA_TOKENS_62>|": 151727,
+   "|<EXTRA_TOKENS_63>|": 151728,
+   "|<EXTRA_TOKENS_64>|": 151729,
+   "|<EXTRA_TOKENS_65>|": 151730,
+   "|<EXTRA_TOKENS_66>|": 151731,
+   "|<EXTRA_TOKENS_67>|": 151732,
+   "|<EXTRA_TOKENS_68>|": 151733,
+   "|<EXTRA_TOKENS_69>|": 151734,
+   "|<EXTRA_TOKENS_6>|": 151671,
+   "|<EXTRA_TOKENS_70>|": 151735,
+   "|<EXTRA_TOKENS_71>|": 151736,
+   "|<EXTRA_TOKENS_72>|": 151737,
+   "|<EXTRA_TOKENS_73>|": 151738,
+   "|<EXTRA_TOKENS_74>|": 151739,
+   "|<EXTRA_TOKENS_75>|": 151740,
+   "|<EXTRA_TOKENS_76>|": 151741,
+   "|<EXTRA_TOKENS_77>|": 151742,
+   "|<EXTRA_TOKENS_78>|": 151743,
+   "|<EXTRA_TOKENS_79>|": 151744,
+   "|<EXTRA_TOKENS_7>|": 151672,
+   "|<EXTRA_TOKENS_80>|": 151745,
+   "|<EXTRA_TOKENS_81>|": 151746,
+   "|<EXTRA_TOKENS_82>|": 151747,
+   "|<EXTRA_TOKENS_83>|": 151748,
+   "|<EXTRA_TOKENS_84>|": 151749,
+   "|<EXTRA_TOKENS_85>|": 151750,
+   "|<EXTRA_TOKENS_86>|": 151751,
+   "|<EXTRA_TOKENS_87>|": 151752,
+   "|<EXTRA_TOKENS_88>|": 151753,
+   "|<EXTRA_TOKENS_89>|": 151754,
+   "|<EXTRA_TOKENS_8>|": 151673,
+   "|<EXTRA_TOKENS_90>|": 151755,
+   "|<EXTRA_TOKENS_91>|": 151756,
+   "|<EXTRA_TOKENS_92>|": 151757,
+   "|<EXTRA_TOKENS_93>|": 151758,
+   "|<EXTRA_TOKENS_94>|": 151759,
+   "|<EXTRA_TOKENS_95>|": 151760,
+   "|<EXTRA_TOKENS_96>|": 151761,
+   "|<EXTRA_TOKENS_97>|": 151762,
+   "|<EXTRA_TOKENS_98>|": 151763,
+   "|<EXTRA_TOKENS_99>|": 151764,
+   "|<EXTRA_TOKENS_9>|": 151674
+ }
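
The mapping above extends the base 152064-entry vocabulary (see config.json below) with MolmoAct's special and extra tokens. A minimal sketch of how these ids surface at runtime; the repo path is a placeholder, not something named in this commit:

from transformers import AutoTokenizer

# Hypothetical local checkout of this repository; adjust to the real repo id.
tok = AutoTokenizer.from_pretrained("path/to/this-repo", trust_remote_code=True)

# Added tokens resolve to the ids listed above, e.g. the image-patch marker
# that config.json references via "image_patch_id": 152066.
assert tok.convert_tokens_to_ids("<im_patch>") == 152066
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643
assert tok.convert_tokens_to_ids("|<EXTRA_TOKENS_0>|") == 151665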
chat_template.jinja ADDED
@@ -0,0 +1 @@
+ long_caption 65:
config.json ADDED
@@ -0,0 +1,87 @@
+ {
+   "adapter_config": {
+     "attention_dropout": 0.0,
+     "float32_attention": true,
+     "head_dim": 72,
+     "hidden_act": "silu",
+     "hidden_size": 1152,
+     "image_feature_dropout": 0.0,
+     "image_padding_embed": null,
+     "initializer_range": 0.02,
+     "intermediate_size": 18944,
+     "model_type": "",
+     "num_attention_heads": 16,
+     "num_key_value_heads": 16,
+     "residual_dropout": 0.0,
+     "text_hidden_size": 3584,
+     "vit_layers": [
+       -3,
+       -9
+     ]
+   },
+   "architectures": [
+     "MolmoActForActionReasoning"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_molmoact.MolmoActConfig",
+     "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning"
+   },
+   "image_patch_id": 152066,
+   "initializer_range": 0.02,
+   "llm_config": {
+     "additional_vocab_size": 128,
+     "attention_dropout": 0.0,
+     "embedding_dropout": 0.0,
+     "head_dim": 128,
+     "hidden_act": "silu",
+     "hidden_size": 3584,
+     "initializer_range": 0.02,
+     "intermediate_size": 18944,
+     "layer_norm_eps": 1e-06,
+     "max_position_embeddings": 4096,
+     "model_type": "molmoact_llm",
+     "norm_after": false,
+     "num_attention_heads": 28,
+     "num_hidden_layers": 28,
+     "num_key_value_heads": 4,
+     "qk_norm_type": "olmo",
+     "qkv_bias": true,
+     "residual_dropout": 0.0,
+     "rope_scaling": null,
+     "rope_theta": 1000000.0,
+     "use_cache": true,
+     "use_qk_norm": false,
+     "vocab_size": 152064
+   },
+   "model_type": "molmoact",
+   "n_action_bins": 256,
+   "norm_stats": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.52.3",
+   "use_cache": true,
+   "vit_config": {
+     "attention_dropout": 0.0,
+     "float32_attention": true,
+     "head_dim": 72,
+     "hidden_act": "gelu_pytorch_tanh",
+     "hidden_size": 1152,
+     "image_default_input_size": [
+       378,
+       378
+     ],
+     "image_num_pos": 729,
+     "image_patch_size": 14,
+     "initializer_range": 0.02,
+     "intermediate_size": 4304,
+     "layer_norm_eps": 1e-06,
+     "model_type": "molmoact_vit",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 27,
+     "num_key_value_heads": 16,
+     "patch_bias": true,
+     "pre_layernorm": false,
+     "residual_dropout": 0.0,
+     "use_cls_token": false
+   }
+ }
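
Because the config declares custom classes through "auto_map", loading it goes through the remote-code path. A minimal sketch, assuming the files in this commit are checked out locally (the path is again a placeholder):

from transformers import AutoConfig

# trust_remote_code lets AutoConfig follow the auto_map entry above to
# configuration_molmoact.MolmoActConfig shipped in this repo.
cfg = AutoConfig.from_pretrained("path/to/this-repo", trust_remote_code=True)

print(cfg.model_type)                   # "molmoact"
print(cfg.llm_config.hidden_size)       # 3584, mirrored by the hidden_size property
print(cfg.vit_config.image_patch_size)  # 14, so 378x378 inputs give 27x27 patches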
configuration_molmoact.py ADDED
@@ -0,0 +1,355 @@
+ """
+ MolmoAct configuration
+ """
+
+ from typing import Tuple, Optional, Dict, Any
+
+ from transformers import PretrainedConfig
+ from transformers.modeling_rope_utils import rope_config_validation
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ class MolmoActVitConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`MolmoActVisionTransformer`].
+     It is used to instantiate a `MolmoActVisionTransformer` according to the specified arguments,
+     defining the model architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+     Example:
+     ```python
+     >>> from transformers import MolmoActVitConfig, MolmoActVisionTransformer
+
+     >>> # Initializing a MolmoActVitConfig
+     >>> configuration = MolmoActVitConfig()
+
+     >>> # Initializing a MolmoActVisionTransformer (with random weights)
+     >>> model = MolmoActVisionTransformer(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```"""
+
+     model_type = "molmoact_vit"
+
+     def __init__(
+         self,
+         hidden_size: int = 1152,
+         intermediate_size: int = 4304,
+         num_hidden_layers: int = 27,
+         num_attention_heads: int = 16,
+         num_key_value_heads: int = 16,
+         head_dim: int = 72,
+         hidden_act: str = "gelu_pytorch_tanh",
+         layer_norm_eps: float = 1e-6,
+         image_default_input_size: Tuple[int, int] = (378, 378),
+         image_patch_size: int = 14,
+         image_num_pos: int = 577,
+         attention_dropout: float = 0.0,
+         residual_dropout: float = 0.0,
+         initializer_range: float = 0.02,
+         float32_attention: bool = True,
+         use_cls_token: bool = False,  # True for OpenCLIP
+         patch_bias: bool = True,  # False for OpenCLIP
+         pre_layernorm: bool = False,  # True for OpenCLIP
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+         self.head_dim = head_dim
+         self.hidden_act = hidden_act
+         self.layer_norm_eps = layer_norm_eps
+         self.image_default_input_size = image_default_input_size
+         self.image_patch_size = image_patch_size
+         self.image_num_pos = image_num_pos
+         self.attention_dropout = attention_dropout
+         self.residual_dropout = residual_dropout
+         self.initializer_range = initializer_range
+         self.float32_attention = float32_attention
+         self.use_cls_token = use_cls_token
+         self.patch_bias = patch_bias
+         self.pre_layernorm = pre_layernorm
+
+     @property
+     def image_num_patch(self):
+         h, w = self.image_default_input_size
+         return h // self.image_patch_size, w // self.image_patch_size
+
+
+ class MolmoActAdapterConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`MolmoActAdapter`]. Together with a
+     [`MolmoActVitConfig`], it is used to instantiate a `MolmoActVisionBackbone` according to the specified
+     arguments, defining the model architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+     Example:
+
+     ```python
+     >>> from transformers import MolmoActVitConfig, MolmoActAdapterConfig, MolmoActVisionBackbone
+
+     >>> # Initializing a MolmoActVitConfig and a MolmoActAdapterConfig
+     >>> vit_config = MolmoActVitConfig()
+     >>> adapter_config = MolmoActAdapterConfig()
+
+     >>> # Initializing a MolmoActVisionBackbone (with random weights)
+     >>> model = MolmoActVisionBackbone(vit_config, adapter_config)
+
+     >>> # Accessing the model configuration
+     >>> vit_configuration = model.vit_config
+     >>> adapter_configuration = model.adapter_config
+     ```"""
+
+     def __init__(
+         self,
+         vit_layers: Tuple = (-3, -9),
+         hidden_size: int = 1152,
+         num_attention_heads: int = 16,
+         num_key_value_heads: int = 16,
+         head_dim: int = 72,
+         float32_attention: bool = True,
+         attention_dropout: float = 0.0,
+         residual_dropout: float = 0.0,
+         hidden_act: str = "silu",
+         intermediate_size: int = 18944,
+         text_hidden_size: int = 3584,
+         image_feature_dropout: float = 0.0,
+         initializer_range: float = 0.02,
+         # pooling_mode: str = "indices",  # "indices" (SigLIP) or "2x2_attention" (OpenCLIP)
+         image_padding_embed: Optional[str] = None,  # e.g. "pad_and_partial_pad"
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.vit_layers = vit_layers
+         self.hidden_size = hidden_size
+         self.num_attention_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+         self.head_dim = head_dim
+         self.float32_attention = float32_attention
+         self.attention_dropout = attention_dropout
+         self.residual_dropout = residual_dropout
+         self.hidden_act = hidden_act
+         self.intermediate_size = intermediate_size
+         self.text_hidden_size = text_hidden_size
+         self.image_feature_dropout = image_feature_dropout
+         self.initializer_range = initializer_range
+         # self.pooling_mode = pooling_mode
+         self.image_padding_embed = image_padding_embed
+
+
+ class MolmoActLlmConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`MolmoActLlm`]. It is used to instantiate a
+     `MolmoActLlm` according to the specified arguments, defining the model architecture.
+
+     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
+     documentation from [`PretrainedConfig`] for more information.
+
+     Example:
+     ```python
+     >>> from transformers import MolmoActLlmConfig, MolmoActLlm
+
+     >>> # Initializing a MolmoActLlmConfig
+     >>> configuration = MolmoActLlmConfig()
+
+     >>> # Initializing a MolmoActLlm (with random weights)
+     >>> model = MolmoActLlm(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```"""
+
+     model_type = "molmoact_llm"
+     keys_to_ignore_at_inference = ["past_key_values"]
+     base_model_tp_plan = {
+         "blocks.*.self_attn.att_proj": "colwise",
+         "blocks.*.self_attn.attn_out": "rowwise",
+         "blocks.*.mlp.ff_proj": "colwise",
+         "blocks.*.mlp.ff_out": "rowwise",
+     }
+     base_model_pp_plan = {
+         "wte": (["input_ids"], ["inputs_embeds"]),
+         "blocks": (["hidden_states", "attention_mask"], ["hidden_states"]),
+         "ln_f": (["hidden_states"], ["hidden_states"]),
+     }
+
+     def __init__(
+         self,
+         hidden_size: int = 3584,
+         num_attention_heads: int = 28,
+         num_key_value_heads: Optional[int] = 4,
+         head_dim: int = 128,
+         vocab_size: int = 152064,
+         additional_vocab_size: int = 128,
+         qkv_bias: bool = True,
+         num_hidden_layers: int = 48,
+         intermediate_size: int = 18944,
+         hidden_act: str = "silu",
+         embedding_dropout: float = 0.0,
+         attention_dropout: float = 0.0,
+         residual_dropout: float = 0.0,
+         max_position_embeddings: int = 4096,
+         rope_theta: float = 1000000.0,
+         rope_scaling: Optional[Dict[str, Any]] = None,
+         use_qk_norm: bool = False,
+         qk_norm_type: str = "olmo",
+         layer_norm_eps: float = 1e-6,
+         norm_after: bool = False,
+         initializer_range: float = 0.02,
+         use_cache=True,
+         tie_word_embeddings=False,
+         **kwargs,
+     ):
+         super().__init__(
+             tie_word_embeddings=tie_word_embeddings,
+             **kwargs
+         )
+         self.hidden_size = hidden_size
+         self.num_attention_heads = num_attention_heads
+         if num_key_value_heads is None:
+             num_key_value_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+         self.head_dim = head_dim
+         self.vocab_size = vocab_size
+         self.additional_vocab_size = additional_vocab_size
+         self.qkv_bias = qkv_bias
+         self.num_hidden_layers = num_hidden_layers
+         self.intermediate_size = intermediate_size
+         self.hidden_act = hidden_act
+         self.embedding_dropout = embedding_dropout
+         self.attention_dropout = attention_dropout
+         self.residual_dropout = residual_dropout
+         self.max_position_embeddings = max_position_embeddings
+         self.rope_theta = rope_theta
+         self.rope_scaling = rope_scaling
+         self.use_qk_norm = use_qk_norm
+         self.qk_norm_type = qk_norm_type
+         self.layer_norm_eps = layer_norm_eps
+         self.norm_after = norm_after
+         self.initializer_range = initializer_range
+         self.use_cache = use_cache
+
+         # Validate the correctness of rotary position embeddings parameters
+         rope_config_validation(self)
+
+
+ class MolmoActConfig(PretrainedConfig):
+     r"""
+     This is the configuration class to store the configuration of a [`MolmoActForActionReasoning`].
+     It is used to instantiate a MolmoAct model according to the specified arguments, defining the model architecture.
+
+     Example:
+
+     ```python
+     >>> from transformers import MolmoActConfig, MolmoActVitConfig, MolmoActAdapterConfig, MolmoActLlmConfig
+
+     >>> # Initializing a MolmoActVitConfig
+     >>> vit_config = MolmoActVitConfig()
+
+     >>> # Initializing a MolmoActAdapterConfig
+     >>> adapter_config = MolmoActAdapterConfig()
+
+     >>> # Initializing a MolmoActLlmConfig
+     >>> llm_config = MolmoActLlmConfig()
+
+     >>> # Initializing a MolmoActConfig
+     >>> configuration = MolmoActConfig(vit_config, adapter_config, llm_config, image_patch_id=152069)
+
+     >>> # Initializing a model
+     >>> model = MolmoActForActionReasoning(configuration)
+
+     >>> # Accessing the model configuration
+     >>> configuration = model.config
+     ```"""
+
+     model_type = "molmoact"
+     sub_configs = {
+         "llm_config": MolmoActLlmConfig,
+         "vit_config": MolmoActVitConfig,
+         "adapter_config": MolmoActAdapterConfig,
+     }
+
+     def __init__(
+         self,
+         vit_config: Optional[MolmoActVitConfig] = None,
+         adapter_config: Optional[MolmoActAdapterConfig] = None,
+         llm_config: Optional[MolmoActLlmConfig] = None,
+         image_patch_id: Optional[int] = None,
+         initializer_range: float = 0.02,
+         n_action_bins: int = 256,
+         norm_stats: Optional[dict] = None,  # None instead of a mutable {} default
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         if vit_config is None:
+             self.vit_config = MolmoActVitConfig()
+         elif isinstance(vit_config, dict):
+             self.vit_config = MolmoActVitConfig(**vit_config)
+         else:
+             self.vit_config = vit_config
+         if adapter_config is None:
+             self.adapter_config = MolmoActAdapterConfig()
+         elif isinstance(adapter_config, dict):
+             self.adapter_config = MolmoActAdapterConfig(**adapter_config)
+         else:
+             self.adapter_config = adapter_config
+         if llm_config is None:
+             self.llm_config = MolmoActLlmConfig()
+         elif isinstance(llm_config, dict):
+             self.llm_config = MolmoActLlmConfig(**llm_config)
+         else:
+             self.llm_config = llm_config
+         self.image_patch_id = image_patch_id
+         self.initializer_range = initializer_range
+
+         self.n_action_bins = n_action_bins
+         self.norm_stats = norm_stats if norm_stats is not None else {}
+
+     @property
+     def image_num_patch(self):
+         assert self.vit_config is not None
+         return self.vit_config.image_num_patch
+
+     @property
+     def num_attention_heads(self):
+         return self.llm_config.num_attention_heads
+
+     @property
+     def num_key_value_heads(self):
+         return self.llm_config.num_key_value_heads
+
+     @property
+     def head_dim(self):
+         return self.llm_config.head_dim
+
+     @property
+     def num_hidden_layers(self):
+         return self.llm_config.num_hidden_layers
+
+     @property
+     def hidden_size(self):
+         return self.llm_config.hidden_size
+
+     @property
+     def vocab_size(self):
+         return self.llm_config.vocab_size
+
+     @property
+     def max_position_embeddings(self):
+         return self.llm_config.max_position_embeddings
+
+
+ MolmoActVitConfig.register_for_auto_class()
+ MolmoActAdapterConfig.register_for_auto_class()
+ MolmoActLlmConfig.register_for_auto_class()
+ MolmoActConfig.register_for_auto_class()
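
A short sketch of composing the top-level config from its sub-configs, following the docstring examples above. The overrides echo config.json where it differs from the class defaults (image_num_pos 729 vs. 577, num_hidden_layers 28 vs. 48):

vit_config = MolmoActVitConfig(image_num_pos=729)
adapter_config = MolmoActAdapterConfig()
llm_config = MolmoActLlmConfig(num_hidden_layers=28)
config = MolmoActConfig(
    vit_config=vit_config,
    adapter_config=adapter_config,
    llm_config=llm_config,
    image_patch_id=152066,  # "<im_patch>" in added_tokens.json
)

# Top-level accessors proxy into the sub-configs via the properties above.
assert config.hidden_size == llm_config.hidden_size
assert config.image_num_patch == (27, 27)  # 378 // 14 per side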
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token_id": 151643,
+   "eos_token_id": 151643,
+   "pad_token_id": 151643,
+   "transformers_version": "4.52.3"
+ }
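
All three ids point at 151643, which added_tokens.json maps to `<|endoftext|>`. A quick check (placeholder path again):

from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("path/to/this-repo")
assert gen.bos_token_id == gen.eos_token_id == gen.pad_token_id == 151643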
image_processing_molmoact.py ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image processor class for MolmoAct"""
2
+ from typing import TYPE_CHECKING, Tuple, List, Optional, Union, Dict, Any
3
+ import numpy as np
4
+ import einops
5
+ import torch
6
+ import torchvision.transforms
7
+ from torchvision.transforms import InterpolationMode
8
+ from torchvision.transforms.functional import convert_image_dtype
9
+
10
+ from transformers.image_utils import (
11
+ OPENAI_CLIP_MEAN,
12
+ OPENAI_CLIP_STD,
13
+ ChannelDimension,
14
+ ImageInput,
15
+ is_valid_image,
16
+ valid_images,
17
+ to_numpy_array,
18
+ )
19
+ from transformers.image_transforms import convert_to_rgb, to_channel_dimension_format
20
+ from transformers.processing_utils import ImagesKwargs
21
+ from transformers.image_processing_utils import BaseImageProcessor
22
+ from transformers.utils import logging
23
+ from transformers.feature_extraction_utils import BatchFeature
24
+ from transformers.utils import TensorType, logging
25
+
26
+
27
+ if TYPE_CHECKING:
28
+ from transformers.utils import TensorType, logging
29
+
30
+
31
+ logger = logging.get_logger(__name__)
32
+
33
+
34
+ def is_multi_image(image: Union[ImageInput, List[ImageInput]]) -> bool:
35
+ return isinstance(image, (list, tuple))
36
+
37
+
38
+ def make_batched_images(images) -> List[ImageInput]:
39
+ """
40
+ Accepts images in list or nested list format.
41
+
42
+ Args:
43
+ images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
44
+ The input image.
45
+
46
+ Returns:
47
+ list: A list of images or a list of lists of images.
48
+ """
49
+ if isinstance(images, (list, tuple)) and isinstance(images[0], (list, tuple)) and is_valid_image(images[0][0]):
50
+ return images
51
+
52
+ elif isinstance(images, (list, tuple)) and is_valid_image(images[0]):
53
+ return images
54
+
55
+ elif is_valid_image(images):
56
+ return [images]
57
+
58
+ raise ValueError(f"Could not make batched images from {images}")
59
+
60
+
61
+ def normalize_image(image: np.ndarray, normalize_mode: str) -> np.ndarray:
62
+ if normalize_mode == "openai":
63
+ image -= np.array(OPENAI_CLIP_MEAN, dtype=np.float32)[None, None, :]
64
+ image /= np.array(OPENAI_CLIP_STD, dtype=np.float32)[None, None, :]
65
+ elif normalize_mode == "siglip":
66
+ image = np.asarray(-1.0, dtype=np.float32) + image * np.asarray(2.0, dtype=np.float32)
67
+ elif normalize_mode == "dino":
68
+ image -= np.array([0.485, 0.456, 0.406], dtype=np.float32)[None, None, :]
69
+ image /= np.array([0.229, 0.224, 0.225], dtype=np.float32)[None, None, :]
70
+ else:
71
+ raise NotImplementedError(normalize_mode)
72
+ return image
73
+
74
+
75
+ # Helper to ensure output_size is a 2-tuple of built-in Python ints
76
+ def _ensure_pyint_size2(size):
77
+ """
78
+ Ensure `size` is a 2-tuple of built-in Python ints.
79
+ Accepts int, list/tuple, or numpy array of length 1 or 2.
80
+ """
81
+ import numpy as np
82
+ # If it's an array-like, normalize to length-2 tuple
83
+ if isinstance(size, (list, tuple, np.ndarray)):
84
+ if len(size) == 2:
85
+ return (int(size[0]), int(size[1]))
86
+ elif len(size) == 1:
87
+ s = int(size[0])
88
+ return (s, s)
89
+ else:
90
+ # Fallback: try to interpret as square size using first element
91
+ s = int(size[0])
92
+ return (s, s)
93
+ # Scalar → square size
94
+ s = int(size)
95
+ return (s, s)
96
+
97
+
98
+ def resize_and_pad(
99
+ image,
100
+ desired_output_size,
101
+ resize_method="torch-bilinear",
102
+ pad_value=0,
103
+ ):
104
+ """Resize an image while padding to preserve uts aspect ratio."""
105
+ desired_output_size = _ensure_pyint_size2(desired_output_size)
106
+ desired_height, desired_width = desired_output_size
107
+ height, width = image.shape[:2]
108
+
109
+ # Cast into float32 since the training code did this in float32 and it (very rarely) effects
110
+ # the results after rounding.
111
+ image_scale_y = np.array(desired_height, np.float32) / np.array(height, np.float32)
112
+ image_scale_x = np.array(desired_width, np.float32) / np.array(width, np.float32)
113
+ image_scale = min(image_scale_x, image_scale_y)
114
+ scaled_height = int(np.array(height, np.float32) * image_scale)
115
+ scaled_width = int(np.array(width, np.float32) * image_scale)
116
+
117
+ if resize_method in ["torch-bilinear"]:
118
+ image = torch.permute(torch.from_numpy(image), [2, 0, 1])
119
+ image = convert_image_dtype(image) # resize in float32 to match the training code
120
+ mode = InterpolationMode.BILINEAR
121
+ image = torchvision.transforms.Resize([scaled_height, scaled_width], mode, antialias=True)(image)
122
+ image = torch.clip(image, 0.0, 1.0)
123
+ image = torch.permute(image, [1, 2, 0]).numpy()
124
+ else:
125
+ raise NotImplementedError(resize_method)
126
+
127
+ top_pad = (desired_height - scaled_height) // 2
128
+ left_pad = (desired_width - scaled_width) // 2
129
+ padding = [
130
+ [top_pad, desired_height - scaled_height - top_pad],
131
+ [left_pad, desired_width - scaled_width - left_pad],
132
+ [0, 0]
133
+ ]
134
+ image_mask = np.pad(np.ones_like(image[:, :, 0], dtype=bool), padding[:2])
135
+ image = np.pad(image, padding, constant_values=pad_value)
136
+ return image, image_mask
137
+
138
+
139
+ def metaclip_resize(image, desired_output_size):
140
+ desired_output_size = _ensure_pyint_size2(desired_output_size)
141
+ image = torch.permute(torch.from_numpy(image), [2, 0, 1])
142
+ if torch.is_floating_point(image):
143
+ image = torchvision.transforms.Resize(
144
+ desired_output_size, InterpolationMode.BICUBIC, antialias=True)(image)
145
+ image = torch.clip(image, 0.0, 1.0)
146
+ else:
147
+ assert image.dtype == torch.uint8, "Expected float images or uint8 images, but got {}".format(image.dtype)
148
+ image = torchvision.transforms.Resize(
149
+ desired_output_size, InterpolationMode.BICUBIC, antialias=True)(image)
150
+ image = image.to(torch.float32)
151
+ image = torch.clip(image, 0, 255)
152
+ image = image / 255.0
153
+ resized = torch.permute(image, [1, 2, 0]).numpy()
154
+ image_mask = np.ones_like(resized[:, :, 0], dtype=np.bool_)
155
+ return resized, image_mask
156
+
157
+
158
+ def siglip_resize_and_pad(
159
+ image: np.ndarray,
160
+ desired_output_size: Tuple[int, int],
161
+ ) -> Tuple[np.ndarray, np.ndarray]:
162
+ desired_output_size = _ensure_pyint_size2(desired_output_size)
163
+ # by default, image is a single image
164
+ image = torch.permute(torch.from_numpy(image), [2, 0, 1])
165
+ dtype = image.dtype
166
+ if torch.is_floating_point(image):
167
+ in_min = 0.0
168
+ in_max = 1.0
169
+ resized = torchvision.transforms.Resize(
170
+ desired_output_size,
171
+ InterpolationMode.BILINEAR,
172
+ antialias=False,
173
+ )(image)
174
+ resized = torch.clip(resized, 0.0, 1.0).to(dtype)
175
+ else:
176
+ assert image.dtype == torch.uint8, "SigLIP expects float images or uint8 images, but got {}".format(image.dtype)
177
+ in_min = 0.0
178
+ in_max = 255.0
179
+ resized = torchvision.transforms.Resize(
180
+ desired_output_size,
181
+ InterpolationMode.BILINEAR,
182
+ antialias=False,
183
+ )(image)
184
+ resized = torch.clip(resized, 0, 255).to(dtype)
185
+
186
+ resized = resized.to(torch.float32)
187
+ resized = (resized - in_min) / (in_max - in_min)
188
+
189
+ resized = torch.permute(resized, [1, 2, 0]).numpy()
190
+ image_mask = np.ones_like(resized[:, :, 0], dtype=np.bool_)
191
+
192
+ return resized, image_mask
193
+
194
+
195
+ def dino_resize_and_pad(
196
+ image: np.ndarray,
197
+ desired_output_size: Tuple[int, int],
198
+ ) -> Tuple[np.ndarray, np.ndarray]:
199
+ desired_output_size = _ensure_pyint_size2(desired_output_size)
200
+ image = torch.permute(torch.from_numpy(image), [2, 0, 1])
201
+ dtype = image.dtype
202
+ if torch.is_floating_point(image):
203
+ resized = torchvision.transforms.Resize(
204
+ desired_output_size,
205
+ InterpolationMode.BICUBIC,
206
+ antialias=True,
207
+ )(image)
208
+ resized = torch.clip(resized, 0.0, 1.0).to(torch.float32)
209
+ else:
210
+ assert image.dtype == torch.uint8, "DINOv2 expects float images or uint8 images, but got {}".format(image.dtype)
211
+ resized = torchvision.transforms.Resize(
212
+ desired_output_size,
213
+ InterpolationMode.BICUBIC,
214
+ antialias=True,
215
+ )(image)
216
+ resized = torch.clip(resized, 0, 255).to(torch.float32)
217
+ resized = resized / 255.0
218
+
219
+ resized = torch.permute(resized, [1, 2, 0]).numpy()
220
+ image_mask = np.ones_like(resized[:, :, 0], dtype=np.bool_)
221
+
222
+ return resized, image_mask
223
+
224
+
225
+ def resize_image(
226
+ image: np.ndarray,
227
+ resize_mode: str,
228
+ output_size: Tuple[int, int],
229
+ pad_value: float,
230
+ ) -> Tuple[np.ndarray, np.ndarray]:
231
+ if resize_mode == "siglip":
232
+ return siglip_resize_and_pad(image, output_size)
233
+ elif resize_mode == "dino":
234
+ return dino_resize_and_pad(image, output_size)
235
+ elif resize_mode == "metaclip":
236
+ return metaclip_resize(image, output_size)
237
+ else:
238
+ resize = "torch-bilinear" if resize_mode == "default" else resize_mode
239
+ return resize_and_pad(
240
+ image, output_size, resize_method=resize, pad_value=pad_value,
241
+ )
242
+
243
+
244
+ def select_tiling(h, w, patch_size, max_num_crops):
245
+ """Divide in image of size [w, h] in up to max_num_patches of size patch_size"""
246
+ original_size = np.stack([h, w]) # [1, 2]
247
+ original_res = h * w
248
+ tilings = []
249
+ for i in range(1, max_num_crops + 1):
250
+ for j in range(1, max_num_crops + 1):
251
+ if i*j <= max_num_crops:
252
+ tilings.append((i, j))
253
+ # sort so argmin and argmax favour smaller tilings in the event of a tie
254
+ tilings.sort(key=lambda x: (x[0]*x[1], x[0]))
255
+ candidate_tilings = np.array(tilings, dtype=np.int32) # [n_resolutions, 2]
256
+ candidate_resolutions = candidate_tilings * patch_size # [n_resolutions, 2]
257
+
258
+ # How much we would need to scale the image to fit exactly in each tiling
259
+ original_size = np.stack([h, w], dtype=np.float32) # [1, 2]
260
+
261
+ # The original size can be zero in rare cases if the image is smaller than the margin
262
+ # In those cases letting the scale become infinite means the tiling is based on the
263
+ # other side, or falls back to the smallest tiling
264
+ with np.errstate(divide='ignore'):
265
+ required_scale_d = candidate_resolutions.astype(np.float32) / original_size,
266
+ required_scale = np.min(required_scale_d, axis=-1, keepdims=True) # [n_resolutions, 1]
267
+ if np.all(required_scale < 1):
268
+ # We are forced to downscale, so try to minimize the amount of downscaling
269
+ ix = np.argmax(required_scale)
270
+ else:
271
+ # Pick the resolution that required the least upscaling so that it most closely fits the image
272
+ required_scale = np.where(required_scale < 1.0, 10e9, required_scale)
273
+ ix = np.argmin(required_scale)
274
+ return candidate_tilings[ix]
275
+
276
+
277
+ def build_resized_image(
278
+ image: np.ndarray,
279
+ resize_mode: str,
280
+ normalized_mode: str,
281
+ base_image_input_size: List[int],
282
+ pad_value: float,
283
+ image_patch_size: int,
284
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
285
+ resized, resized_mask = resize_image(
286
+ image, resize_mode, base_image_input_size, pad_value,
287
+ )
288
+ resized = normalize_image(resized, normalized_mode)
289
+ if len(resized.shape) == 3:
290
+ resized = np.expand_dims(resized, 0)
291
+ resized_mask = np.expand_dims(resized_mask, 0)
292
+ crop_patch_w = base_image_input_size[1] // image_patch_size
293
+ crop_patch_h = base_image_input_size[0] // image_patch_size
294
+ resize_idx = np.arange(crop_patch_w*crop_patch_h).reshape([crop_patch_h, crop_patch_w])
295
+ return resized, resized_mask, resize_idx
296
+
297
+
298
+ def build_overlapping_crops(
299
+ image: np.ndarray,
300
+ resize_mode: str,
301
+ normalize_mode: str,
302
+ max_crops: int,
303
+ overlap_margins: List[int],
304
+ base_image_input_size: List[int],
305
+ pad_value: float,
306
+ image_patch_size: int,
307
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
308
+ """Decompose an image into a set of overlapping crops
309
+
310
+ :return crop_arr: [n_crops, h, w, 3] The crops
311
+ :return mask_arr: [n_crops, h, w] The padding masks
312
+ :return patch_idx: [overlap_patch_h, overlap_patch_w] For each patch in the resized image
313
+ the crops were extracted from, what patch in `crop_arr` it corresponds to
314
+ """
315
+ original_image_h, original_image_w = image.shape[:2]
316
+ crop_size = base_image_input_size[0]
317
+ assert base_image_input_size[0] == base_image_input_size[1]
318
+
319
+ left_margin, right_margin = overlap_margins
320
+ total_margin_pixels = image_patch_size * (right_margin + left_margin) # pixels removed per dim
321
+ crop_patches = base_image_input_size[0] // image_patch_size # patches per crop dim
322
+ crop_window_patches = crop_patches - (right_margin + left_margin) # usable patches
323
+ crop_window_size = crop_window_patches * image_patch_size
324
+ crop_patch_w = base_image_input_size[1] // image_patch_size
325
+ crop_patch_h = base_image_input_size[0] // image_patch_size
326
+ original_image_h, original_image_w = image.shape[:2]
327
+ crop_size = base_image_input_size[0]
328
+
329
+ # Decide how to tile the image, to account for the overlap margins we compute the tiling
330
+ # as if we had an image without the margins and were using a crop size without the margins
331
+ tiling = select_tiling(
332
+ original_image_h - total_margin_pixels,
333
+ original_image_w - total_margin_pixels,
334
+ crop_window_size,
335
+ max_crops,
336
+ )
337
+
338
+ src, img_mask = resize_image(
339
+ image,
340
+ resize_mode,
341
+ [tiling[0]*crop_window_size+total_margin_pixels, tiling[1]*crop_window_size+total_margin_pixels],
342
+ pad_value,
343
+ )
344
+ src = normalize_image(src, normalize_mode)
345
+
346
+ # Now we have to split the image into crops, and track what patches came from
347
+ # where in `patch_idx_arr`
348
+ n_crops = tiling[0] * tiling[1]
349
+ crop_arr = np.zeros([n_crops, crop_size, crop_size, 3], dtype=src.dtype)
350
+ mask_arr = np.zeros([n_crops, crop_size, crop_size], dtype=img_mask.dtype)
351
+ patch_idx_arr = np.zeros([n_crops, crop_patch_h, crop_patch_w], dtype=np.int32)
352
+ on = 0
353
+ on_crop = 0
354
+ for i in range(tiling[0]):
355
+ # Slide over `src` by `crop_window_size` steps, but extract crops of size `crops_size`
356
+ # which results in overlapping crop windows
357
+ y0 = i*crop_window_size
358
+ for j in range(tiling[1]):
359
+ x0 = j*crop_window_size
360
+ crop_arr[on_crop] = src[y0:y0+crop_size, x0:x0+crop_size]
361
+ mask_arr[on_crop] = img_mask[y0:y0+crop_size, x0:x0+crop_size]
362
+ patch_idx = np.arange(crop_patch_w*crop_patch_h).reshape(crop_patch_h, crop_patch_w)
363
+ patch_idx += on_crop * crop_patch_h * crop_patch_w
364
+
365
+ # Mask out idx that are in the overlap region
366
+ if i != 0:
367
+ patch_idx[:left_margin, :] = -1
368
+ if j != 0:
369
+ patch_idx[:, :left_margin] = -1
370
+ if i != tiling[0]-1:
371
+ patch_idx[-right_margin:, :] = -1
372
+ if j != tiling[1]-1:
373
+ patch_idx[:, -right_margin:] = -1
374
+ patch_idx_arr[on_crop] = patch_idx
375
+ on_crop += 1
376
+
377
+ # `patch_idx_arr` is ordered crop-by-crop, here we transpose `patch_idx_arr`
378
+ # so it is ordered left-to-right order
379
+ patch_idx_arr = np.reshape(
380
+ patch_idx_arr,
381
+ [tiling[0], tiling[1], crop_patch_h, crop_patch_w]
382
+ )
383
+ patch_idx_arr = np.transpose(patch_idx_arr, [0, 2, 1, 3])
384
+ patch_idx_arr = np.reshape(patch_idx_arr, [-1])
385
+
386
+ # Now get the parts not in the overlap region, so it should map each patch in `src`
387
+ # to the correct patch it should come from in `crop_arr`
388
+ patch_idx_arr = patch_idx_arr[patch_idx_arr >= 0].reshape(
389
+ src.shape[0]//image_patch_size,
390
+ src.shape[1]//image_patch_size,
391
+ )
392
+ return crop_arr, mask_arr, patch_idx_arr
393
+
394
+
395
+ def batch_pixels_to_patches(array: np.ndarray, patch_size: int) -> np.ndarray:
396
+ """Reshape images of [n_images, h, w, 3] -> [n_images, n_patches, pixels_per_patch]"""
397
+ if len(array.shape) == 3:
398
+ n_crops, h, w = array.shape
399
+ h_patches = h//patch_size
400
+ w_patches = w//patch_size
401
+ array = np.reshape(array, [n_crops, h_patches, patch_size, w_patches, patch_size])
402
+ array = np.transpose(array, [0, 1, 3, 2, 4])
403
+ array = np.reshape(array, [n_crops, h_patches*w_patches, patch_size*patch_size])
404
+ return array
405
+ else:
406
+ n_crops, h, w, c = array.shape
407
+ h_patches = h//patch_size
408
+ w_patches = w//patch_size
409
+ array = np.reshape(array, [n_crops, h_patches, patch_size, w_patches, patch_size, c])
410
+ array = np.transpose(array, [0, 1, 3, 2, 4, 5])
411
+ array = np.reshape(array, [n_crops, h_patches*w_patches, patch_size*patch_size*c])
412
+ return array
413
+
414
+
415
+ def arange_for_pooling(
416
+ idx_arr: np.ndarray,
417
+ pool_h: int,
418
+ pool_w: int,
419
+ ) -> np.ndarray:
420
+ h_pad = pool_h * ((idx_arr.shape[0] + pool_h - 1) // pool_h) - idx_arr.shape[0]
421
+ w_pad = pool_w * ((idx_arr.shape[1] + pool_w - 1) // pool_w) - idx_arr.shape[1]
422
+ idx_arr = np.pad(idx_arr, [[h_pad//2, (h_pad+1)//2], [w_pad//2, (w_pad+1)//2]],
423
+ mode='constant',constant_values=-1)
424
+ return einops.rearrange(
425
+ idx_arr, "(h dh) (w dw) -> h w (dh dw)", dh=pool_h, dw=pool_w)
426
+
427
+
428
+ def image_to_patches_and_grids(
429
+ image: ImageInput,
430
+ crop_mode: str,
431
+ resize_mode: str,
432
+ normalize_mode: str,
433
+ max_crops: int,
434
+ overlap_margins: List[int],
435
+ base_image_input_size: List[int],
436
+ pad_value: float,
437
+ image_patch_size: int,
438
+ image_pooling_w: int,
439
+ image_pooling_h: int,
440
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
441
+ """
442
+ :return image_grids, the shape of each (low-res, high-res) image after pooling
443
+ :return crops, the image crops to processes with the ViT
444
+ :return mask, the padding mask for each crop
445
+ :return pooled_patch_idx, for each patch_id tokens in `image_tokens`, the indices of the
446
+ patches in `crops` to pool for that token, masked with -1
447
+ """
448
+ if isinstance(base_image_input_size, int):
449
+ base_image_input_size = (base_image_input_size, base_image_input_size)
450
+
451
+ base_image_input_d = image_patch_size
452
+ pooling_w = image_pooling_w
453
+ pooling_h = image_pooling_h
454
+ crop_patch_w = base_image_input_size[1] // base_image_input_d
455
+ crop_patch_h = base_image_input_size[0] // base_image_input_d
456
+
457
+ if crop_mode == "resize":
458
+ resized, resized_mask, resize_idx = build_resized_image(
459
+ image,
460
+ resize_mode,
461
+ normalize_mode,
462
+ base_image_input_size,
463
+ pad_value,
464
+ image_patch_size
465
+ )
466
+ pooling_idx = arange_for_pooling(resize_idx, pooling_h, pooling_w)
467
+ h, w = pooling_idx.shape[:2]
468
+ pooling_idx = pooling_idx.reshape([-1, pooling_h*pooling_w])
469
+ image_grid = [np.array([h, w])]
470
+ return (
471
+ np.stack(image_grid, 0),
472
+ batch_pixels_to_patches(resized, image_patch_size),
473
+ batch_pixels_to_patches(resized_mask, image_patch_size).mean(-1),
474
+ pooling_idx,
475
+ )
476
+
477
+ if crop_mode in ["overlap-and-resize-c2", "overlap-and-resize"]:
478
+ crop_arr, mask_arr, patch_idx_arr = build_overlapping_crops(
479
+ image,
480
+ resize_mode,
481
+ normalize_mode,
482
+ max_crops,
483
+ overlap_margins,
484
+ base_image_input_size,
485
+ pad_value,
486
+ image_patch_size,
487
+ )
488
+ pooling_idx = arange_for_pooling(patch_idx_arr, pooling_h, pooling_w)
489
+ h, w = pooling_idx.shape[:2]
490
+ pooling_idx = pooling_idx.reshape([-1, pooling_h*pooling_w])
491
+ image_grid = [np.array([h, w])]
492
+
493
+ if crop_mode == "overlap-and-resize":
494
+ crop_arr = batch_pixels_to_patches(crop_arr, image_patch_size)
495
+ mask_arr = batch_pixels_to_patches(mask_arr, image_patch_size).astype(np.float32).mean(axis=-1)
496
+ return np.stack(image_grid, 0), crop_arr, mask_arr, pooling_idx
497
+
498
+ # Finally do the same for the global image
499
+ resized, resized_mask, resize_idx = build_resized_image(
500
+ image,
501
+ resize_mode,
502
+ normalize_mode,
503
+ base_image_input_size,
504
+ pad_value,
505
+ image_patch_size
506
+ )
507
+ crop_arr = np.concatenate([resized, crop_arr], 0)
508
+
509
+ mask_arr = np.concatenate([resized_mask, mask_arr], 0)
510
+
511
+ resize_idx = arange_for_pooling(resize_idx, pooling_h, pooling_w)
512
+ h, w = resize_idx.shape[:2]
513
+ resize_idx = resize_idx.reshape([-1, pooling_h*pooling_w])
514
+
515
+ # Global image goes first, so the order of patches in previous crops gets increased
516
+ pooling_idx = np.where(
517
+ pooling_idx >= 0,
518
+ pooling_idx + crop_patch_h*crop_patch_w,
519
+ -1
520
+ )
521
+ pooling_idx = np.concatenate([resize_idx, pooling_idx])
522
+ image_grid = [
523
+ np.array([h, w]),
524
+ ] + image_grid
525
+
526
+ mask_arr = batch_pixels_to_patches(mask_arr, image_patch_size).astype(np.float32).mean(axis=-1)
527
+ return (
528
+ np.stack(image_grid, 0),
529
+ batch_pixels_to_patches(crop_arr, image_patch_size),
530
+ mask_arr,
531
+ pooling_idx
532
+ )
533
+ else:
534
+ raise NotImplementedError(crop_mode)
535
+
536
+
537
+ def image_to_patches_and_tokens(
538
+ image: ImageInput,
539
+ crop_mode: str,
540
+ use_col_tokens: bool,
541
+ resize_mode: str,
542
+ normalize_mode: str,
543
+ max_crops: int,
544
+ overlap_margins: List[int],
545
+ base_image_input_size: List[int],
546
+ pad_value: float,
547
+ image_patch_size: int,
548
+ image_pooling_w: int,
549
+ image_pooling_h: int,
550
+ image_patch_token_id: int,
551
+ image_col_token_id: int,
552
+ image_start_token_id: int,
553
+ image_end_token_id: int,
554
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
555
+ """
556
+ :return image_tokens, the token IDs for this image, including special tokens
557
+ :return crops, the image crops to process with the ViT
558
+ :return mask, the padding mask for each crop
559
+ :return pooled_patch_idx, for each patch token in `image_tokens`, the indices of the
560
+ patches in `crops` to pool for that token, masked with -1
561
+ """
562
+
563
+ if isinstance(base_image_input_size, int):
564
+ base_image_input_size = (base_image_input_size, base_image_input_size)
565
+
566
+ base_image_input_d = image_patch_size
567
+ pooling_w = image_pooling_w
568
+ pooling_h = image_pooling_h
569
+ patch_id = image_patch_token_id
570
+ col_id = image_col_token_id
571
+ start_id = image_start_token_id
572
+ end_id = image_end_token_id
573
+ crop_patch_w = base_image_input_size[1] // base_image_input_d
574
+ crop_patch_h = base_image_input_size[0] // base_image_input_d
575
+
576
+ if crop_mode == "resize":
577
+ resized, resized_mask, resize_idx = build_resized_image(
578
+ image,
579
+ resize_mode,
580
+ normalize_mode,
581
+ base_image_input_size,
582
+ pad_value,
583
+ image_patch_size
584
+ )
585
+ pooling_idx = arange_for_pooling(resize_idx, pooling_h, pooling_w)
586
+ h, w = pooling_idx.shape[:2]
587
+ pooling_idx = pooling_idx.reshape([-1, pooling_h*pooling_w])
588
+ per_row = np.full(
589
+ (w,),
590
+ patch_id,
591
+ dtype=np.int32
592
+ )
593
+ if use_col_tokens:
594
+ per_row = np.concatenate([per_row, [col_id]], 0)
595
+ extra_tokens = np.tile(per_row, [h])
596
+ joint = [
597
+ [start_id],
598
+ extra_tokens,
599
+ [end_id],
600
+ ]
601
+ return (
602
+ np.concatenate(joint, 0),
603
+ batch_pixels_to_patches(resized, image_patch_size),
604
+ batch_pixels_to_patches(resized_mask, image_patch_size).mean(-1),
605
+ pooling_idx,
606
+ )
607
+
608
+ if crop_mode in ["overlap-and-resize-c2", "overlap-and-resize"]:
609
+ crop_arr, mask_arr, patch_idx_arr = build_overlapping_crops(
610
+ image,
611
+ resize_mode,
612
+ normalize_mode,
613
+ max_crops,
614
+ overlap_margins,
615
+ base_image_input_size,
616
+ pad_value,
617
+ image_patch_size,
618
+ )
619
+ pooling_idx = arange_for_pooling(patch_idx_arr, pooling_h, pooling_w)
620
+ h, w = pooling_idx.shape[:2]
621
+ pooling_idx = pooling_idx.reshape([-1, pooling_h*pooling_w])
622
+
623
+ # Now build the output tokens
624
+ per_row = np.full(w, patch_id, dtype=np.int32)
625
+ if use_col_tokens:
626
+ per_row = np.concatenate([per_row, [col_id]], 0)
627
+ joint = np.tile(per_row, [h])
628
+ joint = [
629
+ [start_id],
630
+ joint,
631
+ [end_id]
632
+ ]
633
+
634
+ if crop_mode == "overlap-and-resize":
635
+ crop_arr = batch_pixels_to_patches(crop_arr, image_patch_size)
636
+ mask_arr = batch_pixels_to_patches(mask_arr, image_patch_size).astype(np.float32).mean(axis=-1)
637
+ return np.concatenate(joint, 0), crop_arr, mask_arr, pooling_idx
638
+
639
+ # Finally do the same for the global image
640
+ resized, resized_mask, resize_idx = build_resized_image(
641
+ image,
642
+ resize_mode,
643
+ normalize_mode,
644
+ base_image_input_size,
645
+ pad_value,
646
+ image_patch_size
647
+ )
648
+ crop_arr = np.concatenate([resized, crop_arr], 0)
649
+
650
+ mask_arr = np.concatenate([resized_mask, mask_arr], 0)
651
+
652
+ resize_idx = arange_for_pooling(resize_idx, pooling_h, pooling_w)
653
+ h, w = resize_idx.shape[:2]
654
+ resize_idx = resize_idx.reshape([-1, pooling_h*pooling_w])
655
+
656
+ # The global image goes first, so indices into the high-res crops are offset by its patch count
657
+ pooling_idx = np.where(
658
+ pooling_idx >= 0,
659
+ pooling_idx + crop_patch_h*crop_patch_w,
660
+ -1
661
+ )
662
+ pooling_idx = np.concatenate([resize_idx, pooling_idx])
663
+
664
+ per_row = np.full(
665
+ (w,),
666
+ patch_id,
667
+ dtype=np.int32
668
+ )
669
+ if use_col_tokens:
670
+ per_row = np.concatenate([per_row, [col_id]], 0)
671
+ extra_tokens = np.tile(per_row, [h])
672
+ joint = [
673
+ [start_id],
674
+ extra_tokens,
675
+ [end_id],
676
+ ] + joint
677
+ mask_arr = batch_pixels_to_patches(mask_arr, image_patch_size).astype(np.float32).mean(axis=-1)
678
+ return (
679
+ np.concatenate(joint, 0),
680
+ batch_pixels_to_patches(crop_arr, image_patch_size),
681
+ mask_arr,
682
+ pooling_idx
683
+ )
684
+ else:
685
+ raise NotImplementedError(crop_mode)
686
+
687
+
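Editorial sketch (not part of the uploaded file): the token stream `image_to_patches_and_tokens` builds is [start] + h rows of (w patch tokens, plus one column token when enabled) + [end]. With hypothetical token IDs:

import numpy as np

patch_id, col_id, start_id, end_id = 10, 11, 12, 13   # hypothetical IDs, for illustration only
h, w, use_col_tokens = 2, 3, True
per_row = np.full(w, patch_id, dtype=np.int32)
if use_col_tokens:
    per_row = np.concatenate([per_row, [col_id]])
tokens = np.concatenate([[start_id], np.tile(per_row, h), [end_id]])
print(tokens)   # [12 10 10 10 11 10 10 10 11 13], i.e. 1 + h*(w+1) + 1 tokens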
688
+ class MolmoActImagesKwargs(ImagesKwargs, total=False):
689
+ crop_mode: Optional[str]
690
+ resize_mode: Optional[str]
691
+ normalize_mode: Optional[str]
692
+ max_crops: Optional[int]
693
+ max_multi_image_crops: Optional[int]
694
+ overlap_margins: Optional[List[int]]
695
+ base_image_input_size: Optional[List[int]]
696
+ pad_value: Optional[float]
697
+ image_patch_size: Optional[int]
698
+ image_pooling_w: Optional[int]
699
+ image_pooling_h: Optional[int]
700
+
701
+
702
+ class MolmoActImageProcessor(BaseImageProcessor):
703
+
704
+ model_input_names = ["images", "pooled_patches_idx", "image_masks"]
705
+
706
+ def __init__(
707
+ self,
708
+ crop_mode: str = "overlap-and-resize-c2",
709
+ resize_mode: str = "siglip",
710
+ normalize_mode: str = "siglip",
711
+ max_crops: int = 8,
712
+ max_multi_image_crops: int = 4,
713
+ overlap_margins: List[int] = [4, 4],
714
+ base_image_input_size: List[int] = (378, 378),
715
+ pad_value: float = 0.0,
716
+ image_patch_size: int = 14,
717
+ image_pooling_w: int = 2,
718
+ image_pooling_h: int = 2,
719
+ do_convert_rgb: bool = True,
720
+ do_pad: Optional[bool] = True,
721
+ **kwargs,
722
+ ) -> None:
723
+ super().__init__(**kwargs)
724
+ self.crop_mode = crop_mode
725
+ self.resize_mode = resize_mode
726
+ self.normalize_mode = normalize_mode
727
+ self.overlap_margins = overlap_margins
728
+ self.max_crops = max_crops
729
+ self.max_multi_image_crops = max_multi_image_crops
730
+ self.base_image_input_size = base_image_input_size
732
+ self.pad_value = pad_value
733
+ self.image_patch_size = image_patch_size
734
+ self.image_pooling_w = image_pooling_w
735
+ self.image_pooling_h = image_pooling_h
736
+ self.do_convert_rgb = do_convert_rgb
737
+ self.do_pad = do_pad
738
+
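Editorial sketch (not part of the uploaded file): how the defaults set above work out geometrically. The 27-patches-per-side figure follows directly from 378 // 14; the ceiling division for a ragged final pooling window is an assumption, since the exact mapping lives in arange_for_pooling (defined earlier in this file):

base, patch, pool = 378, 14, 2                  # defaults from __init__ above
patches_per_side = base // patch                # 27 patches along each crop side
pooled_per_side = -(-patches_per_side // pool)  # 14 pooled tokens per side (assumed ceiling)
print(patches_per_side, pooled_per_side)        # 27 14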
739
+ def to_channel_dimension_last(
740
+ self,
741
+ images: List[ImageInput],
742
+ ) -> List[ImageInput]:
743
+ """
744
+ Convert images to channel dimension last.
745
+ """
746
+ new_images = []
747
+ for image in images:
748
+ if is_multi_image(image):
749
+ new_images.append([to_channel_dimension_format(img, ChannelDimension.LAST) for img in image])
750
+ else:
751
+ new_images.append(to_channel_dimension_format(image, ChannelDimension.LAST))
752
+ return new_images
753
+
754
+ def to_numpy_array(
755
+ self,
756
+ images: List[ImageInput],
757
+ ) -> List[np.ndarray]:
758
+ """
759
+ Convert images to numpy arrays.
760
+ """
761
+ new_images = []
762
+ for image in images:
763
+ if is_multi_image(image):
764
+ new_images.append([to_numpy_array(img) for img in image])
765
+ else:
766
+ new_images.append(to_numpy_array(image))
767
+ return new_images
768
+
769
+ def to_rgb(
770
+ self,
771
+ images: List[ImageInput],
772
+ ) -> List[ImageInput]:
773
+ """
774
+ Convert images to RGB.
775
+ """
776
+ new_images = []
777
+ for image in images:
778
+ if is_multi_image(image):
779
+ new_images.append([convert_to_rgb(img) for img in image])
780
+ else:
781
+ new_images.append(convert_to_rgb(image))
782
+ return new_images
783
+
784
+ def pad_arrays(self, arrays: List[np.ndarray], pad_value: float = -1) -> np.ndarray:
785
+ max_len = max(arr.shape[0] for arr in arrays)
786
+ padded_arr = np.full(
787
+ [len(arrays), max_len] + list(arrays[0].shape[1:]), pad_value, dtype=arrays[0].dtype
788
+ )
789
+ for ix, arr in enumerate(arrays):
790
+ padded_arr[ix, :len(arr)] = arr[:max_len]
791
+ return padded_arr
792
+
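Editorial sketch (not part of the uploaded file): `pad_arrays` right-pads ragged per-example arrays along the first axis with a fill value (-1 by default), which is what makes the variable-length crop tensors batchable:

import numpy as np

arrays = [np.ones((2, 3)), np.ones((4, 3))]    # ragged first dimension
max_len = max(a.shape[0] for a in arrays)      # 4
padded = np.full([len(arrays), max_len, 3], -1, dtype=arrays[0].dtype)
for ix, a in enumerate(arrays):
    padded[ix, :len(a)] = a
print(padded.shape)                            # (2, 4, 3); padded[0, 2:] is all -1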
793
+ def pad_for_batching(self, data: Dict[str, Any]) -> Dict[str, Any]:
794
+ """
795
+ Pad the data for batching.
796
+ """
797
+ images = self.pad_arrays(data["images"])
798
+ pooled_patches_idx = self.pad_arrays(data["pooled_patches_idx"])
799
+ image_masks = self.pad_arrays(data["image_masks"])
800
+ image_grids = self.pad_arrays(data["image_grids"])
801
+ new_data = dict(
802
+ images=images,
803
+ pooled_patches_idx=pooled_patches_idx,
804
+ image_masks=image_masks,
805
+ image_grids=image_grids,
806
+ )
807
+ return new_data
808
+
809
+ def preprocess(
810
+ self,
811
+ images: Union[ImageInput, List[ImageInput]],
812
+ crop_mode: Optional[str] = None,
813
+ resize_mode: Optional[str] = None,
814
+ normalize_mode: Optional[str] = None,
815
+ max_crops: Optional[int] = None,
816
+ max_multi_image_crops: Optional[int] = None,
817
+ overlap_margins: Optional[List[int]] = None,
818
+ base_image_input_size: Optional[List[int]] = None,
819
+ pad_value: Optional[float] = None,
820
+ image_patch_size: Optional[int] = None,
821
+ image_pooling_w: Optional[int] = None,
822
+ image_pooling_h: Optional[int] = None,
823
+ do_convert_rgb: Optional[bool] = None,
824
+ do_pad: Optional[bool] = None,
825
+ return_tensors: Optional[Union[str, TensorType]] = None,
826
+ **kwargs,
827
+ ) -> BatchFeature:
828
+ """
829
+ Preprocess one or more images for the model.
830
+ Args:
831
+ images: The image(s) to preprocess: a single image, a list of images, or a list of image lists for multi-image inputs.
832
+ crop_mode: The crop mode to use. If None, use the default crop mode.
833
+ resize_mode: The resize mode to use. If None, use the default resize mode.
834
+ normalize_mode: The normalization mode to use. If None, use the default normalization mode.
835
+ max_crops: The maximum number of crops to use. If None, use the default value.
836
+ max_multi_image_crops: The maximum number of crops to use for multi-image inputs.
837
+ overlap_margins: The overlap margins to use. If None, use the default values.
838
+ base_image_input_size: The base image input size to use. If None, use the default size.
839
+ pad_value: The padding value to use. If None, use the default value.
840
+ image_patch_size: The size of the image patches. If None, use the default size.
841
+ image_pooling_h: The height of the image pooling. If None, use the default height.
842
+ image_pooling_w: The width of the image pooling. If None, use the default width.
843
+ do_convert_rgb: Whether to convert the image to RGB. If None, use the default value.
844
+ do_pad: Whether to pad image features. If None, use the default value.
845
+
846
+ Returns:
847
+ A `BatchFeature` containing:
848
+ - `image_grids`, the pooled grid shapes
849
+ - `images`, the preprocessed image crops
850
+ - `image_masks`, the padding masks
851
+ - `pooled_patches_idx`, the pooling indices
852
+ """
853
+ images = make_batched_images(images)
854
+
855
+ if not valid_images(images):
856
+ raise ValueError("Invalid image input")
857
+
858
+ crop_mode = crop_mode or self.crop_mode
859
+ normalize_mode = normalize_mode or self.normalize_mode
860
+ resize_mode = resize_mode or self.resize_mode
861
+ max_crops = max_crops or self.max_crops
862
+ max_multi_image_crops = max_multi_image_crops or self.max_multi_image_crops
863
+ overlap_margins = overlap_margins or self.overlap_margins
864
+ base_image_input_size = base_image_input_size or self.base_image_input_size
865
+ pad_value = pad_value if pad_value is not None else self.pad_value
866
+ image_patch_size = image_patch_size or self.image_patch_size
867
+ image_pooling_w = image_pooling_w or self.image_pooling_w
868
+ image_pooling_h = image_pooling_h or self.image_pooling_h
869
+ do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
870
+ do_pad = do_pad if do_pad is not None else self.do_pad
871
+
872
+ if do_convert_rgb:
873
+ images = self.to_rgb(images)
874
+
875
+ # All transformations expect numpy arrays.
876
+ images = self.to_numpy_array(images)
877
+
878
+ # All transformations expect channel dimension last.
879
+ images = self.to_channel_dimension_last(images)
880
+
881
+ batch_image_grids = []
882
+ batch_crops = []
883
+ batch_crop_masks = []
884
+ batch_pooled_patches_idx = []
885
+
886
+ for image in images:
887
+ if is_multi_image(image):
888
+ all_image_grids = []
889
+ all_crops = []
890
+ all_crop_masks = []
891
+ pooled_patches_idx = []
892
+ for img in image:
893
+ image_grid, crops, img_mask, pooled_idx = image_to_patches_and_grids(
894
+ img,
895
+ crop_mode,
896
+ resize_mode,
897
+ normalize_mode,
898
+ max_multi_image_crops,
899
+ overlap_margins,
900
+ base_image_input_size,
901
+ pad_value,
902
+ image_patch_size,
903
+ image_pooling_w,
904
+ image_pooling_h,
905
+ )
906
+ # Offset indices into the already-collected crops; keep masked (-1) entries masked
+ offset = int(sum(np.prod(x.shape[:2]) for x in all_crops))
+ pooled_patches_idx.append(np.where(pooled_idx >= 0, pooled_idx + offset, -1))
907
+ all_crops.append(crops)
908
+ all_crop_masks.append(img_mask)
909
+ all_image_grids.append(image_grid)
910
+ all_image_grids = np.concatenate(all_image_grids, 0)
911
+ all_crops = np.concatenate(all_crops, 0)
912
+ all_crop_masks = np.concatenate(all_crop_masks, 0)
913
+ pooled_patches_idx = np.concatenate(pooled_patches_idx, 0)
914
+
915
+ batch_image_grids.append(all_image_grids)
916
+ batch_crops.append(all_crops)
917
+ batch_crop_masks.append(all_crop_masks)
918
+ batch_pooled_patches_idx.append(pooled_patches_idx)
919
+ else:
920
+ image_grid, crops, img_mask, pooled_idx = image_to_patches_and_grids(
921
+ image,
922
+ crop_mode,
923
+ resize_mode,
924
+ normalize_mode,
925
+ max_crops,
926
+ overlap_margins,
927
+ base_image_input_size,
928
+ pad_value,
929
+ image_patch_size,
930
+ image_pooling_w,
931
+ image_pooling_h,
932
+ )
933
+ batch_image_grids.append(image_grid)
934
+ batch_crops.append(crops)
935
+ batch_crop_masks.append(img_mask)
936
+ batch_pooled_patches_idx.append(pooled_idx)
937
+
938
+ data = dict(
939
+ images=batch_crops,
940
+ pooled_patches_idx=batch_pooled_patches_idx,
941
+ image_masks=batch_crop_masks,
942
+ image_grids=batch_image_grids,
943
+ )
944
+
945
+ if do_pad:
946
+ data = self.pad_for_batching(data)
947
+
948
+ return BatchFeature(data, tensor_type=return_tensors)
949
+
950
+
951
+ MolmoActImageProcessor.register_for_auto_class()
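Editorial sketch (not part of the uploaded file): a minimal end-to-end call of the processor defined in this file, assuming the class is importable and that a plain HWC uint8 numpy array is accepted; the dummy input and printed shapes are illustrative:

import numpy as np

processor = MolmoActImageProcessor()               # defaults: overlap-and-resize-c2, 378x378, 14-px patches
dummy = np.zeros((480, 640, 3), dtype=np.uint8)    # hypothetical HWC image
features = processor.preprocess(dummy, return_tensors="np")
print(features["images"].shape)                    # (batch, n_crops, n_patches, patch_dim)
print(features["pooled_patches_idx"].shape)        # patch indices pooled per output token
print(features["image_masks"].shape)               # padding fraction per patch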
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cc5346067dc12baef9243481d577b9890ccdf74335d6c054af41cee777f5dad
3
+ size 4978520816
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5f5835368c732c52f68845d059d5ceb8010a550160a43a908f0b2026fd9bb07
3
+ size 4778633920
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad1e2e9197dd5dc3b437942e241f64199c030d1f12f393a3c8ec1f501b1bada
3
+ size 4661160168
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3854563f2bd5f13f1248ab93693caeac99d9ea49caa293e02ecf1762336c8b19
3
+ size 4661160192
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b06c258bc49dcf238bdca891e1326369734c5f06f7abb0db5ecd2a5d8755299
3
+ size 4661160192
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:415f8d4c4271fbe72897abb6871df291c1ed428daa3924144120c9f4fc526adb
3
+ size 4997750712
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c7a360fee0894f583a750cb6689d1fc063f3d58f32176bf04c24398af47379
3
+ size 3739371680
model.safetensors.index.json ADDED
@@ -0,0 +1,621 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 32477671232
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00007-of-00007.safetensors",
7
+ "model.transformer.blocks.0.attn_norm.weight": "model-00001-of-00007.safetensors",
8
+ "model.transformer.blocks.0.ff_norm.weight": "model-00001-of-00007.safetensors",
9
+ "model.transformer.blocks.0.mlp.ff_out.weight": "model-00001-of-00007.safetensors",
10
+ "model.transformer.blocks.0.mlp.ff_proj.weight": "model-00001-of-00007.safetensors",
11
+ "model.transformer.blocks.0.self_attn.att_proj.bias": "model-00001-of-00007.safetensors",
12
+ "model.transformer.blocks.0.self_attn.att_proj.weight": "model-00001-of-00007.safetensors",
13
+ "model.transformer.blocks.0.self_attn.attn_out.weight": "model-00001-of-00007.safetensors",
14
+ "model.transformer.blocks.1.attn_norm.weight": "model-00001-of-00007.safetensors",
15
+ "model.transformer.blocks.1.ff_norm.weight": "model-00001-of-00007.safetensors",
16
+ "model.transformer.blocks.1.mlp.ff_out.weight": "model-00001-of-00007.safetensors",
17
+ "model.transformer.blocks.1.mlp.ff_proj.weight": "model-00001-of-00007.safetensors",
18
+ "model.transformer.blocks.1.self_attn.att_proj.bias": "model-00001-of-00007.safetensors",
19
+ "model.transformer.blocks.1.self_attn.att_proj.weight": "model-00001-of-00007.safetensors",
20
+ "model.transformer.blocks.1.self_attn.attn_out.weight": "model-00001-of-00007.safetensors",
21
+ "model.transformer.blocks.10.attn_norm.weight": "model-00003-of-00007.safetensors",
22
+ "model.transformer.blocks.10.ff_norm.weight": "model-00003-of-00007.safetensors",
23
+ "model.transformer.blocks.10.mlp.ff_out.weight": "model-00003-of-00007.safetensors",
24
+ "model.transformer.blocks.10.mlp.ff_proj.weight": "model-00003-of-00007.safetensors",
25
+ "model.transformer.blocks.10.self_attn.att_proj.bias": "model-00003-of-00007.safetensors",
26
+ "model.transformer.blocks.10.self_attn.att_proj.weight": "model-00003-of-00007.safetensors",
27
+ "model.transformer.blocks.10.self_attn.attn_out.weight": "model-00003-of-00007.safetensors",
28
+ "model.transformer.blocks.11.attn_norm.weight": "model-00003-of-00007.safetensors",
29
+ "model.transformer.blocks.11.ff_norm.weight": "model-00003-of-00007.safetensors",
30
+ "model.transformer.blocks.11.mlp.ff_out.weight": "model-00003-of-00007.safetensors",
31
+ "model.transformer.blocks.11.mlp.ff_proj.weight": "model-00003-of-00007.safetensors",
32
+ "model.transformer.blocks.11.self_attn.att_proj.bias": "model-00003-of-00007.safetensors",
33
+ "model.transformer.blocks.11.self_attn.att_proj.weight": "model-00003-of-00007.safetensors",
34
+ "model.transformer.blocks.11.self_attn.attn_out.weight": "model-00003-of-00007.safetensors",
35
+ "model.transformer.blocks.12.attn_norm.weight": "model-00003-of-00007.safetensors",
36
+ "model.transformer.blocks.12.ff_norm.weight": "model-00003-of-00007.safetensors",
37
+ "model.transformer.blocks.12.mlp.ff_out.weight": "model-00003-of-00007.safetensors",
38
+ "model.transformer.blocks.12.mlp.ff_proj.weight": "model-00003-of-00007.safetensors",
39
+ "model.transformer.blocks.12.self_attn.att_proj.bias": "model-00003-of-00007.safetensors",
40
+ "model.transformer.blocks.12.self_attn.att_proj.weight": "model-00003-of-00007.safetensors",
41
+ "model.transformer.blocks.12.self_attn.attn_out.weight": "model-00003-of-00007.safetensors",
42
+ "model.transformer.blocks.13.attn_norm.weight": "model-00003-of-00007.safetensors",
43
+ "model.transformer.blocks.13.ff_norm.weight": "model-00004-of-00007.safetensors",
44
+ "model.transformer.blocks.13.mlp.ff_out.weight": "model-00004-of-00007.safetensors",
45
+ "model.transformer.blocks.13.mlp.ff_proj.weight": "model-00004-of-00007.safetensors",
46
+ "model.transformer.blocks.13.self_attn.att_proj.bias": "model-00003-of-00007.safetensors",
47
+ "model.transformer.blocks.13.self_attn.att_proj.weight": "model-00003-of-00007.safetensors",
48
+ "model.transformer.blocks.13.self_attn.attn_out.weight": "model-00003-of-00007.safetensors",
49
+ "model.transformer.blocks.14.attn_norm.weight": "model-00004-of-00007.safetensors",
50
+ "model.transformer.blocks.14.ff_norm.weight": "model-00004-of-00007.safetensors",
51
+ "model.transformer.blocks.14.mlp.ff_out.weight": "model-00004-of-00007.safetensors",
52
+ "model.transformer.blocks.14.mlp.ff_proj.weight": "model-00004-of-00007.safetensors",
53
+ "model.transformer.blocks.14.self_attn.att_proj.bias": "model-00004-of-00007.safetensors",
54
+ "model.transformer.blocks.14.self_attn.att_proj.weight": "model-00004-of-00007.safetensors",
55
+ "model.transformer.blocks.14.self_attn.attn_out.weight": "model-00004-of-00007.safetensors",
56
+ "model.transformer.blocks.15.attn_norm.weight": "model-00004-of-00007.safetensors",
57
+ "model.transformer.blocks.15.ff_norm.weight": "model-00004-of-00007.safetensors",
58
+ "model.transformer.blocks.15.mlp.ff_out.weight": "model-00004-of-00007.safetensors",
59
+ "model.transformer.blocks.15.mlp.ff_proj.weight": "model-00004-of-00007.safetensors",
60
+ "model.transformer.blocks.15.self_attn.att_proj.bias": "model-00004-of-00007.safetensors",
61
+ "model.transformer.blocks.15.self_attn.att_proj.weight": "model-00004-of-00007.safetensors",
62
+ "model.transformer.blocks.15.self_attn.attn_out.weight": "model-00004-of-00007.safetensors",
63
+ "model.transformer.blocks.16.attn_norm.weight": "model-00004-of-00007.safetensors",
64
+ "model.transformer.blocks.16.ff_norm.weight": "model-00004-of-00007.safetensors",
65
+ "model.transformer.blocks.16.mlp.ff_out.weight": "model-00004-of-00007.safetensors",
66
+ "model.transformer.blocks.16.mlp.ff_proj.weight": "model-00004-of-00007.safetensors",
67
+ "model.transformer.blocks.16.self_attn.att_proj.bias": "model-00004-of-00007.safetensors",
68
+ "model.transformer.blocks.16.self_attn.att_proj.weight": "model-00004-of-00007.safetensors",
69
+ "model.transformer.blocks.16.self_attn.attn_out.weight": "model-00004-of-00007.safetensors",
70
+ "model.transformer.blocks.17.attn_norm.weight": "model-00004-of-00007.safetensors",
71
+ "model.transformer.blocks.17.ff_norm.weight": "model-00004-of-00007.safetensors",
72
+ "model.transformer.blocks.17.mlp.ff_out.weight": "model-00004-of-00007.safetensors",
73
+ "model.transformer.blocks.17.mlp.ff_proj.weight": "model-00004-of-00007.safetensors",
74
+ "model.transformer.blocks.17.self_attn.att_proj.bias": "model-00004-of-00007.safetensors",
75
+ "model.transformer.blocks.17.self_attn.att_proj.weight": "model-00004-of-00007.safetensors",
76
+ "model.transformer.blocks.17.self_attn.attn_out.weight": "model-00004-of-00007.safetensors",
77
+ "model.transformer.blocks.18.attn_norm.weight": "model-00004-of-00007.safetensors",
78
+ "model.transformer.blocks.18.ff_norm.weight": "model-00005-of-00007.safetensors",
79
+ "model.transformer.blocks.18.mlp.ff_out.weight": "model-00005-of-00007.safetensors",
80
+ "model.transformer.blocks.18.mlp.ff_proj.weight": "model-00005-of-00007.safetensors",
81
+ "model.transformer.blocks.18.self_attn.att_proj.bias": "model-00004-of-00007.safetensors",
82
+ "model.transformer.blocks.18.self_attn.att_proj.weight": "model-00004-of-00007.safetensors",
83
+ "model.transformer.blocks.18.self_attn.attn_out.weight": "model-00004-of-00007.safetensors",
84
+ "model.transformer.blocks.19.attn_norm.weight": "model-00005-of-00007.safetensors",
85
+ "model.transformer.blocks.19.ff_norm.weight": "model-00005-of-00007.safetensors",
86
+ "model.transformer.blocks.19.mlp.ff_out.weight": "model-00005-of-00007.safetensors",
87
+ "model.transformer.blocks.19.mlp.ff_proj.weight": "model-00005-of-00007.safetensors",
88
+ "model.transformer.blocks.19.self_attn.att_proj.bias": "model-00005-of-00007.safetensors",
89
+ "model.transformer.blocks.19.self_attn.att_proj.weight": "model-00005-of-00007.safetensors",
90
+ "model.transformer.blocks.19.self_attn.attn_out.weight": "model-00005-of-00007.safetensors",
91
+ "model.transformer.blocks.2.attn_norm.weight": "model-00001-of-00007.safetensors",
92
+ "model.transformer.blocks.2.ff_norm.weight": "model-00001-of-00007.safetensors",
93
+ "model.transformer.blocks.2.mlp.ff_out.weight": "model-00001-of-00007.safetensors",
94
+ "model.transformer.blocks.2.mlp.ff_proj.weight": "model-00001-of-00007.safetensors",
95
+ "model.transformer.blocks.2.self_attn.att_proj.bias": "model-00001-of-00007.safetensors",
96
+ "model.transformer.blocks.2.self_attn.att_proj.weight": "model-00001-of-00007.safetensors",
97
+ "model.transformer.blocks.2.self_attn.attn_out.weight": "model-00001-of-00007.safetensors",
98
+ "model.transformer.blocks.20.attn_norm.weight": "model-00005-of-00007.safetensors",
99
+ "model.transformer.blocks.20.ff_norm.weight": "model-00005-of-00007.safetensors",
100
+ "model.transformer.blocks.20.mlp.ff_out.weight": "model-00005-of-00007.safetensors",
101
+ "model.transformer.blocks.20.mlp.ff_proj.weight": "model-00005-of-00007.safetensors",
102
+ "model.transformer.blocks.20.self_attn.att_proj.bias": "model-00005-of-00007.safetensors",
103
+ "model.transformer.blocks.20.self_attn.att_proj.weight": "model-00005-of-00007.safetensors",
104
+ "model.transformer.blocks.20.self_attn.attn_out.weight": "model-00005-of-00007.safetensors",
105
+ "model.transformer.blocks.21.attn_norm.weight": "model-00005-of-00007.safetensors",
106
+ "model.transformer.blocks.21.ff_norm.weight": "model-00005-of-00007.safetensors",
107
+ "model.transformer.blocks.21.mlp.ff_out.weight": "model-00005-of-00007.safetensors",
108
+ "model.transformer.blocks.21.mlp.ff_proj.weight": "model-00005-of-00007.safetensors",
109
+ "model.transformer.blocks.21.self_attn.att_proj.bias": "model-00005-of-00007.safetensors",
110
+ "model.transformer.blocks.21.self_attn.att_proj.weight": "model-00005-of-00007.safetensors",
111
+ "model.transformer.blocks.21.self_attn.attn_out.weight": "model-00005-of-00007.safetensors",
112
+ "model.transformer.blocks.22.attn_norm.weight": "model-00005-of-00007.safetensors",
113
+ "model.transformer.blocks.22.ff_norm.weight": "model-00005-of-00007.safetensors",
114
+ "model.transformer.blocks.22.mlp.ff_out.weight": "model-00005-of-00007.safetensors",
115
+ "model.transformer.blocks.22.mlp.ff_proj.weight": "model-00005-of-00007.safetensors",
116
+ "model.transformer.blocks.22.self_attn.att_proj.bias": "model-00005-of-00007.safetensors",
117
+ "model.transformer.blocks.22.self_attn.att_proj.weight": "model-00005-of-00007.safetensors",
118
+ "model.transformer.blocks.22.self_attn.attn_out.weight": "model-00005-of-00007.safetensors",
119
+ "model.transformer.blocks.23.attn_norm.weight": "model-00005-of-00007.safetensors",
120
+ "model.transformer.blocks.23.ff_norm.weight": "model-00006-of-00007.safetensors",
121
+ "model.transformer.blocks.23.mlp.ff_out.weight": "model-00006-of-00007.safetensors",
122
+ "model.transformer.blocks.23.mlp.ff_proj.weight": "model-00006-of-00007.safetensors",
123
+ "model.transformer.blocks.23.self_attn.att_proj.bias": "model-00005-of-00007.safetensors",
124
+ "model.transformer.blocks.23.self_attn.att_proj.weight": "model-00005-of-00007.safetensors",
125
+ "model.transformer.blocks.23.self_attn.attn_out.weight": "model-00005-of-00007.safetensors",
126
+ "model.transformer.blocks.24.attn_norm.weight": "model-00006-of-00007.safetensors",
127
+ "model.transformer.blocks.24.ff_norm.weight": "model-00006-of-00007.safetensors",
128
+ "model.transformer.blocks.24.mlp.ff_out.weight": "model-00006-of-00007.safetensors",
129
+ "model.transformer.blocks.24.mlp.ff_proj.weight": "model-00006-of-00007.safetensors",
130
+ "model.transformer.blocks.24.self_attn.att_proj.bias": "model-00006-of-00007.safetensors",
131
+ "model.transformer.blocks.24.self_attn.att_proj.weight": "model-00006-of-00007.safetensors",
132
+ "model.transformer.blocks.24.self_attn.attn_out.weight": "model-00006-of-00007.safetensors",
133
+ "model.transformer.blocks.25.attn_norm.weight": "model-00006-of-00007.safetensors",
134
+ "model.transformer.blocks.25.ff_norm.weight": "model-00006-of-00007.safetensors",
135
+ "model.transformer.blocks.25.mlp.ff_out.weight": "model-00006-of-00007.safetensors",
136
+ "model.transformer.blocks.25.mlp.ff_proj.weight": "model-00006-of-00007.safetensors",
137
+ "model.transformer.blocks.25.self_attn.att_proj.bias": "model-00006-of-00007.safetensors",
138
+ "model.transformer.blocks.25.self_attn.att_proj.weight": "model-00006-of-00007.safetensors",
139
+ "model.transformer.blocks.25.self_attn.attn_out.weight": "model-00006-of-00007.safetensors",
140
+ "model.transformer.blocks.26.attn_norm.weight": "model-00006-of-00007.safetensors",
141
+ "model.transformer.blocks.26.ff_norm.weight": "model-00006-of-00007.safetensors",
142
+ "model.transformer.blocks.26.mlp.ff_out.weight": "model-00006-of-00007.safetensors",
143
+ "model.transformer.blocks.26.mlp.ff_proj.weight": "model-00006-of-00007.safetensors",
144
+ "model.transformer.blocks.26.self_attn.att_proj.bias": "model-00006-of-00007.safetensors",
145
+ "model.transformer.blocks.26.self_attn.att_proj.weight": "model-00006-of-00007.safetensors",
146
+ "model.transformer.blocks.26.self_attn.attn_out.weight": "model-00006-of-00007.safetensors",
147
+ "model.transformer.blocks.27.attn_norm.weight": "model-00006-of-00007.safetensors",
148
+ "model.transformer.blocks.27.ff_norm.weight": "model-00006-of-00007.safetensors",
149
+ "model.transformer.blocks.27.mlp.ff_out.weight": "model-00006-of-00007.safetensors",
150
+ "model.transformer.blocks.27.mlp.ff_proj.weight": "model-00006-of-00007.safetensors",
151
+ "model.transformer.blocks.27.self_attn.att_proj.bias": "model-00006-of-00007.safetensors",
152
+ "model.transformer.blocks.27.self_attn.att_proj.weight": "model-00006-of-00007.safetensors",
153
+ "model.transformer.blocks.27.self_attn.attn_out.weight": "model-00006-of-00007.safetensors",
154
+ "model.transformer.blocks.3.attn_norm.weight": "model-00002-of-00007.safetensors",
155
+ "model.transformer.blocks.3.ff_norm.weight": "model-00002-of-00007.safetensors",
156
+ "model.transformer.blocks.3.mlp.ff_out.weight": "model-00002-of-00007.safetensors",
157
+ "model.transformer.blocks.3.mlp.ff_proj.weight": "model-00002-of-00007.safetensors",
158
+ "model.transformer.blocks.3.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
159
+ "model.transformer.blocks.3.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
160
+ "model.transformer.blocks.3.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
161
+ "model.transformer.blocks.4.attn_norm.weight": "model-00002-of-00007.safetensors",
162
+ "model.transformer.blocks.4.ff_norm.weight": "model-00002-of-00007.safetensors",
163
+ "model.transformer.blocks.4.mlp.ff_out.weight": "model-00002-of-00007.safetensors",
164
+ "model.transformer.blocks.4.mlp.ff_proj.weight": "model-00002-of-00007.safetensors",
165
+ "model.transformer.blocks.4.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
166
+ "model.transformer.blocks.4.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
167
+ "model.transformer.blocks.4.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
168
+ "model.transformer.blocks.5.attn_norm.weight": "model-00002-of-00007.safetensors",
169
+ "model.transformer.blocks.5.ff_norm.weight": "model-00002-of-00007.safetensors",
170
+ "model.transformer.blocks.5.mlp.ff_out.weight": "model-00002-of-00007.safetensors",
171
+ "model.transformer.blocks.5.mlp.ff_proj.weight": "model-00002-of-00007.safetensors",
172
+ "model.transformer.blocks.5.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
173
+ "model.transformer.blocks.5.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
174
+ "model.transformer.blocks.5.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
175
+ "model.transformer.blocks.6.attn_norm.weight": "model-00002-of-00007.safetensors",
176
+ "model.transformer.blocks.6.ff_norm.weight": "model-00002-of-00007.safetensors",
177
+ "model.transformer.blocks.6.mlp.ff_out.weight": "model-00002-of-00007.safetensors",
178
+ "model.transformer.blocks.6.mlp.ff_proj.weight": "model-00002-of-00007.safetensors",
179
+ "model.transformer.blocks.6.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
180
+ "model.transformer.blocks.6.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
181
+ "model.transformer.blocks.6.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
182
+ "model.transformer.blocks.7.attn_norm.weight": "model-00002-of-00007.safetensors",
183
+ "model.transformer.blocks.7.ff_norm.weight": "model-00002-of-00007.safetensors",
184
+ "model.transformer.blocks.7.mlp.ff_out.weight": "model-00002-of-00007.safetensors",
185
+ "model.transformer.blocks.7.mlp.ff_proj.weight": "model-00002-of-00007.safetensors",
186
+ "model.transformer.blocks.7.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
187
+ "model.transformer.blocks.7.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
188
+ "model.transformer.blocks.7.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
189
+ "model.transformer.blocks.8.attn_norm.weight": "model-00002-of-00007.safetensors",
190
+ "model.transformer.blocks.8.ff_norm.weight": "model-00003-of-00007.safetensors",
191
+ "model.transformer.blocks.8.mlp.ff_out.weight": "model-00003-of-00007.safetensors",
192
+ "model.transformer.blocks.8.mlp.ff_proj.weight": "model-00003-of-00007.safetensors",
193
+ "model.transformer.blocks.8.self_attn.att_proj.bias": "model-00002-of-00007.safetensors",
194
+ "model.transformer.blocks.8.self_attn.att_proj.weight": "model-00002-of-00007.safetensors",
195
+ "model.transformer.blocks.8.self_attn.attn_out.weight": "model-00002-of-00007.safetensors",
196
+ "model.transformer.blocks.9.attn_norm.weight": "model-00003-of-00007.safetensors",
197
+ "model.transformer.blocks.9.ff_norm.weight": "model-00003-of-00007.safetensors",
198
+ "model.transformer.blocks.9.mlp.ff_out.weight": "model-00003-of-00007.safetensors",
199
+ "model.transformer.blocks.9.mlp.ff_proj.weight": "model-00003-of-00007.safetensors",
200
+ "model.transformer.blocks.9.self_attn.att_proj.bias": "model-00003-of-00007.safetensors",
201
+ "model.transformer.blocks.9.self_attn.att_proj.weight": "model-00003-of-00007.safetensors",
202
+ "model.transformer.blocks.9.self_attn.attn_out.weight": "model-00003-of-00007.safetensors",
203
+ "model.transformer.ln_f.weight": "model-00006-of-00007.safetensors",
204
+ "model.transformer.wte.embedding": "model-00001-of-00007.safetensors",
205
+ "model.transformer.wte.new_embedding": "model-00001-of-00007.safetensors",
206
+ "model.vision_backbone.image_pooling_2d.wk.bias": "model-00007-of-00007.safetensors",
207
+ "model.vision_backbone.image_pooling_2d.wk.weight": "model-00007-of-00007.safetensors",
208
+ "model.vision_backbone.image_pooling_2d.wo.bias": "model-00007-of-00007.safetensors",
209
+ "model.vision_backbone.image_pooling_2d.wo.weight": "model-00007-of-00007.safetensors",
210
+ "model.vision_backbone.image_pooling_2d.wq.bias": "model-00007-of-00007.safetensors",
211
+ "model.vision_backbone.image_pooling_2d.wq.weight": "model-00007-of-00007.safetensors",
212
+ "model.vision_backbone.image_pooling_2d.wv.bias": "model-00007-of-00007.safetensors",
213
+ "model.vision_backbone.image_pooling_2d.wv.weight": "model-00007-of-00007.safetensors",
214
+ "model.vision_backbone.image_projector.w1.weight": "model-00007-of-00007.safetensors",
215
+ "model.vision_backbone.image_projector.w2.weight": "model-00007-of-00007.safetensors",
216
+ "model.vision_backbone.image_projector.w3.weight": "model-00007-of-00007.safetensors",
217
+ "model.vision_backbone.image_vit.patch_embedding.bias": "model-00006-of-00007.safetensors",
218
+ "model.vision_backbone.image_vit.patch_embedding.weight": "model-00006-of-00007.safetensors",
219
+ "model.vision_backbone.image_vit.positional_embedding": "model-00006-of-00007.safetensors",
220
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.bias": "model-00006-of-00007.safetensors",
221
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.weight": "model-00006-of-00007.safetensors",
222
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.bias": "model-00006-of-00007.safetensors",
223
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.weight": "model-00006-of-00007.safetensors",
224
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.bias": "model-00006-of-00007.safetensors",
225
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.weight": "model-00006-of-00007.safetensors",
226
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.bias": "model-00006-of-00007.safetensors",
227
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.weight": "model-00006-of-00007.safetensors",
228
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.bias": "model-00006-of-00007.safetensors",
229
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.weight": "model-00006-of-00007.safetensors",
230
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
231
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
232
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
233
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
234
+ "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.bias": "model-00006-of-00007.safetensors",
235
+ "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.weight": "model-00006-of-00007.safetensors",
236
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.bias": "model-00006-of-00007.safetensors",
237
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.weight": "model-00006-of-00007.safetensors",
238
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.bias": "model-00006-of-00007.safetensors",
239
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.weight": "model-00006-of-00007.safetensors",
240
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.bias": "model-00006-of-00007.safetensors",
241
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.weight": "model-00006-of-00007.safetensors",
242
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.bias": "model-00006-of-00007.safetensors",
243
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.weight": "model-00006-of-00007.safetensors",
244
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.bias": "model-00006-of-00007.safetensors",
245
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.weight": "model-00006-of-00007.safetensors",
246
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
247
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
248
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
249
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
250
+ "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.bias": "model-00006-of-00007.safetensors",
251
+ "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.weight": "model-00006-of-00007.safetensors",
252
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.bias": "model-00007-of-00007.safetensors",
253
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.weight": "model-00007-of-00007.safetensors",
254
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.bias": "model-00007-of-00007.safetensors",
255
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.weight": "model-00007-of-00007.safetensors",
256
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.bias": "model-00007-of-00007.safetensors",
257
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.weight": "model-00007-of-00007.safetensors",
258
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.bias": "model-00007-of-00007.safetensors",
259
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.weight": "model-00007-of-00007.safetensors",
260
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.bias": "model-00007-of-00007.safetensors",
261
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.weight": "model-00007-of-00007.safetensors",
262
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
263
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
264
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
265
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
266
+ "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.bias": "model-00007-of-00007.safetensors",
267
+ "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.weight": "model-00007-of-00007.safetensors",
268
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.bias": "model-00007-of-00007.safetensors",
269
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.weight": "model-00007-of-00007.safetensors",
270
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.bias": "model-00007-of-00007.safetensors",
271
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.weight": "model-00007-of-00007.safetensors",
272
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.bias": "model-00007-of-00007.safetensors",
273
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.weight": "model-00007-of-00007.safetensors",
274
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.bias": "model-00007-of-00007.safetensors",
275
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.weight": "model-00007-of-00007.safetensors",
276
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.bias": "model-00007-of-00007.safetensors",
277
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.weight": "model-00007-of-00007.safetensors",
278
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
279
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
280
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
281
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
282
+ "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.bias": "model-00007-of-00007.safetensors",
283
+ "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.weight": "model-00007-of-00007.safetensors",
284
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.bias": "model-00007-of-00007.safetensors",
285
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.weight": "model-00007-of-00007.safetensors",
286
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.bias": "model-00007-of-00007.safetensors",
287
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.weight": "model-00007-of-00007.safetensors",
288
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.bias": "model-00007-of-00007.safetensors",
289
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.weight": "model-00007-of-00007.safetensors",
290
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.bias": "model-00007-of-00007.safetensors",
291
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.weight": "model-00007-of-00007.safetensors",
292
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.bias": "model-00007-of-00007.safetensors",
293
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.weight": "model-00007-of-00007.safetensors",
294
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
295
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
296
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
297
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
298
+ "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.bias": "model-00007-of-00007.safetensors",
299
+ "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.weight": "model-00007-of-00007.safetensors",
300
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.bias": "model-00007-of-00007.safetensors",
301
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.weight": "model-00007-of-00007.safetensors",
302
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.bias": "model-00007-of-00007.safetensors",
303
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.weight": "model-00007-of-00007.safetensors",
304
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.bias": "model-00007-of-00007.safetensors",
305
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.weight": "model-00007-of-00007.safetensors",
306
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.bias": "model-00007-of-00007.safetensors",
307
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.weight": "model-00007-of-00007.safetensors",
308
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.bias": "model-00007-of-00007.safetensors",
309
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.weight": "model-00007-of-00007.safetensors",
310
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
311
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
312
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
313
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
314
+ "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.bias": "model-00007-of-00007.safetensors",
315
+ "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.weight": "model-00007-of-00007.safetensors",
316
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.bias": "model-00007-of-00007.safetensors",
317
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.weight": "model-00007-of-00007.safetensors",
318
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.bias": "model-00007-of-00007.safetensors",
319
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.weight": "model-00007-of-00007.safetensors",
320
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.bias": "model-00007-of-00007.safetensors",
321
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.weight": "model-00007-of-00007.safetensors",
322
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.bias": "model-00007-of-00007.safetensors",
323
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.weight": "model-00007-of-00007.safetensors",
324
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.bias": "model-00007-of-00007.safetensors",
325
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.weight": "model-00007-of-00007.safetensors",
326
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
327
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
328
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
329
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
330
+ "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.bias": "model-00007-of-00007.safetensors",
331
+ "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.weight": "model-00007-of-00007.safetensors",
332
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.bias": "model-00007-of-00007.safetensors",
333
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.weight": "model-00007-of-00007.safetensors",
334
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.bias": "model-00007-of-00007.safetensors",
335
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.weight": "model-00007-of-00007.safetensors",
336
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.bias": "model-00007-of-00007.safetensors",
337
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.weight": "model-00007-of-00007.safetensors",
338
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.bias": "model-00007-of-00007.safetensors",
339
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.weight": "model-00007-of-00007.safetensors",
340
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.bias": "model-00007-of-00007.safetensors",
341
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.weight": "model-00007-of-00007.safetensors",
342
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
343
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
344
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
345
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
346
+ "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.bias": "model-00007-of-00007.safetensors",
347
+ "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.weight": "model-00007-of-00007.safetensors",
348
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.bias": "model-00007-of-00007.safetensors",
349
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.weight": "model-00007-of-00007.safetensors",
350
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.bias": "model-00007-of-00007.safetensors",
351
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.weight": "model-00007-of-00007.safetensors",
352
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.bias": "model-00007-of-00007.safetensors",
353
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.weight": "model-00007-of-00007.safetensors",
354
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.bias": "model-00007-of-00007.safetensors",
355
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.weight": "model-00007-of-00007.safetensors",
356
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.bias": "model-00007-of-00007.safetensors",
357
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.weight": "model-00007-of-00007.safetensors",
358
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
359
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
360
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
361
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
362
+ "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.bias": "model-00007-of-00007.safetensors",
363
+ "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.weight": "model-00007-of-00007.safetensors",
364
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.bias": "model-00007-of-00007.safetensors",
365
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.weight": "model-00007-of-00007.safetensors",
366
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.bias": "model-00007-of-00007.safetensors",
367
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.weight": "model-00007-of-00007.safetensors",
368
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.bias": "model-00007-of-00007.safetensors",
369
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.weight": "model-00007-of-00007.safetensors",
370
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.bias": "model-00007-of-00007.safetensors",
371
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.weight": "model-00007-of-00007.safetensors",
372
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.bias": "model-00007-of-00007.safetensors",
373
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.weight": "model-00007-of-00007.safetensors",
374
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
375
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
376
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
377
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
378
+ "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.bias": "model-00007-of-00007.safetensors",
379
+ "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.weight": "model-00007-of-00007.safetensors",
380
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.bias": "model-00007-of-00007.safetensors",
381
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.weight": "model-00007-of-00007.safetensors",
382
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.bias": "model-00007-of-00007.safetensors",
383
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.weight": "model-00007-of-00007.safetensors",
384
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.bias": "model-00007-of-00007.safetensors",
385
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.weight": "model-00007-of-00007.safetensors",
386
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.bias": "model-00007-of-00007.safetensors",
387
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.weight": "model-00007-of-00007.safetensors",
388
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.bias": "model-00007-of-00007.safetensors",
389
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.weight": "model-00007-of-00007.safetensors",
390
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
391
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
392
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
393
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
394
+ "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.bias": "model-00007-of-00007.safetensors",
395
+ "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.weight": "model-00007-of-00007.safetensors",
396
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.bias": "model-00007-of-00007.safetensors",
397
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.weight": "model-00007-of-00007.safetensors",
398
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.bias": "model-00007-of-00007.safetensors",
399
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.weight": "model-00007-of-00007.safetensors",
400
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.bias": "model-00007-of-00007.safetensors",
401
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.weight": "model-00007-of-00007.safetensors",
402
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.bias": "model-00007-of-00007.safetensors",
403
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.weight": "model-00007-of-00007.safetensors",
404
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.bias": "model-00007-of-00007.safetensors",
405
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.weight": "model-00007-of-00007.safetensors",
406
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
407
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
408
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
409
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
410
+ "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.bias": "model-00007-of-00007.safetensors",
411
+ "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.weight": "model-00007-of-00007.safetensors",
412
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.bias": "model-00006-of-00007.safetensors",
413
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.weight": "model-00006-of-00007.safetensors",
414
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.bias": "model-00006-of-00007.safetensors",
415
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.weight": "model-00006-of-00007.safetensors",
416
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.bias": "model-00006-of-00007.safetensors",
417
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.weight": "model-00006-of-00007.safetensors",
418
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.bias": "model-00006-of-00007.safetensors",
419
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.weight": "model-00006-of-00007.safetensors",
420
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.bias": "model-00006-of-00007.safetensors",
421
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.weight": "model-00006-of-00007.safetensors",
422
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
423
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
424
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
425
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
426
+ "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.bias": "model-00006-of-00007.safetensors",
427
+ "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.weight": "model-00006-of-00007.safetensors",
428
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.bias": "model-00007-of-00007.safetensors",
429
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.weight": "model-00007-of-00007.safetensors",
430
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.bias": "model-00007-of-00007.safetensors",
431
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.weight": "model-00007-of-00007.safetensors",
432
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.bias": "model-00007-of-00007.safetensors",
433
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.weight": "model-00007-of-00007.safetensors",
434
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.bias": "model-00007-of-00007.safetensors",
435
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.weight": "model-00007-of-00007.safetensors",
436
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.bias": "model-00007-of-00007.safetensors",
437
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.weight": "model-00007-of-00007.safetensors",
438
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
439
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
440
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
441
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
442
+ "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.bias": "model-00007-of-00007.safetensors",
443
+ "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.weight": "model-00007-of-00007.safetensors",
444
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.bias": "model-00007-of-00007.safetensors",
445
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.weight": "model-00007-of-00007.safetensors",
446
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.bias": "model-00007-of-00007.safetensors",
447
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.weight": "model-00007-of-00007.safetensors",
448
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.bias": "model-00007-of-00007.safetensors",
449
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.weight": "model-00007-of-00007.safetensors",
450
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.bias": "model-00007-of-00007.safetensors",
451
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.weight": "model-00007-of-00007.safetensors",
452
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.bias": "model-00007-of-00007.safetensors",
453
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.weight": "model-00007-of-00007.safetensors",
454
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
455
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
456
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
457
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
458
+ "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.bias": "model-00007-of-00007.safetensors",
459
+ "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.weight": "model-00007-of-00007.safetensors",
460
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.bias": "model-00007-of-00007.safetensors",
461
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.weight": "model-00007-of-00007.safetensors",
462
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.bias": "model-00007-of-00007.safetensors",
463
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.weight": "model-00007-of-00007.safetensors",
464
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.bias": "model-00007-of-00007.safetensors",
465
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.weight": "model-00007-of-00007.safetensors",
466
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.bias": "model-00007-of-00007.safetensors",
467
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.weight": "model-00007-of-00007.safetensors",
468
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.bias": "model-00007-of-00007.safetensors",
469
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.weight": "model-00007-of-00007.safetensors",
470
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
471
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
472
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
473
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
474
+ "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.bias": "model-00007-of-00007.safetensors",
475
+ "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.weight": "model-00007-of-00007.safetensors",
476
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wk.bias": "model-00007-of-00007.safetensors",
477
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wk.weight": "model-00007-of-00007.safetensors",
478
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wo.bias": "model-00007-of-00007.safetensors",
479
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wo.weight": "model-00007-of-00007.safetensors",
480
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wq.bias": "model-00007-of-00007.safetensors",
481
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wq.weight": "model-00007-of-00007.safetensors",
482
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wv.bias": "model-00007-of-00007.safetensors",
483
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention.wv.weight": "model-00007-of-00007.safetensors",
484
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention_norm.bias": "model-00007-of-00007.safetensors",
485
+ "model.vision_backbone.image_vit.transformer.resblocks.23.attention_norm.weight": "model-00007-of-00007.safetensors",
486
+ "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
487
+ "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
488
+ "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
489
+ "model.vision_backbone.image_vit.transformer.resblocks.23.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
490
+ "model.vision_backbone.image_vit.transformer.resblocks.23.ffn_norm.bias": "model-00007-of-00007.safetensors",
491
+ "model.vision_backbone.image_vit.transformer.resblocks.23.ffn_norm.weight": "model-00007-of-00007.safetensors",
492
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wk.bias": "model-00007-of-00007.safetensors",
493
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wk.weight": "model-00007-of-00007.safetensors",
494
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wo.bias": "model-00007-of-00007.safetensors",
495
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wo.weight": "model-00007-of-00007.safetensors",
496
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wq.bias": "model-00007-of-00007.safetensors",
497
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wq.weight": "model-00007-of-00007.safetensors",
498
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wv.bias": "model-00007-of-00007.safetensors",
499
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention.wv.weight": "model-00007-of-00007.safetensors",
500
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention_norm.bias": "model-00007-of-00007.safetensors",
501
+ "model.vision_backbone.image_vit.transformer.resblocks.24.attention_norm.weight": "model-00007-of-00007.safetensors",
502
+ "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
503
+ "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
504
+ "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
505
+ "model.vision_backbone.image_vit.transformer.resblocks.24.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
506
+ "model.vision_backbone.image_vit.transformer.resblocks.24.ffn_norm.bias": "model-00007-of-00007.safetensors",
507
+ "model.vision_backbone.image_vit.transformer.resblocks.24.ffn_norm.weight": "model-00007-of-00007.safetensors",
508
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.bias": "model-00006-of-00007.safetensors",
509
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.weight": "model-00006-of-00007.safetensors",
510
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.bias": "model-00006-of-00007.safetensors",
511
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.weight": "model-00006-of-00007.safetensors",
512
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.bias": "model-00006-of-00007.safetensors",
513
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.weight": "model-00006-of-00007.safetensors",
514
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.bias": "model-00006-of-00007.safetensors",
515
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.weight": "model-00006-of-00007.safetensors",
516
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.bias": "model-00006-of-00007.safetensors",
517
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.weight": "model-00006-of-00007.safetensors",
518
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
519
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
520
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
521
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
522
+ "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.bias": "model-00006-of-00007.safetensors",
523
+ "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.weight": "model-00006-of-00007.safetensors",
524
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.bias": "model-00006-of-00007.safetensors",
525
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.weight": "model-00006-of-00007.safetensors",
526
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.bias": "model-00006-of-00007.safetensors",
527
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.weight": "model-00006-of-00007.safetensors",
528
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.bias": "model-00006-of-00007.safetensors",
529
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.weight": "model-00006-of-00007.safetensors",
530
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.bias": "model-00006-of-00007.safetensors",
531
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.weight": "model-00006-of-00007.safetensors",
532
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.bias": "model-00006-of-00007.safetensors",
533
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.weight": "model-00006-of-00007.safetensors",
534
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
535
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
536
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
537
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
538
+ "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.bias": "model-00006-of-00007.safetensors",
539
+ "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.weight": "model-00006-of-00007.safetensors",
540
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.bias": "model-00006-of-00007.safetensors",
541
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.weight": "model-00006-of-00007.safetensors",
542
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.bias": "model-00006-of-00007.safetensors",
543
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.weight": "model-00006-of-00007.safetensors",
544
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.bias": "model-00006-of-00007.safetensors",
545
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.weight": "model-00006-of-00007.safetensors",
546
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.bias": "model-00006-of-00007.safetensors",
547
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.weight": "model-00006-of-00007.safetensors",
548
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.bias": "model-00006-of-00007.safetensors",
549
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.weight": "model-00006-of-00007.safetensors",
550
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
551
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
552
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
553
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
554
+ "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.bias": "model-00006-of-00007.safetensors",
555
+ "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.weight": "model-00006-of-00007.safetensors",
556
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.bias": "model-00006-of-00007.safetensors",
557
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.weight": "model-00006-of-00007.safetensors",
558
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.bias": "model-00006-of-00007.safetensors",
559
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.weight": "model-00006-of-00007.safetensors",
560
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.bias": "model-00006-of-00007.safetensors",
561
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.weight": "model-00006-of-00007.safetensors",
562
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.bias": "model-00006-of-00007.safetensors",
563
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.weight": "model-00006-of-00007.safetensors",
564
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.bias": "model-00006-of-00007.safetensors",
565
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.weight": "model-00006-of-00007.safetensors",
566
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.bias": "model-00006-of-00007.safetensors",
567
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.weight": "model-00006-of-00007.safetensors",
568
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.bias": "model-00006-of-00007.safetensors",
569
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.weight": "model-00006-of-00007.safetensors",
570
+ "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.bias": "model-00006-of-00007.safetensors",
571
+ "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.weight": "model-00006-of-00007.safetensors",
572
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.bias": "model-00006-of-00007.safetensors",
573
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.weight": "model-00006-of-00007.safetensors",
574
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.bias": "model-00006-of-00007.safetensors",
575
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.weight": "model-00006-of-00007.safetensors",
576
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.bias": "model-00006-of-00007.safetensors",
577
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.weight": "model-00006-of-00007.safetensors",
578
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.bias": "model-00006-of-00007.safetensors",
579
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.weight": "model-00006-of-00007.safetensors",
580
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.bias": "model-00007-of-00007.safetensors",
581
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.weight": "model-00007-of-00007.safetensors",
582
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
583
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
584
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
585
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
586
+ "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.bias": "model-00007-of-00007.safetensors",
587
+ "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.weight": "model-00007-of-00007.safetensors",
588
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.bias": "model-00007-of-00007.safetensors",
589
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.weight": "model-00007-of-00007.safetensors",
590
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.bias": "model-00007-of-00007.safetensors",
591
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.weight": "model-00007-of-00007.safetensors",
592
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.bias": "model-00007-of-00007.safetensors",
593
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.weight": "model-00007-of-00007.safetensors",
594
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.bias": "model-00007-of-00007.safetensors",
595
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.weight": "model-00007-of-00007.safetensors",
596
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.bias": "model-00007-of-00007.safetensors",
597
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.weight": "model-00007-of-00007.safetensors",
598
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
599
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
600
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
601
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
602
+ "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.bias": "model-00007-of-00007.safetensors",
603
+ "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.weight": "model-00007-of-00007.safetensors",
604
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.bias": "model-00007-of-00007.safetensors",
605
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.weight": "model-00007-of-00007.safetensors",
606
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.bias": "model-00007-of-00007.safetensors",
607
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.weight": "model-00007-of-00007.safetensors",
608
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.bias": "model-00007-of-00007.safetensors",
609
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.weight": "model-00007-of-00007.safetensors",
610
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.bias": "model-00007-of-00007.safetensors",
611
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.weight": "model-00007-of-00007.safetensors",
612
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.bias": "model-00007-of-00007.safetensors",
613
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.weight": "model-00007-of-00007.safetensors",
614
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.bias": "model-00007-of-00007.safetensors",
615
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.weight": "model-00007-of-00007.safetensors",
616
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.bias": "model-00007-of-00007.safetensors",
617
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.weight": "model-00007-of-00007.safetensors",
618
+ "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.bias": "model-00007-of-00007.safetensors",
619
+ "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.weight": "model-00007-of-00007.safetensors"
620
+ }
621
+ }
model.yaml ADDED
@@ -0,0 +1,144 @@
+ model_name: molmo
+ llm:
+   d_model: 3584
+   n_heads: 28
+   n_kv_heads: 4
+   head_dim: null
+   qkv_bias: true
+   clip_qkv: null
+   n_layers: 28
+   mlp_ratio: 4
+   mlp_hidden_size: 37888
+   activation_type: swiglu
+   block_type: sequential
+   rope: true
+   rope_full_precision: true
+   rope_theta: 1000000.0
+   rope_type: default
+   rope_factor: null
+   rope_high_freq_factor: null
+   rope_low_freq_factor: null
+   rope_original_max_position_embeddings: null
+   attention_type: sdpa
+   float32_attention: true
+   attention_dropout: 0.0
+   attention_layer_norm: false
+   attention_layer_norm_type: olmo
+   residual_dropout: 0.0
+   response_residual_dropout: 0.1
+   layer_norm_type: rms
+   layer_norm_with_affine: true
+   layer_norm_eps: 1.0e-06
+   attention_layer_norm_with_affine: true
+   max_sequence_length: 4096
+   max_position_embeddings: null
+   include_bias: false
+   bias_for_layer_norm: null
+   norm_after: false
+   moe_num_experts: 8
+   moe_top_k: 2
+   moe_mlp_impl: sparse
+   moe_log_expert_assignment: false
+   moe_shared_expert: false
+   moe_lbl_in_fp32: false
+   moe_interleave: false
+   moe_loss_weight: 0.1
+   moe_zloss_weight: null
+   moe_dropless: true
+   moe_capacity_factor: 1.25
+   embedding_dropout: 0.0
+   scale_logits: false
+   vocab_size: 152064
+   additional_vocab_size: 128
+   weight_tying: false
+   embedding_size: 152064
+   use_position_ids: true
+   tokenizer:
+     identifier: Qwen/Qwen2.5-7B
+     tokenizer_dir: null
+     depth_tokens: false
+   init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
+   init_incremental: null
+   new_embedding_init_range: 0.02
+   initializer_range: 0.02
+   normalize_input_embeds: false
+   activation_checkpoint: whole_layer
+   compile: blocks
+   fix_pad_tokenizer: false
+   resize_vocab: false
+   init_std: 0.02
+   init_fn: normal
+   init_cutoff_factor: null
+ vision_backbone:
+   vit:
+     image_model_type: siglip
+     image_default_input_size:
+     - 378
+     - 378
+     image_patch_size: 14
+     image_pos_patch_size: 14
+     image_emb_dim: 1152
+     image_num_heads: 16
+     image_num_key_value_heads: 16
+     image_num_layers: 27
+     image_head_dim: 72
+     image_mlp_dim: 4304
+     image_mlp_activations: gelu_pytorch_tanh
+     image_dropout_rate: 0.0
+     image_num_pos: 729
+     image_norm_eps: 1.0e-06
+     attention_dropout: 0.0
+     residual_dropout: 0.0
+     initializer_range: 0.02
+     float32_attention: true
+     attention_type: sdpa
+     activation_checkpointing: true
+     init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
+     resize_mode: siglip
+     pad_value: 0.0
+     normalize: siglip
+   image_pooling_2d: attention_meanq
+   pooling_attention_mask: false
+   image_projector: mlp
+   image_padding_embed: null
+   vit_layers:
+   - -3
+   - -9
+   skip_unused_layers: true
+   image_feature_dropout: 0.0
+   connector_activation_checkpointing: true
+   compile_vit: blocks
+ data_formatter:
+   prompt_templates: none
+   message_format: none
+   system_prompt: style_and_length
+   always_start_with_space: false
+   default_inference_len: 65
+   select_answer: best
+   debug: false
+   image_last: false
+   format_message_list: null
+   p_one_message: 0.0
+ mm_preprocessor:
+   crop_mode: overlap-and-resize-c2
+   max_crops: 8
+   max_images: null
+   max_multi_image_crops: 6
+   pooling_w: 2
+   pooling_h: 2
+   overlap_margins:
+   - 4
+   - 4
+   use_col_tokens: true
+   loss_token_weighting: null
+   legacy_image_mask: false
+   max_answer_len: null
+   img_aug: false
+ bi_directional_attn: null
+ lora_enable: null
+ lora_rank: null
+ lora_alpha: null
+ lora_dropout: null
+ lora_bias: null
+ n_action_bins: 256
+ norm_stats: null
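
The config above is plain YAML, so its values can be sanity-checked without any of the modeling code. A minimal sketch, assuming PyYAML is available in the environment (the keys come straight from model.yaml above; no official loader is implied):

    import yaml

    # Parse the config uploaded in this commit.
    with open("model.yaml") as f:
        cfg = yaml.safe_load(f)

    print(cfg["model_name"])                                  # molmo
    print(cfg["llm"]["d_model"], cfg["llm"]["n_layers"])      # 3584 28
    print(cfg["vision_backbone"]["vit"]["image_model_type"])  # siglip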
modeling_molmoact.py ADDED
@@ -0,0 +1,2102 @@
+ import math
+ from copy import deepcopy
+ from dataclasses import dataclass
+ from typing import List, Optional, Tuple, Union, Dict, Any, Sequence, Callable
+
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+
+ from transformers.models.auto import AutoModelForCausalLM, AutoModelForImageTextToText
+ from transformers.activations import ACT2FN
+ from transformers.cache_utils import Cache, DynamicCache
+ from transformers.generation import GenerationMixin
+ from transformers.generation.configuration_utils import GenerationConfig
+ from transformers.generation.utils import GenerateOutput
+ from transformers.integrations import use_kernel_forward_from_hub
+ from transformers.modeling_attn_mask_utils import AttentionMaskConverter
+ from transformers.modeling_flash_attention_utils import _flash_attention_forward, FlashAttentionKwargs
+ from transformers import GradientCheckpointingLayer
+ from transformers.modeling_outputs import (
+     BaseModelOutput,
+     BaseModelOutputWithPast,
+     BaseModelOutputWithPooling,
+     CausalLMOutputWithPast,
+ )
+ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
+ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
+ from transformers.processing_utils import Unpack
+ from transformers.utils import (
+     ModelOutput,
+     can_return_tuple,
+     is_torch_flex_attn_available,
+     logging,
+     add_start_docstrings,
+     add_start_docstrings_to_model_forward,
+ )
+
+ from .configuration_molmoact import MolmoActConfig, MolmoActVitConfig, MolmoActAdapterConfig, MolmoActLlmConfig
+
+ import re
+ import numpy as np
+ from transformers import Qwen2Tokenizer
+
+
+ if is_torch_flex_attn_available():
+     from torch.nn.attention.flex_attention import BlockMask
+
+     from transformers.integrations.flex_attention import make_flex_block_causal_mask
+
+
+ logger = logging.get_logger(__name__)
+
+
+ MOLMO_START_DOCSTRING = r"""
+     This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+     library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning
+     heads, etc.)
+
+     This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+     Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage
+     and behavior.
+
+     Parameters:
+         config ([`MolmoActConfig`]):
+             Model configuration class with all the parameters of the model. Initializing with a config file does not
+             load the weights associated with the model, only the configuration. Check out the
+             [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+ """
+
+
+ NUM_RE = re.compile(r'[+-]?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?$')
+ DEPTH_RE = re.compile(r'<DEPTH_START>(.*?)<DEPTH_END>', re.DOTALL)
+ # One-level-nested [...] matcher: outer block that may contain inner [ ... ] lists
+ OUTER_BLOCK_RE = re.compile(r'\[(?:[^\[\]]|\[[^\[\]]*\])+\]')
+
+ def _is_number(s: str) -> bool:
+     return bool(NUM_RE.match(s))
+
+ def _has_non_ascii(s: str) -> bool:
+     return any(ord(ch) > 127 for ch in s)
+
+ def _to_number(s: str):
+     """Parse a string number to int when possible, else float."""
+     v = float(s)
+     return int(v) if v.is_integer() else v
+
+ def extract_depth_string(text: str, include_tags: bool = False) -> list[str]:
+     """
+     Return all occurrences of depth strings.
+     If include_tags=True, each item is '<DEPTH_START>...<DEPTH_END>';
+     otherwise each item is just the inner '...'.
+     """
+     matches = list(DEPTH_RE.finditer(text))
+     if include_tags:
+         return [m.group(0) for m in matches]
+     return [m.group(1) for m in matches]
+
+ def extract_trace_lists(
+     text: str,
+     point_len: int | None = 2,  # e.g., 2 for [x,y], 3 for [x,y,z]; None = any length ≥1
+     min_points: int = 1
+ ) -> list[list[list[float]]]:
+     """
+     Extract *numeric* lists-of-lists like [[140,225],[130,212],...].
+     Returns a list of traces; each trace is a list of points (lists of numbers).
+
+     Heuristic:
+       - Find outer [ ... ] blocks that may contain inner lists
+       - Keep blocks where every inner list is fully numeric
+       - Enforce per-point length (point_len) and a minimum number of points (min_points)
+     """
+     traces: list[list[list[float]]] = []
+
+     # Find outer blocks that can contain nested lists
+     for block in OUTER_BLOCK_RE.findall(text):
+         inner_strs = re.findall(r'\[([^\[\]]+)\]', block)  # contents of each inner [...]
+         if len(inner_strs) < min_points:
+             continue
+
+         rows: list[list[float]] = []
+         ok = True
+         for row in inner_strs:
+             parts = [p.strip().strip('"').strip("'") for p in row.split(',')]
+             if point_len is not None and len(parts) != point_len:
+                 ok = False
+                 break
+             if not all(_is_number(p) for p in parts):
+                 ok = False
+                 break
+             rows.append([_to_number(p) for p in parts])
+
+         if ok:
+             traces.append(rows)
+
+     return traces
+
+ def extract_action_token_lists(
+     text: str,
+     only_len: int | None = None,      # e.g., 7 if you expect 7-D actions
+     require_non_ascii: bool = True    # set False if your tokens can be pure ASCII
+ ) -> list[list[str]]:
+     """
+     Extract all [ ... ] groups split by commas, discard numeric lists,
+     and return token lists (quotes stripped, whitespace trimmed).
+     """
+     lists = []
+     # Match NON-nested bracketed groups: [ ... ] without inner [ or ]
+     for inner in re.findall(r'\[([^\[\]]+)\]', text):
+         parts = [p.strip().strip('"').strip("'") for p in inner.split(',')]
+
+         if only_len is not None and len(parts) != only_len:
+             continue
+
+         # If *all* items are numeric -> not action tokens (like coordinates)
+         if all(_is_number(p) for p in parts):
+             continue
+
+         # Optionally require at least one non-ASCII char across tokens (helps exclude plain words/numbers)
+         if require_non_ascii and not any(_has_non_ascii(p) for p in parts):
+             continue
+
+         lists.append(parts)
+
+     return lists
+
+
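+ # Illustrative behavior of the three helpers above on a synthetic string
+ # (a doctest-style sketch; the sample text is made up, not real model output):
+ #
+ # >>> s = "<DEPTH_START>depth tokens<DEPTH_END> trace: [[140, 225], [130, 212]]"
+ # >>> extract_depth_string(s)
+ # ['depth tokens']
+ # >>> extract_trace_lists(s, point_len=2)
+ # [[[140, 225], [130, 212]]]
+ # >>> extract_action_token_lists("[行, 动]")  # non-numeric, non-ASCII tokens pass the filters
+ # [['行', '动']]
+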
+ @dataclass
+ class MolmoActCausalLMOutputWithPast(ModelOutput):
+     """
+     Base class for MolmoAct causal language model (or autoregressive) outputs.
+
+     Args:
+         loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
+             Language modeling loss (for next-token prediction).
+         logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
+             Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
+         past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+             Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+             `(batch_size, num_heads, sequence_length, embed_size_per_head)`.
+
+             Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
+             `past_key_values` input) to speed up sequential decoding.
+         hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
+             Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
+             one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
+
+             Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
+         attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
+             Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
+             sequence_length)`.
+
+             Attention weights after the attention softmax, used to compute the weighted average in the self-attention
+             heads.
+         image_hidden_states (`torch.FloatTensor`, *optional*):
+             A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
+             image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
+     """
+
+     loss: Optional[torch.FloatTensor] = None
+     logits: Optional[torch.FloatTensor] = None
+     past_key_values: Optional[List[torch.FloatTensor]] = None
+     hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+     attentions: Optional[Tuple[torch.FloatTensor]] = None
+     image_hidden_states: Optional[torch.FloatTensor] = None
+
+
+ @dataclass
+ class MolmoActModelOutputWithPast(BaseModelOutputWithPast):
+     """
+     Base class for MolmoAct outputs, with hidden states and attentions.
+
+     Args:
+         last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
+             Sequence of hidden-states at the output of the last layer of the model.
+         past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+             Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+             `(batch_size, num_heads, sequence_length, embed_size_per_head)`.
+
+             Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
+             `past_key_values` input) to speed up sequential decoding.
+         hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
+             Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
+             one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
+
+             Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
+         attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
+             Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
+             sequence_length)`.
+
+             Attention weights after the attention softmax, used to compute the weighted average in the self-attention
+             heads.
+         image_hidden_states (`torch.FloatTensor`, *optional*):
+             A `torch.FloatTensor` of size `(batch_num_patches, hidden_size)`.
+             image_hidden_states of the model produced by the vision backbone.
+     """
+
+     image_hidden_states: Optional[torch.FloatTensor] = None
+     logits: Optional[torch.FloatTensor] = None
+
+
+ class MolmoActPreTrainedModel(PreTrainedModel):
+     config_class = MolmoActLlmConfig
+     base_model_prefix = "model"
+     supports_gradient_checkpointing = True
+     _no_split_modules = ["MolmoActDecoderLayer", "MolmoActPostNormDecoderLayer"]
+     _skip_keys_device_placement = ["past_key_values"]
+     _supports_flash_attn_2 = True
+     _supports_sdpa = True
+     _supports_flex_attn = False
+     _supports_cache_class = True
+     _supports_quantized_cache = True
+     _supports_static_cache = True
+     _supports_attention_backend = True
+
+     def _init_weights(self, module):
+         std = self.config.initializer_range
+         if isinstance(module, (nn.Linear,)):
+             module.weight.data.normal_(mean=0.0, std=std)
+             if module.bias is not None:
+                 module.bias.data.zero_()
+         elif isinstance(module, MolmoActEmbedding):
+             module.embedding.data.normal_(mean=0.0, std=std)
+             module.new_embedding.data.normal_(mean=0.0, std=std)
+         elif isinstance(module, nn.Embedding):
+             module.weight.data.normal_(mean=0.0, std=std)
+             if module.padding_idx is not None:
+                 module.weight.data[module.padding_idx].zero_()
+         elif isinstance(module, MolmoActRMSNorm):
+             module.weight.data.fill_(1.0)
+         elif isinstance(module, nn.LayerNorm):
+             module.weight.data.fill_(1.0)
+             if module.bias is not None:
+                 module.bias.data.zero_()
+
+
+ class ViTMLP(nn.Module):
+     def __init__(self, dim: int, hidden_dim: int, hidden_act: str, device: Union[str, torch.device] = None):
+         super().__init__()
+         self.w1 = nn.Linear(dim, hidden_dim, bias=True, device=device)
+         self.act = ACT2FN[hidden_act]
+         self.w2 = nn.Linear(hidden_dim, dim, bias=True, device=device)
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         return self.w2(self.act(self.w1(x)))
+
+
+ class ViTMultiHeadDotProductAttention(nn.Module):
+     def __init__(
+         self,
+         hidden_size: int,
+         num_heads: int,
+         num_key_value_heads: int,
+         head_dim: int,
+         use_bias: bool = True,
+         input_dim: Optional[int] = None,
+         float32_attention: bool = True,
+         attention_dropout: float = 0.0,
+         residual_dropout: float = 0.0,
+         device: Union[str, torch.device] = None,
+         attn_implementation: str = "eager",
+     ):
+         super().__init__()
+
+         self.hidden_size = hidden_size
+         self.num_heads = num_heads
+         self.head_dim = head_dim
+         self.num_key_value_heads = num_key_value_heads
+         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
+         self.attn_implementation = attn_implementation
+         self.is_causal = False
+
+         input_dim = input_dim or hidden_size
+
+         self.wq = nn.Linear(
+             input_dim,
+             self.num_heads * self.head_dim,
+             bias=use_bias,
+             device=device,
+         )
+         self.wk = nn.Linear(
+             input_dim,
+             self.num_key_value_heads * self.head_dim,
+             bias=use_bias,
+             device=device,
+         )
+         self.wv = nn.Linear(
+             input_dim,
+             self.num_key_value_heads * self.head_dim,
+             bias=use_bias,
+             device=device,
+         )
+         self.wo = nn.Linear(
+             self.num_heads * self.head_dim,
+             self.hidden_size,
+         )
+         self.float32_attention = float32_attention
+         self.attention_dropout = attention_dropout
+         self.residual_dropout = nn.Dropout(residual_dropout)
+
+     def _split_heads(self, hidden_states, num_heads) -> torch.Tensor:
+         return hidden_states.reshape(hidden_states.shape[:2] + (num_heads, self.head_dim))
+
+     def _merge_heads(self, hidden_states) -> torch.Tensor:
+         return hidden_states.reshape(hidden_states.shape[:2] + (self.hidden_size,))
+
+     def forward(
+         self,
+         inputs_q: torch.Tensor,
+         inputs_kv: Optional[torch.Tensor] = None,
+         attn_mask: Optional[torch.Tensor] = None,
+     ) -> torch.Tensor:
+
+         if inputs_kv is not None:
+             inputs_k = inputs_kv
+             inputs_v = inputs_kv
+         else:
+             inputs_k = inputs_q
+             inputs_v = inputs_q
+
+         xq, xk, xv = self.wq(inputs_q), self.wk(inputs_k), self.wv(inputs_v)
+
+         xq = self._split_heads(xq, self.num_heads)
+         xk = self._split_heads(xk, self.num_key_value_heads)
+         xv = self._split_heads(xv, self.num_key_value_heads)
+
+         if self.num_heads != self.num_key_value_heads:
+             xk = xk.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
+             xv = xv.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
+
+         og_dtype = xq.dtype
+
+         if self.float32_attention:
+             xq = xq.to(torch.float)
+             xk = xk.to(torch.float)
+
+         dropout_p = 0.0 if not self.training else self.attention_dropout
+
+         if self.attn_implementation == "eager":
+             attn_weights = torch.einsum("...qhd,...khd->...hqk", xq / math.sqrt(xq.size(-1)), xk)
+             attn_weights = F.softmax(attn_weights, dim=-1)
+             attn_weights = F.dropout(
+                 attn_weights,
+                 p=dropout_p,
+                 training=self.training
+             )
+             attn_output = torch.einsum("...hqk,...khd->...qhd", attn_weights.to(xv.dtype), xv)
+
+         elif self.attn_implementation == "sdpa":
+             if not torch.is_autocast_enabled():
+                 xv = xv.to(torch.float)
+
+             attn_output = F.scaled_dot_product_attention(
+                 xq.transpose(1, 2).contiguous(),
+                 xk.transpose(1, 2).contiguous(),
+                 xv.transpose(1, 2).contiguous(),
+                 attn_mask=attn_mask,
+                 is_causal=False,
+                 dropout_p=dropout_p,
+             ).transpose(1, 2)
+
+         elif self.attn_implementation == "flash_attention_2":
+             assert not self.float32_attention
+             # Downcast in case we are running with fp32 hidden states
+             attn_output = _flash_attention_forward(
+                 xq.transpose(1, 2).to(torch.bfloat16),
+                 xk.transpose(1, 2).to(torch.bfloat16),
+                 xv.transpose(1, 2).to(torch.bfloat16),
+                 attention_mask=None,
+                 query_length=inputs_q.shape[1],
+                 is_causal=False,
+                 dropout=dropout_p,
+             )
+         else:
+             raise ValueError(f"Attention implementation {self.attn_implementation} not supported")
+
+         attn_output = attn_output.to(og_dtype)
+         attn_output = self._merge_heads(attn_output)
+         attn_output = self.wo(attn_output)
+         attn_output = self.residual_dropout(attn_output)
+
+         return attn_output
+
+
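+ # Shape sketch for the grouped-query expansion above (illustrative numbers,
+ # not tied to any particular checkpoint): with num_heads=16 and
+ # num_key_value_heads=4, num_key_value_groups == 4, so each K/V head is
+ # repeated 4 times along dim=2 until K/V match Q's (B, seq, 16, head_dim)
+ # layout:
+ #
+ #     xk: (B, N, 4, head_dim) --repeat_interleave(4, dim=2)--> (B, N, 16, head_dim)
+ #
+ # When num_heads == num_key_value_heads (the SigLIP ViT in model.yaml uses
+ # 16 and 16), the branch is skipped entirely.
+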
424
+ class MolmoActVisionBlock(nn.Module):
425
+
426
+ def __init__(self, config: MolmoActVitConfig, device: Union[str, torch.device] = None):
427
+ super().__init__()
428
+ self.attention = ViTMultiHeadDotProductAttention(
429
+ hidden_size=config.hidden_size,
430
+ num_heads=config.num_attention_heads,
431
+ num_key_value_heads=config.num_key_value_heads,
432
+ head_dim=config.head_dim,
433
+ float32_attention=config.float32_attention,
434
+ attention_dropout=config.attention_dropout,
435
+ residual_dropout=config.residual_dropout,
436
+ device=device,
437
+ attn_implementation=config._attn_implementation,
438
+ )
439
+ self.feed_forward = ViTMLP(config.hidden_size, config.intermediate_size, config.hidden_act, device=device)
440
+ self.attention_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps, device=device)
441
+ self.ffn_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps, device=device)
442
+
443
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
444
+ x = x + self.attention(self.attention_norm(x))
445
+ x = x + self.feed_forward(self.ffn_norm(x))
446
+ return x
447
+
448
+
449
+ class MolmoActVisionBlockCollection(nn.Module):
450
+
451
+ def __init__(self, config: MolmoActVitConfig, device: Union[str, torch.device] = None):
452
+ super().__init__()
453
+ self.conifg = config
454
+ self.resblocks = nn.ModuleList([
455
+ MolmoActVisionBlock(config, device) for _ in range(config.num_hidden_layers)
456
+ ])
457
+
458
+ def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
459
+ hidden_states = []
460
+ for r in self.resblocks:
461
+ x = r(x)
462
+ hidden_states.append(x)
463
+ return hidden_states
464
+
465
+
466
+ def _expand_token(token, batch_size: int):
467
+ return token.view(1, 1, -1).expand(batch_size, -1, -1)
468
+
469
+
470
+ class MolmoActVisionTransformer(nn.Module):
471
+
472
+ def __init__(self, config: MolmoActVitConfig, device: Union[str, torch.device] = None):
473
+ super().__init__()
474
+ self.config = config
475
+
476
+ self.scale = config.hidden_size ** -0.5
477
+
478
+ # optional CLS
479
+ self.num_prefix_tokens: int = 1 if config.use_cls_token else 0
480
+ if config.use_cls_token:
481
+ self.class_embedding = nn.Parameter(
482
+ torch.zeros(config.hidden_size, device=device)
483
+ )
484
+
485
+ # positional embeddings
486
+ self.positional_embedding = nn.Parameter(
487
+ torch.zeros(config.image_num_pos, config.hidden_size, device=device),
488
+ )
489
+
490
+ image_patch_size = config.image_patch_size
491
+ self.patch_embedding = nn.Linear(
492
+ image_patch_size * image_patch_size * 3,
493
+ config.hidden_size,
494
+ bias=config.patch_bias,
495
+ device=device,
496
+ )
497
+
498
+ # optional pre-LN
499
+ self.pre_ln = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps, device=device) \
500
+ if config.pre_layernorm else None
501
+
502
+ self.transformer = MolmoActVisionBlockCollection(config, device)
503
+
504
+ def add_pos_emb(self, x: torch.Tensor, patch_num: Tuple[int, int]) -> torch.Tensor:
505
+ pos_emb = self.positional_embedding
506
+ if self.config.use_cls_token:
507
+ cls_pos, pos_emb = pos_emb[:1], pos_emb[1:] # split out CLS
508
+
509
+ pos_emb = pos_emb.reshape(
510
+ (int(math.sqrt(pos_emb.shape[0])), int(math.sqrt(pos_emb.shape[0])), pos_emb.shape[1])
511
+ )
512
+
513
+ (patch_num_0, patch_num_1) = patch_num
514
+
515
+ if pos_emb.shape[0] != patch_num_0 or pos_emb.shape[1] != patch_num_1:
516
+ # Derived from https://github.com/facebookresearch/mae/blob/main/util/pos_embed.py
517
+ # antialias: default True in jax.image.resize
518
+ pos_emb = pos_emb.unsqueeze(0).permute(0, 3, 1, 2)
519
+ pos_emb = F.interpolate(
520
+ pos_emb, size=(patch_num_0, patch_num_1), mode="bicubic", align_corners=False, antialias=True,
521
+ )
522
+ pos_emb = pos_emb.permute(0, 2, 3, 1).squeeze(0)
523
+
524
+ pos_emb = pos_emb.reshape(-1, pos_emb.shape[-1])
525
+
526
+ if self.config.use_cls_token:
527
+ x = x + torch.cat([cls_pos[None, :, :], pos_emb[None, :, :]], dim=1).to(x.dtype)
528
+ else:
529
+ x = x + pos_emb[None, :, :].to(x.dtype)
530
+
531
+ return x
532
+
533
+ def forward(self, x: torch.Tensor, patch_num: Optional[Tuple[int, int]] = None) -> List[torch.Tensor]:
534
+ """
535
+ :param x: (batch_size, num_patch, n_pixels)
536
+ """
537
+ if patch_num is None:
538
+ patch_num = self.config.image_num_patch
539
+
540
+ B, N, D = x.shape
541
+
542
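+ # Patches arrive pre-flattened as pixel vectors, so the patch embedding is a single linear layer.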
+ x = self.patch_embedding(x)
543
+
544
+ if self.config.use_cls_token:
545
+ x = torch.cat([_expand_token(self.class_embedding, x.size(0)).to(x.dtype), x], dim=1)
546
+
547
+ # class embeddings and positional embeddings
548
+ x = self.add_pos_emb(x, patch_num)
549
+
550
+ if self.pre_ln is not None:
551
+ x = self.pre_ln(x)
552
+
553
+ hidden_states = self.transformer(x)
554
+ return hidden_states
555
+
556
+
557
+ class ImageProjectorMLP(nn.Module):
558
+
559
+ def __init__(
560
+ self,
561
+ input_dim: int,
562
+ hidden_dim: int,
563
+ output_dim: int,
564
+ hidden_act: str,
565
+ device: Union[str, torch.device] = None,
566
+ ):
567
+ super().__init__()
568
+ self.w1 = nn.Linear(input_dim, hidden_dim, bias=False, device=device)
569
+ self.w2 = nn.Linear(hidden_dim, output_dim, bias=False, device=device)
570
+ self.w3 = nn.Linear(input_dim, hidden_dim, bias=False, device=device)
571
+ self.act = ACT2FN[hidden_act]
572
+
573
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
574
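+ # Gated MLP: act(w1(x)) gates w3(x) before the w2 down-projection (SwiGLU when the activation is SiLU).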
+ return self.w2(self.act(self.w1(x)) * self.w3(x))
575
+
576
+
577
+ class MolmoActVisionBackbone(nn.Module):
578
+ def __init__(self, vit_config: MolmoActVitConfig, adapter_config: MolmoActAdapterConfig):
579
+ super().__init__()
580
+ self.vit_config = vit_config
581
+ self.adapter_config = adapter_config
582
+
583
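+ # Resolve negative indices (counted from the last layer) into absolute ViT layer indices.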
+ self.vit_layers = []
584
+ for layer in adapter_config.vit_layers:
585
+ if layer >= 0:
586
+ self.vit_layers.append(layer)
587
+ else:
588
+ self.vit_layers.append(layer + vit_config.num_hidden_layers)
589
+
590
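+ # If the deepest requested feature layer is not the last one, build a truncated ViT and skip the unused layers.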
+ last_layer_needed = max(self.vit_layers) + 1
591
+ if last_layer_needed < vit_config.num_hidden_layers:
592
+ new_vit_config = deepcopy(vit_config)
593
+ new_vit_config.num_hidden_layers = last_layer_needed
594
+ self.image_vit = MolmoActVisionTransformer(new_vit_config)
595
+ else:
596
+ self.image_vit = MolmoActVisionTransformer(vit_config)
597
+
598
+ self.num_prefix_tokens: int = self.image_vit.num_prefix_tokens
599
+
600
+ # optional pad_embed
601
+ self.pad_embed = None
602
+ if adapter_config.image_padding_embed == "pad_and_partial_pad":
603
+ pool_dim = vit_config.hidden_size * len(adapter_config.vit_layers)
604
+ self.pad_embed = nn.Parameter(torch.zeros((2, pool_dim)))
605
+
606
+ pool_dim = vit_config.hidden_size * len(adapter_config.vit_layers)
607
+ self.image_pooling_2d = ViTMultiHeadDotProductAttention(
608
+ hidden_size=adapter_config.hidden_size,
609
+ num_heads=adapter_config.num_attention_heads,
610
+ num_key_value_heads=adapter_config.num_key_value_heads,
611
+ head_dim=adapter_config.head_dim,
612
+ input_dim=pool_dim,
613
+ float32_attention=adapter_config.float32_attention,
614
+ attention_dropout=adapter_config.attention_dropout,
615
+ residual_dropout=adapter_config.residual_dropout,
616
+ attn_implementation=adapter_config._attn_implementation,
617
+ )
618
+ self.image_projector = ImageProjectorMLP(
619
+ adapter_config.hidden_size,
620
+ adapter_config.intermediate_size,
621
+ adapter_config.text_hidden_size,
622
+ adapter_config.hidden_act,
623
+ )
624
+ self.image_feature_dropout = nn.Dropout(adapter_config.image_feature_dropout)
625
+
626
+ def encode_image(self, images: torch.Tensor) -> torch.Tensor:
627
+ """
628
+ :param images: (batch_size, num_crops, num_patch, n_pixels)
629
+ """
630
+ B, T, N, D = images.shape
631
+ images = images.view(B * T, N, D)
632
+ image_features = self.image_vit(images)
633
+
634
+ features = []
635
+ for layer in self.vit_layers:
636
+ features.append(image_features[layer])
637
+ image_features = torch.cat(features, dim=-1)
638
+
639
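+ # Drop the CLS/prefix token so only patch features remain.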
+ if self.num_prefix_tokens > 0:
640
+ image_features = image_features[:, 1:]
641
+ image_features = image_features.view(B, T, N, -1)
642
+ return image_features
643
+
644
+ @property
645
+ def dtype(self) -> torch.dtype:
646
+ return self.image_vit.patch_embedding.weight.dtype
647
+
648
+ @property
649
+ def device(self) -> torch.device:
650
+ return self.image_vit.patch_embedding.weight.device
651
+
652
+ def forward(
653
+ self,
654
+ images: torch.Tensor,
655
+ pooled_patches_idx: torch.Tensor,
656
+ image_masks: torch.Tensor = None,
657
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
658
+
659
+ # image_features: (batch_size, num_crops(=num_image), num_patch, n_layers x image_emb_dim)
660
+ batch_size, num_image = images.shape[:2]
661
+ images = images.to(device=self.device, dtype=self.dtype)
662
+ image_features = self.encode_image(images)
663
+
664
+ # optional padding embeddings
665
+ if self.pad_embed is not None and image_masks is not None:
666
+ image_masks = image_masks.to(device=self.device)
667
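+ # pad_embed[0] marks patches that are entirely padding; pad_embed[1] marks partially padded patches.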
+ all_pad = (image_masks == 0).to(image_features.dtype)
668
+ partial = torch.logical_and(image_masks < 1, ~(image_masks == 0)).to(image_features.dtype)
669
+ image_features = image_features + self.pad_embed[0][None,None,None,:] * all_pad[...,None] \
670
+ + self.pad_embed[1][None,None,None,:] * partial[...,None]
671
+
672
+ image_features = self.image_feature_dropout(image_features)
673
+ dim = image_features.shape[-1]
674
+
675
+ valid = pooled_patches_idx >= 0
676
+ valid_token = torch.any(valid, -1)
677
+
678
+ # Use `pooled_patches_idx` to arrange the features for image pooling
679
+ batch_idx = torch.arange(pooled_patches_idx.shape[0], dtype=torch.long, device=pooled_patches_idx.device)
680
+ batch_idx = torch.tile(batch_idx.view(batch_size, 1, 1), [1, pooled_patches_idx.shape[1], pooled_patches_idx.shape[2]])
681
+
682
+ # Now [batch, num_high_res_features, pool_dim, dim]
683
+ to_pool = image_features.reshape(batch_size, -1, dim)[batch_idx, torch.clip(pooled_patches_idx, 0)]
684
+ to_pool = to_pool * valid.to(self.dtype)[:, :, :, None]
685
+ to_pool = to_pool.reshape([-1, pooled_patches_idx.shape[-1], dim])
686
+
687
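+ # Attention pooling: the mean of each patch group serves as the query that attends over that group.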
+ query = to_pool.mean(-2, keepdim=True)
688
+ pooled_features = self.image_pooling_2d(query, to_pool)
689
+ pooled_features = pooled_features.reshape([batch_size, -1, pooled_features.shape[-1]])
690
+
691
+ # MLP layer to map the feature.
692
+ pooled_features = self.image_projector(pooled_features)
693
+ return pooled_features.view(-1, pooled_features.shape[-1])[valid_token.flatten()]
694
+
695
+
696
+ # Copied from transformers.models.llama.modeling_llama.rotate_half
697
+ def rotate_half(x):
698
+ """Rotates half the hidden dims of the input."""
699
+ x1 = x[..., : x.shape[-1] // 2]
700
+ x2 = x[..., x.shape[-1] // 2 :]
701
+ return torch.cat((-x2, x1), dim=-1)
702
+
703
+
704
+ # Copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb
705
+ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
706
+ """Applies Rotary Position Embedding to the query and key tensors.
707
+
708
+ Args:
709
+ q (`torch.Tensor`): The query tensor.
710
+ k (`torch.Tensor`): The key tensor.
711
+ cos (`torch.Tensor`): The cosine part of the rotary embedding.
712
+ sin (`torch.Tensor`): The sine part of the rotary embedding.
713
+ position_ids (`torch.Tensor`, *optional*):
714
+ Deprecated and unused.
715
+ unsqueeze_dim (`int`, *optional*, defaults to 1):
716
+ The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
717
+ sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
718
+ that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
719
+ k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
720
+ cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
721
+ the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
722
+ Returns:
723
+ `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
724
+ """
725
+ cos = cos.unsqueeze(unsqueeze_dim)
726
+ sin = sin.unsqueeze(unsqueeze_dim)
727
+ q_embed = (q * cos) + (rotate_half(q) * sin)
728
+ k_embed = (k * cos) + (rotate_half(k) * sin)
729
+ return q_embed, k_embed
730
+
731
+
732
+ # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding
733
+ class MolmoActRotaryEmbedding(nn.Module):
734
+
735
+ def __init__(self, config: MolmoActLlmConfig, device: Union[str, torch.device] = None):
736
+ super().__init__()
737
+ # BC: "rope_type" was originally "type"
738
+ if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
739
+ self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
740
+ else:
741
+ self.rope_type = "default"
742
+ self.max_seq_len_cached = config.max_position_embeddings
743
+ self.original_max_seq_len = config.max_position_embeddings
744
+
745
+ self.config = config
746
+ self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]
747
+
748
+ inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
749
+ self.register_buffer("inv_freq", inv_freq, persistent=False)
750
+ self.original_inv_freq = self.inv_freq
751
+
752
+ @torch.no_grad()
753
+ @dynamic_rope_update # power user: used with advanced RoPE types (e.g. dynamic rope)
754
+ def forward(self, x, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
755
+ inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
756
+ position_ids_expanded = position_ids[:, None, :].float()
757
+
758
+ device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
759
+ with torch.autocast(device_type=device_type, enabled=False): # Force float32
760
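+ # Rotation angles are the outer product of inverse frequencies and positions; duplicating them covers both halves of the head dim.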
+ freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
761
+ emb = torch.cat((freqs, freqs), dim=-1)
762
+ cos = emb.cos() * self.attention_scaling
763
+ sin = emb.sin() * self.attention_scaling
764
+
765
+ return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
766
+
767
+
768
+ @use_kernel_forward_from_hub("RMSNorm")
769
+ class MolmoActRMSNorm(nn.Module):
770
+
771
+ def __init__(
772
+ self,
773
+ size: int,
774
+ eps: float = 1e-6,
775
+ device: Union[str, torch.device] = None,
776
+ ):
777
+ super().__init__()
778
+ self.weight = nn.Parameter(torch.ones(size, device=device))
779
+ self.eps = eps
780
+
781
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
782
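+ # Normalize in float32 for numerical stability, then cast back to the input dtype.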
+ with torch.autocast(enabled=False, device_type=x.device.type):
783
+ og_dtype = x.dtype
784
+ x = x.to(torch.float32)
785
+ variance = x.pow(2).mean(-1, keepdim=True)
786
+ x = x * torch.rsqrt(variance + self.eps)
787
+ x = x.to(og_dtype)
788
+
789
+ return self.weight * x
790
+
791
+ def extra_repr(self):
792
+ return f"{tuple(self.weight.shape)}, eps={self.eps}"
793
+
794
+
795
+ # Copied from transformers.models.llama.modeling_llama.repeat_kv
796
+ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
797
+ """
798
+ This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
799
+ num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
800
+ """
801
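+ # e.g. n_rep=4 turns (batch, 8, seq, head_dim) key/value states into (batch, 32, seq, head_dim)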
+ batch, num_key_value_heads, slen, head_dim = hidden_states.shape
802
+ if n_rep == 1:
803
+ return hidden_states
804
+ hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
805
+ return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
806
+
807
+
808
+ def eager_attention_forward(
809
+ module: nn.Module,
810
+ query: torch.Tensor,
811
+ key: torch.Tensor,
812
+ value: torch.Tensor,
813
+ attention_mask: Optional[torch.Tensor],
814
+ scaling: float,
815
+ dropout: float = 0.0,
816
+ **kwargs,
817
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
818
+ key_states = repeat_kv(key, module.num_key_value_groups)
819
+ value_states = repeat_kv(value, module.num_key_value_groups)
820
+
821
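+ # Explicit attention: scaled QK^T, optional additive mask, softmax in float32, dropout, then weight the values.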
+ attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling
822
+ if attention_mask is not None:
823
+ causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
824
+ attn_weights = attn_weights + causal_mask
825
+
826
+ attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
827
+ attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
828
+ attn_output = torch.matmul(attn_weights, value_states)
829
+ attn_output = attn_output.transpose(1, 2).contiguous()
830
+
831
+ return attn_output, attn_weights
832
+
833
+
834
+ class MolmoActAttention(nn.Module):
835
+ """Multi-headed attention from 'Attention Is All You Need' paper"""
836
+
837
+ # copied from transformers.models.llama.modeling_llama.LlamaAttention.__init__ with Llama->MolmoAct
838
+ def __init__(self, config: MolmoActLlmConfig, layer_idx: Optional[int] = None) -> None:
839
+ super().__init__()
840
+ self.config = config
841
+ self.layer_idx = layer_idx
842
+ if layer_idx is None:
843
+ logger.warning_once(
844
+ f"Instantiating {self.__class__.__name__} without passing a `layer_idx` is not recommended and will "
845
+ "lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` "
846
+ "when creating this class."
847
+ )
848
+
849
+ self.num_heads = config.num_attention_heads
850
+ self.num_key_value_heads = config.num_key_value_heads
851
+ self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
852
+ self.head_dim = config.head_dim
853
+ self.scaling = self.head_dim**-0.5
854
+ self.is_causal = True
855
+
856
+ if (config.head_dim * config.num_attention_heads) != config.hidden_size:
857
+ raise ValueError(
858
+ f"hidden_size must be divisible by num_heads (got `hidden_size`: {config.hidden_size}"
859
+ f" and `num_attention_heads`: {config.num_attention_heads})."
860
+ )
861
+
862
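+ # Fuse the query, key and value projections into one linear layer; forward splits the output with these dims.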
+ self.fused_dims = (
863
+ config.hidden_size,
864
+ config.head_dim * config.num_key_value_heads,
865
+ config.head_dim * config.num_key_value_heads,
866
+ )
867
+ self.att_proj = nn.Linear(
868
+ config.hidden_size,
869
+ sum(self.fused_dims),
870
+ bias=config.qkv_bias,
871
+ )
872
+
873
+ # Layer norms.
874
+ self.k_norm: Optional[MolmoActRMSNorm] = None
875
+ self.q_norm: Optional[MolmoActRMSNorm] = None
876
+ self.qk_norm_type: Optional[str] = None
877
+ if config.use_qk_norm:
878
+ k_norm_size = (
879
+ config.head_dim
880
+ if config.qk_norm_type == "qwen3" else
881
+ config.num_key_value_heads * config.head_dim
882
+ )
883
+ self.k_norm = MolmoActRMSNorm(k_norm_size, eps=config.layer_norm_eps)
884
+ q_norm_size = (
885
+ config.head_dim
886
+ if config.qk_norm_type == "qwen3" else
887
+ config.num_attention_heads * config.head_dim
888
+ )
889
+ self.q_norm = MolmoActRMSNorm(q_norm_size, eps=config.layer_norm_eps)
890
+ self.qk_norm_type = config.qk_norm_type
891
+
892
+ self.attention_dropout = config.attention_dropout
893
+
894
+ self.attn_out = nn.Linear(
895
+ config.hidden_size,
896
+ config.hidden_size,
897
+ bias=False,
898
+ )
899
+
900
+ def forward(
901
+ self,
902
+ hidden_states: torch.Tensor,
903
+ position_embeddings: Tuple[torch.Tensor, torch.Tensor],
904
+ attention_mask: Optional[torch.Tensor],
905
+ past_key_value: Optional[Cache] = None,
906
+ cache_position: Optional[torch.LongTensor] = None,
907
+ **kwargs: Unpack[FlashAttentionKwargs],
908
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
909
+ input_shape = hidden_states.shape[:-1]
910
+ hidden_shape = (*input_shape, -1, self.head_dim)
911
+
912
+ qkv = self.att_proj(hidden_states)
913
+ query_states, key_states, value_states = qkv.split(self.fused_dims, dim=-1)
914
+ value_states = value_states.view(hidden_shape)
915
+
916
+ # Optionally apply layer norm to keys and queries.
917
+ if self.q_norm is not None and self.k_norm is not None and self.qk_norm_type != "qwen3":
918
+ query_states = self.q_norm(query_states)
919
+ key_states = self.k_norm(key_states)
920
+
921
+ query_states = query_states.view(hidden_shape)
922
+ key_states = key_states.view(hidden_shape)
923
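+ # The "qwen3" variant normalizes per head, so it must run after reshaping to (..., num_heads, head_dim).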
+ if self.q_norm is not None and self.k_norm is not None and self.qk_norm_type == "qwen3":
924
+ query_states = self.q_norm(query_states)
925
+ key_states = self.k_norm(key_states)
926
+ query_states = query_states.transpose(1, 2)
927
+ key_states = key_states.transpose(1, 2)
928
+ value_states = value_states.transpose(1, 2)
929
+
930
+ cos, sin = position_embeddings
931
+ query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)
932
+
933
+ if past_key_value is not None:
934
+ # sin and cos are specific to RoPE models; cache_position needed for the static cache
935
+ cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
936
+ key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
937
+
938
+ attention_interface: Callable = eager_attention_forward
939
+ if self.config._attn_implementation != "eager":
940
+ if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
941
+ logger.warning_once(
942
+ "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to "
943
+ 'eager attention. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.'
944
+ )
945
+ else:
946
+ attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
947
+
948
+ attn_output, attn_weights = attention_interface(
949
+ self,
950
+ query_states,
951
+ key_states,
952
+ value_states,
953
+ attention_mask,
954
+ dropout=0.0 if not self.training else self.attention_dropout,
955
+ scaling=self.scaling,
956
+ **kwargs,
957
+ )
958
+
959
+ attn_output = attn_output.reshape(*input_shape, -1).contiguous()
960
+ attn_output = self.attn_out(attn_output)
961
+
962
+ return attn_output, attn_weights
963
+
964
+
965
+ class LanguageModelMLP(nn.Module):
966
+
967
+ def __init__(
968
+ self,
969
+ input_dim: int,
970
+ intermediate_size: int,
971
+ hidden_act: str,
972
+ device: Union[str, torch.device] = None,
973
+ ):
974
+ super().__init__()
975
+ self.ff_proj = nn.Linear(input_dim, intermediate_size * 2, bias=False, device=device)
976
+ self.ff_out = nn.Linear(intermediate_size, input_dim, bias=False, device=device)
977
+ self.act = ACT2FN[hidden_act]
978
+
979
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
980
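+ # ff_proj computes the value and gate halves in one matmul; chunk splits them apart again.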
+ x = self.ff_proj(x)
981
+ x, gate = x.chunk(2, dim=-1)
982
+ x = self.act(gate) * x
983
+ x = self.ff_out(x)
984
+ return x
985
+
986
+
987
+ class MolmoActDecoderLayer(GradientCheckpointingLayer):
988
+
989
+ def __init__(
990
+ self,
991
+ config: MolmoActLlmConfig,
992
+ layer_idx: Optional[int] = None,
993
+ device: Union[str, torch.device] = None
994
+ ):
995
+ super().__init__()
996
+ self.config = config
997
+
998
+ self.self_attn = MolmoActAttention(config, layer_idx)
999
+ self.attn_norm = MolmoActRMSNorm(
1000
+ config.hidden_size, eps=config.layer_norm_eps, device=device)
1001
+ self.dropout = nn.Dropout(config.residual_dropout)
1002
+ self.mlp = LanguageModelMLP(
1003
+ config.hidden_size, config.intermediate_size, config.hidden_act, device=device)
1004
+ self.ff_norm = MolmoActRMSNorm(
1005
+ config.hidden_size, eps=config.layer_norm_eps, device=device)
1006
+
1007
+ def forward(
1008
+ self,
1009
+ hidden_states: torch.Tensor,
1010
+ attention_mask: Optional[torch.Tensor] = None,
1011
+ position_ids: Optional[torch.LongTensor] = None,
1012
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
1013
+ output_attentions: Optional[bool] = False,
1014
+ use_cache: Optional[bool] = False,
1015
+ cache_position: Optional[torch.LongTensor] = None,
1016
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
1017
+ **kwargs,
1018
+ ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
1019
+ """
1020
+ Args:
1021
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
1022
+ attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
1023
+ `(batch, sequence_length)` where padding elements are indicated by 0.
1024
+ output_attentions (`bool`, *optional*):
1025
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
1026
+ returned tensors for more detail.
1027
+ use_cache (`bool`, *optional*):
1028
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
1029
+ (see `past_key_values`).
1030
+ past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
1031
+ cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
1032
+ Indices depicting the position of the input sequence tokens in the sequence.
1033
+ position_embeddings (`Tuple[torch.FloatTensor, torch.FloatTensor]`, *optional*):
1034
+ Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
1035
+ with `head_dim` being the embedding dimension of each attention head.
1036
+ kwargs (`dict`, *optional*):
1037
+ Arbitrary kwargs to be ignored, used for FSDP and other methods that inject code
1038
+ into the model
1039
+ """
1040
+
1041
+ residual = hidden_states
1042
+ hidden_states = self.attn_norm(hidden_states)
1043
+
1044
+ # Self Attention
1045
+ hidden_states, self_attn_weights = self.self_attn(
1046
+ hidden_states=hidden_states,
1047
+ attention_mask=attention_mask,
1048
+ position_ids=position_ids,
1049
+ past_key_value=past_key_value,
1050
+ output_attentions=output_attentions,
1051
+ use_cache=use_cache,
1052
+ cache_position=cache_position,
1053
+ position_embeddings=position_embeddings,
1054
+ )
1055
+
1056
+ hidden_states = residual + self.dropout(hidden_states)
1057
+
1058
+ # Fully Connected
1059
+ residual = hidden_states
1060
+ hidden_states = self.ff_norm(hidden_states)
1061
+ hidden_states = self.mlp(hidden_states)
1062
+
1063
+ hidden_states = residual + self.dropout(hidden_states)
1064
+
1065
+ outputs = (hidden_states,)
1066
+
1067
+ if output_attentions:
1068
+ outputs += (self_attn_weights,)
1069
+
1070
+ return outputs
1071
+
1072
+
1073
+ class MolmoActPostNormDecoderLayer(MolmoActDecoderLayer):
1074
+ def forward(
1075
+ self,
1076
+ hidden_states: torch.Tensor,
1077
+ attention_mask: Optional[torch.Tensor] = None,
1078
+ position_ids: Optional[torch.LongTensor] = None,
1079
+ past_key_value: Optional[Tuple[torch.Tensor]] = None,
1080
+ output_attentions: Optional[bool] = False,
1081
+ use_cache: Optional[bool] = False,
1082
+ cache_position: Optional[torch.LongTensor] = None,
1083
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
1084
+ **kwargs,
1085
+ ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
1086
+ """
1087
+ Args:
1088
+ hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
1089
+ attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
1090
+ `(batch, sequence_length)` where padding elements are indicated by 0.
1091
+ output_attentions (`bool`, *optional*):
1092
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under
1093
+ returned tensors for more detail.
1094
+ use_cache (`bool`, *optional*):
1095
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
1096
+ (see `past_key_values`).
1097
+ past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
1098
+ cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
1099
+ Indices depicting the position of the input sequence tokens in the sequence.
1100
+ position_embeddings (`Tuple[torch.FloatTensor, torch.FloatTensor]`, *optional*):
1101
+ Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
1102
+ with `head_dim` being the embedding dimension of each attention head.
1103
+ kwargs (`dict`, *optional*):
1104
+ Arbitrary kwargs to be ignored, used for FSDP and other methods that inject code
1105
+ into the model
1106
+ """
1107
+
1108
+ residual = hidden_states
1109
+
1110
+ # Self Attention
1111
+ hidden_states, self_attn_weights = self.self_attn(
1112
+ hidden_states=hidden_states,
1113
+ attention_mask=attention_mask,
1114
+ position_ids=position_ids,
1115
+ past_key_value=past_key_value,
1116
+ output_attentions=output_attentions,
1117
+ use_cache=use_cache,
1118
+ cache_position=cache_position,
1119
+ position_embeddings=position_embeddings,
1120
+ )
1121
+ hidden_states = self.attn_norm(hidden_states)
1122
+
1123
+ hidden_states = residual + self.dropout(hidden_states)
1124
+
1125
+ # Fully Connected
1126
+ residual = hidden_states
1127
+ hidden_states = self.mlp(hidden_states)
1128
+ hidden_states = self.ff_norm(hidden_states)
1129
+
1130
+ hidden_states = residual + self.dropout(hidden_states)
1131
+
1132
+ outputs = (hidden_states,)
1133
+
1134
+ if output_attentions:
1135
+ outputs += (self_attn_weights,)
1136
+
1137
+ return outputs
1138
+
1139
+
1140
+ class MolmoActEmbedding(nn.Module):
1141
+ def __init__(
1142
+ self,
1143
+ num_embeddings: int,
1144
+ num_new_embeddings: int,
1145
+ features: int,
1146
+ device: Union[str, torch.device] = None,
1147
+ ):
1148
+ super().__init__()
1149
+ self.embedding = nn.Parameter(
1150
+ torch.zeros(num_embeddings, features, device=device),
1151
+ )
1152
+ self.new_embedding = nn.Parameter(
1153
+ torch.zeros(num_new_embeddings, features, device=device),
1154
+ )
1155
+
1156
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
1157
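+ # Embed ids against the base vocabulary and the additional vocabulary as one concatenated table.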
+ return F.embedding(x, torch.cat([self.embedding, self.new_embedding], dim=0))
1158
+
1159
+
1160
+ MOLMO2_TEXT_ONLY_INPUTS_DOCSTRING = r"""
1161
+ Args:
1162
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
1163
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
1164
+ it.
1165
+
1166
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
1167
+ [`PreTrainedTokenizer.__call__`] for details.
1168
+
1169
+ [What are input IDs?](../glossary#input-ids)
1170
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
1171
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
1172
+
1173
+ - 1 for tokens that are **not masked**,
1174
+ - 0 for tokens that are **masked**.
1175
+
1176
+ [What are attention masks?](../glossary#attention-mask)
1177
+
1178
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
1179
+ [`PreTrainedTokenizer.__call__`] for details.
1180
+
1181
+ If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
1182
+ `past_key_values`).
1183
+
1184
+ If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
1185
+ and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
1186
+ information on the default strategy.
1187
+
1188
+ - 1 indicates the head is **not masked**,
1189
+ - 0 indicates the head is **masked**.
1190
+ position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
1191
+ Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
1192
+ config.n_positions - 1]`.
1193
+
1194
+ [What are position IDs?](../glossary#position-ids)
1195
+ past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*):
1196
+ Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
1197
+ blocks) that can be used to speed up sequential decoding. This typically consists of the `past_key_values`
1198
+ returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.
1199
+
1200
+ Two formats are allowed:
1201
+ - a [`~cache_utils.Cache`] instance, see our
1202
+ [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache);
1203
+ - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
1204
+ shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy
1205
+ cache format.
1206
+
1207
+ The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the
1208
+ legacy cache format will be returned.
1209
+
1210
+ If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
1211
+ have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
1212
+ of shape `(batch_size, sequence_length)`.
1213
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
1214
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
1215
+ is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
1216
+ model's internal embedding lookup matrix.
1217
+ use_cache (`bool`, *optional*):
1218
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
1219
+ `past_key_values`).
1220
+ output_attentions (`bool`, *optional*):
1221
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
1222
+ tensors for more detail.
1223
+ output_hidden_states (`bool`, *optional*):
1224
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
1225
+ more detail.
1226
+ return_dict (`bool`, *optional*):
1227
+ Whether or not to return a [`CausalLMOutputWithPast`] instead of a plain tuple.
1228
+ cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
1229
+ Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`,
1230
+ this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
1231
+ the complete sequence length.
1232
+ """
1233
+
1234
+
1235
+ @add_start_docstrings(
1236
+ "The bare MolmoAct text-only model outputting raw hidden-states without any specific head on top.",
1237
+ MOLMO_START_DOCSTRING,
1238
+ )
1239
+ class MolmoActLlm(MolmoActPreTrainedModel):
1240
+ def __init__(self, config: MolmoActLlmConfig):
1241
+ super().__init__(config)
1242
+ self.config = config
1243
+ if config.additional_vocab_size is not None:
1244
+ self.wte = MolmoActEmbedding(
1245
+ config.vocab_size,
1246
+ config.additional_vocab_size,
1247
+ config.hidden_size,
1248
+ )
1249
+ else:
1250
+ self.wte = nn.Embedding(config.vocab_size, config.hidden_size)
1251
+ self.emb_drop = nn.Dropout(config.embedding_dropout)
1252
+ decoder_layer = MolmoActPostNormDecoderLayer if config.norm_after else MolmoActDecoderLayer
1253
+ self.blocks = nn.ModuleList(
1254
+ [decoder_layer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
1255
+ )
1256
+ self.ln_f = MolmoActRMSNorm(config.hidden_size, eps=config.layer_norm_eps)
1257
+ self.rotary_emb = MolmoActRotaryEmbedding(config)
1258
+ self.gradient_checkpointing = False
1259
+
1260
+ # Initialize weights and apply final processing
1261
+ self.post_init()
1262
+
1263
+ def get_input_embeddings(self) -> torch.nn.Module:
1264
+ return self.wte
1265
+
1266
+ def set_input_embeddings(self, value: torch.nn.Module) -> None:
1267
+ self.wte = value
1268
+
1269
+ @can_return_tuple
1270
+ def forward(
1271
+ self,
1272
+ input_ids: Optional[torch.LongTensor] = None,
1273
+ attention_mask: Optional[torch.Tensor] = None,
1274
+ position_ids: Optional[torch.LongTensor] = None,
1275
+ past_key_values: Optional[Cache] = None,
1276
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1277
+ use_cache: Optional[bool] = None,
1278
+ output_attentions: Optional[bool] = None,
1279
+ output_hidden_states: Optional[bool] = None,
1280
+ cache_position: Optional[torch.LongTensor] = None,
1281
+ **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
1282
+ ) -> BaseModelOutputWithPast:
1283
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
1284
+ output_hidden_states = (
1285
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
1286
+ )
1287
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
1288
+
1289
+ if (input_ids is None) ^ (inputs_embeds is not None):
1290
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
1291
+
1292
+ if self.gradient_checkpointing and self.training and use_cache:
1293
+ logger.warning_once(
1294
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
1295
+ )
1296
+ use_cache = False
1297
+
1298
+ # TODO (joao): remove this exception in v4.56 -- it exists for users that try to pass a legacy cache
1299
+ if not isinstance(past_key_values, (type(None), Cache)):
1300
+ raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")
1301
+
1302
+ if inputs_embeds is None:
1303
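+ # Map the -1 padding ids to 0 so the embedding lookup stays in range.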
+ input_ids = input_ids * (input_ids != -1).to(input_ids.dtype)
1304
+ inputs_embeds = self.wte(input_ids)
1305
+
1306
+ if use_cache and past_key_values is None:
1307
+ past_key_values = DynamicCache()
1308
+
1309
+ if cache_position is None:
1310
+ past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
1311
+ cache_position = torch.arange(
1312
+ past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
1313
+ )
1314
+
1315
+ if position_ids is None:
1316
+ position_ids = cache_position.unsqueeze(0)
1317
+
1318
+ causal_mask = self._update_causal_mask(
1319
+ attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
1320
+ )
1321
+
1322
+ hidden_states = inputs_embeds
1323
+
1324
+ # create position embeddings to be shared across the decoder layers
1325
+ position_embeddings = self.rotary_emb(hidden_states, position_ids)
1326
+
1327
+ # decoder layers
1328
+ all_hidden_states = () if output_hidden_states else None
1329
+ all_self_attns = () if output_attentions else None
1330
+
1331
+ for decoder_block in self.blocks[: self.config.num_hidden_layers]:
1332
+ if output_hidden_states:
1333
+ all_hidden_states += (hidden_states,)
1334
+
1335
+ layer_outputs = decoder_block(
1336
+ hidden_states,
1337
+ attention_mask=causal_mask,
1338
+ position_ids=position_ids,
1339
+ past_key_value=past_key_values,
1340
+ output_attentions=output_attentions,
1341
+ use_cache=use_cache,
1342
+ cache_position=cache_position,
1343
+ position_embeddings=position_embeddings,
1344
+ **flash_attn_kwargs,
1345
+ )
1346
+
1347
+ hidden_states = layer_outputs[0]
1348
+
1349
+ if output_attentions:
1350
+ all_self_attns += (layer_outputs[1],)
1351
+
1352
+ hidden_states = self.ln_f(hidden_states)
1353
+
1354
+ # add hidden states from the last decoder layer
1355
+ if output_hidden_states:
1356
+ all_hidden_states += (hidden_states,)
1357
+
1358
+ return BaseModelOutputWithPast(
1359
+ last_hidden_state=hidden_states,
1360
+ past_key_values=past_key_values if use_cache else None,
1361
+ hidden_states=all_hidden_states,
1362
+ attentions=all_self_attns,
1363
+ )
1364
+
1365
+ def _update_causal_mask(
1366
+ self,
1367
+ attention_mask: Union[torch.Tensor, "BlockMask"],
1368
+ input_tensor: torch.Tensor,
1369
+ cache_position: torch.Tensor,
1370
+ past_key_values: Cache,
1371
+ output_attentions: bool = False,
1372
+ ):
1373
+ if self.config._attn_implementation == "flash_attention_2":
1374
+ if attention_mask is not None and (attention_mask == 0.0).any():
1375
+ return attention_mask
1376
+ return None
1377
+ if self.config._attn_implementation == "flex_attention":
1378
+ if isinstance(attention_mask, torch.Tensor):
1379
+ attention_mask = make_flex_block_causal_mask(attention_mask)
1380
+ return attention_mask
1381
+
1382
+ # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
1383
+ # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
1384
+ # to infer the attention mask.
1385
+ past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
1386
+ using_compilable_cache = past_key_values.is_compileable if past_key_values is not None else False
1387
+
1388
+ # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward
1389
+ if self.config._attn_implementation == "sdpa" and not using_compilable_cache and not output_attentions:
1390
+ if AttentionMaskConverter._ignore_causal_mask_sdpa(
1391
+ attention_mask,
1392
+ inputs_embeds=input_tensor,
1393
+ past_key_values_length=past_seen_tokens,
1394
+ is_training=self.training,
1395
+ ):
1396
+ return None
1397
+
1398
+ dtype = input_tensor.dtype
1399
+ sequence_length = input_tensor.shape[1]
1400
+ if using_compilable_cache:
1401
+ target_length = past_key_values.get_max_cache_shape()
1402
+ else:
1403
+ target_length = (
1404
+ attention_mask.shape[-1]
1405
+ if isinstance(attention_mask, torch.Tensor)
1406
+ else past_seen_tokens + sequence_length + 1
1407
+ )
1408
+
1409
+ # In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
1410
+ causal_mask = self._prepare_4d_causal_attention_mask_with_cache_position(
1411
+ attention_mask,
1412
+ sequence_length=sequence_length,
1413
+ target_length=target_length,
1414
+ dtype=dtype,
1415
+ cache_position=cache_position,
1416
+ batch_size=input_tensor.shape[0],
1417
+ )
1418
+
1419
+ if (
1420
+ self.config._attn_implementation == "sdpa"
1421
+ and attention_mask is not None
1422
+ and attention_mask.device.type in ["cuda", "xpu", "npu"]
1423
+ and not output_attentions
1424
+ ):
1425
+ # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when
1426
+ # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
1427
+ # Details: https://github.com/pytorch/pytorch/issues/110213
1428
+ min_dtype = torch.finfo(dtype).min
1429
+ causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)
1430
+
1431
+ return causal_mask
1432
+
1433
+ @staticmethod
1434
+ def _prepare_4d_causal_attention_mask_with_cache_position(
1435
+ attention_mask: torch.Tensor,
1436
+ sequence_length: int,
1437
+ target_length: int,
1438
+ dtype: torch.dtype,
1439
+ cache_position: torch.Tensor,
1440
+ batch_size: int,
1441
+ **kwargs,
1442
+ ):
1443
+ """
1444
+ Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
1445
+ `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.
1446
+
1447
+ Args:
1448
+ attention_mask (`torch.Tensor`):
1449
+ A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
1450
+ `(batch_size, 1, query_length, key_value_length)`.
1451
+ sequence_length (`int`):
1452
+ The sequence length being processed.
1453
+ target_length (`int`):
1454
+ The target length: when generating with static cache, the mask should be as long as the static cache,
1455
+ to account for the 0 padding, the part of the cache that is not filled yet.
1456
+ dtype (`torch.dtype`):
1457
+ The dtype to use for the 4D attention mask.
1458
+ cache_position (`torch.Tensor`):
1459
+ Indices depicting the position of the input sequence tokens in the sequence.
1460
+ batch_size (`int`):
1461
+ Batch size.
1462
+ """
1463
+ if attention_mask is not None and attention_mask.dim() == 4:
1464
+ # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
1465
+ causal_mask = attention_mask
1466
+ else:
1467
+ min_dtype = torch.finfo(dtype).min
1468
+ causal_mask = torch.full(
1469
+ (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
1470
+ )
1471
+ if sequence_length != 1:
1472
+ causal_mask = torch.triu(causal_mask, diagonal=1)
1473
+ causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
1474
+ causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
1475
+ if attention_mask is not None:
1476
+ causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
1477
+ mask_length = attention_mask.shape[-1]
1478
+ padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
1479
+ causal_mask.device
1480
+ )
1481
+ padding_mask = padding_mask == 0
1482
+ causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
1483
+ padding_mask, min_dtype
1484
+ )
1485
+
1486
+ return causal_mask
1487
+
1488
+
1489
+ @add_start_docstrings(
1490
+ "The MolmoAct text-only model which consists of a language model + lm head.",
1491
+ MOLMO_START_DOCSTRING,
1492
+ )
1493
+ class MolmoActForCausalLM(MolmoActPreTrainedModel, GenerationMixin):
1494
+ _tied_weights_keys = [] # Weights are not tied
1495
+ _tp_plan = {"lm_head": "colwise_rep"}
1496
+ _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
1497
+ base_model_prefix = "model"
1498
+
1499
+ def __init__(self, config: MolmoActLlmConfig):
1500
+ super().__init__(config)
1501
+ self.model = MolmoActLlm(config)
1502
+ self.vocab_size = config.vocab_size
1503
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
1504
+
1505
+ # Initialize weights and apply final processing
1506
+ self.post_init()
1507
+
1508
+ def get_input_embeddings(self) -> torch.nn.Module:
1509
+ return self.model.wte
1510
+
1511
+ def set_input_embeddings(self, value: torch.nn.Module) -> None:
1512
+ self.model.wte = value
1513
+
1514
+ def get_output_embeddings(self):
1515
+ return self.lm_head
1516
+
1517
+ def set_output_embeddings(self, value: torch.nn.Module) -> None:
1518
+ self.lm_head = value
1519
+
1520
+ def set_decoder(self, decoder: torch.nn.Module) -> None:
1521
+ self.model = decoder
1522
+
1523
+ def get_decoder(self) -> torch.nn.Module:
1524
+ return self.model
1525
+
1526
+ @can_return_tuple
1527
+ @add_start_docstrings_to_model_forward(MOLMO2_TEXT_ONLY_INPUTS_DOCSTRING)
1528
+ def forward(
1529
+ self,
1530
+ input_ids: Optional[torch.LongTensor] = None,
1531
+ attention_mask: Optional[torch.Tensor] = None,
1532
+ position_ids: Optional[torch.LongTensor] = None,
1533
+ past_key_values: Optional[Cache] = None,
1534
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1535
+ labels: Optional[torch.LongTensor] = None,
1536
+ use_cache: Optional[bool] = None,
1537
+ output_attentions: Optional[bool] = None,
1538
+ output_hidden_states: Optional[bool] = None,
1539
+ cache_position: Optional[torch.LongTensor] = None,
1540
+ logits_to_keep: Union[int, torch.Tensor] = 0,
1541
+ **kwargs,
1542
+ ) -> CausalLMOutputWithPast:
1543
+ r"""
1544
+ ```python
1545
+ >>> from transformers import AutoTokenizer, MolmoActForCausalLM
1546
+
1547
+ >>> model = MolmoActForCausalLM.from_pretrained("...")
1548
+ >>> tokenizer = AutoTokenizer.from_pretrained("...")
1549
+
1550
+ >>> prompt = "Hey, are you conscious? Can you talk to me?"
1551
+ >>> inputs = tokenizer(prompt, return_tensors="pt")
1552
+
1553
+ >>> # Generate
1554
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
1555
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
1556
+ "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
1557
+ ```"""
1558
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
1559
+ output_hidden_states = (
1560
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
1561
+ )
1562
+
1563
+ # decoder outputs consist of (dec_features, layer_state, dec_hidden, dec_attn)
1564
+ outputs: BaseModelOutputWithPast = self.model(
1565
+ input_ids=input_ids,
1566
+ attention_mask=attention_mask,
1567
+ position_ids=position_ids,
1568
+ past_key_values=past_key_values,
1569
+ inputs_embeds=inputs_embeds,
1570
+ use_cache=use_cache,
1571
+ output_attentions=output_attentions,
1572
+ output_hidden_states=output_hidden_states,
1573
+ cache_position=cache_position,
1574
+ **kwargs,
1575
+ )
1576
+
1577
+ hidden_states = outputs.last_hidden_state
1578
+ # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
1579
+ slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
1580
+ logits = self.lm_head(hidden_states[:, slice_indices, :])
1581
+
1582
+ loss = None
1583
+ if labels is not None:
1584
+ loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
1585
+
1586
+ return CausalLMOutputWithPast(
1587
+ loss=loss,
1588
+ logits=logits,
1589
+ past_key_values=outputs.past_key_values,
1590
+ hidden_states=outputs.hidden_states,
1591
+ attentions=outputs.attentions,
1592
+ )
1593
+
1594
+
1595
+ MOLMO2_INPUTS_DOCSTRING = r"""
1596
+ Args:
1597
+ input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
1598
+ Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
1599
+ it.
1600
+
1601
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
1602
+ [`PreTrainedTokenizer.__call__`] for details.
1603
+
1604
+ [What are input IDs?](../glossary#input-ids)
1605
+ images (`torch.FloatTensor` of shape `(batch_size, n_crops, 27*27, 3*14*14)`, *optional*):
1606
+ The input crops, with pixel values between 0 and 1 and normalized with the SigLIP2 mean/std
1607
+
1608
+ Each crop contains 27x27 patches with 14*14*3 pixel values
1609
+ image_masks (`torch.FloatTensor` of shape `(batch_size, n_crops, n_patches, n_features)`, *optional*):
1610
+ Image masks showing what percentage of each patch is padding
1611
+ pooled_patches_idx (`torch.LongTensor` of shape `(batch_size, n_image_tokens, n_pooled_patches)`):
1612
+ For each image-patch token in `input_ids`, the indices of the patches in `images`
1613
+ to pool for that token; entries of -1
1614
+ mean the patch is ignored.
1615
+ attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
1616
+ Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
1617
+
1618
+ - 1 for tokens that are **not masked**,
1619
+ - 0 for tokens that are **masked**.
1620
+
1621
+ [What are attention masks?](../glossary#attention-mask)
1622
+
1623
+ Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
1624
+ [`PreTrainedTokenizer.__call__`] for details.
1625
+
1626
+ If `past_key_values` is used, optionally only the last `input_ids` have to be input (see
1627
+ `past_key_values`).
1628
+
1629
+ If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
1630
+ and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
1631
+ information on the default strategy.
1632
+
1633
+ - 1 indicates the head is **not masked**,
1634
+ - 0 indicates the head is **masked**.
1635
+ position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
1636
+ Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
1637
+ config.n_positions - 1]`.
1638
+
1639
+ [What are position IDs?](../glossary#position-ids)
1640
+ past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*):
1641
+ Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
1642
+ blocks) that can be used to speed up sequential decoding. This typically consists of the `past_key_values`
1643
+ returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.
1644
+
1645
+ Two formats are allowed:
1646
+ - a [`~cache_utils.Cache`] instance, see our
1647
+ [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache);
1648
+ - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of
1649
+ shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy
1650
+ cache format.
1651
+
1652
+ The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the
1653
+ legacy cache format will be returned.
1654
+
1655
+ If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't
1656
+ have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids`
1657
+ of shape `(batch_size, sequence_length)`.
1658
+ inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
1659
+ Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
1660
+ is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
1661
+ model's internal embedding lookup matrix.
1662
+ use_cache (`bool`, *optional*):
1663
+ If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
1664
+ `past_key_values`).
1665
+ output_attentions (`bool`, *optional*):
1666
+ Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
1667
+ tensors for more detail.
1668
+ output_hidden_states (`bool`, *optional*):
1669
+ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
1670
+ more detail.
1671
+ return_dict (`bool`, *optional*):
1672
+ Whether or not to return a [`MolmoActCausalLMOutputWithPast`] instead of a plain tuple.
1673
+ cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
1674
+ Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`,
1675
+ this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
1676
+ the complete sequence length.
1677
+ """
1678
+
1679
+
1680
+ @add_start_docstrings(
1681
+ "The bare MolmoAct model outputting raw hidden-states without any specific head on top.",
1682
+ MOLMO_START_DOCSTRING,
1683
+ )
1684
+ class MolmoActModel(MolmoActPreTrainedModel):
1685
+ _checkpoint_conversion_mapping = {}
1686
+
1687
+ def __init__(self, config: MolmoActConfig):
1688
+ super().__init__(config)
1689
+ self.transformer: MolmoActLlm = MolmoActLlm(config.llm_config)
1690
+ self.vision_backbone: Optional[MolmoActVisionBackbone] = None
1691
+ if config.vit_config is not None and config.adapter_config is not None:
1692
+ self.vision_backbone = MolmoActVisionBackbone(config.vit_config, config.adapter_config)
1693
+
1694
+ # Initialize weights and apply final processing
1695
+ self.post_init()
1696
+
1697
+ def get_input_embeddings(self) -> torch.nn.Module:
1698
+ return self.transformer.wte
1699
+
1700
+ def set_input_embeddings(self, value: torch.nn.Module) -> None:
1701
+ self.transformer.wte = value
1702
+
1703
+ @property
1704
+ def device(self) -> torch.device:
1705
+ return self.transformer.ln_f.weight.device
1706
+
1707
+ def build_input_embeddings(
1708
+ self,
1709
+ input_ids: torch.LongTensor,
1710
+ images: Optional[torch.FloatTensor] = None, # image inputs
1711
+ image_masks: Optional[torch.Tensor] = None,
1712
+ pooled_patches_idx: Optional[torch.LongTensor] = None,
1713
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
1714
+
1715
+ # Get embeddings of input.
1716
+ # shape: (batch_size, seq_len, d_model)
1717
+ input_ids = input_ids * (input_ids != -1).to(input_ids.dtype)
1718
+ x = self.transformer.wte(input_ids)
1719
+
1720
+ image_features: Optional[torch.FloatTensor] = None
1721
+ if images is not None:
1722
+ image_features = self.vision_backbone(images, pooled_patches_idx)
1723
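+ # Add the pooled image features onto the embeddings of the image-patch placeholder tokens.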
+ is_image_patch = input_ids.view(-1) == self.config.image_patch_id
1724
+ assert is_image_patch.sum() == len(image_features)
1725
+ x.view(-1, x.shape[-1])[is_image_patch] += image_features
1726
+
1727
+ # shape: (batch_size, seq_len, d_model)
1728
+ x = self.transformer.emb_drop(x) # type: ignore
1729
+
1730
+ return x, image_features
1731
+
1732
+ @can_return_tuple
1733
+ def forward(
1734
+ self,
1735
+ input_ids: Optional[torch.LongTensor] = None,
1736
+ images: Optional[torch.FloatTensor] = None,
1737
+ image_masks: Optional[torch.Tensor] = None,
1738
+ pooled_patches_idx: Optional[torch.Tensor] = None,
1739
+ attention_mask: Optional[torch.Tensor] = None,
1740
+ position_ids: Optional[torch.Tensor] = None,
1741
+ past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
1742
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1743
+ use_cache: Optional[bool] = None,
1744
+ output_attentions: Optional[bool] = None,
1745
+ output_hidden_states: Optional[bool] = None,
1746
+ cache_position: Optional[torch.LongTensor] = None,
1747
+ ) -> Union[Tuple, MolmoActModelOutputWithPast]:
1748
+
1749
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
1750
+ output_hidden_states = (
1751
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
1752
+ )
1753
+ use_cache = use_cache if use_cache is not None else self.config.use_cache
1754
+
1755
+ if (input_ids is None) ^ (inputs_embeds is not None):
1756
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
1757
+
1758
+ if images is not None and inputs_embeds is not None:
1759
+ raise ValueError(
1760
+ "You cannot specify both images and inputs_embeds at the same time."
1761
+ )
1762
+
1763
+ if inputs_embeds is None:
1764
+ inputs_embeds, image_features = self.build_input_embeddings(
1765
+ input_ids, images, image_masks, pooled_patches_idx)
1766
+
1767
+ outputs = self.transformer(
1768
+ attention_mask=attention_mask,
1769
+ position_ids=position_ids,
1770
+ past_key_values=past_key_values,
1771
+ inputs_embeds=inputs_embeds,
1772
+ use_cache=use_cache,
1773
+ output_attentions=output_attentions,
1774
+ output_hidden_states=output_hidden_states,
1775
+ cache_position=cache_position,
1776
+ )
1777
+
1778
+ return MolmoActModelOutputWithPast(
1779
+ last_hidden_state=outputs.last_hidden_state,
1780
+ past_key_values=outputs.past_key_values,
1781
+ hidden_states=outputs.hidden_states,
1782
+ attentions=outputs.attentions,
1783
+ image_hidden_states=image_features if images is not None else None,
1784
+ )
1785
+
1786
+ @add_start_docstrings(
1787
+ "The MolmoAct model which consists of a vision backbone and a language model + lm head.",
1788
+ MOLMO_START_DOCSTRING,
1789
+ )
1790
+ class MolmoActForActionReasoning(MolmoActPreTrainedModel, GenerationMixin):
1791
+ _checkpoint_conversion_mapping = {}
1792
+ _tied_weights_keys = [] # Weights are not tied
1793
+ config_class = MolmoActConfig
1794
+
1795
+ def __init__(self, config: MolmoActConfig):
1796
+ super().__init__(config)
1797
+
1798
+ self.model = MolmoActModel(config)
1799
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
1800
+ self.vocab_size = config.vocab_size
1801
+
1802
+ # Initialize weights and apply final processing
1803
+ self.post_init()
1804
+
1805
+ # --- Action parsing / de-tokenization setup ---
1806
+ # Stats dict expected under config.norm_stats (per-dataset key). If missing, default to empty.
1807
+ self.norm_stats = getattr(config, "norm_stats", None) or {}
1808
+ # Number of discretization bins used for action tokens, defaults to 256.
1809
+ self.n_action_bins = getattr(config, "n_action_bins", 256)
1810
+ # Precompute bin centers in [-1, 1] for inverse token to value mapping.
1811
+ self.bins = np.linspace(-1.0, 1.0, self.n_action_bins)
1812
+ self.bin_centers = (self.bins[:-1] + self.bins[1:]) / 2.0
1813
+ # Lazily constructed tokenizer for converting token strings to ids
1814
+ self._qwen_tokenizer = None
1815
+
1816
+ def get_input_embeddings(self) -> torch.nn.Module:
1817
+ return self.model.transformer.wte
1818
+
1819
+ def set_input_embeddings(self, value: torch.nn.Module) -> None:
1820
+ self.model.transformer.wte = value
1821
+
1822
+ def get_output_embeddings(self):
1823
+ return self.lm_head
1824
+
1825
+ def set_output_embeddings(self, value: torch.nn.Module) -> None:
1826
+ self.lm_head = value
1827
+
1828
+ # Make modules available through the conditional class for BC
1829
+ @property
1830
+ def language_model(self) -> torch.nn.Module:
1831
+ return self.model.transformer
1832
+
1833
+ @property
1834
+ def vision_backbone(self) -> torch.nn.Module:
1835
+ return self.model.vision_backbone
1836
+
1837
+ @can_return_tuple
1838
+ @add_start_docstrings_to_model_forward(MOLMO2_INPUTS_DOCSTRING)
1839
+ def forward(
1840
+ self,
1841
+ input_ids: torch.LongTensor = None,
1842
+ images: Optional[torch.Tensor] = None,
1843
+ image_masks: Optional[torch.Tensor] = None,
1844
+ pooled_patches_idx: Optional[torch.Tensor] = None,
1845
+ attention_mask: Optional[torch.Tensor] = None,
1846
+ position_ids: Optional[torch.LongTensor] = None,
1847
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
1848
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1849
+ labels: Optional[torch.LongTensor] = None,
1850
+ use_cache: Optional[bool] = None,
1851
+ output_attentions: Optional[bool] = None,
1852
+ output_hidden_states: Optional[bool] = None,
1853
+ cache_position: Optional[torch.LongTensor] = None,
1854
+ logits_to_keep: Union[int, torch.Tensor] = 0,
1855
+ **kwargs,
1856
+ ) -> Union[Tuple, MolmoActCausalLMOutputWithPast]:
1857
+ r"""
1858
+ ```python
1859
+ >>> from PIL import Image
1860
+ >>> import requests
1861
+ >>> from transformers import AutoProcessor, MolmoActForActionReasoning
1862
+
1863
+ >>> model = MolmoActForActionReasoning.from_pretrained("...")
1864
+ >>> processor = AutoProcessor.from_pretrained("...")
1865
+
1866
+ >>> prompt = "What's the content of the image?"
1867
+ >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
1868
+ >>> image = Image.open(requests.get(url, stream=True).raw)
1869
+
1870
+ >>> inputs = processor(images=image, text=prompt, apply_chat_template=True, return_tensors="pt")
1871
+
1872
+ >>> # Generate
1873
+ >>> generated_ids = model.generate(**inputs, max_new_tokens=15)
1874
+ >>> generated_tokens = generated_ids[:, inputs['input_ids'].size(1):]
1875
+ >>> processor.batch_decode(generated_tokens, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
1876
+ "The image features a busy city street with a stop sign prominently displayed"
1877
+ ```"""
1878
+ outputs = self.model(
1879
+ input_ids=input_ids,
1880
+ images=images,
1881
+ image_masks=image_masks,
1882
+ pooled_patches_idx=pooled_patches_idx,
1883
+ attention_mask=attention_mask,
1884
+ position_ids=position_ids,
1885
+ past_key_values=past_key_values,
1886
+ inputs_embeds=inputs_embeds,
1887
+ use_cache=use_cache,
1888
+ output_attentions=output_attentions,
1889
+ output_hidden_states=output_hidden_states,
1890
+ cache_position=cache_position,
1891
+ )
1892
+
1893
+ hidden_states = outputs.last_hidden_state
1894
+ slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
1895
+ logits = self.lm_head(hidden_states[:, slice_indices, :])
1896
+
1897
+ loss = None
1898
+ if labels is not None:
1899
+ loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.vocab_size)
1900
+
1901
+ return MolmoActCausalLMOutputWithPast(
1902
+ loss=loss,
1903
+ logits=logits,
1904
+ past_key_values=outputs.past_key_values,
1905
+ hidden_states=outputs.hidden_states,
1906
+ attentions=outputs.attentions,
1907
+ image_hidden_states=outputs.image_hidden_states,
1908
+ )
1909
+
1910
+ # ===== Utilities for action parsing / un-normalization =====
1911
+ def _check_unnorm_key(self, unnorm_key: Optional[str]) -> str:
1912
+ """Validate and resolve which dataset key to use from self.norm_stats."""
1913
+ if not self.norm_stats:
1914
+ raise ValueError("No norm_stats found in config; cannot unnormalize actions.")
1915
+ if unnorm_key is None:
1916
+ if len(self.norm_stats) != 1:
1917
+ raise ValueError(
1918
+ f"Model has multiple dataset stats; please pass `unnorm_key` from {list(self.norm_stats.keys())}"
1919
+ )
1920
+ return next(iter(self.norm_stats.keys()))
1921
+ if unnorm_key not in self.norm_stats:
1922
+ raise ValueError(f"`unnorm_key`={unnorm_key!r} not in {list(self.norm_stats.keys())}")
1923
+ return unnorm_key
1924
+
1925
+ def get_action_dim(self, unnorm_key: Optional[str] = None) -> int:
1926
+ """Return action dimensionality from q01 stats length for the dataset key."""
1927
+ key = self._check_unnorm_key(unnorm_key)
1928
+ return len(self.norm_stats[key]["action"]["q01"])
1929
+
1930
+ def get_action_stats(self, unnorm_key: Optional[str] = None) -> Dict[str, Any]:
1931
+ """Return the full action stats dict for a given dataset key."""
1932
+ key = self._check_unnorm_key(unnorm_key)
1933
+ return self.norm_stats[key]["action"]
1934
+
1935
+ @torch.no_grad()
1936
+ def parse_action(self, text: str, unnorm_key: Optional[str] = None) -> list:
1937
+ """
1938
+ Parse generated text to extract 1×D action token lists, decode them to continuous values,
1939
+ and unnormalize using dataset-specific stats from `config.norm_stats`.
1940
+
1941
+ This follows the pipeline used in `experiments/robot/libero/main_libero_10_evaluation.py`:
1942
+ - Find bracketed token lists following the phrase "the action that the robot should take is" (case-insensitive),
1943
+ falling back to any bracketed list in the text.
1944
+ - Convert token strings → ids via Qwen2Tokenizer.
1945
+ - Map ids → discretized bin indices using: `discretized = vocab_size - token_id - 1` (clipped to bins)
1946
+ - Convert bins → normalized actions in [-1, 1] using precomputed `bin_centers`.
1947
+ - Unnormalize with q01/q99 and optional `mask` from norm_stats.
1948
+
1949
+ Returns:
1950
+ List[List[float]]: unnormalized action vectors, each of length D (one per extracted token list).
1951
+ """
1952
+ # Resolve action dimension and stats
1953
+ action_dim = self.get_action_dim(unnorm_key)
1954
+ stats = self.get_action_stats(unnorm_key)
1955
+ q01 = np.asarray(stats["q01"], dtype=np.float32)
1956
+ q99 = np.asarray(stats["q99"], dtype=np.float32)
1957
+ mask = np.asarray(stats.get("mask", np.ones_like(q01, dtype=bool)), dtype=bool)
1958
+ # the gripper dimension should not be unnormalized; keep its value in [-1, 1]
1959
+ mask[-1] = False
1960
+
1961
+ # Lazily load the tokenizer (shared across calls)
1962
+ if self._qwen_tokenizer is None:
1963
+ self._qwen_tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen2-7B")
1964
+
1965
+ token_lists = extract_action_token_lists(text, only_len=action_dim)
1966
+ action_lists = []
1967
+
1968
+ # Decode every extracted token list (temporal aggregation, if any, is left to the caller)
1969
+ for tokens in token_lists:
1970
+
1971
+ # Convert tokens → ids (replace None with vocab_size to avoid negatives)
1972
+ ids = self._qwen_tokenizer.convert_tokens_to_ids(tokens)
1973
+ ids = [self._qwen_tokenizer.vocab_size if i is None else int(i) for i in ids]
1974
+ ids = np.asarray(ids, dtype=np.int64)
1975
+
1976
+ # ids → discretized bin indices → normalized actions in [-1, 1]
1977
+ discretized = self._qwen_tokenizer.vocab_size - ids
1978
+ discretized = np.clip(discretized - 1, a_min=0, a_max=self.bin_centers.shape[0] - 1)
1979
+ normalized = self.bin_centers[discretized]
1980
+
1981
+ # Unnormalize using per-dimension statistics
1982
+ unnorm = 0.5 * (normalized + 1.0) * (q99 - q01) + q01
1983
+ actions = np.where(mask, unnorm, normalized)
1984
+
1985
+ action_lists.append([float(x) for x in actions])
1986
+
1987
+ # Return a Python list of float actions
1988
+ return action_lists
1989
+
1990
+ @torch.no_grad()
1991
+ def parse_trace(self, text: str) -> list:
1992
+ return extract_trace_lists(text, point_len=2, min_points=1)
1993
+
1994
+ @torch.no_grad()
1995
+ def parse_depth(self, text: str) -> list:
1996
+ return extract_depth_string(text, include_tags=True)
1997
+
1998
+
1999
+ def prepare_inputs_for_generation(
2000
+ self,
2001
+ input_ids: torch.LongTensor,
2002
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
2003
+ inputs_embeds: Optional[torch.FloatTensor] = None,
2004
+ images: Optional[torch.FloatTensor] = None,
2005
+ image_masks: Optional[torch.Tensor] = None,
2006
+ pooled_patches_idx: Optional[torch.Tensor] = None,
2007
+ attention_mask: Optional[torch.Tensor] = None,
2008
+ cache_position: Optional[torch.LongTensor] = None,
2009
+ logits_to_keep: Optional[Union[int, torch.Tensor]] = None,
2010
+ **kwargs,
2011
+ ):
2012
+
2013
+ model_inputs = super().prepare_inputs_for_generation(
2014
+ input_ids,
2015
+ past_key_values=past_key_values,
2016
+ inputs_embeds=inputs_embeds,
2017
+ attention_mask=attention_mask,
2018
+ cache_position=cache_position,
2019
+ logits_to_keep=logits_to_keep,
2020
+ **kwargs,
2021
+ )
2022
+
2023
+ if cache_position[0] == 0:
2024
+ model_inputs["images"] = images
2025
+ model_inputs["pooled_patches_idx"] = pooled_patches_idx
2026
+ model_inputs["image_masks"] = image_masks
2027
+
2028
+ return model_inputs
2029
+
2030
+ def _update_model_kwargs_for_generation(
2031
+ self,
2032
+ outputs: ModelOutput,
2033
+ model_kwargs: Dict[str, Any],
2034
+ is_encoder_decoder: bool = False,
2035
+ num_new_tokens: int = 1,
2036
+ ) -> Dict[str, Any]:
2037
+ if model_kwargs["use_cache"] and "images" in model_kwargs:
2038
+ # After the first step, no longer pass the images into forward since the image tokens
2039
+ # are already cached
2040
+ for k in ["images", "image_masks", "pooled_patches_idx"]:
2041
+ del model_kwargs[k]
2042
+ return super()._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder, num_new_tokens)
2043
+
2044
+ @staticmethod
2045
+ def _prepare_4d_causal_attention_mask_with_cache_position(
2046
+ attention_mask: torch.Tensor,
2047
+ sequence_length: int,
2048
+ target_length: int,
2049
+ dtype: torch.dtype,
2050
+ cache_position: torch.Tensor,
2051
+ batch_size: int,
2052
+ **kwargs,
2053
+ ):
2054
+ """
2055
+ Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
2056
+ `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.
2057
+
2058
+ Args:
2059
+ attention_mask (`torch.Tensor`):
2060
+ A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
2061
+ `(batch_size, 1, query_length, key_value_length)`.
2062
+ sequence_length (`int`):
2063
+ The sequence length being processed.
2064
+ target_length (`int`):
2065
+ The target length: when generating with static cache, the mask should be as long as the static cache,
2066
+ to account for the 0 padding, the part of the cache that is not filled yet.
2067
+ dtype (`torch.dtype`):
2068
+ The dtype to use for the 4D attention mask.
2069
+ cache_position (`torch.Tensor`):
2070
+ Indices depicting the position of the input sequence tokens in the sequence.
2071
+ batch_size (`torch.Tensor`):
2072
+ Batch size.
2073
+ """
2074
+ if attention_mask is not None and attention_mask.dim() == 4:
2075
+ # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing.
2076
+ causal_mask = attention_mask
2077
+ else:
2078
+ min_dtype = torch.finfo(dtype).min
2079
+ causal_mask = torch.full(
2080
+ (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
2081
+ )
2082
+ if sequence_length != 1:
2083
+ causal_mask = torch.triu(causal_mask, diagonal=1)
2084
+ causal_mask *= torch.arange(target_length, device=cache_position.device) > cache_position.reshape(-1, 1)
2085
+ causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)
2086
+ if attention_mask is not None:
2087
+ causal_mask = causal_mask.clone() # copy to contiguous memory for in-place edit
2088
+ mask_length = attention_mask.shape[-1]
2089
+ padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
2090
+ causal_mask.device
2091
+ )
2092
+ padding_mask = padding_mask == 0
2093
+ causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
2094
+ padding_mask, min_dtype
2095
+ )
2096
+
2097
+ return causal_mask
2098
+
2099
+
2100
+ # Always register for multi-modal features
2101
+ AutoModelForImageTextToText.register(MolmoActConfig, MolmoActForActionReasoning)
2102
+ AutoModelForCausalLM.register(MolmoActLlmConfig, MolmoActForCausalLM)
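A minimal sketch of the id-to-action decoding that `parse_action` implements above, assuming a made-up vocab size, token ids, and q01/q99 stats in place of the real Qwen2 tokenizer and `config.norm_stats` values:

```python
import numpy as np

vocab_size = 152064                              # assumed for illustration
n_action_bins = 256
bins = np.linspace(-1.0, 1.0, n_action_bins)
bin_centers = (bins[:-1] + bins[1:]) / 2.0       # 255 centers in [-1, 1]

token_ids = np.array([152063, 151936, 151809])   # hypothetical action token ids
q01 = np.array([-0.5, -0.5, -0.5], dtype=np.float32)  # assumed dataset stats
q99 = np.array([0.5, 0.5, 0.5], dtype=np.float32)

# ids -> bin indices: discretized = vocab_size - id - 1, clipped to valid bins
discretized = np.clip(vocab_size - token_ids - 1, 0, bin_centers.shape[0] - 1)
normalized = bin_centers[discretized]            # normalized actions in [-1, 1]
actions = 0.5 * (normalized + 1.0) * (q99 - q01) + q01
print(actions)                                   # ~[-0.498, 0.0, 0.498]
```

With `mask[-1] = False` as in `parse_action`, the last (gripper) dimension would stay at its normalized value instead of being rescaled.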
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_processing_molmoact.MolmoActImageProcessor",
4
+ "AutoProcessor": "processing_molmoact.MolmoActProcessor"
5
+ },
6
+ "base_image_input_size": [
7
+ 378,
8
+ 378
9
+ ],
10
+ "crop_mode": "overlap-and-resize-c2",
11
+ "do_convert_rgb": true,
12
+ "do_pad": true,
13
+ "image_patch_size": 14,
14
+ "image_pooling_h": 2,
15
+ "image_pooling_w": 2,
16
+ "image_processor_type": "MolmoActImageProcessor",
17
+ "max_crops": 8,
18
+ "max_multi_image_crops": 6,
19
+ "normalize_mode": "siglip",
20
+ "overlap_margins": [
21
+ 4,
22
+ 4
23
+ ],
24
+ "pad_value": 0.0,
25
+ "processor_class": "MolmoActProcessor",
26
+ "resize_mode": "siglip"
27
+ }
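A quick check of the geometry this config implies (my arithmetic, assuming pooled token counts round up): a 378×378 crop with 14-pixel patches gives a 27×27 patch grid, and 2×2 pooling reduces it to a 14×14 pooled grid:

```python
import math

base = 378             # base_image_input_size
patch = 14             # image_patch_size
pool = 2               # image_pooling_h / image_pooling_w

patches_per_side = base // patch                      # 27
pooled_per_side = math.ceil(patches_per_side / pool)  # 14, assuming ceil rounding
print(patches_per_side ** 2, pooled_per_side ** 2)    # 729 patches, 196 pooled tokens
```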
processing_molmoact.py ADDED
@@ -0,0 +1,463 @@
1
+ """
2
+ Processor class for MolmoAct.
3
+ """
4
+ from typing import List, Optional, Union, Dict, Tuple
5
+
6
+ import PIL
7
+ from PIL import ImageFile, ImageOps
8
+
9
+ try:
10
+ from typing import Unpack
11
+ except ImportError:
12
+ from typing_extensions import Unpack
13
+
14
+ import numpy as np
15
+ import torch
16
+
17
+ from transformers.image_utils import ImageInput
18
+ from transformers.processing_utils import (
19
+ ProcessingKwargs,
20
+ ProcessorMixin,
21
+ )
22
+ from transformers.feature_extraction_utils import BatchFeature
23
+ from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
24
+ from transformers.utils import logging
25
+
26
+ from transformers import AutoTokenizer
27
+ from .image_processing_molmoact import MolmoActImagesKwargs, MolmoActImageProcessor
28
+
29
+
30
+ logger = logging.get_logger(__name__)
31
+
32
+
33
+ # Special tokens, these should be present in any tokenizer we use since the preprocessor uses them
34
+ IMAGE_PATCH_TOKEN = f"<im_patch>" # Where to insert high-res tokens
35
+ IMAGE_LOW_RES_TOKEN = f"<im_low>" # Where to insert low-res tokens
36
+ IM_START_TOKEN = f"<im_start>"
37
+ IM_END_TOKEN = f"<im_end>"
38
+ IM_COL_TOKEN = f"<im_col>"
39
+ IMAGE_PROMPT = "<|image|>"
40
+
41
+ EXTRA_TOKENS = (IM_START_TOKEN, IM_END_TOKEN, IMAGE_PATCH_TOKEN,
42
+ IM_COL_TOKEN, IMAGE_PROMPT, IMAGE_LOW_RES_TOKEN)
43
+
44
+
45
+ DEMO_STYLES = [
46
+ "point_count",
47
+ "pointing",
48
+ "cosyn_point",
49
+ "user_qa",
50
+ "long_caption",
51
+ "short_caption",
52
+ "correction_qa",
53
+ "demo",
54
+ "android_control",
55
+ ]
56
+
57
+
58
+ def setup_pil():
59
+ PIL.Image.MAX_IMAGE_PIXELS = None
60
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
61
+
62
+
63
+ def get_special_token_ids(tokenizer: AutoTokenizer) -> Dict[str, int]:
64
+ ids = tokenizer.encode("".join(EXTRA_TOKENS), add_special_tokens=False)
65
+ assert len(ids) == len(EXTRA_TOKENS)
66
+ return {k: i for k, i in zip(EXTRA_TOKENS, ids)}
67
+
68
+
69
+ def load_image(image: Union[PIL.Image.Image, np.ndarray]) -> np.ndarray:
70
+ """Load image"""
71
+ setup_pil()
72
+ if isinstance(image, PIL.Image.Image):
73
+ image = image.convert("RGB")
74
+ image = ImageOps.exif_transpose(image)
75
+ return np.array(image)
76
+ elif isinstance(image, np.ndarray):
77
+ assert len(image.shape) == 3, "Image should have 3 dimensions"
78
+ assert image.shape[2] == 3, "Image should have 3 channels"
79
+ assert image.dtype == np.uint8, "Image should have uint8 type"
80
+ return image
81
+ else:
82
+ raise ValueError("Image should be PIL.Image or np.ndarray")
83
+
84
+
85
+ class MolmoActProcessorKwargs(ProcessingKwargs, total=False):
86
+ """MolmoAct processor kwargs"""
87
+ images_kwargs: MolmoActImagesKwargs
88
+ _defaults = {
89
+ "text_kwargs": {
90
+ "padding": False,
91
+ },
92
+ }
93
+
94
+
95
+ class MolmoActProcessor(ProcessorMixin):
96
+ attributes = ["image_processor", "tokenizer"]
97
+ optional_attributes = [
98
+ "chat_template",
99
+ "prompt_templates",
100
+ "message_format",
101
+ "system_prompt",
102
+ "style",
103
+ "always_start_with_space",
104
+ "default_inference_len",
105
+ "use_col_tokens",
106
+ "image_padding_mask",
107
+ ]
108
+ image_processor_class = "AutoImageProcessor"
109
+ tokenizer_class = "AutoTokenizer"
110
+
111
+ def __init__(
112
+ self,
113
+ image_processor: MolmoActImageProcessor = None,
114
+ tokenizer: AutoTokenizer = None,
115
+ chat_template: Optional[str] = None,
116
+ prompt_templates: Optional[str] = "uber_model",
117
+ message_format: Optional[str] = "role",
118
+ system_prompt: Optional[str] = "demo_or_style",
119
+ style: Optional[str] = "demo",
120
+ always_start_with_space: Optional[bool] = False,
121
+ default_inference_len: Optional[int] = 65,
122
+ use_col_tokens: Optional[bool] = True,
123
+ image_padding_mask: bool = False,
124
+ **kwargs
125
+ ) -> None:
126
+ if tokenizer.padding_side != "left":
127
+ logger.warning(f"Tokenizer {tokenizer.name_or_path} is not left-padded, padding side will be set to left")
128
+ tokenizer.padding_side = "left" # type: ignore
129
+ super().__init__(
130
+ image_processor,
131
+ tokenizer,
132
+ chat_template=chat_template,
133
+ prompt_templates=prompt_templates,
134
+ message_format=message_format,
135
+ system_prompt=system_prompt,
136
+ style=style,
137
+ always_start_with_space=always_start_with_space,
138
+ default_inference_len=default_inference_len,
139
+ use_col_tokens=use_col_tokens,
140
+ image_padding_mask=image_padding_mask,
141
+ )
142
+ self._special_tokens = None
143
+
144
+ @property
145
+ def special_token_ids(self):
146
+ if self._special_tokens is None:
147
+ self._special_tokens = get_special_token_ids(self.tokenizer)
148
+ return self._special_tokens
149
+
150
+ def get_user_prompt(self, text: TextInput) -> str:
151
+ """Get user prompt"""
152
+ if self.prompt_templates == "none":
153
+ return ""
154
+ elif self.prompt_templates == "uber_model":
155
+ return text
156
+ else:
157
+ raise NotImplementedError(self.prompt_templates)
158
+
159
+ def get_prefix(self) -> str:
160
+ """Get prefix"""
161
+ if self.system_prompt == "style_and_length": # captioner
162
+ assert self.style in ["long_caption"]
163
+ style = self.style
164
+ n = None if self.default_inference_len is None else str(self.default_inference_len)
165
+ if n is not None and len(n) > 0: # allow empty string to signal unconditioned
166
+ prefix = style + " " + n + ":"
167
+ else:
168
+ prefix = style + " :"
169
+ elif self.system_prompt == "demo_or_style": # demo model
170
+ if self.style in DEMO_STYLES:
171
+ prefix = ""
172
+ else:
173
+ prefix = self.style + ":"
174
+ else:
175
+ raise NotImplementedError(self.system_prompt)
176
+ return prefix
177
+
178
+ def format_prompt(self, prompt: str) -> str:
179
+ """Format prompt"""
180
+ if self.message_format == "none":
181
+ pass
182
+ elif self.message_format == "role":
183
+ prompt = "User: " + prompt + " Assistant:"
184
+ else:
185
+ raise NotImplementedError(self.message_format)
186
+
187
+ if self.always_start_with_space:
188
+ prompt = " " + prompt
189
+
190
+ return prompt
191
+
192
+ def get_prompt(self, text: TextInput) -> str:
193
+ prompt = self.get_user_prompt(text)
194
+ if self.system_prompt and self.system_prompt != "none":
195
+ prefix = self.get_prefix()
196
+ if len(prefix) > 0 and len(prompt) > 0:
197
+ prompt = prefix + " " + prompt
198
+ elif len(prefix) > 0:
199
+ prompt = prefix
200
+ prompt = self.format_prompt(prompt)
201
+ return prompt
202
+
203
+ def get_image_tokens(self, image_grid: np.ndarray):
204
+ joint = []
205
+ for h, w in image_grid:
206
+ per_row = np.full(w, IMAGE_PATCH_TOKEN)
207
+ if self.use_col_tokens:
208
+ per_row = np.concatenate([per_row, [IM_COL_TOKEN]], 0)
209
+ extra_tokens = np.tile(per_row, [h])
210
+ joint += [
211
+ [IM_START_TOKEN],
212
+ extra_tokens,
213
+ [IM_END_TOKEN],
214
+ ]
215
+ return np.concatenate(joint)
216
+
217
+ def insert_bos_numpy(
218
+ self,
219
+ input_ids: np.ndarray,
220
+ attention_mask: np.ndarray,
221
+ bos_token_id: int,
222
+ pad_token_id: int,
223
+ ):
224
+ """
225
+ Args:
226
+ input_ids: [B, S] array with left padding
227
+ attention_mask: [B, S] array (0 for pad, 1 for valid)
228
+ bos_token_id: int
229
+ pad_token_id: int
230
+ Returns:
231
+ input_ids_out: [B, S] or [B, S+1] array with bos inserted if needed
232
+ attention_mask_out: same shape as input_ids_out
233
+ """
234
+
235
+ need_to_expand = len(input_ids.shape) == 1
236
+ if need_to_expand:
237
+ input_ids = input_ids[None, :]
238
+ attention_mask = attention_mask[None, :]
239
+
240
+ B, S = input_ids.shape
241
+
242
+ # Handle zero-length sequence
243
+ if S == 0:
244
+ new_input_ids = np.full((B, 1), bos_token_id, dtype=input_ids.dtype)
245
+ new_attention_mask = np.ones((B, 1), dtype=attention_mask.dtype)
246
+ if need_to_expand:
247
+ new_input_ids = new_input_ids[0]
248
+ new_attention_mask = new_attention_mask[0]
249
+ return new_input_ids, new_attention_mask
250
+
251
+ first_valid_index = (attention_mask == 1).argmax(axis=-1) # [B]
252
+ bos_already_present = np.all(input_ids[np.arange(B), first_valid_index] == bos_token_id)
253
+
254
+ if bos_already_present:
255
+ if need_to_expand:
256
+ input_ids = input_ids[0]
257
+ attention_mask = attention_mask[0]
258
+ return input_ids, attention_mask
259
+ else:
260
+ new_input_ids = np.full((B, S+1), pad_token_id, dtype=input_ids.dtype)
261
+ new_attention_mask = np.zeros((B, S+1), dtype=attention_mask.dtype)
262
+
263
+ src_idx = np.tile(np.arange(S), (B, 1)) # [B, S]
264
+ valid_mask = src_idx >= first_valid_index[:, None] # [B, S]
265
+ tgt_idx = src_idx + 1 # shift right
266
+ batch_idx = np.tile(np.arange(B)[:, None], (1, S)) # [B, S]
267
+
268
+ # flatten valid_positions
269
+ flat_vals = input_ids[valid_mask]
270
+ flat_batch = batch_idx[valid_mask]
271
+ flat_tgt = tgt_idx[valid_mask]
272
+
273
+ new_input_ids[flat_batch, flat_tgt] = flat_vals
274
+ new_attention_mask[flat_batch, flat_tgt] = 1
275
+
276
+ insert_pos = first_valid_index
277
+ new_input_ids[np.arange(B), insert_pos] = bos_token_id
278
+ new_attention_mask[np.arange(B), insert_pos] = 1
279
+
280
+ if need_to_expand:
281
+ new_input_ids = new_input_ids[0]
282
+ new_attention_mask = new_attention_mask[0]
283
+
284
+ return new_input_ids, new_attention_mask
285
+
286
+ def insert_bos_torch(
287
+ self,
288
+ input_ids: torch.Tensor,
289
+ attention_mask: torch.Tensor,
290
+ bos_token_id: int,
291
+ pad_token_id: int,
292
+ ):
293
+ """
294
+ Args:
295
+ input_ids: [B, S] tensor with left padding
296
+ attention_mask: [B, S] tensor (0 for pad, 1 for valid)
297
+ bos_token_id: int
298
+ pad_token_id: int
299
+ Returns:
300
+ input_ids_out: [B, S] or [B, S+1] tensor with bos inserted if needed
301
+ attention_mask_out: same shape as input_ids_out
302
+ """
303
+
304
+ B, S = input_ids.shape
305
+ device = input_ids.device
306
+
307
+ # Handle zero-length sequence
308
+ if S == 0:
309
+ new_input_ids = torch.full((B, 1), bos_token_id, dtype=input_ids.dtype, device=device)
310
+ new_attention_mask = torch.ones((B, 1), dtype=attention_mask.dtype, device=device)
311
+ return new_input_ids, new_attention_mask
312
+
313
+ first_valid_index = (attention_mask == 1).long().argmax(dim=-1) # [B]
314
+ bos_already_present = (input_ids[torch.arange(B), first_valid_index] == bos_token_id).all()
315
+
316
+ if bos_already_present:
317
+ return input_ids, attention_mask
318
+ else:
319
+ new_input_ids = torch.full((B, S+1), pad_token_id, dtype=input_ids.dtype, device=device)
320
+ new_attention_mask = torch.zeros((B, S+1), dtype=attention_mask.dtype, device=device)
321
+
322
+ src_idx = torch.arange(S, device=device).expand(B, S) # [B, S]
323
+ valid_mask = src_idx >= first_valid_index.unsqueeze(1) # [B, S]
324
+ tgt_idx = src_idx + 1 # shift right
325
+ batch_idx = torch.arange(B, device=device).unsqueeze(1).expand_as(src_idx)
326
+
327
+ flat_vals = input_ids[valid_mask]
328
+ flat_batch = batch_idx[valid_mask]
329
+ flat_tgt = tgt_idx[valid_mask]
330
+
331
+ new_input_ids[flat_batch, flat_tgt] = flat_vals
332
+ new_attention_mask[flat_batch, flat_tgt] = 1
333
+
334
+ insert_pos = first_valid_index
335
+ batch_indices = torch.arange(B, device=device)
336
+ new_input_ids[batch_indices, insert_pos] = bos_token_id
337
+ new_attention_mask[batch_indices, insert_pos] = 1
338
+
339
+ return new_input_ids, new_attention_mask
340
+
341
+ def __call__(
342
+ self,
343
+ text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
344
+ images: Union[ImageInput, List[ImageInput]] = None,
345
+ apply_chat_template: bool = False,
346
+ **kwargs: Unpack[MolmoActProcessorKwargs],
347
+ ) -> BatchFeature:
348
+ if images is None and text is None:
349
+ raise ValueError("You have to specify at least one of `images` or `text`.")
350
+
351
+ output_kwargs = self._merge_kwargs(
352
+ MolmoActProcessorKwargs,
353
+ tokenizer_init_kwargs=self.tokenizer.init_kwargs,
354
+ **kwargs,
355
+ )
356
+
357
+ if isinstance(text, (list, tuple)) and isinstance(images, (list, tuple)):
358
+ if len(text) != len(images):
359
+ raise ValueError("You have to provide the same number of text and images")
360
+ if len(text) > 1 and not output_kwargs["text_kwargs"].get("padding", False):
361
+ raise ValueError("You have to specify padding when you have multiple text inputs")
362
+
363
+ if isinstance(text, str):
364
+ text = [text]
365
+ elif not isinstance(text, list) or not isinstance(text[0], str):
366
+ raise ValueError("Invalid input text. Please provide a string, or a list of strings")
367
+
368
+ if images is not None:
369
+ image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
370
+ else:
371
+ image_inputs = {}
372
+
373
+ if apply_chat_template:
374
+ text = [self.get_prompt(t) for t in text]
375
+
376
+ prompt_strings = text
377
+ if image_inputs.get("images", None) is not None:
378
+
379
+ prompt_strings = []
380
+ for idx, image_grids in enumerate(image_inputs.pop("image_grids")):
381
+ if isinstance(image_grids, torch.Tensor):
382
+ image_grids = image_grids.cpu().numpy()
383
+ if isinstance(images, (list, tuple)) and isinstance(images[idx], (list, tuple)):
384
+ image_grids = image_grids[~np.all(image_grids == -1, axis=-1)]
385
+ offset = 2 if len(images[idx]) < len(image_grids) else 1 # whether to use both low and high res images
386
+ all_image_strings = []
387
+ for i in range(0, len(image_grids), offset):
388
+ image_grids_i = image_grids[i:i+offset]
389
+ image_tokens = self.get_image_tokens(image_grids_i)
390
+ img_ix = i // offset
391
+ all_image_strings.append(f"Image {img_ix + 1}" + "".join(image_tokens))
392
+ image_string = "".join(all_image_strings)
393
+ prompt_strings.append(image_string + text[idx])
394
+ else:
395
+ image_grids = image_grids[~np.all(image_grids == -1, axis=-1)]
396
+ assert len(image_grids) in [1, 2], "Only one or two crops are supported for single image inputs"
397
+ image_tokens = self.get_image_tokens(image_grids)
398
+ image_string = "".join(image_tokens)
399
+ prompt_strings.append(image_string + text[idx])
400
+
401
+ text_inputs = self.tokenizer(prompt_strings, **output_kwargs["text_kwargs"])
402
+
403
+ input_ids = text_inputs["input_ids"]
404
+ attention_mask = text_inputs["attention_mask"]
405
+
406
+ is_list = isinstance(input_ids, (list, tuple))
407
+ if is_list:
408
+ input_ids = np.array(input_ids)
409
+ attention_mask = np.array(attention_mask)
410
+
411
+ use_numpy = isinstance(attention_mask, np.ndarray)
412
+
413
+ if use_numpy and np.issubdtype(input_ids.dtype, np.floating):
414
+ input_ids = input_ids.astype(np.int64)
415
+ attention_mask = attention_mask.astype(np.int64)
416
+ elif not use_numpy and torch.is_floating_point(input_ids):
417
+ input_ids = input_ids.to(torch.int64)
418
+ attention_mask = attention_mask.to(torch.int64)
419
+
420
+ bos = self.tokenizer.bos_token_id or self.tokenizer.eos_token_id
421
+ if use_numpy:
422
+ input_ids, attention_mask = self.insert_bos_numpy(
423
+ input_ids, attention_mask, bos, self.tokenizer.pad_token_id
424
+ )
425
+ else:
426
+ input_ids, attention_mask = self.insert_bos_torch(
427
+ input_ids, attention_mask, bos, self.tokenizer.pad_token_id
428
+ )
429
+ if is_list:
430
+ input_ids = input_ids.tolist() # type: ignore
431
+ attention_mask = attention_mask.tolist() # type: ignore
432
+ text_inputs["input_ids"] = input_ids
433
+ text_inputs["attention_mask"] = attention_mask
434
+
435
+ if kwargs.get("device", None) is not None:
436
+ text_inputs = text_inputs.to(device=kwargs.get("device"), non_blocking=True)
437
+ # note: Qwen tokenizers have no bos token, hence the eos fallback used when inserting bos above
438
+ return BatchFeature(
439
+ data={**text_inputs, **image_inputs}, tensor_type=output_kwargs["common_kwargs"]["return_tensors"]
440
+ )
441
+
442
+ def batch_decode(self, *args, **kwargs):
443
+ """
444
+ This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
445
+ refer to the docstring of this method for more information.
446
+ """
447
+ return self.tokenizer.batch_decode(*args, **kwargs)
448
+
449
+ def decode(self, *args, **kwargs):
450
+ """
451
+ This method forwards all its arguments to the underlying tokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
452
+ the docstring of this method for more information.
453
+ """
454
+ return self.tokenizer.decode(*args, **kwargs)
455
+
456
+ @property
457
+ def model_input_names(self):
458
+ tokenizer_input_names = self.tokenizer.model_input_names
459
+ image_processor_input_names = self.image_processor.model_input_names
460
+ return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
461
+
462
+
463
+ MolmoActProcessor.register_for_auto_class()
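A standalone sketch of the left-padded bos insertion performed by `insert_bos_numpy` / `insert_bos_torch` above, with toy ids (0 = pad, 1 = bos) rather than the real tokenizer's:

```python
import numpy as np

def insert_bos(input_ids, attention_mask, bos_id, pad_id):
    # Shift each left-padded row right by one and write bos at its first valid slot.
    B, S = input_ids.shape
    first = (attention_mask == 1).argmax(axis=-1)
    if np.all(input_ids[np.arange(B), first] == bos_id):
        return input_ids, attention_mask           # bos already present
    out = np.full((B, S + 1), pad_id, dtype=input_ids.dtype)
    mask = np.zeros((B, S + 1), dtype=attention_mask.dtype)
    for b in range(B):
        out[b, first[b]] = bos_id
        out[b, first[b] + 1:] = input_ids[b, first[b]:]
        mask[b, first[b]:] = 1
    return out, mask

ids = np.array([[0, 0, 5, 6], [0, 7, 8, 9]])
am = (ids != 0).astype(np.int64)
print(insert_bos(ids, am, bos_id=1, pad_id=0)[0])  # [[0 0 1 5 6] [0 1 7 8 9]]
```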
processor_config.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "always_start_with_space": false,
3
+ "auto_map": {
4
+ "AutoProcessor": "processing_molmoact.MolmoActProcessor"
5
+ },
6
+ "default_inference_len": 65,
7
+ "image_padding_mask": false,
8
+ "message_format": "none",
9
+ "processor_class": "MolmoActProcessor",
10
+ "prompt_templates": "none",
11
+ "style": "long_caption",
12
+ "system_prompt": "style_and_length",
13
+ "use_col_tokens": true
14
+ }
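Given the `auto_map` entries above, a typical way to load this repo is through the Auto classes with `trust_remote_code=True`; the repo id below is a placeholder, not the actual model id:

```python
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

repo = "..."  # placeholder: substitute the actual Hub repo id
processor = AutoProcessor.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained(
    repo, torch_dtype=torch.bfloat16, trust_remote_code=True
)
```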
special_tokens_map.json ADDED
@@ -0,0 +1,2854 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "|<EXTRA_TOKENS_0>|",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "|<EXTRA_TOKENS_1>|",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "|<EXTRA_TOKENS_2>|",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "|<EXTRA_TOKENS_3>|",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "|<EXTRA_TOKENS_4>|",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "|<EXTRA_TOKENS_5>|",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "|<EXTRA_TOKENS_6>|",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "|<EXTRA_TOKENS_7>|",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "|<EXTRA_TOKENS_8>|",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "|<EXTRA_TOKENS_9>|",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "|<EXTRA_TOKENS_10>|",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "|<EXTRA_TOKENS_11>|",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "|<EXTRA_TOKENS_12>|",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "|<EXTRA_TOKENS_13>|",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "|<EXTRA_TOKENS_14>|",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "|<EXTRA_TOKENS_15>|",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "|<EXTRA_TOKENS_16>|",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "|<EXTRA_TOKENS_17>|",
124
+ "lstrip": false,
125
+ "normalized": false,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "|<EXTRA_TOKENS_18>|",
131
+ "lstrip": false,
132
+ "normalized": false,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "|<EXTRA_TOKENS_19>|",
138
+ "lstrip": false,
139
+ "normalized": false,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "|<EXTRA_TOKENS_20>|",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "|<EXTRA_TOKENS_21>|",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "|<EXTRA_TOKENS_22>|",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "|<EXTRA_TOKENS_23>|",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "|<EXTRA_TOKENS_24>|",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "|<EXTRA_TOKENS_25>|",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "|<EXTRA_TOKENS_26>|",
187
+ "lstrip": false,
188
+ "normalized": false,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "|<EXTRA_TOKENS_27>|",
194
+ "lstrip": false,
195
+ "normalized": false,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "|<EXTRA_TOKENS_28>|",
201
+ "lstrip": false,
202
+ "normalized": false,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "|<EXTRA_TOKENS_29>|",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "|<EXTRA_TOKENS_30>|",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "|<EXTRA_TOKENS_31>|",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "|<EXTRA_TOKENS_32>|",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "|<EXTRA_TOKENS_33>|",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "|<EXTRA_TOKENS_34>|",
243
+ "lstrip": false,
244
+ "normalized": false,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "|<EXTRA_TOKENS_35>|",
250
+ "lstrip": false,
251
+ "normalized": false,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "|<EXTRA_TOKENS_36>|",
257
+ "lstrip": false,
258
+ "normalized": false,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "|<EXTRA_TOKENS_37>|",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "|<EXTRA_TOKENS_38>|",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "|<EXTRA_TOKENS_39>|",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "|<EXTRA_TOKENS_40>|",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "|<EXTRA_TOKENS_41>|",
292
+ "lstrip": false,
293
+ "normalized": false,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "|<EXTRA_TOKENS_42>|",
299
+ "lstrip": false,
300
+ "normalized": false,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "|<EXTRA_TOKENS_43>|",
306
+ "lstrip": false,
307
+ "normalized": false,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "|<EXTRA_TOKENS_44>|",
313
+ "lstrip": false,
314
+ "normalized": false,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "|<EXTRA_TOKENS_45>|",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "|<EXTRA_TOKENS_46>|",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "|<EXTRA_TOKENS_47>|",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "|<EXTRA_TOKENS_48>|",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "|<EXTRA_TOKENS_49>|",
348
+ "lstrip": false,
349
+ "normalized": false,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "|<EXTRA_TOKENS_50>|",
355
+ "lstrip": false,
356
+ "normalized": false,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "|<EXTRA_TOKENS_51>|",
362
+ "lstrip": false,
363
+ "normalized": false,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "|<EXTRA_TOKENS_52>|",
369
+ "lstrip": false,
370
+ "normalized": false,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "|<EXTRA_TOKENS_53>|",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "|<EXTRA_TOKENS_54>|",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "|<EXTRA_TOKENS_55>|",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ },
395
+ {
396
+ "content": "|<EXTRA_TOKENS_56>|",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
+ {
403
+ "content": "|<EXTRA_TOKENS_57>|",
404
+ "lstrip": false,
405
+ "normalized": false,
406
+ "rstrip": false,
407
+ "single_word": false
408
+ },
409
+ {
410
+ "content": "|<EXTRA_TOKENS_58>|",
411
+ "lstrip": false,
412
+ "normalized": false,
413
+ "rstrip": false,
414
+ "single_word": false
415
+ },
416
+ {
417
+ "content": "|<EXTRA_TOKENS_59>|",
418
+ "lstrip": false,
419
+ "normalized": false,
420
+ "rstrip": false,
421
+ "single_word": false
422
+ },
423
+ {
424
+ "content": "|<EXTRA_TOKENS_60>|",
425
+ "lstrip": false,
426
+ "normalized": false,
427
+ "rstrip": false,
428
+ "single_word": false
429
+ },
430
+ {
431
+ "content": "|<EXTRA_TOKENS_61>|",
432
+ "lstrip": false,
433
+ "normalized": false,
434
+ "rstrip": false,
435
+ "single_word": false
436
+ },
437
+ {
438
+ "content": "|<EXTRA_TOKENS_62>|",
439
+ "lstrip": false,
440
+ "normalized": false,
441
+ "rstrip": false,
442
+ "single_word": false
443
+ },
444
+ {
445
+ "content": "|<EXTRA_TOKENS_63>|",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false
450
+ },
451
+ {
452
+ "content": "|<EXTRA_TOKENS_64>|",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false
457
+ },
458
+ {
459
+ "content": "|<EXTRA_TOKENS_65>|",
460
+ "lstrip": false,
461
+ "normalized": false,
462
+ "rstrip": false,
463
+ "single_word": false
464
+ },
465
+ {
466
+ "content": "|<EXTRA_TOKENS_66>|",
467
+ "lstrip": false,
468
+ "normalized": false,
469
+ "rstrip": false,
470
+ "single_word": false
471
+ },
472
+ {
473
+ "content": "|<EXTRA_TOKENS_67>|",
474
+ "lstrip": false,
475
+ "normalized": false,
476
+ "rstrip": false,
477
+ "single_word": false
478
+ },
479
+ {
480
+ "content": "|<EXTRA_TOKENS_68>|",
481
+ "lstrip": false,
482
+ "normalized": false,
483
+ "rstrip": false,
484
+ "single_word": false
485
+ },
486
+ {
487
+ "content": "|<EXTRA_TOKENS_69>|",
488
+ "lstrip": false,
489
+ "normalized": false,
490
+ "rstrip": false,
491
+ "single_word": false
492
+ },
493
+ {
494
+ "content": "|<EXTRA_TOKENS_70>|",
495
+ "lstrip": false,
496
+ "normalized": false,
497
+ "rstrip": false,
498
+ "single_word": false
499
+ },
500
+ {
501
+ "content": "|<EXTRA_TOKENS_71>|",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false
506
+ },
507
+ {
508
+ "content": "|<EXTRA_TOKENS_72>|",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false
513
+ },
514
+ {
515
+ "content": "|<EXTRA_TOKENS_73>|",
516
+ "lstrip": false,
517
+ "normalized": false,
518
+ "rstrip": false,
519
+ "single_word": false
520
+ },
521
+ {
522
+ "content": "|<EXTRA_TOKENS_74>|",
523
+ "lstrip": false,
524
+ "normalized": false,
525
+ "rstrip": false,
526
+ "single_word": false
527
+ },
528
+ {
529
+ "content": "|<EXTRA_TOKENS_75>|",
530
+ "lstrip": false,
531
+ "normalized": false,
532
+ "rstrip": false,
533
+ "single_word": false
534
+ },
535
+ {
536
+ "content": "|<EXTRA_TOKENS_76>|",
537
+ "lstrip": false,
538
+ "normalized": false,
539
+ "rstrip": false,
540
+ "single_word": false
541
+ },
542
+ {
543
+ "content": "|<EXTRA_TOKENS_77>|",
544
+ "lstrip": false,
545
+ "normalized": false,
546
+ "rstrip": false,
547
+ "single_word": false
548
+ },
549
+ {
550
+ "content": "|<EXTRA_TOKENS_78>|",
551
+ "lstrip": false,
552
+ "normalized": false,
553
+ "rstrip": false,
554
+ "single_word": false
555
+ },
556
+ {
557
+ "content": "|<EXTRA_TOKENS_79>|",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false
562
+ },
563
+ {
564
+ "content": "|<EXTRA_TOKENS_80>|",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false
569
+ },
570
+ {
571
+ "content": "|<EXTRA_TOKENS_81>|",
572
+ "lstrip": false,
573
+ "normalized": false,
574
+ "rstrip": false,
575
+ "single_word": false
576
+ },
577
+ {
578
+ "content": "|<EXTRA_TOKENS_82>|",
579
+ "lstrip": false,
580
+ "normalized": false,
581
+ "rstrip": false,
582
+ "single_word": false
583
+ },
584
+ {
585
+ "content": "|<EXTRA_TOKENS_83>|",
586
+ "lstrip": false,
587
+ "normalized": false,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ {
592
+ "content": "|<EXTRA_TOKENS_84>|",
593
+ "lstrip": false,
594
+ "normalized": false,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ {
599
+ "content": "|<EXTRA_TOKENS_85>|",
600
+ "lstrip": false,
601
+ "normalized": false,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ {
606
+ "content": "|<EXTRA_TOKENS_86>|",
607
+ "lstrip": false,
608
+ "normalized": false,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ {
613
+ "content": "|<EXTRA_TOKENS_87>|",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ },
619
+ {
620
+ "content": "|<EXTRA_TOKENS_88>|",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false
625
+ },
626
+ {
627
+ "content": "|<EXTRA_TOKENS_89>|",
628
+ "lstrip": false,
629
+ "normalized": false,
630
+ "rstrip": false,
631
+ "single_word": false
632
+ },
633
+ {
634
+ "content": "|<EXTRA_TOKENS_90>|",
635
+ "lstrip": false,
636
+ "normalized": false,
637
+ "rstrip": false,
638
+ "single_word": false
639
+ },
640
+ {
641
+ "content": "|<EXTRA_TOKENS_91>|",
642
+ "lstrip": false,
643
+ "normalized": false,
644
+ "rstrip": false,
645
+ "single_word": false
646
+ },
647
+ {
648
+ "content": "|<EXTRA_TOKENS_92>|",
649
+ "lstrip": false,
650
+ "normalized": false,
651
+ "rstrip": false,
652
+ "single_word": false
653
+ },
654
+ {
655
+ "content": "|<EXTRA_TOKENS_93>|",
656
+ "lstrip": false,
657
+ "normalized": false,
658
+ "rstrip": false,
659
+ "single_word": false
660
+ },
661
+ {
662
+ "content": "|<EXTRA_TOKENS_94>|",
663
+ "lstrip": false,
664
+ "normalized": false,
665
+ "rstrip": false,
666
+ "single_word": false
667
+ },
668
+ {
669
+ "content": "|<EXTRA_TOKENS_95>|",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false
674
+ },
675
+ {
676
+ "content": "|<EXTRA_TOKENS_96>|",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false
681
+ },
682
+ {
683
+ "content": "|<EXTRA_TOKENS_97>|",
684
+ "lstrip": false,
685
+ "normalized": false,
686
+ "rstrip": false,
687
+ "single_word": false
688
+ },
689
+ {
690
+ "content": "|<EXTRA_TOKENS_98>|",
691
+ "lstrip": false,
692
+ "normalized": false,
693
+ "rstrip": false,
694
+ "single_word": false
695
+ },
696
+ {
697
+ "content": "|<EXTRA_TOKENS_99>|",
698
+ "lstrip": false,
699
+ "normalized": false,
700
+ "rstrip": false,
701
+ "single_word": false
702
+ },
703
+ {
704
+ "content": "|<EXTRA_TOKENS_100>|",
705
+ "lstrip": false,
706
+ "normalized": false,
707
+ "rstrip": false,
708
+ "single_word": false
709
+ },
710
+ {
711
+ "content": "|<EXTRA_TOKENS_101>|",
712
+ "lstrip": false,
713
+ "normalized": false,
714
+ "rstrip": false,
715
+ "single_word": false
716
+ },
717
+ {
718
+ "content": "|<EXTRA_TOKENS_102>|",
719
+ "lstrip": false,
720
+ "normalized": false,
721
+ "rstrip": false,
722
+ "single_word": false
723
+ },
724
+ {
725
+ "content": "|<EXTRA_TOKENS_103>|",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false
730
+ },
731
+ {
732
+ "content": "|<EXTRA_TOKENS_104>|",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false
737
+ },
738
+ {
739
+ "content": "|<EXTRA_TOKENS_105>|",
740
+ "lstrip": false,
741
+ "normalized": false,
742
+ "rstrip": false,
743
+ "single_word": false
744
+ },
745
+ {
746
+ "content": "|<EXTRA_TOKENS_106>|",
747
+ "lstrip": false,
748
+ "normalized": false,
749
+ "rstrip": false,
750
+ "single_word": false
751
+ },
752
+ {
753
+ "content": "|<EXTRA_TOKENS_107>|",
754
+ "lstrip": false,
755
+ "normalized": false,
756
+ "rstrip": false,
757
+ "single_word": false
758
+ },
759
+ {
760
+ "content": "|<EXTRA_TOKENS_108>|",
761
+ "lstrip": false,
762
+ "normalized": false,
763
+ "rstrip": false,
764
+ "single_word": false
765
+ },
766
+ {
767
+ "content": "|<EXTRA_TOKENS_109>|",
768
+ "lstrip": false,
769
+ "normalized": false,
770
+ "rstrip": false,
771
+ "single_word": false
772
+ },
773
+ {
774
+ "content": "|<EXTRA_TOKENS_110>|",
775
+ "lstrip": false,
776
+ "normalized": false,
777
+ "rstrip": false,
778
+ "single_word": false
779
+ },
780
+ {
781
+ "content": "|<EXTRA_TOKENS_111>|",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false
786
+ },
787
+ {
788
+ "content": "|<EXTRA_TOKENS_112>|",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false
793
+ },
794
+ {
795
+ "content": "|<EXTRA_TOKENS_113>|",
796
+ "lstrip": false,
797
+ "normalized": false,
798
+ "rstrip": false,
799
+ "single_word": false
800
+ },
801
+ {
802
+ "content": "|<EXTRA_TOKENS_114>|",
803
+ "lstrip": false,
804
+ "normalized": false,
805
+ "rstrip": false,
806
+ "single_word": false
807
+ },
808
+ {
809
+ "content": "|<EXTRA_TOKENS_115>|",
810
+ "lstrip": false,
811
+ "normalized": false,
812
+ "rstrip": false,
813
+ "single_word": false
814
+ },
815
+ {
816
+ "content": "|<EXTRA_TOKENS_116>|",
817
+ "lstrip": false,
818
+ "normalized": false,
819
+ "rstrip": false,
820
+ "single_word": false
821
+ },
822
+ {
823
+ "content": "|<EXTRA_TOKENS_117>|",
824
+ "lstrip": false,
825
+ "normalized": false,
826
+ "rstrip": false,
827
+ "single_word": false
828
+ },
829
+ {
830
+ "content": "|<EXTRA_TOKENS_118>|",
831
+ "lstrip": false,
832
+ "normalized": false,
833
+ "rstrip": false,
834
+ "single_word": false
835
+ },
836
+ {
837
+ "content": "|<EXTRA_TOKENS_119>|",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false
842
+ },
843
+ {
844
+ "content": "|<EXTRA_TOKENS_120>|",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false
849
+ },
850
+ {
851
+ "content": "|<EXTRA_TOKENS_121>|",
852
+ "lstrip": false,
853
+ "normalized": false,
854
+ "rstrip": false,
855
+ "single_word": false
856
+ },
857
+ {
858
+ "content": "|<EXTRA_TOKENS_122>|",
859
+ "lstrip": false,
860
+ "normalized": false,
861
+ "rstrip": false,
862
+ "single_word": false
863
+ },
864
+ {
865
+ "content": "|<EXTRA_TOKENS_123>|",
866
+ "lstrip": false,
867
+ "normalized": false,
868
+ "rstrip": false,
869
+ "single_word": false
870
+ },
871
+ {
872
+ "content": "|<EXTRA_TOKENS_124>|",
873
+ "lstrip": false,
874
+ "normalized": false,
875
+ "rstrip": false,
876
+ "single_word": false
877
+ },
878
+ {
879
+ "content": "|<EXTRA_TOKENS_125>|",
880
+ "lstrip": false,
881
+ "normalized": false,
882
+ "rstrip": false,
883
+ "single_word": false
884
+ },
885
+ {
886
+ "content": "|<EXTRA_TOKENS_126>|",
887
+ "lstrip": false,
888
+ "normalized": false,
889
+ "rstrip": false,
890
+ "single_word": false
891
+ },
892
+ {
893
+ "content": "|<EXTRA_TOKENS_127>|",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false
898
+ },
899
+ {
900
+ "content": "|<EXTRA_TOKENS_128>|",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false
905
+ },
906
+ {
907
+ "content": "|<EXTRA_TOKENS_129>|",
908
+ "lstrip": false,
909
+ "normalized": false,
910
+ "rstrip": false,
911
+ "single_word": false
912
+ },
913
+ {
914
+ "content": "|<EXTRA_TOKENS_130>|",
915
+ "lstrip": false,
916
+ "normalized": false,
917
+ "rstrip": false,
918
+ "single_word": false
919
+ },
920
+ {
921
+ "content": "|<EXTRA_TOKENS_131>|",
922
+ "lstrip": false,
923
+ "normalized": false,
924
+ "rstrip": false,
925
+ "single_word": false
926
+ },
927
+ {
928
+ "content": "|<EXTRA_TOKENS_132>|",
929
+ "lstrip": false,
930
+ "normalized": false,
931
+ "rstrip": false,
932
+ "single_word": false
933
+ },
934
+ {
935
+ "content": "|<EXTRA_TOKENS_133>|",
936
+ "lstrip": false,
937
+ "normalized": false,
938
+ "rstrip": false,
939
+ "single_word": false
940
+ },
941
+ {
942
+ "content": "|<EXTRA_TOKENS_134>|",
943
+ "lstrip": false,
944
+ "normalized": false,
945
+ "rstrip": false,
946
+ "single_word": false
947
+ },
948
+ {
949
+ "content": "|<EXTRA_TOKENS_135>|",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false
954
+ },
955
+ {
956
+ "content": "|<EXTRA_TOKENS_136>|",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false
961
+ },
962
+ {
963
+ "content": "|<EXTRA_TOKENS_137>|",
964
+ "lstrip": false,
965
+ "normalized": false,
966
+ "rstrip": false,
967
+ "single_word": false
968
+ },
969
+ {
970
+ "content": "|<EXTRA_TOKENS_138>|",
971
+ "lstrip": false,
972
+ "normalized": false,
973
+ "rstrip": false,
974
+ "single_word": false
975
+ },
976
+ {
977
+ "content": "|<EXTRA_TOKENS_139>|",
978
+ "lstrip": false,
979
+ "normalized": false,
980
+ "rstrip": false,
981
+ "single_word": false
982
+ },
983
+ {
984
+ "content": "|<EXTRA_TOKENS_140>|",
985
+ "lstrip": false,
986
+ "normalized": false,
987
+ "rstrip": false,
988
+ "single_word": false
989
+ },
990
+ {
991
+ "content": "|<EXTRA_TOKENS_141>|",
992
+ "lstrip": false,
993
+ "normalized": false,
994
+ "rstrip": false,
995
+ "single_word": false
996
+ },
997
+ {
998
+ "content": "|<EXTRA_TOKENS_142>|",
999
+ "lstrip": false,
1000
+ "normalized": false,
1001
+ "rstrip": false,
1002
+ "single_word": false
1003
+ },
1004
+ {
1005
+ "content": "|<EXTRA_TOKENS_143>|",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false
1010
+ },
1011
+ {
1012
+ "content": "|<EXTRA_TOKENS_144>|",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false
1017
+ },
1018
+ {
1019
+ "content": "|<EXTRA_TOKENS_145>|",
1020
+ "lstrip": false,
1021
+ "normalized": false,
1022
+ "rstrip": false,
1023
+ "single_word": false
1024
+ },
1025
+ {
1026
+ "content": "|<EXTRA_TOKENS_146>|",
1027
+ "lstrip": false,
1028
+ "normalized": false,
1029
+ "rstrip": false,
1030
+ "single_word": false
1031
+ },
1032
+ {
1033
+ "content": "|<EXTRA_TOKENS_147>|",
1034
+ "lstrip": false,
1035
+ "normalized": false,
1036
+ "rstrip": false,
1037
+ "single_word": false
1038
+ },
1039
+ {
1040
+ "content": "|<EXTRA_TOKENS_148>|",
1041
+ "lstrip": false,
1042
+ "normalized": false,
1043
+ "rstrip": false,
1044
+ "single_word": false
1045
+ },
1046
+ {
1047
+ "content": "|<EXTRA_TOKENS_149>|",
1048
+ "lstrip": false,
1049
+ "normalized": false,
1050
+ "rstrip": false,
1051
+ "single_word": false
1052
+ },
1053
+ {
1054
+ "content": "|<EXTRA_TOKENS_150>|",
1055
+ "lstrip": false,
1056
+ "normalized": false,
1057
+ "rstrip": false,
1058
+ "single_word": false
1059
+ },
1060
+ {
1061
+ "content": "|<EXTRA_TOKENS_151>|",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false
1066
+ },
1067
+ {
1068
+ "content": "|<EXTRA_TOKENS_152>|",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false
1073
+ },
1074
+ {
1075
+ "content": "|<EXTRA_TOKENS_153>|",
1076
+ "lstrip": false,
1077
+ "normalized": false,
1078
+ "rstrip": false,
1079
+ "single_word": false
1080
+ },
1081
+ {
1082
+ "content": "|<EXTRA_TOKENS_154>|",
1083
+ "lstrip": false,
1084
+ "normalized": false,
1085
+ "rstrip": false,
1086
+ "single_word": false
1087
+ },
1088
+ {
1089
+ "content": "|<EXTRA_TOKENS_155>|",
1090
+ "lstrip": false,
1091
+ "normalized": false,
1092
+ "rstrip": false,
1093
+ "single_word": false
1094
+ },
1095
+ {
1096
+ "content": "|<EXTRA_TOKENS_156>|",
1097
+ "lstrip": false,
1098
+ "normalized": false,
1099
+ "rstrip": false,
1100
+ "single_word": false
1101
+ },
1102
+ {
1103
+ "content": "|<EXTRA_TOKENS_157>|",
1104
+ "lstrip": false,
1105
+ "normalized": false,
1106
+ "rstrip": false,
1107
+ "single_word": false
1108
+ },
1109
+ {
1110
+ "content": "|<EXTRA_TOKENS_158>|",
1111
+ "lstrip": false,
1112
+ "normalized": false,
1113
+ "rstrip": false,
1114
+ "single_word": false
1115
+ },
1116
+ {
1117
+ "content": "|<EXTRA_TOKENS_159>|",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false
1122
+ },
1123
+ {
1124
+ "content": "|<EXTRA_TOKENS_160>|",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false
1129
+ },
1130
+ {
1131
+ "content": "|<EXTRA_TOKENS_161>|",
1132
+ "lstrip": false,
1133
+ "normalized": false,
1134
+ "rstrip": false,
1135
+ "single_word": false
1136
+ },
1137
+ {
1138
+ "content": "|<EXTRA_TOKENS_162>|",
1139
+ "lstrip": false,
1140
+ "normalized": false,
1141
+ "rstrip": false,
1142
+ "single_word": false
1143
+ },
1144
+ {
1145
+ "content": "|<EXTRA_TOKENS_163>|",
1146
+ "lstrip": false,
1147
+ "normalized": false,
1148
+ "rstrip": false,
1149
+ "single_word": false
1150
+ },
1151
+ {
1152
+ "content": "|<EXTRA_TOKENS_164>|",
1153
+ "lstrip": false,
1154
+ "normalized": false,
1155
+ "rstrip": false,
1156
+ "single_word": false
1157
+ },
1158
+ {
1159
+ "content": "|<EXTRA_TOKENS_165>|",
1160
+ "lstrip": false,
1161
+ "normalized": false,
1162
+ "rstrip": false,
1163
+ "single_word": false
1164
+ },
1165
+ {
1166
+ "content": "|<EXTRA_TOKENS_166>|",
1167
+ "lstrip": false,
1168
+ "normalized": false,
1169
+ "rstrip": false,
1170
+ "single_word": false
1171
+ },
1172
+ {
1173
+ "content": "|<EXTRA_TOKENS_167>|",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false
1178
+ },
1179
+ {
1180
+ "content": "|<EXTRA_TOKENS_168>|",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false
1185
+ },
1186
+ {
1187
+ "content": "|<EXTRA_TOKENS_169>|",
1188
+ "lstrip": false,
1189
+ "normalized": false,
1190
+ "rstrip": false,
1191
+ "single_word": false
1192
+ },
1193
+ {
1194
+ "content": "|<EXTRA_TOKENS_170>|",
1195
+ "lstrip": false,
1196
+ "normalized": false,
1197
+ "rstrip": false,
1198
+ "single_word": false
1199
+ },
1200
+ {
1201
+ "content": "|<EXTRA_TOKENS_171>|",
1202
+ "lstrip": false,
1203
+ "normalized": false,
1204
+ "rstrip": false,
1205
+ "single_word": false
1206
+ },
1207
+ {
1208
+ "content": "|<EXTRA_TOKENS_172>|",
1209
+ "lstrip": false,
1210
+ "normalized": false,
1211
+ "rstrip": false,
1212
+ "single_word": false
1213
+ },
1214
+ {
1215
+ "content": "|<EXTRA_TOKENS_173>|",
1216
+ "lstrip": false,
1217
+ "normalized": false,
1218
+ "rstrip": false,
1219
+ "single_word": false
1220
+ },
1221
+ {
1222
+ "content": "|<EXTRA_TOKENS_174>|",
1223
+ "lstrip": false,
1224
+ "normalized": false,
1225
+ "rstrip": false,
1226
+ "single_word": false
1227
+ },
1228
+ {
1229
+ "content": "|<EXTRA_TOKENS_175>|",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false
1234
+ },
1235
+ {
1236
+ "content": "|<EXTRA_TOKENS_176>|",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false
1241
+ },
1242
+ {
1243
+ "content": "|<EXTRA_TOKENS_177>|",
1244
+ "lstrip": false,
1245
+ "normalized": false,
1246
+ "rstrip": false,
1247
+ "single_word": false
1248
+ },
1249
+ {
1250
+ "content": "|<EXTRA_TOKENS_178>|",
1251
+ "lstrip": false,
1252
+ "normalized": false,
1253
+ "rstrip": false,
1254
+ "single_word": false
1255
+ },
1256
+ {
1257
+ "content": "|<EXTRA_TOKENS_179>|",
1258
+ "lstrip": false,
1259
+ "normalized": false,
1260
+ "rstrip": false,
1261
+ "single_word": false
1262
+ },
1263
+ {
1264
+ "content": "|<EXTRA_TOKENS_180>|",
1265
+ "lstrip": false,
1266
+ "normalized": false,
1267
+ "rstrip": false,
1268
+ "single_word": false
1269
+ },
1270
+ {
1271
+ "content": "|<EXTRA_TOKENS_181>|",
1272
+ "lstrip": false,
1273
+ "normalized": false,
1274
+ "rstrip": false,
1275
+ "single_word": false
1276
+ },
1277
+ {
1278
+ "content": "|<EXTRA_TOKENS_182>|",
1279
+ "lstrip": false,
1280
+ "normalized": false,
1281
+ "rstrip": false,
1282
+ "single_word": false
1283
+ },
1284
+ {
1285
+ "content": "|<EXTRA_TOKENS_183>|",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false
1290
+ },
1291
+ {
1292
+ "content": "|<EXTRA_TOKENS_184>|",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false
1297
+ },
1298
+ {
1299
+ "content": "|<EXTRA_TOKENS_185>|",
1300
+ "lstrip": false,
1301
+ "normalized": false,
1302
+ "rstrip": false,
1303
+ "single_word": false
1304
+ },
1305
+ {
1306
+ "content": "|<EXTRA_TOKENS_186>|",
1307
+ "lstrip": false,
1308
+ "normalized": false,
1309
+ "rstrip": false,
1310
+ "single_word": false
1311
+ },
1312
+ {
1313
+ "content": "|<EXTRA_TOKENS_187>|",
1314
+ "lstrip": false,
1315
+ "normalized": false,
1316
+ "rstrip": false,
1317
+ "single_word": false
1318
+ },
1319
+ {
1320
+ "content": "|<EXTRA_TOKENS_188>|",
1321
+ "lstrip": false,
1322
+ "normalized": false,
1323
+ "rstrip": false,
1324
+ "single_word": false
1325
+ },
1326
+ {
1327
+ "content": "|<EXTRA_TOKENS_189>|",
1328
+ "lstrip": false,
1329
+ "normalized": false,
1330
+ "rstrip": false,
1331
+ "single_word": false
1332
+ },
1333
+ {
1334
+ "content": "|<EXTRA_TOKENS_190>|",
1335
+ "lstrip": false,
1336
+ "normalized": false,
1337
+ "rstrip": false,
1338
+ "single_word": false
1339
+ },
1340
+ {
1341
+ "content": "|<EXTRA_TOKENS_191>|",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false
1346
+ },
1347
+ {
1348
+ "content": "|<EXTRA_TOKENS_192>|",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false
1353
+ },
1354
+ {
1355
+ "content": "|<EXTRA_TOKENS_193>|",
1356
+ "lstrip": false,
1357
+ "normalized": false,
1358
+ "rstrip": false,
1359
+ "single_word": false
1360
+ },
1361
+ {
1362
+ "content": "|<EXTRA_TOKENS_194>|",
1363
+ "lstrip": false,
1364
+ "normalized": false,
1365
+ "rstrip": false,
1366
+ "single_word": false
1367
+ },
1368
+ {
1369
+ "content": "|<EXTRA_TOKENS_195>|",
1370
+ "lstrip": false,
1371
+ "normalized": false,
1372
+ "rstrip": false,
1373
+ "single_word": false
1374
+ },
1375
+ {
1376
+ "content": "|<EXTRA_TOKENS_196>|",
1377
+ "lstrip": false,
1378
+ "normalized": false,
1379
+ "rstrip": false,
1380
+ "single_word": false
1381
+ },
1382
+ {
1383
+ "content": "|<EXTRA_TOKENS_197>|",
1384
+ "lstrip": false,
1385
+ "normalized": false,
1386
+ "rstrip": false,
1387
+ "single_word": false
1388
+ },
1389
+ {
1390
+ "content": "|<EXTRA_TOKENS_198>|",
1391
+ "lstrip": false,
1392
+ "normalized": false,
1393
+ "rstrip": false,
1394
+ "single_word": false
1395
+ },
1396
+ {
1397
+ "content": "|<EXTRA_TOKENS_199>|",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false
1402
+ },
1403
+ {
1404
+ "content": "|<EXTRA_TOKENS_200>|",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false
1409
+ },
1410
+ {
1411
+ "content": "|<EXTRA_TOKENS_201>|",
1412
+ "lstrip": false,
1413
+ "normalized": false,
1414
+ "rstrip": false,
1415
+ "single_word": false
1416
+ },
1417
+ {
1418
+ "content": "|<EXTRA_TOKENS_202>|",
1419
+ "lstrip": false,
1420
+ "normalized": false,
1421
+ "rstrip": false,
1422
+ "single_word": false
1423
+ },
1424
+ {
1425
+ "content": "|<EXTRA_TOKENS_203>|",
1426
+ "lstrip": false,
1427
+ "normalized": false,
1428
+ "rstrip": false,
1429
+ "single_word": false
1430
+ },
1431
+ {
1432
+ "content": "|<EXTRA_TOKENS_204>|",
1433
+ "lstrip": false,
1434
+ "normalized": false,
1435
+ "rstrip": false,
1436
+ "single_word": false
1437
+ },
1438
+ {
1439
+ "content": "|<EXTRA_TOKENS_205>|",
1440
+ "lstrip": false,
1441
+ "normalized": false,
1442
+ "rstrip": false,
1443
+ "single_word": false
1444
+ },
1445
+ {
1446
+ "content": "|<EXTRA_TOKENS_206>|",
1447
+ "lstrip": false,
1448
+ "normalized": false,
1449
+ "rstrip": false,
1450
+ "single_word": false
1451
+ },
1452
+ {
1453
+ "content": "|<EXTRA_TOKENS_207>|",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false
1458
+ },
1459
+ {
1460
+ "content": "|<EXTRA_TOKENS_208>|",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false
1465
+ },
1466
+ {
1467
+ "content": "|<EXTRA_TOKENS_209>|",
1468
+ "lstrip": false,
1469
+ "normalized": false,
1470
+ "rstrip": false,
1471
+ "single_word": false
1472
+ },
1473
+ {
1474
+ "content": "|<EXTRA_TOKENS_210>|",
1475
+ "lstrip": false,
1476
+ "normalized": false,
1477
+ "rstrip": false,
1478
+ "single_word": false
1479
+ },
1480
+ {
1481
+ "content": "|<EXTRA_TOKENS_211>|",
1482
+ "lstrip": false,
1483
+ "normalized": false,
1484
+ "rstrip": false,
1485
+ "single_word": false
1486
+ },
1487
+ {
1488
+ "content": "|<EXTRA_TOKENS_212>|",
1489
+ "lstrip": false,
1490
+ "normalized": false,
1491
+ "rstrip": false,
1492
+ "single_word": false
1493
+ },
1494
+ {
1495
+ "content": "|<EXTRA_TOKENS_213>|",
1496
+ "lstrip": false,
1497
+ "normalized": false,
1498
+ "rstrip": false,
1499
+ "single_word": false
1500
+ },
1501
+ {
1502
+ "content": "|<EXTRA_TOKENS_214>|",
1503
+ "lstrip": false,
1504
+ "normalized": false,
1505
+ "rstrip": false,
1506
+ "single_word": false
1507
+ },
1508
+ {
1509
+ "content": "|<EXTRA_TOKENS_215>|",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false
1514
+ },
1515
+ {
1516
+ "content": "|<EXTRA_TOKENS_216>|",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false
1521
+ },
1522
+ {
1523
+ "content": "|<EXTRA_TOKENS_217>|",
1524
+ "lstrip": false,
1525
+ "normalized": false,
1526
+ "rstrip": false,
1527
+ "single_word": false
1528
+ },
1529
+ {
1530
+ "content": "|<EXTRA_TOKENS_218>|",
1531
+ "lstrip": false,
1532
+ "normalized": false,
1533
+ "rstrip": false,
1534
+ "single_word": false
1535
+ },
1536
+ {
1537
+ "content": "|<EXTRA_TOKENS_219>|",
1538
+ "lstrip": false,
1539
+ "normalized": false,
1540
+ "rstrip": false,
1541
+ "single_word": false
1542
+ },
1543
+ {
1544
+ "content": "|<EXTRA_TOKENS_220>|",
1545
+ "lstrip": false,
1546
+ "normalized": false,
1547
+ "rstrip": false,
1548
+ "single_word": false
1549
+ },
1550
+ {
1551
+ "content": "|<EXTRA_TOKENS_221>|",
1552
+ "lstrip": false,
1553
+ "normalized": false,
1554
+ "rstrip": false,
1555
+ "single_word": false
1556
+ },
1557
+ {
1558
+ "content": "|<EXTRA_TOKENS_222>|",
1559
+ "lstrip": false,
1560
+ "normalized": false,
1561
+ "rstrip": false,
1562
+ "single_word": false
1563
+ },
1564
+ {
1565
+ "content": "|<EXTRA_TOKENS_223>|",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false
1570
+ },
1571
+ {
1572
+ "content": "|<EXTRA_TOKENS_224>|",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false
1577
+ },
1578
+ {
1579
+ "content": "|<EXTRA_TOKENS_225>|",
1580
+ "lstrip": false,
1581
+ "normalized": false,
1582
+ "rstrip": false,
1583
+ "single_word": false
1584
+ },
1585
+ {
1586
+ "content": "|<EXTRA_TOKENS_226>|",
1587
+ "lstrip": false,
1588
+ "normalized": false,
1589
+ "rstrip": false,
1590
+ "single_word": false
1591
+ },
1592
+ {
1593
+ "content": "|<EXTRA_TOKENS_227>|",
1594
+ "lstrip": false,
1595
+ "normalized": false,
1596
+ "rstrip": false,
1597
+ "single_word": false
1598
+ },
1599
+ {
1600
+ "content": "|<EXTRA_TOKENS_228>|",
1601
+ "lstrip": false,
1602
+ "normalized": false,
1603
+ "rstrip": false,
1604
+ "single_word": false
1605
+ },
1606
+ {
1607
+ "content": "|<EXTRA_TOKENS_229>|",
1608
+ "lstrip": false,
1609
+ "normalized": false,
1610
+ "rstrip": false,
1611
+ "single_word": false
1612
+ },
1613
+ {
1614
+ "content": "|<EXTRA_TOKENS_230>|",
1615
+ "lstrip": false,
1616
+ "normalized": false,
1617
+ "rstrip": false,
1618
+ "single_word": false
1619
+ },
1620
+ {
1621
+ "content": "|<EXTRA_TOKENS_231>|",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false
1626
+ },
1627
+ {
1628
+ "content": "|<EXTRA_TOKENS_232>|",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false
1633
+ },
1634
+ {
1635
+ "content": "|<EXTRA_TOKENS_233>|",
1636
+ "lstrip": false,
1637
+ "normalized": false,
1638
+ "rstrip": false,
1639
+ "single_word": false
1640
+ },
1641
+ {
1642
+ "content": "|<EXTRA_TOKENS_234>|",
1643
+ "lstrip": false,
1644
+ "normalized": false,
1645
+ "rstrip": false,
1646
+ "single_word": false
1647
+ },
1648
+ {
1649
+ "content": "|<EXTRA_TOKENS_235>|",
1650
+ "lstrip": false,
1651
+ "normalized": false,
1652
+ "rstrip": false,
1653
+ "single_word": false
1654
+ },
1655
+ {
1656
+ "content": "|<EXTRA_TOKENS_236>|",
1657
+ "lstrip": false,
1658
+ "normalized": false,
1659
+ "rstrip": false,
1660
+ "single_word": false
1661
+ },
1662
+ {
1663
+ "content": "|<EXTRA_TOKENS_237>|",
1664
+ "lstrip": false,
1665
+ "normalized": false,
1666
+ "rstrip": false,
1667
+ "single_word": false
1668
+ },
1669
+ {
1670
+ "content": "|<EXTRA_TOKENS_238>|",
1671
+ "lstrip": false,
1672
+ "normalized": false,
1673
+ "rstrip": false,
1674
+ "single_word": false
1675
+ },
1676
+ {
1677
+ "content": "|<EXTRA_TOKENS_239>|",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false
1682
+ },
1683
+ {
1684
+ "content": "|<EXTRA_TOKENS_240>|",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false
1689
+ },
1690
+ {
1691
+ "content": "|<EXTRA_TOKENS_241>|",
1692
+ "lstrip": false,
1693
+ "normalized": false,
1694
+ "rstrip": false,
1695
+ "single_word": false
1696
+ },
1697
+ {
1698
+ "content": "|<EXTRA_TOKENS_242>|",
1699
+ "lstrip": false,
1700
+ "normalized": false,
1701
+ "rstrip": false,
1702
+ "single_word": false
1703
+ },
1704
+ {
1705
+ "content": "|<EXTRA_TOKENS_243>|",
1706
+ "lstrip": false,
1707
+ "normalized": false,
1708
+ "rstrip": false,
1709
+ "single_word": false
1710
+ },
1711
+ {
1712
+ "content": "|<EXTRA_TOKENS_244>|",
1713
+ "lstrip": false,
1714
+ "normalized": false,
1715
+ "rstrip": false,
1716
+ "single_word": false
1717
+ },
1718
+ {
1719
+ "content": "|<EXTRA_TOKENS_245>|",
1720
+ "lstrip": false,
1721
+ "normalized": false,
1722
+ "rstrip": false,
1723
+ "single_word": false
1724
+ },
1725
+ {
1726
+ "content": "|<EXTRA_TOKENS_246>|",
1727
+ "lstrip": false,
1728
+ "normalized": false,
1729
+ "rstrip": false,
1730
+ "single_word": false
1731
+ },
1732
+ {
1733
+ "content": "|<EXTRA_TOKENS_247>|",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false
1738
+ },
1739
+ {
1740
+ "content": "|<EXTRA_TOKENS_248>|",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false
1745
+ },
1746
+ {
1747
+ "content": "|<EXTRA_TOKENS_249>|",
1748
+ "lstrip": false,
1749
+ "normalized": false,
1750
+ "rstrip": false,
1751
+ "single_word": false
1752
+ },
1753
+ {
1754
+ "content": "|<EXTRA_TOKENS_250>|",
1755
+ "lstrip": false,
1756
+ "normalized": false,
1757
+ "rstrip": false,
1758
+ "single_word": false
1759
+ },
1760
+ {
1761
+ "content": "|<EXTRA_TOKENS_251>|",
1762
+ "lstrip": false,
1763
+ "normalized": false,
1764
+ "rstrip": false,
1765
+ "single_word": false
1766
+ },
1767
+ {
1768
+ "content": "|<EXTRA_TOKENS_252>|",
1769
+ "lstrip": false,
1770
+ "normalized": false,
1771
+ "rstrip": false,
1772
+ "single_word": false
1773
+ },
1774
+ {
1775
+ "content": "|<EXTRA_TOKENS_253>|",
1776
+ "lstrip": false,
1777
+ "normalized": false,
1778
+ "rstrip": false,
1779
+ "single_word": false
1780
+ },
1781
+ {
1782
+ "content": "|<EXTRA_TOKENS_254>|",
1783
+ "lstrip": false,
1784
+ "normalized": false,
1785
+ "rstrip": false,
1786
+ "single_word": false
1787
+ },
1788
+ {
1789
+ "content": "|<EXTRA_TOKENS_255>|",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false
1794
+ },
1795
+ {
1796
+ "content": "|<EXTRA_TOKENS_256>|",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false
1801
+ },
1802
+ {
1803
+ "content": "|<EXTRA_TOKENS_257>|",
1804
+ "lstrip": false,
1805
+ "normalized": false,
1806
+ "rstrip": false,
1807
+ "single_word": false
1808
+ },
1809
+ {
1810
+ "content": "|<EXTRA_TOKENS_258>|",
1811
+ "lstrip": false,
1812
+ "normalized": false,
1813
+ "rstrip": false,
1814
+ "single_word": false
1815
+ },
1816
+ {
1817
+ "content": "|<EXTRA_TOKENS_259>|",
1818
+ "lstrip": false,
1819
+ "normalized": false,
1820
+ "rstrip": false,
1821
+ "single_word": false
1822
+ },
1823
+ {
1824
+ "content": "|<EXTRA_TOKENS_260>|",
1825
+ "lstrip": false,
1826
+ "normalized": false,
1827
+ "rstrip": false,
1828
+ "single_word": false
1829
+ },
1830
+ {
1831
+ "content": "|<EXTRA_TOKENS_261>|",
1832
+ "lstrip": false,
1833
+ "normalized": false,
1834
+ "rstrip": false,
1835
+ "single_word": false
1836
+ },
1837
+ {
1838
+ "content": "|<EXTRA_TOKENS_262>|",
1839
+ "lstrip": false,
1840
+ "normalized": false,
1841
+ "rstrip": false,
1842
+ "single_word": false
1843
+ },
1844
+ {
1845
+ "content": "|<EXTRA_TOKENS_263>|",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false
1850
+ },
1851
+ {
1852
+ "content": "|<EXTRA_TOKENS_264>|",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false
1857
+ },
1858
+ {
1859
+ "content": "|<EXTRA_TOKENS_265>|",
1860
+ "lstrip": false,
1861
+ "normalized": false,
1862
+ "rstrip": false,
1863
+ "single_word": false
1864
+ },
1865
+ {
1866
+ "content": "|<EXTRA_TOKENS_266>|",
1867
+ "lstrip": false,
1868
+ "normalized": false,
1869
+ "rstrip": false,
1870
+ "single_word": false
1871
+ },
1872
+ {
1873
+ "content": "|<EXTRA_TOKENS_267>|",
1874
+ "lstrip": false,
1875
+ "normalized": false,
1876
+ "rstrip": false,
1877
+ "single_word": false
1878
+ },
1879
+ {
1880
+ "content": "|<EXTRA_TOKENS_268>|",
1881
+ "lstrip": false,
1882
+ "normalized": false,
1883
+ "rstrip": false,
1884
+ "single_word": false
1885
+ },
1886
+ {
1887
+ "content": "|<EXTRA_TOKENS_269>|",
1888
+ "lstrip": false,
1889
+ "normalized": false,
1890
+ "rstrip": false,
1891
+ "single_word": false
1892
+ },
1893
+ {
1894
+ "content": "|<EXTRA_TOKENS_270>|",
1895
+ "lstrip": false,
1896
+ "normalized": false,
1897
+ "rstrip": false,
1898
+ "single_word": false
1899
+ },
1900
+ {
1901
+ "content": "|<EXTRA_TOKENS_271>|",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false
1906
+ },
1907
+ {
1908
+ "content": "|<EXTRA_TOKENS_272>|",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false
1913
+ },
1914
+ {
1915
+ "content": "|<EXTRA_TOKENS_273>|",
1916
+ "lstrip": false,
1917
+ "normalized": false,
1918
+ "rstrip": false,
1919
+ "single_word": false
1920
+ },
1921
+ {
1922
+ "content": "|<EXTRA_TOKENS_274>|",
1923
+ "lstrip": false,
1924
+ "normalized": false,
1925
+ "rstrip": false,
1926
+ "single_word": false
1927
+ },
1928
+ {
1929
+ "content": "|<EXTRA_TOKENS_275>|",
1930
+ "lstrip": false,
1931
+ "normalized": false,
1932
+ "rstrip": false,
1933
+ "single_word": false
1934
+ },
1935
+ {
1936
+ "content": "|<EXTRA_TOKENS_276>|",
1937
+ "lstrip": false,
1938
+ "normalized": false,
1939
+ "rstrip": false,
1940
+ "single_word": false
1941
+ },
1942
+ {
1943
+ "content": "|<EXTRA_TOKENS_277>|",
1944
+ "lstrip": false,
1945
+ "normalized": false,
1946
+ "rstrip": false,
1947
+ "single_word": false
1948
+ },
1949
+ {
1950
+ "content": "|<EXTRA_TOKENS_278>|",
1951
+ "lstrip": false,
1952
+ "normalized": false,
1953
+ "rstrip": false,
1954
+ "single_word": false
1955
+ },
1956
+ {
1957
+ "content": "|<EXTRA_TOKENS_279>|",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false
1962
+ },
1963
+ {
1964
+ "content": "|<EXTRA_TOKENS_280>|",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false
1969
+ },
1970
+ {
1971
+ "content": "|<EXTRA_TOKENS_281>|",
1972
+ "lstrip": false,
1973
+ "normalized": false,
1974
+ "rstrip": false,
1975
+ "single_word": false
1976
+ },
1977
+ {
1978
+ "content": "|<EXTRA_TOKENS_282>|",
1979
+ "lstrip": false,
1980
+ "normalized": false,
1981
+ "rstrip": false,
1982
+ "single_word": false
1983
+ },
1984
+ {
1985
+ "content": "|<EXTRA_TOKENS_283>|",
1986
+ "lstrip": false,
1987
+ "normalized": false,
1988
+ "rstrip": false,
1989
+ "single_word": false
1990
+ },
1991
+ {
1992
+ "content": "|<EXTRA_TOKENS_284>|",
1993
+ "lstrip": false,
1994
+ "normalized": false,
1995
+ "rstrip": false,
1996
+ "single_word": false
1997
+ },
1998
+ {
1999
+ "content": "|<EXTRA_TOKENS_285>|",
2000
+ "lstrip": false,
2001
+ "normalized": false,
2002
+ "rstrip": false,
2003
+ "single_word": false
2004
+ },
2005
+ {
2006
+ "content": "|<EXTRA_TOKENS_286>|",
2007
+ "lstrip": false,
2008
+ "normalized": false,
2009
+ "rstrip": false,
2010
+ "single_word": false
2011
+ },
2012
+ {
2013
+ "content": "|<EXTRA_TOKENS_287>|",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false
2018
+ },
2019
+ {
2020
+ "content": "|<EXTRA_TOKENS_288>|",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false
2025
+ },
2026
+ {
2027
+ "content": "|<EXTRA_TOKENS_289>|",
2028
+ "lstrip": false,
2029
+ "normalized": false,
2030
+ "rstrip": false,
2031
+ "single_word": false
2032
+ },
2033
+ {
2034
+ "content": "|<EXTRA_TOKENS_290>|",
2035
+ "lstrip": false,
2036
+ "normalized": false,
2037
+ "rstrip": false,
2038
+ "single_word": false
2039
+ },
2040
+ {
2041
+ "content": "|<EXTRA_TOKENS_291>|",
2042
+ "lstrip": false,
2043
+ "normalized": false,
2044
+ "rstrip": false,
2045
+ "single_word": false
2046
+ },
2047
+ {
2048
+ "content": "|<EXTRA_TOKENS_292>|",
2049
+ "lstrip": false,
2050
+ "normalized": false,
2051
+ "rstrip": false,
2052
+ "single_word": false
2053
+ },
2054
+ {
2055
+ "content": "|<EXTRA_TOKENS_293>|",
2056
+ "lstrip": false,
2057
+ "normalized": false,
2058
+ "rstrip": false,
2059
+ "single_word": false
2060
+ },
2061
+ {
2062
+ "content": "|<EXTRA_TOKENS_294>|",
2063
+ "lstrip": false,
2064
+ "normalized": false,
2065
+ "rstrip": false,
2066
+ "single_word": false
2067
+ },
2068
+ {
2069
+ "content": "|<EXTRA_TOKENS_295>|",
2070
+ "lstrip": false,
2071
+ "normalized": false,
2072
+ "rstrip": false,
2073
+ "single_word": false
2074
+ },
2075
+ {
2076
+ "content": "|<EXTRA_TOKENS_296>|",
2077
+ "lstrip": false,
2078
+ "normalized": false,
2079
+ "rstrip": false,
2080
+ "single_word": false
2081
+ },
2082
+ {
2083
+ "content": "|<EXTRA_TOKENS_297>|",
2084
+ "lstrip": false,
2085
+ "normalized": false,
2086
+ "rstrip": false,
2087
+ "single_word": false
2088
+ },
2089
+ {
2090
+ "content": "|<EXTRA_TOKENS_298>|",
2091
+ "lstrip": false,
2092
+ "normalized": false,
2093
+ "rstrip": false,
2094
+ "single_word": false
2095
+ },
2096
+ {
2097
+ "content": "|<EXTRA_TOKENS_299>|",
2098
+ "lstrip": false,
2099
+ "normalized": false,
2100
+ "rstrip": false,
2101
+ "single_word": false
2102
+ },
2103
+ {
2104
+ "content": "|<EXTRA_TOKENS_300>|",
2105
+ "lstrip": false,
2106
+ "normalized": false,
2107
+ "rstrip": false,
2108
+ "single_word": false
2109
+ },
2110
+ {
2111
+ "content": "|<EXTRA_TOKENS_301>|",
2112
+ "lstrip": false,
2113
+ "normalized": false,
2114
+ "rstrip": false,
2115
+ "single_word": false
2116
+ },
2117
+ {
2118
+ "content": "|<EXTRA_TOKENS_302>|",
2119
+ "lstrip": false,
2120
+ "normalized": false,
2121
+ "rstrip": false,
2122
+ "single_word": false
2123
+ },
2124
+ {
2125
+ "content": "|<EXTRA_TOKENS_303>|",
2126
+ "lstrip": false,
2127
+ "normalized": false,
2128
+ "rstrip": false,
2129
+ "single_word": false
2130
+ },
2131
+ {
2132
+ "content": "|<EXTRA_TOKENS_304>|",
2133
+ "lstrip": false,
2134
+ "normalized": false,
2135
+ "rstrip": false,
2136
+ "single_word": false
2137
+ },
2138
+ {
2139
+ "content": "|<EXTRA_TOKENS_305>|",
2140
+ "lstrip": false,
2141
+ "normalized": false,
2142
+ "rstrip": false,
2143
+ "single_word": false
2144
+ },
2145
+ {
2146
+ "content": "|<EXTRA_TOKENS_306>|",
2147
+ "lstrip": false,
2148
+ "normalized": false,
2149
+ "rstrip": false,
2150
+ "single_word": false
2151
+ },
2152
+ {
2153
+ "content": "|<EXTRA_TOKENS_307>|",
2154
+ "lstrip": false,
2155
+ "normalized": false,
2156
+ "rstrip": false,
2157
+ "single_word": false
2158
+ },
2159
+ {
2160
+ "content": "|<EXTRA_TOKENS_308>|",
2161
+ "lstrip": false,
2162
+ "normalized": false,
2163
+ "rstrip": false,
2164
+ "single_word": false
2165
+ },
2166
+ {
2167
+ "content": "|<EXTRA_TOKENS_309>|",
2168
+ "lstrip": false,
2169
+ "normalized": false,
2170
+ "rstrip": false,
2171
+ "single_word": false
2172
+ },
2173
+ {
2174
+ "content": "|<EXTRA_TOKENS_310>|",
2175
+ "lstrip": false,
2176
+ "normalized": false,
2177
+ "rstrip": false,
2178
+ "single_word": false
2179
+ },
2180
+ {
2181
+ "content": "|<EXTRA_TOKENS_311>|",
2182
+ "lstrip": false,
2183
+ "normalized": false,
2184
+ "rstrip": false,
2185
+ "single_word": false
2186
+ },
2187
+ {
2188
+ "content": "|<EXTRA_TOKENS_312>|",
2189
+ "lstrip": false,
2190
+ "normalized": false,
2191
+ "rstrip": false,
2192
+ "single_word": false
2193
+ },
2194
+ {
2195
+ "content": "|<EXTRA_TOKENS_313>|",
2196
+ "lstrip": false,
2197
+ "normalized": false,
2198
+ "rstrip": false,
2199
+ "single_word": false
2200
+ },
2201
+ {
2202
+ "content": "|<EXTRA_TOKENS_314>|",
2203
+ "lstrip": false,
2204
+ "normalized": false,
2205
+ "rstrip": false,
2206
+ "single_word": false
2207
+ },
2208
+ {
2209
+ "content": "|<EXTRA_TOKENS_315>|",
2210
+ "lstrip": false,
2211
+ "normalized": false,
2212
+ "rstrip": false,
2213
+ "single_word": false
2214
+ },
2215
+ {
2216
+ "content": "|<EXTRA_TOKENS_316>|",
2217
+ "lstrip": false,
2218
+ "normalized": false,
2219
+ "rstrip": false,
2220
+ "single_word": false
2221
+ },
2222
+ {
2223
+ "content": "|<EXTRA_TOKENS_317>|",
2224
+ "lstrip": false,
2225
+ "normalized": false,
2226
+ "rstrip": false,
2227
+ "single_word": false
2228
+ },
2229
+ {
2230
+ "content": "|<EXTRA_TOKENS_318>|",
2231
+ "lstrip": false,
2232
+ "normalized": false,
2233
+ "rstrip": false,
2234
+ "single_word": false
2235
+ },
2236
+ {
2237
+ "content": "|<EXTRA_TOKENS_319>|",
2238
+ "lstrip": false,
2239
+ "normalized": false,
2240
+ "rstrip": false,
2241
+ "single_word": false
2242
+ },
2243
+ {
2244
+ "content": "|<EXTRA_TOKENS_320>|",
2245
+ "lstrip": false,
2246
+ "normalized": false,
2247
+ "rstrip": false,
2248
+ "single_word": false
2249
+ },
2250
+ {
2251
+ "content": "|<EXTRA_TOKENS_321>|",
2252
+ "lstrip": false,
2253
+ "normalized": false,
2254
+ "rstrip": false,
2255
+ "single_word": false
2256
+ },
2257
+ {
2258
+ "content": "|<EXTRA_TOKENS_322>|",
2259
+ "lstrip": false,
2260
+ "normalized": false,
2261
+ "rstrip": false,
2262
+ "single_word": false
2263
+ },
2264
+ {
2265
+ "content": "|<EXTRA_TOKENS_323>|",
2266
+ "lstrip": false,
2267
+ "normalized": false,
2268
+ "rstrip": false,
2269
+ "single_word": false
2270
+ },
2271
+ {
2272
+ "content": "|<EXTRA_TOKENS_324>|",
2273
+ "lstrip": false,
2274
+ "normalized": false,
2275
+ "rstrip": false,
2276
+ "single_word": false
2277
+ },
2278
+ {
2279
+ "content": "|<EXTRA_TOKENS_325>|",
2280
+ "lstrip": false,
2281
+ "normalized": false,
2282
+ "rstrip": false,
2283
+ "single_word": false
2284
+ },
2285
+ {
2286
+ "content": "|<EXTRA_TOKENS_326>|",
2287
+ "lstrip": false,
2288
+ "normalized": false,
2289
+ "rstrip": false,
2290
+ "single_word": false
2291
+ },
2292
+ {
2293
+ "content": "|<EXTRA_TOKENS_327>|",
2294
+ "lstrip": false,
2295
+ "normalized": false,
2296
+ "rstrip": false,
2297
+ "single_word": false
2298
+ },
2299
+ {
2300
+ "content": "|<EXTRA_TOKENS_328>|",
2301
+ "lstrip": false,
2302
+ "normalized": false,
2303
+ "rstrip": false,
2304
+ "single_word": false
2305
+ },
2306
+ {
2307
+ "content": "|<EXTRA_TOKENS_329>|",
2308
+ "lstrip": false,
2309
+ "normalized": false,
2310
+ "rstrip": false,
2311
+ "single_word": false
2312
+ },
2313
+ {
2314
+ "content": "|<EXTRA_TOKENS_330>|",
2315
+ "lstrip": false,
2316
+ "normalized": false,
2317
+ "rstrip": false,
2318
+ "single_word": false
2319
+ },
2320
+ {
2321
+ "content": "|<EXTRA_TOKENS_331>|",
2322
+ "lstrip": false,
2323
+ "normalized": false,
2324
+ "rstrip": false,
2325
+ "single_word": false
2326
+ },
2327
+ {
2328
+ "content": "|<EXTRA_TOKENS_332>|",
2329
+ "lstrip": false,
2330
+ "normalized": false,
2331
+ "rstrip": false,
2332
+ "single_word": false
2333
+ },
2334
+ {
2335
+ "content": "|<EXTRA_TOKENS_333>|",
2336
+ "lstrip": false,
2337
+ "normalized": false,
2338
+ "rstrip": false,
2339
+ "single_word": false
2340
+ },
2341
+ {
2342
+ "content": "|<EXTRA_TOKENS_334>|",
2343
+ "lstrip": false,
2344
+ "normalized": false,
2345
+ "rstrip": false,
2346
+ "single_word": false
2347
+ },
2348
+ {
2349
+ "content": "|<EXTRA_TOKENS_335>|",
2350
+ "lstrip": false,
2351
+ "normalized": false,
2352
+ "rstrip": false,
2353
+ "single_word": false
2354
+ },
2355
+ {
2356
+ "content": "|<EXTRA_TOKENS_336>|",
2357
+ "lstrip": false,
2358
+ "normalized": false,
2359
+ "rstrip": false,
2360
+ "single_word": false
2361
+ },
2362
+ {
2363
+ "content": "|<EXTRA_TOKENS_337>|",
2364
+ "lstrip": false,
2365
+ "normalized": false,
2366
+ "rstrip": false,
2367
+ "single_word": false
2368
+ },
2369
+ {
2370
+ "content": "|<EXTRA_TOKENS_338>|",
2371
+ "lstrip": false,
2372
+ "normalized": false,
2373
+ "rstrip": false,
2374
+ "single_word": false
2375
+ },
2376
+ {
2377
+ "content": "|<EXTRA_TOKENS_339>|",
2378
+ "lstrip": false,
2379
+ "normalized": false,
2380
+ "rstrip": false,
2381
+ "single_word": false
2382
+ },
2383
+ {
2384
+ "content": "|<EXTRA_TOKENS_340>|",
2385
+ "lstrip": false,
2386
+ "normalized": false,
2387
+ "rstrip": false,
2388
+ "single_word": false
2389
+ },
2390
+ {
2391
+ "content": "|<EXTRA_TOKENS_341>|",
2392
+ "lstrip": false,
2393
+ "normalized": false,
2394
+ "rstrip": false,
2395
+ "single_word": false
2396
+ },
2397
+ {
2398
+ "content": "|<EXTRA_TOKENS_342>|",
2399
+ "lstrip": false,
2400
+ "normalized": false,
2401
+ "rstrip": false,
2402
+ "single_word": false
2403
+ },
2404
+ {
2405
+ "content": "|<EXTRA_TOKENS_343>|",
2406
+ "lstrip": false,
2407
+ "normalized": false,
2408
+ "rstrip": false,
2409
+ "single_word": false
2410
+ },
2411
+ {
2412
+ "content": "|<EXTRA_TOKENS_344>|",
2413
+ "lstrip": false,
2414
+ "normalized": false,
2415
+ "rstrip": false,
2416
+ "single_word": false
2417
+ },
2418
+ {
2419
+ "content": "|<EXTRA_TOKENS_345>|",
2420
+ "lstrip": false,
2421
+ "normalized": false,
2422
+ "rstrip": false,
2423
+ "single_word": false
2424
+ },
2425
+ {
2426
+ "content": "|<EXTRA_TOKENS_346>|",
2427
+ "lstrip": false,
2428
+ "normalized": false,
2429
+ "rstrip": false,
2430
+ "single_word": false
2431
+ },
2432
+ {
2433
+ "content": "|<EXTRA_TOKENS_347>|",
2434
+ "lstrip": false,
2435
+ "normalized": false,
2436
+ "rstrip": false,
2437
+ "single_word": false
2438
+ },
2439
+ {
2440
+ "content": "|<EXTRA_TOKENS_348>|",
2441
+ "lstrip": false,
2442
+ "normalized": false,
2443
+ "rstrip": false,
2444
+ "single_word": false
2445
+ },
2446
+ {
2447
+ "content": "|<EXTRA_TOKENS_349>|",
2448
+ "lstrip": false,
2449
+ "normalized": false,
2450
+ "rstrip": false,
2451
+ "single_word": false
2452
+ },
2453
+ {
2454
+ "content": "|<EXTRA_TOKENS_350>|",
2455
+ "lstrip": false,
2456
+ "normalized": false,
2457
+ "rstrip": false,
2458
+ "single_word": false
2459
+ },
2460
+ {
2461
+ "content": "|<EXTRA_TOKENS_351>|",
2462
+ "lstrip": false,
2463
+ "normalized": false,
2464
+ "rstrip": false,
2465
+ "single_word": false
2466
+ },
2467
+ {
2468
+ "content": "|<EXTRA_TOKENS_352>|",
2469
+ "lstrip": false,
2470
+ "normalized": false,
2471
+ "rstrip": false,
2472
+ "single_word": false
2473
+ },
2474
+ {
2475
+ "content": "|<EXTRA_TOKENS_353>|",
2476
+ "lstrip": false,
2477
+ "normalized": false,
2478
+ "rstrip": false,
2479
+ "single_word": false
2480
+ },
2481
+ {
2482
+ "content": "|<EXTRA_TOKENS_354>|",
2483
+ "lstrip": false,
2484
+ "normalized": false,
2485
+ "rstrip": false,
2486
+ "single_word": false
2487
+ },
2488
+ {
2489
+ "content": "|<EXTRA_TOKENS_355>|",
2490
+ "lstrip": false,
2491
+ "normalized": false,
2492
+ "rstrip": false,
2493
+ "single_word": false
2494
+ },
2495
+ {
2496
+ "content": "|<EXTRA_TOKENS_356>|",
2497
+ "lstrip": false,
2498
+ "normalized": false,
2499
+ "rstrip": false,
2500
+ "single_word": false
2501
+ },
2502
+ {
2503
+ "content": "|<EXTRA_TOKENS_357>|",
2504
+ "lstrip": false,
2505
+ "normalized": false,
2506
+ "rstrip": false,
2507
+ "single_word": false
2508
+ },
2509
+ {
2510
+ "content": "|<EXTRA_TOKENS_358>|",
2511
+ "lstrip": false,
2512
+ "normalized": false,
2513
+ "rstrip": false,
2514
+ "single_word": false
2515
+ },
2516
+ {
2517
+ "content": "|<EXTRA_TOKENS_359>|",
2518
+ "lstrip": false,
2519
+ "normalized": false,
2520
+ "rstrip": false,
2521
+ "single_word": false
2522
+ },
2523
+ {
2524
+ "content": "|<EXTRA_TOKENS_360>|",
2525
+ "lstrip": false,
2526
+ "normalized": false,
2527
+ "rstrip": false,
2528
+ "single_word": false
2529
+ },
2530
+ {
2531
+ "content": "|<EXTRA_TOKENS_361>|",
2532
+ "lstrip": false,
2533
+ "normalized": false,
2534
+ "rstrip": false,
2535
+ "single_word": false
2536
+ },
2537
+ {
2538
+ "content": "|<EXTRA_TOKENS_362>|",
2539
+ "lstrip": false,
2540
+ "normalized": false,
2541
+ "rstrip": false,
2542
+ "single_word": false
2543
+ },
2544
+ {
2545
+ "content": "|<EXTRA_TOKENS_363>|",
2546
+ "lstrip": false,
2547
+ "normalized": false,
2548
+ "rstrip": false,
2549
+ "single_word": false
2550
+ },
2551
+ {
2552
+ "content": "|<EXTRA_TOKENS_364>|",
2553
+ "lstrip": false,
2554
+ "normalized": false,
2555
+ "rstrip": false,
2556
+ "single_word": false
2557
+ },
2558
+ {
2559
+ "content": "|<EXTRA_TOKENS_365>|",
2560
+ "lstrip": false,
2561
+ "normalized": false,
2562
+ "rstrip": false,
2563
+ "single_word": false
2564
+ },
2565
+ {
2566
+ "content": "|<EXTRA_TOKENS_366>|",
2567
+ "lstrip": false,
2568
+ "normalized": false,
2569
+ "rstrip": false,
2570
+ "single_word": false
2571
+ },
2572
+ {
2573
+ "content": "|<EXTRA_TOKENS_367>|",
2574
+ "lstrip": false,
2575
+ "normalized": false,
2576
+ "rstrip": false,
2577
+ "single_word": false
2578
+ },
2579
+ {
2580
+ "content": "|<EXTRA_TOKENS_368>|",
2581
+ "lstrip": false,
2582
+ "normalized": false,
2583
+ "rstrip": false,
2584
+ "single_word": false
2585
+ },
2586
+ {
2587
+ "content": "|<EXTRA_TOKENS_369>|",
2588
+ "lstrip": false,
2589
+ "normalized": false,
2590
+ "rstrip": false,
2591
+ "single_word": false
2592
+ },
2593
+ {
2594
+ "content": "|<EXTRA_TOKENS_370>|",
2595
+ "lstrip": false,
2596
+ "normalized": false,
2597
+ "rstrip": false,
2598
+ "single_word": false
2599
+ },
2600
+ {
2601
+ "content": "|<EXTRA_TOKENS_371>|",
2602
+ "lstrip": false,
2603
+ "normalized": false,
2604
+ "rstrip": false,
2605
+ "single_word": false
2606
+ },
2607
+ {
2608
+ "content": "|<EXTRA_TOKENS_372>|",
2609
+ "lstrip": false,
2610
+ "normalized": false,
2611
+ "rstrip": false,
2612
+ "single_word": false
2613
+ },
2614
+ {
2615
+ "content": "|<EXTRA_TOKENS_373>|",
2616
+ "lstrip": false,
2617
+ "normalized": false,
2618
+ "rstrip": false,
2619
+ "single_word": false
2620
+ },
2621
+ {
2622
+ "content": "|<EXTRA_TOKENS_374>|",
2623
+ "lstrip": false,
2624
+ "normalized": false,
2625
+ "rstrip": false,
2626
+ "single_word": false
2627
+ },
2628
+ {
2629
+ "content": "|<EXTRA_TOKENS_375>|",
2630
+ "lstrip": false,
2631
+ "normalized": false,
2632
+ "rstrip": false,
2633
+ "single_word": false
2634
+ },
2635
+ {
2636
+ "content": "|<EXTRA_TOKENS_376>|",
2637
+ "lstrip": false,
2638
+ "normalized": false,
2639
+ "rstrip": false,
2640
+ "single_word": false
2641
+ },
2642
+ {
2643
+ "content": "|<EXTRA_TOKENS_377>|",
2644
+ "lstrip": false,
2645
+ "normalized": false,
2646
+ "rstrip": false,
2647
+ "single_word": false
2648
+ },
2649
+ {
2650
+ "content": "|<EXTRA_TOKENS_378>|",
2651
+ "lstrip": false,
2652
+ "normalized": false,
2653
+ "rstrip": false,
2654
+ "single_word": false
2655
+ },
2656
+ {
2657
+ "content": "|<EXTRA_TOKENS_379>|",
2658
+ "lstrip": false,
2659
+ "normalized": false,
2660
+ "rstrip": false,
2661
+ "single_word": false
2662
+ },
2663
+ {
2664
+ "content": "|<EXTRA_TOKENS_380>|",
2665
+ "lstrip": false,
2666
+ "normalized": false,
2667
+ "rstrip": false,
2668
+ "single_word": false
2669
+ },
2670
+ {
2671
+ "content": "|<EXTRA_TOKENS_381>|",
2672
+ "lstrip": false,
2673
+ "normalized": false,
2674
+ "rstrip": false,
2675
+ "single_word": false
2676
+ },
2677
+ {
2678
+ "content": "|<EXTRA_TOKENS_382>|",
2679
+ "lstrip": false,
2680
+ "normalized": false,
2681
+ "rstrip": false,
2682
+ "single_word": false
2683
+ },
2684
+ {
2685
+ "content": "|<EXTRA_TOKENS_383>|",
2686
+ "lstrip": false,
2687
+ "normalized": false,
2688
+ "rstrip": false,
2689
+ "single_word": false
2690
+ },
2691
+ {
2692
+ "content": "|<EXTRA_TOKENS_384>|",
2693
+ "lstrip": false,
2694
+ "normalized": false,
2695
+ "rstrip": false,
2696
+ "single_word": false
2697
+ },
2698
+ {
2699
+ "content": "|<EXTRA_TOKENS_385>|",
2700
+ "lstrip": false,
2701
+ "normalized": false,
2702
+ "rstrip": false,
2703
+ "single_word": false
2704
+ },
2705
+ {
2706
+ "content": "|<EXTRA_TOKENS_386>|",
2707
+ "lstrip": false,
2708
+ "normalized": false,
2709
+ "rstrip": false,
2710
+ "single_word": false
2711
+ },
2712
+ {
2713
+ "content": "|<EXTRA_TOKENS_387>|",
2714
+ "lstrip": false,
2715
+ "normalized": false,
2716
+ "rstrip": false,
2717
+ "single_word": false
2718
+ },
2719
+ {
2720
+ "content": "|<EXTRA_TOKENS_388>|",
2721
+ "lstrip": false,
2722
+ "normalized": false,
2723
+ "rstrip": false,
2724
+ "single_word": false
2725
+ },
2726
+ {
2727
+ "content": "|<EXTRA_TOKENS_389>|",
2728
+ "lstrip": false,
2729
+ "normalized": false,
2730
+ "rstrip": false,
2731
+ "single_word": false
2732
+ },
2733
+ {
2734
+ "content": "|<EXTRA_TOKENS_390>|",
2735
+ "lstrip": false,
2736
+ "normalized": false,
2737
+ "rstrip": false,
2738
+ "single_word": false
2739
+ },
2740
+ {
2741
+ "content": "|<EXTRA_TOKENS_391>|",
2742
+ "lstrip": false,
2743
+ "normalized": false,
2744
+ "rstrip": false,
2745
+ "single_word": false
2746
+ },
2747
+ {
2748
+ "content": "|<EXTRA_TOKENS_392>|",
2749
+ "lstrip": false,
2750
+ "normalized": false,
2751
+ "rstrip": false,
2752
+ "single_word": false
2753
+ },
2754
+ {
2755
+ "content": "|<EXTRA_TOKENS_393>|",
2756
+ "lstrip": false,
2757
+ "normalized": false,
2758
+ "rstrip": false,
2759
+ "single_word": false
2760
+ },
2761
+ {
2762
+ "content": "|<EXTRA_TOKENS_394>|",
2763
+ "lstrip": false,
2764
+ "normalized": false,
2765
+ "rstrip": false,
2766
+ "single_word": false
2767
+ },
2768
+ {
2769
+ "content": "|<EXTRA_TOKENS_395>|",
2770
+ "lstrip": false,
2771
+ "normalized": false,
2772
+ "rstrip": false,
2773
+ "single_word": false
2774
+ },
2775
+ {
2776
+ "content": "|<EXTRA_TOKENS_396>|",
2777
+ "lstrip": false,
2778
+ "normalized": false,
2779
+ "rstrip": false,
2780
+ "single_word": false
2781
+ },
2782
+ {
2783
+ "content": "|<EXTRA_TOKENS_397>|",
2784
+ "lstrip": false,
2785
+ "normalized": false,
2786
+ "rstrip": false,
2787
+ "single_word": false
2788
+ },
2789
+ {
2790
+ "content": "|<EXTRA_TOKENS_398>|",
2791
+ "lstrip": false,
2792
+ "normalized": false,
2793
+ "rstrip": false,
2794
+ "single_word": false
2795
+ },
2796
+ {
2797
+ "content": "<im_start>",
2798
+ "lstrip": false,
2799
+ "normalized": false,
2800
+ "rstrip": false,
2801
+ "single_word": false
2802
+ },
2803
+ {
2804
+ "content": "<im_end>",
2805
+ "lstrip": false,
2806
+ "normalized": false,
2807
+ "rstrip": false,
2808
+ "single_word": false
2809
+ },
2810
+ {
2811
+ "content": "<im_patch>",
2812
+ "lstrip": false,
2813
+ "normalized": false,
2814
+ "rstrip": false,
2815
+ "single_word": false
2816
+ },
2817
+ {
2818
+ "content": "<im_col>",
2819
+ "lstrip": false,
2820
+ "normalized": false,
2821
+ "rstrip": false,
2822
+ "single_word": false
2823
+ },
2824
+ {
2825
+ "content": "<|image|>",
2826
+ "lstrip": false,
2827
+ "normalized": false,
2828
+ "rstrip": false,
2829
+ "single_word": false
2830
+ },
2831
+ {
2832
+ "content": "<im_low>",
2833
+ "lstrip": false,
2834
+ "normalized": false,
2835
+ "rstrip": false,
2836
+ "single_word": false
2837
+ }
2838
+ ],
2839
+ "bos_token": "<|endoftext|>",
2840
+ "eos_token": {
2841
+ "content": "<|endoftext|>",
2842
+ "lstrip": false,
2843
+ "normalized": false,
2844
+ "rstrip": false,
2845
+ "single_word": false
2846
+ },
2847
+ "pad_token": {
2848
+ "content": "<|endoftext|>",
2849
+ "lstrip": false,
2850
+ "normalized": false,
2851
+ "rstrip": false,
2852
+ "single_word": false
2853
+ }
2854
+ }
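
The map above registers every `|<EXTRA_TOKENS_*>|` placeholder and the image tokens (`<im_start>`, `<im_end>`, `<im_patch>`, `<im_col>`, `<|image|>`, `<im_low>`) with `lstrip`/`rstrip`/`normalized`/`single_word` all false, and points `bos_token`, `eos_token`, and `pad_token` at `<|endoftext|>`. As a minimal sketch (not part of this commit) of how these files are consumed once pushed: the repo id is not named in this diff, so `"your-org/your-model"` below is a hypothetical placeholder to substitute.

# Minimal sketch, assuming `transformers` is installed and the uploaded
# repo id is known; "your-org/your-model" is a hypothetical placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/your-model")

# <|endoftext|> doubles as eos and pad per the special-tokens map above.
assert tok.eos_token == "<|endoftext|>"
assert tok.pad_token == "<|endoftext|>"

# Each registered special token should resolve to a single id, never split.
for t in ("<im_start>", "<im_patch>", "<|image|>", "|<EXTRA_TOKENS_94>|"):
    print(t, "->", tok.convert_tokens_to_ids(t))
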
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f3f3a3aaae61d8848c62e82353631a941af7986b52eccf9fbd88a065dfd7f67
+ size 11501504
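
Note that `tokenizer.json` is committed as a Git LFS pointer rather than as file content: `oid` is the SHA-256 digest of the real blob and `size` is its byte length (about 11.5 MB here). A small sketch of reading such a pointer, assuming a local checkout where `git lfs` has not resolved it:

# Minimal sketch: parse the three-field LFS pointer shown above.
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    # Each pointer line is "<key> <value>"; split on the first space only.
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

ptr = read_lfs_pointer("tokenizer.json")
print(ptr["version"])           # https://git-lfs.github.com/spec/v1
print(ptr["oid"], ptr["size"])  # sha256:2f3f... 11501504
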
tokenizer_config.json ADDED
@@ -0,0 +1,3843 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "|<EXTRA_TOKENS_0>|",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151666": {
+ "content": "|<EXTRA_TOKENS_1>|",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151667": {
+ "content": "|<EXTRA_TOKENS_2>|",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151668": {
+ "content": "|<EXTRA_TOKENS_3>|",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
212
+ },
213
+ "151669": {
214
+ "content": "|<EXTRA_TOKENS_4>|",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "151670": {
222
+ "content": "|<EXTRA_TOKENS_5>|",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "151671": {
230
+ "content": "|<EXTRA_TOKENS_6>|",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "151672": {
238
+ "content": "|<EXTRA_TOKENS_7>|",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "151673": {
246
+ "content": "|<EXTRA_TOKENS_8>|",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "151674": {
254
+ "content": "|<EXTRA_TOKENS_9>|",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "151675": {
262
+ "content": "|<EXTRA_TOKENS_10>|",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "151676": {
270
+ "content": "|<EXTRA_TOKENS_11>|",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "151677": {
278
+ "content": "|<EXTRA_TOKENS_12>|",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "151678": {
286
+ "content": "|<EXTRA_TOKENS_13>|",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ },
293
+ "151679": {
294
+ "content": "|<EXTRA_TOKENS_14>|",
295
+ "lstrip": false,
296
+ "normalized": false,
297
+ "rstrip": false,
298
+ "single_word": false,
299
+ "special": true
300
+ },
301
+ "151680": {
302
+ "content": "|<EXTRA_TOKENS_15>|",
303
+ "lstrip": false,
304
+ "normalized": false,
305
+ "rstrip": false,
306
+ "single_word": false,
307
+ "special": true
308
+ },
309
+ "151681": {
310
+ "content": "|<EXTRA_TOKENS_16>|",
311
+ "lstrip": false,
312
+ "normalized": false,
313
+ "rstrip": false,
314
+ "single_word": false,
315
+ "special": true
316
+ },
317
+ "151682": {
318
+ "content": "|<EXTRA_TOKENS_17>|",
319
+ "lstrip": false,
320
+ "normalized": false,
321
+ "rstrip": false,
322
+ "single_word": false,
323
+ "special": true
324
+ },
325
+ "151683": {
326
+ "content": "|<EXTRA_TOKENS_18>|",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false,
331
+ "special": true
332
+ },
333
+ "151684": {
334
+ "content": "|<EXTRA_TOKENS_19>|",
335
+ "lstrip": false,
336
+ "normalized": false,
337
+ "rstrip": false,
338
+ "single_word": false,
339
+ "special": true
340
+ },
341
+ "151685": {
342
+ "content": "|<EXTRA_TOKENS_20>|",
343
+ "lstrip": false,
344
+ "normalized": false,
345
+ "rstrip": false,
346
+ "single_word": false,
347
+ "special": true
348
+ },
349
+ "151686": {
350
+ "content": "|<EXTRA_TOKENS_21>|",
351
+ "lstrip": false,
352
+ "normalized": false,
353
+ "rstrip": false,
354
+ "single_word": false,
355
+ "special": true
356
+ },
357
+ "151687": {
358
+ "content": "|<EXTRA_TOKENS_22>|",
359
+ "lstrip": false,
360
+ "normalized": false,
361
+ "rstrip": false,
362
+ "single_word": false,
363
+ "special": true
364
+ },
365
+ "151688": {
366
+ "content": "|<EXTRA_TOKENS_23>|",
367
+ "lstrip": false,
368
+ "normalized": false,
369
+ "rstrip": false,
370
+ "single_word": false,
371
+ "special": true
372
+ },
373
+ "151689": {
374
+ "content": "|<EXTRA_TOKENS_24>|",
375
+ "lstrip": false,
376
+ "normalized": false,
377
+ "rstrip": false,
378
+ "single_word": false,
379
+ "special": true
380
+ },
381
+ "151690": {
382
+ "content": "|<EXTRA_TOKENS_25>|",
383
+ "lstrip": false,
384
+ "normalized": false,
385
+ "rstrip": false,
386
+ "single_word": false,
387
+ "special": true
388
+ },
389
+ "151691": {
390
+ "content": "|<EXTRA_TOKENS_26>|",
391
+ "lstrip": false,
392
+ "normalized": false,
393
+ "rstrip": false,
394
+ "single_word": false,
395
+ "special": true
396
+ },
397
+ "151692": {
398
+ "content": "|<EXTRA_TOKENS_27>|",
399
+ "lstrip": false,
400
+ "normalized": false,
401
+ "rstrip": false,
402
+ "single_word": false,
403
+ "special": true
404
+ },
405
+ "151693": {
406
+ "content": "|<EXTRA_TOKENS_28>|",
407
+ "lstrip": false,
408
+ "normalized": false,
409
+ "rstrip": false,
410
+ "single_word": false,
411
+ "special": true
412
+ },
413
+ "151694": {
414
+ "content": "|<EXTRA_TOKENS_29>|",
415
+ "lstrip": false,
416
+ "normalized": false,
417
+ "rstrip": false,
418
+ "single_word": false,
419
+ "special": true
420
+ },
421
+ "151695": {
422
+ "content": "|<EXTRA_TOKENS_30>|",
423
+ "lstrip": false,
424
+ "normalized": false,
425
+ "rstrip": false,
426
+ "single_word": false,
427
+ "special": true
428
+ },
429
+ "151696": {
430
+ "content": "|<EXTRA_TOKENS_31>|",
431
+ "lstrip": false,
432
+ "normalized": false,
433
+ "rstrip": false,
434
+ "single_word": false,
435
+ "special": true
436
+ },
437
+ "151697": {
438
+ "content": "|<EXTRA_TOKENS_32>|",
439
+ "lstrip": false,
440
+ "normalized": false,
441
+ "rstrip": false,
442
+ "single_word": false,
443
+ "special": true
444
+ },
445
+ "151698": {
446
+ "content": "|<EXTRA_TOKENS_33>|",
447
+ "lstrip": false,
448
+ "normalized": false,
449
+ "rstrip": false,
450
+ "single_word": false,
451
+ "special": true
452
+ },
453
+ "151699": {
454
+ "content": "|<EXTRA_TOKENS_34>|",
455
+ "lstrip": false,
456
+ "normalized": false,
457
+ "rstrip": false,
458
+ "single_word": false,
459
+ "special": true
460
+ },
461
+ "151700": {
462
+ "content": "|<EXTRA_TOKENS_35>|",
463
+ "lstrip": false,
464
+ "normalized": false,
465
+ "rstrip": false,
466
+ "single_word": false,
467
+ "special": true
468
+ },
469
+ "151701": {
470
+ "content": "|<EXTRA_TOKENS_36>|",
471
+ "lstrip": false,
472
+ "normalized": false,
473
+ "rstrip": false,
474
+ "single_word": false,
475
+ "special": true
476
+ },
477
+ "151702": {
478
+ "content": "|<EXTRA_TOKENS_37>|",
479
+ "lstrip": false,
480
+ "normalized": false,
481
+ "rstrip": false,
482
+ "single_word": false,
483
+ "special": true
484
+ },
485
+ "151703": {
486
+ "content": "|<EXTRA_TOKENS_38>|",
487
+ "lstrip": false,
488
+ "normalized": false,
489
+ "rstrip": false,
490
+ "single_word": false,
491
+ "special": true
492
+ },
493
+ "151704": {
494
+ "content": "|<EXTRA_TOKENS_39>|",
495
+ "lstrip": false,
496
+ "normalized": false,
497
+ "rstrip": false,
498
+ "single_word": false,
499
+ "special": true
500
+ },
501
+ "151705": {
502
+ "content": "|<EXTRA_TOKENS_40>|",
503
+ "lstrip": false,
504
+ "normalized": false,
505
+ "rstrip": false,
506
+ "single_word": false,
507
+ "special": true
508
+ },
509
+ "151706": {
510
+ "content": "|<EXTRA_TOKENS_41>|",
511
+ "lstrip": false,
512
+ "normalized": false,
513
+ "rstrip": false,
514
+ "single_word": false,
515
+ "special": true
516
+ },
517
+ "151707": {
518
+ "content": "|<EXTRA_TOKENS_42>|",
519
+ "lstrip": false,
520
+ "normalized": false,
521
+ "rstrip": false,
522
+ "single_word": false,
523
+ "special": true
524
+ },
525
+ "151708": {
526
+ "content": "|<EXTRA_TOKENS_43>|",
527
+ "lstrip": false,
528
+ "normalized": false,
529
+ "rstrip": false,
530
+ "single_word": false,
531
+ "special": true
532
+ },
533
+ "151709": {
534
+ "content": "|<EXTRA_TOKENS_44>|",
535
+ "lstrip": false,
536
+ "normalized": false,
537
+ "rstrip": false,
538
+ "single_word": false,
539
+ "special": true
540
+ },
541
+ "151710": {
542
+ "content": "|<EXTRA_TOKENS_45>|",
543
+ "lstrip": false,
544
+ "normalized": false,
545
+ "rstrip": false,
546
+ "single_word": false,
547
+ "special": true
548
+ },
549
+ "151711": {
550
+ "content": "|<EXTRA_TOKENS_46>|",
551
+ "lstrip": false,
552
+ "normalized": false,
553
+ "rstrip": false,
554
+ "single_word": false,
555
+ "special": true
556
+ },
557
+ "151712": {
558
+ "content": "|<EXTRA_TOKENS_47>|",
559
+ "lstrip": false,
560
+ "normalized": false,
561
+ "rstrip": false,
562
+ "single_word": false,
563
+ "special": true
564
+ },
565
+ "151713": {
566
+ "content": "|<EXTRA_TOKENS_48>|",
567
+ "lstrip": false,
568
+ "normalized": false,
569
+ "rstrip": false,
570
+ "single_word": false,
571
+ "special": true
572
+ },
573
+ "151714": {
574
+ "content": "|<EXTRA_TOKENS_49>|",
575
+ "lstrip": false,
576
+ "normalized": false,
577
+ "rstrip": false,
578
+ "single_word": false,
579
+ "special": true
580
+ },
581
+ "151715": {
582
+ "content": "|<EXTRA_TOKENS_50>|",
583
+ "lstrip": false,
584
+ "normalized": false,
585
+ "rstrip": false,
586
+ "single_word": false,
587
+ "special": true
588
+ },
589
+ "151716": {
590
+ "content": "|<EXTRA_TOKENS_51>|",
591
+ "lstrip": false,
592
+ "normalized": false,
593
+ "rstrip": false,
594
+ "single_word": false,
595
+ "special": true
596
+ },
597
+ "151717": {
598
+ "content": "|<EXTRA_TOKENS_52>|",
599
+ "lstrip": false,
600
+ "normalized": false,
601
+ "rstrip": false,
602
+ "single_word": false,
603
+ "special": true
604
+ },
605
+ "151718": {
606
+ "content": "|<EXTRA_TOKENS_53>|",
607
+ "lstrip": false,
608
+ "normalized": false,
609
+ "rstrip": false,
610
+ "single_word": false,
611
+ "special": true
612
+ },
613
+ "151719": {
614
+ "content": "|<EXTRA_TOKENS_54>|",
615
+ "lstrip": false,
616
+ "normalized": false,
617
+ "rstrip": false,
618
+ "single_word": false,
619
+ "special": true
620
+ },
621
+ "151720": {
622
+ "content": "|<EXTRA_TOKENS_55>|",
623
+ "lstrip": false,
624
+ "normalized": false,
625
+ "rstrip": false,
626
+ "single_word": false,
627
+ "special": true
628
+ },
629
+ "151721": {
630
+ "content": "|<EXTRA_TOKENS_56>|",
631
+ "lstrip": false,
632
+ "normalized": false,
633
+ "rstrip": false,
634
+ "single_word": false,
635
+ "special": true
636
+ },
637
+ "151722": {
638
+ "content": "|<EXTRA_TOKENS_57>|",
639
+ "lstrip": false,
640
+ "normalized": false,
641
+ "rstrip": false,
642
+ "single_word": false,
643
+ "special": true
644
+ },
645
+ "151723": {
646
+ "content": "|<EXTRA_TOKENS_58>|",
647
+ "lstrip": false,
648
+ "normalized": false,
649
+ "rstrip": false,
650
+ "single_word": false,
651
+ "special": true
652
+ },
653
+ "151724": {
654
+ "content": "|<EXTRA_TOKENS_59>|",
655
+ "lstrip": false,
656
+ "normalized": false,
657
+ "rstrip": false,
658
+ "single_word": false,
659
+ "special": true
660
+ },
661
+ "151725": {
662
+ "content": "|<EXTRA_TOKENS_60>|",
663
+ "lstrip": false,
664
+ "normalized": false,
665
+ "rstrip": false,
666
+ "single_word": false,
667
+ "special": true
668
+ },
669
+ "151726": {
670
+ "content": "|<EXTRA_TOKENS_61>|",
671
+ "lstrip": false,
672
+ "normalized": false,
673
+ "rstrip": false,
674
+ "single_word": false,
675
+ "special": true
676
+ },
677
+ "151727": {
678
+ "content": "|<EXTRA_TOKENS_62>|",
679
+ "lstrip": false,
680
+ "normalized": false,
681
+ "rstrip": false,
682
+ "single_word": false,
683
+ "special": true
684
+ },
685
+ "151728": {
686
+ "content": "|<EXTRA_TOKENS_63>|",
687
+ "lstrip": false,
688
+ "normalized": false,
689
+ "rstrip": false,
690
+ "single_word": false,
691
+ "special": true
692
+ },
693
+ "151729": {
694
+ "content": "|<EXTRA_TOKENS_64>|",
695
+ "lstrip": false,
696
+ "normalized": false,
697
+ "rstrip": false,
698
+ "single_word": false,
699
+ "special": true
700
+ },
701
+ "151730": {
702
+ "content": "|<EXTRA_TOKENS_65>|",
703
+ "lstrip": false,
704
+ "normalized": false,
705
+ "rstrip": false,
706
+ "single_word": false,
707
+ "special": true
708
+ },
709
+ "151731": {
710
+ "content": "|<EXTRA_TOKENS_66>|",
711
+ "lstrip": false,
712
+ "normalized": false,
713
+ "rstrip": false,
714
+ "single_word": false,
715
+ "special": true
716
+ },
717
+ "151732": {
718
+ "content": "|<EXTRA_TOKENS_67>|",
719
+ "lstrip": false,
720
+ "normalized": false,
721
+ "rstrip": false,
722
+ "single_word": false,
723
+ "special": true
724
+ },
725
+ "151733": {
726
+ "content": "|<EXTRA_TOKENS_68>|",
727
+ "lstrip": false,
728
+ "normalized": false,
729
+ "rstrip": false,
730
+ "single_word": false,
731
+ "special": true
732
+ },
733
+ "151734": {
734
+ "content": "|<EXTRA_TOKENS_69>|",
735
+ "lstrip": false,
736
+ "normalized": false,
737
+ "rstrip": false,
738
+ "single_word": false,
739
+ "special": true
740
+ },
741
+ "151735": {
742
+ "content": "|<EXTRA_TOKENS_70>|",
743
+ "lstrip": false,
744
+ "normalized": false,
745
+ "rstrip": false,
746
+ "single_word": false,
747
+ "special": true
748
+ },
749
+ "151736": {
750
+ "content": "|<EXTRA_TOKENS_71>|",
751
+ "lstrip": false,
752
+ "normalized": false,
753
+ "rstrip": false,
754
+ "single_word": false,
755
+ "special": true
756
+ },
757
+ "151737": {
758
+ "content": "|<EXTRA_TOKENS_72>|",
759
+ "lstrip": false,
760
+ "normalized": false,
761
+ "rstrip": false,
762
+ "single_word": false,
763
+ "special": true
764
+ },
765
+ "151738": {
766
+ "content": "|<EXTRA_TOKENS_73>|",
767
+ "lstrip": false,
768
+ "normalized": false,
769
+ "rstrip": false,
770
+ "single_word": false,
771
+ "special": true
772
+ },
773
+ "151739": {
774
+ "content": "|<EXTRA_TOKENS_74>|",
775
+ "lstrip": false,
776
+ "normalized": false,
777
+ "rstrip": false,
778
+ "single_word": false,
779
+ "special": true
780
+ },
781
+ "151740": {
782
+ "content": "|<EXTRA_TOKENS_75>|",
783
+ "lstrip": false,
784
+ "normalized": false,
785
+ "rstrip": false,
786
+ "single_word": false,
787
+ "special": true
788
+ },
789
+ "151741": {
790
+ "content": "|<EXTRA_TOKENS_76>|",
791
+ "lstrip": false,
792
+ "normalized": false,
793
+ "rstrip": false,
794
+ "single_word": false,
795
+ "special": true
796
+ },
797
+ "151742": {
798
+ "content": "|<EXTRA_TOKENS_77>|",
799
+ "lstrip": false,
800
+ "normalized": false,
801
+ "rstrip": false,
802
+ "single_word": false,
803
+ "special": true
804
+ },
805
+ "151743": {
806
+ "content": "|<EXTRA_TOKENS_78>|",
807
+ "lstrip": false,
808
+ "normalized": false,
809
+ "rstrip": false,
810
+ "single_word": false,
811
+ "special": true
812
+ },
813
+ "151744": {
814
+ "content": "|<EXTRA_TOKENS_79>|",
815
+ "lstrip": false,
816
+ "normalized": false,
817
+ "rstrip": false,
818
+ "single_word": false,
819
+ "special": true
820
+ },
821
+ "151745": {
822
+ "content": "|<EXTRA_TOKENS_80>|",
823
+ "lstrip": false,
824
+ "normalized": false,
825
+ "rstrip": false,
826
+ "single_word": false,
827
+ "special": true
828
+ },
829
+ "151746": {
830
+ "content": "|<EXTRA_TOKENS_81>|",
831
+ "lstrip": false,
832
+ "normalized": false,
833
+ "rstrip": false,
834
+ "single_word": false,
835
+ "special": true
836
+ },
837
+ "151747": {
838
+ "content": "|<EXTRA_TOKENS_82>|",
839
+ "lstrip": false,
840
+ "normalized": false,
841
+ "rstrip": false,
842
+ "single_word": false,
843
+ "special": true
844
+ },
845
+ "151748": {
846
+ "content": "|<EXTRA_TOKENS_83>|",
847
+ "lstrip": false,
848
+ "normalized": false,
849
+ "rstrip": false,
850
+ "single_word": false,
851
+ "special": true
852
+ },
853
+ "151749": {
854
+ "content": "|<EXTRA_TOKENS_84>|",
855
+ "lstrip": false,
856
+ "normalized": false,
857
+ "rstrip": false,
858
+ "single_word": false,
859
+ "special": true
860
+ },
861
+ "151750": {
862
+ "content": "|<EXTRA_TOKENS_85>|",
863
+ "lstrip": false,
864
+ "normalized": false,
865
+ "rstrip": false,
866
+ "single_word": false,
867
+ "special": true
868
+ },
869
+ "151751": {
870
+ "content": "|<EXTRA_TOKENS_86>|",
871
+ "lstrip": false,
872
+ "normalized": false,
873
+ "rstrip": false,
874
+ "single_word": false,
875
+ "special": true
876
+ },
877
+ "151752": {
878
+ "content": "|<EXTRA_TOKENS_87>|",
879
+ "lstrip": false,
880
+ "normalized": false,
881
+ "rstrip": false,
882
+ "single_word": false,
883
+ "special": true
884
+ },
885
+ "151753": {
886
+ "content": "|<EXTRA_TOKENS_88>|",
887
+ "lstrip": false,
888
+ "normalized": false,
889
+ "rstrip": false,
890
+ "single_word": false,
891
+ "special": true
892
+ },
893
+ "151754": {
894
+ "content": "|<EXTRA_TOKENS_89>|",
895
+ "lstrip": false,
896
+ "normalized": false,
897
+ "rstrip": false,
898
+ "single_word": false,
899
+ "special": true
900
+ },
901
+ "151755": {
902
+ "content": "|<EXTRA_TOKENS_90>|",
903
+ "lstrip": false,
904
+ "normalized": false,
905
+ "rstrip": false,
906
+ "single_word": false,
907
+ "special": true
908
+ },
909
+ "151756": {
910
+ "content": "|<EXTRA_TOKENS_91>|",
911
+ "lstrip": false,
912
+ "normalized": false,
913
+ "rstrip": false,
914
+ "single_word": false,
915
+ "special": true
916
+ },
917
+ "151757": {
918
+ "content": "|<EXTRA_TOKENS_92>|",
919
+ "lstrip": false,
920
+ "normalized": false,
921
+ "rstrip": false,
922
+ "single_word": false,
923
+ "special": true
924
+ },
925
+ "151758": {
926
+ "content": "|<EXTRA_TOKENS_93>|",
927
+ "lstrip": false,
928
+ "normalized": false,
929
+ "rstrip": false,
930
+ "single_word": false,
931
+ "special": true
932
+ },
933
+ "151759": {
934
+ "content": "|<EXTRA_TOKENS_94>|",
935
+ "lstrip": false,
936
+ "normalized": false,
937
+ "rstrip": false,
938
+ "single_word": false,
939
+ "special": true
940
+ },
941
+ "151760": {
942
+ "content": "|<EXTRA_TOKENS_95>|",
943
+ "lstrip": false,
944
+ "normalized": false,
945
+ "rstrip": false,
946
+ "single_word": false,
947
+ "special": true
948
+ },
949
+ "151761": {
950
+ "content": "|<EXTRA_TOKENS_96>|",
951
+ "lstrip": false,
952
+ "normalized": false,
953
+ "rstrip": false,
954
+ "single_word": false,
955
+ "special": true
956
+ },
957
+ "151762": {
958
+ "content": "|<EXTRA_TOKENS_97>|",
959
+ "lstrip": false,
960
+ "normalized": false,
961
+ "rstrip": false,
962
+ "single_word": false,
963
+ "special": true
964
+ },
965
+ "151763": {
966
+ "content": "|<EXTRA_TOKENS_98>|",
967
+ "lstrip": false,
968
+ "normalized": false,
969
+ "rstrip": false,
970
+ "single_word": false,
971
+ "special": true
972
+ },
973
+ "151764": {
974
+ "content": "|<EXTRA_TOKENS_99>|",
975
+ "lstrip": false,
976
+ "normalized": false,
977
+ "rstrip": false,
978
+ "single_word": false,
979
+ "special": true
980
+ },
981
+ "151765": {
982
+ "content": "|<EXTRA_TOKENS_100>|",
983
+ "lstrip": false,
984
+ "normalized": false,
985
+ "rstrip": false,
986
+ "single_word": false,
987
+ "special": true
988
+ },
989
+ "151766": {
990
+ "content": "|<EXTRA_TOKENS_101>|",
991
+ "lstrip": false,
992
+ "normalized": false,
993
+ "rstrip": false,
994
+ "single_word": false,
995
+ "special": true
996
+ },
997
+ "151767": {
998
+ "content": "|<EXTRA_TOKENS_102>|",
999
+ "lstrip": false,
1000
+ "normalized": false,
1001
+ "rstrip": false,
1002
+ "single_word": false,
1003
+ "special": true
1004
+ },
1005
+ "151768": {
1006
+ "content": "|<EXTRA_TOKENS_103>|",
1007
+ "lstrip": false,
1008
+ "normalized": false,
1009
+ "rstrip": false,
1010
+ "single_word": false,
1011
+ "special": true
1012
+ },
1013
+ "151769": {
1014
+ "content": "|<EXTRA_TOKENS_104>|",
1015
+ "lstrip": false,
1016
+ "normalized": false,
1017
+ "rstrip": false,
1018
+ "single_word": false,
1019
+ "special": true
1020
+ },
1021
+ "151770": {
1022
+ "content": "|<EXTRA_TOKENS_105>|",
1023
+ "lstrip": false,
1024
+ "normalized": false,
1025
+ "rstrip": false,
1026
+ "single_word": false,
1027
+ "special": true
1028
+ },
1029
+ "151771": {
1030
+ "content": "|<EXTRA_TOKENS_106>|",
1031
+ "lstrip": false,
1032
+ "normalized": false,
1033
+ "rstrip": false,
1034
+ "single_word": false,
1035
+ "special": true
1036
+ },
1037
+ "151772": {
1038
+ "content": "|<EXTRA_TOKENS_107>|",
1039
+ "lstrip": false,
1040
+ "normalized": false,
1041
+ "rstrip": false,
1042
+ "single_word": false,
1043
+ "special": true
1044
+ },
1045
+ "151773": {
1046
+ "content": "|<EXTRA_TOKENS_108>|",
1047
+ "lstrip": false,
1048
+ "normalized": false,
1049
+ "rstrip": false,
1050
+ "single_word": false,
1051
+ "special": true
1052
+ },
1053
+ "151774": {
1054
+ "content": "|<EXTRA_TOKENS_109>|",
1055
+ "lstrip": false,
1056
+ "normalized": false,
1057
+ "rstrip": false,
1058
+ "single_word": false,
1059
+ "special": true
1060
+ },
1061
+ "151775": {
1062
+ "content": "|<EXTRA_TOKENS_110>|",
1063
+ "lstrip": false,
1064
+ "normalized": false,
1065
+ "rstrip": false,
1066
+ "single_word": false,
1067
+ "special": true
1068
+ },
1069
+ "151776": {
1070
+ "content": "|<EXTRA_TOKENS_111>|",
1071
+ "lstrip": false,
1072
+ "normalized": false,
1073
+ "rstrip": false,
1074
+ "single_word": false,
1075
+ "special": true
1076
+ },
1077
+ "151777": {
1078
+ "content": "|<EXTRA_TOKENS_112>|",
1079
+ "lstrip": false,
1080
+ "normalized": false,
1081
+ "rstrip": false,
1082
+ "single_word": false,
1083
+ "special": true
1084
+ },
1085
+ "151778": {
1086
+ "content": "|<EXTRA_TOKENS_113>|",
1087
+ "lstrip": false,
1088
+ "normalized": false,
1089
+ "rstrip": false,
1090
+ "single_word": false,
1091
+ "special": true
1092
+ },
1093
+ "151779": {
1094
+ "content": "|<EXTRA_TOKENS_114>|",
1095
+ "lstrip": false,
1096
+ "normalized": false,
1097
+ "rstrip": false,
1098
+ "single_word": false,
1099
+ "special": true
1100
+ },
1101
+ "151780": {
1102
+ "content": "|<EXTRA_TOKENS_115>|",
1103
+ "lstrip": false,
1104
+ "normalized": false,
1105
+ "rstrip": false,
1106
+ "single_word": false,
1107
+ "special": true
1108
+ },
1109
+ "151781": {
1110
+ "content": "|<EXTRA_TOKENS_116>|",
1111
+ "lstrip": false,
1112
+ "normalized": false,
1113
+ "rstrip": false,
1114
+ "single_word": false,
1115
+ "special": true
1116
+ },
1117
+ "151782": {
1118
+ "content": "|<EXTRA_TOKENS_117>|",
1119
+ "lstrip": false,
1120
+ "normalized": false,
1121
+ "rstrip": false,
1122
+ "single_word": false,
1123
+ "special": true
1124
+ },
1125
+ "151783": {
1126
+ "content": "|<EXTRA_TOKENS_118>|",
1127
+ "lstrip": false,
1128
+ "normalized": false,
1129
+ "rstrip": false,
1130
+ "single_word": false,
1131
+ "special": true
1132
+ },
1133
+ "151784": {
1134
+ "content": "|<EXTRA_TOKENS_119>|",
1135
+ "lstrip": false,
1136
+ "normalized": false,
1137
+ "rstrip": false,
1138
+ "single_word": false,
1139
+ "special": true
1140
+ },
1141
+ "151785": {
1142
+ "content": "|<EXTRA_TOKENS_120>|",
1143
+ "lstrip": false,
1144
+ "normalized": false,
1145
+ "rstrip": false,
1146
+ "single_word": false,
1147
+ "special": true
1148
+ },
1149
+ "151786": {
1150
+ "content": "|<EXTRA_TOKENS_121>|",
1151
+ "lstrip": false,
1152
+ "normalized": false,
1153
+ "rstrip": false,
1154
+ "single_word": false,
1155
+ "special": true
1156
+ },
1157
+ "151787": {
1158
+ "content": "|<EXTRA_TOKENS_122>|",
1159
+ "lstrip": false,
1160
+ "normalized": false,
1161
+ "rstrip": false,
1162
+ "single_word": false,
1163
+ "special": true
1164
+ },
1165
+ "151788": {
1166
+ "content": "|<EXTRA_TOKENS_123>|",
1167
+ "lstrip": false,
1168
+ "normalized": false,
1169
+ "rstrip": false,
1170
+ "single_word": false,
1171
+ "special": true
1172
+ },
1173
+ "151789": {
1174
+ "content": "|<EXTRA_TOKENS_124>|",
1175
+ "lstrip": false,
1176
+ "normalized": false,
1177
+ "rstrip": false,
1178
+ "single_word": false,
1179
+ "special": true
1180
+ },
1181
+ "151790": {
1182
+ "content": "|<EXTRA_TOKENS_125>|",
1183
+ "lstrip": false,
1184
+ "normalized": false,
1185
+ "rstrip": false,
1186
+ "single_word": false,
1187
+ "special": true
1188
+ },
1189
+ "151791": {
1190
+ "content": "|<EXTRA_TOKENS_126>|",
1191
+ "lstrip": false,
1192
+ "normalized": false,
1193
+ "rstrip": false,
1194
+ "single_word": false,
1195
+ "special": true
1196
+ },
1197
+ "151792": {
1198
+ "content": "|<EXTRA_TOKENS_127>|",
1199
+ "lstrip": false,
1200
+ "normalized": false,
1201
+ "rstrip": false,
1202
+ "single_word": false,
1203
+ "special": true
1204
+ },
1205
+ "151793": {
1206
+ "content": "|<EXTRA_TOKENS_128>|",
1207
+ "lstrip": false,
1208
+ "normalized": false,
1209
+ "rstrip": false,
1210
+ "single_word": false,
1211
+ "special": true
1212
+ },
1213
+ "151794": {
1214
+ "content": "|<EXTRA_TOKENS_129>|",
1215
+ "lstrip": false,
1216
+ "normalized": false,
1217
+ "rstrip": false,
1218
+ "single_word": false,
1219
+ "special": true
1220
+ },
1221
+ "151795": {
1222
+ "content": "|<EXTRA_TOKENS_130>|",
1223
+ "lstrip": false,
1224
+ "normalized": false,
1225
+ "rstrip": false,
1226
+ "single_word": false,
1227
+ "special": true
1228
+ },
1229
+ "151796": {
1230
+ "content": "|<EXTRA_TOKENS_131>|",
1231
+ "lstrip": false,
1232
+ "normalized": false,
1233
+ "rstrip": false,
1234
+ "single_word": false,
1235
+ "special": true
1236
+ },
1237
+ "151797": {
1238
+ "content": "|<EXTRA_TOKENS_132>|",
1239
+ "lstrip": false,
1240
+ "normalized": false,
1241
+ "rstrip": false,
1242
+ "single_word": false,
1243
+ "special": true
1244
+ },
1245
+ "151798": {
1246
+ "content": "|<EXTRA_TOKENS_133>|",
1247
+ "lstrip": false,
1248
+ "normalized": false,
1249
+ "rstrip": false,
1250
+ "single_word": false,
1251
+ "special": true
1252
+ },
1253
+ "151799": {
1254
+ "content": "|<EXTRA_TOKENS_134>|",
1255
+ "lstrip": false,
1256
+ "normalized": false,
1257
+ "rstrip": false,
1258
+ "single_word": false,
1259
+ "special": true
1260
+ },
1261
+ "151800": {
1262
+ "content": "|<EXTRA_TOKENS_135>|",
1263
+ "lstrip": false,
1264
+ "normalized": false,
1265
+ "rstrip": false,
1266
+ "single_word": false,
1267
+ "special": true
1268
+ },
1269
+ "151801": {
1270
+ "content": "|<EXTRA_TOKENS_136>|",
1271
+ "lstrip": false,
1272
+ "normalized": false,
1273
+ "rstrip": false,
1274
+ "single_word": false,
1275
+ "special": true
1276
+ },
1277
+ "151802": {
1278
+ "content": "|<EXTRA_TOKENS_137>|",
1279
+ "lstrip": false,
1280
+ "normalized": false,
1281
+ "rstrip": false,
1282
+ "single_word": false,
1283
+ "special": true
1284
+ },
1285
+ "151803": {
1286
+ "content": "|<EXTRA_TOKENS_138>|",
1287
+ "lstrip": false,
1288
+ "normalized": false,
1289
+ "rstrip": false,
1290
+ "single_word": false,
1291
+ "special": true
1292
+ },
1293
+ "151804": {
1294
+ "content": "|<EXTRA_TOKENS_139>|",
1295
+ "lstrip": false,
1296
+ "normalized": false,
1297
+ "rstrip": false,
1298
+ "single_word": false,
1299
+ "special": true
1300
+ },
1301
+ "151805": {
1302
+ "content": "|<EXTRA_TOKENS_140>|",
1303
+ "lstrip": false,
1304
+ "normalized": false,
1305
+ "rstrip": false,
1306
+ "single_word": false,
1307
+ "special": true
1308
+ },
1309
+ "151806": {
1310
+ "content": "|<EXTRA_TOKENS_141>|",
1311
+ "lstrip": false,
1312
+ "normalized": false,
1313
+ "rstrip": false,
1314
+ "single_word": false,
1315
+ "special": true
1316
+ },
1317
+ "151807": {
1318
+ "content": "|<EXTRA_TOKENS_142>|",
1319
+ "lstrip": false,
1320
+ "normalized": false,
1321
+ "rstrip": false,
1322
+ "single_word": false,
1323
+ "special": true
1324
+ },
1325
+ "151808": {
1326
+ "content": "|<EXTRA_TOKENS_143>|",
1327
+ "lstrip": false,
1328
+ "normalized": false,
1329
+ "rstrip": false,
1330
+ "single_word": false,
1331
+ "special": true
1332
+ },
1333
+ "151809": {
1334
+ "content": "|<EXTRA_TOKENS_144>|",
1335
+ "lstrip": false,
1336
+ "normalized": false,
1337
+ "rstrip": false,
1338
+ "single_word": false,
1339
+ "special": true
1340
+ },
1341
+ "151810": {
1342
+ "content": "|<EXTRA_TOKENS_145>|",
1343
+ "lstrip": false,
1344
+ "normalized": false,
1345
+ "rstrip": false,
1346
+ "single_word": false,
1347
+ "special": true
1348
+ },
1349
+ "151811": {
1350
+ "content": "|<EXTRA_TOKENS_146>|",
1351
+ "lstrip": false,
1352
+ "normalized": false,
1353
+ "rstrip": false,
1354
+ "single_word": false,
1355
+ "special": true
1356
+ },
1357
+ "151812": {
1358
+ "content": "|<EXTRA_TOKENS_147>|",
1359
+ "lstrip": false,
1360
+ "normalized": false,
1361
+ "rstrip": false,
1362
+ "single_word": false,
1363
+ "special": true
1364
+ },
1365
+ "151813": {
1366
+ "content": "|<EXTRA_TOKENS_148>|",
1367
+ "lstrip": false,
1368
+ "normalized": false,
1369
+ "rstrip": false,
1370
+ "single_word": false,
1371
+ "special": true
1372
+ },
1373
+ "151814": {
1374
+ "content": "|<EXTRA_TOKENS_149>|",
1375
+ "lstrip": false,
1376
+ "normalized": false,
1377
+ "rstrip": false,
1378
+ "single_word": false,
1379
+ "special": true
1380
+ },
1381
+ "151815": {
1382
+ "content": "|<EXTRA_TOKENS_150>|",
1383
+ "lstrip": false,
1384
+ "normalized": false,
1385
+ "rstrip": false,
1386
+ "single_word": false,
1387
+ "special": true
1388
+ },
1389
+ "151816": {
1390
+ "content": "|<EXTRA_TOKENS_151>|",
1391
+ "lstrip": false,
1392
+ "normalized": false,
1393
+ "rstrip": false,
1394
+ "single_word": false,
1395
+ "special": true
1396
+ },
1397
+ "151817": {
1398
+ "content": "|<EXTRA_TOKENS_152>|",
1399
+ "lstrip": false,
1400
+ "normalized": false,
1401
+ "rstrip": false,
1402
+ "single_word": false,
1403
+ "special": true
1404
+ },
1405
+ "151818": {
1406
+ "content": "|<EXTRA_TOKENS_153>|",
1407
+ "lstrip": false,
1408
+ "normalized": false,
1409
+ "rstrip": false,
1410
+ "single_word": false,
1411
+ "special": true
1412
+ },
1413
+ "151819": {
1414
+ "content": "|<EXTRA_TOKENS_154>|",
1415
+ "lstrip": false,
1416
+ "normalized": false,
1417
+ "rstrip": false,
1418
+ "single_word": false,
1419
+ "special": true
1420
+ },
1421
+ "151820": {
1422
+ "content": "|<EXTRA_TOKENS_155>|",
1423
+ "lstrip": false,
1424
+ "normalized": false,
1425
+ "rstrip": false,
1426
+ "single_word": false,
1427
+ "special": true
1428
+ },
1429
+ "151821": {
1430
+ "content": "|<EXTRA_TOKENS_156>|",
1431
+ "lstrip": false,
1432
+ "normalized": false,
1433
+ "rstrip": false,
1434
+ "single_word": false,
1435
+ "special": true
1436
+ },
1437
+ "151822": {
1438
+ "content": "|<EXTRA_TOKENS_157>|",
1439
+ "lstrip": false,
1440
+ "normalized": false,
1441
+ "rstrip": false,
1442
+ "single_word": false,
1443
+ "special": true
1444
+ },
1445
+ "151823": {
1446
+ "content": "|<EXTRA_TOKENS_158>|",
1447
+ "lstrip": false,
1448
+ "normalized": false,
1449
+ "rstrip": false,
1450
+ "single_word": false,
1451
+ "special": true
1452
+ },
1453
+ "151824": {
1454
+ "content": "|<EXTRA_TOKENS_159>|",
1455
+ "lstrip": false,
1456
+ "normalized": false,
1457
+ "rstrip": false,
1458
+ "single_word": false,
1459
+ "special": true
1460
+ },
1461
+ "151825": {
1462
+ "content": "|<EXTRA_TOKENS_160>|",
1463
+ "lstrip": false,
1464
+ "normalized": false,
1465
+ "rstrip": false,
1466
+ "single_word": false,
1467
+ "special": true
1468
+ },
1469
+ "151826": {
1470
+ "content": "|<EXTRA_TOKENS_161>|",
1471
+ "lstrip": false,
1472
+ "normalized": false,
1473
+ "rstrip": false,
1474
+ "single_word": false,
1475
+ "special": true
1476
+ },
1477
+ "151827": {
1478
+ "content": "|<EXTRA_TOKENS_162>|",
1479
+ "lstrip": false,
1480
+ "normalized": false,
1481
+ "rstrip": false,
1482
+ "single_word": false,
1483
+ "special": true
1484
+ },
1485
+ "151828": {
1486
+ "content": "|<EXTRA_TOKENS_163>|",
1487
+ "lstrip": false,
1488
+ "normalized": false,
1489
+ "rstrip": false,
1490
+ "single_word": false,
1491
+ "special": true
1492
+ },
1493
+ "151829": {
1494
+ "content": "|<EXTRA_TOKENS_164>|",
1495
+ "lstrip": false,
1496
+ "normalized": false,
1497
+ "rstrip": false,
1498
+ "single_word": false,
1499
+ "special": true
1500
+ },
1501
+ "151830": {
1502
+ "content": "|<EXTRA_TOKENS_165>|",
1503
+ "lstrip": false,
1504
+ "normalized": false,
1505
+ "rstrip": false,
1506
+ "single_word": false,
1507
+ "special": true
1508
+ },
1509
+ "151831": {
1510
+ "content": "|<EXTRA_TOKENS_166>|",
1511
+ "lstrip": false,
1512
+ "normalized": false,
1513
+ "rstrip": false,
1514
+ "single_word": false,
1515
+ "special": true
1516
+ },
1517
+ "151832": {
1518
+ "content": "|<EXTRA_TOKENS_167>|",
1519
+ "lstrip": false,
1520
+ "normalized": false,
1521
+ "rstrip": false,
1522
+ "single_word": false,
1523
+ "special": true
1524
+ },
1525
+ "151833": {
1526
+ "content": "|<EXTRA_TOKENS_168>|",
1527
+ "lstrip": false,
1528
+ "normalized": false,
1529
+ "rstrip": false,
1530
+ "single_word": false,
1531
+ "special": true
1532
+ },
1533
+ "151834": {
1534
+ "content": "|<EXTRA_TOKENS_169>|",
1535
+ "lstrip": false,
1536
+ "normalized": false,
1537
+ "rstrip": false,
1538
+ "single_word": false,
1539
+ "special": true
1540
+ },
1541
+ "151835": {
1542
+ "content": "|<EXTRA_TOKENS_170>|",
1543
+ "lstrip": false,
1544
+ "normalized": false,
1545
+ "rstrip": false,
1546
+ "single_word": false,
1547
+ "special": true
1548
+ },
1549
+ "151836": {
1550
+ "content": "|<EXTRA_TOKENS_171>|",
1551
+ "lstrip": false,
1552
+ "normalized": false,
1553
+ "rstrip": false,
1554
+ "single_word": false,
1555
+ "special": true
1556
+ },
1557
+ "151837": {
1558
+ "content": "|<EXTRA_TOKENS_172>|",
1559
+ "lstrip": false,
1560
+ "normalized": false,
1561
+ "rstrip": false,
1562
+ "single_word": false,
1563
+ "special": true
1564
+ },
1565
+ "151838": {
1566
+ "content": "|<EXTRA_TOKENS_173>|",
1567
+ "lstrip": false,
1568
+ "normalized": false,
1569
+ "rstrip": false,
1570
+ "single_word": false,
1571
+ "special": true
1572
+ },
1573
+ "151839": {
1574
+ "content": "|<EXTRA_TOKENS_174>|",
1575
+ "lstrip": false,
1576
+ "normalized": false,
1577
+ "rstrip": false,
1578
+ "single_word": false,
1579
+ "special": true
1580
+ },
1581
+ "151840": {
1582
+ "content": "|<EXTRA_TOKENS_175>|",
1583
+ "lstrip": false,
1584
+ "normalized": false,
1585
+ "rstrip": false,
1586
+ "single_word": false,
1587
+ "special": true
1588
+ },
1589
+ "151841": {
1590
+ "content": "|<EXTRA_TOKENS_176>|",
1591
+ "lstrip": false,
1592
+ "normalized": false,
1593
+ "rstrip": false,
1594
+ "single_word": false,
1595
+ "special": true
1596
+ },
1597
+ "151842": {
1598
+ "content": "|<EXTRA_TOKENS_177>|",
1599
+ "lstrip": false,
1600
+ "normalized": false,
1601
+ "rstrip": false,
1602
+ "single_word": false,
1603
+ "special": true
1604
+ },
1605
+ "151843": {
1606
+ "content": "|<EXTRA_TOKENS_178>|",
1607
+ "lstrip": false,
1608
+ "normalized": false,
1609
+ "rstrip": false,
1610
+ "single_word": false,
1611
+ "special": true
1612
+ },
1613
+ "151844": {
1614
+ "content": "|<EXTRA_TOKENS_179>|",
1615
+ "lstrip": false,
1616
+ "normalized": false,
1617
+ "rstrip": false,
1618
+ "single_word": false,
1619
+ "special": true
1620
+ },
1621
+ "151845": {
1622
+ "content": "|<EXTRA_TOKENS_180>|",
1623
+ "lstrip": false,
1624
+ "normalized": false,
1625
+ "rstrip": false,
1626
+ "single_word": false,
1627
+ "special": true
1628
+ },
1629
+ "151846": {
1630
+ "content": "|<EXTRA_TOKENS_181>|",
1631
+ "lstrip": false,
1632
+ "normalized": false,
1633
+ "rstrip": false,
1634
+ "single_word": false,
1635
+ "special": true
1636
+ },
1637
+ "151847": {
1638
+ "content": "|<EXTRA_TOKENS_182>|",
1639
+ "lstrip": false,
1640
+ "normalized": false,
1641
+ "rstrip": false,
1642
+ "single_word": false,
1643
+ "special": true
1644
+ },
1645
+ "151848": {
1646
+ "content": "|<EXTRA_TOKENS_183>|",
1647
+ "lstrip": false,
1648
+ "normalized": false,
1649
+ "rstrip": false,
1650
+ "single_word": false,
1651
+ "special": true
1652
+ },
1653
+ "151849": {
1654
+ "content": "|<EXTRA_TOKENS_184>|",
1655
+ "lstrip": false,
1656
+ "normalized": false,
1657
+ "rstrip": false,
1658
+ "single_word": false,
1659
+ "special": true
1660
+ },
1661
+ "151850": {
1662
+ "content": "|<EXTRA_TOKENS_185>|",
1663
+ "lstrip": false,
1664
+ "normalized": false,
1665
+ "rstrip": false,
1666
+ "single_word": false,
1667
+ "special": true
1668
+ },
1669
+ "151851": {
1670
+ "content": "|<EXTRA_TOKENS_186>|",
1671
+ "lstrip": false,
1672
+ "normalized": false,
1673
+ "rstrip": false,
1674
+ "single_word": false,
1675
+ "special": true
1676
+ },
1677
+ "151852": {
1678
+ "content": "|<EXTRA_TOKENS_187>|",
1679
+ "lstrip": false,
1680
+ "normalized": false,
1681
+ "rstrip": false,
1682
+ "single_word": false,
1683
+ "special": true
1684
+ },
1685
+ "151853": {
1686
+ "content": "|<EXTRA_TOKENS_188>|",
1687
+ "lstrip": false,
1688
+ "normalized": false,
1689
+ "rstrip": false,
1690
+ "single_word": false,
1691
+ "special": true
1692
+ },
1693
+ "151854": {
1694
+ "content": "|<EXTRA_TOKENS_189>|",
1695
+ "lstrip": false,
1696
+ "normalized": false,
1697
+ "rstrip": false,
1698
+ "single_word": false,
1699
+ "special": true
1700
+ },
1701
+ "151855": {
1702
+ "content": "|<EXTRA_TOKENS_190>|",
1703
+ "lstrip": false,
1704
+ "normalized": false,
1705
+ "rstrip": false,
1706
+ "single_word": false,
1707
+ "special": true
1708
+ },
1709
+ "151856": {
1710
+ "content": "|<EXTRA_TOKENS_191>|",
1711
+ "lstrip": false,
1712
+ "normalized": false,
1713
+ "rstrip": false,
1714
+ "single_word": false,
1715
+ "special": true
1716
+ },
1717
+ "151857": {
1718
+ "content": "|<EXTRA_TOKENS_192>|",
1719
+ "lstrip": false,
1720
+ "normalized": false,
1721
+ "rstrip": false,
1722
+ "single_word": false,
1723
+ "special": true
1724
+ },
1725
+ "151858": {
1726
+ "content": "|<EXTRA_TOKENS_193>|",
1727
+ "lstrip": false,
1728
+ "normalized": false,
1729
+ "rstrip": false,
1730
+ "single_word": false,
1731
+ "special": true
1732
+ },
1733
+ "151859": {
1734
+ "content": "|<EXTRA_TOKENS_194>|",
1735
+ "lstrip": false,
1736
+ "normalized": false,
1737
+ "rstrip": false,
1738
+ "single_word": false,
1739
+ "special": true
1740
+ },
1741
+ "151860": {
1742
+ "content": "|<EXTRA_TOKENS_195>|",
1743
+ "lstrip": false,
1744
+ "normalized": false,
1745
+ "rstrip": false,
1746
+ "single_word": false,
1747
+ "special": true
1748
+ },
1749
+ "151861": {
1750
+ "content": "|<EXTRA_TOKENS_196>|",
1751
+ "lstrip": false,
1752
+ "normalized": false,
1753
+ "rstrip": false,
1754
+ "single_word": false,
1755
+ "special": true
1756
+ },
1757
+ "151862": {
1758
+ "content": "|<EXTRA_TOKENS_197>|",
1759
+ "lstrip": false,
1760
+ "normalized": false,
1761
+ "rstrip": false,
1762
+ "single_word": false,
1763
+ "special": true
1764
+ },
1765
+ "151863": {
1766
+ "content": "|<EXTRA_TOKENS_198>|",
1767
+ "lstrip": false,
1768
+ "normalized": false,
1769
+ "rstrip": false,
1770
+ "single_word": false,
1771
+ "special": true
1772
+ },
1773
+ "151864": {
1774
+ "content": "|<EXTRA_TOKENS_199>|",
1775
+ "lstrip": false,
1776
+ "normalized": false,
1777
+ "rstrip": false,
1778
+ "single_word": false,
1779
+ "special": true
1780
+ },
1781
+ "151865": {
1782
+ "content": "|<EXTRA_TOKENS_200>|",
1783
+ "lstrip": false,
1784
+ "normalized": false,
1785
+ "rstrip": false,
1786
+ "single_word": false,
1787
+ "special": true
1788
+ },
1789
+ "151866": {
1790
+ "content": "|<EXTRA_TOKENS_201>|",
1791
+ "lstrip": false,
1792
+ "normalized": false,
1793
+ "rstrip": false,
1794
+ "single_word": false,
1795
+ "special": true
1796
+ },
1797
+ "151867": {
1798
+ "content": "|<EXTRA_TOKENS_202>|",
1799
+ "lstrip": false,
1800
+ "normalized": false,
1801
+ "rstrip": false,
1802
+ "single_word": false,
1803
+ "special": true
1804
+ },
1805
+ "151868": {
1806
+ "content": "|<EXTRA_TOKENS_203>|",
1807
+ "lstrip": false,
1808
+ "normalized": false,
1809
+ "rstrip": false,
1810
+ "single_word": false,
1811
+ "special": true
1812
+ },
1813
+ "151869": {
1814
+ "content": "|<EXTRA_TOKENS_204>|",
1815
+ "lstrip": false,
1816
+ "normalized": false,
1817
+ "rstrip": false,
1818
+ "single_word": false,
1819
+ "special": true
1820
+ },
1821
+ "151870": {
1822
+ "content": "|<EXTRA_TOKENS_205>|",
1823
+ "lstrip": false,
1824
+ "normalized": false,
1825
+ "rstrip": false,
1826
+ "single_word": false,
1827
+ "special": true
1828
+ },
1829
+ "151871": {
1830
+ "content": "|<EXTRA_TOKENS_206>|",
1831
+ "lstrip": false,
1832
+ "normalized": false,
1833
+ "rstrip": false,
1834
+ "single_word": false,
1835
+ "special": true
1836
+ },
1837
+ "151872": {
1838
+ "content": "|<EXTRA_TOKENS_207>|",
1839
+ "lstrip": false,
1840
+ "normalized": false,
1841
+ "rstrip": false,
1842
+ "single_word": false,
1843
+ "special": true
1844
+ },
1845
+ "151873": {
1846
+ "content": "|<EXTRA_TOKENS_208>|",
1847
+ "lstrip": false,
1848
+ "normalized": false,
1849
+ "rstrip": false,
1850
+ "single_word": false,
1851
+ "special": true
1852
+ },
1853
+ "151874": {
1854
+ "content": "|<EXTRA_TOKENS_209>|",
1855
+ "lstrip": false,
1856
+ "normalized": false,
1857
+ "rstrip": false,
1858
+ "single_word": false,
1859
+ "special": true
1860
+ },
1861
+ "151875": {
1862
+ "content": "|<EXTRA_TOKENS_210>|",
1863
+ "lstrip": false,
1864
+ "normalized": false,
1865
+ "rstrip": false,
1866
+ "single_word": false,
1867
+ "special": true
1868
+ },
1869
+ "151876": {
1870
+ "content": "|<EXTRA_TOKENS_211>|",
1871
+ "lstrip": false,
1872
+ "normalized": false,
1873
+ "rstrip": false,
1874
+ "single_word": false,
1875
+ "special": true
1876
+ },
1877
+ "151877": {
1878
+ "content": "|<EXTRA_TOKENS_212>|",
1879
+ "lstrip": false,
1880
+ "normalized": false,
1881
+ "rstrip": false,
1882
+ "single_word": false,
1883
+ "special": true
1884
+ },
1885
+ "151878": {
1886
+ "content": "|<EXTRA_TOKENS_213>|",
1887
+ "lstrip": false,
1888
+ "normalized": false,
1889
+ "rstrip": false,
1890
+ "single_word": false,
1891
+ "special": true
1892
+ },
1893
+ "151879": {
1894
+ "content": "|<EXTRA_TOKENS_214>|",
1895
+ "lstrip": false,
1896
+ "normalized": false,
1897
+ "rstrip": false,
1898
+ "single_word": false,
1899
+ "special": true
1900
+ },
1901
+ "151880": {
1902
+ "content": "|<EXTRA_TOKENS_215>|",
1903
+ "lstrip": false,
1904
+ "normalized": false,
1905
+ "rstrip": false,
1906
+ "single_word": false,
1907
+ "special": true
1908
+ },
1909
+ "151881": {
1910
+ "content": "|<EXTRA_TOKENS_216>|",
1911
+ "lstrip": false,
1912
+ "normalized": false,
1913
+ "rstrip": false,
1914
+ "single_word": false,
1915
+ "special": true
1916
+ },
1917
+ "151882": {
1918
+ "content": "|<EXTRA_TOKENS_217>|",
1919
+ "lstrip": false,
1920
+ "normalized": false,
1921
+ "rstrip": false,
1922
+ "single_word": false,
1923
+ "special": true
1924
+ },
1925
+ "151883": {
1926
+ "content": "|<EXTRA_TOKENS_218>|",
1927
+ "lstrip": false,
1928
+ "normalized": false,
1929
+ "rstrip": false,
1930
+ "single_word": false,
1931
+ "special": true
1932
+ },
1933
+ "151884": {
1934
+ "content": "|<EXTRA_TOKENS_219>|",
1935
+ "lstrip": false,
1936
+ "normalized": false,
1937
+ "rstrip": false,
1938
+ "single_word": false,
1939
+ "special": true
1940
+ },
1941
+ "151885": {
1942
+ "content": "|<EXTRA_TOKENS_220>|",
1943
+ "lstrip": false,
1944
+ "normalized": false,
1945
+ "rstrip": false,
1946
+ "single_word": false,
1947
+ "special": true
1948
+ },
1949
+ "151886": {
1950
+ "content": "|<EXTRA_TOKENS_221>|",
1951
+ "lstrip": false,
1952
+ "normalized": false,
1953
+ "rstrip": false,
1954
+ "single_word": false,
1955
+ "special": true
1956
+ },
1957
+ "151887": {
1958
+ "content": "|<EXTRA_TOKENS_222>|",
1959
+ "lstrip": false,
1960
+ "normalized": false,
1961
+ "rstrip": false,
1962
+ "single_word": false,
1963
+ "special": true
1964
+ },
1965
+ "151888": {
1966
+ "content": "|<EXTRA_TOKENS_223>|",
1967
+ "lstrip": false,
1968
+ "normalized": false,
1969
+ "rstrip": false,
1970
+ "single_word": false,
1971
+ "special": true
1972
+ },
1973
+ "151889": {
1974
+ "content": "|<EXTRA_TOKENS_224>|",
1975
+ "lstrip": false,
1976
+ "normalized": false,
1977
+ "rstrip": false,
1978
+ "single_word": false,
1979
+ "special": true
1980
+ },
1981
+ "151890": {
1982
+ "content": "|<EXTRA_TOKENS_225>|",
1983
+ "lstrip": false,
1984
+ "normalized": false,
1985
+ "rstrip": false,
1986
+ "single_word": false,
1987
+ "special": true
1988
+ },
1989
+ "151891": {
1990
+ "content": "|<EXTRA_TOKENS_226>|",
1991
+ "lstrip": false,
1992
+ "normalized": false,
1993
+ "rstrip": false,
1994
+ "single_word": false,
1995
+ "special": true
1996
+ },
1997
+ "151892": {
1998
+ "content": "|<EXTRA_TOKENS_227>|",
1999
+ "lstrip": false,
2000
+ "normalized": false,
2001
+ "rstrip": false,
2002
+ "single_word": false,
2003
+ "special": true
2004
+ },
2005
+ "151893": {
2006
+ "content": "|<EXTRA_TOKENS_228>|",
2007
+ "lstrip": false,
2008
+ "normalized": false,
2009
+ "rstrip": false,
2010
+ "single_word": false,
2011
+ "special": true
2012
+ },
2013
+ "151894": {
2014
+ "content": "|<EXTRA_TOKENS_229>|",
2015
+ "lstrip": false,
2016
+ "normalized": false,
2017
+ "rstrip": false,
2018
+ "single_word": false,
2019
+ "special": true
2020
+ },
2021
+ "151895": {
2022
+ "content": "|<EXTRA_TOKENS_230>|",
2023
+ "lstrip": false,
2024
+ "normalized": false,
2025
+ "rstrip": false,
2026
+ "single_word": false,
2027
+ "special": true
2028
+ },
2029
+ "151896": {
2030
+ "content": "|<EXTRA_TOKENS_231>|",
2031
+ "lstrip": false,
2032
+ "normalized": false,
2033
+ "rstrip": false,
2034
+ "single_word": false,
2035
+ "special": true
2036
+ },
2037
+ "151897": {
2038
+ "content": "|<EXTRA_TOKENS_232>|",
2039
+ "lstrip": false,
2040
+ "normalized": false,
2041
+ "rstrip": false,
2042
+ "single_word": false,
2043
+ "special": true
2044
+ },
2045
+ "151898": {
2046
+ "content": "|<EXTRA_TOKENS_233>|",
2047
+ "lstrip": false,
2048
+ "normalized": false,
2049
+ "rstrip": false,
2050
+ "single_word": false,
2051
+ "special": true
2052
+ },
2053
+ "151899": {
2054
+ "content": "|<EXTRA_TOKENS_234>|",
2055
+ "lstrip": false,
2056
+ "normalized": false,
2057
+ "rstrip": false,
2058
+ "single_word": false,
2059
+ "special": true
2060
+ },
2061
+ "151900": {
2062
+ "content": "|<EXTRA_TOKENS_235>|",
2063
+ "lstrip": false,
2064
+ "normalized": false,
2065
+ "rstrip": false,
2066
+ "single_word": false,
2067
+ "special": true
2068
+ },
2069
+ "151901": {
2070
+ "content": "|<EXTRA_TOKENS_236>|",
2071
+ "lstrip": false,
2072
+ "normalized": false,
2073
+ "rstrip": false,
2074
+ "single_word": false,
2075
+ "special": true
2076
+ },
2077
+ "151902": {
2078
+ "content": "|<EXTRA_TOKENS_237>|",
2079
+ "lstrip": false,
2080
+ "normalized": false,
2081
+ "rstrip": false,
2082
+ "single_word": false,
2083
+ "special": true
2084
+ },
2085
+ "151903": {
2086
+ "content": "|<EXTRA_TOKENS_238>|",
2087
+ "lstrip": false,
2088
+ "normalized": false,
2089
+ "rstrip": false,
2090
+ "single_word": false,
2091
+ "special": true
2092
+ },
2093
+ "151904": {
2094
+ "content": "|<EXTRA_TOKENS_239>|",
2095
+ "lstrip": false,
2096
+ "normalized": false,
2097
+ "rstrip": false,
2098
+ "single_word": false,
2099
+ "special": true
2100
+ },
2101
+ "151905": {
2102
+ "content": "|<EXTRA_TOKENS_240>|",
2103
+ "lstrip": false,
2104
+ "normalized": false,
2105
+ "rstrip": false,
2106
+ "single_word": false,
2107
+ "special": true
2108
+ },
2109
+ "151906": {
2110
+ "content": "|<EXTRA_TOKENS_241>|",
2111
+ "lstrip": false,
2112
+ "normalized": false,
2113
+ "rstrip": false,
2114
+ "single_word": false,
2115
+ "special": true
2116
+ },
2117
+ "151907": {
2118
+ "content": "|<EXTRA_TOKENS_242>|",
2119
+ "lstrip": false,
2120
+ "normalized": false,
2121
+ "rstrip": false,
2122
+ "single_word": false,
2123
+ "special": true
2124
+ },
2125
+ "151908": {
2126
+ "content": "|<EXTRA_TOKENS_243>|",
2127
+ "lstrip": false,
2128
+ "normalized": false,
2129
+ "rstrip": false,
2130
+ "single_word": false,
2131
+ "special": true
2132
+ },
2133
+ "151909": {
2134
+ "content": "|<EXTRA_TOKENS_244>|",
2135
+ "lstrip": false,
2136
+ "normalized": false,
2137
+ "rstrip": false,
2138
+ "single_word": false,
2139
+ "special": true
2140
+ },
2141
+ "151910": {
2142
+ "content": "|<EXTRA_TOKENS_245>|",
2143
+ "lstrip": false,
2144
+ "normalized": false,
2145
+ "rstrip": false,
2146
+ "single_word": false,
2147
+ "special": true
2148
+ },
2149
+ "151911": {
2150
+ "content": "|<EXTRA_TOKENS_246>|",
2151
+ "lstrip": false,
2152
+ "normalized": false,
2153
+ "rstrip": false,
2154
+ "single_word": false,
2155
+ "special": true
2156
+ },
2157
+ "151912": {
2158
+ "content": "|<EXTRA_TOKENS_247>|",
2159
+ "lstrip": false,
2160
+ "normalized": false,
2161
+ "rstrip": false,
2162
+ "single_word": false,
2163
+ "special": true
2164
+ },
2165
+ "151913": {
2166
+ "content": "|<EXTRA_TOKENS_248>|",
2167
+ "lstrip": false,
2168
+ "normalized": false,
2169
+ "rstrip": false,
2170
+ "single_word": false,
2171
+ "special": true
2172
+ },
2173
+ "151914": {
2174
+ "content": "|<EXTRA_TOKENS_249>|",
2175
+ "lstrip": false,
2176
+ "normalized": false,
2177
+ "rstrip": false,
2178
+ "single_word": false,
2179
+ "special": true
2180
+ },
2181
+ "151915": {
2182
+ "content": "|<EXTRA_TOKENS_250>|",
2183
+ "lstrip": false,
2184
+ "normalized": false,
2185
+ "rstrip": false,
2186
+ "single_word": false,
2187
+ "special": true
2188
+ },
2189
+ "151916": {
2190
+ "content": "|<EXTRA_TOKENS_251>|",
2191
+ "lstrip": false,
2192
+ "normalized": false,
2193
+ "rstrip": false,
2194
+ "single_word": false,
2195
+ "special": true
2196
+ },
2197
+ "151917": {
2198
+ "content": "|<EXTRA_TOKENS_252>|",
2199
+ "lstrip": false,
2200
+ "normalized": false,
2201
+ "rstrip": false,
2202
+ "single_word": false,
2203
+ "special": true
2204
+ },
2205
+ "151918": {
2206
+ "content": "|<EXTRA_TOKENS_253>|",
2207
+ "lstrip": false,
2208
+ "normalized": false,
2209
+ "rstrip": false,
2210
+ "single_word": false,
2211
+ "special": true
2212
+ },
2213
+ "151919": {
2214
+ "content": "|<EXTRA_TOKENS_254>|",
2215
+ "lstrip": false,
2216
+ "normalized": false,
2217
+ "rstrip": false,
2218
+ "single_word": false,
2219
+ "special": true
2220
+ },
2221
+ "151920": {
2222
+ "content": "|<EXTRA_TOKENS_255>|",
2223
+ "lstrip": false,
2224
+ "normalized": false,
2225
+ "rstrip": false,
2226
+ "single_word": false,
2227
+ "special": true
2228
+ },
2229
+ "151921": {
2230
+ "content": "|<EXTRA_TOKENS_256>|",
2231
+ "lstrip": false,
2232
+ "normalized": false,
2233
+ "rstrip": false,
2234
+ "single_word": false,
2235
+ "special": true
2236
+ },
2237
+ "151922": {
2238
+ "content": "|<EXTRA_TOKENS_257>|",
2239
+ "lstrip": false,
2240
+ "normalized": false,
2241
+ "rstrip": false,
2242
+ "single_word": false,
2243
+ "special": true
2244
+ },
2245
+ "151923": {
2246
+ "content": "|<EXTRA_TOKENS_258>|",
2247
+ "lstrip": false,
2248
+ "normalized": false,
2249
+ "rstrip": false,
2250
+ "single_word": false,
2251
+ "special": true
2252
+ },
2253
+ "151924": {
2254
+ "content": "|<EXTRA_TOKENS_259>|",
2255
+ "lstrip": false,
2256
+ "normalized": false,
2257
+ "rstrip": false,
2258
+ "single_word": false,
2259
+ "special": true
2260
+ },
2261
+ "151925": {
2262
+ "content": "|<EXTRA_TOKENS_260>|",
2263
+ "lstrip": false,
2264
+ "normalized": false,
2265
+ "rstrip": false,
2266
+ "single_word": false,
2267
+ "special": true
2268
+ },
2269
+ "151926": {
2270
+ "content": "|<EXTRA_TOKENS_261>|",
2271
+ "lstrip": false,
2272
+ "normalized": false,
2273
+ "rstrip": false,
2274
+ "single_word": false,
2275
+ "special": true
2276
+ },
2277
+ "151927": {
2278
+ "content": "|<EXTRA_TOKENS_262>|",
2279
+ "lstrip": false,
2280
+ "normalized": false,
2281
+ "rstrip": false,
2282
+ "single_word": false,
2283
+ "special": true
2284
+ },
2285
+ "151928": {
2286
+ "content": "|<EXTRA_TOKENS_263>|",
2287
+ "lstrip": false,
2288
+ "normalized": false,
2289
+ "rstrip": false,
2290
+ "single_word": false,
2291
+ "special": true
2292
+ },
2293
+ "151929": {
2294
+ "content": "|<EXTRA_TOKENS_264>|",
2295
+ "lstrip": false,
2296
+ "normalized": false,
2297
+ "rstrip": false,
2298
+ "single_word": false,
2299
+ "special": true
2300
+ },
2301
+ "151930": {
2302
+ "content": "|<EXTRA_TOKENS_265>|",
2303
+ "lstrip": false,
2304
+ "normalized": false,
2305
+ "rstrip": false,
2306
+ "single_word": false,
2307
+ "special": true
2308
+ },
2309
+ "151931": {
2310
+ "content": "|<EXTRA_TOKENS_266>|",
2311
+ "lstrip": false,
2312
+ "normalized": false,
2313
+ "rstrip": false,
2314
+ "single_word": false,
2315
+ "special": true
2316
+ },
2317
+ "151932": {
2318
+ "content": "|<EXTRA_TOKENS_267>|",
2319
+ "lstrip": false,
2320
+ "normalized": false,
2321
+ "rstrip": false,
2322
+ "single_word": false,
2323
+ "special": true
2324
+ },
2325
+ "151933": {
2326
+ "content": "|<EXTRA_TOKENS_268>|",
2327
+ "lstrip": false,
2328
+ "normalized": false,
2329
+ "rstrip": false,
2330
+ "single_word": false,
2331
+ "special": true
2332
+ },
2333
+ "151934": {
2334
+ "content": "|<EXTRA_TOKENS_269>|",
2335
+ "lstrip": false,
2336
+ "normalized": false,
2337
+ "rstrip": false,
2338
+ "single_word": false,
2339
+ "special": true
2340
+ },
2341
+ "151935": {
2342
+ "content": "|<EXTRA_TOKENS_270>|",
2343
+ "lstrip": false,
2344
+ "normalized": false,
2345
+ "rstrip": false,
2346
+ "single_word": false,
2347
+ "special": true
2348
+ },
2349
+ "151936": {
2350
+ "content": "|<EXTRA_TOKENS_271>|",
2351
+ "lstrip": false,
2352
+ "normalized": false,
2353
+ "rstrip": false,
2354
+ "single_word": false,
2355
+ "special": true
2356
+ },
2357
+ "151937": {
2358
+ "content": "|<EXTRA_TOKENS_272>|",
2359
+ "lstrip": false,
2360
+ "normalized": false,
2361
+ "rstrip": false,
2362
+ "single_word": false,
2363
+ "special": true
2364
+ },
2365
+ "151938": {
2366
+ "content": "|<EXTRA_TOKENS_273>|",
2367
+ "lstrip": false,
2368
+ "normalized": false,
2369
+ "rstrip": false,
2370
+ "single_word": false,
2371
+ "special": true
2372
+ },
2373
+ "151939": {
2374
+ "content": "|<EXTRA_TOKENS_274>|",
2375
+ "lstrip": false,
2376
+ "normalized": false,
2377
+ "rstrip": false,
2378
+ "single_word": false,
2379
+ "special": true
2380
+ },
2381
+ "151940": {
2382
+ "content": "|<EXTRA_TOKENS_275>|",
2383
+ "lstrip": false,
2384
+ "normalized": false,
2385
+ "rstrip": false,
2386
+ "single_word": false,
2387
+ "special": true
2388
+ },
2389
+ "151941": {
2390
+ "content": "|<EXTRA_TOKENS_276>|",
2391
+ "lstrip": false,
2392
+ "normalized": false,
2393
+ "rstrip": false,
2394
+ "single_word": false,
2395
+ "special": true
2396
+ },
2397
+ "151942": {
2398
+ "content": "|<EXTRA_TOKENS_277>|",
2399
+ "lstrip": false,
2400
+ "normalized": false,
2401
+ "rstrip": false,
2402
+ "single_word": false,
2403
+ "special": true
2404
+ },
2405
+ "151943": {
2406
+ "content": "|<EXTRA_TOKENS_278>|",
2407
+ "lstrip": false,
2408
+ "normalized": false,
2409
+ "rstrip": false,
2410
+ "single_word": false,
2411
+ "special": true
2412
+ },
2413
+ "151944": {
2414
+ "content": "|<EXTRA_TOKENS_279>|",
2415
+ "lstrip": false,
2416
+ "normalized": false,
2417
+ "rstrip": false,
2418
+ "single_word": false,
2419
+ "special": true
2420
+ },
2421
+ "151945": {
2422
+ "content": "|<EXTRA_TOKENS_280>|",
2423
+ "lstrip": false,
2424
+ "normalized": false,
2425
+ "rstrip": false,
2426
+ "single_word": false,
2427
+ "special": true
2428
+ },
2429
+ "151946": {
2430
+ "content": "|<EXTRA_TOKENS_281>|",
2431
+ "lstrip": false,
2432
+ "normalized": false,
2433
+ "rstrip": false,
2434
+ "single_word": false,
2435
+ "special": true
2436
+ },
2437
+ "151947": {
2438
+ "content": "|<EXTRA_TOKENS_282>|",
2439
+ "lstrip": false,
2440
+ "normalized": false,
2441
+ "rstrip": false,
2442
+ "single_word": false,
2443
+ "special": true
2444
+ },
2445
+ "151948": {
2446
+ "content": "|<EXTRA_TOKENS_283>|",
2447
+ "lstrip": false,
2448
+ "normalized": false,
2449
+ "rstrip": false,
2450
+ "single_word": false,
2451
+ "special": true
2452
+ },
2453
+ "151949": {
2454
+ "content": "|<EXTRA_TOKENS_284>|",
2455
+ "lstrip": false,
2456
+ "normalized": false,
2457
+ "rstrip": false,
2458
+ "single_word": false,
2459
+ "special": true
2460
+ },
2461
+ "151950": {
2462
+ "content": "|<EXTRA_TOKENS_285>|",
2463
+ "lstrip": false,
2464
+ "normalized": false,
2465
+ "rstrip": false,
2466
+ "single_word": false,
2467
+ "special": true
2468
+ },
2469
+ "151951": {
2470
+ "content": "|<EXTRA_TOKENS_286>|",
2471
+ "lstrip": false,
2472
+ "normalized": false,
2473
+ "rstrip": false,
2474
+ "single_word": false,
2475
+ "special": true
2476
+ },
2477
+ "151952": {
2478
+ "content": "|<EXTRA_TOKENS_287>|",
2479
+ "lstrip": false,
2480
+ "normalized": false,
2481
+ "rstrip": false,
2482
+ "single_word": false,
2483
+ "special": true
2484
+ },
2485
+ "151953": {
2486
+ "content": "|<EXTRA_TOKENS_288>|",
2487
+ "lstrip": false,
2488
+ "normalized": false,
2489
+ "rstrip": false,
2490
+ "single_word": false,
2491
+ "special": true
2492
+ },
2493
+ "151954": {
2494
+ "content": "|<EXTRA_TOKENS_289>|",
2495
+ "lstrip": false,
2496
+ "normalized": false,
2497
+ "rstrip": false,
2498
+ "single_word": false,
2499
+ "special": true
2500
+ },
2501
+ "151955": {
2502
+ "content": "|<EXTRA_TOKENS_290>|",
2503
+ "lstrip": false,
2504
+ "normalized": false,
2505
+ "rstrip": false,
2506
+ "single_word": false,
2507
+ "special": true
2508
+ },
2509
+ "151956": {
2510
+ "content": "|<EXTRA_TOKENS_291>|",
2511
+ "lstrip": false,
2512
+ "normalized": false,
2513
+ "rstrip": false,
2514
+ "single_word": false,
2515
+ "special": true
2516
+ },
2517
+ "151957": {
2518
+ "content": "|<EXTRA_TOKENS_292>|",
2519
+ "lstrip": false,
2520
+ "normalized": false,
2521
+ "rstrip": false,
2522
+ "single_word": false,
2523
+ "special": true
2524
+ },
2525
+ "151958": {
2526
+ "content": "|<EXTRA_TOKENS_293>|",
2527
+ "lstrip": false,
2528
+ "normalized": false,
2529
+ "rstrip": false,
2530
+ "single_word": false,
2531
+ "special": true
2532
+ },
2533
+ "151959": {
2534
+ "content": "|<EXTRA_TOKENS_294>|",
2535
+ "lstrip": false,
2536
+ "normalized": false,
2537
+ "rstrip": false,
2538
+ "single_word": false,
2539
+ "special": true
2540
+ },
2541
+ "151960": {
2542
+ "content": "|<EXTRA_TOKENS_295>|",
2543
+ "lstrip": false,
2544
+ "normalized": false,
2545
+ "rstrip": false,
2546
+ "single_word": false,
2547
+ "special": true
2548
+ },
2549
+ "151961": {
2550
+ "content": "|<EXTRA_TOKENS_296>|",
2551
+ "lstrip": false,
2552
+ "normalized": false,
2553
+ "rstrip": false,
2554
+ "single_word": false,
2555
+ "special": true
2556
+ },
2557
+ "151962": {
2558
+ "content": "|<EXTRA_TOKENS_297>|",
2559
+ "lstrip": false,
2560
+ "normalized": false,
2561
+ "rstrip": false,
2562
+ "single_word": false,
2563
+ "special": true
2564
+ },
2565
+ "151963": {
2566
+ "content": "|<EXTRA_TOKENS_298>|",
2567
+ "lstrip": false,
2568
+ "normalized": false,
2569
+ "rstrip": false,
2570
+ "single_word": false,
2571
+ "special": true
2572
+ },
2573
+ "151964": {
2574
+ "content": "|<EXTRA_TOKENS_299>|",
2575
+ "lstrip": false,
2576
+ "normalized": false,
2577
+ "rstrip": false,
2578
+ "single_word": false,
2579
+ "special": true
2580
+ },
2581
+ "151965": {
2582
+ "content": "|<EXTRA_TOKENS_300>|",
2583
+ "lstrip": false,
2584
+ "normalized": false,
2585
+ "rstrip": false,
2586
+ "single_word": false,
2587
+ "special": true
2588
+ },
2589
+ "151966": {
2590
+ "content": "|<EXTRA_TOKENS_301>|",
2591
+ "lstrip": false,
2592
+ "normalized": false,
2593
+ "rstrip": false,
2594
+ "single_word": false,
2595
+ "special": true
2596
+ },
2597
+ "151967": {
2598
+ "content": "|<EXTRA_TOKENS_302>|",
2599
+ "lstrip": false,
2600
+ "normalized": false,
2601
+ "rstrip": false,
2602
+ "single_word": false,
2603
+ "special": true
2604
+ },
2605
+ "151968": {
2606
+ "content": "|<EXTRA_TOKENS_303>|",
2607
+ "lstrip": false,
2608
+ "normalized": false,
2609
+ "rstrip": false,
2610
+ "single_word": false,
2611
+ "special": true
2612
+ },
2613
+ "151969": {
2614
+ "content": "|<EXTRA_TOKENS_304>|",
2615
+ "lstrip": false,
2616
+ "normalized": false,
2617
+ "rstrip": false,
2618
+ "single_word": false,
2619
+ "special": true
2620
+ },
2621
+ "151970": {
2622
+ "content": "|<EXTRA_TOKENS_305>|",
2623
+ "lstrip": false,
2624
+ "normalized": false,
2625
+ "rstrip": false,
2626
+ "single_word": false,
2627
+ "special": true
2628
+ },
2629
+ "151971": {
2630
+ "content": "|<EXTRA_TOKENS_306>|",
2631
+ "lstrip": false,
2632
+ "normalized": false,
2633
+ "rstrip": false,
2634
+ "single_word": false,
2635
+ "special": true
2636
+ },
2637
+ "151972": {
2638
+ "content": "|<EXTRA_TOKENS_307>|",
2639
+ "lstrip": false,
2640
+ "normalized": false,
2641
+ "rstrip": false,
2642
+ "single_word": false,
2643
+ "special": true
2644
+ },
2645
+ "151973": {
2646
+ "content": "|<EXTRA_TOKENS_308>|",
2647
+ "lstrip": false,
2648
+ "normalized": false,
2649
+ "rstrip": false,
2650
+ "single_word": false,
2651
+ "special": true
2652
+ },
2653
+ "151974": {
2654
+ "content": "|<EXTRA_TOKENS_309>|",
2655
+ "lstrip": false,
2656
+ "normalized": false,
2657
+ "rstrip": false,
2658
+ "single_word": false,
2659
+ "special": true
2660
+ },
2661
+ "151975": {
2662
+ "content": "|<EXTRA_TOKENS_310>|",
2663
+ "lstrip": false,
2664
+ "normalized": false,
2665
+ "rstrip": false,
2666
+ "single_word": false,
2667
+ "special": true
2668
+ },
2669
+ "151976": {
2670
+ "content": "|<EXTRA_TOKENS_311>|",
2671
+ "lstrip": false,
2672
+ "normalized": false,
2673
+ "rstrip": false,
2674
+ "single_word": false,
2675
+ "special": true
2676
+ },
2677
+ "151977": {
2678
+ "content": "|<EXTRA_TOKENS_312>|",
2679
+ "lstrip": false,
2680
+ "normalized": false,
2681
+ "rstrip": false,
2682
+ "single_word": false,
2683
+ "special": true
2684
+ },
2685
+ "151978": {
2686
+ "content": "|<EXTRA_TOKENS_313>|",
2687
+ "lstrip": false,
2688
+ "normalized": false,
2689
+ "rstrip": false,
2690
+ "single_word": false,
2691
+ "special": true
2692
+ },
2693
+ "151979": {
2694
+ "content": "|<EXTRA_TOKENS_314>|",
2695
+ "lstrip": false,
2696
+ "normalized": false,
2697
+ "rstrip": false,
2698
+ "single_word": false,
2699
+ "special": true
2700
+ },
2701
+ "151980": {
2702
+ "content": "|<EXTRA_TOKENS_315>|",
2703
+ "lstrip": false,
2704
+ "normalized": false,
2705
+ "rstrip": false,
2706
+ "single_word": false,
2707
+ "special": true
2708
+ },
2709
+ "151981": {
2710
+ "content": "|<EXTRA_TOKENS_316>|",
2711
+ "lstrip": false,
2712
+ "normalized": false,
2713
+ "rstrip": false,
2714
+ "single_word": false,
2715
+ "special": true
2716
+ },
2717
+ "151982": {
2718
+ "content": "|<EXTRA_TOKENS_317>|",
2719
+ "lstrip": false,
2720
+ "normalized": false,
2721
+ "rstrip": false,
2722
+ "single_word": false,
2723
+ "special": true
2724
+ },
2725
+ "151983": {
2726
+ "content": "|<EXTRA_TOKENS_318>|",
2727
+ "lstrip": false,
2728
+ "normalized": false,
2729
+ "rstrip": false,
2730
+ "single_word": false,
2731
+ "special": true
2732
+ },
2733
+ "151984": {
2734
+ "content": "|<EXTRA_TOKENS_319>|",
2735
+ "lstrip": false,
2736
+ "normalized": false,
2737
+ "rstrip": false,
2738
+ "single_word": false,
2739
+ "special": true
2740
+ },
2741
+ "151985": {
2742
+ "content": "|<EXTRA_TOKENS_320>|",
2743
+ "lstrip": false,
2744
+ "normalized": false,
2745
+ "rstrip": false,
2746
+ "single_word": false,
2747
+ "special": true
2748
+ },
2749
+ "151986": {
2750
+ "content": "|<EXTRA_TOKENS_321>|",
2751
+ "lstrip": false,
2752
+ "normalized": false,
2753
+ "rstrip": false,
2754
+ "single_word": false,
2755
+ "special": true
2756
+ },
2757
+ "151987": {
2758
+ "content": "|<EXTRA_TOKENS_322>|",
2759
+ "lstrip": false,
2760
+ "normalized": false,
2761
+ "rstrip": false,
2762
+ "single_word": false,
2763
+ "special": true
2764
+ },
2765
+ "151988": {
2766
+ "content": "|<EXTRA_TOKENS_323>|",
2767
+ "lstrip": false,
2768
+ "normalized": false,
2769
+ "rstrip": false,
2770
+ "single_word": false,
2771
+ "special": true
2772
+ },
2773
+ "151989": {
2774
+ "content": "|<EXTRA_TOKENS_324>|",
2775
+ "lstrip": false,
2776
+ "normalized": false,
2777
+ "rstrip": false,
2778
+ "single_word": false,
2779
+ "special": true
2780
+ },
2781
+ "151990": {
2782
+ "content": "|<EXTRA_TOKENS_325>|",
2783
+ "lstrip": false,
2784
+ "normalized": false,
2785
+ "rstrip": false,
2786
+ "single_word": false,
2787
+ "special": true
2788
+ },
2789
+ "151991": {
2790
+ "content": "|<EXTRA_TOKENS_326>|",
2791
+ "lstrip": false,
2792
+ "normalized": false,
2793
+ "rstrip": false,
2794
+ "single_word": false,
2795
+ "special": true
2796
+ },
2797
+ "151992": {
2798
+ "content": "|<EXTRA_TOKENS_327>|",
2799
+ "lstrip": false,
2800
+ "normalized": false,
2801
+ "rstrip": false,
2802
+ "single_word": false,
2803
+ "special": true
2804
+ },
2805
+ "151993": {
2806
+ "content": "|<EXTRA_TOKENS_328>|",
2807
+ "lstrip": false,
2808
+ "normalized": false,
2809
+ "rstrip": false,
2810
+ "single_word": false,
2811
+ "special": true
2812
+ },
2813
+ "151994": {
2814
+ "content": "|<EXTRA_TOKENS_329>|",
2815
+ "lstrip": false,
2816
+ "normalized": false,
2817
+ "rstrip": false,
2818
+ "single_word": false,
2819
+ "special": true
2820
+ },
2821
+ "151995": {
2822
+ "content": "|<EXTRA_TOKENS_330>|",
2823
+ "lstrip": false,
2824
+ "normalized": false,
2825
+ "rstrip": false,
2826
+ "single_word": false,
2827
+ "special": true
2828
+ },
2829
+ "151996": {
2830
+ "content": "|<EXTRA_TOKENS_331>|",
2831
+ "lstrip": false,
2832
+ "normalized": false,
2833
+ "rstrip": false,
2834
+ "single_word": false,
2835
+ "special": true
2836
+ },
2837
+ "151997": {
2838
+ "content": "|<EXTRA_TOKENS_332>|",
2839
+ "lstrip": false,
2840
+ "normalized": false,
2841
+ "rstrip": false,
2842
+ "single_word": false,
2843
+ "special": true
2844
+ },
2845
+ "151998": {
2846
+ "content": "|<EXTRA_TOKENS_333>|",
2847
+ "lstrip": false,
2848
+ "normalized": false,
2849
+ "rstrip": false,
2850
+ "single_word": false,
2851
+ "special": true
2852
+ },
2853
+ "151999": {
2854
+ "content": "|<EXTRA_TOKENS_334>|",
2855
+ "lstrip": false,
2856
+ "normalized": false,
2857
+ "rstrip": false,
2858
+ "single_word": false,
2859
+ "special": true
2860
+ },
2861
+ "152000": {
2862
+ "content": "|<EXTRA_TOKENS_335>|",
2863
+ "lstrip": false,
2864
+ "normalized": false,
2865
+ "rstrip": false,
2866
+ "single_word": false,
2867
+ "special": true
2868
+ },
2869
+ "152001": {
2870
+ "content": "|<EXTRA_TOKENS_336>|",
2871
+ "lstrip": false,
2872
+ "normalized": false,
2873
+ "rstrip": false,
2874
+ "single_word": false,
2875
+ "special": true
2876
+ },
2877
+ "152002": {
2878
+ "content": "|<EXTRA_TOKENS_337>|",
2879
+ "lstrip": false,
2880
+ "normalized": false,
2881
+ "rstrip": false,
2882
+ "single_word": false,
2883
+ "special": true
2884
+ },
2885
+ "152003": {
2886
+ "content": "|<EXTRA_TOKENS_338>|",
2887
+ "lstrip": false,
2888
+ "normalized": false,
2889
+ "rstrip": false,
2890
+ "single_word": false,
2891
+ "special": true
2892
+ },
2893
+ "152004": {
2894
+ "content": "|<EXTRA_TOKENS_339>|",
2895
+ "lstrip": false,
2896
+ "normalized": false,
2897
+ "rstrip": false,
2898
+ "single_word": false,
2899
+ "special": true
2900
+ },
2901
+ "152005": {
2902
+ "content": "|<EXTRA_TOKENS_340>|",
2903
+ "lstrip": false,
2904
+ "normalized": false,
2905
+ "rstrip": false,
2906
+ "single_word": false,
2907
+ "special": true
2908
+ },
2909
+ "152006": {
2910
+ "content": "|<EXTRA_TOKENS_341>|",
2911
+ "lstrip": false,
2912
+ "normalized": false,
2913
+ "rstrip": false,
2914
+ "single_word": false,
2915
+ "special": true
2916
+ },
2917
+ "152007": {
2918
+ "content": "|<EXTRA_TOKENS_342>|",
2919
+ "lstrip": false,
2920
+ "normalized": false,
2921
+ "rstrip": false,
2922
+ "single_word": false,
2923
+ "special": true
2924
+ },
2925
+ "152008": {
2926
+ "content": "|<EXTRA_TOKENS_343>|",
2927
+ "lstrip": false,
2928
+ "normalized": false,
2929
+ "rstrip": false,
2930
+ "single_word": false,
2931
+ "special": true
2932
+ },
2933
+ "152009": {
2934
+ "content": "|<EXTRA_TOKENS_344>|",
2935
+ "lstrip": false,
2936
+ "normalized": false,
2937
+ "rstrip": false,
2938
+ "single_word": false,
2939
+ "special": true
2940
+ },
2941
+ "152010": {
2942
+ "content": "|<EXTRA_TOKENS_345>|",
2943
+ "lstrip": false,
2944
+ "normalized": false,
2945
+ "rstrip": false,
2946
+ "single_word": false,
2947
+ "special": true
2948
+ },
2949
+ "152011": {
2950
+ "content": "|<EXTRA_TOKENS_346>|",
2951
+ "lstrip": false,
2952
+ "normalized": false,
2953
+ "rstrip": false,
2954
+ "single_word": false,
2955
+ "special": true
2956
+ },
2957
+ "152012": {
2958
+ "content": "|<EXTRA_TOKENS_347>|",
2959
+ "lstrip": false,
2960
+ "normalized": false,
2961
+ "rstrip": false,
2962
+ "single_word": false,
2963
+ "special": true
2964
+ },
2965
+ "152013": {
2966
+ "content": "|<EXTRA_TOKENS_348>|",
2967
+ "lstrip": false,
2968
+ "normalized": false,
2969
+ "rstrip": false,
2970
+ "single_word": false,
2971
+ "special": true
2972
+ },
2973
+ "152014": {
2974
+ "content": "|<EXTRA_TOKENS_349>|",
2975
+ "lstrip": false,
2976
+ "normalized": false,
2977
+ "rstrip": false,
2978
+ "single_word": false,
2979
+ "special": true
2980
+ },
2981
+ "152015": {
2982
+ "content": "|<EXTRA_TOKENS_350>|",
2983
+ "lstrip": false,
2984
+ "normalized": false,
2985
+ "rstrip": false,
2986
+ "single_word": false,
2987
+ "special": true
2988
+ },
2989
+ "152016": {
2990
+ "content": "|<EXTRA_TOKENS_351>|",
2991
+ "lstrip": false,
2992
+ "normalized": false,
2993
+ "rstrip": false,
2994
+ "single_word": false,
2995
+ "special": true
2996
+ },
2997
+ "152017": {
2998
+ "content": "|<EXTRA_TOKENS_352>|",
2999
+ "lstrip": false,
3000
+ "normalized": false,
3001
+ "rstrip": false,
3002
+ "single_word": false,
3003
+ "special": true
3004
+ },
3005
+ "152018": {
3006
+ "content": "|<EXTRA_TOKENS_353>|",
3007
+ "lstrip": false,
3008
+ "normalized": false,
3009
+ "rstrip": false,
3010
+ "single_word": false,
3011
+ "special": true
3012
+ },
3013
+ "152019": {
3014
+ "content": "|<EXTRA_TOKENS_354>|",
3015
+ "lstrip": false,
3016
+ "normalized": false,
3017
+ "rstrip": false,
3018
+ "single_word": false,
3019
+ "special": true
3020
+ },
3021
+ "152020": {
3022
+ "content": "|<EXTRA_TOKENS_355>|",
3023
+ "lstrip": false,
3024
+ "normalized": false,
3025
+ "rstrip": false,
3026
+ "single_word": false,
3027
+ "special": true
3028
+ },
3029
+ "152021": {
3030
+ "content": "|<EXTRA_TOKENS_356>|",
3031
+ "lstrip": false,
3032
+ "normalized": false,
3033
+ "rstrip": false,
3034
+ "single_word": false,
3035
+ "special": true
3036
+ },
3037
+ "152022": {
3038
+ "content": "|<EXTRA_TOKENS_357>|",
3039
+ "lstrip": false,
3040
+ "normalized": false,
3041
+ "rstrip": false,
3042
+ "single_word": false,
3043
+ "special": true
3044
+ },
3045
+ "152023": {
3046
+ "content": "|<EXTRA_TOKENS_358>|",
3047
+ "lstrip": false,
3048
+ "normalized": false,
3049
+ "rstrip": false,
3050
+ "single_word": false,
3051
+ "special": true
3052
+ },
3053
+ "152024": {
3054
+ "content": "|<EXTRA_TOKENS_359>|",
3055
+ "lstrip": false,
3056
+ "normalized": false,
3057
+ "rstrip": false,
3058
+ "single_word": false,
3059
+ "special": true
3060
+ },
3061
+ "152025": {
3062
+ "content": "|<EXTRA_TOKENS_360>|",
3063
+ "lstrip": false,
3064
+ "normalized": false,
3065
+ "rstrip": false,
3066
+ "single_word": false,
3067
+ "special": true
3068
+ },
3069
+ "152026": {
3070
+ "content": "|<EXTRA_TOKENS_361>|",
3071
+ "lstrip": false,
3072
+ "normalized": false,
3073
+ "rstrip": false,
3074
+ "single_word": false,
3075
+ "special": true
3076
+ },
3077
+ "152027": {
3078
+ "content": "|<EXTRA_TOKENS_362>|",
3079
+ "lstrip": false,
3080
+ "normalized": false,
3081
+ "rstrip": false,
3082
+ "single_word": false,
3083
+ "special": true
3084
+ },
3085
+ "152028": {
3086
+ "content": "|<EXTRA_TOKENS_363>|",
3087
+ "lstrip": false,
3088
+ "normalized": false,
3089
+ "rstrip": false,
3090
+ "single_word": false,
3091
+ "special": true
3092
+ },
3093
+ "152029": {
3094
+ "content": "|<EXTRA_TOKENS_364>|",
3095
+ "lstrip": false,
3096
+ "normalized": false,
3097
+ "rstrip": false,
3098
+ "single_word": false,
3099
+ "special": true
3100
+ },
3101
+ "152030": {
3102
+ "content": "|<EXTRA_TOKENS_365>|",
3103
+ "lstrip": false,
3104
+ "normalized": false,
3105
+ "rstrip": false,
3106
+ "single_word": false,
3107
+ "special": true
3108
+ },
3109
+ "152031": {
3110
+ "content": "|<EXTRA_TOKENS_366>|",
3111
+ "lstrip": false,
3112
+ "normalized": false,
3113
+ "rstrip": false,
3114
+ "single_word": false,
3115
+ "special": true
3116
+ },
3117
+ "152032": {
3118
+ "content": "|<EXTRA_TOKENS_367>|",
3119
+ "lstrip": false,
3120
+ "normalized": false,
3121
+ "rstrip": false,
3122
+ "single_word": false,
3123
+ "special": true
3124
+ },
3125
+ "152033": {
3126
+ "content": "|<EXTRA_TOKENS_368>|",
3127
+ "lstrip": false,
3128
+ "normalized": false,
3129
+ "rstrip": false,
3130
+ "single_word": false,
3131
+ "special": true
3132
+ },
3133
+ "152034": {
3134
+ "content": "|<EXTRA_TOKENS_369>|",
3135
+ "lstrip": false,
3136
+ "normalized": false,
3137
+ "rstrip": false,
3138
+ "single_word": false,
3139
+ "special": true
3140
+ },
3141
+ "152035": {
3142
+ "content": "|<EXTRA_TOKENS_370>|",
3143
+ "lstrip": false,
3144
+ "normalized": false,
3145
+ "rstrip": false,
3146
+ "single_word": false,
3147
+ "special": true
3148
+ },
3149
+ "152036": {
3150
+ "content": "|<EXTRA_TOKENS_371>|",
3151
+ "lstrip": false,
3152
+ "normalized": false,
3153
+ "rstrip": false,
3154
+ "single_word": false,
3155
+ "special": true
3156
+ },
3157
+ "152037": {
3158
+ "content": "|<EXTRA_TOKENS_372>|",
3159
+ "lstrip": false,
3160
+ "normalized": false,
3161
+ "rstrip": false,
3162
+ "single_word": false,
3163
+ "special": true
3164
+ },
3165
+ "152038": {
3166
+ "content": "|<EXTRA_TOKENS_373>|",
3167
+ "lstrip": false,
3168
+ "normalized": false,
3169
+ "rstrip": false,
3170
+ "single_word": false,
3171
+ "special": true
3172
+ },
3173
+ "152039": {
3174
+ "content": "|<EXTRA_TOKENS_374>|",
3175
+ "lstrip": false,
3176
+ "normalized": false,
3177
+ "rstrip": false,
3178
+ "single_word": false,
3179
+ "special": true
3180
+ },
3181
+ "152040": {
3182
+ "content": "|<EXTRA_TOKENS_375>|",
3183
+ "lstrip": false,
3184
+ "normalized": false,
3185
+ "rstrip": false,
3186
+ "single_word": false,
3187
+ "special": true
3188
+ },
3189
+ "152041": {
3190
+ "content": "|<EXTRA_TOKENS_376>|",
3191
+ "lstrip": false,
3192
+ "normalized": false,
3193
+ "rstrip": false,
3194
+ "single_word": false,
3195
+ "special": true
3196
+ },
3197
+ "152042": {
3198
+ "content": "|<EXTRA_TOKENS_377>|",
3199
+ "lstrip": false,
3200
+ "normalized": false,
3201
+ "rstrip": false,
3202
+ "single_word": false,
3203
+ "special": true
3204
+ },
3205
+ "152043": {
3206
+ "content": "|<EXTRA_TOKENS_378>|",
3207
+ "lstrip": false,
3208
+ "normalized": false,
3209
+ "rstrip": false,
3210
+ "single_word": false,
3211
+ "special": true
3212
+ },
3213
+ "152044": {
3214
+ "content": "|<EXTRA_TOKENS_379>|",
3215
+ "lstrip": false,
3216
+ "normalized": false,
3217
+ "rstrip": false,
3218
+ "single_word": false,
3219
+ "special": true
3220
+ },
3221
+ "152045": {
3222
+ "content": "|<EXTRA_TOKENS_380>|",
3223
+ "lstrip": false,
3224
+ "normalized": false,
3225
+ "rstrip": false,
3226
+ "single_word": false,
3227
+ "special": true
3228
+ },
3229
+ "152046": {
3230
+ "content": "|<EXTRA_TOKENS_381>|",
3231
+ "lstrip": false,
3232
+ "normalized": false,
3233
+ "rstrip": false,
3234
+ "single_word": false,
3235
+ "special": true
3236
+ },
3237
+ "152047": {
3238
+ "content": "|<EXTRA_TOKENS_382>|",
3239
+ "lstrip": false,
3240
+ "normalized": false,
3241
+ "rstrip": false,
3242
+ "single_word": false,
3243
+ "special": true
3244
+ },
3245
+ "152048": {
3246
+ "content": "|<EXTRA_TOKENS_383>|",
3247
+ "lstrip": false,
3248
+ "normalized": false,
3249
+ "rstrip": false,
3250
+ "single_word": false,
3251
+ "special": true
3252
+ },
3253
+ "152049": {
3254
+ "content": "|<EXTRA_TOKENS_384>|",
3255
+ "lstrip": false,
3256
+ "normalized": false,
3257
+ "rstrip": false,
3258
+ "single_word": false,
3259
+ "special": true
3260
+ },
3261
+ "152050": {
3262
+ "content": "|<EXTRA_TOKENS_385>|",
3263
+ "lstrip": false,
3264
+ "normalized": false,
3265
+ "rstrip": false,
3266
+ "single_word": false,
3267
+ "special": true
3268
+ },
3269
+ "152051": {
3270
+ "content": "|<EXTRA_TOKENS_386>|",
3271
+ "lstrip": false,
3272
+ "normalized": false,
3273
+ "rstrip": false,
3274
+ "single_word": false,
3275
+ "special": true
3276
+ },
3277
+ "152052": {
3278
+ "content": "|<EXTRA_TOKENS_387>|",
3279
+ "lstrip": false,
3280
+ "normalized": false,
3281
+ "rstrip": false,
3282
+ "single_word": false,
3283
+ "special": true
3284
+ },
3285
+ "152053": {
3286
+ "content": "|<EXTRA_TOKENS_388>|",
3287
+ "lstrip": false,
3288
+ "normalized": false,
3289
+ "rstrip": false,
3290
+ "single_word": false,
3291
+ "special": true
3292
+ },
3293
+ "152054": {
3294
+ "content": "|<EXTRA_TOKENS_389>|",
3295
+ "lstrip": false,
3296
+ "normalized": false,
3297
+ "rstrip": false,
3298
+ "single_word": false,
3299
+ "special": true
3300
+ },
3301
+ "152055": {
3302
+ "content": "|<EXTRA_TOKENS_390>|",
3303
+ "lstrip": false,
3304
+ "normalized": false,
3305
+ "rstrip": false,
3306
+ "single_word": false,
3307
+ "special": true
3308
+ },
3309
+ "152056": {
3310
+ "content": "|<EXTRA_TOKENS_391>|",
3311
+ "lstrip": false,
3312
+ "normalized": false,
3313
+ "rstrip": false,
3314
+ "single_word": false,
3315
+ "special": true
3316
+ },
3317
+ "152057": {
3318
+ "content": "|<EXTRA_TOKENS_392>|",
3319
+ "lstrip": false,
3320
+ "normalized": false,
3321
+ "rstrip": false,
3322
+ "single_word": false,
3323
+ "special": true
3324
+ },
3325
+ "152058": {
3326
+ "content": "|<EXTRA_TOKENS_393>|",
3327
+ "lstrip": false,
3328
+ "normalized": false,
3329
+ "rstrip": false,
3330
+ "single_word": false,
3331
+ "special": true
3332
+ },
3333
+ "152059": {
3334
+ "content": "|<EXTRA_TOKENS_394>|",
3335
+ "lstrip": false,
3336
+ "normalized": false,
3337
+ "rstrip": false,
3338
+ "single_word": false,
3339
+ "special": true
3340
+ },
3341
+ "152060": {
3342
+ "content": "|<EXTRA_TOKENS_395>|",
3343
+ "lstrip": false,
3344
+ "normalized": false,
3345
+ "rstrip": false,
3346
+ "single_word": false,
3347
+ "special": true
3348
+ },
3349
+ "152061": {
3350
+ "content": "|<EXTRA_TOKENS_396>|",
3351
+ "lstrip": false,
3352
+ "normalized": false,
3353
+ "rstrip": false,
3354
+ "single_word": false,
3355
+ "special": true
3356
+ },
3357
+ "152062": {
3358
+ "content": "|<EXTRA_TOKENS_397>|",
3359
+ "lstrip": false,
3360
+ "normalized": false,
3361
+ "rstrip": false,
3362
+ "single_word": false,
3363
+ "special": true
3364
+ },
3365
+ "152063": {
3366
+ "content": "|<EXTRA_TOKENS_398>|",
3367
+ "lstrip": false,
3368
+ "normalized": false,
3369
+ "rstrip": false,
3370
+ "single_word": false,
3371
+ "special": true
3372
+ },
3373
+ "152064": {
3374
+ "content": "<im_start>",
3375
+ "lstrip": false,
3376
+ "normalized": false,
3377
+ "rstrip": false,
3378
+ "single_word": false,
3379
+ "special": true
3380
+ },
3381
+ "152065": {
3382
+ "content": "<im_end>",
3383
+ "lstrip": false,
3384
+ "normalized": false,
3385
+ "rstrip": false,
3386
+ "single_word": false,
3387
+ "special": true
3388
+ },
3389
+ "152066": {
3390
+ "content": "<im_patch>",
3391
+ "lstrip": false,
3392
+ "normalized": false,
3393
+ "rstrip": false,
3394
+ "single_word": false,
3395
+ "special": true
3396
+ },
3397
+ "152067": {
3398
+ "content": "<im_col>",
3399
+ "lstrip": false,
3400
+ "normalized": false,
3401
+ "rstrip": false,
3402
+ "single_word": false,
3403
+ "special": true
3404
+ },
3405
+ "152068": {
3406
+ "content": "<|image|>",
3407
+ "lstrip": false,
3408
+ "normalized": false,
3409
+ "rstrip": false,
3410
+ "single_word": false,
3411
+ "special": true
3412
+ },
3413
+ "152069": {
3414
+ "content": "<im_low>",
3415
+ "lstrip": false,
3416
+ "normalized": false,
3417
+ "rstrip": false,
3418
+ "single_word": false,
3419
+ "special": true
3420
+ }
3421
+ },
3422
+ "additional_special_tokens": [
3423
+ "|<EXTRA_TOKENS_0>|",
3424
+ "|<EXTRA_TOKENS_1>|",
3425
+ "|<EXTRA_TOKENS_2>|",
3426
+ "|<EXTRA_TOKENS_3>|",
3427
+ "|<EXTRA_TOKENS_4>|",
3428
+ "|<EXTRA_TOKENS_5>|",
3429
+ "|<EXTRA_TOKENS_6>|",
3430
+ "|<EXTRA_TOKENS_7>|",
3431
+ "|<EXTRA_TOKENS_8>|",
3432
+ "|<EXTRA_TOKENS_9>|",
3433
+ "|<EXTRA_TOKENS_10>|",
3434
+ "|<EXTRA_TOKENS_11>|",
3435
+ "|<EXTRA_TOKENS_12>|",
3436
+ "|<EXTRA_TOKENS_13>|",
3437
+ "|<EXTRA_TOKENS_14>|",
3438
+ "|<EXTRA_TOKENS_15>|",
3439
+ "|<EXTRA_TOKENS_16>|",
3440
+ "|<EXTRA_TOKENS_17>|",
3441
+ "|<EXTRA_TOKENS_18>|",
3442
+ "|<EXTRA_TOKENS_19>|",
3443
+ "|<EXTRA_TOKENS_20>|",
3444
+ "|<EXTRA_TOKENS_21>|",
3445
+ "|<EXTRA_TOKENS_22>|",
3446
+ "|<EXTRA_TOKENS_23>|",
3447
+ "|<EXTRA_TOKENS_24>|",
3448
+ "|<EXTRA_TOKENS_25>|",
3449
+ "|<EXTRA_TOKENS_26>|",
3450
+ "|<EXTRA_TOKENS_27>|",
3451
+ "|<EXTRA_TOKENS_28>|",
3452
+ "|<EXTRA_TOKENS_29>|",
3453
+ "|<EXTRA_TOKENS_30>|",
3454
+ "|<EXTRA_TOKENS_31>|",
3455
+ "|<EXTRA_TOKENS_32>|",
3456
+ "|<EXTRA_TOKENS_33>|",
3457
+ "|<EXTRA_TOKENS_34>|",
3458
+ "|<EXTRA_TOKENS_35>|",
3459
+ "|<EXTRA_TOKENS_36>|",
3460
+ "|<EXTRA_TOKENS_37>|",
3461
+ "|<EXTRA_TOKENS_38>|",
3462
+ "|<EXTRA_TOKENS_39>|",
3463
+ "|<EXTRA_TOKENS_40>|",
3464
+ "|<EXTRA_TOKENS_41>|",
3465
+ "|<EXTRA_TOKENS_42>|",
3466
+ "|<EXTRA_TOKENS_43>|",
3467
+ "|<EXTRA_TOKENS_44>|",
3468
+ "|<EXTRA_TOKENS_45>|",
3469
+ "|<EXTRA_TOKENS_46>|",
3470
+ "|<EXTRA_TOKENS_47>|",
3471
+ "|<EXTRA_TOKENS_48>|",
3472
+ "|<EXTRA_TOKENS_49>|",
3473
+ "|<EXTRA_TOKENS_50>|",
3474
+ "|<EXTRA_TOKENS_51>|",
3475
+ "|<EXTRA_TOKENS_52>|",
3476
+ "|<EXTRA_TOKENS_53>|",
3477
+ "|<EXTRA_TOKENS_54>|",
3478
+ "|<EXTRA_TOKENS_55>|",
3479
+ "|<EXTRA_TOKENS_56>|",
3480
+ "|<EXTRA_TOKENS_57>|",
3481
+ "|<EXTRA_TOKENS_58>|",
3482
+ "|<EXTRA_TOKENS_59>|",
3483
+ "|<EXTRA_TOKENS_60>|",
3484
+ "|<EXTRA_TOKENS_61>|",
3485
+ "|<EXTRA_TOKENS_62>|",
3486
+ "|<EXTRA_TOKENS_63>|",
3487
+ "|<EXTRA_TOKENS_64>|",
3488
+ "|<EXTRA_TOKENS_65>|",
3489
+ "|<EXTRA_TOKENS_66>|",
3490
+ "|<EXTRA_TOKENS_67>|",
3491
+ "|<EXTRA_TOKENS_68>|",
3492
+ "|<EXTRA_TOKENS_69>|",
3493
+ "|<EXTRA_TOKENS_70>|",
3494
+ "|<EXTRA_TOKENS_71>|",
3495
+ "|<EXTRA_TOKENS_72>|",
3496
+ "|<EXTRA_TOKENS_73>|",
3497
+ "|<EXTRA_TOKENS_74>|",
3498
+ "|<EXTRA_TOKENS_75>|",
3499
+ "|<EXTRA_TOKENS_76>|",
3500
+ "|<EXTRA_TOKENS_77>|",
3501
+ "|<EXTRA_TOKENS_78>|",
3502
+ "|<EXTRA_TOKENS_79>|",
3503
+ "|<EXTRA_TOKENS_80>|",
3504
+ "|<EXTRA_TOKENS_81>|",
3505
+ "|<EXTRA_TOKENS_82>|",
3506
+ "|<EXTRA_TOKENS_83>|",
3507
+ "|<EXTRA_TOKENS_84>|",
3508
+ "|<EXTRA_TOKENS_85>|",
3509
+ "|<EXTRA_TOKENS_86>|",
3510
+ "|<EXTRA_TOKENS_87>|",
3511
+ "|<EXTRA_TOKENS_88>|",
3512
+ "|<EXTRA_TOKENS_89>|",
3513
+ "|<EXTRA_TOKENS_90>|",
3514
+ "|<EXTRA_TOKENS_91>|",
3515
+ "|<EXTRA_TOKENS_92>|",
3516
+ "|<EXTRA_TOKENS_93>|",
3517
+ "|<EXTRA_TOKENS_94>|",
3518
+ "|<EXTRA_TOKENS_95>|",
3519
+ "|<EXTRA_TOKENS_96>|",
3520
+ "|<EXTRA_TOKENS_97>|",
3521
+ "|<EXTRA_TOKENS_98>|",
3522
+ "|<EXTRA_TOKENS_99>|",
3523
+ "|<EXTRA_TOKENS_100>|",
3524
+ "|<EXTRA_TOKENS_101>|",
3525
+ "|<EXTRA_TOKENS_102>|",
3526
+ "|<EXTRA_TOKENS_103>|",
3527
+ "|<EXTRA_TOKENS_104>|",
3528
+ "|<EXTRA_TOKENS_105>|",
3529
+ "|<EXTRA_TOKENS_106>|",
3530
+ "|<EXTRA_TOKENS_107>|",
3531
+ "|<EXTRA_TOKENS_108>|",
3532
+ "|<EXTRA_TOKENS_109>|",
3533
+ "|<EXTRA_TOKENS_110>|",
3534
+ "|<EXTRA_TOKENS_111>|",
3535
+ "|<EXTRA_TOKENS_112>|",
3536
+ "|<EXTRA_TOKENS_113>|",
3537
+ "|<EXTRA_TOKENS_114>|",
3538
+ "|<EXTRA_TOKENS_115>|",
3539
+ "|<EXTRA_TOKENS_116>|",
3540
+ "|<EXTRA_TOKENS_117>|",
3541
+ "|<EXTRA_TOKENS_118>|",
3542
+ "|<EXTRA_TOKENS_119>|",
3543
+ "|<EXTRA_TOKENS_120>|",
3544
+ "|<EXTRA_TOKENS_121>|",
3545
+ "|<EXTRA_TOKENS_122>|",
3546
+ "|<EXTRA_TOKENS_123>|",
3547
+ "|<EXTRA_TOKENS_124>|",
3548
+ "|<EXTRA_TOKENS_125>|",
3549
+ "|<EXTRA_TOKENS_126>|",
3550
+ "|<EXTRA_TOKENS_127>|",
3551
+ "|<EXTRA_TOKENS_128>|",
3552
+ "|<EXTRA_TOKENS_129>|",
3553
+ "|<EXTRA_TOKENS_130>|",
3554
+ "|<EXTRA_TOKENS_131>|",
3555
+ "|<EXTRA_TOKENS_132>|",
3556
+ "|<EXTRA_TOKENS_133>|",
3557
+ "|<EXTRA_TOKENS_134>|",
3558
+ "|<EXTRA_TOKENS_135>|",
3559
+ "|<EXTRA_TOKENS_136>|",
3560
+ "|<EXTRA_TOKENS_137>|",
3561
+ "|<EXTRA_TOKENS_138>|",
3562
+ "|<EXTRA_TOKENS_139>|",
3563
+ "|<EXTRA_TOKENS_140>|",
3564
+ "|<EXTRA_TOKENS_141>|",
3565
+ "|<EXTRA_TOKENS_142>|",
3566
+ "|<EXTRA_TOKENS_143>|",
3567
+ "|<EXTRA_TOKENS_144>|",
3568
+ "|<EXTRA_TOKENS_145>|",
3569
+ "|<EXTRA_TOKENS_146>|",
3570
+ "|<EXTRA_TOKENS_147>|",
3571
+ "|<EXTRA_TOKENS_148>|",
3572
+ "|<EXTRA_TOKENS_149>|",
3573
+ "|<EXTRA_TOKENS_150>|",
3574
+ "|<EXTRA_TOKENS_151>|",
3575
+ "|<EXTRA_TOKENS_152>|",
3576
+ "|<EXTRA_TOKENS_153>|",
3577
+ "|<EXTRA_TOKENS_154>|",
3578
+ "|<EXTRA_TOKENS_155>|",
3579
+ "|<EXTRA_TOKENS_156>|",
3580
+ "|<EXTRA_TOKENS_157>|",
3581
+ "|<EXTRA_TOKENS_158>|",
3582
+ "|<EXTRA_TOKENS_159>|",
3583
+ "|<EXTRA_TOKENS_160>|",
3584
+ "|<EXTRA_TOKENS_161>|",
3585
+ "|<EXTRA_TOKENS_162>|",
3586
+ "|<EXTRA_TOKENS_163>|",
3587
+ "|<EXTRA_TOKENS_164>|",
3588
+ "|<EXTRA_TOKENS_165>|",
3589
+ "|<EXTRA_TOKENS_166>|",
3590
+ "|<EXTRA_TOKENS_167>|",
3591
+ "|<EXTRA_TOKENS_168>|",
3592
+ "|<EXTRA_TOKENS_169>|",
3593
+ "|<EXTRA_TOKENS_170>|",
3594
+ "|<EXTRA_TOKENS_171>|",
3595
+ "|<EXTRA_TOKENS_172>|",
3596
+ "|<EXTRA_TOKENS_173>|",
3597
+ "|<EXTRA_TOKENS_174>|",
3598
+ "|<EXTRA_TOKENS_175>|",
3599
+ "|<EXTRA_TOKENS_176>|",
3600
+ "|<EXTRA_TOKENS_177>|",
3601
+ "|<EXTRA_TOKENS_178>|",
3602
+ "|<EXTRA_TOKENS_179>|",
3603
+ "|<EXTRA_TOKENS_180>|",
3604
+ "|<EXTRA_TOKENS_181>|",
3605
+ "|<EXTRA_TOKENS_182>|",
3606
+ "|<EXTRA_TOKENS_183>|",
3607
+ "|<EXTRA_TOKENS_184>|",
3608
+ "|<EXTRA_TOKENS_185>|",
3609
+ "|<EXTRA_TOKENS_186>|",
3610
+ "|<EXTRA_TOKENS_187>|",
3611
+ "|<EXTRA_TOKENS_188>|",
3612
+ "|<EXTRA_TOKENS_189>|",
3613
+ "|<EXTRA_TOKENS_190>|",
3614
+ "|<EXTRA_TOKENS_191>|",
3615
+ "|<EXTRA_TOKENS_192>|",
3616
+ "|<EXTRA_TOKENS_193>|",
3617
+ "|<EXTRA_TOKENS_194>|",
3618
+ "|<EXTRA_TOKENS_195>|",
3619
+ "|<EXTRA_TOKENS_196>|",
3620
+ "|<EXTRA_TOKENS_197>|",
3621
+ "|<EXTRA_TOKENS_198>|",
3622
+ "|<EXTRA_TOKENS_199>|",
3623
+ "|<EXTRA_TOKENS_200>|",
3624
+ "|<EXTRA_TOKENS_201>|",
3625
+ "|<EXTRA_TOKENS_202>|",
3626
+ "|<EXTRA_TOKENS_203>|",
3627
+ "|<EXTRA_TOKENS_204>|",
3628
+ "|<EXTRA_TOKENS_205>|",
3629
+ "|<EXTRA_TOKENS_206>|",
3630
+ "|<EXTRA_TOKENS_207>|",
3631
+ "|<EXTRA_TOKENS_208>|",
3632
+ "|<EXTRA_TOKENS_209>|",
3633
+ "|<EXTRA_TOKENS_210>|",
3634
+ "|<EXTRA_TOKENS_211>|",
3635
+ "|<EXTRA_TOKENS_212>|",
3636
+ "|<EXTRA_TOKENS_213>|",
3637
+ "|<EXTRA_TOKENS_214>|",
3638
+ "|<EXTRA_TOKENS_215>|",
3639
+ "|<EXTRA_TOKENS_216>|",
3640
+ "|<EXTRA_TOKENS_217>|",
3641
+ "|<EXTRA_TOKENS_218>|",
3642
+ "|<EXTRA_TOKENS_219>|",
3643
+ "|<EXTRA_TOKENS_220>|",
3644
+ "|<EXTRA_TOKENS_221>|",
3645
+ "|<EXTRA_TOKENS_222>|",
3646
+ "|<EXTRA_TOKENS_223>|",
3647
+ "|<EXTRA_TOKENS_224>|",
3648
+ "|<EXTRA_TOKENS_225>|",
3649
+ "|<EXTRA_TOKENS_226>|",
3650
+ "|<EXTRA_TOKENS_227>|",
3651
+ "|<EXTRA_TOKENS_228>|",
3652
+ "|<EXTRA_TOKENS_229>|",
3653
+ "|<EXTRA_TOKENS_230>|",
3654
+ "|<EXTRA_TOKENS_231>|",
3655
+ "|<EXTRA_TOKENS_232>|",
3656
+ "|<EXTRA_TOKENS_233>|",
3657
+ "|<EXTRA_TOKENS_234>|",
3658
+ "|<EXTRA_TOKENS_235>|",
3659
+ "|<EXTRA_TOKENS_236>|",
3660
+ "|<EXTRA_TOKENS_237>|",
3661
+ "|<EXTRA_TOKENS_238>|",
3662
+ "|<EXTRA_TOKENS_239>|",
3663
+ "|<EXTRA_TOKENS_240>|",
3664
+ "|<EXTRA_TOKENS_241>|",
3665
+ "|<EXTRA_TOKENS_242>|",
3666
+ "|<EXTRA_TOKENS_243>|",
3667
+ "|<EXTRA_TOKENS_244>|",
3668
+ "|<EXTRA_TOKENS_245>|",
3669
+ "|<EXTRA_TOKENS_246>|",
3670
+ "|<EXTRA_TOKENS_247>|",
3671
+ "|<EXTRA_TOKENS_248>|",
3672
+ "|<EXTRA_TOKENS_249>|",
3673
+ "|<EXTRA_TOKENS_250>|",
3674
+ "|<EXTRA_TOKENS_251>|",
3675
+ "|<EXTRA_TOKENS_252>|",
3676
+ "|<EXTRA_TOKENS_253>|",
3677
+ "|<EXTRA_TOKENS_254>|",
3678
+ "|<EXTRA_TOKENS_255>|",
3679
+ "|<EXTRA_TOKENS_256>|",
3680
+ "|<EXTRA_TOKENS_257>|",
3681
+ "|<EXTRA_TOKENS_258>|",
3682
+ "|<EXTRA_TOKENS_259>|",
3683
+ "|<EXTRA_TOKENS_260>|",
3684
+ "|<EXTRA_TOKENS_261>|",
3685
+ "|<EXTRA_TOKENS_262>|",
3686
+ "|<EXTRA_TOKENS_263>|",
3687
+ "|<EXTRA_TOKENS_264>|",
3688
+ "|<EXTRA_TOKENS_265>|",
3689
+ "|<EXTRA_TOKENS_266>|",
3690
+ "|<EXTRA_TOKENS_267>|",
3691
+ "|<EXTRA_TOKENS_268>|",
3692
+ "|<EXTRA_TOKENS_269>|",
3693
+ "|<EXTRA_TOKENS_270>|",
3694
+ "|<EXTRA_TOKENS_271>|",
3695
+ "|<EXTRA_TOKENS_272>|",
3696
+ "|<EXTRA_TOKENS_273>|",
3697
+ "|<EXTRA_TOKENS_274>|",
3698
+ "|<EXTRA_TOKENS_275>|",
3699
+ "|<EXTRA_TOKENS_276>|",
3700
+ "|<EXTRA_TOKENS_277>|",
3701
+ "|<EXTRA_TOKENS_278>|",
3702
+ "|<EXTRA_TOKENS_279>|",
3703
+ "|<EXTRA_TOKENS_280>|",
3704
+ "|<EXTRA_TOKENS_281>|",
3705
+ "|<EXTRA_TOKENS_282>|",
3706
+ "|<EXTRA_TOKENS_283>|",
3707
+ "|<EXTRA_TOKENS_284>|",
3708
+ "|<EXTRA_TOKENS_285>|",
3709
+ "|<EXTRA_TOKENS_286>|",
3710
+ "|<EXTRA_TOKENS_287>|",
3711
+ "|<EXTRA_TOKENS_288>|",
3712
+ "|<EXTRA_TOKENS_289>|",
3713
+ "|<EXTRA_TOKENS_290>|",
3714
+ "|<EXTRA_TOKENS_291>|",
3715
+ "|<EXTRA_TOKENS_292>|",
3716
+ "|<EXTRA_TOKENS_293>|",
3717
+ "|<EXTRA_TOKENS_294>|",
3718
+ "|<EXTRA_TOKENS_295>|",
3719
+ "|<EXTRA_TOKENS_296>|",
3720
+ "|<EXTRA_TOKENS_297>|",
3721
+ "|<EXTRA_TOKENS_298>|",
3722
+ "|<EXTRA_TOKENS_299>|",
3723
+ "|<EXTRA_TOKENS_300>|",
3724
+ "|<EXTRA_TOKENS_301>|",
3725
+ "|<EXTRA_TOKENS_302>|",
3726
+ "|<EXTRA_TOKENS_303>|",
3727
+ "|<EXTRA_TOKENS_304>|",
3728
+ "|<EXTRA_TOKENS_305>|",
3729
+ "|<EXTRA_TOKENS_306>|",
3730
+ "|<EXTRA_TOKENS_307>|",
3731
+ "|<EXTRA_TOKENS_308>|",
3732
+ "|<EXTRA_TOKENS_309>|",
3733
+ "|<EXTRA_TOKENS_310>|",
3734
+ "|<EXTRA_TOKENS_311>|",
3735
+ "|<EXTRA_TOKENS_312>|",
3736
+ "|<EXTRA_TOKENS_313>|",
3737
+ "|<EXTRA_TOKENS_314>|",
3738
+ "|<EXTRA_TOKENS_315>|",
3739
+ "|<EXTRA_TOKENS_316>|",
3740
+ "|<EXTRA_TOKENS_317>|",
3741
+ "|<EXTRA_TOKENS_318>|",
3742
+ "|<EXTRA_TOKENS_319>|",
3743
+ "|<EXTRA_TOKENS_320>|",
3744
+ "|<EXTRA_TOKENS_321>|",
3745
+ "|<EXTRA_TOKENS_322>|",
3746
+ "|<EXTRA_TOKENS_323>|",
3747
+ "|<EXTRA_TOKENS_324>|",
3748
+ "|<EXTRA_TOKENS_325>|",
3749
+ "|<EXTRA_TOKENS_326>|",
3750
+ "|<EXTRA_TOKENS_327>|",
3751
+ "|<EXTRA_TOKENS_328>|",
3752
+ "|<EXTRA_TOKENS_329>|",
3753
+ "|<EXTRA_TOKENS_330>|",
3754
+ "|<EXTRA_TOKENS_331>|",
3755
+ "|<EXTRA_TOKENS_332>|",
3756
+ "|<EXTRA_TOKENS_333>|",
3757
+ "|<EXTRA_TOKENS_334>|",
3758
+ "|<EXTRA_TOKENS_335>|",
3759
+ "|<EXTRA_TOKENS_336>|",
3760
+ "|<EXTRA_TOKENS_337>|",
3761
+ "|<EXTRA_TOKENS_338>|",
3762
+ "|<EXTRA_TOKENS_339>|",
3763
+ "|<EXTRA_TOKENS_340>|",
3764
+ "|<EXTRA_TOKENS_341>|",
3765
+ "|<EXTRA_TOKENS_342>|",
3766
+ "|<EXTRA_TOKENS_343>|",
3767
+ "|<EXTRA_TOKENS_344>|",
3768
+ "|<EXTRA_TOKENS_345>|",
3769
+ "|<EXTRA_TOKENS_346>|",
3770
+ "|<EXTRA_TOKENS_347>|",
3771
+ "|<EXTRA_TOKENS_348>|",
3772
+ "|<EXTRA_TOKENS_349>|",
3773
+ "|<EXTRA_TOKENS_350>|",
3774
+ "|<EXTRA_TOKENS_351>|",
3775
+ "|<EXTRA_TOKENS_352>|",
3776
+ "|<EXTRA_TOKENS_353>|",
3777
+ "|<EXTRA_TOKENS_354>|",
3778
+ "|<EXTRA_TOKENS_355>|",
3779
+ "|<EXTRA_TOKENS_356>|",
3780
+ "|<EXTRA_TOKENS_357>|",
3781
+ "|<EXTRA_TOKENS_358>|",
3782
+ "|<EXTRA_TOKENS_359>|",
3783
+ "|<EXTRA_TOKENS_360>|",
3784
+ "|<EXTRA_TOKENS_361>|",
3785
+ "|<EXTRA_TOKENS_362>|",
3786
+ "|<EXTRA_TOKENS_363>|",
3787
+ "|<EXTRA_TOKENS_364>|",
3788
+ "|<EXTRA_TOKENS_365>|",
3789
+ "|<EXTRA_TOKENS_366>|",
3790
+ "|<EXTRA_TOKENS_367>|",
3791
+ "|<EXTRA_TOKENS_368>|",
3792
+ "|<EXTRA_TOKENS_369>|",
3793
+ "|<EXTRA_TOKENS_370>|",
3794
+ "|<EXTRA_TOKENS_371>|",
3795
+ "|<EXTRA_TOKENS_372>|",
3796
+ "|<EXTRA_TOKENS_373>|",
3797
+ "|<EXTRA_TOKENS_374>|",
3798
+ "|<EXTRA_TOKENS_375>|",
3799
+ "|<EXTRA_TOKENS_376>|",
3800
+ "|<EXTRA_TOKENS_377>|",
3801
+ "|<EXTRA_TOKENS_378>|",
3802
+ "|<EXTRA_TOKENS_379>|",
3803
+ "|<EXTRA_TOKENS_380>|",
3804
+ "|<EXTRA_TOKENS_381>|",
3805
+ "|<EXTRA_TOKENS_382>|",
3806
+ "|<EXTRA_TOKENS_383>|",
3807
+ "|<EXTRA_TOKENS_384>|",
3808
+ "|<EXTRA_TOKENS_385>|",
3809
+ "|<EXTRA_TOKENS_386>|",
3810
+ "|<EXTRA_TOKENS_387>|",
3811
+ "|<EXTRA_TOKENS_388>|",
3812
+ "|<EXTRA_TOKENS_389>|",
3813
+ "|<EXTRA_TOKENS_390>|",
3814
+ "|<EXTRA_TOKENS_391>|",
3815
+ "|<EXTRA_TOKENS_392>|",
3816
+ "|<EXTRA_TOKENS_393>|",
3817
+ "|<EXTRA_TOKENS_394>|",
3818
+ "|<EXTRA_TOKENS_395>|",
3819
+ "|<EXTRA_TOKENS_396>|",
3820
+ "|<EXTRA_TOKENS_397>|",
3821
+ "|<EXTRA_TOKENS_398>|",
3822
+ "<im_start>",
3823
+ "<im_end>",
3824
+ "<im_patch>",
3825
+ "<im_col>",
3826
+ "<|image|>",
3827
+ "<im_low>"
3828
+ ],
3829
+ "auto_map": {
3830
+ "AutoProcessor": "processing_molmoact.MolmoActProcessor"
3831
+ },
3832
+ "bos_token": "<|endoftext|>",
3833
+ "clean_up_tokenization_spaces": false,
3834
+ "eos_token": "<|endoftext|>",
3835
+ "errors": "replace",
3836
+ "extra_special_tokens": {},
3837
+ "model_max_length": 131072,
3838
+ "pad_token": "<|endoftext|>",
3839
+ "processor_class": "MolmoActProcessor",
3840
+ "split_special_tokens": false,
3841
+ "tokenizer_class": "Qwen2Tokenizer",
3842
+ "unk_token": null
3843
+ }
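
The `|<EXTRA_TOKENS_n>|` entries added above follow a fixed offset: each token's id is 151665 + n, so id 151924 carries `|<EXTRA_TOKENS_259>|` and id 152063 carries `|<EXTRA_TOKENS_398>|`, after which the six image tokens occupy ids 152064-152069. A minimal sketch of that arithmetic; the constant and helper names are ours for illustration, not part of the repo:

```python
# Mapping implied by the added_tokens_decoder entries above:
# |<EXTRA_TOKENS_n>| sits at id 151665 + n, then six image tokens follow.
EXTRA_TOKENS_OFFSET = 151665  # derived: 151924 - 259

def extra_token_id(n: int) -> int:
    """Return the vocab id of |<EXTRA_TOKENS_n>| (0 <= n <= 398)."""
    assert 0 <= n <= 398
    return EXTRA_TOKENS_OFFSET + n

# Spot-checks against the entries shown in the diff.
assert extra_token_id(259) == 151924
assert extra_token_id(398) == 152063

IMAGE_TOKENS = {  # ids 152064-152069, immediately after the extra tokens
    "<im_start>": 152064,
    "<im_end>": 152065,
    "<im_patch>": 152066,
    "<im_col>": 152067,
    "<|image|>": 152068,
    "<im_low>": 152069,
}
```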
vocab.json ADDED
The diff for this file is too large to render. See raw diff
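
Since `auto_map` routes `AutoProcessor` to the custom `processing_molmoact.MolmoActProcessor` shipped with this repo, loading the processor through `transformers` requires `trust_remote_code=True`; the tokenizer itself is a stock `Qwen2Tokenizer` that reuses `<|endoftext|>` as bos, eos, and pad. A minimal loading sketch, assuming a checkout of this repository at `./molmoact` (the path is illustrative):

```python
from transformers import AutoProcessor, AutoTokenizer

REPO = "./molmoact"  # illustrative path to a checkout of this repository

# trust_remote_code is needed because auto_map routes AutoProcessor to the
# custom processing_molmoact.MolmoActProcessor class in the repo.
processor = AutoProcessor.from_pretrained(REPO, trust_remote_code=True)

# The tokenizer is a standard Qwen2Tokenizer, so no remote code is needed.
tokenizer = AutoTokenizer.from_pretrained(REPO)

# Sanity checks against tokenizer_config.json above: <|endoftext|> serves as
# eos and pad, the image tokens resolve to ids 152064-152069, and the
# context limit matches model_max_length.
assert tokenizer.eos_token == "<|endoftext|>"
assert tokenizer.pad_token == "<|endoftext|>"
assert tokenizer.convert_tokens_to_ids("<im_patch>") == 152066
assert tokenizer.model_max_length == 131072
```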