peterroh committed
Commit 4ce7387 · verified · 1 Parent(s): d82e323

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ examples/example1.png filter=lfs diff=lfs merge=lfs -text
+ examples/waybill.png filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,73 @@
1
+ KANANA LICENSE AGREEMENT
2
+
3
+ Kanana Release Date: July 17, 2025
4
+
5
+ This KANANA LICENSE AGREEMENT (this “Agreement”) is made by and between you and Kakao Corp. (“KAKAO”) and governs your use of Kanana Materials that KAKAO provides to you.
6
+ By using, copying, modifying, distributing, performing, or displaying all or part of Kanana Materials, or otherwise accepting the terms and conditions of this Agreement, you agree to be bound by this Agreement. You hereby represent and warrant that (i) you are legally authorized to enter into this Agreement, and (ii) if you are entering into this Agreement on behalf of a legal entity, you have the authority to legally and validly bind such entity.
7
+
8
+ 1. Definition
9
+ 1.1 “Agreement” means the terms and conditions for use, copying, distribution and modification of Kanana Materials as set forth herein.
10
+ 1.2 “KAKAO” means Kakao Corp.
11
+ 1.3 “You” means an individual or legal entity that enters into this Agreement with KAKAO and exercises its rights hereunder or uses Kanana Materials for any purpose. If you enter into this Agreement on behalf of a legal entity, “you” shall include such entity.
12
+ 1.4 “Kanana” means the basic large-scale language model, software, and algorithms distributed by KAKAO under this Agreement, including parameters (such as Model Weights and optimizer status), machine learning model codes, inference/learning/fine-tuning codes, and other related elements.
13
+ 1.5 “Documentation” means the specifications, manuals, and other documentation accompanying Kanana distributed by KAKAO.
14
+ 1.6 “Kanana Materials” means, collectively, Kanana and Documentation, including any portions or components thereof.
15
+ 1.7 “Outputs” means information content generated by operating or otherwise using Kanana Materials.
16
+ 1.8 “Derivative Works” means (i) any modifications to Kanana, (ii) any work of authorship based on Kanana, or (iii) any other designed machine learning models that either directly use the patterns of Model Weights, parameters, operations, and/or outputs or incorporate a substantial part of Kanana’s performance or functional characteristics through methods including, but not limited to, transfer learning, fine-tuning, or knowledge distillation. This includes distillation methods using Kanana’s intermediate data representations or a method based on the synthetic data outputs generated by Kanana; provided, however, that Outputs shall not be deemed to be Derivative Works.
17
+ 1.9 “Model Weights” means a set of numerical parameter values generated during Kanana’s learning process, representing the result of substantial investment and effort by KAKAO.
18
+
19
+ 2. Grant of License and Use Policy
20
+ 2.1 Grant of License. Subject to the terms and conditions of this Agreement, you are granted a non-exclusive, worldwide, non-transferrable, royalty-free limited license under KAKAO’s intellectual property or other rights owned by KAKAO that enables you to access, download, install, copy, use, reproduce, distribute, create Derivative Works of, and make modifications to Kanana Materials.
21
+ 2.2 Policy on Prohibited Use. Your use of Kanana Materials and Derivative Works must comply with applicable laws and regulations and adhere to KAKAO’s Guidelines For Responsible AI (https://www.kakaocorp.com/page/responsible/detail/guidelinesForResponsibleAI), which is hereby incorporated into this Agreement.
22
+ 2.3 This Agreement applies solely to Kanana-*** and shall not apply to any other models distributed by KAKAO under separate licenses. Licenses applicable to such other models shall not apply to Kanana-***.
23
+ 2.4 The license terms applicable to a specific version of Kanana apply exclusively to that version and shall not extend to any other versions. Each version shall be deemed an independent and separate work of authorship.
24
+ 2.5 You may use each version of Kanana only in accordance with the license terms expressly specified for that version, and you shall not claim that the license terms applicable to one version apply to any other version.
25
+ 2.6 You shall not combine different versions of Kanana that are subject to different license terms in order to circumvent any applicable license terms.
26
+
27
+ 3. Redistribution
28
+ 3.1 You may copy, distribute or disclose Kanana, Derivative Works, or any products or services that contain Kanana or Derivative Works; provided, however, that you shall:
29
+ (i) incorporate the compliance obligation set forth in the Policy on Prohibited Use provision of Section 2.2 in any agreement for use and distribution and notify subsequent users that such use restrictions apply;
30
+ (ii) provide any recipients of Kanana Materials or Derivative Works a copy of this Agreement;
31
+ (iii) expressly indicate in any files you have modified that they have been modified by you;
32
+ (iv) include a “Notice” text file that includes the following notice:
33
+ “Kanana is licensed in accordance with the Kanana License Agreement. Copyright © KAKAO Corp. All Rights Reserved.”; and
34
+ (v) clearly display the phrase “Powered by Kanana” on related websites, user interfaces, blog posts, introduction pages, or product documentation in a manner that is easily recognizable to users. In addition, if you use Kanana Materials or their outputs to create, train, improve, or enhance other AI models and distribute them, you must include ‘Kanana’ as a prefix to the name of such AI models.
35
+ 3.2 You may add your own copyright statement to your modifications of Kanana Materials and may provide additional or different license terms and conditions; provided, however, that such additional or different license terms and conditions shall not violate or conflict with any provisions of this Agreement.
36
+
37
+ 4. Additional Commercial Terms
38
+ 4.1 If you wish to engage in any of the following activities using Kanana Materials or any Derivative Works, you must obtain a separate commercial license expressly granted by KAKAO:
39
+ (i) Offering or (re)selling to third parties access to Kanana Materials or any Derivative Works through API, cloud platforms, or other remote access services;
40
+ (ii) Offering or (re)selling to third parties Kanana Materials or any Derivative Works in whole or in part, as part of a system integration (SI) or on-premise deployment solution; or
41
+ (iii) Offering or (re)selling to third parties Kanana Materials or any Derivative Works embedded in on-device domains.
42
+ 4.2 If, as of the Kanana Release Date, the number of monthly active users of the products or services provided by you and/or your affiliates is greater than 10 million in the preceding calendar month, you must obtain a separate commercial license expressly granted by KAKAO.
43
+ 4.3 For clarity, unless your activities or conditions fall within those specified in Sections 4.1 and 4.2 above, you may use Kanana Materials or any Derivative Works for the development and operation of your own services without obtaining a commercial license from KAKAO.
44
+ 4.4 The grant of any commercial license under Sections 4.1 and 4.2 shall be at KAKAO’s sole discretion.
45
+
46
+ 5. Outputs
47
+ KAKAO will not claim any rights to Outputs you generate using Kanana Materials. You shall be solely responsible for Outputs and the use thereof.
48
+
49
+ 6. Disclaimer of Warranty
50
+ Unless required by law, Kanana Materials are provided on an “AS IS” basis, and KAKAO disclaims all warranties of any kind, both express and implied, including, without limitation, any warranties of title, non-infringement, merchantability, or fitness for a particular purpose.
51
+
52
+ 7. Limitation on Liability
53
+ Unless required by law, in no event shall KAKAO be liable to you for damages, including any direct, indirect, special, consequential, incidental, and punitive damages of any character arising out of the use or inability to use Kanana Materials, Derivative Works, or Outputs, even if KAKAO has been advised of the possibility of such damages.
54
+
55
+ 8. Indemnification
56
+ You shall indemnify and hold KAKAO harmless from and against any and all claims that may be filed by a third party as a result of your infringement of any third party’s rights or violation of any applicable law, to the extent caused by your use or distribution of Kanana Materials, Derivative Works, or Outputs; provided, however, that the foregoing shall not apply to claims resulting from KAKAO’s willful or gross negligence.
57
+
58
+ 9. Intellectual Property
59
+ 9.1 This Agreement does not grant you any rights to use KAKAO’s trademarks, service marks, or product names. However, on a limited basis and solely for the purpose of complying with Section 3.1(v), KAKAO authorizes you to use the Kanana trademark, provided that KAKAO may require you to discontinue such use at any time if you impair the value of the Kanana trademark.
60
+ 9.2 KAKAO retains ownership of Kanana Materials and Derivative Works created by KAKAO, but you will retain ownership of any Derivative Works and modifications made by you.
61
+ 9.3 If you bring any legal action or proceeding against KAKAO or a third party alleging that the Kanana Materials, Derivative Works, or Outputs infringe your intellectual property rights, your rights under this Agreement shall automatically terminate as of the date such action is filed.
62
+ 9.4 You acknowledge that Model Weights are a valuable asset of KAKAO. You shall not extract, copy, distribute, modify Model Weights or use them to train new models, except as expressly permitted under this Agreement.
63
+ 9.5 The protections under this Agreement apply to all components of Kanana Materials (irrespective of whether it is recognized as a work of authorship), including, but not limited to, Model Weights, parameters, algorithms, or structures. You may exercise your rights in these components only to the extent expressly permitted under this Agreement.
64
+
65
+ 10. Term and Termination
66
+ The term of this Agreement will commence upon your acceptance of this Agreement or access to Kanana Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. KAKAO may terminate this Agreement if you are in breach of any term or condition of this Agreement. Upon termination of this Agreement, you shall delete and cease use of Kanana Materials and Derivative Works. Sections 5, 6, 7, 8, 10 and 11 shall survive the termination of this Agreement.
67
+
68
+ 11. Governing Law and Arbitration
69
+ 11.1 This Agreement will be governed and construed under the laws of the Republic of Korea, without regard to its conflicts of laws principles.
70
+ 11.2 Any disputes arising out of or in connection with this Agreement shall be finally settled by arbitration in accordance with the International Arbitration Rules of the Korean Commercial Arbitration Board. The number of arbitrators shall be one. The seat, or legal place, of arbitral proceedings shall be Seoul, Republic of Korea. The language to be used in the arbitral proceedings shall be English. Either party may seek interim or provisional relief from a court of competent jurisdiction, which shall not be considered a waiver of any provision in this Section. The arbitral tribunal also has the authority to issue orders for interim or provisional relief.
71
+
72
+ 12. No Waiver
73
+ KAKAO’s failure or delay in exercising any of its rights under this Agreement shall not constitute a waiver of such rights.
README.md ADDED
@@ -0,0 +1,216 @@
1
+ ---
2
+ license: other
3
+ license_name: "kanana"
4
+ license_link: LICENSE
5
+ language:
6
+ - ko
7
+ - en
8
+ base_model:
9
+ - kakaocorp/kanana-1.5-v-3b-instruct
10
+ pipeline_tag: image-text-to-text
11
+ ---
12
+
13
+
14
+
15
+ # kanana-1.5-v-3b-instruct
16
+
17
+ The Unified Foundation Model (UFO) task force of Kanana at Kakao developed and released the Kanana-V family of multimodal large language models (MLLMs), a collection of pretrained text/image-to-text (TI2T) models.
18
+
19
+
20
+
21
+ ## Intended Use
22
+
23
+ kanana-1.5-v-3b-instruct is intended for research and application development in multimodal understanding and text generation tasks. Typical use cases include image captioning, document understanding, OCR-based reasoning, and multimodal instruction following in both English and Korean. The model is optimized for both general-purpose and Korea-specific benchmarks, making it suitable for bilingual environments.
24
+
25
+
26
+
27
+
28
+ ## Model Details
29
+
30
+ - **Developed by:** Unified Foundation Model (UFO) TF at Kakao
31
+ - **Language(s):** ['en', 'ko']
32
+ - **Model Architecture:** kanana-1.5-v-3b-instruct has 3.6B parameters and contains image encoder, C-abstractor, and kanana-1.5-3b-instruct language model.
33
+ - **Input:** The models accept text and image inputs.
34
+ - **Output:** The models generate text only.
35
+ - **Context Length:** 32k
36
+ - **Knowledge Cutoff Date:** June 30, 2024
37
+ - **Model Release Date:** Jul 24, 2025.
38
+ - **License:** kanana-license
39
+
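The composition described above is mirrored in this repository's `config.json`, which nests a `vision_config` (visual encoder), a `projector_config` (the C-Abstractor), and a `text_config` (the kanana-1.5-3b-instruct language model). A minimal sketch for inspecting that structure, assuming the repository's custom code may be trusted in your environment:

```python
from transformers import AutoConfig

# Resolves to configuration.KananaVConfig via the auto_map entry in config.json
config = AutoConfig.from_pretrained(
    "kakaocorp/kanana-1.5-v-3b-instruct", trust_remote_code=True
)

print(config.vision_config.model_type)         # kanana-1.5-v-visual-encoder
print(config.projector_config.projector_type)  # dynamic-c-abs
print(config.text_config.model_type)           # kanana-1.5-3b-instruct
print(config.hidden_size)                      # 2048 (taken from the text config)
```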
40
+
41
+
42
+
43
+ ## Evaluation
44
+
45
+ ### Model Configuration Summary
46
+
47
+ | Model | LLM | Total Parameters |
48
+ |----------------------------|----------------------------------|-----------|
49
+ | **kanana-1.5-v-3b-instruct** | kanana-1.5-3b-instruct | 3.67B |
50
+ | HCX-SEED-Vision-3B | HyperCLOVAX-SEED-Text-Base-3B | 3.72B |
51
+ | Phi-3-Vision | Phi-3-Mini | 4.15B |
52
+ | Qwen2.5-VL-3B-Instruct | Qwen2.5-3B | 3.75B |
53
+ | InternVL2.5-4B | Qwen2.5-3B-Instruct | 3.94B |
54
+
55
+ ### Overview
56
+
57
+ | Model | All | Image (EN) | Image (KO) | IF (EN, KO) |
58
+ |----------------------------|--------|------------|------------|-------------|
59
+ | **kanana-1.5-v-3b-instruct** | 73.22 | 74.00 | 68.27 | 77.39 |
60
+ | HCX-SEED-Vision-3B | 59.00 | 64.81 | 51.96 | 60.23 |
61
+ | Phi-3-Vision | 48.84 | 65.41 | 36.40 | 44.71 |
62
+ | Qwen2.5-VL-3B-Instruct | 63.54 | 73.97 | 60.60 | 56.04 |
63
+ | InternVL2.5-4B | 61.35 | 74.73 | 54.68 | 54.63 |
64
+
65
+ ### Image Benchmarks (EN)
66
+
67
+ | Model | average | MMMU (Val) | MathVista | DocVQA | ChartQA | OCRBench | InfoVQA | TextVQA | RealWorldQA | MMStar | MMB | SEED-image | MMVet | LLaVA-Wild | ScienceQA | AI2D |
68
+ |----------------------------|--------------|------------|-----------|--------|---------|----------|---------|---------|-------------|--------|-------|------------|-------|------------|-----------|-------|
69
+ | **kanana-1.5-v-3b-instruct** | 74.00 | 43.89 | 56.00 | 93.06 | 81.20 | 82.50 | 73.62 | 78.62 | 65.36 | 56.32 | 78.44 | 75.17 | 65.87 | 89.60 | 95.61 | 74.81 |
70
+ | HCX-SEED-Vision-3B | 64.81 | 38.89 | 47.40 | 79.87 | 71.88 | 62.90 | 55.59 | 73.51 | 62.48 | 46.66 | 72.42 | 74.84 | 47.27 | 79.30 | 86.84 | 72.31 |
71
+ | Phi-3-Vision | 65.41 | 45.33 | 43.60 | 87.04 | 81.40 | 63.60 | 54.80 | 69.61 | 59.08 | 47.47 | 73.37 | 71.69 | 45.96 | 70.40 | 90.84 | 76.98 |
72
+ | Qwen2.5-VL-3B-Instruct | 73.97 | 50.67 | 62.00 | 94.19 | 83.60 | 79.10 | 77.22 | 77.77 | 59.74 | 56.26 | 77.75 | 74.83 | 61.06 | 96.90 | 79.69 | 78.79 |
73
+ | InternVL2.5-4B | 74.73 | 52.33 | 61.80 | 92.13 | 82.76 | 79.20 | 69.73 | 78.24 | 62.88 | 59.72 | 81.96 | 75.59 | 61.38 | 86.30 | 97.14 | 79.83 |
74
+
75
+
76
+ ### Image Benchmarks (KO)
77
+
78
+ | Model | average | KoOCRBench | KoMMDBench | KoChartTask | KoMathSolution | KoCosMed | KoFoodMenu | KoEntity | KoExam | KoCelebV2 |
79
+ |----------------------------|--------------|----------------------|------------|-------------|----------------|----------|------------|----------|--------|-----------|
80
+ | **kanana-1.5-v-3b-instruct** | 68.27 | 85.93 | 74.00 | 84.96 | 36.88 | 87.58 | 70.84 | 72.04 | 58.99 | 43.24 |
81
+ | HCX-SEED-Vision-3B | 51.96 | 32.91 | 64.57 | 73.55 | 27.88 | 78.16 | 57.08 | 64.12 | 31.82 | 37.58 |
82
+ | Phi-3-Vision | 36.40 | 25.13 | 37.93 | 52.36 | 38.75 | 56.75 | 34.70 | 31.71 | 24.05 | 26.25 |
83
+ | Qwen2.5-VL-3B-Instruct | 60.60 | 50.67 | 61.75 | 84.96 | 47.13 | 82.01 | 66.32 | 58.15 | 60.68 | 33.72 |
84
+ | InternVL2.5-4B | 54.68 | 20.52 | 62.65 | 82.61 | 46.50 | 82.66 | 65.09 | 50.42 | 47.43 | 34.23 |
85
+
86
+ ### Multimodal Instruction Following Benchmarks (EN, KO)
87
+
88
+ | Model | average | MIABench | MIABench-Ko | MM-IFEval | MM-OmniAlign |
89
+ |----------------------------|--------------|----------|-------------|-----------|--------------|
90
+ | **kanana-1.5-v-3b-instruct** | 77.39 | 90.28 | 91.17 | 56.67 | 71.43 |
91
+ | HCX-SEED-Vision-3B | 60.23 | 85.81 | 81.80 | 47.91 | 25.40 |
92
+ | Phi-3-Vision | 44.71 | 85.78 | 38.35 | 44.37 | 10.32 |
93
+ | Qwen2.5-VL-3B-Instruct | 56.04 | 82.55 | 59.61 | 39.14 | 42.86 |
94
+ | InternVL2.5-4B | 54.63 | 85.68 | 68.35 | 43.06 | 21.43 |
95
+
96
+
97
+
98
+ ### Note on Benchmarking Methodology
99
+
100
+ All benchmarks were re-measured under identical software conditions to ensure fair comparison.
101
+
102
+ - **[VLMEvalKit](https://github.com/open-compass/VLMEvalKit)** was used for MMMU, MathVista, ScienceQA, MIA-Bench, MM-IFEval and MM-OmniAlign.
103
+
104
+ - **[lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval)** was employed for DocVQA, ChartQA, OCRBench, InfoVQA, TextVQA, RealWorldQA, MMStar, MMB, and SEED-image.
105
+
106
+ - HCX-SEED-Vision-3B was evaluated without the use of any auxiliary tools (e.g., external OCR engines or Lens features), as such tools are not publicly available and therefore not included in our evaluation setup.
107
+
108
+ - **Important note for ChartQA**: It was identified that the original rule-based parser used by lmms-eval marked answers ending with a period (".") as incorrect due to parsing issues. To address this, the parser logic was modified to remove any trailing period before parsing the response. All ChartQA evaluations presented here reflect results obtained after applying this parser adjustment (sketched below).
109
+
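The exact lmms-eval parser code is not reproduced here; the following is a minimal sketch of the adjustment described above, namely stripping a trailing period from a response before the rule-based answer comparison. The function name is illustrative and not part of lmms-eval's API.

```python
def normalize_chartqa_answer(response: str) -> str:
    # Drop a single trailing period so that answers like "12." compare as "12".
    response = response.strip()
    if response.endswith("."):
        response = response[:-1]
    return response

assert normalize_chartqa_answer("12.") == "12"
assert normalize_chartqa_answer("Increasing.") == "Increasing"
```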
110
+
111
+ The following in-house benchmarks evaluate Korean-language tasks and Korea-specific knowledge:
112
+
113
+ | Benchmark | Purpose |
114
+ |-----------|---------|
115
+ | **KoOCRBench** | Korean character recognition (OCR) |
116
+ | **KoMMDBench**, **KoEntity**, **KoCelebV2** | Korean knowledge & cultural visual QA |
117
+ | **KoFoodMenu**, **KoCosMed** | Korean text-based visual QA |
118
+ | **KoChartTask** | Chart understanding in Korean |
119
+ | **KoExam**, **KoMathSolution** | Multimodal Problem-solving in Korean (general exams & mathematics) |
120
+ | **MIABench-Ko** | Korean multimodal instruction-following benchmark (derived from MIABench) |
121
+
122
+
123
+
124
+ ## Usage
125
+
126
+ ### Requirements
127
+
128
+ ```
129
+ pip install transformers accelerate timm omegaconf
130
+ ```
131
+ `transformers>=4.45.0` (or the latest version) is recommended.
132
+
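As a quick environment check before running the quickstart, the installed versions of the packages above can be printed; this is a minimal sketch and not part of the official setup:

```python
import importlib.metadata as md

# Packages from the pip install line above
for pkg in ("transformers", "accelerate", "timm", "omegaconf"):
    print(pkg, md.version(pkg))
```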
133
+ ### Quickstart
134
+
135
+ The following is a code snippet that briefly demonstrates how to load a model and process input data using the `AutoClass` from `transformers`.
```python
from PIL import Image
import torch
from transformers import AutoModelForVision2Seq, AutoProcessor

MODEL = "kakaocorp/kanana-1.5-v-3b-instruct"

# Load the model on the available device(s)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)
model.eval()

# Load processor
processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)

# Prepare input batch
batch = []
for _ in range(1):  # dummy loop to demonstrate batch processing
    image_files = [
        "./examples/waybill.png"
    ]

    sample = {
        "image": [Image.open(image_file_path).convert("RGB") for image_file_path in image_files],
        "conv": [
            {"role": "system", "content": "The following is a conversation between a curious human and AI assistant."},
            {"role": "user", "content": " ".join(["<image>"] * len(image_files))},
            # "Organize the sender and recipient information in the photo as JSON."
            {"role": "user", "content": "사진에서 보내는 사람과 받는 사람 정보를 json 형태로 정리해줘."},
        ]
    }

    batch.append(sample)

# Encode and collate the batch into model inputs
inputs = processor.batch_encode_collate(
    batch, padding_side="left", add_generation_prompt=True, max_length=8192
)
inputs = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}

# Set the generation config
gen_kwargs = {
    "max_new_tokens": 2048,
    "temperature": 0,
    "top_p": 1.0,
    "num_beams": 1,
    "do_sample": False,
}

# Generate text
gens = model.generate(
    **inputs,
    **gen_kwargs,
)
text_outputs = processor.tokenizer.batch_decode(gens, skip_special_tokens=True)
print(text_outputs)  # ['```json\n{\n "보내는분": {\n "성명": "카카오",\n "주소": "경기도 성남시 판교역로 166"\n },\n "받는분": {\n "성명": "카나나",\n "주소": "제주도 제주시 첨단로 242"\n }\n}\n```']
```
195
+
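Because the prompt asks for JSON, the decoded output arrives wrapped in a Markdown code fence labeled `json`, as shown in the comment on the final line. A small follow-up sketch for turning that string into a Python dict; it assumes the model actually emits a fenced JSON block, which is not guaranteed for every input:

```python
import json
import re

def extract_json(text: str):
    # Pull the first fenced JSON block out of a response and parse it;
    # fall back to parsing the whole string if no fence is present.
    match = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
    payload = match.group(1) if match else text
    return json.loads(payload)

# text_outputs comes from the quickstart snippet above
parsed = extract_json(text_outputs[0])
print(parsed["보내는분"]["성명"])  # 카카오
```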
196
+
197
+
198
+ ## Limitations
199
+
200
+ - The model may generate inaccurate or misleading content, especially in scenarios requiring precise factual understanding (e.g., scientific diagrams or mathematical reasoning).
201
+ - Performance on languages other than Korean and English has not been evaluated and may be poor.
202
+ - The model is not designed for medical, legal, or other high-stakes domains.
203
+ - The model may reflect social biases present in the pretraining data.
204
+
205
+
206
+
207
+ ## Contributors
208
+ - Beomhee Park, Byeonguk Bae, Byungseok Roh, Daejin Jo, Donghee Son, Dongjin Lee, Hyunwoong Ko, Jaemyung Lee, Jeehye Lee, Sunghun Kang, Wooyoung Kang
209
+ - Listed in alphabetical order (first name)
210
+
211
+
212
+
213
+ ## Contact
214
+ - Kanana MLLM Core Team Technical Support: [email protected]
215
+ - Business & Partnership Contact: [email protected]
216
+
config.json ADDED
@@ -0,0 +1,262 @@
1
+ {
2
+ "architectures": [
3
+ "KananaVForConditionalGeneration"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration.KananaVConfig",
7
+ "AutoModelForVision2Seq": "modeling.KananaVForConditionalGeneration",
8
+ "AutoImageProcessor": "processing_image.KananaVImageProcessor",
9
+ "AutoProcessor": "processing.KananaVProcessor"
10
+ },
11
+ "model_type": "kanana-1.5-v",
12
+ "plora_config": null,
13
+ "projector_config": {
14
+ "_attn_implementation_autoset": false,
15
+ "add_cross_attention": false,
16
+ "architectures": null,
17
+ "bad_words_ids": null,
18
+ "begin_suppress_tokens": null,
19
+ "bos_token_id": null,
20
+ "chunk_size_feed_forward": 0,
21
+ "cross_attention_hidden_size": null,
22
+ "decoder_start_token_id": null,
23
+ "depth": 2,
24
+ "diversity_penalty": 0.0,
25
+ "do_sample": false,
26
+ "early_stopping": false,
27
+ "encoder_hidden_size": 1280,
28
+ "encoder_no_repeat_ngram_size": 0,
29
+ "eos_token_id": null,
30
+ "exponential_decay_length_penalty": null,
31
+ "feature_layer_index": -1,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "hidden_size": 1024,
36
+ "id2label": {
37
+ "0": "LABEL_0",
38
+ "1": "LABEL_1"
39
+ },
40
+ "is_decoder": false,
41
+ "is_encoder_decoder": false,
42
+ "label2id": {
43
+ "LABEL_0": 0,
44
+ "LABEL_1": 1
45
+ },
46
+ "length_penalty": 1.0,
47
+ "max_length": 20,
48
+ "merge_size": 2,
49
+ "min_length": 0,
50
+ "mlp_depth": 2,
51
+ "model_type": "kanana-1.5-v-visual_projector",
52
+ "no_repeat_ngram_size": 0,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_eos_tokens": 0,
56
+ "num_return_sequences": 1,
57
+ "output_attentions": false,
58
+ "output_hidden_size": 2048,
59
+ "output_hidden_states": false,
60
+ "output_scores": false,
61
+ "pad_token_id": null,
62
+ "pos_emb": true,
63
+ "pos_emb_size": 576,
64
+ "prefix": null,
65
+ "prenorm": false,
66
+ "problem_type": null,
67
+ "projector_type": "dynamic-c-abs",
68
+ "pruned_heads": {},
69
+ "remove_invalid_values": false,
70
+ "repetition_penalty": 1.0,
71
+ "return_dict": true,
72
+ "return_dict_in_generate": false,
73
+ "sep_token_id": null,
74
+ "suppress_tokens": null,
75
+ "task_specific_params": null,
76
+ "temperature": 1.0,
77
+ "tf_legacy_loss": false,
78
+ "tie_encoder_decoder": false,
79
+ "tie_word_embeddings": true,
80
+ "tokenizer_class": null,
81
+ "top_k": 50,
82
+ "top_p": 1.0,
83
+ "torch_dtype": null,
84
+ "torchscript": false,
85
+ "typical_p": 1.0,
86
+ "use_bfloat16": false
87
+ },
88
+ "text_config": {
89
+ "_name_or_path": "kakaocorp/kanana-1.5-3b-instruct",
90
+ "_attn_implementation_autoset": false,
91
+ "add_cross_attention": false,
92
+ "architectures": [
93
+ "LlamaForCausalLM"
94
+ ],
95
+ "attention_bias": false,
96
+ "attention_dropout": 0.0,
97
+ "bad_words_ids": null,
98
+ "begin_suppress_tokens": null,
99
+ "bos_token_id": 128000,
100
+ "chunk_size_feed_forward": 0,
101
+ "cross_attention_hidden_size": null,
102
+ "decoder_start_token_id": null,
103
+ "diversity_penalty": 0.0,
104
+ "do_sample": false,
105
+ "early_stopping": false,
106
+ "encoder_no_repeat_ngram_size": 0,
107
+ "eos_token_id": 128009,
108
+ "exponential_decay_length_penalty": null,
109
+ "finetuning_task": null,
110
+ "forced_bos_token_id": null,
111
+ "forced_eos_token_id": null,
112
+ "head_dim": 128,
113
+ "hidden_act": "silu",
114
+ "hidden_size": 2048,
115
+ "id2label": {
116
+ "0": "LABEL_0",
117
+ "1": "LABEL_1"
118
+ },
119
+ "initializer_range": 0.02,
120
+ "intermediate_size": 9216,
121
+ "is_decoder": false,
122
+ "is_encoder_decoder": false,
123
+ "label2id": {
124
+ "LABEL_0": 0,
125
+ "LABEL_1": 1
126
+ },
127
+ "length_penalty": 1.0,
128
+ "max_length": 20,
129
+ "max_position_embeddings": 32768,
130
+ "min_length": 0,
131
+ "mlp_bias": false,
132
+ "model_type": "kanana-1.5-3b-instruct",
133
+ "no_repeat_ngram_size": 0,
134
+ "num_attention_heads": 32,
135
+ "num_beam_groups": 1,
136
+ "num_beams": 1,
137
+ "num_hidden_layers": 32,
138
+ "num_key_value_heads": 8,
139
+ "num_return_sequences": 1,
140
+ "output_attentions": false,
141
+ "output_hidden_states": false,
142
+ "output_scores": false,
143
+ "pad_token_id": 128001,
144
+ "prefix": null,
145
+ "pretraining_tp": 1,
146
+ "problem_type": null,
147
+ "pruned_heads": {},
148
+ "remove_invalid_values": false,
149
+ "repetition_penalty": 1.0,
150
+ "return_dict": true,
151
+ "return_dict_in_generate": false,
152
+ "rms_norm_eps": 1e-05,
153
+ "rope_scaling": null,
154
+ "rope_theta": 8000000.0,
155
+ "sep_token_id": null,
156
+ "suppress_tokens": null,
157
+ "task_specific_params": null,
158
+ "temperature": 1.0,
159
+ "tf_legacy_loss": false,
160
+ "tie_encoder_decoder": false,
161
+ "tie_word_embeddings": false,
162
+ "tokenizer_class": null,
163
+ "top_k": 50,
164
+ "top_p": 1.0,
165
+ "torch_dtype": "bfloat16",
166
+ "torchscript": false,
167
+ "typical_p": 1.0,
168
+ "use_bfloat16": false,
169
+ "use_cache": false,
170
+ "vocab_size": 128259
171
+ },
172
+ "torch_dtype": "bfloat16",
173
+ "transformers_version": "4.51.3",
174
+ "vision_config": {
175
+ "_attn_implementation_autoset": false,
176
+ "add_cross_attention": false,
177
+ "architectures": null,
178
+ "bad_words_ids": null,
179
+ "begin_suppress_tokens": null,
180
+ "bos_token_id": null,
181
+ "chunk_size_feed_forward": 0,
182
+ "cross_attention_hidden_size": null,
183
+ "decoder_start_token_id": null,
184
+ "depth": 32,
185
+ "diversity_penalty": 0.0,
186
+ "do_sample": false,
187
+ "early_stopping": false,
188
+ "embed_dim": 1280,
189
+ "encoder_no_repeat_ngram_size": 0,
190
+ "encoder_type": "qwen2-vl-ve",
191
+ "eos_token_id": null,
192
+ "exponential_decay_length_penalty": null,
193
+ "finetuning_task": null,
194
+ "forced_bos_token_id": null,
195
+ "forced_eos_token_id": null,
196
+ "hidden_act": "quick_gelu",
197
+ "hidden_size": 1280,
198
+ "id2label": {
199
+ "0": "LABEL_0",
200
+ "1": "LABEL_1"
201
+ },
202
+ "image_mean": [
203
+ 0.48145466,
204
+ 0.4578275,
205
+ 0.40821073
206
+ ],
207
+ "image_size": "dynamic",
208
+ "image_std": [
209
+ 0.26862954,
210
+ 0.26130258,
211
+ 0.27577711
212
+ ],
213
+ "in_channels": 3,
214
+ "in_chans": 3,
215
+ "initializer_range": 0.02,
216
+ "is_decoder": false,
217
+ "is_encoder_decoder": false,
218
+ "label2id": {
219
+ "LABEL_0": 0,
220
+ "LABEL_1": 1
221
+ },
222
+ "length_penalty": 1.0,
223
+ "max_length": 20,
224
+ "min_length": 0,
225
+ "mlp_ratio": 4,
226
+ "model_type": "kanana-1.5-v-visual-encoder",
227
+ "no_repeat_ngram_size": 0,
228
+ "num_beam_groups": 1,
229
+ "num_beams": 1,
230
+ "num_heads": 16,
231
+ "num_return_sequences": 1,
232
+ "output_attentions": false,
233
+ "output_hidden_states": false,
234
+ "output_scores": false,
235
+ "pad_token_id": null,
236
+ "patch_size": 14,
237
+ "prefix": null,
238
+ "problem_type": null,
239
+ "pruned_heads": {},
240
+ "remove_invalid_values": false,
241
+ "repetition_penalty": 1.0,
242
+ "return_dict": true,
243
+ "return_dict_in_generate": false,
244
+ "sep_token_id": null,
245
+ "spatial_merge_size": 2,
246
+ "spatial_patch_size": 14,
247
+ "suppress_tokens": null,
248
+ "task_specific_params": null,
249
+ "temperature": 1.0,
250
+ "temporal_patch_size": 2,
251
+ "tf_legacy_loss": false,
252
+ "tie_encoder_decoder": false,
253
+ "tie_word_embeddings": true,
254
+ "tokenizer_class": null,
255
+ "top_k": 50,
256
+ "top_p": 1.0,
257
+ "torch_dtype": "bfloat16",
258
+ "torchscript": false,
259
+ "typical_p": 1.0,
260
+ "use_bfloat16": false
261
+ }
262
+ }
configuration.py ADDED
@@ -0,0 +1,125 @@
import logging

from transformers.configuration_utils import PretrainedConfig
from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD

logger = logging.getLogger("kanana-1.5-v")


class KananaVVisionConfig(PretrainedConfig):
    model_type = "kanana-1.5-v-visual-encoder"
    base_config_key = "vision_config"

    def __init__(
        self,
        depth=32,
        embed_dim=1280,
        mlp_ratio=4,
        num_heads=16,
        in_chans=3,
        hidden_size=1280,
        patch_size=14,
        spatial_merge_size=2,
        spatial_patch_size=14,
        temporal_patch_size=2,
        initializer_range=0.02,
        image_size="dynamic",
        image_mean=OPENAI_CLIP_MEAN,
        image_std=OPENAI_CLIP_STD,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.depth = depth
        self.embed_dim = embed_dim
        self.mlp_ratio = mlp_ratio
        self.num_heads = num_heads
        self.in_chans = in_chans
        self.hidden_size = hidden_size
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.spatial_patch_size = spatial_patch_size
        self.temporal_patch_size = temporal_patch_size
        self.initializer_range = initializer_range
        self.image_size = image_size
        self.image_mean = image_mean
        self.image_std = image_std


class KananaVVisualProjectorConfig(PretrainedConfig):
    model_type = "kanana-1.5-v-visual_projector"
    base_config_key = "projector_config"

    def __init__(
        self,
        depth=2,
        encoder_hidden_size=1280,
        feature_layer_index=-1,
        hidden_size=1024,
        merge_size=2,
        mlp_depth=2,
        num_eos_tokens=0,
        output_hidden_size=2048,
        pos_emb=True,
        pos_emb_size=576,
        prenorm=False,
        projector_type="dynamic-c-abs",
        **kwargs,
    ):
        super().__init__(**kwargs)

        self.depth = depth
        self.encoder_hidden_size = encoder_hidden_size
        self.feature_layer_index = feature_layer_index
        self.hidden_size = hidden_size
        self.merge_size = merge_size
        self.mlp_depth = mlp_depth
        self.num_eos_tokens = num_eos_tokens
        self.output_hidden_size = output_hidden_size
        self.pos_emb = pos_emb
        self.pos_emb_size = pos_emb_size
        self.prenorm = prenorm
        self.projector_type = projector_type


class KananaLanguageConfig(LlamaConfig):
    model_type = "kanana-1.5-3b-instruct"
    base_config_key = "text_config"

    def __init__(
        self,
        **kwargs,
    ):
        super().__init__(**kwargs)


class KananaVConfig(PretrainedConfig):
    model_type = "kanana-1.5-v"
    is_composition = True

    def __init__(
        self,
        vision_config: dict = {},
        projector_config: dict = {},
        text_config: dict = {},
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Vision config
        self.vision_config = KananaVVisionConfig(**vision_config)

        # Visual projector config
        self.projector_config = KananaVVisualProjectorConfig(**projector_config)

        # Language model config
        self.text_config = KananaLanguageConfig(**text_config)

    @property
    def num_visual_tokens(self):
        return "dynamic"

    @property
    def hidden_size(self):
        return self.text_config.hidden_size
examples/waybill.png ADDED

Git LFS Details

  • SHA256: e9a1e1d9ac471583e1c4734787c558b4f36b3816e7c88ccda4952484032eb35d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.43 MB
generation_config.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 128000,
4
+ "eos_token_id": 128009,
5
+ "pad_token_id": 128001,
6
+ "transformers_version": "4.51.3",
7
+ "use_cache": false
8
+ }
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94272a33a98c25bdd9d646f82c13d1cfeae654e7dd9780cef9ff259799621577
3
+ size 4990094968
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90eac19b6578027e8271c0712f7c14a9b2ee0705633a022aa22d31b7b02746cb
3
+ size 2345793064
model.safetensors.index.json ADDED
@@ -0,0 +1,746 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 7335800448
4
+ },
5
+ "weight_map": {
6
+ "abstractor.net.0.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
7
+ "abstractor.net.0.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
8
+ "abstractor.net.0.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
9
+ "abstractor.net.0.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
10
+ "abstractor.net.0.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
11
+ "abstractor.net.0.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
12
+ "abstractor.net.0.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
13
+ "abstractor.net.0.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
14
+ "abstractor.net.0.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
15
+ "abstractor.net.0.b1.downsample.bn.bias": "model-00001-of-00002.safetensors",
16
+ "abstractor.net.0.b1.downsample.bn.weight": "model-00001-of-00002.safetensors",
17
+ "abstractor.net.0.b1.downsample.conv.weight": "model-00001-of-00002.safetensors",
18
+ "abstractor.net.0.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
19
+ "abstractor.net.0.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
20
+ "abstractor.net.0.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
21
+ "abstractor.net.0.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
22
+ "abstractor.net.0.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
23
+ "abstractor.net.0.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
24
+ "abstractor.net.0.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
25
+ "abstractor.net.0.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
26
+ "abstractor.net.0.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
27
+ "abstractor.net.0.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
28
+ "abstractor.net.0.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
29
+ "abstractor.net.0.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
30
+ "abstractor.net.0.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
31
+ "abstractor.net.0.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
32
+ "abstractor.net.0.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
33
+ "abstractor.net.0.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
34
+ "abstractor.net.0.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
35
+ "abstractor.net.2.b1.conv1.bn.bias": "model-00001-of-00002.safetensors",
36
+ "abstractor.net.2.b1.conv1.bn.weight": "model-00001-of-00002.safetensors",
37
+ "abstractor.net.2.b1.conv1.conv.weight": "model-00001-of-00002.safetensors",
38
+ "abstractor.net.2.b1.conv2.bn.bias": "model-00001-of-00002.safetensors",
39
+ "abstractor.net.2.b1.conv2.bn.weight": "model-00001-of-00002.safetensors",
40
+ "abstractor.net.2.b1.conv2.conv.weight": "model-00001-of-00002.safetensors",
41
+ "abstractor.net.2.b1.conv3.bn.bias": "model-00001-of-00002.safetensors",
42
+ "abstractor.net.2.b1.conv3.bn.weight": "model-00001-of-00002.safetensors",
43
+ "abstractor.net.2.b1.conv3.conv.weight": "model-00001-of-00002.safetensors",
44
+ "abstractor.net.2.b1.downsample.bn.bias": "model-00001-of-00002.safetensors",
45
+ "abstractor.net.2.b1.downsample.bn.weight": "model-00001-of-00002.safetensors",
46
+ "abstractor.net.2.b1.downsample.conv.weight": "model-00001-of-00002.safetensors",
47
+ "abstractor.net.2.b1.se.fc1.bias": "model-00001-of-00002.safetensors",
48
+ "abstractor.net.2.b1.se.fc1.weight": "model-00001-of-00002.safetensors",
49
+ "abstractor.net.2.b1.se.fc2.bias": "model-00001-of-00002.safetensors",
50
+ "abstractor.net.2.b1.se.fc2.weight": "model-00001-of-00002.safetensors",
51
+ "abstractor.net.2.b2.conv1.bn.bias": "model-00001-of-00002.safetensors",
52
+ "abstractor.net.2.b2.conv1.bn.weight": "model-00001-of-00002.safetensors",
53
+ "abstractor.net.2.b2.conv1.conv.weight": "model-00001-of-00002.safetensors",
54
+ "abstractor.net.2.b2.conv2.bn.bias": "model-00001-of-00002.safetensors",
55
+ "abstractor.net.2.b2.conv2.bn.weight": "model-00001-of-00002.safetensors",
56
+ "abstractor.net.2.b2.conv2.conv.weight": "model-00001-of-00002.safetensors",
57
+ "abstractor.net.2.b2.conv3.bn.bias": "model-00001-of-00002.safetensors",
58
+ "abstractor.net.2.b2.conv3.bn.weight": "model-00001-of-00002.safetensors",
59
+ "abstractor.net.2.b2.conv3.conv.weight": "model-00001-of-00002.safetensors",
60
+ "abstractor.net.2.b2.se.fc1.bias": "model-00001-of-00002.safetensors",
61
+ "abstractor.net.2.b2.se.fc1.weight": "model-00001-of-00002.safetensors",
62
+ "abstractor.net.2.b2.se.fc2.bias": "model-00001-of-00002.safetensors",
63
+ "abstractor.net.2.b2.se.fc2.weight": "model-00001-of-00002.safetensors",
64
+ "abstractor.pos_emb": "model-00001-of-00002.safetensors",
65
+ "abstractor.readout.0.bias": "model-00001-of-00002.safetensors",
66
+ "abstractor.readout.0.weight": "model-00001-of-00002.safetensors",
67
+ "abstractor.readout.2.bias": "model-00001-of-00002.safetensors",
68
+ "abstractor.readout.2.weight": "model-00001-of-00002.safetensors",
69
+ "language_model.lm_head.weight": "model-00002-of-00002.safetensors",
70
+ "language_model.model.embed_tokens.weight": "model-00001-of-00002.safetensors",
71
+ "language_model.model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
72
+ "language_model.model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
73
+ "language_model.model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
74
+ "language_model.model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
75
+ "language_model.model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
76
+ "language_model.model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
77
+ "language_model.model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
78
+ "language_model.model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
79
+ "language_model.model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
80
+ "language_model.model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
81
+ "language_model.model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
82
+ "language_model.model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
83
+ "language_model.model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
84
+ "language_model.model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
85
+ "language_model.model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
86
+ "language_model.model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
87
+ "language_model.model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
88
+ "language_model.model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
89
+ "language_model.model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "language_model.model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
91
+ "language_model.model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
92
+ "language_model.model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
93
+ "language_model.model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
94
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
95
+ "language_model.model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
96
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
97
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
98
+ "language_model.model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
99
+ "language_model.model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
100
+ "language_model.model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
101
+ "language_model.model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
102
+ "language_model.model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
103
+ "language_model.model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
104
+ "language_model.model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
105
+ "language_model.model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "language_model.model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "language_model.model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
108
+ "language_model.model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
109
+ "language_model.model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
110
+ "language_model.model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
111
+ "language_model.model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
112
+ "language_model.model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
113
+ "language_model.model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
114
+ "language_model.model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
115
+ "language_model.model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
116
+ "language_model.model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
117
+ "language_model.model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
118
+ "language_model.model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
119
+ "language_model.model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
120
+ "language_model.model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
121
+ "language_model.model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
122
+ "language_model.model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
123
+ "language_model.model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
124
+ "language_model.model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
125
+ "language_model.model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
126
+ "language_model.model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
127
+ "language_model.model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
128
+ "language_model.model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
129
+ "language_model.model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
130
+ "language_model.model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
131
+ "language_model.model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
132
+ "language_model.model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
133
+ "language_model.model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
134
+ "language_model.model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
135
+ "language_model.model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
136
+ "language_model.model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
137
+ "language_model.model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
138
+ "language_model.model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
139
+ "language_model.model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
140
+ "language_model.model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
141
+ "language_model.model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
142
+ "language_model.model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
143
+ "language_model.model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
144
+ "language_model.model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
145
+ "language_model.model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
146
+ "language_model.model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
147
+ "language_model.model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
148
+ "language_model.model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
149
+ "language_model.model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
150
+ "language_model.model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
151
+ "language_model.model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
152
+ "language_model.model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
153
+ "language_model.model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
154
+ "language_model.model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
155
+ "language_model.model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
156
+ "language_model.model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
157
+ "language_model.model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
158
+ "language_model.model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
159
+ "language_model.model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
160
+ "language_model.model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
161
+ "language_model.model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
162
+ "language_model.model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
163
+ "language_model.model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
164
+ "language_model.model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
165
+ "language_model.model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
166
+ "language_model.model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
167
+ "language_model.model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
168
+ "language_model.model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
169
+ "language_model.model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
170
+ "language_model.model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
171
+ "language_model.model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
172
+ "language_model.model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
173
+ "language_model.model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
174
+ "language_model.model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
175
+ "language_model.model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
176
+ "language_model.model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
177
+ "language_model.model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
178
+ "language_model.model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
179
+ "language_model.model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
180
+ "language_model.model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
181
+ "language_model.model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
182
+ "language_model.model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
183
+ "language_model.model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
184
+ "language_model.model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
185
+ "language_model.model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
186
+ "language_model.model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
187
+ "language_model.model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
188
+ "language_model.model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "language_model.model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
190
+ "language_model.model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
191
+ "language_model.model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
192
+ "language_model.model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
193
+ "language_model.model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
194
+ "language_model.model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
195
+ "language_model.model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
196
+ "language_model.model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
197
+ "language_model.model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
198
+ "language_model.model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
199
+ "language_model.model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
200
+ "language_model.model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
201
+ "language_model.model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
202
+ "language_model.model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
203
+ "language_model.model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
204
+ "language_model.model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "language_model.model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "language_model.model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "language_model.model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "language_model.model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "language_model.model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "language_model.model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "language_model.model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
212
+ "language_model.model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
213
+ "language_model.model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
214
+ "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
215
+ "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
216
+ "language_model.model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
217
+ "language_model.model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
218
+ "language_model.model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
219
+ "language_model.model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
220
+ "language_model.model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
221
+ "language_model.model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
222
+ "language_model.model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
223
+ "language_model.model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
224
+ "language_model.model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
225
+ "language_model.model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
226
+ "language_model.model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
227
+ "language_model.model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
228
+ "language_model.model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "language_model.model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
230
+ "language_model.model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
231
+ "language_model.model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
232
+ "language_model.model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
233
+ "language_model.model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
234
+ "language_model.model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
235
+ "language_model.model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
236
+ "language_model.model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
237
+ "language_model.model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
238
+ "language_model.model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
239
+ "language_model.model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
240
+ "language_model.model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
241
+ "language_model.model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
242
+ "language_model.model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
243
+ "language_model.model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
244
+ "language_model.model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
245
+ "language_model.model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
246
+ "language_model.model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
247
+ "language_model.model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
248
+ "language_model.model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
249
+ "language_model.model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
250
+ "language_model.model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
251
+ "language_model.model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
252
+ "language_model.model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
253
+ "language_model.model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
254
+ "language_model.model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
255
+ "language_model.model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
256
+ "language_model.model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "language_model.model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "language_model.model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
259
+ "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
260
+ "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
261
+ "language_model.model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
262
+ "language_model.model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
263
+ "language_model.model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
264
+ "language_model.model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
265
+ "language_model.model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
266
+ "language_model.model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
267
+ "language_model.model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
268
+ "language_model.model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
269
+ "language_model.model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
270
+ "language_model.model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
271
+ "language_model.model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
272
+ "language_model.model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
273
+ "language_model.model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
274
+ "language_model.model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
275
+ "language_model.model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
276
+ "language_model.model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
277
+ "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
278
+ "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
279
+ "language_model.model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
280
+ "language_model.model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
281
+ "language_model.model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
282
+ "language_model.model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
283
+ "language_model.model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
284
+ "language_model.model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
285
+ "language_model.model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
286
+ "language_model.model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
287
+ "language_model.model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "language_model.model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
289
+ "language_model.model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
290
+ "language_model.model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
291
+ "language_model.model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
292
+ "language_model.model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
293
+ "language_model.model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
294
+ "language_model.model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
295
+ "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
296
+ "language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
297
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
298
+ "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
299
+ "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
300
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
301
+ "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
302
+ "language_model.model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
303
+ "language_model.model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
306
+ "language_model.model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
307
+ "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
308
+ "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
309
+ "language_model.model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
310
+ "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
311
+ "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
312
+ "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
313
+ "language_model.model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
314
+ "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
315
+ "language_model.model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
316
+ "language_model.model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
317
+ "language_model.model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
318
+ "language_model.model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
319
+ "language_model.model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
320
+ "language_model.model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
321
+ "language_model.model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
322
+ "language_model.model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
323
+ "language_model.model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
324
+ "language_model.model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
325
+ "language_model.model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
326
+ "language_model.model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
327
+ "language_model.model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
328
+ "language_model.model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
329
+ "language_model.model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
330
+ "language_model.model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
331
+ "language_model.model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
332
+ "language_model.model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
333
+ "language_model.model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
334
+ "language_model.model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
335
+ "language_model.model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
336
+ "language_model.model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
337
+ "language_model.model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
338
+ "language_model.model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
339
+ "language_model.model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
340
+ "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
341
+ "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
342
+ "language_model.model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
343
+ "language_model.model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
344
+ "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
345
+ "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
346
+ "language_model.model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
347
+ "language_model.model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
348
+ "language_model.model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
349
+ "language_model.model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
350
+ "language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
351
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
352
+ "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
353
+ "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
354
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
355
+ "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "language_model.model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "language_model.model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
358
+ "language_model.model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
359
+ "language_model.model.norm.weight": "model-00002-of-00002.safetensors",
360
+ "vision_model.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
361
+ "vision_model.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
362
+ "vision_model.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
363
+ "vision_model.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
364
+ "vision_model.blocks.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
365
+ "vision_model.blocks.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
366
+ "vision_model.blocks.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
367
+ "vision_model.blocks.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
368
+ "vision_model.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
369
+ "vision_model.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
370
+ "vision_model.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
371
+ "vision_model.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
372
+ "vision_model.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
373
+ "vision_model.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
374
+ "vision_model.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
375
+ "vision_model.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
376
+ "vision_model.blocks.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
377
+ "vision_model.blocks.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
378
+ "vision_model.blocks.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
379
+ "vision_model.blocks.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
380
+ "vision_model.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
381
+ "vision_model.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
382
+ "vision_model.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
383
+ "vision_model.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
384
+ "vision_model.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
385
+ "vision_model.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
386
+ "vision_model.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
387
+ "vision_model.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
388
+ "vision_model.blocks.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
389
+ "vision_model.blocks.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
390
+ "vision_model.blocks.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
391
+ "vision_model.blocks.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
392
+ "vision_model.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
393
+ "vision_model.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
394
+ "vision_model.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
395
+ "vision_model.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
396
+ "vision_model.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
397
+ "vision_model.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
398
+ "vision_model.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
399
+ "vision_model.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
400
+ "vision_model.blocks.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
401
+ "vision_model.blocks.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
402
+ "vision_model.blocks.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
403
+ "vision_model.blocks.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
404
+ "vision_model.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
405
+ "vision_model.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
406
+ "vision_model.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
407
+ "vision_model.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
408
+ "vision_model.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
409
+ "vision_model.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
410
+ "vision_model.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
411
+ "vision_model.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
412
+ "vision_model.blocks.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
413
+ "vision_model.blocks.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
414
+ "vision_model.blocks.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
415
+ "vision_model.blocks.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
416
+ "vision_model.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
417
+ "vision_model.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
418
+ "vision_model.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
419
+ "vision_model.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
420
+ "vision_model.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
421
+ "vision_model.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
422
+ "vision_model.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
423
+ "vision_model.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
424
+ "vision_model.blocks.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
425
+ "vision_model.blocks.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
426
+ "vision_model.blocks.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
427
+ "vision_model.blocks.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
428
+ "vision_model.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
429
+ "vision_model.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
430
+ "vision_model.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
431
+ "vision_model.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
432
+ "vision_model.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
433
+ "vision_model.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
434
+ "vision_model.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
435
+ "vision_model.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
436
+ "vision_model.blocks.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
437
+ "vision_model.blocks.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
438
+ "vision_model.blocks.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
439
+ "vision_model.blocks.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
440
+ "vision_model.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
441
+ "vision_model.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
442
+ "vision_model.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
443
+ "vision_model.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
444
+ "vision_model.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
445
+ "vision_model.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
446
+ "vision_model.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
447
+ "vision_model.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
448
+ "vision_model.blocks.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
449
+ "vision_model.blocks.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
450
+ "vision_model.blocks.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
451
+ "vision_model.blocks.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
452
+ "vision_model.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
453
+ "vision_model.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
454
+ "vision_model.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
455
+ "vision_model.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
456
+ "vision_model.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
457
+ "vision_model.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
458
+ "vision_model.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
459
+ "vision_model.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
460
+ "vision_model.blocks.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
461
+ "vision_model.blocks.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
462
+ "vision_model.blocks.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
463
+ "vision_model.blocks.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
464
+ "vision_model.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
465
+ "vision_model.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
466
+ "vision_model.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
467
+ "vision_model.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
468
+ "vision_model.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
469
+ "vision_model.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
470
+ "vision_model.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
471
+ "vision_model.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
472
+ "vision_model.blocks.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
473
+ "vision_model.blocks.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
474
+ "vision_model.blocks.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
475
+ "vision_model.blocks.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
476
+ "vision_model.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
477
+ "vision_model.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
478
+ "vision_model.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
479
+ "vision_model.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
480
+ "vision_model.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
481
+ "vision_model.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
482
+ "vision_model.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
483
+ "vision_model.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
484
+ "vision_model.blocks.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
485
+ "vision_model.blocks.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
486
+ "vision_model.blocks.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
487
+ "vision_model.blocks.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
488
+ "vision_model.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
489
+ "vision_model.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
490
+ "vision_model.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
491
+ "vision_model.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
492
+ "vision_model.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
493
+ "vision_model.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
494
+ "vision_model.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
495
+ "vision_model.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
496
+ "vision_model.blocks.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
497
+ "vision_model.blocks.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
498
+ "vision_model.blocks.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
499
+ "vision_model.blocks.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
500
+ "vision_model.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
501
+ "vision_model.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
502
+ "vision_model.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
503
+ "vision_model.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
504
+ "vision_model.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
505
+ "vision_model.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
506
+ "vision_model.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
507
+ "vision_model.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
508
+ "vision_model.blocks.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
509
+ "vision_model.blocks.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
510
+ "vision_model.blocks.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
511
+ "vision_model.blocks.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
512
+ "vision_model.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
513
+ "vision_model.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
514
+ "vision_model.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
515
+ "vision_model.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
516
+ "vision_model.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
517
+ "vision_model.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
518
+ "vision_model.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
519
+ "vision_model.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
520
+ "vision_model.blocks.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
521
+ "vision_model.blocks.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
522
+ "vision_model.blocks.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
523
+ "vision_model.blocks.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
524
+ "vision_model.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
525
+ "vision_model.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
526
+ "vision_model.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
527
+ "vision_model.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
528
+ "vision_model.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
529
+ "vision_model.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
530
+ "vision_model.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
531
+ "vision_model.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
532
+ "vision_model.blocks.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
533
+ "vision_model.blocks.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
534
+ "vision_model.blocks.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
535
+ "vision_model.blocks.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
536
+ "vision_model.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
537
+ "vision_model.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
538
+ "vision_model.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
539
+ "vision_model.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
540
+ "vision_model.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
541
+ "vision_model.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
542
+ "vision_model.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
543
+ "vision_model.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
544
+ "vision_model.blocks.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
545
+ "vision_model.blocks.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
546
+ "vision_model.blocks.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
547
+ "vision_model.blocks.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
548
+ "vision_model.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
549
+ "vision_model.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
550
+ "vision_model.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
551
+ "vision_model.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
552
+ "vision_model.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
553
+ "vision_model.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
554
+ "vision_model.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
555
+ "vision_model.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
556
+ "vision_model.blocks.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
557
+ "vision_model.blocks.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
558
+ "vision_model.blocks.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
559
+ "vision_model.blocks.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
560
+ "vision_model.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
561
+ "vision_model.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
562
+ "vision_model.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
563
+ "vision_model.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
564
+ "vision_model.blocks.24.attn.proj.bias": "model-00001-of-00002.safetensors",
565
+ "vision_model.blocks.24.attn.proj.weight": "model-00001-of-00002.safetensors",
566
+ "vision_model.blocks.24.attn.qkv.bias": "model-00001-of-00002.safetensors",
567
+ "vision_model.blocks.24.attn.qkv.weight": "model-00001-of-00002.safetensors",
568
+ "vision_model.blocks.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
569
+ "vision_model.blocks.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
570
+ "vision_model.blocks.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
571
+ "vision_model.blocks.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
572
+ "vision_model.blocks.24.norm1.bias": "model-00001-of-00002.safetensors",
573
+ "vision_model.blocks.24.norm1.weight": "model-00001-of-00002.safetensors",
574
+ "vision_model.blocks.24.norm2.bias": "model-00001-of-00002.safetensors",
575
+ "vision_model.blocks.24.norm2.weight": "model-00001-of-00002.safetensors",
576
+ "vision_model.blocks.25.attn.proj.bias": "model-00001-of-00002.safetensors",
577
+ "vision_model.blocks.25.attn.proj.weight": "model-00001-of-00002.safetensors",
578
+ "vision_model.blocks.25.attn.qkv.bias": "model-00001-of-00002.safetensors",
579
+ "vision_model.blocks.25.attn.qkv.weight": "model-00001-of-00002.safetensors",
580
+ "vision_model.blocks.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
581
+ "vision_model.blocks.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
582
+ "vision_model.blocks.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
583
+ "vision_model.blocks.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
584
+ "vision_model.blocks.25.norm1.bias": "model-00001-of-00002.safetensors",
585
+ "vision_model.blocks.25.norm1.weight": "model-00001-of-00002.safetensors",
586
+ "vision_model.blocks.25.norm2.bias": "model-00001-of-00002.safetensors",
587
+ "vision_model.blocks.25.norm2.weight": "model-00001-of-00002.safetensors",
588
+ "vision_model.blocks.26.attn.proj.bias": "model-00001-of-00002.safetensors",
589
+ "vision_model.blocks.26.attn.proj.weight": "model-00001-of-00002.safetensors",
590
+ "vision_model.blocks.26.attn.qkv.bias": "model-00001-of-00002.safetensors",
591
+ "vision_model.blocks.26.attn.qkv.weight": "model-00001-of-00002.safetensors",
592
+ "vision_model.blocks.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
593
+ "vision_model.blocks.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
594
+ "vision_model.blocks.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
595
+ "vision_model.blocks.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
596
+ "vision_model.blocks.26.norm1.bias": "model-00001-of-00002.safetensors",
597
+ "vision_model.blocks.26.norm1.weight": "model-00001-of-00002.safetensors",
598
+ "vision_model.blocks.26.norm2.bias": "model-00001-of-00002.safetensors",
599
+ "vision_model.blocks.26.norm2.weight": "model-00001-of-00002.safetensors",
600
+ "vision_model.blocks.27.attn.proj.bias": "model-00001-of-00002.safetensors",
601
+ "vision_model.blocks.27.attn.proj.weight": "model-00001-of-00002.safetensors",
602
+ "vision_model.blocks.27.attn.qkv.bias": "model-00001-of-00002.safetensors",
603
+ "vision_model.blocks.27.attn.qkv.weight": "model-00001-of-00002.safetensors",
604
+ "vision_model.blocks.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
605
+ "vision_model.blocks.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
606
+ "vision_model.blocks.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
607
+ "vision_model.blocks.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
608
+ "vision_model.blocks.27.norm1.bias": "model-00001-of-00002.safetensors",
609
+ "vision_model.blocks.27.norm1.weight": "model-00001-of-00002.safetensors",
610
+ "vision_model.blocks.27.norm2.bias": "model-00001-of-00002.safetensors",
611
+ "vision_model.blocks.27.norm2.weight": "model-00001-of-00002.safetensors",
612
+ "vision_model.blocks.28.attn.proj.bias": "model-00001-of-00002.safetensors",
613
+ "vision_model.blocks.28.attn.proj.weight": "model-00001-of-00002.safetensors",
614
+ "vision_model.blocks.28.attn.qkv.bias": "model-00001-of-00002.safetensors",
615
+ "vision_model.blocks.28.attn.qkv.weight": "model-00001-of-00002.safetensors",
616
+ "vision_model.blocks.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
617
+ "vision_model.blocks.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
618
+ "vision_model.blocks.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
619
+ "vision_model.blocks.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
620
+ "vision_model.blocks.28.norm1.bias": "model-00001-of-00002.safetensors",
621
+ "vision_model.blocks.28.norm1.weight": "model-00001-of-00002.safetensors",
622
+ "vision_model.blocks.28.norm2.bias": "model-00001-of-00002.safetensors",
623
+ "vision_model.blocks.28.norm2.weight": "model-00001-of-00002.safetensors",
624
+ "vision_model.blocks.29.attn.proj.bias": "model-00001-of-00002.safetensors",
625
+ "vision_model.blocks.29.attn.proj.weight": "model-00001-of-00002.safetensors",
626
+ "vision_model.blocks.29.attn.qkv.bias": "model-00001-of-00002.safetensors",
627
+ "vision_model.blocks.29.attn.qkv.weight": "model-00001-of-00002.safetensors",
628
+ "vision_model.blocks.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
629
+ "vision_model.blocks.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
630
+ "vision_model.blocks.29.mlp.fc2.bias": "model-00001-of-00002.safetensors",
631
+ "vision_model.blocks.29.mlp.fc2.weight": "model-00001-of-00002.safetensors",
632
+ "vision_model.blocks.29.norm1.bias": "model-00001-of-00002.safetensors",
633
+ "vision_model.blocks.29.norm1.weight": "model-00001-of-00002.safetensors",
634
+ "vision_model.blocks.29.norm2.bias": "model-00001-of-00002.safetensors",
635
+ "vision_model.blocks.29.norm2.weight": "model-00001-of-00002.safetensors",
636
+ "vision_model.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
637
+ "vision_model.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
638
+ "vision_model.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
639
+ "vision_model.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
640
+ "vision_model.blocks.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
641
+ "vision_model.blocks.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
642
+ "vision_model.blocks.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
643
+ "vision_model.blocks.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
644
+ "vision_model.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
645
+ "vision_model.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
646
+ "vision_model.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
647
+ "vision_model.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
648
+ "vision_model.blocks.30.attn.proj.bias": "model-00001-of-00002.safetensors",
649
+ "vision_model.blocks.30.attn.proj.weight": "model-00001-of-00002.safetensors",
650
+ "vision_model.blocks.30.attn.qkv.bias": "model-00001-of-00002.safetensors",
651
+ "vision_model.blocks.30.attn.qkv.weight": "model-00001-of-00002.safetensors",
652
+ "vision_model.blocks.30.mlp.fc1.bias": "model-00001-of-00002.safetensors",
653
+ "vision_model.blocks.30.mlp.fc1.weight": "model-00001-of-00002.safetensors",
654
+ "vision_model.blocks.30.mlp.fc2.bias": "model-00001-of-00002.safetensors",
655
+ "vision_model.blocks.30.mlp.fc2.weight": "model-00001-of-00002.safetensors",
656
+ "vision_model.blocks.30.norm1.bias": "model-00001-of-00002.safetensors",
657
+ "vision_model.blocks.30.norm1.weight": "model-00001-of-00002.safetensors",
658
+ "vision_model.blocks.30.norm2.bias": "model-00001-of-00002.safetensors",
659
+ "vision_model.blocks.30.norm2.weight": "model-00001-of-00002.safetensors",
660
+ "vision_model.blocks.31.attn.proj.bias": "model-00001-of-00002.safetensors",
661
+ "vision_model.blocks.31.attn.proj.weight": "model-00001-of-00002.safetensors",
662
+ "vision_model.blocks.31.attn.qkv.bias": "model-00001-of-00002.safetensors",
663
+ "vision_model.blocks.31.attn.qkv.weight": "model-00001-of-00002.safetensors",
664
+ "vision_model.blocks.31.mlp.fc1.bias": "model-00001-of-00002.safetensors",
665
+ "vision_model.blocks.31.mlp.fc1.weight": "model-00001-of-00002.safetensors",
666
+ "vision_model.blocks.31.mlp.fc2.bias": "model-00001-of-00002.safetensors",
667
+ "vision_model.blocks.31.mlp.fc2.weight": "model-00001-of-00002.safetensors",
668
+ "vision_model.blocks.31.norm1.bias": "model-00001-of-00002.safetensors",
669
+ "vision_model.blocks.31.norm1.weight": "model-00001-of-00002.safetensors",
670
+ "vision_model.blocks.31.norm2.bias": "model-00001-of-00002.safetensors",
671
+ "vision_model.blocks.31.norm2.weight": "model-00001-of-00002.safetensors",
672
+ "vision_model.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
673
+ "vision_model.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
674
+ "vision_model.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
675
+ "vision_model.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
676
+ "vision_model.blocks.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
677
+ "vision_model.blocks.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
678
+ "vision_model.blocks.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
679
+ "vision_model.blocks.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
680
+ "vision_model.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
681
+ "vision_model.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
682
+ "vision_model.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
683
+ "vision_model.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
684
+ "vision_model.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
685
+ "vision_model.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
686
+ "vision_model.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
687
+ "vision_model.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
688
+ "vision_model.blocks.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
689
+ "vision_model.blocks.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
690
+ "vision_model.blocks.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
691
+ "vision_model.blocks.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
692
+ "vision_model.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
693
+ "vision_model.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
694
+ "vision_model.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
695
+ "vision_model.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
696
+ "vision_model.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
697
+ "vision_model.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
698
+ "vision_model.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
699
+ "vision_model.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
700
+ "vision_model.blocks.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
701
+ "vision_model.blocks.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
702
+ "vision_model.blocks.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
703
+ "vision_model.blocks.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
704
+ "vision_model.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
705
+ "vision_model.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
706
+ "vision_model.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
707
+ "vision_model.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
708
+ "vision_model.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
709
+ "vision_model.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
710
+ "vision_model.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
711
+ "vision_model.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
712
+ "vision_model.blocks.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
713
+ "vision_model.blocks.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
714
+ "vision_model.blocks.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
715
+ "vision_model.blocks.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
716
+ "vision_model.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
717
+ "vision_model.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
718
+ "vision_model.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
719
+ "vision_model.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
720
+ "vision_model.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
721
+ "vision_model.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
722
+ "vision_model.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
723
+ "vision_model.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
724
+ "vision_model.blocks.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
725
+ "vision_model.blocks.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
726
+ "vision_model.blocks.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
727
+ "vision_model.blocks.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
728
+ "vision_model.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
729
+ "vision_model.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
730
+ "vision_model.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
731
+ "vision_model.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
732
+ "vision_model.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
733
+ "vision_model.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
734
+ "vision_model.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
735
+ "vision_model.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
736
+ "vision_model.blocks.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
737
+ "vision_model.blocks.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
738
+ "vision_model.blocks.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
739
+ "vision_model.blocks.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
740
+ "vision_model.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
741
+ "vision_model.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
742
+ "vision_model.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
743
+ "vision_model.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
744
+ "vision_model.patch_embed.proj.weight": "model-00001-of-00002.safetensors"
745
+ }
746
+ }
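The block above closes the "weight_map" of the sharded checkpoint index: every parameter name points at the shard file that stores it (the language-model layers are split across the two shards, while the vision-tower entries shown here all live in the first shard). A minimal sketch for inspecting that mapping directly, assuming the file keeps the standard model.safetensors.index.json name; transformers resolves the same index during from_pretrained, so none of this is required for normal loading:

import json
from collections import Counter

# Load the sharding index added in the diff above.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]

# How many parameters each shard holds, and an example lookup.
print(Counter(weight_map.values()))
print(weight_map["language_model.model.norm.weight"])   # "model-00002-of-00002.safetensors"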
modeling.py ADDED
@@ -0,0 +1,493 @@
1
+ from functools import partial
2
+ import logging
3
+ import re
4
+ from typing import Optional, Tuple, Union
5
+
6
+ from einops import rearrange
7
+ from timm.layers import LayerNorm, LayerNorm2d
8
+ from timm.layers.pos_embed import resample_abs_pos_embed
9
+ from timm.models.regnet import RegStage
10
+ import torch
11
+ from torch import nn
12
+ import torch.nn.functional as F
13
+ import torch.utils.checkpoint
14
+ from transformers import LlamaForCausalLM
15
+ from transformers.modeling_outputs import BaseModelOutput
16
+ from transformers.modeling_utils import PreTrainedModel
17
+ from transformers.models.auto import AutoModelForCausalLM
18
+ from transformers.models.qwen2_vl.configuration_qwen2_vl import (
19
+ Qwen2VLVisionConfig,
20
+ )
21
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import (
22
+ PatchEmbed,
23
+ Qwen2VLPreTrainedModel,
24
+ Qwen2VisionTransformerPretrainedModel,
25
+ Qwen2VLVisionBlock,
26
+ VisionRotaryEmbedding
27
+ )
28
+
29
+ from .configuration import KananaVVisualProjectorConfig, KananaVConfig
30
+
31
+ logger = logging.getLogger("kanana-1.5-v")
32
+
33
+
34
+ def build_pos_embeds(
35
+ config: KananaVVisualProjectorConfig, num_input_tokens: int, vision_hidden_size: int
36
+ ):
37
+ # pos emb
38
+ if config.pos_emb:
39
+ pos_emb = torch.nn.Parameter(torch.zeros(1, num_input_tokens, vision_hidden_size))
40
+ nn.init.trunc_normal_(pos_emb, mean=0.0, std=0.02)
41
+ else:
42
+ pos_emb = None
43
+
44
+ return pos_emb
45
+
46
+
47
+ def build_eos_tokens(config: KananaVVisualProjectorConfig, output_hidden_size: int):
48
+ # think tokens
49
+ num_eos_tokens = config.num_eos_tokens
50
+ if num_eos_tokens:
51
+ eos_tokens = torch.nn.Parameter(torch.randn(1, num_eos_tokens, output_hidden_size))
52
+ nn.init.trunc_normal_(eos_tokens, mean=0.0, std=config.initializer_range)
53
+ else:
54
+ eos_tokens = None
55
+
56
+ return eos_tokens
57
+
58
+
59
+ def build_prenorm(config: KananaVVisualProjectorConfig):
60
+ if getattr(config, "prenorm", False):
61
+ prenorm = LayerNorm(config.encoder_hidden_size)
62
+ else:
63
+ prenorm = None
64
+ return prenorm
65
+
66
+
67
+ def build_mlp(depth: int, hidden_size: int, output_hidden_size: int):
68
+ layers = [nn.Linear(hidden_size, output_hidden_size)]
69
+ for _ in range(1, depth):
70
+ layers.append(nn.SiLU())
71
+ layers.append(nn.Linear(output_hidden_size, output_hidden_size))
72
+ return nn.Sequential(*layers)
73
+
74
+
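# Note on build_mlp: depth counts Linear layers. The first Linear maps
# hidden_size -> output_hidden_size and each further step appends SiLU + Linear
# (output_hidden_size -> output_hidden_size). For example (toy sizes, not the
# model's real dimensions):
#   build_mlp(2, 1024, 4096)
#   -> Sequential(Linear(1024, 4096), SiLU(), Linear(4096, 4096))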
75
+ class PatchMerge(nn.Module):
76
+ def __init__(self, merge_size):
77
+ super().__init__()
78
+ self.merge_size = merge_size
79
+
80
+ def forward(self, x, channel_last=False):
81
+ if channel_last:
82
+ x = rearrange(x, "B H W D -> B D H W")
83
+ _, D, H, W = x.shape
84
+ merged_x = rearrange(
85
+ x, "B D (H h2) (W w2) -> B (D h2 w2) H W", h2=self.merge_size, w2=self.merge_size
86
+ )
87
+ return merged_x
88
+
89
+
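# Shape sketch for PatchMerge: with merge_size m, a (B, D, H, W) feature map is
# regrouped into (B, D * m * m, H // m, W // m): every m x m spatial window is
# folded into the channel dimension, so the number of spatial positions shrinks
# by a factor of m**2. Toy example (illustrative sizes only):
#   >>> PatchMerge(merge_size=2)(torch.randn(1, 8, 4, 4)).shape
#   torch.Size([1, 32, 2, 2])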
90
+ class DynamicCAbstractor(nn.Module):
91
+ """Dynamic C-Abstractor based on RegBlock"""
92
+
93
+ def __init__(self, config: KananaVVisualProjectorConfig, num_input_tokens: int):
94
+ super().__init__()
95
+ self.config = config
96
+ if num_input_tokens == -1:
97
+ num_input_tokens = config.pos_emb_size
98
+ self.num_input_tokens = num_input_tokens
99
+
100
+ self.merge_size = config.merge_size
101
+ self.pos_emb_size = config.pos_emb_size
102
+
103
+ self.eos_tokens = build_eos_tokens(config, config.output_hidden_size)
104
+ self.pos_emb = build_pos_embeds(config, num_input_tokens, config.encoder_hidden_size)
105
+ self.prenorm = build_prenorm(config)
106
+
107
+ self.build_net()
108
+
109
+ def build_net(self):
110
+ encoder_hidden_size = self.config.encoder_hidden_size
111
+ hidden_size = self.config.hidden_size
112
+ output_hidden_size = self.config.output_hidden_size
113
+ depth = self.config.depth
114
+ mlp_depth = self.config.mlp_depth
115
+
116
+ RegBlock = partial(
117
+ RegStage,
118
+ stride=1,
119
+ dilation=1,
120
+ act_layer=nn.SiLU,
121
+ norm_layer=LayerNorm2d,
122
+ )
123
+
124
+ s1 = RegBlock(
125
+ depth,
126
+ encoder_hidden_size,
127
+ hidden_size,
128
+ )
129
+ sampler = PatchMerge(merge_size=self.merge_size)
130
+ s2 = RegBlock(
131
+ depth,
132
+ self.merge_size**2 * hidden_size,
133
+ hidden_size,
134
+ )
135
+
136
+ if depth:
137
+ self.net = nn.ModuleList([s1, sampler, s2])
138
+ self.readout = build_mlp(mlp_depth, hidden_size, output_hidden_size)
139
+ else:
140
+ self.net = sampler
141
+ self.readout = build_mlp(mlp_depth, encoder_hidden_size, output_hidden_size)
142
+
143
+ def forward(self, flattened_visual_embeds, grid_thw, **unused_kwargs):
144
+ n_token_loc = torch.prod(grid_thw, dim=1)
145
+ split_visual_embeds = torch.split(flattened_visual_embeds, n_token_loc.tolist())
146
+
147
+ flattened_visual_embeds = []
148
+ for _visual_embeds, _grid_thw in zip(split_visual_embeds, grid_thw):
149
+ T, H, W = _grid_thw
150
+ assert T == 1, "T must be 1. Video is not supported yet."
151
+ reshaped_visual_embeds = rearrange(
152
+ _visual_embeds, "(t h w) d -> 1 t h w d", t=T, h=H, w=W
153
+ )
154
+ # remove temporal dim
155
+ reshaped_visual_embeds = reshaped_visual_embeds[:, 0]
156
+
157
+ if self.prenorm is not None:
158
+ reshaped_visual_embeds = self.prenorm(reshaped_visual_embeds)
159
+
160
+ if self.pos_emb is not None:
161
+ # interpolate pos emb and add to visual embeds
162
+ _local_pos_emb = resample_abs_pos_embed(
163
+ posemb=self.pos_emb,
164
+ old_size=tuple([int(self.pos_emb_size**0.5)] * 2),
165
+ new_size=(H, W),
166
+ num_prefix_tokens=0,
167
+ )
168
+ _local_pos_emb = rearrange(
169
+ _local_pos_emb,
170
+ "1 (h w) d -> 1 h w d",
171
+ h=H,
172
+ w=W,
173
+ )
174
+ reshaped_visual_embeds = reshaped_visual_embeds + _local_pos_emb
175
+
176
+ reshaped_visual_embeds = self._forward(
177
+ reshaped_visual_embeds,
178
+ input_size=(H, W),
179
+ )
180
+ flattened_visual_embeds.append(reshaped_visual_embeds)
181
+ reshaped_visual_embeds = torch.cat(flattened_visual_embeds, dim=0)
182
+ output = BaseModelOutput(last_hidden_state=reshaped_visual_embeds)
183
+ return output
184
+
185
+ def _forward(self, x, input_size):
186
+ h, w = input_size
187
+ x = rearrange(x, "1 h w d -> 1 d h w", h=h, w=w)
188
+ x = self.net[0](x)
189
+ x = self.net[1](x)
190
+ x = self.net[2](x)
191
+ x = rearrange(x, "1 d h w -> (h w) d")
192
+ x = self.readout(x)
193
+ return x
194
+
195
+
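# Token-count sketch for DynamicCAbstractor: per image, the flattened encoder features
# are reshaped back onto their (H, W) grid (from grid_thw, with T asserted to be 1),
# optionally summed with a resampled positional embedding, passed through
# RegStage -> PatchMerge -> RegStage, and projected by the readout MLP. The PatchMerge
# step is what compresses the sequence handed to the language model. Toy numbers
# (not a real image size):
#   >>> grid_thw = torch.tensor([[1, 32, 24]])
#   >>> tokens_in = int(torch.prod(grid_thw, dim=1))     # 768 encoder tokens
#   >>> tokens_out = tokens_in // config.merge_size**2   # 192 projected tokens for merge_size=2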
196
+ class CustomQwen2VLVE(Qwen2VisionTransformerPretrainedModel):
197
+ config_class = Qwen2VLVisionConfig
198
+ _no_split_modules = ["Qwen2VLVisionBlock"]
199
+
200
+ def __init__(self, config) -> None:
201
+ Qwen2VLPreTrainedModel.__init__(self, config)
202
+ self.spatial_merge_size = config.spatial_merge_size
203
+ self.gradient_checkpointing = False
204
+
205
+ self.patch_embed = PatchEmbed(
206
+ patch_size=config.patch_size,
207
+ temporal_patch_size=config.temporal_patch_size,
208
+ in_channels=config.in_channels,
209
+ embed_dim=config.embed_dim,
210
+ )
211
+
212
+ head_dim = config.embed_dim // config.num_heads
213
+ self.rotary_pos_emb = VisionRotaryEmbedding(head_dim // 2)
214
+
215
+ self.blocks = nn.ModuleList(
216
+ [Qwen2VLVisionBlock(config, config._attn_implementation) for _ in range(config.depth)]
217
+ )
218
+
219
+ def forward(
220
+ self,
221
+ pixel_values: torch.Tensor,
222
+ grid_thw: torch.Tensor,
223
+ output_hidden_states: Optional[bool] = None,
224
+ return_dict: Optional[bool] = None,
225
+ ) -> Union[Tuple, BaseModelOutput]:
226
+ assert return_dict, "Only return_dict=True is supported."
227
+
228
+ encoder_states = () if output_hidden_states else None
229
+
230
+ hidden_states = self.patch_embed(pixel_values)
231
+ rotary_pos_emb = self.rot_pos_emb(grid_thw)
232
+ emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
233
+ position_embeddings = emb.cos(), emb.sin()
234
+
235
+ cu_seqlens = torch.repeat_interleave(
236
+ grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
237
+ ).cumsum(dim=0, dtype=torch.int32)
238
+ cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
239
+
240
+ for blk in self.blocks:
241
+ if output_hidden_states:
242
+ encoder_states = encoder_states + (hidden_states,)
243
+ if self.gradient_checkpointing and self.training:
244
+ layer_outputs = torch.utils.checkpoint.checkpoint(
245
+ blk.__call__,
246
+ hidden_states=hidden_states,
247
+ cu_seqlens=cu_seqlens,
248
+ position_embeddings=position_embeddings,
249
+ use_reentrant=False,
250
+ )
251
+ else:
252
+ layer_outputs = blk(
253
+ hidden_states=hidden_states,
254
+ cu_seqlens=cu_seqlens,
255
+ position_embeddings=position_embeddings,
256
+ )
257
+ hidden_states = layer_outputs
258
+ if output_hidden_states:
259
+ encoder_states = encoder_states + (hidden_states,)
260
+
261
+ if not return_dict:
262
+ return tuple(v for v in [hidden_states, encoder_states] if v is not None)
263
+ return BaseModelOutput(last_hidden_state=hidden_states, hidden_states=encoder_states)
264
+
265
+ def get_num_tokens(self):
266
+ return -1
267
+
268
+
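# cu_seqlens sketch for CustomQwen2VLVE.forward: each image contributes h * w patch
# tokens per temporal frame, and the per-frame lengths are accumulated into offsets so
# that all images in a batch can be packed into a single attention sequence. For example:
#   >>> grid_thw = torch.tensor([[1, 4, 6], [1, 8, 8]])   # two toy images
#   >>> lens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0])
#   >>> F.pad(lens.cumsum(0, dtype=torch.int32), (1, 0), value=0)
#   tensor([ 0, 24, 88], dtype=torch.int32)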
269
+ class KananaVPreTrainedModel(PreTrainedModel):
270
+ """
271
+ An abstract class to handle weights initialization and
272
+ a simple interface for downloading and loading pretrained models.
273
+ """
274
+
275
+ config_class = KananaVConfig
276
+ base_model_prefix = "kanana-1.5-v"
277
+ supports_gradient_checkpointing = True
278
+ _skip_keys_device_placement = "past_key_values"
279
+ _supports_flash_attn_2 = True
280
+ _supports_sdpa = True
281
+ _supports_cache_class = True
282
+ _supports_static_cache = False
283
+
284
+ _keys_to_ignore_on_load_missing = [
285
+ r"position_ids",
286
+ r"language_model.encoder.embed_tokens.weight",
287
+ r"language_model.decoder.embed_tokens.weight",
288
+ r"language_model.lm_head.weight",
289
+ ]
290
+ _no_split_modules = [
291
+ "CustomQwen2VLVE",
292
+ "DynamicCAbstractor",
293
+ "LlamaForCausalLM",
294
+ "Parameter",
295
+ ]
296
+
297
+ def _init_weights(self, module):
298
+ """Initialize the weights"""
299
+ if (
300
+ isinstance(module, nn.Conv2d)
301
+ or isinstance(module, nn.Embedding)
302
+ or isinstance(module, nn.Linear)
303
+ ):
304
+ module.weight.data.normal_(mean=0.0, std=0.02)
305
+ if hasattr(module, "bias") and module.bias is not None:
306
+ module.bias.data.zero_()
307
+ elif isinstance(module, nn.LayerNorm):
308
+ module.bias.data.zero_()
309
+ module.weight.data.fill_(1.0)
310
+ elif isinstance(module, nn.Parameter):
311
+ raise ValueError("Unexpected nn.Parameter passed to _init_weights; parameters should be initialized by their parent module.")
312
+
313
+
314
+ class KananaVForConditionalGeneration(KananaVPreTrainedModel):
315
+ config_class = KananaVConfig
316
+
317
+ def __init__(self, config: KananaVConfig):
318
+ super().__init__(config)
319
+
320
+ logger.info("Build vision model ...")
321
+ self.vision_model = CustomQwen2VLVE._from_config(config.vision_config)
322
+
323
+ logger.info("Build projector ...")
324
+ self.abstractor = DynamicCAbstractor(config.projector_config,
325
+ num_input_tokens=self.vision_model.get_num_tokens())
326
+
327
+ logger.info("Build language model ...")
328
+ self.language_model = LlamaForCausalLM._from_config(config=config.text_config)
329
+
330
+ self.post_init()
331
+
332
+ def forward_vision(self, pixel_values, image_metas: Optional[dict] = None):
333
+ vision_model_args = {
334
+ "pixel_values": pixel_values,
335
+ "return_dict": True,
336
+ "output_hidden_states": True,
337
+ "grid_thw": image_metas["vision_grid_thw"],
338
+ }
339
+ v_outputs = self.vision_model(**vision_model_args)
340
+ layer_index = self.config.projector_config.feature_layer_index
341
+ visual_features = self._get_visual_feature_at(v_outputs.hidden_states, layer_index)
342
+ return visual_features
343
+
344
+ def forward_projector(self, visual_features, image_metas: Optional[dict] = None):
345
+ assert image_metas is not None
346
+ visual_embeds = self.abstractor(
347
+ visual_features,
348
+ grid_thw=image_metas["vision_grid_thw"],
349
+ )["last_hidden_state"]
350
+ return visual_embeds
351
+
352
+ def forward_and_project_vision(self, pixel_values, image_metas: Optional[dict] = None):
353
+ assert pixel_values is not None
354
+ visual_features = self.forward_vision(pixel_values, image_metas=image_metas)
355
+ visual_embeds = self.forward_projector(visual_features, image_metas=image_metas)
356
+ return visual_embeds
357
+
358
+ def _get_visual_feature_at(self, v_output, layer_index):
359
+ if isinstance(layer_index, list):
360
+ visual_features = torch.stack(v_output, dim=1)[:, layer_index] # [B, n_scales, L, dim]
361
+ else:
362
+ visual_features = v_output[layer_index] # [B, L, dim]
363
+ return visual_features
364
+
365
+ def embed_text_tokens(self, input_ids):
366
+ """Embed input_ids into text_embeds, ignoring media tokens (negative values)."""
367
+ input_ids = input_ids.clone()
368
+ input_ids[input_ids < 0] = 0
369
+
370
+ text_embeds = self.language_model.get_input_embeddings()(input_ids)
371
+ if hasattr(self.language_model, "transformer") and hasattr(
372
+ self.language_model.transformer, "word_embeddings_layernorm"
373
+ ):
374
+ text_embeds = self.language_model.transformer.word_embeddings_layernorm(text_embeds)
375
+
376
+ return text_embeds
377
+
378
+ def prepare_mm_inputs(
379
+ self,
380
+ input_ids: torch.FloatTensor,
381
+ pixel_values: Optional[list[torch.FloatTensor]] = None,
382
+ image_metas: Optional[dict] = None,
383
+ attention_mask: Optional[torch.LongTensor] = None,
384
+ ):
385
+ """Prepare multimodal inputs from input_ids and pixel_values."""
386
+ if pixel_values is not None:
387
+ pixel_values = pixel_values.to(self._get_input_dtype())
388
+
389
+ if attention_mask is None:
390
+ attention_mask = input_ids.new_ones(*input_ids.shape)
391
+
392
+ # Get Text Embeddings
393
+ text_embeds = self.embed_text_tokens(input_ids)
394
+ flattened_text_embeds = rearrange(text_embeds, "b l d -> (b l) d")
395
+ flattened_input_ids = rearrange(input_ids, "b l -> (b l)")
396
+
397
+ # Get Visual Embeddings
398
+ if pixel_values is not None:
399
+ flattened_visual_embeds = self.forward_and_project_vision(
400
+ pixel_values, image_metas
401
+ )
402
+ flattened_text_embeds[flattened_input_ids == -1] = flattened_visual_embeds
403
+
404
+ input_embeds = rearrange(
405
+ flattened_text_embeds, "(b l) d -> b l d", b=input_ids.shape[0]
406
+ )
407
+ return_inputs = {
408
+ "inputs_embeds": input_embeds,
409
+ "attention_mask": attention_mask,
410
+ }
411
+ return return_inputs
412
+
413
+ def forward(
414
+ self,
415
+ pixel_values: list[torch.FloatTensor],
416
+ image_metas: dict[list],
417
+ input_ids: torch.FloatTensor,
418
+ seq_length: Optional[torch.LongTensor] = None,
419
+ attention_mask: Optional[torch.LongTensor] = None,
420
+ labels: Optional[torch.LongTensor] = None,
421
+ return_dict: Optional[bool] = None,
422
+ ):
423
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
424
+ inputs = self.prepare_mm_inputs(
425
+ input_ids=input_ids,
426
+ pixel_values=pixel_values,
427
+ image_metas=image_metas,
428
+ attention_mask=attention_mask,
429
+ )
430
+
431
+ outputs = self.language_model(
432
+ **inputs,
433
+ labels=labels,
434
+ position_ids=None,
435
+ return_dict=return_dict,
436
+ output_attentions=self.config.output_attentions,
437
+ )
438
+
439
+ return outputs
440
+
441
+ @torch.no_grad()
442
+ def generate(
443
+ self,
444
+ pixel_values: torch.FloatTensor = None,
445
+ image_metas: dict[list] = None,
446
+ input_ids: Optional[torch.LongTensor] = None,
447
+ attention_mask: Optional[torch.LongTensor] = None,
448
+ seq_length: Optional[torch.LongTensor] = None,
449
+ **generate_kwargs,
450
+ ) -> torch.LongTensor:
451
+ """
452
+ Overrides `generate` function to be able to use the model as a conditional generator.
453
+
454
+ Args:
455
+ pixel_values (`torch.FloatTensor` of shape (batch_size, num_channels, height, width)):
456
+ Input images to be processed.
457
+ input_ids (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*):
458
+ The sequence used as a prompt for the generation.
459
+ attention_mask (`torch.LongTensor` of shape (batch_size, sequence_length), *optional*):
460
+ Mask to avoid performing attention on padding token indices
461
+
462
+ Returns:
463
+ outputs (torch.LongTensor): the generated token ids, one sequence per batch item.
464
+ """
465
+ if input_ids is None:
466
+ return self.language_model.generate(attention_mask=attention_mask, **generate_kwargs)
467
+ if pixel_values is None:
468
+ return self.language_model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
469
+
470
+ if (
471
+ image_metas is not None
472
+ and image_metas.get("vision_grid_thw") is not None
473
+ and isinstance(image_metas.get("vision_grid_thw"), torch.Tensor)
474
+ ):
475
+ image_metas["vision_grid_thw"] = image_metas["vision_grid_thw"].to(input_ids.device)
476
+
477
+ inputs = self.prepare_mm_inputs(
478
+ input_ids=input_ids,
479
+ pixel_values=pixel_values,
480
+ image_metas=image_metas,
481
+ attention_mask=attention_mask,
482
+ )
483
+
484
+ outputs = self.language_model.generate(
485
+ **inputs,
486
+ **generate_kwargs,
487
+ )
488
+
489
+ return outputs
490
+
491
+ def _get_input_dtype(self):
492
+ dtype = next(self.vision_model.parameters()).dtype
493
+ return dtype
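For orientation, here is a minimal inference sketch of how the pieces above are intended to be wired together, assuming the repository registers these classes through `auto_map` (the config files are not shown in this diff) and that the processor is the `KananaVProcessor` defined later in this commit. The repo id, image path, and generation settings below are illustrative placeholders, not values taken from this upload.

    import torch
    from PIL import Image
    from transformers import AutoModelForCausalLM, AutoProcessor

    repo = "kakaocorp/kanana-1.5-v"  # hypothetical repo id
    model = AutoModelForCausalLM.from_pretrained(
        repo, torch_dtype=torch.bfloat16, trust_remote_code=True
    ).eval()
    processor = AutoProcessor.from_pretrained(repo, trust_remote_code=True)

    sample = {
        "conv": [
            {"role": "user", "content": "<image>"},
            {"role": "user", "content": "Describe this image."},
        ],
        "image": [Image.open("examples/example1.png")],  # illustrative path
    }
    # batch_encode_collate builds input_ids (with negative image placeholders),
    # pixel_values, and image_metas in the layout that forward()/generate() expect.
    inputs = processor.batch_encode_collate(
        [sample], max_length=4096, add_generation_prompt=True
    )
    inputs = {k: v.to(model.device) if torch.is_tensor(v) else v for k, v in inputs.items()}

    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=False)
    print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])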
preprocessor_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "max_pixels": 1254400,
3
+ "merge_size": 2,
4
+ "min_pixels": 78400,
5
+ "patch_size": 14
6
+ }
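These four values bound the visual sequence length per image: with `patch_size` 14 and `merge_size` 2, every visual token fed to the language model covers a 28x28 pixel area, so `min_pixels`/`max_pixels` correspond to roughly 100 to 1600 tokens per image. A quick sanity-check sketch (my arithmetic, not shipped code):

    # One merged visual token covers (patch_size * merge_size)^2 = 28 * 28 = 784 pixels.
    config = {"max_pixels": 1254400, "merge_size": 2, "min_pixels": 78400, "patch_size": 14}

    token_area = (config["patch_size"] * config["merge_size"]) ** 2  # 784
    min_tokens = config["min_pixels"] // token_area                  # 100
    max_tokens = config["max_pixels"] // token_area                  # 1600
    assert (min_tokens, max_tokens) == (100, 1600)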
processing.py ADDED
@@ -0,0 +1,208 @@
1
+ import logging
2
+
3
+ import torch
4
+ from PIL.Image import Image
5
+ from transformers.processing_utils import ProcessorMixin
6
+
7
+
8
+ logger = logging.getLogger("kanana-1.5-v")
9
+
10
+
11
+ HUMAN = "Human: "
12
+ AI = "AI: "
13
+ CHAT_TEMPLATE = (
14
+ """
15
+ {%- if bos_token is defined and bos_token %}
16
+ {{- bos_token }}
17
+ {%- endif %}
18
+ {%- set intro %}
19
+ The following is a conversation between a curious human and AI assistant. 당신은 Kakao에서 개발된 인공지능 언어모델이고 이름은 kanana입니다.
20
+ Knowledge Cutoff Date: June 30, 2024.
21
+ Capabilities and Limitations:
22
+ - I cannot search for external content such as weather, news, or the current date and time.
23
+ - If a URL is provided, I cannot access it directly. Instead, please copy and provide the relevant content for me to process.
24
+ {%- endset %}
25
+ {{ intro }}
26
+ {{- '\n' }}
27
+ {%- for message in messages %}
28
+ {%- if message['role'] == 'system' %}
29
+ {{- message['content'] }}
30
+ {%- elif message['role'] == 'user' %}
31
+ {{- '<|USER|>' + message['content'] }}
32
+ {%- elif message['role'] == 'assistant' %}
33
+ {{- '<|ASSISTANT|>' + message['content'] + eos_token }}
34
+ {%- endif %}
35
+ {%- if not loop.last %}
36
+ {{- '\n' }}
37
+ {%- endif %}
38
+ {%- endfor %}
39
+ {%- if add_generation_prompt %}
40
+ {{- '\n<|ASSISTANT|>' }}
41
+ {%- endif %}
42
+ """.strip()
43
+ .replace("<|USER|>", HUMAN)
44
+ .replace("<|ASSISTANT|>", AI)
45
+ )
46
+
47
+
48
+ class KananaVProcessor(ProcessorMixin):
49
+ attributes = ["image_processor", "tokenizer"]
50
+ valid_kwargs = []
51
+ image_processor_class = "AutoImageProcessor"
52
+ tokenizer_class = "AutoTokenizer"
53
+
54
+ def __init__(self, image_processor, tokenizer):
55
+ super().__init__(image_processor, tokenizer)
56
+ self.image_processor = image_processor
57
+ self.tokenizer = tokenizer
58
+ self.tokenizer.mllm_setup("dynamic")
59
+
60
+ def conv2prompt(
61
+ self,
62
+ conv: list[dict] | str,
63
+ chat_template=CHAT_TEMPLATE,
64
+ add_generation_prompt=False,
65
+ ) -> str:
66
+ """Convert conversation to prompt"""
67
+ if isinstance(conv, list):
68
+ prompt = self.tokenizer.apply_chat_template(
69
+ conversation=conv,
70
+ tokenize=False,
71
+ chat_template=chat_template,
72
+ add_generation_prompt=add_generation_prompt,
73
+ )
74
+ elif isinstance(conv, str):
75
+ prompt = conv
76
+ else:
77
+ raise TypeError(f"conv must be list or str, but got {type(conv)}")
78
+
79
+ return prompt
80
+
81
+ def __call__(self, data: dict, max_length, add_generation_prompt=False):
82
+ return self.encode(data, max_length, add_generation_prompt=add_generation_prompt)
83
+
84
+ def encode(self, data: dict, max_length, add_generation_prompt=False) -> dict:
85
+ """
86
+ Args:
87
+ data (dict): {
88
+ "conv": [
89
+ {"role": "system", "content": "The following is a conversation between a curious human and AI assistant."},
90
+ {"role": "user", "content": IMAGE},
91
+ {"role": "user", "content": "Hello, how are you?"},
92
+ {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
93
+ ...
94
+ ],
95
+ "image": [
96
+ PIL.Image,
97
+ ...
98
+ ]
99
+ }
100
+
101
+ Return:
102
+ data (dict): {
103
+ "text": text_tokens_from_tokenizer,
104
+ "text_raw": prompt,
105
+ "image": pixel_values,
106
+ "image_meta": image_meta (dict of list) includes image resolution, etc.
107
+ }
108
+ """
109
+ assert "images" not in data
110
+
111
+ conv = data["conv"]
112
+ images: list[Image] = data.get("image") # PIL images
113
+
114
+ data = {
115
+ "text": None,
116
+ "text_raw": None,
117
+ "image": None,
118
+ "image_meta": None,
119
+ }
120
+
121
+ # image
122
+ if images:
123
+ processor_outputs = [
124
+ self.image_processor(image) for image in images if image
125
+ ]
126
+ pixel_values = [
127
+ processor_output["pixel_values"] for processor_output in processor_outputs
128
+ ]
129
+ image_meta = [processor_output["image_meta"] for processor_output in processor_outputs]
130
+ if pixel_values:
131
+ pixel_values = torch.concat(pixel_values, dim=0)
132
+ data["image"] = pixel_values
133
+ data["image_meta"] = {k: [d[k] for d in image_meta] for k in image_meta[0]}
134
+
135
+ # text
136
+ prompt = self.conv2prompt(conv, add_generation_prompt=add_generation_prompt)
137
+ text_tokens = self.tokenizer.encode_prompt(
138
+ prompt,
139
+ max_length,
140
+ image_meta=data["image_meta"],
141
+ )
142
+
143
+ data["text"] = text_tokens
144
+ data["text_raw"] = prompt
145
+
146
+ return data
147
+
148
+ def batch_encode_collate(
149
+ self,
150
+ data_list: list[dict],
151
+ padding: str = "longest",
152
+ padding_side: str = "right",
153
+ max_length: int | None = None,
154
+ add_generation_prompt=False,
155
+ ):
156
+ """Encode batch and collate them"""
157
+ batch = [
158
+ self.encode(data, max_length, add_generation_prompt=add_generation_prompt)
159
+ for data in data_list
160
+ ]
161
+ batch = self.collate(
162
+ batch,
163
+ padding=padding,
164
+ padding_side=padding_side,
165
+ max_length=max_length,
166
+ )
167
+
168
+ return batch
169
+
170
+ def collate(
171
+ self,
172
+ batch,
173
+ padding,
174
+ padding_side,
175
+ max_length,
176
+ ):
177
+ """Collate encoded results to model inputs"""
178
+ text_batch = [data["text"] for data in batch]
179
+
180
+ text_batch = self.tokenizer.batch_collate_pad(
181
+ text_batch,
182
+ padding=padding,
183
+ padding_side=padding_side,
184
+ max_length=max_length,
185
+ )
186
+
187
+ image_list = [data["image"] for data in batch if data["image"] is not None]
188
+ image_meta = [data["image_meta"] for data in batch if data["image_meta"] is not None]
189
+ if len(image_meta) > 0:
190
+ image_meta = {
191
+ k: sum([d[k] for d in image_meta], []) for k in image_meta[0]
192
+ }
193
+ if image_meta.get("vision_grid_thw"):
194
+ image_meta["vision_grid_thw"] = torch.tensor(image_meta["vision_grid_thw"])
195
+ else:
196
+ image_meta = None
197
+
198
+ output_batch = text_batch
199
+
200
+ output_batch["pixel_values"] = torch.cat(image_list, dim=0) if len(image_list) > 0 else None
201
+ output_batch["image_metas"] = image_meta
202
+ return output_batch
203
+
204
+ def decode(self, *args, **kwargs):
205
+ return self.tokenizer.decode(*args, **kwargs)
206
+
207
+ def batch_decode(self, *args, **kwargs):
208
+ return self.tokenizer.batch_decode(*args, **kwargs)
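A short sketch of the per-sample contract of `KananaVProcessor.encode`, reusing the `processor` from the inference sketch above; the image path is again an illustrative placeholder.

    from PIL import Image

    sample = {
        "conv": [
            {"role": "user", "content": "<image>"},
            {"role": "user", "content": "What is written on the waybill?"},
        ],
        "image": [Image.open("examples/waybill.png")],  # illustrative path
    }
    out = processor.encode(sample, max_length=4096, add_generation_prompt=True)

    # out["text"]["input_ids"]: 1-D LongTensor in which every image position holds the
    #   negative placeholder id (-1) that prepare_mm_inputs later swaps for visual embeddings.
    # out["image"]:             flattened patches of shape (num_patches, C * T * patch * patch)
    # out["image_meta"]:        dict of lists, e.g. vision_grid_thw, image_token_thw, resolutions
    num_image_placeholders = int((out["text"]["input_ids"] < 0).sum())
    print(num_image_placeholders, out["image"].shape, out["image_meta"]["image_token_thw"])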
processing_image.py ADDED
@@ -0,0 +1,289 @@
1
+ import logging
2
+ import math
3
+ from typing import Optional, Union
4
+
5
+ import numpy as np
6
+ import torch
7
+ from einops import rearrange
8
+ from PIL import Image
9
+ from transformers.image_processing_utils import BaseImageProcessor
10
+ from transformers.image_transforms import convert_to_rgb, resize
11
+ from transformers.image_utils import (
12
+ ChannelDimension,
13
+ ImageInput,
14
+ PILImageResampling,
15
+ get_image_size,
16
+ infer_channel_dimension_format,
17
+ is_scaled_image,
18
+ make_list_of_images,
19
+ to_numpy_array,
20
+ )
21
+ from transformers.utils.constants import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD
22
+
23
+ logger = logging.getLogger("kanana-1.5-v")
24
+
25
+
26
+ def smart_resize(
27
+ height: int,
28
+ width: int,
29
+ factor: int = 28,
30
+ min_pixels: int = 56 * 56,
31
+ max_pixels: int = 14 * 14 * 4 * 1280,
32
+ ):
33
+ """Rescales the image so that the following conditions are met:
34
+
35
+ 1. Both dimensions (height and width) are divisible by 'factor'.
36
+
37
+ 2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
38
+
39
+ 3. The aspect ratio of the image is maintained as closely as possible.
40
+
41
+ """
42
+ if height < factor or width < factor:
43
+ raise ValueError(f"height:{height} or width:{width} must be larger than factor:{factor}")
44
+ elif max(height, width) / min(height, width) > 200:
45
+ raise ValueError(
46
+ f"absolute aspect ratio must be smaller than 200, got {max(height, width) / min(height, width)}"
47
+ )
48
+ h_bar = round(height / factor) * factor
49
+ w_bar = round(width / factor) * factor
50
+ if h_bar * w_bar > max_pixels:
51
+ beta = math.sqrt((height * width) / max_pixels)
52
+ h_bar = math.floor(height / beta / factor) * factor
53
+ w_bar = math.floor(width / beta / factor) * factor
54
+ elif h_bar * w_bar < min_pixels:
55
+ beta = math.sqrt(min_pixels / (height * width))
56
+ h_bar = math.ceil(height * beta / factor) * factor
57
+ w_bar = math.ceil(width * beta / factor) * factor
58
+ return h_bar, w_bar
59
+
60
+
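A worked example of `smart_resize` using the bounds from `preprocessor_config.json`; the input resolution is my own example, and the rest follows from the arithmetic above.

    # factor = patch_size * merge_size = 14 * 2 = 28
    h_bar, w_bar = smart_resize(1000, 800, factor=28, min_pixels=78400, max_pixels=1254400)
    assert (h_bar, w_bar) == (1008, 812)       # round(1000/28)*28, round(800/28)*28
    assert 78400 <= h_bar * w_bar <= 1254400   # 818,496 pixels, already inside the range

    grid_h, grid_w = h_bar // 14, w_bar // 14      # 72 x 58 patches of 14 x 14 pixels
    merged_tokens = (grid_h // 2) * (grid_w // 2)  # 2x2 merge -> 36 * 29
    assert merged_tokens == 1044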
61
+ class KananaVImageProcessor(BaseImageProcessor):
62
+ def __init__(
63
+ self,
64
+ do_resize: bool = True,
65
+ do_rescale: bool = True,
66
+ rescale_factor: Union[int, float] = 1 / 255,
67
+ do_normalize: bool = True,
68
+ image_mean: Optional[Union[float, list[float]]] = OPENAI_CLIP_MEAN,
69
+ image_std: Optional[Union[float, list[float]]] = OPENAI_CLIP_STD,
70
+ do_convert_rgb: bool = True,
71
+ min_pixels: int = 56 * 56,
72
+ max_pixels: int = 14 * 14 * 4 * 1280,
73
+ patch_size: int = 14,
74
+ temporal_patch_size: int = 2,
75
+ merge_size: int = 2,
76
+ **kwargs,
77
+ ) -> None:
78
+ super().__init__(**kwargs)
79
+ self.do_resize = do_resize
80
+ self.resample = Image.BICUBIC
81
+ self.do_rescale = do_rescale
82
+ self.rescale_factor = rescale_factor
83
+ self.do_normalize = do_normalize
84
+ self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
85
+ self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD
86
+ self.min_pixels = min_pixels
87
+ self.max_pixels = max_pixels
88
+ self.patch_size = patch_size
89
+ self.temporal_patch_size = temporal_patch_size
90
+ self.merge_size = merge_size
91
+ self.size = {"min_pixels": min_pixels, "max_pixels": max_pixels}
92
+ self.do_convert_rgb = do_convert_rgb
93
+ self.input_data_format = ChannelDimension.LAST
94
+
95
+ def _preprocess(
96
+ self,
97
+ images: Union[ImageInput],
98
+ do_resize: bool = True,
99
+ resample: PILImageResampling = None,
100
+ do_rescale: bool = None,
101
+ rescale_factor: float = None,
102
+ do_normalize: bool = None,
103
+ image_mean: Optional[Union[float, list[float]]] = None,
104
+ image_std: Optional[Union[float, list[float]]] = None,
105
+ do_convert_rgb: bool = None,
106
+ data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
107
+ input_data_format: Optional[Union[str, ChannelDimension]] = None,
108
+ ):
109
+ """
110
+ Preprocess an image or batch of images. Copy of the `preprocess` method from `CLIPImageProcessor`.
111
+ Adapted from `image_processing_qwen2_vl.py`.
112
+
113
+ Args:
114
+ images (`ImageInput`):
115
+ Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. If pixel values range from 0 to 1, set `do_rescale=False`.
116
+ do_resize (`bool`, *optional*, defaults to `self.do_resize`):
117
+ Whether to resize the image.
118
+ resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
119
+ Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums.
120
+ do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
121
+ Whether to rescale the image.
122
+ rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
123
+ Scale factor to use if rescaling the image.
124
+ do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
125
+ Whether to normalize the image.
126
+ image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
127
+ Mean to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
128
+ image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
129
+ Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
130
+ do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
131
+ Whether to convert the image to RGB.
132
+ data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
133
+ The channel dimension format for the output image. Can be one of:
134
+ - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
135
+ - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
136
+ - Unset: Use the channel dimension format of the input image.
137
+ input_data_format (`ChannelDimension` or `str`, *optional*):
138
+ The channel dimension format for the input image. Can be one of:
139
+ - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
140
+ - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
141
+ - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
142
+ """
143
+ images = make_list_of_images(images)
144
+
145
+ if do_convert_rgb:
146
+ images = [convert_to_rgb(image) for image in images]
147
+
148
+ # All transformations expect numpy arrays.
149
+ images = [to_numpy_array(image) for image in images]
150
+
151
+ if is_scaled_image(images[0]) and do_rescale:
152
+ logger.warning_once(
153
+ "It looks like you are trying to rescale already rescaled images. If the input"
154
+ " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
155
+ )
156
+ if input_data_format is None:
157
+ # We assume that all images have the same channel dimension format.
158
+ input_data_format = infer_channel_dimension_format(images[0])
159
+
160
+ height, width = get_image_size(images[0], channel_dim=input_data_format)
161
+ resized_height, resized_width = height, width
162
+ processed_images = []
163
+ for image in images:
164
+ if do_resize:
165
+ resized_height, resized_width = smart_resize(
166
+ height,
167
+ width,
168
+ factor=self.patch_size * self.merge_size,
169
+ min_pixels=self.min_pixels,
170
+ max_pixels=self.max_pixels,
171
+ )
172
+ image = resize(
173
+ image,
174
+ size=(resized_height, resized_width),
175
+ resample=resample,
176
+ input_data_format=input_data_format,
177
+ )
178
+
179
+ if do_rescale:
180
+ image = self.rescale(
181
+ image, scale=rescale_factor, input_data_format=input_data_format
182
+ )
183
+
184
+ if do_normalize:
185
+ image = self.normalize(
186
+ image=image, mean=image_mean, std=image_std, input_data_format=input_data_format
187
+ )
188
+ processed_images.append(image)
189
+
190
+ patches = np.array(processed_images)
191
+ if data_format == ChannelDimension.LAST:
192
+ # Inputs are channels-last here; convert to (num_images, num_channels, height, width).
193
+ patches = rearrange(patches, "N H W C -> N C H W")
194
+ if patches.shape[0] == 1:
195
+ patches = np.tile(patches, (self.temporal_patch_size, 1, 1, 1))
196
+ grid_t = patches.shape[0] // self.temporal_patch_size
197
+ grid_h, grid_w = resized_height // self.patch_size, resized_width // self.patch_size
198
+ flatten_patches = rearrange(
199
+ patches,
200
+ "(nT T) C (nH sH H) (nW sW W) -> (nT nH nW sH sW) (C T H W)",
201
+ T=self.temporal_patch_size,
202
+ H=self.patch_size,
203
+ W=self.patch_size,
204
+ nH=grid_h // self.merge_size,
205
+ nW=grid_w // self.merge_size,
206
+ sH=self.merge_size,
207
+ sW=self.merge_size,
208
+ )
209
+ return (
210
+ flatten_patches,
211
+ (grid_t, grid_h, grid_w),
212
+ (resized_height, resized_width),
213
+ (height, width),
214
+ )
215
+
216
+ def resize_pil_image(self, image):
217
+ """Upscale the image so that its shortest side is at least
218
+ `patch_size * merge_size` (the smallest size the patchifier accepts),
219
+ preserving the aspect ratio. Images that are already large enough are
220
+ returned unchanged.
221
+ """
222
+ ori_width, ori_height = image.size
223
+ width, height = (ori_width, ori_height)
224
+ if min(width, height) < self.patch_size * self.merge_size:
225
+ scale = self.patch_size * self.merge_size / min(width, height)
226
+ width, height = (int(width * scale), int(height * scale))
227
+ if (width, height) != (ori_width, ori_height):
228
+ image = image.resize((width, height), resample=Image.BICUBIC)
229
+
230
+ return image
231
+
232
+ def __call__(self, image):
233
+ """
234
+ Args:
235
+ image (PIL.Image or None): input image.
236
+
237
+ Return:
238
+ dict with:
239
+ pixel_values (tensor or None): flattened patches of shape (num_patches, C * T * patch * patch)
240
+ image_meta (dict or None): vision_grid_thw, image_token_thw,
241
+ hw_best_resolution (resized height/width), hw_orig_resolution (original height/width)
242
+ """
243
+ do_resize = self.do_resize
244
+ resample = self.resample
245
+ do_rescale = self.do_rescale
246
+ rescale_factor = self.rescale_factor
247
+ do_normalize = self.do_normalize
248
+ image_mean = self.image_mean
249
+ image_std = self.image_std
250
+ do_convert_rgb = self.do_convert_rgb
251
+ input_data_format = self.input_data_format
252
+
253
+ if image is not None:
254
+ # resize image if the shortest side is smaller than patch_size * merge_size
255
+ image = self.resize_pil_image(image)
256
+
257
+ patches, image_grid_thw, resized_hw, original_hw = self._preprocess(
258
+ images=image,
259
+ do_resize=do_resize,
260
+ resample=resample,
261
+ do_rescale=do_rescale,
262
+ rescale_factor=rescale_factor,
263
+ do_normalize=do_normalize,
264
+ image_mean=image_mean,
265
+ image_std=image_std,
266
+ do_convert_rgb=do_convert_rgb,
267
+ input_data_format=input_data_format,
268
+ data_format=ChannelDimension.LAST,
269
+ )
270
+
271
+ pixel_values = torch.tensor(patches)
272
+ image_meta = {
273
+ "vision_grid_thw": image_grid_thw,
274
+ "hw_best_resolution": resized_hw,
275
+ "hw_orig_resolution": original_hw,
276
+ "image_token_thw": (
277
+ image_grid_thw[0],
278
+ image_grid_thw[1] // self.merge_size,
279
+ image_grid_thw[2] // self.merge_size,
280
+ ),
281
+ }
282
+ else:
283
+ pixel_values = None
284
+ image_meta = None
285
+
286
+ return {
287
+ "pixel_values": pixel_values,
288
+ "image_meta": image_meta,
289
+ }
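Continuing the 1000x800 example from the `smart_resize` sketch, the metadata returned by `KananaVImageProcessor.__call__` would look roughly as follows (field names match the code above; the numbers follow from that arithmetic).

    image_meta = {
        "vision_grid_thw": (1, 72, 58),     # temporal x patch-grid height x patch-grid width
        "hw_best_resolution": (1008, 812),  # resized resolution, divisible by factor 28
        "hw_orig_resolution": (1000, 800),  # original resolution
        "image_token_thw": (1, 36, 29),     # grid after the 2x2 merge
    }
    # 36 * 29 = 1044 text-side placeholders; pixel_values holds 72 * 58 = 4176 patches,
    # each flattened to 3 * 2 * 14 * 14 = 1176 values.
    assert image_meta["image_token_thw"][1] * image_meta["image_token_thw"][2] == 1044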
tokenization.py ADDED
@@ -0,0 +1,240 @@
1
+ import logging
2
+ import re
3
+ from typing import Optional
4
+
5
+ import torch
6
+ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast
7
+
8
+ # Role tokens
9
+ AI = "AI: "
10
+ HUMAN = "Human: "
11
+ _AI = "\n" + AI
12
+ _HUMAN = "\n" + HUMAN
13
+
14
+ # special media tokens
15
+ IMAGE = "<image>"
16
+ IMAGE_ROW_SEPARATOR = "\n"
17
+ IMAGE_GLOBAL_LOCAL_SEPARATOR = "\n"
18
+ MEDIA_TOKENS = {
19
+ "image": [IMAGE],
20
+ }
21
+
22
+ _INFINITE = int(1e12) # infinite token length for no-truncation
23
+
24
+ logger = logging.getLogger("kanana-1.5-v")
25
+
26
+
27
+ def _pad_trunc(
28
+ x: list[list[int]],
29
+ padding: str,
30
+ padding_side: str,
31
+ pad_value: int,
32
+ max_length: int,
33
+ ) -> torch.LongTensor:
34
+ """Pad and truncate sequences to the same length
35
+
36
+ Args:
37
+ x (list[list[int]])
38
+ padding ("longest" or "max_length")
39
+ padding_side ("left" or "right")
40
+ pad_value (int)
41
+ max_length (int or None): if padding == "max_length", max_length should be given.
42
+ """
43
+ assert padding in ["longest", "max_length"]
44
+ assert padding_side in ["left", "right"]
45
+
46
+ lengths = [len(sample) for sample in x]
47
+ if padding == "longest":
48
+ max_length = max(lengths)
49
+
50
+ new_x = []
51
+ for sample, length in zip(x, lengths):
52
+ if torch.is_tensor(sample):
53
+ sample = sample.tolist()
54
+
55
+ if length >= max_length:
56
+ new_x.append(sample[:max_length])
57
+ continue
58
+
59
+ padding_size = max_length - length
60
+ pads = [pad_value] * padding_size
61
+ if padding_side == "right":
62
+ new_x.append(sample + pads)
63
+ else:
64
+ new_x.append(pads + sample)
65
+
66
+ return torch.as_tensor(new_x, dtype=torch.long)
67
+
68
+
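A tiny example of the `_pad_trunc` contract; the pad value and lengths are arbitrary illustrative choices.

    batch = [[5, 6], [7, 8, 9, 10, 11]]

    out = _pad_trunc(batch, padding="max_length", padding_side="right", pad_value=0, max_length=4)
    assert out.tolist() == [[5, 6, 0, 0], [7, 8, 9, 10]]  # short rows padded, long rows truncated

    out = _pad_trunc(batch, padding="longest", padding_side="left", pad_value=0, max_length=None)
    assert out.tolist() == [[0, 0, 0, 5, 6], [7, 8, 9, 10, 11]]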
69
+ class KananaVTokenizerMixin:
70
+ def mllm_setup(self, num_visual_tokens: int | str):
71
+ self.num_visual_tokens = num_visual_tokens
72
+
73
+ # Currently we only support the image modality for media modality.
74
+ self.media_tokens = {k: -int(i + 1) for i, k in enumerate(MEDIA_TOKENS["image"])}
75
+ self.media_lengths = {MEDIA_TOKENS["image"][0]: num_visual_tokens}
76
+
77
+ def repeat_image_tokens(
78
+ self, hw_tokens, with_row_separator=True, add_global_local_separator=False
79
+ ):
80
+ if len(hw_tokens) == 3:
81
+ T, H, W = hw_tokens
82
+ else:
83
+ H, W = hw_tokens
84
+
85
+ repeated_tokens = []
86
+
87
+ if add_global_local_separator:
88
+ global_local_separator = self(IMAGE_GLOBAL_LOCAL_SEPARATOR, add_special_tokens=False)[
89
+ "input_ids"
90
+ ]
91
+
92
+ repeated_tokens += global_local_separator
93
+
94
+ if with_row_separator:
95
+ row_sep = self(IMAGE_ROW_SEPARATOR, add_special_tokens=False)["input_ids"]
96
+
97
+ for h_idx in range(H):
98
+ repeated_tokens += [self.media_tokens[IMAGE]] * W
99
+ if with_row_separator and h_idx != H - 1:
100
+ repeated_tokens += row_sep
101
+
102
+ return repeated_tokens
103
+
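An illustration (a sketch, not the shipped code) of the placeholder layout that `repeat_image_tokens` produces for an `image_token_thw` of (1, 2, 3): W media placeholders per row, with the tokenized row separator between rows. `IMAGE_ID` and `ROW_SEP` below are stand-ins for `self.media_tokens[IMAGE]` and the tokenizer's ids for `IMAGE_ROW_SEPARATOR`.

    IMAGE_ID = -1    # negative media token, later replaced by projected visual embeddings
    ROW_SEP = [198]  # assumed ids for "\n"; the real ids come from the tokenizer

    def sketch_layout(h, w):
        tokens = []
        for row in range(h):
            tokens += [IMAGE_ID] * w
            if row != h - 1:
                tokens += ROW_SEP
        return tokens

    assert sketch_layout(2, 3) == [-1, -1, -1, 198, -1, -1, -1]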
104
+ def encode_text_only(self, prompt: str, add_special_tokens: bool = False) -> list:
105
+ # Text-only Data
106
+ # split prompt into chunks by role tokens
107
+ tokens_to_split = [_AI, _HUMAN]
108
+ pattern = "|".join(map(re.escape, tokens_to_split))
109
+ chunk_strs = re.split(f"({pattern})", prompt)
110
+ chunk_strs = [x for x in chunk_strs if len(x) > 0]
111
+
112
+ enc_chunk = []
113
+ for idx, chunk_str in enumerate(chunk_strs):
114
+ curr_chunk = self(chunk_str, add_special_tokens=False)["input_ids"]
115
+ enc_chunk += curr_chunk
116
+ return enc_chunk
117
+
118
+ def encode_prompt(
119
+ self, prompt: str, max_length: int | None = None, image_meta: dict | None = None
120
+ ) -> dict:
121
+ """Tokenize prompt which consists of image-text or text only, with role tokens.
122
+ Role pattern is "AI: " or "Human: ".
123
+
124
+ Args:
125
+ prompt
126
+ max_length (int or None): here, max_length is used for truncation.
127
+ If max_length is None, no truncation is applied.
128
+ """
129
+ max_length = max_length or _INFINITE # if None, set to infinite for no-truncation
130
+
131
+ # output enc_chunk
132
+ enc_chunk = []
133
+
134
+ # Text-only or Image-Text Data
135
+ # split prompt into chunks by media and role tokens
136
+ tokens_to_split = list(self.media_tokens.keys()) + [_AI, _HUMAN]
137
+ pattern = "|".join(map(re.escape, tokens_to_split))
138
+ chunk_strs = re.split(f"({pattern})", prompt)
139
+ chunk_strs = [x for x in chunk_strs if len(x) > 0]
140
+ # tokenize chunks
141
+ img_idx = 0 # for sync with image_meta
142
+ for idx, chunk_str in enumerate(chunk_strs):
143
+ if chunk_str in self.media_tokens:
144
+ if chunk_str == IMAGE:
145
+ image_token_thw = (
146
+ image_meta["image_token_thw"][img_idx]
147
+ if image_meta.get("image_token_thw")
148
+ else None
149
+ )
150
+
151
+ media_tokens = self.repeat_image_tokens(
152
+ image_token_thw,
153
+ with_row_separator=True,
154
+ add_global_local_separator=True,
155
+ )
156
+ # increment image index
157
+ img_idx += 1
158
+
159
+ else:
160
+ raise ValueError("Unknown chunk str", chunk_str)
161
+
162
+ enc_chunk += media_tokens
163
+
164
+ else:
165
+ curr_chunk = self(chunk_str, add_special_tokens=False)["input_ids"]
166
+ enc_chunk += curr_chunk
167
+
168
+ L = len(enc_chunk)
169
+
170
+ input_ids = torch.as_tensor(enc_chunk, dtype=torch.long)
171
+ attention_mask = torch.ones_like(input_ids)
172
+
173
+ assert L <= max_length, (
174
+ f"[Length exceeded] Input sequence length ({L}) is greater than "
175
+ f"the allowed max_length ({max_length}). "
176
+ "Please truncate the sequence or increase max_length."
177
+ )
178
+
179
+ return {
180
+ "input_ids": input_ids, # [L]
181
+ "seq_length": L, # int
182
+ "attention_mask": attention_mask, # [L]
183
+ }
184
+
185
+ def batch_collate_pad(
186
+ self,
187
+ batch: list,
188
+ padding: str,
189
+ padding_side: str,
190
+ max_length: int | None,
191
+ ) -> dict[str, torch.LongTensor]:
192
+ """Collate batch and pad/truncate to the same length
193
+
194
+ Args:
195
+ batch
196
+ padding ("longest" or "max_length")
197
+ padding_side ("left" or "right")
198
+ pad_value (int)
199
+ max_length (int or None): if padding == "max_length", max_length should be given
200
+ """
201
+ if padding == "max_length":
202
+ assert max_length is not None, "max_length should be given if padding == 'max_length'"
203
+ else:
204
+ # if padding == 'longest' and max_length is None, set to infinite for no-truncation
205
+ max_length = max_length or _INFINITE
206
+
207
+ input_ids = [sample["input_ids"] for sample in batch]
208
+ attention_mask = [sample["attention_mask"] for sample in batch]
209
+ seq_length = [sample["seq_length"] for sample in batch]
210
+
211
+ input_ids = _pad_trunc(input_ids, padding, padding_side, self.pad_token_id, max_length)
212
+ attention_mask = _pad_trunc(attention_mask, padding, padding_side, 0, max_length)
213
+ seq_length = torch.as_tensor(seq_length, dtype=torch.long)
214
+
215
+ return {
216
+ "input_ids": input_ids,
217
+ "attention_mask": attention_mask,
218
+ "seq_length": seq_length,
219
+ }
220
+
221
+ def get_chat_template(self) -> str:
222
+ """Method for bw-compat: old HF transformers (e.g., 4.41.0) does not have get_chat_template
223
+ """
224
+ return self.chat_template
225
+
226
+
227
+ class KananaVTokenizer(PreTrainedTokenizer, KananaVTokenizerMixin):
228
+ def __init__(self, **kwargs):
229
+ super().__init__(**kwargs)
230
+
231
+ def encode(self, text, add_special_tokens=False) -> list:
232
+ return self.encode_text_only(prompt=text, add_special_tokens=add_special_tokens)
233
+
234
+
235
+ class KananaVTokenizerFast(PreTrainedTokenizerFast, KananaVTokenizerMixin):
236
+ def __init__(self, **kwargs):
237
+ super().__init__(**kwargs)
238
+
239
+ def encode(self, text, add_special_tokens=False) -> list:
240
+ return self.encode_text_only(prompt=text, add_special_tokens=add_special_tokens)
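For reference, a small sketch of the chunking step inside `encode_prompt`: the prompt is split on the media token and on the "\nAI: " / "\nHuman: " role markers, then each text chunk is tokenized normally and each "<image>" chunk is expanded via `repeat_image_tokens`. The example prompt below is illustrative.

    import re

    tokens_to_split = ["<image>", "\nAI: ", "\nHuman: "]
    pattern = "|".join(map(re.escape, tokens_to_split))
    prompt = "intro text\nHuman: <image>\nHuman: Describe this image.\nAI: "
    chunks = [c for c in re.split(f"({pattern})", prompt) if len(c) > 0]
    assert chunks == [
        "intro text",
        "\nHuman: ",
        "<image>",
        "\nHuman: ",
        "Describe this image.",
        "\nAI: ",
    ]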
tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44e2a3cfdfa7530be35f0d72c39b37ff438d4a1e69cc285b3ee461987d0bfa7
3
+ size 17210623
tokenizer_config.json ADDED
@@ -0,0 +1,2095 @@
1
+ {
2
+ "auto_map": {
3
+ "AutoTokenizer": ["tokenization.KananaVTokenizer", "tokenization.KananaVTokenizerFast"]
4
+ },
5
+ "added_tokens_decoder": {
6
+ "128000": {
7
+ "content": "<|begin_of_text|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "128001": {
15
+ "content": "<|end_of_text|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "128002": {
23
+ "content": "<|reserved_special_token_0|>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "128003": {
31
+ "content": "<|reserved_special_token_1|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "128004": {
39
+ "content": "<|reserved_special_token_2|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "128005": {
47
+ "content": "<|reserved_special_token_3|>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "128006": {
55
+ "content": "<|start_header_id|>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "128007": {
63
+ "content": "<|end_header_id|>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "128008": {
71
+ "content": "<|reserved_special_token_4|>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "128009": {
79
+ "content": "<|eot_id|>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "128010": {
87
+ "content": "<|reserved_special_token_5|>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "128011": {
95
+ "content": "<|reserved_special_token_6|>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "128012": {
103
+ "content": "<|reserved_special_token_7|>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "128013": {
111
+ "content": "<|reserved_special_token_8|>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "128014": {
119
+ "content": "<|reserved_special_token_9|>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
+ },
126
+ "128015": {
127
+ "content": "<|reserved_special_token_10|>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
+ "128016": {
135
+ "content": "<|reserved_special_token_11|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": true
141
+ },
142
+ "128017": {
143
+ "content": "<|reserved_special_token_12|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": true
149
+ },
150
+ "128018": {
151
+ "content": "<|reserved_special_token_13|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": true
157
+ },
158
+ "128019": {
159
+ "content": "<|reserved_special_token_14|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": true
165
+ },
166
+ "128020": {
167
+ "content": "<|reserved_special_token_15|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": true
173
+ },
174
+ "128021": {
175
+ "content": "<|reserved_special_token_16|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": true
181
+ },
182
+ "128022": {
183
+ "content": "<|reserved_special_token_17|>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ },
190
+ "128023": {
191
+ "content": "<|reserved_special_token_18|>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": true
197
+ },
198
+ "128024": {
199
+ "content": "<|reserved_special_token_19|>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "128025": {
207
+ "content": "<|reserved_special_token_20|>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "128026": {
215
+ "content": "<|reserved_special_token_21|>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "128027": {
223
+ "content": "<|reserved_special_token_22|>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "128028": {
231
+ "content": "<|reserved_special_token_23|>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "128029": {
239
+ "content": "<|reserved_special_token_24|>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "128030": {
247
+ "content": "<|reserved_special_token_25|>",
248
+ "lstrip": false,
249
+ "normalized": false,
250
+ "rstrip": false,
251
+ "single_word": false,
252
+ "special": true
253
+ },
254
+ "128031": {
255
+ "content": "<|reserved_special_token_26|>",
256
+ "lstrip": false,
257
+ "normalized": false,
258
+ "rstrip": false,
259
+ "single_word": false,
260
+ "special": true
261
+ },
262
+ "128032": {
263
+ "content": "<|reserved_special_token_27|>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false,
268
+ "special": true
269
+ },
270
+ "128033": {
271
+ "content": "<|reserved_special_token_28|>",
272
+ "lstrip": false,
273
+ "normalized": false,
274
+ "rstrip": false,
275
+ "single_word": false,
276
+ "special": true
277
+ },
278
+ "128034": {
279
+ "content": "<|reserved_special_token_29|>",
280
+ "lstrip": false,
281
+ "normalized": false,
282
+ "rstrip": false,
283
+ "single_word": false,
284
+ "special": true
285
+ },
286
+ "128035": {
287
+ "content": "<|reserved_special_token_30|>",
288
+ "lstrip": false,
289
+ "normalized": false,
290
+ "rstrip": false,
291
+ "single_word": false,
292
+ "special": true
293
+ },
294
+ "128036": {
295
+ "content": "<|reserved_special_token_31|>",
296
+ "lstrip": false,
297
+ "normalized": false,
298
+ "rstrip": false,
299
+ "single_word": false,
300
+ "special": true
301
+ },
302
+ "128037": {
303
+ "content": "<|reserved_special_token_32|>",
304
+ "lstrip": false,
305
+ "normalized": false,
306
+ "rstrip": false,
307
+ "single_word": false,
308
+ "special": true
309
+ },
310
+ "128038": {
311
+ "content": "<|reserved_special_token_33|>",
312
+ "lstrip": false,
313
+ "normalized": false,
314
+ "rstrip": false,
315
+ "single_word": false,
316
+ "special": true
317
+ },
318
+ "128039": {
319
+ "content": "<|reserved_special_token_34|>",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false,
324
+ "special": true
325
+ },
326
+ "128040": {
327
+ "content": "<|reserved_special_token_35|>",
328
+ "lstrip": false,
329
+ "normalized": false,
330
+ "rstrip": false,
331
+ "single_word": false,
332
+ "special": true
333
+ },
334
+ "128041": {
335
+ "content": "<|reserved_special_token_36|>",
336
+ "lstrip": false,
337
+ "normalized": false,
338
+ "rstrip": false,
339
+ "single_word": false,
340
+ "special": true
341
+ },
342
+ "128042": {
343
+ "content": "<|reserved_special_token_37|>",
344
+ "lstrip": false,
345
+ "normalized": false,
346
+ "rstrip": false,
347
+ "single_word": false,
348
+ "special": true
349
+ },
350
+ "128043": {
351
+ "content": "<|reserved_special_token_38|>",
352
+ "lstrip": false,
353
+ "normalized": false,
354
+ "rstrip": false,
355
+ "single_word": false,
356
+ "special": true
357
+ },
358
+ "128044": {
359
+ "content": "<|reserved_special_token_39|>",
360
+ "lstrip": false,
361
+ "normalized": false,
362
+ "rstrip": false,
363
+ "single_word": false,
364
+ "special": true
365
+ },
366
+ "128045": {
367
+ "content": "<|reserved_special_token_40|>",
368
+ "lstrip": false,
369
+ "normalized": false,
370
+ "rstrip": false,
371
+ "single_word": false,
372
+ "special": true
373
+ },
374
+ "128046": {
375
+ "content": "<|reserved_special_token_41|>",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false,
380
+ "special": true
381
+ },
382
+ "128047": {
383
+ "content": "<|reserved_special_token_42|>",
384
+ "lstrip": false,
385
+ "normalized": false,
386
+ "rstrip": false,
387
+ "single_word": false,
388
+ "special": true
389
+ },
390
+ "128048": {
391
+ "content": "<|reserved_special_token_43|>",
392
+ "lstrip": false,
393
+ "normalized": false,
394
+ "rstrip": false,
395
+ "single_word": false,
396
+ "special": true
397
+ },
398
+ "128049": {
399
+ "content": "<|reserved_special_token_44|>",
400
+ "lstrip": false,
401
+ "normalized": false,
402
+ "rstrip": false,
403
+ "single_word": false,
404
+ "special": true
405
+ },
406
+ "128050": {
407
+ "content": "<|reserved_special_token_45|>",
408
+ "lstrip": false,
409
+ "normalized": false,
410
+ "rstrip": false,
411
+ "single_word": false,
412
+ "special": true
413
+ },
414
+ "128051": {
415
+ "content": "<|reserved_special_token_46|>",
416
+ "lstrip": false,
417
+ "normalized": false,
418
+ "rstrip": false,
419
+ "single_word": false,
420
+ "special": true
421
+ },
422
+ "128052": {
423
+ "content": "<|reserved_special_token_47|>",
424
+ "lstrip": false,
425
+ "normalized": false,
426
+ "rstrip": false,
427
+ "single_word": false,
428
+ "special": true
429
+ },
430
+ "128053": {
431
+ "content": "<|reserved_special_token_48|>",
432
+ "lstrip": false,
433
+ "normalized": false,
434
+ "rstrip": false,
435
+ "single_word": false,
436
+ "special": true
437
+ },
438
+ "128054": {
439
+ "content": "<|reserved_special_token_49|>",
440
+ "lstrip": false,
441
+ "normalized": false,
442
+ "rstrip": false,
443
+ "single_word": false,
444
+ "special": true
445
+ },
446
+ "128055": {
447
+ "content": "<|reserved_special_token_50|>",
448
+ "lstrip": false,
449
+ "normalized": false,
450
+ "rstrip": false,
451
+ "single_word": false,
452
+ "special": true
453
+ },
454
+ "128056": {
455
+ "content": "<|reserved_special_token_51|>",
456
+ "lstrip": false,
457
+ "normalized": false,
458
+ "rstrip": false,
459
+ "single_word": false,
460
+ "special": true
461
+ },
462
+ "128057": {
463
+ "content": "<|reserved_special_token_52|>",
464
+ "lstrip": false,
465
+ "normalized": false,
466
+ "rstrip": false,
467
+ "single_word": false,
468
+ "special": true
469
+ },
470
+ "128058": {
471
+ "content": "<|reserved_special_token_53|>",
472
+ "lstrip": false,
473
+ "normalized": false,
474
+ "rstrip": false,
475
+ "single_word": false,
476
+ "special": true
477
+ },
478
+ "128059": {
479
+ "content": "<|reserved_special_token_54|>",
480
+ "lstrip": false,
481
+ "normalized": false,
482
+ "rstrip": false,
483
+ "single_word": false,
484
+ "special": true
485
+ },
486
+ "128060": {
487
+ "content": "<|reserved_special_token_55|>",
488
+ "lstrip": false,
489
+ "normalized": false,
490
+ "rstrip": false,
491
+ "single_word": false,
492
+ "special": true
493
+ },
494
+ "128061": {
495
+ "content": "<|reserved_special_token_56|>",
496
+ "lstrip": false,
497
+ "normalized": false,
498
+ "rstrip": false,
499
+ "single_word": false,
500
+ "special": true
501
+ },
502
+ "128062": {
503
+ "content": "<|reserved_special_token_57|>",
504
+ "lstrip": false,
505
+ "normalized": false,
506
+ "rstrip": false,
507
+ "single_word": false,
508
+ "special": true
509
+ },
510
+ "128063": {
511
+ "content": "<|reserved_special_token_58|>",
512
+ "lstrip": false,
513
+ "normalized": false,
514
+ "rstrip": false,
515
+ "single_word": false,
516
+ "special": true
517
+ },
518
+ "128064": {
519
+ "content": "<|reserved_special_token_59|>",
520
+ "lstrip": false,
521
+ "normalized": false,
522
+ "rstrip": false,
523
+ "single_word": false,
524
+ "special": true
525
+ },
526
+ "128065": {
527
+ "content": "<|reserved_special_token_60|>",
528
+ "lstrip": false,
529
+ "normalized": false,
530
+ "rstrip": false,
531
+ "single_word": false,
532
+ "special": true
533
+ },
534
+ "128066": {
535
+ "content": "<|reserved_special_token_61|>",
536
+ "lstrip": false,
537
+ "normalized": false,
538
+ "rstrip": false,
539
+ "single_word": false,
540
+ "special": true
541
+ },
542
+ "128067": {
543
+ "content": "<|reserved_special_token_62|>",
544
+ "lstrip": false,
545
+ "normalized": false,
546
+ "rstrip": false,
547
+ "single_word": false,
548
+ "special": true
549
+ },
550
+ "128068": {
551
+ "content": "<|reserved_special_token_63|>",
552
+ "lstrip": false,
553
+ "normalized": false,
554
+ "rstrip": false,
555
+ "single_word": false,
556
+ "special": true
557
+ },
558
+ "128069": {
559
+ "content": "<|reserved_special_token_64|>",
560
+ "lstrip": false,
561
+ "normalized": false,
562
+ "rstrip": false,
563
+ "single_word": false,
564
+ "special": true
565
+ },
566
+ "128070": {
567
+ "content": "<|reserved_special_token_65|>",
568
+ "lstrip": false,
569
+ "normalized": false,
570
+ "rstrip": false,
571
+ "single_word": false,
572
+ "special": true
573
+ },
574
+ "128071": {
575
+ "content": "<|reserved_special_token_66|>",
576
+ "lstrip": false,
577
+ "normalized": false,
578
+ "rstrip": false,
579
+ "single_word": false,
580
+ "special": true
581
+ },
582
+ "128072": {
583
+ "content": "<|reserved_special_token_67|>",
584
+ "lstrip": false,
585
+ "normalized": false,
586
+ "rstrip": false,
587
+ "single_word": false,
588
+ "special": true
589
+ },
590
+ "128073": {
591
+ "content": "<|reserved_special_token_68|>",
592
+ "lstrip": false,
593
+ "normalized": false,
594
+ "rstrip": false,
595
+ "single_word": false,
596
+ "special": true
597
+ },
598
+ "128074": {
599
+ "content": "<|reserved_special_token_69|>",
600
+ "lstrip": false,
601
+ "normalized": false,
602
+ "rstrip": false,
603
+ "single_word": false,
604
+ "special": true
605
+ },
606
+ "128075": {
607
+ "content": "<|reserved_special_token_70|>",
608
+ "lstrip": false,
609
+ "normalized": false,
610
+ "rstrip": false,
611
+ "single_word": false,
612
+ "special": true
613
+ },
614
+ "128076": {
615
+ "content": "<|reserved_special_token_71|>",
616
+ "lstrip": false,
617
+ "normalized": false,
618
+ "rstrip": false,
619
+ "single_word": false,
620
+ "special": true
621
+ },
622
+ "128077": {
623
+ "content": "<|reserved_special_token_72|>",
624
+ "lstrip": false,
625
+ "normalized": false,
626
+ "rstrip": false,
627
+ "single_word": false,
628
+ "special": true
629
+ },
630
+ "128078": {
631
+ "content": "<|reserved_special_token_73|>",
632
+ "lstrip": false,
633
+ "normalized": false,
634
+ "rstrip": false,
635
+ "single_word": false,
636
+ "special": true
637
+ },
638
+ "128079": {
639
+ "content": "<|reserved_special_token_74|>",
640
+ "lstrip": false,
641
+ "normalized": false,
642
+ "rstrip": false,
643
+ "single_word": false,
644
+ "special": true
645
+ },
646
+ "128080": {
647
+ "content": "<|reserved_special_token_75|>",
648
+ "lstrip": false,
649
+ "normalized": false,
650
+ "rstrip": false,
651
+ "single_word": false,
652
+ "special": true
653
+ },
654
+ "128081": {
655
+ "content": "<|reserved_special_token_76|>",
656
+ "lstrip": false,
657
+ "normalized": false,
658
+ "rstrip": false,
659
+ "single_word": false,
660
+ "special": true
661
+ },
662
+ "128082": {
663
+ "content": "<|reserved_special_token_77|>",
664
+ "lstrip": false,
665
+ "normalized": false,
666
+ "rstrip": false,
667
+ "single_word": false,
668
+ "special": true
669
+ },
670
+ "128083": {
671
+ "content": "<|reserved_special_token_78|>",
672
+ "lstrip": false,
673
+ "normalized": false,
674
+ "rstrip": false,
675
+ "single_word": false,
676
+ "special": true
677
+ },
678
+ "128084": {
679
+ "content": "<|reserved_special_token_79|>",
680
+ "lstrip": false,
681
+ "normalized": false,
682
+ "rstrip": false,
683
+ "single_word": false,
684
+ "special": true
685
+ },
686
+ "128085": {
687
+ "content": "<|reserved_special_token_80|>",
688
+ "lstrip": false,
689
+ "normalized": false,
690
+ "rstrip": false,
691
+ "single_word": false,
692
+ "special": true
693
+ },
694
+ "128086": {
695
+ "content": "<|reserved_special_token_81|>",
696
+ "lstrip": false,
697
+ "normalized": false,
698
+ "rstrip": false,
699
+ "single_word": false,
700
+ "special": true
701
+ },
702
+ "128087": {
703
+ "content": "<|reserved_special_token_82|>",
704
+ "lstrip": false,
705
+ "normalized": false,
706
+ "rstrip": false,
707
+ "single_word": false,
708
+ "special": true
709
+ },
710
+ "128088": {
711
+ "content": "<|reserved_special_token_83|>",
712
+ "lstrip": false,
713
+ "normalized": false,
714
+ "rstrip": false,
715
+ "single_word": false,
716
+ "special": true
717
+ },
718
+ "128089": {
719
+ "content": "<|reserved_special_token_84|>",
720
+ "lstrip": false,
721
+ "normalized": false,
722
+ "rstrip": false,
723
+ "single_word": false,
724
+ "special": true
725
+ },
726
+ "128090": {
727
+ "content": "<|reserved_special_token_85|>",
728
+ "lstrip": false,
729
+ "normalized": false,
730
+ "rstrip": false,
731
+ "single_word": false,
732
+ "special": true
733
+ },
734
+ "128091": {
735
+ "content": "<|reserved_special_token_86|>",
736
+ "lstrip": false,
737
+ "normalized": false,
738
+ "rstrip": false,
739
+ "single_word": false,
740
+ "special": true
741
+ },
742
+ "128092": {
743
+ "content": "<|reserved_special_token_87|>",
744
+ "lstrip": false,
745
+ "normalized": false,
746
+ "rstrip": false,
747
+ "single_word": false,
748
+ "special": true
749
+ },
750
+ "128093": {
751
+ "content": "<|reserved_special_token_88|>",
752
+ "lstrip": false,
753
+ "normalized": false,
754
+ "rstrip": false,
755
+ "single_word": false,
756
+ "special": true
757
+ },
758
+ "128094": {
759
+ "content": "<|reserved_special_token_89|>",
760
+ "lstrip": false,
761
+ "normalized": false,
762
+ "rstrip": false,
763
+ "single_word": false,
764
+ "special": true
765
+ },
766
+ "128095": {
767
+ "content": "<|reserved_special_token_90|>",
768
+ "lstrip": false,
769
+ "normalized": false,
770
+ "rstrip": false,
771
+ "single_word": false,
772
+ "special": true
773
+ },
774
+ "128096": {
775
+ "content": "<|reserved_special_token_91|>",
776
+ "lstrip": false,
777
+ "normalized": false,
778
+ "rstrip": false,
779
+ "single_word": false,
780
+ "special": true
781
+ },
782
+ "128097": {
783
+ "content": "<|reserved_special_token_92|>",
784
+ "lstrip": false,
785
+ "normalized": false,
786
+ "rstrip": false,
787
+ "single_word": false,
788
+ "special": true
789
+ },
790
+ "128098": {
791
+ "content": "<|reserved_special_token_93|>",
792
+ "lstrip": false,
793
+ "normalized": false,
794
+ "rstrip": false,
795
+ "single_word": false,
796
+ "special": true
797
+ },
798
+ "128099": {
799
+ "content": "<|reserved_special_token_94|>",
800
+ "lstrip": false,
801
+ "normalized": false,
802
+ "rstrip": false,
803
+ "single_word": false,
804
+ "special": true
805
+ },
806
+ "128100": {
807
+ "content": "<|reserved_special_token_95|>",
808
+ "lstrip": false,
809
+ "normalized": false,
810
+ "rstrip": false,
811
+ "single_word": false,
812
+ "special": true
813
+ },
814
+ "128101": {
815
+ "content": "<|reserved_special_token_96|>",
816
+ "lstrip": false,
817
+ "normalized": false,
818
+ "rstrip": false,
819
+ "single_word": false,
820
+ "special": true
821
+ },
822
+ "128102": {
823
+ "content": "<|reserved_special_token_97|>",
824
+ "lstrip": false,
825
+ "normalized": false,
826
+ "rstrip": false,
827
+ "single_word": false,
828
+ "special": true
829
+ },
830
+ "128103": {
831
+ "content": "<|reserved_special_token_98|>",
832
+ "lstrip": false,
833
+ "normalized": false,
834
+ "rstrip": false,
835
+ "single_word": false,
836
+ "special": true
837
+ },
838
+ "128104": {
839
+ "content": "<|reserved_special_token_99|>",
840
+ "lstrip": false,
841
+ "normalized": false,
842
+ "rstrip": false,
843
+ "single_word": false,
844
+ "special": true
845
+ },
846
+ "128105": {
847
+ "content": "<|reserved_special_token_100|>",
848
+ "lstrip": false,
849
+ "normalized": false,
850
+ "rstrip": false,
851
+ "single_word": false,
852
+ "special": true
853
+ },
854
+ "128106": {
855
+ "content": "<|reserved_special_token_101|>",
856
+ "lstrip": false,
857
+ "normalized": false,
858
+ "rstrip": false,
859
+ "single_word": false,
860
+ "special": true
861
+ },
862
+ "128107": {
863
+ "content": "<|reserved_special_token_102|>",
864
+ "lstrip": false,
865
+ "normalized": false,
866
+ "rstrip": false,
867
+ "single_word": false,
868
+ "special": true
869
+ },
870
+ "128108": {
871
+ "content": "<|reserved_special_token_103|>",
872
+ "lstrip": false,
873
+ "normalized": false,
874
+ "rstrip": false,
875
+ "single_word": false,
876
+ "special": true
877
+ },
878
+ "128109": {
879
+ "content": "<|reserved_special_token_104|>",
880
+ "lstrip": false,
881
+ "normalized": false,
882
+ "rstrip": false,
883
+ "single_word": false,
884
+ "special": true
885
+ },
886
+ "128110": {
887
+ "content": "<|reserved_special_token_105|>",
888
+ "lstrip": false,
889
+ "normalized": false,
890
+ "rstrip": false,
891
+ "single_word": false,
892
+ "special": true
893
+ },
894
+ "128111": {
895
+ "content": "<|reserved_special_token_106|>",
896
+ "lstrip": false,
897
+ "normalized": false,
898
+ "rstrip": false,
899
+ "single_word": false,
900
+ "special": true
901
+ },
902
+ "128112": {
903
+ "content": "<|reserved_special_token_107|>",
904
+ "lstrip": false,
905
+ "normalized": false,
906
+ "rstrip": false,
907
+ "single_word": false,
908
+ "special": true
909
+ },
910
+ "128113": {
911
+ "content": "<|reserved_special_token_108|>",
912
+ "lstrip": false,
913
+ "normalized": false,
914
+ "rstrip": false,
915
+ "single_word": false,
916
+ "special": true
917
+ },
918
+ "128114": {
919
+ "content": "<|reserved_special_token_109|>",
920
+ "lstrip": false,
921
+ "normalized": false,
922
+ "rstrip": false,
923
+ "single_word": false,
924
+ "special": true
925
+ },
926
+ "128115": {
927
+ "content": "<|reserved_special_token_110|>",
928
+ "lstrip": false,
929
+ "normalized": false,
930
+ "rstrip": false,
931
+ "single_word": false,
932
+ "special": true
933
+ },
934
+ "128116": {
935
+ "content": "<|reserved_special_token_111|>",
936
+ "lstrip": false,
937
+ "normalized": false,
938
+ "rstrip": false,
939
+ "single_word": false,
940
+ "special": true
941
+ },
942
+ "128117": {
943
+ "content": "<|reserved_special_token_112|>",
944
+ "lstrip": false,
945
+ "normalized": false,
946
+ "rstrip": false,
947
+ "single_word": false,
948
+ "special": true
949
+ },
950
+ "128118": {
951
+ "content": "<|reserved_special_token_113|>",
952
+ "lstrip": false,
953
+ "normalized": false,
954
+ "rstrip": false,
955
+ "single_word": false,
956
+ "special": true
957
+ },
958
+ "128119": {
959
+ "content": "<|reserved_special_token_114|>",
960
+ "lstrip": false,
961
+ "normalized": false,
962
+ "rstrip": false,
963
+ "single_word": false,
964
+ "special": true
965
+ },
966
+ "128120": {
967
+ "content": "<|reserved_special_token_115|>",
968
+ "lstrip": false,
969
+ "normalized": false,
970
+ "rstrip": false,
971
+ "single_word": false,
972
+ "special": true
973
+ },
974
+ "128121": {
975
+ "content": "<|reserved_special_token_116|>",
976
+ "lstrip": false,
977
+ "normalized": false,
978
+ "rstrip": false,
979
+ "single_word": false,
980
+ "special": true
981
+ },
982
+ "128122": {
983
+ "content": "<|reserved_special_token_117|>",
984
+ "lstrip": false,
985
+ "normalized": false,
986
+ "rstrip": false,
987
+ "single_word": false,
988
+ "special": true
989
+ },
990
+ "128123": {
991
+ "content": "<|reserved_special_token_118|>",
992
+ "lstrip": false,
993
+ "normalized": false,
994
+ "rstrip": false,
995
+ "single_word": false,
996
+ "special": true
997
+ },
998
+ "128124": {
999
+ "content": "<|reserved_special_token_119|>",
1000
+ "lstrip": false,
1001
+ "normalized": false,
1002
+ "rstrip": false,
1003
+ "single_word": false,
1004
+ "special": true
1005
+ },
1006
+ "128125": {
1007
+ "content": "<|reserved_special_token_120|>",
1008
+ "lstrip": false,
1009
+ "normalized": false,
1010
+ "rstrip": false,
1011
+ "single_word": false,
1012
+ "special": true
1013
+ },
1014
+ "128126": {
1015
+ "content": "<|reserved_special_token_121|>",
1016
+ "lstrip": false,
1017
+ "normalized": false,
1018
+ "rstrip": false,
1019
+ "single_word": false,
1020
+ "special": true
1021
+ },
1022
+ "128127": {
1023
+ "content": "<|reserved_special_token_122|>",
1024
+ "lstrip": false,
1025
+ "normalized": false,
1026
+ "rstrip": false,
1027
+ "single_word": false,
1028
+ "special": true
1029
+ },
1030
+ "128128": {
1031
+ "content": "<|reserved_special_token_123|>",
1032
+ "lstrip": false,
1033
+ "normalized": false,
1034
+ "rstrip": false,
1035
+ "single_word": false,
1036
+ "special": true
1037
+ },
1038
+ "128129": {
1039
+ "content": "<|reserved_special_token_124|>",
1040
+ "lstrip": false,
1041
+ "normalized": false,
1042
+ "rstrip": false,
1043
+ "single_word": false,
1044
+ "special": true
1045
+ },
1046
+ "128130": {
1047
+ "content": "<|reserved_special_token_125|>",
1048
+ "lstrip": false,
1049
+ "normalized": false,
1050
+ "rstrip": false,
1051
+ "single_word": false,
1052
+ "special": true
1053
+ },
1054
+ "128131": {
1055
+ "content": "<|reserved_special_token_126|>",
1056
+ "lstrip": false,
1057
+ "normalized": false,
1058
+ "rstrip": false,
1059
+ "single_word": false,
1060
+ "special": true
1061
+ },
1062
+ "128132": {
1063
+ "content": "<|reserved_special_token_127|>",
1064
+ "lstrip": false,
1065
+ "normalized": false,
1066
+ "rstrip": false,
1067
+ "single_word": false,
1068
+ "special": true
1069
+ },
1070
+ "128133": {
1071
+ "content": "<|reserved_special_token_128|>",
1072
+ "lstrip": false,
1073
+ "normalized": false,
1074
+ "rstrip": false,
1075
+ "single_word": false,
1076
+ "special": true
1077
+ },
1078
+ "128134": {
1079
+ "content": "<|reserved_special_token_129|>",
1080
+ "lstrip": false,
1081
+ "normalized": false,
1082
+ "rstrip": false,
1083
+ "single_word": false,
1084
+ "special": true
1085
+ },
1086
+ "128135": {
1087
+ "content": "<|reserved_special_token_130|>",
1088
+ "lstrip": false,
1089
+ "normalized": false,
1090
+ "rstrip": false,
1091
+ "single_word": false,
1092
+ "special": true
1093
+ },
1094
+ "128136": {
1095
+ "content": "<|reserved_special_token_131|>",
1096
+ "lstrip": false,
1097
+ "normalized": false,
1098
+ "rstrip": false,
1099
+ "single_word": false,
1100
+ "special": true
1101
+ },
1102
+ "128137": {
1103
+ "content": "<|reserved_special_token_132|>",
1104
+ "lstrip": false,
1105
+ "normalized": false,
1106
+ "rstrip": false,
1107
+ "single_word": false,
1108
+ "special": true
1109
+ },
1110
+ "128138": {
1111
+ "content": "<|reserved_special_token_133|>",
1112
+ "lstrip": false,
1113
+ "normalized": false,
1114
+ "rstrip": false,
1115
+ "single_word": false,
1116
+ "special": true
1117
+ },
1118
+ "128139": {
1119
+ "content": "<|reserved_special_token_134|>",
1120
+ "lstrip": false,
1121
+ "normalized": false,
1122
+ "rstrip": false,
1123
+ "single_word": false,
1124
+ "special": true
1125
+ },
1126
+ "128140": {
1127
+ "content": "<|reserved_special_token_135|>",
1128
+ "lstrip": false,
1129
+ "normalized": false,
1130
+ "rstrip": false,
1131
+ "single_word": false,
1132
+ "special": true
1133
+ },
1134
+ "128141": {
1135
+ "content": "<|reserved_special_token_136|>",
1136
+ "lstrip": false,
1137
+ "normalized": false,
1138
+ "rstrip": false,
1139
+ "single_word": false,
1140
+ "special": true
1141
+ },
1142
+ "128142": {
1143
+ "content": "<|reserved_special_token_137|>",
1144
+ "lstrip": false,
1145
+ "normalized": false,
1146
+ "rstrip": false,
1147
+ "single_word": false,
1148
+ "special": true
1149
+ },
1150
+ "128143": {
1151
+ "content": "<|reserved_special_token_138|>",
1152
+ "lstrip": false,
1153
+ "normalized": false,
1154
+ "rstrip": false,
1155
+ "single_word": false,
1156
+ "special": true
1157
+ },
1158
+ "128144": {
1159
+ "content": "<|reserved_special_token_139|>",
1160
+ "lstrip": false,
1161
+ "normalized": false,
1162
+ "rstrip": false,
1163
+ "single_word": false,
1164
+ "special": true
1165
+ },
1166
+ "128145": {
1167
+ "content": "<|reserved_special_token_140|>",
1168
+ "lstrip": false,
1169
+ "normalized": false,
1170
+ "rstrip": false,
1171
+ "single_word": false,
1172
+ "special": true
1173
+ },
1174
+ "128146": {
1175
+ "content": "<|reserved_special_token_141|>",
1176
+ "lstrip": false,
1177
+ "normalized": false,
1178
+ "rstrip": false,
1179
+ "single_word": false,
1180
+ "special": true
1181
+ },
1182
+ "128147": {
1183
+ "content": "<|reserved_special_token_142|>",
1184
+ "lstrip": false,
1185
+ "normalized": false,
1186
+ "rstrip": false,
1187
+ "single_word": false,
1188
+ "special": true
1189
+ },
1190
+ "128148": {
1191
+ "content": "<|reserved_special_token_143|>",
1192
+ "lstrip": false,
1193
+ "normalized": false,
1194
+ "rstrip": false,
1195
+ "single_word": false,
1196
+ "special": true
1197
+ },
1198
+ "128149": {
1199
+ "content": "<|reserved_special_token_144|>",
1200
+ "lstrip": false,
1201
+ "normalized": false,
1202
+ "rstrip": false,
1203
+ "single_word": false,
1204
+ "special": true
1205
+ },
1206
+ "128150": {
1207
+ "content": "<|reserved_special_token_145|>",
1208
+ "lstrip": false,
1209
+ "normalized": false,
1210
+ "rstrip": false,
1211
+ "single_word": false,
1212
+ "special": true
1213
+ },
1214
+ "128151": {
1215
+ "content": "<|reserved_special_token_146|>",
1216
+ "lstrip": false,
1217
+ "normalized": false,
1218
+ "rstrip": false,
1219
+ "single_word": false,
1220
+ "special": true
1221
+ },
1222
+ "128152": {
1223
+ "content": "<|reserved_special_token_147|>",
1224
+ "lstrip": false,
1225
+ "normalized": false,
1226
+ "rstrip": false,
1227
+ "single_word": false,
1228
+ "special": true
1229
+ },
1230
+ "128153": {
1231
+ "content": "<|reserved_special_token_148|>",
1232
+ "lstrip": false,
1233
+ "normalized": false,
1234
+ "rstrip": false,
1235
+ "single_word": false,
1236
+ "special": true
1237
+ },
1238
+ "128154": {
1239
+ "content": "<|reserved_special_token_149|>",
1240
+ "lstrip": false,
1241
+ "normalized": false,
1242
+ "rstrip": false,
1243
+ "single_word": false,
1244
+ "special": true
1245
+ },
1246
+ "128155": {
1247
+ "content": "<|reserved_special_token_150|>",
1248
+ "lstrip": false,
1249
+ "normalized": false,
1250
+ "rstrip": false,
1251
+ "single_word": false,
1252
+ "special": true
1253
+ },
1254
+ "128156": {
1255
+ "content": "<|reserved_special_token_151|>",
1256
+ "lstrip": false,
1257
+ "normalized": false,
1258
+ "rstrip": false,
1259
+ "single_word": false,
1260
+ "special": true
1261
+ },
1262
+ "128157": {
1263
+ "content": "<|reserved_special_token_152|>",
1264
+ "lstrip": false,
1265
+ "normalized": false,
1266
+ "rstrip": false,
1267
+ "single_word": false,
1268
+ "special": true
1269
+ },
1270
+ "128158": {
1271
+ "content": "<|reserved_special_token_153|>",
1272
+ "lstrip": false,
1273
+ "normalized": false,
1274
+ "rstrip": false,
1275
+ "single_word": false,
1276
+ "special": true
1277
+ },
1278
+ "128159": {
1279
+ "content": "<|reserved_special_token_154|>",
1280
+ "lstrip": false,
1281
+ "normalized": false,
1282
+ "rstrip": false,
1283
+ "single_word": false,
1284
+ "special": true
1285
+ },
1286
+ "128160": {
1287
+ "content": "<|reserved_special_token_155|>",
1288
+ "lstrip": false,
1289
+ "normalized": false,
1290
+ "rstrip": false,
1291
+ "single_word": false,
1292
+ "special": true
1293
+ },
1294
+ "128161": {
1295
+ "content": "<|reserved_special_token_156|>",
1296
+ "lstrip": false,
1297
+ "normalized": false,
1298
+ "rstrip": false,
1299
+ "single_word": false,
1300
+ "special": true
1301
+ },
1302
+ "128162": {
1303
+ "content": "<|reserved_special_token_157|>",
1304
+ "lstrip": false,
1305
+ "normalized": false,
1306
+ "rstrip": false,
1307
+ "single_word": false,
1308
+ "special": true
1309
+ },
1310
+ "128163": {
1311
+ "content": "<|reserved_special_token_158|>",
1312
+ "lstrip": false,
1313
+ "normalized": false,
1314
+ "rstrip": false,
1315
+ "single_word": false,
1316
+ "special": true
1317
+ },
1318
+ "128164": {
1319
+ "content": "<|reserved_special_token_159|>",
1320
+ "lstrip": false,
1321
+ "normalized": false,
1322
+ "rstrip": false,
1323
+ "single_word": false,
1324
+ "special": true
1325
+ },
1326
+ "128165": {
1327
+ "content": "<|reserved_special_token_160|>",
1328
+ "lstrip": false,
1329
+ "normalized": false,
1330
+ "rstrip": false,
1331
+ "single_word": false,
1332
+ "special": true
1333
+ },
1334
+ "128166": {
1335
+ "content": "<|reserved_special_token_161|>",
1336
+ "lstrip": false,
1337
+ "normalized": false,
1338
+ "rstrip": false,
1339
+ "single_word": false,
1340
+ "special": true
1341
+ },
1342
+ "128167": {
1343
+ "content": "<|reserved_special_token_162|>",
1344
+ "lstrip": false,
1345
+ "normalized": false,
1346
+ "rstrip": false,
1347
+ "single_word": false,
1348
+ "special": true
1349
+ },
1350
+ "128168": {
1351
+ "content": "<|reserved_special_token_163|>",
1352
+ "lstrip": false,
1353
+ "normalized": false,
1354
+ "rstrip": false,
1355
+ "single_word": false,
1356
+ "special": true
1357
+ },
1358
+ "128169": {
1359
+ "content": "<|reserved_special_token_164|>",
1360
+ "lstrip": false,
1361
+ "normalized": false,
1362
+ "rstrip": false,
1363
+ "single_word": false,
1364
+ "special": true
1365
+ },
1366
+ "128170": {
1367
+ "content": "<|reserved_special_token_165|>",
1368
+ "lstrip": false,
1369
+ "normalized": false,
1370
+ "rstrip": false,
1371
+ "single_word": false,
1372
+ "special": true
1373
+ },
1374
+ "128171": {
1375
+ "content": "<|reserved_special_token_166|>",
1376
+ "lstrip": false,
1377
+ "normalized": false,
1378
+ "rstrip": false,
1379
+ "single_word": false,
1380
+ "special": true
1381
+ },
1382
+ "128172": {
1383
+ "content": "<|reserved_special_token_167|>",
1384
+ "lstrip": false,
1385
+ "normalized": false,
1386
+ "rstrip": false,
1387
+ "single_word": false,
1388
+ "special": true
1389
+ },
1390
+ "128173": {
1391
+ "content": "<|reserved_special_token_168|>",
1392
+ "lstrip": false,
1393
+ "normalized": false,
1394
+ "rstrip": false,
1395
+ "single_word": false,
1396
+ "special": true
1397
+ },
1398
+ "128174": {
1399
+ "content": "<|reserved_special_token_169|>",
1400
+ "lstrip": false,
1401
+ "normalized": false,
1402
+ "rstrip": false,
1403
+ "single_word": false,
1404
+ "special": true
1405
+ },
1406
+ "128175": {
1407
+ "content": "<|reserved_special_token_170|>",
1408
+ "lstrip": false,
1409
+ "normalized": false,
1410
+ "rstrip": false,
1411
+ "single_word": false,
1412
+ "special": true
1413
+ },
1414
+ "128176": {
1415
+ "content": "<|reserved_special_token_171|>",
1416
+ "lstrip": false,
1417
+ "normalized": false,
1418
+ "rstrip": false,
1419
+ "single_word": false,
1420
+ "special": true
1421
+ },
1422
+ "128177": {
1423
+ "content": "<|reserved_special_token_172|>",
1424
+ "lstrip": false,
1425
+ "normalized": false,
1426
+ "rstrip": false,
1427
+ "single_word": false,
1428
+ "special": true
1429
+ },
1430
+ "128178": {
1431
+ "content": "<|reserved_special_token_173|>",
1432
+ "lstrip": false,
1433
+ "normalized": false,
1434
+ "rstrip": false,
1435
+ "single_word": false,
1436
+ "special": true
1437
+ },
1438
+ "128179": {
1439
+ "content": "<|reserved_special_token_174|>",
1440
+ "lstrip": false,
1441
+ "normalized": false,
1442
+ "rstrip": false,
1443
+ "single_word": false,
1444
+ "special": true
1445
+ },
1446
+ "128180": {
1447
+ "content": "<|reserved_special_token_175|>",
1448
+ "lstrip": false,
1449
+ "normalized": false,
1450
+ "rstrip": false,
1451
+ "single_word": false,
1452
+ "special": true
1453
+ },
1454
+ "128181": {
1455
+ "content": "<|reserved_special_token_176|>",
1456
+ "lstrip": false,
1457
+ "normalized": false,
1458
+ "rstrip": false,
1459
+ "single_word": false,
1460
+ "special": true
1461
+ },
1462
+ "128182": {
1463
+ "content": "<|reserved_special_token_177|>",
1464
+ "lstrip": false,
1465
+ "normalized": false,
1466
+ "rstrip": false,
1467
+ "single_word": false,
1468
+ "special": true
1469
+ },
1470
+ "128183": {
1471
+ "content": "<|reserved_special_token_178|>",
1472
+ "lstrip": false,
1473
+ "normalized": false,
1474
+ "rstrip": false,
1475
+ "single_word": false,
1476
+ "special": true
1477
+ },
1478
+ "128184": {
1479
+ "content": "<|reserved_special_token_179|>",
1480
+ "lstrip": false,
1481
+ "normalized": false,
1482
+ "rstrip": false,
1483
+ "single_word": false,
1484
+ "special": true
1485
+ },
1486
+ "128185": {
1487
+ "content": "<|reserved_special_token_180|>",
1488
+ "lstrip": false,
1489
+ "normalized": false,
1490
+ "rstrip": false,
1491
+ "single_word": false,
1492
+ "special": true
1493
+ },
1494
+ "128186": {
1495
+ "content": "<|reserved_special_token_181|>",
1496
+ "lstrip": false,
1497
+ "normalized": false,
1498
+ "rstrip": false,
1499
+ "single_word": false,
1500
+ "special": true
1501
+ },
1502
+ "128187": {
1503
+ "content": "<|reserved_special_token_182|>",
1504
+ "lstrip": false,
1505
+ "normalized": false,
1506
+ "rstrip": false,
1507
+ "single_word": false,
1508
+ "special": true
1509
+ },
1510
+ "128188": {
1511
+ "content": "<|reserved_special_token_183|>",
1512
+ "lstrip": false,
1513
+ "normalized": false,
1514
+ "rstrip": false,
1515
+ "single_word": false,
1516
+ "special": true
1517
+ },
1518
+ "128189": {
1519
+ "content": "<|reserved_special_token_184|>",
1520
+ "lstrip": false,
1521
+ "normalized": false,
1522
+ "rstrip": false,
1523
+ "single_word": false,
1524
+ "special": true
1525
+ },
1526
+ "128190": {
1527
+ "content": "<|reserved_special_token_185|>",
1528
+ "lstrip": false,
1529
+ "normalized": false,
1530
+ "rstrip": false,
1531
+ "single_word": false,
1532
+ "special": true
1533
+ },
1534
+ "128191": {
1535
+ "content": "<|reserved_special_token_186|>",
1536
+ "lstrip": false,
1537
+ "normalized": false,
1538
+ "rstrip": false,
1539
+ "single_word": false,
1540
+ "special": true
1541
+ },
1542
+ "128192": {
1543
+ "content": "<|reserved_special_token_187|>",
1544
+ "lstrip": false,
1545
+ "normalized": false,
1546
+ "rstrip": false,
1547
+ "single_word": false,
1548
+ "special": true
1549
+ },
1550
+ "128193": {
1551
+ "content": "<|reserved_special_token_188|>",
1552
+ "lstrip": false,
1553
+ "normalized": false,
1554
+ "rstrip": false,
1555
+ "single_word": false,
1556
+ "special": true
1557
+ },
1558
+ "128194": {
1559
+ "content": "<|reserved_special_token_189|>",
1560
+ "lstrip": false,
1561
+ "normalized": false,
1562
+ "rstrip": false,
1563
+ "single_word": false,
1564
+ "special": true
1565
+ },
1566
+ "128195": {
1567
+ "content": "<|reserved_special_token_190|>",
1568
+ "lstrip": false,
1569
+ "normalized": false,
1570
+ "rstrip": false,
1571
+ "single_word": false,
1572
+ "special": true
1573
+ },
1574
+ "128196": {
1575
+ "content": "<|reserved_special_token_191|>",
1576
+ "lstrip": false,
1577
+ "normalized": false,
1578
+ "rstrip": false,
1579
+ "single_word": false,
1580
+ "special": true
1581
+ },
1582
+ "128197": {
1583
+ "content": "<|reserved_special_token_192|>",
1584
+ "lstrip": false,
1585
+ "normalized": false,
1586
+ "rstrip": false,
1587
+ "single_word": false,
1588
+ "special": true
1589
+ },
1590
+ "128198": {
1591
+ "content": "<|reserved_special_token_193|>",
1592
+ "lstrip": false,
1593
+ "normalized": false,
1594
+ "rstrip": false,
1595
+ "single_word": false,
1596
+ "special": true
1597
+ },
1598
+ "128199": {
1599
+ "content": "<|reserved_special_token_194|>",
1600
+ "lstrip": false,
1601
+ "normalized": false,
1602
+ "rstrip": false,
1603
+ "single_word": false,
1604
+ "special": true
1605
+ },
1606
+ "128200": {
1607
+ "content": "<|reserved_special_token_195|>",
1608
+ "lstrip": false,
1609
+ "normalized": false,
1610
+ "rstrip": false,
1611
+ "single_word": false,
1612
+ "special": true
1613
+ },
1614
+ "128201": {
1615
+ "content": "<|reserved_special_token_196|>",
1616
+ "lstrip": false,
1617
+ "normalized": false,
1618
+ "rstrip": false,
1619
+ "single_word": false,
1620
+ "special": true
1621
+ },
1622
+ "128202": {
1623
+ "content": "<|reserved_special_token_197|>",
1624
+ "lstrip": false,
1625
+ "normalized": false,
1626
+ "rstrip": false,
1627
+ "single_word": false,
1628
+ "special": true
1629
+ },
1630
+ "128203": {
1631
+ "content": "<|reserved_special_token_198|>",
1632
+ "lstrip": false,
1633
+ "normalized": false,
1634
+ "rstrip": false,
1635
+ "single_word": false,
1636
+ "special": true
1637
+ },
1638
+ "128204": {
1639
+ "content": "<|reserved_special_token_199|>",
1640
+ "lstrip": false,
1641
+ "normalized": false,
1642
+ "rstrip": false,
1643
+ "single_word": false,
1644
+ "special": true
1645
+ },
1646
+ "128205": {
1647
+ "content": "<|reserved_special_token_200|>",
1648
+ "lstrip": false,
1649
+ "normalized": false,
1650
+ "rstrip": false,
1651
+ "single_word": false,
1652
+ "special": true
1653
+ },
1654
+ "128206": {
1655
+ "content": "<|reserved_special_token_201|>",
1656
+ "lstrip": false,
1657
+ "normalized": false,
1658
+ "rstrip": false,
1659
+ "single_word": false,
1660
+ "special": true
1661
+ },
1662
+ "128207": {
1663
+ "content": "<|reserved_special_token_202|>",
1664
+ "lstrip": false,
1665
+ "normalized": false,
1666
+ "rstrip": false,
1667
+ "single_word": false,
1668
+ "special": true
1669
+ },
1670
+ "128208": {
1671
+ "content": "<|reserved_special_token_203|>",
1672
+ "lstrip": false,
1673
+ "normalized": false,
1674
+ "rstrip": false,
1675
+ "single_word": false,
1676
+ "special": true
1677
+ },
1678
+ "128209": {
1679
+ "content": "<|reserved_special_token_204|>",
1680
+ "lstrip": false,
1681
+ "normalized": false,
1682
+ "rstrip": false,
1683
+ "single_word": false,
1684
+ "special": true
1685
+ },
1686
+ "128210": {
1687
+ "content": "<|reserved_special_token_205|>",
1688
+ "lstrip": false,
1689
+ "normalized": false,
1690
+ "rstrip": false,
1691
+ "single_word": false,
1692
+ "special": true
1693
+ },
1694
+ "128211": {
1695
+ "content": "<|reserved_special_token_206|>",
1696
+ "lstrip": false,
1697
+ "normalized": false,
1698
+ "rstrip": false,
1699
+ "single_word": false,
1700
+ "special": true
1701
+ },
1702
+ "128212": {
1703
+ "content": "<|reserved_special_token_207|>",
1704
+ "lstrip": false,
1705
+ "normalized": false,
1706
+ "rstrip": false,
1707
+ "single_word": false,
1708
+ "special": true
1709
+ },
1710
+ "128213": {
1711
+ "content": "<|reserved_special_token_208|>",
1712
+ "lstrip": false,
1713
+ "normalized": false,
1714
+ "rstrip": false,
1715
+ "single_word": false,
1716
+ "special": true
1717
+ },
1718
+ "128214": {
1719
+ "content": "<|reserved_special_token_209|>",
1720
+ "lstrip": false,
1721
+ "normalized": false,
1722
+ "rstrip": false,
1723
+ "single_word": false,
1724
+ "special": true
1725
+ },
1726
+ "128215": {
1727
+ "content": "<|reserved_special_token_210|>",
1728
+ "lstrip": false,
1729
+ "normalized": false,
1730
+ "rstrip": false,
1731
+ "single_word": false,
1732
+ "special": true
1733
+ },
1734
+ "128216": {
1735
+ "content": "<|reserved_special_token_211|>",
1736
+ "lstrip": false,
1737
+ "normalized": false,
1738
+ "rstrip": false,
1739
+ "single_word": false,
1740
+ "special": true
1741
+ },
1742
+ "128217": {
1743
+ "content": "<|reserved_special_token_212|>",
1744
+ "lstrip": false,
1745
+ "normalized": false,
1746
+ "rstrip": false,
1747
+ "single_word": false,
1748
+ "special": true
1749
+ },
1750
+ "128218": {
1751
+ "content": "<|reserved_special_token_213|>",
1752
+ "lstrip": false,
1753
+ "normalized": false,
1754
+ "rstrip": false,
1755
+ "single_word": false,
1756
+ "special": true
1757
+ },
1758
+ "128219": {
1759
+ "content": "<|reserved_special_token_214|>",
1760
+ "lstrip": false,
1761
+ "normalized": false,
1762
+ "rstrip": false,
1763
+ "single_word": false,
1764
+ "special": true
1765
+ },
1766
+ "128220": {
1767
+ "content": "<|reserved_special_token_215|>",
1768
+ "lstrip": false,
1769
+ "normalized": false,
1770
+ "rstrip": false,
1771
+ "single_word": false,
1772
+ "special": true
1773
+ },
1774
+ "128221": {
1775
+ "content": "<|reserved_special_token_216|>",
1776
+ "lstrip": false,
1777
+ "normalized": false,
1778
+ "rstrip": false,
1779
+ "single_word": false,
1780
+ "special": true
1781
+ },
1782
+ "128222": {
1783
+ "content": "<|reserved_special_token_217|>",
1784
+ "lstrip": false,
1785
+ "normalized": false,
1786
+ "rstrip": false,
1787
+ "single_word": false,
1788
+ "special": true
1789
+ },
1790
+ "128223": {
1791
+ "content": "<|reserved_special_token_218|>",
1792
+ "lstrip": false,
1793
+ "normalized": false,
1794
+ "rstrip": false,
1795
+ "single_word": false,
1796
+ "special": true
1797
+ },
1798
+ "128224": {
1799
+ "content": "<|reserved_special_token_219|>",
1800
+ "lstrip": false,
1801
+ "normalized": false,
1802
+ "rstrip": false,
1803
+ "single_word": false,
1804
+ "special": true
1805
+ },
1806
+ "128225": {
1807
+ "content": "<|reserved_special_token_220|>",
1808
+ "lstrip": false,
1809
+ "normalized": false,
1810
+ "rstrip": false,
1811
+ "single_word": false,
1812
+ "special": true
1813
+ },
1814
+ "128226": {
1815
+ "content": "<|reserved_special_token_221|>",
1816
+ "lstrip": false,
1817
+ "normalized": false,
1818
+ "rstrip": false,
1819
+ "single_word": false,
1820
+ "special": true
1821
+ },
1822
+ "128227": {
1823
+ "content": "<|reserved_special_token_222|>",
1824
+ "lstrip": false,
1825
+ "normalized": false,
1826
+ "rstrip": false,
1827
+ "single_word": false,
1828
+ "special": true
1829
+ },
1830
+ "128228": {
1831
+ "content": "<|reserved_special_token_223|>",
1832
+ "lstrip": false,
1833
+ "normalized": false,
1834
+ "rstrip": false,
1835
+ "single_word": false,
1836
+ "special": true
1837
+ },
1838
+ "128229": {
1839
+ "content": "<|reserved_special_token_224|>",
1840
+ "lstrip": false,
1841
+ "normalized": false,
1842
+ "rstrip": false,
1843
+ "single_word": false,
1844
+ "special": true
1845
+ },
1846
+ "128230": {
1847
+ "content": "<|reserved_special_token_225|>",
1848
+ "lstrip": false,
1849
+ "normalized": false,
1850
+ "rstrip": false,
1851
+ "single_word": false,
1852
+ "special": true
1853
+ },
1854
+ "128231": {
1855
+ "content": "<|reserved_special_token_226|>",
1856
+ "lstrip": false,
1857
+ "normalized": false,
1858
+ "rstrip": false,
1859
+ "single_word": false,
1860
+ "special": true
1861
+ },
1862
+ "128232": {
1863
+ "content": "<|reserved_special_token_227|>",
1864
+ "lstrip": false,
1865
+ "normalized": false,
1866
+ "rstrip": false,
1867
+ "single_word": false,
1868
+ "special": true
1869
+ },
1870
+ "128233": {
1871
+ "content": "<|reserved_special_token_228|>",
1872
+ "lstrip": false,
1873
+ "normalized": false,
1874
+ "rstrip": false,
1875
+ "single_word": false,
1876
+ "special": true
1877
+ },
1878
+ "128234": {
1879
+ "content": "<|reserved_special_token_229|>",
1880
+ "lstrip": false,
1881
+ "normalized": false,
1882
+ "rstrip": false,
1883
+ "single_word": false,
1884
+ "special": true
1885
+ },
1886
+ "128235": {
1887
+ "content": "<|reserved_special_token_230|>",
1888
+ "lstrip": false,
1889
+ "normalized": false,
1890
+ "rstrip": false,
1891
+ "single_word": false,
1892
+ "special": true
1893
+ },
1894
+ "128236": {
1895
+ "content": "<|reserved_special_token_231|>",
1896
+ "lstrip": false,
1897
+ "normalized": false,
1898
+ "rstrip": false,
1899
+ "single_word": false,
1900
+ "special": true
1901
+ },
1902
+ "128237": {
1903
+ "content": "<|reserved_special_token_232|>",
1904
+ "lstrip": false,
1905
+ "normalized": false,
1906
+ "rstrip": false,
1907
+ "single_word": false,
1908
+ "special": true
1909
+ },
1910
+ "128238": {
1911
+ "content": "<|reserved_special_token_233|>",
1912
+ "lstrip": false,
1913
+ "normalized": false,
1914
+ "rstrip": false,
1915
+ "single_word": false,
1916
+ "special": true
1917
+ },
1918
+ "128239": {
1919
+ "content": "<|reserved_special_token_234|>",
1920
+ "lstrip": false,
1921
+ "normalized": false,
1922
+ "rstrip": false,
1923
+ "single_word": false,
1924
+ "special": true
1925
+ },
1926
+ "128240": {
1927
+ "content": "<|reserved_special_token_235|>",
1928
+ "lstrip": false,
1929
+ "normalized": false,
1930
+ "rstrip": false,
1931
+ "single_word": false,
1932
+ "special": true
1933
+ },
1934
+ "128241": {
1935
+ "content": "<|reserved_special_token_236|>",
1936
+ "lstrip": false,
1937
+ "normalized": false,
1938
+ "rstrip": false,
1939
+ "single_word": false,
1940
+ "special": true
1941
+ },
1942
+ "128242": {
1943
+ "content": "<|reserved_special_token_237|>",
1944
+ "lstrip": false,
1945
+ "normalized": false,
1946
+ "rstrip": false,
1947
+ "single_word": false,
1948
+ "special": true
1949
+ },
1950
+ "128243": {
1951
+ "content": "<|reserved_special_token_238|>",
1952
+ "lstrip": false,
1953
+ "normalized": false,
1954
+ "rstrip": false,
1955
+ "single_word": false,
1956
+ "special": true
1957
+ },
1958
+ "128244": {
1959
+ "content": "<|reserved_special_token_239|>",
1960
+ "lstrip": false,
1961
+ "normalized": false,
1962
+ "rstrip": false,
1963
+ "single_word": false,
1964
+ "special": true
1965
+ },
1966
+ "128245": {
1967
+ "content": "<|reserved_special_token_240|>",
1968
+ "lstrip": false,
1969
+ "normalized": false,
1970
+ "rstrip": false,
1971
+ "single_word": false,
1972
+ "special": true
1973
+ },
1974
+ "128246": {
1975
+ "content": "<|reserved_special_token_241|>",
1976
+ "lstrip": false,
1977
+ "normalized": false,
1978
+ "rstrip": false,
1979
+ "single_word": false,
1980
+ "special": true
1981
+ },
1982
+ "128247": {
1983
+ "content": "<|reserved_special_token_242|>",
1984
+ "lstrip": false,
1985
+ "normalized": false,
1986
+ "rstrip": false,
1987
+ "single_word": false,
1988
+ "special": true
1989
+ },
1990
+ "128248": {
1991
+ "content": "<|reserved_special_token_243|>",
1992
+ "lstrip": false,
1993
+ "normalized": false,
1994
+ "rstrip": false,
1995
+ "single_word": false,
1996
+ "special": true
1997
+ },
1998
+ "128249": {
1999
+ "content": "<|reserved_special_token_244|>",
2000
+ "lstrip": false,
2001
+ "normalized": false,
2002
+ "rstrip": false,
2003
+ "single_word": false,
2004
+ "special": true
2005
+ },
2006
+ "128250": {
2007
+ "content": "<|reserved_special_token_245|>",
2008
+ "lstrip": false,
2009
+ "normalized": false,
2010
+ "rstrip": false,
2011
+ "single_word": false,
2012
+ "special": true
2013
+ },
2014
+ "128251": {
2015
+ "content": "<|reserved_special_token_246|>",
2016
+ "lstrip": false,
2017
+ "normalized": false,
2018
+ "rstrip": false,
2019
+ "single_word": false,
2020
+ "special": true
2021
+ },
2022
+ "128252": {
2023
+ "content": "<|reserved_special_token_247|>",
2024
+ "lstrip": false,
2025
+ "normalized": false,
2026
+ "rstrip": false,
2027
+ "single_word": false,
2028
+ "special": true
2029
+ },
2030
+ "128253": {
2031
+ "content": "<|reserved_special_token_248|>",
2032
+ "lstrip": false,
2033
+ "normalized": false,
2034
+ "rstrip": false,
2035
+ "single_word": false,
2036
+ "special": true
2037
+ },
2038
+ "128254": {
2039
+ "content": "<|reserved_special_token_249|>",
2040
+ "lstrip": false,
2041
+ "normalized": false,
2042
+ "rstrip": false,
2043
+ "single_word": false,
2044
+ "special": true
2045
+ },
2046
+ "128255": {
2047
+ "content": "<|reserved_special_token_250|>",
2048
+ "lstrip": false,
2049
+ "normalized": false,
2050
+ "rstrip": false,
2051
+ "single_word": false,
2052
+ "special": true
2053
+ },
2054
+ "128256": {
2055
+ "content": "<|eom_id|>",
2056
+ "lstrip": false,
2057
+ "normalized": false,
2058
+ "rstrip": false,
2059
+ "single_word": false,
2060
+ "special": true
2061
+ },
2062
+ "128257": {
2063
+ "content": "<|python_tag|>",
2064
+ "lstrip": false,
2065
+ "normalized": false,
2066
+ "rstrip": false,
2067
+ "single_word": false,
2068
+ "special": true
2069
+ },
2070
+ "128258": {
2071
+ "content": "<|NONE|>",
2072
+ "lstrip": false,
2073
+ "normalized": false,
2074
+ "rstrip": false,
2075
+ "single_word": false,
2076
+ "special": true
2077
+ }
2078
+ },
2079
+ "bos_token": "<|begin_of_text|>",
2080
+ "chat_template": "{# version=v3-llama3.1 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- if examples_info | length > 0 -%}\n {# Append each example info #}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{ \"\\n\" + offset + param_declaration }}\n{%- endmacro -%}\n\n{%- macro convert_data_type(param_type) -%}\n {%- if param_type == \"integer\" or param_type == \"float\" -%}\n {{ \"number\" }}\n {%- else -%}\n {{ param_type }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_type(param) -%}\n {%- set param_type = \"any\" -%}\n\n {%- if \"type\" in param -%}\n {%- set raw_param_type = param[\"type\"] -%}\n {%- if raw_param_type is iterable and raw_param_type is not string -%}\n {%- set param_type = raw_param_type | join(\" | \") -%}\n {%- else -%}\n {%- set param_type = raw_param_type -%}\n {%- endif -%}\n {{ convert_data_type(param_type) }}\n {%- elif \"oneOf\" in param -%}\n {%- set one_of_types = param[\"oneOf\"]|selectattr(\"type\", \"defined\")|list -%}\n {%- set one_of_types = one_of_types|map(attribute=\"type\")|unique|list -%}\n {{ convert_data_type(one_of_types | join(\" | \")) }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_format_param(param) -%}\n {%- if \"format\" in param -%}\n {{ param[\"format\"] }}\n {%- elif \"oneOf\" in param -%}\n {%- set formats = [] -%}\n {%- for item in param[\"oneOf\"] -%}\n {%- if \"format\" in item -%}\n {%- if item[\"format\"] == param[\"oneOf\"][-1][\"format\"] -%}\n {{ item[\"format\"] }}\n {%- else -%}\n {{ item[\"format\"] + \" or \"}}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\" }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_info(param) -%}\n {%- set param_type = param.get(\"type\", \"any\") -%}\n {%- set format_param = get_format_param(param) -%}\n\n {%- if \"description\" in param or \"default\" in param or format_param != \"<|NONE|>\" or param[\"maximum\"] or param[\"minimum\"] or param[\"maxLength\"] or param[\"minLength\"] -%}\n {{ \"//\" }}\n {%- if \"description\" in param -%}\n {%- set desc = param[\"description\"] -%}\n {%- if not desc.endswith(\".\") -%}\n {%- set desc = desc + \".\" -%}\n {%- endif -%}\n {{ \" \" + desc }}\n {%- endif -%}\n\n {%- if \"default\" in param -%}\n {%- set default_value = param[\"default\"] -%}\n {%- if param_type == \"string\" -%}\n {%- set default_value = '\"' ~ default_value ~ '\"' -%}\n {%- endif -%}\n {{ \" Default=\" ~ default_value ~ \".\" }}\n {%- endif -%}\n\n {%- set format_param = get_format_param(param) -%}\n {%- if format_param != \"<|NONE|>\" -%}\n {{ \" Format=\" ~ format_param }}\n {%- endif -%}\n\n {%- for field, field_name in [(\"maximum\", \"Maximum\"), (\"minimum\", \"Minimum\"), (\"maxLength\", \"Maximum length\"), (\"minLength\", \"Minimum length\")] -%}\n {%- if field in param -%}\n {{ \" \" + field_name ~ \"=\" ~ param[field] }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\"}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_enum_option_str(enum_options) -%}\n {%- for v in enum_options -%}\n {%- if v is string -%}\n {{ '\"' + v + '\"' }}\n {%- else -%}\n {{ v }}\n {%- endif -%}\n {%- if enum_options|length > 0 and v != enum_options[-1] -%}\n {{ \" | 
\" }}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro get_array_typescript(param_name, param_dic, depth) -%}\n {%- set offset = '' -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- set items_info = param_dic.get('items', {}) -%}\n\n {%- if items_info|length == 0 -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": []\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- set array_type = get_param_type(items_info) -%}\n {%- if array_type == 'object' -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": {\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"{\" }}\n {%- endif -%}\n {{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}}\n {{- \"\\n\" + offset + \"}[]\" }}\n {%- elif array_type == 'array' -%}\n {%- set item_info = get_array_typescript(None, items_info, depth + 1) -%}\n {%- if not param_name -%}\n {{ \"\\n\" + item_info + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + item_info|trim + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if 'enum' in items_info -%}\n {%- set item_type = get_enum_option_str(items_info['enum']) -%}\n {%- if param_name is none -%}\n {{ \"(\" + item_type + \")[]\"}}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": (\" + item_type + \")[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if param_name is none -%}\n {{ \"\\n\" + array_type + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + array_type + \"[],\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}\n {%- set res = \"\" -%}\n {%- for param_name, param in properties.items() -%}\n {%- if param is mapping -%}\n {%- set comment_info = get_param_info(param) -%}\n {# Param Examples #}\n {%- set examples_info = [] -%}\n {%- if \"examples\" in param -%}\n {%- set examples_info = [\"Example \" + param_name + \":\"] -%}\n {%- set examples_info = examples_info + param[\"examples\"] -%}\n {%- endif -%}\n\n {# Param Name declaration #}\n {%- set param_declaration = param_name -%}\n {%- if required_params is iterable and param_name not in required_params -%}\n {%- set param_declaration = param_declaration + \"?\" -%}\n {%- endif -%}\n\n {%- set param_type = get_param_type(param) -%}\n\n {# Handle indentation based on depth #}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n\n {%- if param_type == \"object\" -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": {\" -%}\n {{ \"\\n\" + offset + param_declaration -}}\n {{- get_parameter_typescript(param.get(\"properties\", {}), param.get(\"required\", []), depth + 1) -}}\n {{- \"\\n\" + offset + \"},\" }}\n {%- elif param_type == \"array\" -%}\n {%- set item_info = param.get(\"items\", {}) -%}\n {%- if \"type\" not in item_info -%}\n {%- set param_declaration = param_declaration + \": [],\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- else -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if 
examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}\n {%- if not array_declaration.endswith(\",\") -%}\n {%- set array_declaration = array_declaration + \",\" -%}\n {%- endif -%}\n {{ array_declaration}}\n {%- endif -%}\n {%- else -%}\n {%- if \"enum\" in param -%}\n {%- set param_type = get_enum_option_str(param[\"enum\"]) -%}\n {%- endif -%}\n {%- if \"nullable\" in param and param[\"nullable\"] -%}\n {%- set param_type = param_type + \" | null\" -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": \" + param_type + \",\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro generate_schema_from_functions(functions, namespace='functions') -%}\n {{ \"// Supported function definitions that should be called when necessary.\\n\" -}}\n {{- \"namespace \" + namespace + \" {\\n\\n\" -}}\n\n {%- for function in functions -%}\n {%- if function.get(\"function\") -%}\n {%- set function = function.get(\"function\") -%}\n {%- endif -%}\n\n {%- set function_name = function.get(\"name\") -%}\n {%- if function_name -%}\n {%- set description = function.get('description', '') -%}\n {%- set parameters = function.get('parameters', {}) -%}\n {{- \"// \" + description + \"\\n\" -}}\n {{- \"type \" + function_name -}}\n {%- if parameters and parameters.get(\"properties\") -%}\n {{- \" = (_: {\" -}}\n {%- set required_params = parameters.get(\"required\", []) -%}\n {{ get_parameter_typescript(parameters.get(\"properties\"), required_params, 0) -}}\n {{- \"\\n}) => any;\\n\\n\" }}\n {%- else -%}\n {{ \" = () => any;\\n\\n\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {{ \"} // namespace \" + namespace }}\n{%- endmacro -%}\n\n{%- if not tools is defined -%}\n {%- set tools = none -%}\n{%- endif -%}\n\n{%- set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", \"code_interpreter\") | list | length > 0 -%}\n{%- if has_code_interpreter -%}\n {%- set tools = tools | rejectattr(\"type\", \"equalto\", \"code_interpreter\") | list -%}\n{%- endif -%}\n\n{#- System message + builtin tools #}\n{{- bos_token + \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if has_code_interpreter %}\n {{- \"Environment: ipython\\n\\n\" }}\n{%- else -%}\n {{ \"\"}}\n{%- endif %}\n{%- if tools %}\n {{- \"\\nYou have access to the following functions:\\n\\n\" }}\n {%- for t in tools %}\n {%- if \"type\" in t -%}\n {{ \"Use the function '\" + t[\"function\"][\"name\"] + \"' to '\" + t[\"function\"][\"description\"] + \"'\\n\" + t[\"function\"] | tojson() }}\n {%- else -%}\n {{ \"Use the function '\" + t[\"name\"] + \"' to '\" + t[\"description\"] + \"'\\n\" + t | tojson }}\n {%- endif -%}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- '\\nThink very carefully before calling functions.\\nIf a you choose to call a function ONLY reply in the following format:\\n<{start_tag}={function_name}>{parameters}{end_tag}\\nwhere\\n\\nstart_tag => `<function`\\nparameters => a JSON dict with the function argument name as key and function argument value as value.\\nend_tag => `</function>`\\n\\nHere is an example,\\n<function=example_function_name>{\"example_name\": \"example_value\"}</function>\\n\\nReminder:\\n- If looking for real time information use relevant functions before falling 
back to brave_search\\n- Function calls MUST follow the specified format, start with <function= and end with </function>\\n- Required parameters MUST be specified\\n- Only call one function at a time\\n- Put the entire function call reply on one line\\n\\n' -}}\n{%- endif %}\n{{- \"<|eot_id|>\" -}}\n\n{%- for message in messages -%}\n {%- if message['role'] == 'user' or message['role'] == 'system' -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- elif message['role'] == 'tool' -%}\n {{ '<|start_header_id|>ipython<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- else -%}\n {%- if (message['content'] and message['content']|length > 0) or ('tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0) -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'}}\n {%- endif -%}\n {%- if message['content'] and message['content']|length > 0 -%}\n {{ message['content'] }}\n {%- endif -%}\n {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0 -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- if tool_call[\"function\"][\"name\"] == \"python\" -%}\n {{ '<|python_tag|>' + tool_call['function']['arguments'] }}\n {%- else -%}\n {{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] + '</function>' }}\n {%- endif -%}\n {%- endfor -%}\n {{ '<|eom_id|>' }}\n {%- elif message['content'] and message['content']|length > 0 -%}\n {{ '<|eot_id|>' }}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif -%}\n",
2081
+ "clean_up_tokenization_spaces": true,
2082
+ "eos_token": "<|eot_id|>",
2083
+ "extra_special_tokens": {},
2084
+ "max_length": 8192,
2085
+ "model_input_names": [
2086
+ "input_ids",
2087
+ "attention_mask"
2088
+ ],
2089
+ "model_max_length": 1000000000000000019884624838656,
2090
+ "pad_token": "<|end_of_text|>",
2091
+ "stride": 0,
2092
+ "tokenizer_class": "PreTrainedTokenizer",
2093
+ "truncation_side": "right",
2094
+ "truncation_strategy": "longest_first"
2095
+ }
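
A minimal usage sketch (not part of the commit): the config above registers IDs 128069-128255 as reserved special tokens, sets `<|eot_id|>` as EOS, `<|end_of_text|>` as the pad token, and ships a Llama-3.1-style tool-calling chat template. The snippet below shows how one might load the tokenizer from a local checkout of this folder and render that template; the local path, the `get_weather` tool schema, and the example message are purely illustrative assumptions, and it presumes the `transformers` library is installed.

```python
# Sketch only: load the tokenizer shipped in this folder and render the
# tool-calling chat template defined in tokenizer_config.json above.
from transformers import AutoTokenizer

# Assumption: this repo has been checked out locally into the current directory.
tokenizer = AutoTokenizer.from_pretrained("./")

# Sanity checks against the config above.
print(tokenizer.eos_token)                                # "<|eot_id|>"
print(tokenizer.convert_ids_to_tokens([128256, 128257]))  # ["<|eom_id|>", "<|python_tag|>"]

# Hypothetical tool schema; the chat template serializes it into the system
# prompt and instructs the model to answer tool calls as
# <function=NAME>{...}</function>, terminated by <|eom_id|>.
tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical function name
        "description": "Look up the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What's the weather in Seoul?"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    add_generation_prompt=True,
    tokenize=False,
)
print(prompt)  # ends with <|start_header_id|>assistant<|end_header_id|>
```

With `add_generation_prompt=True` the template appends the assistant header, so generation starts directly in the assistant turn; assistant messages that carry `tool_calls` are closed with `<|eom_id|>` rather than `<|eot_id|>`, matching the special tokens declared above.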