openvino-ci committed on
Commit
b8e8ef5
·
verified ·
1 Parent(s): bdcbb10

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ license_link: https://huggingface.co/microsoft/phi-4/resolve/main/LICENSE
4
+ base_model:
5
+ - microsoft/phi-4
6
+ base_model_relation: quantized
7
+ language:
8
+ - en
9
+ pipeline_tag: text-generation
10
+ tags:
11
+ - phi
12
+ - nlp
13
+ - math
14
+ - code
15
+ - chat
16
+ - conversational
17
+ library_name: transformers
18
+ ---
19
+
20
+ # phi-4-int8-ov
21
+ * Model creator: [microsoft](https://huggingface.co/microsoft)
22
+ * Original model: [phi-4](https://huggingface.co/microsoft/phi-4)
23
+
24
+ ## Description
25
+ This is the [phi-4](https://huggingface.co/microsoft/phi-4) model converted to the [OpenVINO™ IR](https://docs.openvino.ai/2025/documentation/openvino-ir-format.html) (Intermediate Representation) format with weights compressed to INT8 by [NNCF](https://github.com/openvinotoolkit/nncf).
26
+
27
+ ## Quantization Parameters
28
+
29
+ Weight compression was performed using `nncf.compress_weights` with the following parameters:
30
+
31
+ * mode: **INT8_ASYM**
32
+
33
+ For more information on quantization, check the [OpenVINO model optimization guide](https://docs.openvino.ai/2025/openvino-workflow/model-optimization-guide/weight-compression.html).
34
+
35
+ ## Compatibility
36
+
37
+ The provided OpenVINO™ IR model is compatible with:
38
+
39
+ * OpenVINO version 2025.1.0 and higher
40
+ * Optimum Intel 1.24.0 and higher
41
+
42
+ ## Running Model Inference with [Optimum Intel](https://huggingface.co/docs/optimum/intel/index)
43
+
44
+ 1. Install packages required for using [Optimum Intel](https://huggingface.co/docs/optimum/intel/index) integration with the OpenVINO backend:
45
+
46
+ ```
47
+ pip install optimum[openvino]
48
+ ```
49
+
50
+ 2. Run model inference:
51
+
52
+ ```
53
+ from transformers import AutoTokenizer
54
+ from optimum.intel.openvino import OVModelForCausalLM
55
+
56
+ model_id = "OpenVINO/phi-4-int8-ov"
57
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
58
+ model = OVModelForCausalLM.from_pretrained(model_id)
59
+
60
+ inputs = tokenizer("What is OpenVINO?", return_tensors="pt")
61
+
62
+ outputs = model.generate(**inputs, max_length=200)
63
+ text = tokenizer.batch_decode(outputs)[0]
64
+ print(text)
65
+ ```
66
+
67
+ For more examples and possible optimizations, refer to the [Inference with Optimum Intel](https://docs.openvino.ai/2025/openvino-workflow-generative/inference-with-optimum-intel.html) guide.
68
+
69
+ ## Running Model Inference with [OpenVINO GenAI](https://github.com/openvinotoolkit/openvino.genai)
70
+
71
+
72
+ 1. Install packages required for using OpenVINO GenAI.
73
+ ```
74
+ pip install openvino-genai huggingface_hub
75
+ ```
76
+
77
+ 2. Download the model from the Hugging Face Hub:
78
+
79
+ ```
80
+ import huggingface_hub as hf_hub
81
+
82
+ model_id = "OpenVINO/phi-4-int8-ov"
83
+ model_path = "phi-4-int8-ov"
84
+
85
+ hf_hub.snapshot_download(model_id, local_dir=model_path)
86
+
87
+ ```
88
+
89
+ 3. Run model inference:
90
+
91
+ ```
92
+ import openvino_genai as ov_genai
93
+
94
+ device = "CPU"
95
+ pipe = ov_genai.LLMPipeline(model_path, device)
96
+ print(pipe.generate("What is OpenVINO?", max_length=200))
97
+ ```
98
+
99
+ More GenAI usage examples can be found in the OpenVINO GenAI library [docs](https://docs.openvino.ai/2025/openvino-workflow-generative/inference-with-genai.html) and [samples](https://github.com/openvinotoolkit/openvino.genai?tab=readme-ov-file#openvino-genai-samples).
100
+
101
+ You can find more detailed usage examples in OpenVINO Notebooks:
102
+
103
+ - [LLM](https://openvinotoolkit.github.io/openvino_notebooks/?search=LLM)
104
+ - [RAG text generation](https://openvinotoolkit.github.io/openvino_notebooks/?search=RAG+system&tasks=Text+Generation)
105
+
106
+ ## Limitations
107
+
108
+ Check the original [model card](https://huggingface.co/microsoft/phi-4) for limitations.
109
+
110
+ ## Legal information
111
+
112
+ The original model is distributed under the [MIT](https://huggingface.co/microsoft/phi-4/resolve/main/LICENSE) license. More details can be found in [phi-4](https://huggingface.co/microsoft/phi-4).
113
+
114
+ ## Disclaimer
115
+
116
+ Intel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights.
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "architectures": [
4
+ "Phi3ForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 100257,
9
+ "embd_pdrop": 0.0,
10
+ "eos_token_id": 100265,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 5120,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 17920,
15
+ "max_position_embeddings": 16384,
16
+ "model_type": "phi3",
17
+ "num_attention_heads": 40,
18
+ "num_hidden_layers": 40,
19
+ "num_key_value_heads": 10,
20
+ "original_max_position_embeddings": 16384,
21
+ "pad_token_id": 100349,
22
+ "partial_rotary_factor": 1.0,
23
+ "resid_pdrop": 0.0,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 250000,
27
+ "sliding_window": null,
28
+ "tie_word_embeddings": false,
29
+ "torch_dtype": "bfloat16",
30
+ "transformers_version": "4.51.3",
31
+ "use_cache": true,
32
+ "vocab_size": 100352
33
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": 100265,
5
+ "pad_token_id": 100349,
6
+ "transformers_version": "4.51.3"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
openvino_detokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148a63f48579c1ed8cb166df1aec16306f7fc193a2afa166bd9409bab7f76415
3
+ size 1448178
openvino_detokenizer.xml ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="detokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_210547" type="Parameter" version="opset1">
5
+ <data shape="?,?" element_type="i64" />
6
+ <output>
7
+ <port id="0" precision="I64" names="Parameter_210547">
8
+ <dim>-1</dim>
9
+ <dim>-1</dim>
10
+ </port>
11
+ </output>
12
+ </layer>
13
+ <layer id="1" name="Convert_210717" type="Convert" version="opset1">
14
+ <data destination_type="i32" />
15
+ <input>
16
+ <port id="0" precision="I64">
17
+ <dim>-1</dim>
18
+ <dim>-1</dim>
19
+ </port>
20
+ </input>
21
+ <output>
22
+ <port id="1" precision="I32">
23
+ <dim>-1</dim>
24
+ <dim>-1</dim>
25
+ </port>
26
+ </output>
27
+ </layer>
28
+ <layer id="2" name="Constant_210549" type="Const" version="opset1">
29
+ <data element_type="i32" shape="100352" offset="0" size="401408" />
30
+ <output>
31
+ <port id="0" precision="I32">
32
+ <dim>100352</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="Constant_210551" type="Const" version="opset1">
37
+ <data element_type="i32" shape="100352" offset="401408" size="401408" />
38
+ <output>
39
+ <port id="0" precision="I32">
40
+ <dim>100352</dim>
41
+ </port>
42
+ </output>
43
+ </layer>
44
+ <layer id="4" name="Constant_210553" type="Const" version="opset1">
45
+ <data element_type="u8" shape="644978" offset="802816" size="644978" />
46
+ <output>
47
+ <port id="0" precision="U8">
48
+ <dim>644978</dim>
49
+ </port>
50
+ </output>
51
+ </layer>
52
+ <layer id="5" name="Slice_210558" type="Const" version="opset1">
53
+ <data element_type="i32" shape="96" offset="1447794" size="384" />
54
+ <output>
55
+ <port id="0" precision="I32">
56
+ <dim>96</dim>
57
+ </port>
58
+ </output>
59
+ </layer>
60
+ <layer id="6" name="VocabDecoder_210560" type="VocabDecoder" version="extension">
61
+ <data skip_tokens="" />
62
+ <input>
63
+ <port id="0" precision="I32">
64
+ <dim>-1</dim>
65
+ <dim>-1</dim>
66
+ </port>
67
+ <port id="1" precision="I32">
68
+ <dim>100352</dim>
69
+ </port>
70
+ <port id="2" precision="I32">
71
+ <dim>100352</dim>
72
+ </port>
73
+ <port id="3" precision="U8">
74
+ <dim>644978</dim>
75
+ </port>
76
+ <port id="4" precision="I32">
77
+ <dim>96</dim>
78
+ </port>
79
+ </input>
80
+ <output>
81
+ <port id="5" precision="I32">
82
+ <dim>-1</dim>
83
+ </port>
84
+ <port id="6" precision="I32">
85
+ <dim>-1</dim>
86
+ </port>
87
+ <port id="7" precision="I32">
88
+ <dim>-1</dim>
89
+ </port>
90
+ <port id="8" precision="I32">
91
+ <dim>-1</dim>
92
+ </port>
93
+ <port id="9" precision="U8">
94
+ <dim>-1</dim>
95
+ </port>
96
+ </output>
97
+ </layer>
98
+ <layer id="7" name="FuzeRagged_210561" type="FuzeRagged" version="extension">
99
+ <input>
100
+ <port id="0" precision="I32">
101
+ <dim>-1</dim>
102
+ </port>
103
+ <port id="1" precision="I32">
104
+ <dim>-1</dim>
105
+ </port>
106
+ <port id="2" precision="I32">
107
+ <dim>-1</dim>
108
+ </port>
109
+ <port id="3" precision="I32">
110
+ <dim>-1</dim>
111
+ </port>
112
+ </input>
113
+ <output>
114
+ <port id="4" precision="I32">
115
+ <dim>-1</dim>
116
+ </port>
117
+ <port id="5" precision="I32">
118
+ <dim>-1</dim>
119
+ </port>
120
+ </output>
121
+ </layer>
122
+ <layer id="8" name="UTF8Validate_210562" type="UTF8Validate" version="extension">
123
+ <data replace_mode="true" />
124
+ <input>
125
+ <port id="0" precision="I32">
126
+ <dim>-1</dim>
127
+ </port>
128
+ <port id="1" precision="I32">
129
+ <dim>-1</dim>
130
+ </port>
131
+ <port id="2" precision="U8">
132
+ <dim>-1</dim>
133
+ </port>
134
+ </input>
135
+ <output>
136
+ <port id="3" precision="I32">
137
+ <dim>-1</dim>
138
+ </port>
139
+ <port id="4" precision="I32">
140
+ <dim>-1</dim>
141
+ </port>
142
+ <port id="5" precision="U8">
143
+ <dim>-1</dim>
144
+ </port>
145
+ </output>
146
+ </layer>
147
+ <layer id="9" name="StringTensorPack_210563" type="StringTensorPack" version="opset15">
148
+ <input>
149
+ <port id="0" precision="I32">
150
+ <dim>-1</dim>
151
+ </port>
152
+ <port id="1" precision="I32">
153
+ <dim>-1</dim>
154
+ </port>
155
+ <port id="2" precision="U8">
156
+ <dim>-1</dim>
157
+ </port>
158
+ </input>
159
+ <output>
160
+ <port id="3" precision="STRING" names="Result_210564,string_output">
161
+ <dim>-1</dim>
162
+ </port>
163
+ </output>
164
+ </layer>
165
+ <layer id="10" name="Result_210564" type="Result" version="opset1" output_names="Result_210564,string_output">
166
+ <input>
167
+ <port id="0" precision="STRING">
168
+ <dim>-1</dim>
169
+ </port>
170
+ </input>
171
+ </layer>
172
+ </layers>
173
+ <edges>
174
+ <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
175
+ <edge from-layer="1" from-port="1" to-layer="6" to-port="0" />
176
+ <edge from-layer="2" from-port="0" to-layer="6" to-port="1" />
177
+ <edge from-layer="3" from-port="0" to-layer="6" to-port="2" />
178
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="3" />
179
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="4" />
180
+ <edge from-layer="6" from-port="7" to-layer="7" to-port="2" />
181
+ <edge from-layer="6" from-port="9" to-layer="8" to-port="2" />
182
+ <edge from-layer="6" from-port="8" to-layer="7" to-port="3" />
183
+ <edge from-layer="6" from-port="6" to-layer="7" to-port="1" />
184
+ <edge from-layer="6" from-port="5" to-layer="7" to-port="0" />
185
+ <edge from-layer="7" from-port="4" to-layer="8" to-port="0" />
186
+ <edge from-layer="7" from-port="5" to-layer="8" to-port="1" />
187
+ <edge from-layer="8" from-port="3" to-layer="9" to-port="0" />
188
+ <edge from-layer="8" from-port="4" to-layer="9" to-port="1" />
189
+ <edge from-layer="8" from-port="5" to-layer="9" to-port="2" />
190
+ <edge from-layer="9" from-port="3" to-layer="10" to-port="0" />
191
+ </edges>
192
+ <rt_info>
193
+ <add_attention_mask value="True" />
194
+ <add_prefix_space />
195
+ <add_special_tokens value="True" />
196
+ <bos_token_id value="100257" />
197
+ <chat_template value="{% for message in messages %}{% if (message['role'] == 'system') %}{{'&lt;|im_start|>system&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% elif (message['role'] == 'user') %}{{'&lt;|im_start|>user&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'&lt;|im_start|>assistant&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '&lt;|im_start|>assistant&lt;|im_sep|>' }}{% endif %}" />
198
+ <clean_up_tokenization_spaces />
199
+ <detokenizer_input_type value="i64" />
200
+ <eos_token_id value="100265" />
201
+ <handle_special_tokens_with_re />
202
+ <max_length />
203
+ <number_of_inputs value="1" />
204
+ <openvino_tokenizers_version value="2025.1.0.0-523-710ddf14de8" />
205
+ <openvino_version value="2025.1.0-18503-6fec06580ab-releases/2025/1" />
206
+ <original_tokenizer_class value="&lt;class 'transformers.models.gpt2.tokenization_gpt2_fast.GPT2TokenizerFast'>" />
207
+ <pad_token_id value="100349" />
208
+ <sentencepiece_version value="0.2.0" />
209
+ <skip_special_tokens value="True" />
210
+ <streaming_detokenizer value="False" />
211
+ <tiktoken_version value="0.9.0" />
212
+ <tokenizer_output_type value="i64" />
213
+ <tokenizers_version value="0.21.1" />
214
+ <transformers_version value="4.51.3" />
215
+ <use_max_padding value="False" />
216
+ <use_sentencepiece_backend value="False" />
217
+ <utf8_replace_mode value="replace" />
218
+ <with_detokenizer value="True" />
219
+ </rt_info>
220
+ </net>
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1be399925b63f149f5db300d263f7fc8b750756c2772f8545161fd9cabcfb2a
3
+ size 14667805228
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
openvino_tokenizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f73906a310fc23c269ee249a71c2c27d9d025ad4d865af10bc9a902e78462f34
3
+ size 3801254
openvino_tokenizer.xml ADDED
@@ -0,0 +1,686 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0"?>
2
+ <net name="tokenizer" version="11">
3
+ <layers>
4
+ <layer id="0" name="Parameter_210429" type="Parameter" version="opset1">
5
+ <data shape="?" element_type="string" />
6
+ <output>
7
+ <port id="0" precision="STRING" names="Parameter_210429">
8
+ <dim>-1</dim>
9
+ </port>
10
+ </output>
11
+ </layer>
12
+ <layer id="1" name="Constant_210435" type="Const" version="opset1">
13
+ <data element_type="i64" shape="" offset="0" size="8" />
14
+ <output>
15
+ <port id="0" precision="I64" />
16
+ </output>
17
+ </layer>
18
+ <layer id="2" name="StringTensorUnpack_210430" type="StringTensorUnpack" version="opset15">
19
+ <input>
20
+ <port id="0" precision="STRING">
21
+ <dim>-1</dim>
22
+ </port>
23
+ </input>
24
+ <output>
25
+ <port id="1" precision="I32">
26
+ <dim>-1</dim>
27
+ </port>
28
+ <port id="2" precision="I32">
29
+ <dim>-1</dim>
30
+ </port>
31
+ <port id="3" precision="U8">
32
+ <dim>-1</dim>
33
+ </port>
34
+ </output>
35
+ </layer>
36
+ <layer id="3" name="ShapeOf_210431" type="ShapeOf" version="opset3">
37
+ <data output_type="i64" />
38
+ <input>
39
+ <port id="0" precision="I32">
40
+ <dim>-1</dim>
41
+ </port>
42
+ </input>
43
+ <output>
44
+ <port id="1" precision="I64">
45
+ <dim>1</dim>
46
+ </port>
47
+ </output>
48
+ </layer>
49
+ <layer id="4" name="Constant_210432" type="Const" version="opset1">
50
+ <data element_type="i64" shape="" offset="0" size="8" />
51
+ <output>
52
+ <port id="0" precision="I64" />
53
+ </output>
54
+ </layer>
55
+ <layer id="5" name="Constant_210433" type="Const" version="opset1">
56
+ <data element_type="i64" shape="" offset="0" size="8" />
57
+ <output>
58
+ <port id="0" precision="I64" />
59
+ </output>
60
+ </layer>
61
+ <layer id="6" name="Gather_210434" type="Gather" version="opset8">
62
+ <data batch_dims="0" />
63
+ <input>
64
+ <port id="0" precision="I64">
65
+ <dim>1</dim>
66
+ </port>
67
+ <port id="1" precision="I64" />
68
+ <port id="2" precision="I64" />
69
+ </input>
70
+ <output>
71
+ <port id="3" precision="I64" />
72
+ </output>
73
+ </layer>
74
+ <layer id="7" name="Constant_210436" type="Const" version="opset1">
75
+ <data element_type="i64" shape="" offset="8" size="8" />
76
+ <output>
77
+ <port id="0" precision="I64" />
78
+ </output>
79
+ </layer>
80
+ <layer id="8" name="Range_210437" type="Range" version="opset4">
81
+ <data output_type="i32" />
82
+ <input>
83
+ <port id="0" precision="I64" />
84
+ <port id="1" precision="I64" />
85
+ <port id="2" precision="I64" />
86
+ </input>
87
+ <output>
88
+ <port id="3" precision="I32">
89
+ <dim>-1</dim>
90
+ </port>
91
+ </output>
92
+ </layer>
93
+ <layer id="9" name="Constant_210438" type="Const" version="opset1">
94
+ <data element_type="i64" shape="" offset="8" size="8" />
95
+ <output>
96
+ <port id="0" precision="I64" />
97
+ </output>
98
+ </layer>
99
+ <layer id="10" name="Constant_210439" type="Const" version="opset1">
100
+ <data element_type="i64" shape="" offset="8" size="8" />
101
+ <output>
102
+ <port id="0" precision="I64" />
103
+ </output>
104
+ </layer>
105
+ <layer id="11" name="Add_210440" type="Add" version="opset1">
106
+ <data auto_broadcast="numpy" />
107
+ <input>
108
+ <port id="0" precision="I64" />
109
+ <port id="1" precision="I64" />
110
+ </input>
111
+ <output>
112
+ <port id="2" precision="I64" />
113
+ </output>
114
+ </layer>
115
+ <layer id="12" name="Constant_210441" type="Const" version="opset1">
116
+ <data element_type="i64" shape="" offset="8" size="8" />
117
+ <output>
118
+ <port id="0" precision="I64" />
119
+ </output>
120
+ </layer>
121
+ <layer id="13" name="Range_210442" type="Range" version="opset4">
122
+ <data output_type="i32" />
123
+ <input>
124
+ <port id="0" precision="I64" />
125
+ <port id="1" precision="I64" />
126
+ <port id="2" precision="I64" />
127
+ </input>
128
+ <output>
129
+ <port id="3" precision="I32">
130
+ <dim>-1</dim>
131
+ </port>
132
+ </output>
133
+ </layer>
134
+ <layer id="14" name="Constant_210504" type="Const" version="opset1">
135
+ <data element_type="u8" shape="3163" offset="16" size="3163" />
136
+ <output>
137
+ <port id="0" precision="U8">
138
+ <dim>3163</dim>
139
+ </port>
140
+ </output>
141
+ </layer>
142
+ <layer id="15" name="SpecialTokensSplit_210505" type="SpecialTokensSplit" version="extension">
143
+ <input>
144
+ <port id="0" precision="I32">
145
+ <dim>-1</dim>
146
+ </port>
147
+ <port id="1" precision="I32">
148
+ <dim>-1</dim>
149
+ </port>
150
+ <port id="2" precision="I32">
151
+ <dim>-1</dim>
152
+ </port>
153
+ <port id="3" precision="I32">
154
+ <dim>-1</dim>
155
+ </port>
156
+ <port id="4" precision="U8">
157
+ <dim>-1</dim>
158
+ </port>
159
+ <port id="5" precision="U8">
160
+ <dim>3163</dim>
161
+ </port>
162
+ </input>
163
+ <output>
164
+ <port id="6" precision="I32">
165
+ <dim>-1</dim>
166
+ </port>
167
+ <port id="7" precision="I32">
168
+ <dim>-1</dim>
169
+ </port>
170
+ <port id="8" precision="I32">
171
+ <dim>-1</dim>
172
+ </port>
173
+ <port id="9" precision="I32">
174
+ <dim>-1</dim>
175
+ </port>
176
+ <port id="10" precision="U8">
177
+ <dim>-1</dim>
178
+ </port>
179
+ <port id="11" precision="BOOL">
180
+ <dim>-1</dim>
181
+ </port>
182
+ </output>
183
+ </layer>
184
+ <layer id="16" name="Constant_210507" type="Const" version="opset1">
185
+ <data element_type="u8" shape="115" offset="3179" size="115" />
186
+ <output>
187
+ <port id="0" precision="U8">
188
+ <dim>115</dim>
189
+ </port>
190
+ </output>
191
+ </layer>
192
+ <layer id="17" name="RegexSplit_210508" type="RegexSplit" version="extension">
193
+ <data behaviour="remove" invert="true" max_splits="-1" />
194
+ <input>
195
+ <port id="0" precision="I32">
196
+ <dim>-1</dim>
197
+ </port>
198
+ <port id="1" precision="I32">
199
+ <dim>-1</dim>
200
+ </port>
201
+ <port id="2" precision="I32">
202
+ <dim>-1</dim>
203
+ </port>
204
+ <port id="3" precision="I32">
205
+ <dim>-1</dim>
206
+ </port>
207
+ <port id="4" precision="U8">
208
+ <dim>-1</dim>
209
+ </port>
210
+ <port id="5" precision="BOOL">
211
+ <dim>-1</dim>
212
+ </port>
213
+ <port id="6" precision="U8">
214
+ <dim>115</dim>
215
+ </port>
216
+ </input>
217
+ <output>
218
+ <port id="7" precision="I32">
219
+ <dim>-1</dim>
220
+ </port>
221
+ <port id="8" precision="I32">
222
+ <dim>-1</dim>
223
+ </port>
224
+ <port id="9" precision="I32">
225
+ <dim>-1</dim>
226
+ </port>
227
+ <port id="10" precision="I32">
228
+ <dim>-1</dim>
229
+ </port>
230
+ <port id="11" precision="U8">
231
+ <dim>-1</dim>
232
+ </port>
233
+ <port id="12" precision="BOOL">
234
+ <dim>-1</dim>
235
+ </port>
236
+ </output>
237
+ </layer>
238
+ <layer id="18" name="Constant_210510" type="Const" version="opset1">
239
+ <data element_type="i32" shape="100352" offset="3294" size="401408" />
240
+ <output>
241
+ <port id="0" precision="I32">
242
+ <dim>100352</dim>
243
+ </port>
244
+ </output>
245
+ </layer>
246
+ <layer id="19" name="Constant_210512" type="Const" version="opset1">
247
+ <data element_type="i32" shape="100352" offset="404702" size="401408" />
248
+ <output>
249
+ <port id="0" precision="I32">
250
+ <dim>100352</dim>
251
+ </port>
252
+ </output>
253
+ </layer>
254
+ <layer id="20" name="Constant_210514" type="Const" version="opset1">
255
+ <data element_type="u8" shape="645170" offset="806110" size="645170" />
256
+ <output>
257
+ <port id="0" precision="U8">
258
+ <dim>645170</dim>
259
+ </port>
260
+ </output>
261
+ </layer>
262
+ <layer id="21" name="Constant_210522" type="Const" version="opset1">
263
+ <data element_type="i32" shape="100000" offset="1451280" size="400000" />
264
+ <output>
265
+ <port id="0" precision="I32">
266
+ <dim>100000</dim>
267
+ </port>
268
+ </output>
269
+ </layer>
270
+ <layer id="22" name="Constant_210524" type="Const" version="opset1">
271
+ <data element_type="i32" shape="100000" offset="1851280" size="400000" />
272
+ <output>
273
+ <port id="0" precision="I32">
274
+ <dim>100000</dim>
275
+ </port>
276
+ </output>
277
+ </layer>
278
+ <layer id="23" name="Constant_210526" type="Const" version="opset1">
279
+ <data element_type="u8" shape="309844" offset="2251280" size="309844" />
280
+ <output>
281
+ <port id="0" precision="U8">
282
+ <dim>309844</dim>
283
+ </port>
284
+ </output>
285
+ </layer>
286
+ <layer id="24" name="Constant_210528" type="Const" version="opset1">
287
+ <data element_type="i32" shape="100000" offset="2561124" size="400000" />
288
+ <output>
289
+ <port id="0" precision="I32">
290
+ <dim>100000</dim>
291
+ </port>
292
+ </output>
293
+ </layer>
294
+ <layer id="25" name="Constant_210530" type="Const" version="opset1">
295
+ <data element_type="i32" shape="100000" offset="2961124" size="400000" />
296
+ <output>
297
+ <port id="0" precision="I32">
298
+ <dim>100000</dim>
299
+ </port>
300
+ </output>
301
+ </layer>
302
+ <layer id="26" name="Constant_210532" type="Const" version="opset1">
303
+ <data element_type="u8" shape="333730" offset="3361124" size="333730" />
304
+ <output>
305
+ <port id="0" precision="U8">
306
+ <dim>333730</dim>
307
+ </port>
308
+ </output>
309
+ </layer>
310
+ <layer id="27" name="Constant_210516" type="Const" version="opset1">
311
+ <data element_type="i32" shape="4128" offset="3694854" size="16512" />
312
+ <output>
313
+ <port id="0" precision="I32">
314
+ <dim>4128</dim>
315
+ </port>
316
+ </output>
317
+ </layer>
318
+ <layer id="28" name="Constant_210518" type="Const" version="opset1">
319
+ <data element_type="i32" shape="4128" offset="3711366" size="16512" />
320
+ <output>
321
+ <port id="0" precision="I32">
322
+ <dim>4128</dim>
323
+ </port>
324
+ </output>
325
+ </layer>
326
+ <layer id="29" name="Constant_210520" type="Const" version="opset1">
327
+ <data element_type="u8" shape="56852" offset="3727878" size="56852" />
328
+ <output>
329
+ <port id="0" precision="U8">
330
+ <dim>56852</dim>
331
+ </port>
332
+ </output>
333
+ </layer>
334
+ <layer id="30" name="Constant_210533" type="Const" version="opset1">
335
+ <data element_type="i32" shape="4128" offset="3784730" size="16512" />
336
+ <output>
337
+ <port id="0" precision="I32">
338
+ <dim>4128</dim>
339
+ </port>
340
+ </output>
341
+ </layer>
342
+ <layer id="31" name="BPETokenizer_210534" type="BPETokenizer" version="extension">
343
+ <data unk_token="" fuse_unk="false" suffix_indicator="" end_suffix="" byte_fallback="false" cache_capacity="20070" />
344
+ <input>
345
+ <port id="0" precision="I32">
346
+ <dim>-1</dim>
347
+ </port>
348
+ <port id="1" precision="I32">
349
+ <dim>-1</dim>
350
+ </port>
351
+ <port id="2" precision="I32">
352
+ <dim>-1</dim>
353
+ </port>
354
+ <port id="3" precision="I32">
355
+ <dim>-1</dim>
356
+ </port>
357
+ <port id="4" precision="U8">
358
+ <dim>-1</dim>
359
+ </port>
360
+ <port id="5" precision="I32">
361
+ <dim>100352</dim>
362
+ </port>
363
+ <port id="6" precision="I32">
364
+ <dim>100352</dim>
365
+ </port>
366
+ <port id="7" precision="U8">
367
+ <dim>645170</dim>
368
+ </port>
369
+ <port id="8" precision="I32">
370
+ <dim>100000</dim>
371
+ </port>
372
+ <port id="9" precision="I32">
373
+ <dim>100000</dim>
374
+ </port>
375
+ <port id="10" precision="U8">
376
+ <dim>309844</dim>
377
+ </port>
378
+ <port id="11" precision="I32">
379
+ <dim>100000</dim>
380
+ </port>
381
+ <port id="12" precision="I32">
382
+ <dim>100000</dim>
383
+ </port>
384
+ <port id="13" precision="U8">
385
+ <dim>333730</dim>
386
+ </port>
387
+ <port id="14" precision="I32">
388
+ <dim>4128</dim>
389
+ </port>
390
+ <port id="15" precision="I32">
391
+ <dim>4128</dim>
392
+ </port>
393
+ <port id="16" precision="U8">
394
+ <dim>56852</dim>
395
+ </port>
396
+ <port id="17" precision="I32">
397
+ <dim>4128</dim>
398
+ </port>
399
+ </input>
400
+ <output>
401
+ <port id="18" precision="I32">
402
+ <dim>-1</dim>
403
+ </port>
404
+ <port id="19" precision="I32">
405
+ <dim>-1</dim>
406
+ </port>
407
+ <port id="20" precision="I32">
408
+ <dim>-1</dim>
409
+ </port>
410
+ </output>
411
+ </layer>
412
+ <layer id="32" name="Subtract_210535" type="Subtract" version="opset1">
413
+ <data auto_broadcast="numpy" />
414
+ <input>
415
+ <port id="0" precision="I32">
416
+ <dim>-1</dim>
417
+ </port>
418
+ <port id="1" precision="I32">
419
+ <dim>-1</dim>
420
+ </port>
421
+ </input>
422
+ <output>
423
+ <port id="2" precision="I32">
424
+ <dim>-1</dim>
425
+ </port>
426
+ </output>
427
+ </layer>
428
+ <layer id="33" name="Constant_210536" type="Const" version="opset1">
429
+ <data element_type="i32" shape="" offset="3801242" size="4" />
430
+ <output>
431
+ <port id="0" precision="I32" />
432
+ </output>
433
+ </layer>
434
+ <layer id="34" name="Minimum_210537" type="Minimum" version="opset1">
435
+ <data auto_broadcast="numpy" />
436
+ <input>
437
+ <port id="0" precision="I32">
438
+ <dim>-1</dim>
439
+ </port>
440
+ <port id="1" precision="I32" />
441
+ </input>
442
+ <output>
443
+ <port id="2" precision="I32">
444
+ <dim>-1</dim>
445
+ </port>
446
+ </output>
447
+ </layer>
448
+ <layer id="35" name="Subtract_210538" type="Subtract" version="opset1">
449
+ <data auto_broadcast="numpy" />
450
+ <input>
451
+ <port id="0" precision="I32">
452
+ <dim>-1</dim>
453
+ </port>
454
+ <port id="1" precision="I32">
455
+ <dim>-1</dim>
456
+ </port>
457
+ </input>
458
+ <output>
459
+ <port id="2" precision="I32">
460
+ <dim>-1</dim>
461
+ </port>
462
+ </output>
463
+ </layer>
464
+ <layer id="36" name="Subtract_210539" type="Subtract" version="opset1">
465
+ <data auto_broadcast="numpy" />
466
+ <input>
467
+ <port id="0" precision="I32">
468
+ <dim>-1</dim>
469
+ </port>
470
+ <port id="1" precision="I32">
471
+ <dim>-1</dim>
472
+ </port>
473
+ </input>
474
+ <output>
475
+ <port id="2" precision="I32">
476
+ <dim>-1</dim>
477
+ </port>
478
+ </output>
479
+ </layer>
480
+ <layer id="37" name="Constant_210540" type="Const" version="opset1">
481
+ <data element_type="i32" shape="" offset="3801246" size="4" />
482
+ <output>
483
+ <port id="0" precision="I32" />
484
+ </output>
485
+ </layer>
486
+ <layer id="38" name="ReduceMax_210541" type="ReduceMax" version="opset1">
487
+ <data keep_dims="false" />
488
+ <input>
489
+ <port id="0" precision="I32">
490
+ <dim>-1</dim>
491
+ </port>
492
+ <port id="1" precision="I32" />
493
+ </input>
494
+ <output>
495
+ <port id="2" precision="I32" />
496
+ </output>
497
+ </layer>
498
+ <layer id="39" name="Constant_210542" type="Const" version="opset1">
499
+ <data element_type="i32" shape="" offset="3801250" size="4" />
500
+ <output>
501
+ <port id="0" precision="I32" />
502
+ </output>
503
+ </layer>
504
+ <layer id="40" name="RaggedToDense_210543" type="RaggedToDense" version="extension">
505
+ <data pad_right="false" m_pad_max_length="false" />
506
+ <input>
507
+ <port id="0" precision="I32">
508
+ <dim>-1</dim>
509
+ </port>
510
+ <port id="1" precision="I32">
511
+ <dim>-1</dim>
512
+ </port>
513
+ <port id="2" precision="I32">
514
+ <dim>-1</dim>
515
+ </port>
516
+ <port id="3" precision="I32" />
517
+ <port id="4" precision="I32" />
518
+ </input>
519
+ <output>
520
+ <port id="5" precision="I32">
521
+ <dim>-1</dim>
522
+ <dim>-1</dim>
523
+ </port>
524
+ <port id="6" precision="BOOL">
525
+ <dim>-1</dim>
526
+ <dim>-1</dim>
527
+ </port>
528
+ </output>
529
+ </layer>
530
+ <layer id="41" name="Convert_210544" type="Convert" version="opset1">
531
+ <data destination_type="i32" />
532
+ <input>
533
+ <port id="0" precision="BOOL">
534
+ <dim>-1</dim>
535
+ <dim>-1</dim>
536
+ </port>
537
+ </input>
538
+ <output>
539
+ <port id="1" precision="I32">
540
+ <dim>-1</dim>
541
+ <dim>-1</dim>
542
+ </port>
543
+ </output>
544
+ </layer>
545
+ <layer id="42" name="Convert_210544.0" type="Convert" version="opset1">
546
+ <data destination_type="i64" />
547
+ <input>
548
+ <port id="0" precision="I32">
549
+ <dim>-1</dim>
550
+ <dim>-1</dim>
551
+ </port>
552
+ </input>
553
+ <output>
554
+ <port id="1" precision="I64" names="attention_mask">
555
+ <dim>-1</dim>
556
+ <dim>-1</dim>
557
+ </port>
558
+ </output>
559
+ </layer>
560
+ <layer id="44" name="RaggedToDense_210543.0" type="Convert" version="opset1">
561
+ <data destination_type="i64" />
562
+ <input>
563
+ <port id="0" precision="I32">
564
+ <dim>-1</dim>
565
+ <dim>-1</dim>
566
+ </port>
567
+ </input>
568
+ <output>
569
+ <port id="1" precision="I64" names="input_ids">
570
+ <dim>-1</dim>
571
+ <dim>-1</dim>
572
+ </port>
573
+ </output>
574
+ </layer>
575
+ <layer id="45" name="Result_210545" type="Result" version="opset1" output_names="input_ids">
576
+ <input>
577
+ <port id="0" precision="I64">
578
+ <dim>-1</dim>
579
+ <dim>-1</dim>
580
+ </port>
581
+ </input>
582
+ </layer>
583
+ <layer id="43" name="Result_210546" type="Result" version="opset1" output_names="attention_mask">
584
+ <input>
585
+ <port id="0" precision="I64">
586
+ <dim>-1</dim>
587
+ <dim>-1</dim>
588
+ </port>
589
+ </input>
590
+ </layer>
591
+ </layers>
592
+ <edges>
593
+ <edge from-layer="0" from-port="0" to-layer="2" to-port="0" />
594
+ <edge from-layer="1" from-port="0" to-layer="8" to-port="0" />
595
+ <edge from-layer="2" from-port="2" to-layer="15" to-port="3" />
596
+ <edge from-layer="2" from-port="3" to-layer="15" to-port="4" />
597
+ <edge from-layer="2" from-port="1" to-layer="15" to-port="2" />
598
+ <edge from-layer="2" from-port="1" to-layer="3" to-port="0" />
599
+ <edge from-layer="3" from-port="1" to-layer="6" to-port="0" />
600
+ <edge from-layer="4" from-port="0" to-layer="6" to-port="1" />
601
+ <edge from-layer="5" from-port="0" to-layer="6" to-port="2" />
602
+ <edge from-layer="6" from-port="3" to-layer="8" to-port="1" />
603
+ <edge from-layer="6" from-port="3" to-layer="11" to-port="0" />
604
+ <edge from-layer="7" from-port="0" to-layer="8" to-port="2" />
605
+ <edge from-layer="8" from-port="3" to-layer="15" to-port="0" />
606
+ <edge from-layer="9" from-port="0" to-layer="13" to-port="0" />
607
+ <edge from-layer="10" from-port="0" to-layer="11" to-port="1" />
608
+ <edge from-layer="11" from-port="2" to-layer="13" to-port="1" />
609
+ <edge from-layer="12" from-port="0" to-layer="13" to-port="2" />
610
+ <edge from-layer="13" from-port="3" to-layer="15" to-port="1" />
611
+ <edge from-layer="14" from-port="0" to-layer="15" to-port="5" />
612
+ <edge from-layer="15" from-port="6" to-layer="17" to-port="0" />
613
+ <edge from-layer="15" from-port="7" to-layer="17" to-port="1" />
614
+ <edge from-layer="15" from-port="8" to-layer="17" to-port="2" />
615
+ <edge from-layer="15" from-port="9" to-layer="17" to-port="3" />
616
+ <edge from-layer="15" from-port="10" to-layer="17" to-port="4" />
617
+ <edge from-layer="15" from-port="11" to-layer="17" to-port="5" />
618
+ <edge from-layer="16" from-port="0" to-layer="17" to-port="6" />
619
+ <edge from-layer="17" from-port="7" to-layer="31" to-port="0" />
620
+ <edge from-layer="17" from-port="8" to-layer="31" to-port="1" />
621
+ <edge from-layer="17" from-port="9" to-layer="31" to-port="2" />
622
+ <edge from-layer="17" from-port="10" to-layer="31" to-port="3" />
623
+ <edge from-layer="17" from-port="11" to-layer="31" to-port="4" />
624
+ <edge from-layer="18" from-port="0" to-layer="31" to-port="5" />
625
+ <edge from-layer="19" from-port="0" to-layer="31" to-port="6" />
626
+ <edge from-layer="20" from-port="0" to-layer="31" to-port="7" />
627
+ <edge from-layer="21" from-port="0" to-layer="31" to-port="8" />
628
+ <edge from-layer="22" from-port="0" to-layer="31" to-port="9" />
629
+ <edge from-layer="23" from-port="0" to-layer="31" to-port="10" />
630
+ <edge from-layer="24" from-port="0" to-layer="31" to-port="11" />
631
+ <edge from-layer="25" from-port="0" to-layer="31" to-port="12" />
632
+ <edge from-layer="26" from-port="0" to-layer="31" to-port="13" />
633
+ <edge from-layer="27" from-port="0" to-layer="31" to-port="14" />
634
+ <edge from-layer="28" from-port="0" to-layer="31" to-port="15" />
635
+ <edge from-layer="29" from-port="0" to-layer="31" to-port="16" />
636
+ <edge from-layer="30" from-port="0" to-layer="31" to-port="17" />
637
+ <edge from-layer="31" from-port="19" to-layer="35" to-port="0" />
638
+ <edge from-layer="31" from-port="20" to-layer="40" to-port="2" />
639
+ <edge from-layer="31" from-port="19" to-layer="40" to-port="1" />
640
+ <edge from-layer="31" from-port="19" to-layer="36" to-port="0" />
641
+ <edge from-layer="31" from-port="18" to-layer="32" to-port="1" />
642
+ <edge from-layer="31" from-port="19" to-layer="32" to-port="0" />
643
+ <edge from-layer="32" from-port="2" to-layer="34" to-port="0" />
644
+ <edge from-layer="33" from-port="0" to-layer="34" to-port="1" />
645
+ <edge from-layer="34" from-port="2" to-layer="35" to-port="1" />
646
+ <edge from-layer="35" from-port="2" to-layer="36" to-port="1" />
647
+ <edge from-layer="35" from-port="2" to-layer="40" to-port="0" />
648
+ <edge from-layer="36" from-port="2" to-layer="38" to-port="0" />
649
+ <edge from-layer="37" from-port="0" to-layer="38" to-port="1" />
650
+ <edge from-layer="38" from-port="2" to-layer="40" to-port="3" />
651
+ <edge from-layer="39" from-port="0" to-layer="40" to-port="4" />
652
+ <edge from-layer="40" from-port="6" to-layer="41" to-port="0" />
653
+ <edge from-layer="40" from-port="5" to-layer="44" to-port="0" />
654
+ <edge from-layer="41" from-port="1" to-layer="42" to-port="0" />
655
+ <edge from-layer="42" from-port="1" to-layer="43" to-port="0" />
656
+ <edge from-layer="44" from-port="1" to-layer="45" to-port="0" />
657
+ </edges>
658
+ <rt_info>
659
+ <add_attention_mask value="True" />
660
+ <add_prefix_space />
661
+ <add_special_tokens value="True" />
662
+ <bos_token_id value="100257" />
663
+ <chat_template value="{% for message in messages %}{% if (message['role'] == 'system') %}{{'&lt;|im_start|>system&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% elif (message['role'] == 'user') %}{{'&lt;|im_start|>user&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'&lt;|im_start|>assistant&lt;|im_sep|>' + message['content'] + '&lt;|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '&lt;|im_start|>assistant&lt;|im_sep|>' }}{% endif %}" />
664
+ <clean_up_tokenization_spaces />
665
+ <detokenizer_input_type value="i64" />
666
+ <eos_token_id value="100265" />
667
+ <handle_special_tokens_with_re />
668
+ <max_length />
669
+ <number_of_inputs value="1" />
670
+ <openvino_tokenizers_version value="2025.1.0.0-523-710ddf14de8" />
671
+ <openvino_version value="2025.1.0-18503-6fec06580ab-releases/2025/1" />
672
+ <original_tokenizer_class value="&lt;class 'transformers.models.gpt2.tokenization_gpt2_fast.GPT2TokenizerFast'>" />
673
+ <pad_token_id value="100349" />
674
+ <sentencepiece_version value="0.2.0" />
675
+ <skip_special_tokens value="True" />
676
+ <streaming_detokenizer value="False" />
677
+ <tiktoken_version value="0.9.0" />
678
+ <tokenizer_output_type value="i64" />
679
+ <tokenizers_version value="0.21.1" />
680
+ <transformers_version value="4.51.3" />
681
+ <use_max_padding value="False" />
682
+ <use_sentencepiece_backend value="False" />
683
+ <utf8_replace_mode value="replace" />
684
+ <with_detokenizer value="True" />
685
+ </rt_info>
686
+ </net>
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": true,
5
+ "normalized": false,
6
+ "rstrip": true,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": true,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|dummy_85|>",
18
+ "lstrip": true,
19
+ "normalized": false,
20
+ "rstrip": true,
21
+ "single_word": false
22
+ },
23
+ "unk_token": "<|endoftext|>"
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,782 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "100256": {
5
+ "content": "<|dummy_0|>",
6
+ "lstrip": true,
7
+ "normalized": false,
8
+ "rstrip": true,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "100257": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": true,
15
+ "normalized": false,
16
+ "rstrip": true,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "100258": {
21
+ "content": "<|fim_prefix|>",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": true,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "100259": {
29
+ "content": "<|fim_middle|>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "100260": {
37
+ "content": "<|fim_suffix|>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "100261": {
45
+ "content": "<|dummy_1|>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "100262": {
53
+ "content": "<|dummy_2|>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "100263": {
61
+ "content": "<|dummy_3|>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "100264": {
69
+ "content": "<|im_start|>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "100265": {
77
+ "content": "<|im_end|>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "100266": {
85
+ "content": "<|im_sep|>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "100267": {
93
+ "content": "<|dummy_4|>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "100268": {
101
+ "content": "<|dummy_5|>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "100269": {
109
+ "content": "<|dummy_6|>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "100270": {
117
+ "content": "<|dummy_7|>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "100271": {
125
+ "content": "<|dummy_8|>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "100272": {
133
+ "content": "<|dummy_9|>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "100273": {
141
+ "content": "<|dummy_10|>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "100274": {
149
+ "content": "<|dummy_11|>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "100275": {
157
+ "content": "<|dummy_12|>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "100276": {
165
+ "content": "<|endofprompt|>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "100277": {
173
+ "content": "<|dummy_13|>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "100278": {
181
+ "content": "<|dummy_14|>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "100279": {
189
+ "content": "<|dummy_15|>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "100280": {
197
+ "content": "<|dummy_16|>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "100281": {
205
+ "content": "<|dummy_17|>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "100282": {
213
+ "content": "<|dummy_18|>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "100283": {
221
+ "content": "<|dummy_19|>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "100284": {
229
+ "content": "<|dummy_20|>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "100285": {
237
+ "content": "<|dummy_21|>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "100286": {
245
+ "content": "<|dummy_22|>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "100287": {
253
+ "content": "<|dummy_23|>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "100288": {
261
+ "content": "<|dummy_24|>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "100289": {
269
+ "content": "<|dummy_25|>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "100290": {
277
+ "content": "<|dummy_26|>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "100291": {
285
+ "content": "<|dummy_27|>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "100292": {
293
+ "content": "<|dummy_28|>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "100293": {
301
+ "content": "<|dummy_29|>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "100294": {
309
+ "content": "<|dummy_30|>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "100295": {
317
+ "content": "<|dummy_31|>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "100296": {
325
+ "content": "<|dummy_32|>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "100297": {
333
+ "content": "<|dummy_33|>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "100298": {
341
+ "content": "<|dummy_34|>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "100299": {
349
+ "content": "<|dummy_35|>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "100300": {
357
+ "content": "<|dummy_36|>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "100301": {
365
+ "content": "<|dummy_37|>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "100302": {
373
+ "content": "<|dummy_38|>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "100303": {
381
+ "content": "<|dummy_39|>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "100304": {
389
+ "content": "<|dummy_40|>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "100305": {
397
+ "content": "<|dummy_41|>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "100306": {
405
+ "content": "<|dummy_42|>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "100307": {
413
+ "content": "<|dummy_43|>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "100308": {
421
+ "content": "<|dummy_44|>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "100309": {
429
+ "content": "<|dummy_45|>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "100310": {
437
+ "content": "<|dummy_46|>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "100311": {
445
+ "content": "<|dummy_47|>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "100312": {
453
+ "content": "<|dummy_48|>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "100313": {
461
+ "content": "<|dummy_49|>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "100314": {
469
+ "content": "<|dummy_50|>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "100315": {
477
+ "content": "<|dummy_51|>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "100316": {
485
+ "content": "<|dummy_52|>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "100317": {
493
+ "content": "<|dummy_53|>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "100318": {
501
+ "content": "<|dummy_54|>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "100319": {
509
+ "content": "<|dummy_55|>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "100320": {
517
+ "content": "<|dummy_56|>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "100321": {
525
+ "content": "<|dummy_57|>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "100322": {
533
+ "content": "<|dummy_58|>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "100323": {
541
+ "content": "<|dummy_59|>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "100324": {
549
+ "content": "<|dummy_60|>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "100325": {
557
+ "content": "<|dummy_61|>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "100326": {
565
+ "content": "<|dummy_62|>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "100327": {
573
+ "content": "<|dummy_63|>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "100328": {
581
+ "content": "<|dummy_64|>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "100329": {
589
+ "content": "<|dummy_65|>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "100330": {
597
+ "content": "<|dummy_66|>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "100331": {
605
+ "content": "<|dummy_67|>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "100332": {
613
+ "content": "<|dummy_68|>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "100333": {
621
+ "content": "<|dummy_69|>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "100334": {
629
+ "content": "<|dummy_70|>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "100335": {
637
+ "content": "<|dummy_71|>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "100336": {
645
+ "content": "<|dummy_72|>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "100337": {
653
+ "content": "<|dummy_73|>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "100338": {
661
+ "content": "<|dummy_74|>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "100339": {
669
+ "content": "<|dummy_75|>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "100340": {
677
+ "content": "<|dummy_76|>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "100341": {
685
+ "content": "<|dummy_77|>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "100342": {
693
+ "content": "<|dummy_78|>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "100343": {
701
+ "content": "<|dummy_79|>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "100344": {
709
+ "content": "<|dummy_80|>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "100345": {
717
+ "content": "<|dummy_81|>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "100346": {
725
+ "content": "<|dummy_82|>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "100347": {
733
+ "content": "<|dummy_83|>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "100348": {
741
+ "content": "<|dummy_84|>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "100349": {
749
+ "content": "<|dummy_85|>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "100350": {
757
+ "content": "<|dummy_86|>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "100351": {
765
+ "content": "<|dummy_87|>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ }
772
+ },
773
+ "bos_token": "<|endoftext|>",
774
+ "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}",
775
+ "clean_up_tokenization_spaces": false,
776
+ "eos_token": "<|im_end|>",
777
+ "extra_special_tokens": {},
778
+ "model_max_length": 16384,
779
+ "pad_token": "<|dummy_85|>",
780
+ "tokenizer_class": "GPT2Tokenizer",
781
+ "unk_token": "<|endoftext|>"
782
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff