Commit
·
2812032
1
Parent(s):
920220c
Upload Phi-4-mini-reasoning ONNX models
Browse files- .gitattributes +2 -1
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt +0 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json +0 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json +3 -0
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json +3 -0
- gpu/gpu-int4-rtn-block-32/added_tokens.json +3 -0
- gpu/gpu-int4-rtn-block-32/genai_config.json +3 -0
- gpu/gpu-int4-rtn-block-32/merges.txt +0 -0
- gpu/gpu-int4-rtn-block-32/model.onnx +3 -0
- gpu/gpu-int4-rtn-block-32/model.onnx.data +3 -0
- gpu/gpu-int4-rtn-block-32/special_tokens_map.json +3 -0
- gpu/gpu-int4-rtn-block-32/tokenizer.json +3 -0
- gpu/gpu-int4-rtn-block-32/tokenizer_config.json +3 -0
- gpu/gpu-int4-rtn-block-32/vocab.json +3 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md +0 -0
- npu/qnn-int4/added_tokens.json +3 -0
- npu/qnn-int4/config.json +3 -0
- npu/qnn-int4/genai_config.json +3 -0
- npu/qnn-int4/merges.txt +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx +0 -0
- {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx +0 -0
- npu/qnn-int4/special_tokens_map.json +3 -0
- npu/qnn-int4/tokenizer.json +3 -0
- npu/qnn-int4/tokenizer_config.json +3 -0
- npu/qnn-int4/vocab.json +3 -0
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json +0 -12
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json +0 -144
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json +0 -391
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json +0 -30
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json +0 -116
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
-
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.onnx.data filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
|
3 |
+
size 249
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcfa1e663f2bc867f8dc62fae65dd0924f0a4d68b43d1234df742dd19171470
|
3 |
+
size 1520
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt
RENAMED
File without changes
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:929ee60b9dfcca848a2c62c6533e18c6e74d9d657e01f577e5b54602c02e5ad5
|
3 |
+
size 52119126
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd04a1bdfd475e5909a7a90a948b4fb425b4a565293ec36e34df7ac8c39fe8a4
|
3 |
+
size 4856573952
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
|
3 |
+
size 587
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json
RENAMED
File without changes
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
|
3 |
+
size 3216
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
|
3 |
+
size 3910310
|
gpu/gpu-int4-rtn-block-32/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
|
3 |
+
size 249
|
gpu/gpu-int4-rtn-block-32/genai_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44539bd5e6a1a53e451698216f326b3f030f206ccea77b35ca5a4137df436835
|
3 |
+
size 1569
|
gpu/gpu-int4-rtn-block-32/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
gpu/gpu-int4-rtn-block-32/model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59ab70c351e50301c1cf4cca3f23176bba73e43f0d94cea26d471ba43b898f65
|
3 |
+
size 287586
|
gpu/gpu-int4-rtn-block-32/model.onnx.data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fc3832908b14538d2d53597507a10160e1307c45e153fa5822d82e6248471c3
|
3 |
+
size 3413194752
|
gpu/gpu-int4-rtn-block-32/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
|
3 |
+
size 587
|
gpu/gpu-int4-rtn-block-32/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
|
3 |
+
size 15524194
|
gpu/gpu-int4-rtn-block-32/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
|
3 |
+
size 3216
|
gpu/gpu-int4-rtn-block-32/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
|
3 |
+
size 3910310
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md
RENAMED
File without changes
|
npu/qnn-int4/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
|
3 |
+
size 249
|
npu/qnn-int4/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac65d86061d3d0d704ee2511fd0eb8713ef19eb6eedba17c3080a4165d5b933b
|
3 |
+
size 2504
|
npu/qnn-int4/genai_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cf1863737bcd75cb59c56e8b505ed316132162a47689f85ff52556471dd6698
|
3 |
+
size 16942
|
npu/qnn-int4/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx
RENAMED
File without changes
|
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx
RENAMED
File without changes
|
npu/qnn-int4/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
|
3 |
+
size 587
|
npu/qnn-int4/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
|
3 |
+
size 15524194
|
npu/qnn-int4/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dcb238a84365a07e19dd4334f82da442eea64da30ecbfa3718c06557ea3a589
|
3 |
+
size 3323
|
npu/qnn-int4/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
|
3 |
+
size 3910310
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"<|/tool_call|>": 200026,
|
3 |
-
"<|/tool|>": 200024,
|
4 |
-
"<|assistant|>": 200019,
|
5 |
-
"<|end|>": 200020,
|
6 |
-
"<|system|>": 200022,
|
7 |
-
"<|tag|>": 200028,
|
8 |
-
"<|tool_call|>": 200025,
|
9 |
-
"<|tool_response|>": 200027,
|
10 |
-
"<|tool|>": 200023,
|
11 |
-
"<|user|>": 200021
|
12 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json
DELETED
@@ -1,144 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "Phi-4-mini-instruct",
|
3 |
-
"architectures": [
|
4 |
-
"Phi3ForCausalLM"
|
5 |
-
],
|
6 |
-
"attention_bias": false,
|
7 |
-
"attention_dropout": 0.0,
|
8 |
-
"auto_map": {
|
9 |
-
"AutoConfig": "configuration_phi3.Phi3Config",
|
10 |
-
"AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
|
11 |
-
"AutoTokenizer": "Xenova/gpt-4o"
|
12 |
-
},
|
13 |
-
"bos_token_id": 199999,
|
14 |
-
"embd_pdrop": 0.0,
|
15 |
-
"eos_token_id": 199999,
|
16 |
-
"full_attn_mod": 1,
|
17 |
-
"hidden_act": "silu",
|
18 |
-
"hidden_size": 3072,
|
19 |
-
"initializer_range": 0.02,
|
20 |
-
"intermediate_size": 8192,
|
21 |
-
"interpolate_factor": 1,
|
22 |
-
"lm_head_bias": false,
|
23 |
-
"max_position_embeddings": 131072,
|
24 |
-
"mlp_bias": false,
|
25 |
-
"model_type": "phi3",
|
26 |
-
"num_attention_heads": 24,
|
27 |
-
"num_hidden_layers": 32,
|
28 |
-
"num_key_value_heads": 8,
|
29 |
-
"original_max_position_embeddings": 4096,
|
30 |
-
"pad_token_id": 199999,
|
31 |
-
"partial_rotary_factor": 0.75,
|
32 |
-
"resid_pdrop": 0.0,
|
33 |
-
"rms_norm_eps": 1e-05,
|
34 |
-
"rope_scaling": {
|
35 |
-
"long_factor": [
|
36 |
-
1,
|
37 |
-
1.118320672,
|
38 |
-
1.250641126,
|
39 |
-
1.398617824,
|
40 |
-
1.564103225,
|
41 |
-
1.74916897,
|
42 |
-
1.956131817,
|
43 |
-
2.187582649,
|
44 |
-
2.446418898,
|
45 |
-
2.735880826,
|
46 |
-
3.059592084,
|
47 |
-
3.421605075,
|
48 |
-
3.826451687,
|
49 |
-
4.279200023,
|
50 |
-
4.785517845,
|
51 |
-
5.351743533,
|
52 |
-
5.984965424,
|
53 |
-
6.693110555,
|
54 |
-
7.485043894,
|
55 |
-
8.370679318,
|
56 |
-
9.36110372,
|
57 |
-
10.4687158,
|
58 |
-
11.70738129,
|
59 |
-
13.09260651,
|
60 |
-
14.64173252,
|
61 |
-
16.37415215,
|
62 |
-
18.31155283,
|
63 |
-
20.47818807,
|
64 |
-
22.90118105,
|
65 |
-
25.61086418,
|
66 |
-
28.64115884,
|
67 |
-
32.03,
|
68 |
-
32.1,
|
69 |
-
32.13,
|
70 |
-
32.23,
|
71 |
-
32.6,
|
72 |
-
32.61,
|
73 |
-
32.64,
|
74 |
-
32.66,
|
75 |
-
32.7,
|
76 |
-
32.71,
|
77 |
-
32.93,
|
78 |
-
32.97,
|
79 |
-
33.28,
|
80 |
-
33.49,
|
81 |
-
33.5,
|
82 |
-
44.16,
|
83 |
-
47.77
|
84 |
-
],
|
85 |
-
"short_factor": [
|
86 |
-
1.0,
|
87 |
-
1.0,
|
88 |
-
1.0,
|
89 |
-
1.0,
|
90 |
-
1.0,
|
91 |
-
1.0,
|
92 |
-
1.0,
|
93 |
-
1.0,
|
94 |
-
1.0,
|
95 |
-
1.0,
|
96 |
-
1.0,
|
97 |
-
1.0,
|
98 |
-
1.0,
|
99 |
-
1.0,
|
100 |
-
1.0,
|
101 |
-
1.0,
|
102 |
-
1.0,
|
103 |
-
1.0,
|
104 |
-
1.0,
|
105 |
-
1.0,
|
106 |
-
1.0,
|
107 |
-
1.0,
|
108 |
-
1.0,
|
109 |
-
1.0,
|
110 |
-
1.0,
|
111 |
-
1.0,
|
112 |
-
1.0,
|
113 |
-
1.0,
|
114 |
-
1.0,
|
115 |
-
1.0,
|
116 |
-
1.0,
|
117 |
-
1.0,
|
118 |
-
1.0,
|
119 |
-
1.0,
|
120 |
-
1.0,
|
121 |
-
1.0,
|
122 |
-
1.0,
|
123 |
-
1.0,
|
124 |
-
1.0,
|
125 |
-
1.0,
|
126 |
-
1.0,
|
127 |
-
1.0,
|
128 |
-
1.0,
|
129 |
-
1.0,
|
130 |
-
1.0,
|
131 |
-
1.0,
|
132 |
-
1.0,
|
133 |
-
1.0
|
134 |
-
],
|
135 |
-
"type": "longrope"
|
136 |
-
},
|
137 |
-
"rope_theta": 10000.0,
|
138 |
-
"sliding_window": 262144,
|
139 |
-
"tie_word_embeddings": true,
|
140 |
-
"torch_dtype": "bfloat16",
|
141 |
-
"transformers_version": "4.45.0",
|
142 |
-
"use_cache": true,
|
143 |
-
"vocab_size": 200064
|
144 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json
DELETED
@@ -1,391 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"model": {
|
3 |
-
"bos_token_id": 199999,
|
4 |
-
"context_length": 4096,
|
5 |
-
"decoder": {
|
6 |
-
"session_options": {
|
7 |
-
"log_id": "onnxruntime-genai",
|
8 |
-
"provider_options": []
|
9 |
-
},
|
10 |
-
"head_size": 128,
|
11 |
-
"hidden_size": 3072,
|
12 |
-
"inputs": {
|
13 |
-
"input_ids": "input_ids",
|
14 |
-
"attention_mask": "attention_mask",
|
15 |
-
"past_key_names": "past_keys_%d",
|
16 |
-
"past_value_names": "past_values_%d",
|
17 |
-
"past_sequence_length": "past_seq_len",
|
18 |
-
"total_sequence_length": "total_seq_len"
|
19 |
-
},
|
20 |
-
"outputs": {
|
21 |
-
"logits": "logits",
|
22 |
-
"present_key_names": "present_keys_%d",
|
23 |
-
"present_value_names": "present_values_%d"
|
24 |
-
},
|
25 |
-
"num_attention_heads": 24,
|
26 |
-
"num_hidden_layers": 32,
|
27 |
-
"num_key_value_heads": 8,
|
28 |
-
"sliding_window": {
|
29 |
-
"window_size": 64,
|
30 |
-
"pad_value": 0,
|
31 |
-
"alignment": "left",
|
32 |
-
"slide_key_value_cache": false
|
33 |
-
},
|
34 |
-
"pipeline": [
|
35 |
-
{
|
36 |
-
"embedding": {
|
37 |
-
"filename": "phi_4_mini_embeddings.all.quant.onnx",
|
38 |
-
"inputs": [
|
39 |
-
"input_ids"
|
40 |
-
],
|
41 |
-
"outputs": [
|
42 |
-
"input_hidden_states"
|
43 |
-
]
|
44 |
-
},
|
45 |
-
"prompt-processor": {
|
46 |
-
"filename": "phi_4_mini_ctx.onnx_ctx.onnx",
|
47 |
-
"session_options": {
|
48 |
-
"log_id": "onnxruntime-genai.prompt-processor",
|
49 |
-
"provider_options": [
|
50 |
-
{
|
51 |
-
"qnn": {
|
52 |
-
"backend_path": "QnnHtp.dll",
|
53 |
-
"htp_performance_mode": "burst",
|
54 |
-
"htp_graph_finalization_optimization_mode": "3",
|
55 |
-
"soc_model": "60"
|
56 |
-
}
|
57 |
-
}
|
58 |
-
],
|
59 |
-
"intra_op_num_threads": 12,
|
60 |
-
"inter_op_num_threads": 1
|
61 |
-
},
|
62 |
-
"inputs": [
|
63 |
-
"past_keys_0",
|
64 |
-
"past_values_0",
|
65 |
-
"past_keys_1",
|
66 |
-
"past_values_1",
|
67 |
-
"past_keys_2",
|
68 |
-
"past_values_2",
|
69 |
-
"past_keys_3",
|
70 |
-
"past_values_3",
|
71 |
-
"past_keys_4",
|
72 |
-
"past_values_4",
|
73 |
-
"past_keys_5",
|
74 |
-
"past_values_5",
|
75 |
-
"past_keys_6",
|
76 |
-
"past_values_6",
|
77 |
-
"past_keys_7",
|
78 |
-
"past_values_7",
|
79 |
-
"past_keys_8",
|
80 |
-
"past_values_8",
|
81 |
-
"past_keys_9",
|
82 |
-
"past_values_9",
|
83 |
-
"past_keys_10",
|
84 |
-
"past_values_10",
|
85 |
-
"past_keys_11",
|
86 |
-
"past_values_11",
|
87 |
-
"past_keys_12",
|
88 |
-
"past_values_12",
|
89 |
-
"past_keys_13",
|
90 |
-
"past_values_13",
|
91 |
-
"past_keys_14",
|
92 |
-
"past_values_14",
|
93 |
-
"past_keys_15",
|
94 |
-
"past_values_15",
|
95 |
-
"past_keys_16",
|
96 |
-
"past_values_16",
|
97 |
-
"past_keys_17",
|
98 |
-
"past_values_17",
|
99 |
-
"past_keys_18",
|
100 |
-
"past_values_18",
|
101 |
-
"past_keys_19",
|
102 |
-
"past_values_19",
|
103 |
-
"past_keys_20",
|
104 |
-
"past_values_20",
|
105 |
-
"past_keys_21",
|
106 |
-
"past_values_21",
|
107 |
-
"past_keys_22",
|
108 |
-
"past_values_22",
|
109 |
-
"past_keys_23",
|
110 |
-
"past_values_23",
|
111 |
-
"past_keys_24",
|
112 |
-
"past_values_24",
|
113 |
-
"past_keys_25",
|
114 |
-
"past_values_25",
|
115 |
-
"past_keys_26",
|
116 |
-
"past_values_26",
|
117 |
-
"past_keys_27",
|
118 |
-
"past_values_27",
|
119 |
-
"past_keys_28",
|
120 |
-
"past_values_28",
|
121 |
-
"past_keys_29",
|
122 |
-
"past_values_29",
|
123 |
-
"past_keys_30",
|
124 |
-
"past_values_30",
|
125 |
-
"past_keys_31",
|
126 |
-
"past_values_31",
|
127 |
-
"input_hidden_states",
|
128 |
-
"past_seq_len",
|
129 |
-
"total_seq_len"
|
130 |
-
],
|
131 |
-
"outputs": [
|
132 |
-
"output_hidden_states",
|
133 |
-
"present_keys_0",
|
134 |
-
"present_values_0",
|
135 |
-
"present_keys_1",
|
136 |
-
"present_values_1",
|
137 |
-
"present_keys_2",
|
138 |
-
"present_values_2",
|
139 |
-
"present_keys_3",
|
140 |
-
"present_values_3",
|
141 |
-
"present_keys_4",
|
142 |
-
"present_values_4",
|
143 |
-
"present_keys_5",
|
144 |
-
"present_values_5",
|
145 |
-
"present_keys_6",
|
146 |
-
"present_values_6",
|
147 |
-
"present_keys_7",
|
148 |
-
"present_values_7",
|
149 |
-
"present_keys_8",
|
150 |
-
"present_values_8",
|
151 |
-
"present_keys_9",
|
152 |
-
"present_values_9",
|
153 |
-
"present_keys_10",
|
154 |
-
"present_values_10",
|
155 |
-
"present_keys_11",
|
156 |
-
"present_values_11",
|
157 |
-
"present_keys_12",
|
158 |
-
"present_values_12",
|
159 |
-
"present_keys_13",
|
160 |
-
"present_values_13",
|
161 |
-
"present_keys_14",
|
162 |
-
"present_values_14",
|
163 |
-
"present_keys_15",
|
164 |
-
"present_values_15",
|
165 |
-
"present_keys_16",
|
166 |
-
"present_values_16",
|
167 |
-
"present_keys_17",
|
168 |
-
"present_values_17",
|
169 |
-
"present_keys_18",
|
170 |
-
"present_values_18",
|
171 |
-
"present_keys_19",
|
172 |
-
"present_values_19",
|
173 |
-
"present_keys_20",
|
174 |
-
"present_values_20",
|
175 |
-
"present_keys_21",
|
176 |
-
"present_values_21",
|
177 |
-
"present_keys_22",
|
178 |
-
"present_values_22",
|
179 |
-
"present_keys_23",
|
180 |
-
"present_values_23",
|
181 |
-
"present_keys_24",
|
182 |
-
"present_values_24",
|
183 |
-
"present_keys_25",
|
184 |
-
"present_values_25",
|
185 |
-
"present_keys_26",
|
186 |
-
"present_values_26",
|
187 |
-
"present_keys_27",
|
188 |
-
"present_values_27",
|
189 |
-
"present_keys_28",
|
190 |
-
"present_values_28",
|
191 |
-
"present_keys_29",
|
192 |
-
"present_values_29",
|
193 |
-
"present_keys_30",
|
194 |
-
"present_values_30",
|
195 |
-
"present_keys_31",
|
196 |
-
"present_values_31"
|
197 |
-
],
|
198 |
-
"run_on_token_gen": false
|
199 |
-
},
|
200 |
-
"token-generator": {
|
201 |
-
"filename": "phi_4_mini_iter.onnx_ctx.onnx",
|
202 |
-
"session_options": {
|
203 |
-
"log_id": "onnxruntime-genai.token-generator",
|
204 |
-
"provider_options": [
|
205 |
-
{
|
206 |
-
"qnn": {
|
207 |
-
"backend_path": "QnnHtp.dll",
|
208 |
-
"htp_performance_mode": "burst",
|
209 |
-
"htp_graph_finalization_optimization_mode": "3",
|
210 |
-
"soc_model": "60"
|
211 |
-
}
|
212 |
-
}
|
213 |
-
],
|
214 |
-
"intra_op_num_threads": 12,
|
215 |
-
"inter_op_num_threads": 1
|
216 |
-
},
|
217 |
-
"inputs": [
|
218 |
-
"past_keys_0",
|
219 |
-
"past_values_0",
|
220 |
-
"past_keys_1",
|
221 |
-
"past_values_1",
|
222 |
-
"past_keys_2",
|
223 |
-
"past_values_2",
|
224 |
-
"past_keys_3",
|
225 |
-
"past_values_3",
|
226 |
-
"past_keys_4",
|
227 |
-
"past_values_4",
|
228 |
-
"past_keys_5",
|
229 |
-
"past_values_5",
|
230 |
-
"past_keys_6",
|
231 |
-
"past_values_6",
|
232 |
-
"past_keys_7",
|
233 |
-
"past_values_7",
|
234 |
-
"past_keys_8",
|
235 |
-
"past_values_8",
|
236 |
-
"past_keys_9",
|
237 |
-
"past_values_9",
|
238 |
-
"past_keys_10",
|
239 |
-
"past_values_10",
|
240 |
-
"past_keys_11",
|
241 |
-
"past_values_11",
|
242 |
-
"past_keys_12",
|
243 |
-
"past_values_12",
|
244 |
-
"past_keys_13",
|
245 |
-
"past_values_13",
|
246 |
-
"past_keys_14",
|
247 |
-
"past_values_14",
|
248 |
-
"past_keys_15",
|
249 |
-
"past_values_15",
|
250 |
-
"past_keys_16",
|
251 |
-
"past_values_16",
|
252 |
-
"past_keys_17",
|
253 |
-
"past_values_17",
|
254 |
-
"past_keys_18",
|
255 |
-
"past_values_18",
|
256 |
-
"past_keys_19",
|
257 |
-
"past_values_19",
|
258 |
-
"past_keys_20",
|
259 |
-
"past_values_20",
|
260 |
-
"past_keys_21",
|
261 |
-
"past_values_21",
|
262 |
-
"past_keys_22",
|
263 |
-
"past_values_22",
|
264 |
-
"past_keys_23",
|
265 |
-
"past_values_23",
|
266 |
-
"past_keys_24",
|
267 |
-
"past_values_24",
|
268 |
-
"past_keys_25",
|
269 |
-
"past_values_25",
|
270 |
-
"past_keys_26",
|
271 |
-
"past_values_26",
|
272 |
-
"past_keys_27",
|
273 |
-
"past_values_27",
|
274 |
-
"past_keys_28",
|
275 |
-
"past_values_28",
|
276 |
-
"past_keys_29",
|
277 |
-
"past_values_29",
|
278 |
-
"past_keys_30",
|
279 |
-
"past_values_30",
|
280 |
-
"past_keys_31",
|
281 |
-
"past_values_31",
|
282 |
-
"input_hidden_states",
|
283 |
-
"past_seq_len",
|
284 |
-
"total_seq_len"
|
285 |
-
],
|
286 |
-
"outputs": [
|
287 |
-
"output_hidden_states",
|
288 |
-
"present_keys_0",
|
289 |
-
"present_values_0",
|
290 |
-
"present_keys_1",
|
291 |
-
"present_values_1",
|
292 |
-
"present_keys_2",
|
293 |
-
"present_values_2",
|
294 |
-
"present_keys_3",
|
295 |
-
"present_values_3",
|
296 |
-
"present_keys_4",
|
297 |
-
"present_values_4",
|
298 |
-
"present_keys_5",
|
299 |
-
"present_values_5",
|
300 |
-
"present_keys_6",
|
301 |
-
"present_values_6",
|
302 |
-
"present_keys_7",
|
303 |
-
"present_values_7",
|
304 |
-
"present_keys_8",
|
305 |
-
"present_values_8",
|
306 |
-
"present_keys_9",
|
307 |
-
"present_values_9",
|
308 |
-
"present_keys_10",
|
309 |
-
"present_values_10",
|
310 |
-
"present_keys_11",
|
311 |
-
"present_values_11",
|
312 |
-
"present_keys_12",
|
313 |
-
"present_values_12",
|
314 |
-
"present_keys_13",
|
315 |
-
"present_values_13",
|
316 |
-
"present_keys_14",
|
317 |
-
"present_values_14",
|
318 |
-
"present_keys_15",
|
319 |
-
"present_values_15",
|
320 |
-
"present_keys_16",
|
321 |
-
"present_values_16",
|
322 |
-
"present_keys_17",
|
323 |
-
"present_values_17",
|
324 |
-
"present_keys_18",
|
325 |
-
"present_values_18",
|
326 |
-
"present_keys_19",
|
327 |
-
"present_values_19",
|
328 |
-
"present_keys_20",
|
329 |
-
"present_values_20",
|
330 |
-
"present_keys_21",
|
331 |
-
"present_values_21",
|
332 |
-
"present_keys_22",
|
333 |
-
"present_values_22",
|
334 |
-
"present_keys_23",
|
335 |
-
"present_values_23",
|
336 |
-
"present_keys_24",
|
337 |
-
"present_values_24",
|
338 |
-
"present_keys_25",
|
339 |
-
"present_values_25",
|
340 |
-
"present_keys_26",
|
341 |
-
"present_values_26",
|
342 |
-
"present_keys_27",
|
343 |
-
"present_values_27",
|
344 |
-
"present_keys_28",
|
345 |
-
"present_values_28",
|
346 |
-
"present_keys_29",
|
347 |
-
"present_values_29",
|
348 |
-
"present_keys_30",
|
349 |
-
"present_values_30",
|
350 |
-
"present_keys_31",
|
351 |
-
"present_values_31"
|
352 |
-
],
|
353 |
-
"run_on_prompt": false
|
354 |
-
},
|
355 |
-
"transformer-head": {
|
356 |
-
"filename": "phi_4_mini_lm_head.all.quant.onnx",
|
357 |
-
"inputs": [
|
358 |
-
"output_hidden_states"
|
359 |
-
],
|
360 |
-
"outputs": [
|
361 |
-
"logits"
|
362 |
-
]
|
363 |
-
}
|
364 |
-
}
|
365 |
-
]
|
366 |
-
},
|
367 |
-
"eos_token_id": [
|
368 |
-
200020,
|
369 |
-
199999
|
370 |
-
],
|
371 |
-
"pad_token_id": 199999,
|
372 |
-
"type": "decoder-pipeline",
|
373 |
-
"vocab_size": 200064
|
374 |
-
},
|
375 |
-
"search": {
|
376 |
-
"diversity_penalty": 0.0,
|
377 |
-
"do_sample": true,
|
378 |
-
"early_stopping": true,
|
379 |
-
"length_penalty": 1.0,
|
380 |
-
"max_length": 4096,
|
381 |
-
"min_length": 0,
|
382 |
-
"no_repeat_ngram_size": 0,
|
383 |
-
"num_beams": 1,
|
384 |
-
"num_return_sequences": 1,
|
385 |
-
"past_present_share_buffer": true,
|
386 |
-
"repetition_penalty": 1.0,
|
387 |
-
"temperature": 0.6,
|
388 |
-
"top_k": 5,
|
389 |
-
"top_p": 0.95
|
390 |
-
}
|
391 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": {
|
3 |
-
"content": "<|endoftext|>",
|
4 |
-
"lstrip": false,
|
5 |
-
"normalized": false,
|
6 |
-
"rstrip": false,
|
7 |
-
"single_word": false
|
8 |
-
},
|
9 |
-
"eos_token": {
|
10 |
-
"content": "<|endoftext|>",
|
11 |
-
"lstrip": false,
|
12 |
-
"normalized": false,
|
13 |
-
"rstrip": false,
|
14 |
-
"single_word": false
|
15 |
-
},
|
16 |
-
"pad_token": {
|
17 |
-
"content": "<|endoftext|>",
|
18 |
-
"lstrip": false,
|
19 |
-
"normalized": false,
|
20 |
-
"rstrip": false,
|
21 |
-
"single_word": false
|
22 |
-
},
|
23 |
-
"unk_token": {
|
24 |
-
"content": "<|endoftext|>",
|
25 |
-
"lstrip": false,
|
26 |
-
"normalized": false,
|
27 |
-
"rstrip": false,
|
28 |
-
"single_word": false
|
29 |
-
}
|
30 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"add_bos_token": false,
|
3 |
-
"add_eos_token": false,
|
4 |
-
"add_prefix_space": false,
|
5 |
-
"added_tokens_decoder": {
|
6 |
-
"199999": {
|
7 |
-
"content": "<|endoftext|>",
|
8 |
-
"lstrip": false,
|
9 |
-
"normalized": false,
|
10 |
-
"rstrip": false,
|
11 |
-
"single_word": false,
|
12 |
-
"special": true
|
13 |
-
},
|
14 |
-
"200018": {
|
15 |
-
"content": "<|endofprompt|>",
|
16 |
-
"lstrip": false,
|
17 |
-
"normalized": false,
|
18 |
-
"rstrip": false,
|
19 |
-
"single_word": false,
|
20 |
-
"special": true
|
21 |
-
},
|
22 |
-
"200019": {
|
23 |
-
"content": "<|assistant|>",
|
24 |
-
"lstrip": false,
|
25 |
-
"normalized": false,
|
26 |
-
"rstrip": true,
|
27 |
-
"single_word": false,
|
28 |
-
"special": true
|
29 |
-
},
|
30 |
-
"200020": {
|
31 |
-
"content": "<|end|>",
|
32 |
-
"lstrip": false,
|
33 |
-
"normalized": false,
|
34 |
-
"rstrip": true,
|
35 |
-
"single_word": false,
|
36 |
-
"special": true
|
37 |
-
},
|
38 |
-
"200021": {
|
39 |
-
"content": "<|user|>",
|
40 |
-
"lstrip": false,
|
41 |
-
"normalized": false,
|
42 |
-
"rstrip": true,
|
43 |
-
"single_word": false,
|
44 |
-
"special": true
|
45 |
-
},
|
46 |
-
"200022": {
|
47 |
-
"content": "<|system|>",
|
48 |
-
"lstrip": false,
|
49 |
-
"normalized": false,
|
50 |
-
"rstrip": true,
|
51 |
-
"single_word": false,
|
52 |
-
"special": true
|
53 |
-
},
|
54 |
-
"200023": {
|
55 |
-
"content": "<|tool|>",
|
56 |
-
"lstrip": false,
|
57 |
-
"normalized": false,
|
58 |
-
"rstrip": true,
|
59 |
-
"single_word": false,
|
60 |
-
"special": false
|
61 |
-
},
|
62 |
-
"200024": {
|
63 |
-
"content": "<|/tool|>",
|
64 |
-
"lstrip": false,
|
65 |
-
"normalized": false,
|
66 |
-
"rstrip": true,
|
67 |
-
"single_word": false,
|
68 |
-
"special": false
|
69 |
-
},
|
70 |
-
"200025": {
|
71 |
-
"content": "<|tool_call|>",
|
72 |
-
"lstrip": false,
|
73 |
-
"normalized": false,
|
74 |
-
"rstrip": true,
|
75 |
-
"single_word": false,
|
76 |
-
"special": false
|
77 |
-
},
|
78 |
-
"200026": {
|
79 |
-
"content": "<|/tool_call|>",
|
80 |
-
"lstrip": false,
|
81 |
-
"normalized": false,
|
82 |
-
"rstrip": true,
|
83 |
-
"single_word": false,
|
84 |
-
"special": false
|
85 |
-
},
|
86 |
-
"200027": {
|
87 |
-
"content": "<|tool_response|>",
|
88 |
-
"lstrip": false,
|
89 |
-
"normalized": false,
|
90 |
-
"rstrip": true,
|
91 |
-
"single_word": false,
|
92 |
-
"special": false
|
93 |
-
},
|
94 |
-
"200028": {
|
95 |
-
"content": "<|tag|>",
|
96 |
-
"lstrip": false,
|
97 |
-
"normalized": false,
|
98 |
-
"rstrip": true,
|
99 |
-
"single_word": false,
|
100 |
-
"special": true
|
101 |
-
}
|
102 |
-
},
|
103 |
-
"bos_token": "<|endoftext|>",
|
104 |
-
"chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' }}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
|
105 |
-
"clean_up_tokenization_spaces": false,
|
106 |
-
"eos_token": "<|endoftext|>",
|
107 |
-
"extra_special_tokens": {},
|
108 |
-
"max_length": 1024,
|
109 |
-
"model_max_length": 128000,
|
110 |
-
"pad_token": "<|endoftext|>",
|
111 |
-
"stride": 0,
|
112 |
-
"tokenizer_class": "GPT2Tokenizer",
|
113 |
-
"truncation_side": "right",
|
114 |
-
"truncation_strategy": "longest_first",
|
115 |
-
"unk_token": "<|endoftext|>"
|
116 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|