kvaishnavi committed on
Commit
2812032
·
1 Parent(s): 920220c

Upload Phi-4-mini-reasoning ONNX models

Browse files
Files changed (43) hide show
  1. .gitattributes +2 -1
  2. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json +3 -0
  3. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json +3 -0
  4. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt +0 -0
  5. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx +3 -0
  6. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data +3 -0
  7. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json +3 -0
  8. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json +0 -0
  9. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json +3 -0
  10. cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json +3 -0
  11. gpu/gpu-int4-rtn-block-32/added_tokens.json +3 -0
  12. gpu/gpu-int4-rtn-block-32/genai_config.json +3 -0
  13. gpu/gpu-int4-rtn-block-32/merges.txt +0 -0
  14. gpu/gpu-int4-rtn-block-32/model.onnx +3 -0
  15. gpu/gpu-int4-rtn-block-32/model.onnx.data +3 -0
  16. gpu/gpu-int4-rtn-block-32/special_tokens_map.json +3 -0
  17. gpu/gpu-int4-rtn-block-32/tokenizer.json +3 -0
  18. gpu/gpu-int4-rtn-block-32/tokenizer_config.json +3 -0
  19. gpu/gpu-int4-rtn-block-32/vocab.json +3 -0
  20. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE +0 -0
  21. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md +0 -0
  22. npu/qnn-int4/added_tokens.json +3 -0
  23. npu/qnn-int4/config.json +3 -0
  24. npu/qnn-int4/genai_config.json +3 -0
  25. npu/qnn-int4/merges.txt +0 -0
  26. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin +0 -0
  27. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin +0 -0
  28. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin +0 -0
  29. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin +0 -0
  30. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx +0 -0
  31. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx +0 -0
  32. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx +0 -0
  33. {phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx +0 -0
  34. npu/qnn-int4/special_tokens_map.json +3 -0
  35. npu/qnn-int4/tokenizer.json +3 -0
  36. npu/qnn-int4/tokenizer_config.json +3 -0
  37. npu/qnn-int4/vocab.json +3 -0
  38. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json +0 -12
  39. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json +0 -144
  40. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json +0 -391
  41. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json +0 -30
  42. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json +0 -116
  43. phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json +0 -0
.gitattributes CHANGED
@@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
37
+ *.json filter=lfs diff=lfs merge=lfs -text
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
3
+ size 249
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fcfa1e663f2bc867f8dc62fae65dd0924f0a4d68b43d1234df742dd19171470
3
+ size 1520
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/merges.txt RENAMED
File without changes
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929ee60b9dfcca848a2c62c6533e18c6e74d9d657e01f577e5b54602c02e5ad5
3
+ size 52119126
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd04a1bdfd475e5909a7a90a948b4fb425b4a565293ec36e34df7ac8c39fe8a4
3
+ size 4856573952
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
3
+ size 587
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4}/tokenizer.json RENAMED
File without changes
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
3
+ size 3216
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
3
+ size 3910310
gpu/gpu-int4-rtn-block-32/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
3
+ size 249
gpu/gpu-int4-rtn-block-32/genai_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44539bd5e6a1a53e451698216f326b3f030f206ccea77b35ca5a4137df436835
3
+ size 1569
gpu/gpu-int4-rtn-block-32/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
gpu/gpu-int4-rtn-block-32/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ab70c351e50301c1cf4cca3f23176bba73e43f0d94cea26d471ba43b898f65
3
+ size 287586
gpu/gpu-int4-rtn-block-32/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fc3832908b14538d2d53597507a10160e1307c45e153fa5822d82e6248471c3
3
+ size 3413194752
gpu/gpu-int4-rtn-block-32/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
3
+ size 587
gpu/gpu-int4-rtn-block-32/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
3
+ size 15524194
gpu/gpu-int4-rtn-block-32/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d7cf2abc1139ffc61368dbbacd521d1be29baa30e461382f3bea947cba16ec
3
+ size 3216
gpu/gpu-int4-rtn-block-32/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
3
+ size 3910310
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/LICENSE RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/README.md RENAMED
File without changes
npu/qnn-int4/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f2aceb0f20b71dd1f4bcc7e052e4412946bf281840b8f83d39f259571af486
3
+ size 249
npu/qnn-int4/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac65d86061d3d0d704ee2511fd0eb8713ef19eb6eedba17c3080a4165d5b933b
3
+ size 2504
npu/qnn-int4/genai_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cf1863737bcd75cb59c56e8b505ed316132162a47689f85ff52556471dd6698
3
+ size 16942
npu/qnn-int4/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_1.bin RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_2.bin RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_3.bin RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_cb_4.bin RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_ctx.onnx_ctx.onnx RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_embeddings.all.quant.onnx RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_iter.onnx_ctx.onnx RENAMED
File without changes
{phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx → npu/qnn-int4}/phi_4_mini_lm_head.all.quant.onnx RENAMED
File without changes
npu/qnn-int4/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff38493227d813e29fcf8406e8e90062f1f031aa47d589325e9c31d89ac7cc3
3
+ size 587
npu/qnn-int4/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08ed885956f70d877a4d9078ec9e3119d8b68a8d579003e230be18cad66911c
3
+ size 15524194
npu/qnn-int4/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dcb238a84365a07e19dd4334f82da442eea64da30ecbfa3718c06557ea3a589
3
+ size 3323
npu/qnn-int4/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb65a857824fa6615bb1782d95d882617a8bbce1da0317118586b36f39e98bd
3
+ size 3910310
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/added_tokens.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "<|/tool_call|>": 200026,
3
- "<|/tool|>": 200024,
4
- "<|assistant|>": 200019,
5
- "<|end|>": 200020,
6
- "<|system|>": 200022,
7
- "<|tag|>": 200028,
8
- "<|tool_call|>": 200025,
9
- "<|tool_response|>": 200027,
10
- "<|tool|>": 200023,
11
- "<|user|>": 200021
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/config.json DELETED
@@ -1,144 +0,0 @@
1
- {
2
- "_name_or_path": "Phi-4-mini-instruct",
3
- "architectures": [
4
- "Phi3ForCausalLM"
5
- ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "auto_map": {
9
- "AutoConfig": "configuration_phi3.Phi3Config",
10
- "AutoModelForCausalLM": "modeling_phi3.Phi3ForCausalLM",
11
- "AutoTokenizer": "Xenova/gpt-4o"
12
- },
13
- "bos_token_id": 199999,
14
- "embd_pdrop": 0.0,
15
- "eos_token_id": 199999,
16
- "full_attn_mod": 1,
17
- "hidden_act": "silu",
18
- "hidden_size": 3072,
19
- "initializer_range": 0.02,
20
- "intermediate_size": 8192,
21
- "interpolate_factor": 1,
22
- "lm_head_bias": false,
23
- "max_position_embeddings": 131072,
24
- "mlp_bias": false,
25
- "model_type": "phi3",
26
- "num_attention_heads": 24,
27
- "num_hidden_layers": 32,
28
- "num_key_value_heads": 8,
29
- "original_max_position_embeddings": 4096,
30
- "pad_token_id": 199999,
31
- "partial_rotary_factor": 0.75,
32
- "resid_pdrop": 0.0,
33
- "rms_norm_eps": 1e-05,
34
- "rope_scaling": {
35
- "long_factor": [
36
- 1,
37
- 1.118320672,
38
- 1.250641126,
39
- 1.398617824,
40
- 1.564103225,
41
- 1.74916897,
42
- 1.956131817,
43
- 2.187582649,
44
- 2.446418898,
45
- 2.735880826,
46
- 3.059592084,
47
- 3.421605075,
48
- 3.826451687,
49
- 4.279200023,
50
- 4.785517845,
51
- 5.351743533,
52
- 5.984965424,
53
- 6.693110555,
54
- 7.485043894,
55
- 8.370679318,
56
- 9.36110372,
57
- 10.4687158,
58
- 11.70738129,
59
- 13.09260651,
60
- 14.64173252,
61
- 16.37415215,
62
- 18.31155283,
63
- 20.47818807,
64
- 22.90118105,
65
- 25.61086418,
66
- 28.64115884,
67
- 32.03,
68
- 32.1,
69
- 32.13,
70
- 32.23,
71
- 32.6,
72
- 32.61,
73
- 32.64,
74
- 32.66,
75
- 32.7,
76
- 32.71,
77
- 32.93,
78
- 32.97,
79
- 33.28,
80
- 33.49,
81
- 33.5,
82
- 44.16,
83
- 47.77
84
- ],
85
- "short_factor": [
86
- 1.0,
87
- 1.0,
88
- 1.0,
89
- 1.0,
90
- 1.0,
91
- 1.0,
92
- 1.0,
93
- 1.0,
94
- 1.0,
95
- 1.0,
96
- 1.0,
97
- 1.0,
98
- 1.0,
99
- 1.0,
100
- 1.0,
101
- 1.0,
102
- 1.0,
103
- 1.0,
104
- 1.0,
105
- 1.0,
106
- 1.0,
107
- 1.0,
108
- 1.0,
109
- 1.0,
110
- 1.0,
111
- 1.0,
112
- 1.0,
113
- 1.0,
114
- 1.0,
115
- 1.0,
116
- 1.0,
117
- 1.0,
118
- 1.0,
119
- 1.0,
120
- 1.0,
121
- 1.0,
122
- 1.0,
123
- 1.0,
124
- 1.0,
125
- 1.0,
126
- 1.0,
127
- 1.0,
128
- 1.0,
129
- 1.0,
130
- 1.0,
131
- 1.0,
132
- 1.0,
133
- 1.0
134
- ],
135
- "type": "longrope"
136
- },
137
- "rope_theta": 10000.0,
138
- "sliding_window": 262144,
139
- "tie_word_embeddings": true,
140
- "torch_dtype": "bfloat16",
141
- "transformers_version": "4.45.0",
142
- "use_cache": true,
143
- "vocab_size": 200064
144
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/genai_config.json DELETED
@@ -1,391 +0,0 @@
1
- {
2
- "model": {
3
- "bos_token_id": 199999,
4
- "context_length": 4096,
5
- "decoder": {
6
- "session_options": {
7
- "log_id": "onnxruntime-genai",
8
- "provider_options": []
9
- },
10
- "head_size": 128,
11
- "hidden_size": 3072,
12
- "inputs": {
13
- "input_ids": "input_ids",
14
- "attention_mask": "attention_mask",
15
- "past_key_names": "past_keys_%d",
16
- "past_value_names": "past_values_%d",
17
- "past_sequence_length": "past_seq_len",
18
- "total_sequence_length": "total_seq_len"
19
- },
20
- "outputs": {
21
- "logits": "logits",
22
- "present_key_names": "present_keys_%d",
23
- "present_value_names": "present_values_%d"
24
- },
25
- "num_attention_heads": 24,
26
- "num_hidden_layers": 32,
27
- "num_key_value_heads": 8,
28
- "sliding_window": {
29
- "window_size": 64,
30
- "pad_value": 0,
31
- "alignment": "left",
32
- "slide_key_value_cache": false
33
- },
34
- "pipeline": [
35
- {
36
- "embedding": {
37
- "filename": "phi_4_mini_embeddings.all.quant.onnx",
38
- "inputs": [
39
- "input_ids"
40
- ],
41
- "outputs": [
42
- "input_hidden_states"
43
- ]
44
- },
45
- "prompt-processor": {
46
- "filename": "phi_4_mini_ctx.onnx_ctx.onnx",
47
- "session_options": {
48
- "log_id": "onnxruntime-genai.prompt-processor",
49
- "provider_options": [
50
- {
51
- "qnn": {
52
- "backend_path": "QnnHtp.dll",
53
- "htp_performance_mode": "burst",
54
- "htp_graph_finalization_optimization_mode": "3",
55
- "soc_model": "60"
56
- }
57
- }
58
- ],
59
- "intra_op_num_threads": 12,
60
- "inter_op_num_threads": 1
61
- },
62
- "inputs": [
63
- "past_keys_0",
64
- "past_values_0",
65
- "past_keys_1",
66
- "past_values_1",
67
- "past_keys_2",
68
- "past_values_2",
69
- "past_keys_3",
70
- "past_values_3",
71
- "past_keys_4",
72
- "past_values_4",
73
- "past_keys_5",
74
- "past_values_5",
75
- "past_keys_6",
76
- "past_values_6",
77
- "past_keys_7",
78
- "past_values_7",
79
- "past_keys_8",
80
- "past_values_8",
81
- "past_keys_9",
82
- "past_values_9",
83
- "past_keys_10",
84
- "past_values_10",
85
- "past_keys_11",
86
- "past_values_11",
87
- "past_keys_12",
88
- "past_values_12",
89
- "past_keys_13",
90
- "past_values_13",
91
- "past_keys_14",
92
- "past_values_14",
93
- "past_keys_15",
94
- "past_values_15",
95
- "past_keys_16",
96
- "past_values_16",
97
- "past_keys_17",
98
- "past_values_17",
99
- "past_keys_18",
100
- "past_values_18",
101
- "past_keys_19",
102
- "past_values_19",
103
- "past_keys_20",
104
- "past_values_20",
105
- "past_keys_21",
106
- "past_values_21",
107
- "past_keys_22",
108
- "past_values_22",
109
- "past_keys_23",
110
- "past_values_23",
111
- "past_keys_24",
112
- "past_values_24",
113
- "past_keys_25",
114
- "past_values_25",
115
- "past_keys_26",
116
- "past_values_26",
117
- "past_keys_27",
118
- "past_values_27",
119
- "past_keys_28",
120
- "past_values_28",
121
- "past_keys_29",
122
- "past_values_29",
123
- "past_keys_30",
124
- "past_values_30",
125
- "past_keys_31",
126
- "past_values_31",
127
- "input_hidden_states",
128
- "past_seq_len",
129
- "total_seq_len"
130
- ],
131
- "outputs": [
132
- "output_hidden_states",
133
- "present_keys_0",
134
- "present_values_0",
135
- "present_keys_1",
136
- "present_values_1",
137
- "present_keys_2",
138
- "present_values_2",
139
- "present_keys_3",
140
- "present_values_3",
141
- "present_keys_4",
142
- "present_values_4",
143
- "present_keys_5",
144
- "present_values_5",
145
- "present_keys_6",
146
- "present_values_6",
147
- "present_keys_7",
148
- "present_values_7",
149
- "present_keys_8",
150
- "present_values_8",
151
- "present_keys_9",
152
- "present_values_9",
153
- "present_keys_10",
154
- "present_values_10",
155
- "present_keys_11",
156
- "present_values_11",
157
- "present_keys_12",
158
- "present_values_12",
159
- "present_keys_13",
160
- "present_values_13",
161
- "present_keys_14",
162
- "present_values_14",
163
- "present_keys_15",
164
- "present_values_15",
165
- "present_keys_16",
166
- "present_values_16",
167
- "present_keys_17",
168
- "present_values_17",
169
- "present_keys_18",
170
- "present_values_18",
171
- "present_keys_19",
172
- "present_values_19",
173
- "present_keys_20",
174
- "present_values_20",
175
- "present_keys_21",
176
- "present_values_21",
177
- "present_keys_22",
178
- "present_values_22",
179
- "present_keys_23",
180
- "present_values_23",
181
- "present_keys_24",
182
- "present_values_24",
183
- "present_keys_25",
184
- "present_values_25",
185
- "present_keys_26",
186
- "present_values_26",
187
- "present_keys_27",
188
- "present_values_27",
189
- "present_keys_28",
190
- "present_values_28",
191
- "present_keys_29",
192
- "present_values_29",
193
- "present_keys_30",
194
- "present_values_30",
195
- "present_keys_31",
196
- "present_values_31"
197
- ],
198
- "run_on_token_gen": false
199
- },
200
- "token-generator": {
201
- "filename": "phi_4_mini_iter.onnx_ctx.onnx",
202
- "session_options": {
203
- "log_id": "onnxruntime-genai.token-generator",
204
- "provider_options": [
205
- {
206
- "qnn": {
207
- "backend_path": "QnnHtp.dll",
208
- "htp_performance_mode": "burst",
209
- "htp_graph_finalization_optimization_mode": "3",
210
- "soc_model": "60"
211
- }
212
- }
213
- ],
214
- "intra_op_num_threads": 12,
215
- "inter_op_num_threads": 1
216
- },
217
- "inputs": [
218
- "past_keys_0",
219
- "past_values_0",
220
- "past_keys_1",
221
- "past_values_1",
222
- "past_keys_2",
223
- "past_values_2",
224
- "past_keys_3",
225
- "past_values_3",
226
- "past_keys_4",
227
- "past_values_4",
228
- "past_keys_5",
229
- "past_values_5",
230
- "past_keys_6",
231
- "past_values_6",
232
- "past_keys_7",
233
- "past_values_7",
234
- "past_keys_8",
235
- "past_values_8",
236
- "past_keys_9",
237
- "past_values_9",
238
- "past_keys_10",
239
- "past_values_10",
240
- "past_keys_11",
241
- "past_values_11",
242
- "past_keys_12",
243
- "past_values_12",
244
- "past_keys_13",
245
- "past_values_13",
246
- "past_keys_14",
247
- "past_values_14",
248
- "past_keys_15",
249
- "past_values_15",
250
- "past_keys_16",
251
- "past_values_16",
252
- "past_keys_17",
253
- "past_values_17",
254
- "past_keys_18",
255
- "past_values_18",
256
- "past_keys_19",
257
- "past_values_19",
258
- "past_keys_20",
259
- "past_values_20",
260
- "past_keys_21",
261
- "past_values_21",
262
- "past_keys_22",
263
- "past_values_22",
264
- "past_keys_23",
265
- "past_values_23",
266
- "past_keys_24",
267
- "past_values_24",
268
- "past_keys_25",
269
- "past_values_25",
270
- "past_keys_26",
271
- "past_values_26",
272
- "past_keys_27",
273
- "past_values_27",
274
- "past_keys_28",
275
- "past_values_28",
276
- "past_keys_29",
277
- "past_values_29",
278
- "past_keys_30",
279
- "past_values_30",
280
- "past_keys_31",
281
- "past_values_31",
282
- "input_hidden_states",
283
- "past_seq_len",
284
- "total_seq_len"
285
- ],
286
- "outputs": [
287
- "output_hidden_states",
288
- "present_keys_0",
289
- "present_values_0",
290
- "present_keys_1",
291
- "present_values_1",
292
- "present_keys_2",
293
- "present_values_2",
294
- "present_keys_3",
295
- "present_values_3",
296
- "present_keys_4",
297
- "present_values_4",
298
- "present_keys_5",
299
- "present_values_5",
300
- "present_keys_6",
301
- "present_values_6",
302
- "present_keys_7",
303
- "present_values_7",
304
- "present_keys_8",
305
- "present_values_8",
306
- "present_keys_9",
307
- "present_values_9",
308
- "present_keys_10",
309
- "present_values_10",
310
- "present_keys_11",
311
- "present_values_11",
312
- "present_keys_12",
313
- "present_values_12",
314
- "present_keys_13",
315
- "present_values_13",
316
- "present_keys_14",
317
- "present_values_14",
318
- "present_keys_15",
319
- "present_values_15",
320
- "present_keys_16",
321
- "present_values_16",
322
- "present_keys_17",
323
- "present_values_17",
324
- "present_keys_18",
325
- "present_values_18",
326
- "present_keys_19",
327
- "present_values_19",
328
- "present_keys_20",
329
- "present_values_20",
330
- "present_keys_21",
331
- "present_values_21",
332
- "present_keys_22",
333
- "present_values_22",
334
- "present_keys_23",
335
- "present_values_23",
336
- "present_keys_24",
337
- "present_values_24",
338
- "present_keys_25",
339
- "present_values_25",
340
- "present_keys_26",
341
- "present_values_26",
342
- "present_keys_27",
343
- "present_values_27",
344
- "present_keys_28",
345
- "present_values_28",
346
- "present_keys_29",
347
- "present_values_29",
348
- "present_keys_30",
349
- "present_values_30",
350
- "present_keys_31",
351
- "present_values_31"
352
- ],
353
- "run_on_prompt": false
354
- },
355
- "transformer-head": {
356
- "filename": "phi_4_mini_lm_head.all.quant.onnx",
357
- "inputs": [
358
- "output_hidden_states"
359
- ],
360
- "outputs": [
361
- "logits"
362
- ]
363
- }
364
- }
365
- ]
366
- },
367
- "eos_token_id": [
368
- 200020,
369
- 199999
370
- ],
371
- "pad_token_id": 199999,
372
- "type": "decoder-pipeline",
373
- "vocab_size": 200064
374
- },
375
- "search": {
376
- "diversity_penalty": 0.0,
377
- "do_sample": true,
378
- "early_stopping": true,
379
- "length_penalty": 1.0,
380
- "max_length": 4096,
381
- "min_length": 0,
382
- "no_repeat_ngram_size": 0,
383
- "num_beams": 1,
384
- "num_return_sequences": 1,
385
- "past_present_share_buffer": true,
386
- "repetition_penalty": 1.0,
387
- "temperature": 0.6,
388
- "top_k": 5,
389
- "top_p": 0.95
390
- }
391
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/special_tokens_map.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "<|endoftext|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "unk_token": {
24
- "content": "<|endoftext|>",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- }
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/tokenizer_config.json DELETED
@@ -1,116 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_eos_token": false,
4
- "add_prefix_space": false,
5
- "added_tokens_decoder": {
6
- "199999": {
7
- "content": "<|endoftext|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "200018": {
15
- "content": "<|endofprompt|>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": true
21
- },
22
- "200019": {
23
- "content": "<|assistant|>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": true,
27
- "single_word": false,
28
- "special": true
29
- },
30
- "200020": {
31
- "content": "<|end|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": true,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "200021": {
39
- "content": "<|user|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": true,
43
- "single_word": false,
44
- "special": true
45
- },
46
- "200022": {
47
- "content": "<|system|>",
48
- "lstrip": false,
49
- "normalized": false,
50
- "rstrip": true,
51
- "single_word": false,
52
- "special": true
53
- },
54
- "200023": {
55
- "content": "<|tool|>",
56
- "lstrip": false,
57
- "normalized": false,
58
- "rstrip": true,
59
- "single_word": false,
60
- "special": false
61
- },
62
- "200024": {
63
- "content": "<|/tool|>",
64
- "lstrip": false,
65
- "normalized": false,
66
- "rstrip": true,
67
- "single_word": false,
68
- "special": false
69
- },
70
- "200025": {
71
- "content": "<|tool_call|>",
72
- "lstrip": false,
73
- "normalized": false,
74
- "rstrip": true,
75
- "single_word": false,
76
- "special": false
77
- },
78
- "200026": {
79
- "content": "<|/tool_call|>",
80
- "lstrip": false,
81
- "normalized": false,
82
- "rstrip": true,
83
- "single_word": false,
84
- "special": false
85
- },
86
- "200027": {
87
- "content": "<|tool_response|>",
88
- "lstrip": false,
89
- "normalized": false,
90
- "rstrip": true,
91
- "single_word": false,
92
- "special": false
93
- },
94
- "200028": {
95
- "content": "<|tag|>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": true,
99
- "single_word": false,
100
- "special": true
101
- }
102
- },
103
- "bos_token": "<|endoftext|>",
104
- "chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' }}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
105
- "clean_up_tokenization_spaces": false,
106
- "eos_token": "<|endoftext|>",
107
- "extra_special_tokens": {},
108
- "max_length": 1024,
109
- "model_max_length": 128000,
110
- "pad_token": "<|endoftext|>",
111
- "stride": 0,
112
- "tokenizer_class": "GPT2Tokenizer",
113
- "truncation_side": "right",
114
- "truncation_strategy": "longest_first",
115
- "unk_token": "<|endoftext|>"
116
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
phi-4-mini-reasoning-onnx/phi-4-mini-reasoning-onnx/npu/qnn-phi4-mini-reasoning-onnx/vocab.json DELETED
The diff for this file is too large to render. See raw diff