Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +24 -0
- context_encoding_model/_tp0_bk0/graph.neff +3 -0
- context_encoding_model/_tp0_bk0/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff +3 -0
- context_encoding_model/_tp0_bk1/graph.neff +3 -0
- context_encoding_model/_tp0_bk1/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff +3 -0
- context_encoding_model/_tp0_bk2/graph.neff +3 -0
- context_encoding_model/_tp0_bk2/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff +3 -0
- context_encoding_model/_tp0_bk3/compile_flags.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.json +1 -0
- context_encoding_model/_tp0_bk3/global_metric_store.json +1079 -0
- context_encoding_model/_tp0_bk3/graph.neff +3 -0
- context_encoding_model/_tp0_bk3/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff +3 -0
- context_encoding_model/_tp0_bk3/neuron_config.json +213 -0
- context_encoding_model/_tp0_bk4/command.txt +1 -0
- context_encoding_model/_tp0_bk4/compile_flags.MODULE_d342327da795afc2aa68+5e8b788a.json +1 -0
- context_encoding_model/_tp0_bk4/global_metric_store.json +1079 -0
- context_encoding_model/_tp0_bk4/graph.neff +3 -0
- context_encoding_model/_tp0_bk4/log-neuron-cc.txt +0 -0
- context_encoding_model/_tp0_bk4/metaneff.pb +3 -0
- context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb +3 -0
- context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff +3 -0
- context_encoding_model/_tp0_bk4/neuron_config.json +213 -0
- layout_opt/command.txt +1 -0
- layout_opt/graph.neff +3 -0
- layout_opt/log-neuron-cc.txt +0 -0
- layout_opt/metaneff +874 -0
- layout_opt/model/graph.hlo +3 -0
- model.pt +3 -0
- token_generation_model/_tp0_bk0/graph.neff +3 -0
- token_generation_model/_tp0_bk0/metaneff.pb +3 -0
- token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.hlo_module.pb +3 -0
- token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff +3 -0
- token_generation_model/_tp0_bk0/wrapped_neff.hlo +3 -0
- token_generation_model/_tp0_bk1/graph.neff +3 -0
- token_generation_model/_tp0_bk1/metaneff.pb +3 -0
- token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.hlo_module.pb +3 -0
- token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff +3 -0
- token_generation_model/_tp0_bk2/graph.neff +3 -0
- token_generation_model/_tp0_bk2/metaneff.pb +3 -0
- token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.hlo_module.pb +3 -0
- token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff +3 -0
- token_generation_model/_tp0_bk3/graph.neff +3 -0
- token_generation_model/_tp0_bk3/metaneff.pb +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
token_generation_model/_tp0_bk3/graph.neff filter=lfs diff=lfs merge=lfs -text
|
38 |
+
token_generation_model/_tp0_bk3/model.MODULE_668122c92a86c0ce6817+f94fe8ed.neff filter=lfs diff=lfs merge=lfs -text
|
39 |
+
token_generation_model/_tp0_bk4/model.MODULE_fb6decaa94b1936d08da+1b5847e3.neff filter=lfs diff=lfs merge=lfs -text
|
40 |
+
token_generation_model/_tp0_bk4/graph.neff filter=lfs diff=lfs merge=lfs -text
|
41 |
+
token_generation_model/_tp0_bk0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
42 |
+
token_generation_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
|
43 |
+
token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff filter=lfs diff=lfs merge=lfs -text
|
44 |
+
token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff filter=lfs diff=lfs merge=lfs -text
|
45 |
+
token_generation_model/_tp0_bk2/graph.neff filter=lfs diff=lfs merge=lfs -text
|
46 |
+
token_generation_model/_tp0_bk1/graph.neff filter=lfs diff=lfs merge=lfs -text
|
47 |
+
token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff filter=lfs diff=lfs merge=lfs -text
|
48 |
+
context_encoding_model/_tp0_bk1/graph.neff filter=lfs diff=lfs merge=lfs -text
|
49 |
+
context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff filter=lfs diff=lfs merge=lfs -text
|
50 |
+
context_encoding_model/_tp0_bk2/graph.neff filter=lfs diff=lfs merge=lfs -text
|
51 |
+
context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff filter=lfs diff=lfs merge=lfs -text
|
52 |
+
context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff filter=lfs diff=lfs merge=lfs -text
|
53 |
+
context_encoding_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
|
54 |
+
context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff filter=lfs diff=lfs merge=lfs -text
|
55 |
+
context_encoding_model/_tp0_bk3/graph.neff filter=lfs diff=lfs merge=lfs -text
|
56 |
+
context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff filter=lfs diff=lfs merge=lfs -text
|
57 |
+
context_encoding_model/_tp0_bk4/graph.neff filter=lfs diff=lfs merge=lfs -text
|
58 |
+
layout_opt/graph.neff filter=lfs diff=lfs merge=lfs -text
|
59 |
+
layout_opt/model/graph.hlo filter=lfs diff=lfs merge=lfs -text
|
context_encoding_model/_tp0_bk0/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4204eb4a15cdd349ac9a8e820ca7e3720613827e792ac79e7a5dd1055080e37
|
3 |
+
size 625664
|
context_encoding_model/_tp0_bk0/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:026981247cc92ae3d4098052e6e5cd96444bcad2ad94540d0cedbaf5978e6a67
|
3 |
+
size 873633
|
context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b83c27a0c2c3a9734291ce7f47544f4494b27f1c8a6c5b171a2abaead1f7e45c
|
3 |
+
size 939543
|
context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4204eb4a15cdd349ac9a8e820ca7e3720613827e792ac79e7a5dd1055080e37
|
3 |
+
size 625664
|
context_encoding_model/_tp0_bk1/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97771a78aebed34c313542b68a55aa0b7ad1bcc196ef7859e9c6d32f2aca5755
|
3 |
+
size 728064
|
context_encoding_model/_tp0_bk1/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7919366f46c6b8e36ccac5f786a1a8c01287cf244d988f1b58a68be4356face6
|
3 |
+
size 971205
|
context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:925d5603c197f320b5a97bcd1bb270fee71d58a600ff2ac6d2ac1c4ce205b7b6
|
3 |
+
size 1037079
|
context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97771a78aebed34c313542b68a55aa0b7ad1bcc196ef7859e9c6d32f2aca5755
|
3 |
+
size 728064
|
context_encoding_model/_tp0_bk2/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:984d840f5e118d1ed3bba502a877aa785002b074a22a45b384cd2172958beb3f
|
3 |
+
size 1035264
|
context_encoding_model/_tp0_bk2/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0cb712ca363e2b1bd7dcf4027263b606df90abb05f672ae1e8fb4af5f2b3616
|
3 |
+
size 1167813
|
context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdf8c3a993bb11cf1a28c6df55c6f130c69c4908ba4dde20d7bbc2356b5f2f53
|
3 |
+
size 1233687
|
context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:984d840f5e118d1ed3bba502a877aa785002b074a22a45b384cd2172958beb3f
|
3 |
+
size 1035264
|
context_encoding_model/_tp0_bk3/compile_flags.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/models/mistral-7b-v0.3-instruct-neuronx/context_encoding_model/_tp0_bk3/log-neuron-cc.txt"]
|
context_encoding_model/_tp0_bk3/global_metric_store.json
ADDED
@@ -0,0 +1,1079 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Average": {
|
3 |
+
"tensorizer": {
|
4 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.79875946044922,
|
5 |
+
"StaticProfiler::AveragePartitionUtilization": 99.50694274902344,
|
6 |
+
"StaticProfiler::AveragePeUtilization": 99.19517517089844,
|
7 |
+
"StaticProfiler::LocalizationEfficiency": 80.37861633300781,
|
8 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 88.63314819335938,
|
9 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
10 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"Count": {
|
14 |
+
"tensorizer": {
|
15 |
+
"StaticProfiler::AverageFractalPeUtilization": 1.0,
|
16 |
+
"StaticProfiler::AveragePartitionUtilization": 1.0,
|
17 |
+
"StaticProfiler::AveragePeUtilization": 1.0,
|
18 |
+
"StaticProfiler::LocalizationEfficiency": 1.0,
|
19 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
|
20 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
|
21 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"Sum": {
|
25 |
+
"compiletime": {
|
26 |
+
"AGOrderingAnalysisPass": 0.01833963394165039,
|
27 |
+
"AffinePredicateResolution": 0.0011298656463623047,
|
28 |
+
"AliasDependencyElimination": 0.0003044605255126953,
|
29 |
+
"AliasDependencyInduction": 0.0059871673583984375,
|
30 |
+
"AliasDependencyReset": 0.024695634841918945,
|
31 |
+
"BFComputeCutting": 0.0022745132446289063,
|
32 |
+
"BirCodeGenLoop": 0.12499594688415527,
|
33 |
+
"CCOpFusion": 0.025257110595703125,
|
34 |
+
"CanonicalizeConv": 2.300000051036477e-05,
|
35 |
+
"CanonicalizeDAGForPGTiling": 0.0039975643157958984,
|
36 |
+
"CanonicalizeForTensorizer": 5.6000000768108293e-05,
|
37 |
+
"CanonicalizeIR": 0.0017023086547851563,
|
38 |
+
"Canonicalizer": 0.0013979999348521233,
|
39 |
+
"CoalesceCCOp": 0.006863117218017578,
|
40 |
+
"CommuteConcat": 0.0009205341339111328,
|
41 |
+
"DMALocalityOpt": 0.0034034252166748047,
|
42 |
+
"DMAProfiler": 0.007045269012451172,
|
43 |
+
"DMATilingProfiler": 0.004607439041137695,
|
44 |
+
"DataLocalityOpt": 0.15957880020141602,
|
45 |
+
"DataStreaming": 0.037320613861083984,
|
46 |
+
"DeConcat": 0.0007259845733642578,
|
47 |
+
"DeadCodeElimination": 0.0009546279907226563,
|
48 |
+
"DeadStoreElimination": 0.006250619888305664,
|
49 |
+
"DelinearIndices": 0.005332231521606445,
|
50 |
+
"Delinearization": 0.0033500194549560547,
|
51 |
+
"DoNothing": 0.00037598609924316406,
|
52 |
+
"DramToDramTranspose": 0.020763397216796875,
|
53 |
+
"DumpGraphAndMetadata": 0.025223493576049805,
|
54 |
+
"EliminateDivs": 0.0023469924926757813,
|
55 |
+
"ExpandBatchNorm": 0.001692056655883789,
|
56 |
+
"ExpandISAMacro": 0.009050607681274414,
|
57 |
+
"FactorizeBlkDims": 0.009798526763916016,
|
58 |
+
"FactorizeThreadAxesInFreeDims": 0.002184152603149414,
|
59 |
+
"FlattenMacroLoop": 0.0022482872009277344,
|
60 |
+
"GenericAccessSimplifier": 0.0009622573852539063,
|
61 |
+
"HoistCompute": 6.000000212225132e-06,
|
62 |
+
"IdentifyCrossPassTensors": 5.999999848427251e-05,
|
63 |
+
"InferInitValue": 0.027300357818603516,
|
64 |
+
"InferIntrinsicOnCC": 0.009199380874633789,
|
65 |
+
"InferNeuronTensor": 0.028067350387573242,
|
66 |
+
"InferNonlocalTensors": 0.014671802520751953,
|
67 |
+
"InferPSumTensor": 0.08141279220581055,
|
68 |
+
"InlineNativeKernels": 0.002727031707763672,
|
69 |
+
"InsertIOTransposes": 0.017727136611938477,
|
70 |
+
"InsertLocalTransposes": 0.004176616668701172,
|
71 |
+
"InsertOffloadedTransposes": 0.002771615982055664,
|
72 |
+
"LICM": 0.005248069763183594,
|
73 |
+
"LateLegalizeInst": 0.007282733917236328,
|
74 |
+
"LateLegalizePostSplit": 0.0045223236083984375,
|
75 |
+
"LateLowerReshapeOp": 0.0012927055358886719,
|
76 |
+
"LateLowerTensorOp": 0.0014028549194335938,
|
77 |
+
"LateNeuronInstComb": 0.016957759857177734,
|
78 |
+
"LayoutPreprocessing": 0.026221275329589844,
|
79 |
+
"LayoutPreprocessingAndAnalysis": 0.07468867301940918,
|
80 |
+
"LayoutRequirementAnalysis": 0.004823446273803711,
|
81 |
+
"LegalizeCCOpLayout": 0.0023353099822998047,
|
82 |
+
"LegalizeOpLevelAlias": 0.0013494491577148438,
|
83 |
+
"LegalizePartitionReduce": 0.0018906593322753906,
|
84 |
+
"LegalizeSundaAccess": 0.06240987777709961,
|
85 |
+
"LegalizeSundaMacro": 0.04256129264831543,
|
86 |
+
"LegalizeType": 0.006028175354003906,
|
87 |
+
"LocalLayoutOpt": 0.016018390655517578,
|
88 |
+
"LoopFusion": 0.005109071731567383,
|
89 |
+
"LoopSplitting": 0.00048542022705078125,
|
90 |
+
"LowerBroadcast": 0.003258943557739258,
|
91 |
+
"LowerCCOpBlockAxis": 0.0038700103759765625,
|
92 |
+
"LowerComplexBroadcast": 0.004511594772338867,
|
93 |
+
"LowerIntrinsics": 0.32482099533081055,
|
94 |
+
"LowerTensorOp": 0.010710477828979492,
|
95 |
+
"LowerTranspose": 0.054924726486206055,
|
96 |
+
"MacroGeneration": 0.061620473861694336,
|
97 |
+
"MaskPropagation": 0.002919435501098633,
|
98 |
+
"MemcastMotion": 3.400000059627928e-05,
|
99 |
+
"MemcpyElimination": 0.02559375762939453,
|
100 |
+
"MutateDataType": 0.0014896392822265625,
|
101 |
+
"NeuronAliasDependencyInduction": 0.0002808570861816406,
|
102 |
+
"NeuronAliasDependencyReset": 0.05649685859680176,
|
103 |
+
"NeuronInstComb": 0.005097627639770508,
|
104 |
+
"NeuronLICM": 0.014602899551391602,
|
105 |
+
"NeuronLoopFusion": 0.009732246398925781,
|
106 |
+
"NeuronLoopInterchange": 0.0025072097778320313,
|
107 |
+
"NeuronSimplifier": 0.03835606575012207,
|
108 |
+
"NeuronSimplifyPredicates": 0.009032487869262695,
|
109 |
+
"NeuronValueNumbering": 0.003210306167602539,
|
110 |
+
"OptimizeAliasedCopyChain": 0.0007545948028564453,
|
111 |
+
"OptimizeNKIKernels": 0.6443507671356201,
|
112 |
+
"PAGLayoutOpt": 0.20021605491638184,
|
113 |
+
"PComputeCutting": 0.0046160221099853516,
|
114 |
+
"PGLayoutTilingPipeline": 0.6925618648529053,
|
115 |
+
"PGTiling": 0.21065187454223633,
|
116 |
+
"PadElimination": 0.00038623809814453125,
|
117 |
+
"ParAxesAnnotation": 0.052834510803222656,
|
118 |
+
"PartialLoopFusion": 0.051622629165649414,
|
119 |
+
"PartialSimdFusion": 0.014065980911254883,
|
120 |
+
"PenguinizeFunctions": 5.199999941396527e-05,
|
121 |
+
"PerfectLoopNest": 0.0019462108612060547,
|
122 |
+
"PruneFunctions": 4.3000000005122274e-05,
|
123 |
+
"RecognizeOpIdiom": 0.0037450790405273438,
|
124 |
+
"Recompute": 0.0004031658172607422,
|
125 |
+
"RelaxPredicates": 0.03561973571777344,
|
126 |
+
"Rematerialization": 0.0018870830535888672,
|
127 |
+
"RemoveOptimizationBarriers": 6.500000017695129e-05,
|
128 |
+
"ReshapeWeights": 0.0009450912475585938,
|
129 |
+
"ResolveAccessConflict": 0.0038840770721435547,
|
130 |
+
"ResolveComplicatePredicates": 0.0011222362518310547,
|
131 |
+
"RewriteReplicationMatmul": 0.0017135143280029297,
|
132 |
+
"RewriteWeights": 0.0024623870849609375,
|
133 |
+
"SFKVectorizer": 0.19468188285827637,
|
134 |
+
"ScatterMotion": 1.0000000656873453e-05,
|
135 |
+
"SimpleAllReduceTiling": 0.0037994384765625,
|
136 |
+
"Simplifier": 0.0030031204223632813,
|
137 |
+
"SimplifyMacroPredicates": 0.005193233489990234,
|
138 |
+
"SimplifyNeuronTensor": 0.38555216789245605,
|
139 |
+
"SimplifySlice": 0.001062631607055664,
|
140 |
+
"SimplifyTensor": 0.009534358978271484,
|
141 |
+
"SpillPSum": 0.053937673568725586,
|
142 |
+
"SplitAPUnionSets": 0.013537406921386719,
|
143 |
+
"SplitAccGrp": 0.0014171600341796875,
|
144 |
+
"StaticProfiler": 0.005720615386962891,
|
145 |
+
"StaticTransposeLocalTensor": 0.003614664077758789,
|
146 |
+
"SundaISel": 0.09031486511230469,
|
147 |
+
"TCTransform": 0.0008947849273681641,
|
148 |
+
"TensorInitialization": 0.010958433151245117,
|
149 |
+
"TensorOpSimplifier": 0.005278110504150391,
|
150 |
+
"TensorOpTransform": 0.020787477493286133,
|
151 |
+
"TensorizerLegalizationPass": 6.299999949987978e-05,
|
152 |
+
"TileCCOps": 0.005544900894165039,
|
153 |
+
"TilingProfiler": 0.007747173309326172,
|
154 |
+
"TransformConvOp": 0.003238677978515625,
|
155 |
+
"TritiumFusion": 0.16130614280700684,
|
156 |
+
"ValueNumbering": 0.0018999576568603516,
|
157 |
+
"VectorizeDMA": 0.0017979145050048828,
|
158 |
+
"VectorizeMatMult": 0.007079362869262695,
|
159 |
+
"VerifySupportedOps": 4.900000203633681e-05,
|
160 |
+
"WeightCoalescing": 0.0033416748046875,
|
161 |
+
"ZeroSizeTensorElimination": 0.00022983551025390625,
|
162 |
+
"algsimp": 0.0024079999420791864,
|
163 |
+
"batchnorm_expander": 4.999999873689376e-05,
|
164 |
+
"boundary-marker-removal": 1.3999999282532372e-05,
|
165 |
+
"call-inliner": 0.0004330000083427876,
|
166 |
+
"canonicalize-boundary-marker": 1.6999998479150236e-05,
|
167 |
+
"collective-stream-id-checker": 9.699999645818025e-05,
|
168 |
+
"comparison-expander": 0.0005000000237487257,
|
169 |
+
"computation-deduplicator": 7.700000423938036e-05,
|
170 |
+
"conditional-to-select": 2.099999983329326e-05,
|
171 |
+
"config-lowering": 0.00019799999427050352,
|
172 |
+
"constant-statistics": 0.0005200000014156103,
|
173 |
+
"constant_folding": 0.000295000005280599,
|
174 |
+
"cse": 5.499999679159373e-05,
|
175 |
+
"dce": 8.099999831756577e-05,
|
176 |
+
"dot_decomposer": 0.0013620000099763274,
|
177 |
+
"dynamic-slice-transpose": 1.5999999959603883e-05,
|
178 |
+
"eliminate-redundant-compare": 0.00025499999173916876,
|
179 |
+
"emit-offloaded-dropout": 7.100000220816582e-05,
|
180 |
+
"flatten-call-graph": 0.0007510000141337514,
|
181 |
+
"fuse-send-recv": 8.70000003487803e-05,
|
182 |
+
"hilo::LegalizeAlias": 1.5999999959603883e-05,
|
183 |
+
"hilo::NeuronInstCombine": 0.00015199999324977398,
|
184 |
+
"hilo::NeuronOpFusion": 5.0000002374872565e-05,
|
185 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 6.70000008540228e-05,
|
186 |
+
"hilo::ScheduleFusion": 3.999999989900971e-06,
|
187 |
+
"hilo::SixtyFourHack": 7.599999662488699e-05,
|
188 |
+
"hilo::VerifyAliasing": 8.999999408842996e-06,
|
189 |
+
"hlo-mac-count": 0.0012550000101327896,
|
190 |
+
"hlo-verifier": 0.008069000206887722,
|
191 |
+
"instruction-histogram": 0.001006999984383583,
|
192 |
+
"io-con-pipe-begin": 7.999999979801942e-06,
|
193 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
194 |
+
"io-layout-normalization": 0.001221999991685152,
|
195 |
+
"io-statistics": 9.200000204145908e-05,
|
196 |
+
"legalize-ccops": 3.999999989900971e-06,
|
197 |
+
"legalize-compare": 1.4999999621068127e-05,
|
198 |
+
"lower-argminmax-custom-call": 1.300000076298602e-05,
|
199 |
+
"map-inline": 0.0007819999591447413,
|
200 |
+
"metadata-naming": 6.800000119255856e-05,
|
201 |
+
"mlir::detail::OpToOpPassAdaptor": 0.00011300000187475234,
|
202 |
+
"mlir::hlo::MhloToPyPenguin": 0.07539799809455872,
|
203 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00035600000410340726,
|
204 |
+
"mlir::mhlo::LowerComplexPass": 0.0005510000046342611,
|
205 |
+
"native-to-custom-softmax": 0.0006350000621750951,
|
206 |
+
"native-to-custom-softmax-dx": 0.0006360000115819275,
|
207 |
+
"operand_upcaster": 6.799999391660094e-05,
|
208 |
+
"opt-barrier-removal": 0.0004710000066552311,
|
209 |
+
"post-par-pipe-begin": 1.4000000192027073e-05,
|
210 |
+
"post-par-pipe-end": 0.0,
|
211 |
+
"post-partition-simplification": 0.0020860000513494015,
|
212 |
+
"pre-par-pipe-begin": 9.999999974752427e-07,
|
213 |
+
"pre-par-pipe-end": 0.0,
|
214 |
+
"pre-partition-simplification": 0.21597300469875336,
|
215 |
+
"replace-minimum-constant": 0.00034199998481199145,
|
216 |
+
"reshape-mover": 0.00011600000289035961,
|
217 |
+
"simplify-concat": 0.00017500000831205398,
|
218 |
+
"simplify-while-loops": 0.00010400000610388815,
|
219 |
+
"transform-variadic-reduce": 8.299999899463728e-05,
|
220 |
+
"tuple-simplifier": 0.00028500001644715667,
|
221 |
+
"unpack-nested-aws-ntwsr": 0.0003440000000409782,
|
222 |
+
"unroll-while-loop": 1.900000097521115e-05,
|
223 |
+
"zero_sized_hlo_elimination": 0.0008210000232793391
|
224 |
+
},
|
225 |
+
"hilo": {
|
226 |
+
"ConstantSize": 2106325.0,
|
227 |
+
"HloInputCount": 359.0,
|
228 |
+
"HloMacCount": 231995342848.0,
|
229 |
+
"HloOutputCount": 65.0,
|
230 |
+
"IfmapSize": 7785168896.0,
|
231 |
+
"OfmapSize": 536870912.0,
|
232 |
+
"OutputsReadFromCount": 0.0,
|
233 |
+
"PassthroughTensorsCount": 0.0,
|
234 |
+
"RedundantOutputCount": 0.0,
|
235 |
+
"Traffic": 854718848.0
|
236 |
+
},
|
237 |
+
"tensorizer": {
|
238 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 17056.0,
|
239 |
+
"StaticProfiler::AifUb": 568.2581176757813,
|
240 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 456.7580261230469,
|
241 |
+
"StaticProfiler::AverageDmaLength": 1314.3221435546875,
|
242 |
+
"StaticProfiler::DDRTransferBytes": 407087136.0,
|
243 |
+
"StaticProfiler::InternalTransferBytes": 48342036.0,
|
244 |
+
"StaticProfiler::LoadExpanded": 310291.0,
|
245 |
+
"StaticProfiler::StoreExpanded": 6699.0,
|
246 |
+
"StaticProfiler::TotalDMAExpanded": 316990.0,
|
247 |
+
"StaticProfiler::TotalDynamicInstancesCount": 19674.0,
|
248 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19578.0,
|
249 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
250 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
251 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
252 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
253 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
|
254 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 14848.0,
|
255 |
+
"TilingProfiler::NumPfTransposes": 4.0,
|
256 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
257 |
+
"TilingProfiler::NumPfTransposesForLocal": 1.0,
|
258 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
|
259 |
+
"TilingProfiler::PfTransposeInstructions": 769.0,
|
260 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
261 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
|
262 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
|
263 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
|
264 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 319.0,
|
265 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
266 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
267 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
268 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
269 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
270 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
271 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
272 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
273 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
274 |
+
}
|
275 |
+
},
|
276 |
+
"all": {
|
277 |
+
"compiletime": {
|
278 |
+
"algsimp": 0.0021410000044852495,
|
279 |
+
"call-inliner": 0.00039599998854100704,
|
280 |
+
"collective-stream-id-checker": 7.79999973019585e-05,
|
281 |
+
"comparison-expander": 0.00048099999548867345,
|
282 |
+
"constant-statistics": 0.0005200000014156103,
|
283 |
+
"constant_folding": 0.0002629999944474548,
|
284 |
+
"dce": 7.699999696342275e-05,
|
285 |
+
"dot_decomposer": 0.0013620000099763274,
|
286 |
+
"eliminate-redundant-compare": 0.00024199999461416155,
|
287 |
+
"flatten-call-graph": 0.0007140000234358013,
|
288 |
+
"hlo-mac-count": 0.0009169999975711107,
|
289 |
+
"hlo-verifier": 0.007406999822705984,
|
290 |
+
"instruction-histogram": 0.001006999984383583,
|
291 |
+
"io-con-pipe-begin": 7.999999979801942e-06,
|
292 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
293 |
+
"io-layout-normalization": 0.001221999991685152,
|
294 |
+
"io-statistics": 9.200000204145908e-05,
|
295 |
+
"map-inline": 0.0007389999809674919,
|
296 |
+
"native-to-custom-softmax": 0.0005990000208839774,
|
297 |
+
"native-to-custom-softmax-dx": 0.0004440000047907233,
|
298 |
+
"opt-barrier-removal": 0.0004710000066552311,
|
299 |
+
"pre-par-pipe-begin": 9.999999974752427e-07,
|
300 |
+
"pre-par-pipe-end": 0.0,
|
301 |
+
"pre-partition-simplification": 0.21597300469875336,
|
302 |
+
"replace-minimum-constant": 0.00030899999546818435,
|
303 |
+
"reshape-mover": 0.00010299999848939478,
|
304 |
+
"simplify-while-loops": 9.40000027185306e-05,
|
305 |
+
"tuple-simplifier": 0.00026699999580159783,
|
306 |
+
"unpack-nested-aws-ntwsr": 0.0003319999959785491,
|
307 |
+
"unroll-while-loop": 1.8000000636675395e-05,
|
308 |
+
"zero_sized_hlo_elimination": 0.0008210000232793391
|
309 |
+
}
|
310 |
+
},
|
311 |
+
"cumsum": {
|
312 |
+
"compiletime": {
|
313 |
+
"CoalesceCCOp": 0.000293731689453125,
|
314 |
+
"DMALocalityOpt": 0.00022101402282714844,
|
315 |
+
"DMAProfiler": 0.0010464191436767578,
|
316 |
+
"DataStreaming": 0.00040221214294433594,
|
317 |
+
"DoNothing": 0.00025200843811035156,
|
318 |
+
"ExpandISAMacro": 0.0005903244018554688,
|
319 |
+
"FactorizeBlkDims": 0.0005807876586914063,
|
320 |
+
"InferPSumTensor": 0.0005562305450439453,
|
321 |
+
"LateLegalizeInst": 0.00046944618225097656,
|
322 |
+
"LateNeuronInstComb": 0.0006792545318603516,
|
323 |
+
"LegalizeSundaAccess": 0.0017774105072021484,
|
324 |
+
"LegalizeType": 0.00032138824462890625,
|
325 |
+
"LowerBroadcast": 0.0003333091735839844,
|
326 |
+
"LowerIntrinsics": 0.0002849102020263672,
|
327 |
+
"LowerTranspose": 0.00046753883361816406,
|
328 |
+
"NeuronInstComb": 0.0008723735809326172,
|
329 |
+
"NeuronLICM": 0.00047659873962402344,
|
330 |
+
"NeuronSimplifyPredicates": 0.0030825138092041016,
|
331 |
+
"NeuronValueNumbering": 0.0004870891571044922,
|
332 |
+
"SFKVectorizer": 0.003458738327026367,
|
333 |
+
"SimpleAllReduceTiling": 0.0002646446228027344,
|
334 |
+
"SimplifyNeuronTensor": 0.0004863739013671875,
|
335 |
+
"SpillPSum": 0.0005884170532226563,
|
336 |
+
"WeightCoalescing": 0.00028324127197265625
|
337 |
+
}
|
338 |
+
},
|
339 |
+
"sg00": {
|
340 |
+
"compiletime": {
|
341 |
+
"CanonicalizeConv": 2.300000051036477e-05,
|
342 |
+
"CanonicalizeForTensorizer": 2.2000000171829015e-05,
|
343 |
+
"Canonicalizer": 0.0005029999883845448,
|
344 |
+
"HoistCompute": 9.999999974752427e-07,
|
345 |
+
"IdentifyCrossPassTensors": 2.2000000171829015e-05,
|
346 |
+
"MemcastMotion": 7.999999979801942e-06,
|
347 |
+
"PenguinizeFunctions": 2.2000000171829015e-05,
|
348 |
+
"PruneFunctions": 1.5999999959603883e-05,
|
349 |
+
"RemoveOptimizationBarriers": 2.4000000848900527e-05,
|
350 |
+
"ScatterMotion": 1.9999999949504854e-06,
|
351 |
+
"TensorizerLegalizationPass": 3.400000059627928e-05,
|
352 |
+
"VerifySupportedOps": 1.5999999959603883e-05,
|
353 |
+
"algsimp": 6.900000153109431e-05,
|
354 |
+
"batchnorm_expander": 1.2999999853491317e-05,
|
355 |
+
"boundary-marker-removal": 3.000000106112566e-06,
|
356 |
+
"call-inliner": 7.999999979801942e-06,
|
357 |
+
"canonicalize-boundary-marker": 3.999999989900971e-06,
|
358 |
+
"collective-stream-id-checker": 3.000000106112566e-06,
|
359 |
+
"comparison-expander": 3.999999989900971e-06,
|
360 |
+
"computation-deduplicator": 1.5999999959603883e-05,
|
361 |
+
"conditional-to-select": 4.999999873689376e-06,
|
362 |
+
"config-lowering": 5.8000001445179805e-05,
|
363 |
+
"constant_folding": 7.999999979801942e-06,
|
364 |
+
"cse": 1.4999999621068127e-05,
|
365 |
+
"dce": 9.999999974752427e-07,
|
366 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
367 |
+
"eliminate-redundant-compare": 3.000000106112566e-06,
|
368 |
+
"emit-offloaded-dropout": 2.099999983329326e-05,
|
369 |
+
"flatten-call-graph": 7.999999979801942e-06,
|
370 |
+
"fuse-send-recv": 2.499999936844688e-05,
|
371 |
+
"hilo::LegalizeAlias": 7.000000096013537e-06,
|
372 |
+
"hilo::NeuronInstCombine": 5.400000009103678e-05,
|
373 |
+
"hilo::NeuronOpFusion": 6.000000212225132e-06,
|
374 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 2.4000000848900527e-05,
|
375 |
+
"hilo::ScheduleFusion": 0.0,
|
376 |
+
"hilo::SixtyFourHack": 1.4999999621068127e-05,
|
377 |
+
"hilo::VerifyAliasing": 3.999999989900971e-06,
|
378 |
+
"hlo-mac-count": 3.7999998312443495e-05,
|
379 |
+
"hlo-verifier": 0.00017100000695791095,
|
380 |
+
"legalize-ccops": 9.999999974752427e-07,
|
381 |
+
"legalize-compare": 3.999999989900971e-06,
|
382 |
+
"lower-argminmax-custom-call": 3.000000106112566e-06,
|
383 |
+
"map-inline": 1.1000000085914508e-05,
|
384 |
+
"metadata-naming": 1.700000029813964e-05,
|
385 |
+
"mlir::detail::OpToOpPassAdaptor": 3.7000001611886546e-05,
|
386 |
+
"mlir::hlo::MhloToPyPenguin": 0.033358000218868256,
|
387 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00013499999477062374,
|
388 |
+
"mlir::mhlo::LowerComplexPass": 0.00014200000441633165,
|
389 |
+
"native-to-custom-softmax": 7.999999979801942e-06,
|
390 |
+
"native-to-custom-softmax-dx": 9.999999747378752e-05,
|
391 |
+
"operand_upcaster": 1.8999999156221747e-05,
|
392 |
+
"post-par-pipe-begin": 1.9999999949504854e-06,
|
393 |
+
"post-par-pipe-end": 0.0,
|
394 |
+
"post-partition-simplification": 0.0005610000225715339,
|
395 |
+
"replace-minimum-constant": 9.000000318337698e-06,
|
396 |
+
"reshape-mover": 3.999999989900971e-06,
|
397 |
+
"simplify-concat": 5.199999941396527e-05,
|
398 |
+
"simplify-while-loops": 1.9999999949504854e-06,
|
399 |
+
"transform-variadic-reduce": 9.000000318337698e-06,
|
400 |
+
"tuple-simplifier": 3.999999989900971e-06,
|
401 |
+
"unpack-nested-aws-ntwsr": 3.000000106112566e-06,
|
402 |
+
"unroll-while-loop": 0.0
|
403 |
+
},
|
404 |
+
"hilo": {
|
405 |
+
"ArithmeticIntensity": 221.8579559326172,
|
406 |
+
"ConstantSize": 2106325.0,
|
407 |
+
"HloInputCount": 359.0,
|
408 |
+
"HloMacCount": 25769803776.0,
|
409 |
+
"HloOutputCount": 65.0,
|
410 |
+
"IfmapSize": 7785168896.0,
|
411 |
+
"OfmapSize": 536870912.0,
|
412 |
+
"OutputsReadFromCount": 0.0,
|
413 |
+
"PassthroughTensorsCount": 0.0,
|
414 |
+
"RedundantOutputCount": 0.0,
|
415 |
+
"Traffic": 232309024.0
|
416 |
+
}
|
417 |
+
},
|
418 |
+
"sg0000": {
|
419 |
+
"compiletime": {
|
420 |
+
"AGOrderingAnalysisPass": 0.07846212387084961,
|
421 |
+
"AffinePredicateResolution": 0.0015842914581298828,
|
422 |
+
"AliasDependencyElimination": 0.0002803802490234375,
|
423 |
+
"AliasDependencyInduction": 0.03549337387084961,
|
424 |
+
"AliasDependencyReset": 0.06158638000488281,
|
425 |
+
"BFComputeCutting": 0.003358125686645508,
|
426 |
+
"BirCodeGenLoop": 0.06645083427429199,
|
427 |
+
"CCOpFusion": 0.03297877311706543,
|
428 |
+
"CanonicalizeDAGForPGTiling": 0.0029740333557128906,
|
429 |
+
"CanonicalizeIR": 0.0038878917694091797,
|
430 |
+
"CoalesceCCOp": 0.0058116912841796875,
|
431 |
+
"CommuteConcat": 0.0010180473327636719,
|
432 |
+
"DMALocalityOpt": 0.0015497207641601563,
|
433 |
+
"DMAProfiler": 0.005065441131591797,
|
434 |
+
"DMATilingProfiler": 0.004613637924194336,
|
435 |
+
"DataLocalityOpt": 0.16799569129943848,
|
436 |
+
"DataStreaming": 0.00627899169921875,
|
437 |
+
"DeConcat": 0.0015079975128173828,
|
438 |
+
"DeadCodeElimination": 0.0011029243469238281,
|
439 |
+
"DeadStoreElimination": 0.06819939613342285,
|
440 |
+
"DelinearIndices": 0.0475771427154541,
|
441 |
+
"Delinearization": 0.003088235855102539,
|
442 |
+
"DoNothing": 0.0001838207244873047,
|
443 |
+
"DramToDramTranspose": 0.08775472640991211,
|
444 |
+
"DumpGraphAndMetadata": 0.013874053955078125,
|
445 |
+
"EliminateDivs": 0.006442070007324219,
|
446 |
+
"ExpandBatchNorm": 0.00305938720703125,
|
447 |
+
"ExpandISAMacro": 0.00470423698425293,
|
448 |
+
"FactorizeBlkDims": 0.026311397552490234,
|
449 |
+
"FactorizeThreadAxesInFreeDims": 0.0019838809967041016,
|
450 |
+
"FlattenMacroLoop": 0.004168987274169922,
|
451 |
+
"GenericAccessSimplifier": 0.0016493797302246094,
|
452 |
+
"InferInitValue": 0.05328845977783203,
|
453 |
+
"InferIntrinsicOnCC": 0.009886503219604492,
|
454 |
+
"InferNeuronTensor": 0.08689069747924805,
|
455 |
+
"InferNonlocalTensors": 0.2075808048248291,
|
456 |
+
"InferPSumTensor": 0.12219834327697754,
|
457 |
+
"InlineNativeKernels": 0.002942323684692383,
|
458 |
+
"InsertIOTransposes": 0.019949674606323242,
|
459 |
+
"InsertLocalTransposes": 0.0066678524017333984,
|
460 |
+
"InsertOffloadedTransposes": 0.005246877670288086,
|
461 |
+
"LICM": 0.002876758575439453,
|
462 |
+
"LateLegalizeInst": 0.009313821792602539,
|
463 |
+
"LateLegalizePostSplit": 0.0034275054931640625,
|
464 |
+
"LateLowerReshapeOp": 0.001237630844116211,
|
465 |
+
"LateLowerTensorOp": 0.036368370056152344,
|
466 |
+
"LateNeuronInstComb": 0.019298315048217773,
|
467 |
+
"LayoutPreprocessing": 0.0656280517578125,
|
468 |
+
"LayoutPreprocessingAndAnalysis": 0.0845177173614502,
|
469 |
+
"LayoutRequirementAnalysis": 0.006539821624755859,
|
470 |
+
"LegalizeCCOpLayout": 0.002690553665161133,
|
471 |
+
"LegalizeOpLevelAlias": 0.002089977264404297,
|
472 |
+
"LegalizePartitionReduce": 0.0019116401672363281,
|
473 |
+
"LegalizeSundaAccess": 0.04238390922546387,
|
474 |
+
"LegalizeSundaMacro": 0.008917093276977539,
|
475 |
+
"LegalizeType": 0.00662541389465332,
|
476 |
+
"LocalLayoutOpt": 0.017171859741210938,
|
477 |
+
"LoopFusion": 0.04693031311035156,
|
478 |
+
"LoopSplitting": 0.0004513263702392578,
|
479 |
+
"LowerBroadcast": 0.0021796226501464844,
|
480 |
+
"LowerCCOpBlockAxis": 0.005298614501953125,
|
481 |
+
"LowerComplexBroadcast": 0.002663135528564453,
|
482 |
+
"LowerIntrinsics": 0.08481836318969727,
|
483 |
+
"LowerTensorOp": 0.05078911781311035,
|
484 |
+
"LowerTranspose": 0.052706241607666016,
|
485 |
+
"MacroGeneration": 0.16595196723937988,
|
486 |
+
"MaskPropagation": 0.00496983528137207,
|
487 |
+
"MemcpyElimination": 0.27239394187927246,
|
488 |
+
"MutateDataType": 0.0022711753845214844,
|
489 |
+
"NeuronAliasDependencyInduction": 0.00037479400634765625,
|
490 |
+
"NeuronAliasDependencyReset": 0.012241363525390625,
|
491 |
+
"NeuronInstComb": 0.010676145553588867,
|
492 |
+
"NeuronLICM": 0.01803445816040039,
|
493 |
+
"NeuronLoopFusion": 0.01843857765197754,
|
494 |
+
"NeuronLoopInterchange": 0.0022115707397460938,
|
495 |
+
"NeuronSimplifier": 0.011580228805541992,
|
496 |
+
"NeuronSimplifyPredicates": 0.017709970474243164,
|
497 |
+
"NeuronValueNumbering": 0.045330047607421875,
|
498 |
+
"OptimizeAliasedCopyChain": 0.0012116432189941406,
|
499 |
+
"OptimizeNKIKernels": 0.04246807098388672,
|
500 |
+
"PAGLayoutOpt": 0.38617491722106934,
|
501 |
+
"PComputeCutting": 0.008383512496948242,
|
502 |
+
"PGLayoutTilingPipeline": 1.3029937744140625,
|
503 |
+
"PGTiling": 0.34752726554870605,
|
504 |
+
"PadElimination": 0.0006172657012939453,
|
505 |
+
"ParAxesAnnotation": 0.36298155784606934,
|
506 |
+
"PartialLoopFusion": 0.024132490158081055,
|
507 |
+
"PartialSimdFusion": 0.026205062866210938,
|
508 |
+
"PerfectLoopNest": 0.0019898414611816406,
|
509 |
+
"RecognizeOpIdiom": 0.006145477294921875,
|
510 |
+
"Recompute": 0.00034356117248535156,
|
511 |
+
"RelaxPredicates": 0.0044634342193603516,
|
512 |
+
"Rematerialization": 0.004605531692504883,
|
513 |
+
"ReshapeWeights": 0.0008733272552490234,
|
514 |
+
"ResolveAccessConflict": 0.003629446029663086,
|
515 |
+
"ResolveComplicatePredicates": 0.0018143653869628906,
|
516 |
+
"RewriteReplicationMatmul": 0.001529693603515625,
|
517 |
+
"RewriteWeights": 0.0036728382110595703,
|
518 |
+
"SFKVectorizer": 0.5580539703369141,
|
519 |
+
"SimpleAllReduceTiling": 0.0026845932006835938,
|
520 |
+
"Simplifier": 0.0046727657318115234,
|
521 |
+
"SimplifyMacroPredicates": 0.01622939109802246,
|
522 |
+
"SimplifyNeuronTensor": 0.015488386154174805,
|
523 |
+
"SimplifySlice": 0.0018961429595947266,
|
524 |
+
"SimplifyTensor": 0.006178140640258789,
|
525 |
+
"SpillPSum": 0.06065011024475098,
|
526 |
+
"SplitAPUnionSets": 0.075592041015625,
|
527 |
+
"SplitAccGrp": 0.0017442703247070313,
|
528 |
+
"StaticProfiler": 0.00494384765625,
|
529 |
+
"StaticTransposeLocalTensor": 0.0146331787109375,
|
530 |
+
"SundaISel": 0.056458473205566406,
|
531 |
+
"TCTransform": 0.001115560531616211,
|
532 |
+
"TensorInitialization": 0.021691322326660156,
|
533 |
+
"TensorOpSimplifier": 0.010814189910888672,
|
534 |
+
"TensorOpTransform": 0.07015466690063477,
|
535 |
+
"TileCCOps": 0.007310152053833008,
|
536 |
+
"TilingProfiler": 0.012901067733764648,
|
537 |
+
"TransformConvOp": 0.00453495979309082,
|
538 |
+
"TritiumFusion": 0.10158801078796387,
|
539 |
+
"ValueNumbering": 0.0044324398040771484,
|
540 |
+
"VectorizeDMA": 0.006791114807128906,
|
541 |
+
"VectorizeMatMult": 0.01838517189025879,
|
542 |
+
"WeightCoalescing": 0.004769086837768555,
|
543 |
+
"ZeroSizeTensorElimination": 0.00017833709716796875
|
544 |
+
},
|
545 |
+
"tensorizer": {
|
546 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 5791.0,
|
547 |
+
"StaticProfiler::AifUb": 261.38446044921875,
|
548 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 680.3948364257813,
|
549 |
+
"StaticProfiler::AverageDmaLength": 2076.933837890625,
|
550 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.95938110351563,
|
551 |
+
"StaticProfiler::AveragePartitionUtilization": 99.89742279052734,
|
552 |
+
"StaticProfiler::AveragePeUtilization": 99.83380126953125,
|
553 |
+
"StaticProfiler::DDRTransferBytes": 87646472.0,
|
554 |
+
"StaticProfiler::InternalTransferBytes": 114032640.0,
|
555 |
+
"StaticProfiler::LoadExpanded": 20995.0,
|
556 |
+
"StaticProfiler::LocalizationEfficiency": 260.3042297363281,
|
557 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 391.4252624511719,
|
558 |
+
"StaticProfiler::StoreExpanded": 10753.0,
|
559 |
+
"StaticProfiler::TotalDMAExpanded": 31748.0,
|
560 |
+
"StaticProfiler::TotalDynamicInstancesCount": 8459.0,
|
561 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 8453.0,
|
562 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
563 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
564 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
565 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
566 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
567 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
568 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 96.0,
|
569 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 3080.0,
|
570 |
+
"TilingProfiler::NumPfTransposes": 7.0,
|
571 |
+
"TilingProfiler::NumPfTransposesForIo": 1.0,
|
572 |
+
"TilingProfiler::NumPfTransposesForLocal": 5.0,
|
573 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 1.0,
|
574 |
+
"TilingProfiler::PfTransposeInstructions": 1632.0,
|
575 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 256.0,
|
576 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1248.0,
|
577 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 128.0,
|
578 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
579 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 612.0,
|
580 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
581 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
582 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
583 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
584 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
585 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
586 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
587 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
588 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
589 |
+
}
|
590 |
+
},
|
591 |
+
"sg0001": {
|
592 |
+
"compiletime": {
|
593 |
+
"AGOrderingAnalysisPass": 0.19782710075378418,
|
594 |
+
"AffinePredicateResolution": 0.0014352798461914063,
|
595 |
+
"AliasDependencyElimination": 0.00022602081298828125,
|
596 |
+
"AliasDependencyInduction": 0.008897542953491211,
|
597 |
+
"AliasDependencyReset": 0.07564544677734375,
|
598 |
+
"BFComputeCutting": 0.0038797855377197266,
|
599 |
+
"BirCodeGenLoop": 0.08420419692993164,
|
600 |
+
"CCOpFusion": 0.0411074161529541,
|
601 |
+
"CanonicalizeDAGForPGTiling": 0.004708290100097656,
|
602 |
+
"CanonicalizeIR": 0.001739501953125,
|
603 |
+
"CoalesceCCOp": 0.005135774612426758,
|
604 |
+
"CommuteConcat": 0.0010938644409179688,
|
605 |
+
"DMALocalityOpt": 0.0010821819305419922,
|
606 |
+
"DMAProfiler": 0.03509354591369629,
|
607 |
+
"DMATilingProfiler": 0.008334875106811523,
|
608 |
+
"DataLocalityOpt": 0.3732140064239502,
|
609 |
+
"DataStreaming": 0.004484653472900391,
|
610 |
+
"DeConcat": 0.0014607906341552734,
|
611 |
+
"DeadCodeElimination": 0.002012491226196289,
|
612 |
+
"DeadStoreElimination": 0.06306838989257813,
|
613 |
+
"DelinearIndices": 0.03899812698364258,
|
614 |
+
"Delinearization": 0.015190839767456055,
|
615 |
+
"DoNothing": 0.00013589859008789063,
|
616 |
+
"DramToDramTranspose": 0.05379915237426758,
|
617 |
+
"DumpGraphAndMetadata": 0.053969621658325195,
|
618 |
+
"EliminateDivs": 0.005895376205444336,
|
619 |
+
"ExpandBatchNorm": 0.0030879974365234375,
|
620 |
+
"ExpandISAMacro": 0.002570629119873047,
|
621 |
+
"FactorizeBlkDims": 0.03216910362243652,
|
622 |
+
"FactorizeThreadAxesInFreeDims": 0.0017580986022949219,
|
623 |
+
"FlattenMacroLoop": 0.004896402359008789,
|
624 |
+
"GenericAccessSimplifier": 0.001070261001586914,
|
625 |
+
"InferInitValue": 0.09278488159179688,
|
626 |
+
"InferIntrinsicOnCC": 0.010787725448608398,
|
627 |
+
"InferNeuronTensor": 0.16329479217529297,
|
628 |
+
"InferNonlocalTensors": 0.08827400207519531,
|
629 |
+
"InferPSumTensor": 0.041254281997680664,
|
630 |
+
"InlineNativeKernels": 0.002732515335083008,
|
631 |
+
"InsertIOTransposes": 0.030591964721679688,
|
632 |
+
"InsertLocalTransposes": 0.0069196224212646484,
|
633 |
+
"InsertOffloadedTransposes": 0.0034880638122558594,
|
634 |
+
"LICM": 0.0034477710723876953,
|
635 |
+
"LateLegalizeInst": 0.005655765533447266,
|
636 |
+
"LateLegalizePostSplit": 0.003046751022338867,
|
637 |
+
"LateLowerReshapeOp": 0.0013928413391113281,
|
638 |
+
"LateLowerTensorOp": 0.0053386688232421875,
|
639 |
+
"LateNeuronInstComb": 0.027225971221923828,
|
640 |
+
"LayoutPreprocessing": 0.047040700912475586,
|
641 |
+
"LayoutPreprocessingAndAnalysis": 0.12968659400939941,
|
642 |
+
"LayoutRequirementAnalysis": 0.01332712173461914,
|
643 |
+
"LegalizeCCOpLayout": 0.0019299983978271484,
|
644 |
+
"LegalizeOpLevelAlias": 0.0019905567169189453,
|
645 |
+
"LegalizePartitionReduce": 0.0013320446014404297,
|
646 |
+
"LegalizeSundaAccess": 0.0154571533203125,
|
647 |
+
"LegalizeSundaMacro": 0.018419265747070313,
|
648 |
+
"LegalizeType": 0.0047800540924072266,
|
649 |
+
"LocalLayoutOpt": 0.029850482940673828,
|
650 |
+
"LoopFusion": 0.006402492523193359,
|
651 |
+
"LoopSplitting": 0.0006403923034667969,
|
652 |
+
"LowerBroadcast": 0.0029153823852539063,
|
653 |
+
"LowerCCOpBlockAxis": 0.005182743072509766,
|
654 |
+
"LowerComplexBroadcast": 0.0022389888763427734,
|
655 |
+
"LowerIntrinsics": 0.056134939193725586,
|
656 |
+
"LowerTensorOp": 0.01170802116394043,
|
657 |
+
"LowerTranspose": 0.0226747989654541,
|
658 |
+
"MacroGeneration": 0.12812113761901855,
|
659 |
+
"MaskPropagation": 0.003968477249145508,
|
660 |
+
"MemcpyElimination": 0.1272127628326416,
|
661 |
+
"MutateDataType": 0.0016314983367919922,
|
662 |
+
"NeuronAliasDependencyInduction": 0.0003142356872558594,
|
663 |
+
"NeuronAliasDependencyReset": 0.011624336242675781,
|
664 |
+
"NeuronInstComb": 0.00946044921875,
|
665 |
+
"NeuronLICM": 0.008498668670654297,
|
666 |
+
"NeuronLoopFusion": 0.01998734474182129,
|
667 |
+
"NeuronLoopInterchange": 0.0018498897552490234,
|
668 |
+
"NeuronSimplifier": 0.03274989128112793,
|
669 |
+
"NeuronSimplifyPredicates": 0.001984834671020508,
|
670 |
+
"NeuronValueNumbering": 0.03443026542663574,
|
671 |
+
"OptimizeAliasedCopyChain": 0.0008573532104492188,
|
672 |
+
"OptimizeNKIKernels": 0.0016489028930664063,
|
673 |
+
"PAGLayoutOpt": 0.52590012550354,
|
674 |
+
"PComputeCutting": 0.007617473602294922,
|
675 |
+
"PGLayoutTilingPipeline": 1.6884160041809082,
|
676 |
+
"PGTiling": 0.42557621002197266,
|
677 |
+
"PadElimination": 0.0004146099090576172,
|
678 |
+
"ParAxesAnnotation": 0.49584078788757324,
|
679 |
+
"PartialLoopFusion": 0.04620671272277832,
|
680 |
+
"PartialSimdFusion": 0.04396200180053711,
|
681 |
+
"PerfectLoopNest": 0.002160310745239258,
|
682 |
+
"RecognizeOpIdiom": 0.004221677780151367,
|
683 |
+
"Recompute": 0.0006210803985595703,
|
684 |
+
"RelaxPredicates": 0.0031533241271972656,
|
685 |
+
"Rematerialization": 0.0020017623901367188,
|
686 |
+
"ReshapeWeights": 0.0012595653533935547,
|
687 |
+
"ResolveAccessConflict": 0.034206390380859375,
|
688 |
+
"ResolveComplicatePredicates": 0.001447916030883789,
|
689 |
+
"RewriteReplicationMatmul": 0.003072500228881836,
|
690 |
+
"RewriteWeights": 0.005293369293212891,
|
691 |
+
"SFKVectorizer": 0.31648850440979004,
|
692 |
+
"SimpleAllReduceTiling": 0.0026230812072753906,
|
693 |
+
"Simplifier": 0.00507354736328125,
|
694 |
+
"SimplifyMacroPredicates": 0.011813640594482422,
|
695 |
+
"SimplifyNeuronTensor": 0.029469728469848633,
|
696 |
+
"SimplifySlice": 0.0010852813720703125,
|
697 |
+
"SimplifyTensor": 0.006476879119873047,
|
698 |
+
"SpillPSum": 0.047782182693481445,
|
699 |
+
"SplitAPUnionSets": 0.022653579711914063,
|
700 |
+
"SplitAccGrp": 0.0025262832641601563,
|
701 |
+
"StaticProfiler": 0.03480696678161621,
|
702 |
+
"StaticTransposeLocalTensor": 0.006014108657836914,
|
703 |
+
"SundaISel": 0.05354189872741699,
|
704 |
+
"TCTransform": 0.0011737346649169922,
|
705 |
+
"TensorInitialization": 0.004692554473876953,
|
706 |
+
"TensorOpSimplifier": 0.007290840148925781,
|
707 |
+
"TensorOpTransform": 0.039176225662231445,
|
708 |
+
"TileCCOps": 0.009789466857910156,
|
709 |
+
"TilingProfiler": 0.02116703987121582,
|
710 |
+
"TransformConvOp": 0.002421855926513672,
|
711 |
+
"TritiumFusion": 0.24414300918579102,
|
712 |
+
"ValueNumbering": 0.002656698226928711,
|
713 |
+
"VectorizeDMA": 0.0018146038055419922,
|
714 |
+
"VectorizeMatMult": 0.034119606018066406,
|
715 |
+
"WeightCoalescing": 0.002785921096801758,
|
716 |
+
"ZeroSizeTensorElimination": 0.00019216537475585938
|
717 |
+
},
|
718 |
+
"tensorizer": {
|
719 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 17420.0,
|
720 |
+
"StaticProfiler::AifUb": 844.2889404296875,
|
721 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 712.69189453125,
|
722 |
+
"StaticProfiler::AverageDmaLength": 1079.260986328125,
|
723 |
+
"StaticProfiler::AverageFractalPeUtilization": 100.0,
|
724 |
+
"StaticProfiler::AveragePartitionUtilization": 99.85012817382813,
|
725 |
+
"StaticProfiler::AveragePeUtilization": 100.0,
|
726 |
+
"StaticProfiler::DDRTransferBytes": 339836928.0,
|
727 |
+
"StaticProfiler::InternalTransferBytes": 106692608.0,
|
728 |
+
"StaticProfiler::LoadExpanded": 296193.0,
|
729 |
+
"StaticProfiler::LocalizationEfficiency": 84.41326904296875,
|
730 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 93.66107177734375,
|
731 |
+
"StaticProfiler::StoreExpanded": 10241.0,
|
732 |
+
"StaticProfiler::TotalDMAExpanded": 306434.0,
|
733 |
+
"StaticProfiler::TotalDynamicInstancesCount": 21356.0,
|
734 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 21356.0,
|
735 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
736 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
737 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
738 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
739 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
740 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
741 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 64.0,
|
742 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 13824.0,
|
743 |
+
"TilingProfiler::NumPfTransposes": 9.0,
|
744 |
+
"TilingProfiler::NumPfTransposesForIo": 3.0,
|
745 |
+
"TilingProfiler::NumPfTransposesForLocal": 4.0,
|
746 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
747 |
+
"TilingProfiler::PfTransposeInstructions": 1904.0,
|
748 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 272.0,
|
749 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1120.0,
|
750 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 512.0,
|
751 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
752 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 683.0,
|
753 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
754 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
755 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
756 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
757 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
758 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
759 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
760 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
761 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
762 |
+
}
|
763 |
+
},
|
764 |
+
"sg0002": {
|
765 |
+
"compiletime": {
|
766 |
+
"AGOrderingAnalysisPass": 0.01833963394165039,
|
767 |
+
"AffinePredicateResolution": 0.0011298656463623047,
|
768 |
+
"AliasDependencyElimination": 0.0003044605255126953,
|
769 |
+
"AliasDependencyInduction": 0.0059871673583984375,
|
770 |
+
"AliasDependencyReset": 0.024695634841918945,
|
771 |
+
"BFComputeCutting": 0.0022745132446289063,
|
772 |
+
"BirCodeGenLoop": 0.12499594688415527,
|
773 |
+
"CCOpFusion": 0.025257110595703125,
|
774 |
+
"CanonicalizeDAGForPGTiling": 0.0039975643157958984,
|
775 |
+
"CanonicalizeIR": 0.0017023086547851563,
|
776 |
+
"CoalesceCCOp": 0.006569385528564453,
|
777 |
+
"CommuteConcat": 0.0009205341339111328,
|
778 |
+
"DMALocalityOpt": 0.0031824111938476563,
|
779 |
+
"DMAProfiler": 0.005998849868774414,
|
780 |
+
"DMATilingProfiler": 0.004607439041137695,
|
781 |
+
"DataLocalityOpt": 0.15957880020141602,
|
782 |
+
"DataStreaming": 0.03691840171813965,
|
783 |
+
"DeConcat": 0.0007259845733642578,
|
784 |
+
"DeadCodeElimination": 0.0009546279907226563,
|
785 |
+
"DeadStoreElimination": 0.006250619888305664,
|
786 |
+
"DelinearIndices": 0.005332231521606445,
|
787 |
+
"Delinearization": 0.0033500194549560547,
|
788 |
+
"DoNothing": 0.0001239776611328125,
|
789 |
+
"DramToDramTranspose": 0.020763397216796875,
|
790 |
+
"DumpGraphAndMetadata": 0.025223493576049805,
|
791 |
+
"EliminateDivs": 0.0023469924926757813,
|
792 |
+
"ExpandBatchNorm": 0.001692056655883789,
|
793 |
+
"ExpandISAMacro": 0.008460283279418945,
|
794 |
+
"FactorizeBlkDims": 0.00921773910522461,
|
795 |
+
"FactorizeThreadAxesInFreeDims": 0.002184152603149414,
|
796 |
+
"FlattenMacroLoop": 0.0022482872009277344,
|
797 |
+
"GenericAccessSimplifier": 0.0009622573852539063,
|
798 |
+
"InferInitValue": 0.027300357818603516,
|
799 |
+
"InferIntrinsicOnCC": 0.009199380874633789,
|
800 |
+
"InferNeuronTensor": 0.028067350387573242,
|
801 |
+
"InferNonlocalTensors": 0.014671802520751953,
|
802 |
+
"InferPSumTensor": 0.0808565616607666,
|
803 |
+
"InlineNativeKernels": 0.002727031707763672,
|
804 |
+
"InsertIOTransposes": 0.017727136611938477,
|
805 |
+
"InsertLocalTransposes": 0.004176616668701172,
|
806 |
+
"InsertOffloadedTransposes": 0.002771615982055664,
|
807 |
+
"LICM": 0.005248069763183594,
|
808 |
+
"LateLegalizeInst": 0.0068132877349853516,
|
809 |
+
"LateLegalizePostSplit": 0.0045223236083984375,
|
810 |
+
"LateLowerReshapeOp": 0.0012927055358886719,
|
811 |
+
"LateLowerTensorOp": 0.0014028549194335938,
|
812 |
+
"LateNeuronInstComb": 0.016278505325317383,
|
813 |
+
"LayoutPreprocessing": 0.026221275329589844,
|
814 |
+
"LayoutPreprocessingAndAnalysis": 0.07468867301940918,
|
815 |
+
"LayoutRequirementAnalysis": 0.004823446273803711,
|
816 |
+
"LegalizeCCOpLayout": 0.0023353099822998047,
|
817 |
+
"LegalizeOpLevelAlias": 0.0013494491577148438,
|
818 |
+
"LegalizePartitionReduce": 0.0018906593322753906,
|
819 |
+
"LegalizeSundaAccess": 0.06063246726989746,
|
820 |
+
"LegalizeSundaMacro": 0.04256129264831543,
|
821 |
+
"LegalizeType": 0.005706787109375,
|
822 |
+
"LocalLayoutOpt": 0.016018390655517578,
|
823 |
+
"LoopFusion": 0.005109071731567383,
|
824 |
+
"LoopSplitting": 0.00048542022705078125,
|
825 |
+
"LowerBroadcast": 0.0029256343841552734,
|
826 |
+
"LowerCCOpBlockAxis": 0.0038700103759765625,
|
827 |
+
"LowerComplexBroadcast": 0.004511594772338867,
|
828 |
+
"LowerIntrinsics": 0.3245360851287842,
|
829 |
+
"LowerTensorOp": 0.010710477828979492,
|
830 |
+
"LowerTranspose": 0.05445718765258789,
|
831 |
+
"MacroGeneration": 0.061620473861694336,
|
832 |
+
"MaskPropagation": 0.002919435501098633,
|
833 |
+
"MemcpyElimination": 0.02559375762939453,
|
834 |
+
"MutateDataType": 0.0014896392822265625,
|
835 |
+
"NeuronAliasDependencyInduction": 0.0002808570861816406,
|
836 |
+
"NeuronAliasDependencyReset": 0.05649685859680176,
|
837 |
+
"NeuronInstComb": 0.004225254058837891,
|
838 |
+
"NeuronLICM": 0.014126300811767578,
|
839 |
+
"NeuronLoopFusion": 0.009732246398925781,
|
840 |
+
"NeuronLoopInterchange": 0.0025072097778320313,
|
841 |
+
"NeuronSimplifier": 0.03835606575012207,
|
842 |
+
"NeuronSimplifyPredicates": 0.005949974060058594,
|
843 |
+
"NeuronValueNumbering": 0.002723217010498047,
|
844 |
+
"OptimizeAliasedCopyChain": 0.0007545948028564453,
|
845 |
+
"OptimizeNKIKernels": 0.6443507671356201,
|
846 |
+
"PAGLayoutOpt": 0.20021605491638184,
|
847 |
+
"PComputeCutting": 0.0046160221099853516,
|
848 |
+
"PGLayoutTilingPipeline": 0.6925618648529053,
|
849 |
+
"PGTiling": 0.21065187454223633,
|
850 |
+
"PadElimination": 0.00038623809814453125,
|
851 |
+
"ParAxesAnnotation": 0.052834510803222656,
|
852 |
+
"PartialLoopFusion": 0.051622629165649414,
|
853 |
+
"PartialSimdFusion": 0.014065980911254883,
|
854 |
+
"PerfectLoopNest": 0.0019462108612060547,
|
855 |
+
"RecognizeOpIdiom": 0.0037450790405273438,
|
856 |
+
"Recompute": 0.0004031658172607422,
|
857 |
+
"RelaxPredicates": 0.03561973571777344,
|
858 |
+
"Rematerialization": 0.0018870830535888672,
|
859 |
+
"ReshapeWeights": 0.0009450912475585938,
|
860 |
+
"ResolveAccessConflict": 0.0038840770721435547,
|
861 |
+
"ResolveComplicatePredicates": 0.0011222362518310547,
|
862 |
+
"RewriteReplicationMatmul": 0.0017135143280029297,
|
863 |
+
"RewriteWeights": 0.0024623870849609375,
|
864 |
+
"SFKVectorizer": 0.19122314453125,
|
865 |
+
"SimpleAllReduceTiling": 0.0035347938537597656,
|
866 |
+
"Simplifier": 0.0030031204223632813,
|
867 |
+
"SimplifyMacroPredicates": 0.005193233489990234,
|
868 |
+
"SimplifyNeuronTensor": 0.38506579399108887,
|
869 |
+
"SimplifySlice": 0.001062631607055664,
|
870 |
+
"SimplifyTensor": 0.009534358978271484,
|
871 |
+
"SpillPSum": 0.05334925651550293,
|
872 |
+
"SplitAPUnionSets": 0.013537406921386719,
|
873 |
+
"SplitAccGrp": 0.0014171600341796875,
|
874 |
+
"StaticProfiler": 0.005720615386962891,
|
875 |
+
"StaticTransposeLocalTensor": 0.003614664077758789,
|
876 |
+
"SundaISel": 0.09031486511230469,
|
877 |
+
"TCTransform": 0.0008947849273681641,
|
878 |
+
"TensorInitialization": 0.010958433151245117,
|
879 |
+
"TensorOpSimplifier": 0.005278110504150391,
|
880 |
+
"TensorOpTransform": 0.020787477493286133,
|
881 |
+
"TileCCOps": 0.005544900894165039,
|
882 |
+
"TilingProfiler": 0.007747173309326172,
|
883 |
+
"TransformConvOp": 0.003238677978515625,
|
884 |
+
"TritiumFusion": 0.16130614280700684,
|
885 |
+
"ValueNumbering": 0.0018999576568603516,
|
886 |
+
"VectorizeDMA": 0.0017979145050048828,
|
887 |
+
"VectorizeMatMult": 0.007079362869262695,
|
888 |
+
"WeightCoalescing": 0.0030584335327148438,
|
889 |
+
"ZeroSizeTensorElimination": 0.00022983551025390625
|
890 |
+
},
|
891 |
+
"tensorizer": {
|
892 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 17056.0,
|
893 |
+
"StaticProfiler::AifUb": 568.2581176757813,
|
894 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 456.7580261230469,
|
895 |
+
"StaticProfiler::AverageDmaLength": 1314.3221435546875,
|
896 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.79875946044922,
|
897 |
+
"StaticProfiler::AveragePartitionUtilization": 99.50694274902344,
|
898 |
+
"StaticProfiler::AveragePeUtilization": 99.19517517089844,
|
899 |
+
"StaticProfiler::DDRTransferBytes": 407087136.0,
|
900 |
+
"StaticProfiler::InternalTransferBytes": 48342036.0,
|
901 |
+
"StaticProfiler::LoadExpanded": 310291.0,
|
902 |
+
"StaticProfiler::LocalizationEfficiency": 80.37861633300781,
|
903 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 88.63314819335938,
|
904 |
+
"StaticProfiler::StoreExpanded": 6699.0,
|
905 |
+
"StaticProfiler::TotalDMAExpanded": 316990.0,
|
906 |
+
"StaticProfiler::TotalDynamicInstancesCount": 19674.0,
|
907 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19578.0,
|
908 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
909 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
910 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
911 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
912 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
913 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
914 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
|
915 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 14848.0,
|
916 |
+
"TilingProfiler::NumPfTransposes": 4.0,
|
917 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
918 |
+
"TilingProfiler::NumPfTransposesForLocal": 1.0,
|
919 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
|
920 |
+
"TilingProfiler::PfTransposeInstructions": 769.0,
|
921 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
922 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
|
923 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
|
924 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
|
925 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 319.0,
|
926 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
927 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
928 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
929 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
930 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
931 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
932 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
933 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
934 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
935 |
+
}
|
936 |
+
},
|
937 |
+
"sg01": {
|
938 |
+
"compiletime": {
|
939 |
+
"CanonicalizeConv": 0.0,
|
940 |
+
"CanonicalizeForTensorizer": 1.9999999494757503e-05,
|
941 |
+
"Canonicalizer": 0.0003800000122282654,
|
942 |
+
"HoistCompute": 3.000000106112566e-06,
|
943 |
+
"IdentifyCrossPassTensors": 1.9999999494757503e-05,
|
944 |
+
"MemcastMotion": 1.2000000424450263e-05,
|
945 |
+
"PenguinizeFunctions": 1.9999999494757503e-05,
|
946 |
+
"PruneFunctions": 1.700000029813964e-05,
|
947 |
+
"RemoveOptimizationBarriers": 2.499999936844688e-05,
|
948 |
+
"ScatterMotion": 7.000000096013537e-06,
|
949 |
+
"TensorizerLegalizationPass": 2.2000000171829015e-05,
|
950 |
+
"VerifySupportedOps": 1.4999999621068127e-05,
|
951 |
+
"algsimp": 0.00012199999764561653,
|
952 |
+
"batchnorm_expander": 2.2000000171829015e-05,
|
953 |
+
"boundary-marker-removal": 7.000000096013537e-06,
|
954 |
+
"call-inliner": 1.700000029813964e-05,
|
955 |
+
"canonicalize-boundary-marker": 7.999999979801942e-06,
|
956 |
+
"collective-stream-id-checker": 9.999999747378752e-06,
|
957 |
+
"comparison-expander": 9.000000318337698e-06,
|
958 |
+
"computation-deduplicator": 3.600000127335079e-05,
|
959 |
+
"conditional-to-select": 9.000000318337698e-06,
|
960 |
+
"config-lowering": 7.699999696342275e-05,
|
961 |
+
"constant_folding": 1.2999999853491317e-05,
|
962 |
+
"cse": 2.5999999706982635e-05,
|
963 |
+
"dce": 1.9999999949504854e-06,
|
964 |
+
"dynamic-slice-transpose": 7.999999979801942e-06,
|
965 |
+
"eliminate-redundant-compare": 7.000000096013537e-06,
|
966 |
+
"emit-offloaded-dropout": 2.9000000722589903e-05,
|
967 |
+
"flatten-call-graph": 1.700000029813964e-05,
|
968 |
+
"fuse-send-recv": 3.7999998312443495e-05,
|
969 |
+
"hilo::LegalizeAlias": 7.000000096013537e-06,
|
970 |
+
"hilo::NeuronInstCombine": 4.8000001697801054e-05,
|
971 |
+
"hilo::NeuronOpFusion": 2.300000051036477e-05,
|
972 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 2.499999936844688e-05,
|
973 |
+
"hilo::ScheduleFusion": 0.0,
|
974 |
+
"hilo::SixtyFourHack": 1.8000000636675395e-05,
|
975 |
+
"hilo::VerifyAliasing": 3.999999989900971e-06,
|
976 |
+
"hlo-mac-count": 5.900000178371556e-05,
|
977 |
+
"hlo-verifier": 0.00028700000257231295,
|
978 |
+
"legalize-ccops": 1.9999999949504854e-06,
|
979 |
+
"legalize-compare": 7.000000096013537e-06,
|
980 |
+
"lower-argminmax-custom-call": 7.000000096013537e-06,
|
981 |
+
"map-inline": 1.8000000636675395e-05,
|
982 |
+
"metadata-naming": 3.400000059627928e-05,
|
983 |
+
"mlir::detail::OpToOpPassAdaptor": 4.099999932805076e-05,
|
984 |
+
"mlir::hlo::MhloToPyPenguin": 0.025769000872969627,
|
985 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00010399999882793054,
|
986 |
+
"mlir::mhlo::LowerComplexPass": 0.00024399999529123306,
|
987 |
+
"native-to-custom-softmax": 1.700000029813964e-05,
|
988 |
+
"native-to-custom-softmax-dx": 4.70000013592653e-05,
|
989 |
+
"operand_upcaster": 2.9999999242136255e-05,
|
990 |
+
"post-par-pipe-begin": 9.000000318337698e-06,
|
991 |
+
"post-par-pipe-end": 0.0,
|
992 |
+
"post-partition-simplification": 0.0009069999796338379,
|
993 |
+
"replace-minimum-constant": 1.4000000192027073e-05,
|
994 |
+
"reshape-mover": 6.000000212225132e-06,
|
995 |
+
"simplify-concat": 7.300000288523734e-05,
|
996 |
+
"simplify-while-loops": 4.999999873689376e-06,
|
997 |
+
"transform-variadic-reduce": 1.2999999853491317e-05,
|
998 |
+
"tuple-simplifier": 9.000000318337698e-06,
|
999 |
+
"unpack-nested-aws-ntwsr": 4.999999873689376e-06,
|
1000 |
+
"unroll-while-loop": 9.999999974752427e-07
|
1001 |
+
},
|
1002 |
+
"hilo": {
|
1003 |
+
"ArithmeticIntensity": 808.5779418945313,
|
1004 |
+
"HloMacCount": 115964116992.0,
|
1005 |
+
"Traffic": 286834720.0
|
1006 |
+
}
|
1007 |
+
},
|
1008 |
+
"sg02": {
|
1009 |
+
"compiletime": {
|
1010 |
+
"CanonicalizeConv": 0.0,
|
1011 |
+
"CanonicalizeForTensorizer": 1.4000000192027073e-05,
|
1012 |
+
"Canonicalizer": 0.0005150000215508044,
|
1013 |
+
"HoistCompute": 1.9999999949504854e-06,
|
1014 |
+
"IdentifyCrossPassTensors": 1.8000000636675395e-05,
|
1015 |
+
"MemcastMotion": 1.4000000192027073e-05,
|
1016 |
+
"PenguinizeFunctions": 9.999999747378752e-06,
|
1017 |
+
"PruneFunctions": 9.999999747378752e-06,
|
1018 |
+
"RemoveOptimizationBarriers": 1.5999999959603883e-05,
|
1019 |
+
"ScatterMotion": 9.999999974752427e-07,
|
1020 |
+
"TensorizerLegalizationPass": 7.000000096013537e-06,
|
1021 |
+
"VerifySupportedOps": 1.8000000636675395e-05,
|
1022 |
+
"algsimp": 7.599999662488699e-05,
|
1023 |
+
"batchnorm_expander": 1.4999999621068127e-05,
|
1024 |
+
"boundary-marker-removal": 3.999999989900971e-06,
|
1025 |
+
"call-inliner": 1.2000000424450263e-05,
|
1026 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
1027 |
+
"collective-stream-id-checker": 6.000000212225132e-06,
|
1028 |
+
"comparison-expander": 6.000000212225132e-06,
|
1029 |
+
"computation-deduplicator": 2.499999936844688e-05,
|
1030 |
+
"conditional-to-select": 7.000000096013537e-06,
|
1031 |
+
"config-lowering": 6.299999949987978e-05,
|
1032 |
+
"constant_folding": 1.1000000085914508e-05,
|
1033 |
+
"cse": 1.4000000192027073e-05,
|
1034 |
+
"dce": 9.999999974752427e-07,
|
1035 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
1036 |
+
"eliminate-redundant-compare": 3.000000106112566e-06,
|
1037 |
+
"emit-offloaded-dropout": 2.099999983329326e-05,
|
1038 |
+
"flatten-call-graph": 1.2000000424450263e-05,
|
1039 |
+
"fuse-send-recv": 2.4000000848900527e-05,
|
1040 |
+
"hilo::LegalizeAlias": 1.9999999949504854e-06,
|
1041 |
+
"hilo::NeuronInstCombine": 4.999999873689376e-05,
|
1042 |
+
"hilo::NeuronOpFusion": 2.099999983329326e-05,
|
1043 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 1.8000000636675395e-05,
|
1044 |
+
"hilo::ScheduleFusion": 3.999999989900971e-06,
|
1045 |
+
"hilo::SixtyFourHack": 4.3000000005122274e-05,
|
1046 |
+
"hilo::VerifyAliasing": 9.999999974752427e-07,
|
1047 |
+
"hlo-mac-count": 0.0002410000015515834,
|
1048 |
+
"hlo-verifier": 0.00020399999630171806,
|
1049 |
+
"legalize-ccops": 9.999999974752427e-07,
|
1050 |
+
"legalize-compare": 3.999999989900971e-06,
|
1051 |
+
"lower-argminmax-custom-call": 3.000000106112566e-06,
|
1052 |
+
"map-inline": 1.4000000192027073e-05,
|
1053 |
+
"metadata-naming": 1.700000029813964e-05,
|
1054 |
+
"mlir::detail::OpToOpPassAdaptor": 3.5000000934815034e-05,
|
1055 |
+
"mlir::hlo::MhloToPyPenguin": 0.01627100072801113,
|
1056 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00011700000322889537,
|
1057 |
+
"mlir::mhlo::LowerComplexPass": 0.00016500000492669642,
|
1058 |
+
"native-to-custom-softmax": 1.1000000085914508e-05,
|
1059 |
+
"native-to-custom-softmax-dx": 4.5000000682193786e-05,
|
1060 |
+
"operand_upcaster": 1.8999999156221747e-05,
|
1061 |
+
"post-par-pipe-begin": 3.000000106112566e-06,
|
1062 |
+
"post-par-pipe-end": 0.0,
|
1063 |
+
"post-partition-simplification": 0.0006179999909363687,
|
1064 |
+
"replace-minimum-constant": 9.999999747378752e-06,
|
1065 |
+
"reshape-mover": 3.000000106112566e-06,
|
1066 |
+
"simplify-concat": 4.999999873689376e-05,
|
1067 |
+
"simplify-while-loops": 3.000000106112566e-06,
|
1068 |
+
"transform-variadic-reduce": 6.0999998822808266e-05,
|
1069 |
+
"tuple-simplifier": 4.999999873689376e-06,
|
1070 |
+
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
|
1071 |
+
"unroll-while-loop": 0.0
|
1072 |
+
},
|
1073 |
+
"hilo": {
|
1074 |
+
"ArithmeticIntensity": 537.9506225585938,
|
1075 |
+
"HloMacCount": 90261422080.0,
|
1076 |
+
"Traffic": 335575104.0
|
1077 |
+
}
|
1078 |
+
}
|
1079 |
+
}
|
context_encoding_model/_tp0_bk3/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9330f87daaab052682ce2a183d9908828cede116976ffca894ecbd7ea31a028c
|
3 |
+
size 1731584
|
context_encoding_model/_tp0_bk3/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:592de62bcecb744077fb8bd9e5363e57ea2543bf82083638f8ad2039a512933c
|
3 |
+
size 1561029
|
context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba393ab52af446df672f41632e2c112cb7e051a45d99da03d33fb1f12262cca6
|
3 |
+
size 1626903
|
context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9330f87daaab052682ce2a183d9908828cede116976ffca894ecbd7ea31a028c
|
3 |
+
size 1731584
|
context_encoding_model/_tp0_bk3/neuron_config.json
ADDED
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": false,
|
3 |
+
"_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
|
4 |
+
"add_cross_attention": false,
|
5 |
+
"architectures": [
|
6 |
+
"MistralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"attribute_map": {},
|
10 |
+
"bad_words_ids": null,
|
11 |
+
"begin_suppress_tokens": null,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"chunk_size_feed_forward": 0,
|
14 |
+
"cross_attention_hidden_size": null,
|
15 |
+
"decoder_start_token_id": null,
|
16 |
+
"diversity_penalty": 0.0,
|
17 |
+
"do_sample": false,
|
18 |
+
"early_stopping": false,
|
19 |
+
"encoder_no_repeat_ngram_size": 0,
|
20 |
+
"eos_token_id": 2,
|
21 |
+
"exponential_decay_length_penalty": null,
|
22 |
+
"finetuning_task": null,
|
23 |
+
"forced_bos_token_id": null,
|
24 |
+
"forced_eos_token_id": null,
|
25 |
+
"fused_spec_config": null,
|
26 |
+
"head_dim": 128,
|
27 |
+
"hidden_act": "silu",
|
28 |
+
"hidden_size": 4096,
|
29 |
+
"id2label": {
|
30 |
+
"0": "LABEL_0",
|
31 |
+
"1": "LABEL_1"
|
32 |
+
},
|
33 |
+
"initializer_range": 0.02,
|
34 |
+
"intermediate_size": 14336,
|
35 |
+
"is_decoder": false,
|
36 |
+
"is_encoder_decoder": false,
|
37 |
+
"label2id": {
|
38 |
+
"LABEL_0": 0,
|
39 |
+
"LABEL_1": 1
|
40 |
+
},
|
41 |
+
"length_penalty": 1.0,
|
42 |
+
"max_length": 20,
|
43 |
+
"max_position_embeddings": 32768,
|
44 |
+
"metadata": null,
|
45 |
+
"min_length": 0,
|
46 |
+
"model_type": "mistral",
|
47 |
+
"neuron_config": {
|
48 |
+
"activation_quantization_type": null,
|
49 |
+
"allow_input_truncation": false,
|
50 |
+
"apply_seq_ids_mask": false,
|
51 |
+
"async_mode": false,
|
52 |
+
"attention_dp_degree": 1,
|
53 |
+
"attention_dtype": null,
|
54 |
+
"attn_block_cte_nki_kernel_enabled": false,
|
55 |
+
"attn_block_tkg_nki_kernel_cache_update": false,
|
56 |
+
"attn_block_tkg_nki_kernel_enabled": false,
|
57 |
+
"attn_cls": "NeuronLlamaAttention",
|
58 |
+
"attn_kernel_enabled": null,
|
59 |
+
"attn_tkg_builtin_kernel_enabled": false,
|
60 |
+
"attn_tkg_nki_kernel_enabled": false,
|
61 |
+
"batch_size": 1,
|
62 |
+
"bucket_n_active_tokens": true,
|
63 |
+
"buckets": [
|
64 |
+
1024
|
65 |
+
],
|
66 |
+
"cast_type": "config",
|
67 |
+
"cc_pipeline_tiling_factor": 2,
|
68 |
+
"chunked_prefill_config": null,
|
69 |
+
"context_encoding_buckets": [
|
70 |
+
1024
|
71 |
+
],
|
72 |
+
"cp_degree": 1,
|
73 |
+
"ctx_batch_size": 1,
|
74 |
+
"disable_kv_cache_tiling": false,
|
75 |
+
"draft_model_modules_to_not_convert": null,
|
76 |
+
"enable_bucketing": true,
|
77 |
+
"enable_eagle_draft_input_norm": false,
|
78 |
+
"enable_eagle_speculation": false,
|
79 |
+
"enable_fused_speculation": false,
|
80 |
+
"enable_long_context_mode": false,
|
81 |
+
"enable_output_completion_notifications": false,
|
82 |
+
"enable_spill_reload_dge": false,
|
83 |
+
"enable_token_tree": false,
|
84 |
+
"ep_degree": 1,
|
85 |
+
"expert_mlp_nki_kernel_enabled": null,
|
86 |
+
"flash_decoding_enabled": false,
|
87 |
+
"fused_qkv": false,
|
88 |
+
"fused_rmsnorm_skip_gamma": false,
|
89 |
+
"is_block_kv_layout": null,
|
90 |
+
"is_chunked_prefill": false,
|
91 |
+
"is_continuous_batching": true,
|
92 |
+
"is_eagle_draft": false,
|
93 |
+
"is_medusa": false,
|
94 |
+
"is_prefill_stage": true,
|
95 |
+
"is_prefix_caching": false,
|
96 |
+
"k_cache_transposed": false,
|
97 |
+
"kv_cache_batch_size": 4,
|
98 |
+
"kv_cache_padding_size": 0,
|
99 |
+
"kv_cache_quant": false,
|
100 |
+
"kv_cache_tiling": false,
|
101 |
+
"layer_boundary_markers": false,
|
102 |
+
"lm_head_pad": false,
|
103 |
+
"lm_head_pad_alignment_size": 1,
|
104 |
+
"local_ranks_size": 2,
|
105 |
+
"logical_nc_config": 1,
|
106 |
+
"lora_config": null,
|
107 |
+
"max_batch_size": 4,
|
108 |
+
"max_context_length": 2048,
|
109 |
+
"max_length": 2048,
|
110 |
+
"max_new_tokens": null,
|
111 |
+
"medusa_speculation_length": 0,
|
112 |
+
"medusa_tree": null,
|
113 |
+
"mlp_kernel_enabled": false,
|
114 |
+
"mlp_kernel_fuse_residual_add": false,
|
115 |
+
"modules_to_not_convert": null,
|
116 |
+
"moe_fused_nki_kernel_enabled": null,
|
117 |
+
"n_active_tokens": 2048,
|
118 |
+
"n_positions": 2048,
|
119 |
+
"num_medusa_heads": 0,
|
120 |
+
"on_cpu": false,
|
121 |
+
"on_device_sampling_config": {
|
122 |
+
"deterministic": false,
|
123 |
+
"do_sample": false,
|
124 |
+
"dynamic": true,
|
125 |
+
"global_topk": 256,
|
126 |
+
"on_device_sampling_config": true,
|
127 |
+
"temperature": 1.0,
|
128 |
+
"top_k": 1,
|
129 |
+
"top_k_kernel_enabled": false,
|
130 |
+
"top_p": 1.0
|
131 |
+
},
|
132 |
+
"output_logits": false,
|
133 |
+
"overrides_torch_dtype": true,
|
134 |
+
"pa_block_size": 2048,
|
135 |
+
"pa_num_blocks": 4,
|
136 |
+
"padding_side": "right",
|
137 |
+
"pp_degree": 1,
|
138 |
+
"prefix_buckets": null,
|
139 |
+
"qk_layernorm": false,
|
140 |
+
"qkv_kernel_enabled": false,
|
141 |
+
"qkv_kernel_fuse_residual_add": false,
|
142 |
+
"qkv_kernel_nbsd_layout": false,
|
143 |
+
"quantization_dtype": "int8",
|
144 |
+
"quantization_type": "per_tensor_symmetric",
|
145 |
+
"quantize_clamp_bound": Infinity,
|
146 |
+
"quantized": false,
|
147 |
+
"quantized_checkpoints_path": null,
|
148 |
+
"quantized_mlp_kernel_enabled": false,
|
149 |
+
"rmsnorm_quantize_kernel_enabled": false,
|
150 |
+
"router_topk_nki_kernel_enabled": null,
|
151 |
+
"rpl_reduce_dtype": null,
|
152 |
+
"save_sharded_checkpoint": true,
|
153 |
+
"scratchpad_page_size": null,
|
154 |
+
"seq_len": 2048,
|
155 |
+
"seq_len_threshold_for_cc_tiling": 16384,
|
156 |
+
"sequence_parallel_enabled": false,
|
157 |
+
"shared_mlp_nki_kernel_enabled": null,
|
158 |
+
"skip_sharding": false,
|
159 |
+
"skip_warmup": false,
|
160 |
+
"spec_batch_size": 4,
|
161 |
+
"speculation_length": 0,
|
162 |
+
"start_rank_id": 0,
|
163 |
+
"target": null,
|
164 |
+
"tile_cc": false,
|
165 |
+
"tkg_batch_size": 4,
|
166 |
+
"token_generation_buckets": null,
|
167 |
+
"token_tree_config": null,
|
168 |
+
"torch_dtype": "bfloat16",
|
169 |
+
"tp_degree": 2,
|
170 |
+
"vocab_parallel": false,
|
171 |
+
"weight_gather_seq_len_threshold": 32768,
|
172 |
+
"weights_to_skip_layout_optimization": [],
|
173 |
+
"world_size": 2
|
174 |
+
},
|
175 |
+
"no_repeat_ngram_size": 0,
|
176 |
+
"num_attention_heads": 32,
|
177 |
+
"num_beam_groups": 1,
|
178 |
+
"num_beams": 1,
|
179 |
+
"num_cores_per_group": 1,
|
180 |
+
"num_hidden_layers": 32,
|
181 |
+
"num_key_value_heads": 8,
|
182 |
+
"num_return_sequences": 1,
|
183 |
+
"output_attentions": false,
|
184 |
+
"output_hidden_states": false,
|
185 |
+
"output_scores": false,
|
186 |
+
"pad_token_id": 0,
|
187 |
+
"prefix": null,
|
188 |
+
"problem_type": null,
|
189 |
+
"pruned_heads": {},
|
190 |
+
"remove_invalid_values": false,
|
191 |
+
"repetition_penalty": 1.0,
|
192 |
+
"return_dict": true,
|
193 |
+
"return_dict_in_generate": false,
|
194 |
+
"rms_norm_eps": 1e-05,
|
195 |
+
"rope_theta": 1000000.0,
|
196 |
+
"sep_token_id": null,
|
197 |
+
"sliding_window": null,
|
198 |
+
"suppress_tokens": null,
|
199 |
+
"task_specific_params": null,
|
200 |
+
"temperature": 1.0,
|
201 |
+
"tf_legacy_loss": false,
|
202 |
+
"tie_encoder_decoder": false,
|
203 |
+
"tie_word_embeddings": false,
|
204 |
+
"tokenizer_class": null,
|
205 |
+
"top_k": 50,
|
206 |
+
"top_p": 1.0,
|
207 |
+
"torchscript": false,
|
208 |
+
"transformers_version": "4.42.0.dev0",
|
209 |
+
"typical_p": 1.0,
|
210 |
+
"use_bfloat16": false,
|
211 |
+
"use_cache": true,
|
212 |
+
"vocab_size": 32768
|
213 |
+
}
|
context_encoding_model/_tp0_bk4/command.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
neuronx-cc compile --framework=XLA model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb --output model.MODULE_d342327da795afc2aa68+5e8b788a.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' --lnc=1 -O1 '--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true' --logfile=log-neuron-cc.txt --verbose=35
|
context_encoding_model/_tp0_bk4/compile_flags.MODULE_d342327da795afc2aa68+5e8b788a.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/models/mistral-7b-v0.3-instruct-neuronx/context_encoding_model/_tp0_bk4/log-neuron-cc.txt"]
|
context_encoding_model/_tp0_bk4/global_metric_store.json
ADDED
@@ -0,0 +1,1079 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Average": {
|
3 |
+
"tensorizer": {
|
4 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.88423156738281,
|
5 |
+
"StaticProfiler::AveragePartitionUtilization": 99.71043395996094,
|
6 |
+
"StaticProfiler::AveragePeUtilization": 99.53581237792969,
|
7 |
+
"StaticProfiler::LocalizationEfficiency": 41.61907196044922,
|
8 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 45.55835723876953,
|
9 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
10 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"Count": {
|
14 |
+
"tensorizer": {
|
15 |
+
"StaticProfiler::AverageFractalPeUtilization": 1.0,
|
16 |
+
"StaticProfiler::AveragePartitionUtilization": 1.0,
|
17 |
+
"StaticProfiler::AveragePeUtilization": 1.0,
|
18 |
+
"StaticProfiler::LocalizationEfficiency": 1.0,
|
19 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
|
20 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
|
21 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"Sum": {
|
25 |
+
"compiletime": {
|
26 |
+
"AGOrderingAnalysisPass": 0.06499266624450684,
|
27 |
+
"AffinePredicateResolution": 0.00141143798828125,
|
28 |
+
"AliasDependencyElimination": 0.00017595291137695313,
|
29 |
+
"AliasDependencyInduction": 0.006516218185424805,
|
30 |
+
"AliasDependencyReset": 0.02024674415588379,
|
31 |
+
"BFComputeCutting": 0.0023620128631591797,
|
32 |
+
"BirCodeGenLoop": 0.13731598854064941,
|
33 |
+
"CCOpFusion": 0.02620387077331543,
|
34 |
+
"CanonicalizeConv": 6.0999998822808266e-05,
|
35 |
+
"CanonicalizeDAGForPGTiling": 0.0074574947357177734,
|
36 |
+
"CanonicalizeForTensorizer": 4.999999509891495e-05,
|
37 |
+
"CanonicalizeIR": 0.0019347667694091797,
|
38 |
+
"Canonicalizer": 0.0009759999811649323,
|
39 |
+
"CoalesceCCOp": 0.005659818649291992,
|
40 |
+
"CommuteConcat": 0.0009889602661132813,
|
41 |
+
"DMALocalityOpt": 0.0024099349975585938,
|
42 |
+
"DMAProfiler": 0.008657455444335938,
|
43 |
+
"DMATilingProfiler": 0.04570889472961426,
|
44 |
+
"DataLocalityOpt": 0.1127479076385498,
|
45 |
+
"DataStreaming": 0.007959365844726563,
|
46 |
+
"DeConcat": 0.0007421970367431641,
|
47 |
+
"DeadCodeElimination": 0.002073049545288086,
|
48 |
+
"DeadStoreElimination": 0.006093263626098633,
|
49 |
+
"DelinearIndices": 0.010124444961547852,
|
50 |
+
"Delinearization": 0.005106449127197266,
|
51 |
+
"DoNothing": 0.0003044605255126953,
|
52 |
+
"DramToDramTranspose": 0.03771638870239258,
|
53 |
+
"DumpGraphAndMetadata": 0.05296611785888672,
|
54 |
+
"EliminateDivs": 0.0021944046020507813,
|
55 |
+
"ExpandBatchNorm": 0.0015587806701660156,
|
56 |
+
"ExpandISAMacro": 0.005168437957763672,
|
57 |
+
"FactorizeBlkDims": 0.011832475662231445,
|
58 |
+
"FactorizeThreadAxesInFreeDims": 0.0014889240264892578,
|
59 |
+
"FlattenMacroLoop": 0.0025510787963867188,
|
60 |
+
"GenericAccessSimplifier": 0.0009717941284179688,
|
61 |
+
"HoistCompute": 1.3999999282532372e-05,
|
62 |
+
"IdentifyCrossPassTensors": 4.099999932805076e-05,
|
63 |
+
"InferInitValue": 0.030786514282226563,
|
64 |
+
"InferIntrinsicOnCC": 0.012189865112304688,
|
65 |
+
"InferNeuronTensor": 0.0819096565246582,
|
66 |
+
"InferNonlocalTensors": 0.025629520416259766,
|
67 |
+
"InferPSumTensor": 0.08477997779846191,
|
68 |
+
"InlineNativeKernels": 0.003083944320678711,
|
69 |
+
"InsertIOTransposes": 0.02764296531677246,
|
70 |
+
"InsertLocalTransposes": 0.0040624141693115234,
|
71 |
+
"InsertOffloadedTransposes": 0.005682229995727539,
|
72 |
+
"LICM": 0.003050565719604492,
|
73 |
+
"LateLegalizeInst": 0.02321314811706543,
|
74 |
+
"LateLegalizePostSplit": 0.004519462585449219,
|
75 |
+
"LateLowerReshapeOp": 0.0023851394653320313,
|
76 |
+
"LateLowerTensorOp": 0.0016567707061767578,
|
77 |
+
"LateNeuronInstComb": 0.011125564575195313,
|
78 |
+
"LayoutPreprocessing": 0.06753706932067871,
|
79 |
+
"LayoutPreprocessingAndAnalysis": 0.16236424446105957,
|
80 |
+
"LayoutRequirementAnalysis": 0.005420684814453125,
|
81 |
+
"LegalizeCCOpLayout": 0.0023717880249023438,
|
82 |
+
"LegalizeOpLevelAlias": 0.0012898445129394531,
|
83 |
+
"LegalizePartitionReduce": 0.0011932849884033203,
|
84 |
+
"LegalizeSundaAccess": 0.026709556579589844,
|
85 |
+
"LegalizeSundaMacro": 0.012512683868408203,
|
86 |
+
"LegalizeType": 0.04736900329589844,
|
87 |
+
"LocalLayoutOpt": 0.0263979434967041,
|
88 |
+
"LoopFusion": 0.005193948745727539,
|
89 |
+
"LoopSplitting": 0.0005512237548828125,
|
90 |
+
"LowerBroadcast": 0.04221224784851074,
|
91 |
+
"LowerCCOpBlockAxis": 0.008313655853271484,
|
92 |
+
"LowerComplexBroadcast": 0.0025756359100341797,
|
93 |
+
"LowerIntrinsics": 0.11752676963806152,
|
94 |
+
"LowerTensorOp": 0.010608196258544922,
|
95 |
+
"LowerTranspose": 0.08257818222045898,
|
96 |
+
"MacroGeneration": 0.07271862030029297,
|
97 |
+
"MaskPropagation": 0.005186557769775391,
|
98 |
+
"MemcastMotion": 2.2000000171829015e-05,
|
99 |
+
"MemcpyElimination": 0.026259899139404297,
|
100 |
+
"MutateDataType": 0.0013203620910644531,
|
101 |
+
"NeuronAliasDependencyInduction": 0.0002338886260986328,
|
102 |
+
"NeuronAliasDependencyReset": 0.029464006423950195,
|
103 |
+
"NeuronInstComb": 0.004740476608276367,
|
104 |
+
"NeuronLICM": 0.01508331298828125,
|
105 |
+
"NeuronLoopFusion": 0.00891876220703125,
|
106 |
+
"NeuronLoopInterchange": 0.0014586448669433594,
|
107 |
+
"NeuronSimplifier": 0.009086847305297852,
|
108 |
+
"NeuronSimplifyPredicates": 0.006235837936401367,
|
109 |
+
"NeuronValueNumbering": 0.0030777454376220703,
|
110 |
+
"OptimizeAliasedCopyChain": 0.0006422996520996094,
|
111 |
+
"OptimizeNKIKernels": 0.5174376964569092,
|
112 |
+
"PAGLayoutOpt": 0.12734031677246094,
|
113 |
+
"PComputeCutting": 0.005000591278076172,
|
114 |
+
"PGLayoutTilingPipeline": 0.8229436874389648,
|
115 |
+
"PGTiling": 0.26772499084472656,
|
116 |
+
"PadElimination": 0.0005135536193847656,
|
117 |
+
"ParAxesAnnotation": 0.07412934303283691,
|
118 |
+
"PartialLoopFusion": 0.013575553894042969,
|
119 |
+
"PartialSimdFusion": 0.011231422424316406,
|
120 |
+
"PenguinizeFunctions": 4.099999932805076e-05,
|
121 |
+
"PerfectLoopNest": 0.0019729137420654297,
|
122 |
+
"PruneFunctions": 2.5000001187436283e-05,
|
123 |
+
"RecognizeOpIdiom": 0.0038080215454101563,
|
124 |
+
"Recompute": 0.00034308433532714844,
|
125 |
+
"RelaxPredicates": 0.004430532455444336,
|
126 |
+
"Rematerialization": 0.002201557159423828,
|
127 |
+
"RemoveOptimizationBarriers": 7.999999797903001e-05,
|
128 |
+
"ReshapeWeights": 0.0009114742279052734,
|
129 |
+
"ResolveAccessConflict": 0.027348041534423828,
|
130 |
+
"ResolveComplicatePredicates": 0.0011477470397949219,
|
131 |
+
"RewriteReplicationMatmul": 0.0025103092193603516,
|
132 |
+
"RewriteWeights": 0.0029447078704833984,
|
133 |
+
"SFKVectorizer": 0.19977569580078125,
|
134 |
+
"ScatterMotion": 3.400000059627928e-05,
|
135 |
+
"SimpleAllReduceTiling": 0.0034945011138916016,
|
136 |
+
"Simplifier": 0.003106832504272461,
|
137 |
+
"SimplifyMacroPredicates": 0.03599357604980469,
|
138 |
+
"SimplifyNeuronTensor": 0.18126153945922852,
|
139 |
+
"SimplifySlice": 0.0016787052154541016,
|
140 |
+
"SimplifyTensor": 0.04330563545227051,
|
141 |
+
"SpillPSum": 0.06513023376464844,
|
142 |
+
"SplitAPUnionSets": 0.012967586517333984,
|
143 |
+
"SplitAccGrp": 0.0015358924865722656,
|
144 |
+
"StaticProfiler": 0.00551915168762207,
|
145 |
+
"StaticTransposeLocalTensor": 0.004834890365600586,
|
146 |
+
"SundaISel": 0.0945746898651123,
|
147 |
+
"TCTransform": 0.0009295940399169922,
|
148 |
+
"TensorInitialization": 0.006634950637817383,
|
149 |
+
"TensorOpSimplifier": 0.005204439163208008,
|
150 |
+
"TensorOpTransform": 0.02082967758178711,
|
151 |
+
"TensorizerLegalizationPass": 5.0000002374872565e-05,
|
152 |
+
"TileCCOps": 0.006725311279296875,
|
153 |
+
"TilingProfiler": 0.016322612762451172,
|
154 |
+
"TransformConvOp": 0.0029544830322265625,
|
155 |
+
"TritiumFusion": 0.09467315673828125,
|
156 |
+
"ValueNumbering": 0.0020852088928222656,
|
157 |
+
"VectorizeDMA": 0.0017535686492919922,
|
158 |
+
"VectorizeMatMult": 0.008865118026733398,
|
159 |
+
"VerifySupportedOps": 3.300000025774352e-05,
|
160 |
+
"WeightCoalescing": 0.003345489501953125,
|
161 |
+
"ZeroSizeTensorElimination": 0.00018644332885742188,
|
162 |
+
"algsimp": 0.0030140001326799393,
|
163 |
+
"batchnorm_expander": 4.400000034365803e-05,
|
164 |
+
"boundary-marker-removal": 1.1000000085914508e-05,
|
165 |
+
"call-inliner": 0.0004670000053010881,
|
166 |
+
"canonicalize-boundary-marker": 1.4999999621068127e-05,
|
167 |
+
"collective-stream-id-checker": 0.00010199999815085903,
|
168 |
+
"comparison-expander": 0.0005569999921135604,
|
169 |
+
"computation-deduplicator": 6.500000017695129e-05,
|
170 |
+
"conditional-to-select": 1.700000029813964e-05,
|
171 |
+
"config-lowering": 0.0001630000042496249,
|
172 |
+
"constant-statistics": 0.0005039999959990382,
|
173 |
+
"constant_folding": 0.0002969999914057553,
|
174 |
+
"cse": 6.0999998822808266e-05,
|
175 |
+
"dce": 8.600000001024455e-05,
|
176 |
+
"dot_decomposer": 0.001433999976143241,
|
177 |
+
"dynamic-slice-transpose": 1.2999999853491317e-05,
|
178 |
+
"eliminate-redundant-compare": 0.0002640000020619482,
|
179 |
+
"emit-offloaded-dropout": 6.500000017695129e-05,
|
180 |
+
"flatten-call-graph": 0.0007960000075399876,
|
181 |
+
"fuse-send-recv": 7.000000186963007e-05,
|
182 |
+
"hilo::LegalizeAlias": 1.4999999621068127e-05,
|
183 |
+
"hilo::NeuronInstCombine": 0.00012399999832268804,
|
184 |
+
"hilo::NeuronOpFusion": 6.399999983841553e-05,
|
185 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 4.5000000682193786e-05,
|
186 |
+
"hilo::ScheduleFusion": 1.300000076298602e-05,
|
187 |
+
"hilo::SixtyFourHack": 6.800000119255856e-05,
|
188 |
+
"hilo::VerifyAliasing": 6.000000212225132e-06,
|
189 |
+
"hlo-mac-count": 0.0012410000199452043,
|
190 |
+
"hlo-verifier": 0.010365999303758144,
|
191 |
+
"instruction-histogram": 0.0010479999473318458,
|
192 |
+
"io-con-pipe-begin": 7.999999979801942e-06,
|
193 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
194 |
+
"io-layout-normalization": 0.0016609999584034085,
|
195 |
+
"io-statistics": 0.0001049999991664663,
|
196 |
+
"legalize-ccops": 4.999999873689376e-06,
|
197 |
+
"legalize-compare": 1.1000000085914508e-05,
|
198 |
+
"lower-argminmax-custom-call": 9.999999747378752e-06,
|
199 |
+
"map-inline": 0.0009129999671131372,
|
200 |
+
"metadata-naming": 5.400000372901559e-05,
|
201 |
+
"mlir::detail::OpToOpPassAdaptor": 8.199999865610152e-05,
|
202 |
+
"mlir::hlo::MhloToPyPenguin": 0.07495799660682678,
|
203 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00035899996873922646,
|
204 |
+
"mlir::mhlo::LowerComplexPass": 0.0005389999714680016,
|
205 |
+
"native-to-custom-softmax": 0.000842000066768378,
|
206 |
+
"native-to-custom-softmax-dx": 0.0008819999638944864,
|
207 |
+
"operand_upcaster": 6.800000119255856e-05,
|
208 |
+
"opt-barrier-removal": 0.0005799999926239252,
|
209 |
+
"post-par-pipe-begin": 1.700000029813964e-05,
|
210 |
+
"post-par-pipe-end": 0.0,
|
211 |
+
"post-partition-simplification": 0.0018259999342262745,
|
212 |
+
"pre-par-pipe-begin": 9.999999974752427e-07,
|
213 |
+
"pre-par-pipe-end": 0.0,
|
214 |
+
"pre-partition-simplification": 0.2598330080509186,
|
215 |
+
"replace-minimum-constant": 0.0004039999912492931,
|
216 |
+
"reshape-mover": 0.00012399999832268804,
|
217 |
+
"simplify-concat": 0.0001630000042496249,
|
218 |
+
"simplify-while-loops": 0.00010000000474974513,
|
219 |
+
"transform-variadic-reduce": 0.0001939999929163605,
|
220 |
+
"tuple-simplifier": 0.0003140000335406512,
|
221 |
+
"unpack-nested-aws-ntwsr": 0.0003929999948013574,
|
222 |
+
"unroll-while-loop": 1.8000000636675395e-05,
|
223 |
+
"zero_sized_hlo_elimination": 0.0009759999811649323
|
224 |
+
},
|
225 |
+
"hilo": {
|
226 |
+
"ConstantSize": 4203477.0,
|
227 |
+
"HloInputCount": 359.0,
|
228 |
+
"HloMacCount": 481103446016.0,
|
229 |
+
"HloOutputCount": 65.0,
|
230 |
+
"IfmapSize": 7785177088.0,
|
231 |
+
"OfmapSize": 536870912.0,
|
232 |
+
"OutputsReadFromCount": 0.0,
|
233 |
+
"PassthroughTensorsCount": 0.0,
|
234 |
+
"RedundantOutputCount": 0.0,
|
235 |
+
"Traffic": 975382912.0
|
236 |
+
},
|
237 |
+
"tensorizer": {
|
238 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 28921.0,
|
239 |
+
"StaticProfiler::AifUb": 1080.6693115234375,
|
240 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 449.7645263671875,
|
241 |
+
"StaticProfiler::AverageDmaLength": 1323.6162109375,
|
242 |
+
"StaticProfiler::DDRTransferBytes": 826525760.0,
|
243 |
+
"StaticProfiler::InternalTransferBytes": 96576528.0,
|
244 |
+
"StaticProfiler::LoadExpanded": 619540.0,
|
245 |
+
"StaticProfiler::StoreExpanded": 12842.0,
|
246 |
+
"StaticProfiler::TotalDMAExpanded": 632382.0,
|
247 |
+
"StaticProfiler::TotalDynamicInstancesCount": 34834.0,
|
248 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 34738.0,
|
249 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
250 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
251 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
252 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
253 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
|
254 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
|
255 |
+
"TilingProfiler::NumPfTransposes": 4.0,
|
256 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
257 |
+
"TilingProfiler::NumPfTransposesForLocal": 1.0,
|
258 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
|
259 |
+
"TilingProfiler::PfTransposeInstructions": 1537.0,
|
260 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
261 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
|
262 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1536.0,
|
263 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 10.0,
|
264 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 626.0,
|
265 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
266 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
267 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
268 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
269 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
270 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
271 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
272 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
273 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
274 |
+
}
|
275 |
+
},
|
276 |
+
"all": {
|
277 |
+
"compiletime": {
|
278 |
+
"algsimp": 0.00279300007969141,
|
279 |
+
"call-inliner": 0.00043799998820759356,
|
280 |
+
"collective-stream-id-checker": 8.600000001024455e-05,
|
281 |
+
"comparison-expander": 0.0005419999943114817,
|
282 |
+
"constant-statistics": 0.0005039999959990382,
|
283 |
+
"constant_folding": 0.0002699999895412475,
|
284 |
+
"dce": 8.099999831756577e-05,
|
285 |
+
"dot_decomposer": 0.001433999976143241,
|
286 |
+
"eliminate-redundant-compare": 0.00025400001322850585,
|
287 |
+
"flatten-call-graph": 0.0007660000119358301,
|
288 |
+
"hlo-mac-count": 0.0009599999757483602,
|
289 |
+
"hlo-verifier": 0.009782999753952026,
|
290 |
+
"instruction-histogram": 0.0010479999473318458,
|
291 |
+
"io-con-pipe-begin": 7.999999979801942e-06,
|
292 |
+
"io-con-pipe-end": 9.999999974752427e-07,
|
293 |
+
"io-layout-normalization": 0.0016609999584034085,
|
294 |
+
"io-statistics": 0.0001049999991664663,
|
295 |
+
"map-inline": 0.0008759999764151871,
|
296 |
+
"native-to-custom-softmax": 0.0008140000281855464,
|
297 |
+
"native-to-custom-softmax-dx": 0.0007149999728426337,
|
298 |
+
"opt-barrier-removal": 0.0005799999926239252,
|
299 |
+
"pre-par-pipe-begin": 9.999999974752427e-07,
|
300 |
+
"pre-par-pipe-end": 0.0,
|
301 |
+
"pre-partition-simplification": 0.2598330080509186,
|
302 |
+
"replace-minimum-constant": 0.0003769999893847853,
|
303 |
+
"reshape-mover": 0.00011300000187475234,
|
304 |
+
"simplify-while-loops": 9.300000237999484e-05,
|
305 |
+
"tuple-simplifier": 0.0003000000142492354,
|
306 |
+
"unpack-nested-aws-ntwsr": 0.0003809999907389283,
|
307 |
+
"unroll-while-loop": 1.8000000636675395e-05,
|
308 |
+
"zero_sized_hlo_elimination": 0.0009759999811649323
|
309 |
+
}
|
310 |
+
},
|
311 |
+
"cumsum": {
|
312 |
+
"compiletime": {
|
313 |
+
"CoalesceCCOp": 0.00027561187744140625,
|
314 |
+
"DMALocalityOpt": 0.0002129077911376953,
|
315 |
+
"DMAProfiler": 0.0009992122650146484,
|
316 |
+
"DataStreaming": 0.0003039836883544922,
|
317 |
+
"DoNothing": 0.0001742839813232422,
|
318 |
+
"ExpandISAMacro": 0.0005218982696533203,
|
319 |
+
"FactorizeBlkDims": 0.0004630088806152344,
|
320 |
+
"InferPSumTensor": 0.0004932880401611328,
|
321 |
+
"LateLegalizeInst": 0.0005190372467041016,
|
322 |
+
"LateNeuronInstComb": 0.0005123615264892578,
|
323 |
+
"LegalizeSundaAccess": 0.0015988349914550781,
|
324 |
+
"LegalizeType": 0.00028014183044433594,
|
325 |
+
"LowerBroadcast": 0.00025653839111328125,
|
326 |
+
"LowerIntrinsics": 0.0002598762512207031,
|
327 |
+
"LowerTranspose": 0.00026535987854003906,
|
328 |
+
"NeuronInstComb": 0.0005023479461669922,
|
329 |
+
"NeuronLICM": 0.00043654441833496094,
|
330 |
+
"NeuronSimplifyPredicates": 0.0028448104858398438,
|
331 |
+
"NeuronValueNumbering": 0.0004410743713378906,
|
332 |
+
"SFKVectorizer": 0.0033159255981445313,
|
333 |
+
"SimpleAllReduceTiling": 0.00028634071350097656,
|
334 |
+
"SimplifyNeuronTensor": 0.0004749298095703125,
|
335 |
+
"SpillPSum": 0.0005846023559570313,
|
336 |
+
"WeightCoalescing": 0.00024771690368652344
|
337 |
+
}
|
338 |
+
},
|
339 |
+
"sg00": {
|
340 |
+
"compiletime": {
|
341 |
+
"CanonicalizeConv": 4.3000000005122274e-05,
|
342 |
+
"CanonicalizeForTensorizer": 1.4999999621068127e-05,
|
343 |
+
"Canonicalizer": 0.00034500000765547156,
|
344 |
+
"HoistCompute": 1.9999999949504854e-06,
|
345 |
+
"IdentifyCrossPassTensors": 1.2999999853491317e-05,
|
346 |
+
"MemcastMotion": 1.2999999853491317e-05,
|
347 |
+
"PenguinizeFunctions": 1.2999999853491317e-05,
|
348 |
+
"PruneFunctions": 1.2000000424450263e-05,
|
349 |
+
"RemoveOptimizationBarriers": 1.2999999853491317e-05,
|
350 |
+
"ScatterMotion": 2.9000000722589903e-05,
|
351 |
+
"TensorizerLegalizationPass": 2.099999983329326e-05,
|
352 |
+
"VerifySupportedOps": 9.999999747378752e-06,
|
353 |
+
"algsimp": 7.699999696342275e-05,
|
354 |
+
"batchnorm_expander": 1.4000000192027073e-05,
|
355 |
+
"boundary-marker-removal": 3.000000106112566e-06,
|
356 |
+
"call-inliner": 9.000000318337698e-06,
|
357 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
358 |
+
"collective-stream-id-checker": 3.999999989900971e-06,
|
359 |
+
"comparison-expander": 3.999999989900971e-06,
|
360 |
+
"computation-deduplicator": 1.8000000636675395e-05,
|
361 |
+
"conditional-to-select": 4.999999873689376e-06,
|
362 |
+
"config-lowering": 5.6000000768108293e-05,
|
363 |
+
"constant_folding": 7.999999979801942e-06,
|
364 |
+
"cse": 2.499999936844688e-05,
|
365 |
+
"dce": 1.9999999949504854e-06,
|
366 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
367 |
+
"eliminate-redundant-compare": 3.999999989900971e-06,
|
368 |
+
"emit-offloaded-dropout": 2.099999983329326e-05,
|
369 |
+
"flatten-call-graph": 7.999999979801942e-06,
|
370 |
+
"fuse-send-recv": 2.300000051036477e-05,
|
371 |
+
"hilo::LegalizeAlias": 4.999999873689376e-06,
|
372 |
+
"hilo::NeuronInstCombine": 2.9999999242136255e-05,
|
373 |
+
"hilo::NeuronOpFusion": 1.8000000636675395e-05,
|
374 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 4.999999873689376e-06,
|
375 |
+
"hilo::ScheduleFusion": 9.999999974752427e-07,
|
376 |
+
"hilo::SixtyFourHack": 1.1000000085914508e-05,
|
377 |
+
"hilo::VerifyAliasing": 1.9999999949504854e-06,
|
378 |
+
"hlo-mac-count": 3.7000001611886546e-05,
|
379 |
+
"hlo-verifier": 0.00017499999376013875,
|
380 |
+
"legalize-ccops": 1.9999999949504854e-06,
|
381 |
+
"legalize-compare": 3.999999989900971e-06,
|
382 |
+
"lower-argminmax-custom-call": 3.000000106112566e-06,
|
383 |
+
"map-inline": 1.1000000085914508e-05,
|
384 |
+
"metadata-naming": 1.700000029813964e-05,
|
385 |
+
"mlir::detail::OpToOpPassAdaptor": 3.300000025774352e-05,
|
386 |
+
"mlir::hlo::MhloToPyPenguin": 0.03136799857020378,
|
387 |
+
"mlir::mhlo::LowerComplexExtraPass": 9.899999713525176e-05,
|
388 |
+
"mlir::mhlo::LowerComplexPass": 0.00019999999494757503,
|
389 |
+
"native-to-custom-softmax": 7.999999979801942e-06,
|
390 |
+
"native-to-custom-softmax-dx": 9.300000237999484e-05,
|
391 |
+
"operand_upcaster": 2.700000004551839e-05,
|
392 |
+
"post-par-pipe-begin": 1.9999999949504854e-06,
|
393 |
+
"post-par-pipe-end": 0.0,
|
394 |
+
"post-partition-simplification": 0.0006479999865405262,
|
395 |
+
"replace-minimum-constant": 9.000000318337698e-06,
|
396 |
+
"reshape-mover": 3.999999989900971e-06,
|
397 |
+
"simplify-concat": 5.900000178371556e-05,
|
398 |
+
"simplify-while-loops": 1.9999999949504854e-06,
|
399 |
+
"transform-variadic-reduce": 9.000000318337698e-06,
|
400 |
+
"tuple-simplifier": 3.999999989900971e-06,
|
401 |
+
"unpack-nested-aws-ntwsr": 3.000000106112566e-06,
|
402 |
+
"unroll-while-loop": 0.0
|
403 |
+
},
|
404 |
+
"hilo": {
|
405 |
+
"ArithmeticIntensity": 439.27252197265625,
|
406 |
+
"ConstantSize": 4203477.0,
|
407 |
+
"HloInputCount": 359.0,
|
408 |
+
"HloMacCount": 60129542144.0,
|
409 |
+
"HloOutputCount": 65.0,
|
410 |
+
"IfmapSize": 7785177088.0,
|
411 |
+
"OfmapSize": 536870912.0,
|
412 |
+
"OutputsReadFromCount": 0.0,
|
413 |
+
"PassthroughTensorsCount": 0.0,
|
414 |
+
"RedundantOutputCount": 0.0,
|
415 |
+
"Traffic": 273768736.0
|
416 |
+
}
|
417 |
+
},
|
418 |
+
"sg0000": {
|
419 |
+
"compiletime": {
|
420 |
+
"AGOrderingAnalysisPass": 0.13596534729003906,
|
421 |
+
"AffinePredicateResolution": 0.0015311241149902344,
|
422 |
+
"AliasDependencyElimination": 0.0001938343048095703,
|
423 |
+
"AliasDependencyInduction": 0.007838010787963867,
|
424 |
+
"AliasDependencyReset": 0.15939617156982422,
|
425 |
+
"BFComputeCutting": 0.006036996841430664,
|
426 |
+
"BirCodeGenLoop": 0.38369321823120117,
|
427 |
+
"CCOpFusion": 0.15093040466308594,
|
428 |
+
"CanonicalizeDAGForPGTiling": 0.014190196990966797,
|
429 |
+
"CanonicalizeIR": 0.0019371509552001953,
|
430 |
+
"CoalesceCCOp": 0.0029022693634033203,
|
431 |
+
"CommuteConcat": 0.0010671615600585938,
|
432 |
+
"DMALocalityOpt": 0.0018265247344970703,
|
433 |
+
"DMAProfiler": 0.006582021713256836,
|
434 |
+
"DMATilingProfiler": 0.005391597747802734,
|
435 |
+
"DataLocalityOpt": 0.20601868629455566,
|
436 |
+
"DataStreaming": 0.00843048095703125,
|
437 |
+
"DeConcat": 0.0018315315246582031,
|
438 |
+
"DeadCodeElimination": 0.0020117759704589844,
|
439 |
+
"DeadStoreElimination": 0.027777433395385742,
|
440 |
+
"DelinearIndices": 0.029506444931030273,
|
441 |
+
"Delinearization": 0.003535747528076172,
|
442 |
+
"DoNothing": 0.0001571178436279297,
|
443 |
+
"DramToDramTranspose": 0.07804989814758301,
|
444 |
+
"DumpGraphAndMetadata": 0.04837989807128906,
|
445 |
+
"EliminateDivs": 0.0034132003784179688,
|
446 |
+
"ExpandBatchNorm": 0.0020427703857421875,
|
447 |
+
"ExpandISAMacro": 0.0035333633422851563,
|
448 |
+
"FactorizeBlkDims": 0.06211543083190918,
|
449 |
+
"FactorizeThreadAxesInFreeDims": 0.0018017292022705078,
|
450 |
+
"FlattenMacroLoop": 0.005364418029785156,
|
451 |
+
"GenericAccessSimplifier": 0.0018382072448730469,
|
452 |
+
"InferInitValue": 0.04181218147277832,
|
453 |
+
"InferIntrinsicOnCC": 0.05515456199645996,
|
454 |
+
"InferNeuronTensor": 0.08455061912536621,
|
455 |
+
"InferNonlocalTensors": 0.3793964385986328,
|
456 |
+
"InferPSumTensor": 0.06014227867126465,
|
457 |
+
"InlineNativeKernels": 0.0018780231475830078,
|
458 |
+
"InsertIOTransposes": 0.05663871765136719,
|
459 |
+
"InsertLocalTransposes": 0.013693094253540039,
|
460 |
+
"InsertOffloadedTransposes": 0.003034353256225586,
|
461 |
+
"LICM": 0.0034589767456054688,
|
462 |
+
"LateLegalizeInst": 0.01206350326538086,
|
463 |
+
"LateLegalizePostSplit": 0.004300355911254883,
|
464 |
+
"LateLowerReshapeOp": 0.001447439193725586,
|
465 |
+
"LateLowerTensorOp": 0.005361080169677734,
|
466 |
+
"LateNeuronInstComb": 0.028362512588500977,
|
467 |
+
"LayoutPreprocessing": 0.17102479934692383,
|
468 |
+
"LayoutPreprocessingAndAnalysis": 0.20053863525390625,
|
469 |
+
"LayoutRequirementAnalysis": 0.00810098648071289,
|
470 |
+
"LegalizeCCOpLayout": 0.002534151077270508,
|
471 |
+
"LegalizeOpLevelAlias": 0.0013082027435302734,
|
472 |
+
"LegalizePartitionReduce": 0.0018541812896728516,
|
473 |
+
"LegalizeSundaAccess": 0.06417489051818848,
|
474 |
+
"LegalizeSundaMacro": 0.011395931243896484,
|
475 |
+
"LegalizeType": 0.004536867141723633,
|
476 |
+
"LocalLayoutOpt": 0.019284486770629883,
|
477 |
+
"LoopFusion": 0.005501747131347656,
|
478 |
+
"LoopSplitting": 0.0007183551788330078,
|
479 |
+
"LowerBroadcast": 0.0020034313201904297,
|
480 |
+
"LowerCCOpBlockAxis": 0.006723642349243164,
|
481 |
+
"LowerComplexBroadcast": 0.0025110244750976563,
|
482 |
+
"LowerIntrinsics": 0.04395008087158203,
|
483 |
+
"LowerTensorOp": 0.01201629638671875,
|
484 |
+
"LowerTranspose": 0.015764951705932617,
|
485 |
+
"MacroGeneration": 0.1732039451599121,
|
486 |
+
"MaskPropagation": 0.006498575210571289,
|
487 |
+
"MemcpyElimination": 0.13526344299316406,
|
488 |
+
"MutateDataType": 0.0024404525756835938,
|
489 |
+
"NeuronAliasDependencyInduction": 0.00028133392333984375,
|
490 |
+
"NeuronAliasDependencyReset": 0.027801036834716797,
|
491 |
+
"NeuronInstComb": 0.014089107513427734,
|
492 |
+
"NeuronLICM": 0.011513233184814453,
|
493 |
+
"NeuronLoopFusion": 0.018094778060913086,
|
494 |
+
"NeuronLoopInterchange": 0.002248525619506836,
|
495 |
+
"NeuronSimplifier": 0.014221668243408203,
|
496 |
+
"NeuronSimplifyPredicates": 0.04183816909790039,
|
497 |
+
"NeuronValueNumbering": 0.010004520416259766,
|
498 |
+
"OptimizeAliasedCopyChain": 0.0007202625274658203,
|
499 |
+
"OptimizeNKIKernels": 0.0027985572814941406,
|
500 |
+
"PAGLayoutOpt": 0.6076157093048096,
|
501 |
+
"PComputeCutting": 0.01562190055847168,
|
502 |
+
"PGLayoutTilingPipeline": 1.8925251960754395,
|
503 |
+
"PGTiling": 0.4175417423248291,
|
504 |
+
"PadElimination": 0.0005469322204589844,
|
505 |
+
"ParAxesAnnotation": 0.5765500068664551,
|
506 |
+
"PartialLoopFusion": 0.06665897369384766,
|
507 |
+
"PartialSimdFusion": 0.06845211982727051,
|
508 |
+
"PerfectLoopNest": 0.002520322799682617,
|
509 |
+
"RecognizeOpIdiom": 0.0038416385650634766,
|
510 |
+
"Recompute": 0.00042510032653808594,
|
511 |
+
"RelaxPredicates": 0.004330158233642578,
|
512 |
+
"Rematerialization": 0.0048253536224365234,
|
513 |
+
"ReshapeWeights": 0.0009126663208007813,
|
514 |
+
"ResolveAccessConflict": 0.007032871246337891,
|
515 |
+
"ResolveComplicatePredicates": 0.0016722679138183594,
|
516 |
+
"RewriteReplicationMatmul": 0.0017805099487304688,
|
517 |
+
"RewriteWeights": 0.00464630126953125,
|
518 |
+
"SFKVectorizer": 0.6191775798797607,
|
519 |
+
"SimpleAllReduceTiling": 0.0028734207153320313,
|
520 |
+
"Simplifier": 0.04510617256164551,
|
521 |
+
"SimplifyMacroPredicates": 0.03182697296142578,
|
522 |
+
"SimplifyNeuronTensor": 0.018846511840820313,
|
523 |
+
"SimplifySlice": 0.0010728836059570313,
|
524 |
+
"SimplifyTensor": 0.00718235969543457,
|
525 |
+
"SpillPSum": 0.02726292610168457,
|
526 |
+
"SplitAPUnionSets": 0.20770835876464844,
|
527 |
+
"SplitAccGrp": 0.0018444061279296875,
|
528 |
+
"StaticProfiler": 0.009473562240600586,
|
529 |
+
"StaticTransposeLocalTensor": 0.0051805973052978516,
|
530 |
+
"SundaISel": 0.0508725643157959,
|
531 |
+
"TCTransform": 0.0011992454528808594,
|
532 |
+
"TensorInitialization": 0.02745676040649414,
|
533 |
+
"TensorOpSimplifier": 0.006608009338378906,
|
534 |
+
"TensorOpTransform": 0.026006698608398438,
|
535 |
+
"TileCCOps": 0.008507728576660156,
|
536 |
+
"TilingProfiler": 0.015691757202148438,
|
537 |
+
"TransformConvOp": 0.002533435821533203,
|
538 |
+
"TritiumFusion": 0.1403183937072754,
|
539 |
+
"ValueNumbering": 0.0023522377014160156,
|
540 |
+
"VectorizeDMA": 0.006667613983154297,
|
541 |
+
"VectorizeMatMult": 0.025510072708129883,
|
542 |
+
"WeightCoalescing": 0.002580881118774414,
|
543 |
+
"ZeroSizeTensorElimination": 0.0002429485321044922
|
544 |
+
},
|
545 |
+
"tensorizer": {
|
546 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 15146.0,
|
547 |
+
"StaticProfiler::AifUb": 590.4973754882813,
|
548 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 748.2540283203125,
|
549 |
+
"StaticProfiler::AverageDmaLength": 2622.051025390625,
|
550 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.97018432617188,
|
551 |
+
"StaticProfiler::AveragePartitionUtilization": 99.92617797851563,
|
552 |
+
"StaticProfiler::AveragePeUtilization": 99.87796020507813,
|
553 |
+
"StaticProfiler::DDRTransferBytes": 196215040.0,
|
554 |
+
"StaticProfiler::InternalTransferBytes": 332922880.0,
|
555 |
+
"StaticProfiler::LoadExpanded": 37252.0,
|
556 |
+
"StaticProfiler::LocalizationEfficiency": 126.71589660644531,
|
557 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 180.83277893066406,
|
558 |
+
"StaticProfiler::StoreExpanded": 16897.0,
|
559 |
+
"StaticProfiler::TotalDMAExpanded": 54149.0,
|
560 |
+
"StaticProfiler::TotalDynamicInstancesCount": 23848.0,
|
561 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 23836.0,
|
562 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
563 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
564 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
565 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
566 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
567 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
568 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 192.0,
|
569 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 7184.0,
|
570 |
+
"TilingProfiler::NumPfTransposes": 8.0,
|
571 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
572 |
+
"TilingProfiler::NumPfTransposesForLocal": 6.0,
|
573 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
574 |
+
"TilingProfiler::PfTransposeInstructions": 5568.0,
|
575 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
576 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 4800.0,
|
577 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
|
578 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
579 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 1764.0,
|
580 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
581 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
582 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
583 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
584 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
585 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
586 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
587 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
588 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
589 |
+
}
|
590 |
+
},
|
591 |
+
"sg0001": {
|
592 |
+
"compiletime": {
|
593 |
+
"AGOrderingAnalysisPass": 0.1609482765197754,
|
594 |
+
"AffinePredicateResolution": 0.0013859272003173828,
|
595 |
+
"AliasDependencyElimination": 0.00018930435180664063,
|
596 |
+
"AliasDependencyInduction": 0.01599717140197754,
|
597 |
+
"AliasDependencyReset": 0.031088829040527344,
|
598 |
+
"BFComputeCutting": 0.004532814025878906,
|
599 |
+
"BirCodeGenLoop": 0.21514463424682617,
|
600 |
+
"CCOpFusion": 0.2648317813873291,
|
601 |
+
"CanonicalizeDAGForPGTiling": 0.003528594970703125,
|
602 |
+
"CanonicalizeIR": 0.0019960403442382813,
|
603 |
+
"CoalesceCCOp": 0.00243377685546875,
|
604 |
+
"CommuteConcat": 0.0011005401611328125,
|
605 |
+
"DMALocalityOpt": 0.0013654232025146484,
|
606 |
+
"DMAProfiler": 0.008886098861694336,
|
607 |
+
"DMATilingProfiler": 0.005682706832885742,
|
608 |
+
"DataLocalityOpt": 0.2774043083190918,
|
609 |
+
"DataStreaming": 0.007985115051269531,
|
610 |
+
"DeConcat": 0.001863241195678711,
|
611 |
+
"DeadCodeElimination": 0.0013644695281982422,
|
612 |
+
"DeadStoreElimination": 0.07262182235717773,
|
613 |
+
"DelinearIndices": 0.047678232192993164,
|
614 |
+
"Delinearization": 0.004866838455200195,
|
615 |
+
"DoNothing": 0.00013303756713867188,
|
616 |
+
"DramToDramTranspose": 0.0971534252166748,
|
617 |
+
"DumpGraphAndMetadata": 0.013672351837158203,
|
618 |
+
"EliminateDivs": 0.003657102584838867,
|
619 |
+
"ExpandBatchNorm": 0.0016169548034667969,
|
620 |
+
"ExpandISAMacro": 0.0034465789794921875,
|
621 |
+
"FactorizeBlkDims": 0.060559749603271484,
|
622 |
+
"FactorizeThreadAxesInFreeDims": 0.0021708011627197266,
|
623 |
+
"FlattenMacroLoop": 0.004648447036743164,
|
624 |
+
"GenericAccessSimplifier": 0.000980377197265625,
|
625 |
+
"InferInitValue": 0.05812406539916992,
|
626 |
+
"InferIntrinsicOnCC": 0.010819196701049805,
|
627 |
+
"InferNeuronTensor": 0.14679336547851563,
|
628 |
+
"InferNonlocalTensors": 0.034285783767700195,
|
629 |
+
"InferPSumTensor": 0.09114336967468262,
|
630 |
+
"InlineNativeKernels": 0.0017209053039550781,
|
631 |
+
"InsertIOTransposes": 0.0731968879699707,
|
632 |
+
"InsertLocalTransposes": 0.0275421142578125,
|
633 |
+
"InsertOffloadedTransposes": 0.007097005844116211,
|
634 |
+
"LICM": 0.0033905506134033203,
|
635 |
+
"LateLegalizeInst": 0.006936788558959961,
|
636 |
+
"LateLegalizePostSplit": 0.003220081329345703,
|
637 |
+
"LateLowerReshapeOp": 0.0016317367553710938,
|
638 |
+
"LateLowerTensorOp": 0.005948543548583984,
|
639 |
+
"LateNeuronInstComb": 0.018251657485961914,
|
640 |
+
"LayoutPreprocessing": 0.09319257736206055,
|
641 |
+
"LayoutPreprocessingAndAnalysis": 0.11977434158325195,
|
642 |
+
"LayoutRequirementAnalysis": 0.009629964828491211,
|
643 |
+
"LegalizeCCOpLayout": 0.0020868778228759766,
|
644 |
+
"LegalizeOpLevelAlias": 0.0011761188507080078,
|
645 |
+
"LegalizePartitionReduce": 0.001623392105102539,
|
646 |
+
"LegalizeSundaAccess": 0.021021366119384766,
|
647 |
+
"LegalizeSundaMacro": 0.012225627899169922,
|
648 |
+
"LegalizeType": 0.02536749839782715,
|
649 |
+
"LocalLayoutOpt": 0.04628801345825195,
|
650 |
+
"LoopFusion": 0.005954742431640625,
|
651 |
+
"LoopSplitting": 0.0006933212280273438,
|
652 |
+
"LowerBroadcast": 0.0018084049224853516,
|
653 |
+
"LowerCCOpBlockAxis": 0.006256580352783203,
|
654 |
+
"LowerComplexBroadcast": 0.002477884292602539,
|
655 |
+
"LowerIntrinsics": 0.03852725028991699,
|
656 |
+
"LowerTensorOp": 0.010782480239868164,
|
657 |
+
"LowerTranspose": 0.018457412719726563,
|
658 |
+
"MacroGeneration": 0.1307680606842041,
|
659 |
+
"MaskPropagation": 0.0035936832427978516,
|
660 |
+
"MemcpyElimination": 0.15900325775146484,
|
661 |
+
"MutateDataType": 0.001459360122680664,
|
662 |
+
"NeuronAliasDependencyInduction": 0.00030994415283203125,
|
663 |
+
"NeuronAliasDependencyReset": 0.0227048397064209,
|
664 |
+
"NeuronInstComb": 0.01124882698059082,
|
665 |
+
"NeuronLICM": 0.010287761688232422,
|
666 |
+
"NeuronLoopFusion": 0.06714057922363281,
|
667 |
+
"NeuronLoopInterchange": 0.0033617019653320313,
|
668 |
+
"NeuronSimplifier": 0.015295267105102539,
|
669 |
+
"NeuronSimplifyPredicates": 0.002671957015991211,
|
670 |
+
"NeuronValueNumbering": 0.004712104797363281,
|
671 |
+
"OptimizeAliasedCopyChain": 0.0008287429809570313,
|
672 |
+
"OptimizeNKIKernels": 0.0030798912048339844,
|
673 |
+
"PAGLayoutOpt": 0.4701688289642334,
|
674 |
+
"PComputeCutting": 0.008523941040039063,
|
675 |
+
"PGLayoutTilingPipeline": 1.527449607849121,
|
676 |
+
"PGTiling": 0.562786340713501,
|
677 |
+
"PadElimination": 0.0005154609680175781,
|
678 |
+
"ParAxesAnnotation": 0.4113032817840576,
|
679 |
+
"PartialLoopFusion": 0.03786206245422363,
|
680 |
+
"PartialSimdFusion": 0.09660077095031738,
|
681 |
+
"PerfectLoopNest": 0.0025701522827148438,
|
682 |
+
"RecognizeOpIdiom": 0.004408836364746094,
|
683 |
+
"Recompute": 0.0004405975341796875,
|
684 |
+
"RelaxPredicates": 0.004298210144042969,
|
685 |
+
"Rematerialization": 0.0020570755004882813,
|
686 |
+
"ReshapeWeights": 0.0008633136749267578,
|
687 |
+
"ResolveAccessConflict": 0.004068136215209961,
|
688 |
+
"ResolveComplicatePredicates": 0.0015447139739990234,
|
689 |
+
"RewriteReplicationMatmul": 0.0018274784088134766,
|
690 |
+
"RewriteWeights": 0.024018287658691406,
|
691 |
+
"SFKVectorizer": 0.5714495182037354,
|
692 |
+
"SimpleAllReduceTiling": 0.05605673789978027,
|
693 |
+
"Simplifier": 0.03458523750305176,
|
694 |
+
"SimplifyMacroPredicates": 0.007905960083007813,
|
695 |
+
"SimplifyNeuronTensor": 0.05205702781677246,
|
696 |
+
"SimplifySlice": 0.0012252330780029297,
|
697 |
+
"SimplifyTensor": 0.007117748260498047,
|
698 |
+
"SpillPSum": 0.0394134521484375,
|
699 |
+
"SplitAPUnionSets": 0.0830078125,
|
700 |
+
"SplitAccGrp": 0.0015587806701660156,
|
701 |
+
"StaticProfiler": 0.008753538131713867,
|
702 |
+
"StaticTransposeLocalTensor": 0.03607439994812012,
|
703 |
+
"SundaISel": 0.06672215461730957,
|
704 |
+
"TCTransform": 0.0011696815490722656,
|
705 |
+
"TensorInitialization": 0.006832122802734375,
|
706 |
+
"TensorOpSimplifier": 0.0061838626861572266,
|
707 |
+
"TensorOpTransform": 0.03341221809387207,
|
708 |
+
"TileCCOps": 0.00767970085144043,
|
709 |
+
"TilingProfiler": 0.07469630241394043,
|
710 |
+
"TransformConvOp": 0.00249481201171875,
|
711 |
+
"TritiumFusion": 0.3289809226989746,
|
712 |
+
"ValueNumbering": 0.0027396678924560547,
|
713 |
+
"VectorizeDMA": 0.0023260116577148438,
|
714 |
+
"VectorizeMatMult": 0.05879783630371094,
|
715 |
+
"WeightCoalescing": 0.002382993698120117,
|
716 |
+
"ZeroSizeTensorElimination": 0.0001971721649169922
|
717 |
+
},
|
718 |
+
"tensorizer": {
|
719 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 37569.0,
|
720 |
+
"StaticProfiler::AifUb": 1576.160400390625,
|
721 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 624.52294921875,
|
722 |
+
"StaticProfiler::AverageDmaLength": 1256.79248046875,
|
723 |
+
"StaticProfiler::AverageFractalPeUtilization": 100.0,
|
724 |
+
"StaticProfiler::AveragePartitionUtilization": 99.870361328125,
|
725 |
+
"StaticProfiler::AveragePeUtilization": 100.0,
|
726 |
+
"StaticProfiler::DDRTransferBytes": 818020352.0,
|
727 |
+
"StaticProfiler::InternalTransferBytes": 284688384.0,
|
728 |
+
"StaticProfiler::LoadExpanded": 616833.0,
|
729 |
+
"StaticProfiler::LocalizationEfficiency": 39.6230583190918,
|
730 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 43.16416549682617,
|
731 |
+
"StaticProfiler::StoreExpanded": 17409.0,
|
732 |
+
"StaticProfiler::TotalDMAExpanded": 634242.0,
|
733 |
+
"StaticProfiler::TotalDynamicInstancesCount": 49371.0,
|
734 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 49371.0,
|
735 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
736 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
737 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
738 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
739 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
740 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
741 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 128.0,
|
742 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 28672.0,
|
743 |
+
"TilingProfiler::NumPfTransposes": 9.0,
|
744 |
+
"TilingProfiler::NumPfTransposesForIo": 3.0,
|
745 |
+
"TilingProfiler::NumPfTransposesForLocal": 4.0,
|
746 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 2.0,
|
747 |
+
"TilingProfiler::PfTransposeInstructions": 5856.0,
|
748 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 544.0,
|
749 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 4288.0,
|
750 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
|
751 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
|
752 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 1876.0,
|
753 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
754 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
755 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
756 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
757 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
758 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
759 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
760 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
761 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
762 |
+
}
|
763 |
+
},
|
764 |
+
"sg0002": {
|
765 |
+
"compiletime": {
|
766 |
+
"AGOrderingAnalysisPass": 0.06499266624450684,
|
767 |
+
"AffinePredicateResolution": 0.00141143798828125,
|
768 |
+
"AliasDependencyElimination": 0.00017595291137695313,
|
769 |
+
"AliasDependencyInduction": 0.006516218185424805,
|
770 |
+
"AliasDependencyReset": 0.02024674415588379,
|
771 |
+
"BFComputeCutting": 0.0023620128631591797,
|
772 |
+
"BirCodeGenLoop": 0.13731598854064941,
|
773 |
+
"CCOpFusion": 0.02620387077331543,
|
774 |
+
"CanonicalizeDAGForPGTiling": 0.0074574947357177734,
|
775 |
+
"CanonicalizeIR": 0.0019347667694091797,
|
776 |
+
"CoalesceCCOp": 0.005384206771850586,
|
777 |
+
"CommuteConcat": 0.0009889602661132813,
|
778 |
+
"DMALocalityOpt": 0.0021970272064208984,
|
779 |
+
"DMAProfiler": 0.007658243179321289,
|
780 |
+
"DMATilingProfiler": 0.04570889472961426,
|
781 |
+
"DataLocalityOpt": 0.1127479076385498,
|
782 |
+
"DataStreaming": 0.00765538215637207,
|
783 |
+
"DeConcat": 0.0007421970367431641,
|
784 |
+
"DeadCodeElimination": 0.002073049545288086,
|
785 |
+
"DeadStoreElimination": 0.006093263626098633,
|
786 |
+
"DelinearIndices": 0.010124444961547852,
|
787 |
+
"Delinearization": 0.005106449127197266,
|
788 |
+
"DoNothing": 0.00013017654418945313,
|
789 |
+
"DramToDramTranspose": 0.03771638870239258,
|
790 |
+
"DumpGraphAndMetadata": 0.05296611785888672,
|
791 |
+
"EliminateDivs": 0.0021944046020507813,
|
792 |
+
"ExpandBatchNorm": 0.0015587806701660156,
|
793 |
+
"ExpandISAMacro": 0.0046465396881103516,
|
794 |
+
"FactorizeBlkDims": 0.011369466781616211,
|
795 |
+
"FactorizeThreadAxesInFreeDims": 0.0014889240264892578,
|
796 |
+
"FlattenMacroLoop": 0.0025510787963867188,
|
797 |
+
"GenericAccessSimplifier": 0.0009717941284179688,
|
798 |
+
"InferInitValue": 0.030786514282226563,
|
799 |
+
"InferIntrinsicOnCC": 0.012189865112304688,
|
800 |
+
"InferNeuronTensor": 0.0819096565246582,
|
801 |
+
"InferNonlocalTensors": 0.025629520416259766,
|
802 |
+
"InferPSumTensor": 0.08428668975830078,
|
803 |
+
"InlineNativeKernels": 0.003083944320678711,
|
804 |
+
"InsertIOTransposes": 0.02764296531677246,
|
805 |
+
"InsertLocalTransposes": 0.0040624141693115234,
|
806 |
+
"InsertOffloadedTransposes": 0.005682229995727539,
|
807 |
+
"LICM": 0.003050565719604492,
|
808 |
+
"LateLegalizeInst": 0.022694110870361328,
|
809 |
+
"LateLegalizePostSplit": 0.004519462585449219,
|
810 |
+
"LateLowerReshapeOp": 0.0023851394653320313,
|
811 |
+
"LateLowerTensorOp": 0.0016567707061767578,
|
812 |
+
"LateNeuronInstComb": 0.010613203048706055,
|
813 |
+
"LayoutPreprocessing": 0.06753706932067871,
|
814 |
+
"LayoutPreprocessingAndAnalysis": 0.16236424446105957,
|
815 |
+
"LayoutRequirementAnalysis": 0.005420684814453125,
|
816 |
+
"LegalizeCCOpLayout": 0.0023717880249023438,
|
817 |
+
"LegalizeOpLevelAlias": 0.0012898445129394531,
|
818 |
+
"LegalizePartitionReduce": 0.0011932849884033203,
|
819 |
+
"LegalizeSundaAccess": 0.025110721588134766,
|
820 |
+
"LegalizeSundaMacro": 0.012512683868408203,
|
821 |
+
"LegalizeType": 0.0470888614654541,
|
822 |
+
"LocalLayoutOpt": 0.0263979434967041,
|
823 |
+
"LoopFusion": 0.005193948745727539,
|
824 |
+
"LoopSplitting": 0.0005512237548828125,
|
825 |
+
"LowerBroadcast": 0.04195570945739746,
|
826 |
+
"LowerCCOpBlockAxis": 0.008313655853271484,
|
827 |
+
"LowerComplexBroadcast": 0.0025756359100341797,
|
828 |
+
"LowerIntrinsics": 0.11726689338684082,
|
829 |
+
"LowerTensorOp": 0.010608196258544922,
|
830 |
+
"LowerTranspose": 0.08231282234191895,
|
831 |
+
"MacroGeneration": 0.07271862030029297,
|
832 |
+
"MaskPropagation": 0.005186557769775391,
|
833 |
+
"MemcpyElimination": 0.026259899139404297,
|
834 |
+
"MutateDataType": 0.0013203620910644531,
|
835 |
+
"NeuronAliasDependencyInduction": 0.0002338886260986328,
|
836 |
+
"NeuronAliasDependencyReset": 0.029464006423950195,
|
837 |
+
"NeuronInstComb": 0.004238128662109375,
|
838 |
+
"NeuronLICM": 0.014646768569946289,
|
839 |
+
"NeuronLoopFusion": 0.00891876220703125,
|
840 |
+
"NeuronLoopInterchange": 0.0014586448669433594,
|
841 |
+
"NeuronSimplifier": 0.009086847305297852,
|
842 |
+
"NeuronSimplifyPredicates": 0.0033910274505615234,
|
843 |
+
"NeuronValueNumbering": 0.0026366710662841797,
|
844 |
+
"OptimizeAliasedCopyChain": 0.0006422996520996094,
|
845 |
+
"OptimizeNKIKernels": 0.5174376964569092,
|
846 |
+
"PAGLayoutOpt": 0.12734031677246094,
|
847 |
+
"PComputeCutting": 0.005000591278076172,
|
848 |
+
"PGLayoutTilingPipeline": 0.8229436874389648,
|
849 |
+
"PGTiling": 0.26772499084472656,
|
850 |
+
"PadElimination": 0.0005135536193847656,
|
851 |
+
"ParAxesAnnotation": 0.07412934303283691,
|
852 |
+
"PartialLoopFusion": 0.013575553894042969,
|
853 |
+
"PartialSimdFusion": 0.011231422424316406,
|
854 |
+
"PerfectLoopNest": 0.0019729137420654297,
|
855 |
+
"RecognizeOpIdiom": 0.0038080215454101563,
|
856 |
+
"Recompute": 0.00034308433532714844,
|
857 |
+
"RelaxPredicates": 0.004430532455444336,
|
858 |
+
"Rematerialization": 0.002201557159423828,
|
859 |
+
"ReshapeWeights": 0.0009114742279052734,
|
860 |
+
"ResolveAccessConflict": 0.027348041534423828,
|
861 |
+
"ResolveComplicatePredicates": 0.0011477470397949219,
|
862 |
+
"RewriteReplicationMatmul": 0.0025103092193603516,
|
863 |
+
"RewriteWeights": 0.0029447078704833984,
|
864 |
+
"SFKVectorizer": 0.19645977020263672,
|
865 |
+
"SimpleAllReduceTiling": 0.003208160400390625,
|
866 |
+
"Simplifier": 0.003106832504272461,
|
867 |
+
"SimplifyMacroPredicates": 0.03599357604980469,
|
868 |
+
"SimplifyNeuronTensor": 0.1807866096496582,
|
869 |
+
"SimplifySlice": 0.0016787052154541016,
|
870 |
+
"SimplifyTensor": 0.04330563545227051,
|
871 |
+
"SpillPSum": 0.0645456314086914,
|
872 |
+
"SplitAPUnionSets": 0.012967586517333984,
|
873 |
+
"SplitAccGrp": 0.0015358924865722656,
|
874 |
+
"StaticProfiler": 0.00551915168762207,
|
875 |
+
"StaticTransposeLocalTensor": 0.004834890365600586,
|
876 |
+
"SundaISel": 0.0945746898651123,
|
877 |
+
"TCTransform": 0.0009295940399169922,
|
878 |
+
"TensorInitialization": 0.006634950637817383,
|
879 |
+
"TensorOpSimplifier": 0.005204439163208008,
|
880 |
+
"TensorOpTransform": 0.02082967758178711,
|
881 |
+
"TileCCOps": 0.006725311279296875,
|
882 |
+
"TilingProfiler": 0.016322612762451172,
|
883 |
+
"TransformConvOp": 0.0029544830322265625,
|
884 |
+
"TritiumFusion": 0.09467315673828125,
|
885 |
+
"ValueNumbering": 0.0020852088928222656,
|
886 |
+
"VectorizeDMA": 0.0017535686492919922,
|
887 |
+
"VectorizeMatMult": 0.008865118026733398,
|
888 |
+
"WeightCoalescing": 0.0030977725982666016,
|
889 |
+
"ZeroSizeTensorElimination": 0.00018644332885742188
|
890 |
+
},
|
891 |
+
"tensorizer": {
|
892 |
+
"DMATilingProfiler::TotalInstructionsAfterTiling": 28921.0,
|
893 |
+
"StaticProfiler::AifUb": 1080.6693115234375,
|
894 |
+
"StaticProfiler::ArithmeticIntensityTensorizer": 449.7645263671875,
|
895 |
+
"StaticProfiler::AverageDmaLength": 1323.6162109375,
|
896 |
+
"StaticProfiler::AverageFractalPeUtilization": 99.88423156738281,
|
897 |
+
"StaticProfiler::AveragePartitionUtilization": 99.71043395996094,
|
898 |
+
"StaticProfiler::AveragePeUtilization": 99.53581237792969,
|
899 |
+
"StaticProfiler::DDRTransferBytes": 826525760.0,
|
900 |
+
"StaticProfiler::InternalTransferBytes": 96576528.0,
|
901 |
+
"StaticProfiler::LoadExpanded": 619540.0,
|
902 |
+
"StaticProfiler::LocalizationEfficiency": 41.61907196044922,
|
903 |
+
"StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 45.55835723876953,
|
904 |
+
"StaticProfiler::StoreExpanded": 12842.0,
|
905 |
+
"StaticProfiler::TotalDMAExpanded": 632382.0,
|
906 |
+
"StaticProfiler::TotalDynamicInstancesCount": 34834.0,
|
907 |
+
"StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 34738.0,
|
908 |
+
"StaticProfiler::TotalLNCComm": 0.0,
|
909 |
+
"StaticProfiler::TotalLNCCommTransfer": 0.0,
|
910 |
+
"TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
|
911 |
+
"TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
|
912 |
+
"TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
|
913 |
+
"TilingProfiler::DmaInstructionsAfterTiling": 0.0,
|
914 |
+
"TilingProfiler::GenericInstructionsAfterTiling": 4.0,
|
915 |
+
"TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
|
916 |
+
"TilingProfiler::NumPfTransposes": 4.0,
|
917 |
+
"TilingProfiler::NumPfTransposesForIo": 0.0,
|
918 |
+
"TilingProfiler::NumPfTransposesForLocal": 1.0,
|
919 |
+
"TilingProfiler::NumPfTransposesForNonlocal": 3.0,
|
920 |
+
"TilingProfiler::PfTransposeInstructions": 1537.0,
|
921 |
+
"TilingProfiler::PfTransposeInstructionsForIo": 0.0,
|
922 |
+
"TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
|
923 |
+
"TilingProfiler::PfTransposeInstructionsForNonlocal": 1536.0,
|
924 |
+
"TilingProfiler::ReduceInstructionsAfterTiling": 10.0,
|
925 |
+
"TilingProfiler::SimdInstructionsAfterTiling": 626.0,
|
926 |
+
"TilingProfiler::TotalInstructionsAfterTiling": 0.0,
|
927 |
+
"TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
|
928 |
+
"TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
|
929 |
+
"TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
|
930 |
+
"TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
|
931 |
+
"TransformConvOp::conv2d_column_packing": 0.0,
|
932 |
+
"TransformConvOp::conv2d_column_packing_1": 0.0,
|
933 |
+
"TransformConvOp::conv2d_column_packing_io10": 0.0,
|
934 |
+
"TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
|
935 |
+
}
|
936 |
+
},
|
937 |
+
"sg01": {
|
938 |
+
"compiletime": {
|
939 |
+
"CanonicalizeConv": 7.000000096013537e-06,
|
940 |
+
"CanonicalizeForTensorizer": 1.9999999494757503e-05,
|
941 |
+
"Canonicalizer": 0.00028300000121816993,
|
942 |
+
"HoistCompute": 1.9999999949504854e-06,
|
943 |
+
"IdentifyCrossPassTensors": 1.4999999621068127e-05,
|
944 |
+
"MemcastMotion": 9.000000318337698e-06,
|
945 |
+
"PenguinizeFunctions": 1.8000000636675395e-05,
|
946 |
+
"PruneFunctions": 4.999999873689376e-06,
|
947 |
+
"RemoveOptimizationBarriers": 5.0999999075429514e-05,
|
948 |
+
"ScatterMotion": 3.999999989900971e-06,
|
949 |
+
"TensorizerLegalizationPass": 2.2000000171829015e-05,
|
950 |
+
"VerifySupportedOps": 1.1000000085914508e-05,
|
951 |
+
"algsimp": 6.800000119255856e-05,
|
952 |
+
"batchnorm_expander": 1.5999999959603883e-05,
|
953 |
+
"boundary-marker-removal": 3.999999989900971e-06,
|
954 |
+
"call-inliner": 9.000000318337698e-06,
|
955 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
956 |
+
"collective-stream-id-checker": 7.000000096013537e-06,
|
957 |
+
"comparison-expander": 4.999999873689376e-06,
|
958 |
+
"computation-deduplicator": 2.4000000848900527e-05,
|
959 |
+
"conditional-to-select": 4.999999873689376e-06,
|
960 |
+
"config-lowering": 4.600000102072954e-05,
|
961 |
+
"constant_folding": 9.999999747378752e-06,
|
962 |
+
"cse": 2.2000000171829015e-05,
|
963 |
+
"dce": 1.9999999949504854e-06,
|
964 |
+
"dynamic-slice-transpose": 3.999999989900971e-06,
|
965 |
+
"eliminate-redundant-compare": 3.000000106112566e-06,
|
966 |
+
"emit-offloaded-dropout": 2.300000051036477e-05,
|
967 |
+
"flatten-call-graph": 9.000000318337698e-06,
|
968 |
+
"fuse-send-recv": 2.5999999706982635e-05,
|
969 |
+
"hilo::LegalizeAlias": 7.999999979801942e-06,
|
970 |
+
"hilo::NeuronInstCombine": 7.79999973019585e-05,
|
971 |
+
"hilo::NeuronOpFusion": 2.8000000384054147e-05,
|
972 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 2.300000051036477e-05,
|
973 |
+
"hilo::ScheduleFusion": 9.999999974752427e-07,
|
974 |
+
"hilo::SixtyFourHack": 1.5999999959603883e-05,
|
975 |
+
"hilo::VerifyAliasing": 3.000000106112566e-06,
|
976 |
+
"hlo-mac-count": 3.899999865097925e-05,
|
977 |
+
"hlo-verifier": 0.00021499999274965376,
|
978 |
+
"legalize-ccops": 1.9999999949504854e-06,
|
979 |
+
"legalize-compare": 3.999999989900971e-06,
|
980 |
+
"lower-argminmax-custom-call": 3.999999989900971e-06,
|
981 |
+
"map-inline": 1.1000000085914508e-05,
|
982 |
+
"metadata-naming": 2.300000051036477e-05,
|
983 |
+
"mlir::detail::OpToOpPassAdaptor": 2.9999999242136255e-05,
|
984 |
+
"mlir::hlo::MhloToPyPenguin": 0.025178000330924988,
|
985 |
+
"mlir::mhlo::LowerComplexExtraPass": 8.70000003487803e-05,
|
986 |
+
"mlir::mhlo::LowerComplexPass": 0.00013099999341648072,
|
987 |
+
"native-to-custom-softmax": 9.999999747378752e-06,
|
988 |
+
"native-to-custom-softmax-dx": 3.600000127335079e-05,
|
989 |
+
"operand_upcaster": 2.300000051036477e-05,
|
990 |
+
"post-par-pipe-begin": 1.1000000085914508e-05,
|
991 |
+
"post-par-pipe-end": 0.0,
|
992 |
+
"post-partition-simplification": 0.000590000010561198,
|
993 |
+
"replace-minimum-constant": 7.000000096013537e-06,
|
994 |
+
"reshape-mover": 3.999999989900971e-06,
|
995 |
+
"simplify-concat": 5.400000009103678e-05,
|
996 |
+
"simplify-while-loops": 1.9999999949504854e-06,
|
997 |
+
"transform-variadic-reduce": 9.999999747378752e-06,
|
998 |
+
"tuple-simplifier": 4.999999873689376e-06,
|
999 |
+
"unpack-nested-aws-ntwsr": 3.999999989900971e-06,
|
1000 |
+
"unroll-while-loop": 0.0
|
1001 |
+
},
|
1002 |
+
"hilo": {
|
1003 |
+
"ArithmeticIntensity": 1411.2052001953125,
|
1004 |
+
"HloMacCount": 240518168576.0,
|
1005 |
+
"Traffic": 340869152.0
|
1006 |
+
}
|
1007 |
+
},
|
1008 |
+
"sg02": {
|
1009 |
+
"compiletime": {
|
1010 |
+
"CanonicalizeConv": 1.1000000085914508e-05,
|
1011 |
+
"CanonicalizeForTensorizer": 1.4999999621068127e-05,
|
1012 |
+
"Canonicalizer": 0.0003480000013951212,
|
1013 |
+
"HoistCompute": 9.999999747378752e-06,
|
1014 |
+
"IdentifyCrossPassTensors": 1.2999999853491317e-05,
|
1015 |
+
"MemcastMotion": 0.0,
|
1016 |
+
"PenguinizeFunctions": 9.999999747378752e-06,
|
1017 |
+
"PruneFunctions": 7.999999979801942e-06,
|
1018 |
+
"RemoveOptimizationBarriers": 1.5999999959603883e-05,
|
1019 |
+
"ScatterMotion": 9.999999974752427e-07,
|
1020 |
+
"TensorizerLegalizationPass": 7.000000096013537e-06,
|
1021 |
+
"VerifySupportedOps": 1.2000000424450263e-05,
|
1022 |
+
"algsimp": 7.599999662488699e-05,
|
1023 |
+
"batchnorm_expander": 1.4000000192027073e-05,
|
1024 |
+
"boundary-marker-removal": 3.999999989900971e-06,
|
1025 |
+
"call-inliner": 1.1000000085914508e-05,
|
1026 |
+
"canonicalize-boundary-marker": 4.999999873689376e-06,
|
1027 |
+
"collective-stream-id-checker": 4.999999873689376e-06,
|
1028 |
+
"comparison-expander": 6.000000212225132e-06,
|
1029 |
+
"computation-deduplicator": 2.300000051036477e-05,
|
1030 |
+
"conditional-to-select": 7.000000096013537e-06,
|
1031 |
+
"config-lowering": 6.0999998822808266e-05,
|
1032 |
+
"constant_folding": 9.000000318337698e-06,
|
1033 |
+
"cse": 1.4000000192027073e-05,
|
1034 |
+
"dce": 9.999999974752427e-07,
|
1035 |
+
"dynamic-slice-transpose": 4.999999873689376e-06,
|
1036 |
+
"eliminate-redundant-compare": 3.000000106112566e-06,
|
1037 |
+
"emit-offloaded-dropout": 2.099999983329326e-05,
|
1038 |
+
"flatten-call-graph": 1.2999999853491317e-05,
|
1039 |
+
"fuse-send-recv": 2.099999983329326e-05,
|
1040 |
+
"hilo::LegalizeAlias": 1.9999999949504854e-06,
|
1041 |
+
"hilo::NeuronInstCombine": 1.5999999959603883e-05,
|
1042 |
+
"hilo::NeuronOpFusion": 1.8000000636675395e-05,
|
1043 |
+
"hilo::ReplaceTokenTypeWithU8Pass": 1.700000029813964e-05,
|
1044 |
+
"hilo::ScheduleFusion": 1.1000000085914508e-05,
|
1045 |
+
"hilo::SixtyFourHack": 4.099999932805076e-05,
|
1046 |
+
"hilo::VerifyAliasing": 9.999999974752427e-07,
|
1047 |
+
"hlo-mac-count": 0.00020500000391621143,
|
1048 |
+
"hlo-verifier": 0.00019299999985378236,
|
1049 |
+
"legalize-ccops": 9.999999974752427e-07,
|
1050 |
+
"legalize-compare": 3.000000106112566e-06,
|
1051 |
+
"lower-argminmax-custom-call": 3.000000106112566e-06,
|
1052 |
+
"map-inline": 1.4999999621068127e-05,
|
1053 |
+
"metadata-naming": 1.4000000192027073e-05,
|
1054 |
+
"mlir::detail::OpToOpPassAdaptor": 1.8999999156221747e-05,
|
1055 |
+
"mlir::hlo::MhloToPyPenguin": 0.018411999568343163,
|
1056 |
+
"mlir::mhlo::LowerComplexExtraPass": 0.00017299999308306724,
|
1057 |
+
"mlir::mhlo::LowerComplexPass": 0.00020799999765586108,
|
1058 |
+
"native-to-custom-softmax": 9.999999747378752e-06,
|
1059 |
+
"native-to-custom-softmax-dx": 3.7999998312443495e-05,
|
1060 |
+
"operand_upcaster": 1.8000000636675395e-05,
|
1061 |
+
"post-par-pipe-begin": 3.999999989900971e-06,
|
1062 |
+
"post-par-pipe-end": 0.0,
|
1063 |
+
"post-partition-simplification": 0.0005879999953322113,
|
1064 |
+
"replace-minimum-constant": 1.1000000085914508e-05,
|
1065 |
+
"reshape-mover": 3.000000106112566e-06,
|
1066 |
+
"simplify-concat": 4.999999873689376e-05,
|
1067 |
+
"simplify-while-loops": 3.000000106112566e-06,
|
1068 |
+
"transform-variadic-reduce": 0.00017499999376013875,
|
1069 |
+
"tuple-simplifier": 4.999999873689376e-06,
|
1070 |
+
"unpack-nested-aws-ntwsr": 4.999999873689376e-06,
|
1071 |
+
"unroll-while-loop": 0.0
|
1072 |
+
},
|
1073 |
+
"hilo": {
|
1074 |
+
"ArithmeticIntensity": 1000.4613647460938,
|
1075 |
+
"HloMacCount": 180455735296.0,
|
1076 |
+
"Traffic": 360745024.0
|
1077 |
+
}
|
1078 |
+
}
|
1079 |
+
}
|
context_encoding_model/_tp0_bk4/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93d468f8c91c8ae558da4744c631e0351092b98d3698d8a39f05082867c022a7
|
3 |
+
size 3298304
|
context_encoding_model/_tp0_bk4/log-neuron-cc.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
context_encoding_model/_tp0_bk4/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14460b3b6b308407432a80fca62093da7cc19d3c26c9d018e923b97ffe30fde0
|
3 |
+
size 2347463
|
context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:592a9cdc4c9b4697249af595e7e4e7ae477f80acdebaede8842f0734e5baf50e
|
3 |
+
size 2413336
|
context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93d468f8c91c8ae558da4744c631e0351092b98d3698d8a39f05082867c022a7
|
3 |
+
size 3298304
|
context_encoding_model/_tp0_bk4/neuron_config.json
ADDED
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": false,
|
3 |
+
"_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
|
4 |
+
"add_cross_attention": false,
|
5 |
+
"architectures": [
|
6 |
+
"MistralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"attribute_map": {},
|
10 |
+
"bad_words_ids": null,
|
11 |
+
"begin_suppress_tokens": null,
|
12 |
+
"bos_token_id": 1,
|
13 |
+
"chunk_size_feed_forward": 0,
|
14 |
+
"cross_attention_hidden_size": null,
|
15 |
+
"decoder_start_token_id": null,
|
16 |
+
"diversity_penalty": 0.0,
|
17 |
+
"do_sample": false,
|
18 |
+
"early_stopping": false,
|
19 |
+
"encoder_no_repeat_ngram_size": 0,
|
20 |
+
"eos_token_id": 2,
|
21 |
+
"exponential_decay_length_penalty": null,
|
22 |
+
"finetuning_task": null,
|
23 |
+
"forced_bos_token_id": null,
|
24 |
+
"forced_eos_token_id": null,
|
25 |
+
"fused_spec_config": null,
|
26 |
+
"head_dim": 128,
|
27 |
+
"hidden_act": "silu",
|
28 |
+
"hidden_size": 4096,
|
29 |
+
"id2label": {
|
30 |
+
"0": "LABEL_0",
|
31 |
+
"1": "LABEL_1"
|
32 |
+
},
|
33 |
+
"initializer_range": 0.02,
|
34 |
+
"intermediate_size": 14336,
|
35 |
+
"is_decoder": false,
|
36 |
+
"is_encoder_decoder": false,
|
37 |
+
"label2id": {
|
38 |
+
"LABEL_0": 0,
|
39 |
+
"LABEL_1": 1
|
40 |
+
},
|
41 |
+
"length_penalty": 1.0,
|
42 |
+
"max_length": 20,
|
43 |
+
"max_position_embeddings": 32768,
|
44 |
+
"metadata": null,
|
45 |
+
"min_length": 0,
|
46 |
+
"model_type": "mistral",
|
47 |
+
"neuron_config": {
|
48 |
+
"activation_quantization_type": null,
|
49 |
+
"allow_input_truncation": false,
|
50 |
+
"apply_seq_ids_mask": false,
|
51 |
+
"async_mode": false,
|
52 |
+
"attention_dp_degree": 1,
|
53 |
+
"attention_dtype": null,
|
54 |
+
"attn_block_cte_nki_kernel_enabled": false,
|
55 |
+
"attn_block_tkg_nki_kernel_cache_update": false,
|
56 |
+
"attn_block_tkg_nki_kernel_enabled": false,
|
57 |
+
"attn_cls": "NeuronLlamaAttention",
|
58 |
+
"attn_kernel_enabled": null,
|
59 |
+
"attn_tkg_builtin_kernel_enabled": false,
|
60 |
+
"attn_tkg_nki_kernel_enabled": false,
|
61 |
+
"batch_size": 1,
|
62 |
+
"bucket_n_active_tokens": true,
|
63 |
+
"buckets": [
|
64 |
+
2048
|
65 |
+
],
|
66 |
+
"cast_type": "config",
|
67 |
+
"cc_pipeline_tiling_factor": 2,
|
68 |
+
"chunked_prefill_config": null,
|
69 |
+
"context_encoding_buckets": [
|
70 |
+
2048
|
71 |
+
],
|
72 |
+
"cp_degree": 1,
|
73 |
+
"ctx_batch_size": 1,
|
74 |
+
"disable_kv_cache_tiling": false,
|
75 |
+
"draft_model_modules_to_not_convert": null,
|
76 |
+
"enable_bucketing": true,
|
77 |
+
"enable_eagle_draft_input_norm": false,
|
78 |
+
"enable_eagle_speculation": false,
|
79 |
+
"enable_fused_speculation": false,
|
80 |
+
"enable_long_context_mode": false,
|
81 |
+
"enable_output_completion_notifications": false,
|
82 |
+
"enable_spill_reload_dge": false,
|
83 |
+
"enable_token_tree": false,
|
84 |
+
"ep_degree": 1,
|
85 |
+
"expert_mlp_nki_kernel_enabled": null,
|
86 |
+
"flash_decoding_enabled": false,
|
87 |
+
"fused_qkv": false,
|
88 |
+
"fused_rmsnorm_skip_gamma": false,
|
89 |
+
"is_block_kv_layout": null,
|
90 |
+
"is_chunked_prefill": false,
|
91 |
+
"is_continuous_batching": true,
|
92 |
+
"is_eagle_draft": false,
|
93 |
+
"is_medusa": false,
|
94 |
+
"is_prefill_stage": true,
|
95 |
+
"is_prefix_caching": false,
|
96 |
+
"k_cache_transposed": false,
|
97 |
+
"kv_cache_batch_size": 4,
|
98 |
+
"kv_cache_padding_size": 0,
|
99 |
+
"kv_cache_quant": false,
|
100 |
+
"kv_cache_tiling": false,
|
101 |
+
"layer_boundary_markers": false,
|
102 |
+
"lm_head_pad": false,
|
103 |
+
"lm_head_pad_alignment_size": 1,
|
104 |
+
"local_ranks_size": 2,
|
105 |
+
"logical_nc_config": 1,
|
106 |
+
"lora_config": null,
|
107 |
+
"max_batch_size": 4,
|
108 |
+
"max_context_length": 2048,
|
109 |
+
"max_length": 2048,
|
110 |
+
"max_new_tokens": null,
|
111 |
+
"medusa_speculation_length": 0,
|
112 |
+
"medusa_tree": null,
|
113 |
+
"mlp_kernel_enabled": false,
|
114 |
+
"mlp_kernel_fuse_residual_add": false,
|
115 |
+
"modules_to_not_convert": null,
|
116 |
+
"moe_fused_nki_kernel_enabled": null,
|
117 |
+
"n_active_tokens": 2048,
|
118 |
+
"n_positions": 2048,
|
119 |
+
"num_medusa_heads": 0,
|
120 |
+
"on_cpu": false,
|
121 |
+
"on_device_sampling_config": {
|
122 |
+
"deterministic": false,
|
123 |
+
"do_sample": false,
|
124 |
+
"dynamic": true,
|
125 |
+
"global_topk": 256,
|
126 |
+
"on_device_sampling_config": true,
|
127 |
+
"temperature": 1.0,
|
128 |
+
"top_k": 1,
|
129 |
+
"top_k_kernel_enabled": false,
|
130 |
+
"top_p": 1.0
|
131 |
+
},
|
132 |
+
"output_logits": false,
|
133 |
+
"overrides_torch_dtype": true,
|
134 |
+
"pa_block_size": 2048,
|
135 |
+
"pa_num_blocks": 4,
|
136 |
+
"padding_side": "right",
|
137 |
+
"pp_degree": 1,
|
138 |
+
"prefix_buckets": null,
|
139 |
+
"qk_layernorm": false,
|
140 |
+
"qkv_kernel_enabled": false,
|
141 |
+
"qkv_kernel_fuse_residual_add": false,
|
142 |
+
"qkv_kernel_nbsd_layout": false,
|
143 |
+
"quantization_dtype": "int8",
|
144 |
+
"quantization_type": "per_tensor_symmetric",
|
145 |
+
"quantize_clamp_bound": Infinity,
|
146 |
+
"quantized": false,
|
147 |
+
"quantized_checkpoints_path": null,
|
148 |
+
"quantized_mlp_kernel_enabled": false,
|
149 |
+
"rmsnorm_quantize_kernel_enabled": false,
|
150 |
+
"router_topk_nki_kernel_enabled": null,
|
151 |
+
"rpl_reduce_dtype": null,
|
152 |
+
"save_sharded_checkpoint": true,
|
153 |
+
"scratchpad_page_size": null,
|
154 |
+
"seq_len": 2048,
|
155 |
+
"seq_len_threshold_for_cc_tiling": 16384,
|
156 |
+
"sequence_parallel_enabled": false,
|
157 |
+
"shared_mlp_nki_kernel_enabled": null,
|
158 |
+
"skip_sharding": false,
|
159 |
+
"skip_warmup": false,
|
160 |
+
"spec_batch_size": 4,
|
161 |
+
"speculation_length": 0,
|
162 |
+
"start_rank_id": 0,
|
163 |
+
"target": null,
|
164 |
+
"tile_cc": false,
|
165 |
+
"tkg_batch_size": 4,
|
166 |
+
"token_generation_buckets": null,
|
167 |
+
"token_tree_config": null,
|
168 |
+
"torch_dtype": "bfloat16",
|
169 |
+
"tp_degree": 2,
|
170 |
+
"vocab_parallel": false,
|
171 |
+
"weight_gather_seq_len_threshold": 32768,
|
172 |
+
"weights_to_skip_layout_optimization": [],
|
173 |
+
"world_size": 2
|
174 |
+
},
|
175 |
+
"no_repeat_ngram_size": 0,
|
176 |
+
"num_attention_heads": 32,
|
177 |
+
"num_beam_groups": 1,
|
178 |
+
"num_beams": 1,
|
179 |
+
"num_cores_per_group": 1,
|
180 |
+
"num_hidden_layers": 32,
|
181 |
+
"num_key_value_heads": 8,
|
182 |
+
"num_return_sequences": 1,
|
183 |
+
"output_attentions": false,
|
184 |
+
"output_hidden_states": false,
|
185 |
+
"output_scores": false,
|
186 |
+
"pad_token_id": 0,
|
187 |
+
"prefix": null,
|
188 |
+
"problem_type": null,
|
189 |
+
"pruned_heads": {},
|
190 |
+
"remove_invalid_values": false,
|
191 |
+
"repetition_penalty": 1.0,
|
192 |
+
"return_dict": true,
|
193 |
+
"return_dict_in_generate": false,
|
194 |
+
"rms_norm_eps": 1e-05,
|
195 |
+
"rope_theta": 1000000.0,
|
196 |
+
"sep_token_id": null,
|
197 |
+
"sliding_window": null,
|
198 |
+
"suppress_tokens": null,
|
199 |
+
"task_specific_params": null,
|
200 |
+
"temperature": 1.0,
|
201 |
+
"tf_legacy_loss": false,
|
202 |
+
"tie_encoder_decoder": false,
|
203 |
+
"tie_word_embeddings": false,
|
204 |
+
"tokenizer_class": null,
|
205 |
+
"top_k": 50,
|
206 |
+
"top_p": 1.0,
|
207 |
+
"torchscript": false,
|
208 |
+
"transformers_version": "4.42.0.dev0",
|
209 |
+
"typical_p": 1.0,
|
210 |
+
"use_bfloat16": false,
|
211 |
+
"use_cache": true,
|
212 |
+
"vocab_size": 32768
|
213 |
+
}
|
layout_opt/command.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
neuronx-cc compile graph.hlo --framework XLA --target trn1 --output graph.neff --model-type=transformer -O1 --lnc=1 '--internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=log-neuron-cc.txt --verbose=35
|
layout_opt/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:140da06783df36b3d25d8903dc194df46247db9b5b03ef10b1abebe30d252275
|
3 |
+
size 5848064
|
layout_opt/log-neuron-cc.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
layout_opt/metaneff
ADDED
@@ -0,0 +1,874 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
(
|
3 |
+
input0���2embed_tokens.weight8
|
4 |
+
;
|
5 |
+
input1� �2'layers.0.self_attn.o_proj.o_proj.weight8
|
6 |
+
=
|
7 |
+
input2�� 2)layers.0.self_attn.qkv_proj.v_proj.weight8
|
8 |
+
1
|
9 |
+
input3� 2layers.0.input_layernorm.weight8
|
10 |
+
=
|
11 |
+
input4�� 2)layers.0.self_attn.qkv_proj.k_proj.weight8
|
12 |
+
=
|
13 |
+
input5�� 2)layers.0.self_attn.qkv_proj.q_proj.weight8
|
14 |
+
1
|
15 |
+
input6� �82layers.0.mlp.down_proj.weight8
|
16 |
+
/
|
17 |
+
input7�8� 2layers.0.mlp.up_proj.weight8
|
18 |
+
:
|
19 |
+
input8� 2(layers.0.post_attention_layernorm.weight8
|
20 |
+
1
|
21 |
+
input9�8� 2layers.0.mlp.gate_proj.weight8
|
22 |
+
<
|
23 |
+
input10� �2'layers.1.self_attn.o_proj.o_proj.weight8
|
24 |
+
>
|
25 |
+
input11�� 2)layers.1.self_attn.qkv_proj.v_proj.weight8
|
26 |
+
2
|
27 |
+
input12� 2layers.1.input_layernorm.weight8
|
28 |
+
>
|
29 |
+
input13�� 2)layers.1.self_attn.qkv_proj.k_proj.weight8
|
30 |
+
>
|
31 |
+
input14�� 2)layers.1.self_attn.qkv_proj.q_proj.weight8
|
32 |
+
2
|
33 |
+
input15� �82layers.1.mlp.down_proj.weight8
|
34 |
+
0
|
35 |
+
input16�8� 2layers.1.mlp.up_proj.weight8
|
36 |
+
;
|
37 |
+
input17� 2(layers.1.post_attention_layernorm.weight8
|
38 |
+
2
|
39 |
+
input18�8� 2layers.1.mlp.gate_proj.weight8
|
40 |
+
<
|
41 |
+
input19� �2'layers.2.self_attn.o_proj.o_proj.weight8
|
42 |
+
>
|
43 |
+
input20�� 2)layers.2.self_attn.qkv_proj.v_proj.weight8
|
44 |
+
2
|
45 |
+
input21� 2layers.2.input_layernorm.weight8
|
46 |
+
>
|
47 |
+
input22�� 2)layers.2.self_attn.qkv_proj.k_proj.weight8
|
48 |
+
>
|
49 |
+
input23�� 2)layers.2.self_attn.qkv_proj.q_proj.weight8
|
50 |
+
2
|
51 |
+
input24� �82layers.2.mlp.down_proj.weight8
|
52 |
+
0
|
53 |
+
input25�8� 2layers.2.mlp.up_proj.weight8
|
54 |
+
;
|
55 |
+
input26� 2(layers.2.post_attention_layernorm.weight8
|
56 |
+
2
|
57 |
+
input27�8� 2layers.2.mlp.gate_proj.weight8
|
58 |
+
<
|
59 |
+
input28� �2'layers.3.self_attn.o_proj.o_proj.weight8
|
60 |
+
>
|
61 |
+
input29�� 2)layers.3.self_attn.qkv_proj.v_proj.weight8
|
62 |
+
2
|
63 |
+
input30� 2layers.3.input_layernorm.weight8
|
64 |
+
>
|
65 |
+
input31�� 2)layers.3.self_attn.qkv_proj.k_proj.weight8
|
66 |
+
>
|
67 |
+
input32�� 2)layers.3.self_attn.qkv_proj.q_proj.weight8
|
68 |
+
2
|
69 |
+
input33� �82layers.3.mlp.down_proj.weight8
|
70 |
+
0
|
71 |
+
input34�8� 2layers.3.mlp.up_proj.weight8
|
72 |
+
;
|
73 |
+
input35� 2(layers.3.post_attention_layernorm.weight8
|
74 |
+
2
|
75 |
+
input36�8� 2layers.3.mlp.gate_proj.weight8
|
76 |
+
<
|
77 |
+
input37� �2'layers.4.self_attn.o_proj.o_proj.weight8
|
78 |
+
>
|
79 |
+
input38�� 2)layers.4.self_attn.qkv_proj.v_proj.weight8
|
80 |
+
2
|
81 |
+
input39� 2layers.4.input_layernorm.weight8
|
82 |
+
>
|
83 |
+
input40�� 2)layers.4.self_attn.qkv_proj.k_proj.weight8
|
84 |
+
>
|
85 |
+
input41�� 2)layers.4.self_attn.qkv_proj.q_proj.weight8
|
86 |
+
2
|
87 |
+
input42� �82layers.4.mlp.down_proj.weight8
|
88 |
+
0
|
89 |
+
input43�8� 2layers.4.mlp.up_proj.weight8
|
90 |
+
;
|
91 |
+
input44� 2(layers.4.post_attention_layernorm.weight8
|
92 |
+
2
|
93 |
+
input45�8� 2layers.4.mlp.gate_proj.weight8
|
94 |
+
<
|
95 |
+
input46� �2'layers.5.self_attn.o_proj.o_proj.weight8
|
96 |
+
>
|
97 |
+
input47�� 2)layers.5.self_attn.qkv_proj.v_proj.weight8
|
98 |
+
2
|
99 |
+
input48� 2layers.5.input_layernorm.weight8
|
100 |
+
>
|
101 |
+
input49�� 2)layers.5.self_attn.qkv_proj.k_proj.weight8
|
102 |
+
>
|
103 |
+
input50�� 2)layers.5.self_attn.qkv_proj.q_proj.weight8
|
104 |
+
2
|
105 |
+
input51� �82layers.5.mlp.down_proj.weight8
|
106 |
+
0
|
107 |
+
input52�8� 2layers.5.mlp.up_proj.weight8
|
108 |
+
;
|
109 |
+
input53� 2(layers.5.post_attention_layernorm.weight8
|
110 |
+
2
|
111 |
+
input54�8� 2layers.5.mlp.gate_proj.weight8
|
112 |
+
<
|
113 |
+
input55� �2'layers.6.self_attn.o_proj.o_proj.weight8
|
114 |
+
>
|
115 |
+
input56�� 2)layers.6.self_attn.qkv_proj.v_proj.weight8
|
116 |
+
2
|
117 |
+
input57� 2layers.6.input_layernorm.weight8
|
118 |
+
>
|
119 |
+
input58�� 2)layers.6.self_attn.qkv_proj.k_proj.weight8
|
120 |
+
>
|
121 |
+
input59�� 2)layers.6.self_attn.qkv_proj.q_proj.weight8
|
122 |
+
2
|
123 |
+
input60� �82layers.6.mlp.down_proj.weight8
|
124 |
+
0
|
125 |
+
input61�8� 2layers.6.mlp.up_proj.weight8
|
126 |
+
;
|
127 |
+
input62� 2(layers.6.post_attention_layernorm.weight8
|
128 |
+
2
|
129 |
+
input63�8� 2layers.6.mlp.gate_proj.weight8
|
130 |
+
<
|
131 |
+
input64� �2'layers.7.self_attn.o_proj.o_proj.weight8
|
132 |
+
>
|
133 |
+
input65�� 2)layers.7.self_attn.qkv_proj.v_proj.weight8
|
134 |
+
2
|
135 |
+
input66� 2layers.7.input_layernorm.weight8
|
136 |
+
>
|
137 |
+
input67�� 2)layers.7.self_attn.qkv_proj.k_proj.weight8
|
138 |
+
>
|
139 |
+
input68�� 2)layers.7.self_attn.qkv_proj.q_proj.weight8
|
140 |
+
2
|
141 |
+
input69� �82layers.7.mlp.down_proj.weight8
|
142 |
+
0
|
143 |
+
input70�8� 2layers.7.mlp.up_proj.weight8
|
144 |
+
;
|
145 |
+
input71� 2(layers.7.post_attention_layernorm.weight8
|
146 |
+
2
|
147 |
+
input72�8� 2layers.7.mlp.gate_proj.weight8
|
148 |
+
<
|
149 |
+
input73� �2'layers.8.self_attn.o_proj.o_proj.weight8
|
150 |
+
>
|
151 |
+
input74�� 2)layers.8.self_attn.qkv_proj.v_proj.weight8
|
152 |
+
2
|
153 |
+
input75� 2layers.8.input_layernorm.weight8
|
154 |
+
>
|
155 |
+
input76�� 2)layers.8.self_attn.qkv_proj.k_proj.weight8
|
156 |
+
>
|
157 |
+
input77�� 2)layers.8.self_attn.qkv_proj.q_proj.weight8
|
158 |
+
2
|
159 |
+
input78� �82layers.8.mlp.down_proj.weight8
|
160 |
+
0
|
161 |
+
input79�8� 2layers.8.mlp.up_proj.weight8
|
162 |
+
;
|
163 |
+
input80� 2(layers.8.post_attention_layernorm.weight8
|
164 |
+
2
|
165 |
+
input81�8� 2layers.8.mlp.gate_proj.weight8
|
166 |
+
<
|
167 |
+
input82� �2'layers.9.self_attn.o_proj.o_proj.weight8
|
168 |
+
>
|
169 |
+
input83�� 2)layers.9.self_attn.qkv_proj.v_proj.weight8
|
170 |
+
2
|
171 |
+
input84� 2layers.9.input_layernorm.weight8
|
172 |
+
>
|
173 |
+
input85�� 2)layers.9.self_attn.qkv_proj.k_proj.weight8
|
174 |
+
>
|
175 |
+
input86�� 2)layers.9.self_attn.qkv_proj.q_proj.weight8
|
176 |
+
2
|
177 |
+
input87� �82layers.9.mlp.down_proj.weight8
|
178 |
+
0
|
179 |
+
input88�8� 2layers.9.mlp.up_proj.weight8
|
180 |
+
;
|
181 |
+
input89� 2(layers.9.post_attention_layernorm.weight8
|
182 |
+
2
|
183 |
+
input90�8� 2layers.9.mlp.gate_proj.weight8
|
184 |
+
=
|
185 |
+
input91� �2(layers.10.self_attn.o_proj.o_proj.weight8
|
186 |
+
?
|
187 |
+
input92�� 2*layers.10.self_attn.qkv_proj.v_proj.weight8
|
188 |
+
3
|
189 |
+
input93� 2 layers.10.input_layernorm.weight8
|
190 |
+
?
|
191 |
+
input94�� 2*layers.10.self_attn.qkv_proj.k_proj.weight8
|
192 |
+
?
|
193 |
+
input95�� 2*layers.10.self_attn.qkv_proj.q_proj.weight8
|
194 |
+
3
|
195 |
+
input96� �82layers.10.mlp.down_proj.weight8
|
196 |
+
1
|
197 |
+
input97�8� 2layers.10.mlp.up_proj.weight8
|
198 |
+
<
|
199 |
+
input98� 2)layers.10.post_attention_layernorm.weight8
|
200 |
+
3
|
201 |
+
input99�8� 2layers.10.mlp.gate_proj.weight8
|
202 |
+
>
|
203 |
+
input100� �2(layers.11.self_attn.o_proj.o_proj.weight8
|
204 |
+
@
|
205 |
+
input101�� 2*layers.11.self_attn.qkv_proj.v_proj.weight8
|
206 |
+
4
|
207 |
+
input102� 2 layers.11.input_layernorm.weight8
|
208 |
+
@
|
209 |
+
input103�� 2*layers.11.self_attn.qkv_proj.k_proj.weight8
|
210 |
+
@
|
211 |
+
input104�� 2*layers.11.self_attn.qkv_proj.q_proj.weight8
|
212 |
+
4
|
213 |
+
input105� �82layers.11.mlp.down_proj.weight8
|
214 |
+
2
|
215 |
+
input106�8� 2layers.11.mlp.up_proj.weight8
|
216 |
+
=
|
217 |
+
input107� 2)layers.11.post_attention_layernorm.weight8
|
218 |
+
4
|
219 |
+
input108�8� 2layers.11.mlp.gate_proj.weight8
|
220 |
+
>
|
221 |
+
input109� �2(layers.12.self_attn.o_proj.o_proj.weight8
|
222 |
+
@
|
223 |
+
input110�� 2*layers.12.self_attn.qkv_proj.v_proj.weight8
|
224 |
+
4
|
225 |
+
input111� 2 layers.12.input_layernorm.weight8
|
226 |
+
@
|
227 |
+
input112�� 2*layers.12.self_attn.qkv_proj.k_proj.weight8
|
228 |
+
@
|
229 |
+
input113�� 2*layers.12.self_attn.qkv_proj.q_proj.weight8
|
230 |
+
4
|
231 |
+
input114� �82layers.12.mlp.down_proj.weight8
|
232 |
+
2
|
233 |
+
input115�8� 2layers.12.mlp.up_proj.weight8
|
234 |
+
=
|
235 |
+
input116� 2)layers.12.post_attention_layernorm.weight8
|
236 |
+
4
|
237 |
+
input117�8� 2layers.12.mlp.gate_proj.weight8
|
238 |
+
>
|
239 |
+
input118� �2(layers.13.self_attn.o_proj.o_proj.weight8
|
240 |
+
@
|
241 |
+
input119�� 2*layers.13.self_attn.qkv_proj.v_proj.weight8
|
242 |
+
4
|
243 |
+
input120� 2 layers.13.input_layernorm.weight8
|
244 |
+
@
|
245 |
+
input121�� 2*layers.13.self_attn.qkv_proj.k_proj.weight8
|
246 |
+
@
|
247 |
+
input122�� 2*layers.13.self_attn.qkv_proj.q_proj.weight8
|
248 |
+
4
|
249 |
+
input123� �82layers.13.mlp.down_proj.weight8
|
250 |
+
2
|
251 |
+
input124�8� 2layers.13.mlp.up_proj.weight8
|
252 |
+
=
|
253 |
+
input125� 2)layers.13.post_attention_layernorm.weight8
|
254 |
+
4
|
255 |
+
input126�8� 2layers.13.mlp.gate_proj.weight8
|
256 |
+
>
|
257 |
+
input127� �2(layers.14.self_attn.o_proj.o_proj.weight8
|
258 |
+
@
|
259 |
+
input128�� 2*layers.14.self_attn.qkv_proj.v_proj.weight8
|
260 |
+
4
|
261 |
+
input129� 2 layers.14.input_layernorm.weight8
|
262 |
+
@
|
263 |
+
input130�� 2*layers.14.self_attn.qkv_proj.k_proj.weight8
|
264 |
+
@
|
265 |
+
input131�� 2*layers.14.self_attn.qkv_proj.q_proj.weight8
|
266 |
+
4
|
267 |
+
input132� �82layers.14.mlp.down_proj.weight8
|
268 |
+
2
|
269 |
+
input133�8� 2layers.14.mlp.up_proj.weight8
|
270 |
+
=
|
271 |
+
input134� 2)layers.14.post_attention_layernorm.weight8
|
272 |
+
4
|
273 |
+
input135�8� 2layers.14.mlp.gate_proj.weight8
|
274 |
+
>
|
275 |
+
input136� �2(layers.15.self_attn.o_proj.o_proj.weight8
|
276 |
+
@
|
277 |
+
input137�� 2*layers.15.self_attn.qkv_proj.v_proj.weight8
|
278 |
+
4
|
279 |
+
input138� 2 layers.15.input_layernorm.weight8
|
280 |
+
@
|
281 |
+
input139�� 2*layers.15.self_attn.qkv_proj.k_proj.weight8
|
282 |
+
@
|
283 |
+
input140�� 2*layers.15.self_attn.qkv_proj.q_proj.weight8
|
284 |
+
4
|
285 |
+
input141� �82layers.15.mlp.down_proj.weight8
|
286 |
+
2
|
287 |
+
input142�8� 2layers.15.mlp.up_proj.weight8
|
288 |
+
=
|
289 |
+
input143� 2)layers.15.post_attention_layernorm.weight8
|
290 |
+
4
|
291 |
+
input144�8� 2layers.15.mlp.gate_proj.weight8
|
292 |
+
>
|
293 |
+
input145� �2(layers.16.self_attn.o_proj.o_proj.weight8
|
294 |
+
@
|
295 |
+
input146�� 2*layers.16.self_attn.qkv_proj.v_proj.weight8
|
296 |
+
4
|
297 |
+
input147� 2 layers.16.input_layernorm.weight8
|
298 |
+
@
|
299 |
+
input148�� 2*layers.16.self_attn.qkv_proj.k_proj.weight8
|
300 |
+
@
|
301 |
+
input149�� 2*layers.16.self_attn.qkv_proj.q_proj.weight8
|
302 |
+
4
|
303 |
+
input150� �82layers.16.mlp.down_proj.weight8
|
304 |
+
2
|
305 |
+
input151�8� 2layers.16.mlp.up_proj.weight8
|
306 |
+
=
|
307 |
+
input152� 2)layers.16.post_attention_layernorm.weight8
|
308 |
+
4
|
309 |
+
input153�8� 2layers.16.mlp.gate_proj.weight8
|
310 |
+
>
|
311 |
+
input154� �2(layers.17.self_attn.o_proj.o_proj.weight8
|
312 |
+
@
|
313 |
+
input155�� 2*layers.17.self_attn.qkv_proj.v_proj.weight8
|
314 |
+
4
|
315 |
+
input156� 2 layers.17.input_layernorm.weight8
|
316 |
+
@
|
317 |
+
input157�� 2*layers.17.self_attn.qkv_proj.k_proj.weight8
|
318 |
+
@
|
319 |
+
input158�� 2*layers.17.self_attn.qkv_proj.q_proj.weight8
|
320 |
+
4
|
321 |
+
input159� �82layers.17.mlp.down_proj.weight8
|
322 |
+
2
|
323 |
+
input160�8� 2layers.17.mlp.up_proj.weight8
|
324 |
+
=
|
325 |
+
input161� 2)layers.17.post_attention_layernorm.weight8
|
326 |
+
4
|
327 |
+
input162�8� 2layers.17.mlp.gate_proj.weight8
|
328 |
+
>
|
329 |
+
input163� �2(layers.18.self_attn.o_proj.o_proj.weight8
|
330 |
+
@
|
331 |
+
input164�� 2*layers.18.self_attn.qkv_proj.v_proj.weight8
|
332 |
+
4
|
333 |
+
input165� 2 layers.18.input_layernorm.weight8
|
334 |
+
@
|
335 |
+
input166�� 2*layers.18.self_attn.qkv_proj.k_proj.weight8
|
336 |
+
@
|
337 |
+
input167�� 2*layers.18.self_attn.qkv_proj.q_proj.weight8
|
338 |
+
4
|
339 |
+
input168� �82layers.18.mlp.down_proj.weight8
|
340 |
+
2
|
341 |
+
input169�8� 2layers.18.mlp.up_proj.weight8
|
342 |
+
=
|
343 |
+
input170� 2)layers.18.post_attention_layernorm.weight8
|
344 |
+
4
|
345 |
+
input171�8� 2layers.18.mlp.gate_proj.weight8
|
346 |
+
>
|
347 |
+
input172� �2(layers.19.self_attn.o_proj.o_proj.weight8
|
348 |
+
@
|
349 |
+
input173�� 2*layers.19.self_attn.qkv_proj.v_proj.weight8
|
350 |
+
4
|
351 |
+
input174� 2 layers.19.input_layernorm.weight8
|
352 |
+
@
|
353 |
+
input175�� 2*layers.19.self_attn.qkv_proj.k_proj.weight8
|
354 |
+
@
|
355 |
+
input176�� 2*layers.19.self_attn.qkv_proj.q_proj.weight8
|
356 |
+
4
|
357 |
+
input177� �82layers.19.mlp.down_proj.weight8
|
358 |
+
2
|
359 |
+
input178�8� 2layers.19.mlp.up_proj.weight8
|
360 |
+
=
|
361 |
+
input179� 2)layers.19.post_attention_layernorm.weight8
|
362 |
+
4
|
363 |
+
input180�8� 2layers.19.mlp.gate_proj.weight8
|
364 |
+
>
|
365 |
+
input181� �2(layers.20.self_attn.o_proj.o_proj.weight8
|
366 |
+
@
|
367 |
+
input182�� 2*layers.20.self_attn.qkv_proj.v_proj.weight8
|
368 |
+
4
|
369 |
+
input183� 2 layers.20.input_layernorm.weight8
|
370 |
+
@
|
371 |
+
input184�� 2*layers.20.self_attn.qkv_proj.k_proj.weight8
|
372 |
+
@
|
373 |
+
input185�� 2*layers.20.self_attn.qkv_proj.q_proj.weight8
|
374 |
+
4
|
375 |
+
input186� �82layers.20.mlp.down_proj.weight8
|
376 |
+
2
|
377 |
+
input187�8� 2layers.20.mlp.up_proj.weight8
|
378 |
+
=
|
379 |
+
input188� 2)layers.20.post_attention_layernorm.weight8
|
380 |
+
4
|
381 |
+
input189�8� 2layers.20.mlp.gate_proj.weight8
|
382 |
+
>
|
383 |
+
input190� �2(layers.21.self_attn.o_proj.o_proj.weight8
|
384 |
+
@
|
385 |
+
input191�� 2*layers.21.self_attn.qkv_proj.v_proj.weight8
|
386 |
+
4
|
387 |
+
input192� 2 layers.21.input_layernorm.weight8
|
388 |
+
@
|
389 |
+
input193�� 2*layers.21.self_attn.qkv_proj.k_proj.weight8
|
390 |
+
@
|
391 |
+
input194�� 2*layers.21.self_attn.qkv_proj.q_proj.weight8
|
392 |
+
4
|
393 |
+
input195� �82layers.21.mlp.down_proj.weight8
|
394 |
+
2
|
395 |
+
input196�8� 2layers.21.mlp.up_proj.weight8
|
396 |
+
=
|
397 |
+
input197� 2)layers.21.post_attention_layernorm.weight8
|
398 |
+
4
|
399 |
+
input198�8� 2layers.21.mlp.gate_proj.weight8
|
400 |
+
>
|
401 |
+
input199� �2(layers.22.self_attn.o_proj.o_proj.weight8
|
402 |
+
@
|
403 |
+
input200�� 2*layers.22.self_attn.qkv_proj.v_proj.weight8
|
404 |
+
4
|
405 |
+
input201� 2 layers.22.input_layernorm.weight8
|
406 |
+
@
|
407 |
+
input202�� 2*layers.22.self_attn.qkv_proj.k_proj.weight8
|
408 |
+
@
|
409 |
+
input203�� 2*layers.22.self_attn.qkv_proj.q_proj.weight8
|
410 |
+
4
|
411 |
+
input204� �82layers.22.mlp.down_proj.weight8
|
412 |
+
2
|
413 |
+
input205�8� 2layers.22.mlp.up_proj.weight8
|
414 |
+
=
|
415 |
+
input206� 2)layers.22.post_attention_layernorm.weight8
|
416 |
+
4
|
417 |
+
input207�8� 2layers.22.mlp.gate_proj.weight8
|
418 |
+
>
|
419 |
+
input208� �2(layers.23.self_attn.o_proj.o_proj.weight8
|
420 |
+
@
|
421 |
+
input209�� 2*layers.23.self_attn.qkv_proj.v_proj.weight8
|
422 |
+
4
|
423 |
+
input210� 2 layers.23.input_layernorm.weight8
|
424 |
+
@
|
425 |
+
input211�� 2*layers.23.self_attn.qkv_proj.k_proj.weight8
|
426 |
+
@
|
427 |
+
input212�� 2*layers.23.self_attn.qkv_proj.q_proj.weight8
|
428 |
+
4
|
429 |
+
input213� �82layers.23.mlp.down_proj.weight8
|
430 |
+
2
|
431 |
+
input214�8� 2layers.23.mlp.up_proj.weight8
|
432 |
+
=
|
433 |
+
input215� 2)layers.23.post_attention_layernorm.weight8
|
434 |
+
4
|
435 |
+
input216�8� 2layers.23.mlp.gate_proj.weight8
|
436 |
+
>
|
437 |
+
input217� �2(layers.24.self_attn.o_proj.o_proj.weight8
|
438 |
+
@
|
439 |
+
input218�� 2*layers.24.self_attn.qkv_proj.v_proj.weight8
|
440 |
+
4
|
441 |
+
input219� 2 layers.24.input_layernorm.weight8
|
442 |
+
@
|
443 |
+
input220�� 2*layers.24.self_attn.qkv_proj.k_proj.weight8
|
444 |
+
@
|
445 |
+
input221�� 2*layers.24.self_attn.qkv_proj.q_proj.weight8
|
446 |
+
4
|
447 |
+
input222� �82layers.24.mlp.down_proj.weight8
|
448 |
+
2
|
449 |
+
input223�8� 2layers.24.mlp.up_proj.weight8
|
450 |
+
=
|
451 |
+
input224� 2)layers.24.post_attention_layernorm.weight8
|
452 |
+
4
|
453 |
+
input225�8� 2layers.24.mlp.gate_proj.weight8
|
454 |
+
>
|
455 |
+
input226� �2(layers.25.self_attn.o_proj.o_proj.weight8
|
456 |
+
@
|
457 |
+
input227�� 2*layers.25.self_attn.qkv_proj.v_proj.weight8
|
458 |
+
4
|
459 |
+
input228� 2 layers.25.input_layernorm.weight8
|
460 |
+
@
|
461 |
+
input229�� 2*layers.25.self_attn.qkv_proj.k_proj.weight8
|
462 |
+
@
|
463 |
+
input230�� 2*layers.25.self_attn.qkv_proj.q_proj.weight8
|
464 |
+
4
|
465 |
+
input231� �82layers.25.mlp.down_proj.weight8
|
466 |
+
2
|
467 |
+
input232�8� 2layers.25.mlp.up_proj.weight8
|
468 |
+
=
|
469 |
+
input233� 2)layers.25.post_attention_layernorm.weight8
|
470 |
+
4
|
471 |
+
input234�8� 2layers.25.mlp.gate_proj.weight8
|
472 |
+
>
|
473 |
+
input235� �2(layers.26.self_attn.o_proj.o_proj.weight8
|
474 |
+
@
|
475 |
+
input236�� 2*layers.26.self_attn.qkv_proj.v_proj.weight8
|
476 |
+
4
|
477 |
+
input237� 2 layers.26.input_layernorm.weight8
|
478 |
+
@
|
479 |
+
input238�� 2*layers.26.self_attn.qkv_proj.k_proj.weight8
|
480 |
+
@
|
481 |
+
input239�� 2*layers.26.self_attn.qkv_proj.q_proj.weight8
|
482 |
+
4
|
483 |
+
input240� �82layers.26.mlp.down_proj.weight8
|
484 |
+
2
|
485 |
+
input241�8� 2layers.26.mlp.up_proj.weight8
|
486 |
+
=
|
487 |
+
input242� 2)layers.26.post_attention_layernorm.weight8
|
488 |
+
4
|
489 |
+
input243�8� 2layers.26.mlp.gate_proj.weight8
|
490 |
+
>
|
491 |
+
input244� �2(layers.27.self_attn.o_proj.o_proj.weight8
|
492 |
+
@
|
493 |
+
input245�� 2*layers.27.self_attn.qkv_proj.v_proj.weight8
|
494 |
+
4
|
495 |
+
input246� 2 layers.27.input_layernorm.weight8
|
496 |
+
@
|
497 |
+
input247�� 2*layers.27.self_attn.qkv_proj.k_proj.weight8
|
498 |
+
@
|
499 |
+
input248�� 2*layers.27.self_attn.qkv_proj.q_proj.weight8
|
500 |
+
4
|
501 |
+
input249� �82layers.27.mlp.down_proj.weight8
|
502 |
+
2
|
503 |
+
input250�8� 2layers.27.mlp.up_proj.weight8
|
504 |
+
=
|
505 |
+
input251� 2)layers.27.post_attention_layernorm.weight8
|
506 |
+
4
|
507 |
+
input252�8� 2layers.27.mlp.gate_proj.weight8
|
508 |
+
>
|
509 |
+
input253� �2(layers.28.self_attn.o_proj.o_proj.weight8
|
510 |
+
@
|
511 |
+
input254�� 2*layers.28.self_attn.qkv_proj.v_proj.weight8
|
512 |
+
4
|
513 |
+
input255� 2 layers.28.input_layernorm.weight8
|
514 |
+
@
|
515 |
+
input256�� 2*layers.28.self_attn.qkv_proj.k_proj.weight8
|
516 |
+
@
|
517 |
+
input257�� 2*layers.28.self_attn.qkv_proj.q_proj.weight8
|
518 |
+
4
|
519 |
+
input258� �82layers.28.mlp.down_proj.weight8
|
520 |
+
2
|
521 |
+
input259�8� 2layers.28.mlp.up_proj.weight8
|
522 |
+
=
|
523 |
+
input260� 2)layers.28.post_attention_layernorm.weight8
|
524 |
+
4
|
525 |
+
input261�8� 2layers.28.mlp.gate_proj.weight8
|
526 |
+
>
|
527 |
+
input262� �2(layers.29.self_attn.o_proj.o_proj.weight8
|
528 |
+
@
|
529 |
+
input263�� 2*layers.29.self_attn.qkv_proj.v_proj.weight8
|
530 |
+
4
|
531 |
+
input264� 2 layers.29.input_layernorm.weight8
|
532 |
+
@
|
533 |
+
input265�� 2*layers.29.self_attn.qkv_proj.k_proj.weight8
|
534 |
+
@
|
535 |
+
input266�� 2*layers.29.self_attn.qkv_proj.q_proj.weight8
|
536 |
+
4
|
537 |
+
input267� �82layers.29.mlp.down_proj.weight8
|
538 |
+
2
|
539 |
+
input268�8� 2layers.29.mlp.up_proj.weight8
|
540 |
+
=
|
541 |
+
input269� 2)layers.29.post_attention_layernorm.weight8
|
542 |
+
4
|
543 |
+
input270�8� 2layers.29.mlp.gate_proj.weight8
|
544 |
+
>
|
545 |
+
input271� �2(layers.30.self_attn.o_proj.o_proj.weight8
|
546 |
+
@
|
547 |
+
input272�� 2*layers.30.self_attn.qkv_proj.v_proj.weight8
|
548 |
+
4
|
549 |
+
input273� 2 layers.30.input_layernorm.weight8
|
550 |
+
@
|
551 |
+
input274�� 2*layers.30.self_attn.qkv_proj.k_proj.weight8
|
552 |
+
@
|
553 |
+
input275�� 2*layers.30.self_attn.qkv_proj.q_proj.weight8
|
554 |
+
4
|
555 |
+
input276� �82layers.30.mlp.down_proj.weight8
|
556 |
+
2
|
557 |
+
input277�8� 2layers.30.mlp.up_proj.weight8
|
558 |
+
=
|
559 |
+
input278� 2)layers.30.post_attention_layernorm.weight8
|
560 |
+
4
|
561 |
+
input279�8� 2layers.30.mlp.gate_proj.weight8
|
562 |
+
>
|
563 |
+
input280� �2(layers.31.self_attn.o_proj.o_proj.weight8
|
564 |
+
@
|
565 |
+
input281�� 2*layers.31.self_attn.qkv_proj.v_proj.weight8
|
566 |
+
4
|
567 |
+
input282� 2 layers.31.input_layernorm.weight8
|
568 |
+
@
|
569 |
+
input283�� 2*layers.31.self_attn.qkv_proj.k_proj.weight8
|
570 |
+
@
|
571 |
+
input284�� 2*layers.31.self_attn.qkv_proj.q_proj.weight8
|
572 |
+
4
|
573 |
+
input285� �82layers.31.mlp.down_proj.weight8
|
574 |
+
2
|
575 |
+
input286�8� 2layers.31.mlp.up_proj.weight8
|
576 |
+
=
|
577 |
+
input287� 2)layers.31.post_attention_layernorm.weight8
|
578 |
+
4
|
579 |
+
input288�8� 2layers.31.mlp.gate_proj.weight8
|
580 |
+
%
|
581 |
+
input289��� 2lm_head.weight8
|
582 |
+
|
583 |
+
input290� 2norm.weight8'
|
584 |
+
output0���2embed_tokens.weight>
|
585 |
+
output1��2'layers.0.self_attn.o_proj.o_proj.weight>
|
586 |
+
output2� �2)layers.0.self_attn.qkv_proj.v_proj.weight1
|
587 |
+
output3� 2layers.0.input_layernorm.weight>
|
588 |
+
output4� @2)layers.0.self_attn.qkv_proj.k_proj.weight?
|
589 |
+
output5� @2)layers.0.self_attn.qkv_proj.q_proj.weight3
|
590 |
+
output6 ��2layers.0.mlp.down_proj.weight0
|
591 |
+
output78� �2layers.0.mlp.up_proj.weight:
|
592 |
+
output8� 2(layers.0.post_attention_layernorm.weight2
|
593 |
+
output98� �2layers.0.mlp.gate_proj.weight?
|
594 |
+
output10��2'layers.1.self_attn.o_proj.o_proj.weight?
|
595 |
+
output11� �2)layers.1.self_attn.qkv_proj.v_proj.weight2
|
596 |
+
output12� 2layers.1.input_layernorm.weight?
|
597 |
+
output13� @2)layers.1.self_attn.qkv_proj.k_proj.weight@
|
598 |
+
output14� @2)layers.1.self_attn.qkv_proj.q_proj.weight4
|
599 |
+
output15 ��2layers.1.mlp.down_proj.weight1
|
600 |
+
output168� �2layers.1.mlp.up_proj.weight;
|
601 |
+
output17� 2(layers.1.post_attention_layernorm.weight3
|
602 |
+
output188� �2layers.1.mlp.gate_proj.weight?
|
603 |
+
output19��2'layers.2.self_attn.o_proj.o_proj.weight?
|
604 |
+
output20� �2)layers.2.self_attn.qkv_proj.v_proj.weight2
|
605 |
+
output21� 2layers.2.input_layernorm.weight?
|
606 |
+
output22� @2)layers.2.self_attn.qkv_proj.k_proj.weight@
|
607 |
+
output23� @2)layers.2.self_attn.qkv_proj.q_proj.weight4
|
608 |
+
output24 ��2layers.2.mlp.down_proj.weight1
|
609 |
+
output258� �2layers.2.mlp.up_proj.weight;
|
610 |
+
output26� 2(layers.2.post_attention_layernorm.weight3
|
611 |
+
output278� �2layers.2.mlp.gate_proj.weight?
|
612 |
+
output28��2'layers.3.self_attn.o_proj.o_proj.weight?
|
613 |
+
output29� �2)layers.3.self_attn.qkv_proj.v_proj.weight2
|
614 |
+
output30� 2layers.3.input_layernorm.weight?
|
615 |
+
output31� @2)layers.3.self_attn.qkv_proj.k_proj.weight@
|
616 |
+
output32� @2)layers.3.self_attn.qkv_proj.q_proj.weight4
|
617 |
+
output33 ��2layers.3.mlp.down_proj.weight1
|
618 |
+
output348� �2layers.3.mlp.up_proj.weight;
|
619 |
+
output35� 2(layers.3.post_attention_layernorm.weight3
|
620 |
+
output368� �2layers.3.mlp.gate_proj.weight?
|
621 |
+
output37��2'layers.4.self_attn.o_proj.o_proj.weight?
|
622 |
+
output38� �2)layers.4.self_attn.qkv_proj.v_proj.weight2
|
623 |
+
output39� 2layers.4.input_layernorm.weight?
|
624 |
+
output40� @2)layers.4.self_attn.qkv_proj.k_proj.weight@
|
625 |
+
output41� @2)layers.4.self_attn.qkv_proj.q_proj.weight4
|
626 |
+
output42 ��2layers.4.mlp.down_proj.weight1
|
627 |
+
output438� �2layers.4.mlp.up_proj.weight;
|
628 |
+
output44� 2(layers.4.post_attention_layernorm.weight3
|
629 |
+
output458� �2layers.4.mlp.gate_proj.weight?
|
630 |
+
output46��2'layers.5.self_attn.o_proj.o_proj.weight?
|
631 |
+
output47� �2)layers.5.self_attn.qkv_proj.v_proj.weight2
|
632 |
+
output48� 2layers.5.input_layernorm.weight?
|
633 |
+
output49� @2)layers.5.self_attn.qkv_proj.k_proj.weight@
|
634 |
+
output50� @2)layers.5.self_attn.qkv_proj.q_proj.weight4
|
635 |
+
output51 ��2layers.5.mlp.down_proj.weight1
|
636 |
+
output528� �2layers.5.mlp.up_proj.weight;
|
637 |
+
output53� 2(layers.5.post_attention_layernorm.weight3
|
638 |
+
output548� �2layers.5.mlp.gate_proj.weight?
|
639 |
+
output55��2'layers.6.self_attn.o_proj.o_proj.weight?
|
640 |
+
output56� �2)layers.6.self_attn.qkv_proj.v_proj.weight2
|
641 |
+
output57� 2layers.6.input_layernorm.weight?
|
642 |
+
output58� @2)layers.6.self_attn.qkv_proj.k_proj.weight@
|
643 |
+
output59� @2)layers.6.self_attn.qkv_proj.q_proj.weight4
|
644 |
+
output60 ��2layers.6.mlp.down_proj.weight1
|
645 |
+
output618� �2layers.6.mlp.up_proj.weight;
|
646 |
+
output62� 2(layers.6.post_attention_layernorm.weight3
|
647 |
+
output638� �2layers.6.mlp.gate_proj.weight?
|
648 |
+
output64��2'layers.7.self_attn.o_proj.o_proj.weight?
|
649 |
+
output65� �2)layers.7.self_attn.qkv_proj.v_proj.weight2
|
650 |
+
output66� 2layers.7.input_layernorm.weight?
|
651 |
+
output67� @2)layers.7.self_attn.qkv_proj.k_proj.weight@
|
652 |
+
output68� @2)layers.7.self_attn.qkv_proj.q_proj.weight4
|
653 |
+
output69 ��2layers.7.mlp.down_proj.weight1
|
654 |
+
output708� �2layers.7.mlp.up_proj.weight;
|
655 |
+
output71� 2(layers.7.post_attention_layernorm.weight3
|
656 |
+
output728� �2layers.7.mlp.gate_proj.weight?
|
657 |
+
output73��2'layers.8.self_attn.o_proj.o_proj.weight?
|
658 |
+
output74� �2)layers.8.self_attn.qkv_proj.v_proj.weight2
|
659 |
+
output75� 2layers.8.input_layernorm.weight?
|
660 |
+
output76� @2)layers.8.self_attn.qkv_proj.k_proj.weight@
|
661 |
+
output77� @2)layers.8.self_attn.qkv_proj.q_proj.weight4
|
662 |
+
output78 ��2layers.8.mlp.down_proj.weight1
|
663 |
+
output798� �2layers.8.mlp.up_proj.weight;
|
664 |
+
output80� 2(layers.8.post_attention_layernorm.weight3
|
665 |
+
output818� �2layers.8.mlp.gate_proj.weight?
|
666 |
+
output82��2'layers.9.self_attn.o_proj.o_proj.weight?
|
667 |
+
output83� �2)layers.9.self_attn.qkv_proj.v_proj.weight2
|
668 |
+
output84� 2layers.9.input_layernorm.weight?
|
669 |
+
output85� @2)layers.9.self_attn.qkv_proj.k_proj.weight@
|
670 |
+
output86� @2)layers.9.self_attn.qkv_proj.q_proj.weight4
|
671 |
+
output87 ��2layers.9.mlp.down_proj.weight1
|
672 |
+
output888� �2layers.9.mlp.up_proj.weight;
|
673 |
+
output89� 2(layers.9.post_attention_layernorm.weight3
|
674 |
+
output908� �2layers.9.mlp.gate_proj.weight@
|
675 |
+
output91��2(layers.10.self_attn.o_proj.o_proj.weight@
|
676 |
+
output92� �2*layers.10.self_attn.qkv_proj.v_proj.weight3
|
677 |
+
output93� 2 layers.10.input_layernorm.weight@
|
678 |
+
output94� @2*layers.10.self_attn.qkv_proj.k_proj.weightA
|
679 |
+
output95� @2*layers.10.self_attn.qkv_proj.q_proj.weight5
|
680 |
+
output96 ��2layers.10.mlp.down_proj.weight2
|
681 |
+
output978� �2layers.10.mlp.up_proj.weight<
|
682 |
+
output98� 2)layers.10.post_attention_layernorm.weight4
|
683 |
+
output998� �2layers.10.mlp.gate_proj.weightA
|
684 |
+
output100��2(layers.11.self_attn.o_proj.o_proj.weightA
|
685 |
+
output101� �2*layers.11.self_attn.qkv_proj.v_proj.weight4
|
686 |
+
output102� 2 layers.11.input_layernorm.weightA
|
687 |
+
output103� @2*layers.11.self_attn.qkv_proj.k_proj.weightB
|
688 |
+
output104� @2*layers.11.self_attn.qkv_proj.q_proj.weight6
|
689 |
+
output105 ��2layers.11.mlp.down_proj.weight3
|
690 |
+
output1068� �2layers.11.mlp.up_proj.weight=
|
691 |
+
output107� 2)layers.11.post_attention_layernorm.weight5
|
692 |
+
output1088� �2layers.11.mlp.gate_proj.weightA
|
693 |
+
output109��2(layers.12.self_attn.o_proj.o_proj.weightA
|
694 |
+
output110� �2*layers.12.self_attn.qkv_proj.v_proj.weight4
|
695 |
+
output111� 2 layers.12.input_layernorm.weightA
|
696 |
+
output112� @2*layers.12.self_attn.qkv_proj.k_proj.weightB
|
697 |
+
output113� @2*layers.12.self_attn.qkv_proj.q_proj.weight6
|
698 |
+
output114 ��2layers.12.mlp.down_proj.weight3
|
699 |
+
output1158� �2layers.12.mlp.up_proj.weight=
|
700 |
+
output116� 2)layers.12.post_attention_layernorm.weight5
|
701 |
+
output1178� �2layers.12.mlp.gate_proj.weightA
|
702 |
+
output118��2(layers.13.self_attn.o_proj.o_proj.weightA
|
703 |
+
output119� �2*layers.13.self_attn.qkv_proj.v_proj.weight4
|
704 |
+
output120� 2 layers.13.input_layernorm.weightA
|
705 |
+
output121� @2*layers.13.self_attn.qkv_proj.k_proj.weightB
|
706 |
+
output122� @2*layers.13.self_attn.qkv_proj.q_proj.weight6
|
707 |
+
output123 ��2layers.13.mlp.down_proj.weight3
|
708 |
+
output1248� �2layers.13.mlp.up_proj.weight=
|
709 |
+
output125� 2)layers.13.post_attention_layernorm.weight5
|
710 |
+
output1268� �2layers.13.mlp.gate_proj.weightA
|
711 |
+
output127��2(layers.14.self_attn.o_proj.o_proj.weightA
|
712 |
+
output128� �2*layers.14.self_attn.qkv_proj.v_proj.weight4
|
713 |
+
output129� 2 layers.14.input_layernorm.weightA
|
714 |
+
output130� @2*layers.14.self_attn.qkv_proj.k_proj.weightB
|
715 |
+
output131� @2*layers.14.self_attn.qkv_proj.q_proj.weight6
|
716 |
+
output132 ��2layers.14.mlp.down_proj.weight3
|
717 |
+
output1338� �2layers.14.mlp.up_proj.weight=
|
718 |
+
output134� 2)layers.14.post_attention_layernorm.weight5
|
719 |
+
output1358� �2layers.14.mlp.gate_proj.weightA
|
720 |
+
output136��2(layers.15.self_attn.o_proj.o_proj.weightA
|
721 |
+
output137� �2*layers.15.self_attn.qkv_proj.v_proj.weight4
|
722 |
+
output138� 2 layers.15.input_layernorm.weightA
|
723 |
+
output139� @2*layers.15.self_attn.qkv_proj.k_proj.weightB
|
724 |
+
output140� @2*layers.15.self_attn.qkv_proj.q_proj.weight6
|
725 |
+
output141 ��2layers.15.mlp.down_proj.weight3
|
726 |
+
output1428� �2layers.15.mlp.up_proj.weight=
|
727 |
+
output143� 2)layers.15.post_attention_layernorm.weight5
|
728 |
+
output1448� �2layers.15.mlp.gate_proj.weightA
|
729 |
+
output145��2(layers.16.self_attn.o_proj.o_proj.weightA
|
730 |
+
output146� �2*layers.16.self_attn.qkv_proj.v_proj.weight4
|
731 |
+
output147� 2 layers.16.input_layernorm.weightA
|
732 |
+
output148� @2*layers.16.self_attn.qkv_proj.k_proj.weightB
|
733 |
+
output149� @2*layers.16.self_attn.qkv_proj.q_proj.weight6
|
734 |
+
output150 ��2layers.16.mlp.down_proj.weight3
|
735 |
+
output1518� �2layers.16.mlp.up_proj.weight=
|
736 |
+
output152� 2)layers.16.post_attention_layernorm.weight5
|
737 |
+
output1538� �2layers.16.mlp.gate_proj.weightA
|
738 |
+
output154��2(layers.17.self_attn.o_proj.o_proj.weightA
|
739 |
+
output155� �2*layers.17.self_attn.qkv_proj.v_proj.weight4
|
740 |
+
output156� 2 layers.17.input_layernorm.weightA
|
741 |
+
output157� @2*layers.17.self_attn.qkv_proj.k_proj.weightB
|
742 |
+
output158� @2*layers.17.self_attn.qkv_proj.q_proj.weight6
|
743 |
+
output159 ��2layers.17.mlp.down_proj.weight3
|
744 |
+
output1608� �2layers.17.mlp.up_proj.weight=
|
745 |
+
output161� 2)layers.17.post_attention_layernorm.weight5
|
746 |
+
output1628� �2layers.17.mlp.gate_proj.weightA
|
747 |
+
output163��2(layers.18.self_attn.o_proj.o_proj.weightA
|
748 |
+
output164� �2*layers.18.self_attn.qkv_proj.v_proj.weight4
|
749 |
+
output165� 2 layers.18.input_layernorm.weightA
|
750 |
+
output166� @2*layers.18.self_attn.qkv_proj.k_proj.weightB
|
751 |
+
output167� @2*layers.18.self_attn.qkv_proj.q_proj.weight6
|
752 |
+
output168 ��2layers.18.mlp.down_proj.weight3
|
753 |
+
output1698� �2layers.18.mlp.up_proj.weight=
|
754 |
+
output170� 2)layers.18.post_attention_layernorm.weight5
|
755 |
+
output1718� �2layers.18.mlp.gate_proj.weightA
|
756 |
+
output172��2(layers.19.self_attn.o_proj.o_proj.weightA
|
757 |
+
output173� �2*layers.19.self_attn.qkv_proj.v_proj.weight4
|
758 |
+
output174� 2 layers.19.input_layernorm.weightA
|
759 |
+
output175� @2*layers.19.self_attn.qkv_proj.k_proj.weightB
|
760 |
+
output176� @2*layers.19.self_attn.qkv_proj.q_proj.weight6
|
761 |
+
output177 ��2layers.19.mlp.down_proj.weight3
|
762 |
+
output1788� �2layers.19.mlp.up_proj.weight=
|
763 |
+
output179� 2)layers.19.post_attention_layernorm.weight5
|
764 |
+
output1808� �2layers.19.mlp.gate_proj.weightA
|
765 |
+
output181��2(layers.20.self_attn.o_proj.o_proj.weightA
|
766 |
+
output182� �2*layers.20.self_attn.qkv_proj.v_proj.weight4
|
767 |
+
output183� 2 layers.20.input_layernorm.weightA
|
768 |
+
output184� @2*layers.20.self_attn.qkv_proj.k_proj.weightB
|
769 |
+
output185� @2*layers.20.self_attn.qkv_proj.q_proj.weight6
|
770 |
+
output186 ��2layers.20.mlp.down_proj.weight3
|
771 |
+
output1878� �2layers.20.mlp.up_proj.weight=
|
772 |
+
output188� 2)layers.20.post_attention_layernorm.weight5
|
773 |
+
output1898� �2layers.20.mlp.gate_proj.weightA
|
774 |
+
output190��2(layers.21.self_attn.o_proj.o_proj.weightA
|
775 |
+
output191� �2*layers.21.self_attn.qkv_proj.v_proj.weight4
|
776 |
+
output192� 2 layers.21.input_layernorm.weightA
|
777 |
+
output193� @2*layers.21.self_attn.qkv_proj.k_proj.weightB
|
778 |
+
output194� @2*layers.21.self_attn.qkv_proj.q_proj.weight6
|
779 |
+
output195 ��2layers.21.mlp.down_proj.weight3
|
780 |
+
output1968� �2layers.21.mlp.up_proj.weight=
|
781 |
+
output197� 2)layers.21.post_attention_layernorm.weight5
|
782 |
+
output1988� �2layers.21.mlp.gate_proj.weightA
|
783 |
+
output199��2(layers.22.self_attn.o_proj.o_proj.weightA
|
784 |
+
output200� �2*layers.22.self_attn.qkv_proj.v_proj.weight4
|
785 |
+
output201� 2 layers.22.input_layernorm.weightA
|
786 |
+
output202� @2*layers.22.self_attn.qkv_proj.k_proj.weightB
|
787 |
+
output203� @2*layers.22.self_attn.qkv_proj.q_proj.weight6
|
788 |
+
output204 ��2layers.22.mlp.down_proj.weight3
|
789 |
+
output2058� �2layers.22.mlp.up_proj.weight=
|
790 |
+
output206� 2)layers.22.post_attention_layernorm.weight5
|
791 |
+
output2078� �2layers.22.mlp.gate_proj.weightA
|
792 |
+
output208��2(layers.23.self_attn.o_proj.o_proj.weightA
|
793 |
+
output209� �2*layers.23.self_attn.qkv_proj.v_proj.weight4
|
794 |
+
output210� 2 layers.23.input_layernorm.weightA
|
795 |
+
output211� @2*layers.23.self_attn.qkv_proj.k_proj.weightB
|
796 |
+
output212� @2*layers.23.self_attn.qkv_proj.q_proj.weight6
|
797 |
+
output213 ��2layers.23.mlp.down_proj.weight3
|
798 |
+
output2148� �2layers.23.mlp.up_proj.weight=
|
799 |
+
output215� 2)layers.23.post_attention_layernorm.weight5
|
800 |
+
output2168� �2layers.23.mlp.gate_proj.weightA
|
801 |
+
output217��2(layers.24.self_attn.o_proj.o_proj.weightA
|
802 |
+
output218� �2*layers.24.self_attn.qkv_proj.v_proj.weight4
|
803 |
+
output219� 2 layers.24.input_layernorm.weightA
|
804 |
+
output220� @2*layers.24.self_attn.qkv_proj.k_proj.weightB
|
805 |
+
output221� @2*layers.24.self_attn.qkv_proj.q_proj.weight6
|
806 |
+
output222 ��2layers.24.mlp.down_proj.weight3
|
807 |
+
output2238� �2layers.24.mlp.up_proj.weight=
|
808 |
+
output224� 2)layers.24.post_attention_layernorm.weight5
|
809 |
+
output2258� �2layers.24.mlp.gate_proj.weightA
|
810 |
+
output226��2(layers.25.self_attn.o_proj.o_proj.weightA
|
811 |
+
output227� �2*layers.25.self_attn.qkv_proj.v_proj.weight4
|
812 |
+
output228� 2 layers.25.input_layernorm.weightA
|
813 |
+
output229� @2*layers.25.self_attn.qkv_proj.k_proj.weightB
|
814 |
+
output230� @2*layers.25.self_attn.qkv_proj.q_proj.weight6
|
815 |
+
output231 ��2layers.25.mlp.down_proj.weight3
|
816 |
+
output2328� �2layers.25.mlp.up_proj.weight=
|
817 |
+
output233� 2)layers.25.post_attention_layernorm.weight5
|
818 |
+
output2348� �2layers.25.mlp.gate_proj.weightA
|
819 |
+
output235��2(layers.26.self_attn.o_proj.o_proj.weightA
|
820 |
+
output236� �2*layers.26.self_attn.qkv_proj.v_proj.weight4
|
821 |
+
output237� 2 layers.26.input_layernorm.weightA
|
822 |
+
output238� @2*layers.26.self_attn.qkv_proj.k_proj.weightB
|
823 |
+
output239� @2*layers.26.self_attn.qkv_proj.q_proj.weight6
|
824 |
+
output240 ��2layers.26.mlp.down_proj.weight3
|
825 |
+
output2418� �2layers.26.mlp.up_proj.weight=
|
826 |
+
output242� 2)layers.26.post_attention_layernorm.weight5
|
827 |
+
output2438� �2layers.26.mlp.gate_proj.weightA
|
828 |
+
output244��2(layers.27.self_attn.o_proj.o_proj.weightA
|
829 |
+
output245� �2*layers.27.self_attn.qkv_proj.v_proj.weight4
|
830 |
+
output246� 2 layers.27.input_layernorm.weightA
|
831 |
+
output247� @2*layers.27.self_attn.qkv_proj.k_proj.weightB
|
832 |
+
output248� @2*layers.27.self_attn.qkv_proj.q_proj.weight6
|
833 |
+
output249 ��2layers.27.mlp.down_proj.weight3
|
834 |
+
output2508� �2layers.27.mlp.up_proj.weight=
|
835 |
+
output251� 2)layers.27.post_attention_layernorm.weight5
|
836 |
+
output2528� �2layers.27.mlp.gate_proj.weightA
|
837 |
+
output253��2(layers.28.self_attn.o_proj.o_proj.weightA
|
838 |
+
output254� �2*layers.28.self_attn.qkv_proj.v_proj.weight4
|
839 |
+
output255� 2 layers.28.input_layernorm.weightA
|
840 |
+
output256� @2*layers.28.self_attn.qkv_proj.k_proj.weightB
|
841 |
+
output257� @2*layers.28.self_attn.qkv_proj.q_proj.weight6
|
842 |
+
output258 ��2layers.28.mlp.down_proj.weight3
|
843 |
+
output2598� �2layers.28.mlp.up_proj.weight=
|
844 |
+
output260� 2)layers.28.post_attention_layernorm.weight5
|
845 |
+
output2618� �2layers.28.mlp.gate_proj.weightA
|
846 |
+
output262��2(layers.29.self_attn.o_proj.o_proj.weightA
|
847 |
+
output263� �2*layers.29.self_attn.qkv_proj.v_proj.weight4
|
848 |
+
output264� 2 layers.29.input_layernorm.weightA
|
849 |
+
output265� @2*layers.29.self_attn.qkv_proj.k_proj.weightB
|
850 |
+
output266� @2*layers.29.self_attn.qkv_proj.q_proj.weight6
|
851 |
+
output267 ��2layers.29.mlp.down_proj.weight3
|
852 |
+
output2688� �2layers.29.mlp.up_proj.weight=
|
853 |
+
output269� 2)layers.29.post_attention_layernorm.weight5
|
854 |
+
output2708� �2layers.29.mlp.gate_proj.weightA
|
855 |
+
output271��2(layers.30.self_attn.o_proj.o_proj.weightA
|
856 |
+
output272� �2*layers.30.self_attn.qkv_proj.v_proj.weight4
|
857 |
+
output273� 2 layers.30.input_layernorm.weightA
|
858 |
+
output274� @2*layers.30.self_attn.qkv_proj.k_proj.weightB
|
859 |
+
output275� @2*layers.30.self_attn.qkv_proj.q_proj.weight6
|
860 |
+
output276 ��2layers.30.mlp.down_proj.weight3
|
861 |
+
output2778� �2layers.30.mlp.up_proj.weight=
|
862 |
+
output278� 2)layers.30.post_attention_layernorm.weight5
|
863 |
+
output2798� �2layers.30.mlp.gate_proj.weightA
|
864 |
+
output280��2(layers.31.self_attn.o_proj.o_proj.weightA
|
865 |
+
output281� �2*layers.31.self_attn.qkv_proj.v_proj.weight4
|
866 |
+
output282� 2 layers.31.input_layernorm.weightA
|
867 |
+
output283� @2*layers.31.self_attn.qkv_proj.k_proj.weightB
|
868 |
+
output284� @2*layers.31.self_attn.qkv_proj.q_proj.weight6
|
869 |
+
output285 ��2layers.31.mlp.down_proj.weight3
|
870 |
+
output2868� �2layers.31.mlp.up_proj.weight=
|
871 |
+
output287� 2)layers.31.post_attention_layernorm.weight5
|
872 |
+
output2888� �2layers.31.mlp.gate_proj.weight&
|
873 |
+
output289�� �2lm_head.weight
|
874 |
+
output290� 2norm.weight
|
layout_opt/model/graph.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7a76fc5f3f76d1d69d57e0e784721bafd07e3a61734f6594e8c815123a8a771
|
3 |
+
size 176877
|
model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6fd0c1be612908d3c6ede5cdedda302359d5279c7d078eaadcb48d17389030
|
3 |
+
size 53720651
|
token_generation_model/_tp0_bk0/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f084665c9d486b682a226970bf7ab5170c50859a2f0cc8e46fee1811b6421349
|
3 |
+
size 5612544
|
token_generation_model/_tp0_bk0/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db597e62ef7dd0b4be31a941ce01bedf8ff4e3e418a571d927191d0fe1ac7749
|
3 |
+
size 823209
|
token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b572d17843a963e8042dbdafc202058bb0d10fd3f7ce91e3f20bc1db70324d7
|
3 |
+
size 802071
|
token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f084665c9d486b682a226970bf7ab5170c50859a2f0cc8e46fee1811b6421349
|
3 |
+
size 5612544
|
token_generation_model/_tp0_bk0/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f156b82b448a0ce64eea8895c40e4e50c9f548dea673deb2d7d240fc9df9fe8
|
3 |
+
size 5786483
|
token_generation_model/_tp0_bk1/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6be078d7f3a246715b61da99d6878d44882d85a11eaa1685534babf11cde59e9
|
3 |
+
size 5684224
|
token_generation_model/_tp0_bk1/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:615f3bac52a53bbac64e539d918447282af0cafb90b1e95367b365bbeead8e67
|
3 |
+
size 822474
|
token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb1806c4248848e0be8635c95d728fa881fc2015e31447ec893a0beeb8b9509d
|
3 |
+
size 889786
|
token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6be078d7f3a246715b61da99d6878d44882d85a11eaa1685534babf11cde59e9
|
3 |
+
size 5684224
|
token_generation_model/_tp0_bk2/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29908d241bdaf407bcb11c682477667cf52ec74ad12cdf8d715dc83bd83a5cbe
|
3 |
+
size 5766144
|
token_generation_model/_tp0_bk2/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b42bbad04ce54008f13f9aa4f7bc225fbf166e7956990acd2074f725ca8e6c9b
|
3 |
+
size 822474
|
token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6c389f0365aeb0a927dc422d904d7ad06f1694d48b6db93e81f9c03a07b7cfc
|
3 |
+
size 889786
|
token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29908d241bdaf407bcb11c682477667cf52ec74ad12cdf8d715dc83bd83a5cbe
|
3 |
+
size 5766144
|
token_generation_model/_tp0_bk3/graph.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c34bc7faf28217f485f87fb2c1965b9511b78ff793098e760345b19c84e6079
|
3 |
+
size 5970944
|
token_generation_model/_tp0_bk3/metaneff.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22dccc9c15899011ced61b37b01373a7c26af06476601e88b3b3130f496e557b
|
3 |
+
size 822474
|