dimabr committed on
Commit
345c745
·
verified ·
1 Parent(s): e8d3353

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +24 -0
  2. context_encoding_model/_tp0_bk0/graph.neff +3 -0
  3. context_encoding_model/_tp0_bk0/metaneff.pb +3 -0
  4. context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.hlo_module.pb +3 -0
  5. context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff +3 -0
  6. context_encoding_model/_tp0_bk1/graph.neff +3 -0
  7. context_encoding_model/_tp0_bk1/metaneff.pb +3 -0
  8. context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.hlo_module.pb +3 -0
  9. context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff +3 -0
  10. context_encoding_model/_tp0_bk2/graph.neff +3 -0
  11. context_encoding_model/_tp0_bk2/metaneff.pb +3 -0
  12. context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.hlo_module.pb +3 -0
  13. context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff +3 -0
  14. context_encoding_model/_tp0_bk3/compile_flags.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.json +1 -0
  15. context_encoding_model/_tp0_bk3/global_metric_store.json +1079 -0
  16. context_encoding_model/_tp0_bk3/graph.neff +3 -0
  17. context_encoding_model/_tp0_bk3/metaneff.pb +3 -0
  18. context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.hlo_module.pb +3 -0
  19. context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff +3 -0
  20. context_encoding_model/_tp0_bk3/neuron_config.json +213 -0
  21. context_encoding_model/_tp0_bk4/command.txt +1 -0
  22. context_encoding_model/_tp0_bk4/compile_flags.MODULE_d342327da795afc2aa68+5e8b788a.json +1 -0
  23. context_encoding_model/_tp0_bk4/global_metric_store.json +1079 -0
  24. context_encoding_model/_tp0_bk4/graph.neff +3 -0
  25. context_encoding_model/_tp0_bk4/log-neuron-cc.txt +0 -0
  26. context_encoding_model/_tp0_bk4/metaneff.pb +3 -0
  27. context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb +3 -0
  28. context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff +3 -0
  29. context_encoding_model/_tp0_bk4/neuron_config.json +213 -0
  30. layout_opt/command.txt +1 -0
  31. layout_opt/graph.neff +3 -0
  32. layout_opt/log-neuron-cc.txt +0 -0
  33. layout_opt/metaneff +874 -0
  34. layout_opt/model/graph.hlo +3 -0
  35. model.pt +3 -0
  36. token_generation_model/_tp0_bk0/graph.neff +3 -0
  37. token_generation_model/_tp0_bk0/metaneff.pb +3 -0
  38. token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.hlo_module.pb +3 -0
  39. token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff +3 -0
  40. token_generation_model/_tp0_bk0/wrapped_neff.hlo +3 -0
  41. token_generation_model/_tp0_bk1/graph.neff +3 -0
  42. token_generation_model/_tp0_bk1/metaneff.pb +3 -0
  43. token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.hlo_module.pb +3 -0
  44. token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff +3 -0
  45. token_generation_model/_tp0_bk2/graph.neff +3 -0
  46. token_generation_model/_tp0_bk2/metaneff.pb +3 -0
  47. token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.hlo_module.pb +3 -0
  48. token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff +3 -0
  49. token_generation_model/_tp0_bk3/graph.neff +3 -0
  50. token_generation_model/_tp0_bk3/metaneff.pb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.model.v3 filter=lfs diff=lfs merge=lfs -text
37
+ token_generation_model/_tp0_bk3/graph.neff filter=lfs diff=lfs merge=lfs -text
38
+ token_generation_model/_tp0_bk3/model.MODULE_668122c92a86c0ce6817+f94fe8ed.neff filter=lfs diff=lfs merge=lfs -text
39
+ token_generation_model/_tp0_bk4/model.MODULE_fb6decaa94b1936d08da+1b5847e3.neff filter=lfs diff=lfs merge=lfs -text
40
+ token_generation_model/_tp0_bk4/graph.neff filter=lfs diff=lfs merge=lfs -text
41
+ token_generation_model/_tp0_bk0/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
42
+ token_generation_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
43
+ token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff filter=lfs diff=lfs merge=lfs -text
44
+ token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff filter=lfs diff=lfs merge=lfs -text
45
+ token_generation_model/_tp0_bk2/graph.neff filter=lfs diff=lfs merge=lfs -text
46
+ token_generation_model/_tp0_bk1/graph.neff filter=lfs diff=lfs merge=lfs -text
47
+ token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff filter=lfs diff=lfs merge=lfs -text
48
+ context_encoding_model/_tp0_bk1/graph.neff filter=lfs diff=lfs merge=lfs -text
49
+ context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff filter=lfs diff=lfs merge=lfs -text
50
+ context_encoding_model/_tp0_bk2/graph.neff filter=lfs diff=lfs merge=lfs -text
51
+ context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff filter=lfs diff=lfs merge=lfs -text
52
+ context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff filter=lfs diff=lfs merge=lfs -text
53
+ context_encoding_model/_tp0_bk0/graph.neff filter=lfs diff=lfs merge=lfs -text
54
+ context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff filter=lfs diff=lfs merge=lfs -text
55
+ context_encoding_model/_tp0_bk3/graph.neff filter=lfs diff=lfs merge=lfs -text
56
+ context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff filter=lfs diff=lfs merge=lfs -text
57
+ context_encoding_model/_tp0_bk4/graph.neff filter=lfs diff=lfs merge=lfs -text
58
+ layout_opt/graph.neff filter=lfs diff=lfs merge=lfs -text
59
+ layout_opt/model/graph.hlo filter=lfs diff=lfs merge=lfs -text
context_encoding_model/_tp0_bk0/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4204eb4a15cdd349ac9a8e820ca7e3720613827e792ac79e7a5dd1055080e37
3
+ size 625664
context_encoding_model/_tp0_bk0/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026981247cc92ae3d4098052e6e5cd96444bcad2ad94540d0cedbaf5978e6a67
3
+ size 873633
context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83c27a0c2c3a9734291ce7f47544f4494b27f1c8a6c5b171a2abaead1f7e45c
3
+ size 939543
context_encoding_model/_tp0_bk0/model.MODULE_c6824be80aab0b095843+cc19d8a1.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4204eb4a15cdd349ac9a8e820ca7e3720613827e792ac79e7a5dd1055080e37
3
+ size 625664
context_encoding_model/_tp0_bk1/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97771a78aebed34c313542b68a55aa0b7ad1bcc196ef7859e9c6d32f2aca5755
3
+ size 728064
context_encoding_model/_tp0_bk1/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7919366f46c6b8e36ccac5f786a1a8c01287cf244d988f1b58a68be4356face6
3
+ size 971205
context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925d5603c197f320b5a97bcd1bb270fee71d58a600ff2ac6d2ac1c4ce205b7b6
3
+ size 1037079
context_encoding_model/_tp0_bk1/model.MODULE_68c159ab1fef44a40212+6a9a7e72.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97771a78aebed34c313542b68a55aa0b7ad1bcc196ef7859e9c6d32f2aca5755
3
+ size 728064
context_encoding_model/_tp0_bk2/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:984d840f5e118d1ed3bba502a877aa785002b074a22a45b384cd2172958beb3f
3
+ size 1035264
context_encoding_model/_tp0_bk2/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0cb712ca363e2b1bd7dcf4027263b606df90abb05f672ae1e8fb4af5f2b3616
3
+ size 1167813
context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf8c3a993bb11cf1a28c6df55c6f130c69c4908ba4dde20d7bbc2356b5f2f53
3
+ size 1233687
context_encoding_model/_tp0_bk2/model.MODULE_78e5291800ea5b96a03b+442879bd.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:984d840f5e118d1ed3bba502a877aa785002b074a22a45b384cd2172958beb3f
3
+ size 1035264
context_encoding_model/_tp0_bk3/compile_flags.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/models/mistral-7b-v0.3-instruct-neuronx/context_encoding_model/_tp0_bk3/log-neuron-cc.txt"]
context_encoding_model/_tp0_bk3/global_metric_store.json ADDED
@@ -0,0 +1,1079 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Average": {
3
+ "tensorizer": {
4
+ "StaticProfiler::AverageFractalPeUtilization": 99.79875946044922,
5
+ "StaticProfiler::AveragePartitionUtilization": 99.50694274902344,
6
+ "StaticProfiler::AveragePeUtilization": 99.19517517089844,
7
+ "StaticProfiler::LocalizationEfficiency": 80.37861633300781,
8
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 88.63314819335938,
9
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
10
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
11
+ }
12
+ },
13
+ "Count": {
14
+ "tensorizer": {
15
+ "StaticProfiler::AverageFractalPeUtilization": 1.0,
16
+ "StaticProfiler::AveragePartitionUtilization": 1.0,
17
+ "StaticProfiler::AveragePeUtilization": 1.0,
18
+ "StaticProfiler::LocalizationEfficiency": 1.0,
19
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
20
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
21
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
22
+ }
23
+ },
24
+ "Sum": {
25
+ "compiletime": {
26
+ "AGOrderingAnalysisPass": 0.01833963394165039,
27
+ "AffinePredicateResolution": 0.0011298656463623047,
28
+ "AliasDependencyElimination": 0.0003044605255126953,
29
+ "AliasDependencyInduction": 0.0059871673583984375,
30
+ "AliasDependencyReset": 0.024695634841918945,
31
+ "BFComputeCutting": 0.0022745132446289063,
32
+ "BirCodeGenLoop": 0.12499594688415527,
33
+ "CCOpFusion": 0.025257110595703125,
34
+ "CanonicalizeConv": 2.300000051036477e-05,
35
+ "CanonicalizeDAGForPGTiling": 0.0039975643157958984,
36
+ "CanonicalizeForTensorizer": 5.6000000768108293e-05,
37
+ "CanonicalizeIR": 0.0017023086547851563,
38
+ "Canonicalizer": 0.0013979999348521233,
39
+ "CoalesceCCOp": 0.006863117218017578,
40
+ "CommuteConcat": 0.0009205341339111328,
41
+ "DMALocalityOpt": 0.0034034252166748047,
42
+ "DMAProfiler": 0.007045269012451172,
43
+ "DMATilingProfiler": 0.004607439041137695,
44
+ "DataLocalityOpt": 0.15957880020141602,
45
+ "DataStreaming": 0.037320613861083984,
46
+ "DeConcat": 0.0007259845733642578,
47
+ "DeadCodeElimination": 0.0009546279907226563,
48
+ "DeadStoreElimination": 0.006250619888305664,
49
+ "DelinearIndices": 0.005332231521606445,
50
+ "Delinearization": 0.0033500194549560547,
51
+ "DoNothing": 0.00037598609924316406,
52
+ "DramToDramTranspose": 0.020763397216796875,
53
+ "DumpGraphAndMetadata": 0.025223493576049805,
54
+ "EliminateDivs": 0.0023469924926757813,
55
+ "ExpandBatchNorm": 0.001692056655883789,
56
+ "ExpandISAMacro": 0.009050607681274414,
57
+ "FactorizeBlkDims": 0.009798526763916016,
58
+ "FactorizeThreadAxesInFreeDims": 0.002184152603149414,
59
+ "FlattenMacroLoop": 0.0022482872009277344,
60
+ "GenericAccessSimplifier": 0.0009622573852539063,
61
+ "HoistCompute": 6.000000212225132e-06,
62
+ "IdentifyCrossPassTensors": 5.999999848427251e-05,
63
+ "InferInitValue": 0.027300357818603516,
64
+ "InferIntrinsicOnCC": 0.009199380874633789,
65
+ "InferNeuronTensor": 0.028067350387573242,
66
+ "InferNonlocalTensors": 0.014671802520751953,
67
+ "InferPSumTensor": 0.08141279220581055,
68
+ "InlineNativeKernels": 0.002727031707763672,
69
+ "InsertIOTransposes": 0.017727136611938477,
70
+ "InsertLocalTransposes": 0.004176616668701172,
71
+ "InsertOffloadedTransposes": 0.002771615982055664,
72
+ "LICM": 0.005248069763183594,
73
+ "LateLegalizeInst": 0.007282733917236328,
74
+ "LateLegalizePostSplit": 0.0045223236083984375,
75
+ "LateLowerReshapeOp": 0.0012927055358886719,
76
+ "LateLowerTensorOp": 0.0014028549194335938,
77
+ "LateNeuronInstComb": 0.016957759857177734,
78
+ "LayoutPreprocessing": 0.026221275329589844,
79
+ "LayoutPreprocessingAndAnalysis": 0.07468867301940918,
80
+ "LayoutRequirementAnalysis": 0.004823446273803711,
81
+ "LegalizeCCOpLayout": 0.0023353099822998047,
82
+ "LegalizeOpLevelAlias": 0.0013494491577148438,
83
+ "LegalizePartitionReduce": 0.0018906593322753906,
84
+ "LegalizeSundaAccess": 0.06240987777709961,
85
+ "LegalizeSundaMacro": 0.04256129264831543,
86
+ "LegalizeType": 0.006028175354003906,
87
+ "LocalLayoutOpt": 0.016018390655517578,
88
+ "LoopFusion": 0.005109071731567383,
89
+ "LoopSplitting": 0.00048542022705078125,
90
+ "LowerBroadcast": 0.003258943557739258,
91
+ "LowerCCOpBlockAxis": 0.0038700103759765625,
92
+ "LowerComplexBroadcast": 0.004511594772338867,
93
+ "LowerIntrinsics": 0.32482099533081055,
94
+ "LowerTensorOp": 0.010710477828979492,
95
+ "LowerTranspose": 0.054924726486206055,
96
+ "MacroGeneration": 0.061620473861694336,
97
+ "MaskPropagation": 0.002919435501098633,
98
+ "MemcastMotion": 3.400000059627928e-05,
99
+ "MemcpyElimination": 0.02559375762939453,
100
+ "MutateDataType": 0.0014896392822265625,
101
+ "NeuronAliasDependencyInduction": 0.0002808570861816406,
102
+ "NeuronAliasDependencyReset": 0.05649685859680176,
103
+ "NeuronInstComb": 0.005097627639770508,
104
+ "NeuronLICM": 0.014602899551391602,
105
+ "NeuronLoopFusion": 0.009732246398925781,
106
+ "NeuronLoopInterchange": 0.0025072097778320313,
107
+ "NeuronSimplifier": 0.03835606575012207,
108
+ "NeuronSimplifyPredicates": 0.009032487869262695,
109
+ "NeuronValueNumbering": 0.003210306167602539,
110
+ "OptimizeAliasedCopyChain": 0.0007545948028564453,
111
+ "OptimizeNKIKernels": 0.6443507671356201,
112
+ "PAGLayoutOpt": 0.20021605491638184,
113
+ "PComputeCutting": 0.0046160221099853516,
114
+ "PGLayoutTilingPipeline": 0.6925618648529053,
115
+ "PGTiling": 0.21065187454223633,
116
+ "PadElimination": 0.00038623809814453125,
117
+ "ParAxesAnnotation": 0.052834510803222656,
118
+ "PartialLoopFusion": 0.051622629165649414,
119
+ "PartialSimdFusion": 0.014065980911254883,
120
+ "PenguinizeFunctions": 5.199999941396527e-05,
121
+ "PerfectLoopNest": 0.0019462108612060547,
122
+ "PruneFunctions": 4.3000000005122274e-05,
123
+ "RecognizeOpIdiom": 0.0037450790405273438,
124
+ "Recompute": 0.0004031658172607422,
125
+ "RelaxPredicates": 0.03561973571777344,
126
+ "Rematerialization": 0.0018870830535888672,
127
+ "RemoveOptimizationBarriers": 6.500000017695129e-05,
128
+ "ReshapeWeights": 0.0009450912475585938,
129
+ "ResolveAccessConflict": 0.0038840770721435547,
130
+ "ResolveComplicatePredicates": 0.0011222362518310547,
131
+ "RewriteReplicationMatmul": 0.0017135143280029297,
132
+ "RewriteWeights": 0.0024623870849609375,
133
+ "SFKVectorizer": 0.19468188285827637,
134
+ "ScatterMotion": 1.0000000656873453e-05,
135
+ "SimpleAllReduceTiling": 0.0037994384765625,
136
+ "Simplifier": 0.0030031204223632813,
137
+ "SimplifyMacroPredicates": 0.005193233489990234,
138
+ "SimplifyNeuronTensor": 0.38555216789245605,
139
+ "SimplifySlice": 0.001062631607055664,
140
+ "SimplifyTensor": 0.009534358978271484,
141
+ "SpillPSum": 0.053937673568725586,
142
+ "SplitAPUnionSets": 0.013537406921386719,
143
+ "SplitAccGrp": 0.0014171600341796875,
144
+ "StaticProfiler": 0.005720615386962891,
145
+ "StaticTransposeLocalTensor": 0.003614664077758789,
146
+ "SundaISel": 0.09031486511230469,
147
+ "TCTransform": 0.0008947849273681641,
148
+ "TensorInitialization": 0.010958433151245117,
149
+ "TensorOpSimplifier": 0.005278110504150391,
150
+ "TensorOpTransform": 0.020787477493286133,
151
+ "TensorizerLegalizationPass": 6.299999949987978e-05,
152
+ "TileCCOps": 0.005544900894165039,
153
+ "TilingProfiler": 0.007747173309326172,
154
+ "TransformConvOp": 0.003238677978515625,
155
+ "TritiumFusion": 0.16130614280700684,
156
+ "ValueNumbering": 0.0018999576568603516,
157
+ "VectorizeDMA": 0.0017979145050048828,
158
+ "VectorizeMatMult": 0.007079362869262695,
159
+ "VerifySupportedOps": 4.900000203633681e-05,
160
+ "WeightCoalescing": 0.0033416748046875,
161
+ "ZeroSizeTensorElimination": 0.00022983551025390625,
162
+ "algsimp": 0.0024079999420791864,
163
+ "batchnorm_expander": 4.999999873689376e-05,
164
+ "boundary-marker-removal": 1.3999999282532372e-05,
165
+ "call-inliner": 0.0004330000083427876,
166
+ "canonicalize-boundary-marker": 1.6999998479150236e-05,
167
+ "collective-stream-id-checker": 9.699999645818025e-05,
168
+ "comparison-expander": 0.0005000000237487257,
169
+ "computation-deduplicator": 7.700000423938036e-05,
170
+ "conditional-to-select": 2.099999983329326e-05,
171
+ "config-lowering": 0.00019799999427050352,
172
+ "constant-statistics": 0.0005200000014156103,
173
+ "constant_folding": 0.000295000005280599,
174
+ "cse": 5.499999679159373e-05,
175
+ "dce": 8.099999831756577e-05,
176
+ "dot_decomposer": 0.0013620000099763274,
177
+ "dynamic-slice-transpose": 1.5999999959603883e-05,
178
+ "eliminate-redundant-compare": 0.00025499999173916876,
179
+ "emit-offloaded-dropout": 7.100000220816582e-05,
180
+ "flatten-call-graph": 0.0007510000141337514,
181
+ "fuse-send-recv": 8.70000003487803e-05,
182
+ "hilo::LegalizeAlias": 1.5999999959603883e-05,
183
+ "hilo::NeuronInstCombine": 0.00015199999324977398,
184
+ "hilo::NeuronOpFusion": 5.0000002374872565e-05,
185
+ "hilo::ReplaceTokenTypeWithU8Pass": 6.70000008540228e-05,
186
+ "hilo::ScheduleFusion": 3.999999989900971e-06,
187
+ "hilo::SixtyFourHack": 7.599999662488699e-05,
188
+ "hilo::VerifyAliasing": 8.999999408842996e-06,
189
+ "hlo-mac-count": 0.0012550000101327896,
190
+ "hlo-verifier": 0.008069000206887722,
191
+ "instruction-histogram": 0.001006999984383583,
192
+ "io-con-pipe-begin": 7.999999979801942e-06,
193
+ "io-con-pipe-end": 9.999999974752427e-07,
194
+ "io-layout-normalization": 0.001221999991685152,
195
+ "io-statistics": 9.200000204145908e-05,
196
+ "legalize-ccops": 3.999999989900971e-06,
197
+ "legalize-compare": 1.4999999621068127e-05,
198
+ "lower-argminmax-custom-call": 1.300000076298602e-05,
199
+ "map-inline": 0.0007819999591447413,
200
+ "metadata-naming": 6.800000119255856e-05,
201
+ "mlir::detail::OpToOpPassAdaptor": 0.00011300000187475234,
202
+ "mlir::hlo::MhloToPyPenguin": 0.07539799809455872,
203
+ "mlir::mhlo::LowerComplexExtraPass": 0.00035600000410340726,
204
+ "mlir::mhlo::LowerComplexPass": 0.0005510000046342611,
205
+ "native-to-custom-softmax": 0.0006350000621750951,
206
+ "native-to-custom-softmax-dx": 0.0006360000115819275,
207
+ "operand_upcaster": 6.799999391660094e-05,
208
+ "opt-barrier-removal": 0.0004710000066552311,
209
+ "post-par-pipe-begin": 1.4000000192027073e-05,
210
+ "post-par-pipe-end": 0.0,
211
+ "post-partition-simplification": 0.0020860000513494015,
212
+ "pre-par-pipe-begin": 9.999999974752427e-07,
213
+ "pre-par-pipe-end": 0.0,
214
+ "pre-partition-simplification": 0.21597300469875336,
215
+ "replace-minimum-constant": 0.00034199998481199145,
216
+ "reshape-mover": 0.00011600000289035961,
217
+ "simplify-concat": 0.00017500000831205398,
218
+ "simplify-while-loops": 0.00010400000610388815,
219
+ "transform-variadic-reduce": 8.299999899463728e-05,
220
+ "tuple-simplifier": 0.00028500001644715667,
221
+ "unpack-nested-aws-ntwsr": 0.0003440000000409782,
222
+ "unroll-while-loop": 1.900000097521115e-05,
223
+ "zero_sized_hlo_elimination": 0.0008210000232793391
224
+ },
225
+ "hilo": {
226
+ "ConstantSize": 2106325.0,
227
+ "HloInputCount": 359.0,
228
+ "HloMacCount": 231995342848.0,
229
+ "HloOutputCount": 65.0,
230
+ "IfmapSize": 7785168896.0,
231
+ "OfmapSize": 536870912.0,
232
+ "OutputsReadFromCount": 0.0,
233
+ "PassthroughTensorsCount": 0.0,
234
+ "RedundantOutputCount": 0.0,
235
+ "Traffic": 854718848.0
236
+ },
237
+ "tensorizer": {
238
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 17056.0,
239
+ "StaticProfiler::AifUb": 568.2581176757813,
240
+ "StaticProfiler::ArithmeticIntensityTensorizer": 456.7580261230469,
241
+ "StaticProfiler::AverageDmaLength": 1314.3221435546875,
242
+ "StaticProfiler::DDRTransferBytes": 407087136.0,
243
+ "StaticProfiler::InternalTransferBytes": 48342036.0,
244
+ "StaticProfiler::LoadExpanded": 310291.0,
245
+ "StaticProfiler::StoreExpanded": 6699.0,
246
+ "StaticProfiler::TotalDMAExpanded": 316990.0,
247
+ "StaticProfiler::TotalDynamicInstancesCount": 19674.0,
248
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19578.0,
249
+ "StaticProfiler::TotalLNCComm": 0.0,
250
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
251
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
252
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
253
+ "TilingProfiler::GenericInstructionsAfterTiling": 4.0,
254
+ "TilingProfiler::MatMultInstructionsAfterTiling": 14848.0,
255
+ "TilingProfiler::NumPfTransposes": 4.0,
256
+ "TilingProfiler::NumPfTransposesForIo": 0.0,
257
+ "TilingProfiler::NumPfTransposesForLocal": 1.0,
258
+ "TilingProfiler::NumPfTransposesForNonlocal": 3.0,
259
+ "TilingProfiler::PfTransposeInstructions": 769.0,
260
+ "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
261
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
262
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
263
+ "TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
264
+ "TilingProfiler::SimdInstructionsAfterTiling": 319.0,
265
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
266
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
267
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
268
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
269
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
270
+ "TransformConvOp::conv2d_column_packing": 0.0,
271
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
272
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
273
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
274
+ }
275
+ },
276
+ "all": {
277
+ "compiletime": {
278
+ "algsimp": 0.0021410000044852495,
279
+ "call-inliner": 0.00039599998854100704,
280
+ "collective-stream-id-checker": 7.79999973019585e-05,
281
+ "comparison-expander": 0.00048099999548867345,
282
+ "constant-statistics": 0.0005200000014156103,
283
+ "constant_folding": 0.0002629999944474548,
284
+ "dce": 7.699999696342275e-05,
285
+ "dot_decomposer": 0.0013620000099763274,
286
+ "eliminate-redundant-compare": 0.00024199999461416155,
287
+ "flatten-call-graph": 0.0007140000234358013,
288
+ "hlo-mac-count": 0.0009169999975711107,
289
+ "hlo-verifier": 0.007406999822705984,
290
+ "instruction-histogram": 0.001006999984383583,
291
+ "io-con-pipe-begin": 7.999999979801942e-06,
292
+ "io-con-pipe-end": 9.999999974752427e-07,
293
+ "io-layout-normalization": 0.001221999991685152,
294
+ "io-statistics": 9.200000204145908e-05,
295
+ "map-inline": 0.0007389999809674919,
296
+ "native-to-custom-softmax": 0.0005990000208839774,
297
+ "native-to-custom-softmax-dx": 0.0004440000047907233,
298
+ "opt-barrier-removal": 0.0004710000066552311,
299
+ "pre-par-pipe-begin": 9.999999974752427e-07,
300
+ "pre-par-pipe-end": 0.0,
301
+ "pre-partition-simplification": 0.21597300469875336,
302
+ "replace-minimum-constant": 0.00030899999546818435,
303
+ "reshape-mover": 0.00010299999848939478,
304
+ "simplify-while-loops": 9.40000027185306e-05,
305
+ "tuple-simplifier": 0.00026699999580159783,
306
+ "unpack-nested-aws-ntwsr": 0.0003319999959785491,
307
+ "unroll-while-loop": 1.8000000636675395e-05,
308
+ "zero_sized_hlo_elimination": 0.0008210000232793391
309
+ }
310
+ },
311
+ "cumsum": {
312
+ "compiletime": {
313
+ "CoalesceCCOp": 0.000293731689453125,
314
+ "DMALocalityOpt": 0.00022101402282714844,
315
+ "DMAProfiler": 0.0010464191436767578,
316
+ "DataStreaming": 0.00040221214294433594,
317
+ "DoNothing": 0.00025200843811035156,
318
+ "ExpandISAMacro": 0.0005903244018554688,
319
+ "FactorizeBlkDims": 0.0005807876586914063,
320
+ "InferPSumTensor": 0.0005562305450439453,
321
+ "LateLegalizeInst": 0.00046944618225097656,
322
+ "LateNeuronInstComb": 0.0006792545318603516,
323
+ "LegalizeSundaAccess": 0.0017774105072021484,
324
+ "LegalizeType": 0.00032138824462890625,
325
+ "LowerBroadcast": 0.0003333091735839844,
326
+ "LowerIntrinsics": 0.0002849102020263672,
327
+ "LowerTranspose": 0.00046753883361816406,
328
+ "NeuronInstComb": 0.0008723735809326172,
329
+ "NeuronLICM": 0.00047659873962402344,
330
+ "NeuronSimplifyPredicates": 0.0030825138092041016,
331
+ "NeuronValueNumbering": 0.0004870891571044922,
332
+ "SFKVectorizer": 0.003458738327026367,
333
+ "SimpleAllReduceTiling": 0.0002646446228027344,
334
+ "SimplifyNeuronTensor": 0.0004863739013671875,
335
+ "SpillPSum": 0.0005884170532226563,
336
+ "WeightCoalescing": 0.00028324127197265625
337
+ }
338
+ },
339
+ "sg00": {
340
+ "compiletime": {
341
+ "CanonicalizeConv": 2.300000051036477e-05,
342
+ "CanonicalizeForTensorizer": 2.2000000171829015e-05,
343
+ "Canonicalizer": 0.0005029999883845448,
344
+ "HoistCompute": 9.999999974752427e-07,
345
+ "IdentifyCrossPassTensors": 2.2000000171829015e-05,
346
+ "MemcastMotion": 7.999999979801942e-06,
347
+ "PenguinizeFunctions": 2.2000000171829015e-05,
348
+ "PruneFunctions": 1.5999999959603883e-05,
349
+ "RemoveOptimizationBarriers": 2.4000000848900527e-05,
350
+ "ScatterMotion": 1.9999999949504854e-06,
351
+ "TensorizerLegalizationPass": 3.400000059627928e-05,
352
+ "VerifySupportedOps": 1.5999999959603883e-05,
353
+ "algsimp": 6.900000153109431e-05,
354
+ "batchnorm_expander": 1.2999999853491317e-05,
355
+ "boundary-marker-removal": 3.000000106112566e-06,
356
+ "call-inliner": 7.999999979801942e-06,
357
+ "canonicalize-boundary-marker": 3.999999989900971e-06,
358
+ "collective-stream-id-checker": 3.000000106112566e-06,
359
+ "comparison-expander": 3.999999989900971e-06,
360
+ "computation-deduplicator": 1.5999999959603883e-05,
361
+ "conditional-to-select": 4.999999873689376e-06,
362
+ "config-lowering": 5.8000001445179805e-05,
363
+ "constant_folding": 7.999999979801942e-06,
364
+ "cse": 1.4999999621068127e-05,
365
+ "dce": 9.999999974752427e-07,
366
+ "dynamic-slice-transpose": 3.999999989900971e-06,
367
+ "eliminate-redundant-compare": 3.000000106112566e-06,
368
+ "emit-offloaded-dropout": 2.099999983329326e-05,
369
+ "flatten-call-graph": 7.999999979801942e-06,
370
+ "fuse-send-recv": 2.499999936844688e-05,
371
+ "hilo::LegalizeAlias": 7.000000096013537e-06,
372
+ "hilo::NeuronInstCombine": 5.400000009103678e-05,
373
+ "hilo::NeuronOpFusion": 6.000000212225132e-06,
374
+ "hilo::ReplaceTokenTypeWithU8Pass": 2.4000000848900527e-05,
375
+ "hilo::ScheduleFusion": 0.0,
376
+ "hilo::SixtyFourHack": 1.4999999621068127e-05,
377
+ "hilo::VerifyAliasing": 3.999999989900971e-06,
378
+ "hlo-mac-count": 3.7999998312443495e-05,
379
+ "hlo-verifier": 0.00017100000695791095,
380
+ "legalize-ccops": 9.999999974752427e-07,
381
+ "legalize-compare": 3.999999989900971e-06,
382
+ "lower-argminmax-custom-call": 3.000000106112566e-06,
383
+ "map-inline": 1.1000000085914508e-05,
384
+ "metadata-naming": 1.700000029813964e-05,
385
+ "mlir::detail::OpToOpPassAdaptor": 3.7000001611886546e-05,
386
+ "mlir::hlo::MhloToPyPenguin": 0.033358000218868256,
387
+ "mlir::mhlo::LowerComplexExtraPass": 0.00013499999477062374,
388
+ "mlir::mhlo::LowerComplexPass": 0.00014200000441633165,
389
+ "native-to-custom-softmax": 7.999999979801942e-06,
390
+ "native-to-custom-softmax-dx": 9.999999747378752e-05,
391
+ "operand_upcaster": 1.8999999156221747e-05,
392
+ "post-par-pipe-begin": 1.9999999949504854e-06,
393
+ "post-par-pipe-end": 0.0,
394
+ "post-partition-simplification": 0.0005610000225715339,
395
+ "replace-minimum-constant": 9.000000318337698e-06,
396
+ "reshape-mover": 3.999999989900971e-06,
397
+ "simplify-concat": 5.199999941396527e-05,
398
+ "simplify-while-loops": 1.9999999949504854e-06,
399
+ "transform-variadic-reduce": 9.000000318337698e-06,
400
+ "tuple-simplifier": 3.999999989900971e-06,
401
+ "unpack-nested-aws-ntwsr": 3.000000106112566e-06,
402
+ "unroll-while-loop": 0.0
403
+ },
404
+ "hilo": {
405
+ "ArithmeticIntensity": 221.8579559326172,
406
+ "ConstantSize": 2106325.0,
407
+ "HloInputCount": 359.0,
408
+ "HloMacCount": 25769803776.0,
409
+ "HloOutputCount": 65.0,
410
+ "IfmapSize": 7785168896.0,
411
+ "OfmapSize": 536870912.0,
412
+ "OutputsReadFromCount": 0.0,
413
+ "PassthroughTensorsCount": 0.0,
414
+ "RedundantOutputCount": 0.0,
415
+ "Traffic": 232309024.0
416
+ }
417
+ },
418
+ "sg0000": {
419
+ "compiletime": {
420
+ "AGOrderingAnalysisPass": 0.07846212387084961,
421
+ "AffinePredicateResolution": 0.0015842914581298828,
422
+ "AliasDependencyElimination": 0.0002803802490234375,
423
+ "AliasDependencyInduction": 0.03549337387084961,
424
+ "AliasDependencyReset": 0.06158638000488281,
425
+ "BFComputeCutting": 0.003358125686645508,
426
+ "BirCodeGenLoop": 0.06645083427429199,
427
+ "CCOpFusion": 0.03297877311706543,
428
+ "CanonicalizeDAGForPGTiling": 0.0029740333557128906,
429
+ "CanonicalizeIR": 0.0038878917694091797,
430
+ "CoalesceCCOp": 0.0058116912841796875,
431
+ "CommuteConcat": 0.0010180473327636719,
432
+ "DMALocalityOpt": 0.0015497207641601563,
433
+ "DMAProfiler": 0.005065441131591797,
434
+ "DMATilingProfiler": 0.004613637924194336,
435
+ "DataLocalityOpt": 0.16799569129943848,
436
+ "DataStreaming": 0.00627899169921875,
437
+ "DeConcat": 0.0015079975128173828,
438
+ "DeadCodeElimination": 0.0011029243469238281,
439
+ "DeadStoreElimination": 0.06819939613342285,
440
+ "DelinearIndices": 0.0475771427154541,
441
+ "Delinearization": 0.003088235855102539,
442
+ "DoNothing": 0.0001838207244873047,
443
+ "DramToDramTranspose": 0.08775472640991211,
444
+ "DumpGraphAndMetadata": 0.013874053955078125,
445
+ "EliminateDivs": 0.006442070007324219,
446
+ "ExpandBatchNorm": 0.00305938720703125,
447
+ "ExpandISAMacro": 0.00470423698425293,
448
+ "FactorizeBlkDims": 0.026311397552490234,
449
+ "FactorizeThreadAxesInFreeDims": 0.0019838809967041016,
450
+ "FlattenMacroLoop": 0.004168987274169922,
451
+ "GenericAccessSimplifier": 0.0016493797302246094,
452
+ "InferInitValue": 0.05328845977783203,
453
+ "InferIntrinsicOnCC": 0.009886503219604492,
454
+ "InferNeuronTensor": 0.08689069747924805,
455
+ "InferNonlocalTensors": 0.2075808048248291,
456
+ "InferPSumTensor": 0.12219834327697754,
457
+ "InlineNativeKernels": 0.002942323684692383,
458
+ "InsertIOTransposes": 0.019949674606323242,
459
+ "InsertLocalTransposes": 0.0066678524017333984,
460
+ "InsertOffloadedTransposes": 0.005246877670288086,
461
+ "LICM": 0.002876758575439453,
462
+ "LateLegalizeInst": 0.009313821792602539,
463
+ "LateLegalizePostSplit": 0.0034275054931640625,
464
+ "LateLowerReshapeOp": 0.001237630844116211,
465
+ "LateLowerTensorOp": 0.036368370056152344,
466
+ "LateNeuronInstComb": 0.019298315048217773,
467
+ "LayoutPreprocessing": 0.0656280517578125,
468
+ "LayoutPreprocessingAndAnalysis": 0.0845177173614502,
469
+ "LayoutRequirementAnalysis": 0.006539821624755859,
470
+ "LegalizeCCOpLayout": 0.002690553665161133,
471
+ "LegalizeOpLevelAlias": 0.002089977264404297,
472
+ "LegalizePartitionReduce": 0.0019116401672363281,
473
+ "LegalizeSundaAccess": 0.04238390922546387,
474
+ "LegalizeSundaMacro": 0.008917093276977539,
475
+ "LegalizeType": 0.00662541389465332,
476
+ "LocalLayoutOpt": 0.017171859741210938,
477
+ "LoopFusion": 0.04693031311035156,
478
+ "LoopSplitting": 0.0004513263702392578,
479
+ "LowerBroadcast": 0.0021796226501464844,
480
+ "LowerCCOpBlockAxis": 0.005298614501953125,
481
+ "LowerComplexBroadcast": 0.002663135528564453,
482
+ "LowerIntrinsics": 0.08481836318969727,
483
+ "LowerTensorOp": 0.05078911781311035,
484
+ "LowerTranspose": 0.052706241607666016,
485
+ "MacroGeneration": 0.16595196723937988,
486
+ "MaskPropagation": 0.00496983528137207,
487
+ "MemcpyElimination": 0.27239394187927246,
488
+ "MutateDataType": 0.0022711753845214844,
489
+ "NeuronAliasDependencyInduction": 0.00037479400634765625,
490
+ "NeuronAliasDependencyReset": 0.012241363525390625,
491
+ "NeuronInstComb": 0.010676145553588867,
492
+ "NeuronLICM": 0.01803445816040039,
493
+ "NeuronLoopFusion": 0.01843857765197754,
494
+ "NeuronLoopInterchange": 0.0022115707397460938,
495
+ "NeuronSimplifier": 0.011580228805541992,
496
+ "NeuronSimplifyPredicates": 0.017709970474243164,
497
+ "NeuronValueNumbering": 0.045330047607421875,
498
+ "OptimizeAliasedCopyChain": 0.0012116432189941406,
499
+ "OptimizeNKIKernels": 0.04246807098388672,
500
+ "PAGLayoutOpt": 0.38617491722106934,
501
+ "PComputeCutting": 0.008383512496948242,
502
+ "PGLayoutTilingPipeline": 1.3029937744140625,
503
+ "PGTiling": 0.34752726554870605,
504
+ "PadElimination": 0.0006172657012939453,
505
+ "ParAxesAnnotation": 0.36298155784606934,
506
+ "PartialLoopFusion": 0.024132490158081055,
507
+ "PartialSimdFusion": 0.026205062866210938,
508
+ "PerfectLoopNest": 0.0019898414611816406,
509
+ "RecognizeOpIdiom": 0.006145477294921875,
510
+ "Recompute": 0.00034356117248535156,
511
+ "RelaxPredicates": 0.0044634342193603516,
512
+ "Rematerialization": 0.004605531692504883,
513
+ "ReshapeWeights": 0.0008733272552490234,
514
+ "ResolveAccessConflict": 0.003629446029663086,
515
+ "ResolveComplicatePredicates": 0.0018143653869628906,
516
+ "RewriteReplicationMatmul": 0.001529693603515625,
517
+ "RewriteWeights": 0.0036728382110595703,
518
+ "SFKVectorizer": 0.5580539703369141,
519
+ "SimpleAllReduceTiling": 0.0026845932006835938,
520
+ "Simplifier": 0.0046727657318115234,
521
+ "SimplifyMacroPredicates": 0.01622939109802246,
522
+ "SimplifyNeuronTensor": 0.015488386154174805,
523
+ "SimplifySlice": 0.0018961429595947266,
524
+ "SimplifyTensor": 0.006178140640258789,
525
+ "SpillPSum": 0.06065011024475098,
526
+ "SplitAPUnionSets": 0.075592041015625,
527
+ "SplitAccGrp": 0.0017442703247070313,
528
+ "StaticProfiler": 0.00494384765625,
529
+ "StaticTransposeLocalTensor": 0.0146331787109375,
530
+ "SundaISel": 0.056458473205566406,
531
+ "TCTransform": 0.001115560531616211,
532
+ "TensorInitialization": 0.021691322326660156,
533
+ "TensorOpSimplifier": 0.010814189910888672,
534
+ "TensorOpTransform": 0.07015466690063477,
535
+ "TileCCOps": 0.007310152053833008,
536
+ "TilingProfiler": 0.012901067733764648,
537
+ "TransformConvOp": 0.00453495979309082,
538
+ "TritiumFusion": 0.10158801078796387,
539
+ "ValueNumbering": 0.0044324398040771484,
540
+ "VectorizeDMA": 0.006791114807128906,
541
+ "VectorizeMatMult": 0.01838517189025879,
542
+ "WeightCoalescing": 0.004769086837768555,
543
+ "ZeroSizeTensorElimination": 0.00017833709716796875
544
+ },
545
+ "tensorizer": {
546
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 5791.0,
547
+ "StaticProfiler::AifUb": 261.38446044921875,
548
+ "StaticProfiler::ArithmeticIntensityTensorizer": 680.3948364257813,
549
+ "StaticProfiler::AverageDmaLength": 2076.933837890625,
550
+ "StaticProfiler::AverageFractalPeUtilization": 99.95938110351563,
551
+ "StaticProfiler::AveragePartitionUtilization": 99.89742279052734,
552
+ "StaticProfiler::AveragePeUtilization": 99.83380126953125,
553
+ "StaticProfiler::DDRTransferBytes": 87646472.0,
554
+ "StaticProfiler::InternalTransferBytes": 114032640.0,
555
+ "StaticProfiler::LoadExpanded": 20995.0,
556
+ "StaticProfiler::LocalizationEfficiency": 260.3042297363281,
557
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 391.4252624511719,
558
+ "StaticProfiler::StoreExpanded": 10753.0,
559
+ "StaticProfiler::TotalDMAExpanded": 31748.0,
560
+ "StaticProfiler::TotalDynamicInstancesCount": 8459.0,
561
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 8453.0,
562
+ "StaticProfiler::TotalLNCComm": 0.0,
563
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
564
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
565
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
566
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
567
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
568
+ "TilingProfiler::GenericInstructionsAfterTiling": 96.0,
569
+ "TilingProfiler::MatMultInstructionsAfterTiling": 3080.0,
570
+ "TilingProfiler::NumPfTransposes": 7.0,
571
+ "TilingProfiler::NumPfTransposesForIo": 1.0,
572
+ "TilingProfiler::NumPfTransposesForLocal": 5.0,
573
+ "TilingProfiler::NumPfTransposesForNonlocal": 1.0,
574
+ "TilingProfiler::PfTransposeInstructions": 1632.0,
575
+ "TilingProfiler::PfTransposeInstructionsForIo": 256.0,
576
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1248.0,
577
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 128.0,
578
+ "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
579
+ "TilingProfiler::SimdInstructionsAfterTiling": 612.0,
580
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
581
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
582
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
583
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
584
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
585
+ "TransformConvOp::conv2d_column_packing": 0.0,
586
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
587
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
588
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
589
+ }
590
+ },
591
+ "sg0001": {
592
+ "compiletime": {
593
+ "AGOrderingAnalysisPass": 0.19782710075378418,
594
+ "AffinePredicateResolution": 0.0014352798461914063,
595
+ "AliasDependencyElimination": 0.00022602081298828125,
596
+ "AliasDependencyInduction": 0.008897542953491211,
597
+ "AliasDependencyReset": 0.07564544677734375,
598
+ "BFComputeCutting": 0.0038797855377197266,
599
+ "BirCodeGenLoop": 0.08420419692993164,
600
+ "CCOpFusion": 0.0411074161529541,
601
+ "CanonicalizeDAGForPGTiling": 0.004708290100097656,
602
+ "CanonicalizeIR": 0.001739501953125,
603
+ "CoalesceCCOp": 0.005135774612426758,
604
+ "CommuteConcat": 0.0010938644409179688,
605
+ "DMALocalityOpt": 0.0010821819305419922,
606
+ "DMAProfiler": 0.03509354591369629,
607
+ "DMATilingProfiler": 0.008334875106811523,
608
+ "DataLocalityOpt": 0.3732140064239502,
609
+ "DataStreaming": 0.004484653472900391,
610
+ "DeConcat": 0.0014607906341552734,
611
+ "DeadCodeElimination": 0.002012491226196289,
612
+ "DeadStoreElimination": 0.06306838989257813,
613
+ "DelinearIndices": 0.03899812698364258,
614
+ "Delinearization": 0.015190839767456055,
615
+ "DoNothing": 0.00013589859008789063,
616
+ "DramToDramTranspose": 0.05379915237426758,
617
+ "DumpGraphAndMetadata": 0.053969621658325195,
618
+ "EliminateDivs": 0.005895376205444336,
619
+ "ExpandBatchNorm": 0.0030879974365234375,
620
+ "ExpandISAMacro": 0.002570629119873047,
621
+ "FactorizeBlkDims": 0.03216910362243652,
622
+ "FactorizeThreadAxesInFreeDims": 0.0017580986022949219,
623
+ "FlattenMacroLoop": 0.004896402359008789,
624
+ "GenericAccessSimplifier": 0.001070261001586914,
625
+ "InferInitValue": 0.09278488159179688,
626
+ "InferIntrinsicOnCC": 0.010787725448608398,
627
+ "InferNeuronTensor": 0.16329479217529297,
628
+ "InferNonlocalTensors": 0.08827400207519531,
629
+ "InferPSumTensor": 0.041254281997680664,
630
+ "InlineNativeKernels": 0.002732515335083008,
631
+ "InsertIOTransposes": 0.030591964721679688,
632
+ "InsertLocalTransposes": 0.0069196224212646484,
633
+ "InsertOffloadedTransposes": 0.0034880638122558594,
634
+ "LICM": 0.0034477710723876953,
635
+ "LateLegalizeInst": 0.005655765533447266,
636
+ "LateLegalizePostSplit": 0.003046751022338867,
637
+ "LateLowerReshapeOp": 0.0013928413391113281,
638
+ "LateLowerTensorOp": 0.0053386688232421875,
639
+ "LateNeuronInstComb": 0.027225971221923828,
640
+ "LayoutPreprocessing": 0.047040700912475586,
641
+ "LayoutPreprocessingAndAnalysis": 0.12968659400939941,
642
+ "LayoutRequirementAnalysis": 0.01332712173461914,
643
+ "LegalizeCCOpLayout": 0.0019299983978271484,
644
+ "LegalizeOpLevelAlias": 0.0019905567169189453,
645
+ "LegalizePartitionReduce": 0.0013320446014404297,
646
+ "LegalizeSundaAccess": 0.0154571533203125,
647
+ "LegalizeSundaMacro": 0.018419265747070313,
648
+ "LegalizeType": 0.0047800540924072266,
649
+ "LocalLayoutOpt": 0.029850482940673828,
650
+ "LoopFusion": 0.006402492523193359,
651
+ "LoopSplitting": 0.0006403923034667969,
652
+ "LowerBroadcast": 0.0029153823852539063,
653
+ "LowerCCOpBlockAxis": 0.005182743072509766,
654
+ "LowerComplexBroadcast": 0.0022389888763427734,
655
+ "LowerIntrinsics": 0.056134939193725586,
656
+ "LowerTensorOp": 0.01170802116394043,
657
+ "LowerTranspose": 0.0226747989654541,
658
+ "MacroGeneration": 0.12812113761901855,
659
+ "MaskPropagation": 0.003968477249145508,
660
+ "MemcpyElimination": 0.1272127628326416,
661
+ "MutateDataType": 0.0016314983367919922,
662
+ "NeuronAliasDependencyInduction": 0.0003142356872558594,
663
+ "NeuronAliasDependencyReset": 0.011624336242675781,
664
+ "NeuronInstComb": 0.00946044921875,
665
+ "NeuronLICM": 0.008498668670654297,
666
+ "NeuronLoopFusion": 0.01998734474182129,
667
+ "NeuronLoopInterchange": 0.0018498897552490234,
668
+ "NeuronSimplifier": 0.03274989128112793,
669
+ "NeuronSimplifyPredicates": 0.001984834671020508,
670
+ "NeuronValueNumbering": 0.03443026542663574,
671
+ "OptimizeAliasedCopyChain": 0.0008573532104492188,
672
+ "OptimizeNKIKernels": 0.0016489028930664063,
673
+ "PAGLayoutOpt": 0.52590012550354,
674
+ "PComputeCutting": 0.007617473602294922,
675
+ "PGLayoutTilingPipeline": 1.6884160041809082,
676
+ "PGTiling": 0.42557621002197266,
677
+ "PadElimination": 0.0004146099090576172,
678
+ "ParAxesAnnotation": 0.49584078788757324,
679
+ "PartialLoopFusion": 0.04620671272277832,
680
+ "PartialSimdFusion": 0.04396200180053711,
681
+ "PerfectLoopNest": 0.002160310745239258,
682
+ "RecognizeOpIdiom": 0.004221677780151367,
683
+ "Recompute": 0.0006210803985595703,
684
+ "RelaxPredicates": 0.0031533241271972656,
685
+ "Rematerialization": 0.0020017623901367188,
686
+ "ReshapeWeights": 0.0012595653533935547,
687
+ "ResolveAccessConflict": 0.034206390380859375,
688
+ "ResolveComplicatePredicates": 0.001447916030883789,
689
+ "RewriteReplicationMatmul": 0.003072500228881836,
690
+ "RewriteWeights": 0.005293369293212891,
691
+ "SFKVectorizer": 0.31648850440979004,
692
+ "SimpleAllReduceTiling": 0.0026230812072753906,
693
+ "Simplifier": 0.00507354736328125,
694
+ "SimplifyMacroPredicates": 0.011813640594482422,
695
+ "SimplifyNeuronTensor": 0.029469728469848633,
696
+ "SimplifySlice": 0.0010852813720703125,
697
+ "SimplifyTensor": 0.006476879119873047,
698
+ "SpillPSum": 0.047782182693481445,
699
+ "SplitAPUnionSets": 0.022653579711914063,
700
+ "SplitAccGrp": 0.0025262832641601563,
701
+ "StaticProfiler": 0.03480696678161621,
702
+ "StaticTransposeLocalTensor": 0.006014108657836914,
703
+ "SundaISel": 0.05354189872741699,
704
+ "TCTransform": 0.0011737346649169922,
705
+ "TensorInitialization": 0.004692554473876953,
706
+ "TensorOpSimplifier": 0.007290840148925781,
707
+ "TensorOpTransform": 0.039176225662231445,
708
+ "TileCCOps": 0.009789466857910156,
709
+ "TilingProfiler": 0.02116703987121582,
710
+ "TransformConvOp": 0.002421855926513672,
711
+ "TritiumFusion": 0.24414300918579102,
712
+ "ValueNumbering": 0.002656698226928711,
713
+ "VectorizeDMA": 0.0018146038055419922,
714
+ "VectorizeMatMult": 0.034119606018066406,
715
+ "WeightCoalescing": 0.002785921096801758,
716
+ "ZeroSizeTensorElimination": 0.00019216537475585938
717
+ },
718
+ "tensorizer": {
719
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 17420.0,
720
+ "StaticProfiler::AifUb": 844.2889404296875,
721
+ "StaticProfiler::ArithmeticIntensityTensorizer": 712.69189453125,
722
+ "StaticProfiler::AverageDmaLength": 1079.260986328125,
723
+ "StaticProfiler::AverageFractalPeUtilization": 100.0,
724
+ "StaticProfiler::AveragePartitionUtilization": 99.85012817382813,
725
+ "StaticProfiler::AveragePeUtilization": 100.0,
726
+ "StaticProfiler::DDRTransferBytes": 339836928.0,
727
+ "StaticProfiler::InternalTransferBytes": 106692608.0,
728
+ "StaticProfiler::LoadExpanded": 296193.0,
729
+ "StaticProfiler::LocalizationEfficiency": 84.41326904296875,
730
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 93.66107177734375,
731
+ "StaticProfiler::StoreExpanded": 10241.0,
732
+ "StaticProfiler::TotalDMAExpanded": 306434.0,
733
+ "StaticProfiler::TotalDynamicInstancesCount": 21356.0,
734
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 21356.0,
735
+ "StaticProfiler::TotalLNCComm": 0.0,
736
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
737
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
738
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
739
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
740
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
741
+ "TilingProfiler::GenericInstructionsAfterTiling": 64.0,
742
+ "TilingProfiler::MatMultInstructionsAfterTiling": 13824.0,
743
+ "TilingProfiler::NumPfTransposes": 9.0,
744
+ "TilingProfiler::NumPfTransposesForIo": 3.0,
745
+ "TilingProfiler::NumPfTransposesForLocal": 4.0,
746
+ "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
747
+ "TilingProfiler::PfTransposeInstructions": 1904.0,
748
+ "TilingProfiler::PfTransposeInstructionsForIo": 272.0,
749
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1120.0,
750
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 512.0,
751
+ "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
752
+ "TilingProfiler::SimdInstructionsAfterTiling": 683.0,
753
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
754
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
755
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
756
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
757
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
758
+ "TransformConvOp::conv2d_column_packing": 0.0,
759
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
760
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
761
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
762
+ }
763
+ },
764
+ "sg0002": {
765
+ "compiletime": {
766
+ "AGOrderingAnalysisPass": 0.01833963394165039,
767
+ "AffinePredicateResolution": 0.0011298656463623047,
768
+ "AliasDependencyElimination": 0.0003044605255126953,
769
+ "AliasDependencyInduction": 0.0059871673583984375,
770
+ "AliasDependencyReset": 0.024695634841918945,
771
+ "BFComputeCutting": 0.0022745132446289063,
772
+ "BirCodeGenLoop": 0.12499594688415527,
773
+ "CCOpFusion": 0.025257110595703125,
774
+ "CanonicalizeDAGForPGTiling": 0.0039975643157958984,
775
+ "CanonicalizeIR": 0.0017023086547851563,
776
+ "CoalesceCCOp": 0.006569385528564453,
777
+ "CommuteConcat": 0.0009205341339111328,
778
+ "DMALocalityOpt": 0.0031824111938476563,
779
+ "DMAProfiler": 0.005998849868774414,
780
+ "DMATilingProfiler": 0.004607439041137695,
781
+ "DataLocalityOpt": 0.15957880020141602,
782
+ "DataStreaming": 0.03691840171813965,
783
+ "DeConcat": 0.0007259845733642578,
784
+ "DeadCodeElimination": 0.0009546279907226563,
785
+ "DeadStoreElimination": 0.006250619888305664,
786
+ "DelinearIndices": 0.005332231521606445,
787
+ "Delinearization": 0.0033500194549560547,
788
+ "DoNothing": 0.0001239776611328125,
789
+ "DramToDramTranspose": 0.020763397216796875,
790
+ "DumpGraphAndMetadata": 0.025223493576049805,
791
+ "EliminateDivs": 0.0023469924926757813,
792
+ "ExpandBatchNorm": 0.001692056655883789,
793
+ "ExpandISAMacro": 0.008460283279418945,
794
+ "FactorizeBlkDims": 0.00921773910522461,
795
+ "FactorizeThreadAxesInFreeDims": 0.002184152603149414,
796
+ "FlattenMacroLoop": 0.0022482872009277344,
797
+ "GenericAccessSimplifier": 0.0009622573852539063,
798
+ "InferInitValue": 0.027300357818603516,
799
+ "InferIntrinsicOnCC": 0.009199380874633789,
800
+ "InferNeuronTensor": 0.028067350387573242,
801
+ "InferNonlocalTensors": 0.014671802520751953,
802
+ "InferPSumTensor": 0.0808565616607666,
803
+ "InlineNativeKernels": 0.002727031707763672,
804
+ "InsertIOTransposes": 0.017727136611938477,
805
+ "InsertLocalTransposes": 0.004176616668701172,
806
+ "InsertOffloadedTransposes": 0.002771615982055664,
807
+ "LICM": 0.005248069763183594,
808
+ "LateLegalizeInst": 0.0068132877349853516,
809
+ "LateLegalizePostSplit": 0.0045223236083984375,
810
+ "LateLowerReshapeOp": 0.0012927055358886719,
811
+ "LateLowerTensorOp": 0.0014028549194335938,
812
+ "LateNeuronInstComb": 0.016278505325317383,
813
+ "LayoutPreprocessing": 0.026221275329589844,
814
+ "LayoutPreprocessingAndAnalysis": 0.07468867301940918,
815
+ "LayoutRequirementAnalysis": 0.004823446273803711,
816
+ "LegalizeCCOpLayout": 0.0023353099822998047,
817
+ "LegalizeOpLevelAlias": 0.0013494491577148438,
818
+ "LegalizePartitionReduce": 0.0018906593322753906,
819
+ "LegalizeSundaAccess": 0.06063246726989746,
820
+ "LegalizeSundaMacro": 0.04256129264831543,
821
+ "LegalizeType": 0.005706787109375,
822
+ "LocalLayoutOpt": 0.016018390655517578,
823
+ "LoopFusion": 0.005109071731567383,
824
+ "LoopSplitting": 0.00048542022705078125,
825
+ "LowerBroadcast": 0.0029256343841552734,
826
+ "LowerCCOpBlockAxis": 0.0038700103759765625,
827
+ "LowerComplexBroadcast": 0.004511594772338867,
828
+ "LowerIntrinsics": 0.3245360851287842,
829
+ "LowerTensorOp": 0.010710477828979492,
830
+ "LowerTranspose": 0.05445718765258789,
831
+ "MacroGeneration": 0.061620473861694336,
832
+ "MaskPropagation": 0.002919435501098633,
833
+ "MemcpyElimination": 0.02559375762939453,
834
+ "MutateDataType": 0.0014896392822265625,
835
+ "NeuronAliasDependencyInduction": 0.0002808570861816406,
836
+ "NeuronAliasDependencyReset": 0.05649685859680176,
837
+ "NeuronInstComb": 0.004225254058837891,
838
+ "NeuronLICM": 0.014126300811767578,
839
+ "NeuronLoopFusion": 0.009732246398925781,
840
+ "NeuronLoopInterchange": 0.0025072097778320313,
841
+ "NeuronSimplifier": 0.03835606575012207,
842
+ "NeuronSimplifyPredicates": 0.005949974060058594,
843
+ "NeuronValueNumbering": 0.002723217010498047,
844
+ "OptimizeAliasedCopyChain": 0.0007545948028564453,
845
+ "OptimizeNKIKernels": 0.6443507671356201,
846
+ "PAGLayoutOpt": 0.20021605491638184,
847
+ "PComputeCutting": 0.0046160221099853516,
848
+ "PGLayoutTilingPipeline": 0.6925618648529053,
849
+ "PGTiling": 0.21065187454223633,
850
+ "PadElimination": 0.00038623809814453125,
851
+ "ParAxesAnnotation": 0.052834510803222656,
852
+ "PartialLoopFusion": 0.051622629165649414,
853
+ "PartialSimdFusion": 0.014065980911254883,
854
+ "PerfectLoopNest": 0.0019462108612060547,
855
+ "RecognizeOpIdiom": 0.0037450790405273438,
856
+ "Recompute": 0.0004031658172607422,
857
+ "RelaxPredicates": 0.03561973571777344,
858
+ "Rematerialization": 0.0018870830535888672,
859
+ "ReshapeWeights": 0.0009450912475585938,
860
+ "ResolveAccessConflict": 0.0038840770721435547,
861
+ "ResolveComplicatePredicates": 0.0011222362518310547,
862
+ "RewriteReplicationMatmul": 0.0017135143280029297,
863
+ "RewriteWeights": 0.0024623870849609375,
864
+ "SFKVectorizer": 0.19122314453125,
865
+ "SimpleAllReduceTiling": 0.0035347938537597656,
866
+ "Simplifier": 0.0030031204223632813,
867
+ "SimplifyMacroPredicates": 0.005193233489990234,
868
+ "SimplifyNeuronTensor": 0.38506579399108887,
869
+ "SimplifySlice": 0.001062631607055664,
870
+ "SimplifyTensor": 0.009534358978271484,
871
+ "SpillPSum": 0.05334925651550293,
872
+ "SplitAPUnionSets": 0.013537406921386719,
873
+ "SplitAccGrp": 0.0014171600341796875,
874
+ "StaticProfiler": 0.005720615386962891,
875
+ "StaticTransposeLocalTensor": 0.003614664077758789,
876
+ "SundaISel": 0.09031486511230469,
877
+ "TCTransform": 0.0008947849273681641,
878
+ "TensorInitialization": 0.010958433151245117,
879
+ "TensorOpSimplifier": 0.005278110504150391,
880
+ "TensorOpTransform": 0.020787477493286133,
881
+ "TileCCOps": 0.005544900894165039,
882
+ "TilingProfiler": 0.007747173309326172,
883
+ "TransformConvOp": 0.003238677978515625,
884
+ "TritiumFusion": 0.16130614280700684,
885
+ "ValueNumbering": 0.0018999576568603516,
886
+ "VectorizeDMA": 0.0017979145050048828,
887
+ "VectorizeMatMult": 0.007079362869262695,
888
+ "WeightCoalescing": 0.0030584335327148438,
889
+ "ZeroSizeTensorElimination": 0.00022983551025390625
890
+ },
891
+ "tensorizer": {
892
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 17056.0,
893
+ "StaticProfiler::AifUb": 568.2581176757813,
894
+ "StaticProfiler::ArithmeticIntensityTensorizer": 456.7580261230469,
895
+ "StaticProfiler::AverageDmaLength": 1314.3221435546875,
896
+ "StaticProfiler::AverageFractalPeUtilization": 99.79875946044922,
897
+ "StaticProfiler::AveragePartitionUtilization": 99.50694274902344,
898
+ "StaticProfiler::AveragePeUtilization": 99.19517517089844,
899
+ "StaticProfiler::DDRTransferBytes": 407087136.0,
900
+ "StaticProfiler::InternalTransferBytes": 48342036.0,
901
+ "StaticProfiler::LoadExpanded": 310291.0,
902
+ "StaticProfiler::LocalizationEfficiency": 80.37861633300781,
903
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 88.63314819335938,
904
+ "StaticProfiler::StoreExpanded": 6699.0,
905
+ "StaticProfiler::TotalDMAExpanded": 316990.0,
906
+ "StaticProfiler::TotalDynamicInstancesCount": 19674.0,
907
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 19578.0,
908
+ "StaticProfiler::TotalLNCComm": 0.0,
909
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
910
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
911
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
912
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
913
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
914
+ "TilingProfiler::GenericInstructionsAfterTiling": 4.0,
915
+ "TilingProfiler::MatMultInstructionsAfterTiling": 14848.0,
916
+ "TilingProfiler::NumPfTransposes": 4.0,
917
+ "TilingProfiler::NumPfTransposesForIo": 0.0,
918
+ "TilingProfiler::NumPfTransposesForLocal": 1.0,
919
+ "TilingProfiler::NumPfTransposesForNonlocal": 3.0,
920
+ "TilingProfiler::PfTransposeInstructions": 769.0,
921
+ "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
922
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
923
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
924
+ "TilingProfiler::ReduceInstructionsAfterTiling": 6.0,
925
+ "TilingProfiler::SimdInstructionsAfterTiling": 319.0,
926
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
927
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
928
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
929
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
930
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
931
+ "TransformConvOp::conv2d_column_packing": 0.0,
932
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
933
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
934
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
935
+ }
936
+ },
937
+ "sg01": {
938
+ "compiletime": {
939
+ "CanonicalizeConv": 0.0,
940
+ "CanonicalizeForTensorizer": 1.9999999494757503e-05,
941
+ "Canonicalizer": 0.0003800000122282654,
942
+ "HoistCompute": 3.000000106112566e-06,
943
+ "IdentifyCrossPassTensors": 1.9999999494757503e-05,
944
+ "MemcastMotion": 1.2000000424450263e-05,
945
+ "PenguinizeFunctions": 1.9999999494757503e-05,
946
+ "PruneFunctions": 1.700000029813964e-05,
947
+ "RemoveOptimizationBarriers": 2.499999936844688e-05,
948
+ "ScatterMotion": 7.000000096013537e-06,
949
+ "TensorizerLegalizationPass": 2.2000000171829015e-05,
950
+ "VerifySupportedOps": 1.4999999621068127e-05,
951
+ "algsimp": 0.00012199999764561653,
952
+ "batchnorm_expander": 2.2000000171829015e-05,
953
+ "boundary-marker-removal": 7.000000096013537e-06,
954
+ "call-inliner": 1.700000029813964e-05,
955
+ "canonicalize-boundary-marker": 7.999999979801942e-06,
956
+ "collective-stream-id-checker": 9.999999747378752e-06,
957
+ "comparison-expander": 9.000000318337698e-06,
958
+ "computation-deduplicator": 3.600000127335079e-05,
959
+ "conditional-to-select": 9.000000318337698e-06,
960
+ "config-lowering": 7.699999696342275e-05,
961
+ "constant_folding": 1.2999999853491317e-05,
962
+ "cse": 2.5999999706982635e-05,
963
+ "dce": 1.9999999949504854e-06,
964
+ "dynamic-slice-transpose": 7.999999979801942e-06,
965
+ "eliminate-redundant-compare": 7.000000096013537e-06,
966
+ "emit-offloaded-dropout": 2.9000000722589903e-05,
967
+ "flatten-call-graph": 1.700000029813964e-05,
968
+ "fuse-send-recv": 3.7999998312443495e-05,
969
+ "hilo::LegalizeAlias": 7.000000096013537e-06,
970
+ "hilo::NeuronInstCombine": 4.8000001697801054e-05,
971
+ "hilo::NeuronOpFusion": 2.300000051036477e-05,
972
+ "hilo::ReplaceTokenTypeWithU8Pass": 2.499999936844688e-05,
973
+ "hilo::ScheduleFusion": 0.0,
974
+ "hilo::SixtyFourHack": 1.8000000636675395e-05,
975
+ "hilo::VerifyAliasing": 3.999999989900971e-06,
976
+ "hlo-mac-count": 5.900000178371556e-05,
977
+ "hlo-verifier": 0.00028700000257231295,
978
+ "legalize-ccops": 1.9999999949504854e-06,
979
+ "legalize-compare": 7.000000096013537e-06,
980
+ "lower-argminmax-custom-call": 7.000000096013537e-06,
981
+ "map-inline": 1.8000000636675395e-05,
982
+ "metadata-naming": 3.400000059627928e-05,
983
+ "mlir::detail::OpToOpPassAdaptor": 4.099999932805076e-05,
984
+ "mlir::hlo::MhloToPyPenguin": 0.025769000872969627,
985
+ "mlir::mhlo::LowerComplexExtraPass": 0.00010399999882793054,
986
+ "mlir::mhlo::LowerComplexPass": 0.00024399999529123306,
987
+ "native-to-custom-softmax": 1.700000029813964e-05,
988
+ "native-to-custom-softmax-dx": 4.70000013592653e-05,
989
+ "operand_upcaster": 2.9999999242136255e-05,
990
+ "post-par-pipe-begin": 9.000000318337698e-06,
991
+ "post-par-pipe-end": 0.0,
992
+ "post-partition-simplification": 0.0009069999796338379,
993
+ "replace-minimum-constant": 1.4000000192027073e-05,
994
+ "reshape-mover": 6.000000212225132e-06,
995
+ "simplify-concat": 7.300000288523734e-05,
996
+ "simplify-while-loops": 4.999999873689376e-06,
997
+ "transform-variadic-reduce": 1.2999999853491317e-05,
998
+ "tuple-simplifier": 9.000000318337698e-06,
999
+ "unpack-nested-aws-ntwsr": 4.999999873689376e-06,
1000
+ "unroll-while-loop": 9.999999974752427e-07
1001
+ },
1002
+ "hilo": {
1003
+ "ArithmeticIntensity": 808.5779418945313,
1004
+ "HloMacCount": 115964116992.0,
1005
+ "Traffic": 286834720.0
1006
+ }
1007
+ },
1008
+ "sg02": {
1009
+ "compiletime": {
1010
+ "CanonicalizeConv": 0.0,
1011
+ "CanonicalizeForTensorizer": 1.4000000192027073e-05,
1012
+ "Canonicalizer": 0.0005150000215508044,
1013
+ "HoistCompute": 1.9999999949504854e-06,
1014
+ "IdentifyCrossPassTensors": 1.8000000636675395e-05,
1015
+ "MemcastMotion": 1.4000000192027073e-05,
1016
+ "PenguinizeFunctions": 9.999999747378752e-06,
1017
+ "PruneFunctions": 9.999999747378752e-06,
1018
+ "RemoveOptimizationBarriers": 1.5999999959603883e-05,
1019
+ "ScatterMotion": 9.999999974752427e-07,
1020
+ "TensorizerLegalizationPass": 7.000000096013537e-06,
1021
+ "VerifySupportedOps": 1.8000000636675395e-05,
1022
+ "algsimp": 7.599999662488699e-05,
1023
+ "batchnorm_expander": 1.4999999621068127e-05,
1024
+ "boundary-marker-removal": 3.999999989900971e-06,
1025
+ "call-inliner": 1.2000000424450263e-05,
1026
+ "canonicalize-boundary-marker": 4.999999873689376e-06,
1027
+ "collective-stream-id-checker": 6.000000212225132e-06,
1028
+ "comparison-expander": 6.000000212225132e-06,
1029
+ "computation-deduplicator": 2.499999936844688e-05,
1030
+ "conditional-to-select": 7.000000096013537e-06,
1031
+ "config-lowering": 6.299999949987978e-05,
1032
+ "constant_folding": 1.1000000085914508e-05,
1033
+ "cse": 1.4000000192027073e-05,
1034
+ "dce": 9.999999974752427e-07,
1035
+ "dynamic-slice-transpose": 3.999999989900971e-06,
1036
+ "eliminate-redundant-compare": 3.000000106112566e-06,
1037
+ "emit-offloaded-dropout": 2.099999983329326e-05,
1038
+ "flatten-call-graph": 1.2000000424450263e-05,
1039
+ "fuse-send-recv": 2.4000000848900527e-05,
1040
+ "hilo::LegalizeAlias": 1.9999999949504854e-06,
1041
+ "hilo::NeuronInstCombine": 4.999999873689376e-05,
1042
+ "hilo::NeuronOpFusion": 2.099999983329326e-05,
1043
+ "hilo::ReplaceTokenTypeWithU8Pass": 1.8000000636675395e-05,
1044
+ "hilo::ScheduleFusion": 3.999999989900971e-06,
1045
+ "hilo::SixtyFourHack": 4.3000000005122274e-05,
1046
+ "hilo::VerifyAliasing": 9.999999974752427e-07,
1047
+ "hlo-mac-count": 0.0002410000015515834,
1048
+ "hlo-verifier": 0.00020399999630171806,
1049
+ "legalize-ccops": 9.999999974752427e-07,
1050
+ "legalize-compare": 3.999999989900971e-06,
1051
+ "lower-argminmax-custom-call": 3.000000106112566e-06,
1052
+ "map-inline": 1.4000000192027073e-05,
1053
+ "metadata-naming": 1.700000029813964e-05,
1054
+ "mlir::detail::OpToOpPassAdaptor": 3.5000000934815034e-05,
1055
+ "mlir::hlo::MhloToPyPenguin": 0.01627100072801113,
1056
+ "mlir::mhlo::LowerComplexExtraPass": 0.00011700000322889537,
1057
+ "mlir::mhlo::LowerComplexPass": 0.00016500000492669642,
1058
+ "native-to-custom-softmax": 1.1000000085914508e-05,
1059
+ "native-to-custom-softmax-dx": 4.5000000682193786e-05,
1060
+ "operand_upcaster": 1.8999999156221747e-05,
1061
+ "post-par-pipe-begin": 3.000000106112566e-06,
1062
+ "post-par-pipe-end": 0.0,
1063
+ "post-partition-simplification": 0.0006179999909363687,
1064
+ "replace-minimum-constant": 9.999999747378752e-06,
1065
+ "reshape-mover": 3.000000106112566e-06,
1066
+ "simplify-concat": 4.999999873689376e-05,
1067
+ "simplify-while-loops": 3.000000106112566e-06,
1068
+ "transform-variadic-reduce": 6.0999998822808266e-05,
1069
+ "tuple-simplifier": 4.999999873689376e-06,
1070
+ "unpack-nested-aws-ntwsr": 3.999999989900971e-06,
1071
+ "unroll-while-loop": 0.0
1072
+ },
1073
+ "hilo": {
1074
+ "ArithmeticIntensity": 537.9506225585938,
1075
+ "HloMacCount": 90261422080.0,
1076
+ "Traffic": 335575104.0
1077
+ }
1078
+ }
1079
+ }
context_encoding_model/_tp0_bk3/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9330f87daaab052682ce2a183d9908828cede116976ffca894ecbd7ea31a028c
3
+ size 1731584
context_encoding_model/_tp0_bk3/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:592de62bcecb744077fb8bd9e5363e57ea2543bf82083638f8ad2039a512933c
3
+ size 1561029
context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba393ab52af446df672f41632e2c112cb7e051a45d99da03d33fb1f12262cca6
3
+ size 1626903
context_encoding_model/_tp0_bk3/model.MODULE_2e1f11fbf72d40b46e64+5ae2bfda.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9330f87daaab052682ce2a183d9908828cede116976ffca894ecbd7ea31a028c
3
+ size 1731584
context_encoding_model/_tp0_bk3/neuron_config.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": false,
3
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
4
+ "add_cross_attention": false,
5
+ "architectures": [
6
+ "MistralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "attribute_map": {},
10
+ "bad_words_ids": null,
11
+ "begin_suppress_tokens": null,
12
+ "bos_token_id": 1,
13
+ "chunk_size_feed_forward": 0,
14
+ "cross_attention_hidden_size": null,
15
+ "decoder_start_token_id": null,
16
+ "diversity_penalty": 0.0,
17
+ "do_sample": false,
18
+ "early_stopping": false,
19
+ "encoder_no_repeat_ngram_size": 0,
20
+ "eos_token_id": 2,
21
+ "exponential_decay_length_penalty": null,
22
+ "finetuning_task": null,
23
+ "forced_bos_token_id": null,
24
+ "forced_eos_token_id": null,
25
+ "fused_spec_config": null,
26
+ "head_dim": 128,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 4096,
29
+ "id2label": {
30
+ "0": "LABEL_0",
31
+ "1": "LABEL_1"
32
+ },
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": 14336,
35
+ "is_decoder": false,
36
+ "is_encoder_decoder": false,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1
40
+ },
41
+ "length_penalty": 1.0,
42
+ "max_length": 20,
43
+ "max_position_embeddings": 32768,
44
+ "metadata": null,
45
+ "min_length": 0,
46
+ "model_type": "mistral",
47
+ "neuron_config": {
48
+ "activation_quantization_type": null,
49
+ "allow_input_truncation": false,
50
+ "apply_seq_ids_mask": false,
51
+ "async_mode": false,
52
+ "attention_dp_degree": 1,
53
+ "attention_dtype": null,
54
+ "attn_block_cte_nki_kernel_enabled": false,
55
+ "attn_block_tkg_nki_kernel_cache_update": false,
56
+ "attn_block_tkg_nki_kernel_enabled": false,
57
+ "attn_cls": "NeuronLlamaAttention",
58
+ "attn_kernel_enabled": null,
59
+ "attn_tkg_builtin_kernel_enabled": false,
60
+ "attn_tkg_nki_kernel_enabled": false,
61
+ "batch_size": 1,
62
+ "bucket_n_active_tokens": true,
63
+ "buckets": [
64
+ 1024
65
+ ],
66
+ "cast_type": "config",
67
+ "cc_pipeline_tiling_factor": 2,
68
+ "chunked_prefill_config": null,
69
+ "context_encoding_buckets": [
70
+ 1024
71
+ ],
72
+ "cp_degree": 1,
73
+ "ctx_batch_size": 1,
74
+ "disable_kv_cache_tiling": false,
75
+ "draft_model_modules_to_not_convert": null,
76
+ "enable_bucketing": true,
77
+ "enable_eagle_draft_input_norm": false,
78
+ "enable_eagle_speculation": false,
79
+ "enable_fused_speculation": false,
80
+ "enable_long_context_mode": false,
81
+ "enable_output_completion_notifications": false,
82
+ "enable_spill_reload_dge": false,
83
+ "enable_token_tree": false,
84
+ "ep_degree": 1,
85
+ "expert_mlp_nki_kernel_enabled": null,
86
+ "flash_decoding_enabled": false,
87
+ "fused_qkv": false,
88
+ "fused_rmsnorm_skip_gamma": false,
89
+ "is_block_kv_layout": null,
90
+ "is_chunked_prefill": false,
91
+ "is_continuous_batching": true,
92
+ "is_eagle_draft": false,
93
+ "is_medusa": false,
94
+ "is_prefill_stage": true,
95
+ "is_prefix_caching": false,
96
+ "k_cache_transposed": false,
97
+ "kv_cache_batch_size": 4,
98
+ "kv_cache_padding_size": 0,
99
+ "kv_cache_quant": false,
100
+ "kv_cache_tiling": false,
101
+ "layer_boundary_markers": false,
102
+ "lm_head_pad": false,
103
+ "lm_head_pad_alignment_size": 1,
104
+ "local_ranks_size": 2,
105
+ "logical_nc_config": 1,
106
+ "lora_config": null,
107
+ "max_batch_size": 4,
108
+ "max_context_length": 2048,
109
+ "max_length": 2048,
110
+ "max_new_tokens": null,
111
+ "medusa_speculation_length": 0,
112
+ "medusa_tree": null,
113
+ "mlp_kernel_enabled": false,
114
+ "mlp_kernel_fuse_residual_add": false,
115
+ "modules_to_not_convert": null,
116
+ "moe_fused_nki_kernel_enabled": null,
117
+ "n_active_tokens": 2048,
118
+ "n_positions": 2048,
119
+ "num_medusa_heads": 0,
120
+ "on_cpu": false,
121
+ "on_device_sampling_config": {
122
+ "deterministic": false,
123
+ "do_sample": false,
124
+ "dynamic": true,
125
+ "global_topk": 256,
126
+ "on_device_sampling_config": true,
127
+ "temperature": 1.0,
128
+ "top_k": 1,
129
+ "top_k_kernel_enabled": false,
130
+ "top_p": 1.0
131
+ },
132
+ "output_logits": false,
133
+ "overrides_torch_dtype": true,
134
+ "pa_block_size": 2048,
135
+ "pa_num_blocks": 4,
136
+ "padding_side": "right",
137
+ "pp_degree": 1,
138
+ "prefix_buckets": null,
139
+ "qk_layernorm": false,
140
+ "qkv_kernel_enabled": false,
141
+ "qkv_kernel_fuse_residual_add": false,
142
+ "qkv_kernel_nbsd_layout": false,
143
+ "quantization_dtype": "int8",
144
+ "quantization_type": "per_tensor_symmetric",
145
+ "quantize_clamp_bound": Infinity,
146
+ "quantized": false,
147
+ "quantized_checkpoints_path": null,
148
+ "quantized_mlp_kernel_enabled": false,
149
+ "rmsnorm_quantize_kernel_enabled": false,
150
+ "router_topk_nki_kernel_enabled": null,
151
+ "rpl_reduce_dtype": null,
152
+ "save_sharded_checkpoint": true,
153
+ "scratchpad_page_size": null,
154
+ "seq_len": 2048,
155
+ "seq_len_threshold_for_cc_tiling": 16384,
156
+ "sequence_parallel_enabled": false,
157
+ "shared_mlp_nki_kernel_enabled": null,
158
+ "skip_sharding": false,
159
+ "skip_warmup": false,
160
+ "spec_batch_size": 4,
161
+ "speculation_length": 0,
162
+ "start_rank_id": 0,
163
+ "target": null,
164
+ "tile_cc": false,
165
+ "tkg_batch_size": 4,
166
+ "token_generation_buckets": null,
167
+ "token_tree_config": null,
168
+ "torch_dtype": "bfloat16",
169
+ "tp_degree": 2,
170
+ "vocab_parallel": false,
171
+ "weight_gather_seq_len_threshold": 32768,
172
+ "weights_to_skip_layout_optimization": [],
173
+ "world_size": 2
174
+ },
175
+ "no_repeat_ngram_size": 0,
176
+ "num_attention_heads": 32,
177
+ "num_beam_groups": 1,
178
+ "num_beams": 1,
179
+ "num_cores_per_group": 1,
180
+ "num_hidden_layers": 32,
181
+ "num_key_value_heads": 8,
182
+ "num_return_sequences": 1,
183
+ "output_attentions": false,
184
+ "output_hidden_states": false,
185
+ "output_scores": false,
186
+ "pad_token_id": 0,
187
+ "prefix": null,
188
+ "problem_type": null,
189
+ "pruned_heads": {},
190
+ "remove_invalid_values": false,
191
+ "repetition_penalty": 1.0,
192
+ "return_dict": true,
193
+ "return_dict_in_generate": false,
194
+ "rms_norm_eps": 1e-05,
195
+ "rope_theta": 1000000.0,
196
+ "sep_token_id": null,
197
+ "sliding_window": null,
198
+ "suppress_tokens": null,
199
+ "task_specific_params": null,
200
+ "temperature": 1.0,
201
+ "tf_legacy_loss": false,
202
+ "tie_encoder_decoder": false,
203
+ "tie_word_embeddings": false,
204
+ "tokenizer_class": null,
205
+ "top_k": 50,
206
+ "top_p": 1.0,
207
+ "torchscript": false,
208
+ "transformers_version": "4.42.0.dev0",
209
+ "typical_p": 1.0,
210
+ "use_bfloat16": false,
211
+ "use_cache": true,
212
+ "vocab_size": 32768
213
+ }
context_encoding_model/_tp0_bk4/command.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ neuronx-cc compile --framework=XLA model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb --output model.MODULE_d342327da795afc2aa68+5e8b788a.neff --target=trn1 --auto-cast=none --model-type=transformer '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ' --lnc=1 -O1 '--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true' --logfile=log-neuron-cc.txt --verbose=35
context_encoding_model/_tp0_bk4/compile_flags.MODULE_d342327da795afc2aa68+5e8b788a.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "--lnc=1", "-O1", "--internal-hlo2tensorizer-options= --modular-flow-mac-threshold=10 --verify-hlo=true", "--logfile=/models/mistral-7b-v0.3-instruct-neuronx/context_encoding_model/_tp0_bk4/log-neuron-cc.txt"]
context_encoding_model/_tp0_bk4/global_metric_store.json ADDED
@@ -0,0 +1,1079 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Average": {
3
+ "tensorizer": {
4
+ "StaticProfiler::AverageFractalPeUtilization": 99.88423156738281,
5
+ "StaticProfiler::AveragePartitionUtilization": 99.71043395996094,
6
+ "StaticProfiler::AveragePeUtilization": 99.53581237792969,
7
+ "StaticProfiler::LocalizationEfficiency": 41.61907196044922,
8
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 45.55835723876953,
9
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
10
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0
11
+ }
12
+ },
13
+ "Count": {
14
+ "tensorizer": {
15
+ "StaticProfiler::AverageFractalPeUtilization": 1.0,
16
+ "StaticProfiler::AveragePartitionUtilization": 1.0,
17
+ "StaticProfiler::AveragePeUtilization": 1.0,
18
+ "StaticProfiler::LocalizationEfficiency": 1.0,
19
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 1.0,
20
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 1.0,
21
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 1.0
22
+ }
23
+ },
24
+ "Sum": {
25
+ "compiletime": {
26
+ "AGOrderingAnalysisPass": 0.06499266624450684,
27
+ "AffinePredicateResolution": 0.00141143798828125,
28
+ "AliasDependencyElimination": 0.00017595291137695313,
29
+ "AliasDependencyInduction": 0.006516218185424805,
30
+ "AliasDependencyReset": 0.02024674415588379,
31
+ "BFComputeCutting": 0.0023620128631591797,
32
+ "BirCodeGenLoop": 0.13731598854064941,
33
+ "CCOpFusion": 0.02620387077331543,
34
+ "CanonicalizeConv": 6.0999998822808266e-05,
35
+ "CanonicalizeDAGForPGTiling": 0.0074574947357177734,
36
+ "CanonicalizeForTensorizer": 4.999999509891495e-05,
37
+ "CanonicalizeIR": 0.0019347667694091797,
38
+ "Canonicalizer": 0.0009759999811649323,
39
+ "CoalesceCCOp": 0.005659818649291992,
40
+ "CommuteConcat": 0.0009889602661132813,
41
+ "DMALocalityOpt": 0.0024099349975585938,
42
+ "DMAProfiler": 0.008657455444335938,
43
+ "DMATilingProfiler": 0.04570889472961426,
44
+ "DataLocalityOpt": 0.1127479076385498,
45
+ "DataStreaming": 0.007959365844726563,
46
+ "DeConcat": 0.0007421970367431641,
47
+ "DeadCodeElimination": 0.002073049545288086,
48
+ "DeadStoreElimination": 0.006093263626098633,
49
+ "DelinearIndices": 0.010124444961547852,
50
+ "Delinearization": 0.005106449127197266,
51
+ "DoNothing": 0.0003044605255126953,
52
+ "DramToDramTranspose": 0.03771638870239258,
53
+ "DumpGraphAndMetadata": 0.05296611785888672,
54
+ "EliminateDivs": 0.0021944046020507813,
55
+ "ExpandBatchNorm": 0.0015587806701660156,
56
+ "ExpandISAMacro": 0.005168437957763672,
57
+ "FactorizeBlkDims": 0.011832475662231445,
58
+ "FactorizeThreadAxesInFreeDims": 0.0014889240264892578,
59
+ "FlattenMacroLoop": 0.0025510787963867188,
60
+ "GenericAccessSimplifier": 0.0009717941284179688,
61
+ "HoistCompute": 1.3999999282532372e-05,
62
+ "IdentifyCrossPassTensors": 4.099999932805076e-05,
63
+ "InferInitValue": 0.030786514282226563,
64
+ "InferIntrinsicOnCC": 0.012189865112304688,
65
+ "InferNeuronTensor": 0.0819096565246582,
66
+ "InferNonlocalTensors": 0.025629520416259766,
67
+ "InferPSumTensor": 0.08477997779846191,
68
+ "InlineNativeKernels": 0.003083944320678711,
69
+ "InsertIOTransposes": 0.02764296531677246,
70
+ "InsertLocalTransposes": 0.0040624141693115234,
71
+ "InsertOffloadedTransposes": 0.005682229995727539,
72
+ "LICM": 0.003050565719604492,
73
+ "LateLegalizeInst": 0.02321314811706543,
74
+ "LateLegalizePostSplit": 0.004519462585449219,
75
+ "LateLowerReshapeOp": 0.0023851394653320313,
76
+ "LateLowerTensorOp": 0.0016567707061767578,
77
+ "LateNeuronInstComb": 0.011125564575195313,
78
+ "LayoutPreprocessing": 0.06753706932067871,
79
+ "LayoutPreprocessingAndAnalysis": 0.16236424446105957,
80
+ "LayoutRequirementAnalysis": 0.005420684814453125,
81
+ "LegalizeCCOpLayout": 0.0023717880249023438,
82
+ "LegalizeOpLevelAlias": 0.0012898445129394531,
83
+ "LegalizePartitionReduce": 0.0011932849884033203,
84
+ "LegalizeSundaAccess": 0.026709556579589844,
85
+ "LegalizeSundaMacro": 0.012512683868408203,
86
+ "LegalizeType": 0.04736900329589844,
87
+ "LocalLayoutOpt": 0.0263979434967041,
88
+ "LoopFusion": 0.005193948745727539,
89
+ "LoopSplitting": 0.0005512237548828125,
90
+ "LowerBroadcast": 0.04221224784851074,
91
+ "LowerCCOpBlockAxis": 0.008313655853271484,
92
+ "LowerComplexBroadcast": 0.0025756359100341797,
93
+ "LowerIntrinsics": 0.11752676963806152,
94
+ "LowerTensorOp": 0.010608196258544922,
95
+ "LowerTranspose": 0.08257818222045898,
96
+ "MacroGeneration": 0.07271862030029297,
97
+ "MaskPropagation": 0.005186557769775391,
98
+ "MemcastMotion": 2.2000000171829015e-05,
99
+ "MemcpyElimination": 0.026259899139404297,
100
+ "MutateDataType": 0.0013203620910644531,
101
+ "NeuronAliasDependencyInduction": 0.0002338886260986328,
102
+ "NeuronAliasDependencyReset": 0.029464006423950195,
103
+ "NeuronInstComb": 0.004740476608276367,
104
+ "NeuronLICM": 0.01508331298828125,
105
+ "NeuronLoopFusion": 0.00891876220703125,
106
+ "NeuronLoopInterchange": 0.0014586448669433594,
107
+ "NeuronSimplifier": 0.009086847305297852,
108
+ "NeuronSimplifyPredicates": 0.006235837936401367,
109
+ "NeuronValueNumbering": 0.0030777454376220703,
110
+ "OptimizeAliasedCopyChain": 0.0006422996520996094,
111
+ "OptimizeNKIKernels": 0.5174376964569092,
112
+ "PAGLayoutOpt": 0.12734031677246094,
113
+ "PComputeCutting": 0.005000591278076172,
114
+ "PGLayoutTilingPipeline": 0.8229436874389648,
115
+ "PGTiling": 0.26772499084472656,
116
+ "PadElimination": 0.0005135536193847656,
117
+ "ParAxesAnnotation": 0.07412934303283691,
118
+ "PartialLoopFusion": 0.013575553894042969,
119
+ "PartialSimdFusion": 0.011231422424316406,
120
+ "PenguinizeFunctions": 4.099999932805076e-05,
121
+ "PerfectLoopNest": 0.0019729137420654297,
122
+ "PruneFunctions": 2.5000001187436283e-05,
123
+ "RecognizeOpIdiom": 0.0038080215454101563,
124
+ "Recompute": 0.00034308433532714844,
125
+ "RelaxPredicates": 0.004430532455444336,
126
+ "Rematerialization": 0.002201557159423828,
127
+ "RemoveOptimizationBarriers": 7.999999797903001e-05,
128
+ "ReshapeWeights": 0.0009114742279052734,
129
+ "ResolveAccessConflict": 0.027348041534423828,
130
+ "ResolveComplicatePredicates": 0.0011477470397949219,
131
+ "RewriteReplicationMatmul": 0.0025103092193603516,
132
+ "RewriteWeights": 0.0029447078704833984,
133
+ "SFKVectorizer": 0.19977569580078125,
134
+ "ScatterMotion": 3.400000059627928e-05,
135
+ "SimpleAllReduceTiling": 0.0034945011138916016,
136
+ "Simplifier": 0.003106832504272461,
137
+ "SimplifyMacroPredicates": 0.03599357604980469,
138
+ "SimplifyNeuronTensor": 0.18126153945922852,
139
+ "SimplifySlice": 0.0016787052154541016,
140
+ "SimplifyTensor": 0.04330563545227051,
141
+ "SpillPSum": 0.06513023376464844,
142
+ "SplitAPUnionSets": 0.012967586517333984,
143
+ "SplitAccGrp": 0.0015358924865722656,
144
+ "StaticProfiler": 0.00551915168762207,
145
+ "StaticTransposeLocalTensor": 0.004834890365600586,
146
+ "SundaISel": 0.0945746898651123,
147
+ "TCTransform": 0.0009295940399169922,
148
+ "TensorInitialization": 0.006634950637817383,
149
+ "TensorOpSimplifier": 0.005204439163208008,
150
+ "TensorOpTransform": 0.02082967758178711,
151
+ "TensorizerLegalizationPass": 5.0000002374872565e-05,
152
+ "TileCCOps": 0.006725311279296875,
153
+ "TilingProfiler": 0.016322612762451172,
154
+ "TransformConvOp": 0.0029544830322265625,
155
+ "TritiumFusion": 0.09467315673828125,
156
+ "ValueNumbering": 0.0020852088928222656,
157
+ "VectorizeDMA": 0.0017535686492919922,
158
+ "VectorizeMatMult": 0.008865118026733398,
159
+ "VerifySupportedOps": 3.300000025774352e-05,
160
+ "WeightCoalescing": 0.003345489501953125,
161
+ "ZeroSizeTensorElimination": 0.00018644332885742188,
162
+ "algsimp": 0.0030140001326799393,
163
+ "batchnorm_expander": 4.400000034365803e-05,
164
+ "boundary-marker-removal": 1.1000000085914508e-05,
165
+ "call-inliner": 0.0004670000053010881,
166
+ "canonicalize-boundary-marker": 1.4999999621068127e-05,
167
+ "collective-stream-id-checker": 0.00010199999815085903,
168
+ "comparison-expander": 0.0005569999921135604,
169
+ "computation-deduplicator": 6.500000017695129e-05,
170
+ "conditional-to-select": 1.700000029813964e-05,
171
+ "config-lowering": 0.0001630000042496249,
172
+ "constant-statistics": 0.0005039999959990382,
173
+ "constant_folding": 0.0002969999914057553,
174
+ "cse": 6.0999998822808266e-05,
175
+ "dce": 8.600000001024455e-05,
176
+ "dot_decomposer": 0.001433999976143241,
177
+ "dynamic-slice-transpose": 1.2999999853491317e-05,
178
+ "eliminate-redundant-compare": 0.0002640000020619482,
179
+ "emit-offloaded-dropout": 6.500000017695129e-05,
180
+ "flatten-call-graph": 0.0007960000075399876,
181
+ "fuse-send-recv": 7.000000186963007e-05,
182
+ "hilo::LegalizeAlias": 1.4999999621068127e-05,
183
+ "hilo::NeuronInstCombine": 0.00012399999832268804,
184
+ "hilo::NeuronOpFusion": 6.399999983841553e-05,
185
+ "hilo::ReplaceTokenTypeWithU8Pass": 4.5000000682193786e-05,
186
+ "hilo::ScheduleFusion": 1.300000076298602e-05,
187
+ "hilo::SixtyFourHack": 6.800000119255856e-05,
188
+ "hilo::VerifyAliasing": 6.000000212225132e-06,
189
+ "hlo-mac-count": 0.0012410000199452043,
190
+ "hlo-verifier": 0.010365999303758144,
191
+ "instruction-histogram": 0.0010479999473318458,
192
+ "io-con-pipe-begin": 7.999999979801942e-06,
193
+ "io-con-pipe-end": 9.999999974752427e-07,
194
+ "io-layout-normalization": 0.0016609999584034085,
195
+ "io-statistics": 0.0001049999991664663,
196
+ "legalize-ccops": 4.999999873689376e-06,
197
+ "legalize-compare": 1.1000000085914508e-05,
198
+ "lower-argminmax-custom-call": 9.999999747378752e-06,
199
+ "map-inline": 0.0009129999671131372,
200
+ "metadata-naming": 5.400000372901559e-05,
201
+ "mlir::detail::OpToOpPassAdaptor": 8.199999865610152e-05,
202
+ "mlir::hlo::MhloToPyPenguin": 0.07495799660682678,
203
+ "mlir::mhlo::LowerComplexExtraPass": 0.00035899996873922646,
204
+ "mlir::mhlo::LowerComplexPass": 0.0005389999714680016,
205
+ "native-to-custom-softmax": 0.000842000066768378,
206
+ "native-to-custom-softmax-dx": 0.0008819999638944864,
207
+ "operand_upcaster": 6.800000119255856e-05,
208
+ "opt-barrier-removal": 0.0005799999926239252,
209
+ "post-par-pipe-begin": 1.700000029813964e-05,
210
+ "post-par-pipe-end": 0.0,
211
+ "post-partition-simplification": 0.0018259999342262745,
212
+ "pre-par-pipe-begin": 9.999999974752427e-07,
213
+ "pre-par-pipe-end": 0.0,
214
+ "pre-partition-simplification": 0.2598330080509186,
215
+ "replace-minimum-constant": 0.0004039999912492931,
216
+ "reshape-mover": 0.00012399999832268804,
217
+ "simplify-concat": 0.0001630000042496249,
218
+ "simplify-while-loops": 0.00010000000474974513,
219
+ "transform-variadic-reduce": 0.0001939999929163605,
220
+ "tuple-simplifier": 0.0003140000335406512,
221
+ "unpack-nested-aws-ntwsr": 0.0003929999948013574,
222
+ "unroll-while-loop": 1.8000000636675395e-05,
223
+ "zero_sized_hlo_elimination": 0.0009759999811649323
224
+ },
225
+ "hilo": {
226
+ "ConstantSize": 4203477.0,
227
+ "HloInputCount": 359.0,
228
+ "HloMacCount": 481103446016.0,
229
+ "HloOutputCount": 65.0,
230
+ "IfmapSize": 7785177088.0,
231
+ "OfmapSize": 536870912.0,
232
+ "OutputsReadFromCount": 0.0,
233
+ "PassthroughTensorsCount": 0.0,
234
+ "RedundantOutputCount": 0.0,
235
+ "Traffic": 975382912.0
236
+ },
237
+ "tensorizer": {
238
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 28921.0,
239
+ "StaticProfiler::AifUb": 1080.6693115234375,
240
+ "StaticProfiler::ArithmeticIntensityTensorizer": 449.7645263671875,
241
+ "StaticProfiler::AverageDmaLength": 1323.6162109375,
242
+ "StaticProfiler::DDRTransferBytes": 826525760.0,
243
+ "StaticProfiler::InternalTransferBytes": 96576528.0,
244
+ "StaticProfiler::LoadExpanded": 619540.0,
245
+ "StaticProfiler::StoreExpanded": 12842.0,
246
+ "StaticProfiler::TotalDMAExpanded": 632382.0,
247
+ "StaticProfiler::TotalDynamicInstancesCount": 34834.0,
248
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 34738.0,
249
+ "StaticProfiler::TotalLNCComm": 0.0,
250
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
251
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
252
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
253
+ "TilingProfiler::GenericInstructionsAfterTiling": 4.0,
254
+ "TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
255
+ "TilingProfiler::NumPfTransposes": 4.0,
256
+ "TilingProfiler::NumPfTransposesForIo": 0.0,
257
+ "TilingProfiler::NumPfTransposesForLocal": 1.0,
258
+ "TilingProfiler::NumPfTransposesForNonlocal": 3.0,
259
+ "TilingProfiler::PfTransposeInstructions": 1537.0,
260
+ "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
261
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
262
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 1536.0,
263
+ "TilingProfiler::ReduceInstructionsAfterTiling": 10.0,
264
+ "TilingProfiler::SimdInstructionsAfterTiling": 626.0,
265
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
266
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
267
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
268
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
269
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
270
+ "TransformConvOp::conv2d_column_packing": 0.0,
271
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
272
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
273
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
274
+ }
275
+ },
276
+ "all": {
277
+ "compiletime": {
278
+ "algsimp": 0.00279300007969141,
279
+ "call-inliner": 0.00043799998820759356,
280
+ "collective-stream-id-checker": 8.600000001024455e-05,
281
+ "comparison-expander": 0.0005419999943114817,
282
+ "constant-statistics": 0.0005039999959990382,
283
+ "constant_folding": 0.0002699999895412475,
284
+ "dce": 8.099999831756577e-05,
285
+ "dot_decomposer": 0.001433999976143241,
286
+ "eliminate-redundant-compare": 0.00025400001322850585,
287
+ "flatten-call-graph": 0.0007660000119358301,
288
+ "hlo-mac-count": 0.0009599999757483602,
289
+ "hlo-verifier": 0.009782999753952026,
290
+ "instruction-histogram": 0.0010479999473318458,
291
+ "io-con-pipe-begin": 7.999999979801942e-06,
292
+ "io-con-pipe-end": 9.999999974752427e-07,
293
+ "io-layout-normalization": 0.0016609999584034085,
294
+ "io-statistics": 0.0001049999991664663,
295
+ "map-inline": 0.0008759999764151871,
296
+ "native-to-custom-softmax": 0.0008140000281855464,
297
+ "native-to-custom-softmax-dx": 0.0007149999728426337,
298
+ "opt-barrier-removal": 0.0005799999926239252,
299
+ "pre-par-pipe-begin": 9.999999974752427e-07,
300
+ "pre-par-pipe-end": 0.0,
301
+ "pre-partition-simplification": 0.2598330080509186,
302
+ "replace-minimum-constant": 0.0003769999893847853,
303
+ "reshape-mover": 0.00011300000187475234,
304
+ "simplify-while-loops": 9.300000237999484e-05,
305
+ "tuple-simplifier": 0.0003000000142492354,
306
+ "unpack-nested-aws-ntwsr": 0.0003809999907389283,
307
+ "unroll-while-loop": 1.8000000636675395e-05,
308
+ "zero_sized_hlo_elimination": 0.0009759999811649323
309
+ }
310
+ },
311
+ "cumsum": {
312
+ "compiletime": {
313
+ "CoalesceCCOp": 0.00027561187744140625,
314
+ "DMALocalityOpt": 0.0002129077911376953,
315
+ "DMAProfiler": 0.0009992122650146484,
316
+ "DataStreaming": 0.0003039836883544922,
317
+ "DoNothing": 0.0001742839813232422,
318
+ "ExpandISAMacro": 0.0005218982696533203,
319
+ "FactorizeBlkDims": 0.0004630088806152344,
320
+ "InferPSumTensor": 0.0004932880401611328,
321
+ "LateLegalizeInst": 0.0005190372467041016,
322
+ "LateNeuronInstComb": 0.0005123615264892578,
323
+ "LegalizeSundaAccess": 0.0015988349914550781,
324
+ "LegalizeType": 0.00028014183044433594,
325
+ "LowerBroadcast": 0.00025653839111328125,
326
+ "LowerIntrinsics": 0.0002598762512207031,
327
+ "LowerTranspose": 0.00026535987854003906,
328
+ "NeuronInstComb": 0.0005023479461669922,
329
+ "NeuronLICM": 0.00043654441833496094,
330
+ "NeuronSimplifyPredicates": 0.0028448104858398438,
331
+ "NeuronValueNumbering": 0.0004410743713378906,
332
+ "SFKVectorizer": 0.0033159255981445313,
333
+ "SimpleAllReduceTiling": 0.00028634071350097656,
334
+ "SimplifyNeuronTensor": 0.0004749298095703125,
335
+ "SpillPSum": 0.0005846023559570313,
336
+ "WeightCoalescing": 0.00024771690368652344
337
+ }
338
+ },
339
+ "sg00": {
340
+ "compiletime": {
341
+ "CanonicalizeConv": 4.3000000005122274e-05,
342
+ "CanonicalizeForTensorizer": 1.4999999621068127e-05,
343
+ "Canonicalizer": 0.00034500000765547156,
344
+ "HoistCompute": 1.9999999949504854e-06,
345
+ "IdentifyCrossPassTensors": 1.2999999853491317e-05,
346
+ "MemcastMotion": 1.2999999853491317e-05,
347
+ "PenguinizeFunctions": 1.2999999853491317e-05,
348
+ "PruneFunctions": 1.2000000424450263e-05,
349
+ "RemoveOptimizationBarriers": 1.2999999853491317e-05,
350
+ "ScatterMotion": 2.9000000722589903e-05,
351
+ "TensorizerLegalizationPass": 2.099999983329326e-05,
352
+ "VerifySupportedOps": 9.999999747378752e-06,
353
+ "algsimp": 7.699999696342275e-05,
354
+ "batchnorm_expander": 1.4000000192027073e-05,
355
+ "boundary-marker-removal": 3.000000106112566e-06,
356
+ "call-inliner": 9.000000318337698e-06,
357
+ "canonicalize-boundary-marker": 4.999999873689376e-06,
358
+ "collective-stream-id-checker": 3.999999989900971e-06,
359
+ "comparison-expander": 3.999999989900971e-06,
360
+ "computation-deduplicator": 1.8000000636675395e-05,
361
+ "conditional-to-select": 4.999999873689376e-06,
362
+ "config-lowering": 5.6000000768108293e-05,
363
+ "constant_folding": 7.999999979801942e-06,
364
+ "cse": 2.499999936844688e-05,
365
+ "dce": 1.9999999949504854e-06,
366
+ "dynamic-slice-transpose": 3.999999989900971e-06,
367
+ "eliminate-redundant-compare": 3.999999989900971e-06,
368
+ "emit-offloaded-dropout": 2.099999983329326e-05,
369
+ "flatten-call-graph": 7.999999979801942e-06,
370
+ "fuse-send-recv": 2.300000051036477e-05,
371
+ "hilo::LegalizeAlias": 4.999999873689376e-06,
372
+ "hilo::NeuronInstCombine": 2.9999999242136255e-05,
373
+ "hilo::NeuronOpFusion": 1.8000000636675395e-05,
374
+ "hilo::ReplaceTokenTypeWithU8Pass": 4.999999873689376e-06,
375
+ "hilo::ScheduleFusion": 9.999999974752427e-07,
376
+ "hilo::SixtyFourHack": 1.1000000085914508e-05,
377
+ "hilo::VerifyAliasing": 1.9999999949504854e-06,
378
+ "hlo-mac-count": 3.7000001611886546e-05,
379
+ "hlo-verifier": 0.00017499999376013875,
380
+ "legalize-ccops": 1.9999999949504854e-06,
381
+ "legalize-compare": 3.999999989900971e-06,
382
+ "lower-argminmax-custom-call": 3.000000106112566e-06,
383
+ "map-inline": 1.1000000085914508e-05,
384
+ "metadata-naming": 1.700000029813964e-05,
385
+ "mlir::detail::OpToOpPassAdaptor": 3.300000025774352e-05,
386
+ "mlir::hlo::MhloToPyPenguin": 0.03136799857020378,
387
+ "mlir::mhlo::LowerComplexExtraPass": 9.899999713525176e-05,
388
+ "mlir::mhlo::LowerComplexPass": 0.00019999999494757503,
389
+ "native-to-custom-softmax": 7.999999979801942e-06,
390
+ "native-to-custom-softmax-dx": 9.300000237999484e-05,
391
+ "operand_upcaster": 2.700000004551839e-05,
392
+ "post-par-pipe-begin": 1.9999999949504854e-06,
393
+ "post-par-pipe-end": 0.0,
394
+ "post-partition-simplification": 0.0006479999865405262,
395
+ "replace-minimum-constant": 9.000000318337698e-06,
396
+ "reshape-mover": 3.999999989900971e-06,
397
+ "simplify-concat": 5.900000178371556e-05,
398
+ "simplify-while-loops": 1.9999999949504854e-06,
399
+ "transform-variadic-reduce": 9.000000318337698e-06,
400
+ "tuple-simplifier": 3.999999989900971e-06,
401
+ "unpack-nested-aws-ntwsr": 3.000000106112566e-06,
402
+ "unroll-while-loop": 0.0
403
+ },
404
+ "hilo": {
405
+ "ArithmeticIntensity": 439.27252197265625,
406
+ "ConstantSize": 4203477.0,
407
+ "HloInputCount": 359.0,
408
+ "HloMacCount": 60129542144.0,
409
+ "HloOutputCount": 65.0,
410
+ "IfmapSize": 7785177088.0,
411
+ "OfmapSize": 536870912.0,
412
+ "OutputsReadFromCount": 0.0,
413
+ "PassthroughTensorsCount": 0.0,
414
+ "RedundantOutputCount": 0.0,
415
+ "Traffic": 273768736.0
416
+ }
417
+ },
418
+ "sg0000": {
419
+ "compiletime": {
420
+ "AGOrderingAnalysisPass": 0.13596534729003906,
421
+ "AffinePredicateResolution": 0.0015311241149902344,
422
+ "AliasDependencyElimination": 0.0001938343048095703,
423
+ "AliasDependencyInduction": 0.007838010787963867,
424
+ "AliasDependencyReset": 0.15939617156982422,
425
+ "BFComputeCutting": 0.006036996841430664,
426
+ "BirCodeGenLoop": 0.38369321823120117,
427
+ "CCOpFusion": 0.15093040466308594,
428
+ "CanonicalizeDAGForPGTiling": 0.014190196990966797,
429
+ "CanonicalizeIR": 0.0019371509552001953,
430
+ "CoalesceCCOp": 0.0029022693634033203,
431
+ "CommuteConcat": 0.0010671615600585938,
432
+ "DMALocalityOpt": 0.0018265247344970703,
433
+ "DMAProfiler": 0.006582021713256836,
434
+ "DMATilingProfiler": 0.005391597747802734,
435
+ "DataLocalityOpt": 0.20601868629455566,
436
+ "DataStreaming": 0.00843048095703125,
437
+ "DeConcat": 0.0018315315246582031,
438
+ "DeadCodeElimination": 0.0020117759704589844,
439
+ "DeadStoreElimination": 0.027777433395385742,
440
+ "DelinearIndices": 0.029506444931030273,
441
+ "Delinearization": 0.003535747528076172,
442
+ "DoNothing": 0.0001571178436279297,
443
+ "DramToDramTranspose": 0.07804989814758301,
444
+ "DumpGraphAndMetadata": 0.04837989807128906,
445
+ "EliminateDivs": 0.0034132003784179688,
446
+ "ExpandBatchNorm": 0.0020427703857421875,
447
+ "ExpandISAMacro": 0.0035333633422851563,
448
+ "FactorizeBlkDims": 0.06211543083190918,
449
+ "FactorizeThreadAxesInFreeDims": 0.0018017292022705078,
450
+ "FlattenMacroLoop": 0.005364418029785156,
451
+ "GenericAccessSimplifier": 0.0018382072448730469,
452
+ "InferInitValue": 0.04181218147277832,
453
+ "InferIntrinsicOnCC": 0.05515456199645996,
454
+ "InferNeuronTensor": 0.08455061912536621,
455
+ "InferNonlocalTensors": 0.3793964385986328,
456
+ "InferPSumTensor": 0.06014227867126465,
457
+ "InlineNativeKernels": 0.0018780231475830078,
458
+ "InsertIOTransposes": 0.05663871765136719,
459
+ "InsertLocalTransposes": 0.013693094253540039,
460
+ "InsertOffloadedTransposes": 0.003034353256225586,
461
+ "LICM": 0.0034589767456054688,
462
+ "LateLegalizeInst": 0.01206350326538086,
463
+ "LateLegalizePostSplit": 0.004300355911254883,
464
+ "LateLowerReshapeOp": 0.001447439193725586,
465
+ "LateLowerTensorOp": 0.005361080169677734,
466
+ "LateNeuronInstComb": 0.028362512588500977,
467
+ "LayoutPreprocessing": 0.17102479934692383,
468
+ "LayoutPreprocessingAndAnalysis": 0.20053863525390625,
469
+ "LayoutRequirementAnalysis": 0.00810098648071289,
470
+ "LegalizeCCOpLayout": 0.002534151077270508,
471
+ "LegalizeOpLevelAlias": 0.0013082027435302734,
472
+ "LegalizePartitionReduce": 0.0018541812896728516,
473
+ "LegalizeSundaAccess": 0.06417489051818848,
474
+ "LegalizeSundaMacro": 0.011395931243896484,
475
+ "LegalizeType": 0.004536867141723633,
476
+ "LocalLayoutOpt": 0.019284486770629883,
477
+ "LoopFusion": 0.005501747131347656,
478
+ "LoopSplitting": 0.0007183551788330078,
479
+ "LowerBroadcast": 0.0020034313201904297,
480
+ "LowerCCOpBlockAxis": 0.006723642349243164,
481
+ "LowerComplexBroadcast": 0.0025110244750976563,
482
+ "LowerIntrinsics": 0.04395008087158203,
483
+ "LowerTensorOp": 0.01201629638671875,
484
+ "LowerTranspose": 0.015764951705932617,
485
+ "MacroGeneration": 0.1732039451599121,
486
+ "MaskPropagation": 0.006498575210571289,
487
+ "MemcpyElimination": 0.13526344299316406,
488
+ "MutateDataType": 0.0024404525756835938,
489
+ "NeuronAliasDependencyInduction": 0.00028133392333984375,
490
+ "NeuronAliasDependencyReset": 0.027801036834716797,
491
+ "NeuronInstComb": 0.014089107513427734,
492
+ "NeuronLICM": 0.011513233184814453,
493
+ "NeuronLoopFusion": 0.018094778060913086,
494
+ "NeuronLoopInterchange": 0.002248525619506836,
495
+ "NeuronSimplifier": 0.014221668243408203,
496
+ "NeuronSimplifyPredicates": 0.04183816909790039,
497
+ "NeuronValueNumbering": 0.010004520416259766,
498
+ "OptimizeAliasedCopyChain": 0.0007202625274658203,
499
+ "OptimizeNKIKernels": 0.0027985572814941406,
500
+ "PAGLayoutOpt": 0.6076157093048096,
501
+ "PComputeCutting": 0.01562190055847168,
502
+ "PGLayoutTilingPipeline": 1.8925251960754395,
503
+ "PGTiling": 0.4175417423248291,
504
+ "PadElimination": 0.0005469322204589844,
505
+ "ParAxesAnnotation": 0.5765500068664551,
506
+ "PartialLoopFusion": 0.06665897369384766,
507
+ "PartialSimdFusion": 0.06845211982727051,
508
+ "PerfectLoopNest": 0.002520322799682617,
509
+ "RecognizeOpIdiom": 0.0038416385650634766,
510
+ "Recompute": 0.00042510032653808594,
511
+ "RelaxPredicates": 0.004330158233642578,
512
+ "Rematerialization": 0.0048253536224365234,
513
+ "ReshapeWeights": 0.0009126663208007813,
514
+ "ResolveAccessConflict": 0.007032871246337891,
515
+ "ResolveComplicatePredicates": 0.0016722679138183594,
516
+ "RewriteReplicationMatmul": 0.0017805099487304688,
517
+ "RewriteWeights": 0.00464630126953125,
518
+ "SFKVectorizer": 0.6191775798797607,
519
+ "SimpleAllReduceTiling": 0.0028734207153320313,
520
+ "Simplifier": 0.04510617256164551,
521
+ "SimplifyMacroPredicates": 0.03182697296142578,
522
+ "SimplifyNeuronTensor": 0.018846511840820313,
523
+ "SimplifySlice": 0.0010728836059570313,
524
+ "SimplifyTensor": 0.00718235969543457,
525
+ "SpillPSum": 0.02726292610168457,
526
+ "SplitAPUnionSets": 0.20770835876464844,
527
+ "SplitAccGrp": 0.0018444061279296875,
528
+ "StaticProfiler": 0.009473562240600586,
529
+ "StaticTransposeLocalTensor": 0.0051805973052978516,
530
+ "SundaISel": 0.0508725643157959,
531
+ "TCTransform": 0.0011992454528808594,
532
+ "TensorInitialization": 0.02745676040649414,
533
+ "TensorOpSimplifier": 0.006608009338378906,
534
+ "TensorOpTransform": 0.026006698608398438,
535
+ "TileCCOps": 0.008507728576660156,
536
+ "TilingProfiler": 0.015691757202148438,
537
+ "TransformConvOp": 0.002533435821533203,
538
+ "TritiumFusion": 0.1403183937072754,
539
+ "ValueNumbering": 0.0023522377014160156,
540
+ "VectorizeDMA": 0.006667613983154297,
541
+ "VectorizeMatMult": 0.025510072708129883,
542
+ "WeightCoalescing": 0.002580881118774414,
543
+ "ZeroSizeTensorElimination": 0.0002429485321044922
544
+ },
545
+ "tensorizer": {
546
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 15146.0,
547
+ "StaticProfiler::AifUb": 590.4973754882813,
548
+ "StaticProfiler::ArithmeticIntensityTensorizer": 748.2540283203125,
549
+ "StaticProfiler::AverageDmaLength": 2622.051025390625,
550
+ "StaticProfiler::AverageFractalPeUtilization": 99.97018432617188,
551
+ "StaticProfiler::AveragePartitionUtilization": 99.92617797851563,
552
+ "StaticProfiler::AveragePeUtilization": 99.87796020507813,
553
+ "StaticProfiler::DDRTransferBytes": 196215040.0,
554
+ "StaticProfiler::InternalTransferBytes": 332922880.0,
555
+ "StaticProfiler::LoadExpanded": 37252.0,
556
+ "StaticProfiler::LocalizationEfficiency": 126.71589660644531,
557
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 180.83277893066406,
558
+ "StaticProfiler::StoreExpanded": 16897.0,
559
+ "StaticProfiler::TotalDMAExpanded": 54149.0,
560
+ "StaticProfiler::TotalDynamicInstancesCount": 23848.0,
561
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 23836.0,
562
+ "StaticProfiler::TotalLNCComm": 0.0,
563
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
564
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
565
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
566
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
567
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
568
+ "TilingProfiler::GenericInstructionsAfterTiling": 192.0,
569
+ "TilingProfiler::MatMultInstructionsAfterTiling": 7184.0,
570
+ "TilingProfiler::NumPfTransposes": 8.0,
571
+ "TilingProfiler::NumPfTransposesForIo": 0.0,
572
+ "TilingProfiler::NumPfTransposesForLocal": 6.0,
573
+ "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
574
+ "TilingProfiler::PfTransposeInstructions": 5568.0,
575
+ "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
576
+ "TilingProfiler::PfTransposeInstructionsForLocal": 4800.0,
577
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 768.0,
578
+ "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
579
+ "TilingProfiler::SimdInstructionsAfterTiling": 1764.0,
580
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
581
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
582
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
583
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
584
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
585
+ "TransformConvOp::conv2d_column_packing": 0.0,
586
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
587
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
588
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
589
+ }
590
+ },
591
+ "sg0001": {
592
+ "compiletime": {
593
+ "AGOrderingAnalysisPass": 0.1609482765197754,
594
+ "AffinePredicateResolution": 0.0013859272003173828,
595
+ "AliasDependencyElimination": 0.00018930435180664063,
596
+ "AliasDependencyInduction": 0.01599717140197754,
597
+ "AliasDependencyReset": 0.031088829040527344,
598
+ "BFComputeCutting": 0.004532814025878906,
599
+ "BirCodeGenLoop": 0.21514463424682617,
600
+ "CCOpFusion": 0.2648317813873291,
601
+ "CanonicalizeDAGForPGTiling": 0.003528594970703125,
602
+ "CanonicalizeIR": 0.0019960403442382813,
603
+ "CoalesceCCOp": 0.00243377685546875,
604
+ "CommuteConcat": 0.0011005401611328125,
605
+ "DMALocalityOpt": 0.0013654232025146484,
606
+ "DMAProfiler": 0.008886098861694336,
607
+ "DMATilingProfiler": 0.005682706832885742,
608
+ "DataLocalityOpt": 0.2774043083190918,
609
+ "DataStreaming": 0.007985115051269531,
610
+ "DeConcat": 0.001863241195678711,
611
+ "DeadCodeElimination": 0.0013644695281982422,
612
+ "DeadStoreElimination": 0.07262182235717773,
613
+ "DelinearIndices": 0.047678232192993164,
614
+ "Delinearization": 0.004866838455200195,
615
+ "DoNothing": 0.00013303756713867188,
616
+ "DramToDramTranspose": 0.0971534252166748,
617
+ "DumpGraphAndMetadata": 0.013672351837158203,
618
+ "EliminateDivs": 0.003657102584838867,
619
+ "ExpandBatchNorm": 0.0016169548034667969,
620
+ "ExpandISAMacro": 0.0034465789794921875,
621
+ "FactorizeBlkDims": 0.060559749603271484,
622
+ "FactorizeThreadAxesInFreeDims": 0.0021708011627197266,
623
+ "FlattenMacroLoop": 0.004648447036743164,
624
+ "GenericAccessSimplifier": 0.000980377197265625,
625
+ "InferInitValue": 0.05812406539916992,
626
+ "InferIntrinsicOnCC": 0.010819196701049805,
627
+ "InferNeuronTensor": 0.14679336547851563,
628
+ "InferNonlocalTensors": 0.034285783767700195,
629
+ "InferPSumTensor": 0.09114336967468262,
630
+ "InlineNativeKernels": 0.0017209053039550781,
631
+ "InsertIOTransposes": 0.0731968879699707,
632
+ "InsertLocalTransposes": 0.0275421142578125,
633
+ "InsertOffloadedTransposes": 0.007097005844116211,
634
+ "LICM": 0.0033905506134033203,
635
+ "LateLegalizeInst": 0.006936788558959961,
636
+ "LateLegalizePostSplit": 0.003220081329345703,
637
+ "LateLowerReshapeOp": 0.0016317367553710938,
638
+ "LateLowerTensorOp": 0.005948543548583984,
639
+ "LateNeuronInstComb": 0.018251657485961914,
640
+ "LayoutPreprocessing": 0.09319257736206055,
641
+ "LayoutPreprocessingAndAnalysis": 0.11977434158325195,
642
+ "LayoutRequirementAnalysis": 0.009629964828491211,
643
+ "LegalizeCCOpLayout": 0.0020868778228759766,
644
+ "LegalizeOpLevelAlias": 0.0011761188507080078,
645
+ "LegalizePartitionReduce": 0.001623392105102539,
646
+ "LegalizeSundaAccess": 0.021021366119384766,
647
+ "LegalizeSundaMacro": 0.012225627899169922,
648
+ "LegalizeType": 0.02536749839782715,
649
+ "LocalLayoutOpt": 0.04628801345825195,
650
+ "LoopFusion": 0.005954742431640625,
651
+ "LoopSplitting": 0.0006933212280273438,
652
+ "LowerBroadcast": 0.0018084049224853516,
653
+ "LowerCCOpBlockAxis": 0.006256580352783203,
654
+ "LowerComplexBroadcast": 0.002477884292602539,
655
+ "LowerIntrinsics": 0.03852725028991699,
656
+ "LowerTensorOp": 0.010782480239868164,
657
+ "LowerTranspose": 0.018457412719726563,
658
+ "MacroGeneration": 0.1307680606842041,
659
+ "MaskPropagation": 0.0035936832427978516,
660
+ "MemcpyElimination": 0.15900325775146484,
661
+ "MutateDataType": 0.001459360122680664,
662
+ "NeuronAliasDependencyInduction": 0.00030994415283203125,
663
+ "NeuronAliasDependencyReset": 0.0227048397064209,
664
+ "NeuronInstComb": 0.01124882698059082,
665
+ "NeuronLICM": 0.010287761688232422,
666
+ "NeuronLoopFusion": 0.06714057922363281,
667
+ "NeuronLoopInterchange": 0.0033617019653320313,
668
+ "NeuronSimplifier": 0.015295267105102539,
669
+ "NeuronSimplifyPredicates": 0.002671957015991211,
670
+ "NeuronValueNumbering": 0.004712104797363281,
671
+ "OptimizeAliasedCopyChain": 0.0008287429809570313,
672
+ "OptimizeNKIKernels": 0.0030798912048339844,
673
+ "PAGLayoutOpt": 0.4701688289642334,
674
+ "PComputeCutting": 0.008523941040039063,
675
+ "PGLayoutTilingPipeline": 1.527449607849121,
676
+ "PGTiling": 0.562786340713501,
677
+ "PadElimination": 0.0005154609680175781,
678
+ "ParAxesAnnotation": 0.4113032817840576,
679
+ "PartialLoopFusion": 0.03786206245422363,
680
+ "PartialSimdFusion": 0.09660077095031738,
681
+ "PerfectLoopNest": 0.0025701522827148438,
682
+ "RecognizeOpIdiom": 0.004408836364746094,
683
+ "Recompute": 0.0004405975341796875,
684
+ "RelaxPredicates": 0.004298210144042969,
685
+ "Rematerialization": 0.0020570755004882813,
686
+ "ReshapeWeights": 0.0008633136749267578,
687
+ "ResolveAccessConflict": 0.004068136215209961,
688
+ "ResolveComplicatePredicates": 0.0015447139739990234,
689
+ "RewriteReplicationMatmul": 0.0018274784088134766,
690
+ "RewriteWeights": 0.024018287658691406,
691
+ "SFKVectorizer": 0.5714495182037354,
692
+ "SimpleAllReduceTiling": 0.05605673789978027,
693
+ "Simplifier": 0.03458523750305176,
694
+ "SimplifyMacroPredicates": 0.007905960083007813,
695
+ "SimplifyNeuronTensor": 0.05205702781677246,
696
+ "SimplifySlice": 0.0012252330780029297,
697
+ "SimplifyTensor": 0.007117748260498047,
698
+ "SpillPSum": 0.0394134521484375,
699
+ "SplitAPUnionSets": 0.0830078125,
700
+ "SplitAccGrp": 0.0015587806701660156,
701
+ "StaticProfiler": 0.008753538131713867,
702
+ "StaticTransposeLocalTensor": 0.03607439994812012,
703
+ "SundaISel": 0.06672215461730957,
704
+ "TCTransform": 0.0011696815490722656,
705
+ "TensorInitialization": 0.006832122802734375,
706
+ "TensorOpSimplifier": 0.0061838626861572266,
707
+ "TensorOpTransform": 0.03341221809387207,
708
+ "TileCCOps": 0.00767970085144043,
709
+ "TilingProfiler": 0.07469630241394043,
710
+ "TransformConvOp": 0.00249481201171875,
711
+ "TritiumFusion": 0.3289809226989746,
712
+ "ValueNumbering": 0.0027396678924560547,
713
+ "VectorizeDMA": 0.0023260116577148438,
714
+ "VectorizeMatMult": 0.05879783630371094,
715
+ "WeightCoalescing": 0.002382993698120117,
716
+ "ZeroSizeTensorElimination": 0.0001971721649169922
717
+ },
718
+ "tensorizer": {
719
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 37569.0,
720
+ "StaticProfiler::AifUb": 1576.160400390625,
721
+ "StaticProfiler::ArithmeticIntensityTensorizer": 624.52294921875,
722
+ "StaticProfiler::AverageDmaLength": 1256.79248046875,
723
+ "StaticProfiler::AverageFractalPeUtilization": 100.0,
724
+ "StaticProfiler::AveragePartitionUtilization": 99.870361328125,
725
+ "StaticProfiler::AveragePeUtilization": 100.0,
726
+ "StaticProfiler::DDRTransferBytes": 818020352.0,
727
+ "StaticProfiler::InternalTransferBytes": 284688384.0,
728
+ "StaticProfiler::LoadExpanded": 616833.0,
729
+ "StaticProfiler::LocalizationEfficiency": 39.6230583190918,
730
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 43.16416549682617,
731
+ "StaticProfiler::StoreExpanded": 17409.0,
732
+ "StaticProfiler::TotalDMAExpanded": 634242.0,
733
+ "StaticProfiler::TotalDynamicInstancesCount": 49371.0,
734
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 49371.0,
735
+ "StaticProfiler::TotalLNCComm": 0.0,
736
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
737
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
738
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
739
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
740
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
741
+ "TilingProfiler::GenericInstructionsAfterTiling": 128.0,
742
+ "TilingProfiler::MatMultInstructionsAfterTiling": 28672.0,
743
+ "TilingProfiler::NumPfTransposes": 9.0,
744
+ "TilingProfiler::NumPfTransposesForIo": 3.0,
745
+ "TilingProfiler::NumPfTransposesForLocal": 4.0,
746
+ "TilingProfiler::NumPfTransposesForNonlocal": 2.0,
747
+ "TilingProfiler::PfTransposeInstructions": 5856.0,
748
+ "TilingProfiler::PfTransposeInstructionsForIo": 544.0,
749
+ "TilingProfiler::PfTransposeInstructionsForLocal": 4288.0,
750
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 1024.0,
751
+ "TilingProfiler::ReduceInstructionsAfterTiling": 0.0,
752
+ "TilingProfiler::SimdInstructionsAfterTiling": 1876.0,
753
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
754
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
755
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
756
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
757
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
758
+ "TransformConvOp::conv2d_column_packing": 0.0,
759
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
760
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
761
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
762
+ }
763
+ },
764
+ "sg0002": {
765
+ "compiletime": {
766
+ "AGOrderingAnalysisPass": 0.06499266624450684,
767
+ "AffinePredicateResolution": 0.00141143798828125,
768
+ "AliasDependencyElimination": 0.00017595291137695313,
769
+ "AliasDependencyInduction": 0.006516218185424805,
770
+ "AliasDependencyReset": 0.02024674415588379,
771
+ "BFComputeCutting": 0.0023620128631591797,
772
+ "BirCodeGenLoop": 0.13731598854064941,
773
+ "CCOpFusion": 0.02620387077331543,
774
+ "CanonicalizeDAGForPGTiling": 0.0074574947357177734,
775
+ "CanonicalizeIR": 0.0019347667694091797,
776
+ "CoalesceCCOp": 0.005384206771850586,
777
+ "CommuteConcat": 0.0009889602661132813,
778
+ "DMALocalityOpt": 0.0021970272064208984,
779
+ "DMAProfiler": 0.007658243179321289,
780
+ "DMATilingProfiler": 0.04570889472961426,
781
+ "DataLocalityOpt": 0.1127479076385498,
782
+ "DataStreaming": 0.00765538215637207,
783
+ "DeConcat": 0.0007421970367431641,
784
+ "DeadCodeElimination": 0.002073049545288086,
785
+ "DeadStoreElimination": 0.006093263626098633,
786
+ "DelinearIndices": 0.010124444961547852,
787
+ "Delinearization": 0.005106449127197266,
788
+ "DoNothing": 0.00013017654418945313,
789
+ "DramToDramTranspose": 0.03771638870239258,
790
+ "DumpGraphAndMetadata": 0.05296611785888672,
791
+ "EliminateDivs": 0.0021944046020507813,
792
+ "ExpandBatchNorm": 0.0015587806701660156,
793
+ "ExpandISAMacro": 0.0046465396881103516,
794
+ "FactorizeBlkDims": 0.011369466781616211,
795
+ "FactorizeThreadAxesInFreeDims": 0.0014889240264892578,
796
+ "FlattenMacroLoop": 0.0025510787963867188,
797
+ "GenericAccessSimplifier": 0.0009717941284179688,
798
+ "InferInitValue": 0.030786514282226563,
799
+ "InferIntrinsicOnCC": 0.012189865112304688,
800
+ "InferNeuronTensor": 0.0819096565246582,
801
+ "InferNonlocalTensors": 0.025629520416259766,
802
+ "InferPSumTensor": 0.08428668975830078,
803
+ "InlineNativeKernels": 0.003083944320678711,
804
+ "InsertIOTransposes": 0.02764296531677246,
805
+ "InsertLocalTransposes": 0.0040624141693115234,
806
+ "InsertOffloadedTransposes": 0.005682229995727539,
807
+ "LICM": 0.003050565719604492,
808
+ "LateLegalizeInst": 0.022694110870361328,
809
+ "LateLegalizePostSplit": 0.004519462585449219,
810
+ "LateLowerReshapeOp": 0.0023851394653320313,
811
+ "LateLowerTensorOp": 0.0016567707061767578,
812
+ "LateNeuronInstComb": 0.010613203048706055,
813
+ "LayoutPreprocessing": 0.06753706932067871,
814
+ "LayoutPreprocessingAndAnalysis": 0.16236424446105957,
815
+ "LayoutRequirementAnalysis": 0.005420684814453125,
816
+ "LegalizeCCOpLayout": 0.0023717880249023438,
817
+ "LegalizeOpLevelAlias": 0.0012898445129394531,
818
+ "LegalizePartitionReduce": 0.0011932849884033203,
819
+ "LegalizeSundaAccess": 0.025110721588134766,
820
+ "LegalizeSundaMacro": 0.012512683868408203,
821
+ "LegalizeType": 0.0470888614654541,
822
+ "LocalLayoutOpt": 0.0263979434967041,
823
+ "LoopFusion": 0.005193948745727539,
824
+ "LoopSplitting": 0.0005512237548828125,
825
+ "LowerBroadcast": 0.04195570945739746,
826
+ "LowerCCOpBlockAxis": 0.008313655853271484,
827
+ "LowerComplexBroadcast": 0.0025756359100341797,
828
+ "LowerIntrinsics": 0.11726689338684082,
829
+ "LowerTensorOp": 0.010608196258544922,
830
+ "LowerTranspose": 0.08231282234191895,
831
+ "MacroGeneration": 0.07271862030029297,
832
+ "MaskPropagation": 0.005186557769775391,
833
+ "MemcpyElimination": 0.026259899139404297,
834
+ "MutateDataType": 0.0013203620910644531,
835
+ "NeuronAliasDependencyInduction": 0.0002338886260986328,
836
+ "NeuronAliasDependencyReset": 0.029464006423950195,
837
+ "NeuronInstComb": 0.004238128662109375,
838
+ "NeuronLICM": 0.014646768569946289,
839
+ "NeuronLoopFusion": 0.00891876220703125,
840
+ "NeuronLoopInterchange": 0.0014586448669433594,
841
+ "NeuronSimplifier": 0.009086847305297852,
842
+ "NeuronSimplifyPredicates": 0.0033910274505615234,
843
+ "NeuronValueNumbering": 0.0026366710662841797,
844
+ "OptimizeAliasedCopyChain": 0.0006422996520996094,
845
+ "OptimizeNKIKernels": 0.5174376964569092,
846
+ "PAGLayoutOpt": 0.12734031677246094,
847
+ "PComputeCutting": 0.005000591278076172,
848
+ "PGLayoutTilingPipeline": 0.8229436874389648,
849
+ "PGTiling": 0.26772499084472656,
850
+ "PadElimination": 0.0005135536193847656,
851
+ "ParAxesAnnotation": 0.07412934303283691,
852
+ "PartialLoopFusion": 0.013575553894042969,
853
+ "PartialSimdFusion": 0.011231422424316406,
854
+ "PerfectLoopNest": 0.0019729137420654297,
855
+ "RecognizeOpIdiom": 0.0038080215454101563,
856
+ "Recompute": 0.00034308433532714844,
857
+ "RelaxPredicates": 0.004430532455444336,
858
+ "Rematerialization": 0.002201557159423828,
859
+ "ReshapeWeights": 0.0009114742279052734,
860
+ "ResolveAccessConflict": 0.027348041534423828,
861
+ "ResolveComplicatePredicates": 0.0011477470397949219,
862
+ "RewriteReplicationMatmul": 0.0025103092193603516,
863
+ "RewriteWeights": 0.0029447078704833984,
864
+ "SFKVectorizer": 0.19645977020263672,
865
+ "SimpleAllReduceTiling": 0.003208160400390625,
866
+ "Simplifier": 0.003106832504272461,
867
+ "SimplifyMacroPredicates": 0.03599357604980469,
868
+ "SimplifyNeuronTensor": 0.1807866096496582,
869
+ "SimplifySlice": 0.0016787052154541016,
870
+ "SimplifyTensor": 0.04330563545227051,
871
+ "SpillPSum": 0.0645456314086914,
872
+ "SplitAPUnionSets": 0.012967586517333984,
873
+ "SplitAccGrp": 0.0015358924865722656,
874
+ "StaticProfiler": 0.00551915168762207,
875
+ "StaticTransposeLocalTensor": 0.004834890365600586,
876
+ "SundaISel": 0.0945746898651123,
877
+ "TCTransform": 0.0009295940399169922,
878
+ "TensorInitialization": 0.006634950637817383,
879
+ "TensorOpSimplifier": 0.005204439163208008,
880
+ "TensorOpTransform": 0.02082967758178711,
881
+ "TileCCOps": 0.006725311279296875,
882
+ "TilingProfiler": 0.016322612762451172,
883
+ "TransformConvOp": 0.0029544830322265625,
884
+ "TritiumFusion": 0.09467315673828125,
885
+ "ValueNumbering": 0.0020852088928222656,
886
+ "VectorizeDMA": 0.0017535686492919922,
887
+ "VectorizeMatMult": 0.008865118026733398,
888
+ "WeightCoalescing": 0.0030977725982666016,
889
+ "ZeroSizeTensorElimination": 0.00018644332885742188
890
+ },
891
+ "tensorizer": {
892
+ "DMATilingProfiler::TotalInstructionsAfterTiling": 28921.0,
893
+ "StaticProfiler::AifUb": 1080.6693115234375,
894
+ "StaticProfiler::ArithmeticIntensityTensorizer": 449.7645263671875,
895
+ "StaticProfiler::AverageDmaLength": 1323.6162109375,
896
+ "StaticProfiler::AverageFractalPeUtilization": 99.88423156738281,
897
+ "StaticProfiler::AveragePartitionUtilization": 99.71043395996094,
898
+ "StaticProfiler::AveragePeUtilization": 99.53581237792969,
899
+ "StaticProfiler::DDRTransferBytes": 826525760.0,
900
+ "StaticProfiler::InternalTransferBytes": 96576528.0,
901
+ "StaticProfiler::LoadExpanded": 619540.0,
902
+ "StaticProfiler::LocalizationEfficiency": 41.61907196044922,
903
+ "StaticProfiler::LocalizationEfficiencyIgnoreNonlocal": 45.55835723876953,
904
+ "StaticProfiler::StoreExpanded": 12842.0,
905
+ "StaticProfiler::TotalDMAExpanded": 632382.0,
906
+ "StaticProfiler::TotalDynamicInstancesCount": 34834.0,
907
+ "StaticProfiler::TotalDynamicInstancesWithMmPackedCount": 34738.0,
908
+ "StaticProfiler::TotalLNCComm": 0.0,
909
+ "StaticProfiler::TotalLNCCommTransfer": 0.0,
910
+ "TilingProfiler::AveragePartitionUtilizationAfterTiling": 0.0,
911
+ "TilingProfiler::AveragePeUtilizationAfterTiling": 0.0,
912
+ "TilingProfiler::BatchnormInstructionsAfterTiling": 0.0,
913
+ "TilingProfiler::DmaInstructionsAfterTiling": 0.0,
914
+ "TilingProfiler::GenericInstructionsAfterTiling": 4.0,
915
+ "TilingProfiler::MatMultInstructionsAfterTiling": 25600.0,
916
+ "TilingProfiler::NumPfTransposes": 4.0,
917
+ "TilingProfiler::NumPfTransposesForIo": 0.0,
918
+ "TilingProfiler::NumPfTransposesForLocal": 1.0,
919
+ "TilingProfiler::NumPfTransposesForNonlocal": 3.0,
920
+ "TilingProfiler::PfTransposeInstructions": 1537.0,
921
+ "TilingProfiler::PfTransposeInstructionsForIo": 0.0,
922
+ "TilingProfiler::PfTransposeInstructionsForLocal": 1.0,
923
+ "TilingProfiler::PfTransposeInstructionsForNonlocal": 1536.0,
924
+ "TilingProfiler::ReduceInstructionsAfterTiling": 10.0,
925
+ "TilingProfiler::SimdInstructionsAfterTiling": 626.0,
926
+ "TilingProfiler::TotalInstructionsAfterTiling": 0.0,
927
+ "TransformConvOp::Conv1d_depthwise_bf01_oi01_bf01": 0.0,
928
+ "TransformConvOp::Conv2d_dw_fb01_io01_01bf_rep_nhwc_Pcinh": 0.0,
929
+ "TransformConvOp::Conv2d_pbp_0f1b_0i1o_01fb_experimental_1": 0.0,
930
+ "TransformConvOp::Conv2d_pbp_fb01_io01_01bf_experimental_1": 0.0,
931
+ "TransformConvOp::conv2d_column_packing": 0.0,
932
+ "TransformConvOp::conv2d_column_packing_1": 0.0,
933
+ "TransformConvOp::conv2d_column_packing_io10": 0.0,
934
+ "TransformConvOp::conv2d_depthwise_f01b_o01i_bf01": 0.0
935
+ }
936
+ },
937
+ "sg01": {
938
+ "compiletime": {
939
+ "CanonicalizeConv": 7.000000096013537e-06,
940
+ "CanonicalizeForTensorizer": 1.9999999494757503e-05,
941
+ "Canonicalizer": 0.00028300000121816993,
942
+ "HoistCompute": 1.9999999949504854e-06,
943
+ "IdentifyCrossPassTensors": 1.4999999621068127e-05,
944
+ "MemcastMotion": 9.000000318337698e-06,
945
+ "PenguinizeFunctions": 1.8000000636675395e-05,
946
+ "PruneFunctions": 4.999999873689376e-06,
947
+ "RemoveOptimizationBarriers": 5.0999999075429514e-05,
948
+ "ScatterMotion": 3.999999989900971e-06,
949
+ "TensorizerLegalizationPass": 2.2000000171829015e-05,
950
+ "VerifySupportedOps": 1.1000000085914508e-05,
951
+ "algsimp": 6.800000119255856e-05,
952
+ "batchnorm_expander": 1.5999999959603883e-05,
953
+ "boundary-marker-removal": 3.999999989900971e-06,
954
+ "call-inliner": 9.000000318337698e-06,
955
+ "canonicalize-boundary-marker": 4.999999873689376e-06,
956
+ "collective-stream-id-checker": 7.000000096013537e-06,
957
+ "comparison-expander": 4.999999873689376e-06,
958
+ "computation-deduplicator": 2.4000000848900527e-05,
959
+ "conditional-to-select": 4.999999873689376e-06,
960
+ "config-lowering": 4.600000102072954e-05,
961
+ "constant_folding": 9.999999747378752e-06,
962
+ "cse": 2.2000000171829015e-05,
963
+ "dce": 1.9999999949504854e-06,
964
+ "dynamic-slice-transpose": 3.999999989900971e-06,
965
+ "eliminate-redundant-compare": 3.000000106112566e-06,
966
+ "emit-offloaded-dropout": 2.300000051036477e-05,
967
+ "flatten-call-graph": 9.000000318337698e-06,
968
+ "fuse-send-recv": 2.5999999706982635e-05,
969
+ "hilo::LegalizeAlias": 7.999999979801942e-06,
970
+ "hilo::NeuronInstCombine": 7.79999973019585e-05,
971
+ "hilo::NeuronOpFusion": 2.8000000384054147e-05,
972
+ "hilo::ReplaceTokenTypeWithU8Pass": 2.300000051036477e-05,
973
+ "hilo::ScheduleFusion": 9.999999974752427e-07,
974
+ "hilo::SixtyFourHack": 1.5999999959603883e-05,
975
+ "hilo::VerifyAliasing": 3.000000106112566e-06,
976
+ "hlo-mac-count": 3.899999865097925e-05,
977
+ "hlo-verifier": 0.00021499999274965376,
978
+ "legalize-ccops": 1.9999999949504854e-06,
979
+ "legalize-compare": 3.999999989900971e-06,
980
+ "lower-argminmax-custom-call": 3.999999989900971e-06,
981
+ "map-inline": 1.1000000085914508e-05,
982
+ "metadata-naming": 2.300000051036477e-05,
983
+ "mlir::detail::OpToOpPassAdaptor": 2.9999999242136255e-05,
984
+ "mlir::hlo::MhloToPyPenguin": 0.025178000330924988,
985
+ "mlir::mhlo::LowerComplexExtraPass": 8.70000003487803e-05,
986
+ "mlir::mhlo::LowerComplexPass": 0.00013099999341648072,
987
+ "native-to-custom-softmax": 9.999999747378752e-06,
988
+ "native-to-custom-softmax-dx": 3.600000127335079e-05,
989
+ "operand_upcaster": 2.300000051036477e-05,
990
+ "post-par-pipe-begin": 1.1000000085914508e-05,
991
+ "post-par-pipe-end": 0.0,
992
+ "post-partition-simplification": 0.000590000010561198,
993
+ "replace-minimum-constant": 7.000000096013537e-06,
994
+ "reshape-mover": 3.999999989900971e-06,
995
+ "simplify-concat": 5.400000009103678e-05,
996
+ "simplify-while-loops": 1.9999999949504854e-06,
997
+ "transform-variadic-reduce": 9.999999747378752e-06,
998
+ "tuple-simplifier": 4.999999873689376e-06,
999
+ "unpack-nested-aws-ntwsr": 3.999999989900971e-06,
1000
+ "unroll-while-loop": 0.0
1001
+ },
1002
+ "hilo": {
1003
+ "ArithmeticIntensity": 1411.2052001953125,
1004
+ "HloMacCount": 240518168576.0,
1005
+ "Traffic": 340869152.0
1006
+ }
1007
+ },
1008
+ "sg02": {
1009
+ "compiletime": {
1010
+ "CanonicalizeConv": 1.1000000085914508e-05,
1011
+ "CanonicalizeForTensorizer": 1.4999999621068127e-05,
1012
+ "Canonicalizer": 0.0003480000013951212,
1013
+ "HoistCompute": 9.999999747378752e-06,
1014
+ "IdentifyCrossPassTensors": 1.2999999853491317e-05,
1015
+ "MemcastMotion": 0.0,
1016
+ "PenguinizeFunctions": 9.999999747378752e-06,
1017
+ "PruneFunctions": 7.999999979801942e-06,
1018
+ "RemoveOptimizationBarriers": 1.5999999959603883e-05,
1019
+ "ScatterMotion": 9.999999974752427e-07,
1020
+ "TensorizerLegalizationPass": 7.000000096013537e-06,
1021
+ "VerifySupportedOps": 1.2000000424450263e-05,
1022
+ "algsimp": 7.599999662488699e-05,
1023
+ "batchnorm_expander": 1.4000000192027073e-05,
1024
+ "boundary-marker-removal": 3.999999989900971e-06,
1025
+ "call-inliner": 1.1000000085914508e-05,
1026
+ "canonicalize-boundary-marker": 4.999999873689376e-06,
1027
+ "collective-stream-id-checker": 4.999999873689376e-06,
1028
+ "comparison-expander": 6.000000212225132e-06,
1029
+ "computation-deduplicator": 2.300000051036477e-05,
1030
+ "conditional-to-select": 7.000000096013537e-06,
1031
+ "config-lowering": 6.0999998822808266e-05,
1032
+ "constant_folding": 9.000000318337698e-06,
1033
+ "cse": 1.4000000192027073e-05,
1034
+ "dce": 9.999999974752427e-07,
1035
+ "dynamic-slice-transpose": 4.999999873689376e-06,
1036
+ "eliminate-redundant-compare": 3.000000106112566e-06,
1037
+ "emit-offloaded-dropout": 2.099999983329326e-05,
1038
+ "flatten-call-graph": 1.2999999853491317e-05,
1039
+ "fuse-send-recv": 2.099999983329326e-05,
1040
+ "hilo::LegalizeAlias": 1.9999999949504854e-06,
1041
+ "hilo::NeuronInstCombine": 1.5999999959603883e-05,
1042
+ "hilo::NeuronOpFusion": 1.8000000636675395e-05,
1043
+ "hilo::ReplaceTokenTypeWithU8Pass": 1.700000029813964e-05,
1044
+ "hilo::ScheduleFusion": 1.1000000085914508e-05,
1045
+ "hilo::SixtyFourHack": 4.099999932805076e-05,
1046
+ "hilo::VerifyAliasing": 9.999999974752427e-07,
1047
+ "hlo-mac-count": 0.00020500000391621143,
1048
+ "hlo-verifier": 0.00019299999985378236,
1049
+ "legalize-ccops": 9.999999974752427e-07,
1050
+ "legalize-compare": 3.000000106112566e-06,
1051
+ "lower-argminmax-custom-call": 3.000000106112566e-06,
1052
+ "map-inline": 1.4999999621068127e-05,
1053
+ "metadata-naming": 1.4000000192027073e-05,
1054
+ "mlir::detail::OpToOpPassAdaptor": 1.8999999156221747e-05,
1055
+ "mlir::hlo::MhloToPyPenguin": 0.018411999568343163,
1056
+ "mlir::mhlo::LowerComplexExtraPass": 0.00017299999308306724,
1057
+ "mlir::mhlo::LowerComplexPass": 0.00020799999765586108,
1058
+ "native-to-custom-softmax": 9.999999747378752e-06,
1059
+ "native-to-custom-softmax-dx": 3.7999998312443495e-05,
1060
+ "operand_upcaster": 1.8000000636675395e-05,
1061
+ "post-par-pipe-begin": 3.999999989900971e-06,
1062
+ "post-par-pipe-end": 0.0,
1063
+ "post-partition-simplification": 0.0005879999953322113,
1064
+ "replace-minimum-constant": 1.1000000085914508e-05,
1065
+ "reshape-mover": 3.000000106112566e-06,
1066
+ "simplify-concat": 4.999999873689376e-05,
1067
+ "simplify-while-loops": 3.000000106112566e-06,
1068
+ "transform-variadic-reduce": 0.00017499999376013875,
1069
+ "tuple-simplifier": 4.999999873689376e-06,
1070
+ "unpack-nested-aws-ntwsr": 4.999999873689376e-06,
1071
+ "unroll-while-loop": 0.0
1072
+ },
1073
+ "hilo": {
1074
+ "ArithmeticIntensity": 1000.4613647460938,
1075
+ "HloMacCount": 180455735296.0,
1076
+ "Traffic": 360745024.0
1077
+ }
1078
+ }
1079
+ }
context_encoding_model/_tp0_bk4/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d468f8c91c8ae558da4744c631e0351092b98d3698d8a39f05082867c022a7
3
+ size 3298304
context_encoding_model/_tp0_bk4/log-neuron-cc.txt ADDED
The diff for this file is too large to render. See raw diff
 
context_encoding_model/_tp0_bk4/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14460b3b6b308407432a80fca62093da7cc19d3c26c9d018e923b97ffe30fde0
3
+ size 2347463
context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:592a9cdc4c9b4697249af595e7e4e7ae477f80acdebaede8842f0734e5baf50e
3
+ size 2413336
context_encoding_model/_tp0_bk4/model.MODULE_d342327da795afc2aa68+5e8b788a.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93d468f8c91c8ae558da4744c631e0351092b98d3698d8a39f05082867c022a7
3
+ size 3298304
context_encoding_model/_tp0_bk4/neuron_config.json ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": false,
3
+ "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.3",
4
+ "add_cross_attention": false,
5
+ "architectures": [
6
+ "MistralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "attribute_map": {},
10
+ "bad_words_ids": null,
11
+ "begin_suppress_tokens": null,
12
+ "bos_token_id": 1,
13
+ "chunk_size_feed_forward": 0,
14
+ "cross_attention_hidden_size": null,
15
+ "decoder_start_token_id": null,
16
+ "diversity_penalty": 0.0,
17
+ "do_sample": false,
18
+ "early_stopping": false,
19
+ "encoder_no_repeat_ngram_size": 0,
20
+ "eos_token_id": 2,
21
+ "exponential_decay_length_penalty": null,
22
+ "finetuning_task": null,
23
+ "forced_bos_token_id": null,
24
+ "forced_eos_token_id": null,
25
+ "fused_spec_config": null,
26
+ "head_dim": 128,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 4096,
29
+ "id2label": {
30
+ "0": "LABEL_0",
31
+ "1": "LABEL_1"
32
+ },
33
+ "initializer_range": 0.02,
34
+ "intermediate_size": 14336,
35
+ "is_decoder": false,
36
+ "is_encoder_decoder": false,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1
40
+ },
41
+ "length_penalty": 1.0,
42
+ "max_length": 20,
43
+ "max_position_embeddings": 32768,
44
+ "metadata": null,
45
+ "min_length": 0,
46
+ "model_type": "mistral",
47
+ "neuron_config": {
48
+ "activation_quantization_type": null,
49
+ "allow_input_truncation": false,
50
+ "apply_seq_ids_mask": false,
51
+ "async_mode": false,
52
+ "attention_dp_degree": 1,
53
+ "attention_dtype": null,
54
+ "attn_block_cte_nki_kernel_enabled": false,
55
+ "attn_block_tkg_nki_kernel_cache_update": false,
56
+ "attn_block_tkg_nki_kernel_enabled": false,
57
+ "attn_cls": "NeuronLlamaAttention",
58
+ "attn_kernel_enabled": null,
59
+ "attn_tkg_builtin_kernel_enabled": false,
60
+ "attn_tkg_nki_kernel_enabled": false,
61
+ "batch_size": 1,
62
+ "bucket_n_active_tokens": true,
63
+ "buckets": [
64
+ 2048
65
+ ],
66
+ "cast_type": "config",
67
+ "cc_pipeline_tiling_factor": 2,
68
+ "chunked_prefill_config": null,
69
+ "context_encoding_buckets": [
70
+ 2048
71
+ ],
72
+ "cp_degree": 1,
73
+ "ctx_batch_size": 1,
74
+ "disable_kv_cache_tiling": false,
75
+ "draft_model_modules_to_not_convert": null,
76
+ "enable_bucketing": true,
77
+ "enable_eagle_draft_input_norm": false,
78
+ "enable_eagle_speculation": false,
79
+ "enable_fused_speculation": false,
80
+ "enable_long_context_mode": false,
81
+ "enable_output_completion_notifications": false,
82
+ "enable_spill_reload_dge": false,
83
+ "enable_token_tree": false,
84
+ "ep_degree": 1,
85
+ "expert_mlp_nki_kernel_enabled": null,
86
+ "flash_decoding_enabled": false,
87
+ "fused_qkv": false,
88
+ "fused_rmsnorm_skip_gamma": false,
89
+ "is_block_kv_layout": null,
90
+ "is_chunked_prefill": false,
91
+ "is_continuous_batching": true,
92
+ "is_eagle_draft": false,
93
+ "is_medusa": false,
94
+ "is_prefill_stage": true,
95
+ "is_prefix_caching": false,
96
+ "k_cache_transposed": false,
97
+ "kv_cache_batch_size": 4,
98
+ "kv_cache_padding_size": 0,
99
+ "kv_cache_quant": false,
100
+ "kv_cache_tiling": false,
101
+ "layer_boundary_markers": false,
102
+ "lm_head_pad": false,
103
+ "lm_head_pad_alignment_size": 1,
104
+ "local_ranks_size": 2,
105
+ "logical_nc_config": 1,
106
+ "lora_config": null,
107
+ "max_batch_size": 4,
108
+ "max_context_length": 2048,
109
+ "max_length": 2048,
110
+ "max_new_tokens": null,
111
+ "medusa_speculation_length": 0,
112
+ "medusa_tree": null,
113
+ "mlp_kernel_enabled": false,
114
+ "mlp_kernel_fuse_residual_add": false,
115
+ "modules_to_not_convert": null,
116
+ "moe_fused_nki_kernel_enabled": null,
117
+ "n_active_tokens": 2048,
118
+ "n_positions": 2048,
119
+ "num_medusa_heads": 0,
120
+ "on_cpu": false,
121
+ "on_device_sampling_config": {
122
+ "deterministic": false,
123
+ "do_sample": false,
124
+ "dynamic": true,
125
+ "global_topk": 256,
126
+ "on_device_sampling_config": true,
127
+ "temperature": 1.0,
128
+ "top_k": 1,
129
+ "top_k_kernel_enabled": false,
130
+ "top_p": 1.0
131
+ },
132
+ "output_logits": false,
133
+ "overrides_torch_dtype": true,
134
+ "pa_block_size": 2048,
135
+ "pa_num_blocks": 4,
136
+ "padding_side": "right",
137
+ "pp_degree": 1,
138
+ "prefix_buckets": null,
139
+ "qk_layernorm": false,
140
+ "qkv_kernel_enabled": false,
141
+ "qkv_kernel_fuse_residual_add": false,
142
+ "qkv_kernel_nbsd_layout": false,
143
+ "quantization_dtype": "int8",
144
+ "quantization_type": "per_tensor_symmetric",
145
+ "quantize_clamp_bound": Infinity,
146
+ "quantized": false,
147
+ "quantized_checkpoints_path": null,
148
+ "quantized_mlp_kernel_enabled": false,
149
+ "rmsnorm_quantize_kernel_enabled": false,
150
+ "router_topk_nki_kernel_enabled": null,
151
+ "rpl_reduce_dtype": null,
152
+ "save_sharded_checkpoint": true,
153
+ "scratchpad_page_size": null,
154
+ "seq_len": 2048,
155
+ "seq_len_threshold_for_cc_tiling": 16384,
156
+ "sequence_parallel_enabled": false,
157
+ "shared_mlp_nki_kernel_enabled": null,
158
+ "skip_sharding": false,
159
+ "skip_warmup": false,
160
+ "spec_batch_size": 4,
161
+ "speculation_length": 0,
162
+ "start_rank_id": 0,
163
+ "target": null,
164
+ "tile_cc": false,
165
+ "tkg_batch_size": 4,
166
+ "token_generation_buckets": null,
167
+ "token_tree_config": null,
168
+ "torch_dtype": "bfloat16",
169
+ "tp_degree": 2,
170
+ "vocab_parallel": false,
171
+ "weight_gather_seq_len_threshold": 32768,
172
+ "weights_to_skip_layout_optimization": [],
173
+ "world_size": 2
174
+ },
175
+ "no_repeat_ngram_size": 0,
176
+ "num_attention_heads": 32,
177
+ "num_beam_groups": 1,
178
+ "num_beams": 1,
179
+ "num_cores_per_group": 1,
180
+ "num_hidden_layers": 32,
181
+ "num_key_value_heads": 8,
182
+ "num_return_sequences": 1,
183
+ "output_attentions": false,
184
+ "output_hidden_states": false,
185
+ "output_scores": false,
186
+ "pad_token_id": 0,
187
+ "prefix": null,
188
+ "problem_type": null,
189
+ "pruned_heads": {},
190
+ "remove_invalid_values": false,
191
+ "repetition_penalty": 1.0,
192
+ "return_dict": true,
193
+ "return_dict_in_generate": false,
194
+ "rms_norm_eps": 1e-05,
195
+ "rope_theta": 1000000.0,
196
+ "sep_token_id": null,
197
+ "sliding_window": null,
198
+ "suppress_tokens": null,
199
+ "task_specific_params": null,
200
+ "temperature": 1.0,
201
+ "tf_legacy_loss": false,
202
+ "tie_encoder_decoder": false,
203
+ "tie_word_embeddings": false,
204
+ "tokenizer_class": null,
205
+ "top_k": 50,
206
+ "top_p": 1.0,
207
+ "torchscript": false,
208
+ "transformers_version": "4.42.0.dev0",
209
+ "typical_p": 1.0,
210
+ "use_bfloat16": false,
211
+ "use_cache": true,
212
+ "vocab_size": 32768
213
+ }
layout_opt/command.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ neuronx-cc compile graph.hlo --framework XLA --target trn1 --output graph.neff --model-type=transformer -O1 --lnc=1 '--internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --verify-hlo=false' --logfile=log-neuron-cc.txt --verbose=35
layout_opt/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:140da06783df36b3d25d8903dc194df46247db9b5b03ef10b1abebe30d252275
3
+ size 5848064
layout_opt/log-neuron-cc.txt ADDED
The diff for this file is too large to render. See raw diff
 
layout_opt/metaneff ADDED
@@ -0,0 +1,874 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ (
3
+ input0���2embed_tokens.weight8
4
+ ;
5
+ input1� �2'layers.0.self_attn.o_proj.o_proj.weight8
6
+ =
7
+ input2�� 2)layers.0.self_attn.qkv_proj.v_proj.weight8
8
+ 1
9
+ input3� 2layers.0.input_layernorm.weight8
10
+ =
11
+ input4�� 2)layers.0.self_attn.qkv_proj.k_proj.weight8
12
+ =
13
+ input5�� 2)layers.0.self_attn.qkv_proj.q_proj.weight8
14
+ 1
15
+ input6� �82layers.0.mlp.down_proj.weight8
16
+ /
17
+ input7�8� 2layers.0.mlp.up_proj.weight8
18
+ :
19
+ input8� 2(layers.0.post_attention_layernorm.weight8
20
+ 1
21
+ input9�8� 2layers.0.mlp.gate_proj.weight8
22
+ <
23
+ input10� �2'layers.1.self_attn.o_proj.o_proj.weight8
24
+ >
25
+ input11�� 2)layers.1.self_attn.qkv_proj.v_proj.weight8
26
+ 2
27
+ input12� 2layers.1.input_layernorm.weight8
28
+ >
29
+ input13�� 2)layers.1.self_attn.qkv_proj.k_proj.weight8
30
+ >
31
+ input14�� 2)layers.1.self_attn.qkv_proj.q_proj.weight8
32
+ 2
33
+ input15� �82layers.1.mlp.down_proj.weight8
34
+ 0
35
+ input16�8� 2layers.1.mlp.up_proj.weight8
36
+ ;
37
+ input17� 2(layers.1.post_attention_layernorm.weight8
38
+ 2
39
+ input18�8� 2layers.1.mlp.gate_proj.weight8
40
+ <
41
+ input19� �2'layers.2.self_attn.o_proj.o_proj.weight8
42
+ >
43
+ input20�� 2)layers.2.self_attn.qkv_proj.v_proj.weight8
44
+ 2
45
+ input21� 2layers.2.input_layernorm.weight8
46
+ >
47
+ input22�� 2)layers.2.self_attn.qkv_proj.k_proj.weight8
48
+ >
49
+ input23�� 2)layers.2.self_attn.qkv_proj.q_proj.weight8
50
+ 2
51
+ input24� �82layers.2.mlp.down_proj.weight8
52
+ 0
53
+ input25�8� 2layers.2.mlp.up_proj.weight8
54
+ ;
55
+ input26� 2(layers.2.post_attention_layernorm.weight8
56
+ 2
57
+ input27�8� 2layers.2.mlp.gate_proj.weight8
58
+ <
59
+ input28� �2'layers.3.self_attn.o_proj.o_proj.weight8
60
+ >
61
+ input29�� 2)layers.3.self_attn.qkv_proj.v_proj.weight8
62
+ 2
63
+ input30� 2layers.3.input_layernorm.weight8
64
+ >
65
+ input31�� 2)layers.3.self_attn.qkv_proj.k_proj.weight8
66
+ >
67
+ input32�� 2)layers.3.self_attn.qkv_proj.q_proj.weight8
68
+ 2
69
+ input33� �82layers.3.mlp.down_proj.weight8
70
+ 0
71
+ input34�8� 2layers.3.mlp.up_proj.weight8
72
+ ;
73
+ input35� 2(layers.3.post_attention_layernorm.weight8
74
+ 2
75
+ input36�8� 2layers.3.mlp.gate_proj.weight8
76
+ <
77
+ input37� �2'layers.4.self_attn.o_proj.o_proj.weight8
78
+ >
79
+ input38�� 2)layers.4.self_attn.qkv_proj.v_proj.weight8
80
+ 2
81
+ input39� 2layers.4.input_layernorm.weight8
82
+ >
83
+ input40�� 2)layers.4.self_attn.qkv_proj.k_proj.weight8
84
+ >
85
+ input41�� 2)layers.4.self_attn.qkv_proj.q_proj.weight8
86
+ 2
87
+ input42� �82layers.4.mlp.down_proj.weight8
88
+ 0
89
+ input43�8� 2layers.4.mlp.up_proj.weight8
90
+ ;
91
+ input44� 2(layers.4.post_attention_layernorm.weight8
92
+ 2
93
+ input45�8� 2layers.4.mlp.gate_proj.weight8
94
+ <
95
+ input46� �2'layers.5.self_attn.o_proj.o_proj.weight8
96
+ >
97
+ input47�� 2)layers.5.self_attn.qkv_proj.v_proj.weight8
98
+ 2
99
+ input48� 2layers.5.input_layernorm.weight8
100
+ >
101
+ input49�� 2)layers.5.self_attn.qkv_proj.k_proj.weight8
102
+ >
103
+ input50�� 2)layers.5.self_attn.qkv_proj.q_proj.weight8
104
+ 2
105
+ input51� �82layers.5.mlp.down_proj.weight8
106
+ 0
107
+ input52�8� 2layers.5.mlp.up_proj.weight8
108
+ ;
109
+ input53� 2(layers.5.post_attention_layernorm.weight8
110
+ 2
111
+ input54�8� 2layers.5.mlp.gate_proj.weight8
112
+ <
113
+ input55� �2'layers.6.self_attn.o_proj.o_proj.weight8
114
+ >
115
+ input56�� 2)layers.6.self_attn.qkv_proj.v_proj.weight8
116
+ 2
117
+ input57� 2layers.6.input_layernorm.weight8
118
+ >
119
+ input58�� 2)layers.6.self_attn.qkv_proj.k_proj.weight8
120
+ >
121
+ input59�� 2)layers.6.self_attn.qkv_proj.q_proj.weight8
122
+ 2
123
+ input60� �82layers.6.mlp.down_proj.weight8
124
+ 0
125
+ input61�8� 2layers.6.mlp.up_proj.weight8
126
+ ;
127
+ input62� 2(layers.6.post_attention_layernorm.weight8
128
+ 2
129
+ input63�8� 2layers.6.mlp.gate_proj.weight8
130
+ <
131
+ input64� �2'layers.7.self_attn.o_proj.o_proj.weight8
132
+ >
133
+ input65�� 2)layers.7.self_attn.qkv_proj.v_proj.weight8
134
+ 2
135
+ input66� 2layers.7.input_layernorm.weight8
136
+ >
137
+ input67�� 2)layers.7.self_attn.qkv_proj.k_proj.weight8
138
+ >
139
+ input68�� 2)layers.7.self_attn.qkv_proj.q_proj.weight8
140
+ 2
141
+ input69� �82layers.7.mlp.down_proj.weight8
142
+ 0
143
+ input70�8� 2layers.7.mlp.up_proj.weight8
144
+ ;
145
+ input71� 2(layers.7.post_attention_layernorm.weight8
146
+ 2
147
+ input72�8� 2layers.7.mlp.gate_proj.weight8
148
+ <
149
+ input73� �2'layers.8.self_attn.o_proj.o_proj.weight8
150
+ >
151
+ input74�� 2)layers.8.self_attn.qkv_proj.v_proj.weight8
152
+ 2
153
+ input75� 2layers.8.input_layernorm.weight8
154
+ >
155
+ input76�� 2)layers.8.self_attn.qkv_proj.k_proj.weight8
156
+ >
157
+ input77�� 2)layers.8.self_attn.qkv_proj.q_proj.weight8
158
+ 2
159
+ input78� �82layers.8.mlp.down_proj.weight8
160
+ 0
161
+ input79�8� 2layers.8.mlp.up_proj.weight8
162
+ ;
163
+ input80� 2(layers.8.post_attention_layernorm.weight8
164
+ 2
165
+ input81�8� 2layers.8.mlp.gate_proj.weight8
166
+ <
167
+ input82� �2'layers.9.self_attn.o_proj.o_proj.weight8
168
+ >
169
+ input83�� 2)layers.9.self_attn.qkv_proj.v_proj.weight8
170
+ 2
171
+ input84� 2layers.9.input_layernorm.weight8
172
+ >
173
+ input85�� 2)layers.9.self_attn.qkv_proj.k_proj.weight8
174
+ >
175
+ input86�� 2)layers.9.self_attn.qkv_proj.q_proj.weight8
176
+ 2
177
+ input87� �82layers.9.mlp.down_proj.weight8
178
+ 0
179
+ input88�8� 2layers.9.mlp.up_proj.weight8
180
+ ;
181
+ input89� 2(layers.9.post_attention_layernorm.weight8
182
+ 2
183
+ input90�8� 2layers.9.mlp.gate_proj.weight8
184
+ =
185
+ input91� �2(layers.10.self_attn.o_proj.o_proj.weight8
186
+ ?
187
+ input92�� 2*layers.10.self_attn.qkv_proj.v_proj.weight8
188
+ 3
189
+ input93� 2 layers.10.input_layernorm.weight8
190
+ ?
191
+ input94�� 2*layers.10.self_attn.qkv_proj.k_proj.weight8
192
+ ?
193
+ input95�� 2*layers.10.self_attn.qkv_proj.q_proj.weight8
194
+ 3
195
+ input96� �82layers.10.mlp.down_proj.weight8
196
+ 1
197
+ input97�8� 2layers.10.mlp.up_proj.weight8
198
+ <
199
+ input98� 2)layers.10.post_attention_layernorm.weight8
200
+ 3
201
+ input99�8� 2layers.10.mlp.gate_proj.weight8
202
+ >
203
+ input100� �2(layers.11.self_attn.o_proj.o_proj.weight8
204
+ @
205
+ input101�� 2*layers.11.self_attn.qkv_proj.v_proj.weight8
206
+ 4
207
+ input102� 2 layers.11.input_layernorm.weight8
208
+ @
209
+ input103�� 2*layers.11.self_attn.qkv_proj.k_proj.weight8
210
+ @
211
+ input104�� 2*layers.11.self_attn.qkv_proj.q_proj.weight8
212
+ 4
213
+ input105� �82layers.11.mlp.down_proj.weight8
214
+ 2
215
+ input106�8� 2layers.11.mlp.up_proj.weight8
216
+ =
217
+ input107� 2)layers.11.post_attention_layernorm.weight8
218
+ 4
219
+ input108�8� 2layers.11.mlp.gate_proj.weight8
220
+ >
221
+ input109� �2(layers.12.self_attn.o_proj.o_proj.weight8
222
+ @
223
+ input110�� 2*layers.12.self_attn.qkv_proj.v_proj.weight8
224
+ 4
225
+ input111� 2 layers.12.input_layernorm.weight8
226
+ @
227
+ input112�� 2*layers.12.self_attn.qkv_proj.k_proj.weight8
228
+ @
229
+ input113�� 2*layers.12.self_attn.qkv_proj.q_proj.weight8
230
+ 4
231
+ input114� �82layers.12.mlp.down_proj.weight8
232
+ 2
233
+ input115�8� 2layers.12.mlp.up_proj.weight8
234
+ =
235
+ input116� 2)layers.12.post_attention_layernorm.weight8
236
+ 4
237
+ input117�8� 2layers.12.mlp.gate_proj.weight8
238
+ >
239
+ input118� �2(layers.13.self_attn.o_proj.o_proj.weight8
240
+ @
241
+ input119�� 2*layers.13.self_attn.qkv_proj.v_proj.weight8
242
+ 4
243
+ input120� 2 layers.13.input_layernorm.weight8
244
+ @
245
+ input121�� 2*layers.13.self_attn.qkv_proj.k_proj.weight8
246
+ @
247
+ input122�� 2*layers.13.self_attn.qkv_proj.q_proj.weight8
248
+ 4
249
+ input123� �82layers.13.mlp.down_proj.weight8
250
+ 2
251
+ input124�8� 2layers.13.mlp.up_proj.weight8
252
+ =
253
+ input125� 2)layers.13.post_attention_layernorm.weight8
254
+ 4
255
+ input126�8� 2layers.13.mlp.gate_proj.weight8
256
+ >
257
+ input127� �2(layers.14.self_attn.o_proj.o_proj.weight8
258
+ @
259
+ input128�� 2*layers.14.self_attn.qkv_proj.v_proj.weight8
260
+ 4
261
+ input129� 2 layers.14.input_layernorm.weight8
262
+ @
263
+ input130�� 2*layers.14.self_attn.qkv_proj.k_proj.weight8
264
+ @
265
+ input131�� 2*layers.14.self_attn.qkv_proj.q_proj.weight8
266
+ 4
267
+ input132� �82layers.14.mlp.down_proj.weight8
268
+ 2
269
+ input133�8� 2layers.14.mlp.up_proj.weight8
270
+ =
271
+ input134� 2)layers.14.post_attention_layernorm.weight8
272
+ 4
273
+ input135�8� 2layers.14.mlp.gate_proj.weight8
274
+ >
275
+ input136� �2(layers.15.self_attn.o_proj.o_proj.weight8
276
+ @
277
+ input137�� 2*layers.15.self_attn.qkv_proj.v_proj.weight8
278
+ 4
279
+ input138� 2 layers.15.input_layernorm.weight8
280
+ @
281
+ input139�� 2*layers.15.self_attn.qkv_proj.k_proj.weight8
282
+ @
283
+ input140�� 2*layers.15.self_attn.qkv_proj.q_proj.weight8
284
+ 4
285
+ input141� �82layers.15.mlp.down_proj.weight8
286
+ 2
287
+ input142�8� 2layers.15.mlp.up_proj.weight8
288
+ =
289
+ input143� 2)layers.15.post_attention_layernorm.weight8
290
+ 4
291
+ input144�8� 2layers.15.mlp.gate_proj.weight8
292
+ >
293
+ input145� �2(layers.16.self_attn.o_proj.o_proj.weight8
294
+ @
295
+ input146�� 2*layers.16.self_attn.qkv_proj.v_proj.weight8
296
+ 4
297
+ input147� 2 layers.16.input_layernorm.weight8
298
+ @
299
+ input148�� 2*layers.16.self_attn.qkv_proj.k_proj.weight8
300
+ @
301
+ input149�� 2*layers.16.self_attn.qkv_proj.q_proj.weight8
302
+ 4
303
+ input150� �82layers.16.mlp.down_proj.weight8
304
+ 2
305
+ input151�8� 2layers.16.mlp.up_proj.weight8
306
+ =
307
+ input152� 2)layers.16.post_attention_layernorm.weight8
308
+ 4
309
+ input153�8� 2layers.16.mlp.gate_proj.weight8
310
+ >
311
+ input154� �2(layers.17.self_attn.o_proj.o_proj.weight8
312
+ @
313
+ input155�� 2*layers.17.self_attn.qkv_proj.v_proj.weight8
314
+ 4
315
+ input156� 2 layers.17.input_layernorm.weight8
316
+ @
317
+ input157�� 2*layers.17.self_attn.qkv_proj.k_proj.weight8
318
+ @
319
+ input158�� 2*layers.17.self_attn.qkv_proj.q_proj.weight8
320
+ 4
321
+ input159� �82layers.17.mlp.down_proj.weight8
322
+ 2
323
+ input160�8� 2layers.17.mlp.up_proj.weight8
324
+ =
325
+ input161� 2)layers.17.post_attention_layernorm.weight8
326
+ 4
327
+ input162�8� 2layers.17.mlp.gate_proj.weight8
328
+ >
329
+ input163� �2(layers.18.self_attn.o_proj.o_proj.weight8
330
+ @
331
+ input164�� 2*layers.18.self_attn.qkv_proj.v_proj.weight8
332
+ 4
333
+ input165� 2 layers.18.input_layernorm.weight8
334
+ @
335
+ input166�� 2*layers.18.self_attn.qkv_proj.k_proj.weight8
336
+ @
337
+ input167�� 2*layers.18.self_attn.qkv_proj.q_proj.weight8
338
+ 4
339
+ input168� �82layers.18.mlp.down_proj.weight8
340
+ 2
341
+ input169�8� 2layers.18.mlp.up_proj.weight8
342
+ =
343
+ input170� 2)layers.18.post_attention_layernorm.weight8
344
+ 4
345
+ input171�8� 2layers.18.mlp.gate_proj.weight8
346
+ >
347
+ input172� �2(layers.19.self_attn.o_proj.o_proj.weight8
348
+ @
349
+ input173�� 2*layers.19.self_attn.qkv_proj.v_proj.weight8
350
+ 4
351
+ input174� 2 layers.19.input_layernorm.weight8
352
+ @
353
+ input175�� 2*layers.19.self_attn.qkv_proj.k_proj.weight8
354
+ @
355
+ input176�� 2*layers.19.self_attn.qkv_proj.q_proj.weight8
356
+ 4
357
+ input177� �82layers.19.mlp.down_proj.weight8
358
+ 2
359
+ input178�8� 2layers.19.mlp.up_proj.weight8
360
+ =
361
+ input179� 2)layers.19.post_attention_layernorm.weight8
362
+ 4
363
+ input180�8� 2layers.19.mlp.gate_proj.weight8
364
+ >
365
+ input181� �2(layers.20.self_attn.o_proj.o_proj.weight8
366
+ @
367
+ input182�� 2*layers.20.self_attn.qkv_proj.v_proj.weight8
368
+ 4
369
+ input183� 2 layers.20.input_layernorm.weight8
370
+ @
371
+ input184�� 2*layers.20.self_attn.qkv_proj.k_proj.weight8
372
+ @
373
+ input185�� 2*layers.20.self_attn.qkv_proj.q_proj.weight8
374
+ 4
375
+ input186� �82layers.20.mlp.down_proj.weight8
376
+ 2
377
+ input187�8� 2layers.20.mlp.up_proj.weight8
378
+ =
379
+ input188� 2)layers.20.post_attention_layernorm.weight8
380
+ 4
381
+ input189�8� 2layers.20.mlp.gate_proj.weight8
382
+ >
383
+ input190� �2(layers.21.self_attn.o_proj.o_proj.weight8
384
+ @
385
+ input191�� 2*layers.21.self_attn.qkv_proj.v_proj.weight8
386
+ 4
387
+ input192� 2 layers.21.input_layernorm.weight8
388
+ @
389
+ input193�� 2*layers.21.self_attn.qkv_proj.k_proj.weight8
390
+ @
391
+ input194�� 2*layers.21.self_attn.qkv_proj.q_proj.weight8
392
+ 4
393
+ input195� �82layers.21.mlp.down_proj.weight8
394
+ 2
395
+ input196�8� 2layers.21.mlp.up_proj.weight8
396
+ =
397
+ input197� 2)layers.21.post_attention_layernorm.weight8
398
+ 4
399
+ input198�8� 2layers.21.mlp.gate_proj.weight8
400
+ >
401
+ input199� �2(layers.22.self_attn.o_proj.o_proj.weight8
402
+ @
403
+ input200�� 2*layers.22.self_attn.qkv_proj.v_proj.weight8
404
+ 4
405
+ input201� 2 layers.22.input_layernorm.weight8
406
+ @
407
+ input202�� 2*layers.22.self_attn.qkv_proj.k_proj.weight8
408
+ @
409
+ input203�� 2*layers.22.self_attn.qkv_proj.q_proj.weight8
410
+ 4
411
+ input204� �82layers.22.mlp.down_proj.weight8
412
+ 2
413
+ input205�8� 2layers.22.mlp.up_proj.weight8
414
+ =
415
+ input206� 2)layers.22.post_attention_layernorm.weight8
416
+ 4
417
+ input207�8� 2layers.22.mlp.gate_proj.weight8
418
+ >
419
+ input208� �2(layers.23.self_attn.o_proj.o_proj.weight8
420
+ @
421
+ input209�� 2*layers.23.self_attn.qkv_proj.v_proj.weight8
422
+ 4
423
+ input210� 2 layers.23.input_layernorm.weight8
424
+ @
425
+ input211�� 2*layers.23.self_attn.qkv_proj.k_proj.weight8
426
+ @
427
+ input212�� 2*layers.23.self_attn.qkv_proj.q_proj.weight8
428
+ 4
429
+ input213� �82layers.23.mlp.down_proj.weight8
430
+ 2
431
+ input214�8� 2layers.23.mlp.up_proj.weight8
432
+ =
433
+ input215� 2)layers.23.post_attention_layernorm.weight8
434
+ 4
435
+ input216�8� 2layers.23.mlp.gate_proj.weight8
436
+ >
437
+ input217� �2(layers.24.self_attn.o_proj.o_proj.weight8
438
+ @
439
+ input218�� 2*layers.24.self_attn.qkv_proj.v_proj.weight8
440
+ 4
441
+ input219� 2 layers.24.input_layernorm.weight8
442
+ @
443
+ input220�� 2*layers.24.self_attn.qkv_proj.k_proj.weight8
444
+ @
445
+ input221�� 2*layers.24.self_attn.qkv_proj.q_proj.weight8
446
+ 4
447
+ input222� �82layers.24.mlp.down_proj.weight8
448
+ 2
449
+ input223�8� 2layers.24.mlp.up_proj.weight8
450
+ =
451
+ input224� 2)layers.24.post_attention_layernorm.weight8
452
+ 4
453
+ input225�8� 2layers.24.mlp.gate_proj.weight8
454
+ >
455
+ input226� �2(layers.25.self_attn.o_proj.o_proj.weight8
456
+ @
457
+ input227�� 2*layers.25.self_attn.qkv_proj.v_proj.weight8
458
+ 4
459
+ input228� 2 layers.25.input_layernorm.weight8
460
+ @
461
+ input229�� 2*layers.25.self_attn.qkv_proj.k_proj.weight8
462
+ @
463
+ input230�� 2*layers.25.self_attn.qkv_proj.q_proj.weight8
464
+ 4
465
+ input231� �82layers.25.mlp.down_proj.weight8
466
+ 2
467
+ input232�8� 2layers.25.mlp.up_proj.weight8
468
+ =
469
+ input233� 2)layers.25.post_attention_layernorm.weight8
470
+ 4
471
+ input234�8� 2layers.25.mlp.gate_proj.weight8
472
+ >
473
+ input235� �2(layers.26.self_attn.o_proj.o_proj.weight8
474
+ @
475
+ input236�� 2*layers.26.self_attn.qkv_proj.v_proj.weight8
476
+ 4
477
+ input237� 2 layers.26.input_layernorm.weight8
478
+ @
479
+ input238�� 2*layers.26.self_attn.qkv_proj.k_proj.weight8
480
+ @
481
+ input239�� 2*layers.26.self_attn.qkv_proj.q_proj.weight8
482
+ 4
483
+ input240� �82layers.26.mlp.down_proj.weight8
484
+ 2
485
+ input241�8� 2layers.26.mlp.up_proj.weight8
486
+ =
487
+ input242� 2)layers.26.post_attention_layernorm.weight8
488
+ 4
489
+ input243�8� 2layers.26.mlp.gate_proj.weight8
490
+ >
491
+ input244� �2(layers.27.self_attn.o_proj.o_proj.weight8
492
+ @
493
+ input245�� 2*layers.27.self_attn.qkv_proj.v_proj.weight8
494
+ 4
495
+ input246� 2 layers.27.input_layernorm.weight8
496
+ @
497
+ input247�� 2*layers.27.self_attn.qkv_proj.k_proj.weight8
498
+ @
499
+ input248�� 2*layers.27.self_attn.qkv_proj.q_proj.weight8
500
+ 4
501
+ input249� �82layers.27.mlp.down_proj.weight8
502
+ 2
503
+ input250�8� 2layers.27.mlp.up_proj.weight8
504
+ =
505
+ input251� 2)layers.27.post_attention_layernorm.weight8
506
+ 4
507
+ input252�8� 2layers.27.mlp.gate_proj.weight8
508
+ >
509
+ input253� �2(layers.28.self_attn.o_proj.o_proj.weight8
510
+ @
511
+ input254�� 2*layers.28.self_attn.qkv_proj.v_proj.weight8
512
+ 4
513
+ input255� 2 layers.28.input_layernorm.weight8
514
+ @
515
+ input256�� 2*layers.28.self_attn.qkv_proj.k_proj.weight8
516
+ @
517
+ input257�� 2*layers.28.self_attn.qkv_proj.q_proj.weight8
518
+ 4
519
+ input258� �82layers.28.mlp.down_proj.weight8
520
+ 2
521
+ input259�8� 2layers.28.mlp.up_proj.weight8
522
+ =
523
+ input260� 2)layers.28.post_attention_layernorm.weight8
524
+ 4
525
+ input261�8� 2layers.28.mlp.gate_proj.weight8
526
+ >
527
+ input262� �2(layers.29.self_attn.o_proj.o_proj.weight8
528
+ @
529
+ input263�� 2*layers.29.self_attn.qkv_proj.v_proj.weight8
530
+ 4
531
+ input264� 2 layers.29.input_layernorm.weight8
532
+ @
533
+ input265�� 2*layers.29.self_attn.qkv_proj.k_proj.weight8
534
+ @
535
+ input266�� 2*layers.29.self_attn.qkv_proj.q_proj.weight8
536
+ 4
537
+ input267� �82layers.29.mlp.down_proj.weight8
538
+ 2
539
+ input268�8� 2layers.29.mlp.up_proj.weight8
540
+ =
541
+ input269� 2)layers.29.post_attention_layernorm.weight8
542
+ 4
543
+ input270�8� 2layers.29.mlp.gate_proj.weight8
544
+ >
545
+ input271� �2(layers.30.self_attn.o_proj.o_proj.weight8
546
+ @
547
+ input272�� 2*layers.30.self_attn.qkv_proj.v_proj.weight8
548
+ 4
549
+ input273� 2 layers.30.input_layernorm.weight8
550
+ @
551
+ input274�� 2*layers.30.self_attn.qkv_proj.k_proj.weight8
552
+ @
553
+ input275�� 2*layers.30.self_attn.qkv_proj.q_proj.weight8
554
+ 4
555
+ input276� �82layers.30.mlp.down_proj.weight8
556
+ 2
557
+ input277�8� 2layers.30.mlp.up_proj.weight8
558
+ =
559
+ input278� 2)layers.30.post_attention_layernorm.weight8
560
+ 4
561
+ input279�8� 2layers.30.mlp.gate_proj.weight8
562
+ >
563
+ input280� �2(layers.31.self_attn.o_proj.o_proj.weight8
564
+ @
565
+ input281�� 2*layers.31.self_attn.qkv_proj.v_proj.weight8
566
+ 4
567
+ input282� 2 layers.31.input_layernorm.weight8
568
+ @
569
+ input283�� 2*layers.31.self_attn.qkv_proj.k_proj.weight8
570
+ @
571
+ input284�� 2*layers.31.self_attn.qkv_proj.q_proj.weight8
572
+ 4
573
+ input285� �82layers.31.mlp.down_proj.weight8
574
+ 2
575
+ input286�8� 2layers.31.mlp.up_proj.weight8
576
+ =
577
+ input287� 2)layers.31.post_attention_layernorm.weight8
578
+ 4
579
+ input288�8� 2layers.31.mlp.gate_proj.weight8
580
+ %
581
+ input289��� 2lm_head.weight8
582
+ 
583
+ input290� 2 norm.weight8'
584
+ output0���2embed_tokens.weight>
585
+ output1��2'layers.0.self_attn.o_proj.o_proj.weight>
586
+ output2� �2)layers.0.self_attn.qkv_proj.v_proj.weight1
587
+ output3� 2layers.0.input_layernorm.weight>
588
+ output4� @2)layers.0.self_attn.qkv_proj.k_proj.weight?
589
+ output5� @2)layers.0.self_attn.qkv_proj.q_proj.weight3
590
+ output6 ��2layers.0.mlp.down_proj.weight0
591
+ output78� �2layers.0.mlp.up_proj.weight:
592
+ output8� 2(layers.0.post_attention_layernorm.weight2
593
+ output98� �2layers.0.mlp.gate_proj.weight?
594
+ output10��2'layers.1.self_attn.o_proj.o_proj.weight?
595
+ output11� �2)layers.1.self_attn.qkv_proj.v_proj.weight2
596
+ output12� 2layers.1.input_layernorm.weight?
597
+ output13� @2)layers.1.self_attn.qkv_proj.k_proj.weight@
598
+ output14� @2)layers.1.self_attn.qkv_proj.q_proj.weight4
599
+ output15 ��2layers.1.mlp.down_proj.weight1
600
+ output168� �2layers.1.mlp.up_proj.weight;
601
+ output17� 2(layers.1.post_attention_layernorm.weight3
602
+ output188� �2layers.1.mlp.gate_proj.weight?
603
+ output19��2'layers.2.self_attn.o_proj.o_proj.weight?
604
+ output20� �2)layers.2.self_attn.qkv_proj.v_proj.weight2
605
+ output21� 2layers.2.input_layernorm.weight?
606
+ output22� @2)layers.2.self_attn.qkv_proj.k_proj.weight@
607
+ output23� @2)layers.2.self_attn.qkv_proj.q_proj.weight4
608
+ output24 ��2layers.2.mlp.down_proj.weight1
609
+ output258� �2layers.2.mlp.up_proj.weight;
610
+ output26� 2(layers.2.post_attention_layernorm.weight3
611
+ output278� �2layers.2.mlp.gate_proj.weight?
612
+ output28��2'layers.3.self_attn.o_proj.o_proj.weight?
613
+ output29� �2)layers.3.self_attn.qkv_proj.v_proj.weight2
614
+ output30� 2layers.3.input_layernorm.weight?
615
+ output31� @2)layers.3.self_attn.qkv_proj.k_proj.weight@
616
+ output32� @2)layers.3.self_attn.qkv_proj.q_proj.weight4
617
+ output33 ��2layers.3.mlp.down_proj.weight1
618
+ output348� �2layers.3.mlp.up_proj.weight;
619
+ output35� 2(layers.3.post_attention_layernorm.weight3
620
+ output368� �2layers.3.mlp.gate_proj.weight?
621
+ output37��2'layers.4.self_attn.o_proj.o_proj.weight?
622
+ output38� �2)layers.4.self_attn.qkv_proj.v_proj.weight2
623
+ output39� 2layers.4.input_layernorm.weight?
624
+ output40� @2)layers.4.self_attn.qkv_proj.k_proj.weight@
625
+ output41� @2)layers.4.self_attn.qkv_proj.q_proj.weight4
626
+ output42 ��2layers.4.mlp.down_proj.weight1
627
+ output438� �2layers.4.mlp.up_proj.weight;
628
+ output44� 2(layers.4.post_attention_layernorm.weight3
629
+ output458� �2layers.4.mlp.gate_proj.weight?
630
+ output46��2'layers.5.self_attn.o_proj.o_proj.weight?
631
+ output47� �2)layers.5.self_attn.qkv_proj.v_proj.weight2
632
+ output48� 2layers.5.input_layernorm.weight?
633
+ output49� @2)layers.5.self_attn.qkv_proj.k_proj.weight@
634
+ output50� @2)layers.5.self_attn.qkv_proj.q_proj.weight4
635
+ output51 ��2layers.5.mlp.down_proj.weight1
636
+ output528� �2layers.5.mlp.up_proj.weight;
637
+ output53� 2(layers.5.post_attention_layernorm.weight3
638
+ output548� �2layers.5.mlp.gate_proj.weight?
639
+ output55��2'layers.6.self_attn.o_proj.o_proj.weight?
640
+ output56� �2)layers.6.self_attn.qkv_proj.v_proj.weight2
641
+ output57� 2layers.6.input_layernorm.weight?
642
+ output58� @2)layers.6.self_attn.qkv_proj.k_proj.weight@
643
+ output59� @2)layers.6.self_attn.qkv_proj.q_proj.weight4
644
+ output60 ��2layers.6.mlp.down_proj.weight1
645
+ output618� �2layers.6.mlp.up_proj.weight;
646
+ output62� 2(layers.6.post_attention_layernorm.weight3
647
+ output638� �2layers.6.mlp.gate_proj.weight?
648
+ output64��2'layers.7.self_attn.o_proj.o_proj.weight?
649
+ output65� �2)layers.7.self_attn.qkv_proj.v_proj.weight2
650
+ output66� 2layers.7.input_layernorm.weight?
651
+ output67� @2)layers.7.self_attn.qkv_proj.k_proj.weight@
652
+ output68� @2)layers.7.self_attn.qkv_proj.q_proj.weight4
653
+ output69 ��2layers.7.mlp.down_proj.weight1
654
+ output708� �2layers.7.mlp.up_proj.weight;
655
+ output71� 2(layers.7.post_attention_layernorm.weight3
656
+ output728� �2layers.7.mlp.gate_proj.weight?
657
+ output73��2'layers.8.self_attn.o_proj.o_proj.weight?
658
+ output74� �2)layers.8.self_attn.qkv_proj.v_proj.weight2
659
+ output75� 2layers.8.input_layernorm.weight?
660
+ output76� @2)layers.8.self_attn.qkv_proj.k_proj.weight@
661
+ output77� @2)layers.8.self_attn.qkv_proj.q_proj.weight4
662
+ output78 ��2layers.8.mlp.down_proj.weight1
663
+ output798� �2layers.8.mlp.up_proj.weight;
664
+ output80� 2(layers.8.post_attention_layernorm.weight3
665
+ output818� �2layers.8.mlp.gate_proj.weight?
666
+ output82��2'layers.9.self_attn.o_proj.o_proj.weight?
667
+ output83� �2)layers.9.self_attn.qkv_proj.v_proj.weight2
668
+ output84� 2layers.9.input_layernorm.weight?
669
+ output85� @2)layers.9.self_attn.qkv_proj.k_proj.weight@
670
+ output86� @2)layers.9.self_attn.qkv_proj.q_proj.weight4
671
+ output87 ��2layers.9.mlp.down_proj.weight1
672
+ output888� �2layers.9.mlp.up_proj.weight;
673
+ output89� 2(layers.9.post_attention_layernorm.weight3
674
+ output908� �2layers.9.mlp.gate_proj.weight@
675
+ output91��2(layers.10.self_attn.o_proj.o_proj.weight@
676
+ output92� �2*layers.10.self_attn.qkv_proj.v_proj.weight3
677
+ output93� 2 layers.10.input_layernorm.weight@
678
+ output94� @2*layers.10.self_attn.qkv_proj.k_proj.weightA
679
+ output95� @2*layers.10.self_attn.qkv_proj.q_proj.weight5
680
+ output96 ��2layers.10.mlp.down_proj.weight2
681
+ output978� �2layers.10.mlp.up_proj.weight<
682
+ output98� 2)layers.10.post_attention_layernorm.weight4
683
+ output998� �2layers.10.mlp.gate_proj.weightA
684
+ output100��2(layers.11.self_attn.o_proj.o_proj.weightA
685
+ output101� �2*layers.11.self_attn.qkv_proj.v_proj.weight4
686
+ output102� 2 layers.11.input_layernorm.weightA
687
+ output103� @2*layers.11.self_attn.qkv_proj.k_proj.weightB
688
+ output104� @2*layers.11.self_attn.qkv_proj.q_proj.weight6
689
+ output105 ��2layers.11.mlp.down_proj.weight3
690
+ output1068� �2layers.11.mlp.up_proj.weight=
691
+ output107� 2)layers.11.post_attention_layernorm.weight5
692
+ output1088� �2layers.11.mlp.gate_proj.weightA
693
+ output109��2(layers.12.self_attn.o_proj.o_proj.weightA
694
+ output110� �2*layers.12.self_attn.qkv_proj.v_proj.weight4
695
+ output111� 2 layers.12.input_layernorm.weightA
696
+ output112� @2*layers.12.self_attn.qkv_proj.k_proj.weightB
697
+ output113� @2*layers.12.self_attn.qkv_proj.q_proj.weight6
698
+ output114 ��2layers.12.mlp.down_proj.weight3
699
+ output1158� �2layers.12.mlp.up_proj.weight=
700
+ output116� 2)layers.12.post_attention_layernorm.weight5
701
+ output1178� �2layers.12.mlp.gate_proj.weightA
702
+ output118��2(layers.13.self_attn.o_proj.o_proj.weightA
703
+ output119� �2*layers.13.self_attn.qkv_proj.v_proj.weight4
704
+ output120� 2 layers.13.input_layernorm.weightA
705
+ output121� @2*layers.13.self_attn.qkv_proj.k_proj.weightB
706
+ output122� @2*layers.13.self_attn.qkv_proj.q_proj.weight6
707
+ output123 ��2layers.13.mlp.down_proj.weight3
708
+ output1248� �2layers.13.mlp.up_proj.weight=
709
+ output125� 2)layers.13.post_attention_layernorm.weight5
710
+ output1268� �2layers.13.mlp.gate_proj.weightA
711
+ output127��2(layers.14.self_attn.o_proj.o_proj.weightA
712
+ output128� �2*layers.14.self_attn.qkv_proj.v_proj.weight4
713
+ output129� 2 layers.14.input_layernorm.weightA
714
+ output130� @2*layers.14.self_attn.qkv_proj.k_proj.weightB
715
+ output131� @2*layers.14.self_attn.qkv_proj.q_proj.weight6
716
+ output132 ��2layers.14.mlp.down_proj.weight3
717
+ output1338� �2layers.14.mlp.up_proj.weight=
718
+ output134� 2)layers.14.post_attention_layernorm.weight5
719
+ output1358� �2layers.14.mlp.gate_proj.weightA
720
+ output136��2(layers.15.self_attn.o_proj.o_proj.weightA
721
+ output137� �2*layers.15.self_attn.qkv_proj.v_proj.weight4
722
+ output138� 2 layers.15.input_layernorm.weightA
723
+ output139� @2*layers.15.self_attn.qkv_proj.k_proj.weightB
724
+ output140� @2*layers.15.self_attn.qkv_proj.q_proj.weight6
725
+ output141 ��2layers.15.mlp.down_proj.weight3
726
+ output1428� �2layers.15.mlp.up_proj.weight=
727
+ output143� 2)layers.15.post_attention_layernorm.weight5
728
+ output1448� �2layers.15.mlp.gate_proj.weightA
729
+ output145��2(layers.16.self_attn.o_proj.o_proj.weightA
730
+ output146� �2*layers.16.self_attn.qkv_proj.v_proj.weight4
731
+ output147� 2 layers.16.input_layernorm.weightA
732
+ output148� @2*layers.16.self_attn.qkv_proj.k_proj.weightB
733
+ output149� @2*layers.16.self_attn.qkv_proj.q_proj.weight6
734
+ output150 ��2layers.16.mlp.down_proj.weight3
735
+ output1518� �2layers.16.mlp.up_proj.weight=
736
+ output152� 2)layers.16.post_attention_layernorm.weight5
737
+ output1538� �2layers.16.mlp.gate_proj.weightA
738
+ output154��2(layers.17.self_attn.o_proj.o_proj.weightA
739
+ output155� �2*layers.17.self_attn.qkv_proj.v_proj.weight4
740
+ output156� 2 layers.17.input_layernorm.weightA
741
+ output157� @2*layers.17.self_attn.qkv_proj.k_proj.weightB
742
+ output158� @2*layers.17.self_attn.qkv_proj.q_proj.weight6
743
+ output159 ��2layers.17.mlp.down_proj.weight3
744
+ output1608� �2layers.17.mlp.up_proj.weight=
745
+ output161� 2)layers.17.post_attention_layernorm.weight5
746
+ output1628� �2layers.17.mlp.gate_proj.weightA
747
+ output163��2(layers.18.self_attn.o_proj.o_proj.weightA
748
+ output164� �2*layers.18.self_attn.qkv_proj.v_proj.weight4
749
+ output165� 2 layers.18.input_layernorm.weightA
750
+ output166� @2*layers.18.self_attn.qkv_proj.k_proj.weightB
751
+ output167� @2*layers.18.self_attn.qkv_proj.q_proj.weight6
752
+ output168 ��2layers.18.mlp.down_proj.weight3
753
+ output1698� �2layers.18.mlp.up_proj.weight=
754
+ output170� 2)layers.18.post_attention_layernorm.weight5
755
+ output1718� �2layers.18.mlp.gate_proj.weightA
756
+ output172��2(layers.19.self_attn.o_proj.o_proj.weightA
757
+ output173� �2*layers.19.self_attn.qkv_proj.v_proj.weight4
758
+ output174� 2 layers.19.input_layernorm.weightA
759
+ output175� @2*layers.19.self_attn.qkv_proj.k_proj.weightB
760
+ output176� @2*layers.19.self_attn.qkv_proj.q_proj.weight6
761
+ output177 ��2layers.19.mlp.down_proj.weight3
762
+ output1788� �2layers.19.mlp.up_proj.weight=
763
+ output179� 2)layers.19.post_attention_layernorm.weight5
764
+ output1808� �2layers.19.mlp.gate_proj.weightA
765
+ output181��2(layers.20.self_attn.o_proj.o_proj.weightA
766
+ output182� �2*layers.20.self_attn.qkv_proj.v_proj.weight4
767
+ output183� 2 layers.20.input_layernorm.weightA
768
+ output184� @2*layers.20.self_attn.qkv_proj.k_proj.weightB
769
+ output185� @2*layers.20.self_attn.qkv_proj.q_proj.weight6
770
+ output186 ��2layers.20.mlp.down_proj.weight3
771
+ output1878� �2layers.20.mlp.up_proj.weight=
772
+ output188� 2)layers.20.post_attention_layernorm.weight5
773
+ output1898� �2layers.20.mlp.gate_proj.weightA
774
+ output190��2(layers.21.self_attn.o_proj.o_proj.weightA
775
+ output191� �2*layers.21.self_attn.qkv_proj.v_proj.weight4
776
+ output192� 2 layers.21.input_layernorm.weightA
777
+ output193� @2*layers.21.self_attn.qkv_proj.k_proj.weightB
778
+ output194� @2*layers.21.self_attn.qkv_proj.q_proj.weight6
779
+ output195 ��2layers.21.mlp.down_proj.weight3
780
+ output1968� �2layers.21.mlp.up_proj.weight=
781
+ output197� 2)layers.21.post_attention_layernorm.weight5
782
+ output1988� �2layers.21.mlp.gate_proj.weightA
783
+ output199��2(layers.22.self_attn.o_proj.o_proj.weightA
784
+ output200� �2*layers.22.self_attn.qkv_proj.v_proj.weight4
785
+ output201� 2 layers.22.input_layernorm.weightA
786
+ output202� @2*layers.22.self_attn.qkv_proj.k_proj.weightB
787
+ output203� @2*layers.22.self_attn.qkv_proj.q_proj.weight6
788
+ output204 ��2layers.22.mlp.down_proj.weight3
789
+ output2058� �2layers.22.mlp.up_proj.weight=
790
+ output206� 2)layers.22.post_attention_layernorm.weight5
791
+ output2078� �2layers.22.mlp.gate_proj.weightA
792
+ output208��2(layers.23.self_attn.o_proj.o_proj.weightA
793
+ output209� �2*layers.23.self_attn.qkv_proj.v_proj.weight4
794
+ output210� 2 layers.23.input_layernorm.weightA
795
+ output211� @2*layers.23.self_attn.qkv_proj.k_proj.weightB
796
+ output212� @2*layers.23.self_attn.qkv_proj.q_proj.weight6
797
+ output213 ��2layers.23.mlp.down_proj.weight3
798
+ output2148� �2layers.23.mlp.up_proj.weight=
799
+ output215� 2)layers.23.post_attention_layernorm.weight5
800
+ output2168� �2layers.23.mlp.gate_proj.weightA
801
+ output217��2(layers.24.self_attn.o_proj.o_proj.weightA
802
+ output218� �2*layers.24.self_attn.qkv_proj.v_proj.weight4
803
+ output219� 2 layers.24.input_layernorm.weightA
804
+ output220� @2*layers.24.self_attn.qkv_proj.k_proj.weightB
805
+ output221� @2*layers.24.self_attn.qkv_proj.q_proj.weight6
806
+ output222 ��2layers.24.mlp.down_proj.weight3
807
+ output2238� �2layers.24.mlp.up_proj.weight=
808
+ output224� 2)layers.24.post_attention_layernorm.weight5
809
+ output2258� �2layers.24.mlp.gate_proj.weightA
810
+ output226��2(layers.25.self_attn.o_proj.o_proj.weightA
811
+ output227� �2*layers.25.self_attn.qkv_proj.v_proj.weight4
812
+ output228� 2 layers.25.input_layernorm.weightA
813
+ output229� @2*layers.25.self_attn.qkv_proj.k_proj.weightB
814
+ output230� @2*layers.25.self_attn.qkv_proj.q_proj.weight6
815
+ output231 ��2layers.25.mlp.down_proj.weight3
816
+ output2328� �2layers.25.mlp.up_proj.weight=
817
+ output233� 2)layers.25.post_attention_layernorm.weight5
818
+ output2348� �2layers.25.mlp.gate_proj.weightA
819
+ output235��2(layers.26.self_attn.o_proj.o_proj.weightA
820
+ output236� �2*layers.26.self_attn.qkv_proj.v_proj.weight4
821
+ output237� 2 layers.26.input_layernorm.weightA
822
+ output238� @2*layers.26.self_attn.qkv_proj.k_proj.weightB
823
+ output239� @2*layers.26.self_attn.qkv_proj.q_proj.weight6
824
+ output240 ��2layers.26.mlp.down_proj.weight3
825
+ output2418� �2layers.26.mlp.up_proj.weight=
826
+ output242� 2)layers.26.post_attention_layernorm.weight5
827
+ output2438� �2layers.26.mlp.gate_proj.weightA
828
+ output244��2(layers.27.self_attn.o_proj.o_proj.weightA
829
+ output245� �2*layers.27.self_attn.qkv_proj.v_proj.weight4
830
+ output246� 2 layers.27.input_layernorm.weightA
831
+ output247� @2*layers.27.self_attn.qkv_proj.k_proj.weightB
832
+ output248� @2*layers.27.self_attn.qkv_proj.q_proj.weight6
833
+ output249 ��2layers.27.mlp.down_proj.weight3
834
+ output2508� �2layers.27.mlp.up_proj.weight=
835
+ output251� 2)layers.27.post_attention_layernorm.weight5
836
+ output2528� �2layers.27.mlp.gate_proj.weightA
837
+ output253��2(layers.28.self_attn.o_proj.o_proj.weightA
838
+ output254� �2*layers.28.self_attn.qkv_proj.v_proj.weight4
839
+ output255� 2 layers.28.input_layernorm.weightA
840
+ output256� @2*layers.28.self_attn.qkv_proj.k_proj.weightB
841
+ output257� @2*layers.28.self_attn.qkv_proj.q_proj.weight6
842
+ output258 ��2layers.28.mlp.down_proj.weight3
843
+ output2598� �2layers.28.mlp.up_proj.weight=
844
+ output260� 2)layers.28.post_attention_layernorm.weight5
845
+ output2618� �2layers.28.mlp.gate_proj.weightA
846
+ output262��2(layers.29.self_attn.o_proj.o_proj.weightA
847
+ output263� �2*layers.29.self_attn.qkv_proj.v_proj.weight4
848
+ output264� 2 layers.29.input_layernorm.weightA
849
+ output265� @2*layers.29.self_attn.qkv_proj.k_proj.weightB
850
+ output266� @2*layers.29.self_attn.qkv_proj.q_proj.weight6
851
+ output267 ��2layers.29.mlp.down_proj.weight3
852
+ output2688� �2layers.29.mlp.up_proj.weight=
853
+ output269� 2)layers.29.post_attention_layernorm.weight5
854
+ output2708� �2layers.29.mlp.gate_proj.weightA
855
+ output271��2(layers.30.self_attn.o_proj.o_proj.weightA
856
+ output272� �2*layers.30.self_attn.qkv_proj.v_proj.weight4
857
+ output273� 2 layers.30.input_layernorm.weightA
858
+ output274� @2*layers.30.self_attn.qkv_proj.k_proj.weightB
859
+ output275� @2*layers.30.self_attn.qkv_proj.q_proj.weight6
860
+ output276 ��2layers.30.mlp.down_proj.weight3
861
+ output2778� �2layers.30.mlp.up_proj.weight=
862
+ output278� 2)layers.30.post_attention_layernorm.weight5
863
+ output2798� �2layers.30.mlp.gate_proj.weightA
864
+ output280��2(layers.31.self_attn.o_proj.o_proj.weightA
865
+ output281� �2*layers.31.self_attn.qkv_proj.v_proj.weight4
866
+ output282� 2 layers.31.input_layernorm.weightA
867
+ output283� @2*layers.31.self_attn.qkv_proj.k_proj.weightB
868
+ output284� @2*layers.31.self_attn.qkv_proj.q_proj.weight6
869
+ output285 ��2layers.31.mlp.down_proj.weight3
870
+ output2868� �2layers.31.mlp.up_proj.weight=
871
+ output287� 2)layers.31.post_attention_layernorm.weight5
872
+ output2888� �2layers.31.mlp.gate_proj.weight&
873
+ output289�� �2lm_head.weight
874
+ output290� 2 norm.weight
layout_opt/model/graph.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a76fc5f3f76d1d69d57e0e784721bafd07e3a61734f6594e8c815123a8a771
3
+ size 176877
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db6fd0c1be612908d3c6ede5cdedda302359d5279c7d078eaadcb48d17389030
3
+ size 53720651
token_generation_model/_tp0_bk0/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f084665c9d486b682a226970bf7ab5170c50859a2f0cc8e46fee1811b6421349
3
+ size 5612544
token_generation_model/_tp0_bk0/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db597e62ef7dd0b4be31a941ce01bedf8ff4e3e418a571d927191d0fe1ac7749
3
+ size 823209
token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b572d17843a963e8042dbdafc202058bb0d10fd3f7ce91e3f20bc1db70324d7
3
+ size 802071
token_generation_model/_tp0_bk0/model.MODULE_67d3774d5bacfe6ba851+72d461cc.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f084665c9d486b682a226970bf7ab5170c50859a2f0cc8e46fee1811b6421349
3
+ size 5612544
token_generation_model/_tp0_bk0/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f156b82b448a0ce64eea8895c40e4e50c9f548dea673deb2d7d240fc9df9fe8
3
+ size 5786483
token_generation_model/_tp0_bk1/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be078d7f3a246715b61da99d6878d44882d85a11eaa1685534babf11cde59e9
3
+ size 5684224
token_generation_model/_tp0_bk1/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615f3bac52a53bbac64e539d918447282af0cafb90b1e95367b365bbeead8e67
3
+ size 822474
token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1806c4248848e0be8635c95d728fa881fc2015e31447ec893a0beeb8b9509d
3
+ size 889786
token_generation_model/_tp0_bk1/model.MODULE_92bbfea7801df2fea75e+4948da29.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be078d7f3a246715b61da99d6878d44882d85a11eaa1685534babf11cde59e9
3
+ size 5684224
token_generation_model/_tp0_bk2/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29908d241bdaf407bcb11c682477667cf52ec74ad12cdf8d715dc83bd83a5cbe
3
+ size 5766144
token_generation_model/_tp0_bk2/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b42bbad04ce54008f13f9aa4f7bc225fbf166e7956990acd2074f725ca8e6c9b
3
+ size 822474
token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c389f0365aeb0a927dc422d904d7ad06f1694d48b6db93e81f9c03a07b7cfc
3
+ size 889786
token_generation_model/_tp0_bk2/model.MODULE_2f686dc6ba7ef3326a56+6113de8c.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29908d241bdaf407bcb11c682477667cf52ec74ad12cdf8d715dc83bd83a5cbe
3
+ size 5766144
token_generation_model/_tp0_bk3/graph.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c34bc7faf28217f485f87fb2c1965b9511b78ff793098e760345b19c84e6079
3
+ size 5970944
token_generation_model/_tp0_bk3/metaneff.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dccc9c15899011ced61b37b01373a7c26af06476601e88b3b3130f496e557b
3
+ size 822474