dacorvo HF Staff commited on
Commit
cfbafb7
·
verified ·
1 Parent(s): 3f311cc

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +161 -0
  2. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json +79 -0
  3. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json +79 -0
  4. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json +79 -0
  5. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json +51 -0
  6. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json +77 -0
  7. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json +77 -0
  8. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json +77 -0
  9. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json +77 -0
  10. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json +77 -0
  11. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json +77 -0
  12. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json +77 -0
  13. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json +77 -0
  14. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json +77 -0
  15. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json +77 -0
  16. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json +77 -0
  17. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json +77 -0
  18. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json +77 -0
  19. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json +77 -0
  20. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json +77 -0
  21. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json +77 -0
  22. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json +77 -0
  23. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json +77 -0
  24. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json +77 -0
  25. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json +55 -0
  26. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json +77 -0
  27. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json +77 -0
  28. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json +77 -0
  29. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json +77 -0
  30. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json +78 -0
  31. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json +78 -0
  32. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json +78 -0
  33. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json +78 -0
  34. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json +78 -0
  35. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json +78 -0
  36. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json +78 -0
  37. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json +78 -0
  38. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json +80 -0
  39. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json +56 -0
  40. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json +78 -0
  41. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json +80 -0
  42. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json +78 -0
  43. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json +80 -0
  44. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json +55 -0
  45. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json +49 -0
  46. neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/compile_flags.json +1 -0
  47. neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.done +0 -0
  48. neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.hlo_module.pb +3 -0
  49. neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff +3 -0
  50. neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json +1 -0
.gitattributes CHANGED
@@ -8047,3 +8047,164 @@ neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.
8047
  neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8048
  neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8049
  neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8047
  neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8048
  neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8049
  neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8050
+ neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8051
+ neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8052
+ neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8053
+ neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8054
+ neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
8055
+ neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8056
+ neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8057
+ neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8058
+ neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8059
+ neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8060
+ neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8061
+ neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8062
+ neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8063
+ neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8064
+ neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8065
+ neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8066
+ neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8067
+ neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8068
+ neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8069
+ neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8070
+ neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8071
+ neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8072
+ neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8073
+ neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8074
+ neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8075
+ neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8076
+ neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8077
+ neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8078
+ neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8079
+ neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8080
+ neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8081
+ neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8082
+ neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8083
+ neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8084
+ neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8085
+ neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8086
+ neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8087
+ neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8088
+ neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8089
+ neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8090
+ neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8091
+ neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8092
+ neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
8093
+ neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8094
+ neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8095
+ neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8096
+ neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8097
+ neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8098
+ neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8099
+ neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8100
+ neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8101
+ neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8102
+ neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8103
+ neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8104
+ neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8105
+ neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8106
+ neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8107
+ neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8108
+ neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8109
+ neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8110
+ neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8111
+ neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8112
+ neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8113
+ neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8114
+ neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8115
+ neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8116
+ neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8117
+ neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8118
+ neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8119
+ neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8120
+ neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8121
+ neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8122
+ neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
8123
+ neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8124
+ neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8125
+ neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8126
+ neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8127
+ neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8128
+ neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8129
+ neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8130
+ neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8131
+ neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text
8132
+ neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8133
+ neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8134
+ neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8135
+ neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8136
+ neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8137
+ neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8138
+ neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8139
+ neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8140
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8141
+ neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8142
+ neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8143
+ neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8144
+ neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8145
+ neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8146
+ neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8147
+ neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8148
+ neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8149
+ neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8150
+ neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8151
+ neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8152
+ neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8153
+ neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8154
+ neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8155
+ neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8156
+ neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8157
+ neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8158
+ neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8159
+ neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8160
+ neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8161
+ neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8162
+ neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8163
+ neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8164
+ neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8165
+ neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8166
+ neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8167
+ neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8168
+ neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8169
+ neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8170
+ neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8171
+ neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8172
+ neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8173
+ neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8174
+ neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8175
+ neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8176
+ neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8177
+ neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8178
+ neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8179
+ neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
8180
+ neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text
8181
+ neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8182
+ neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8183
+ neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8184
+ neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8185
+ neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8186
+ neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8187
+ neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8188
+ neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8189
+ neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8190
+ neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8191
+ neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8192
+ neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8193
+ neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8194
+ neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8195
+ neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8196
+ neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8197
+ neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
8198
+ neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8199
+ neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
8200
+ neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8201
+ neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8202
+ neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8203
+ neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
8204
+ neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8205
+ neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8206
+ neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
8207
+ neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
8208
+ neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8209
+ neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
8210
+ neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "ctx_batch_size": 1,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 1,
38
+ "max_context_length": 100,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 100,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev4",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "bfloat16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 1,
59
+ "torch_dtype": "bfloat16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 8.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 128256
79
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "ctx_batch_size": 2,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 2,
38
+ "max_context_length": 100,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 100,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev4",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 2,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 8.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 128256
79
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "ctx_batch_size": 1,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 1,
38
+ "max_context_length": 100,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 100,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev4",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 100,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 1,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 4,
64
+ "num_hidden_layers": 2,
65
+ "num_key_value_heads": 4,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 8.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 128256
79
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "ibm-granite/granite-3.1-2b-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.1,
10
+ "attention_multiplier": 0.015625,
11
+ "embedding_multiplier": 12.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 2048,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 8192,
16
+ "logits_scaling": 8.0,
17
+ "max_position_embeddings": 131072,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "bf16",
27
+ "batch_size": 4,
28
+ "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
29
+ "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": true,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": "shard-over-heads",
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.2.0.dev5",
37
+ "output_all_logits": false,
38
+ "sequence_length": 4096,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 32,
42
+ "num_hidden_layers": 40,
43
+ "num_key_value_heads": 8,
44
+ "residual_multiplier": 0.22,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 5000000.0,
48
+ "tie_word_embeddings": true,
49
+ "use_cache": true,
50
+ "vocab_size": 49155
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 8,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 8,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 8192,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 28672,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
26
+ "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 24,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 24,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 64,
62
+ "num_hidden_layers": 80,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 131072,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 131072,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 131072,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 131072,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 131072,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 131072,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 2,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
26
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 2,
37
+ "max_context_length": 100,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 100,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 100,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 4,
62
+ "num_hidden_layers": 2,
63
+ "num_key_value_heads": 4,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 16,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 16,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 64,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 64,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 8,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 8,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "bf16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
27
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "shard-over-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 4096,
37
+ "tp_degree": 8
38
+ },
39
+ "num_attention_heads": 32,
40
+ "num_hidden_layers": 32,
41
+ "num_key_value_heads": 8,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 16,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 16,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 8,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 8,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "meta-llama/Meta-Llama-3.1-8B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 14336,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 32,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
26
+ "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 8,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 32,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 8,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 32,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 8.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": false,
75
+ "use_cache": true,
76
+ "vocab_size": 128256
77
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": null,
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 5,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": false,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": false,
28
+ "ctx_batch_size": 4,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 4,
38
+ "max_context_length": 4096,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 4096,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev5",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 4096,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 4,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 16,
65
+ "num_key_value_heads": 8,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 32.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": true,
77
+ "unsloth_fixed": true,
78
+ "use_cache": true,
79
+ "vocab_size": 128256
80
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 4,
26
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
27
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": true,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "shard-over-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 4096,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 32,
40
+ "num_hidden_layers": 16,
41
+ "num_key_value_heads": 8,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 32.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": true,
53
+ "unsloth_fixed": true,
54
+ "use_cache": true,
55
+ "vocab_size": 128256
56
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": true,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 4,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "float16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "float16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "ctx_batch_size": 1,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 4,
38
+ "max_context_length": 4096,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 4096,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev5",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 4096,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 4,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 16,
65
+ "num_key_value_heads": 8,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 32.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": true,
77
+ "unsloth_fixed": true,
78
+ "use_cache": true,
79
+ "vocab_size": 128256
80
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 1,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": null,
26
+ "checkpoint_revision": null,
27
+ "continuous_batching": false,
28
+ "enable_bucketing": false,
29
+ "ep_degree": 1,
30
+ "flash_decoding_enabled": false,
31
+ "fused_qkv": false,
32
+ "glu_mlp": true,
33
+ "is_chunked_prefill": false,
34
+ "local_ranks_size": 2,
35
+ "logical_nc_config": 1,
36
+ "max_batch_size": 1,
37
+ "max_context_length": 4096,
38
+ "max_topk": 256,
39
+ "mlp_kernel_enabled": false,
40
+ "mlp_kernel_fuse_residual_add": false,
41
+ "n_active_tokens": 4096,
42
+ "neuronxcc_version": "2.17.194.0+d312836f",
43
+ "num_cores_per_group": 1,
44
+ "on_device_sampling": true,
45
+ "optimum_neuron_version": "0.2.0.dev5",
46
+ "output_logits": false,
47
+ "padding_side": "right",
48
+ "pp_degree": 1,
49
+ "qk_layernorm": false,
50
+ "qkv_kernel_enabled": false,
51
+ "rpl_reduce_dtype": "bfloat16",
52
+ "sequence_length": 4096,
53
+ "sequence_parallel_enabled": false,
54
+ "speculation_length": 0,
55
+ "start_rank_id": 0,
56
+ "target": null,
57
+ "torch_dtype": "bfloat16",
58
+ "tp_degree": 2,
59
+ "vocab_parallel": false
60
+ },
61
+ "num_attention_heads": 32,
62
+ "num_hidden_layers": 16,
63
+ "num_key_value_heads": 8,
64
+ "pretraining_tp": 1,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_scaling": {
67
+ "factor": 32.0,
68
+ "high_freq_factor": 4.0,
69
+ "low_freq_factor": 1.0,
70
+ "original_max_position_embeddings": 8192,
71
+ "rope_type": "llama3"
72
+ },
73
+ "rope_theta": 500000.0,
74
+ "tie_word_embeddings": true,
75
+ "unsloth_fixed": true,
76
+ "use_cache": true,
77
+ "vocab_size": 128256
78
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "unsloth/Llama-3.2-1B-Instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "NxDNeuronConfig",
20
+ "async_mode": false,
21
+ "attn_kernel_enabled": false,
22
+ "batch_size": 4,
23
+ "capacity_factor": null,
24
+ "cc_pipeline_tiling_factor": 2,
25
+ "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
26
+ "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
27
+ "continuous_batching": true,
28
+ "ctx_batch_size": 4,
29
+ "enable_bucketing": false,
30
+ "ep_degree": 1,
31
+ "flash_decoding_enabled": false,
32
+ "fused_qkv": true,
33
+ "glu_mlp": true,
34
+ "is_chunked_prefill": false,
35
+ "local_ranks_size": 2,
36
+ "logical_nc_config": 1,
37
+ "max_batch_size": 4,
38
+ "max_context_length": 4096,
39
+ "max_topk": 256,
40
+ "mlp_kernel_enabled": false,
41
+ "mlp_kernel_fuse_residual_add": false,
42
+ "n_active_tokens": 4096,
43
+ "neuronxcc_version": "2.17.194.0+d312836f",
44
+ "num_cores_per_group": 1,
45
+ "on_device_sampling": true,
46
+ "optimum_neuron_version": "0.2.0.dev5",
47
+ "output_logits": false,
48
+ "padding_side": "right",
49
+ "pp_degree": 1,
50
+ "qk_layernorm": false,
51
+ "qkv_kernel_enabled": false,
52
+ "rpl_reduce_dtype": "float16",
53
+ "sequence_length": 4096,
54
+ "sequence_parallel_enabled": false,
55
+ "speculation_length": 0,
56
+ "start_rank_id": 0,
57
+ "target": null,
58
+ "tkg_batch_size": 4,
59
+ "torch_dtype": "float16",
60
+ "tp_degree": 2,
61
+ "vocab_parallel": false
62
+ },
63
+ "num_attention_heads": 32,
64
+ "num_hidden_layers": 16,
65
+ "num_key_value_heads": 8,
66
+ "pretraining_tp": 1,
67
+ "rms_norm_eps": 1e-05,
68
+ "rope_scaling": {
69
+ "factor": 32.0,
70
+ "high_freq_factor": 4.0,
71
+ "low_freq_factor": 1.0,
72
+ "original_max_position_embeddings": 8192,
73
+ "rope_type": "llama3"
74
+ },
75
+ "rope_theta": 500000.0,
76
+ "tie_word_embeddings": true,
77
+ "unsloth_fixed": true,
78
+ "use_cache": true,
79
+ "vocab_size": 128256
80
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "microsoft/Phi-3-mini-4k-instruct",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {
11
+ "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config",
12
+ "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM"
13
+ },
14
+ "embd_pdrop": 0.0,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 3072,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 8192,
19
+ "max_position_embeddings": 4096,
20
+ "model_type": "phi3",
21
+ "neuron": {
22
+ "_serialized_key": "HloNeuronConfig",
23
+ "all_reduce_dtype": null,
24
+ "allow_flash_attention": false,
25
+ "attention_layout": "HSB",
26
+ "attn_output_transposed": false,
27
+ "auto_cast_type": "bf16",
28
+ "batch_size": 4,
29
+ "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct",
30
+ "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85",
31
+ "collectives_layout": "HSB",
32
+ "continuous_batching": true,
33
+ "fuse_qkv": true,
34
+ "group_query_attention": null,
35
+ "log_softmax_scores": false,
36
+ "neuronxcc_version": "2.17.194.0+d312836f",
37
+ "optimum_neuron_version": "0.2.0.dev5",
38
+ "output_all_logits": false,
39
+ "sequence_length": 4096,
40
+ "tp_degree": 2
41
+ },
42
+ "num_attention_heads": 32,
43
+ "num_hidden_layers": 32,
44
+ "num_key_value_heads": 32,
45
+ "original_max_position_embeddings": 4096,
46
+ "partial_rotary_factor": 1.0,
47
+ "resid_pdrop": 0.0,
48
+ "rms_norm_eps": 1e-05,
49
+ "rope_scaling": null,
50
+ "rope_theta": 10000.0,
51
+ "sliding_window": 2047,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 32064
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "Qwen/Qwen2.5-0.5B",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 24,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "HloNeuronConfig",
18
+ "all_reduce_dtype": null,
19
+ "allow_flash_attention": true,
20
+ "attention_layout": "HSB",
21
+ "attn_output_transposed": false,
22
+ "auto_cast_type": "fp16",
23
+ "batch_size": 4,
24
+ "checkpoint_id": "Qwen/Qwen2.5-0.5B",
25
+ "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
26
+ "collectives_layout": "HSB",
27
+ "continuous_batching": true,
28
+ "fuse_qkv": false,
29
+ "group_query_attention": "shard-over-heads",
30
+ "log_softmax_scores": false,
31
+ "neuronxcc_version": "2.17.194.0+d312836f",
32
+ "optimum_neuron_version": "0.2.0.dev5",
33
+ "output_all_logits": false,
34
+ "sequence_length": 4096,
35
+ "tp_degree": 2
36
+ },
37
+ "num_attention_heads": 14,
38
+ "num_hidden_layers": 24,
39
+ "num_key_value_heads": 2,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": null,
42
+ "rope_theta": 1000000.0,
43
+ "sliding_window": 32768,
44
+ "tie_word_embeddings": true,
45
+ "use_cache": true,
46
+ "use_mrope": false,
47
+ "use_sliding_window": false,
48
+ "vocab_size": 151936
49
+ }
neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ff37e12281fa8f044284d20485952c2af497d3bb2bfcf50dd83e0572ef21aa4
3
+ size 53031
neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca18599ce5eec9b1297728172da2463337ab3ee0bbbb53457c2a8561f9305fe
3
+ size 213976064
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"