diff --git a/.gitattributes b/.gitattributes index 620702317db7bb3438e2a3adc12be0828bd9dd7b..35f8656131cd801df14f1ce6d950037ca87e71d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10936,3 +10936,19 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_aa6b654bd981334a723e+a9d440f5/wrapped_neff neuronxcc-2.19.8089.0+8ab9f450/MODULE_5cdc2024ee2e6c48bd40+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_5cdc2024ee2e6c48bd40+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ca134f082760cd304e7+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5c9a7ddb696911cf7f93.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5c9a7ddb696911cf7f93.json new file mode 100644 index 0000000000000000000000000000000000000000..d92689a963c368816e69a96cac020e3e773c33a9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/5c9a7ddb696911cf7f93.json @@ -0,0 +1,221 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/f9d1010b22a4e5da4bd5.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/f9d1010b22a4e5da4bd5.json new file mode 100644 index 0000000000000000000000000000000000000000..be707ea121d78147cb14288b8132cec52e5635d0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/f9d1010b22a4e5da4bd5.json @@ -0,0 +1,221 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 10485760, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 16.0, + "high_freq_factor": 1.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/0dd3b941d43fb01b72a8.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/0dd3b941d43fb01b72a8.json new file mode 100644 index 0000000000000000000000000000000000000000..496555dc7246d4c4673d5bab6d17605121cee633 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/0dd3b941d43fb01b72a8.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 1, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 262144, + "max_window_layers": 48, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 768, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "checkpoint_revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": false, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "norm_topk_prob": true, + "num_attention_heads": 32, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 48, + "num_key_value_heads": 4, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000000, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/5c776e5f1eb55e76831a.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/5c776e5f1eb55e76831a.json new file mode 100644 index 0000000000000000000000000000000000000000..21d1d145bcefa5eb378d526e82d61d4b07b37d04 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/qwen3_moe/Qwen/Qwen3-30B-A3B-Instruct-2507/5c776e5f1eb55e76831a.json @@ -0,0 +1,66 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "_task": "text-generation", + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "decoder_sparse_step": 1, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 262144, + "max_window_layers": 48, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 768, + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "checkpoint_revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 16, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.19.8089.0+8ab9f450", + "on_device_sampling": true, + "optimum_neuron_version": "0.3.1.dev6", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 16 + }, + "norm_topk_prob": true, + "num_attention_heads": 32, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 48, + "num_key_value_heads": 4, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000000, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/15f7e443873c3474a6aa.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/15f7e443873c3474a6aa.json new file mode 100644 index 0000000000000000000000000000000000000000..196770eca4fe7ca59d8e3cdb1e5011aaebc20f75 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/15f7e443873c3474a6aa.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "MultiModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-t5", + "_task": null, + "decoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_num_beams": 4, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + }, + "encoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_num_beams": 4, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + } +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/5d5dadc2c2138bf8ab44.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/5d5dadc2c2138bf8ab44.json new file mode 100644 index 0000000000000000000000000000000000000000..ad340944f5e935a03d11fbb70d890f003f6b9d7c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/5d5dadc2c2138bf8ab44.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "MultiModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-t5", + "_task": null, + "decoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": true, + "output_hidden_states": true, + "static_batch_size": 1, + "static_num_beams": 1, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + }, + "encoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": true, + "output_hidden_states": true, + "static_batch_size": 1, + "static_num_beams": 1, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + } +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/d83e395611ddc3a29d9f.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/d83e395611ddc3a29d9f.json new file mode 100644 index 0000000000000000000000000000000000000000..1981fa74bf11c72a546e1c357d3afa293a9d5485 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/d83e395611ddc3a29d9f.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "MultiModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-t5", + "_task": null, + "decoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_num_beams": 1, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + }, + "encoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": false, + "output_hidden_states": false, + "static_batch_size": 1, + "static_num_beams": 1, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + } +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/e57cbde649dde732937d.json b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/e57cbde649dde732937d.json new file mode 100644 index 0000000000000000000000000000000000000000..b550d3bc9697ef54eef3eae64365e7808b7dc281 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev6/t5/hf-internal-testing/tiny-random-t5/e57cbde649dde732937d.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "MultiModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-t5", + "_task": null, + "decoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": true, + "output_hidden_states": true, + "static_batch_size": 1, + "static_num_beams": 4, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + }, + "encoder": { + "classifier_dropout": 0.0, + "d_ff": 37, + "d_kv": 8, + "d_model": 32, + "decoder_start_token_id": 0, + "dense_act_fn": "relu", + "dropout_rate": 0.1, + "feed_forward_proj": "relu", + "gradient_checkpointing": false, + "initializer_factor": 0.002, + "is_encoder_decoder": true, + "is_gated_act": false, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "neuron": { + "auto_cast": "matmul", + "auto_cast_type": "bf16", + "compiler_type": "neuronx-cc", + "compiler_version": "2.19.8089.0+8ab9f450", + "disable_fallback": false, + "disable_fast_relayout": false, + "dynamic_batch_size": false, + "float_dtype": "fp32", + "inline_weights_to_neff": true, + "int_dtype": "int64", + "optlevel": "2", + "output_attentions": true, + "output_hidden_states": true, + "static_batch_size": 1, + "static_num_beams": 4, + "static_sequence_length": 64, + "task": "text2text-generation", + "tensor_parallel_size": 1 + }, + "num_decoder_layers": 5, + "num_heads": 4, + "num_layers": 5, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 8, + "use_cache": true, + "vocab_size": 1103 + } +} \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..853451f0393707d9e09b5c04d0791e824f2d931a --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67438285964b08bb19d5cc976e3c54297f08d1b0df76c2f3fe542e1624483de2 +size 9670898 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..040c3bacc222cd32680954d4f4827f9bd485edda --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_02da278f167522769f43+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851ad19d69a28aebcb76eeb4e4529f9d13bd810ffc4849ea085036f1af5e2beb +size 17439744 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/decoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/decoder/model.neuron index 90dec5f3231ef0fff4ffd800319701569415afa9..fbf87f430a4bc62c87cd8328f3f66961f1d40684 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/decoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/decoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b788d306aa621145bd4c1b566e270308aa9d03f560b54a659020c5087dc28413 -size 1190094 +oid sha256:c062fb6a1f3d277faac7225d1bec3b60dff7c9c7f780cc21ef74a345ad1e0409 +size 1189966 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/encoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/encoder/model.neuron index f48894ddd94baff7166ff9c85ca13d95057e5c6b..4ca805b61a2ba13ba5cce95ec5703450a11d5f3d 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/encoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_15f7e443873c3474a6aa/encoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fd726585ac540b16f29cf45b77f7fa0beb2fa5363f3d09a5a0c25c4007501304 -size 349882 +oid sha256:7c3b2291d6e1e91d862de919369aef0c73f10af7f0d3d06bdac64df2f247ceb8 +size 349818 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16364384937141820797+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16364384937141820797+e30acd3a/model.neff index cc3145ec179a1d9ecbf09975819dff6290577cb2..e883da46ae19f3706a5c19f9feecb8c67415cf4b 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16364384937141820797+e30acd3a/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_16364384937141820797+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bffdd3ff42ffb62a3193a19e15071208accc14a9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddfdd3b97bc014c3753edd934bfc9558878a8ef0a424799cec49b546e76cb5c8 +size 110959906 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.log new file mode 100644 index 0000000000000000000000000000000000000000..0c20de5c3f2f5f49c47bb991ef422999f81d8411 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_234b5c34641e778cc5d2+ed72d204/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_234b5c34641e778cc5d2+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_234b5c34641e778cc5d2+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (18.327946) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-09-10T13:49:56Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (18.327946) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eed265cf35a8a5290ef80a0adeca7e07950b25f3 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0058240e54b46c6b0db4c14f906f09f3ec484e7e04c3f2cb6e1f8eda3c5888d7 +size 2507590 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b23ece6c63b461c1034f4025e67b54bb93748985 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74f90bab7cf259aea0daac2a556646ed4ff30addb7831a0a31f1a3b3cb58934 +size 3533824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f40f60c89bbd8404b66748e996c003733b896506 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_31248f3f93aafaf6dbaa+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd51c5d89811b2361e4f8267cb657832f235da127ac6940aed8e0a70761b6052 +size 3783895 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..77fe912990ecb3204d33eeaf82d249e6dcce79d9 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12093b664d384d4f227211cf30520c33cb2f168bf18efd5680e7a6f0759e3946 +size 103130389 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b8ccbfdf034aad4a8dbe268d227293c4cd972073 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fafff6c8809235ac7997becb99200b923ac6f9c9084ee79e97d7948b30d87e8f +size 7803904 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9160deab4c8483e48f8bfb8cae9c213dca3115c7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_3cbaae1c02abefd7f494+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4013a9888058e7c0e8ac2ce33000d806be95273272575932d5e51e724e37a029 +size 8121064 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..475e9fbfbf5b38f298fddc13e8298d8b6fde22a4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9b145a643ac4afcec643c5a4cf0003579212a43c2ea7b2cab1f7635446c3c40 +size 2458664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e49d477e0aa8132d206d4e5de1cbb0db484e609 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecc4a985b50e6be447dcba49851f6869f4bc0661ddb70c5528f11b5a721ac0a +size 3492864 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..951aaa09cad090316c3428760fb681da1e21e92f --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_43116d82b5805ba3ae20+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58e441608e9340359965eeeb8e0d0229dfbd7a847939c284e69325334c98780 +size 3742824 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c9c6493aaf3cafe305f72691a8490215089d8aab --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df61ac9ae7cc3b8d0b92f84adaaafb47ca0c3968defd912edc4a081cd7c233d6 +size 104317794 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..21fae107ff8e57321fd16dc67fab42c1cfbb2f53 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ab214a26135c9602b57+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2632ba92083ecea71ec19ae561a0ca4e56249ffbd65c624502bc366b7b3cb3d4 +size 38933504 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/decoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/decoder/model.neuron index 98dc7748d0d8480bd13d93c318944c36a1cb55db..d45c38a214bccc8cc84cedcbaf685e0d3a87f3b0 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/decoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/decoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e7ed179c776a09dd00aaa5347656ca55f30a25cec07b7fbb278051a49788c69 -size 633172 +oid sha256:a9d1936dd388b9f965a4e1b24fedb24fb7c6a748789332d0d885bfbff8eded56 +size 633236 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/encoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/encoder/model.neuron index 972416c09677f07c83e9aa8e43b6e74510c21ea9..baf05a201ecf596a763c9179f155b1851be99b38 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/encoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_5d5dadc2c2138bf8ab44/encoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8bfc4ed25f9998aa656120620c0a3c0ce35bfc8aeefd672bc1695585a6bd1320 -size 351030 +oid sha256:12ca8cdd5d2f8186de84a66171ac1d9d021405c8a3c6b37c33e4945f018f175d +size 351094 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..691358a8696986b2175a3ba92c1e7716f5225be1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a428bd8f83c4a722785ed440b47277011681572669d6eb5771db9d093585f13a +size 104067050 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa2c39e05a9b2acc7dae86228d2ab3e233d5c3d6 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0fc0d34f44ca4b39e2257e2fc912b5d9662e0cf5c5772c7343dd108a85eef3 +size 11377664 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c16334ea6373753c8d4d4e18a651af93ea7c8982 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_6ddd5b729b9e4ecede70+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a48a1ffb1043c4f172ef65685fbbcae53db0a3670657a8f321d47245718a5a9 +size 11652584 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b0ed93ba7fc61080e22cbea3a0ec2d008be74bd2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2553215122056d902b1c55e7e737da5a9dafdc192698ac91c466be727a94caf7 +size 9674651 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.log b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.log new file mode 100644 index 0000000000000000000000000000000000000000..438253edebeee7c1aad6e6132f142d782fb65a32 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_7557177e60b224d7d0cc+253d6470/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_7557177e60b224d7d0cc+253d6470.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_7557177e60b224d7d0cc+253d6470.neff', '--target=trn1', '--enable-saturate-infinity', '--enable-mixed-precision-accumulation', '--model-type', 'transformer', '-O1', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2', '--auto-cast=none', '--internal-enable-dge-levels', 'vector_dynamic_offsets', '--internal-hlo2tensorizer-options=--verify-hlo=true', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (17.053570) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-09-11T08:10:18Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (17.053570) exceeds 16GB. Neff won't be able to load on chip - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_869715862416998377+e30acd3a/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_869715862416998377+e30acd3a/model.neff index 8cff81234109cc77899194125e20071944f55496..d5aadd1b622b08625382568c5a3c9751f5850b5e 100644 Binary files a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_869715862416998377+e30acd3a/model.neff and b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_869715862416998377+e30acd3a/model.neff differ diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1569eac18fc7f34b3fe66166f2d7a4a59dbc5aa4 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de0707bd8623b511536894ce23bd06617ecbeb15 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f660949bb2cab99b5089891b82c30be2231452ec8de9805200759f28a28d883a +size 104367341 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..622fcd4513272e80a1d9268edb2d1226fe34fa94 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbb0c02d6e06f337e549+ed72d204/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9293bc3c4e67b2d20c5686b8c1fb26107ef467b1c29589845f6639e1dbbae0f3 +size 38984704 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..34675115220079309bfad2d45680c478c378a3c0 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec00951bc7ebde2143dfa72345eb3a44ed20f182 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd153213a2751e0d15100aa0467bf7538b94a0344bd9890daf2705e96ac1990f +size 103179258 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2815497496f5b360643569f55d27414adbd66e0c --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be1d76828f4c564ad6225fc249f3ddd933295963bc2ef353ff97a48c337e4ea +size 7742464 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..57423eeb950cd6829a93d4a532c610c78db2a82d --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d0af99bdae3af4d95a22+a9d440f5/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b434e9136e6af6215840183871c0f74e8652f547f35b2793081455ddf8ca610c +size 8059735 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/decoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/decoder/model.neuron index e508428cb594c801349275dfcf5fd1e3bf8b1caa..96ca96eee14a7820a06ace716cdf4f74ac59ea93 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/decoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/decoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a125074fe97f2423bbd01a6203536371aed73cab8724c49156b56f13ab714258 -size 621268 +oid sha256:b016e529b0db94f982dc684c6cd597ff95142018c4401531e3d45c334ca808bd +size 621326 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/encoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/encoder/model.neuron index 501702fac626c05876e6664d6955f70236f3045d..2516a5cf490371a0afceba1f02930b588ba9fdc5 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/encoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_d83e395611ddc3a29d9f/encoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b783ea6ed00b83d80a1c1f7198ccb054d0a458fb200f6721567b144aa81ae53 -size 351030 +oid sha256:4dd637aeb4e611e2a48213ff691ba79eaa6d693c11e6a71733df8ddae343641a +size 350074 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/decoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/decoder/model.neuron index eba3f1b9a08a81bd5109cb023b2403751a01d3cd..d4611733bffacfd496a35fad9e4bea0f87864843 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/decoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/decoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eef6db75fc46e7de94c915df2478eb1903e5bfcef82375a6efd61d9e832cec39 -size 1191764 +oid sha256:b80c82c106b87f1642adf055d6ccb3cdd86256f548428faafa86034794404dde +size 1191828 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/encoder/model.neuron b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/encoder/model.neuron index b58f03b66612b5e8263420b251f9eef61a2990bb..93f48730f12ba941c75da88ec403650bb31b6a67 100644 --- a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/encoder/model.neuron +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e57cbde649dde732937d/encoder/model.neuron @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67a159b1dff08a90c7d68e4183bc8e88c58e9c125d0f5310908f0b218529c3b4 -size 350900 +oid sha256:2e3fcf5d6788278c0da76de1c0ccb5d7c3ddd39af9404094ba07cd213c650772 +size 350964 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..cb53ce53583cfe21339b4bed39f5fa1a679cddd1 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb2a94b3c2839ae00c8cc1432100570c2a7c7324 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771da6a5c18eebc304cca5c020b4d530eb3efee0760e3f5fa09bb7450e1d7f59 +size 9621086 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5fe0f8cb744752d9379efc35ae86ca3f4ea02cf7 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_e5e262df0f4573dfd367+253d6470/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd1427362c7fca5fb5fc50a45d02bf2906ae40b0d9c9e428954efdc17841b89 +size 17409024 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/compile_flags.json b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..93354bd42abb66b9b6e56b127441174baa403534 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--enable-saturate-infinity", "--enable-mixed-precision-accumulation", "--model-type", "transformer", "-O1", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2", "--auto-cast=none", "--internal-enable-dge-levels", "vector_dynamic_offsets", "--internal-hlo2tensorizer-options=--verify-hlo=true", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.done b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.hlo_module.pb b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..791a7b8a582f433cd6e5ffd08589614fd9b25f93 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:632ebdbb83a6e8aeca497132602314bf9294c422f665e1cb531ef57d0a392070 +size 8944812 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.neff b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8064d2995067b255360ab5726f53b714b6c7cdb2 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a168d7bce50b39b9770ed7519cba29a18cc644e2ef0c6ad535b822402bbe8bba +size 1977344 diff --git a/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/wrapped_neff.hlo b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..38c7bb883e4004909c2a0df8d8f5965f24f49278 --- /dev/null +++ b/neuronxcc-2.19.8089.0+8ab9f450/MODULE_efa7fe965caa79aca41c+cd3419b6/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0d939811c2a42c6c897da50cd46b7e5fe12620f9127c184da98311cf280908 +size 2249687