{ "_serialized_key": "HloNeuronConfig", "all_reduce_dtype": null, "allow_flash_attention": true, "attention_layout": "HSB", "attn_output_transposed": false, "auto_cast_type": "bf16", "batch_size": 4, "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", "collectives_layout": "HSB", "continuous_batching": true, "fuse_qkv": true, "group_query_attention": "shard-over-heads", "log_softmax_scores": false, "neuronxcc_version": "2.17.194.0+d312836f", "optimum_neuron_version": "0.2.0", "output_all_logits": false, "sequence_length": 4096, "tp_degree": 2 }