aws-neuron
/

optimum-neuron-cache

dacorvo HF Staff committed on Aug 4

Commit

325c041

verified ·

1 Parent(s): bd4212c

Update inference-cache-config/llama.json

Files changed (1) hide show

inference-cache-config/llama.json CHANGED Viewed

@@ -73,9 +73,7 @@
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
-    }
-  ],
-     "meta-llama/Llama-3.2-1B": [
     {
       "batch_size": 4,
       "sequence_length": 4096,
@@ -89,9 +87,7 @@
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
-    }
-  ],
-     "meta-llama/Llama-3.2-3B": [
     {
       "batch_size": 4,
       "sequence_length": 4096,

       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
+    },
     {
       "batch_size": 4,
       "sequence_length": 4096,
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
+    },
     {
       "batch_size": 4,
       "sequence_length": 4096,