Rename inference-cache-config/llama3.json to inference-cache-config/llama3-8b.json

Files changed (1) hide show

inference-cache-config/{llama3.json → llama3-8b.json} RENAMED Viewed

@@ -42,19 +42,5 @@
       "num_cores": 8,
       "auto_cast_type": "fp16"
     }
-  ],
-  "meta-llama/Meta-Llama-3-70B": [
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 4,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    }
   ]
 }

       "num_cores": 8,
       "auto_cast_type": "fp16"
     }
   ]
 }