Add batch size 4 configurations for Llama 1B and 3B models
inference-cache-config/llama.json CHANGED

@@ -74,6 +74,14 @@
       "num_cores": 2,
       "auto_cast_type": "bf16"
     }
+  ],
+  "meta-llama/Llama-3.2-1B": [
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    }
   ],
   "meta-llama/Llama-3.2-3B": [
     {
@@ -82,6 +90,14 @@
       "num_cores": 2,
       "auto_cast_type": "bf16"
     }
+  ],
+  "meta-llama/Llama-3.2-3B": [
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 2,
+      "auto_cast_type": "bf16"
+    }
   ],
   "TinyLlama/TinyLlama-1.1B-Chat-v1.0": [
     {
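Each entry in llama.json records one set of export arguments (batch size, sequence length, number of Neuron cores, cast type) for which compiled artifacts are cached. As an illustration of what the new batch-size-4 entries correspond to, below is a minimal Python sketch assuming optimum-neuron's NeuronModelForCausalLM export API; the output directory name is hypothetical, and the cache-building job that actually consumes this file is not part of this diff.

# Sketch: compile meta-llama/Llama-3.2-1B with the same arguments as the new
# batch-size-4 cache entry. Assumes optimum-neuron is installed on a Neuron
# (Inferentia/Trainium) instance; the save path is a hypothetical example.
from optimum.neuron import NeuronModelForCausalLM

export_config = {
    "batch_size": 4,
    "sequence_length": 4096,
    "num_cores": 2,
    "auto_cast_type": "bf16",
}

# export=True triggers ahead-of-time Neuron compilation with these static shapes;
# caching the resulting artifacts lets later users skip this compilation step.
model = NeuronModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B",
    export=True,
    **export_config,
)
model.save_pretrained("llama-3.2-1b-neuron-bs4-seq4096")

The batch-size-4 entry added for meta-llama/Llama-3.2-3B is identical except for the model id.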

