Added Llama-70b batch_size 4 to inference cache
inference-cache-config/llama.json (CHANGED)

@@ -105,6 +105,12 @@
       "sequence_length": 4096,
       "num_cores": 24,
       "auto_cast_type": "fp16"
+    },
+    {
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 24,
+      "auto_cast_type": "fp16"
     }
   ]
 }
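Each entry in this file describes one pre-compiled Neuron configuration served from the inference cache; the commit adds a batch_size 4 variant alongside the existing one. Below is a minimal sketch of how a matching configuration would be requested with optimum-neuron (the model id is illustrative, not taken from this commit, and an AWS Neuron device is assumed). When the export arguments match a cached entry like the one added above, the compiled artifacts should be fetched from the cache rather than rebuilt:

# Minimal sketch, assuming optimum-neuron on an AWS Neuron (Inferentia2) instance.
# The model id is a hypothetical Llama-70b checkpoint, not specified by this commit.
from optimum.neuron import NeuronModelForCausalLM

model = NeuronModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-70b-chat-hf",  # illustrative model id
    export=True,              # compile for Neuron, reusing cached artifacts if available
    batch_size=4,             # matches the newly added cache entry
    sequence_length=4096,
    num_cores=24,
    auto_cast_type="fp16",
)

Because all four export arguments line up with the new entry, the export step should resolve to a cache hit instead of a lengthy recompilation of a 70B model.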