Update inference-cache-config/llama.json
Browse files
inference-cache-config/llama.json
CHANGED
|
@@ -73,9 +73,7 @@
|
|
| 73 |
"sequence_length": 4096,
|
| 74 |
"num_cores": 2,
|
| 75 |
"auto_cast_type": "bf16"
|
| 76 |
-
}
|
| 77 |
-
],
|
| 78 |
-
"meta-llama/Llama-3.2-1B": [
|
| 79 |
{
|
| 80 |
"batch_size": 4,
|
| 81 |
"sequence_length": 4096,
|
|
@@ -89,9 +87,7 @@
|
|
| 89 |
"sequence_length": 4096,
|
| 90 |
"num_cores": 2,
|
| 91 |
"auto_cast_type": "bf16"
|
| 92 |
-
}
|
| 93 |
-
],
|
| 94 |
-
"meta-llama/Llama-3.2-3B": [
|
| 95 |
{
|
| 96 |
"batch_size": 4,
|
| 97 |
"sequence_length": 4096,
|
|
|
|
| 73 |
"sequence_length": 4096,
|
| 74 |
"num_cores": 2,
|
| 75 |
"auto_cast_type": "bf16"
|
| 76 |
+
},
|
|
|
|
|
|
|
| 77 |
{
|
| 78 |
"batch_size": 4,
|
| 79 |
"sequence_length": 4096,
|
|
|
|
| 87 |
"sequence_length": 4096,
|
| 88 |
"num_cores": 2,
|
| 89 |
"auto_cast_type": "bf16"
|
| 90 |
+
},
|
|
|
|
|
|
|
| 91 |
{
|
| 92 |
"batch_size": 4,
|
| 93 |
"sequence_length": 4096,
|