Use princeton-nlp/Sheared-LLaMA-1.3B as a test model
Browse files
inference-cache-config/llama-variants.json
CHANGED
|
@@ -103,7 +103,7 @@
|
|
| 103 |
"auto_cast_type": "fp16"
|
| 104 |
}
|
| 105 |
],
|
| 106 |
-
"
|
| 107 |
{
|
| 108 |
"batch_size": 1,
|
| 109 |
"sequence_length": 4096,
|
|
|
|
| 103 |
"auto_cast_type": "fp16"
|
| 104 |
}
|
| 105 |
],
|
| 106 |
+
"princeton-nlp/Sheared-LLaMA-1.3B": [
|
| 107 |
{
|
| 108 |
"batch_size": 1,
|
| 109 |
"sequence_length": 4096,
|