Upload 4bit dynamic double quant model
Browse files
- README.md +6 -1
- config.json +5 -1
- model-00001-of-00002.safetensors +2 -2
README.md
CHANGED
|
@@ -19,6 +19,11 @@ BitsAndBytesConfig(
|
|
| 19 |
bnb_4bit_use_double_quant=True,
|
| 20 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
| 21 |
bnb_4bit_quant_storage=torch.bfloat16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
)
|
| 23 |
```
|
| 24 |
|
|
@@ -544,4 +549,4 @@ alternatives.
|
|
| 544 |
[jax]: https://github.com/jax-ml/jax
|
| 545 |
[ml-pathways]: https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/
|
| 546 |
[sustainability]: https://sustainability.google/operating-sustainably/
|
| 547 |
-
[gemini-2-paper]: https://arxiv.org/abs/2312.11805
|
|
|
|
| 19 |
bnb_4bit_use_double_quant=True,
|
| 20 |
bnb_4bit_compute_dtype=torch.bfloat16,
|
| 21 |
bnb_4bit_quant_storage=torch.bfloat16,
|
| 22 |
+
llm_int8_skip_modules=[
|
| 23 |
+
"multi_modal_projector",
|
| 24 |
+
"vision_tower",
|
| 25 |
+
"vision_model",
|
| 26 |
+
],
|
| 27 |
)
|
| 28 |
```
|
| 29 |
|
|
|
|
| 549 |
[jax]: https://github.com/jax-ml/jax
|
| 550 |
[ml-pathways]: https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/
|
| 551 |
[sustainability]: https://sustainability.google/operating-sustainably/
|
| 552 |
+
[gemini-2-paper]: https://arxiv.org/abs/2312.11805
|
config.json
CHANGED
|
@@ -21,7 +21,11 @@
|
|
| 21 |
"bnb_4bit_use_double_quant": true,
|
| 22 |
"llm_int8_enable_fp32_cpu_offload": false,
|
| 23 |
"llm_int8_has_fp16_weight": false,
|
| 24 |
-
"llm_int8_skip_modules": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"llm_int8_threshold": 6.0,
|
| 26 |
"load_in_4bit": true,
|
| 27 |
"load_in_8bit": false,
|
|
|
|
| 21 |
"bnb_4bit_use_double_quant": true,
|
| 22 |
"llm_int8_enable_fp32_cpu_offload": false,
|
| 23 |
"llm_int8_has_fp16_weight": false,
|
| 24 |
+
"llm_int8_skip_modules": [
|
| 25 |
+
"multi_modal_projector",
|
| 26 |
+
"vision_tower",
|
| 27 |
+
"vision_model"
|
| 28 |
+
],
|
| 29 |
"llm_int8_threshold": 6.0,
|
| 30 |
"load_in_4bit": true,
|
| 31 |
"load_in_8bit": false,
|
model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c6ed0512682d19ffee93bae2d3e1d681fe1e0af1247aec039099c64145bc9e4
|
| 3 |
+
size 5605791454
|