JunHowie committed on
Commit
ae02ac1
·
verified ·
1 Parent(s): 5dcc479

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +4 -2
  2. config.json +1 -1
  3. model.safetensors +1 -1
  4. quantize_config.json +1 -1
README.md CHANGED
@@ -7,6 +7,7 @@ tags:
7
  - Qwen3
8
  - GPTQ
9
  - Int4
 
10
  - vLLM
11
  base_model:
12
  - Qwen/Qwen3-4B-Instruct-2507
@@ -16,6 +17,8 @@ base_model_relation: quantized
16
  Base model: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
17
 
18
  <i>This model is quantized to 4-bit with a group size of 128.</i>
 
 
19
 
20
  ```
21
  vllm serve JunHowie/Qwen3-4B-Instruct-2507-GPTQ-Int4
@@ -26,7 +29,6 @@ vllm serve JunHowie/Qwen3-4B-Instruct-2507-GPTQ-Int4
26
  vllm>=0.9.2
27
  ```
28
 
29
-
30
  ### 【Model Download】
31
 
32
  ```python
@@ -238,4 +240,4 @@ If you find our work helpful, feel free to give us a cite.
238
  primaryClass={cs.CL},
239
  url={https://arxiv.org/abs/2505.09388},
240
  }
241
- ```
 
7
  - Qwen3
8
  - GPTQ
9
  - Int4
10
+ - 量化修复
11
  - vLLM
12
  base_model:
13
  - Qwen/Qwen3-4B-Instruct-2507
 
17
  Base model: [Qwen/Qwen3-4B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507)
18
 
19
  <i>This model is quantized to 4-bit with a group size of 128.</i>
20
+ <br>
21
+ <i>Compared to earlier quantized versions, the new quantized model demonstrates better tokens/s efficiency. This improvement comes from setting desc_act=False in the quantization configuration.</i>
22
 
23
  ```
24
  vllm serve JunHowie/Qwen3-4B-Instruct-2507-GPTQ-Int4
 
29
  vllm>=0.9.2
30
  ```
31
 
 
32
  ### 【Model Download】
33
 
34
  ```python
 
240
  primaryClass={cs.CL},
241
  url={https://arxiv.org/abs/2505.09388},
242
  }
243
+ ```
config.json CHANGED
@@ -58,7 +58,7 @@
58
  "quantization_config": {
59
  "bits": 4,
60
  "checkpoint_format": "gptq",
61
- "desc_act": true,
62
  "group_size": 128,
63
  "hyb_act": false,
64
  "lm_head": false,
 
58
  "quantization_config": {
59
  "bits": 4,
60
  "checkpoint_format": "gptq",
61
+ "desc_act": false,
62
  "group_size": 128,
63
  "hyb_act": false,
64
  "lm_head": false,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1539086f9229a407a1acfb0838402210a3925ecd76a1a4788cc5f7c857791dc1
3
  size 2669888648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0deda4d7eeae0fb2ca621587603dcb476412abc38f0c3cb197f20c61cea42dd
3
  size 2669888648
quantize_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "bits": 4,
3
  "group_size": 128,
4
- "desc_act": true,
5
  "hyb_act": false,
6
  "sym": true,
7
  "lm_head": false,
 
1
  {
2
  "bits": 4,
3
  "group_size": 128,
4
+ "desc_act": false,
5
  "hyb_act": false,
6
  "sym": true,
7
  "lm_head": false,