JunHowie committed on
Commit
906617f
·
verified ·
1 Parent(s): f6a3853

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. README.md +7 -6
  3. config.json +9 -4
  4. model.safetensors.index.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -5,14 +5,15 @@ license_link: https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507/blob/mai
5
  pipeline_tag: text-generation
6
  tags:
7
  - Qwen3
8
- - AWQ
 
9
  - 量化修复
10
  - vLLM
11
  base_model:
12
  - Qwen/Qwen3-235B-A22B-Thinking-2507
13
  base_model_relation: quantized
14
  ---
15
- # 通义千问3-235B-A22B-Thinking-2507-AWQ
16
  基础型 [Qwen/Qwen3-235B-A22B-Thinking-2507](https://www.modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507)
17
 
18
 
@@ -22,8 +23,8 @@ base_model_relation: quantized
22
  $CONTEXT_LENGTH=32768 # 262144
23
 
24
  vllm serve \
25
- tclf90/Qwen3-235B-A22B-Thinking-2507-AWQ \
26
- --served-model-name Qwen3-235B-A22B-Thinking-2507-AWQ \
27
  --enable-expert-parallel \
28
  --swap-space 16 \
29
  --max-num-seqs 512 \
@@ -53,7 +54,7 @@ vllm>=0.9.2
53
 
54
  | 文件大小 | 最近更新时间 |
55
  |---------|--------------|
56
- | `116GB` | `2025-07-26` |
57
 
58
 
59
 
@@ -61,7 +62,7 @@ vllm>=0.9.2
61
 
62
  ```python
63
  from modelscope import snapshot_download
64
- snapshot_download('tclf90/Qwen3-235B-A22B-Thinking-2507-AWQ', cache_dir="本地路径")
65
  ```
66
 
67
 
 
5
  pipeline_tag: text-generation
6
  tags:
7
  - Qwen3
8
+ - GPTQ
9
+ - Int4-Int8Mix
10
  - 量化修复
11
  - vLLM
12
  base_model:
13
  - Qwen/Qwen3-235B-A22B-Thinking-2507
14
  base_model_relation: quantized
15
  ---
16
+ # 通义千问3-235B-A22B-Thinking-2507-GPTQ-Int4-Int8Mix
17
  基础型 [Qwen/Qwen3-235B-A22B-Thinking-2507](https://www.modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507)
18
 
19
 
 
23
  $CONTEXT_LENGTH=32768 # 262144
24
 
25
  vllm serve \
26
+ tclf90/Qwen3-235B-A22B-Thinking-2507-GPTQ-Int4-Int8Mix \
27
+ --served-model-name Qwen3-235B-A22B-Thinking-2507-GPTQ-Int4-Int8Mix \
28
  --enable-expert-parallel \
29
  --swap-space 16 \
30
  --max-num-seqs 512 \
 
54
 
55
  | 文件大小 | 最近更新时间 |
56
  |---------|--------------|
57
+ | `125GB` | `2025-07-26` |
58
 
59
 
60
 
 
62
 
63
  ```python
64
  from modelscope import snapshot_download
65
+ snapshot_download('tclf90/Qwen3-235B-A22B-Thinking-2507-GPTQ-Int4-Int8Mix', cache_dir="本地路径")
66
  ```
67
 
68
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "name_or_path": "tclf90/Qwen3-235B-A22B-Thinking-2507-AWQ",
3
  "architectures": [
4
  "Qwen3MoeForCausalLM"
5
  ],
@@ -37,10 +37,15 @@
37
  "use_sliding_window": false,
38
  "vocab_size": 151936,
39
  "quantization_config": {
40
- "quant_method": "awq",
41
  "bits": 4,
42
  "group_size": 128,
43
- "version": "gemm",
44
- "zero_point": true
 
 
 
 
 
45
  }
46
  }
 
1
  {
2
+ "name_or_path": "tclf90/Qwen3-235B-A22B-Thinking-2507-GPTQ-Int4-Int8Mix",
3
  "architectures": [
4
  "Qwen3MoeForCausalLM"
5
  ],
 
37
  "use_sliding_window": false,
38
  "vocab_size": 151936,
39
  "quantization_config": {
40
+ "quant_method": "gptq",
41
  "bits": 4,
42
  "group_size": 128,
43
+ "sym": true,
44
+ "desc_act": false,
45
+ "dynamic": {
46
+ "+:model[.]layers[.]([0-6])[.].*": {
47
+ "bits": 8
48
+ }
49
+ }
50
  }
51
  }
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff