Update README.md
Browse files
README.md
CHANGED
@@ -20,7 +20,7 @@ Base model: [zai-org/GLM-4.5-Air](https://huggingface.co/zai-org/GLM-4.5-Air)
|
|
20 |
CONTEXT_LENGTH=32768
|
21 |
|
22 |
vllm serve \
|
23 |
-
|
24 |
--served-model-name GLM-4.5-Air-AWQ-FP16Mix \
|
25 |
--enable-expert-parallel \
|
26 |
--swap-space 16 \
|
@@ -59,8 +59,8 @@ Before the PR is merged, temporarily replace `awq_marlin.py` in `vllm/model_exec
|
|
59 |
|
60 |
### 【Model Download】
|
61 |
```python
|
62 |
-
from
|
63 |
-
snapshot_download('
|
64 |
```
|
65 |
|
66 |
### 【Overview】
|
|
|
20 |
CONTEXT_LENGTH=32768
|
21 |
|
22 |
vllm serve \
|
23 |
+
QuantTrio/GLM-4.5-Air-AWQ-FP16Mix \
|
24 |
--served-model-name GLM-4.5-Air-AWQ-FP16Mix \
|
25 |
--enable-expert-parallel \
|
26 |
--swap-space 16 \
|
|
|
59 |
|
60 |
### 【Model Download】
|
61 |
```python
|
62 |
+
from huggingface_hub import snapshot_download
|
63 |
+
snapshot_download('QuantTrio/GLM-4.5-Air-AWQ-FP16Mix', cache_dir="your_local_path")
|
64 |
```
|
65 |
|
66 |
### 【Overview】
|