Update README.md
Browse files
README.md
CHANGED
@@ -144,7 +144,10 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
144 |
)
|
145 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
146 |
|
|
|
147 |
base_config = Int4WeightOnlyConfig(group_size=128)
|
|
|
|
|
148 |
linear_config = AWQConfig(base_config, step="prepare")
|
149 |
|
150 |
# Skip quantizing lm_head since it has a different definition in vLLM and transformers
|
|
|
144 |
)
|
145 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
146 |
|
147 |
+
# Note: the default base_config below is only compatible with H100 GPUs
|
148 |
base_config = Int4WeightOnlyConfig(group_size=128)
|
149 |
+
# For A100 GPUs, use the following base_config instead:
|
150 |
+
# base_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="tile_packed_to_4d", int4_choose_qparams_algorithm="hqq")
|
151 |
linear_config = AWQConfig(base_config, step="prepare")
|
152 |
|
153 |
# Skip quantizing lm_head since it has a different definition in vLLM and transformers
|