pytorch
/

Qwen3-8B-AWQ-INT4

Text Generation

text-generation-inference

Model card Files and versions

jerryzh168 committed on 3 days ago

Commit

574af31

·

verified ·

1 Parent(s): ae7fb3c

Update README.md

Files changed (1) hide show

README.md +3 -0

README.md CHANGED Viewed

@@ -144,7 +144,10 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 base_config = Int4WeightOnlyConfig(group_size=128)
 linear_config = AWQConfig(base_config, step="prepare")
 # skip quantizing lm_head since it has different definition in vllm and transformers

 )
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+# Note: this is only compatible with H100
 base_config = Int4WeightOnlyConfig(group_size=128)
+# for A100, please use the following for base_config:
+# base_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="tile_packed_to_4d", int4_choose_qparams_algorithm="hqq")
 linear_config = AWQConfig(base_config, step="prepare")
 # skip quantizing lm_head since it has different definition in vllm and transformers