jerryzh168 committed on
Commit
574af31
·
verified ·
1 Parent(s): ae7fb3c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -0
README.md CHANGED
@@ -144,7 +144,10 @@ model = AutoModelForCausalLM.from_pretrained(
144
  )
145
  tokenizer = AutoTokenizer.from_pretrained(model_id)
146
 
 
147
  base_config = Int4WeightOnlyConfig(group_size=128)
 
 
148
  linear_config = AWQConfig(base_config, step="prepare")
149
 
150
  # skip quantizing lm_head since it has different definition in vllm and transformers
 
144
  )
145
  tokenizer = AutoTokenizer.from_pretrained(model_id)
146
 
147
+ # Note: the base_config below is only compatible with H100 GPUs
148
  base_config = Int4WeightOnlyConfig(group_size=128)
149
+ # For A100 GPUs, use the following base_config instead:
150
+ # base_config = Int4WeightOnlyConfig(group_size=128, int4_packing_format="tile_packed_to_4d", int4_choose_qparams_algorithm="hqq")
151
  linear_config = AWQConfig(base_config, step="prepare")
152
 
153
  # skip quantizing lm_head since it has different definition in vllm and transformers