jerryzh168 committed on
Commit
99caebc
·
verified ·
1 Parent(s): 574af31

Update README.md

Browse files
Files changed (1)
  1. README.md +1 -1
README.md CHANGED
@@ -281,7 +281,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
281
 
282
  # use "Qwen/Qwen3-8B" or "pytorch/Qwen3-8B-AWQ-INT4"
283
  model_id = "pytorch/Qwen3-8B-AWQ-INT4"
284
- quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
285
  tokenizer = AutoTokenizer.from_pretrained(model_id)
286
 
287
  torch.cuda.reset_peak_memory_stats()
 
281
 
282
  # use "Qwen/Qwen3-8B" or "pytorch/Qwen3-8B-AWQ-INT4"
283
  model_id = "pytorch/Qwen3-8B-AWQ-INT4"
284
+ quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0", torch_dtype=torch.bfloat16)
285
  tokenizer = AutoTokenizer.from_pretrained(model_id)
286
 
287
  torch.cuda.reset_peak_memory_stats()