metascroy committed
Commit 8c43747 · verified · 1 Parent(s): e6eb920

Update README.md

Files changed (1): README.md +0 -12
README.md CHANGED
@@ -96,21 +96,9 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
 
 quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
-
 quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# TODO: use AOPerModuleConfig once fix for tied weights is landed
-quantize_(
-    quantized_model,
-    embedding_config,
-    lambda m, fqn: isinstance(m, torch.nn.Embedding)
-)
-quantize_(
-    quantized_model,
-    linear_config,
-)
-
 # Push to hub
 # USER_ID = "YOUR_USER_ID"
 # save_to = f"{USER_ID}/phi4-mini-8dq4w"
 
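For context, the configuration that survives this commit already routes a separate config to the embedding layer via AOPerModuleConfig, which is why the manual quantize_() workaround under the TODO could be deleted. Below is a minimal end-to-end sketch of the retained flow, assuming a torchao version that exports AOPerModuleConfig and illustrative config values; the real embedding_config, linear_config, and untied_model_id definitions live above this hunk in README.md.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
from torchao.quantization import (
    AOPerModuleConfig,
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,
)
from torchao.quantization.granularity import PerAxis, PerGroup

# Placeholders: the README defines its own model ids; untied_model_id points at
# a checkpoint whose embedding and lm_head weights have been untied.
model_id = "microsoft/Phi-4-mini-instruct"
untied_model_id = model_id

# Illustrative values, not the README's exact settings:
# int8 weight-only quantization, per embedding row.
embedding_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int8,
    granularity=PerAxis(0),
)
# 8-bit dynamic activations with 4-bit grouped weights for linear layers.
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
)

# One per-module config covers both the default (linear) case and the
# embedding table, so no follow-up quantize_() calls are needed.
quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
quantization_config = TorchAoConfig(
    quant_type=quant_config,
    include_embedding=True,
    untie_embedding_weights=True,
    modules_to_not_convert=[],
)

# Quantization is applied during loading via the quantization_config.
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```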