Update README.md
README.md (changed)
```diff
@@ -96,21 +96,9 @@ linear_config = Int8DynamicActivationIntxWeightConfig(
 
 quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
-
 quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# TODO: use AOPerModuleConfig once fix for tied weights is landed
-quantize_(
-    quantized_model,
-    embedding_config,
-    lambda m, fqn: isinstance(m, torch.nn.Embedding)
-)
-quantize_(
-    quantized_model,
-    linear_config,
-)
-
 # Push to hub
 # USER_ID = "YOUR_USER_ID"
 # save_to = f"{USER_ID}/phi4-mini-8dq4w"
```
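For context, this change drops the two manual `quantize_` calls (and the embedding `filter_fn`) in favor of routing everything through a single `AOPerModuleConfig` applied at load time via `TorchAoConfig`, which the removed TODO was waiting on. Below is a minimal sketch of the resulting flow. The config definitions and model ids (`embedding_config`, `linear_config`, `model_id`, `untied_model_id`) live earlier in the README and are not shown in this diff, so their values here are assumptions, not the README's exact settings:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig
from torchao.quantization import (
    AOPerModuleConfig,
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,
)
from torchao.quantization.granularity import PerAxis, PerGroup

# Assumed placeholders; the README defines these earlier with possibly different values.
model_id = "microsoft/Phi-4-mini-instruct"
untied_model_id = model_id  # the README points this at a checkpoint with untied embeddings

# Assumed configs: int8 weight-only for the embedding table,
# 8-bit dynamic activation / 4-bit grouped weight for linear layers ("8dq4w").
embedding_config = IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0))
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
)

# Route configs by module FQN: "_default" covers every module without a more
# specific match, so only the embedding needs an explicit entry.
quant_config = AOPerModuleConfig({"_default": linear_config, "model.embed_tokens": embedding_config})
quantization_config = TorchAoConfig(
    quant_type=quant_config,
    include_embedding=True,
    untie_embedding_weights=True,
    modules_to_not_convert=[],
)

# Quantization now happens inside from_pretrained; no explicit quantize_ calls needed.
quantized_model = AutoModelForCausalLM.from_pretrained(
    untied_model_id,
    torch_dtype=torch.float32,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
```

The `"_default"` key is what makes the separate embedding `filter_fn` unnecessary: `linear_config` applies to every module not matched by a more specific fully-qualified name, while `model.embed_tokens` is singled out for weight-only quantization.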