Qwen3-4B-2507
Collection
2 items
•
Updated
This is a W8A8 (8-bit weights and 8-bit activations) quantized version of Qwen/Qwen3-4B-Instruct-2507, created using LLM-Compressor.
This model was quantized by itroot.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.transformers import oneshot
# Hub identifier of the full-precision checkpoint that will be quantized.
model_id = "Qwen/Qwen3-4B-Instruct-2507"

# Load the base model with automatic device placement; the memory caps below
# bound how much of it may live on each GPU and in CPU RAM.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
    low_cpu_mem_usage=True,
    offload_folder="./offload_tmp",
    # Limits chosen for 2x 3090s.
    max_memory={0: "22GB", 1: "22GB", "cpu": "64GB"},
)

# The tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Calibration data configuration.
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048

print("Loading and preprocessing calibration dataset...")

# Only the first NUM_CALIBRATION_SAMPLES rows of the split are requested; the
# fixed-seed shuffle then reorders that same subset reproducibly.
ds = load_dataset(
    DATASET_ID,
    split=f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES}]",
).shuffle(seed=42)
def preprocess(example):
    """Render one dataset row's conversation as a single prompt string.

    Args:
        example: A dataset row whose "messages" entry is handed to the
            tokenizer's chat template.

    Returns:
        A dict with a single "text" key holding the templated conversation
        as plain text (``tokenize=False``, so no token ids are produced here).
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
    )
    return {"text": rendered}


# Adds the "text" field produced by preprocess to every calibration row.
ds = ds.map(preprocess)
def tokenize(sample):
    """Tokenize a row's "text" field for calibration.

    Args:
        sample: A dataset row containing the "text" string to encode.

    Returns:
        The tokenizer's output for that text, unpadded and truncated to at
        most MAX_SEQUENCE_LENGTH tokens.
    """
    return tokenizer(
        sample["text"],
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        # NOTE(review): presumably disabled because the chat template has
        # already inserted the special tokens; confirm against the template.
        add_special_tokens=False,
    )


# Drop every pre-existing column so only the tokenizer's outputs remain.
ds = ds.map(tokenize, remove_columns=ds.column_names)
print("Dataset ready.")

# Quantization recipe: the modifiers are listed in the order SmoothQuant, then
# GPTQ. GPTQ targets Linear layers with the W8A8 scheme and leaves "lm_head"
# out via the ignore list.
recipe = [
    SmoothQuantModifier(smoothing_strength=0.8),
    GPTQModifier(targets="Linear", scheme="W8A8", ignore=["lm_head"]),
]

output_dir = "./Qwen3-4B-Instruct-2507-W8A8"
print(f"Starting one-shot quantization. Output will be in '{output_dir}'")

# Run calibration and quantization in a single pass over the prepared dataset.
oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    output_dir=output_dir,
)
print("Quantization complete.")

# NOTE(review): SAVE_DIR resolves to the same directory as output_dir, so the
# model is presumably written twice; confirm whether this explicit save is
# still required (it does guarantee the tokenizer ends up next to the weights).
SAVE_DIR = "Qwen3-4B-Instruct-2507-W8A8"
print(f"Saving compressed model and tokenizer to '{SAVE_DIR}'...")
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
Base model
Qwen/Qwen3-4B-Instruct-2507