Use the following script to load the model:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Critical: import the custom function needed to rebuild the model ---
from wina_utils import apply_wina_to_model

# --- Configuration ---
# The path to your locally saved model, or its ID on the Hugging Face Hub
MODEL_PATH = "moelanoby/Qwen2-1.5B-wina"
SPARSITY_LEVEL = 0.65  # Must stay at 0.65 (65%); other values may cause errors


def load_custom_wina_model(model_path: str, sparsity: float):
    """
    Loads a custom WINA-modified model correctly.
    """
    print(f"Loading WINA model from: {model_path}")

    # --- Step 1: Load the tokenizer ---
    # trust_remote_code=True is needed because the model folder contains wina_utils.py
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    # --- Step 2: Load the BASE model architecture ---
    # Load the original model structure first; it still has the standard MLP layers.
    # trust_remote_code=True allows it to execute the original model's code if needed.
    print("Loading the base model architecture...")
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    # --- Step 3: Manually apply the WINA transformation to the architecture ---
    # This swaps the standard MLP layers for WinaMLP layers, creating the correct
    # structure in memory to receive the saved weights.
    print("Applying WINA transformation to the loaded architecture...")
    apply_wina_to_model(model, sparsity_level=sparsity)

    print("\n" + "=" * 50)
    print("SUCCESS: Custom WINA model loaded correctly.")
    print("=" * 50)
    return model, tokenizer


if __name__ == "__main__":
    wina_model, wina_tokenizer = load_custom_wina_model(MODEL_PATH, SPARSITY_LEVEL)

    # You can now use the model for inference
    print("\nVerifying model architecture (you should see 'WinaMLP' layers):")
    print(wina_model)
```
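For a quick smoke test after loading, a generation call like the sketch below can be appended to the `__main__` block. It is a minimal example using only the standard `generate`/`decode` API from `transformers`; the prompt and generation settings are placeholder assumptions, not part of this model's recipe.

```python
# Minimal inference sketch (assumes the loading script above has already run
# and `wina_model` / `wina_tokenizer` are in scope).
prompt = "Explain what activation sparsity is in one sentence."
inputs = wina_tokenizer(prompt, return_tensors="pt").to(wina_model.device)

with torch.no_grad():
    output_ids = wina_model.generate(
        **inputs,
        max_new_tokens=64,   # placeholder budget for a short answer
        do_sample=False,     # greedy decoding keeps the test deterministic
    )

print(wina_tokenizer.decode(output_ids[0], skip_special_tokens=True))
```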
Base model: Qwen/Qwen2-1.5B-Instruct