Exception has occurred: KeyError 'layers.0.attn.to_q.weight'
#12 opened by dkackman
I get this error with diffusers v0.32.1 and PyTorch 2.5.1 when calling load_ip_adapter:
Exception has occurred: KeyError 'layers.0.attn.to_q.weight'
This is on a 3090, so I am also calling:
pipe._exclude_from_cpu_offload.append("image_encoder")
pipe.enable_sequential_cpu_offload()
Minimal repro code:
import torch
from diffusers.models.transformers import SD3Transformer2DModel
from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import (
    StableDiffusion3Pipeline,
)
from diffusers.utils import load_image
from transformers import SiglipVisionModel, SiglipImageProcessor

model_path = "stabilityai/stable-diffusion-3.5-large"
image_encoder_path = "google/siglip-so400m-patch14-384"
ip_adapter_path = "InstantX/SD3.5-Large-IP-Adapter"
device = "cuda"

transformer = SD3Transformer2DModel.from_pretrained(
    model_path, subfolder="transformer", torch_dtype=torch.bfloat16
)
feature_extractor = SiglipImageProcessor.from_pretrained(
    image_encoder_path, torch_dtype=torch.bfloat16
)
image_encoder = SiglipVisionModel.from_pretrained(
    image_encoder_path, torch_dtype=torch.bfloat16
)

pipe = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
    feature_extractor=feature_extractor,
    image_encoder=image_encoder,
)

# keep the image encoder on the GPU so IP-Adapter image embeddings are computed there
pipe._exclude_from_cpu_offload.append("image_encoder")
pipe.enable_sequential_cpu_offload()

# Exception has occurred: KeyError
# 'layers.0.attn.to_q.weight'
pipe.load_ip_adapter(ip_adapter_path, subfolder="", weight_name="ip-adapter.bin")
pipe.set_ip_adapter_scale(0.6)

ref_img = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png"
)

# please note that SD3.5 Large is sensitive to highres generation like 1536x1536
image = pipe(
    width=1024,
    height=1024,
    prompt="a cat",
    negative_prompt="lowres, low quality, worst quality",
    num_inference_steps=24,
    guidance_scale=5.0,
    generator=torch.Generator(device).manual_seed(42),
    ip_adapter_image=ref_img,
).images[0]
image.save("result.jpg")
Thanks. That works!
I wasn't able to run it with sequential offload, as it then threw: RuntimeError: Tensor on device meta is not on the expected device cuda:0!
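(Possibly the meta-device error comes from loading the adapter weights after the sequential offload hooks are already attached; below is a minimal, unverified sketch of the reversed ordering, assuming that is the cause.)

# untested sketch: load the IP-Adapter before enabling sequential CPU offload,
# assuming the meta-device RuntimeError comes from adding weights after offload hooks exist
pipe.load_ip_adapter(ip_adapter_path, subfolder="", weight_name="ip-adapter.bin")
pipe.set_ip_adapter_scale(0.6)

pipe._exclude_from_cpu_offload.append("image_encoder")
pipe.enable_sequential_cpu_offload()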
I was able to get this to run in 24 GB of VRAM by quantizing the transformer to 4 bits with bitsandbytes instead of using sequential CPU offload:
import torch
from diffusers.models.transformers import SD3Transformer2DModel
from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import (
    StableDiffusion3Pipeline,
)
from diffusers import BitsAndBytesConfig
from diffusers.utils import load_image
from transformers import SiglipVisionModel, SiglipImageProcessor

model_path = "stabilityai/stable-diffusion-3.5-large"
image_encoder_path = "google/siglip-so400m-patch14-384"
ip_adapter_path = "InstantX/SD3.5-Large-IP-Adapter"
device = "cuda"

# load the transformer in 4-bit NF4 so the whole pipeline fits on the GPU without offloading
transformer = SD3Transformer2DModel.from_pretrained(
    model_path,
    subfolder="transformer",
    torch_dtype=torch.bfloat16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    ),
)
feature_extractor = SiglipImageProcessor.from_pretrained(
    image_encoder_path, torch_dtype=torch.bfloat16
)
image_encoder = SiglipVisionModel.from_pretrained(
    image_encoder_path, torch_dtype=torch.bfloat16
)

pipe = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
    feature_extractor=feature_extractor,
    image_encoder=image_encoder,
).to(device)

# pin the adapter repo to a specific commit of ip-adapter.bin
pipe.load_ip_adapter(
    ip_adapter_path,
    subfolder="",
    weight_name="ip-adapter.bin",
    revision="f1f54ca369ae759f9278ae9c87d46def9f133c78",
)
pipe.set_ip_adapter_scale(0.6)

ref_img = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/ip_adapter_diner.png"
)

# please note that SD3.5 Large is sensitive to highres generation like 1536x1536
image = pipe(
    prompt="a marmot drinks a milkshake",
    negative_prompt="lowres, low quality, worst quality",
    num_inference_steps=24,
    guidance_scale=5.0,
    generator=torch.Generator(device).manual_seed(42),
    ip_adapter_image=ref_img,
).images[0]
image.save("result.jpg")
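For reference (this check is not part of the original snippet), the peak GPU memory of the run can be printed afterwards with PyTorch's CUDA stats to confirm it stays under 24 GB:

# optional: report peak VRAM used by this process (assumes device == "cuda")
peak_gb = torch.cuda.max_memory_allocated(device) / 1024**3
print(f"peak VRAM: {peak_gb:.2f} GiB")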