ValueError: Unrecognized image processor in unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit. Should have a `image_processor_type` key
#3 by ievnsk
Exit code: 1. Reason: _path, **kwargs)
File "/usr/local/lib/python3.10/site-packages/transformers/processing_utils.py", line 1143, in _get_arguments_from_pretrained
args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
File "/usr/local/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 579, in from_pretrained
raise ValueError(
ValueError: Unrecognized image processor in unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit. Should have a `image_processor_type` key in its preprocessor_config.json of config.json, or one of the following `model_type` keys in its config.json: align, aria, beit, bit, blip, blip-2, bridgetower, chameleon, chinese_clip, clip, clipseg, conditional_detr, convnext, convnextv2, cvt, data2vec-vision, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dinat, dinov2, donut-swin, dpt, efficientformer, efficientnet, flava, focalnet, fuyu, gemma3, git, glpn, got_ocr2, grounding-dino, groupvit, hiera, idefics, idefics2, idefics3, ijepa, imagegpt, instructblip, instructblipvideo, kosmos-2, layoutlmv2, layoutlmv3, levit, llama4, llava, llava_next, llava_next_video, llava_onevision, mask2former, maskformer, mgp-str, mistral3, mllama, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, nat, nougat, oneformer, owlv2, owlvit, paligemma, perceiver, phi4_multimodal, pix2struct, pixtral, poolformer, prompt_depth_anything, pvt, pvt_v2, qwen2_5_vl, qwen2_vl, regnet, resnet, rt_detr, sam, segformer, seggpt, shieldgemma2, siglip, siglip2, superglue, swiftformer, swin, swin2sr, swinv2, table-transformer, timesformer, timm_wrapper, tvlt, tvp, udop, upernet, van, videomae, vilt, vipllava, vit, vit_hybrid, vit_mae, vit_msn, vitmatte, xclip, yolos, zoedepth
Traceback (most recent call last):
File "/home/user/app/app.py", line 95, in <module>
run()
File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 214, in gradio_handler
raise error("ZeroGPU worker error", res.error_cls)
gradio.exceptions.Error: 'ValueError'
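For reference, you can confirm what the loader is complaining about by downloading just the repo's preprocessor_config.json with huggingface_hub and checking for the key the error names — a minimal diagnostic sketch (the script name is only illustrative):

check_config.py
import json
from huggingface_hub import hf_hub_download

repo_id = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"

# Fetch only the preprocessor config, not the model weights
path = hf_hub_download(repo_id=repo_id, filename="preprocessor_config.json")
with open(path) as f:
    config = json.load(f)

# AutoImageProcessor raises the ValueError above when this key is absent
print("image_processor_type" in config, config.get("image_processor_type"))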
requirements.txt
transformers==4.51.0
huggingface_hub
spaces
torch
gradio
app.py
import gradio as gr
import spaces


def load_gpu_model():
    from transformers import AutoProcessor, Llama4ForConditionalGeneration
    import torch

    model_id = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
    processor = AutoProcessor.from_pretrained(model_id)
    model = Llama4ForConditionalGeneration.from_pretrained(
        model_id,
        attn_implementation="flex_attention",
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return processor, model


@spaces.GPU
def run():
    gpu_processor, gpu_model = load_gpu_model()

    def respond(
        message,
        history: list[tuple[str, str]],
        system_message,
        max_tokens,
        temperature,
        top_p,
    ):
        # Build the message history in the format the processor expects:
        # each message is a dict with "role" and "content" keys,
        # where "content" is a list of objects with a type and text.
        messages = []
        if system_message:
            messages.append({
                "role": "system",
                "content": [{"type": "text", "text": system_message}]
            })
        for user_text, assistant_text in history:
            if user_text:
                messages.append({
                    "role": "user",
                    "content": [{"type": "text", "text": user_text}]
                })
            if assistant_text:
                messages.append({
                    "role": "assistant",
                    "content": [{"type": "text", "text": assistant_text}]
                })
        messages.append({
            "role": "user",
            "content": [{"type": "text", "text": message}]
        })

        inputs = gpu_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(gpu_model.device)

        outputs = gpu_model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        response = gpu_processor.batch_decode(outputs[:, inputs["input_ids"].shape[-1]:])[0]
        yield response

    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
    demo.launch()


if __name__ == "__main__":
    run()
Currently, the 4-bit models only work with Unsloth itself; we're going to push a PR for this soon.
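In the meantime, loading the checkpoint through Unsloth's own loader should sidestep the processor error — a minimal sketch assuming Unsloth's FastLanguageModel API works with this checkpoint (the script name and max_seq_length value are illustrative):

load_with_unsloth.py
# Sketch only: assumes `pip install unsloth` and that this Llama 4
# checkpoint is supported by FastLanguageModel.
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,  # the checkpoint is already bnb-4bit quantized
)
FastLanguageModel.for_inference(model)  # switch to inference mode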