ValueError: Unrecognized image processor in unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit. Should have a `image_processor_type` key

#3 opened by ievnsk
Exit code: 1. Reason: _path, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/transformers/processing_utils.py", line 1143, in _get_arguments_from_pretrained
    args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
  File "/usr/local/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py", line 579, in from_pretrained
    raise ValueError(
ValueError: Unrecognized image processor in unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit. Should have a `image_processor_type` key in its preprocessor_config.json of config.json, or one of the following `model_type` keys in its config.json: align, aria, beit, bit, blip, blip-2, bridgetower, chameleon, chinese_clip, clip, clipseg, conditional_detr, convnext, convnextv2, cvt, data2vec-vision, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dinat, dinov2, donut-swin, dpt, efficientformer, efficientnet, flava, focalnet, fuyu, gemma3, git, glpn, got_ocr2, grounding-dino, groupvit, hiera, idefics, idefics2, idefics3, ijepa, imagegpt, instructblip, instructblipvideo, kosmos-2, layoutlmv2, layoutlmv3, levit, llama4, llava, llava_next, llava_next_video, llava_onevision, mask2former, maskformer, mgp-str, mistral3, mllama, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, nat, nougat, oneformer, owlv2, owlvit, paligemma, perceiver, phi4_multimodal, pix2struct, pixtral, poolformer, prompt_depth_anything, pvt, pvt_v2, qwen2_5_vl, qwen2_vl, regnet, resnet, rt_detr, sam, segformer, seggpt, shieldgemma2, siglip, siglip2, superglue, swiftformer, swin, swin2sr, swinv2, table-transformer, timesformer, timm_wrapper, tvlt, tvp, udop, upernet, van, videomae, vilt, vipllava, vit, vit_hybrid, vit_mae, vit_msn, vitmatte, xclip, yolos, zoedepth

Traceback (most recent call last):
  File "/home/user/app/app.py", line 95, in <module>
    run()
  File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 214, in gradio_handler
    raise error("ZeroGPU worker error", res.error_cls)
gradio.exceptions.Error: 'ValueError'
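
The ValueError comes from AutoProcessor falling through to AutoImageProcessor: the quantized repo's preprocessor_config.json apparently has no `image_processor_type` key, and its config.json was not matched to one of the listed `model_type`s (note that `llama4` is in the list, so a transformers-version or repo-config mismatch is likely). One possible workaround, untested here and with `meta-llama/Llama-4-Scout-17B-16E-Instruct` as the processor source being an assumption, is to load the processor from the base model repo while keeping the 4-bit weights from the Unsloth repo:

import torch
from transformers import AutoProcessor, Llama4ForConditionalGeneration

# Untested sketch: assumes the base Meta repo ships a complete
# preprocessor_config.json, so the processor loads from there while the
# 4-bit weights still come from the Unsloth repo.
base_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
quant_id = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"

processor = AutoProcessor.from_pretrained(base_id)
model = Llama4ForConditionalGeneration.from_pretrained(
    quant_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)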

requirements.txt

transformers==4.51.0
huggingface_hub
spaces
torch
gradio

app.py

import gradio as gr
import spaces



def load_gpu_model():
    from transformers import AutoProcessor, Llama4ForConditionalGeneration
    import torch

    model_id = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
    processor = AutoProcessor.from_pretrained(model_id)
    model = Llama4ForConditionalGeneration.from_pretrained(
        model_id,
        attn_implementation="flex_attention",
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return processor, model



@spaces.GPU
def run():

    gpu_processor, gpu_model = load_gpu_model()

    def respond(
            message,
            history: list[tuple[str, str]],
            system_message,
            max_tokens,
            temperature,
            top_p,
    ):
        # Build the message history in the required format:
        # each message is a dict with "role" and "content" keys,
        # where "content" is a list of objects with a type and a text.
        messages = []
        if system_message:
            messages.append({
                "role": "system",
                "content": [{"type": "text", "text": system_message}]
            })
        for (user_text, assistant_text) in history:
            if user_text:
                messages.append({
                    "role": "user",
                    "content": [{"type": "text", "text": user_text}]
                })
            if assistant_text:
                messages.append({
                    "role": "assistant",
                    "content": [{"type": "text", "text": assistant_text}]
                })
        messages.append({
            "role": "user",
            "content": [{"type": "text", "text": message}]
        })

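        # Tokenize the full conversation with the model's chat template
        # and move the resulting tensors to the model's device.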
        inputs = gpu_processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(gpu_model.device)

        outputs = gpu_model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            do_sample=True,  # sampling must be enabled for temperature/top_p to apply
            temperature=temperature,
            top_p=top_p,
        )

        response = gpu_processor.batch_decode(
            outputs[:, inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,  # drop end-of-turn special tokens from the reply
        )[0]
        yield response

    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
    demo.launch()

if __name__ == "__main__":
    run()

Currently the 4-bit models will only work with Unsloth itself; we're going to push a PR for this soon.
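
A minimal sketch of loading through Unsloth instead (the `FastVisionModel` entry point and its arguments reflect Unsloth's usual API and are an assumption for this particular checkpoint, not a confirmed recipe):

from unsloth import FastVisionModel

# Untested sketch: Unsloth loads its own pre-quantized 4-bit checkpoints
# directly, bypassing the transformers Auto* dispatch that fails above.
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit",
    load_in_4bit=True,
)
FastVisionModel.for_inference(model)  # switch the model into inference mode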
