YAML metadata warning: this repo card has empty or missing YAML metadata
(see https://huggingface.co/docs/hub/model-cards#model-card-metadata).
A modular custom block that can be dynamically loaded in Mellon!
For now, the Mellon param map is saved as shown below... we will make this much easier soon!
from diffusers.modular_pipelines.mellon_node_utils import MellonNodeConfig, MellonParam
# Annotation tasks understood by Florence-2; these become the dropdown
# options for the "annotation_task" input in the Mellon UI.
SUPPORTED_ANNOTATION_TASKS = [
    "<OD>",
    "<REFERRING_EXPRESSION_SEGMENTATION>",
    "<CAPTION>",
    "<DETAILED_CAPTION>",
    "<MORE_DETAILED_CAPTION>",
    "<DENSE_REGION_CAPTION>",
    "<CAPTION_TO_PHRASE_GROUNDING>",
    "<OPEN_VOCABULARY_DETECTION>",
]

# Ways the annotation result can be rendered for the user.
SUPPORTED_ANNOTATION_OUTPUT_TYPES = [
    "mask_image",
    "bounding_box",
    "mask_overlay",
]

# --- Node inputs -------------------------------------------------------------
# "image" is passed as a bare string because it is a "known" input: Mellon
# already knows how to configure it, see
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/modular_pipelines/mellon_node_utils.py#L29
# Custom inputs need a MellonParam describing how they should be displayed in
# the UI; a default one could also be generated from the fields of the
# corresponding `InputParam`
# (https://huggingface.co/YiYiXu/florence-2-block/blob/main/block.py#L43),
# e.g. `type` can be derived from our `type_hint`, `value` from our `default`.
_node_inputs = [
    "image",
    MellonParam(
        name="annotation_task",
        label="Annotation Task",
        type="string",
        options=SUPPORTED_ANNOTATION_TASKS,
        value="<CAPTION_TO_PHRASE_GROUNDING>",
    ),
    MellonParam(
        name="annotation_prompt",
        label="Annotation Prompt",
        type="string",
        default="",
        display="textarea",
    ),
    MellonParam(
        name="annotation_output_type",
        label="Annotation Output Type",
        type="string",
        options=SUPPORTED_ANNOTATION_OUTPUT_TYPES,
        value="bounding_box",
        # Per-choice UI wiring: only the "mask_image" choice exposes the
        # extra "mask_image" socket; the other choices expose none.
        onChange={
            "mask_image": ["mask_image"],
            "bounding_box": [],
            "mask_overlay": [],
        },
    ),
]

# --- Node outputs ------------------------------------------------------------
_node_outputs = [
    MellonParam(name="images", label="Images", type="image", display="output"),
    MellonParam(name="annotations", label="Annotations", type="string", display="output"),
    MellonParam(name="mask_image", label="Mask Image", type="image", display="output"),
]

node_config = MellonNodeConfig(
    inputs=_node_inputs,
    model_inputs=[],
    outputs=_node_outputs,
    blocks_names=["Florence2ImageAnnotatorBlock"],
    node_type="custom",
)

# Persist the node config to the Hub so Mellon can load this block dynamically.
node_config.save_mellon_config("YiYiXu/florence-2-block", push_to_hub=True)
To run the block for bounding-box annotation:
import torch
from diffusers.modular_pipelines import ModularPipeline
from diffusers.utils import load_image
# Hub repo that hosts both the custom block code and its Mellon node config.
repo_id = "YiYiXu/florence-2-block"

# Fetch the Florence2 image annotator block that will create our mask.
# Fix: the original defined `repo_id` but then loaded from a local
# "./florence-2-custom-block" path, contradicting the comment above —
# load from the Hub repo instead.
pipe = ModularPipeline.from_pretrained(repo_id, trust_remote_code=True)
pipe.load_components(torch_dtype=torch.float16)
pipe.to("cuda")

image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true")
image = image.resize((1024, 1024))

# Ground the phrase "car" to a bounding box in the image.
annotation_task = '<CAPTION_TO_PHRASE_GROUNDING>'
annotation_prompt = "car"

output = pipe(
    image=image,
    annotation_task=annotation_task,
    annotation_prompt=annotation_prompt,
    annotation_output_type="bounding_box",
)
# The node declares its image output as "images" (plural), so read `.images`;
# the original chained `.image[0].save(...)` and stored save()'s None return.
# NOTE(review): confirm the pipeline output attribute name matches the
# node config's "images" output.
output.images[0].save("output.png")
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋 Ask for provider support