YAML metadata warning: this repo card has empty or missing YAML metadata
(see https://huggingface.co/docs/hub/model-cards#model-card-metadata).
A modular custom block that can be dynamically loaded in Mellon!
For now, the Mellon param map is saved as shown below... we will make this much easier soon!
from diffusers.modular_pipelines.mellon_node_utils import MellonNodeConfig, MellonParam
# Annotation tasks understood by Florence-2; these become the dropdown
# options for the "annotation_task" input in the Mellon UI.
SUPPORTED_ANNOTATION_TASKS = [
    "<OD>",
    "<REFERRING_EXPRESSION_SEGMENTATION>",
    "<CAPTION>",
    "<DETAILED_CAPTION>",
    "<MORE_DETAILED_CAPTION>",
    "<DENSE_REGION_CAPTION>",
    "<CAPTION_TO_PHRASE_GROUNDING>",
    "<OPEN_VOCABULARY_DETECTION>",
]

# Ways the annotation result can be rendered for the user.
SUPPORTED_ANNOTATION_OUTPUT_TYPES = [
    "mask_image",
    "bounding_box",
    "mask_overlay",
]

# --- Node inputs -------------------------------------------------------------
# "image" is passed as a bare string because it is a "known" input: Mellon
# already knows how to configure it, see
# https://github.com/huggingface/diffusers/blob/main/src/diffusers/modular_pipelines/mellon_node_utils.py#L29
# Custom inputs need a MellonParam describing how they should be displayed in
# the UI; a default one could also be generated from the fields of the
# corresponding `InputParam`
# (https://huggingface.co/YiYiXu/florence-2-block/blob/main/block.py#L43),
# e.g. `type` can be derived from our `type_hint`, `value` from our `default`.
_node_inputs = [
    "image",
    MellonParam(
        name="annotation_task",
        label="Annotation Task",
        type="string",
        options=SUPPORTED_ANNOTATION_TASKS,
        value="<CAPTION_TO_PHRASE_GROUNDING>",
    ),
    MellonParam(
        name="annotation_prompt",
        label="Annotation Prompt",
        type="string",
        default="",
        display="textarea",
    ),
    MellonParam(
        name="annotation_output_type",
        label="Annotation Output Type",
        type="string",
        options=SUPPORTED_ANNOTATION_OUTPUT_TYPES,
        value="bounding_box",
        # Per-choice UI wiring: only the "mask_image" choice exposes the
        # extra "mask_image" socket; the other choices expose none.
        onChange={
            "mask_image": ["mask_image"],
            "bounding_box": [],
            "mask_overlay": [],
        },
    ),
]

# --- Node outputs ------------------------------------------------------------
_node_outputs = [
    MellonParam(name="images", label="Images", type="image", display="output"),
    MellonParam(name="annotations", label="Annotations", type="string", display="output"),
    MellonParam(name="mask_image", label="Mask Image", type="image", display="output"),
]

node_config = MellonNodeConfig(
    inputs=_node_inputs,
    model_inputs=[],
    outputs=_node_outputs,
    blocks_names=["Florence2ImageAnnotatorBlock"],
    node_type="custom",
)

# Persist the node config to the Hub so Mellon can load this block dynamically.
node_config.save_mellon_config("YiYiXu/florence-2-block", push_to_hub=True)
To run the block for bounding-box annotation:
import torch
from diffusers.modular_pipelines import ModularPipeline
from diffusers.utils import load_image
# Hub repo that hosts both the custom block code and its Mellon node config.
repo_id = "YiYiXu/florence-2-block"

# Fetch the Florence2 image annotator block that will create our mask.
# Fix: the original defined `repo_id` but then loaded from a local
# "./florence-2-custom-block" path, contradicting the comment above —
# load from the Hub repo instead.
pipe = ModularPipeline.from_pretrained(repo_id, trust_remote_code=True)
pipe.load_components(torch_dtype=torch.float16)
pipe.to("cuda")

image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true")
image = image.resize((1024, 1024))

# Ground the phrase "car" to a bounding box in the image.
annotation_task = '<CAPTION_TO_PHRASE_GROUNDING>'
annotation_prompt = "car"

output = pipe(
    image=image,
    annotation_task=annotation_task,
    annotation_prompt=annotation_prompt,
    annotation_output_type="bounding_box",
)
# The node declares its image output as "images" (plural), so read `.images`;
# the original chained `.image[0].save(...)` and stored save()'s None return.
# NOTE(review): confirm the pipeline output attribute name matches the
# node config's "images" output.
output.images[0].save("output.png")
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋 Ask for provider support