| `FLUX.1 Fill [dev]` is a 12 billion parameter rectified flow transformer capable of filling areas in existing images based on a text description. | |
| The idea is to unlock the full outpainting potential of the `FLUX.1 Fill [dev]` model. | |
| The original model parameters have not been finetuned or modified. | |
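| Rather, a simple preprocessing hack (pre-filling the masked region with random noise before the image is passed to the pipeline) unlocks the full potential of the `FLUX.1 Fill [dev]` model. | |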
| This work is based on the `FLUX.1 Fill [dev]` model and is subject to the FLUX.1-dev Non-Commercial License: | |
| https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev | |
| ## Diffusers | |
| To use `FLUX.1 Fill [dev]` with the 🧨 diffusers Python library, first install or upgrade diffusers: | |
| ```shell | |
| pip install -U diffusers | |
| ``` | |
| Then you can use `FluxFillPipeline` to run the model. | |
| The following snippet shows how to prepare the noise-filled input image and run the pipeline. | |
| ```python | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
| import torch | |
| from diffusers import FluxFillPipeline | |
| from diffusers.utils import load_image | |
| from typing import Union | |
def prepare_masked_image(
    foreground: Union[Image.Image, np.ndarray],
    mask: Union[Image.Image, np.ndarray],
    alpha: float = 0.001,
    blur: bool = True
) -> Image.Image:
    """
    Replace the masked region of an image with (optionally blurred) random noise.

    Pixels where the mask is 0 are copied from ``foreground`` unchanged. Pixels
    where the mask is set are replaced by ``alpha * cloned + (1 - alpha) * noise``,
    where ``cloned`` is the foreground seamless-cloned onto a black canvas and
    ``noise`` is uniform random grayscale noise.

    Args:
        foreground (PIL.Image.Image or np.ndarray): The input image. PIL images
            are converted to RGB; arrays must be (H, W) or (H, W, 3).
            NOTE(review): arrays are assumed to be uint8, as OpenCV's
            seamlessClone expects 8-bit input -- confirm against callers.
        mask (PIL.Image.Image or np.ndarray): Mask of the region to overwrite
            with noise, with the same height/width as ``foreground``. Values
            may be in {0, 1} or in [0, 255].
        alpha (float): Weight of the cloned image in the blend; the noise gets
            ``1 - alpha``. Lower alpha -> more noise in the masked area.
        blur (bool): Whether to apply a 5x5 Gaussian blur to the noise.

    Returns:
        PIL.Image.Image: The image with the masked area filled with noise.

    Raises:
        ValueError: If ``foreground`` is not (H, W) or (H, W, 3), if ``mask``
            cannot be reduced to a single channel, or if the mask size does
            not match the image size.
    """
    # --- Foreground: always work on a 3-channel array --------------------
    if isinstance(foreground, Image.Image):
        # convert("RGB") drops alpha / expands grayscale so the blend below
        # always sees exactly three channels.
        foreground_np = np.array(foreground.convert("RGB"))
    else:
        foreground_np = np.asarray(foreground)
        if foreground_np.ndim == 2:
            foreground_np = np.stack([foreground_np] * 3, axis=-1)
    if foreground_np.ndim != 3 or foreground_np.shape[2] != 3:
        raise ValueError(
            "foreground must have shape (H, W) or (H, W, 3), got "
            f"{foreground_np.shape}"
        )

    # --- Mask: always work on a single-channel array ---------------------
    if isinstance(mask, Image.Image):
        mask_np = np.array(mask.convert("L"))
    else:
        mask_np = np.asarray(mask)
        if mask_np.ndim == 3:
            channels = mask_np.shape[2]
            if channels == 1:
                mask_np = mask_np[:, :, 0]
            elif channels == 3:
                mask_np = cv2.cvtColor(mask_np, cv2.COLOR_BGR2GRAY)
            elif channels == 4:
                mask_np = cv2.cvtColor(mask_np, cv2.COLOR_BGRA2GRAY)
    if mask_np.ndim != 2:
        raise ValueError(
            f"mask must be single-channel or convertible to it, got shape {mask_np.shape}"
        )

    h, w = foreground_np.shape[:2]
    if mask_np.shape != (h, w):
        raise ValueError(
            f"mask size {mask_np.shape} does not match image size {(h, w)}"
        )

    # Nothing is masked: every pixel would be copied from the foreground anyway.
    if not mask_np.any():
        return Image.fromarray(foreground_np.astype(np.uint8))

    # --- Noise ------------------------------------------------------------
    # np.random.rand draws from a uniform distribution on [0, 1).
    noise = (np.random.rand(h, w) * 255).astype(np.uint8)
    if blur:
        noise = cv2.GaussianBlur(noise, (5, 5), 0)
    noise_rgb = np.stack([noise, noise, noise], axis=-1)

    # --- Seamless clone of the foreground onto black ----------------------
    # The clone mask must be 8-bit; scale {0, 1} masks up so they are treated
    # the same way as {0, 255} masks (mirrors the normalisation used for the
    # final blend below).
    if mask_np.max() > 1:
        clone_mask = mask_np.astype(np.uint8)
    else:
        clone_mask = (mask_np * 255).astype(np.uint8)

    # Dilate the mask to get smoother boundaries for seamlessClone.
    kernel = np.ones((3, 3), np.uint8)
    dilated_mask = cv2.dilate(clone_mask, kernel, iterations=10)

    # NOTE(review): recent OpenCV versions zero the outermost pixel ring of the
    # mask inside seamlessClone; do the same here so the bounding box computed
    # below is the one OpenCV will actually use.
    dilated_mask[[0, -1], :] = 0
    dilated_mask[:, [0, -1]] = 0

    black_bg = np.zeros_like(foreground_np, dtype=np.uint8)
    x, y, box_w, box_h = cv2.boundingRect(dilated_mask)
    if box_w == 0 or box_h == 0:
        # Image too small to hold a clone region; fall back to plain black.
        cloned = black_bg
    else:
        # seamlessClone centres the mask's bounding box on `p`. Using the
        # box's own centre keeps the cloned pixels at their original
        # location; the image centre would shift them whenever the mask is
        # not centred in the frame.
        center = (x + box_w // 2, y + box_h // 2)
        cloned = cv2.seamlessClone(
            src=foreground_np,
            dst=black_bg,
            mask=dilated_mask,
            p=center,
            flags=cv2.MIXED_CLONE
        )

    # --- Blend ------------------------------------------------------------
    # Mostly noise, with a faint trace of the cloned image (weight alpha).
    noisy_bg = (alpha * cloned + (1 - alpha) * noise_rgb).astype(np.uint8)

    # Normalize mask to [0, 1] float if it is in [0, 255].
    if mask_np.max() > 1:
        mask_norm = (mask_np / 255.0).astype(np.float32)
    else:
        mask_norm = mask_np.astype(np.float32)
    mask_norm = np.stack([mask_norm] * 3, axis=-1)

    # Keep original pixels where mask=0, use noisy_bg where mask=1.
    combined = ((1 - mask_norm) * foreground_np + mask_norm * noisy_bg).astype(np.uint8)
    return Image.fromarray(combined)
def main():
    """Run the sample outpainting job end to end.

    Downloads the example mask and image, builds the noise-filled input with
    ``prepare_masked_image``, runs ``FluxFillPipeline`` on a CUDA device with a
    fixed seed, and writes the result to ``flux-fill-dev.png``.
    """
    # Sample assets: the mask first, then the picture it belongs to.
    region_mask = load_image("https://huggingface.co/rkv1990/FLUX.1-Fill-dev-outpainting/resolve/main/beauty-products-mask.png")
    source_image = load_image("https://huggingface.co/rkv1990/FLUX.1-Fill-dev-outpainting/resolve/main/beauty-products.png")

    # Pre-fill the masked area with noise before handing the image to the model.
    noised_input = prepare_masked_image(foreground=source_image, mask=region_mask)

    # Load the fill pipeline in bfloat16 and move it onto the GPU.
    pipeline = FluxFillPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-Fill-dev",
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to("cuda")

    # A CPU generator seeded with 0 drives the pipeline's sampling.
    seeded_generator = torch.Generator(device="cpu").manual_seed(0)
    result = pipeline(
        prompt="a white paper cup",
        image=noised_input,
        mask_image=region_mask,
        height=1024,
        width=1024,
        guidance_scale=30,
        num_inference_steps=50,
        max_sequence_length=512,
        generator=seeded_generator,
    )
    filled_image = result.images[0]

    # Persist the first generated image and report where it went.
    filled_image.save("flux-fill-dev.png")
    print("Saved output to flux-fill-dev.png")


if __name__ == "__main__":
    main()
| ``` | |
| To learn more, check out the [diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) documentation. | |
| --- | |
| language: | |
| - en | |
| license: other | |
| license_name: flux-1-dev-non-commercial-license | |
| license_link: LICENSE.md | |
| tags: | |
| - image-generation | |
| - flux | |
| - inpainting | |
| - diffusion-single-file | |
| --- |