import gradio as gr
from diffusers import DiffusionPipeline
import torch
from PIL import Image, ImageOps
from transparent_background import Remover

# Initialize the Diffusion Pipeline
model_id = "yahoo-inc/photo-background-generation"
pipeline = DiffusionPipeline.from_pretrained(model_id, custom_pipeline=model_id)
pipeline = pipeline.to('cpu')  # Use CPU instead of CUDA


def resize_with_padding(img, expected_size):
    img.thumbnail((expected_size[0], expected_size[1]))
    delta_width = expected_size[0] - img.size[0]
    delta_height = expected_size[1] - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    padding = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(img, padding)


def process_image(input_image, prompt):
    img = resize_with_padding(input_image, (512, 512))

    # Load background detection model
    remover = Remover(mode='base')

    # Get foreground mask
    fg_mask = remover.process(img, type='map')

    seed = 13
    mask = ImageOps.invert(fg_mask)
    img = resize_with_padding(img, (512, 512))
    generator = torch.Generator(device='cpu').manual_seed(seed)  # Use CPU generator
    cond_scale = 1.0

    with torch.no_grad():  # Disable gradient calculations for inference
        controlnet_image = pipeline(
            prompt=prompt,
            image=img,
            mask_image=mask,
            control_image=mask,
            num_images_per_prompt=1,
            generator=generator,
            num_inference_steps=20,
            guess_mode=False,
            controlnet_conditioning_scale=cond_scale
        ).images[0]

    return controlnet_image


# Create Gradio interface
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Prompt")
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Image Processing with Diffusion Pipeline",
    description="Upload an image and enter a prompt to generate a new image using the diffusion model."
)

# Launch the interface
iface.launch()
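
# A minimal headless sketch (not part of the original script): process_image can
# also be called directly, without launching the Gradio UI, to generate a new
# background for a single image. The file names below are hypothetical placeholders.
#
#   input_img = Image.open("example_input.jpg").convert("RGB")
#   result = process_image(input_img, "a marble countertop in soft studio lighting")
#   result.save("example_output.png")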