"""Gradio web UI for Stable Diffusion text-to-image, image-to-image,
inpainting, image tagging ("Describe") and a simple img2img-based upscaler.

All checkpoints listed in ``model_ids`` are loaded once at start-up; the
image-to-image and inpainting pipelines are derived from those loaded
pipelines on demand via ``from_pipe``.
"""

# NOTE: `spaces` is deliberately kept as the first import, as in the original
# file, in case the ZeroGPU setup depends on import order.
import spaces
import gradio as gr
from gradio_imageslider import ImageSlider
import os
from diffusers import (
    AutoPipelineForImage2Image,
    AutoPipelineForInpainting,
    AutoPipelineForText2Image,
    StableDiffusionPipeline,
    EulerDiscreteScheduler,
    EulerAncestralDiscreteScheduler,
    LMSDiscreteScheduler,
    DDIMScheduler,
    PNDMScheduler,
    HeunDiscreteScheduler,
    DPMSolverMultistepScheduler,
    DDPMScheduler,
    DPMSolverSinglestepScheduler,
    IPNDMScheduler,
)
import torch
from PIL import Image
import numpy as np

# The "Describe" tab below reads `modules.wdtagger.*`; without this import the
# UI construction fails with NameError.
# NOTE(review): assumes the project ships a `modules/wdtagger.py` -- confirm.
import modules.wdtagger

# List of selectable checkpoints.
model_ids = [
    "cagliostrolab/animagine-xl-3.1",
    "cagliostrolab/animagine-xl-3.0",
    "prompthero/openjourney",
]

# Load all models at startup. AutoPipelineForText2Image selects the pipeline
# class per checkpoint, which matters because the list mixes SDXL and SD 1.x
# repositories.
pipelines = {
    model_id: AutoPipelineForText2Image.from_pretrained(
        model_id, torch_dtype=torch.float16
    ).to("cuda")
    for model_id in model_ids
}

# ControlNet conditioning types (currently not referenced by the UI below).
controlnet_types = [
    "Canny",           # Edge detection using Canny algorithm
    "Depth",           # Depth estimation from images
    "Normal",          # Normal map generation
    "Pose",            # Pose estimation
    "Segmentation",    # Semantic segmentation
    "Scribble",        # Sketch or scribble input
    "OpenPose",        # OpenPose-based pose estimation
    "MLSD",            # Line segment detection
    "HED",             # Holistically-Nested Edge Detection
    "Color",           # Colorization input
    "Binary",          # Binary mask input
    "Scribble+Edge",   # Combination of scribble and edge detection
    "OpenPose+Depth",  # Combination of OpenPose and depth estimation
]

# Explicit name -> class table, so a scheduler name coming from the client is
# never resolved against arbitrary module globals.
SCHEDULER_CLASSES = {
    scheduler_cls.__name__: scheduler_cls
    for scheduler_cls in (
        EulerDiscreteScheduler,
        EulerAncestralDiscreteScheduler,
        LMSDiscreteScheduler,          # Linear Multistep Methods
        DDIMScheduler,                 # Denoising Diffusion Implicit Models
        PNDMScheduler,                 # Pseudo Numerical Methods for Diffusion Models
        HeunDiscreteScheduler,         # Heun's method
        DPMSolverMultistepScheduler,   # Diffusion Probabilistic Model Solver
        DDPMScheduler,                 # Denoising Diffusion Probabilistic Models
        DPMSolverSinglestepScheduler,  # Single-step version of DPM-Solver
        IPNDMScheduler,                # Improved Pseudo Numerical Methods for Diffusion Models
    )
}

# Scheduler names offered in the UI dropdowns (same order as before).
schedulers = list(SCHEDULER_CLASSES)


def _make_generator(seed):
    """Return a seeded torch generator, or ``None`` for a random seed.

    ``None`` (empty number field) and ``-1`` both mean "no fixed seed".
    """
    if seed is None or int(seed) == -1:
        return None
    return torch.manual_seed(int(seed))


def _prepare_pipeline(model, scheduler_name, auto_cls=None):
    """Return the pipeline for *model* with the requested scheduler installed.

    Args:
        model: Key into ``pipelines``; ``None`` falls back to ``model_ids[0]``
            (the initial value of the model dropdown).
        scheduler_name: Key into ``SCHEDULER_CLASSES``.
        auto_cls: Optional diffusers ``AutoPipelineFor*`` class. When given, a
            task-specific pipeline is built from the loaded one with
            ``from_pipe``; otherwise the loaded text-to-image pipeline itself
            is returned.

    Raises:
        gr.Error: If the model or scheduler name is unknown.
    """
    if model is None:
        model = model_ids[0]
    if model not in pipelines:
        raise gr.Error(f"Unknown model: {model!r}")
    scheduler_cls = SCHEDULER_CLASSES.get(scheduler_name)
    if scheduler_cls is None:
        raise gr.Error(f"Unknown scheduler: {scheduler_name!r}")

    base = pipelines[model]
    pipe = base if auto_cls is None else auto_cls.from_pipe(base)
    # When auto_cls is None this replaces the scheduler on the shared,
    # module-level pipeline (same as the original behaviour).
    pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
    return pipe


def _to_pil(img):
    """Convert a numpy array to a PIL image; pass other values through."""
    if isinstance(img, np.ndarray):
        return Image.fromarray(img)
    return img


def _split_editor_value(value):
    """Extract ``(init_image, mask_image)`` from a mask-editor payload.

    Accepts both payload shapes: a dict with ``"image"``/``"mask"`` keys, and
    a dict with ``"background"``/``"layers"`` keys, where the painted strokes
    live in the first layer and the mask is derived from that layer's alpha
    channel.

    Raises:
        gr.Error: If no image or no mask layer is present.
    """
    if not value:
        raise gr.Error("Please upload an image and paint a mask first.")

    if value.get("image") is not None and value.get("mask") is not None:
        return (
            _to_pil(value["image"]).convert("RGB"),
            _to_pil(value["mask"]).convert("L"),
        )

    background = value.get("background")
    layers = value.get("layers") or []
    if background is None or not layers:
        raise gr.Error("Please upload an image and paint a mask first.")

    alpha = _to_pil(layers[0]).convert("RGBA").getchannel("A")
    # Any painted (non-transparent) pixel becomes white in the mask.
    mask_image = alpha.point(lambda a: 255 if a > 0 else 0)
    return _to_pil(background).convert("RGB"), mask_image


@spaces.GPU()
def generative_t2i(prompt, neg_prompt, width, height, scheduler, num_steps,
                   batch_size, batch_count, cfg_scale, seed, use_hires,
                   upscaler, upscale_by, hires_steps, denois_strength, model):
    """Generate images from a text prompt.

    Runs the selected pipeline ``batch_count`` times, requesting
    ``batch_size`` images per run.

    Args:
        prompt, neg_prompt: Positive and negative prompt text.
        width, height: Output size in pixels.
        scheduler: Name of the scheduler class to use.
        num_steps: Number of inference steps.
        batch_size: Images per pipeline call.
        batch_count: Number of pipeline calls.
        cfg_scale: Guidance scale.
        seed: Seed; ``-1`` (or empty) means unseeded.
        use_hires, upscaler, upscale_by, hires_steps, denois_strength:
            Accepted from the UI but currently unused.
        model: Checkpoint id (key of ``pipelines``).

    Returns:
        ``(images, metadata)`` where ``metadata`` holds one
        ``{"prompt", "neg_prompt"}`` dict per returned image.
    """
    generator = _make_generator(seed)
    pipeline = _prepare_pipeline(model, scheduler)

    all_images = []
    metadata_list = []
    for _ in range(int(batch_count)):
        images = pipeline(
            prompt,
            negative_prompt=neg_prompt,
            width=int(width),
            height=int(height),
            num_inference_steps=int(num_steps),
            guidance_scale=cfg_scale,
            # The batch-size argument is `num_images_per_prompt`
            # (the original passed `num_images`).
            num_images_per_prompt=int(batch_size),
            generator=generator,
        ).images
        all_images.extend(images)
        # One metadata entry per image actually returned.
        metadata_list.extend(
            {"prompt": prompt, "neg_prompt": neg_prompt} for _ in images
        )

    return all_images, metadata_list


@spaces.GPU()
def generative_i2i(image, prompt, neg_prompt, scheduler, num_steps, strength,
                   cfg_scale, seed, model):
    """Transform *image* according to the prompt and return one result image.

    Args:
        image: Input image (PIL image or numpy array).
        prompt, neg_prompt: Positive and negative prompt text.
        scheduler: Name of the scheduler class to use.
        num_steps: Number of inference steps.
        strength: Passed to the pipeline as ``strength``.
        cfg_scale: Guidance scale.
        seed: Seed; ``-1`` (or empty) means unseeded.
        model: Checkpoint id (key of ``pipelines``).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    generator = _make_generator(seed)
    pipeline = _prepare_pipeline(model, scheduler, AutoPipelineForImage2Image)
    init_image = _to_pil(image).convert("RGB")

    images = pipeline(
        prompt=prompt,
        negative_prompt=neg_prompt,
        image=init_image,
        strength=strength,
        num_inference_steps=int(num_steps),
        guidance_scale=cfg_scale,
        generator=generator,
    ).images
    return images[0]


@spaces.GPU()
def intpaint_func(image, prompt_inpainting, neg_prompt_inpainting,
                  guidance_scale, steps, strength, scheduler, model=None):
    """Inpaint the masked region of an image and return the result.

    Args:
        image: Mask-editor payload (see ``_split_editor_value``).
        prompt_inpainting, neg_prompt_inpainting: Prompt texts.
        guidance_scale: Guidance scale.
        steps: Number of inference steps.
        strength: Passed to the pipeline as ``strength``.
        scheduler: Name of the scheduler class to use.
        model: Checkpoint id; defaults to the first entry of ``model_ids``.
    """
    generator = torch.manual_seed(42)  # Fixed seed; could become an input later.
    pipeline = _prepare_pipeline(model, scheduler, AutoPipelineForInpainting)
    init_image, mask_image = _split_editor_value(image)

    images = pipeline(
        prompt=prompt_inpainting,
        negative_prompt=neg_prompt_inpainting,
        image=init_image,
        mask_image=mask_image,
        strength=strength,
        num_inference_steps=int(steps),
        guidance_scale=guidance_scale,
        generator=generator,
    ).images
    return images[0]


@spaces.GPU()
def gradio_process_image(image, resolution, num_inference_steps, strength,
                         hdr, guidance_scale, controlnet_strength,
                         scheduler_name, model=None):
    """Resize *image* to a square and refine it with an img2img pass.

    Args:
        image: Input PIL image.
        resolution: Side length of the square the image is resized to.
        num_inference_steps: Number of inference steps.
        strength: Passed to the pipeline as ``strength``.
        hdr, controlnet_strength: Accepted from the UI but currently unused.
        guidance_scale: Guidance scale.
        scheduler_name: Name of the scheduler class to use.
        model: Checkpoint id; defaults to the first entry of ``model_ids``.

    Returns:
        ``(original_image, enhanced_image)`` for the before/after slider.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    generator = torch.manual_seed(42)  # Fixed seed; could become an input later.
    pipeline = _prepare_pipeline(model, scheduler_name, AutoPipelineForImage2Image)
    side = int(resolution)
    init_image = image.convert("RGB").resize((side, side))  # Basic upscaling

    enhanced_image = pipeline(
        prompt="enhance",  # A simple prompt for upscaling
        negative_prompt="",
        image=init_image,
        strength=strength,
        num_inference_steps=int(num_inference_steps),
        guidance_scale=guidance_scale,
        generator=generator,
    ).images[0]
    return (image, enhanced_image)


ABOUT_MARKDOWN = """
# About

This user interface utilizes the **Diffusers** library to provide a comprehensive platform for generating and manipulating images. The interface is designed to be intuitive, allowing users to experiment with various image generation techniques and settings without needing deep technical knowledge.

**Gradio** is used to create this interactive web application. Gradio simplifies the integration of machine learning models into a web interface, offering a seamless experience for users.

## Describe

The "Describe" functionality is designed to help users create prompts for image generation. It is inspired by the [wd-tagger](https://huggingface.co/spaces/SmilingWolf/wd-tagger) space by SmilingWolf. This tool allows users to annotate and describe images using various models to generate detailed tags and descriptions.

With this feature, you can:

- Automatically generate descriptive tags for your images.
- Explore the potential tags and categories recognized by the model.
- Enhance the metadata of your images for better organization and searchability.

Visit the original [wd-tagger](https://huggingface.co/spaces/SmilingWolf/wd-tagger) for more details.

## Upscale

The "Upscale" feature enables users to improve the resolution of their images, making them clearer and more detailed. It is inspired by the [TileUpscalerV2](https://huggingface.co/spaces/gokaygokay/TileUpscalerV2) space by gokaygokay.

With this feature, you can:

- Increase the resolution of your generated images.
- Enhance the quality of images without losing important details.
- Make your images suitable for printing or high-resolution displays.

Visit the original [TileUpscalerV2](https://huggingface.co/spaces/gokaygokay/TileUpscalerV2) for more details.

## Contribute

We are always looking for passionate individuals who are interested in contributing to this project. Whether you're a developer, designer, or just someone with great ideas, your help is welcome!

This project is open-source and licensed under the **Apache 2.0** License. Contributions are voluntary and there is no financial compensation, but you will gain valuable experience and the satisfaction of knowing your work is helping to advance the field of machine learning and AI.

If you're interested in contributing, please get in touch with us. We appreciate any help you can provide!

### How to Contribute

1. Fork the repository on GitHub.
2. Create a new branch for your feature or bugfix.
3. Make your changes and commit them with clear messages.
4. Submit a pull request for review.

Thank you for considering contributing to our project!
"""


with gr.Blocks(css="style.css") as app:
    # Model dropdown, placed outside the tabs so every tab shares it.
    with gr.Column():
        with gr.Row():
            checkpoint = gr.Dropdown(choices=model_ids, label="Model", value=model_ids[0])

    # Text-to-Image tab
    with gr.Tab("Text-to-Image"):
        with gr.Row(elem_id="mw-full"):
            with gr.Column(scale=1):
                prompt_t2i = gr.Textbox(label="Prompt", placeholder="Enter Prompt", lines=2, elem_id="prompt-input")
                neg_prompt_t2i = gr.Textbox(label="Negative prompt", placeholder="Enter Negative Prompt (optional)", lines=2, elem_id="neg-prompt-input")
            generate_t2i_button = gr.Button("Generate", elem_id="generate_button", scale=0.13)

        with gr.Row():
            with gr.Column():
                with gr.Row():
                    scheduler_t2i = gr.Dropdown(choices=schedulers, label="Sampling method", value=schedulers[0])
                    seed_t2i = gr.Number(label="Seed", value=-1)
                with gr.Row():
                    width_t2i = gr.Slider(minimum=128, maximum=2048, step=128, label="Width", value=512)
                    batch_size_t2i = gr.Slider(minimum=1, maximum=4, step=1, label="Batch size", value=1)
                with gr.Row():
                    height_t2i = gr.Slider(minimum=128, maximum=2048, step=128, label="Height", value=512)
                    batch_count_t2i = gr.Slider(minimum=1, maximum=4, step=1, label="Batch Count", value=1)
                with gr.Row():
                    num_steps_t2i = gr.Slider(minimum=1, maximum=100, step=1, label="Sampling steps", value=20)
                    cfg_scale_t2i = gr.Slider(minimum=1, maximum=20, step=0.5, label="CFG Scale", value=7)
                with gr.Accordion("Hires. fix", open=False):
                    use_hires_t2i = gr.Checkbox(label="Use Hires?", value=False, scale=0)
                    with gr.Row():
                        upscaler_t2i = gr.Dropdown(choices=schedulers, label="Upscaler", value=schedulers[0])
                        upscale_by_t2i = gr.Slider(minimum=1, maximum=8, step=0.5, label="Upscale by", value=2)
                    with gr.Row():
                        hires_steps_t2i = gr.Slider(minimum=1, maximum=50, step=1, label="Hires Steps", value=20)
                        denois_strength_t2i = gr.Slider(minimum=0, maximum=1, step=0.01, label="Denoising Strength", value=0.7)

            with gr.Column():
                # Gallery for the generated images
                output_gallery_t2i = gr.Gallery(label="Image Results")
                # JSON metadata shown below the gallery
                output_text_t2i = gr.JSON(label="Metadata")

        generate_t2i_button.click(
            fn=generative_t2i,
            inputs=[prompt_t2i, neg_prompt_t2i, width_t2i, height_t2i, scheduler_t2i, num_steps_t2i,
                    batch_size_t2i, batch_count_t2i, cfg_scale_t2i, seed_t2i, use_hires_t2i,
                    upscaler_t2i, upscale_by_t2i, hires_steps_t2i, denois_strength_t2i, checkpoint],
            outputs=[output_gallery_t2i, output_text_t2i],
        )

    # Image-to-Image tab
    with gr.Tab("Image-to-Image"):
        with gr.Row():
            with gr.Column(scale=1):
                prompt_input_i2i = gr.Textbox(label="Prompt", placeholder="Masukkan prompt teks", lines=2, elem_id="prompt-input")
                neg_prompt_input_i2i = gr.Textbox(label="Neg Prompt", placeholder="Masukkan negasi prompt", lines=2, elem_id="neg-prompt-input")
            generate_button_i2i_top = gr.Button("Generate", elem_id="generate-button", scale=0.13)

        with gr.Row():
            with gr.Column():
                # type="pil" because generative_i2i calls PIL methods on the input.
                image_input_i2i = gr.Image(label="Unggah Gambar", type="pil")
                generate_button_i2i = gr.Button("Generate")
                with gr.Row():
                    scheduler_i2i = gr.Dropdown(choices=schedulers, label="Sampling method", value=schedulers[0])
                    seed_i2i = gr.Number(label="Seed", value=-1)
                with gr.Row():
                    steps_i2i = gr.Slider(minimum=1, maximum=100, step=1, label="Steps", value=20)
                    cfg_scale_i2i = gr.Slider(minimum=1, maximum=24, step=1, label="CFG Scale", value=7)
                with gr.Row():
                    strength_i2i = gr.Slider(minimum=0, maximum=1, step=0.1, label="Strength", value=0.6)

            with gr.Column():
                output_image_i2i = gr.Image(label="Hasil Gambar")

        # Both "Generate" buttons on this tab trigger the same handler.
        for i2i_trigger in (generate_button_i2i, generate_button_i2i_top):
            i2i_trigger.click(
                fn=generative_i2i,
                inputs=[image_input_i2i, prompt_input_i2i, neg_prompt_input_i2i, scheduler_i2i,
                        steps_i2i, strength_i2i, cfg_scale_i2i, seed_i2i, checkpoint],
                outputs=output_image_i2i,
            )

    # Inpainting tab
    with gr.Tab("Inpainting"):
        with gr.Row():
            with gr.Column(scale=1):
                prompt_inpainting = gr.Textbox(label="Prompt", placeholder="Enter Prompt", lines=2, elem_id="prompt-input")
                neg_prompt_inpainting = gr.Textbox(label="Negative prompt", placeholder="Enter Negative Prompt (optional)", lines=2, elem_id="neg-prompt-input")
            generate_inpainting_button = gr.Button("Generate", elem_id="generate_button", scale=0.13)

        with gr.Row():
            with gr.Column():
                image_inpainting = gr.ImageMask(sources=["upload"], layers=False, transforms=[], format="png",
                                                label="Base image", show_label=True, type="pil")
                btn = gr.Button("Inpaint!", elem_id="run_button")
                with gr.Row():
                    scheduler_inpainting = gr.Dropdown(label="Schedulers", choices=schedulers, value=schedulers[0])
                with gr.Row():
                    strength_inpainting = gr.Slider(value=0.99, minimum=0.01, maximum=1.0, step=0.01, label="Strength")
                with gr.Row():
                    steps_inpainting = gr.Slider(value=20, minimum=10, maximum=30, step=1, label="Steps")
                    guidance_scale_inpainting = gr.Slider(value=7.5, minimum=1.0, maximum=20.0, step=0.1, label="Guidance_scale")

            with gr.Column():
                image_out = gr.Image(label="Output", elem_id="output-img")

        # Both buttons on this tab trigger the same handler; the selected
        # checkpoint is passed explicitly as the last input.
        for inpaint_trigger in (btn, generate_inpainting_button):
            inpaint_trigger.click(
                fn=intpaint_func,
                inputs=[image_inpainting, prompt_inpainting, neg_prompt_inpainting,
                        guidance_scale_inpainting, steps_inpainting, strength_inpainting,
                        scheduler_inpainting, checkpoint],
                outputs=[image_out],
            )

    # Describe tab
    with gr.Tab("Describe"):
        with gr.Row():
            with gr.Column():
                # Components
                image_describe = gr.Image(type="pil", image_mode="RGBA", label="Input")
                submit_button = gr.Button(value="Submit", variant="primary", size="lg")
                model_repo = gr.Dropdown(modules.wdtagger.dropdown_list, value=modules.wdtagger.dropdown_list[0], label="Model")
                general_thresh = gr.Slider(0, 1, step=modules.wdtagger.args.score_slider_step, value=modules.wdtagger.args.score_general_threshold, label="General Tags Threshold", scale=3)
                general_mcut_enabled = gr.Checkbox(value=False, label="Use MCut threshold", scale=1)
                character_thresh = gr.Slider(0, 1, step=modules.wdtagger.args.score_slider_step, value=modules.wdtagger.args.score_character_threshold, label="Character Tags Threshold", scale=3)
                character_mcut_enabled = gr.Checkbox(value=False, label="Use MCut threshold", scale=1)
                clear_button = gr.ClearButton(
                    components=[image_describe, model_repo, general_thresh, general_mcut_enabled,
                                character_thresh, character_mcut_enabled],
                    variant="secondary",
                    size="lg",
                )

            with gr.Column():
                sorted_general_strings = gr.Textbox(label="Output (string)")
                rating = gr.Label(label="Rating")
                character_res = gr.Label(label="Output (characters)")
                general_res = gr.Label(label="Output (tags)")

        clear_button.add([sorted_general_strings, rating, character_res, general_res])
        submit_button.click(
            modules.wdtagger.predictor.predict,
            inputs=[image_describe, model_repo, general_thresh, general_mcut_enabled,
                    character_thresh, character_mcut_enabled],
            outputs=[sorted_general_strings, rating, character_res, general_res],
        )

    # Upscale tab
    with gr.Tab("Upscale"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(type="pil", label="Input Image")
                run_button = gr.Button("Enhance Image")
                with gr.Row():
                    scheduler_name = gr.Dropdown(choices=schedulers, value=schedulers[0], label="Scheduler")
                with gr.Row():
                    resolution = gr.Slider(minimum=128, maximum=2048, value=1024, step=128, label="Resolution")
                    num_inference_steps = gr.Slider(minimum=1, maximum=50, value=20, step=1, label="Number of Inference Steps")
                with gr.Row():
                    hdr = gr.Slider(minimum=0, maximum=1, value=0, step=0.1, label="HDR Effect")
                    guidance_scale_upscale = gr.Slider(minimum=0, maximum=20, value=6, step=0.5, label="Guidance Scale")
                with gr.Row():
                    strength_upscale = gr.Slider(minimum=0, maximum=1, value=0.2, step=0.01, label="Strength")
                    controlnet_strength = gr.Slider(minimum=0.0, maximum=2.0, value=0.75, step=0.05, label="ControlNet Strength")

            with gr.Column():
                output_slider = ImageSlider(label="Before / After", type="numpy")

        run_button.click(
            fn=gradio_process_image,
            inputs=[input_image, resolution, num_inference_steps, strength_upscale, hdr,
                    guidance_scale_upscale, controlnet_strength, scheduler_name, checkpoint],
            outputs=output_slider,
        )

    # About tab
    with gr.Tab("About"):
        with gr.Row():
            gr.Markdown(ABOUT_MARKDOWN)

# Launch the interface
app.launch()