# Hugging Face Space: gokaygokay/Chroma (Running on Zero)
# File size: 14,113 Bytes

import os
import random
import sys
from typing import Sequence, Mapping, Any, Union
import torch
import gradio as gr
from PIL import Image
import numpy as np
from huggingface_hub import hf_hub_download
import spaces
from comfy import model_management

# File name of the Chroma checkpoint; used both for the download below and
# as the UNet name when the model is loaded further down in this file.
CHROMA_VERSION = "chroma-unlocked-v33.safetensors"

# Download required models (text encoder, VAE, diffusion weights) into the
# local "models/..." folders.
t5_path = hf_hub_download(
    repo_id="comfyanonymous/flux_text_encoders",
    filename="t5xxl_fp8_e4m3fn.safetensors",
    local_dir="models/text_encoders/",
)
vae_path = hf_hub_download(
    repo_id="lodestones/Chroma",
    filename="ae.safetensors",
    local_dir="models/vae",
)
unet_path = hf_hub_download(
    repo_id="lodestones/Chroma",
    filename=CHROMA_VERSION,
    local_dir="models/unet",
)

# Example prompts with their parameters
# Every example uses the same negative prompt and a square 1024 canvas, so
# only the prompt, step count, CFG value and seed are spelled out per row.
_EXAMPLE_NEGATIVE_PROMPT = "low quality, ugly, unfinished, out of focus, deformed, disfigure, blurry, smudged, restricted palette, flat colors"
_EXAMPLE_SIDE = 1024


def _example_row(prompt, steps, cfg, seed):
    """Build one example row.

    The row order is prompt, negative prompt, width, height, steps, cfg,
    seed, which is the order of the input components the examples feed.
    """
    return [
        prompt,
        _EXAMPLE_NEGATIVE_PROMPT,
        _EXAMPLE_SIDE,
        _EXAMPLE_SIDE,
        steps,
        cfg,
        seed,
    ]


EXAMPLES = [
    _example_row(
        "A high-fashion close-up portrait of a blonde woman in clear sunglasses. The image uses a bold teal and red color split for dramatic lighting. The background is a simple teal-green. The photo is sharp and well-composed, and is designed for viewing with anaglyph 3D glasses for optimal effect. It looks professionally done.",
        36, 3.0, 229,
    ),
    _example_row(
        "A young man smiles broadly while wearing a blue captain's hat and blue mirrored aviator sunglasses. The man is the sharp focus of the image, set against a nicely blurred waterfront background featuring boats and buildings. The shot is cheerful, playful, and amateur looking.",
        40, 2.1, 424,
    ),
    _example_row(
        "A Shiba Inu smiles broadly while wearing a blue captain's hat and blue mirrored aviator sunglasses. The dog is the sharp focus of the image, set against a nicely blurred waterfront background featuring boats and buildings. The shot is cheerful, playful, and amateur looking.",
        28, 2.9, 413,
    ),
    _example_row(
        "A painting shows a bearded man in a beige shirt gripping a rope on a sailboat, battling choppy seas under a dramatic sunset. Another figure appears in the distant background, and the style is impressionistic with clear brushstrokes and high skill. The overall effect is one of adventure and freedom.",
        33, 4.6, 281,
    ),
    _example_row(
        "A painting shows an anthro male wolf sitting on a rock in a forest, anthro, male, solo, canine, canid, werewolf, muscular anthro, black body, black fur, reaching at viewer with his paws, detailed background, outdoors, outside. The scene is detailed, with a shallow depth of field focusing on the anthro wolf. The overall effect is high fantasy and cinematic.",
        26, 3.8, 326,
    ),
    _example_row(
        "A cute cartoon illustration of A massive black dragon with cosmic galaxy-patterned wings sits atop a castle overlooking a medieval port town at sunset. The style is bright and pastel-toned with soft lines. The artwork is clean and fairly well-executed.",
        41, 5.7, 244,
    ),
    _example_row(
        "A soft longhaired male anthro dog, with a white chest patch and bright yellow-green eyes, sits on a light grey kitchen island in a sun-drenched NYC apartment. The city view is blurred, showcasing a shallow depth of field. The cat has a dark collar with a small bell. The photo's aesthetic is clean, modern, and slightly dramatic due to the lighting.",
        26, 4.7, 242,
    ),
    _example_row(
        "A cheerful collage showcases 4 individual leopard geckos, each in a separate square. The geckos vary in color and pattern, but all are shown close-up and looking directly at the camera. Backgrounds differ, illustrating various terrarium elements and simple surfaces. High-quality shots, bright lighting, amateur photo quality. The overall aesthetic is heartwarming and simple.",
        26, 4.7, 249,
    ),
    _example_row(
        "A cheerful collage showcases 4 cute anthro wolf nick wilde plushie, each in a separate square. Backgrounds differ, illustrating various bed and simple surfaces. High-quality shots, bright lighting, amateur photo quality. The overall aesthetic is heartwarming and simple.",
        26, 5.4, 263,
    ),
    _example_row(
        "this picture depics a telegram sticker with thick white outlines of a male anthro fox",
        28, 3.9, 399,
    ),
    _example_row(
        "Extreme close-up of a single tiger eye, direct frontal view. Detailed iris and pupil. Sharp focus on eye texture and color. Natural lighting to capture authentic eye shine and depth. The word \"Chroma\" is painted over it in big, white brush strokes with visible texture.",
        26, 4.0, 9,
    ),
]

# Utility functions
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return ``obj[index]``, or ``obj["result"][index]`` if that raises ``KeyError``.

    Args:
        obj: A sequence or mapping to read from.
        index: The position (or key) to look up.

    Returns:
        The element found directly under ``index``, otherwise the element
        at ``index`` inside the value stored under the ``"result"`` key.

    Raises:
        KeyError: If the fallback ``"result"`` lookup fails as well.
        IndexError: If a sequence is indexed out of range (not intercepted).
    """
    try:
        value = obj[index]
    except KeyError:
        # Mapping without a direct entry: the payload sits under "result".
        value = obj["result"][index]
    return value

def find_path(name: str, path: Union[str, None] = None) -> Union[str, None]:
    """Search ``path`` and each of its ancestors for an entry called ``name``.

    The search starts in ``path`` (the current working directory when
    ``path`` is ``None``) and moves up one directory at a time until the
    entry is found or the top of the tree is reached.

    Args:
        name: File or directory name to look for (a single path component).
        path: Directory to start from; defaults to ``os.getcwd()``.

    Returns:
        ``os.path.join(<directory>, name)`` for the first directory that
        lists ``name``, or ``None`` when no directory on the way up does.
    """
    current = os.getcwd() if path is None else path
    while True:
        try:
            entries = os.listdir(current)
        except OSError:
            # A directory that is missing or cannot be listed must not abort
            # the whole search; treat it as empty and keep walking upwards.
            entries = []
        if name in entries:
            path_name = os.path.join(current, name)
            print(f"{name} found: {path_name}")
            return path_name
        parent_directory = os.path.dirname(current)
        if parent_directory == current:
            # dirname() no longer changes the path: nothing is left above.
            return None
        current = parent_directory

def add_comfyui_directory_to_sys_path() -> None:
    """Append the ``ComfyUI`` directory located by ``find_path`` to ``sys.path``.

    Nothing happens when no entry named ``ComfyUI`` is found or when the
    entry that was found is not a directory.
    """
    comfyui_dir = find_path("ComfyUI")
    if comfyui_dir is None or not os.path.isdir(comfyui_dir):
        return
    sys.path.append(comfyui_dir)
    print(f"'{comfyui_dir}' added to sys.path")

def add_extra_model_paths() -> None:
    """Load ``extra_model_paths.yaml`` through ``load_extra_path_config`` if present.

    ``load_extra_path_config`` is imported from ``main`` and, when that
    import fails, from ``utils.extra_config``. A message is printed instead
    when ``find_path`` cannot locate the config file.
    """
    try:
        from main import load_extra_path_config
    except ImportError:
        from utils.extra_config import load_extra_path_config

    config_file = find_path("extra_model_paths.yaml")
    if config_file is None:
        print("Could not find the extra_model_paths config file.")
        return
    load_extra_path_config(config_file)

def import_custom_nodes() -> None:
    """Set up a prompt server and queue, then call ``init_extra_nodes``.

    All imports are local to the function, so ``execution``, ``nodes`` and
    ``server`` are only resolved when it is called. At module level that
    happens after ``add_comfyui_directory_to_sys_path()`` has run.
    """
    import asyncio
    import execution
    from nodes import init_extra_nodes
    import server
    # A fresh event loop is created and installed as the current loop before
    # it is handed to the prompt server.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    server_instance = server.PromptServer(loop)
    # The queue object is constructed but not kept.
    # NOTE(review): presumably construction registers it with the server — confirm.
    execution.PromptQueue(server_instance)
    # NOTE(review): presumably registers the extra/custom node classes that are
    # later looked up in NODE_CLASS_MAPPINGS — confirm against ComfyUI.
    init_extra_nodes()

# Initialize paths
# Order matters: the ComfyUI directory is appended to sys.path first, because
# the two following calls import modules inside their bodies (main or
# utils.extra_config, then execution, nodes and server).
# NOTE(review): presumably those modules live in the ComfyUI directory — confirm.
add_comfyui_directory_to_sys_path()
add_extra_model_paths()
import_custom_nodes()

# Import all necessary nodes
from nodes import (
    NODE_CLASS_MAPPINGS,
    CLIPTextEncode,
    CLIPLoader,
    VAEDecode,
    UNETLoader,
    VAELoader,
    SaveImage,
)

# Initialize all model loaders outside the function
# The node objects are created once at import time; generate_image reuses
# them on every call instead of constructing them again.
# Nodes that are not imported by name above are looked up by their string
# key in NODE_CLASS_MAPPINGS and instantiated from the class found there.
randomnoise = NODE_CLASS_MAPPINGS["RandomNoise"]()
emptysd3latentimage = NODE_CLASS_MAPPINGS["EmptySD3LatentImage"]()
ksamplerselect = NODE_CLASS_MAPPINGS["KSamplerSelect"]()
cliploader = CLIPLoader()
t5tokenizeroptions = NODE_CLASS_MAPPINGS["T5TokenizerOptions"]()
cliptextencode = CLIPTextEncode()
unetloader = UNETLoader()
vaeloader = VAELoader()
cfgguider = NODE_CLASS_MAPPINGS["CFGGuider"]()
basicscheduler = NODE_CLASS_MAPPINGS["BasicScheduler"]()
samplercustomadvanced = NODE_CLASS_MAPPINGS["SamplerCustomAdvanced"]()
vaedecode = VAEDecode()
saveimage = SaveImage()

# Load models
# Each loader result is kept whole; consumers pick element 0 out of it with
# get_value_at_index(..., 0).
# The file names passed here are the same names that were downloaded with
# hf_hub_download at the top of the file.
# NOTE(review): presumably the loaders resolve these names inside the
# models/ folders filled by those downloads — confirm.
cliploader_78 = cliploader.load_clip(
    clip_name="t5xxl_fp8_e4m3fn.safetensors", type="chroma", device="default"
)
# Tokenizer options are applied on top of the loaded text encoder; the
# result is what generate_image encodes prompts with.
t5tokenizeroptions_82 = t5tokenizeroptions.set_options(
    min_padding=1, min_length=0, clip=get_value_at_index(cliploader_78, 0)
)
unetloader_76 = unetloader.load_unet(
    unet_name=CHROMA_VERSION, weight_dtype="fp8_e4m3fn"
)
vaeloader_80 = vaeloader.load_vae(vae_name="ae.safetensors")

# Results of every loader that reads a safetensors file.
model_loaders = [cliploader_78, unetloader_76, vaeloader_80]

# Only the first element of each loader result is a candidate for GPU loading.
_primary_outputs = [loader_result[0] for loader_result in model_loaders]

# Drop candidates that are plain dicts, or whose `patcher` attribute is a
# dict; for the remaining ones prefer the `patcher` attribute when it exists
# and fall back to the object itself otherwise.
valid_models = [
    getattr(candidate, 'patcher', candidate)
    for candidate in _primary_outputs
    if not (
        isinstance(candidate, dict)
        or isinstance(getattr(candidate, 'patcher', None), dict)
    )
]

# Finally, hand the selected models to ComfyUI's model management.
model_management.load_models_gpu(valid_models)

def _resolve_seed(seed):
    """Turn the seed value received from the UI into an integer noise seed.

    ``None`` (an emptied seed field) and ``-1`` both request a random seed.
    Any other value is converted with ``int()`` so a float coming from the
    number field (for example ``229.0``) becomes a proper integer.
    """
    if seed is None or seed == -1:
        # SystemRandom does not depend on the global generator, which
        # generate_image reseeds on every call; drawing from the global
        # generator would make "random" seeds predictable after any run with
        # a fixed seed. The upper bound is the largest 64-bit value
        # (randint is inclusive, so 2**64 itself would be out of range).
        return random.SystemRandom().randint(1, 2**64 - 1)
    return int(seed)


@spaces.GPU
def generate_image(prompt, negative_prompt, width, height, steps, cfg, seed):
    """Generate one image with the preloaded Chroma workflow and save it.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        width: Latent canvas width passed to the empty-latent node.
        height: Latent canvas height passed to the empty-latent node.
        steps: Number of scheduler steps.
        cfg: Guidance scale passed to the CFG guider.
        seed: Noise seed; ``-1`` or ``None`` selects a random seed.

    Returns:
        Path of the image file written by the ``SaveImage`` node.
    """
    with torch.inference_mode():
        seed = _resolve_seed(seed)
        # Kept from the original flow: also seed Python's global generator.
        random.seed(seed)

        randomnoise_68 = randomnoise.get_noise(noise_seed=seed)
        # UI sliders may deliver floats; the sizes and step count are integers.
        emptysd3latentimage_69 = emptysd3latentimage.generate(
            width=int(width), height=int(height), batch_size=1
        )
        ksamplerselect_72 = ksamplerselect.get_sampler(sampler_name="euler")

        # Both prompts are encoded with the same text encoder (tokenizer
        # options already applied at module level).
        text_encoder = get_value_at_index(t5tokenizeroptions_82, 0)
        cliptextencode_74 = cliptextencode.encode(
            text=prompt,
            clip=text_encoder,
        )
        cliptextencode_75 = cliptextencode.encode(
            text=negative_prompt,
            clip=text_encoder,
        )

        cfgguider_73 = cfgguider.get_guider(
            cfg=cfg,
            model=get_value_at_index(unetloader_76, 0),
            positive=get_value_at_index(cliptextencode_74, 0),
            negative=get_value_at_index(cliptextencode_75, 0),
        )

        basicscheduler_84 = basicscheduler.get_sigmas(
            scheduler="beta",
            steps=int(steps),
            denoise=1,
            model=get_value_at_index(unetloader_76, 0),
        )

        samplercustomadvanced_67 = samplercustomadvanced.sample(
            noise=get_value_at_index(randomnoise_68, 0),
            guider=get_value_at_index(cfgguider_73, 0),
            sampler=get_value_at_index(ksamplerselect_72, 0),
            sigmas=get_value_at_index(basicscheduler_84, 0),
            latent_image=get_value_at_index(emptysd3latentimage_69, 0),
        )

        vaedecode_79 = vaedecode.decode(
            samples=get_value_at_index(samplercustomadvanced_67, 0),
            vae=get_value_at_index(vaeloader_80, 0),
        )

        # Save image using SaveImage node with simple string prefix
        saved = saveimage.save_images(
            filename_prefix="Chroma_Generated",
            images=get_value_at_index(vaedecode_79, 0),
        )

        # Build the path from what the node reports instead of assuming a
        # flat "output/" folder relative to the working directory.
        # NOTE(review): SaveImage is expected to expose `output_dir` and to
        # report a `subfolder` per image; the fallbacks reproduce the
        # previous "output/<filename>" result when either is missing.
        saved_image = saved["ui"]["images"][0]
        output_dir = getattr(saveimage, "output_dir", "output")
        return os.path.join(
            output_dir,
            saved_image.get("subfolder", ""),
            saved_image["filename"],
        )

# Create Gradio interface
# Layout: prompt and sampling controls in the left column, the generated
# image in the right column, and the example prompts below both.
with gr.Blocks() as app:
    gr.Markdown("""
# Chroma

Model: [Chroma](https://huggingface.co/lodestones/Chroma) by [lodestones](https://huggingface.co/lodestones)

Run any ComfyUI Workflow on Spaces: [ComfyUI Workflows](https://huggingface.co/blog/run-comfyui-workflows-on-spaces)

Space Author: [GitHub](https://github.com/gokayfem) | [X.com](https://x.com/gokayfem)
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Enter your prompt here...",
                lines=3
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="Enter negative prompt here...",
                value="low quality, ugly, unfinished, out of focus, deformed, disfigure, blurry, smudged, restricted palette, flat colors",
                lines=2
            )

            with gr.Row():
                width = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    value=1024,
                    step=64,
                    label="Width"
                )
                height = gr.Slider(
                    minimum=512,
                    maximum=2048,
                    value=1024,
                    step=64,
                    label="Height"
                )

            with gr.Row():
                steps = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=26,
                    step=1,
                    label="Steps"
                )
                # A 0.1 step lets the slider represent the CFG values used by
                # the examples (2.1, 2.9, 4.6, ...); a 0.5 step could not.
                cfg = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=4,
                    step=0.1,
                    label="CFG Scale"
                )
                # precision=0 makes the field deliver whole numbers, which is
                # what a noise seed has to be.
                seed = gr.Number(
                    value=-1,
                    precision=0,
                    label="Seed (-1 for random)"
                )

            generate_btn = gr.Button("Generate")

        with gr.Column():
            output_image = gr.Image(label="Generated Image")

    # The input order must match the parameter order of generate_image.
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt, negative_prompt, width, height, steps, cfg, seed],
        outputs=[output_image]
    )

    # Add examples section
    # Each EXAMPLES row lists its values in the same order as `inputs`.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[prompt, negative_prompt, width, height, steps, cfg, seed],
        outputs=[output_image],
        fn=generate_image,
        cache_examples=True,
        label="Example Prompts - Click to try!"
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
# NOTE(review): share=True is passed to launch(); on a hosted Space this is
# presumably unnecessary — confirm against the Gradio launch documentation.
if __name__ == "__main__":
    app.launch(share=True)