#!/usr/bin/env python # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is import spaces import os os.putenv('PYTORCH_NVML_BASED_CUDA_CHECK','1') os.putenv('TORCH_LINALG_PREFER_CUSOLVER','1') alloc_conf_parts = [ 'expandable_segments:True', 'pinned_use_background_threads:True' # Specific to pinned memory. ] os.environ['PYTORCH_CUDA_ALLOC_CONF'] = ','.join(alloc_conf_parts) os.environ["SAFETENSORS_FAST_GPU"] = "1" os.putenv('HF_HUB_ENABLE_HF_TRANSFER','1') import random import uuid import gradio as gr import numpy as np from PIL import Image import torch from typing import Tuple import paramiko import datetime #import diffusers from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, UNet2DConditionModel, AutoencoderKL, EulerAncestralDiscreteScheduler from diffusers.models.attention_processor import Attention, AttnProcessor2_0 from transformers import CLIPTextModelWithProjection, CLIPTextModel, CLIPTokenizer torch.backends.cuda.matmul.allow_tf32 = False torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False torch.backends.cudnn.allow_tf32 = False torch.backends.cudnn.deterministic = False torch.backends.cudnn.benchmark = False torch.backends.cuda.preferred_blas_library="cublas" torch.backends.cuda.preferred_linalg_library="cusolver" torch.set_float32_matmul_precision("highest") FTP_HOST = 'noahcohn.com' FTP_USER = 'ford442' FTP_PASS = os.getenv("FTP_PASS") FTP_DIR = 'img.noahcohn.com/stablediff/' DESCRIPTIONXX = """ ## ⚡⚡⚡⚡ REALVISXL V5.0 BF16 (Tester G) ⚡⚡⚡⚡ """ examples = [ "Many apples splashed with drops of water within a fancy bowl 4k, hdr --v 6.0 --style raw", "A profile photo of a dog, brown background, shot on Leica M6 --ar 128:85 --v 6.0 --style raw", ] MODEL_OPTIONS = { "REALVISXL V5.0 BF16": "ford442/RealVisXL_V5.0_BF16", } MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096")) BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1")) style_list = [ { "name": "3840 x 2160", "prompt": "hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "2560 x 1440", "prompt": "hyper-realistic 4K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "HD+", "prompt": "hyper-realistic 2K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic", "negative_prompt": "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly", }, { "name": "Style Zero", "prompt": "{prompt}", "negative_prompt": "", }, ] styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list} DEFAULT_STYLE_NAME = "Style Zero" STYLE_NAMES = list(styles.keys()) HF_TOKEN = os.getenv("HF_TOKEN") device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") text_encoder = CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', low_cpu_mem_usage=False, subfolder='text_encoder')#.to(device=device, dtype=torch.bfloat16) text_encoder_2 = CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', low_cpu_mem_usage=False, subfolder='text_encoder_2',token=True)#.to(device=device, dtype=torch.bfloat16) def load_and_prepare_model(): proc=Attention(query_dim=4, upcast_attention=True, upcast_softmax = True, processor = AttnProcessor2_0) tokenizer_1 = CLIPTokenizer.from_pretrained('ford442/RealVisXL_V5.0_BF16', low_cpu_mem_usage=False, subfolder='tokenizer', use_fast=True) tokenizer_2 = CLIPTokenizer.from_pretrained('ford442/RealVisXL_V5.0_BF16', low_cpu_mem_usage=False, subfolder='tokenizer_2', use_fast=True) scheduler = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', low_cpu_mem_usage=False, subfolder='scheduler') vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", low_cpu_mem_usage=False, safety_checker=None, use_safetensors=False, torch_dtype=torch.float32) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16) unet = UNet2DConditionModel.from_pretrained("ford442/RealVisXL_V5.0_BF16", low_cpu_mem_usage=False, subfolder='unet', upcast_attention=True, attention_type='gated') pipe = StableDiffusionXLPipeline.from_pretrained( 'ford442/RealVisXL_V5.0_BF16', #torch_dtype=torch.bfloat16, token=True, add_watermarker=False, text_encoder=None, text_encoder_2=None, #tokenizer=tokenizer_1, #tokenizer_2=tokenizer_2, scheduler=scheduler, unet=unet, vae=None, ) #pipe.scheduler=scheduler #pipe.tokenizer=tokenizer_1 #pipe.tokenizer_2=tokenizer_2 #pipe.unet=unet #pipe.vae.do_resize=False #pipe.vae.vae_scale_factor=8 #pipe.to(device) #pipe.to(torch.bfloat16) print(f'init noise scale: {pipe.scheduler.init_noise_sigma}') pipe.watermark=None pipe.safety_checker=None #pipe.unet.to(memory_format=torch.channels_last) pipe.to(device,torch.bfloat16) pipe.vae = vaeXL.to(device) #.to('cpu') #.to(torch.bfloat16) pipe.vae.set_default_attn_processor() return pipe pipe = load_and_prepare_model() vaeX = AutoencoderKL.from_pretrained('stabilityai/stable-diffusion-xl-refiner-1.0',subfolder='vae', token=HF_TOKEN) # ,use_safetensors=True FAILS refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained( "ford442/stable-diffusion-xl-refiner-1.0-bf16", requires_aesthetics_score=True, token=HF_TOKEN ) refiner.vae.set_default_attn_processor() refiner.to(device,torch.bfloat16) refiner.vae=vaeX.to(device) MAX_SEED = np.iinfo(np.int32).max neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' " def upload_to_ftp(filename): try: transport = paramiko.Transport((FTP_HOST, 22)) if filename.endswith(".txt"): destination_path=FTP_DIR+'/txt/'+filename else: destination_path=FTP_DIR+filename transport.connect(username = FTP_USER, password = FTP_PASS) sftp = paramiko.SFTPClient.from_transport(transport) sftp.put(filename, destination_path) sftp.close() transport.close() print(f"Uploaded {filename} to FTP server") except Exception as e: print(f"FTP upload error: {e}") def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]: if style_name in styles: p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME]) else: p, n = styles[DEFAULT_STYLE_NAME] if not negative: negative = "" return p.replace("{prompt}", positive), n + negative def save_image(img): unique_name = str(uuid.uuid4()) + ".png" img.save(unique_name,optimize=False,compress_level=0) return unique_name def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp): filename= f'tst_G_{timestamp}.txt' with open(filename, "w") as f: f.write(f"Realvis 5.0 (Tester G) \n") f.write(f"Date/time: {timestamp} \n") f.write(f"Prompt: {prompt} \n") f.write(f"Steps: {num_inference_steps} \n") f.write(f"Guidance Scale: {guidance_scale} \n") f.write(f"SPACE SETUP: \n") f.write(f"To cuda and bfloat \n") upload_to_ftp(filename) @spaces.GPU(duration=30) def generate_30( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cpu').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "latent", "denoising_end": 0.75, } if use_resolution_binning: options["use_resolution_binning"] = True images = [] batch_options = options.copy() rv_image = pipe(**batch_options).images options = { "prompt": [prompt], "image": rv_image, "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", "denoising_start": 0.75, } batch_options = options.copy() rv_image = refiner(**batch_options).images[0] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") sd_image_path = f"rv50_G_{timestamp}.png" uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] @spaces.GPU(duration=60) def generate_60( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cpu').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "latent", "denoising_end": 0.75, } if use_resolution_binning: options["use_resolution_binning"] = True images = [] batch_options = options.copy() rv_image = pipe(**batch_options).images options = { "prompt": [prompt], "image": rv_image, "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", "denoising_start": 0.75, } batch_options = options.copy() rv_image = refiner(**batch_options).images[0] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") sd_image_path = f"rv50_G_{timestamp}.png" uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] @spaces.GPU(duration=90) def generate_90( prompt: str, negative_prompt: str = "", use_negative_prompt: bool = False, style_selection: str = "", width: int = 768, height: int = 768, guidance_scale: float = 4, num_inference_steps: int = 125, use_resolution_binning: bool = True, progress=gr.Progress(track_tqdm=True) ): seed = random.randint(0, MAX_SEED) generator = torch.Generator(device='cpu').manual_seed(seed) pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16) pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16) options = { "prompt": [prompt], "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "latent", "denoising_end": 0.75, } if use_resolution_binning: options["use_resolution_binning"] = True images = [] batch_options = options.copy() rv_image = pipe(**batch_options).images options = { "prompt": [prompt], "image": rv_image, "negative_prompt": [negative_prompt], "negative_prompt_2": [neg_prompt_2], "width": width, "height": height, "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "generator": generator, "output_type": "pil", "denoising_start": 0.75, } batch_options = options.copy() rv_image = refiner(**batch_options).images[0] timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") sd_image_path = f"rv50_G_{timestamp}.png" uploadNote(prompt,num_inference_steps,guidance_scale,timestamp) rv_image.save(sd_image_path,optimize=False,compress_level=0) upload_to_ftp(sd_image_path) unique_name = str(uuid.uuid4()) + ".png" os.symlink(sd_image_path, unique_name) return [unique_name] def load_predefined_images1(): predefined_images1 = [ "assets/7.png", "assets/8.png", "assets/9.png", "assets/1.png", "assets/2.png", "assets/3.png", "assets/4.png", "assets/5.png", "assets/6.png", ] return predefined_images1 css = ''' #col-container { margin: 0 auto; max-width: 640px; } h1{text-align:center} footer { visibility: hidden } body { background-color: green; } ''' with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo: gr.Markdown(DESCRIPTIONXX) with gr.Row(): prompt = gr.Text( label="Prompt", show_label=False, max_lines=1, placeholder="Enter your prompt", container=False, ) run_button_30 = gr.Button("Run 30 Seconds", scale=0) run_button_60 = gr.Button("Run 60 Seconds", scale=0) run_button_90 = gr.Button("Run 90 Seconds", scale=0) result = gr.Gallery(label="Result", columns=1, show_label=False) with gr.Row(): style_selection = gr.Radio( show_label=True, container=True, interactive=True, choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, label="Quality Style", ) with gr.Row(): with gr.Column(scale=1): use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True) negative_prompt = gr.Text( label="Negative prompt", max_lines=5, lines=4, placeholder="Enter a negative prompt", value="('deformed', 'distorted', 'disfigured':1.3),'not photorealistic':1.5, 'poorly drawn', 'bad anatomy', 'wrong anatomy', 'extra limb', 'missing limb', 'floating limbs', 'poorly drawn hands', 'poorly drawn feet', 'poorly drawn face':1.3, 'out of frame', 'extra limbs', 'bad anatomy', 'bad art', 'beginner', 'distorted face','amateur'", visible=True, ) with gr.Row(): width = gr.Slider( label="Width", minimum=448, maximum=MAX_IMAGE_SIZE, step=64, value=768, ) height = gr.Slider( label="Height", minimum=448, maximum=MAX_IMAGE_SIZE, step=64, value=768, ) with gr.Row(): guidance_scale = gr.Slider( label="Guidance Scale", minimum=0.1, maximum=30, step=0.1, value=3.8, ) num_inference_steps = gr.Slider( label="Number of inference steps", minimum=10, maximum=1000, step=10, value=170, ) gr.Examples( examples=examples, inputs=prompt, cache_examples=False ) use_negative_prompt.change( fn=lambda x: gr.update(visible=x), inputs=use_negative_prompt, outputs=negative_prompt, api_name=False, ) gr.on( triggers=[ run_button_30.click, ], # api_name="generate", # Add this line fn=generate_30, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.on( triggers=[ run_button_60.click, ], # api_name="generate", # Add this line fn=generate_60, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.on( triggers=[ run_button_90.click, ], # api_name="generate", # Add this line fn=generate_90, inputs=[ prompt, negative_prompt, use_negative_prompt, style_selection, width, height, guidance_scale, num_inference_steps, ], outputs=[result], ) gr.Markdown("### REALVISXL V5.0") predefined_gallery = gr.Gallery(label="REALVISXL V5.0", columns=3, show_label=False, value=load_predefined_images1()) gr.Markdown( """