import gradio as gr
import torch
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
from diffusers.utils import load_image
from PIL import Image
import os
import json
import gc
import traceback

# Reference image URLs fed to the style IP-Adapter for the "pixar" style.
_PIXAR_STYLE_URLS = (
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img0.png",
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img1.png",
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img2.png",
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img3.png",
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy/img4.png",
)

# Maps a style name (the "style" field of a request) to its list of
# reference image URLs.
STYLE_MAP = {"pixar": list(_PIXAR_STYLE_URLS)}

# Half precision is only used when a GPU is present; the CPU path stays in
# float32.
_cuda_available = torch.cuda.is_available()
torch_dtype = torch.float16 if _cuda_available else torch.float32
device = "cuda" if _cuda_available else "cpu"
print(f"🚀 Device: {device}, torch_dtype: {torch_dtype}")

# Image encoder shared by both IP-Adapters loaded below.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch_dtype,
)

# NOTE: the pipeline is intentionally NOT moved with `.to(device)` here.
# On GPU, model CPU offload (enabled below) manages device placement itself,
# and moving the whole pipeline to CUDA first defeats most of its memory
# savings. On CPU the weights are already where they need to be.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch_dtype,
    image_encoder=image_encoder,
    variant="fp16" if _cuda_available else None,
)

pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Two adapters are loaded: a general "plus" adapter (driven by the style
# images) and a "plus-face" adapter (driven by the character image).
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
# One scale per adapter, in the same order as `weight_name` above.
pipeline.set_ip_adapter_scale([0.7, 0.3])

# Model CPU offload shuttles sub-models to the accelerator on demand, so it
# only makes sense (and only works) when a CUDA device exists.
if _cuda_available:
    pipeline.enable_model_cpu_offload()
pipeline.enable_vae_tiling()

_ERROR_IMAGE_PATH = "/mnt/data/error_image.png"
_REQUIRED_KEYS = ("character_image_url", "style", "scene")


def _fallback_image():
    """Return the error image, or a generated placeholder if it is unreadable.

    This runs on error paths, so it must never raise itself: a missing or
    corrupt error image is replaced by a plain grey canvas.
    """
    try:
        # Image.open is lazy; copy() forces the pixel data to be read so the
        # file handle can be closed before the image is returned.
        with Image.open(_ERROR_IMAGE_PATH) as img:
            return img.copy()
    except OSError as e:
        print(f"⚠️ Could not load fallback image {_ERROR_IMAGE_PATH}: {e}")
        return Image.new("RGB", (512, 768), color=(64, 64, 64))


def generate_single_scene(data):
    """Generate one scene image from a request dict.

    Args:
        data: Mapping with the keys ``character_image_url`` (location of the
            character/face reference image), ``style`` (a key of
            ``STYLE_MAP``) and ``scene`` (the text prompt).

    Returns:
        The generated ``PIL.Image.Image``. On any failure (invalid request,
        image download error, pipeline error) the fallback error image is
        returned instead; this function does not raise.
    """
    try:
        print("📥 Full input received:")
        print(json.dumps(data, indent=2))

        # Validate up front so bad requests fail with a readable message
        # instead of a bare KeyError or an opaque error inside the pipeline.
        if not isinstance(data, dict):
            raise ValueError(
                f"Expected a JSON object, got {type(data).__name__}"
            )
        missing = [key for key in _REQUIRED_KEYS if key not in data]
        if missing:
            raise ValueError(f"Missing required field(s): {', '.join(missing)}")

        character_image_url = data["character_image_url"]
        style = data["style"]
        scene_prompt = data["scene"]

        style_urls = STYLE_MAP.get(style)
        if not style_urls:
            raise ValueError(
                f"Unknown style {style!r}; available styles: {sorted(STYLE_MAP)}"
            )

        print("🔄 Loading reference and style images...")
        face_image = load_image(character_image_url)
        style_images = [load_image(url) for url in style_urls]

        # Collect garbage first so memory released by dead tensors can
        # actually be handed back when the CUDA cache is emptied.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        print("🎨 Starting generation...")
        result = pipeline(
            prompt=scene_prompt,
            # One entry per loaded IP-Adapter: style images first, face second.
            ip_adapter_image=[style_images, face_image],
            negative_prompt="blurry, bad anatomy, low quality",
            width=512,
            height=768,
            guidance_scale=5.0,
            num_inference_steps=15,
            # Fixed seed keeps output reproducible for identical requests.
            generator=torch.Generator(device).manual_seed(42),
        )

        image = result.images[0] if hasattr(result, "images") else result
        print(f"🖼️ Image generated. Type: {type(image)}")

        if isinstance(image, Image.Image):
            print("✅ Valid image object returned.")
            return image

        print("❌ Invalid image object. Returning fallback.")
        return _fallback_image()

    except Exception as e:
        # UI boundary: log everything and degrade to the fallback image.
        print(f"❌ Exception occurred: {e}")
        traceback.print_exc()
        return _fallback_image()


def generate_from_json(json_input_text):
    """Parse a JSON request string and generate the corresponding scene.

    Args:
        json_input_text: JSON text describing the request (see
            ``generate_single_scene`` for the expected fields).

    Returns:
        The generated image, or the fallback error image if the text is not
        valid JSON or generation fails.
    """
    try:
        data = json.loads(json_input_text)
    except (TypeError, ValueError, RecursionError) as e:
        # json.JSONDecodeError is a ValueError; TypeError covers None input;
        # RecursionError covers pathologically nested documents.
        print(f"❌ JSON parsing error: {e}")
        traceback.print_exc()
        return _fallback_image()

    # generate_single_scene handles and logs its own failures.
    return generate_single_scene(data)

# UI components: a multi-line box for the JSON request and an image pane
# that shows either the generated scene or the fallback error image.
_json_input = gr.Textbox(label="Input JSON", lines=10)
_scene_output = gr.Image(label="Generated Scene or Error")

# Single-function Gradio app wrapping generate_from_json.
iface = gr.Interface(
    fn=generate_from_json,
    inputs=_json_input,
    outputs=_scene_output,
    title="Debug Storybook Scene Generator",
    description="Displays logs and returns fallback image on error.",
)

# Enable request queuing, then start the server with a public share link.
iface.queue()
iface.launch(share=True)