Spaces:
Running
on
Zero
Running
on
Zero
| import os | |
| os.system( | |
| 'pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces' | |
| ) | |
| import spaces | |
| import torch | |
| from diffusers import FlowMatchEulerDiscreteScheduler | |
| from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline | |
| from diffusers.models.transformers.transformer_wan import WanTransformer3DModel | |
| from diffusers.utils.export_utils import export_to_video | |
| import gradio as gr | |
| import tempfile | |
| import numpy as np | |
| from PIL import Image | |
| import random | |
| import gc | |
| import requests | |
| import logging | |
| from fastapi import FastAPI, HTTPException | |
| from optimization import optimize_pipeline_ | |
# Root logging configuration: records at INFO level and above are appended
# to a log file in the working directory, one timestamped line per record.
_LOGGING_OPTIONS = {
    "level": logging.INFO,
    "filename": "wan_image2video.log",
    "filemode": "a",
    "format": "%(asctime)s - %(levelname)s - %(message)s",
}
logging.basicConfig(**_LOGGING_OPTIONS)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# --- Model -----------------------------------------------------------------
# Hub repository the image-to-video pipeline is loaded from.
MODEL_ID: str = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# --- Frame geometry (pixels) -----------------------------------------------
# Bounds applied when resizing uploaded images, the multiple that final
# dimensions are snapped to, and the edge length used for square inputs.
MAX_DIMENSION: int = 832
MIN_DIMENSION: int = 480
DIMENSION_MULTIPLE: int = 16
SQUARE_SIZE: int = 480

# --- Sampling --------------------------------------------------------------
# Upper bound for randomly drawn seeds (largest signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max

# --- Timing ----------------------------------------------------------------
# Output frame rate and the frame-count range requests are clipped to.
FIXED_FPS: int = 16
MIN_FRAMES_MODEL: int = 8
MAX_FRAMES_MODEL: int = 81

# The same frame bounds expressed in seconds, rounded to one decimal place.
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)

# Clip length, in seconds, used for every generation.
DEFAULT_DURATION: float = 3.2

# --- Prompts ---------------------------------------------------------------
# Negative prompt (Chinese) sent with every request; it is runtime data and
# must stay byte-for-byte as written.
default_negative_prompt = (
    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,"
    "畸形的,毁容的,形态畸形的肢体手,肢融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"
)

# Suffix appended to the translated user prompt before generation.
QUALITY_PROMPT: str = ", high quality, cinematic, smooth motion, detailed, vibrant, professional lighting"
logger.info("Loading models into memory. This may take a few minutes...")


def _load_bf16_transformer(subfolder):
    """Load one transformer stage from the bf16 checkpoint repo onto CUDA.

    Args:
        subfolder: Sub-directory of the repository to load
            ("transformer" or "transformer_2").

    Returns:
        The loaded ``WanTransformer3DModel`` in bfloat16.
    """
    return WanTransformer3DModel.from_pretrained(
        "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
        subfolder=subfolder,
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    )


# Build the pipeline from MODEL_ID, substituting both transformer stages with
# the separately loaded bf16 weights. The stages are loaded in the same order
# as before: "transformer" first, then "transformer_2".
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=_load_bf16_transformer("transformer"),
    transformer_2=_load_bf16_transformer("transformer_2"),
    torch_dtype=torch.bfloat16,
)

# Replace the default scheduler with a flow-matching Euler scheduler that
# reuses the existing scheduler config but overrides shift to 8.0.
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    shift=8.0,
)
pipe.to("cuda")

logger.info("Optimizing pipeline...")

# Release Python garbage and cached CUDA memory (three passes) before the
# optimisation step runs.
for _ in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

# Optimise the pipeline using a blank RGB image at the largest landscape
# resolution and the maximum frame count as the example input.
_warmup_image = Image.new("RGB", (MAX_DIMENSION, MIN_DIMENSION))
optimize_pipeline_(
    pipe,
    image=_warmup_image,
    prompt="prompt",
    height=MIN_DIMENSION,
    width=MAX_DIMENSION,
    num_frames=MAX_FRAMES_MODEL,
)

logger.info("All models loaded and optimized. Gradio app is ready.")
def translate_albanian_to_english(text: str, language: str = "en"):
    """Translate an Albanian prompt to English via the hosted translation API.

    The request is attempted at most twice, each with a 5-second timeout.
    Every failed attempt is logged; only after the last one fails is an
    error surfaced to the UI.

    Args:
        text: Albanian source text. Must contain non-whitespace characters.
        language: Accepted for backward compatibility but currently unused;
            the request always asks for "en".
            NOTE(review): presumably intended as the target language —
            confirm before wiring it into the request payload.

    Returns:
        The value of the "translate" field of the JSON response, or "" when
        that field is absent.

    Raises:
        gr.Error: If *text* is blank, or if every attempt fails.
    """
    if not text.strip():
        raise gr.Error("Please enter a description.")

    max_attempts = 2
    last_error = None

    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.post(
                "https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
                json={"from_language": "sq", "to_language": "en", "input_text": text},
                headers={"accept": "application/json", "Content-Type": "application/json"},
                timeout=5,
            )
            response.raise_for_status()
            translated = response.json().get("translate", "")
        except Exception as exc:
            # UI boundary: any failure (network, HTTP status, malformed JSON)
            # is logged and retried rather than propagated raw to the user.
            logger.error(f"Translation error (attempt {attempt}): {exc}")
            last_error = exc
        else:
            logger.info(f"Translation response: {translated}")
            return translated

    # All attempts failed; keep the last underlying exception as the cause so
    # it remains visible in tracebacks.
    raise gr.Error("Translation failed. Please try again.") from last_error
def process_image_for_video(image: Image.Image) -> Image.Image:
    """Resize *image* to dimensions suitable for the video pipeline.

    Square images are resized straight to ``SQUARE_SIZE`` x ``SQUARE_SIZE``.
    Other images are first scaled down so the longer side is at most
    ``MAX_DIMENSION``, then scaled up so the shorter side is at least
    ``MIN_DIMENSION``. Both sides are then snapped to the nearest multiple of
    ``DIMENSION_MULTIPLE`` and clamped to a lower bound.

    NOTE(review): for very elongated images the second (upscaling) step can
    push the longer side back above ``MAX_DIMENSION``; confirm whether that
    is intended.

    Args:
        image: Source PIL image.

    Returns:
        A new PIL image resized with Lanczos resampling.
    """
    lanczos = Image.Resampling.LANCZOS
    src_w, src_h = image.size

    # Square input: fixed output size, nothing else to compute.
    if src_w == src_h:
        return image.resize((SQUARE_SIZE, SQUARE_SIZE), lanczos)

    aspect_ratio = src_w / src_h
    is_landscape = aspect_ratio > 1
    cur_w, cur_h = src_w, src_h

    # Step 1: shrink so the longer side fits within MAX_DIMENSION.
    if max(cur_w, cur_h) > MAX_DIMENSION:
        longer_side = cur_w if is_landscape else cur_h
        factor = MAX_DIMENSION / longer_side
        cur_w, cur_h = int(cur_w * factor), int(cur_h * factor)

    # Step 2: enlarge so the shorter side reaches MIN_DIMENSION.
    if min(cur_w, cur_h) < MIN_DIMENSION:
        shorter_side = cur_h if is_landscape else cur_w
        factor = MIN_DIMENSION / shorter_side
        cur_w, cur_h = int(cur_w * factor), int(cur_h * factor)

    def _snap(value):
        # Nearest multiple of DIMENSION_MULTIPLE (Python's round semantics).
        return int(round(value / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)

    # Step 3: snap to the grid, then apply the per-orientation lower bound.
    width_floor = MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE
    height_floor = MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE
    out_w = max(_snap(cur_w), width_floor)
    out_h = max(_snap(cur_h), height_floor)

    return image.resize((out_w, out_h), lanczos)
def resize_and_crop_to_match(target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
    """Scale and centre-crop *target_image* to the size of *reference_image*.

    The target is scaled uniformly by the larger of the two width/height
    ratios so it fully covers the reference size, then the centred region
    matching the reference size is cropped out.

    Args:
        target_image: Image to be resized and cropped.
        reference_image: Image whose ``size`` defines the output dimensions.

    Returns:
        A new PIL image with exactly the reference image's width and height.
    """
    ref_w, ref_h = reference_image.size
    tgt_w, tgt_h = target_image.size

    # "Cover" scale: the larger ratio guarantees both sides reach the target.
    scale = max(ref_w / tgt_w, ref_h / tgt_h)

    # tgt * (ref / tgt) can come out a hair below ref in floating point, and
    # int() would then truncate to ref - 1. That would give a negative crop
    # offset and a padded edge, so never let a side fall below the reference.
    new_w = max(ref_w, int(tgt_w * scale))
    new_h = max(ref_h, int(tgt_h * scale))

    resized = target_image.resize((new_w, new_h), Image.Resampling.LANCZOS)

    # Centre the crop window inside the resized image.
    left = (new_w - ref_w) // 2
    top = (new_h - ref_h) // 2
    return resized.crop((left, top, left + ref_w, top + ref_h))
def generate_video(start_image_pil, end_image_pil, prompt):
    """Generate a video that transitions from a start frame to an end frame.

    Steps: validate inputs, translate the prompt to English and append
    ``QUALITY_PROMPT``, resize the start image, fit the end image to the same
    size, run the pipeline with a random seed, and export the frames to a
    temporary ``.mp4`` file.

    NOTE(review): this file imports ``spaces`` but no ``@spaces.GPU``
    decorator is visible on this function; if the app runs on ZeroGPU,
    confirm GPU allocation is handled elsewhere.

    Args:
        start_image_pil: PIL image used as the first frame.
        end_image_pil: PIL image used as the last frame.
        prompt: Albanian description of the transition. ``None`` is treated
            like an empty string.

    Returns:
        Filesystem path of the generated ``.mp4`` file. The file is created
        with ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If either image is missing, the prompt is blank, or the
            translation fails.
    """
    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")

    # A missing prompt should yield the same friendly "enter a description"
    # error as a blank one, not an AttributeError on None.
    prompt_text = (prompt or "").strip()
    prompt_en = translate_albanian_to_english(prompt_text)
    prompt_final = prompt_en + QUALITY_PROMPT

    logger.info("Pre‑processing images...")
    processed_start = process_image_for_video(start_image_pil)
    processed_end = resize_and_crop_to_match(end_image_pil, processed_start)
    target_h, target_w = processed_start.height, processed_start.width

    # np.clip returns a NumPy scalar; hand the pipeline a plain Python int.
    num_frames = int(
        np.clip(
            int(round(DEFAULT_DURATION * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL
        )
    )
    seed = random.randint(0, MAX_SEED)

    logger.info(
        f"Generating {num_frames} frames @ {target_w}x{target_h} (seed={seed})…"
    )
    frames = pipe(
        image=processed_start,
        last_image=processed_end,
        prompt=prompt_final,
        negative_prompt=default_negative_prompt,
        height=target_h,
        width=target_w,
        num_frames=num_frames,
        guidance_scale=1.0,
        guidance_scale_2=1.0,
        num_inference_steps=8,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).frames[0]

    logger.info("Saving video to temporary file…")
    # Only the unique path is needed; the handle is closed before the
    # exporter writes to it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(frames, video_path, fps=FIXED_FPS)

    logger.info("Video generation complete.")
    return video_path
# Inline <style>/<script> payload injected at the top of the page.
#
# The `margin` declaration in the Download-button rule was corrupted in the
# original (non-CSS text and no terminating semicolon), which also caused
# the following `box-shadow` declaration to be discarded by CSS parsers.
# NOTE(review): the original margin value is unknown; `0.5rem` is a
# reconstruction — adjust if a different spacing was intended.
#
# NOTE(review): <script> content delivered through gr.HTML may not be
# executed by the browser; Gradio documents the `head=` / `js=` arguments of
# gr.Blocks for custom scripts — verify whether this script actually runs.
_DEMO_HEAD_HTML = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;600;700&display=swap');
@keyframes glow {0%{box-shadow:0 0 14px rgba(0,255,128,0.5);}50%{box-shadow:0 0 14px rgba(0,255,128,0.7);}100%{box-shadow:0 0 14px rgba(0,255,128,0.5);}}
@keyframes glow-hover {0%{box-shadow:0 0 20px rgba(0,255,128,0.7);}50%{box-shadow:0 0 20px rgba(0,255,128,0.9);}100%{box-shadow:0 0 20px rgba(0,255,128,0.7);}}
@keyframes slide {0%{background-position:0% 50%;}50%{background-position:100% 50%;}100%{background-position:0% 50%;}}
body{
background:#000000 !important;
color:#FFFFFF !important;
font-family:'Orbitron',sans-serif;
min-height:100vh;
margin:0 !important;
padding:0 !important;
width:100% !important;
max-width:100vw !important;
overflow-x:hidden !important;
display:flex !important;
justify-content:center;
align-items:center;
flex-direction:column;
}
body::before{
content:"";
display:block;
height:600px;
background:#000000 !important;
}
.gr-blocks,.container{
width:100% !important;
max-width:100vw !important;
margin:0 !important;
padding:0 !important;
box-sizing:border-box !important;
overflow-x:hidden !important;
background:#000000 !important;
color:#FFFFFF !important;
}
#general_items{
width:100% !important;
max-width:100vw !important;
margin:2rem 0 !important;
display:flex !important;
flex-direction:column;
align-items:center;
justify-content:center;
background:#000000 !important;
color:#FFFFFF !important;
}
#input_column{
background:#000000 !important;
border:none !important;
border-radius:8px;
padding:1rem !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
color:#FFFFFF !important;
}
h1{
font-size:5rem;
font-weight:700;
text-align:center;
color:#FFFFFF !important;
text-shadow:0 0 8px rgba(255,255,255,0.3) !important;
margin:0 auto .5rem;
display:block;
max-width:100%;
}
#subtitle{
font-size:1rem;
text-align:center;
color:#FFFFFF !important;
opacity:0.8;
margin-bottom:1rem;
display:block;
max-width:100%;
}
.gradio-component{
background:#000000 !important;
border:none;
margin:0.75rem 0;
width:100% !important;
max-width:100vw !important;
color:#FFFFFF !important;
}
.image-container{
aspect-ratio:1/1;
width:100% !important;
max-width:100vw !important;
min-height:500px;
height:auto;
border:0.5px solid #FFFFFF !important;
border-radius:4px;
box-sizing:border-box !important;
background:#000000 !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
position:relative;
color:#FFFFFF !important;
}
.image-container img,.image-container video{
width:100% !important;
height:auto;
box-sizing:border-box !important;
display:block !important;
}
.image-container[aria-label="Start Frame"] .file-upload,
.image-container[aria-label="Start Frame"] .file-preview,
.image-container[aria-label="Start Frame"] .image-actions,
.image-container[aria-label="Start Frame"] .gr-file-upload,
.image-container[aria-label="Start Frame"] .gr-file,
.image-container[aria-label="Start Frame"] .gr-actions,
.image-container[aria-label="Start Frame"] .gr-upload-button,
.image-container[aria-label="Start Frame"] .gr-image-toolbar,
.image-container[aria-label="Start Frame"] .gr-file-actions,
.image-container[aria-label="Start Frame"] .gr-upload-options,
div[aria-label="Start Frame"] > div > div:not(.image-container),
div[aria-label="Start Frame"] .gr-button,
.image-container[aria-label="End Frame"] .file-upload,
.image-container[aria-label="End Frame"] .file-preview,
.image-container[aria-label="End Frame"] .image-actions,
.image-container[aria-label="End Frame"] .gr-file-upload,
.image-container[aria-label="End Frame"] .gr-file,
.image-container[aria-label="End Frame"] .gr-actions,
.image-container[aria-label="End Frame"] .gr-upload-button,
.image-container[aria-label="End Frame"] .gr-image-toolbar,
.image-container[aria-label="End Frame"] .gr-file-actions,
.image-container[aria-label="End Frame"] .gr-upload-options,
div[aria-label="End Frame"] > div > div:not(.image-container),
div[aria-label="End Frame"] .gr-button{
display:none !important;
}
.image-container.processing{
background:#000000 !important;
position:relative !important;
}
.image-container.processing::before{
content:"Processing...";
color:#FFFFFF !important;
opacity:0.7;
font-size:1rem;
font-weight:600;
position:absolute;
top:50%;
left:50%;
transform:translate(-50%,-50%);
text-align:center;
width:auto;
max-width:150px;
white-space:nowrap;
display:inline-block;
}
input,textarea,.gr-dropdown,.gr-dropdown select{
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.5rem;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
}
input:hover,textarea:hover,.gr-dropdown:hover,.gr-dropdown select:hover{
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
.gr-button-primary{
background:linear-gradient(90deg,rgba(0,255,128,0.3),rgba(0,200,100,0.3),rgba(0,255,128,0.3)) !important;
background-size:200% 100%;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:6px;
padding:0.75rem 1.5rem;
font-size:1.1rem;
font-weight:600;
box-shadow:0 0 14px rgba(0,255,128,0.7) !important;
transition:box-shadow 0.3s,transform 0.3s;
width:100% !important;
max-width:100vw !important;
min-height:48px;
cursor:pointer;
}
.gr-button-primary:hover{
box-shadow:0 0 20px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
transform:scale(1.05);
}
button[aria-label="Fullscreen"],button[aria-label="Share"]{display:none !important;}
button[aria-label="Download"]{
transform:scale(3);
transform-origin:top right;
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.4rem !important;
margin:0.5rem !important;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
button[aria-label="Download"]:hover{
box-shadow:0 0 12px rgba(255,255,255,0.5) !important;
}
.progress-text,.gr-progress{display:none !important;}
footer,.gr-button-secondary{display:none !important;}
.gr-group{
background:#000000 !important;
border:none !important;
width:100% !important;
max-width:100vw !important;
}
@media (max-width:768px){
h1{font-size:4rem;}
#subtitle{font-size:0.9rem;}
.gr-button-primary{
padding:0.6rem 1rem;
font-size:1rem;
box-shadow:0 0 10px rgba(0,255,128,0.7) !important;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
}
.gr-button-primary:hover{
box-shadow:0 0 12px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
}
.image-container{
min-height:300px;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
border:0.5px solid #FFFFFF !important;
}
.image-container.processing::before{
font-size:0.9rem;
max-width:90%;
white-space:normal;
}
}
</style>
<script>
const allowed = /^\\/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6(\\/.*)?$/;
if (!allowed.test(window.location.pathname)) {
document.body.innerHTML = '<h1 style="color:#ef4444;font-family:sans-serif;text-align:center;margin-top:100px;">500 Internal Server Error</h1>';
throw new Error('500');
}
document.addEventListener('DOMContentLoaded', () => {
const containers = document.querySelectorAll('#general_items, #input_column, .image-container');
containers.forEach(container => {
const width = container.offsetWidth;
const style = window.getComputedStyle(container);
console.log(`Container ${container.id || container.className}: width=${width}px, box-shadow=${style.boxShadow}, background=${style.background}, border=${style.border} (Viewport: ${window.innerWidth}px)`);
container.setAttribute('data-width', `${width}px`);
});
const generateBtn = document.querySelector('.gr-button-primary');
const outputVideo = document.querySelector('.image-container[aria-label="Generated Video"]');
if (generateBtn && outputVideo) {
generateBtn.addEventListener('click', () => {
outputVideo.classList.add('processing');
console.log('Processing overlay added.');
});
const observer = new MutationObserver(() => {
if (outputVideo.querySelector('video')) {
outputVideo.classList.remove('processing');
console.log('Processing overlay removed.');
observer.disconnect();
}
});
observer.observe(outputVideo, { childList: true, subtree: true });
}
setInterval(() => {
document.querySelectorAll('.progress-text,.gr-progress,[class*="progress"]').forEach(el => el.remove());
}, 500);
});
</script>
"""


def create_demo():
    """Build the Gradio Blocks UI for first/last-frame video generation.

    The page consists of the custom style/script payload, an (empty) heading
    and subtitle, two image uploads (start and end frame), a prompt textbox,
    a "Generate Video" button and a video output. Clicking the button calls
    ``generate_video`` with the two images and the prompt and shows the
    returned file in the video component.

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    with gr.Blocks(css="", title="Image to Video") as demo:
        gr.HTML(_DEMO_HEAD_HTML)

        # NOTE(review): component nesting (heading, subtitle and input column
        # inside the "general_items" row) is reconstructed from the element
        # ids and CSS; confirm against the intended layout.
        with gr.Row(elem_id="general_items"):
            gr.Markdown("# ")
            gr.Markdown(
                "Generate videos by providing first and last frame and optional prompt description.",
                elem_id="subtitle",
            )
            with gr.Column(elem_id="input_column"):
                start_image = gr.Image(
                    type="pil",
                    label="Start Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                end_image = gr.Image(
                    type="pil",
                    label="End Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe the transition between the two images",
                    lines=3,
                    elem_classes=["gradio-component"],
                )
                generate_button = gr.Button(
                    "Generate Video",
                    variant="primary",
                    elem_classes=["gradio-component", "gr-button-primary"],
                )
                output_video = gr.Video(
                    label="Generated Video",
                    autoplay=True,
                    interactive=False,
                    show_download_button=True,
                    show_share_button=False,
                    elem_classes=["gradio-component", "image-container"],
                )

        # Wire the button to the generation function.
        gr.on(
            triggers=[generate_button.click],
            fn=generate_video,
            inputs=[start_image, end_image, prompt],
            outputs=[output_video],
        )
    return demo
# FastAPI application that hosts the Gradio UI beneath a long, hard-to-guess
# URL prefix instead of the site root.
app = FastAPI()
demo = create_demo()

# gr.mount_gradio_app is Gradio's documented helper for attaching a Blocks
# app to an existing FastAPI application; it returns the FastAPI app, which
# is rebound to `app` here. This replaces mounting `demo.app` by hand.
app = gr.mount_gradio_app(
    app,
    demo,
    path="/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6",
)
@app.get("/{path:path}")
async def catch_all(path: str):
    """Answer any GET request that reaches this handler with HTTP 500.

    The handler was previously defined without a route decorator and was
    therefore never invoked. It is now registered for every path; routes and
    mounts added to ``app`` earlier in the file are matched first.

    Args:
        path: The requested path (unused; captured only so that every path
            matches).

    Raises:
        HTTPException: Always, with status 500 and detail
            "Internal Server Error".
    """
    raise HTTPException(status_code=500, detail="Internal Server Error")
if __name__ == "__main__":
    # Script entry point: log the installed Gradio version, enable request
    # queuing on the demo, then launch it with a public share link.
    logger.info(f"Gradio version: {gr.__version__}")
    queued_demo = demo.queue()
    queued_demo.launch(share=True)