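"""Wan 2.2 image-to-video Gradio app.

Generates a short clip that interpolates between an uploaded start frame and an
end frame, guided by a prompt translated from Albanian to English.
"""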
import os
# Install a nightly cu126 PyTorch build (<2.9) and the `spaces` package at startup.
os.system(
    'pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces'
)
import spaces
import torch
from diffusers import FlowMatchEulerDiscreteScheduler
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random
import gc
import requests
import logging
from fastapi import FastAPI, HTTPException
from optimization import optimize_pipeline_
logging.basicConfig(
    level=logging.INFO,
    filename="wan_image2video.log",
    filemode="a",
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
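# Output sides must be multiples of 16 px; the long side is capped at 832 and the
# short side floored at 480 (square inputs are resized to 480x480).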
MAX_DIMENSION = 832
MIN_DIMENSION = 480
DIMENSION_MULTIPLE = 16
SQUARE_SIZE = 480
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
DEFAULT_DURATION = 3.2
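# 3.2 s at 16 fps rounds to 51 frames, within the model's 8-81 frame window.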
# Negative prompt in Chinese (the language Wan was trained on). Roughly: "garish colors,
# overexposed, static, blurry details, subtitles, style, artwork, painting, frame, stillness,
# overall gray, worst quality, low quality, JPEG compression artifacts, ugly, mutilated,
# extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed
# limbs, fused limbs, motionless frame, cluttered background, three legs, crowded
# background, walking backwards, overexposed".
default_negative_prompt = (
    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,"
    "畸形的,毁容的,形态畸形的肢体手,肢融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"
)
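# Appended to every translated prompt before generation.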
QUALITY_PROMPT = ", high quality, cinematic, smooth motion, detailed, vibrant, professional lighting"
logger.info("Loading models into memory. This may take a few minutes...")
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
        subfolder="transformer_2",
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    ),
    torch_dtype=torch.bfloat16,
)
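# shift=8.0 biases sampling toward higher noise levels, suiting the 8-step inference below.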
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=8.0)
pipe.to("cuda")
logger.info("Optimizing pipeline...")
for _ in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
optimize_pipeline_(
    pipe,
    image=Image.new("RGB", (MAX_DIMENSION, MIN_DIMENSION)),
    prompt="prompt",
    height=MIN_DIMENSION,
    width=MAX_DIMENSION,
    num_frames=MAX_FRAMES_MODEL,
)
logger.info("All models loaded and optimized. Gradio app is ready.")
@spaces.GPU
def translate_albanian_to_english(text: str, language: str = "en"):
    if not text.strip():
        raise gr.Error("Please enter a description.")
    # Retry the external translation service once before giving up.
    for attempt in range(2):
        try:
            response = requests.post(
                "https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
                json={"from_language": "sq", "to_language": "en", "input_text": text},
                headers={"accept": "application/json", "Content-Type": "application/json"},
                timeout=5,
            )
            response.raise_for_status()
            translated = response.json().get("translate", "")
            logger.info(f"Translation response: {translated}")
            return translated
        except Exception as e:
            logger.error(f"Translation error (attempt {attempt + 1}): {e}")
            if attempt == 1:
                raise gr.Error("Translation failed. Please try again.")
    raise gr.Error("Translation failed. Please try again.")
def process_image_for_video(image: Image.Image) -> Image.Image:
    width, height = image.size
    if width == height:
        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)
    aspect_ratio = width / height
    new_w, new_h = width, height
    # Scale down so the longer side fits within MAX_DIMENSION.
    if new_w > MAX_DIMENSION or new_h > MAX_DIMENSION:
        if aspect_ratio > 1:
            scale = MAX_DIMENSION / new_w
        else:
            scale = MAX_DIMENSION / new_h
        new_w = int(new_w * scale)
        new_h = int(new_h * scale)
    # Scale up so the shorter side reaches MIN_DIMENSION.
    if new_w < MIN_DIMENSION or new_h < MIN_DIMENSION:
        if aspect_ratio > 1:
            scale = MIN_DIMENSION / new_h
        else:
            scale = MIN_DIMENSION / new_w
        new_w = int(new_w * scale)
        new_h = int(new_h * scale)
    # Round both sides to the nearest multiple of 16 and enforce the 480 px floors.
    final_w = int(round(new_w / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_h = int(round(new_h / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_w = max(final_w, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
    final_h = max(final_h, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)
    return image.resize((final_w, final_h), Image.Resampling.LANCZOS)
def resize_and_crop_to_match(target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
    # Cover-fit: scale the target so it fully covers the reference size, then center-crop.
    ref_w, ref_h = reference_image.size
    tgt_w, tgt_h = target_image.size
    scale = max(ref_w / tgt_w, ref_h / tgt_h)
    new_w, new_h = int(tgt_w * scale), int(tgt_h * scale)
    resized = target_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
    left, top = (new_w - ref_w) // 2, (new_h - ref_h) // 2
    return resized.crop((left, top, left + ref_w, top + ref_h))
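# Request up to 120 s of ZeroGPU time for each generation call.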
@spaces.GPU(duration=120)
def generate_video(start_image_pil, end_image_pil, prompt):
    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")
    prompt_en = translate_albanian_to_english(prompt.strip())
    prompt_final = prompt_en + QUALITY_PROMPT
    logger.info("Preprocessing images...")
    processed_start = process_image_for_video(start_image_pil)
    # Crop the end frame to exactly match the processed start frame's dimensions.
    processed_end = resize_and_crop_to_match(end_image_pil, processed_start)
    target_h, target_w = processed_start.height, processed_start.width
    num_frames = np.clip(
        int(round(DEFAULT_DURATION * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL
    )
    seed = random.randint(0, MAX_SEED)
    logger.info(
        f"Generating {num_frames} frames @ {target_w}x{target_h} (seed={seed})..."
    )
    frames = pipe(
        image=processed_start,
        last_image=processed_end,
        prompt=prompt_final,
        negative_prompt=default_negative_prompt,
        height=target_h,
        width=target_w,
        num_frames=num_frames,
        guidance_scale=1.0,
        guidance_scale_2=1.0,
        num_inference_steps=8,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).frames[0]
    logger.info("Saving video to temporary file...")
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(frames, video_path, fps=FIXED_FPS)
    logger.info("Video generation complete.")
    return video_path
def create_demo():
    with gr.Blocks(css="", title="Image to Video") as demo:
        gr.HTML(
            """
<style>
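/* Full-black Orbitron theme with glowing accents; also hides Gradio chrome
   (upload toolbars, progress bars, share/fullscreen buttons, footer). */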
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;600;700&display=swap');
@keyframes glow {0%{box-shadow:0 0 14px rgba(0,255,128,0.5);}50%{box-shadow:0 0 14px rgba(0,255,128,0.7);}100%{box-shadow:0 0 14px rgba(0,255,128,0.5);}}
@keyframes glow-hover {0%{box-shadow:0 0 20px rgba(0,255,128,0.7);}50%{box-shadow:0 0 20px rgba(0,255,128,0.9);}100%{box-shadow:0 0 20px rgba(0,255,128,0.7);}}
@keyframes slide {0%{background-position:0% 50%;}50%{background-position:100% 50%;}100%{background-position:0% 50%;}}
body{
background:#000000 !important;
color:#FFFFFF !important;
font-family:'Orbitron',sans-serif;
min-height:100vh;
margin:0 !important;
padding:0 !important;
width:100% !important;
max-width:100vw !important;
overflow-x:hidden !important;
display:flex !important;
justify-content:center;
align-items:center;
flex-direction:column;
}
body::before{
content:"";
display:block;
height:600px;
background:#000000 !important;
}
.gr-blocks,.container{
width:100% !important;
max-width:100vw !important;
margin:0 !important;
padding:0 !important;
box-sizing:border-box !important;
overflow-x:hidden !important;
background:#000000 !important;
color:#FFFFFF !important;
}
#general_items{
width:100% !important;
max-width:100vw !important;
margin:2rem 0 !important;
display:flex !important;
flex-direction:column;
align-items:center;
justify-content:center;
background:#000000 !important;
color:#FFFFFF !important;
}
#input_column{
background:#000000 !important;
border:none !important;
border-radius:8px;
padding:1rem !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
color:#FFFFFF !important;
}
h1{
font-size:5rem;
font-weight:700;
text-align:center;
color:#FFFFFF !important;
text-shadow:0 0 8px rgba(255,255,255,0.3) !important;
margin:0 auto .5rem;
display:block;
max-width:100%;
}
#subtitle{
font-size:1rem;
text-align:center;
color:#FFFFFF !important;
opacity:0.8;
margin-bottom:1rem;
display:block;
max-width:100%;
}
.gradio-component{
background:#000000 !important;
border:none;
margin:0.75rem 0;
width:100% !important;
max-width:100vw !important;
color:#FFFFFF !important;
}
.image-container{
aspect-ratio:1/1;
width:100% !important;
max-width:100vw !important;
min-height:500px;
height:auto;
border:0.5px solid #FFFFFF !important;
border-radius:4px;
box-sizing:border-box !important;
background:#000000 !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
position:relative;
color:#FFFFFF !important;
}
.image-container img,.image-container video{
width:100% !important;
height:auto;
box-sizing:border-box !important;
display:block !important;
}
.image-container[aria-label="Start Frame"] .file-upload,
.image-container[aria-label="Start Frame"] .file-preview,
.image-container[aria-label="Start Frame"] .image-actions,
.image-container[aria-label="Start Frame"] .gr-file-upload,
.image-container[aria-label="Start Frame"] .gr-file,
.image-container[aria-label="Start Frame"] .gr-actions,
.image-container[aria-label="Start Frame"] .gr-upload-button,
.image-container[aria-label="Start Frame"] .gr-image-toolbar,
.image-container[aria-label="Start Frame"] .gr-file-actions,
.image-container[aria-label="Start Frame"] .gr-upload-options,
div[aria-label="Start Frame"] > div > div:not(.image-container),
div[aria-label="Start Frame"] .gr-button,
.image-container[aria-label="End Frame"] .file-upload,
.image-container[aria-label="End Frame"] .file-preview,
.image-container[aria-label="End Frame"] .image-actions,
.image-container[aria-label="End Frame"] .gr-file-upload,
.image-container[aria-label="End Frame"] .gr-file,
.image-container[aria-label="End Frame"] .gr-actions,
.image-container[aria-label="End Frame"] .gr-upload-button,
.image-container[aria-label="End Frame"] .gr-image-toolbar,
.image-container[aria-label="End Frame"] .gr-file-actions,
.image-container[aria-label="End Frame"] .gr-upload-options,
div[aria-label="End Frame"] > div > div:not(.image-container),
div[aria-label="End Frame"] .gr-button{
display:none !important;
}
.image-container.processing{
background:#000000 !important;
position:relative !important;
}
.image-container.processing::before{
content:"Processing...";
color:#FFFFFF !important;
opacity:0.7;
font-size:1rem;
font-weight:600;
position:absolute;
top:50%;
left:50%;
transform:translate(-50%,-50%);
text-align:center;
width:auto;
max-width:150px;
white-space:nowrap;
display:inline-block;
}
input,textarea,.gr-dropdown,.gr-dropdown select{
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.5rem;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
}
input:hover,textarea:hover,.gr-dropdown:hover,.gr-dropdown select:hover{
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
.gr-button-primary{
background:linear-gradient(90deg,rgba(0,255,128,0.3),rgba(0,200,100,0.3),rgba(0,255,128,0.3)) !important;
background-size:200% 100%;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:6px;
padding:0.75rem 1.5rem;
font-size:1.1rem;
font-weight:600;
box-shadow:0 0 14px rgba(0,255,128,0.7) !important;
transition:box-shadow 0.3s,transform 0.3s;
width:100% !important;
max-width:100vw !important;
min-height:48px;
cursor:pointer;
}
.gr-button-primary:hover{
box-shadow:0 0 20px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
transform:scale(1.05);
}
button[aria-label="Fullscreen"],button[aria-label="Share"]{display:none !important;}
button[aria-label="Download"]{
transform:scale(3);
transform-origin:top right;
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.4rem !important;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
button[aria-label="Download"]:hover{
box-shadow:0 0 12px rgba(255,255,255,0.5) !important;
}
.progress-text,.gr-progress{display:none !important;}
footer,.gr-button-secondary{display:none !important;}
.gr-group{
background:#000000 !important;
border:none !important;
width:100% !important;
max-width:100vw !important;
}
@media (max-width:768px){
h1{font-size:4rem;}
#subtitle{font-size:0.9rem;}
.gr-button-primary{
padding:0.6rem 1rem;
font-size:1rem;
box-shadow:0 0 10px rgba(0,255,128,0.7) !important;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
}
.gr-button-primary:hover{
box-shadow:0 0 12px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
}
.image-container{
min-height:300px;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
border:0.5px solid #FFFFFF !important;
}
.image-container.processing::before{
font-size:0.9rem;
max-width:90%;
white-space:normal;
}
}
</style>
<script>
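// Client-side guard: only render the UI when served under the secret mount path;
// otherwise mimic the server's 500 response.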
const allowed = /^\\/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6(\\/.*)?$/;
if (!allowed.test(window.location.pathname)) {
document.body.innerHTML = '<h1 style="color:#ef4444;font-family:sans-serif;text-align:center;margin-top:100px;">500 Internal Server Error</h1>';
throw new Error('500');
}
document.addEventListener('DOMContentLoaded', () => {
const containers = document.querySelectorAll('#general_items, #input_column, .image-container');
containers.forEach(container => {
const width = container.offsetWidth;
const style = window.getComputedStyle(container);
console.log(`Container ${container.id || container.className}: width=${width}px, box-shadow=${style.boxShadow}, background=${style.background}, border=${style.border} (Viewport: ${window.innerWidth}px)`);
container.setAttribute('data-width', `${width}px`);
});
const generateBtn = document.querySelector('.gr-button-primary');
const outputVideo = document.querySelector('.image-container[aria-label="Generated Video"]');
if (generateBtn && outputVideo) {
generateBtn.addEventListener('click', () => {
outputVideo.classList.add('processing');
console.log('Processing overlay added.');
});
const observer = new MutationObserver(() => {
if (outputVideo.querySelector('video')) {
outputVideo.classList.remove('processing');
console.log('Processing overlay removed.');
observer.disconnect();
}
});
observer.observe(outputVideo, { childList: true, subtree: true });
}
setInterval(() => {
document.querySelectorAll('.progress-text,.gr-progress,[class*="progress"]').forEach(el => el.remove());
}, 500);
});
</script>
"""
)
        with gr.Row(elem_id="general_items"):
            gr.Markdown("# ")
            gr.Markdown(
                "Generate videos by providing a first and last frame and an optional prompt description.",
                elem_id="subtitle",
            )
            with gr.Column(elem_id="input_column"):
                start_image = gr.Image(
                    type="pil",
                    label="Start Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                end_image = gr.Image(
                    type="pil",
                    label="End Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe the transition between the two images",
                    lines=3,
                    elem_classes=["gradio-component"],
                )
                generate_button = gr.Button(
                    "Generate Video",
                    variant="primary",
                    elem_classes=["gradio-component", "gr-button-primary"],
                )
                output_video = gr.Video(
                    label="Generated Video",
                    autoplay=True,
                    interactive=False,
                    show_download_button=True,
                    show_share_button=False,
                    elem_classes=["gradio-component", "image-container"],
                )
        gr.on(
            triggers=[generate_button.click],
            fn=generate_video,
            inputs=[start_image, end_image, prompt],
            outputs=[output_video],
        )
    return demo
app = FastAPI()
demo = create_demo()
# Mount the Gradio app under an unguessable path. gr.mount_gradio_app is the
# supported helper for this; demo.app is not populated until launch().
app = gr.mount_gradio_app(
    app, demo, path="/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6"
)
@app.get("/{path:path}")
async def catch_all(path: str):
    # Any route outside the mounted path returns a 500, matching the client-side guard.
    raise HTTPException(status_code=500, detail="Internal Server Error")
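# A minimal sketch of serving the FastAPI wrapper directly (assumes this file is app.py):
#   uvicorn app:app --host 0.0.0.0 --port 7860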
if __name__ == "__main__":
logger.info(f"Gradio version: {gr.__version__}")
demo.queue().launch(share=True)