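"""Wan 2.2 image-to-video Gradio app.

Generates a short clip that interpolates between an uploaded start frame and an
end frame, guided by a prompt translated from Albanian to English.
"""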
import os
# Install a nightly cu126 PyTorch build (<2.9) and the `spaces` package at startup.
os.system(
    'pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces'
)
import spaces
import torch
from diffusers import FlowMatchEulerDiscreteScheduler
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils.export_utils import export_to_video
import gradio as gr
import tempfile
import numpy as np
from PIL import Image
import random
import gc
import requests
import logging
from fastapi import FastAPI, HTTPException
from optimization import optimize_pipeline_
logging.basicConfig(
    level=logging.INFO,
    filename="wan_image2video.log",
    filemode="a",
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
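# Output sides must be multiples of 16 px; the long side is capped at 832 and the
# short side floored at 480 (square inputs are resized to 480x480).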
MAX_DIMENSION = 832
MIN_DIMENSION = 480
DIMENSION_MULTIPLE = 16
SQUARE_SIZE = 480
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 16
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
DEFAULT_DURATION = 3.2
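# 3.2 s at 16 fps rounds to 51 frames, within the model's 8-81 frame window.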
# Negative prompt in Chinese (the language Wan was trained on). Roughly: "garish colors,
# overexposed, static, blurry details, subtitles, style, artwork, painting, frame, stillness,
# overall gray, worst quality, low quality, JPEG compression artifacts, ugly, mutilated,
# extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed
# limbs, fused limbs, motionless frame, cluttered background, three legs, crowded
# background, walking backwards, overexposed".
default_negative_prompt = (
    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,"
    "畸形的,毁容的,形态畸形的肢体手,肢融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"
)
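# Appended to every translated prompt before generation.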
QUALITY_PROMPT = ", high quality, cinematic, smooth motion, detailed, vibrant, professional lighting"
logger.info("Loading models into memory. This may take a few minutes...")
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
        subfolder="transformer",
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        "cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers",
        subfolder="transformer_2",
        torch_dtype=torch.bfloat16,
        device_map="cuda",
    ),
    torch_dtype=torch.bfloat16,
)
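# shift=8.0 biases sampling toward higher noise levels, suiting the 8-step inference below.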
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=8.0)
pipe.to("cuda")
logger.info("Optimizing pipeline...")
for _ in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
optimize_pipeline_(
    pipe,
    image=Image.new("RGB", (MAX_DIMENSION, MIN_DIMENSION)),
    prompt="prompt",
    height=MIN_DIMENSION,
    width=MAX_DIMENSION,
    num_frames=MAX_FRAMES_MODEL,
)
logger.info("All models loaded and optimized. Gradio app is ready.")
@spaces.GPU
def translate_albanian_to_english(text: str, language: str = "en"):
    if not text.strip():
        raise gr.Error("Please enter a description.")
    # Retry the external translation service once before giving up.
    for attempt in range(2):
        try:
            response = requests.post(
                "https://hal1993-mdftranslation1234567890abcdef1234567890-fc073a6.hf.space/v1/translate",
                json={"from_language": "sq", "to_language": "en", "input_text": text},
                headers={"accept": "application/json", "Content-Type": "application/json"},
                timeout=5,
            )
            response.raise_for_status()
            translated = response.json().get("translate", "")
            logger.info(f"Translation response: {translated}")
            return translated
        except Exception as e:
            logger.error(f"Translation error (attempt {attempt + 1}): {e}")
            if attempt == 1:
                raise gr.Error("Translation failed. Please try again.")
    raise gr.Error("Translation failed. Please try again.")
def process_image_for_video(image: Image.Image) -> Image.Image:
    width, height = image.size
    if width == height:
        return image.resize((SQUARE_SIZE, SQUARE_SIZE), Image.Resampling.LANCZOS)
    aspect_ratio = width / height
    new_w, new_h = width, height
    # Scale down so the longer side fits within MAX_DIMENSION.
    if new_w > MAX_DIMENSION or new_h > MAX_DIMENSION:
        if aspect_ratio > 1:
            scale = MAX_DIMENSION / new_w
        else:
            scale = MAX_DIMENSION / new_h
        new_w = int(new_w * scale)
        new_h = int(new_h * scale)
    # Scale up so the shorter side reaches MIN_DIMENSION.
    if new_w < MIN_DIMENSION or new_h < MIN_DIMENSION:
        if aspect_ratio > 1:
            scale = MIN_DIMENSION / new_h
        else:
            scale = MIN_DIMENSION / new_w
        new_w = int(new_w * scale)
        new_h = int(new_h * scale)
    # Round both sides to the nearest multiple of 16 and enforce the 480 px floors.
    final_w = int(round(new_w / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_h = int(round(new_h / DIMENSION_MULTIPLE) * DIMENSION_MULTIPLE)
    final_w = max(final_w, MIN_DIMENSION if aspect_ratio < 1 else SQUARE_SIZE)
    final_h = max(final_h, MIN_DIMENSION if aspect_ratio > 1 else SQUARE_SIZE)
    return image.resize((final_w, final_h), Image.Resampling.LANCZOS)
def resize_and_crop_to_match(target_image: Image.Image, reference_image: Image.Image) -> Image.Image:
    # Cover-fit: scale the target so it fully covers the reference size, then center-crop.
    ref_w, ref_h = reference_image.size
    tgt_w, tgt_h = target_image.size
    scale = max(ref_w / tgt_w, ref_h / tgt_h)
    new_w, new_h = int(tgt_w * scale), int(tgt_h * scale)
    resized = target_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
    left, top = (new_w - ref_w) // 2, (new_h - ref_h) // 2
    return resized.crop((left, top, left + ref_w, top + ref_h))
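# Request up to 120 s of ZeroGPU time for each generation call.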
@spaces.GPU(duration=120)
def generate_video(start_image_pil, end_image_pil, prompt):
    if start_image_pil is None or end_image_pil is None:
        raise gr.Error("Please upload both a start and an end image.")
    prompt_en = translate_albanian_to_english(prompt.strip())
    prompt_final = prompt_en + QUALITY_PROMPT
    logger.info("Preprocessing images...")
    processed_start = process_image_for_video(start_image_pil)
    # Crop the end frame to exactly match the processed start frame's dimensions.
    processed_end = resize_and_crop_to_match(end_image_pil, processed_start)
    target_h, target_w = processed_start.height, processed_start.width
    num_frames = np.clip(
        int(round(DEFAULT_DURATION * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL
    )
    seed = random.randint(0, MAX_SEED)
    logger.info(
        f"Generating {num_frames} frames @ {target_w}x{target_h} (seed={seed})..."
    )
    frames = pipe(
        image=processed_start,
        last_image=processed_end,
        prompt=prompt_final,
        negative_prompt=default_negative_prompt,
        height=target_h,
        width=target_w,
        num_frames=num_frames,
        guidance_scale=1.0,
        guidance_scale_2=1.0,
        num_inference_steps=8,
        generator=torch.Generator(device="cuda").manual_seed(seed),
    ).frames[0]
    logger.info("Saving video to temporary file...")
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    export_to_video(frames, video_path, fps=FIXED_FPS)
    logger.info("Video generation complete.")
    return video_path
def create_demo():
    with gr.Blocks(css="", title="Image to Video") as demo:
        gr.HTML(
            """
<style>
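/* Full-black Orbitron theme with glowing accents; also hides Gradio chrome
   (upload toolbars, progress bars, share/fullscreen buttons, footer). */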
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@400;600;700&display=swap');
@keyframes glow {0%{box-shadow:0 0 14px rgba(0,255,128,0.5);}50%{box-shadow:0 0 14px rgba(0,255,128,0.7);}100%{box-shadow:0 0 14px rgba(0,255,128,0.5);}}
@keyframes glow-hover {0%{box-shadow:0 0 20px rgba(0,255,128,0.7);}50%{box-shadow:0 0 20px rgba(0,255,128,0.9);}100%{box-shadow:0 0 20px rgba(0,255,128,0.7);}}
@keyframes slide {0%{background-position:0% 50%;}50%{background-position:100% 50%;}100%{background-position:0% 50%;}}
body{
background:#000000 !important;
color:#FFFFFF !important;
font-family:'Orbitron',sans-serif;
min-height:100vh;
margin:0 !important;
padding:0 !important;
width:100% !important;
max-width:100vw !important;
overflow-x:hidden !important;
display:flex !important;
justify-content:center;
align-items:center;
flex-direction:column;
}
body::before{
content:"";
display:block;
height:600px;
background:#000000 !important;
}
.gr-blocks,.container{
width:100% !important;
max-width:100vw !important;
margin:0 !important;
padding:0 !important;
box-sizing:border-box !important;
overflow-x:hidden !important;
background:#000000 !important;
color:#FFFFFF !important;
}
#general_items{
width:100% !important;
max-width:100vw !important;
margin:2rem 0 !important;
display:flex !important;
flex-direction:column;
align-items:center;
justify-content:center;
background:#000000 !important;
color:#FFFFFF !important;
}
#input_column{
background:#000000 !important;
border:none !important;
border-radius:8px;
padding:1rem !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
color:#FFFFFF !important;
}
h1{
font-size:5rem;
font-weight:700;
text-align:center;
color:#FFFFFF !important;
text-shadow:0 0 8px rgba(255,255,255,0.3) !important;
margin:0 auto .5rem;
display:block;
max-width:100%;
}
#subtitle{
font-size:1rem;
text-align:center;
color:#FFFFFF !important;
opacity:0.8;
margin-bottom:1rem;
display:block;
max-width:100%;
}
.gradio-component{
background:#000000 !important;
border:none;
margin:0.75rem 0;
width:100% !important;
max-width:100vw !important;
color:#FFFFFF !important;
}
.image-container{
aspect-ratio:1/1;
width:100% !important;
max-width:100vw !important;
min-height:500px;
height:auto;
border:0.5px solid #FFFFFF !important;
border-radius:4px;
box-sizing:border-box !important;
background:#000000 !important;
box-shadow:0 0 10px rgba(255,255,255,0.3) !important;
position:relative;
color:#FFFFFF !important;
}
.image-container img,.image-container video{
width:100% !important;
height:auto;
box-sizing:border-box !important;
display:block !important;
}
.image-container[aria-label="Start Frame"] .file-upload,
.image-container[aria-label="Start Frame"] .file-preview,
.image-container[aria-label="Start Frame"] .image-actions,
.image-container[aria-label="Start Frame"] .gr-file-upload,
.image-container[aria-label="Start Frame"] .gr-file,
.image-container[aria-label="Start Frame"] .gr-actions,
.image-container[aria-label="Start Frame"] .gr-upload-button,
.image-container[aria-label="Start Frame"] .gr-image-toolbar,
.image-container[aria-label="Start Frame"] .gr-file-actions,
.image-container[aria-label="Start Frame"] .gr-upload-options,
div[aria-label="Start Frame"] > div > div:not(.image-container),
div[aria-label="Start Frame"] .gr-button,
.image-container[aria-label="End Frame"] .file-upload,
.image-container[aria-label="End Frame"] .file-preview,
.image-container[aria-label="End Frame"] .image-actions,
.image-container[aria-label="End Frame"] .gr-file-upload,
.image-container[aria-label="End Frame"] .gr-file,
.image-container[aria-label="End Frame"] .gr-actions,
.image-container[aria-label="End Frame"] .gr-upload-button,
.image-container[aria-label="End Frame"] .gr-image-toolbar,
.image-container[aria-label="End Frame"] .gr-file-actions,
.image-container[aria-label="End Frame"] .gr-upload-options,
div[aria-label="End Frame"] > div > div:not(.image-container),
div[aria-label="End Frame"] .gr-button{
display:none !important;
}
.image-container.processing{
background:#000000 !important;
position:relative !important;
}
.image-container.processing::before{
content:"Processing...";
color:#FFFFFF !important;
opacity:0.7;
font-size:1rem;
font-weight:600;
position:absolute;
top:50%;
left:50%;
transform:translate(-50%,-50%);
text-align:center;
width:auto;
max-width:150px;
white-space:nowrap;
display:inline-block;
}
input,textarea,.gr-dropdown,.gr-dropdown select{
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.5rem;
width:100% !important;
max-width:100vw !important;
box-sizing:border-box !important;
}
input:hover,textarea:hover,.gr-dropdown:hover,.gr-dropdown select:hover{
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
.gr-button-primary{
background:linear-gradient(90deg,rgba(0,255,128,0.3),rgba(0,200,100,0.3),rgba(0,255,128,0.3)) !important;
background-size:200% 100%;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:6px;
padding:0.75rem 1.5rem;
font-size:1.1rem;
font-weight:600;
box-shadow:0 0 14px rgba(0,255,128,0.7) !important;
transition:box-shadow 0.3s,transform 0.3s;
width:100% !important;
max-width:100vw !important;
min-height:48px;
cursor:pointer;
}
.gr-button-primary:hover{
box-shadow:0 0 20px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
transform:scale(1.05);
}
button[aria-label="Fullscreen"],button[aria-label="Share"]{display:none !important;}
button[aria-label="Download"]{
transform:scale(3);
transform-origin:top right;
background:#000000 !important;
color:#FFFFFF !important;
border:1px solid #FFFFFF !important;
border-radius:4px;
padding:0.4rem !important;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
transition:box-shadow 0.3s;
}
button[aria-label="Download"]:hover{
box-shadow:0 0 12px rgba(255,255,255,0.5) !important;
}
.progress-text,.gr-progress{display:none !important;}
footer,.gr-button-secondary{display:none !important;}
.gr-group{
background:#000000 !important;
border:none !important;
width:100% !important;
max-width:100vw !important;
}
@media (max-width:768px){
h1{font-size:4rem;}
#subtitle{font-size:0.9rem;}
.gr-button-primary{
padding:0.6rem 1rem;
font-size:1rem;
box-shadow:0 0 10px rgba(0,255,128,0.7) !important;
animation:slide 4s ease-in-out infinite,glow 3s ease-in-out infinite;
}
.gr-button-primary:hover{
box-shadow:0 0 12px rgba(0,255,128,0.9) !important;
animation:slide 4s ease-in-out infinite,glow-hover 3s ease-in-out infinite;
}
.image-container{
min-height:300px;
box-shadow:0 0 8px rgba(255,255,255,0.3) !important;
border:0.5px solid #FFFFFF !important;
}
.image-container.processing::before{
font-size:0.9rem;
max-width:90%;
white-space:normal;
}
}
</style>
<script>
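// Client-side guard: only render the UI when served under the secret mount path;
// otherwise mimic the server's 500 response.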
const allowed = /^\\/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6(\\/.*)?$/;
if (!allowed.test(window.location.pathname)) {
document.body.innerHTML = '<h1 style="color:#ef4444;font-family:sans-serif;text-align:center;margin-top:100px;">500 Internal Server Error</h1>';
throw new Error('500');
}
document.addEventListener('DOMContentLoaded', () => {
const containers = document.querySelectorAll('#general_items, #input_column, .image-container');
containers.forEach(container => {
const width = container.offsetWidth;
const style = window.getComputedStyle(container);
console.log(`Container ${container.id || container.className}: width=${width}px, box-shadow=${style.boxShadow}, background=${style.background}, border=${style.border} (Viewport: ${window.innerWidth}px)`);
container.setAttribute('data-width', `${width}px`);
});
const generateBtn = document.querySelector('.gr-button-primary');
const outputVideo = document.querySelector('.image-container[aria-label="Generated Video"]');
if (generateBtn && outputVideo) {
generateBtn.addEventListener('click', () => {
outputVideo.classList.add('processing');
console.log('Processing overlay added.');
});
const observer = new MutationObserver(() => {
if (outputVideo.querySelector('video')) {
outputVideo.classList.remove('processing');
console.log('Processing overlay removed.');
observer.disconnect();
}
});
observer.observe(outputVideo, { childList: true, subtree: true });
}
setInterval(() => {
document.querySelectorAll('.progress-text,.gr-progress,[class*="progress"]').forEach(el => el.remove());
}, 500);
});
</script>
"""
)
        with gr.Row(elem_id="general_items"):
            gr.Markdown("# ")
            gr.Markdown(
                "Generate videos by providing a first and last frame and an optional prompt description.",
                elem_id="subtitle",
            )
            with gr.Column(elem_id="input_column"):
                start_image = gr.Image(
                    type="pil",
                    label="Start Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                end_image = gr.Image(
                    type="pil",
                    label="End Frame",
                    sources=["upload"],
                    show_download_button=False,
                    show_share_button=False,
                    interactive=True,
                    elem_classes=["gradio-component", "image-container"],
                )
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Describe the transition between the two images",
                    lines=3,
                    elem_classes=["gradio-component"],
                )
                generate_button = gr.Button(
                    "Generate Video",
                    variant="primary",
                    elem_classes=["gradio-component", "gr-button-primary"],
                )
                output_video = gr.Video(
                    label="Generated Video",
                    autoplay=True,
                    interactive=False,
                    show_download_button=True,
                    show_share_button=False,
                    elem_classes=["gradio-component", "image-container"],
                )
        gr.on(
            triggers=[generate_button.click],
            fn=generate_video,
            inputs=[start_image, end_image, prompt],
            outputs=[output_video],
        )
    return demo
app = FastAPI()
demo = create_demo()
# Mount the Gradio app under an unguessable path. gr.mount_gradio_app is the
# supported helper for this; demo.app is not populated until launch().
app = gr.mount_gradio_app(
    app, demo, path="/z5x6c7v8b9n0m1l2k3j4h5g6f7d8s9a0q1w2e3r4t5y6u7i8o9p0l1k2j3h4g5f6"
)
@app.get("/{path:path}")
async def catch_all(path: str):
    # Any route outside the mounted path returns a 500, matching the client-side guard.
    raise HTTPException(status_code=500, detail="Internal Server Error")
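# A minimal sketch of serving the FastAPI wrapper directly (assumes this file is app.py):
#   uvicorn app:app --host 0.0.0.0 --port 7860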
if __name__ == "__main__":
logger.info(f"Gradio version: {gr.__version__}")
demo.queue().launch(share=True)