import os

# Install a CUDA 12.6 PyTorch nightly before torch is imported, capped at
# "<2.9" so the pre-release upgrade stays on a known version line.
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9"')
import spaces
import gradio as gr
import torch
from diffusers import WanPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video

# Model setup
dtype = torch.bfloat16
device = "cuda"
model_id = "FastDM/Wan2.2-T2V-A14B-Merge-Lightning-V1.0-Diffusers"
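# The Lightning merge is distilled for few-step sampling, which is why the
# defaults below use only 4 inference steps and a guidance scale near 1.0.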

print("Loading model... this may take a while.")
vae = AutoencoderKLWan.from_pretrained(
    model_id, subfolder="vae", torch_dtype=torch.float32
)
pipe = WanPipeline.from_pretrained(
    model_id, vae=vae, torch_dtype=dtype
).to(device)

# Default values
DEFAULT_PROMPT = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
DEFAULT_NEGATIVE = "bad quality, blurry, distorted, extra limbs, watermark, text"

# On ZeroGPU Spaces, @spaces.GPU attaches a GPU to this call for up to
# `duration` seconds; long or high-resolution generations may need a
# larger limit.
@spaces.GPU(duration=60)
def generate_video(prompt, negative_prompt, height, width, num_frames, steps, guidance):
    # Gradio sliders can deliver floats, so cast everything the pipeline
    # expects as an integer.
    height, width, steps = int(height), int(width), int(steps)
    # Wan's temporal VAE compresses 4 frames at a time, so the frame count
    # should have the form 4k + 1 (e.g. 81); snap to the nearest valid value.
    num_frames = (int(num_frames) - 1) // 4 * 4 + 1

    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance,
        num_inference_steps=steps,
    ).frames[0]

    # Export at 16 fps, matching the rate used in the model's reference examples.
    output_path = "t2v_out.mp4"
    export_to_video(video, output_path, fps=16)
    return output_path


with gr.Blocks() as demo:
    gr.Markdown("# 🎬 Wan2.2 Text-to-Video Demo")
    gr.Markdown("Generate short AI videos from text prompts.")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=3)
            negative_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE, lines=2)
            height = gr.Slider(256, 1280, value=720, step=64, label="Height")
            width = gr.Slider(256, 1280, value=1280, step=64, label="Width")
            num_frames = gr.Slider(16, 128, value=81, step=1, label="Number of Frames")
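            # Note: generate_video snaps this to the nearest 4k + 1 value
            # (e.g. 81), the frame counts Wan's temporal VAE expects.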
            steps = gr.Slider(1, 20, value=4, step=1, label="Inference Steps")
            guidance = gr.Slider(0.1, 10.0, value=1.0, step=0.1, label="Guidance Scale")
            generate_btn = gr.Button("🚀 Generate Video")

        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    generate_btn.click(
        fn=generate_video,
        inputs=[prompt, negative_prompt, height, width, num_frames, steps, guidance],
        outputs=[video_output],
    )

demo.launch()
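
# On Spaces the default launch() settings are fine; when running locally,
# demo.launch(share=True) provides a temporary public URL for remote testing.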