Upload folder using huggingface_hub

- config.json +56 -0
- diffusion_pytorch_model.safetensors +3 -0
- handler.py +84 -0
- requirements.txt +5 -0
config.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "_class_name": "AutoencoderKLWan",
+  "_diffusers_version": "0.33.0.dev0",
+  "attn_scales": [],
+  "base_dim": 96,
+  "dim_mult": [
+    1,
+    2,
+    4,
+    4
+  ],
+  "dropout": 0.0,
+  "latents_mean": [
+    -0.7571,
+    -0.7089,
+    -0.9113,
+    0.1075,
+    -0.1745,
+    0.9653,
+    -0.1517,
+    1.5508,
+    0.4134,
+    -0.0715,
+    0.5517,
+    -0.3632,
+    -0.1922,
+    -0.9497,
+    0.2503,
+    -0.2921
+  ],
+  "latents_std": [
+    2.8184,
+    1.4541,
+    2.3275,
+    2.6558,
+    1.2196,
+    1.7708,
+    2.6052,
+    2.0743,
+    3.2687,
+    2.1526,
+    2.8652,
+    1.5579,
+    1.6382,
+    1.1253,
+    2.8251,
+    1.916
+  ],
+  "num_res_blocks": 2,
+  "temperal_downsample": [
+    false,
+    true,
+    true
+  ],
+  "z_dim": 16
+}
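For context, the handler below derives its temporal and spatial scale factors from the "temperal_downsample" list in this config (the misspelling is the actual diffusers attribute name). A minimal sketch of that arithmetic, using only values from this config:

# Scale factors implied by this config's temperal_downsample entry.
temperal_downsample = [False, True, True]                   # from config.json
vae_scale_factor_temporal = 2 ** sum(temperal_downsample)   # 2 ** 2 = 4
vae_scale_factor_spatial = 2 ** len(temperal_downsample)    # 2 ** 3 = 8

These match the fallback values (4 and 8) hard-coded in the handler's __init__.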
diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e524b3fffede1787a74e81b30976dce5400c4439ba64222168e607ed19e793
+size 507591892
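This is a Git LFS pointer, not the weights themselves; downloading through huggingface_hub resolves it to the actual ~508 MB file. A quick integrity-check sketch, where the repo id is a placeholder assumption:

import hashlib
from huggingface_hub import hf_hub_download

# "user/wan-vae-decode" is a hypothetical repo id standing in for this repo.
path = hf_hub_download("user/wan-vae-decode", "diffusion_pytorch_model.safetensors")
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
# Should match the oid recorded in the LFS pointer above.
assert h.hexdigest() == "d6e524b3fffede1787a74e81b30976dce5400c4439ba64222168e607ed19e793"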
handler.py
ADDED
@@ -0,0 +1,89 @@
+from typing import cast, Union
+
+import torch
+
+from diffusers import AutoencoderKLWan
+from diffusers.video_processor import VideoProcessor
+from diffusers.utils import export_to_video
+
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        self.device = "cuda"
+        self.dtype = torch.float32
+        self.vae = cast(
+            AutoencoderKLWan,
+            AutoencoderKLWan.from_pretrained(path, torch_dtype=self.dtype)
+            .to(self.device, self.dtype)
+            .eval(),
+        )
+        self.vae.enable_tiling()
+
+        # Scale factors mirror the computation in diffusers' WanPipeline;
+        # "temperal_downsample" is the actual (misspelled) config attribute.
+        self.vae_scale_factor_temporal = (
+            2 ** sum(self.vae.temperal_downsample) if getattr(self, "vae", None) else 4
+        )
+        self.vae_scale_factor_spatial = (
+            2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
+        )
+        self.video_processor = VideoProcessor(
+            vae_scale_factor=self.vae_scale_factor_spatial
+        )
+
+    @torch.no_grad()
+    def __call__(self, data) -> Union[torch.Tensor, bytes]:
+        """
+        Decode Wan video latents into frames or an MP4 byte stream.
+
+        Args:
+            data (:obj:`dict`):
+                ``data["inputs"]`` is the latent tensor; ``data["parameters"]``
+                may set ``do_scaling``, ``output_type`` ("pil", "pt", or
+                "mp4"), and ``partial_postprocess``.
+        """
+        tensor = cast(torch.Tensor, data["inputs"])
+        parameters = cast(dict, data.get("parameters", {}))
+        do_scaling = cast(bool, parameters.get("do_scaling", True))
+        output_type = cast(str, parameters.get("output_type", "pil"))
+        partial_postprocess = cast(bool, parameters.get("partial_postprocess", False))
+        if partial_postprocess and output_type != "pt":
+            output_type = "pt"
+
+        tensor = tensor.to(self.device, self.dtype)
+
+        if do_scaling:
+            # Undo the per-channel latent normalization using the statistics
+            # stored in config.json.
+            latents_mean = (
+                torch.tensor(self.vae.config.latents_mean)
+                .view(1, self.vae.config.z_dim, 1, 1, 1)
+                .to(tensor.device, tensor.dtype)
+            )
+            latents_std = 1.0 / torch.tensor(self.vae.config.latents_std).view(
+                1, self.vae.config.z_dim, 1, 1, 1
+            ).to(tensor.device, tensor.dtype)
+            tensor = tensor / latents_std + latents_mean
+
+        frames = cast(torch.Tensor, self.vae.decode(tensor, return_dict=False)[0])
+
+        if partial_postprocess:
+            # Denormalize from [-1, 1] to uint8 frames in (T, H, W, C) layout.
+            frames = frames[0].permute(1, 0, 2, 3)
+            frames = torch.stack([(frame * 0.5 + 0.5).clamp(0, 1) for frame in frames])
+            frames = frames.permute(0, 2, 3, 1).contiguous().float()
+            frames = (frames * 255).round().to(torch.uint8)
+        elif output_type == "pil":
+            # Postprocess to a float tensor; the caller converts to PIL images.
+            frames = cast(
+                torch.Tensor,
+                self.video_processor.postprocess_video(frames, output_type="pt")[0],
+            )
+        elif output_type == "mp4":
+            # postprocess_video(..., output_type="pil") yields a list of PIL
+            # images, which export_to_video writes to a temporary MP4 file.
+            frames = self.video_processor.postprocess_video(frames, output_type="pil")[0]
+            path = export_to_video(frames, fps=16)
+            with open(path, "rb") as f:
+                frames = f.read()
+        # output_type == "pt" returns the raw decoded tensor unchanged.
+
+        return frames
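To exercise the handler end to end, it can be instantiated locally on a CUDA machine (the handler hard-codes device "cuda"). A sketch under assumptions: the latent shape below is hypothetical, and path must point at a directory containing the config and weights above.

import torch
from handler import EndpointHandler

handler = EndpointHandler(path=".")
# Hypothetical Wan latent shape: (batch, z_dim=16, latent_frames, h, w).
latents = torch.randn(1, 16, 13, 60, 104)
video_bytes = handler(
    {"inputs": latents, "parameters": {"do_scaling": True, "output_type": "mp4"}}
)
with open("decoded.mp4", "wb") as f:
    f.write(video_bytes)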
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+huggingface_hub
+diffusers @ git+https://github.com/huggingface/diffusers@main
+imageio
+imageio-ffmpeg
+opencv-python