hlky (HF staff) committed · verified
Commit 3e14df3 · 1 Parent(s): 945d1bc

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "_class_name": "AutoencoderKLWan",
+   "_diffusers_version": "0.33.0.dev0",
+   "attn_scales": [],
+   "base_dim": 96,
+   "dim_mult": [
+     1,
+     2,
+     4,
+     4
+   ],
+   "dropout": 0.0,
+   "latents_mean": [
+     -0.7571,
+     -0.7089,
+     -0.9113,
+     0.1075,
+     -0.1745,
+     0.9653,
+     -0.1517,
+     1.5508,
+     0.4134,
+     -0.0715,
+     0.5517,
+     -0.3632,
+     -0.1922,
+     -0.9497,
+     0.2503,
+     -0.2921
+   ],
+   "latents_std": [
+     2.8184,
+     1.4541,
+     2.3275,
+     2.6558,
+     1.2196,
+     1.7708,
+     2.6052,
+     2.0743,
+     3.2687,
+     2.1526,
+     2.8652,
+     1.5579,
+     1.6382,
+     1.1253,
+     2.8251,
+     1.916
+   ],
+   "num_res_blocks": 2,
+   "temperal_downsample": [
+     false,
+     true,
+     true
+   ],
+   "z_dim": 16
+ }
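For reference, the config above describes the Wan VAE (AutoencoderKLWan): a 16-channel latent space (z_dim) whose per-channel latents_mean / latents_std are used to un-normalize latents before decoding. A minimal sketch of loading the model and decoding a latent, assuming a placeholder repo id and an illustrative latent shape (neither is part of this commit):

import torch
from diffusers import AutoencoderKLWan

# "<this-repo>" is a placeholder for the repo this config belongs to.
vae = AutoencoderKLWan.from_pretrained("<this-repo>", torch_dtype=torch.float32).eval()

# Dummy latent: (batch, z_dim, latent_frames, latent_height, latent_width); shape is illustrative.
latents = torch.randn(1, vae.config.z_dim, 3, 60, 104)

# Un-normalize with the per-channel statistics from config.json before decoding.
mean = torch.tensor(vae.config.latents_mean).view(1, vae.config.z_dim, 1, 1, 1)
std = 1.0 / torch.tensor(vae.config.latents_std).view(1, vae.config.z_dim, 1, 1, 1)
latents = latents / std + mean

with torch.no_grad():
    video = vae.decode(latents, return_dict=False)[0]  # pixel tensor, (batch, 3, frames, H, W)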
diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6e524b3fffede1787a74e81b30976dce5400c4439ba64222168e607ed19e793
+ size 507591892
handler.py ADDED
@@ -0,0 +1,84 @@
+ from typing import cast, Union
+
+ import torch
+
+ from diffusers import AutoencoderKLWan
+ from diffusers.video_processor import VideoProcessor
+ from diffusers.utils import export_to_video
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         self.device = "cuda"
+         self.dtype = torch.float32
+         self.vae = cast(
+             AutoencoderKLWan,
+             AutoencoderKLWan.from_pretrained(path, torch_dtype=self.dtype)
+             .to(self.device, self.dtype)
+             .eval(),
+         )
+         self.vae.enable_tiling()
+
+         self.vae_scale_factor_temporal = (
+             2 ** sum(self.vae.temperal_downsample) if getattr(self, "vae", None) else 4
+         )
+         self.vae_scale_factor_spatial = (
+             2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
+         )
+         self.video_processor = VideoProcessor(
+             vae_scale_factor=self.vae_scale_factor_spatial
+         )
+
+     @torch.no_grad()
+     def __call__(self, data) -> Union[torch.Tensor, bytes]:
+         """
+         Args:
+             data (:obj:`dict`):
+                 includes the input data and the parameters for the inference.
+         """
+         tensor = cast(torch.Tensor, data["inputs"])
+         parameters = cast(dict, data.get("parameters", {}))
+         do_scaling = cast(bool, parameters.get("do_scaling", True))
+         output_type = cast(str, parameters.get("output_type", "pil"))
+         partial_postprocess = cast(bool, parameters.get("partial_postprocess", False))
+         if partial_postprocess and output_type != "pt":
+             output_type = "pt"
+
+         tensor = tensor.to(self.device, self.dtype)
+
+         if do_scaling:
+             # Un-normalize the latents with the per-channel statistics from config.json
+             # before decoding.
+             latents_mean = (
+                 torch.tensor(self.vae.config.latents_mean)
+                 .view(1, self.vae.config.z_dim, 1, 1, 1)
+                 .to(tensor.device, tensor.dtype)
+             )
+             latents_std = 1.0 / torch.tensor(self.vae.config.latents_std).view(
+                 1, self.vae.config.z_dim, 1, 1, 1
+             ).to(tensor.device, tensor.dtype)
+             tensor = tensor / latents_std + latents_mean
+
+         with torch.no_grad():
+             frames = cast(torch.Tensor, self.vae.decode(tensor, return_dict=False)[0])
+
+         if partial_postprocess:
+             # Return uint8 frames in (frames, height, width, channels) layout.
+             frames = frames[0].permute(1, 0, 2, 3)
+             frames = torch.stack([(frame * 0.5 + 0.5).clamp(0, 1) for frame in frames])
+             frames = frames.permute(0, 2, 3, 1).contiguous().float()
+             frames = (frames * 255).round().to(torch.uint8)
+         elif output_type == "pil":
+             frames = cast(
+                 torch.Tensor,
+                 self.video_processor.postprocess_video(frames, output_type="pt")[0],
+             )
+         elif output_type == "mp4":
+             frames = cast(
+                 torch.Tensor,
+                 self.video_processor.postprocess_video(frames, output_type="pil")[0],
+             )
+             path = export_to_video(frames, fps=16)
+             with open(path, "rb") as f:
+                 frames = f.read()
+         elif output_type == "pt":
+             # Raw decoded frames are returned as-is.
+             frames = frames
+
+         return frames
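handler.py implements a custom Inference Endpoints handler that decodes Wan latents with the VAE above. A minimal local smoke-test sketch, assuming a CUDA device; the latent shape, output file name, and path="." are illustrative assumptions, not part of this commit:

import torch
from handler import EndpointHandler

# Assumes the repo files (config.json, diffusion_pytorch_model.safetensors) are in the working directory.
handler = EndpointHandler(path=".")

# Illustrative latent: (batch, z_dim, latent_frames, latent_height, latent_width).
latents = torch.randn(1, 16, 3, 60, 104)

# "output_type", "do_scaling", and "partial_postprocess" mirror the parameters read in __call__;
# "mp4" returns the encoded video as bytes.
video_bytes = handler({"inputs": latents, "parameters": {"output_type": "mp4"}})
with open("sample.mp4", "wb") as f:
    f.write(video_bytes)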
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ huggingface_hub
+ diffusers @ git+https://github.com/huggingface/diffusers@main
+ imageio
+ imageio-ffmpeg
+ opencv-python