hlky (HF staff) committed · verified
Commit 3e14df3 · 1 Parent(s): 945d1bc

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "_class_name": "AutoencoderKLWan",
+   "_diffusers_version": "0.33.0.dev0",
+   "attn_scales": [],
+   "base_dim": 96,
+   "dim_mult": [
+     1,
+     2,
+     4,
+     4
+   ],
+   "dropout": 0.0,
+   "latents_mean": [
+     -0.7571,
+     -0.7089,
+     -0.9113,
+     0.1075,
+     -0.1745,
+     0.9653,
+     -0.1517,
+     1.5508,
+     0.4134,
+     -0.0715,
+     0.5517,
+     -0.3632,
+     -0.1922,
+     -0.9497,
+     0.2503,
+     -0.2921
+   ],
+   "latents_std": [
+     2.8184,
+     1.4541,
+     2.3275,
+     2.6558,
+     1.2196,
+     1.7708,
+     2.6052,
+     2.0743,
+     3.2687,
+     2.1526,
+     2.8652,
+     1.5579,
+     1.6382,
+     1.1253,
+     2.8251,
+     1.916
+   ],
+   "num_res_blocks": 2,
+   "temperal_downsample": [
+     false,
+     true,
+     true
+   ],
+   "z_dim": 16
+ }
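For reference, the config above describes the Wan VAE (AutoencoderKLWan): a 16-channel latent space (z_dim) whose per-channel latents_mean / latents_std are used to un-normalize latents before decoding. A minimal sketch of loading the model and decoding a latent, assuming a placeholder repo id and an illustrative latent shape (neither is part of this commit):

import torch
from diffusers import AutoencoderKLWan

# "<this-repo>" is a placeholder for the repo this config belongs to.
vae = AutoencoderKLWan.from_pretrained("<this-repo>", torch_dtype=torch.float32).eval()

# Dummy latent: (batch, z_dim, latent_frames, latent_height, latent_width); shape is illustrative.
latents = torch.randn(1, vae.config.z_dim, 3, 60, 104)

# Un-normalize with the per-channel statistics from config.json before decoding.
mean = torch.tensor(vae.config.latents_mean).view(1, vae.config.z_dim, 1, 1, 1)
std = 1.0 / torch.tensor(vae.config.latents_std).view(1, vae.config.z_dim, 1, 1, 1)
latents = latents / std + mean

with torch.no_grad():
    video = vae.decode(latents, return_dict=False)[0]  # pixel tensor, (batch, 3, frames, H, W)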
diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d6e524b3fffede1787a74e81b30976dce5400c4439ba64222168e607ed19e793
+ size 507591892
handler.py ADDED
@@ -0,0 +1,84 @@
+ from typing import cast, Union
+
+ import torch
+
+ from diffusers import AutoencoderKLWan
+ from diffusers.video_processor import VideoProcessor
+ from diffusers.utils import export_to_video
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         self.device = "cuda"
+         self.dtype = torch.float32
+         self.vae = cast(
+             AutoencoderKLWan,
+             AutoencoderKLWan.from_pretrained(path, torch_dtype=self.dtype)
+             .to(self.device, self.dtype)
+             .eval(),
+         )
+         self.vae.enable_tiling()
+
+         self.vae_scale_factor_temporal = (
+             2 ** sum(self.vae.temperal_downsample) if getattr(self, "vae", None) else 4
+         )
+         self.vae_scale_factor_spatial = (
+             2 ** len(self.vae.temperal_downsample) if getattr(self, "vae", None) else 8
+         )
+         self.video_processor = VideoProcessor(
+             vae_scale_factor=self.vae_scale_factor_spatial
+         )
+
+     @torch.no_grad()
+     def __call__(self, data) -> Union[torch.Tensor, bytes]:
+         """
+         Args:
+             data (:obj:`dict`):
+                 includes the input data and the parameters for the inference.
+         """
+         tensor = cast(torch.Tensor, data["inputs"])
+         parameters = cast(dict, data.get("parameters", {}))
+         do_scaling = cast(bool, parameters.get("do_scaling", True))
+         output_type = cast(str, parameters.get("output_type", "pil"))
+         partial_postprocess = cast(bool, parameters.get("partial_postprocess", False))
+         if partial_postprocess and output_type != "pt":
+             output_type = "pt"
+
+         tensor = tensor.to(self.device, self.dtype)
+
+         if do_scaling:
+             # Un-normalize the latents with the per-channel statistics from config.json
+             # before decoding.
+             latents_mean = (
+                 torch.tensor(self.vae.config.latents_mean)
+                 .view(1, self.vae.config.z_dim, 1, 1, 1)
+                 .to(tensor.device, tensor.dtype)
+             )
+             latents_std = 1.0 / torch.tensor(self.vae.config.latents_std).view(
+                 1, self.vae.config.z_dim, 1, 1, 1
+             ).to(tensor.device, tensor.dtype)
+             tensor = tensor / latents_std + latents_mean
+
+         with torch.no_grad():
+             frames = cast(torch.Tensor, self.vae.decode(tensor, return_dict=False)[0])
+
+         if partial_postprocess:
+             # Return uint8 frames in (frames, height, width, channels) layout.
+             frames = frames[0].permute(1, 0, 2, 3)
+             frames = torch.stack([(frame * 0.5 + 0.5).clamp(0, 1) for frame in frames])
+             frames = frames.permute(0, 2, 3, 1).contiguous().float()
+             frames = (frames * 255).round().to(torch.uint8)
+         elif output_type == "pil":
+             frames = cast(
+                 torch.Tensor,
+                 self.video_processor.postprocess_video(frames, output_type="pt")[0],
+             )
+         elif output_type == "mp4":
+             frames = cast(
+                 torch.Tensor,
+                 self.video_processor.postprocess_video(frames, output_type="pil")[0],
+             )
+             path = export_to_video(frames, fps=16)
+             with open(path, "rb") as f:
+                 frames = f.read()
+         elif output_type == "pt":
+             # Raw decoded frames are returned as-is.
+             frames = frames
+
+         return frames
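handler.py implements a custom Inference Endpoints handler that decodes Wan latents with the VAE above. A minimal local smoke-test sketch, assuming a CUDA device; the latent shape, output file name, and path="." are illustrative assumptions, not part of this commit:

import torch
from handler import EndpointHandler

# Assumes the repo files (config.json, diffusion_pytorch_model.safetensors) are in the working directory.
handler = EndpointHandler(path=".")

# Illustrative latent: (batch, z_dim, latent_frames, latent_height, latent_width).
latents = torch.randn(1, 16, 3, 60, 104)

# "output_type", "do_scaling", and "partial_postprocess" mirror the parameters read in __call__;
# "mp4" returns the encoded video as bytes.
video_bytes = handler({"inputs": latents, "parameters": {"output_type": "mp4"}})
with open("sample.mp4", "wb") as f:
    f.write(video_bytes)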
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ huggingface_hub
+ diffusers @ git+https://github.com/huggingface/diffusers@main
+ imageio
+ imageio-ffmpeg
+ opencv-python