innoai committed on
Commit
0b63431
·
verified ·
1 Parent(s): fb854a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -60
app.py CHANGED
@@ -171,59 +171,37 @@ ASPECT_RATIOS = {
171
 
172
  def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
173
  """
174
- 根据不同的长宽比,生成符合 VAE 解码器缓存格式的零张量缓存。
175
- 缓存张量格式必须与 ZERO_VAE_CACHE 保持一致: [batch, time, channels, height, width]
176
  """
177
- ar_config = ASPECT_RATIOS[aspect_ratio]
178
- latent_w = ar_config["latent_w"]
179
- latent_h = ar_config["latent_h"]
180
-
181
- # 这里 time 维度初始化为 1,channels 对应各级别的通道数
182
- cache = []
183
-
184
- # 第一级特征,channels=512,下采样 8 倍
185
- cache.append(torch.zeros(
186
- 1, # batch size
187
- 1, # time frames
188
- 512, # channels
189
- latent_h // 8, # height
190
- latent_w // 8, # width
191
- device=device,
192
- dtype=dtype
193
- ))
194
- # 第二级特征,channels=512,下采样 4
195
- cache.append(torch.zeros(
196
- 1,
197
- 1,
198
- 512,
199
- latent_h // 4,
200
- latent_w // 4,
201
- device=device,
202
- dtype=dtype
203
- ))
204
- # 第三级特征,channels=256,下采样 2
205
- cache.append(torch.zeros(
206
- 1,
207
- 1,
208
- 256,
209
- latent_h // 2,
210
- latent_w // 2,
211
- device=device,
212
- dtype=dtype
213
- ))
214
- # 第四级特征,channels=128,不下采样
215
- cache.append(torch.zeros(
216
- 1,
217
- 1,
218
- 128,
219
- latent_h,
220
- latent_w,
221
- device=device,
222
- dtype=dtype
223
- ))
224
-
225
- return cache
226
-
227
 
228
  def frames_to_ts_file(frames, filepath, fps = 15):
229
  """
@@ -416,14 +394,8 @@ def video_generation_handler_streaming(prompt, seed=42, fps=15, aspect_ratio="16
416
 
417
  vae_cache, latents_cache = None, None
418
  if not APP_STATE["current_use_taehv"] and not args.trt:
419
- # For non-TRT and non-TAEHV, we need to handle aspect ratio properly
420
- # Use the original ZERO_VAE_CACHE as a template but adjust dimensions
421
- if aspect_ratio == "16:9":
422
- # Use default cache for 16:9
423
- vae_cache = [c.to(device=gpu, dtype=torch.float16) for c in ZERO_VAE_CACHE]
424
- else:
425
- # Create custom cache for 9:16
426
- vae_cache = get_vae_cache_for_aspect_ratio(aspect_ratio, gpu, torch.float16)
427
 
428
  num_blocks = 7
429
  current_start_frame = 0
 
171
 
172
def get_vae_cache_for_aspect_ratio(aspect_ratio, device, dtype):
    """
    Build a zero-initialized VAE decoder cache sized for the given aspect ratio.

    Mirrors the structure of the module-level ZERO_VAE_CACHE template. For the
    default 16:9 ratio the template tensors are reused directly; for 9:16 the
    spatial (last two) dimensions of each 5-D template tensor are swapped.

    Args:
        aspect_ratio: Either "16:9" (template's native ratio) or "9:16".
        device: Target device for the cache tensors.
        dtype: Target dtype for the cache tensors.

    Returns:
        list[torch.Tensor]: One tensor per entry in ZERO_VAE_CACHE, placed on
        `device` with `dtype`.
    """
    if aspect_ratio != "9:16":
        # 16:9 is the layout ZERO_VAE_CACHE was built for — use it as-is.
        return [c.to(device=device, dtype=dtype) for c in ZERO_VAE_CACHE]

    # 9:16: same structure as the template, but with the spatial axes
    # transposed. NOTE(review): the cache layout has been described elsewhere
    # in this file as [batch, time, channels, height, width]; under either
    # that layout or (B, C, T, H, W), H and W are the last two axes, so
    # swapping shape[-2] and shape[-1] is correct in both cases.
    cache = []
    for template in ZERO_VAE_CACHE:
        shape = list(template.shape)
        if len(shape) == 5:
            shape[-2], shape[-1] = shape[-1], shape[-2]
            cache.append(torch.zeros(shape, device=device, dtype=dtype))
        else:
            # Non-5-D entries carry no spatial axes to swap; copy unchanged.
            cache.append(template.to(device=device, dtype=dtype))
    return cache
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  def frames_to_ts_file(frames, filepath, fps = 15):
207
  """
 
394
 
395
  vae_cache, latents_cache = None, None
396
  if not APP_STATE["current_use_taehv"] and not args.trt:
397
+ # Create VAE cache appropriate for the aspect ratio
398
+ vae_cache = get_vae_cache_for_aspect_ratio(aspect_ratio, gpu, torch.float16)
 
 
 
 
 
 
399
 
400
  num_blocks = 7
401
  current_start_frame = 0