yuyutsu07 committed
Commit 16f4e59 · verified · 1 Parent(s): 2291646

Update app.py

Files changed (1)
  1. app.py +67 -56
app.py CHANGED
@@ -3,23 +3,26 @@ import gradio as gr
 import imageio
 import numpy as np
 from PIL import Image
-from torchvision.transforms import ToTensor
+from torchvision.transforms import ToTensor, Resize
 import spaces
 import tempfile
+from scipy.ndimage import gaussian_filter
 
 @spaces.GPU
-def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps, duration):
+def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps, duration, ssaa_factor, use_taa):
     """
-    Generate a 3D parallax video from an image and depth map with the selected animation style.
+    Generate a 3D parallax video with enhanced quality features.
 
     Args:
         image (PIL.Image): Input RGB image.
-        depth_map (PIL.Image): Grayscale depth map (white = closer, black = farther).
-        animation_style (str): Animation type ("horizontal", "vertical", "circle", "perspective").
-        amplitude (float): Intensity of camera movement.
-        k (float): Depth displacement scale factor.
+        depth_map (PIL.Image): Grayscale depth map.
+        animation_style (str): Animation type (e.g., horizontal, spiral).
+        amplitude (float): Camera movement intensity.
+        k (float): Depth displacement scale.
         fps (int): Frames per second.
         duration (float): Video duration in seconds.
+        ssaa_factor (int): Super sampling factor (1, 2, 4).
+        use_taa (bool): Enable temporal anti-aliasing.
 
     Returns:
         str: Path to the generated video file.
@@ -28,68 +31,86 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
     if image.size != depth_map.size:
         raise ValueError("Image and depth map must have the same dimensions")
 
-    # Convert inputs to PyTorch tensors on GPU
-    image_tensor = ToTensor()(image).unsqueeze(0).to('cuda')  # Shape: (1, 3, H, W)
-    depth_tensor = ToTensor()(depth_map.convert('L')).to('cuda')  # Shape: (1, 1, H, W)
+    # Convert to tensors with high precision
+    image_tensor = ToTensor()(image).to('cuda', dtype=torch.float32)
+    depth_tensor = ToTensor()(depth_map.convert('L')).to('cuda', dtype=torch.float32)
     depth_tensor = (depth_tensor - depth_tensor.min()) / (depth_tensor.max() - depth_tensor.min() + 1e-6)
-    depth_tensor = depth_tensor.squeeze(0).squeeze(0)  # Shape: (H, W)
 
-    H, W = image_tensor.shape[2], image_tensor.shape[3]
+    # Smooth depth map to improve intersections
+    depth_np = depth_tensor.squeeze().cpu().numpy()
+    depth_np = gaussian_filter(depth_np, sigma=1)  # Basic smoothing
+    depth_tensor = torch.tensor(depth_np, device='cuda', dtype=torch.float32).unsqueeze(0)
 
-    # Create coordinate grid for warping
+    # Apply SSAA: upscale image and depth map
+    if ssaa_factor > 1:
+        upscale = Resize((int(image.height * ssaa_factor), int(image.width * ssaa_factor)), antialias=True)
+        image_tensor = upscale(image_tensor)
+        depth_tensor = upscale(depth_tensor)
+
+    H, W = image_tensor.shape[1], image_tensor.shape[2]
+
+    # Create coordinate grid
     x = torch.arange(0, W).float().to('cuda')
     y = torch.arange(0, H).float().to('cuda')
     xx, yy = torch.meshgrid(x, y, indexing='xy')
-    pixel_grid = torch.stack((xx, yy), dim=-1)  # Shape: (H, W, 2)
+    pixel_grid = torch.stack((xx, yy), dim=-1)
 
-    # Calculate number of frames
+    # Generate frames
     num_frames = int(fps * duration)
     frames = []
+    prev_frame = None
 
-    # Generate frames based on animation style
     for frame in range(num_frames):
-        t = frame / num_frames  # Normalized time [0, 1]
+        t = frame / num_frames
         if animation_style == "horizontal":
             camera_x = amplitude * np.sin(2 * np.pi * t)
             camera_y = 0
-            displacement_scale = 1
         elif animation_style == "vertical":
             camera_x = 0
             camera_y = amplitude * np.sin(2 * np.pi * t)
-            displacement_scale = 1
        elif animation_style == "circle":
             camera_x = amplitude * np.sin(2 * np.pi * t)
             camera_y = amplitude * np.cos(2 * np.pi * t)
-            displacement_scale = 1
-        elif animation_style == "perspective":
-            camera_x = amplitude  # Fixed horizontal base for consistency
-            camera_y = 0
-            displacement_scale = 1 + 0.5 * np.sin(2 * np.pi * t)  # Scales displacement for zoom effect
+        elif animation_style == "spiral":  # Inspired by DepthFlow
+            radius = amplitude * (1 - t)
+            camera_x = radius * np.sin(4 * np.pi * t)
+            camera_y = radius * np.cos(4 * np.pi * t)
         else:
             raise ValueError(f"Unsupported animation style: {animation_style}")
 
-        # Compute displacements in both x and y directions
-        displacement_x = displacement_scale * k * camera_x * depth_tensor
-        displacement_y = displacement_scale * k * camera_y * depth_tensor
+        # Compute displacements
+        displacement_x = k * camera_x * depth_tensor.squeeze()
+        displacement_y = k * camera_y * depth_tensor.squeeze()
 
-        # Calculate source coordinates for warping
+        # Calculate source coordinates
         source_pixel_x = pixel_grid[:, :, 0] + displacement_x
         source_pixel_y = pixel_grid[:, :, 1] + displacement_y
 
-        # Normalize coordinates to [-1, 1] for grid_sample
+        # Normalize to [-1, 1]
         grid_x = 2 * source_pixel_x / (W - 1) - 1
         grid_y = 2 * source_pixel_y / (H - 1) - 1
-        grid = torch.stack((grid_x, grid_y), dim=-1).unsqueeze(0)  # Shape: (1, H, W, 2)
+        grid = torch.stack((grid_x, grid_y), dim=-1).unsqueeze(0)
 
-        # Warp the image using grid sampling
-        warped = torch.nn.functional.grid_sample(image_tensor, grid, align_corners=True)
+        # Warp with high-quality interpolation
+        warped = torch.nn.functional.grid_sample(image_tensor.unsqueeze(0), grid, mode='bicubic', align_corners=True)
 
-        # Convert warped tensor to numpy image
-        warped_np = warped.squeeze(0).permute(1, 2, 0).cpu().numpy()
-        frame_img = (warped_np * 255).astype(np.uint8)
+        # Downsample if SSAA is enabled
+        if ssaa_factor > 1:
+            downscale = Resize((image.height, image.width), antialias=True)
+            warped = downscale(warped.squeeze(0)).unsqueeze(0)
+
+        # Convert to numpy
+        frame_img = warped.squeeze(0).permute(1, 2, 0).cpu().numpy()
+        frame_img = (frame_img * 255).astype(np.uint8)
+
+        # Apply TAA if enabled
+        if use_taa and prev_frame is not None:
+            frame_img = (frame_img * 0.8 + prev_frame * 0.2).astype(np.uint8)
+
         frames.append(frame_img)
+        prev_frame = frame_img
 
-    # Save frames as a video
+    # Save video
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         output_path = tmpfile.name
         writer = imageio.get_writer(output_path, fps=fps, codec='libx264')
@@ -99,41 +120,31 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
 
     return output_path
 
-# Define Gradio interface
-with gr.Blocks(title="3D Parallax Video Generator") as demo:
-    gr.Markdown("# 3D Parallax Video Generator")
-    gr.Markdown("""
-    Upload an image and its depth map (white = closer, black = farther) to create a 3D parallax video.
-    Select an animation style and adjust parameters below. Start with small amplitude and k values to avoid empty frames.
-    """)
+# Gradio interface
+with gr.Blocks(title="Enhanced 3D Parallax Video Generator") as demo:
+    gr.Markdown("# Enhanced 3D Parallax Video Generator")
+    gr.Markdown("Create high-quality 3D parallax videos with advanced features.")
 
-    # Input section
     with gr.Row():
         image_input = gr.Image(type="pil", label="Upload Image")
         depth_input = gr.Image(type="pil", label="Upload Depth Map")
 
-    # Parameter controls
     with gr.Row():
-        animation_style = gr.Dropdown(
-            choices=["horizontal", "vertical", "circle", "perspective"],
-            label="Animation Style",
-            value="horizontal"
-        )
+        animation_style = gr.Dropdown(["horizontal", "vertical", "circle", "spiral"], label="Animation Style", value="horizontal")
         amplitude_slider = gr.Slider(0, 10, value=2, label="Amplitude", step=0.1)
         k_slider = gr.Slider(1, 20, value=5, label="Depth Scale (k)", step=0.1)
-        fps_slider = gr.Slider(10, 60, value=30, label="Frames Per Second", step=1)
-        duration_slider = gr.Slider(1, 10, value=5, label="Duration (seconds)", step=0.1)
+        fps_slider = gr.Slider(10, 60, value=30, label="FPS", step=1)
+        duration_slider = gr.Slider(1, 10, value=5, label="Duration (s)", step=0.1)
+        ssaa_factor = gr.Dropdown([1, 2, 4], label="SSAA Factor", value=1)
+        use_taa = gr.Checkbox(label="Enable TAA", value=False)
 
-    # Output and interaction
     generate_btn = gr.Button("Generate Video")
     video_output = gr.Video(label="Parallax Video")
 
-    # Connect button to function
     generate_btn.click(
         fn=generate_parallax_video,
-        inputs=[image_input, depth_input, animation_style, amplitude_slider, k_slider, fps_slider, duration_slider],
+        inputs=[image_input, depth_input, animation_style, amplitude_slider, k_slider, fps_slider, duration_slider, ssaa_factor, use_taa],
         outputs=video_output
     )
 
-# Launch the application
 demo.launch()
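
A few notes on the techniques this change relies on. The core of both versions is a depth-weighted backward warp: every output pixel samples the source at (x + k*camera_x*depth, y + k*camera_y*depth), with coordinates normalized to [-1, 1] for grid_sample. A minimal standalone sketch of that step (parallax_warp is an illustrative name, not a function in app.py):

    import torch
    import torch.nn.functional as F

    def parallax_warp(image, depth, dx, dy):
        # image: (1, 3, H, W) floats in [0, 1]; depth: (H, W) in [0, 1],
        # white = closer; dx, dy: camera offsets in pixels (k * camera_x, ...).
        H, W = depth.shape
        xs = torch.arange(W, dtype=torch.float32, device=depth.device)
        ys = torch.arange(H, dtype=torch.float32, device=depth.device)
        xx, yy = torch.meshgrid(xs, ys, indexing='xy')  # each (H, W)
        src_x = xx + dx * depth   # closer pixels shift further
        src_y = yy + dy * depth
        grid = torch.stack((2 * src_x / (W - 1) - 1,   # grid_sample wants (x, y)
                            2 * src_y / (H - 1) - 1), dim=-1).unsqueeze(0)
        return F.grid_sample(image, grid, mode='bicubic', align_corners=True)

Source coordinates that fall outside the frame are filled by grid_sample's default zero padding, which is why large amplitude or k values produce black borders and, in the extreme, empty frames.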
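
The new "spiral" style (replacing "perspective") moves the camera on a circle whose radius decays linearly to zero while the angle sweeps two full turns over the clip. As a standalone helper (spiral_offsets is an illustrative name):

    import numpy as np

    def spiral_offsets(t, amplitude):
        # t = frame / num_frames in [0, 1): the radius shrinks from amplitude
        # toward 0 while the angle covers 4*pi, so the camera spirals inward.
        radius = amplitude * (1 - t)
        return radius * np.sin(4 * np.pi * t), radius * np.cos(4 * np.pi * t)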
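
The SSAA option is the classic supersampling round-trip: warp at ssaa_factor times the target resolution, then filter back down, which averages out resampling artifacts. Schematically (ssaa and render_fn are hypothetical stand-ins for the per-frame warp, not names from app.py):

    import torch
    from torchvision.transforms import Resize

    def ssaa(render_fn, image, factor):
        # image: (3, H, W). Render at factor x the resolution, then downscale;
        # antialias=True makes the downscale act as the low-pass filter.
        _, H, W = image.shape
        if factor > 1:
            image = Resize((H * factor, W * factor), antialias=True)(image)
        out = render_fn(image)  # hypothetical callback: (3, h, w) -> (3, h, w)
        if factor > 1:
            out = Resize((H, W), antialias=True)(out)
        return out

One side effect in this version: the displacements stay in k * camera units while the pixel grid becomes ssaa_factor times denser, so the same amplitude yields proportionally less on-screen parallax at SSAA 2 or 4.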
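
One caveat with mode='bicubic': the kernel can overshoot the [0, 1] range near hard edges, and numpy's uint8 cast wraps rather than saturates, so a slight undershoot can flip to near-white speckles. Clamping before the cast is a cheap guard; this is a suggested hardening, not something the commit does:

    import numpy as np
    import torch

    def to_uint8_frame(warped):
        # warped: (1, 3, H, W) float tensor from grid_sample. Bicubic output
        # can leave [0, 1], and -0.01 * 255 wraps to 254 under astype(np.uint8),
        # so clamp before converting.
        frame = warped.squeeze(0).permute(1, 2, 0).clamp(0.0, 1.0).cpu().numpy()
        return (frame * 255).astype(np.uint8)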
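
The TAA checkbox enables a single-tap exponential blend: each stored frame keeps 80% of the current warp plus 20% of the previously stored frame, so the contribution of older frames decays geometrically. An equivalent standalone form (taa is an illustrative name):

    import numpy as np

    def taa(frames_in, alpha=0.8):
        # Running exponential blend over uint8 frames; alpha weights the
        # current frame, matching the 0.8 / 0.2 split used in the commit.
        out, prev = [], None
        for frame in frames_in:
            if prev is not None:
                frame = (alpha * frame.astype(np.float32)
                         + (1 - alpha) * prev.astype(np.float32)).astype(np.uint8)
            out.append(frame)
            prev = frame
        return out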