geowizard-e2e-ft

Sleeping

App Files Files Community

x10z committed on May 3

Commit

0200003

verified ·

1 Parent(s): 2ab4d40

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -31

app.py CHANGED Viewed

@@ -46,7 +46,7 @@ description = (
 def predict(image: Image.Image, processing_res_choice: int):
     """
     Single-frame prediction wrapped for GPU execution.
-    Returns a DepthNormalPipelineOutput with attributes depth_colored and normal_colored.
     """
     with torch.no_grad():
         return pipe(
@@ -61,7 +61,7 @@ def predict(image: Image.Image, processing_res_choice: int):
 def on_submit_video(video_path: str, processing_res_choice: int):
     """
-    Processes each frame of the input video, generating separate depth and normal videos.
     """
     if video_path is None:
         print("No video uploaded.")
@@ -73,11 +73,9 @@ def on_submit_video(video_path: str, processing_res_choice: int):
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    # Create temporary output files
-    tmp_depth = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     tmp_normal = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out_depth = cv2.VideoWriter(tmp_depth.name, fourcc, fps, (width, height))
     out_normal = cv2.VideoWriter(tmp_normal.name, fourcc, fps, (width, height))
     # Process each frame
@@ -90,16 +88,10 @@ def on_submit_video(video_path: str, processing_res_choice: int):
         rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(rgb)
-        # Predict depth and normals
         result = predict(pil_image, processing_res_choice)
-        depth_colored = result.depth_colored
         normal_colored = result.normal_colored
-        # Write depth frame
-        depth_frame = np.array(depth_colored)
-        depth_bgr = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2BGR)
-        out_depth.write(depth_bgr)
         # Write normal frame
         normal_frame = np.array(normal_colored)
         normal_bgr = cv2.cvtColor(normal_frame, cv2.COLOR_RGB2BGR)
@@ -107,24 +99,19 @@ def on_submit_video(video_path: str, processing_res_choice: int):
     # Release resources
     cap.release()
-    out_depth.release()
     out_normal.release()
-    # Return video paths for download
-    return tmp_depth.name, tmp_normal.name
 # Build Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown(title)
     gr.Markdown(description)
-    gr.Markdown("### Depth and Normals Prediction on Video")
     with gr.Row():
-        input_video = gr.Video(
-            label="Input Video",
-            elem_id='video-display-input'
-        )
         with gr.Column():
             processing_res_choice = gr.Radio(
                 [
@@ -134,22 +121,15 @@ with gr.Blocks() as demo:
                 label="Processing resolution",
                 value=768,
             )
-            submit = gr.Button(value="Compute Depth and Normals")
     with gr.Row():
-        output_depth_video = gr.Video(
-            label="Depth Video",
-            elem_id='download'
-        )
-        output_normal_video = gr.Video(
-            label="Normal Video",
-            elem_id='download'
-        )
     submit.click(
         fn=on_submit_video,
         inputs=[input_video, processing_res_choice],
-        outputs=[output_depth_video, output_normal_video]
     )
 if __name__ == "__main__":

 def predict(image: Image.Image, processing_res_choice: int):
     """
     Single-frame prediction wrapped for GPU execution.
+    Returns a DepthNormalPipelineOutput with attribute normal_colored.
     """
     with torch.no_grad():
         return pipe(
 def on_submit_video(video_path: str, processing_res_choice: int):
     """
+    Processes each frame of the input video, generating a normal map video.
     """
     if video_path is None:
         print("No video uploaded.")
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    # Temporary output file for normals video
     tmp_normal = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out_normal = cv2.VideoWriter(tmp_normal.name, fourcc, fps, (width, height))
     # Process each frame
         rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         pil_image = Image.fromarray(rgb)
+        # Predict normals
         result = predict(pil_image, processing_res_choice)
         normal_colored = result.normal_colored
         # Write normal frame
         normal_frame = np.array(normal_colored)
         normal_bgr = cv2.cvtColor(normal_frame, cv2.COLOR_RGB2BGR)
     # Release resources
     cap.release()
     out_normal.release()
+    # Return video path for download
+    return tmp_normal.name
 # Build Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown(title)
     gr.Markdown(description)
+    gr.Markdown("### Normals Prediction on Video")
     with gr.Row():
+        input_video = gr.Video(label="Input Video", elem_id='video-display-input')
         with gr.Column():
             processing_res_choice = gr.Radio(
                 [
                 label="Processing resolution",
                 value=768,
             )
+            submit = gr.Button(value="Compute Normals")
     with gr.Row():
+        output_normal_video = gr.Video(label="Normal Video", elem_id='download')
     submit.click(
         fn=on_submit_video,
         inputs=[input_video, processing_res_choice],
+        outputs=[output_normal_video]
     )
 if __name__ == "__main__":