Spaces: mmlab-ntu/Segment-Any-RGBD (Runtime error)
Jingkang Yang committed · Commit 9a023ed · 1 Parent(s): 4855376
update: app

app.py CHANGED
@@ -210,96 +210,96 @@ def greet_scannet(rgb_input, depth_map_input, class_candidates):
     RGB_Semantic_SAM_Mask_gif = 'outputs/RGB_3D_All.mp4'
     return RGB_Semantic_SAM_2D, RGB_Semantic_SAM_Mask_gif, Depth_map, Depth_Semantic_SAM_2D, Depth_Semantic_SAM_Mask_gif

+SHARED_UI_WARNING = f'''### [NOTE] It may be very slow in this shared UI.
+You can duplicate and use it with a paid private GPU.
+<a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/mmlab-ntu/Segment-Any-RGBD?duplicate=true"><img style="margin-top:0;margin-bottom:0" src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-xl-dark.svg" alt="Duplicate Space"></a>
+Alternatively, you can also use the Colab demo on our project page.
+<a style="display:inline-block" href="https://github.com/Jun-CEN/SegmentAnyRGBD/"><img style="margin-top:0;margin-bottom:0" src="https://img.shields.io/badge/Project%20Page-online-brightgreen"></a>
+'''

 with gr.Blocks(analytics_enabled=False) as segrgbd_iface:
[old lines 215-302 removed: the previous UI body wired to greet_scannet; most of the removed lines were not preserved in this view, only the fragments below survive]
-        [
-        ],
-        [
-        inputs=[Input_RGB_Component, Depth_Map_Input_Component, Class_Candidates_Component],
-        outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
-        fn=greet_scannet)
-        vc_end_btn.click(inputs=[Input_RGB_Component, Depth_Map_Input_Component, Class_Candidates_Component],
-        outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
-        fn=greet_scannet)
+    #######t2v#######
+    with gr.Tab(label="Dataset: Sailvos3D"):
+        with gr.Column():
+            with gr.Row():
+                # with gr.Tab(label='input'):
+                with gr.Column():
+                    with gr.Row():
+                        Input_RGB_Component = gr.Image(label = 'RGB_Input', type = 'filepath').style(width=320, height=200)
+                        Depth_Map_Output_Component = gr.Image(label = "Vis_Depth_Map").style(width=320, height=200)
+                    with gr.Row():
+                        Depth_Map_Input_Component = gr.File(label = 'input_Depth_map')
+                        Component_2D_to_3D_Projection_Parameters = gr.File(label = '2D_to_3D_Projection_Parameters')
+                    with gr.Row():
+                        Class_Candidates_Component = gr.Text(label = 'Class_Candidates')
+                    vc_end_btn = gr.Button("Send")
+                with gr.Tab(label='Result'):
+                    with gr.Row():
+                        RGB_Semantic_SAM_Mask_Component = gr.Video(label = "RGB_Semantic_SAM_Mask").style(width=320, height=200)
+                        RGB_Semantic_SAM_Mask_3D_Component = gr.Video(label = "Video_3D_RGB_Semantic_SAM_Mask").style(width=320, height=200)
+                    with gr.Row():
+                        Depth_Semantic_SAM_Mask_Component = gr.Video(label = "Depth_Semantic_SAM_Mask").style(width=320, height=200)
+                        Depth_Semantic_SAM_Mask_3D_Component = gr.Video(label = "Video_3D_Depth_Semantic_SAM_Mask").style(width=320, height=200)
+                    with gr.Row():
+                        gr.Markdown("<b> It takes around 2 to 5 minutes to get the final results. The framework initialization, SAM segmentation, zero-shot semantic segmentation and 3D results rendering take long time.</b>")
+            gr.Examples(examples=[
+                [
+                    'UI/sailvos3d/ex1/inputs/rgb_000160.bmp',
+                    'UI/sailvos3d/ex1/inputs/depth_000160.npy',
+                    'UI/sailvos3d/ex1/inputs/rage_matrices_000160.npz',
+                    'person, car, motorcycle, truck, bird, dog, handbag, suitcase, bottle, cup, bowl, chair, potted plant, bed, dining table, tv, laptop, cell phone, bag, bin, box, door, road barrier, stick, lamp, floor, wall',
+                ],
+                [
+                    'UI/sailvos3d/ex2/inputs/rgb_000540.bmp',
+                    'UI/sailvos3d/ex2/inputs/depth_000540.npy',
+                    'UI/sailvos3d/ex2/inputs/rage_matrices_000540.npz',
+                    'person, car, motorcycle, truck, bird, dog, handbag, suitcase, bottle, cup, bowl, chair, potted plant, bed, dining table, tv, laptop, cell phone, bag, bin, box, door, road barrier, stick, lamp, floor, wall',
+                ]],
+                inputs=[Input_RGB_Component, Depth_Map_Input_Component, Component_2D_to_3D_Projection_Parameters, Class_Candidates_Component],
+                outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
+                fn=greet_sailvos3d)
+            vc_end_btn.click(inputs=[Input_RGB_Component, Depth_Map_Input_Component, Component_2D_to_3D_Projection_Parameters, Class_Candidates_Component],
+                             outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
+                             fn=greet_sailvos3d)
+
+    with gr.Tab(label="Dataset: Scannet"):
+        with gr.Column():
+            with gr.Row():
+                # with gr.Tab(label='input'):
+                with gr.Column():
+                    with gr.Row():
+                        Input_RGB_Component = gr.Image(label = 'RGB_Input', type = 'filepath').style(width=320, height=200)
+                        Depth_Map_Output_Component = gr.Image(label = "Vis_Depth_Map").style(width=320, height=200)
+                    with gr.Row():
+                        Depth_Map_Input_Component = gr.File(label = "Input_Depth_Map")
+                        Class_Candidates_Component = gr.Text(label = 'Class_Candidates')
+                    vc_end_btn = gr.Button("Send")
+                with gr.Tab(label='Result'):
+                    with gr.Row():
+                        RGB_Semantic_SAM_Mask_Component = gr.Video(label = "RGB_Semantic_SAM_Mask").style(width=320, height=200)
+                        RGB_Semantic_SAM_Mask_3D_Component = gr.Video(label = "Video_3D_RGB_Semantic_SAM_Mask").style(width=320, height=200)
+                    with gr.Row():
+                        Depth_Semantic_SAM_Mask_Component = gr.Video(label = "Depth_Semantic_SAM_Mask").style(width=320, height=200)
+                        Depth_Semantic_SAM_Mask_3D_Component = gr.Video(label = "Video_3D_Depth_Semantic_SAM_Mask").style(width=320, height=200)
+                    with gr.Row():
+                        gr.Markdown("<b> It takes around 2 to 5 minutes to get the final results. The framework initialization, SAM segmentation, zero-shot semantic segmentation and 3D results rendering take long time.</b>")
+            gr.Examples(examples=[
+                [
+                    'UI/scannetv2/examples/scene0000_00/color/1660.jpg',
+                    'UI/scannetv2/examples/scene0000_00/depth/1660.png',
+                    'wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, desk, curtain, refrigerator, shower curtain, toilet, sink, bathtub, other furniture',
+                ],
+                [
+                    'UI/scannetv2/examples/scene0000_00/color/5560.jpg',
+                    'UI/scannetv2/examples/scene0000_00/depth/5560.png',
+                    'wall, floor, cabinet, bed, chair, sofa, table, door, window, bookshelf, picture, counter, desk, curtain, refrigerator, shower curtain, toilet, sink, bathtub, other furniture',
+                ]],
+                inputs=[Input_RGB_Component, Depth_Map_Input_Component, Class_Candidates_Component],
+                outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
+                fn=greet_scannet)
+            vc_end_btn.click(inputs=[Input_RGB_Component, Depth_Map_Input_Component, Class_Candidates_Component],
+                             outputs=[RGB_Semantic_SAM_Mask_Component, RGB_Semantic_SAM_Mask_3D_Component, Depth_Map_Output_Component, Depth_Semantic_SAM_Mask_Component, Depth_Semantic_SAM_Mask_3D_Component],
+                             fn=greet_scannet)

 demo = segrgbd_iface
 demo.launch()
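
For readers unfamiliar with the Gradio pattern in the added code: each dataset tab builds its components inside nested gr.Row/gr.Column blocks and then binds the same fn/inputs/outputs triple to both gr.Examples and the Send button's .click handler. Below is a minimal, self-contained sketch of that wiring, not the repo's code: the greet stub stands in for greet_scannet/greet_sailvos3d and simply returns empty outputs, the layout is flattened to two columns, and the .style(width=..., height=...) calls from the diff (Gradio 3.x styling) are omitted so the sketch is not tied to that version.

import gradio as gr

def greet(rgb_path, depth_file, class_candidates):
    # Placeholder for greet_scannet / greet_sailvos3d: the real functions run SAM,
    # zero-shot semantic segmentation, and 3D rendering. Returning None for each
    # output just leaves the five result components empty.
    print(f"RGB: {rgb_path}, depth: {depth_file}, classes: {class_candidates}")
    return None, None, None, None, None

with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Tab(label="Dataset: Scannet"):
        with gr.Row():
            with gr.Column():  # input side
                rgb_in = gr.Image(label="RGB_Input", type="filepath")
                depth_in = gr.File(label="Input_Depth_Map")
                classes_in = gr.Text(label="Class_Candidates")
                send_btn = gr.Button("Send")
            with gr.Column():  # result side
                depth_vis = gr.Image(label="Vis_Depth_Map")
                rgb_mask = gr.Video(label="RGB_Semantic_SAM_Mask")
                rgb_mask_3d = gr.Video(label="Video_3D_RGB_Semantic_SAM_Mask")
                depth_mask = gr.Video(label="Depth_Semantic_SAM_Mask")
                depth_mask_3d = gr.Video(label="Video_3D_Depth_Semantic_SAM_Mask")
        # Same fn/inputs/outputs triple that the diff also passes to gr.Examples.
        send_btn.click(fn=greet,
                       inputs=[rgb_in, depth_in, classes_in],
                       outputs=[rgb_mask, rgb_mask_3d, depth_vis, depth_mask, depth_mask_3d])

demo.launch()

Swapping the greet stub for the real greet_scannet should approximate the Scannet tab's behavior; the Sailvos3D tab differs only in the extra 2D-to-3D projection-parameters file input and in calling greet_sailvos3d.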