Spaces:

SkalskiP
/

RF-DETR

Running on T4

App Files Files Community

SkalskiP committed on 7 days ago

Commit

1015457

1 Parent(s): 9cdcd5f

update UI to support video inference

Browse files

Files changed (2) hide show

app.py +120 -46
utils/image.py +16 -0

app.py CHANGED Viewed

@@ -1,22 +1,28 @@
 import gradio as gr
 import supervision as sv
 from rfdetr import RFDETRBase, RFDETRLarge
 from rfdetr.util.coco_classes import COCO_CLASSES
 from utils.video import create_directory
 MARKDOWN = """
 # RF-DETR 🔥
-<div style="display: flex; align-items: center; gap: 8px;">
   <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb">
-    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="colab" />
   </a>
   <a href="https://blog.roboflow.com/rf-detr">
-    <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="roboflow" />
   </a>
   <a href="https://github.com/roboflow/rf-detr">
-    <img src="https://badges.aleen42.com/src/github.svg" alt="roboflow" />
   </a>
 </div>
@@ -40,13 +46,12 @@ VIDEO_TARGET_DIRECTORY = "tmp"
 create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
-def inference(image, confidence: float, resolution: int, checkpoint: str):
-    model_class = RFDETRBase if checkpoint == "base" else RFDETRLarge
-    model = model_class(resolution=resolution)
     detections = model.predict(image, threshold=confidence)
-    text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
-    thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
@@ -67,55 +72,124 @@ def inference(image, confidence: float, resolution: int, checkpoint: str):
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
     return annotated_image
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(
-                label="Input Image",
                 image_mode='RGB',
                 type='pil',
                 height=600
             )
-            confidence_slider = gr.Slider(
-                label="Confidence",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.05,
-                value=0.5,
-            )
-            resolution_slider = gr.Slider(
-                label="Inference resolution",
-                minimum=560,
-                maximum=1120,
-                step=56,
-                value=728,
             )
-            with gr.Row():
-                checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
                     choices=["base", "large"],
                     value="base"
                 )
-                submit_button = gr.Button("Submit")
-        with gr.Column():
-            output_image = gr.Image(
-                label="Input Image",
-                image_mode='RGB',
-                type='pil',
                 height=600
             )
-    gr.Examples(
-        fn=inference,
-        examples=IMAGE_EXAMPLES,
-        inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
-        outputs=output_image
-    )
-    submit_button.click(
-        inference,
-        inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
-        outputs=output_image
-    )
 demo.launch(debug=False, show_error=True)

+from typing import Union
 import gradio as gr
+import numpy as np
 import supervision as sv
+from PIL import Image
 from rfdetr import RFDETRBase, RFDETRLarge
+from rfdetr.detr import RFDETR
 from rfdetr.util.coco_classes import COCO_CLASSES
+from utils.image import calculate_resolution_wh
 from utils.video import create_directory
 MARKDOWN = """
 # RF-DETR 🔥
+<div>
   <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb">
+    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="colab" style="display:inline-block;">
   </a>
   <a href="https://blog.roboflow.com/rf-detr">
+    <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="roboflow" style="display:inline-block;">
   </a>
   <a href="https://github.com/roboflow/rf-detr">
+    <img src="https://badges.aleen42.com/src/github.svg" alt="roboflow" style="display:inline-block;">
   </a>
 </div>
 create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
+def detect_and_annotate(model: RFDETR, image: Union[Image.Image, np.ndarray], confidence: float):
     detections = model.predict(image, threshold=confidence)
+    resolution_wh = calculate_resolution_wh(image)
+    text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
+    thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
     bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
     label_annotator = sv.LabelAnnotator(
     annotated_image = label_annotator.annotate(annotated_image, detections, labels)
     return annotated_image
+def image_processing_inference(input_image: Image.Image, confidence: float, resolution: int, checkpoint: str):
+    """Run RF-DETR on a single image and return the annotated image.
+
+    A `checkpoint` equal to "base" selects RFDETRBase; any other value selects
+    RFDETRLarge. A new model is constructed on every call using the requested
+    inference `resolution`, then handed to `detect_and_annotate` together
+    with the image and the `confidence` threshold.
+    """
+    if checkpoint == "base":
+        detector = RFDETRBase(resolution=resolution)
+    else:
+        detector = RFDETRLarge(resolution=resolution)
+    return detect_and_annotate(model=detector, image=input_image, confidence=confidence)
+def video_processing_inference(input_video: str, confidence: float, resolution: int, checkpoint: str):
+    """Video-tab handler; currently returns `input_video` unchanged.
+
+    A model is still constructed from `checkpoint` ("base" selects RFDETRBase,
+    anything else RFDETRLarge) and `resolution`, but it is not applied to the
+    video, and `confidence` is not used.
+    """
+    if checkpoint == "base":
+        detector = RFDETRBase(resolution=resolution)
+    else:
+        detector = RFDETRLarge(resolution=resolution)
+    return input_video
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
+    # Image tab: input and output images share one row; the controls and the
+    # submit button sit in the row below.
+    with gr.Tab("Image"):
+        with gr.Row():
+            image_processing_input_image = gr.Image(
+                label="Upload image",
                 image_mode='RGB',
                 type='pil',
                 height=600
             )
+            image_processing_output_image = gr.Image(
+                label="Output image",
+                image_mode='RGB',
+                type='pil',
+                height=600
             )
+        with gr.Row():
+            with gr.Column():
+                image_processing_confidence_slider = gr.Slider(
+                    label="Confidence",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.5,
+                )
+                image_processing_resolution_slider = gr.Slider(
+                    label="Inference resolution",
+                    minimum=560,
+                    maximum=1120,
+                    step=56,
+                    value=728,
+                )
+                image_processing_checkpoint_dropdown = gr.Dropdown(
                     label="Checkpoint",
                     choices=["base", "large"],
                     value="base"
                 )
+            with gr.Column():
+                # The button label is the positional `value`; "primary" is a
+                # style variant. Passing value="primary" as well would supply
+                # `value` twice and raise a TypeError.
+                image_processing_submit_button = gr.Button("Submit", variant="primary")
+        # Examples and the submit button feed the same inputs to the same handler.
+        gr.Examples(
+            fn=image_processing_inference,
+            examples=IMAGE_EXAMPLES,
+            inputs=[
+                image_processing_input_image,
+                image_processing_confidence_slider,
+                image_processing_resolution_slider,
+                image_processing_checkpoint_dropdown
+            ],
+            outputs=image_processing_output_image,
+            cache_examples=True
+        )
+        image_processing_submit_button.click(
+            image_processing_inference,
+            inputs=[
+                image_processing_input_image,
+                image_processing_confidence_slider,
+                image_processing_resolution_slider,
+                image_processing_checkpoint_dropdown
+            ],
+            outputs=image_processing_output_image
+        )
+    # Video tab: mirrors the Image tab layout, with input and output videos in
+    # one row and the controls plus submit button in the row below.
+    with gr.Tab("Video"):
+        with gr.Row():
+            video_processing_input_video = gr.Video(
+                label='Upload video',
                 height=600
             )
+            video_processing_output_video = gr.Video(
+                label='Output video',
+                height=600
+            )
+        with gr.Row():
+            with gr.Column():
+                video_processing_confidence_slider = gr.Slider(
+                    label="Confidence",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.05,
+                    value=0.5,
+                )
+                video_processing_resolution_slider = gr.Slider(
+                    label="Inference resolution",
+                    minimum=560,
+                    maximum=1120,
+                    step=56,
+                    value=728,
+                )
+                video_processing_checkpoint_dropdown = gr.Dropdown(
+                    label="Checkpoint",
+                    choices=["base", "large"],
+                    value="base"
+                )
+            with gr.Column():
+                # The button label is the positional `value`; "primary" is a
+                # style variant. Passing value="primary" as well would supply
+                # `value` twice and raise a TypeError.
+                video_processing_submit_button = gr.Button("Submit", variant="primary")
+        video_processing_submit_button.click(
+            video_processing_inference,
+            inputs=[
+                video_processing_input_video,
+                video_processing_confidence_slider,
+                video_processing_resolution_slider,
+                video_processing_checkpoint_dropdown
+            ],
+            outputs=video_processing_output_video
+        )
 demo.launch(debug=False, show_error=True)

utils/image.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from typing import Tuple, Union
+from PIL import Image
+import numpy as np
+def calculate_resolution_wh(image: Union[Image.Image, np.ndarray]) -> Tuple[int, int]:
+    """Return the (width, height) of a Pillow image or a numpy array image.
+
+    Raises:
+        TypeError: if `image` is neither a Pillow Image nor a numpy array.
+        ValueError: if `image` is a numpy array with fewer than 2 dimensions.
+    """
+    if isinstance(image, Image.Image):
+        return image.size
+    if not isinstance(image, np.ndarray):
+        raise TypeError("Input image must be a Pillow Image or a numpy array.")
+    if image.ndim < 2:
+        raise ValueError("Input numpy array image must have at least 2 dimensions (height, width).")
+    # The first two axes are read as (height, width); swap them for the result.
+    height, width = image.shape[:2]
+    return width, height