SkalskiP committed on
Commit 8a7385d · 1 Parent(s): dcf11a7

initial video processing support

Files changed (2)
  1. .gitattributes +1 -0
  2. app.py +28 -11
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 from typing import TypeVar
 
-from tqdm import tqdm
 import gradio as gr
 import numpy as np
 import supervision as sv
@@ -26,18 +25,22 @@ RF-DETR is a real-time, transformer-based object detection model architecture de
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 
-IMAGE_EXAMPLES = [
+IMAGE_PROCESSING_EXAMPLES = [
     ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
+VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
+]
 
 COLOR = sv.ColorPalette.from_hex([
     "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
     "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
 ])
 
-MAX_VIDEO_LENGTH_SECONDS = 2
+MAX_VIDEO_LENGTH_SECONDS = 5
 VIDEO_SCALE_FACTOR = 0.5
 VIDEO_TARGET_DIRECTORY = "tmp"
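Note on the constants: raising MAX_VIDEO_LENGTH_SECONDS from 2 to 5 presumably feeds the `total` frame count that is later passed to `sv.get_video_frames_generator`; that computation sits outside this diff. A minimal sketch of how such a cap is typically derived, with `frame_budget` as a hypothetical helper that is not part of app.py:

import supervision as sv

MAX_VIDEO_LENGTH_SECONDS = 5


def frame_budget(input_video: str) -> int:
    # Hypothetical helper: cap processing at MAX_VIDEO_LENGTH_SECONDS of footage.
    # sv.VideoInfo reads fps and total frame count from the source file.
    video_info = sv.VideoInfo.from_video_path(input_video)
    return min(video_info.total_frames, int(video_info.fps * MAX_VIDEO_LENGTH_SECONDS))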
 
@@ -59,8 +62,7 @@ def detect_and_annotate(
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         text_color=sv.Color.BLACK,
-        text_scale=text_scale,
-        smart_position=True
+        text_scale=text_scale
     )
 
     labels = [
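For context, this hunk drops `smart_position=True` from the label annotator and keeps `text_scale`; the rest of `detect_and_annotate` is not shown. A condensed sketch of how the two supervision annotators are usually combined in a function like this (the `annotate` name, the `class_names` mapping, and the incoming `detections` are assumptions, not code from this commit):

import numpy as np
import supervision as sv

COLOR = sv.ColorPalette.from_hex(["#ffff00", "#ff9b00", "#ff8080"])  # abbreviated palette


def annotate(
    image: np.ndarray,
    detections: sv.Detections,
    class_names: dict,
    text_scale: float
) -> np.ndarray:
    # Draw boxes first, then labels on top of the boxed image.
    box_annotator = sv.BoxAnnotator(color=COLOR)
    label_annotator = sv.LabelAnnotator(
        color=COLOR,
        text_color=sv.Color.BLACK,
        text_scale=text_scale
    )
    labels = [
        f"{class_names[class_id]} {confidence:.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]
    annotated = box_annotator.annotate(image.copy(), detections)
    return label_annotator.annotate(annotated, detections, labels=labels)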
@@ -98,7 +100,7 @@ def video_processing_inference(
     confidence: float,
     resolution: int,
     checkpoint: str,
-    progress=gr.Progress(track_tqdm=True)
+    progress=gr.Progress()
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
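This signature change pairs with the removed `from tqdm import tqdm` and the switch to `progress.tqdm(...)` below: instead of tracking a console tqdm bar (`track_tqdm=True`), the handler drives Gradio's own progress tracker directly. A minimal, self-contained illustration of that pattern (the toy `slow_echo` demo is not from app.py):

import time

import gradio as gr


def slow_echo(text: str, progress=gr.Progress()) -> str:
    # Gradio detects the gr.Progress() default and injects a live tracker when
    # the function runs as an event handler; progress.tqdm mirrors tqdm's API.
    for _ in progress.tqdm(range(10)):
        time.sleep(0.1)
    return text


with gr.Blocks() as demo:
    text_input = gr.Textbox(label="input")
    text_output = gr.Textbox(label="output")
    text_input.submit(slow_echo, inputs=text_input, outputs=text_output)

if __name__ == "__main__":
    demo.launch()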
 
@@ -113,13 +115,13 @@ def video_processing_inference(
     frames_generator = sv.get_video_frames_generator(input_video, end=total)
 
     with sv.VideoSink(output_video, video_info=video_info) as sink:
-        for frame in tqdm(frames_generator, total=total):
-            frame = sv.scale_image(frame, VIDEO_SCALE_FACTOR)
+        for frame in progress.tqdm(frames_generator, total=total):
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
                 confidence=confidence
             )
+            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
 
     return output_video
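Putting the hunk together: frames are now annotated at full resolution, and only the annotated frame is downscaled before being written. The `sv.VideoInfo` handed to `sv.VideoSink` has to describe the frames that are actually written, so the setup code (not shown in this diff) presumably scales its width and height by `VIDEO_SCALE_FACTOR` as well. A condensed sketch under that assumption, with `process_video` and `annotate_frame` as hypothetical names:

import gradio as gr
import supervision as sv

VIDEO_SCALE_FACTOR = 0.5


def process_video(
    input_video: str,
    output_video: str,
    total: int,
    annotate_frame,                      # e.g. a detect_and_annotate closure
    progress=gr.Progress()
) -> str:
    # Assumption: the sink's VideoInfo is scaled to match the downscaled frames.
    video_info = sv.VideoInfo.from_video_path(input_video)
    video_info.width = int(video_info.width * VIDEO_SCALE_FACTOR)
    video_info.height = int(video_info.height * VIDEO_SCALE_FACTOR)

    frames_generator = sv.get_video_frames_generator(input_video, end=total)
    with sv.VideoSink(output_video, video_info=video_info) as sink:
        for frame in progress.tqdm(frames_generator, total=total):
            annotated_frame = annotate_frame(frame)  # detection at full resolution
            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
            sink.write_frame(annotated_frame)
    return output_video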
@@ -166,7 +168,7 @@ with gr.Blocks() as demo:
 
         gr.Examples(
             fn=image_processing_inference,
-            examples=IMAGE_EXAMPLES,
+            examples=IMAGE_PROCESSING_EXAMPLES,
             inputs=[
                 image_processing_input_image,
                 image_processing_confidence_slider,
@@ -174,7 +176,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
-            cache_examples=True
+            cache_examples=True,
+            run_on_click=True
         )
 
         image_processing_submit_button.click(
@@ -185,7 +188,7 @@ with gr.Blocks() as demo:
                 image_processing_resolution_slider,
                 image_processing_checkpoint_dropdown
             ],
-            outputs=image_processing_output_image
+            outputs=image_processing_output_image,
         )
     with gr.Tab("Video"):
         with gr.Row():
@@ -221,6 +224,20 @@ with gr.Blocks() as demo:
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
 
+        gr.Examples(
+            fn=video_processing_inference,
+            examples=VIDEO_PROCESSING_EXAMPLES,
+            inputs=[
+                video_processing_input_video,
+                video_processing_confidence_slider,
+                video_processing_resolution_slider,
+                video_processing_checkpoint_dropdown
+            ],
+            outputs=video_processing_output_video,
+            cache_examples=True,
+            run_on_click=True
+        )
+
         video_processing_submit_button.click(
             video_processing_inference,
             inputs=[
 
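On the new example gallery: with `cache_examples=True`, Gradio runs `fn` over each example row when the app starts and replays the stored output when a row is clicked (`run_on_click` applies when caching is off). A minimal stand-alone sketch of the same wiring, with a toy function standing in for `video_processing_inference`:

import gradio as gr


def shout(text: str) -> str:
    # Toy stand-in for video_processing_inference.
    return text.upper()


with gr.Blocks() as demo:
    text_input = gr.Textbox(label="input")
    text_output = gr.Textbox(label="output")
    gr.Examples(
        fn=shout,
        examples=[["hello"], ["rf-detr"]],
        inputs=text_input,
        outputs=text_output,
        cache_examples=True  # outputs computed at startup, replayed on click
    )

if __name__ == "__main__":
    demo.launch()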