initial video processing support
- .gitattributes +1 -0
- app.py +28 -11
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
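The added rule routes MP4 files through Git LFS so the example videos introduced below can be committed without bloating the repository history; it is the same line that `git lfs track "*.mp4"` would append.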
app.py
CHANGED
@@ -1,7 +1,6 @@
 import os
 from typing import TypeVar
 
-from tqdm import tqdm
 import gradio as gr
 import numpy as np
 import supervision as sv
@@ -26,18 +25,22 @@ RF-DETR is a real-time, transformer-based object detection model architecture de
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 
-EXAMPLES = [
+IMAGE_PROCESSING_EXAMPLES = [
     ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
+VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
+]
 
 COLOR = sv.ColorPalette.from_hex([
     "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
     "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
 ])
 
-MAX_VIDEO_LENGTH_SECONDS =
+MAX_VIDEO_LENGTH_SECONDS = 5
 VIDEO_SCALE_FACTOR = 0.5
 VIDEO_TARGET_DIRECTORY = "tmp"
 
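Aside on the new constants (not part of the diff): MAX_VIDEO_LENGTH_SECONDS is consumed inside video_processing_inference, in code outside the hunks shown here. Below is a minimal sketch of how such a cap is typically combined with supervision's video utilities; the exact clamp in app.py is not visible in this commit, so treat the expression as an assumption:

import supervision as sv

MAX_VIDEO_LENGTH_SECONDS = 5  # mirrors the constant added above

video_info = sv.VideoInfo.from_video_path("videos/people-walking.mp4")
# truncate uploads longer than the cap by limiting how many frames are read
total = min(video_info.total_frames, int(video_info.fps * MAX_VIDEO_LENGTH_SECONDS))
frames_generator = sv.get_video_frames_generator("videos/people-walking.mp4", end=total)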
@@ -59,8 +62,7 @@ def detect_and_annotate(
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         text_color=sv.Color.BLACK,
-        text_scale=text_scale,
-        smart_position=True
+        text_scale=text_scale
     )
 
     labels = [
@@ -98,7 +100,7 @@ def video_processing_inference(
     confidence: float,
     resolution: int,
     checkpoint: str,
-    progress=gr.Progress(track_tqdm=True)
+    progress=gr.Progress()
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
 
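Aside on the signature change (not part of the diff): Gradio injects a live tracker whenever a parameter's default value is a gr.Progress instance, and progress.tqdm wraps any iterable to mirror it as a progress bar in the UI, which is what lets the commit drop the tqdm import. A self-contained sketch with a toy function (process, steps, and the interface are illustrative, not from app.py):

import time

import gradio as gr

def process(steps: float, progress=gr.Progress()):
    # gradio detects the gr.Progress default and injects a tracker at call time;
    # progress.tqdm renders the wrapped loop as a progress bar in the browser
    for _ in progress.tqdm(range(int(steps))):
        time.sleep(0.05)
    return "done"

demo = gr.Interface(process, gr.Number(value=20), gr.Textbox())

if __name__ == "__main__":
    demo.launch()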
@@ -113,13 +115,13 @@ def video_processing_inference(
     frames_generator = sv.get_video_frames_generator(input_video, end=total)
 
     with sv.VideoSink(output_video, video_info=video_info) as sink:
-        for frame in tqdm(frames_generator, total=total):
-            frame = sv.scale_image(frame, VIDEO_SCALE_FACTOR)
+        for frame in progress.tqdm(frames_generator, total=total):
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
                 confidence=confidence
             )
+            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
 
     return output_video
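Two behavioral changes sit in this hunk: progress is now reported through the Gradio UI instead of a console tqdm bar, and each frame is annotated at full input resolution and only then downscaled, rather than being downscaled before inference. One caveat: the video_info handed to sv.VideoSink must describe the frames actually written, and that setup happens above this hunk, so the sketch below is an assumption about it rather than a copy of app.py:

import supervision as sv

VIDEO_SCALE_FACTOR = 0.5
source, target = "videos/people-walking.mp4", "tmp/people-walking.mp4"

# the sink encodes whatever resolution video_info declares,
# so the declared size is scaled together with the frames
video_info = sv.VideoInfo.from_video_path(source)
video_info.width = int(video_info.width * VIDEO_SCALE_FACTOR)
video_info.height = int(video_info.height * VIDEO_SCALE_FACTOR)

with sv.VideoSink(target, video_info=video_info) as sink:
    for frame in sv.get_video_frames_generator(source):
        sink.write_frame(sv.scale_image(frame, VIDEO_SCALE_FACTOR))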
@@ -166,7 +168,7 @@ with gr.Blocks() as demo:
 
         gr.Examples(
             fn=image_processing_inference,
-            examples=EXAMPLES,
+            examples=IMAGE_PROCESSING_EXAMPLES,
             inputs=[
                 image_processing_input_image,
                 image_processing_confidence_slider,
@@ -174,7 +176,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
-            cache_examples=True
+            cache_examples=True,
+            run_on_click=True
         )
 
         image_processing_submit_button.click(
@@ -185,7 +188,7 @@ with gr.Blocks() as demo:
                 image_processing_resolution_slider,
                 image_processing_checkpoint_dropdown
             ],
-            outputs=image_processing_output_image
+            outputs=image_processing_output_image,
         )
     with gr.Tab("Video"):
         with gr.Row():
@@ -221,6 +224,20 @@ with gr.Blocks() as demo:
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", variant="primary")
 
+        gr.Examples(
+            fn=video_processing_inference,
+            examples=VIDEO_PROCESSING_EXAMPLES,
+            inputs=[
+                video_processing_input_video,
+                video_processing_confidence_slider,
+                video_processing_resolution_slider,
+                video_processing_checkpoint_dropdown
+            ],
+            outputs=video_processing_output_video,
+            cache_examples=True,
+            run_on_click=True
+        )
+
         video_processing_submit_button.click(
             video_processing_inference,
             inputs=[
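The new gr.Examples block mirrors the image tab: cache_examples=True precomputes outputs for the listed clips when the Space starts (which is why videos/people-walking.mp4 and videos/vehicles.mp4 must exist in the repo, hence the LFS rule above), and run_on_click asks Gradio to run the function when an example is clicked rather than only filling the inputs. A stripped-down sketch of the same wiring with a toy function (shout and the textboxes are illustrative, not from app.py):

import gradio as gr

def shout(text: str) -> str:
    return text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox(label="input")
    out = gr.Textbox(label="output")
    gr.Examples(
        fn=shout,
        examples=[["hello"], ["world"]],
        inputs=[inp],
        outputs=[out],
        cache_examples=True,  # precompute outputs for every example at launch
        run_on_click=True     # run fn on click instead of only loading inputs
    )

if __name__ == "__main__":
    demo.launch()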