SkalskiP committed on
Commit 8a7385d · 1 Parent(s): dcf11a7

initial video processing support

Files changed (2)
  1. .gitattributes +1 -0
  2. app.py +28 -11
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 from typing import TypeVar
 
-from tqdm import tqdm
 import gradio as gr
 import numpy as np
 import supervision as sv
@@ -26,18 +25,22 @@ RF-DETR is a real-time, transformer-based object detection model architecture de
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 
-IMAGE_EXAMPLES = [
+IMAGE_PROCESSING_EXAMPLES = [
     ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
+VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
+]
 
 COLOR = sv.ColorPalette.from_hex([
     "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
     "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
 ])
 
-MAX_VIDEO_LENGTH_SECONDS = 2
+MAX_VIDEO_LENGTH_SECONDS = 5
 VIDEO_SCALE_FACTOR = 0.5
 VIDEO_TARGET_DIRECTORY = "tmp"
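Note on the constants: raising MAX_VIDEO_LENGTH_SECONDS from 2 to 5 presumably feeds the `total` frame count that is later passed to `sv.get_video_frames_generator`; that computation sits outside this diff. A minimal sketch of how such a cap is typically derived, with `frame_budget` as a hypothetical helper that is not part of app.py:

import supervision as sv

MAX_VIDEO_LENGTH_SECONDS = 5


def frame_budget(input_video: str) -> int:
    # Hypothetical helper: cap processing at MAX_VIDEO_LENGTH_SECONDS of footage.
    # sv.VideoInfo reads fps and total frame count from the source file.
    video_info = sv.VideoInfo.from_video_path(input_video)
    return min(video_info.total_frames, int(video_info.fps * MAX_VIDEO_LENGTH_SECONDS))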
 
@@ -59,8 +62,7 @@ def detect_and_annotate(
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         text_color=sv.Color.BLACK,
-        text_scale=text_scale,
-        smart_position=True
+        text_scale=text_scale
     )
 
     labels = [
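For context, this hunk drops `smart_position=True` from the label annotator and keeps `text_scale`; the rest of `detect_and_annotate` is not shown. A condensed sketch of how the two supervision annotators are usually combined in a function like this (the `annotate` name, the `class_names` mapping, and the incoming `detections` are assumptions, not code from this commit):

import numpy as np
import supervision as sv

COLOR = sv.ColorPalette.from_hex(["#ffff00", "#ff9b00", "#ff8080"])  # abbreviated palette


def annotate(
    image: np.ndarray,
    detections: sv.Detections,
    class_names: dict,
    text_scale: float
) -> np.ndarray:
    # Draw boxes first, then labels on top of the boxed image.
    box_annotator = sv.BoxAnnotator(color=COLOR)
    label_annotator = sv.LabelAnnotator(
        color=COLOR,
        text_color=sv.Color.BLACK,
        text_scale=text_scale
    )
    labels = [
        f"{class_names[class_id]} {confidence:.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]
    annotated = box_annotator.annotate(image.copy(), detections)
    return label_annotator.annotate(annotated, detections, labels=labels)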
@@ -98,7 +100,7 @@ def video_processing_inference(
     confidence: float,
     resolution: int,
     checkpoint: str,
-    progress=gr.Progress(track_tqdm=True)
+    progress=gr.Progress()
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
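This signature change pairs with the removed `from tqdm import tqdm` and the switch to `progress.tqdm(...)` below: instead of tracking a console tqdm bar (`track_tqdm=True`), the handler drives Gradio's own progress tracker directly. A minimal, self-contained illustration of that pattern (the toy `slow_echo` demo is not from app.py):

import time

import gradio as gr


def slow_echo(text: str, progress=gr.Progress()) -> str:
    # Gradio detects the gr.Progress() default and injects a live tracker when
    # the function runs as an event handler; progress.tqdm mirrors tqdm's API.
    for _ in progress.tqdm(range(10)):
        time.sleep(0.1)
    return text


with gr.Blocks() as demo:
    text_input = gr.Textbox(label="input")
    text_output = gr.Textbox(label="output")
    text_input.submit(slow_echo, inputs=text_input, outputs=text_output)

if __name__ == "__main__":
    demo.launch()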
 
@@ -113,13 +115,13 @@ def video_processing_inference(
     frames_generator = sv.get_video_frames_generator(input_video, end=total)
 
     with sv.VideoSink(output_video, video_info=video_info) as sink:
-        for frame in tqdm(frames_generator, total=total):
-            frame = sv.scale_image(frame, VIDEO_SCALE_FACTOR)
+        for frame in progress.tqdm(frames_generator, total=total):
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
                 confidence=confidence
             )
+            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
 
     return output_video
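Putting the hunk together: frames are now annotated at full resolution, and only the annotated frame is downscaled before being written. The `sv.VideoInfo` handed to `sv.VideoSink` has to describe the frames that are actually written, so the setup code (not shown in this diff) presumably scales its width and height by `VIDEO_SCALE_FACTOR` as well. A condensed sketch under that assumption, with `process_video` and `annotate_frame` as hypothetical names:

import gradio as gr
import supervision as sv

VIDEO_SCALE_FACTOR = 0.5


def process_video(
    input_video: str,
    output_video: str,
    total: int,
    annotate_frame,                      # e.g. a detect_and_annotate closure
    progress=gr.Progress()
) -> str:
    # Assumption: the sink's VideoInfo is scaled to match the downscaled frames.
    video_info = sv.VideoInfo.from_video_path(input_video)
    video_info.width = int(video_info.width * VIDEO_SCALE_FACTOR)
    video_info.height = int(video_info.height * VIDEO_SCALE_FACTOR)

    frames_generator = sv.get_video_frames_generator(input_video, end=total)
    with sv.VideoSink(output_video, video_info=video_info) as sink:
        for frame in progress.tqdm(frames_generator, total=total):
            annotated_frame = annotate_frame(frame)  # detection at full resolution
            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
            sink.write_frame(annotated_frame)
    return output_video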
@@ -166,7 +168,7 @@ with gr.Blocks() as demo:
 
         gr.Examples(
             fn=image_processing_inference,
-            examples=IMAGE_EXAMPLES,
+            examples=IMAGE_PROCESSING_EXAMPLES,
             inputs=[
                 image_processing_input_image,
                 image_processing_confidence_slider,
@@ -174,7 +176,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
-            cache_examples=True
+            cache_examples=True,
+            run_on_click=True
         )
 
         image_processing_submit_button.click(
@@ -185,7 +188,7 @@ with gr.Blocks() as demo:
                 image_processing_resolution_slider,
                 image_processing_checkpoint_dropdown
             ],
-            outputs=image_processing_output_image
+            outputs=image_processing_output_image,
         )
     with gr.Tab("Video"):
         with gr.Row():
@@ -221,6 +224,20 @@ with gr.Blocks() as demo:
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
 
+        gr.Examples(
+            fn=video_processing_inference,
+            examples=VIDEO_PROCESSING_EXAMPLES,
+            inputs=[
+                video_processing_input_video,
+                video_processing_confidence_slider,
+                video_processing_resolution_slider,
+                video_processing_checkpoint_dropdown
+            ],
+            outputs=video_processing_output_video,
+            cache_examples=True,
+            run_on_click=True
+        )
+
         video_processing_submit_button.click(
             video_processing_inference,
             inputs=[
 
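On the new example gallery: with `cache_examples=True`, Gradio runs `fn` over each example row when the app starts and replays the stored output when a row is clicked (`run_on_click` applies when caching is off). A minimal stand-alone sketch of the same wiring, with a toy function standing in for `video_processing_inference`:

import gradio as gr


def shout(text: str) -> str:
    # Toy stand-in for video_processing_inference.
    return text.upper()


with gr.Blocks() as demo:
    text_input = gr.Textbox(label="input")
    text_output = gr.Textbox(label="output")
    gr.Examples(
        fn=shout,
        examples=[["hello"], ["rf-detr"]],
        inputs=text_input,
        outputs=text_output,
        cache_examples=True  # outputs computed at startup, replayed on click
    )

if __name__ == "__main__":
    demo.launch()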