SkalskiP committed on
Commit
1015457
·
1 Parent(s): 9cdcd5f

update UI to support video inference

Browse files
Files changed (2) hide show
  1. app.py +120 -46
  2. utils/image.py +16 -0
app.py CHANGED
@@ -1,22 +1,28 @@
 
 
1
  import gradio as gr
 
2
  import supervision as sv
 
3
  from rfdetr import RFDETRBase, RFDETRLarge
 
4
  from rfdetr.util.coco_classes import COCO_CLASSES
5
 
 
6
  from utils.video import create_directory
7
 
8
  MARKDOWN = """
9
  # RF-DETR 🔥
10
 
11
- <div style="display: flex; align-items: center; gap: 8px;">
12
  <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb">
13
- <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="colab" />
14
  </a>
15
  <a href="https://blog.roboflow.com/rf-detr">
16
- <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="roboflow" />
17
  </a>
18
  <a href="https://github.com/roboflow/rf-detr">
19
- <img src="https://badges.aleen42.com/src/github.svg" alt="roboflow" />
20
  </a>
21
  </div>
22
 
@@ -40,13 +46,12 @@ VIDEO_TARGET_DIRECTORY = "tmp"
40
  create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
41
 
42
 
43
- def inference(image, confidence: float, resolution: int, checkpoint: str):
44
- model_class = RFDETRBase if checkpoint == "base" else RFDETRLarge
45
- model = model_class(resolution=resolution)
46
  detections = model.predict(image, threshold=confidence)
47
 
48
- text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
49
- thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
 
50
 
51
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
52
  label_annotator = sv.LabelAnnotator(
@@ -67,55 +72,124 @@ def inference(image, confidence: float, resolution: int, checkpoint: str):
67
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
68
  return annotated_image
69
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  with gr.Blocks() as demo:
71
  gr.Markdown(MARKDOWN)
72
- with gr.Row():
73
- with gr.Column():
74
- input_image = gr.Image(
75
- label="Input Image",
76
  image_mode='RGB',
77
  type='pil',
78
  height=600
79
  )
80
- confidence_slider = gr.Slider(
81
- label="Confidence",
82
- minimum=0.0,
83
- maximum=1.0,
84
- step=0.05,
85
- value=0.5,
86
- )
87
- resolution_slider = gr.Slider(
88
- label="Inference resolution",
89
- minimum=560,
90
- maximum=1120,
91
- step=56,
92
- value=728,
93
  )
94
- with gr.Row():
95
- checkpoint_dropdown = gr.Dropdown(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  label="Checkpoint",
97
  choices=["base", "large"],
98
  value="base"
99
  )
100
- submit_button = gr.Button("Submit")
101
- with gr.Column():
102
- output_image = gr.Image(
103
- label="Input Image",
104
- image_mode='RGB',
105
- type='pil',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  height=600
107
  )
108
- gr.Examples(
109
- fn=inference,
110
- examples=IMAGE_EXAMPLES,
111
- inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
112
- outputs=output_image
113
- )
114
-
115
- submit_button.click(
116
- inference,
117
- inputs=[input_image, confidence_slider, resolution_slider, checkpoint_dropdown],
118
- outputs=output_image
119
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
  demo.launch(debug=False, show_error=True)
 
1
+ from typing import Union
2
+
3
  import gradio as gr
4
+ import numpy as np
5
  import supervision as sv
6
+ from PIL import Image
7
  from rfdetr import RFDETRBase, RFDETRLarge
8
+ from rfdetr.detr import RFDETR
9
  from rfdetr.util.coco_classes import COCO_CLASSES
10
 
11
+ from utils.image import calculate_resolution_wh
12
  from utils.video import create_directory
13
 
14
  MARKDOWN = """
15
  # RF-DETR 🔥
16
 
17
+ <div>
18
  <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-rf-detr-on-detection-dataset.ipynb">
19
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="colab" style="display:inline-block;">
20
  </a>
21
  <a href="https://blog.roboflow.com/rf-detr">
22
+ <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="roboflow" style="display:inline-block;">
23
  </a>
24
  <a href="https://github.com/roboflow/rf-detr">
25
+ <img src="https://badges.aleen42.com/src/github.svg" alt="roboflow" style="display:inline-block;">
26
  </a>
27
  </div>
28
 
 
46
  create_directory(directory_path=VIDEO_TARGET_DIRECTORY)
47
 
48
 
49
+ def detect_and_annotate(model: RFDETR, image: Union[Image.Image, np.ndarray], confidence: float):
 
 
50
  detections = model.predict(image, threshold=confidence)
51
 
52
+ resolution_wh = calculate_resolution_wh(image)
53
+ text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.2
54
+ thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
55
 
56
  bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
57
  label_annotator = sv.LabelAnnotator(
 
72
  annotated_image = label_annotator.annotate(annotated_image, detections, labels)
73
  return annotated_image
74
 
75
+
76
def image_processing_inference(input_image: Image.Image, confidence: float, resolution: int, checkpoint: str):
    """Run detection on a single image and return the annotated result.

    Args:
        input_image: Image handed to `detect_and_annotate`.
        confidence: Threshold forwarded to `detect_and_annotate`.
        resolution: Value passed as `resolution` to the model constructor.
        checkpoint: `"base"` selects `RFDETRBase`; any other value selects
            `RFDETRLarge`.

    Returns:
        Whatever `detect_and_annotate` returns for the given image.
    """
    # A fresh model is built on every call, using the requested resolution.
    if checkpoint == "base":
        model = RFDETRBase(resolution=resolution)
    else:
        model = RFDETRLarge(resolution=resolution)
    return detect_and_annotate(model=model, image=input_image, confidence=confidence)
80
+
81
+
82
def video_processing_inference(input_video: str, confidence: float, resolution: int, checkpoint: str):
    """Handle a submission from the video tab.

    Args:
        input_video: Value received from the video input component.
        confidence: Confidence threshold chosen in the UI (not used yet).
        resolution: Value passed as `resolution` to the model constructor.
        checkpoint: `"base"` selects `RFDETRBase`; any other value selects
            `RFDETRLarge`.

    Returns:
        `input_video`, unchanged.
    """
    if checkpoint == "base":
        model = RFDETRBase(resolution=resolution)
    else:
        model = RFDETRLarge(resolution=resolution)
    # NOTE(review): `model` is constructed but never applied to the video, and
    # `confidence` is ignored; the input is echoed back as the output.
    # Presumably a placeholder until per-frame inference is added - confirm.
    return input_video
86
+
87
# Gradio UI: one tab for image inference and one for video inference.
# Each tab has an input/output pair on the first row and the inference
# controls (confidence, resolution, checkpoint) plus a submit button below.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Tab("Image"):
        with gr.Row():
            image_processing_input_image = gr.Image(
                label="Upload image",
                image_mode='RGB',
                type='pil',
                height=600
            )
            image_processing_output_image = gr.Image(
                label="Output image",
                image_mode='RGB',
                type='pil',
                height=600
            )
        with gr.Row():
            with gr.Column():
                image_processing_confidence_slider = gr.Slider(
                    label="Confidence",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                )
                image_processing_resolution_slider = gr.Slider(
                    label="Inference resolution",
                    minimum=560,
                    maximum=1120,
                    step=56,
                    value=728,
                )
                image_processing_checkpoint_dropdown = gr.Dropdown(
                    label="Checkpoint",
                    choices=["base", "large"],
                    value="base"
                )
            with gr.Column():
                # The positional argument is already the button's `value`
                # (its caption); "primary" is a style and belongs in `variant`.
                image_processing_submit_button = gr.Button("Submit", variant="primary")

        gr.Examples(
            fn=image_processing_inference,
            examples=IMAGE_EXAMPLES,
            inputs=[
                image_processing_input_image,
                image_processing_confidence_slider,
                image_processing_resolution_slider,
                image_processing_checkpoint_dropdown
            ],
            outputs=image_processing_output_image,
            cache_examples=True
        )

        image_processing_submit_button.click(
            image_processing_inference,
            inputs=[
                image_processing_input_image,
                image_processing_confidence_slider,
                image_processing_resolution_slider,
                image_processing_checkpoint_dropdown
            ],
            outputs=image_processing_output_image
        )
    with gr.Tab("Video"):
        with gr.Row():
            video_processing_input_video = gr.Video(
                label='Upload video',
                height=600
            )
            video_processing_output_video = gr.Video(
                label='Output video',
                height=600
            )
        with gr.Row():
            with gr.Column():
                video_processing_confidence_slider = gr.Slider(
                    label="Confidence",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                )
                video_processing_resolution_slider = gr.Slider(
                    label="Inference resolution",
                    minimum=560,
                    maximum=1120,
                    step=56,
                    value=728,
                )
                video_processing_checkpoint_dropdown = gr.Dropdown(
                    label="Checkpoint",
                    choices=["base", "large"],
                    value="base"
                )
            with gr.Column():
                # Same fix as on the image tab: style goes in `variant`.
                video_processing_submit_button = gr.Button("Submit", variant="primary")

        video_processing_submit_button.click(
            video_processing_inference,
            inputs=[
                video_processing_input_video,
                video_processing_confidence_slider,
                video_processing_resolution_slider,
                video_processing_checkpoint_dropdown
            ],
            outputs=video_processing_output_video
        )

demo.launch(debug=False, show_error=True)
utils/image.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+ from PIL import Image
3
+ import numpy as np
4
+
5
def calculate_resolution_wh(image: Union[Image.Image, np.ndarray]) -> Tuple[int, int]:
    """Return the resolution of `image` as a `(width, height)` tuple.

    Args:
        image: A Pillow image or a numpy array. For arrays the first two
            axes are read as (height, width).

    Returns:
        The `(width, height)` pair.

    Raises:
        ValueError: If a numpy array with fewer than 2 dimensions is given.
        TypeError: If `image` is neither a Pillow image nor a numpy array.
    """
    if isinstance(image, np.ndarray):
        if image.ndim < 2:
            raise ValueError("Input numpy array image must have at least 2 dimensions (height, width).")
        # Array shape is (height, width, ...); swap to report width first.
        height, width = image.shape[:2]
        return width, height

    if isinstance(image, Image.Image):
        # Pillow's `size` attribute is returned as-is.
        return image.size

    raise TypeError("Input image must be a Pillow Image or a numpy array.")