vk
committed on
Commit
·
b65319d
1
Parent(s):
3b86909
square input handling
Browse files - yolox_onnx.py +47 -54
yolox_onnx.py
CHANGED
@@ -4,7 +4,6 @@ import numpy as np
|
|
4 |
import onnxruntime
|
5 |
|
6 |
|
7 |
-
|
8 |
class YOLOX_ONNX:
|
9 |
|
10 |
def __init__(self, model_path):
|
@@ -13,38 +12,39 @@ class YOLOX_ONNX:
|
|
13 |
self.image_size = self.model.get_inputs()[0].shape[-2:]
|
14 |
# print(self.model.get_outputs()[0].name)
|
15 |
# print(self.image_size)
|
16 |
-
self.labels_map=['pedestrian']
|
17 |
-
self.pad_to_square_flag=False
|
18 |
|
19 |
-
def pad_to_square(self,image):
|
20 |
height, width = image.shape[:2]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
size = max(height, width)
|
22 |
delta_w = size - width
|
23 |
delta_h = size - height
|
24 |
self.top, self.bottom = delta_h // 2, delta_h - (delta_h // 2)
|
25 |
self.left, self.right = delta_w // 2, delta_w - (delta_w // 2)
|
26 |
-
print(self.top, self.bottom,self.left, self.right)
|
27 |
-
color = [114,114,114] # padding
|
28 |
-
|
29 |
-
return padded_image
|
30 |
-
|
31 |
-
|
32 |
|
33 |
def __preprocess_image(self, img, swap=(2, 0, 1)):
|
34 |
|
35 |
-
|
36 |
-
self.pad_to_square_flag=True
|
37 |
-
img = self.pad_to_square(img) # training aspect ratio is 1:1
|
38 |
|
39 |
padded_img = np.ones((self.image_size[0], self.image_size[1], 3), dtype=np.uint8) * 114
|
40 |
r = min(self.image_size[0] / img.shape[0], self.image_size[1] / img.shape[1])
|
41 |
-
resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)),
|
|
|
42 |
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
|
43 |
padded_img = padded_img.transpose(swap)
|
44 |
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
|
45 |
return padded_img, r
|
46 |
|
47 |
-
|
48 |
@staticmethod
|
49 |
def __new_nms(boxes, scores, iou_thresh):
|
50 |
x1 = boxes[:, 0]
|
@@ -70,7 +70,6 @@ class YOLOX_ONNX:
|
|
70 |
|
71 |
return keep
|
72 |
|
73 |
-
|
74 |
def __parse_output_data(self, outputs):
|
75 |
grids = []
|
76 |
expanded_strides = []
|
@@ -96,7 +95,6 @@ class YOLOX_ONNX:
|
|
96 |
scores = np.amax(classes, axis=1)
|
97 |
classes = np.argmax(classes, axis=1)
|
98 |
|
99 |
-
|
100 |
valid_score_mask = scores > score_thresh
|
101 |
if valid_score_mask.sum() == 0:
|
102 |
return np.array([]), np.array([]), np.array([])
|
@@ -104,40 +102,34 @@ class YOLOX_ONNX:
|
|
104 |
valid_boxes = boxes[valid_score_mask]
|
105 |
valid_classes = classes[valid_score_mask]
|
106 |
|
107 |
-
|
108 |
valid_boxes_xyxy = np.ones_like(valid_boxes)
|
109 |
-
valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2]/2.
|
110 |
-
valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3]/2.
|
111 |
-
valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2]/2.
|
112 |
-
valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3]/2.
|
113 |
valid_boxes_xyxy /= resize_ratio
|
114 |
|
115 |
indices = self.__new_nms(valid_boxes_xyxy, valid_scores, iou_thresh)
|
116 |
valid_boxes_xyxy = valid_boxes_xyxy[indices, :]
|
117 |
valid_scores = valid_scores[indices]
|
118 |
valid_classes = valid_classes[indices].astype('int')
|
119 |
-
|
120 |
-
#valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
|
121 |
-
|
122 |
-
if self.pad_to_square_flag:
|
123 |
-
|
124 |
-
for i,offset in enumerate([self.left,self.top,self.right,self.bottom]):
|
125 |
-
valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,i] - offset #remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
|
126 |
|
127 |
-
|
128 |
|
|
|
|
|
|
|
129 |
|
130 |
return valid_boxes_xyxy, valid_scores, valid_classes
|
131 |
|
132 |
-
def draw_boxes(self,img, boxes, scores=None, classes=None, labels=None):
|
133 |
-
|
134 |
|
135 |
for i in range(boxes.shape[0]):
|
136 |
cv2.rectangle(img,
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
|
142 |
### not drawing classes since num_classes is 1(pedestrian) and text not greatly visible in gradio UI
|
143 |
# text_label = ''
|
@@ -149,7 +141,7 @@ class YOLOX_ONNX:
|
|
149 |
# elif scores is not None:
|
150 |
# text_label = str("%.2f" % round(scores[i],2))
|
151 |
|
152 |
-
#w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
|
153 |
# cv2.putText(img,
|
154 |
# text_label,
|
155 |
# (int(boxes[i,0]) if int(boxes[i,0])+w<img.shape[1] else img.shape[1]-w, int(boxes[i,1])-2 if (int(boxes[i,1])-h>=3) else int(boxes[i,1])+h+2),
|
@@ -162,30 +154,31 @@ class YOLOX_ONNX:
|
|
162 |
|
163 |
def predict(self, image, score_thresh=0.4, iou_thresh=0.4):
|
164 |
|
165 |
-
h,w = image.shape[:2]
|
166 |
-
origin_img=np.copy(image)
|
167 |
model_input = np.copy(image)
|
168 |
model_input, resize_ratio = self.__preprocess_image(model_input)
|
169 |
-
#print(model_input.shape)
|
170 |
-
#print('input mean:', np.mean(model_input))
|
171 |
-
start_time=time()
|
172 |
prediction = self.model.run(None, {self.model.get_inputs()[0].name: model_input[None, :, :, :]})
|
173 |
-
#print(self.model.get_inputs()[0].name)
|
174 |
-
#print('output mean:',np.mean(prediction))
|
175 |
prediction = self.__parse_output_data(prediction[0])
|
176 |
-
d_boxes, d_scores, d_classes=self.__decode_prediction(prediction, (h,w), resize_ratio, score_thresh,
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
return d_boxes, d_scores, d_classes
|
181 |
|
|
|
182 |
|
183 |
|
184 |
-
# if __name__=="__main__":
|
185 |
# from matplotlib import pyplot as plt
|
186 |
-
#
|
187 |
-
#
|
|
|
188 |
# yolox_nano_onnx.predict(cv2.imread(path))
|
189 |
# plt.title('Predicted')
|
190 |
-
# plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img,cv2.COLOR_BGR2RGB))
|
191 |
-
# plt.show()
|
|
|
4 |
import onnxruntime
|
5 |
|
6 |
|
|
|
7 |
class YOLOX_ONNX:
|
8 |
|
9 |
def __init__(self, model_path):
|
|
|
12 |
self.image_size = self.model.get_inputs()[0].shape[-2:]
|
13 |
# print(self.model.get_outputs()[0].name)
|
14 |
# print(self.image_size)
|
15 |
+
self.labels_map = ['pedestrian']
|
|
|
16 |
|
17 |
+
def pad_to_square(self, image):
|
18 |
height, width = image.shape[:2]
|
19 |
+
|
20 |
+
if (width / height) < 1.2:
|
21 |
+
# print('Square Image')
|
22 |
+
self.top, self.bottom = 0, 0
|
23 |
+
self.left, self.right = 0, 0
|
24 |
+
return image
|
25 |
+
|
26 |
size = max(height, width)
|
27 |
delta_w = size - width
|
28 |
delta_h = size - height
|
29 |
self.top, self.bottom = delta_h // 2, delta_h - (delta_h // 2)
|
30 |
self.left, self.right = delta_w // 2, delta_w - (delta_w // 2)
|
31 |
+
print(self.top, self.bottom, self.left, self.right)
|
32 |
+
color = [114, 114, 114] # padding
|
33 |
+
return cv2.copyMakeBorder(image, self.top, self.bottom, self.left, self.right, cv2.BORDER_CONSTANT, value=color)
|
|
|
|
|
|
|
34 |
|
35 |
def __preprocess_image(self, img, swap=(2, 0, 1)):
|
36 |
|
37 |
+
img = self.pad_to_square(img) # training aspect ratio is 1:1
|
|
|
|
|
38 |
|
39 |
padded_img = np.ones((self.image_size[0], self.image_size[1], 3), dtype=np.uint8) * 114
|
40 |
r = min(self.image_size[0] / img.shape[0], self.image_size[1] / img.shape[1])
|
41 |
+
resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)),
|
42 |
+
interpolation=cv2.INTER_LINEAR).astype(np.uint8)
|
43 |
padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
|
44 |
padded_img = padded_img.transpose(swap)
|
45 |
padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
|
46 |
return padded_img, r
|
47 |
|
|
|
48 |
@staticmethod
|
49 |
def __new_nms(boxes, scores, iou_thresh):
|
50 |
x1 = boxes[:, 0]
|
|
|
70 |
|
71 |
return keep
|
72 |
|
|
|
73 |
def __parse_output_data(self, outputs):
|
74 |
grids = []
|
75 |
expanded_strides = []
|
|
|
95 |
scores = np.amax(classes, axis=1)
|
96 |
classes = np.argmax(classes, axis=1)
|
97 |
|
|
|
98 |
valid_score_mask = scores > score_thresh
|
99 |
if valid_score_mask.sum() == 0:
|
100 |
return np.array([]), np.array([]), np.array([])
|
|
|
102 |
valid_boxes = boxes[valid_score_mask]
|
103 |
valid_classes = classes[valid_score_mask]
|
104 |
|
|
|
105 |
valid_boxes_xyxy = np.ones_like(valid_boxes)
|
106 |
+
valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2] / 2.
|
107 |
+
valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3] / 2.
|
108 |
+
valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2] / 2.
|
109 |
+
valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3] / 2.
|
110 |
valid_boxes_xyxy /= resize_ratio
|
111 |
|
112 |
indices = self.__new_nms(valid_boxes_xyxy, valid_scores, iou_thresh)
|
113 |
valid_boxes_xyxy = valid_boxes_xyxy[indices, :]
|
114 |
valid_scores = valid_scores[indices]
|
115 |
valid_classes = valid_classes[indices].astype('int')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
+
# valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
|
118 |
|
119 |
+
for i, offset in enumerate([self.left, self.top, self.right, self.bottom]):
|
120 |
+
valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,
|
121 |
+
i] - offset # remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
|
122 |
|
123 |
return valid_boxes_xyxy, valid_scores, valid_classes
|
124 |
|
125 |
+
def draw_boxes(self, img, boxes, scores=None, classes=None, labels=None):
|
|
|
126 |
|
127 |
for i in range(boxes.shape[0]):
|
128 |
cv2.rectangle(img,
|
129 |
+
(int(boxes[i, 0]), int(boxes[i, 1])),
|
130 |
+
(int(boxes[i, 2]), int(boxes[i, 3])),
|
131 |
+
(0, 128, 0),
|
132 |
+
int(0.005 * img.shape[1]))
|
133 |
|
134 |
### not drawing classes since num_classes is 1(pedestrian) and text not greatly visible in gradio UI
|
135 |
# text_label = ''
|
|
|
141 |
# elif scores is not None:
|
142 |
# text_label = str("%.2f" % round(scores[i],2))
|
143 |
|
144 |
+
# w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
|
145 |
# cv2.putText(img,
|
146 |
# text_label,
|
147 |
# (int(boxes[i,0]) if int(boxes[i,0])+w<img.shape[1] else img.shape[1]-w, int(boxes[i,1])-2 if (int(boxes[i,1])-h>=3) else int(boxes[i,1])+h+2),
|
|
|
154 |
|
155 |
def predict(self, image, score_thresh=0.4, iou_thresh=0.4):
|
156 |
|
157 |
+
h, w = image.shape[:2]
|
158 |
+
origin_img = np.copy(image)
|
159 |
model_input = np.copy(image)
|
160 |
model_input, resize_ratio = self.__preprocess_image(model_input)
|
161 |
+
# print(model_input.shape)
|
162 |
+
# print('input mean:', np.mean(model_input))
|
163 |
+
start_time = time()
|
164 |
prediction = self.model.run(None, {self.model.get_inputs()[0].name: model_input[None, :, :, :]})
|
165 |
+
# print(self.model.get_inputs()[0].name)
|
166 |
+
# print('output mean:',np.mean(prediction))
|
167 |
prediction = self.__parse_output_data(prediction[0])
|
168 |
+
d_boxes, d_scores, d_classes = self.__decode_prediction(prediction, (h, w), resize_ratio, score_thresh,
|
169 |
+
iou_thresh)
|
170 |
+
self.output_img = self.draw_boxes(origin_img, d_boxes, None, d_classes, self.labels_map)
|
171 |
+
print('elapsed time:', time() - start_time)
|
|
|
172 |
|
173 |
+
return d_boxes, d_scores, d_classes
|
174 |
|
175 |
|
176 |
+
# if __name__ == "__main__":
|
177 |
# from matplotlib import pyplot as plt
|
178 |
+
#
|
179 |
+
# path = 'test-images/test1.jpg'
|
180 |
+
# yolox_nano_onnx = YOLOX_ONNX('models/pedestrian-detection-best95.onnx')
|
181 |
# yolox_nano_onnx.predict(cv2.imread(path))
|
182 |
# plt.title('Predicted')
|
183 |
+
# plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img, cv2.COLOR_BGR2RGB))
|
184 |
+
# plt.show()
|