vk committed
Commit b65319d · 1 Parent(s): 3b86909

square input handling

Files changed (1)
  1. yolox_onnx.py +47 -54
yolox_onnx.py CHANGED
@@ -4,7 +4,6 @@ import numpy as np
 import onnxruntime
 
 
-
 class YOLOX_ONNX:
 
     def __init__(self, model_path):
@@ -13,38 +12,39 @@ class YOLOX_ONNX:
         self.image_size = self.model.get_inputs()[0].shape[-2:]
         # print(self.model.get_outputs()[0].name)
         # print(self.image_size)
-        self.labels_map=['pedestrian']
-        self.pad_to_square_flag=False
+        self.labels_map = ['pedestrian']
 
-    def pad_to_square(self,image):
+    def pad_to_square(self, image):
         height, width = image.shape[:2]
+
+        if (width / height) < 1.2:
+            # print('Square Image')
+            self.top, self.bottom = 0, 0
+            self.left, self.right = 0, 0
+            return image
+
         size = max(height, width)
         delta_w = size - width
         delta_h = size - height
         self.top, self.bottom = delta_h // 2, delta_h - (delta_h // 2)
         self.left, self.right = delta_w // 2, delta_w - (delta_w // 2)
-        print(self.top, self.bottom,self.left, self.right)
-        color = [114,114,114] # padding
-        padded_image = cv2.copyMakeBorder(image, self.top, self.bottom, self.left, self.right, cv2.BORDER_CONSTANT, value=color)
-        return padded_image
-
-
+        print(self.top, self.bottom, self.left, self.right)
+        color = [114, 114, 114]  # padding
+        return cv2.copyMakeBorder(image, self.top, self.bottom, self.left, self.right, cv2.BORDER_CONSTANT, value=color)
 
     def __preprocess_image(self, img, swap=(2, 0, 1)):
 
-        if (img.shape[1]/img.shape[0]) > 1.2:
-            self.pad_to_square_flag=True
-            img = self.pad_to_square(img) # training aspect ratio is 1:1
+        img = self.pad_to_square(img)  # training aspect ratio is 1:1
 
         padded_img = np.ones((self.image_size[0], self.image_size[1], 3), dtype=np.uint8) * 114
         r = min(self.image_size[0] / img.shape[0], self.image_size[1] / img.shape[1])
-        resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
+        resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)),
+                                 interpolation=cv2.INTER_LINEAR).astype(np.uint8)
         padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
         padded_img = padded_img.transpose(swap)
         padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
         return padded_img, r
 
-
     @staticmethod
     def __new_nms(boxes, scores, iou_thresh):
         x1 = boxes[:, 0]
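The squaring step only pads genuinely wide frames (width/height ≥ 1.2); anything near-square passes through unchanged with zero offsets, so the later offset subtraction becomes a no-op for it. A minimal sketch of that behaviour on a dummy 720x1280 frame (the frame size is illustrative, and numpy/opencv-python are assumed to be installed):

import cv2
import numpy as np

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # height=720, width=1280 (illustrative)
h, w = frame.shape[:2]

if (w / h) < 1.2:
    # near-square input: keep the frame as-is and record zero offsets
    top, bottom, left, right = 0, 0, 0, 0
    padded = frame
else:
    # wide input: pad the short side up to a square with the same 114 grey
    # the preprocessing uses elsewhere
    size = max(h, w)
    dh, dw = size - h, size - w
    top, bottom = dh // 2, dh - dh // 2
    left, right = dw // 2, dw - dw // 2
    padded = cv2.copyMakeBorder(frame, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=[114, 114, 114])

print(padded.shape, (top, bottom, left, right))  # (1280, 1280, 3) (280, 280, 0, 0)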
@@ -70,7 +70,6 @@
 
         return keep
 
-
     def __parse_output_data(self, outputs):
         grids = []
         expanded_strides = []
@@ -96,7 +95,6 @@
         scores = np.amax(classes, axis=1)
         classes = np.argmax(classes, axis=1)
 
-
         valid_score_mask = scores > score_thresh
         if valid_score_mask.sum() == 0:
             return np.array([]), np.array([]), np.array([])
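For context, the selection in this hunk is an amax/argmax over the per-anchor class probabilities followed by a threshold mask; a toy sketch with made-up values (single pedestrian class, four anchors):

import numpy as np

class_probs = np.array([[0.10], [0.85], [0.42], [0.77]])  # (4 anchors, 1 class), values made up
scores = np.amax(class_probs, axis=1)     # best score per anchor -> [0.1  0.85 0.42 0.77]
classes = np.argmax(class_probs, axis=1)  # best class per anchor -> [0 0 0 0]

valid_score_mask = scores > 0.4           # score_thresh = 0.4
print(valid_score_mask.sum())             # 3 anchors survive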
@@ -104,40 +102,34 @@
         valid_boxes = boxes[valid_score_mask]
         valid_classes = classes[valid_score_mask]
 
-
         valid_boxes_xyxy = np.ones_like(valid_boxes)
-        valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2]/2.
-        valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3]/2.
-        valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2]/2.
-        valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3]/2.
+        valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2] / 2.
+        valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3] / 2.
+        valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2] / 2.
+        valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3] / 2.
         valid_boxes_xyxy /= resize_ratio
 
         indices = self.__new_nms(valid_boxes_xyxy, valid_scores, iou_thresh)
         valid_boxes_xyxy = valid_boxes_xyxy[indices, :]
         valid_scores = valid_scores[indices]
         valid_classes = valid_classes[indices].astype('int')
-
-        #valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
-
-        if self.pad_to_square_flag:
-
-            for i,offset in enumerate([self.left,self.top,self.right,self.bottom]):
-                valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,i] - offset #remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
 
-            self.pad_to_square_flag=False
+        # valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
 
+        for i, offset in enumerate([self.left, self.top, self.right, self.bottom]):
+            valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,
+                                                      i] - offset  # remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
 
         return valid_boxes_xyxy, valid_scores, valid_classes
 
-    def draw_boxes(self,img, boxes, scores=None, classes=None, labels=None):
-
+    def draw_boxes(self, img, boxes, scores=None, classes=None, labels=None):
 
         for i in range(boxes.shape[0]):
             cv2.rectangle(img,
-                          (int(boxes[i,0]), int(boxes[i,1])),
-                          (int(boxes[i,2]), int(boxes[i,3])),
-                          (0, 128, 0),
-                          int(0.005*img.shape[1]))
+                          (int(boxes[i, 0]), int(boxes[i, 1])),
+                          (int(boxes[i, 2]), int(boxes[i, 3])),
+                          (0, 128, 0),
+                          int(0.005 * img.shape[1]))
 
         ### not drawing classes since num_classes is 1(pedestrian) and text not greatly visible in gradio UI
         # text_label = ''
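With the flag gone, the offset subtraction now runs on every call; since pad_to_square records zero offsets for near-square inputs, that is harmless. What the loop does for a padded wide frame, with illustrative box values and the 720x1280 offsets from the sketch above:

import numpy as np

boxes_xyxy = np.array([[400.0, 500.0, 600.0, 900.0]])  # xmin, ymin, xmax, ymax in the 1280x1280 padded image
left, top, right, bottom = 0, 280, 0, 280               # offsets recorded by pad_to_square

for i, offset in enumerate([left, top, right, bottom]):
    boxes_xyxy[:, i] -= offset                           # undo the padding on each coordinate

print(boxes_xyxy)  # [[400. 220. 600. 620.]] -- coordinates in the original 1280x720 frame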
@@ -149,7 +141,7 @@
             # elif scores is not None:
             # text_label = str("%.2f" % round(scores[i],2))
 
-            #w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
+            # w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
             # cv2.putText(img,
             # text_label,
             # (int(boxes[i,0]) if int(boxes[i,0])+w<img.shape[1] else img.shape[1]-w, int(boxes[i,1])-2 if (int(boxes[i,1])-h>=3) else int(boxes[i,1])+h+2),
@@ -162,30 +154,31 @@
 
     def predict(self, image, score_thresh=0.4, iou_thresh=0.4):
 
-        h,w = image.shape[:2]
-        origin_img=np.copy(image)
+        h, w = image.shape[:2]
+        origin_img = np.copy(image)
         model_input = np.copy(image)
         model_input, resize_ratio = self.__preprocess_image(model_input)
-        #print(model_input.shape)
-        #print('input mean:', np.mean(model_input))
-        start_time=time()
+        # print(model_input.shape)
+        # print('input mean:', np.mean(model_input))
+        start_time = time()
         prediction = self.model.run(None, {self.model.get_inputs()[0].name: model_input[None, :, :, :]})
-        #print(self.model.get_inputs()[0].name)
-        #print('output mean:',np.mean(prediction))
+        # print(self.model.get_inputs()[0].name)
+        # print('output mean:',np.mean(prediction))
         prediction = self.__parse_output_data(prediction[0])
-        d_boxes, d_scores, d_classes=self.__decode_prediction(prediction, (h,w), resize_ratio, score_thresh, iou_thresh)
-        self.output_img = self.draw_boxes(origin_img, d_boxes,None, d_classes, self.labels_map)
-        print('elapsed time:',time()-start_time)
-
-        return d_boxes, d_scores, d_classes
+        d_boxes, d_scores, d_classes = self.__decode_prediction(prediction, (h, w), resize_ratio, score_thresh,
+                                                                iou_thresh)
+        self.output_img = self.draw_boxes(origin_img, d_boxes, None, d_classes, self.labels_map)
+        print('elapsed time:', time() - start_time)
 
+        return d_boxes, d_scores, d_classes
 
 
-# if __name__=="__main__":
+# if __name__ == "__main__":
 #     from matplotlib import pyplot as plt
-#     path='test-images/test1.jpg'
-#     yolox_nano_onnx=YOLOX_ONNX('models/pedestrian-detection-best95.onnx')
+#
+#     path = 'test-images/test1.jpg'
+#     yolox_nano_onnx = YOLOX_ONNX('models/pedestrian-detection-best95.onnx')
 #     yolox_nano_onnx.predict(cv2.imread(path))
 #     plt.title('Predicted')
-#     plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img,cv2.COLOR_BGR2RGB))
-#     plt.show()
+#     plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img, cv2.COLOR_BGR2RGB))
+#     plt.show()
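A hedged end-to-end sketch of the new behaviour, exercising both a wide and a near-square input; the model and image paths are the ones from the commented demo above and may need adjusting, and the script is assumed to run from the Space root so that yolox_onnx.py is importable:

import cv2
from yolox_onnx import YOLOX_ONNX

detector = YOLOX_ONNX('models/pedestrian-detection-best95.onnx')

wide = cv2.imread('test-images/test1.jpg')   # e.g. a 16:9 frame -> padded to a square internally
square = cv2.resize(wide, (640, 640))        # width/height = 1.0 < 1.2 -> passes through unpadded

for img in (wide, square):
    boxes, scores, classes = detector.predict(img, score_thresh=0.4, iou_thresh=0.4)
    print(img.shape[:2], len(boxes), 'detections')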
 