Spaces:

vk888
/

yolox-pedestrian-detection-all-weather-augmentation

Sleeping

App Files Files Community

vk committed on Mar 13

Commit

b65319d

1 Parent(s): 3b86909

square input handling

Browse files

Files changed (1) hide show

yolox_onnx.py +47 -54

yolox_onnx.py CHANGED Viewed

@@ -4,7 +4,6 @@ import numpy as np
 import onnxruntime
 class YOLOX_ONNX:
     def __init__(self, model_path):
@@ -13,38 +12,39 @@ class YOLOX_ONNX:
         self.image_size = self.model.get_inputs()[0].shape[-2:]
         # print(self.model.get_outputs()[0].name)
         # print(self.image_size)
-        self.labels_map=['pedestrian']
-        self.pad_to_square_flag=False
-    def pad_to_square(self,image):
         height, width = image.shape[:2]
         size = max(height, width)
         delta_w = size - width
         delta_h = size - height
         self.top, self.bottom = delta_h // 2, delta_h - (delta_h // 2)
         self.left, self.right = delta_w // 2, delta_w - (delta_w // 2)
-        print(self.top, self.bottom,self.left, self.right)
-        color = [114,114,114]  # padding
-        padded_image = cv2.copyMakeBorder(image, self.top, self.bottom, self.left, self.right, cv2.BORDER_CONSTANT, value=color)
-        return padded_image
     def __preprocess_image(self, img, swap=(2, 0, 1)):
-        if (img.shape[1]/img.shape[0]) > 1.2:
-            self.pad_to_square_flag=True
-            img = self.pad_to_square(img)  # training aspect ratio is 1:1
         padded_img = np.ones((self.image_size[0], self.image_size[1], 3), dtype=np.uint8) * 114
         r = min(self.image_size[0] / img.shape[0], self.image_size[1] / img.shape[1])
-        resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)), interpolation=cv2.INTER_LINEAR).astype(np.uint8)
         padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
         padded_img = padded_img.transpose(swap)
         padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
         return padded_img, r
     @staticmethod
     def __new_nms(boxes, scores, iou_thresh):
         x1 = boxes[:, 0]
@@ -70,7 +70,6 @@ class YOLOX_ONNX:
         return keep
     def __parse_output_data(self, outputs):
         grids = []
         expanded_strides = []
@@ -96,7 +95,6 @@ class YOLOX_ONNX:
         scores = np.amax(classes, axis=1)
         classes = np.argmax(classes, axis=1)
         valid_score_mask = scores > score_thresh
         if valid_score_mask.sum() == 0:
             return np.array([]), np.array([]), np.array([])
@@ -104,40 +102,34 @@ class YOLOX_ONNX:
         valid_boxes = boxes[valid_score_mask]
         valid_classes = classes[valid_score_mask]
         valid_boxes_xyxy = np.ones_like(valid_boxes)
-        valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2]/2.
-        valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3]/2.
-        valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2]/2.
-        valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3]/2.
         valid_boxes_xyxy /= resize_ratio
         indices = self.__new_nms(valid_boxes_xyxy, valid_scores, iou_thresh)
         valid_boxes_xyxy = valid_boxes_xyxy[indices, :]
         valid_scores = valid_scores[indices]
         valid_classes = valid_classes[indices].astype('int')
-        #valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
-        if self.pad_to_square_flag:
-            for i,offset in enumerate([self.left,self.top,self.right,self.bottom]):
-                valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,i] - offset #remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
-            self.pad_to_square_flag=False
         return valid_boxes_xyxy, valid_scores, valid_classes
-    def draw_boxes(self,img, boxes, scores=None, classes=None, labels=None):
         for i in range(boxes.shape[0]):
             cv2.rectangle(img,
-                        (int(boxes[i,0]), int(boxes[i,1])),
-                        (int(boxes[i,2]), int(boxes[i,3])),
-                        (0, 128, 0),
-                        int(0.005*img.shape[1]))
             ### not drawing classes since num_classes is 1(pedestrian) and text not greatly visible in gradio UI
             # text_label = ''
@@ -149,7 +141,7 @@ class YOLOX_ONNX:
             # elif scores is not None:
             #     text_label = str("%.2f" % round(scores[i],2))
-            #w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
             # cv2.putText(img,
             #             text_label,
             #             (int(boxes[i,0]) if int(boxes[i,0])+w<img.shape[1] else img.shape[1]-w, int(boxes[i,1])-2 if (int(boxes[i,1])-h>=3) else int(boxes[i,1])+h+2),
@@ -162,30 +154,31 @@ class YOLOX_ONNX:
     def predict(self, image, score_thresh=0.4, iou_thresh=0.4):
-        h,w = image.shape[:2]
-        origin_img=np.copy(image)
         model_input = np.copy(image)
         model_input, resize_ratio = self.__preprocess_image(model_input)
-        #print(model_input.shape)
-        #print('input mean:', np.mean(model_input))
-        start_time=time()
         prediction = self.model.run(None, {self.model.get_inputs()[0].name: model_input[None, :, :, :]})
-        #print(self.model.get_inputs()[0].name)
-        #print('output mean:',np.mean(prediction))
         prediction = self.__parse_output_data(prediction[0])
-        d_boxes, d_scores, d_classes=self.__decode_prediction(prediction, (h,w), resize_ratio, score_thresh, iou_thresh)
-        self.output_img = self.draw_boxes(origin_img, d_boxes,None, d_classes, self.labels_map)
-        print('elapsed time:',time()-start_time)
-        return d_boxes, d_scores, d_classes
-# if __name__=="__main__":
 #     from matplotlib import pyplot as plt
-#     path='test-images/test1.jpg'
-#     yolox_nano_onnx=YOLOX_ONNX('models/pedestrian-detection-best95.onnx')
 #     yolox_nano_onnx.predict(cv2.imread(path))
 #     plt.title('Predicted')
-#     plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img,cv2.COLOR_BGR2RGB))
-#     plt.show()

 import onnxruntime
 class YOLOX_ONNX:
     def __init__(self, model_path):
         self.image_size = self.model.get_inputs()[0].shape[-2:]
         # print(self.model.get_outputs()[0].name)
         # print(self.image_size)
+        self.labels_map = ['pedestrian']
+    def pad_to_square(self, image):
         # Pad a wide image to a square canvas with constant gray borders and
         # remember how many pixels were added on each side.
         #
         # image: array whose first two shape entries are read as (height, width).
         # Returns the padded image, or `image` itself when no padding is applied.
         # Side effect: always overwrites self.top / self.bottom / self.left /
         # self.right with the pad widths used (all 0 when nothing is padded).
         height, width = image.shape[:2]
         # Only images at least 1.2x wider than tall are padded. Because the test
         # is on width / height, images taller than wide also take this early
         # return and are left unpadded.
         # NOTE(review): a zero-height input would fail on this division - confirm
         # callers never pass an empty image.
+        if (width / height) < 1.2:
+            # print('Square Image')
+            self.top, self.bottom = 0, 0
+            self.left, self.right = 0, 0
+            return image
         # Side length of the square is the longer image edge.
         size = max(height, width)
         delta_w = size - width
         delta_h = size - height
         # Split the missing pixels between the two opposite sides; when the
         # difference is odd, the extra pixel goes to the bottom / right side.
         self.top, self.bottom = delta_h // 2, delta_h - (delta_h // 2)
         self.left, self.right = delta_w // 2, delta_w - (delta_w // 2)
         # Writes the four pad widths to stdout on every padded call.
+        print(self.top, self.bottom, self.left, self.right)
+        color = [114, 114, 114]  # padding
+        return cv2.copyMakeBorder(image, self.top, self.bottom, self.left, self.right, cv2.BORDER_CONSTANT, value=color)
     def __preprocess_image(self, img, swap=(2, 0, 1)):
         # Build the model input from an image: optional square padding, an
         # aspect-preserving resize pasted onto a gray canvas, axis reordering,
         # and conversion to float32.
         #
         # img:  image array; shape[0] and shape[1] are used as rows and columns.
         # swap: axis order handed to transpose(); the default moves the last
         #       axis of the (rows, cols, 3) canvas to the front.
         # Returns (padded_img, r), where r is the scale factor applied to the
         # (possibly square-padded) image.
         # Side effect: pad_to_square() updates self.top/bottom/left/right.
+        img = self.pad_to_square(img)  # training aspect ratio is 1:1
         # Canvas of self.image_size[0] x self.image_size[1] x 3 filled with 114.
         padded_img = np.ones((self.image_size[0], self.image_size[1], 3), dtype=np.uint8) * 114
         # Use the smaller of the two per-axis ratios so the resized image fits
         # inside the canvas on both axes.
         r = min(self.image_size[0] / img.shape[0], self.image_size[1] / img.shape[1])
         # cv2.resize takes its target size as (width, height), hence shape[1] first.
+        resized_img = cv2.resize(img, (int(img.shape[1] * r), int(img.shape[0] * r)),
+                                 interpolation=cv2.INTER_LINEAR).astype(np.uint8)
         # Paste into the top-left corner; the rest of the canvas keeps the 114 fill.
         padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
         padded_img = padded_img.transpose(swap)
         # Only the dtype and memory layout change here; pixel values are not rescaled.
         padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
         return padded_img, r
     @staticmethod
     def __new_nms(boxes, scores, iou_thresh):
         x1 = boxes[:, 0]
         return keep
     def __parse_output_data(self, outputs):
         grids = []
         expanded_strides = []
         scores = np.amax(classes, axis=1)
         classes = np.argmax(classes, axis=1)
         valid_score_mask = scores > score_thresh
         if valid_score_mask.sum() == 0:
             return np.array([]), np.array([]), np.array([])
         valid_boxes = boxes[valid_score_mask]
         valid_classes = classes[valid_score_mask]
         valid_boxes_xyxy = np.ones_like(valid_boxes)
+        valid_boxes_xyxy[:, 0] = valid_boxes[:, 0] - valid_boxes[:, 2] / 2.
+        valid_boxes_xyxy[:, 1] = valid_boxes[:, 1] - valid_boxes[:, 3] / 2.
+        valid_boxes_xyxy[:, 2] = valid_boxes[:, 0] + valid_boxes[:, 2] / 2.
+        valid_boxes_xyxy[:, 3] = valid_boxes[:, 1] + valid_boxes[:, 3] / 2.
         valid_boxes_xyxy /= resize_ratio
         indices = self.__new_nms(valid_boxes_xyxy, valid_scores, iou_thresh)
         valid_boxes_xyxy = valid_boxes_xyxy[indices, :]
         valid_scores = valid_scores[indices]
         valid_classes = valid_classes[indices].astype('int')
+        # valid_boxes_xyxy, valid_scores, valid_classes = self.__remove_duplicates(valid_boxes_xyxy, valid_scores, valid_classes)
+        for i, offset in enumerate([self.left, self.top, self.right, self.bottom]):
+            valid_boxes_xyxy[:, i] = valid_boxes_xyxy[:,
+                                     i] - offset  # remove pad offsets from boundingbox(xmin,ymin,xmax,ymax)
         return valid_boxes_xyxy, valid_scores, valid_classes
+    def draw_boxes(self, img, boxes, scores=None, classes=None, labels=None):
         for i in range(boxes.shape[0]):
             cv2.rectangle(img,
+                          (int(boxes[i, 0]), int(boxes[i, 1])),
+                          (int(boxes[i, 2]), int(boxes[i, 3])),
+                          (0, 128, 0),
+                          int(0.005 * img.shape[1]))
             ### not drawing classes since num_classes is 1(pedestrian) and text not greatly visible in gradio UI
             # text_label = ''
             # elif scores is not None:
             #     text_label = str("%.2f" % round(scores[i],2))
+            # w, h = cv2.getTextSize(text_label, 0, fontScale=0.5, thickness=1)[0]
             # cv2.putText(img,
             #             text_label,
             #             (int(boxes[i,0]) if int(boxes[i,0])+w<img.shape[1] else img.shape[1]-w, int(boxes[i,1])-2 if (int(boxes[i,1])-h>=3) else int(boxes[i,1])+h+2),
     def predict(self, image, score_thresh=0.4, iou_thresh=0.4):
         # Run the full detection pipeline on one image: preprocess, run the
         # model, parse and decode its first output, and draw the resulting boxes.
         #
         # image:        input image array; shape[:2] is read as (h, w).
         # score_thresh: forwarded to __decode_prediction.
         # iou_thresh:   forwarded to __decode_prediction.
         # Returns (d_boxes, d_scores, d_classes) as produced by __decode_prediction.
         # Side effects: stores the return value of draw_boxes() in
         # self.output_img and prints the elapsed time to stdout.
+        h, w = image.shape[:2]
         # Separate copies are made for drawing and for the model input.
+        origin_img = np.copy(image)
         model_input = np.copy(image)
         model_input, resize_ratio = self.__preprocess_image(model_input)
+        # print(model_input.shape)
+        # print('input mean:', np.mean(model_input))
         # The timer starts after preprocessing, so the printed duration covers
         # inference, output parsing, decoding and drawing only.
         # NOTE(review): `time` is not defined in the visible part of the file -
         # presumably imported near the top (e.g. `from time import time`); confirm.
+        start_time = time()
         # model_input[None, ...] adds a leading axis in front of the preprocessed array.
         prediction = self.model.run(None, {self.model.get_inputs()[0].name: model_input[None, :, :, :]})
+        # print(self.model.get_inputs()[0].name)
+        # print('output mean:',np.mean(prediction))
         # Only the first model output is used.
         prediction = self.__parse_output_data(prediction[0])
+        d_boxes, d_scores, d_classes = self.__decode_prediction(prediction, (h, w), resize_ratio, score_thresh,
+                                                                iou_thresh)
         # Scores are passed as None to draw_boxes.
+        self.output_img = self.draw_boxes(origin_img, d_boxes, None, d_classes, self.labels_map)
+        print('elapsed time:', time() - start_time)
+        return d_boxes, d_scores, d_classes
+# if __name__ == "__main__":
 #     from matplotlib import pyplot as plt
+#
+#     path = 'test-images/test1.jpg'
+#     yolox_nano_onnx = YOLOX_ONNX('models/pedestrian-detection-best95.onnx')
 #     yolox_nano_onnx.predict(cv2.imread(path))
 #     plt.title('Predicted')
+#     plt.imshow(cv2.cvtColor(yolox_nano_onnx.output_img, cv2.COLOR_BGR2RGB))
+#     plt.show()