diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1af4edac402761c5ac21515041b8bfa404862b31 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.pth.tar filter=lfs diff=lfs merge=lfs -text +*.json filter=lfs diff=lfs merge=lfs -text +*.gif filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5fddcf49ecc0acfd02d723bb484541b782c42305 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +**/__pycache__/ +**/.ipynb_checkpoints/ +**/data/ +**/datasets/ +**/development/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2562874f7172e6f9f1bb49137bfc004c6e9bf857 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +Demo software - Experiments on person tracking with quantized networks by HyperbeeAI +Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai diff --git a/README.md b/README.md index 5c0d499d78b811d53f0056c53cd1d031d55cdcad..97fb8d614d836c6209062f1bfb66a75c2c425fb4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ ---- -license: other ---- +# Experiments on person tracking with quantized networks by HyperbeeAI + +Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai + +This repository contains our experiments for quantized neural networks for the person tracking task, evaluated over the WIDER pedestrian surveillance dataset. + +See efficientdet_comparison/ for the comparison of our trained models with efficientdet + +![demo](./experiments/demo.gif) diff --git a/dataloader.py b/dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..1fe848b4529d7838cb43b18c6873cd3da2bcbfe0 --- /dev/null +++ b/dataloader.py @@ -0,0 +1,513 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai # +########################################################################### +import os, sys, random, torch, torchvision +from torchvision import transforms +from torchvision.datasets.vision import VisionDataset +import torchvision.ops as ops +import torch.utils.data +import numpy as np +import pandas as pd +import copy +from PIL import Image +import os.path +import time, json +from typing import Any, Callable, Optional, Tuple, List +from typing import Callable + + +class input_fxpt_normalize: + def __init__(self, act_8b_mode): + self.act_8b_mode = act_8b_mode + + def __call__(self, img): + if(self.act_8b_mode): + return img.sub(0.5).mul(256.).round().clamp(min=-128, max=127) + return img.sub(0.5).mul(256.).round().clamp(min=-128, max=127).div(128.) + + +### Emre Can: Our COCO Dataloder for training classes at specific ratio in every batch. +def class_lookup(cls): + c = list(cls.__bases__) + for base in c: + c.extend(class_lookup(base)) + return c + +# ref: https://pytorch.org/vision/main/_modules/torchvision/datasets/coco.html +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + Args: + root (string): Root directory where images are downloaded to. + + annFile (string): Path to json annotation file. 
+ + scaleImgforCrop (int, optional): Img and target BBs are scaled with + constant aspect ratio st: + if image width, image height > scaleImgforCrop image is shrinked + until width or height becomes equal to scaleImgforCrop + + if image width, image height < scaleImgforCrop image is expanded + until width or height becomes equal to scaleImgforCrop + + else no scaling + fit_full_img: If it is set to true, image is scaled t fully fit in the window specified by "scaleImgforCrop x scaleImgforCrop" + transform (callable, optional): A function/transform that takes in an + PIL image and returns a transformed version. E.g, ``transforms.ToTensor`` + + target_transform (callable, optional): A function/transform that takes in + the target and transforms it. + transforms (callable, optional): A function/transform that takes input + sample and its target as entry and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + scaleImgforCrop: int= None, + fit_full_img = False, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None + ): + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.annFilePath = os.path.join('.',annFile) + self.catPersonId = self.coco.getCatIds(catNms=['person'])[0] + self.scaleImgforCrop = scaleImgforCrop + self.fit_full_img = fit_full_img + + + def _load_image(self, id: int) -> Image.Image: + path = self.coco.loadImgs(id)[0]["file_name"] + return Image.open(os.path.join(self.root, path)).convert("RGB") + + def _load_target(self, id) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id, iscrowd=False)) + + def __getitem__(self, index: int) -> Tuple[Any, Any, Any]: + + id = self.ids[index] + imgID = id + + try: + image = self._load_image(id) + except: + print(f'********Unable to load image with id: {imgID}********') + print('Please check if image is corrupted, and remove it from annotations if necessary.') + + + target = copy.deepcopy(self._load_target(id)) # deepcopy target list beforecentercrop manip, to be abe to work with same + # dateset without reloading it + + image_width = image.size[0] + image_height = image.size[1] + + + # If necesary rescale the image and BBs near the size of planned center crop as much as possible + scale = self._calcPrescale(image_width=image_width, image_height=image_height) + image = self._prescaleImage(image, scale) + + for i, t in enumerate(target): + BB = t['bbox'].copy() + scaledBB = self._prescaleBB(BB,scale) + target[i]['bbox'] = scaledBB + + + + # Image width height after prescaling + image_width = image.size[0] + image_height = image.size[1] + + # Check if center crop applied + centerCropped = False + if self.transforms is not None: + image, target = self.transforms(image, target) + + # If center crop applied, transform BBs as well + for t in self.transforms.transform.transforms: + if (type(t) == torchvision.transforms.transforms.CenterCrop): + centerCropped = True + + + + x_scale = image.size(2) / image_width + y_scale = image.size(1) / image_height + + bbox_arr = [] + + for idx,ann in enumerate(target): + if ann['category_id'] == self.catPersonId: + crop_size = image.shape[1] + + if centerCropped: + bbox = ann['bbox'].copy() + croppedBB = self.cropBBox(bbox, crop_size,image_height,image_width) + else: + croppedBB = torch.tensor(ann['bbox']) + + if not (croppedBB == None): + 
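+                    # cropBBox() returns None when the box ends up completely outside the center crop
+                    # (zero width or height after clipping); such boxes are skipped here.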
bbox_arr.append(croppedBB) + + if len(bbox_arr) != 0: + bbox_arr = torch.stack(bbox_arr) + wh = bbox_arr[:, 2:] + xy = bbox_arr[:, :2] + + id_tensor = torch.tensor([id]).unsqueeze(0).expand(bbox_arr.size(0), -1) + + bbox_arr = torch.cat([id_tensor, xy, wh], dim=-1) + else: + bbox_arr = torch.tensor(bbox_arr) + + return image, bbox_arr , imgID + + def __len__(self) -> int: + return len(self.ids) + + def get_labels(self): + labels = [] + for id in self.ids: + anns = self._load_target(id) + person_flag = False + for ann in anns: + person_flag = ann['category_id'] == self.catPersonId + if person_flag == True: + break + if person_flag == True: + labels.append(1) + else: + labels.append(0) + return torch.tensor(labels) + + def get_cat_person_id(self): + return self.catPersonId + + def get_coco_api(self): + return self.coco + + + # Functions defined for prescaling images/targets before center crop operation + def _calcPrescale(self, image_width, image_height): + # Calculate scale factor to shrink/expand image to coincide width or height to croppig area + scale = 1.0 + if self.scaleImgforCrop != None: + if self.fit_full_img: + max_size = max(image_width, image_height) + scale = max_size/self.scaleImgforCrop + else: + # image fully encapsulates cropping area or vice versa + if ((image_width-self.scaleImgforCrop)*(image_height-self.scaleImgforCrop) > 0): + # if width of original image is closer to crop area + if abs(1-image_width/self.scaleImgforCrop) < abs(1-image_height/self.scaleImgforCrop): + scale = image_width/self.scaleImgforCrop + else: + scale = image_height/self.scaleImgforCrop + return scale + + # Scales the image with defined scale + def _prescaleImage(self, image, scale): + image_width = int(image.size[0]/scale) + image_height = int(image.size[1]/scale) + + t = transforms.Resize([image_height,image_width]) + image = t(image) + return image + + # Scales the targets with defined scale + def _prescaleBB(self, BB, scale): + scaledbb = [round(p/scale,1) for p in BB] + return scaledbb + + + def cropBBox(self,bbox,crop_size, image_height, image_width): + + bbox_aligned = [] + x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3] + + # Casses for cropping + if image_height < crop_size: + offset = (crop_size - image_height) // 2 + y = y + offset + if (y+h) > crop_size: + offset = (y+h)-crop_size + h = h - offset + if image_width < crop_size: + offset = (crop_size - image_width) // 2 + x = x + offset + if (x+w) > crop_size: + offset = (x+w)-crop_size + w = w - offset + if image_width > crop_size: + offset = (image_width - crop_size) // 2 + if offset > x: + # Deal with BB coincide with left cropping boundary + w = w -(offset-x) + x = 0 + else: + x = x - offset + + # Deal with BB coincide with right cropping boundary + if (x+w) > crop_size: + offset = (x+w)-crop_size + w = w - offset + + if image_height > crop_size: + + offset = (image_height - crop_size) // 2 + if offset > y: + # Deal with BB coincide with top cropping boundary + h = h -(offset-y) + y = 0 + else: + y = y - offset + # Deal with BB coincide with bottom cropping boundary + if (y+h) > crop_size: + offset = (y+h)-crop_size + h = h - offset + + bbox_aligned.append(x) + bbox_aligned.append(y) + bbox_aligned.append(w) + bbox_aligned.append(h) + + if ((w <= 0) or (h <= 0)): + return None + else: + x_scale, y_scale = 1.0,1.0 + return torch.mul(torch.tensor(bbox_aligned), torch.tensor([x_scale, y_scale, x_scale, y_scale])) + + def __round_floats(self,o): + ''' + Used to round floats before writing to json file + ''' + if isinstance(o, float): + 
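+            # bbox coordinates and areas are rounded to 2 decimals to keep the generated json compact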
return round(o, 2) + if isinstance(o, dict): + return {k: self.__round_floats(v) for k, v in o.items()} + if isinstance(o, (list, tuple)): + return [self.__round_floats(x) for x in o] + return o + + def _check_if_annot_ignored(self, annot_bbox, ignore_bboxes): + '''gets an annotation and ignore bboxes list in [xmin, ymin, w, h] form and calculates the percentage + of the overlapping area. If overlapping area exceeds 50% for any ignore part, returns True, otherwise returns False + ''' + annot_bbox = annot_bbox.copy() + annot_area = max(annot_bbox[2] * annot_bbox[3], 0) + annot_bbox[2] = annot_bbox[0] + annot_bbox[2] + annot_bbox[3] = annot_bbox[1] + annot_bbox[3] + + for ignore_bbox in ignore_bboxes: + + ignore_bbox = ignore_bbox.copy() + + ignore_bbox[2] = ignore_bbox[0] + ignore_bbox[2] + ignore_bbox[3] = ignore_bbox[1] + ignore_bbox[3] + + x_min_intersect = max(annot_bbox[0], ignore_bbox[0]) + y_min_intersect = max(annot_bbox[1], ignore_bbox[1]) + x_max_intersect = min(annot_bbox[2], ignore_bbox[2]) + y_max_intersect = min(annot_bbox[3], ignore_bbox[3]) + w = max(x_max_intersect - x_min_intersect, 0) + h = max(y_max_intersect - y_min_intersect, 0) + + if annot_area <= 0: + return True + + if w * h / annot_area > 0.5: + return True + + return False + + + def createResizedAnnotJson(self,targetFileName,cropsize=512, mask_ignore_parts=False, ignore_parts_file=None): + ''' + Resizes person annotations after center crop operation and saves as json file to the + directory of original annotations with the name "targetFileName" + + If 'mask_ignore_parts' flag set to true and corresponding wider dataset ignore_parts_file supplied, + annotations having 50% or more overlap with an ignore part are deleted. + + ''' + + # Get ignore part bb's in to a dictionary, wit image names as keys + if mask_ignore_parts: + ignore_part_dict = {} + with open(ignore_parts_file) as f: + for t, ignore_raw in enumerate(f): + ignore_raw = ignore_raw.split() + imgName = ignore_raw[:1][0] + + BBs_str = ignore_raw[1:] + bb_raw = [int(bb) for bb in BBs_str] + + BBs = [] + bb = [] + for i, p in enumerate(bb_raw): + bb.append(p) + if ((i+1)%4 == 0): + + BBs.append(bb) + bb = [] + + ignore_part_dict[imgName] = BBs + + + t1 = time.time() + # Get original json annot file path, and create pah for resized json annot file + path, annotfilename = os.path.split(self.annFilePath) + resizedAnnotPath = os.path.join(path,targetFileName) + + print('') + print(f'Creating Json file for resized annotations: {resizedAnnotPath}') + + + # Load original annotation json file as dictionary and assign it to resized annot dict + with open(self.annFilePath) as json_file: + resizedanotDict = json.load(json_file) + + # Original annotations array + origannList = resizedanotDict['annotations'] + + # Check if center crop applied + centerCropped = False + if self.transforms is not None: + # If center crop applied, transform BBs as well + for t in self.transforms.transform.transforms: + if (type(t) == torchvision.transforms.transforms.CenterCrop): + centerCropped = True + + + resizedannList = [] + for resizedannot in origannList: + + currentcatID = resizedannot['category_id'] + currentBB = resizedannot['bbox'] + currentImgID = resizedannot['image_id'] + + # if annotations overlaps with an ignore part, do not add it to new annot file + if mask_ignore_parts: + image_name = self.coco.loadImgs(currentImgID)[0]['file_name'] + if image_name in ignore_part_dict: + ignoreBBs = ignore_part_dict[image_name] + is_ignored = False + is_ignored = 
self._check_if_annot_ignored(resizedannot['bbox'].copy(), ignoreBBs) + + if is_ignored: + continue + + # Get crop size and original image sizes + image_width = self.coco.loadImgs(currentImgID)[0]['width'] + image_height = self.coco.loadImgs(currentImgID)[0]['height'] + + + # If presclae applied to image, calculate new image width and height + scale = self._calcPrescale(image_width=image_width, image_height=image_height) + image_width = image_width / scale + image_height = image_height / scale + + if currentcatID == self.catPersonId: + # if BB is person + bbox = resizedannot['bbox'].copy() + + # If prescale appied to image, resize annotations BBs + bbox = self._prescaleBB(bbox, scale) + + # If center crop applied, crop/recalculate BBs as well + if centerCropped: + croppedBB = self.cropBBox(bbox, cropsize,image_height,image_width) + else: + croppedBB = torch.tensor(bbox) + + if (croppedBB != None): + # If BB is person and valid after crop, add it to resized annotations list + croppedBB = croppedBB.tolist() + resizedannot['bbox'] = self.__round_floats(croppedBB) + resizedannot['area'] = self.__round_floats(croppedBB[2]*croppedBB[3]) + resizedannList.append(resizedannot) + else: + # If BB is non-person add it to resized annotations list as it is + resizedannList.append(resizedannot) + + # If prescale or center-crop applied + # Change width and height information of "images" field in annotations file + origImgList = resizedanotDict['images'] + + for i, imagInfo in enumerate(origImgList): + curInfo = origImgList[i] + image_width = curInfo['width'] + image_height = curInfo['height'] + + if centerCropped: + curInfo['width'] = cropsize + curInfo['height'] = cropsize + else: + scale = self._calcPrescale(image_width=image_width, image_height=image_height) + curInfo['width'] = int(image_width / scale) + curInfo['height'] = int(image_height / scale) + + origImgList[i] = curInfo.copy() + + resizedanotDict['images'] = origImgList + resizedanotDict['annotations'] = resizedannList + print('Saving resized annotations to json file...') + + # Save resized annotations in json file + resizedanotDict = json.dumps(resizedanotDict) + with open(resizedAnnotPath, 'w') as outfile: + outfile.write(resizedanotDict) + + print(f'{resizedAnnotPath} saved.') + t2 = time.time() + print(f'Elapsed time: {t2-t1} seconds') + +# ref: https://github.com/ufoym/imbalanced-dataset-sampler +class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler): + """Samples elements randomly from a given list of indices for imbalanced dataset + Arguments: + indices: a list of indices + num_samples: number of samples to draw + constantSeed: Make it true if you want same random at each run + callback_get_label: a callback-like function which takes two arguments - dataset and index + """ + + def __init__(self, dataset,constantSeed: bool = False, indices: list = None, num_samples: int = None, + callback_get_label: Callable = None, ratio: int = 4): + # if indices is not provided, all elements in the dataset will be considered + self.constantSeed = constantSeed + self.indices = list(range(len(dataset))) if indices is None else indices + + # define custom callback + self.callback_get_label = callback_get_label + + # if num_samples is not provided, draw `len(indices)` samples in each iteration + self.num_samples = len(self.indices) if num_samples is None else num_samples + + # distribution of classes in the dataset + df = pd.DataFrame() + df["label"] = self._get_labels(dataset) + df.index = self.indices + df = df.sort_index() + + label_to_count = 
df["label"].value_counts() + label_to_count[1] = int(label_to_count[1] / ratio) + + weights = 1.0 / label_to_count[df["label"]] + + self.weights = torch.DoubleTensor(weights.to_list()) + + def _get_labels(self, dataset): + return dataset.get_labels() + + def __iter__(self): + if self.constantSeed: + torch.random.manual_seed(1234) + return (self.indices[i] for i in torch.multinomial(self.weights, self.num_samples, replacement=True)) + + def __len__(self): + return self.num_samples \ No newline at end of file diff --git a/efficientdet_comparison/coco_eval.py b/efficientdet_comparison/coco_eval.py new file mode 100644 index 0000000000000000000000000000000000000000..4428308f0c43d44b4e4bb7e85a8d5a379df9720f --- /dev/null +++ b/efficientdet_comparison/coco_eval.py @@ -0,0 +1,301 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai # +########################################################################### +# Author: Zylo117 + +""" +COCO-Style Evaluations + +put images here datasets/your_project_name/val_set_name/*.jpg +put annotations here datasets/your_project_name/annotations/instances_{val_set_name}.json +put weights here /path/to/your/weights/*.pth +change compound_coef + +""" + +import json +import os +import numpy as np + +import argparse +import torch +from tqdm import tqdm +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from torch.utils.data import DataLoader +import torchvision +import torchvision.transforms as transforms +import time + +from models import mnv2_SSDlite +from library.ssd import conv_model_fptunc2fpt, conv_model_fpt2qat, conv_model_qat2hw, collate_fn, PredsPostProcess, round_floats +from dataloader import CocoDetection, input_fxpt_normalize + +#from library.ssd import generateAnchorsInOrigImage, collate_fn, point_form, prepareHeadDataforLoss_fast, plot_image_mnv2_2xSSDlite, sampleRandomPicsFromCOCO, saveOutputs ,PredsPostProcess, calculatemAP, batchNormAdaptation, round_floats + +ap = argparse.ArgumentParser() +ap.add_argument('-m', '--mode', type=str, default='qat', help='Mode of the model, allowed modes: fpt_unc, fpt, qat') +ap.add_argument('--nms_threshold', type=float, default=0.5, help='non max supression threshold') +ap.add_argument('--conf_threshold', type=float, default=0.5, help='confidence treshold, predictions below this level will be discarded') +ap.add_argument('-dp', '--data_path', type=str, default=None, help='/path/to/images') +ap.add_argument('-ap', '--json_path', type=str, default=None, help='/path/to/annotations.json') +ap.add_argument('-wp', '--weights_path', type=str, default=None, help='/path/to/weights') + +args = ap.parse_args() + +mode = args.mode +nms_threshold = args.nms_threshold +conf_threshold = args.conf_threshold +data_path = args.data_path +json_path = args.json_path +weights_path = args.weights_path + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +def evaluate_coco(model, DATA_PATH, JSON_PATH , nmsIoUTreshold = 0.5, PredMinConfTreshold = 0.5, HW_mode = False): + + if HW_mode: + act_8b_mode = True + else: + act_8b_mode = False + + transform = transforms.Compose([transforms.ToTensor(), input_fxpt_normalize(act_8b_mode=act_8b_mode)]) + targetFileName = 'resized.json' + dataset = CocoDetection(root=DATA_PATH, annFile=JSON_PATH, transform=transform, scaleImgforCrop= None) + + 
dataset.createResizedAnnotJson(targetFileName=targetFileName) + resizedFilePath = os.path.join(os.path.split(JSON_PATH)[0],targetFileName) + cocoGt=COCO(resizedFilePath) + os.remove(resizedFilePath) + + seq_sampler = torch.utils.data.SequentialSampler(dataset) + data_loader = DataLoader(dataset, + sampler=seq_sampler, + batch_size=1, + collate_fn=collate_fn, + drop_last=False) + print(f"Dataset Length: {len(dataset)}, Number of Batches: {len(data_loader)}") + + ANCHORS_HEAD1 = [(11.76, 28.97), + (20.98, 52.03), + (29.91, 77.24), + (38.97, 106.59)] + + ANCHORS_HEAD2 = [(52.25, 144.77), + (65.86, 193.05), + (96.37, 254.09), + (100.91, 109.82), + (140, 350)] + + predsPostProcess = PredsPostProcess(512, ANCHORS_HEAD1, ANCHORS_HEAD2) + + + dataDictList =[] + imgIDS = [] + for i, data in enumerate(tqdm(data_loader)): + imageBatch, targetBatch , idxBatch = data + + imageStack = torch.stack(imageBatch).detach().to(device) + imageStack.requires_grad_(True) + predBatch = model(imageStack) + + if HW_mode: + BBs1 = predBatch[0].detach() / 128.0 + CFs1 = predBatch[1].detach() / 128.0 + BBs2 = predBatch[2].detach() / 128.0 + CFs2 = predBatch[3].detach() / 128.0 + else: + BBs1 = predBatch[0].detach() + CFs1 = predBatch[1].detach() + BBs2 = predBatch[2].detach() + CFs2 = predBatch[3].detach() + + for imgNum in range(imageStack.shape[0]): + img = imageStack[imgNum,:,:,:] + target = targetBatch[imgNum] + image_id = int(idxBatch[imgNum]) + imgIDS.append(image_id) + + pred = (BBs1[imgNum,:,:,:].unsqueeze(0), CFs1[imgNum,:,:,:].unsqueeze(0), + BBs2[imgNum,:,:,:].unsqueeze(0), CFs2[imgNum,:,:,:].unsqueeze(0)) + + boxes, confidences = predsPostProcess.getPredsInOriginal(pred) + + nms_picks = torchvision.ops.nms(boxes, confidences, nmsIoUTreshold) + boxes_to_draw = boxes[nms_picks] + confs_to_draw = confidences[nms_picks] + confMask = (confs_to_draw > PredMinConfTreshold) + + # Inputs to mAP algorithm + if (confMask.any()): + + # pred boxes -> [xmin,ymin,xmax,ymax], tensor shape[numpred,4] + bbox = boxes_to_draw[confMask] + scores = confs_to_draw[confMask] + # Convert BB to coco annot format -> [xmin,ymin,width, height] + bbox[:,2] = bbox[:,2] - bbox[:,0] + bbox[:,3] = bbox[:,3] - bbox[:,1] + + + bbox = bbox.tolist() # pred boxes -> [xmin,ymin,xmax,ymax], shape[numpred,4] + score = scores.tolist() + category_id = np.ones_like(score,dtype=int).tolist() + + for j in range(len(bbox)): + box = {"image_id":image_id, "category_id":category_id[j], "bbox":bbox[j],"score":score[j]} + dataDictList.append(round_floats(box)) + + if (len(dataDictList)): + # Evavluate and Accumulate mAP for remained baches, if any + cocoDT = json.dumps(dataDictList) + + # Write detections to .json file + with open('cocoDT.json', 'w') as outfile: + outfile.write(cocoDT) + + # Load detections + cocoDt=cocoGt.loadRes('cocoDT.json') + os.remove("cocoDT.json") + + # running evaluation + annType = 'bbox' + cocoEval = COCOeval(cocoGt,cocoDt,annType) + cocoEval.params.catIds = 1 + cocoEval.params.imgIds = imgIDS + cocoEval.evaluate() + cocoEval.accumulate() + + print('') + cocoEval.summarize() + else: + raise Exception('the model does not provide any valid output, check model architecture and the data input') + + +if __name__ == '__main__': + model = mnv2_SSDlite() + + layer_bits_dictionary = {} + layer_bits_dictionary['conv1' ] = 8; + layer_bits_dictionary['epw_conv2' ] = 8; + layer_bits_dictionary['dw_conv2' ] = 8; + layer_bits_dictionary['ppw_conv2' ] = 8; + + layer_bits_dictionary['epw_conv3' ] = 8; + layer_bits_dictionary['dw_conv3' ] = 8; + 
layer_bits_dictionary['ppw_conv3' ] = 8; + + layer_bits_dictionary['epw_conv4' ] = 8; + layer_bits_dictionary['dw_conv4' ] = 8; + layer_bits_dictionary['ppw_conv4' ] = 8; + + layer_bits_dictionary['epw_conv5'] = 8; + layer_bits_dictionary['dw_conv5'] = 8; + layer_bits_dictionary['ppw_conv5'] = 8; + + layer_bits_dictionary['epw_conv6'] = 8; + layer_bits_dictionary['dw_conv6'] = 8; + layer_bits_dictionary['ppw_conv6'] = 8; + + layer_bits_dictionary['epw_conv7'] = 8; + layer_bits_dictionary['dw_conv7'] = 8; + layer_bits_dictionary['ppw_conv7'] = 8; + + layer_bits_dictionary['epw_conv8'] = 8; + layer_bits_dictionary['dw_conv8'] = 8; + layer_bits_dictionary['ppw_conv8'] = 8; + + layer_bits_dictionary['epw_conv9'] = 8; + layer_bits_dictionary['dw_conv9'] = 8; + layer_bits_dictionary['ppw_conv9'] = 8; + + layer_bits_dictionary['epw_conv10'] = 8; + layer_bits_dictionary['dw_conv10'] = 8; + layer_bits_dictionary['ppw_conv10'] = 8; + + layer_bits_dictionary['epw_conv11'] = 8; + layer_bits_dictionary['dw_conv11'] = 8; + layer_bits_dictionary['ppw_conv11'] = 8; + + layer_bits_dictionary['epw_conv12'] = 8; + layer_bits_dictionary['dw_conv12'] = 8; + layer_bits_dictionary['ppw_conv12'] = 8; + + layer_bits_dictionary['epw_conv13'] = 8; + layer_bits_dictionary['dw_conv13'] = 8; + layer_bits_dictionary['ppw_conv13'] = 8; + + layer_bits_dictionary['epw_conv14'] = 8; + layer_bits_dictionary['dw_conv14'] = 8; + layer_bits_dictionary['ppw_conv14'] = 8; + + layer_bits_dictionary['epw_conv15'] = 8; + layer_bits_dictionary['dw_conv15'] = 8; + layer_bits_dictionary['ppw_conv15'] = 8; + + layer_bits_dictionary['epw_conv16'] = 8; + layer_bits_dictionary['dw_conv16'] = 8; + layer_bits_dictionary['ppw_conv16'] = 8; + + layer_bits_dictionary['epw_conv17'] = 8; + layer_bits_dictionary['dw_conv17'] = 8; + layer_bits_dictionary['ppw_conv17'] = 8; + + layer_bits_dictionary['epw_conv18'] = 8; + layer_bits_dictionary['dw_conv18'] = 8; + layer_bits_dictionary['ppw_conv18'] = 8; + + layer_bits_dictionary['head1_dw_classification'] = 8; + layer_bits_dictionary['head1_pw_classification'] = 8; + layer_bits_dictionary['head1_dw_regression'] = 8; + layer_bits_dictionary['head1_pw_regression'] = 8; + + layer_bits_dictionary['head2_dw_classification'] = 8; + layer_bits_dictionary['head2_pw_classification'] = 8; + layer_bits_dictionary['head2_dw_regression'] = 8; + layer_bits_dictionary['head2_pw_regression'] = 8; + + # Convert model to appropriate mode before loading weights + HW_mode = False + if mode == 'fpt_unc': + model.to(device) + + elif mode == 'fpt': + model = conv_model_fptunc2fpt(model) + model.to(device) + + elif mode == 'qat': + model = conv_model_fptunc2fpt(model) + model.to(device) + model = conv_model_fpt2qat(model, layer_bits_dictionary) + model.to(device) + + elif mode == 'hw': + HW_mode = True + model = conv_model_fptunc2fpt(model) + model.to(device) + model = conv_model_fpt2qat(model, layer_bits_dictionary) + model.to(device) + model = conv_model_qat2hw(model) + model.to(device) + + else: + raise Exception('Invalid model mode is selected, select from: fpt_unc, fpt, qat, hw') + + + weights = torch.load(weights_path, map_location=torch.device('cpu')) + model.load_state_dict(weights['state_dict'], strict=True) + + model.requires_grad_(False) + model.eval() + + if mode == 'qat' or mode == 'hw': + print(''*5) + print('*'*120) + print('qat or hardware mode is selected, please make sure you configured layer_bits_dictionary in "coco_eval.py" accordingly!!!') + print('*'*120) + print('') + time.sleep(5) + + 
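+    # Note: in 'hw' mode the converted model produces integer-scaled outputs, so evaluate_coco()
+    # divides the four prediction tensors by 128 before post-processing (see the HW_mode handling
+    # inside evaluate_coco() above).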
evaluate_coco(model, DATA_PATH=data_path, JSON_PATH=json_path, nmsIoUTreshold=nms_threshold,
+                  PredMinConfTreshold=conf_threshold, HW_mode = HW_mode)
\ No newline at end of file
diff --git a/efficientdet_comparison/hardware_experiment_best.pth.tar b/efficientdet_comparison/hardware_experiment_best.pth.tar
new file mode 100644
index 0000000000000000000000000000000000000000..5f8fd82ef6d8af41bbbbd7c702f6784495c478ec
--- /dev/null
+++ b/efficientdet_comparison/hardware_experiment_best.pth.tar
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4712fd8026f221b6a40f464438821858389a7a9d021da6c4f2ddb881d7a695e
+size 7481103
diff --git a/efficientdet_comparison/readme.md b/efficientdet_comparison/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..acda96a9cf735243ba751c9727cb3a484b3dee9d
--- /dev/null
+++ b/efficientdet_comparison/readme.md
@@ -0,0 +1,186 @@
+# Performance Benchmark of Quantized Detection Model
+
+This directory compares our quantized, quantization-aware-trained detection model against EfficientDet-d0, one of the SOTA compact detection algorithms, which has complexity and structure comparable to our quantized model.
+
+Our person tracking model uses a MobileNet-v2 backbone combined with 2 SSD heads that use a total of 9 anchor boxes. The overall model consists of 60 convolution layers.
+
+We quantized the layers of this model and applied quantization-aware training to recover the accuracy lost to layer quantization and output clamping. We re-scaled and center-cropped the images in the [Wider Person Dataset](https://competitions.codalab.org/competitions/20132#learn_the_details), resized its annotations accordingly, and converted them into COCO annotation format for our training/evaluation tasks. We then applied training approaches that account for the effects of quantization and output clamping during optimization, which is what we call "quantization aware training".
+
+Our main motivation for quantizing networks and applying quantization-aware training is to reduce the overall network size, inference time, and training effort while keeping the accuracy drop at an acceptable level. We aim to develop quantized compact detection algorithms that can run on low-power, low-cost accelerator chips.
+
+## Dependencies
+* [PyTorch](https://github.com/pytorch/pytorch)
+* [Torchvision](https://github.com/pytorch/vision)
+* [Pycocotools](https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools)
+* [webcolors](https://pypi.org/project/webcolors/)
+* [PyYAML](https://github.com/yaml/pyyaml)
+
+## Evaluating EfficientDet with the Wider Person Validation Dataset
+This section explains how to reproduce the evaluation of the EfficientDet model from the [Yet-Another-EfficientDet-Pytorch Repository](https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch.git) with d0 coefficients. The evaluation uses the aforementioned Wider Person Validation Dataset in COCO format.
+
+### 1. Clone EfficientDet to Your Local Machine
+Open a terminal, go to the directory where you want to clone the repository, and type:
+```bash
+git clone --depth 1 https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch
+```
+
+### 2. 
Prepare EfficientDet-d0 Coefficients +* Go to main directory + ```bash + cd Yet-Another-EfficientDet-Pytorch/ + ``` +* Create weights folder + ```bash + mkdir weights + ``` +* Download EfficientDet d0 coefficients + ```bash + wget https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.0/efficientdet-d0.pth -O weights/efficientdet-d0.pth + ``` + +### 3. Prepare Wider Person Dataset +* Download original Wider Person Dataset + * Sign up [Codalab](https://competitions.codalab.org/) and participate to [WIDER Face & Person Challenge 2019](https://competitions.codalab.org/competitions/20132) + * Under "Participate" tab click "Train & Validation Data in Google Drive" and download + * val_data.tar.gz + * Annotations/val_bbox.txt + * Extract val_data.tar.gz as val_data and move val_data folder under ./data/original_wider/val_data + * Move "val_bbox.txt" under ./data/original_wider/ + +* Move our "wider2coco.py" script in "efficientdet_comparison" folder to main folder of your local "Yet-Another-EfficientDet-Pytorch" repository. Following code will produce resized images and annotations. + ```bash + python wider2coco.py -ip ./data/original_wider/val_data -af ./data/original_wider/val_bbox.txt + ``` +* Script will automatically convert Wider Dataset in to COCO format and create following repository structure: + + ./Yet-Another-EfficientDet-Pytorch/datasets/wider/val + image001.jpg + image002.jpg + ... + ./Yet-Another-EfficientDet-Pytorch/datasets/wider/annotations + instances_val.json + + + +### 4. Manually Set Project's Specific Parameters + +* Create a yml file "wider.yml" under "projects" + ```bash + touch projects/wider.yml + ``` + + * Copy following content in to "wider.yml" file + + project_name: wider + train_set: train + val_set: val + num_gpus: 1 # 0 means using cpu, 1-N means using gpus + + # Wider validation dataset mean and std in RGB order + mean: [0.416, 0.413, 0.406] + std: [0.308, 0.306, 0.310] + + # this is coco anchors, change it if necessary + anchors_scales: '[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]' + anchors_ratios: '[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]' + + # objects from all labels from your dataset with the order from your annotations. + # its index must match your dataset's category_id. + # category_id is one_indexed, + # for example, index of 'car' here is 2, while category_id of is 3 + obj_list: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack', 'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors','teddy bear', 'hair drier', 'toothbrush'] + +### 5. 
Evaluate EfficientDet Model Performance
+* Set up person-only evaluation
+  * Open "coco_eval.py" under ./Yet-Another-EfficientDet-Pytorch/
+  * Paste the following line after line 132, "coco_eval.params.imgIds = image_ids", so that mAP is evaluated only for the person category
+  ```python
+  coco_eval.params.catIds = 1
+  ```
+
+* For evaluation on a CUDA-enabled platform
+  ```bash
+  python coco_eval.py -p wider -c 0 -w ./weights/efficientdet-d0.pth
+  ```
+
+* For evaluation on a platform without CUDA
+  ```bash
+  python coco_eval.py -p wider -c 0 -w ./weights/efficientdet-d0.pth --cuda False
+  ```
+
+
+## Evaluating Our Quantized MobileNet-SSDLite Model with the Wider Person Validation Dataset
+
+### 1. Clone the Quantized MobileNet Model to Your Local Machine
+Open a terminal, go to the directory where you want to clone our [Quantization Aware Training - Person Tracking](https://github.com/sai-tr/persontracking_qat.git) repository, and type:
+```bash
+git clone --depth 1 https://github.com/sai-tr/persontracking_qat.git
+```
+
+### 2. Prepare Wider Person Dataset
+* Download the original Wider Person Dataset
+  * Sign up at [Codalab](https://competitions.codalab.org/) and participate in the [WIDER Face & Person Challenge 2019](https://competitions.codalab.org/competitions/20132)
+  * Under the "Participate" tab, click "Train & Validation Data in Google Drive" and download
+    * val_data.tar.gz
+    * Annotations/val_bbox.txt
+  * Extract val_data.tar.gz as val_data and move the val_data folder under ./data/original_wider/val_data
+  * Move "val_bbox.txt" under ./data/original_wider/
+
+* Move our "wider2coco.py" script from the "efficientdet_comparison" folder to the main folder of your local "persontracking_qat" repository. The following command produces the resized images and annotations.
+  ```bash
+  python wider2coco.py -ip ./data/original_wider/val_data -af ./data/original_wider/val_bbox.txt
+  ```
+* The script automatically converts the Wider Dataset into COCO format and creates the following directory structure:
+
+        ./persontracking_qat/datasets/wider/val
+            image001.jpg
+            image002.jpg
+            ...
+        ./persontracking_qat/datasets/wider/annotations
+            instances_val.json
+
+### 3. Evaluate the Quantized MobileNet Model Performance
+Note that the model mode must match the loaded parameter dictionary. Selectable model modes are:
+* Full Precision Unconstrained (fpt_unc): all layers are in full precision, with no output clamping
+* Full Precision Constrained (fpt): all layers are in full precision and layer outputs are clamped to ±1
+* Quantized (qat): all layers are quantized and layer outputs are clamped to ±1
+
+
+* Move our "coco_eval.py" script from the "efficientdet_comparison" folder to the "persontracking_qat" folder and use the following command for evaluation:
+  ```bash
+  python coco_eval.py -m qat -dp ./datasets/wider/val -ap ./datasets/wider/annotations/instances_val.json -wp ./efficientdet_comparison/training_experiment_best.pth.tar
+  ```
+  Note that this command evaluates the quantized model with the weights "training_experiment_best.pth.tar", using the images in "./datasets/wider/val" and the annotations in "./datasets/wider/annotations/instances_val.json".
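+
+For reference, both evaluations above restrict COCOeval to the person category (category id 1). A minimal standalone sketch of such a person-only evaluation with pycocotools is shown below; `instances_val.json` is the ground-truth file produced by the steps above, while `detections.json` is a hypothetical file holding your model's detections in COCO results format.
+```python
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+coco_gt = COCO('datasets/wider/annotations/instances_val.json')  # ground truth from wider2coco.py
+coco_dt = coco_gt.loadRes('detections.json')                     # hypothetical detections file
+
+coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
+coco_eval.params.catIds = [1]   # 'person' only, as in both evaluations above
+coco_eval.evaluate()
+coco_eval.accumulate()
+coco_eval.summarize()           # prints AP/AR tables like the ones below
+```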
+ +## mAP Comparisons +### EfficientDet-d0 + ### Wider Validation Dataset mAP scores ### + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.292 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.543 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.275 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.109 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.409 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.532 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.106 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.369 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.435 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.270 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.546 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.678 + + +### Quantized Mobilenet + ### Wider Validation Dataset mAP scores ### + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.281 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.457 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.310 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.075 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.406 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.582 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.107 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.324 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.331 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.110 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.481 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.637 + \ No newline at end of file diff --git a/efficientdet_comparison/training_experiment_best.pth.tar b/efficientdet_comparison/training_experiment_best.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..0f222489904703b8a6196c45c74ad0d603a3a635 --- /dev/null +++ b/efficientdet_comparison/training_experiment_best.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dbbd0f1849c213da61f71985dce57a88b7cd03881cd093a44b3daab61902f4 +size 7480143 diff --git a/efficientdet_comparison/wider2coco.py b/efficientdet_comparison/wider2coco.py new file mode 100644 index 0000000000000000000000000000000000000000..57a813a9cb063d591cc986c1afbdc392fbba46bb --- /dev/null +++ b/efficientdet_comparison/wider2coco.py @@ -0,0 +1,577 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import os +import datetime, time +import json +from PIL import Image +from tqdm import tqdm + +import torch, torchvision +from torchvision import transforms +from torchvision.datasets.vision import VisionDataset + +from typing import Any, Callable, Optional, Tuple, List +import argparse + +############################################################################## +####################### Functions to Prepare Images ########################## +############################################################################## +# Functions defined for prescaling images/targets before center crop operation +def calcPrescale(image_width, image_height, scaleImgforCrop = 512): + # Calculate scale factor to shrink/expand image to coincide width or height to croppig area + scale = 1.0 + # image fully encapsulates cropping area or vice versa + if ((image_width-scaleImgforCrop)*(image_height-scaleImgforCrop) > 0): + # if width of original image is closer to crop area + if abs(1-image_width/scaleImgforCrop) < abs(1-image_height/scaleImgforCrop): + scale = image_width/scaleImgforCrop + else: + scale = image_height/scaleImgforCrop + return scale + + +# Scales the image with defined scale +def prescaleImage(image, scale): + + image_width = int(image.size[0]/scale) + image_height = int(image.size[1]/scale) + + + image_res = image.resize((image_width, image_height)) + return image_res + + +def preProcessImages(org_images_path): + corruptedImgs = [] + ccrop_size = 512 + folder_dir,folder_name = os.path.split(org_images_path) + cur_dir = os.getcwd() + + processed_images_path = os.path.join(cur_dir,'datasets','wider','val') + + if not os.path.isdir(processed_images_path): + os.makedirs(processed_images_path) + imageNames = os.listdir(org_images_path) + + for i, image in enumerate(tqdm(imageNames)): + try: + if(image.split('.')[1] == 'jpg'): + imgDir = os.path.join(org_images_path,image) + img = Image.open(imgDir) + + # prescaling + image_width = img.size[0] + image_height = img.size[1] + scale = calcPrescale(image_width, image_height,scaleImgforCrop=ccrop_size) + img_resized = prescaleImage(img, scale) + + # Center Crop + width, height = img_resized.size # Get dimensions + + left = (width - ccrop_size)/2 + top = (height - ccrop_size)/2 + right = (width + ccrop_size)/2 + bottom = (height + ccrop_size)/2 + + # Crop the center of the image + img_ccropped = img_resized.crop((left, top, right, bottom)) + img_ccropped.save(os.path.join(processed_images_path, image)) + except: + print('Cannot Load: ' + image + ', check if it is corrupted.') + corruptedImgs.append(image) + + print('') + print('Conversion Finished') + print('') + if len(corruptedImgs): + print('Something wrong with the following images and they are not processed:') + print(corruptedImgs) + print('Please delete these images from associated annotations') + return + + +############################################################################## +##################### Functions to Prepare Annotations ####################### +############################################################################## +class CocoDetection(VisionDataset): + """`MS Coco Detection `_ Dataset. + Args: + root (string): Root directory where images are downloaded to. + + annFile (string): Path to json annotation file. 
+ + scaleImgforCrop (int, optional): Img and target BBs are scaled with + constant aspect ratio st: + if image width, image height > scaleImgforCrop image is shrinked + until width or height becomes equal to scaleImgforCrop + + if image width, image height < scaleImgforCrop image is expanded + until width or height becomes equal to scaleImgforCrop + + else no scaling + transform (callable, optional): A function/transform that takes in an + PIL image and returns a transformed version. E.g, ``transforms.ToTensor`` + + target_transform (callable, optional): A function/transform that takes in + the target and transforms it. + transforms (callable, optional): A function/transform that takes input + sample and its target as entry and returns a transformed version. + """ + + def __init__( + self, + root: str, + annFile: str, + scaleImgforCrop: int= None, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + transforms: Optional[Callable] = None + ): + super().__init__(root, transforms, transform, target_transform) + from pycocotools.coco import COCO + + self.coco = COCO(annFile) + self.ids = list(sorted(self.coco.imgs.keys())) + self.annFilePath = os.path.join('.',annFile) + self.catPersonId = self.coco.getCatIds(catNms=['person'])[0] + self.scaleImgforCrop = scaleImgforCrop + + def _load_image(self, id: int) -> Image.Image: + path = self.coco.loadImgs(id)[0]["file_name"] + return Image.open(os.path.join(self.root, path)).convert("RGB") + + def _load_target(self, id) -> List[Any]: + return self.coco.loadAnns(self.coco.getAnnIds(id, iscrowd=False)) + + def __getitem__(self, index: int) -> Tuple[Any, Any, Any]: + + id = self.ids[index] + imgID = id + + try: + image = self._load_image(id) + except: + print(f'********Unable to load image with id: {imgID}********') + print('Please check if image is corrupted, and remove it from annotations if necessary.') + + + target = copy.deepcopy(self._load_target(id)) # deepcopy target list beforecentercrop manip, to be abe to work with same + # dateset without reloading it + + image_width = image.size[0] + image_height = image.size[1] + + + # If necesary rescale the image and BBs near the size of planned center crop as much as possible + scale = self._calcPrescale(image_width=image_width, image_height=image_height) + image = self._prescaleImage(image, scale) + + for i, t in enumerate(target): + BB = t['bbox'].copy() + scaledBB = self._prescaleBB(BB,scale) + target[i]['bbox'] = scaledBB + + + + # Image width height after prescaling + image_width = image.size[0] + image_height = image.size[1] + + # Check if center crop applied + centerCropped = False + if self.transforms is not None: + image, target = self.transforms(image, target) + + # If center crop applied, transform BBs as well + for t in self.transforms.transform.transforms: + if (type(t) == torchvision.transforms.transforms.CenterCrop): + centerCropped = True + + + + x_scale = image.size(2) / image_width + y_scale = image.size(1) / image_height + + bbox_arr = [] + + for idx,ann in enumerate(target): + if ann['category_id'] == self.catPersonId: + crop_size = image.shape[1] + + if centerCropped: + bbox = ann['bbox'].copy() + croppedBB = self.cropBBox(bbox, crop_size,image_height,image_width) + else: + croppedBB = torch.tensor(ann['bbox']) + + if not (croppedBB == None): + bbox_arr.append(croppedBB) + + if len(bbox_arr) != 0: + bbox_arr = torch.stack(bbox_arr) + wh = bbox_arr[:, 2:] + xy = bbox_arr[:, :2] + + id_tensor = 
torch.tensor([id]).unsqueeze(0).expand(bbox_arr.size(0), -1) + + bbox_arr = torch.cat([id_tensor, xy, wh], dim=-1) + else: + bbox_arr = torch.tensor(bbox_arr) + + + return image, bbox_arr , imgID + + def __len__(self) -> int: + return len(self.ids) + + def get_labels(self): + labels = [] + for id in self.ids: + anns = self._load_target(id) + person_flag = False + for ann in anns: + person_flag = ann['category_id'] == self.catPersonId + if person_flag == True: + break + if person_flag == True: + labels.append(1) + else: + labels.append(0) + return torch.tensor(labels) + + def get_cat_person_id(self): + return self.catPersonId + + def get_coco_api(self): + return self.coco + + + # Functions defined for prescaling images/targets before center crop operation + def _calcPrescale(self, image_width, image_height): + # Calculate scale factor to shrink/expand image to coincide width or height to croppig area + scale = 1.0 + if self.scaleImgforCrop != None: + # image fully encapsulates cropping area or vice versa + if ((image_width-self.scaleImgforCrop)*(image_height-self.scaleImgforCrop) > 0): + # if width of original image is closer to crop area + if abs(1-image_width/self.scaleImgforCrop) < abs(1-image_height/self.scaleImgforCrop): + scale = image_width/self.scaleImgforCrop + else: + scale = image_height/self.scaleImgforCrop + return scale + + # Scales the image with defined scale + def _prescaleImage(self, image, scale): + image_width = int(image.size[0]/scale) + image_height = int(image.size[1]/scale) + + t = transforms.Resize([image_height,image_width]) + image = t(image) + return image + + # Scales the targets with defined scale + def _prescaleBB(self, BB, scale): + scaledbb = [round(p/scale,1) for p in BB] + return scaledbb + + + def cropBBox(self,bbox,crop_size, image_height, image_width): + + bbox_aligned = [] + x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3] + + # Casses for cropping + if image_height < crop_size: + offset = (crop_size - image_height) // 2 + y = y + offset + if (y+h) > crop_size: + offset = (y+h)-crop_size + h = h - offset + if image_width < crop_size: + offset = (crop_size - image_width) // 2 + x = x + offset + if (x+w) > crop_size: + offset = (x+w)-crop_size + w = w - offset + if image_width > crop_size: + offset = (image_width - crop_size) // 2 + if offset > x: + # Deal with BB coincide with left cropping boundary + w = w -(offset-x) + x = 0 + else: + x = x - offset + + # Deal with BB coincide with right cropping boundary + if (x+w) > crop_size: + offset = (x+w)-crop_size + w = w - offset + + if image_height > crop_size: + + offset = (image_height - crop_size) // 2 + if offset > y: + # Deal with BB coincide with top cropping boundary + h = h -(offset-y) + y = 0 + else: + y = y - offset + # Deal with BB coincide with bottom cropping boundary + if (y+h) > crop_size: + offset = (y+h)-crop_size + h = h - offset + + bbox_aligned.append(x) + bbox_aligned.append(y) + bbox_aligned.append(w) + bbox_aligned.append(h) + + if ((w <= 0) or (h <= 0)): + return None + else: + x_scale, y_scale = 1.0,1.0 + return torch.mul(torch.tensor(bbox_aligned), torch.tensor([x_scale, y_scale, x_scale, y_scale])) + + def __round_floats(self,o): + ''' + Used to round floats before writing to json file + ''' + if isinstance(o, float): + return round(o, 2) + if isinstance(o, dict): + return {k: self.__round_floats(v) for k, v in o.items()} + if isinstance(o, (list, tuple)): + return [self.__round_floats(x) for x in o] + return o + + def createResizedAnnotJson(self,targetFileName,cropsize = 512): + 
''' + Resizes person annotations after center crop operation and saves as json file to the + directory of original annotations with the name "targetFileName" + ''' + t1 = time.time() + # Get original json annot file path, and create pah for resized json annot file + path, annotfilename = os.path.split(self.annFilePath) + resizedAnnotPath = os.path.join(path,targetFileName) + + print('') + print(f'Creating Json file for resized annotations: {resizedAnnotPath}') + + + # Load original annotation json file as dictionary and assign it to resized annot dict + with open(self.annFilePath) as json_file: + resizedanotDict = json.load(json_file) + + # Original annotations array + origannList = resizedanotDict['annotations'] + + # Check if center crop applied + centerCropped = False + if self.transforms is not None: + # If center crop applied, transform BBs as well + for t in self.transforms.transform.transforms: + if (type(t) == torchvision.transforms.transforms.CenterCrop): + centerCropped = True + + resizedannList = [] + for resizedannot in origannList: + + currentcatID = resizedannot['category_id'] + currentBB = resizedannot['bbox'] + currentImgID = resizedannot['image_id'] + + + # Get crop size and original image sizes + image_width = self.coco.loadImgs(currentImgID)[0]['width'] + image_height = self.coco.loadImgs(currentImgID)[0]['height'] + + # If presclae applied to image, calculate new image width and height + scale = self._calcPrescale(image_width=image_width, image_height=image_height) + image_width = image_width / scale + image_height = image_height / scale + + if currentcatID == self.catPersonId: + # if BB is person + bbox = resizedannot['bbox'].copy() + + # If prescale appied to image, resize annotations BBs + bbox = self._prescaleBB(bbox, scale) + + # If center crop applied, crop/recalculate BBs as well + if centerCropped: + croppedBB = self.cropBBox(bbox, cropsize,image_height,image_width) + else: + croppedBB = torch.tensor(bbox) + + if (croppedBB != None): + # If BB is person and valid after crop, add it to resized annotations list + croppedBB = croppedBB.tolist() + resizedannot['bbox'] = self.__round_floats(croppedBB) + resizedannot['area'] = self.__round_floats(croppedBB[2]*croppedBB[3]) + resizedannList.append(resizedannot) + else: + # If BB is non-person add it to resized annotations list as it is + resizedannList.append(resizedannot) + + resizedanotDict['annotations'] = resizedannList + print('Saving resized annotations to json file...') + + # Save resized annotations in json file + resizedanotDict = json.dumps(resizedanotDict) + with open(resizedAnnotPath, 'w') as outfile: + outfile.write(resizedanotDict) + + print(f'{resizedAnnotPath} saved.') + t2 = time.time() + print(f'Elapsed time: {t2-t1} seconds') + +# Taken from : https://github.com/hasanirtiza/Pedestron/blob/master/tools/convert_datasets/pycococreatortools.py +def create_image_info(image_id, file_name, image_size, + date_captured=datetime.datetime.utcnow().isoformat(' '), + license_id=1, coco_url="", flickr_url=""): + + image_info = { + "id": image_id, + "file_name": file_name, + "width": image_size[0], + "height": image_size[1], + "date_captured": date_captured, + "license": license_id, + "coco_url": coco_url, + "flickr_url": flickr_url + } + + return image_info + +# Taken from : https://github.com/hasanirtiza/Pedestron/blob/master/tools/convert_datasets/pycococreatortools.py +def create_annotation_info(annotation_id, image_id, category_info, bounding_box): + is_crowd = category_info['is_crowd'] + + annotation_info = { 
+ "id": annotation_id, + "image_id": image_id, + "category_id": category_info["id"], + "iscrowd": is_crowd, + "bbox": bounding_box + } + + return annotation_info + +def convWidertoCOCO(annotFile, orgImageDir): + ''' + Converts wider dataset annotations to COCO format. + Args: + annotFile: Original annotation file + orgImageDir: Original Images directory + ''' + + totalImgnum = 0 + imgID = 0 + annID = 0 + + imgList = [] + annList = [] + + category_info= {} + category_info['is_crowd'] = False + category_info['id'] = 1 + + data ={} + + data['info'] = {'description': 'Example Dataset', 'url': '', 'version': '0.1.0', 'year': 2022, 'contributor': 'ljp', 'date_created': '2019-07-18 06:56:33.567522'} + data['categories'] = [{'id': 1, 'name': 'person', 'supercategory': 'person'}] + data['licences'] = [{'id': 1, 'name': 'Attribution-NonCommercial-ShareAlike License', 'url': 'http://creativecommons.org/licenses/by-nc-sa/2.0/'}] + + with open(annotFile) as f: + for _, annot_raw in enumerate(tqdm(f)): + imgID += 1 + + annot_raw = annot_raw.split() + imgName = annot_raw[:1][0] + + totalImgnum +=1 + imageFullPath = os.path.join(orgImageDir,imgName) + try: + curImg = Image.open(imageFullPath) + image_size = curImg.size + + BBs_str = annot_raw[1:] + bb_raw = [int(bb) for bb in BBs_str] + + imgInf = create_image_info(image_id = imgID, file_name = imgName, image_size =image_size, + date_captured=datetime.datetime.utcnow().isoformat(' '), + license_id=1, coco_url="", flickr_url="") + imgList.append(imgInf) + + bb = [] + for i, p in enumerate(bb_raw): + + bb.append(p) + if ((i+1)%4 == 0): + annID += 1 + ann = create_annotation_info(annID, imgID, category_info = category_info, bounding_box = bb) + annList.append(ann) + bb = [] + + except: + print(f'Cannot create annot for {imgName}, image does not exist in given directory.') + + data['annotations'] = annList + data['images'] = imgList + + + cur_dir = os.getcwd() + processed_annot_path = os.path.join(cur_dir,'datasets','wider','annotations') + + if not os.path.isdir(processed_annot_path): + os.makedirs(processed_annot_path) + + orgCOCOAnnotFile = os.path.join( processed_annot_path ,'orig_annot.json') + + with open(orgCOCOAnnotFile, 'w') as fp: + json.dump(data, fp) + + + print('Annotations saved as: ' + orgCOCOAnnotFile) + print(f'Created {annID} COCO annotations for total {totalImgnum} images') + print('') + return orgCOCOAnnotFile + + +def main(): + parser = argparse.ArgumentParser(description='This script converts original Wider Person' + 'Validation Dataset images to 512 x 512' + 'Then resisez the annotations accordingly, saves new images and annotations under datasets folder') + parser.add_argument('-ip', '--wider_images_path', type=str, required = True, + help='path of the folder containing original images') + parser.add_argument('-af', '--wider_annotfile', type=str, required = True, + help='full path of original annotations file e.g. 
./some/path/some_annot.json') + + + args = parser.parse_args() + wider_images_path = args.wider_images_path + wider_annotfile = args.wider_annotfile + + # Prepare images + print('') + print('Prescaling and Center-cropping original images to 512 x 512') + preProcessImages(wider_images_path) + print('\n'*2) + + # Convert original wider annotations in to COCO format + print('Converting original annotations to COCO format') + orgCOCOAnnotFile = convWidertoCOCO(wider_annotfile, wider_images_path) + print('\n'*2) + + # Prescale/Center-crop annotations and save + print('Prescaling/Center-cropping original annotations in COCO format') + transform = transforms.Compose([transforms.CenterCrop(512), transforms.ToTensor()]) + dataset = CocoDetection(root=wider_images_path, annFile=orgCOCOAnnotFile, transform=transform,scaleImgforCrop= 512) + targetFileName = 'instances_val.json' + dataset.createResizedAnnotJson(targetFileName=targetFileName) + os.remove(orgCOCOAnnotFile) + +if __name__ == '__main__': + main() diff --git a/experiments/demo.gif b/experiments/demo.gif new file mode 100644 index 0000000000000000000000000000000000000000..50cfb7e24c25c4f88e2109f234d55120d24a7c5b --- /dev/null +++ b/experiments/demo.gif @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cc98d6944f535c644d9fe6b4a7e3d22a203489c1fef75e85b64cc99f3a59e48 +size 8459257 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..19afbde7d48b65c1d443ce3fea18705f2461b7fb --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5214c8aa37e49a20b98c22d4b17ceeeaadec1974ae566b4dbb9f8a103485167 +size 941358 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..07ce0c7c545e9c191b9befbf5d25a48745fe4d66 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed0308109e75b636cdbad9ea6863fe06445ac336b2b86f3ab2d8d157021c788 +size 937113 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1c22c284408bd31d63c2305f444163f7b639f692 --- /dev/null +++ 
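# Illustrative sketch (not part of the diff above): how a COCO-style [x, y, w, h]
# box can be remapped after a center crop, in the spirit of cropBBox() and
# createResizedAnnotJson(). The helper name and the clip/drop policy are
# assumptions for illustration; the repository's own cropBBox() may differ.
def center_crop_bbox(bbox, crop_size, image_height, image_width):
    """Return the box in crop coordinates, or None if it falls outside the crop."""
    x, y, w, h = bbox
    off_x = (image_width  - crop_size) / 2.0   # left edge of the crop window
    off_y = (image_height - crop_size) / 2.0   # top edge of the crop window

    # shift into crop coordinates
    x_min, y_min = x - off_x, y - off_y
    x_max, y_max = x_min + w, y_min + h

    # clip to the crop window
    x_min, y_min = max(x_min, 0.0), max(y_min, 0.0)
    x_max, y_max = min(x_max, float(crop_size)), min(y_max, float(crop_size))

    if x_max <= x_min or y_max <= y_min:
        return None                            # box lies entirely outside the crop
    return [x_min, y_min, x_max - x_min, y_max - y_min]

# Example: a 100x200 person box in a 640x480 image, cropped to 512x512
print(center_crop_bbox([300, 50, 100, 200], 512, image_height=480, image_width=640))
# [236.0, 66.0, 100.0, 200.0]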
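# Illustrative sketch of the per-line conversion performed by convWidertoCOCO()
# above: one WIDER annotation line ("<image name> x y w h x y w h ...") becomes
# one COCO image entry plus one COCO annotation per 4-tuple. Standalone toy
# example; the image size is hard-coded here instead of being read with PIL.
def wider_line_to_coco(line, image_id, first_ann_id, image_size=(512, 512)):
    tokens = line.split()
    img_name, coords = tokens[0], [int(v) for v in tokens[1:]]
    image_info = {"id": image_id, "file_name": img_name,
                  "width": image_size[0], "height": image_size[1]}
    annotations = []
    for k in range(0, len(coords), 4):
        annotations.append({
            "id": first_ann_id + k // 4,
            "image_id": image_id,
            "category_id": 1,          # 'person'
            "iscrowd": False,
            "bbox": coords[k:k + 4],   # [x, y, w, h]
        })
    return image_info, annotations

img_info, anns = wider_line_to_coco("ad_0001.jpg 10 20 30 80 200 40 25 60",
                                    image_id=1, first_ann_id=1)
print(img_info, anns)   # one image entry, two person annotations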
b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843fcc11f1f35c812db0c75fa6637eae87d16000d331a077495da363d5a8a244 +size 668232 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b684a672494563a653a1f5e68aa04eb257f35c2b --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32b5fe7ad550364580d28517c2226ee149e75ed56ae9efb1264f9f40f45267b9 +size 964775 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cadebdb985b433e86165048975246768a7cee20b --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46bc37c1ec343c694b8327fa9bcf82935422e38231a68d87832af1881925ca0 +size 894905 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f3f128ba78e937e6d80ecb38e79ae88b5761c2ab --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfb1e3c6bd982c1d221785f05018a1b3c5511d7347fdfb8c645ebf5a74f975e +size 925558 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..63a2504210d488003e9c2570f1f0220d1ce7ebb6 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46114cd41e17b38b16029e901003a5646bf7a62978c2e02e70d09e1c4fc0b58c +size 657766 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg 
b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bf4e148bad89f3434485c29ec72705a2858e07fa --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de72831ad376a6e91cd63daf1bfa22cba8193d13ead6bcda0770ae38e203221e +size 973121 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..13e9e8f94e3c20c136675ced857fd53ff078aab1 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a1ded015ffad9c4bcafa073984240d13d0d7f2eda74721746e192992d562e1 +size 628842 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1f678d29bf81b616185fcf678bd4493ab206391e --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f78fb05300bf05aa7e09c03628a44dd01a91f1c6d3c79b210cd3f545142741 +size 615815 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0beab12c0cfb06928ee891d7bbd190b6775397b0 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334d7493919eb952531f4278f1dde290d54562bad842a4093e8af8f35d5f780e +size 780463 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..370e2b5810c103befd810c816731b69057c8bcd9 --- /dev/null +++ 
b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da6235c5e505373cdbe6ce492b9d1f498f95725f05790f1b8dfaacc11fe2f18 +size 910642 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a8759c6fada6d09efd661c56a50a53adac560daa --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab4c570971eefac541e8bba478189b6286f898f5b1e869f3ce6a5934162325c +size 628450 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..893cd2982f1a9bd91f4f5dd28505b2eb1b291251 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aee563aad55618885ad4f9913f09ab4985464c1be2ffe4ba7c23737c88fcdb0 +size 615553 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3c434374fbd2b3f5da04d630941e535e211facb2 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0611c7642a0c6a725ffc2b7b4635b0e9881440ed61fc84bc54d6059fd7281fc3 +size 775085 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7baeeac0a28df6af421e949d8e9e6d000354c8ad --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3509f5253d032c1e0cbd828ce2fa8d8a22055338d674e825e26029984442e29 +size 914254 diff --git 
a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/fpt_experiment.pth.tar b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/fpt_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..4d26f5cbec956e3ba9d36e65df66d2cd8a2d715a --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/fpt_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b1b6622d7a390468d66c9629e562cf1f3dba0bf2d582aee64f5fc8c44eb20f +size 7807691 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/hardware_experiment.pth.tar b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/hardware_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..5f8fd82ef6d8af41bbbbd7c702f6784495c478ec --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/hardware_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4712fd8026f221b6a40f464438821858389a7a9d021da6c4f2ddb881d7a695e +size 7481103 diff --git a/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/training_experiment.pth.tar b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/training_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..0f222489904703b8a6196c45c74ad0d603a3a635 --- /dev/null +++ b/experiments/shallow800_mnv2ssdlite_8b_retrain_wider_optimizedAnchors_annealedlr_0_95/training_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79dbbd0f1849c213da61f71985dce57a88b7cd03881cd093a44b3daab61902f4 +size 7480143 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7b7571e5b9cae850bf597ae5d646a23720978bb7 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adb74b01be14c43fd3dff764c96fe82974da415f81ca3737da72a149d7ebfd18 +size 795136 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cdb285bc6bbf295e20e486aaf78b382a36e6e865 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c0986856ad0baa2b34fbe8c38dc3c8fc2e92006a800c181889de00aefe646b +size 822785 diff --git 
a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..43551b6d19e5027346c7631317aa3de37ada924d --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:715a53f029e7d8cdf3e2a69b27e27e5d946882fb83a3efe9093bb9a54b7f3d08 +size 635716 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7ab72bcae9456c678470d0fe856bb010d2a0f33e --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_fpt_cons_train_experiment/fpt_cons_train_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1eda6510409be4e88c97be30d024cd34d8c6a00ba73589f692a26b071371ee +size 865584 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b838ea3bc8ca58b22c43e05f54b78be369bc887e --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04719f730366648080e41b5666056a810179cda73fdef8b00e642df7b6015761 +size 800077 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0820ac28a49e2470eeb7e5d843b23043831971b9 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6c26a71c6602b1de7742603af34ad98768c1f94750448d8b8ca89aa84c4b5c +size 812743 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..94eef4fbcf853aa56df93835727ac48be2b34777 --- /dev/null +++ 
b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9b0cfb1d96ae59515a0d19a7649cf65801207b9d355f58d43165b3d63de4bf +size 634754 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7166a2391bd2d5b98c97330bd320a10e5c18ed50 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_original_experiment/original_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb246fc5fb1d70ad2b2279404127d3fd90878c6cd2d0e79c3966d7f30e0feb8e +size 864321 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..82eed64ded04426724a813c89de6ebd08eca81f2 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8dd7e51cf04c0e608ed495c5da99e327280b68ded9aa8890a6f7838af05c347 +size 605282 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e87df0424e0f57da157dd6a1e68cc5bef0e1a47d --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9977cdc374934d6add8198820dd7ba3df0fa136ae4a537651ad61462034e24 +size 591622 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cbd148a595f91dbfc2359177307ff7c39c0f1eb6 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53879217a0e40715cb79319eb915939d20811146edca95979236bd0fce15aca0 +size 713544 diff --git 
a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..939d37ab064d7f528dc62d989e80250cd38b2ff2 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_after_trained_experiment/qat_after_trained_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7330ec7bdf1faf0f7b760060cca65ec7a4944aea960f8120f250d83259978ced +size 811212 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e8aa2f7cfaaa354da9b8ee864d2f05cec67fbc0d --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36eeeabe890e0d82356f2a923a480ba5fc1c596ed0c2bb23555a2d9f6584729b +size 604890 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg new file mode 100644 index 0000000000000000000000000000000000000000..47469fd5faa1383e1329acbae892452767630908 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_bn_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0daf1a49a8cfa40c6fd46de39cc98d1361ecdb32b8ba89610aae1c0d8653fc0 +size 590882 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg new file mode 100644 index 0000000000000000000000000000000000000000..419a6ed4e071f4f69dff04f33f052c149ac9e457 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_biaseses.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0c802bb2fb087cd6d00f9879f230dabbd9c30e9c45b23ec2546460252efdd8 +size 717498 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..475489b0685d700e88f50455b7a1b5a47b9213af --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/Histograms/histograms_qat_pre_trained_experiment/qat_pre_trained_layer_weights.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff8850b9c056ddb1b32024c754a059684b0ebe4695a00a41e15d1da39320011f +size 806435 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/fpt_experiment.pth.tar b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/fpt_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..836b15434a39936761fcaf95baed4b0722933aba --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/fpt_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6629e998ec6a9a6482aeaa386370e25b26b3fae0206d7073401581dbae708a9 +size 5433449 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/hardware_experiment.pth.tar b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/hardware_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..cea616585a7eefa589665bfe3023d4ba1c9a8e32 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/hardware_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb91a81fcd0442b75e893822ffa7602eddbe9a7002b8c8062522d87986720e5 +size 5203607 diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/modified_model.py b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/modified_model.py new file mode 100644 index 0000000000000000000000000000000000000000..79b60fa5e7b0c7a7a247a4f5af808dba09a5e872 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/modified_model.py @@ -0,0 +1,147 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import torch.nn as nn +import qat_core.layers as layers + +class mnv2_SSDlite(nn.Module): + def __init__(self, in_channels=3, n_classes=1): + super(mnv2_SSDlite, self).__init__() + + self.conv1 = layers.conv(C_in_channels=in_channels, D_out_channels=32, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + + self.epw_conv2 = layers.conv(C_in_channels=32, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv2 = layers.conv(C_in_channels=32, D_out_channels=32, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=32, quantization_mode = 'fpt_unconstrained') + self.ppw_conv2 = layers.conv(C_in_channels=32, D_out_channels=16, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv3 = layers.conv(C_in_channels=16, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv3 = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.ppw_conv3 = layers.conv(C_in_channels=96, D_out_channels=24, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv4 = layers.conv(C_in_channels=24, D_out_channels=144, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv4 = layers.conv(C_in_channels=144, D_out_channels=144, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=144, quantization_mode = 'fpt_unconstrained') + self.ppw_conv4 = layers.conv(C_in_channels=144, D_out_channels=24, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv5 = layers.conv(C_in_channels=24, D_out_channels=144, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv5 = layers.conv(C_in_channels=144, D_out_channels=144, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=144, quantization_mode = 'fpt_unconstrained') + self.ppw_conv5 = layers.conv(C_in_channels=144, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv7 = layers.conv(C_in_channels=32, D_out_channels=192, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv7 = layers.conv(C_in_channels=192, 
D_out_channels=192, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=192, quantization_mode = 'fpt_unconstrained') + self.ppw_conv7 = layers.conv(C_in_channels=192, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv8 = layers.conv(C_in_channels=32, D_out_channels=192, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv8 = layers.conv(C_in_channels=192, D_out_channels=192, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=192, quantization_mode = 'fpt_unconstrained') + self.ppw_conv8 = layers.conv(C_in_channels=192, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv11 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv11 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv11 = layers.conv(C_in_channels=384, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv12 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv12 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv12 = layers.conv(C_in_channels=384, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv14 = layers.conv(C_in_channels=96, D_out_channels=576, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv14 = layers.conv(C_in_channels=576, D_out_channels=576, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=576, quantization_mode = 'fpt_unconstrained') + self.ppw_conv14 = layers.conv(C_in_channels=576, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') #ilk çıkış: torch.Size([2, 96, /16, /16]) + + self.epw_conv15 = layers.conv(C_in_channels=96, D_out_channels=576, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv15 = layers.conv(C_in_channels=576, D_out_channels=576, K_kernel_dimension=3, 
stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=576, quantization_mode = 'fpt_unconstrained') + self.ppw_conv15 = layers.conv(C_in_channels=576, D_out_channels=160, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv17 = layers.conv(C_in_channels=160, D_out_channels=960, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv17 = layers.conv(C_in_channels=960, D_out_channels=960, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=960, quantization_mode = 'fpt_unconstrained') + self.ppw_conv17 = layers.conv(C_in_channels=960, D_out_channels=160, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv18 = layers.conv(C_in_channels=160, D_out_channels=960, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv18 = layers.conv(C_in_channels=960, D_out_channels=960, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=960, quantization_mode = 'fpt_unconstrained') + self.ppw_conv18 = layers.conv(C_in_channels=960, D_out_channels=320, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') #ikinci çıkış: torch.Size([2, 320, /32, /32]) + + self.head1_dw_classification = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.head1_pw_classification = layers.conv(C_in_channels=96, D_out_channels=8, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + self.head1_dw_regression = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.head1_pw_regression = layers.conv(C_in_channels=96, D_out_channels=16, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + + self.head2_dw_classification = layers.conv(C_in_channels=320, D_out_channels=320, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=320, quantization_mode = 'fpt_unconstrained') + self.head2_pw_classification = layers.conv(C_in_channels=320, D_out_channels=10, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + self.head2_dw_regression = layers.conv(C_in_channels=320, D_out_channels=320, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=320, quantization_mode = 'fpt_unconstrained') + self.head2_pw_regression = layers.conv(C_in_channels=320, D_out_channels=20, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, 
quantization_mode = 'fpt_unconstrained') + + self.add_residual = layers.add_residual(quantization_mode = 'fpt_unconstrained') + + + def forward(self, x): + x = self.conv1(x) + + x = self.epw_conv2(x) + x = self.dw_conv2(x) + x = self.ppw_conv2(x) + + x = self.epw_conv3(x) + x = self.dw_conv3(x) + x = self.ppw_conv3(x) + res4 = x + x = self.epw_conv4(x) + x = self.dw_conv4(x) + x = self.ppw_conv4(x) + x = self.add_residual(x,res4) + + x = self.epw_conv5(x) + x = self.dw_conv5(x) + x = self.ppw_conv5(x) + + res7 = x + x = self.epw_conv7(x) + x = self.dw_conv7(x) + x = self.ppw_conv7(x) + x = self.add_residual(x,res7) + + x = self.epw_conv8(x) + x = self.dw_conv8(x) + x = self.ppw_conv8(x) + + res11 = x + x = self.epw_conv11(x) + x = self.dw_conv11(x) + x = self.ppw_conv11(x) + x = self.add_residual(x,res11) + + x = self.epw_conv12(x) + x = self.dw_conv12(x) + x = self.ppw_conv12(x) + + res14 = x + x = self.epw_conv14(x) + x = self.dw_conv14(x) + x = self.ppw_conv14(x) + x = self.add_residual(x,res14) + output1 = x + + x = self.epw_conv15(x) + x = self.dw_conv15(x) + x = self.ppw_conv15(x) + + res17 = x + x = self.epw_conv17(x) + x = self.dw_conv17(x) + x = self.ppw_conv17(x) + x = self.add_residual(x,res17) + + x = self.epw_conv18(x) + x = self.dw_conv18(x) + x = self.ppw_conv18(x) + output2 = x + + output1_class = self.head1_dw_classification(output1) + output1_class = self.head1_pw_classification(output1_class) + output1_reg = self.head1_dw_regression(output1) + output1_reg = self.head1_pw_regression(output1_reg) + + output2_class = self.head2_dw_classification(output2) + output2_class = self.head2_pw_classification(output2_class) + output2_reg = self.head2_dw_regression(output2) + output2_reg = self.head2_pw_regression(output2_reg) + + #print(f"Output1 Regression: {output1_reg.shape}, Output1 Classification: {output1_class.shape}\nOutput2 Regression: {output2_reg.shape}, Output2 Classification: {output2_class.shape}") + return (output1_reg, output1_class, output2_reg, output2_class) \ No newline at end of file diff --git a/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/training_experiment.pth.tar b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/training_experiment.pth.tar new file mode 100644 index 0000000000000000000000000000000000000000..440cf4e9a4f93fcded321b6a6bc69e702b9f2cd8 --- /dev/null +++ b/experiments/shallow830_mnv2ssdlite_2b8b_wider_somelayers_excluded_annealedlr_0_95/training_experiment.pth.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f990cf40e986094eb4695dfd3b310ff358ffffaa50ff755a8ab71b5f3e9fde +size 5202903 diff --git a/library/SSDLosses.py b/library/SSDLosses.py new file mode 100644 index 0000000000000000000000000000000000000000..cd9def336df172c94516a432436b859fd8329aec --- /dev/null +++ b/library/SSDLosses.py @@ -0,0 +1,211 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
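# Minimal plain-PyTorch sketch of the pattern that mnv2_SSDlite above chains
# together: an expand (1x1) -> depthwise (3x3) -> project (1x1) inverted-residual
# block with an optional residual add. It deliberately uses torch.nn instead of
# qat_core.layers, so it illustrates the structure only, not the quantized
# implementation in the diff.
import torch
import torch.nn as nn

class InvertedResidualSketch(nn.Module):
    def __init__(self, c_in, c_out, expand, stride):
        super().__init__()
        c_mid = c_in * expand
        self.use_residual = (stride == 1 and c_in == c_out)
        self.block = nn.Sequential(
            nn.Conv2d(c_in, c_mid, 1, bias=False), nn.BatchNorm2d(c_mid), nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c_mid, 3, stride, 1, groups=c_mid, bias=False),
            nn.BatchNorm2d(c_mid), nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c_out, 1, bias=False), nn.BatchNorm2d(c_out),
        )

    def forward(self, x):
        y = self.block(x)
        return x + y if self.use_residual else y

# Head channel arithmetic used above: the stride-16 head predicts 4 anchors per
# cell (4 x 2 = 8 class channels, 4 x 4 = 16 box channels) and the stride-32
# head predicts 5 anchors per cell (5 x 2 = 10 and 5 x 4 = 20 channels).
block = InvertedResidualSketch(c_in=24, c_out=24, expand=6, stride=1)
print(block(torch.randn(1, 24, 128, 128)).shape)   # torch.Size([1, 24, 128, 128])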
main@shallow.ai # +########################################################################### +import torch +import torch.nn as nn +from torch.autograd import Variable +from library.ssd import jaccard, intersect +import numpy as np + +class SSDSingleClassLoss(nn.Module): + """SSD Loss Function + Compute Targets: + 1) Produce indices for positive matches by matching ground truth boxes + with (default) 'priorboxes' that have jaccard index > threshold parameter + (default threshold: 0.5). + + 2) Calculates location and confidence loss for positive matches + + 3) Hard negative mining to filter the excessive number of negative examples + that comes with using a large number of default bounding boxes. + - Negative match background CFs are sorted in ascending order (less confident pred. first) + - If Positive match exists + - Nneg is calculated by Mining_Neg2PosRatio * Npos, clipped below with min_NegMiningSample + - Smallest Nneg background CFs are selected, CF's above maxBackroundCFforLossCalc are ommitted and used in loss calc + - If there is no positive match, min_NegMiningSample less confident background CFs are taken in to loss + + Objective Loss: + L(x,c,l,g) = [(LconfPosMatch(x, c)) / Npos] + + [(λ * LconfNegMatch(x, c)) / Nneg] + [(α*Lloc(x,l,g)) / Npos] + + + Where, LconfPosMatch is the log softmax person class conf loss of positive matched boxes, + LconfNegMatch is the log softmax background class conf loss of negative matched boxes, + Lloc is the SmoothL1 Loss weighted by α which is set to 1 by cross val for original multiclass SSD. + + Args: + c: class confidences, + l: predicted boxes, + g: ground truth boxes + Npos: number of matched default boxes + Neg: number of negative matches used in loss function after negative mining + x: positive match selector + """ + + def __init__(self, Anchor_box_wh, Anchor_box_xy, alpha = 1, Jaccardtreshold = 0.5, + Mining_Neg2PosRatio = 6, min_NegMiningSample = 10, maxBackroundCFforLossCalc = 0.5, negConfLosslambda = 1.0, + regularizedLayers = None): + ''' + Args: + Anchor_box_wh: (tensor) Anchor boxes (cx,cy, w, h) form in original image, Shape: [numPreds=5376,4] + Anchor_box_xy: (tensor) Anchor boxes (cxmin,cymin, xmax, ymax) form in original image, Shape: [numPreds=5376,4] + ''' + + super(SSDSingleClassLoss, self).__init__() + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.device = device + self.Anchor_box_wh = Anchor_box_wh + self.Anchor_box_xy = Anchor_box_xy + self.alpha = alpha + self.Jaccardtreshold = Jaccardtreshold + + self.Mining_Neg2PosRatio = Mining_Neg2PosRatio + self.min_NegMiningSample = min_NegMiningSample + self.maxBackroundCFforLossCalc = maxBackroundCFforLossCalc + self.negConfLosslambda = negConfLosslambda + + self.regularizedLayers = regularizedLayers + + # application specific variances for SSD + self.var_x = 0.1 + self.var_y = 0.1 + self.var_w = 0.2 + self.var_h = 0.2 + + + def forward(self, pred_box_delt, pred_CF ,GT_box_wh, model= None): + """Multibox Loss + Args: + pred_box_delt : (tensor) Location predictions in delta form (dcx, dcy, dw, dh), shape[numPreds=5376,4] + pred_CF : (tensor) Confidence predictions (person, nonperson), shape[numPreds=5376,2] + GT_box_wh : (tensor) Ground truth boxes in (xmin, ymin, w, h) form, shape [numObjects, 4] + """ + + + device =self.device + alpha = self.alpha + Jaccardtreshold = self.Jaccardtreshold + Mining_Neg2PosRatio = self.Mining_Neg2PosRatio + min_NegMiningSample = self.min_NegMiningSample + maxBackroundCFforLossCalc = self.maxBackroundCFforLossCalc + 
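# Illustrative sketch (pure PyTorch) of the matching and hard-negative-mining
# recipe described in the SSDSingleClassLoss docstring above: anchors with
# jaccard overlap >= the threshold are positives, and only the
# Nneg = max(Mining_Neg2PosRatio * Npos, min_NegMiningSample) least-confident
# background predictions below maxBackroundCFforLossCalc are kept for the
# negative confidence loss. Simplification for illustration: positives are
# reduced to one match per anchor, whereas the class's forward() keeps
# (anchor, GT) pairs.
import torch

def mine_negatives(jaccard_matrix, background_cf,
                   iou_thr=0.5, neg2pos=6, min_neg=10, max_bg_cf=0.5):
    pos_mask = (jaccard_matrix.max(dim=1).values >= iou_thr)   # [num_anchors]
    num_pos = int(pos_mask.sum())

    neg_cf = background_cf[~pos_mask]        # background confidence of negative anchors
    neg_cf, _ = neg_cf.sort()                # least confident background first (hardest)
    num_neg = max(num_pos * neg2pos, min_neg)
    num_neg = min(num_neg, neg_cf.numel())
    mined = neg_cf[:num_neg]
    mined = mined[mined < max_bg_cf]         # drop easy negatives above the cutoff
    return pos_mask, mined, num_pos

jac = torch.rand(5376, 3)     # overlaps of 5376 anchors with 3 GT boxes (toy values)
bg_cf = torch.rand(5376)      # predicted background confidence per anchor (toy values)
pos_mask, mined_neg_cf, num_pos = mine_negatives(jac, bg_cf)
print(num_pos, mined_neg_cf.shape)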
negConfLosslambda = self.negConfLosslambda + + reg = torch.tensor(.0).to(device) + + if (len(GT_box_wh)==0): # if there is no labeled person in original image, set location loss to 0 + loss_l = torch.tensor([.0]) + num_pos = 0 + else: + GT_box_wh = GT_box_wh[:,1:] # first element of GT_box is label of picture, it is deleted + + # GT_box_cxcy_wh: GT boxes in (cx, cy, w, h) form, used in ghat calculation + GT_box_cxcy_wh = GT_box_wh.clone().to(device) + GT_box_cxcy_wh[:,0] = GT_box_wh[:,0]+GT_box_wh[:,2]/2 + GT_box_cxcy_wh[:,1] = GT_box_wh[:,1]+GT_box_wh[:,3]/2 + + # GT_box_xy: GT boxes in (xmin, ymin, xmax, ymax) form, used in Jaccard for positive match check + GT_box_xy = GT_box_wh.detach().clone().to(device) + GT_box_xy[:,2] = GT_box_wh[:,2] + GT_box_wh[:,0] + GT_box_xy[:,3] = GT_box_wh[:,3] + GT_box_wh[:,1] + + # Calculate Loss + JaccardIndices = jaccard(self.Anchor_box_xy,GT_box_xy) + posMatches = torch.nonzero(JaccardIndices >= Jaccardtreshold) + negMatchAnchIdx = torch.nonzero(JaccardIndices.max(dim=1).values < Jaccardtreshold).flatten() + + # posMatches: tensor[numpreds=5376,2], shows the matches anchor boxes to GT boxes, + # first column: ID of matched anchor, second column: ID of GT box + posMatchAnchIdx = posMatches[:,0] + posMatchGTIdx = posMatches[:,1] + + pred_backGrCF = pred_CF[:,1] + negMatch_pred_backGrCF = pred_backGrCF[negMatchAnchIdx] + + + posMatchAnchs = self.Anchor_box_wh[posMatchAnchIdx] + num_pos = posMatches.shape[0] + + if num_pos: + posMatch_pred_box_delt = pred_box_delt[posMatchAnchIdx] + posMatch_pred_CF = pred_CF[posMatchAnchIdx][:,0] +# print(f'posMatch_pred_CF: {posMatch_pred_CF}') + posMatchGTs = GT_box_cxcy_wh[posMatchGTIdx] + + + # Calculate g_hat + ghat_cx = (posMatchGTs[:,0]-posMatchAnchs[:,0])/posMatchAnchs[:,2]/self.var_x + ghat_cy = (posMatchGTs[:,1]-posMatchAnchs[:,1])/posMatchAnchs[:,3]/self.var_y + ghat_w = torch.log(posMatchGTs[:,2]/posMatchAnchs[:,2])/self.var_w + ghat_h = torch.log(posMatchGTs[:,3]/posMatchAnchs[:,3])/self.var_h + ghat = torch.cat((ghat_cx.unsqueeze(1), ghat_cy.unsqueeze(1), ghat_w.unsqueeze(1), ghat_h.unsqueeze(1)),dim=1) + + # Calculate location loss + smoothL1 = torch.nn.SmoothL1Loss(reduction='sum', beta=1.0).to(device) + ghat_1D = ghat.view(1,-1) + posMatch_pred_box_delt_1D = posMatch_pred_box_delt.view(1,-1) + loc_loss = smoothL1(posMatch_pred_box_delt_1D, ghat_1D) + + # Calculate conf loss for positive matches + posMatch_CF_loss = -torch.log(posMatch_pred_CF).sum() +# print(f'posMatch_CF_loss: {posMatch_CF_loss}') + + # Hard negative mining + negMatch_pred_backGrCF,_=negMatch_pred_backGrCF.sort(0, descending=False) + + # set hard negative mining sample num + # clamp number of negtive samples with min_NegMiningSample below, Neg2Pos Ratio x numPositive number above + num_hardmined_negative = int(np.max([num_pos*Mining_Neg2PosRatio,min_NegMiningSample])) + num_hardmined_negative = int(np.min([num_hardmined_negative, negMatch_pred_backGrCF.shape[0]])) + negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF[0:num_hardmined_negative] + # select low confidence backround CFs + negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF_mined[negMatch_pred_backGrCF_mined predConfPlotTreshold): + upper_left_x = box[0]; + upper_left_y = box[1]; + ww = box[2] - box[0] + hh = box[3] - box[1] + + conf = "{:.3f}".format(confs_to_draw[ii]) + + if not saveFig: + print(f'Conf{ii} : {confs_to_draw[ii]}') + + plt.text(upper_left_x,upper_left_y-5, conf, fontsize = 12,color= color) + rect = patches.Rectangle( + (upper_left_x, upper_left_y), + ww, 
hh, + linewidth=2, + edgecolor=color, + facecolor="none", + ) + ax.add_patch(rect); + + + if saveFig: + trainingOutpDir = os.path.join(".","Training Outputs") + saveDir = os.path.join(trainingOutpDir,folderName) + + if not (os.path.isdir(trainingOutpDir)): + os.mkdir(trainingOutpDir) + + if not (os.path.isdir(saveDir)): + os.mkdir(saveDir) + + if (imageID == None): + imageID = 'NA' + else: + imageID = str(int(imageID)) + + + imageName = folderName+"_ImgId_"+imageID+".png" + imageDir = os.path.join(saveDir, imageName) + plt.savefig(imageDir) + plt.close('all') + plt.cla() + + else: + plt.show() + plt.close('all') + + +def generateAnchorsInOrigImage(anchors,headgridSize,originalPicSize=512): + ''' + Prepares anchor tensors in original image. + + E.g. If there are 4 anchors for the prediction head, + 4 anchor positions in original image are calculated for (x=0, y=0),(x=1, y=0)... feature grid, and written + one under the other to anchorsInOrig + + Args: + anchors : (tuple) Tuple of anchor boxes in Tensor w,h form Tuple(Shape[numAnchors,2]) + headgridSize : Prediction head grid size, 16 or 32 for mobilenet + originalPicSize : original image size + + Return: + anchorsInOrig : Tensor shape[#ofboxes*head width size*head height size,4], anchors are written in (cx, cy, w, h) form + ''' + scale = originalPicSize/headgridSize + anchorsInOrig = torch.zeros([len(anchors)*headgridSize*headgridSize,4]) + numOfAnchorBox = len(anchors) + for i in range(headgridSize): + for j in range(headgridSize): + for k in range(len(anchors)): + cx = j*scale + (scale+1)/2 + cy = i*scale + (scale+1)/2 + w, h = anchors[k] + tempAnch = torch.tensor([cx,cy,w,h]) + anchorsInOrig[i*headgridSize*numOfAnchorBox + j*numOfAnchorBox + k,:]=tempAnch + +# anchorsInOrig.requires_grad_(True) # does no effect result + return anchorsInOrig + + +def prepareHeadDataforLoss(HeadBB,HeadConf): + ''' + Prepares prediction head tensors for loss calculation + + E.g. If there are 4 BBs for the prediction head, + 4 BB positions in delta form are written one under the other, for (x=0, y=0),(x=1, y=0)... of feature grid and returned + + Args: + HeadBB : (tensor) Location head of the layer Shape[numofAncBoxesperCell * 4, head width, head height ] + Boxes -> [dcx, dcy, dw, dh ] + HeadConf : (tensor) Confidence head of the layer Shape[numofAncBoxesperCell * 2, head width, head height ] + Confidences -> (p(person), p(background)) + + Return: + BBs : (tensor) Predicted bounding boxes are written in delta form (dcx, dcy, dw, dh) + shape[numofAncBoxesperCell * head width * head height ,4] -> shape[4096,4] for 32x32 head + + CFs : (tensor) Class confidences are written in (p(person), p(background)) + shape[#ofPredperFeatureCell * head width * head height ,2] -> shape[4096,2] for 32x32 head + ''' + width = HeadBB.shape[1] + height = HeadBB.shape[2] + + numOfAnchorBox = int(HeadBB.shape[0]/4) + BBs = torch.zeros([width*height*numOfAnchorBox,4]).to(device) + CFs = torch.zeros([width*height*numOfAnchorBox,2]).to(device) + for i in range(width): + for j in range(height): + for k in range(numOfAnchorBox): + BBs[i*height*numOfAnchorBox + j*numOfAnchorBox + k,:] = HeadBB[k*4:k*4+4,i,j] + CFs[i*height*numOfAnchorBox + j*numOfAnchorBox + k,:] = HeadConf[k*2:k*2+2,i,j] + + return BBs, CFs + + +def prepareHeadDataforLoss_fast(HeadBB,HeadConf): + ''' + Same function with prepareHeadDataforLoss(), but blackbox faster implementation. 
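# Vectorized sketch of what generateAnchorsInOrigImage() above computes with
# nested loops: every (row, col, anchor) cell of a head grid gets an anchor box
# in original-image coordinates (cx, cy, w, h), with the same centre formula and
# the same row-major, anchors-fastest ordering. The anchor shapes passed in the
# example call are placeholders, not the repository's tuned anchors.
import torch

def anchors_in_original(anchors_wh, grid, img=512):
    scale = img / grid
    ys, xs = torch.meshgrid(torch.arange(grid), torch.arange(grid), indexing="ij")
    cx = xs * scale + (scale + 1) / 2                         # same centre formula as above
    cy = ys * scale + (scale + 1) / 2
    centres = torch.stack([cx, cy], dim=-1).reshape(-1, 1, 2)  # [grid*grid, 1, 2]
    wh = torch.tensor(anchors_wh, dtype=torch.float32).reshape(1, -1, 2)
    centres = centres.expand(-1, wh.shape[1], -1)
    out = torch.cat([centres, wh.expand(centres.shape[0], -1, -1)], dim=-1)
    return out.reshape(-1, 4)                                  # [grid*grid*num_anchors, 4]

a = anchors_in_original([(30, 60), (60, 120), (100, 200), (150, 300)], grid=32)
print(a.shape)   # torch.Size([4096, 4]), matching the 32x32 head with 4 anchors per cell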
+ See details in prepareHeadDataforLoss() + ''' + + BBs = HeadBB.squeeze(0) + BBs = BBs.permute((1,2,0)) + BBs = BBs.contiguous().view(-1,4) + + CFs = HeadConf.squeeze(0) + CFs = CFs.permute((1,2,0)) + CFs = CFs.contiguous().view(-1,2) + return BBs, CFs + + +# https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py +def point_form(boxes): + """ Convert box in form (cx, cy, w, h) to (xmin, ymin, xmax, ymax) + representation for comparison to point form ground truth data. + Args: + boxes: (tensor) boxes in (cx, cy, w, h) form + Return: + boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. + """ + return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin + boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax + +# https://github.com/amdegroot/ssd.pytorch/blob/master/layers/box_utils.py +def intersect(box_a, box_b): + """ We resize both tensors to [A,B,2] without new malloc: + [A,2] -> [A,1,2] -> [A,B,2] + [B,2] -> [1,B,2] -> [A,B,2] + Then we compute the area of intersect between box_a and box_b. + Args: + box_a: (tensor) bounding boxes, Shape: [A,4]. xmin, ymin, xmax, ymax form + box_b: (tensor) bounding boxes, Shape: [B,4]. + Return: + (tensor) intersection area, Shape: [A,B]. + """ + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + A = box_a.size(0) + B = box_b.size(0) + box_a = box_a.to(device) + box_b = box_b.to(device) + max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), + box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) + min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), + box_b[:, :2].unsqueeze(0).expand(A, B, 2)) + inter = torch.clamp((max_xy - min_xy), min=0) + return inter[:, :, 0] * inter[:, :, 1] + +def jaccard(box_a, box_b): + """Compute the jaccard overlap of two sets of boxes. The jaccard overlap + is simply the intersection over union of two boxes. Here we operate on + ground truth boxes and default boxes. + E.g.: + A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) + Args: + box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] + box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] + Return: + jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] + """ + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + inter = intersect(box_a, box_b) # boxes are in the form of xmin, ymin, xmax, ymax + area_a = ((box_a[:, 2]-box_a[:, 0]) * + (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] + area_b = ((box_b[:, 2]-box_b[:, 0]) * + (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] + + area_a = area_a.to(device) + area_b = area_b.to(device) + union = area_a + area_b - inter + return inter / union # [A,B] + +def collate_fn(batch): + """ + Custom collate function. + Need to create own collate_fn Function for COCO. + Merges a list of samples to form a mini-batch of Tensor(s). + Used when using batched loading from a map-style dataset. 
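# Quick numeric sanity check for the point_form()/jaccard() helpers above: two
# unit squares whose centres are 0.5 apart overlap by an area of 0.5, so
# IoU = 0.5 / (1 + 1 - 0.5) = 1/3. The import path matches how these helpers
# are used elsewhere in this repo; the box values are toy numbers.
import torch
from library.ssd import jaccard, point_form

boxes_cxcywh = torch.tensor([[0.5, 0.5, 1.0, 1.0],    # unit square centred at (0.5, 0.5)
                             [1.0, 0.5, 1.0, 1.0]])   # same square shifted right by 0.5
boxes_xyxy = point_form(boxes_cxcywh)                 # -> (xmin, ymin, xmax, ymax)
print(jaccard(boxes_xyxy[:1], boxes_xyxy[1:]))        # tensor([[0.3333]])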
+ """ + return zip(*batch) + +def sampleRandomPicsFromCOCO_old(train_loader, numtoPlot = 10, PictureSize = 512): + ''' + This function is used to sample random pictures from COCO dataset + + Args: + numtoPlot : number of random pictures to plot from dataset + + Return: + SelectedPics : (tensor) size[numtoPlot, 3, PictureSize, PictureSize] + SelectedTargets: list[(tensor)] list of bounding boxes in COCO format for each picture + + ''' + import random + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + numofbatches = len(train_loader) + batchsize = train_loader.batch_size + randomBatches = random.sample(range(0, numofbatches), numtoPlot) + + selectedTargets = [] + selectedPics = torch.zeros((numtoPlot,3,PictureSize,PictureSize)).to(device) + dataloader_iterator = iter(train_loader) + + + i = 0 + batchnum = 0 + while batchnum < numofbatches: +# print(batchnum) + if batchnum in randomBatches: + data = next(dataloader_iterator) + picnum = random.randrange(0, batchsize, 1) + randomBatches.remove(batchnum) + + imageBatch, targetBatch, picNum = data + image = imageBatch[picnum].unsqueeze(0).clone().to(device) + target = targetBatch[picnum].clone().to(device) + + selectedPics[i,:,:,:] = image + selectedTargets.append(target) + i += 1 + else: + next(dataloader_iterator) + + batchnum += 1 + + if not randomBatches: + break + + return selectedPics, selectedTargets + + +def sampleRandomPicsFromCOCO(dataset, numtoPick = 10, pickSame = False): + ''' + This function is used to sample random pictures from a COCO type dataset + + Args: + dataset: dataset to be sampled + numtoPick : number of random pictures to pick from dataset + pickSame: if it is set to true, + + Return: + SelectedPics : (tensor) size[numtoPlot, 3, PictureSize, PictureSize] + SelectedTargets: list[(tensor)] list of bounding boxes in COCO format for each picture + ''' + + if pickSame: + random.seed(1234) + else: + pass + + random_indices = random.sample(range(len(dataset)), numtoPick) + + rand_sampler = torch.utils.data.SubsetRandomSampler(random_indices) + loader = torch.utils.data.DataLoader(dataset, + sampler=rand_sampler, + batch_size=1, + collate_fn=collate_fn, + drop_last=False) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + selectedTargets = [] + selectedPics = torch.zeros((numtoPick, 3, 512, 512)).to(device) + picIds = [] + + for i, data in enumerate(loader): + imageBatch, targetBatch, picNum = data + image = imageBatch[0].unsqueeze(0).to(device) + target = targetBatch[0].to(device) + + selectedPics[i,:,:,:] = image + selectedTargets.append(target) + picIds.append(picNum[0]) + + return selectedPics, selectedTargets, picIds + + +def saveOutputs(pictures, picIds, targets, preds, anchors_head1, anchors_head2, + savefolderName='UnconstFPT', + nmsIoUTreshold = 0.45, predConfPlotTreshold = 0.6, figsize=(8,8)): + ''' + Saves pictures,ground truths and model predictions under specified folder + ''' + predsPostProcess = PredsPostProcess(512, anchors_head1, anchors_head2) + + image_width = pictures.shape[2] + image_height = pictures.shape[3] + + BBs1 = preds[0].clone() + CFs1 = preds[1].clone() + BBs2 = preds[2].clone() + CFs2 = preds[3].clone() + + for imgNum in tqdm(range(0,pictures.shape[0])): + + img = pictures[imgNum,:,:,:].clone() + target = targets[imgNum].clone() + pred = (BBs1[imgNum,:,:,:].unsqueeze(0), CFs1[imgNum,:,:,:].unsqueeze(0), + BBs2[imgNum,:,:,:].unsqueeze(0), CFs2[imgNum,:,:,:].unsqueeze(0)) + id = picIds[imgNum] + + absolute_boxes,person_cls = 
predsPostProcess.getPredsInOriginal(pred) + + plot_image_mnv2_2xSSDlite(img, pred_person_cls = person_cls, pred_absolute_boxes = absolute_boxes, color = 'r' + ,nmsIoUTreshold = nmsIoUTreshold, predConfPlotTreshold = predConfPlotTreshold, + target=target, figsize=figsize, + saveFig=True, imageID= id, folderName = savefolderName) + +class PredsPostProcess: + ''' + Class to convert mobilenet SSD heads to real image coordinates in form [xmin, ymin, xmax, ymax] + + ''' + def __init__(self, image_width, anchors_head1, anchors_head2): + Head1AnchorsForLoss = generateAnchorsInOrigImage(anchors_head1,headgridSize=32,originalPicSize=image_width) + Head2AnchorsForLoss = generateAnchorsInOrigImage(anchors_head2,headgridSize=16,originalPicSize=image_width) + AnchorsFlatten_wh = torch.cat((Head1AnchorsForLoss,Head2AnchorsForLoss),0) # shape[32x32x4+16x16x5, 4] + # boxes in form[cx, cy, w, h] + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + AnchorsFlatten_wh = AnchorsFlatten_wh.to(device) + self.AnchorsFlatten_wh = AnchorsFlatten_wh + self.softmax_fcn = torch.nn.Softmax(dim=1).to(device) + self.var_x = 0.1 + self.var_y = 0.1 + self.var_w = 0.2 + self.var_h = 0.2 + + def getPredsInOriginal(self,preds): + ''' + Args: + preds: Prediction heads, i.e output of mobilenet model() + + Return: + absolute_boxes: 32 * 32 *4 + 16 * 16 * 5 = 5376 pred BB's in form [imagenum, xmin, ymin, xmax, ymax] + (tensor) [5376, 5] + person cls: Person classification heads, (tensor) [5376,1] + + ''' + AnchorsFlatten_wh = self.AnchorsFlatten_wh + BBhires, CFhires = prepareHeadDataforLoss_fast(preds[0].data,preds[1].data) + BBlores, CFlores = prepareHeadDataforLoss_fast(preds[2].data,preds[3].data) + + + cls = torch.cat(( CFhires, CFlores)) + cls = self.softmax_fcn(cls) + person_cls =cls[:,0] + + delta_boxes_wh = torch.cat(( BBhires, BBlores)) + + pred_cx = delta_boxes_wh[:,0]*self.var_x*self.AnchorsFlatten_wh[:,2] + self.AnchorsFlatten_wh[:,0] + pred_cy = delta_boxes_wh[:,1]*self.var_y*self.AnchorsFlatten_wh[:,3] + self.AnchorsFlatten_wh[:,1] + pred_w = (delta_boxes_wh[:,2]*self.var_w).exp()*self.AnchorsFlatten_wh[:,2] + pred_h = (delta_boxes_wh[:,3]*self.var_h).exp()*self.AnchorsFlatten_wh[:,3] + + absolute_xleft = pred_cx - pred_w/2 + absolute_ytop = pred_cy - pred_h/2 + absolute_xright = pred_cx + pred_w/2 + absolute_ybottom = pred_cy + pred_h/2 + + absolute_boxes = torch.cat((absolute_xleft.view(-1,1), absolute_ytop.view(-1,1), absolute_xright.view(-1,1), absolute_ybottom.view(-1,1)), dim=1) + + return absolute_boxes, person_cls + + +def mAP(cocoGT, cocoDT, imgIDS, catIDS=1, annType="bbox"): + """ + Explanation: This function calculate the mean average precision for given + ground truths and detection results. Default category and + annotation format is set to 'person' and 'bbox' respectively. + This function is based on popular benchmark function "pycocotools" + that is forked 3.3k. Please re-check the iou threshold (parameter iouThrs) + ,which is default '.5:.05:.95', before you run the code. + Arguments: + cocoGT(Json File): Annotated orginal valset of COCO. + cocoDT(Json File): Model Results as format ===> [{"image_id":42, "category_id":18, "bbox":[258.15,41.29,348.26,243.78],"score":0.236}, + {"image_id":73, "category_id":11, "bbox":[61,22.75,504,609.67], "score":0.318}, + ...] + imgIDS(list): list of image IDs. + catIDS(list): list of category ids. Default=1 as person. + annType(String): Annotation type, Default=bbox. Can be ['segm','bbox','keypoints']. 
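+        Note: to evaluate at a single IoU threshold instead of the default
+        '.5:.05:.95' sweep, the parameter can be overridden inside this function
+        right after the COCOeval object is created (illustrative edit, not part
+        of the original code):
+            cocoEval.params.iouThrs = np.array([0.5])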
+ Returns: + None: just results as strings in terminal. + ######################## More Detailed Guideline ######################## + The usage for CocoEval is as follows: # + cocoGt=..., cocoDt=... # load dataset and results # + E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object # + E.params.recThrs = ...; # set parameters as desired # + E.evaluate(); # run per image evaluation # + E.accumulate(); # accumulate per image results # + E.summarize(); # display summary metrics of results # + ######################################################################### + The evaluation parameters are as follows (defaults in brackets): # + imgIds - [all] N img ids to use for evaluation # + catIds - [all] K cat ids to use for evaluation # + iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation # + recThrs - [0:.01:1] R=101 recall thresholds for evaluation # + areaRng - [...] A=4 object area ranges for evaluation # + maxDets - [1 10 100] M=3 thresholds on max detections per image # + iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' # + iouType replaced the now DEPRECATED useSegm parameter. # + useCats - [1] if true use category labels for evaluation # + Note: if useCats=0 category labels are ignored as in proposal scoring. # + Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. # + ######################################################################### + evaluate(): evaluates detections on every image and every category and # + concats the results into the "evalImgs" with fields: # + dtIds - [1xD] id for each of the D detections (dt) # + gtIds - [1xG] id for each of the G ground truths (gt) # + dtMatches - [TxD] matching gt id at each IoU or 0 # + gtMatches - [TxG] matching dt id at each IoU or 0 # + dtScores - [1xD] confidence of each dt # + gtIgnore - [1xG] ignore flag for each gt # + dtIgnore - [TxD] ignore flag for each dt at each IoU # + ######################################################################### + accumulate(): accumulates the per-image, per-category evaluation # + results in "evalImgs" into the dictionary "eval" with fields: # + params - parameters used for evaluation # + date - date evaluation was performed # + counts - [T,R,K,A,M] parameter dimensions (see above) # + precision - [TxRxKxAxM] precision for every evaluation setting # + recall - [TxKxAxM] max recall for every evaluation setting # + Note: precision and recall==-1 for settings with no gt objects. 
# + ######################################################################### + ***For more details of COCOeval please check: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py + ***If you need an orginal example from API please check: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb + """ + + cocoEval = COCOeval(cocoGT,cocoDT,annType) + cocoEval.params.imgIds = imgIDS + cocoEval.params.catIds = catIDS + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + +def round_floats(o): + ''' + Used to round floats before writing to json form + ''' + if isinstance(o, float): + return round(o, 3) + if isinstance(o, dict): + return {k: round_floats(v) for k, v in o.items()} + if isinstance(o, (list, tuple)): + return [round_floats(x) for x in o] + return o + + +def get_FPnum_per_image(bbox, GT_bbox, min_IoU = 0.5): + ''' Founds the number of False Positives by assocating detection BB's to GT BBs + + Arguments: + ------------- + bbox : list + N x 4 list of detection bounding boxes in xmin, ymin, w, h form + GT_bbox : list + N x 4 list of ground truth bounding boxes in xmin, ymin, w, h form + min_IoU : float [0,1] + Treshold of intersection of union to evaluate detection and GT to be matched, if IoU of Det and GT is below + this value they are automatically marked as unmatched + ''' + + bbox = torch.tensor(bbox) + + # Convert x,y,w,h -> xmin, ymin, xmax, ymax + bbox[:,2] = bbox[:,0] + bbox[:,2] + bbox[:,3] = bbox[:,1] + bbox[:,3] + + GT_bbox[:,2] = GT_bbox[:,0] + GT_bbox[:,2] + GT_bbox[:,3] = GT_bbox[:,1] + GT_bbox[:,3] + + IoUscore = jaccard(GT_bbox, bbox) + + num_det = IoUscore.shape[1] + num_TP = 0 + GT_indexes = [x for x in range(IoUscore.shape[0])] + + # all detections + for det_idx in range(IoUscore.shape[1]): + + max_IoU = min_IoU + max_IoU_gt_id = None + + # all remained unmatched GTs + for i, gt_idx in enumerate(GT_indexes): + currentIoU = IoUscore[gt_idx, det_idx] + if currentIoU > max_IoU: + max_IoU = currentIoU + max_IoU_gt_id = i + + if max_IoU_gt_id is not None: + del GT_indexes[max_IoU_gt_id] # Remove GT from unmatcheds list + num_TP += 1 + + if len(GT_indexes) == 0: + break + + FP_count_image = num_det - num_TP + return FP_count_image + + +def calculatemAP(model, test_loader,cocoGT, ANCHORS_HEAD1, ANCHORS_HEAD2 , PredMinConfTreshold=0.7 , + nmsIoUTreshold = 0.5, mAPOnlyFirstBatch= False, calculate_FP_ratio=False, hardware_mode = False): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + t1 = time.time() + print('mAP calculation started...') + predsPostProcess = PredsPostProcess(512, ANCHORS_HEAD1, ANCHORS_HEAD2) + + dataDictList =[] + imgIDS = [] + model.eval() + + total_GT_count = 0 + total_FP_count = 0 + + with torch.no_grad(): + for i, data in enumerate(tqdm(test_loader)): + + imageBatch, targetBatch , idxBatch = data + + imageStack = torch.stack(imageBatch).detach().to(device) + predBatch = model(imageStack) + + # Outputs are in [-128, 127] in hw mode + if hardware_mode: + BBs1 = predBatch[0].detach() / 128.0 + CFs1 = predBatch[1].detach() / 128.0 + BBs2 = predBatch[2].detach() / 128.0 + CFs2 = predBatch[3].detach() / 128.0 + else: + BBs1 = predBatch[0].detach() + CFs1 = predBatch[1].detach() + BBs2 = predBatch[2].detach() + CFs2 = predBatch[3].detach() + + for imgNum in range(imageStack.shape[0]): + + img = imageStack[imgNum,:,:,:] + target = targetBatch[imgNum] + image_id = int(idxBatch[imgNum]) + imgIDS.append(image_id) + + pred = (BBs1[imgNum,:,:,:].unsqueeze(0), 
CFs1[imgNum,:,:,:].unsqueeze(0), + BBs2[imgNum,:,:,:].unsqueeze(0), CFs2[imgNum,:,:,:].unsqueeze(0)) + + absolute_boxes, person_cls = predsPostProcess.getPredsInOriginal(pred) + + confidences = person_cls + boxes = absolute_boxes + nms_picks = torchvision.ops.nms(boxes, confidences, nmsIoUTreshold) + boxes_to_draw = boxes[nms_picks] + confs_to_draw = confidences[nms_picks] + + # Predictions filtered by nms and conf tresholding, these will go to mAP + confMask = (confs_to_draw > PredMinConfTreshold) + + # Accumulate total GT bounding box number to calculate total False Positive rate + if calculate_FP_ratio and (target.shape[0] != 0): + GT_bbox = target[:,1:] + total_GT_count += GT_bbox.shape[0] + + # Inputs to mAP algorithm + if (confMask.any()): + + # pred boxes -> [xmin,ymin,xmax,ymax], tensor shape[numpred,4] + bbox = boxes_to_draw[confMask] + # Convert BB to coco annot format -> [xmin,ymin,width, height] + bbox[:,2] = bbox[:,2] - bbox[:,0] + bbox[:,3] = bbox[:,3] - bbox[:,1] + + bbox = bbox.tolist() # pred boxes -> [xmin,ymin,xmax,ymax], shape[numpred,4] + score = confs_to_draw[confMask].tolist() + category_id = np.ones_like(score,dtype=int).tolist() + + for j in range(len(bbox)): + box = {"image_id":image_id, "category_id":category_id[j], "bbox":bbox[j],"score":score[j]} + dataDictList.append(round_floats(box)) + + # If detection exists and false positive ratio calculation is enabled + if calculate_FP_ratio: + # Note that scores are already in descending order thanks to nms operation + # No ground truth, all detections are FP + if GT_bbox.shape[0] == 0: + total_FP_count += len(score) + + # Find false positives + else: + FP_count_image = get_FPnum_per_image(bbox, GT_bbox, min_IoU=0.5) + total_FP_count += FP_count_image + + if mAPOnlyFirstBatch: + break + + if (len(dataDictList)): + # Evavluate and Accumulate mAP for remained baches, if any + cocoDT = json.dumps(dataDictList) + + # Write detections to .json file + with open('cocoDT.json', 'w') as outfile: + outfile.write(cocoDT) + + # Load detections + cocoDT=cocoGT.loadRes('cocoDT.json') + + # running evaluation + annType = 'bbox' + cocoEval = COCOeval(cocoGT,cocoDT,annType) + cocoEval.params.catIds = 1 + cocoEval.params.imgIds = imgIDS + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + # Print False Positive Statistics + if calculate_FP_ratio: + print() + print('********** False Positive Statistics **********') + print(f'Total GT Boxes: {total_GT_count}, Total FPs Boxes: {total_FP_count}, FP% : {total_FP_count/total_GT_count*100}') + print() + + + mean_ap = cocoEval.stats[0].item() + mean_recall = cocoEval.stats[8].item() + + # Delete detection json file created + os.remove("cocoDT.json") + else: + mean_ap = 0 + mean_recall = 0 + t2 = time.time() + print(f'mAP done in : {t2-t1} secs') + return mean_ap, mean_recall + + +def batchNormAdaptation(model, train_loader,numSamples = 100): + ''' + BN parameters of intel model is spoiled intentionally/or unintentionaly before publishing. + Batch norm adaptation routine is proposed before any training based on this model. + https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#batch-norm-statistics-adaptation + #numSamples predictions are made and running mean variance are recalculated for the layers. 
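+    Illustrative usage (an assumption about the surrounding training script:
+    BN running statistics are only updated while the model is in train mode):
+        model.train()
+        model = batchNormAdaptation(model, train_loader, numSamples=100)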
+ ''' + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + print('') + print('Batch norm adaptation before training started.') + + + for i, data in enumerate(train_loader): + + imageBatch, targetBatch, imgIDs = data + + imageStack = torch.stack(imageBatch) + imageStack = imageStack.detach() + imageStack.requires_grad_(False) + imageStack = imageStack.to(device) + + predBatch = model(imageStack) + if (i*len(imgIDs) >= numSamples): + return model + +# Some functions to be used in training phase +def conv_model_fptunc2fpt(model): + layer_str_arr = [attr_name for attr_name in dir(model) if + isinstance(getattr(model, attr_name), qat_core.layers.shallow_base_layer)] + # Convert layers + for layer in layer_str_arr: + layer_attribute = getattr(model, layer) + layer_attribute.mode_fptunconstrained2fpt('fpt') + setattr(model, layer, layer_attribute) + + # Convert add_residual modules. + add_res_attribute = getattr(model, 'add_residual') + add_res_attribute.mode_fptunconstrained2fpt('fpt') + setattr(model, 'add_residual', add_res_attribute) + + return model + + +def conv_model_fpt2qat(model, weight_dictionary, shift_quantile=0.985): + print('Folding BNs and converting to qat mode') + layer_attributes = [] + for layer_string in dir(model): + if(layer_string in weight_dictionary): + layer_attribute = getattr(model, layer_string) + + if layer_attribute.mode == 'fpt': + print('Folding BN for:', layer_string) + weight_bits=weight_dictionary[layer_string] + print(f'Layer bit is : {weight_bits}') + + # For binary weights convert layer in to qat_ap mode + if weight_bits == 1: + print('layer is converted in to qat_ap mode') + layer_attribute.configure_layer_base(weight_bits=2 , bias_bits=8, shift_quantile=shift_quantile) + layer_attribute.mode_fpt2qat('qat_ap'); + # convert other layers in to qat mode + else: + print('layer is converted in to qat mode') + layer_attribute.configure_layer_base(weight_bits=weight_bits , bias_bits=8, shift_quantile=shift_quantile) + layer_attribute.mode_fpt2qat('qat'); + + setattr(model, layer_string, layer_attribute) + print('') + + else: + print('To convert model to QAT mode, all layers must be in fpt mode but, ' + layer_string + 'is in' + layer_attribute.mode +' mode. Exiting...') + sys.exit() + + add_res_attribute = getattr(model, 'add_residual') + if add_res_attribute.mode == 'fpt': + add_res_attribute.mode_fpt2qat('qat') + setattr(model, 'add_residual', add_res_attribute) + else: + print('To convert model to QAT mode, add_residual modüle must be in fpt mode but, it is in ' + add_res_attribute.mode + ' mode. Exiting...') + sys.exit() + + print('********* Converting to qat mode finished *********') + print('') + return model + +def conv_model_qat2hw(model): + print('Converting model to eval/hw mode for testing') + + layer_str_arr = [attr_name for attr_name in dir(model) if + isinstance(getattr(model, attr_name), qat_core.layers.shallow_base_layer)] + + for layer in layer_str_arr: + layer_attribute = getattr(model, layer) + + if layer_attribute.mode == 'qat': + layer_attribute.mode_qat2hw('eval') + setattr(model, layer, layer_attribute) +# print(f'{layer} was in qat converted to eval mode') + elif layer_attribute.mode == 'qat_ap': + layer_attribute.mode_qat_ap2hw('eval') + setattr(model, layer, layer_attribute) +# print(f'{layer} was in qat_ap converted to eval mode') + else: + print('To convert model to hw mode, all layers must be in qat or qat_ap mode but, ' + layer_string + 'is in' + layer_attribute.mode +' mode. 
Exiting...') + sys.exit() +# print('') + model = model.to(model.conv1.op.weight.device.type) + + # Convert add residual operation in to eval mode + add_res_attribute = getattr(model, 'add_residual') + if add_res_attribute.mode == 'qat': + add_res_attribute.mode_qat2hw('eval') + setattr(model, 'add_residual', add_res_attribute) + else: + print('To convert model to QAT mode, add_residual modüle must be in qat mode but, it is in ' + add_res_attribute.mode + ' mode. Exiting...') + sys.exit() + + print('********* Converting model to eval/hw mode for testing finished *********') + print('') + return model \ No newline at end of file diff --git a/library/trackers/sort_tracker.py b/library/trackers/sort_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..ab8ac8072a4db5d7efdb22dbbb98a34ddf11dffa --- /dev/null +++ b/library/trackers/sort_tracker.py @@ -0,0 +1,283 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai # +########################################################################### +""" + This is a modified version of SORT algorithm, tentative-confirmed + track mechanism, prediction without detection are added. + + SORT: A Simple, Online and Realtime Tracker + Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" +from __future__ import print_function + +import os +import numpy as np +import glob +import time +import argparse + +from filterpy.kalman import KalmanFilter + +# np.random.seed(0) + + +def linear_assignment(cost_matrix): + try: + import lap + _, x, y = lap.lapjv(cost_matrix, extend_cost=True) + return np.array([[y[i],i] for i in x if i >= 0]) # + except ImportError: + from scipy.optimize import linear_sum_assignment + x, y = linear_sum_assignment(cost_matrix) + return np.array(list(zip(x, y))) + + +def iou_batch(bb_test, bb_gt): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bb_gt = np.expand_dims(bb_gt, 0) + bb_test = np.expand_dims(bb_test, 1) + + xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0]) + yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) + xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) + yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) + + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) + return(o) + + +def convert_bbox_to_z(bbox): + """ + Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form + [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is + the aspect ratio + """ + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w/2. + y = bbox[1] + h/2. 
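+    # (x, y) is the box centre; together with the area s and aspect ratio r
+    # computed below, this is the measurement vector the Kalman filter observes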
+ s = w * h #scale is just area + r = w / float(h) + return np.array([x, y, s, r]).reshape((4, 1)) + + +def convert_x_to_bbox(x,score=None): + """ + Takes a bounding box in the centre form [x,y,s,r] and returns it in the form + [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right + """ + w = np.sqrt(x[2] * x[3]) + h = x[2] / w + if(score==None): + return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) + else: + return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) + + +class KalmanBoxTracker(object): + """ + This class represents the internal state of individual tracked objects observed as bbox. + """ + count = 0 + def __init__(self,bbox,hit_to_confirm=1): + """ + Initialises a tracker using initial bounding box. + """ + #define constant velocity model + self.kf = KalmanFilter(dim_x=7, dim_z=4) + self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) + self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) + + self.kf.R[2:,2:] *= 10. + self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities + self.kf.P *= 10. + self.kf.Q[-1,-1] *= 0.01 + self.kf.Q[4:,4:] *= 0.01 + + self.kf.x[:4] = convert_bbox_to_z(bbox) + self.time_since_update = 0 + self.id = KalmanBoxTracker.count + KalmanBoxTracker.count += 1 + self.history = [] + self.hits = 0 + self.hit_streak = 0 + self.age = 0 + self.confirmed = False + self.hit_to_confirm = hit_to_confirm + self.conf = bbox[-1] + + + def update(self,bbox): + """ + Updates the state vector with observed bbox. + """ + self.time_since_update = 0 + self.history = [] + self.hits += 1 + self.hit_streak += 1 + self.conf = bbox[-1] + self.kf.update(convert_bbox_to_z(bbox)) + if not self.confirmed: + if self.hits >= self.hit_to_confirm: + self.confirmed = True +# print(f'track {self.id} confirmed') + + def predict(self): + """ + Advances the state vector and returns the predicted bounding box estimate. + """ + if((self.kf.x[6]+self.kf.x[2])<=0): + self.kf.x[6] *= 0.0 + self.kf.predict() + self.age += 1 + if(self.time_since_update>0): + self.hit_streak = 0 + self.time_since_update += 1 + self.history.append(convert_x_to_bbox(self.kf.x)) + return self.history[-1] + + def get_state(self): + """ + Returns the current bounding box estimate. 
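+        The filter state is [cx, cy, s, r, vcx, vcy, vs] (constant-velocity model);
+        convert_x_to_bbox() maps its position/size entries back to [x1, y1, x2, y2].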
+ """ + return convert_x_to_bbox(self.kf.x) + + +def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3): + """ + Assigns detections to tracked object (both represented as bounding boxes) + Returns 3 lists of matches, unmatched_detections and unmatched_trackers + """ + if(len(trackers)==0): + return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) + + iou_matrix = iou_batch(detections, trackers) + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-iou_matrix) + else: + matched_indices = np.empty(shape=(0,2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if(d not in matched_indices[:,0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if(t not in matched_indices[:,1]): + unmatched_trackers.append(t) + + #filter out matched with low IOU + matches = [] + for m in matched_indices: + if(iou_matrix[m[0], m[1]] miss_to_del): + self.trackers.pop(i-1) + i -= 1 + continue + + if trk.confirmed: + d = trk.get_state()[0] + tr = np.concatenate((d,[trk.id+1])).reshape(1,-1) + ret1.append(tr) + i -= 1 + + + track_confidences = [x.conf for x in self.trackers if x.confirmed] + if(len(ret1)>0): + return np.concatenate(ret1), track_confidences[::-1] + return np.empty((0,5)), np.empty((0,1)) diff --git a/library/trackers/utils.py b/library/trackers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..26518c457df454f36caf28205f8e30ef9e28eded --- /dev/null +++ b/library/trackers/utils.py @@ -0,0 +1,80 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import distinctipy +import random +import cv2 + +N = 36 +# generate N visually distinct colours to mark different tracks +colors = distinctipy.get_colors(N, pastel_factor=0.7,rng=random.seed(6)) + +def draw_BBs(boxes_to_draw, confs_to_draw, frame, IDs = None, colors = colors, show_track_confidences = True): + + image_height, image_width, _ = frame.shape + + for i, conf in enumerate(confs_to_draw): + + if IDs is not None: + # Unique color for each ID + BB_ID = IDs[i] + color = list(colors[BB_ID%36]) + else: + color = colors[1] + + color = [int(x*255) for x in color] + color = tuple(color) + + BB = boxes_to_draw[i,:] + xmin,ymin,xmax,ymax = BB + xmin = max(xmin.item(),0) + ymin = max(ymin.item(),0) + xmax = min(xmax.item(),image_width) + ymax = min(ymax.item(),image_height) + + + topleft = (int(xmin), int(ymin)) + bottomright = (int(xmax), int(ymax)) + + thickness = 2 + cv2.rectangle(frame, topleft, bottomright, color, thickness) + + + # Write text on the frame + label_conf = str(round(conf.item(),3)) + label_conf = 'C:' + label_conf + + x1,y1 = topleft + x2,y2 = bottomright + + if IDs is not None: + # If track BB is to be drawn + label_ID = 'ID:' + str(BB_ID) + if show_track_confidences: + frame = cv2.rectangle(frame, (int(x1-thickness/2), y1 - 25), + (int(x2+thickness/2), y1-1), color, -1) + frame = cv2.putText(frame, label_conf, (x1, y1 - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1) + frame = cv2.putText(frame, label_ID, (x1, y1 - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1) + else: + frame = cv2.rectangle(frame, (int(x1-thickness/2), y1 - 15), + (int(x2+thickness/2), y1-1), color, -1) + frame = cv2.putText(frame, label_ID, (x1, y1 - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1) + else: + frame = cv2.rectangle(frame, (int(x1-thickness/2), y1 - 15), + (int(x2+thickness/2), y1-1), color, -1) + frame = cv2.putText(frame, label_conf, (x1, y1 - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1) + + return frame + + +def project_BBs_to_original_frame(BBs, padding_left, padding_top, scale): + + BBs_new = BBs.clone() + + BBs_new[:,0] = BBs_new[:,0] - padding_left + BBs_new[:,1] = BBs_new[:,1] - padding_top + BBs_new[:,2] = BBs_new[:,2] - padding_left + BBs_new[:,3] = BBs_new[:,3] - padding_top + + return(BBs_new * scale) \ No newline at end of file diff --git a/models.py b/models.py new file mode 100644 index 0000000000000000000000000000000000000000..8fe7611ae2d03be8409d448272973bc18428b33b --- /dev/null +++ b/models.py @@ -0,0 +1,179 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import torch.nn as nn +import qat_core.layers as layers + +class mnv2_SSDlite(nn.Module): + def __init__(self, in_channels=3, n_classes=1): + super(mnv2_SSDlite, self).__init__() + + self.conv1 = layers.conv(C_in_channels=in_channels, D_out_channels=32, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + + self.epw_conv2 = layers.conv(C_in_channels=32, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv2 = layers.conv(C_in_channels=32, D_out_channels=32, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=32, quantization_mode = 'fpt_unconstrained') + self.ppw_conv2 = layers.conv(C_in_channels=32, D_out_channels=16, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv3 = layers.conv(C_in_channels=16, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv3 = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.ppw_conv3 = layers.conv(C_in_channels=96, D_out_channels=24, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv4 = layers.conv(C_in_channels=24, D_out_channels=144, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv4 = layers.conv(C_in_channels=144, D_out_channels=144, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=144, quantization_mode = 'fpt_unconstrained') + self.ppw_conv4 = layers.conv(C_in_channels=144, D_out_channels=24, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv5 = layers.conv(C_in_channels=24, D_out_channels=144, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv5 = layers.conv(C_in_channels=144, D_out_channels=144, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=144, quantization_mode = 'fpt_unconstrained') + self.ppw_conv5 = layers.conv(C_in_channels=144, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv6 = layers.conv(C_in_channels=32, D_out_channels=192, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv6 = layers.conv(C_in_channels=192, 
D_out_channels=192, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=192, quantization_mode = 'fpt_unconstrained') + self.ppw_conv6 = layers.conv(C_in_channels=192, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv7 = layers.conv(C_in_channels=32, D_out_channels=192, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv7 = layers.conv(C_in_channels=192, D_out_channels=192, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=192, quantization_mode = 'fpt_unconstrained') + self.ppw_conv7 = layers.conv(C_in_channels=192, D_out_channels=32, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv8 = layers.conv(C_in_channels=32, D_out_channels=192, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv8 = layers.conv(C_in_channels=192, D_out_channels=192, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=192, quantization_mode = 'fpt_unconstrained') + self.ppw_conv8 = layers.conv(C_in_channels=192, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv9 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv9 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv9 = layers.conv(C_in_channels=384, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv10 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv10 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv10 = layers.conv(C_in_channels=384, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv11 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv11 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, 
batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv11 = layers.conv(C_in_channels=384, D_out_channels=64, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv12 = layers.conv(C_in_channels=64, D_out_channels=384, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv12 = layers.conv(C_in_channels=384, D_out_channels=384, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=384, quantization_mode = 'fpt_unconstrained') + self.ppw_conv12 = layers.conv(C_in_channels=384, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv13 = layers.conv(C_in_channels=96, D_out_channels=576, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv13 = layers.conv(C_in_channels=576, D_out_channels=576, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=576, quantization_mode = 'fpt_unconstrained') + self.ppw_conv13 = layers.conv(C_in_channels=576, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv14 = layers.conv(C_in_channels=96, D_out_channels=576, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv14 = layers.conv(C_in_channels=576, D_out_channels=576, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=576, quantization_mode = 'fpt_unconstrained') + self.ppw_conv14 = layers.conv(C_in_channels=576, D_out_channels=96, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') #ilk çıkış: torch.Size([2, 96, /16, /16]) + + self.epw_conv15 = layers.conv(C_in_channels=96, D_out_channels=576, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv15 = layers.conv(C_in_channels=576, D_out_channels=576, K_kernel_dimension=3, stride=2, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=576, quantization_mode = 'fpt_unconstrained') + self.ppw_conv15 = layers.conv(C_in_channels=576, D_out_channels=160, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv16 = layers.conv(C_in_channels=160, D_out_channels=960, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv16 = layers.conv(C_in_channels=960, D_out_channels=960, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, 
activation='relu', num_groups=960, quantization_mode = 'fpt_unconstrained') + self.ppw_conv16 = layers.conv(C_in_channels=960, D_out_channels=160, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + self.epw_conv17 = layers.conv(C_in_channels=160, D_out_channels=960, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv17 = layers.conv(C_in_channels=960, D_out_channels=960, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=960, quantization_mode = 'fpt_unconstrained') + self.ppw_conv17 = layers.conv(C_in_channels=960, D_out_channels=160, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') + + self.epw_conv18 = layers.conv(C_in_channels=160, D_out_channels=960, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', quantization_mode = 'fpt_unconstrained') + self.dw_conv18 = layers.conv(C_in_channels=960, D_out_channels=960, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, bias=False, activation='relu', num_groups=960, quantization_mode = 'fpt_unconstrained') + self.ppw_conv18 = layers.conv(C_in_channels=960, D_out_channels=320, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), batchnorm=True, batchnorm_affine=True, bias=False, quantization_mode = 'fpt_unconstrained') #ikinci çıkış: torch.Size([2, 320, /32, /32]) + + self.head1_dw_classification = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.head1_pw_classification = layers.conv(C_in_channels=96, D_out_channels=8, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + self.head1_dw_regression = layers.conv(C_in_channels=96, D_out_channels=96, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=96, quantization_mode = 'fpt_unconstrained') + self.head1_pw_regression = layers.conv(C_in_channels=96, D_out_channels=16, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + + self.head2_dw_classification = layers.conv(C_in_channels=320, D_out_channels=320, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=320, quantization_mode = 'fpt_unconstrained') + self.head2_pw_classification = layers.conv(C_in_channels=320, D_out_channels=10, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + self.head2_dw_regression = layers.conv(C_in_channels=320, D_out_channels=320, K_kernel_dimension=3, stride=1, padding=(1,1,1,1), batchnorm=True, batchnorm_affine=True, activation='relu', num_groups=320, quantization_mode = 'fpt_unconstrained') + self.head2_pw_regression = layers.conv(C_in_channels=320, D_out_channels=20, K_kernel_dimension=1, stride=1, padding=(0,0,0,0), output_width_30b = True, quantization_mode = 'fpt_unconstrained') + + self.add_residual = 
layers.add_residual(quantization_mode = 'fpt_unconstrained') + + + def forward(self, x): + x = self.conv1(x) + + x = self.epw_conv2(x) + x = self.dw_conv2(x) + x = self.ppw_conv2(x) + + x = self.epw_conv3(x) + x = self.dw_conv3(x) + x = self.ppw_conv3(x) + res4 = x + x = self.epw_conv4(x) + x = self.dw_conv4(x) + x = self.ppw_conv4(x) + x = self.add_residual(x,res4) + + x = self.epw_conv5(x) + x = self.dw_conv5(x) + x = self.ppw_conv5(x) + res6 = x + x = self.epw_conv6(x) + x = self.dw_conv6(x) + x = self.ppw_conv6(x) + x = self.add_residual(x,res6) + res7 = x + x = self.epw_conv7(x) + x = self.dw_conv7(x) + x = self.ppw_conv7(x) + x = self.add_residual(x,res7) + + x = self.epw_conv8(x) + x = self.dw_conv8(x) + x = self.ppw_conv8(x) + res9 = x + x = self.epw_conv9(x) + x = self.dw_conv9(x) + x = self.ppw_conv9(x) + x = self.add_residual(x,res9) + res10 = x + x = self.epw_conv10(x) + x = self.dw_conv10(x) + x = self.ppw_conv10(x) + x = self.add_residual(x,res10) + res11 = x + x = self.epw_conv11(x) + x = self.dw_conv11(x) + x = self.ppw_conv11(x) + x = self.add_residual(x,res11) + + x = self.epw_conv12(x) + x = self.dw_conv12(x) + x = self.ppw_conv12(x) + res13 = x + x = self.epw_conv13(x) + x = self.dw_conv13(x) + x = self.ppw_conv13(x) + x = self.add_residual(x,res13) + res14 = x + x = self.epw_conv14(x) + x = self.dw_conv14(x) + x = self.ppw_conv14(x) + x = self.add_residual(x,res14) + output1 = x + + x = self.epw_conv15(x) + x = self.dw_conv15(x) + x = self.ppw_conv15(x) + res16 = x + x = self.epw_conv16(x) + x = self.dw_conv16(x) + x = self.ppw_conv16(x) + x = self.add_residual(x,res16) + res17 = x + x = self.epw_conv17(x) + x = self.dw_conv17(x) + x = self.ppw_conv17(x) + x = self.add_residual(x,res17) + + x = self.epw_conv18(x) + x = self.dw_conv18(x) + x = self.ppw_conv18(x) + output2 = x + + output1_class = self.head1_dw_classification(output1) + output1_class = self.head1_pw_classification(output1_class) + output1_reg = self.head1_dw_regression(output1) + output1_reg = self.head1_pw_regression(output1_reg) + + output2_class = self.head2_dw_classification(output2) + output2_class = self.head2_pw_classification(output2_class) + output2_reg = self.head2_dw_regression(output2) + output2_reg = self.head2_pw_regression(output2_reg) + + #print(f"Output1 Regression: {output1_reg.shape}, Output1 Classification: {output1_class.shape}\nOutput2 Regression: {output2_reg.shape}, Output2 Classification: {output2_class.shape}") + return (output1_reg, output1_class, output2_reg, output2_class) \ No newline at end of file diff --git a/optimizers.py b/optimizers.py new file mode 100644 index 0000000000000000000000000000000000000000..9721e6d1c77a17ff14ac4bb11068ed238005f648 --- /dev/null +++ b/optimizers.py @@ -0,0 +1,62 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import torch + +# ref: https://github.com/davda54/sam +class SAM(torch.optim.Optimizer): + def __init__(self, params, base_optimizer, rho=0.05, **kwargs): + assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}" + + defaults = dict(rho=rho, **kwargs) + super(SAM, self).__init__(params, defaults) + + self.base_optimizer = base_optimizer(self.param_groups, **kwargs) + self.param_groups = self.base_optimizer.param_groups + + @torch.no_grad() + def first_step(self, zero_grad=False): + grad_norm = self._grad_norm() + for group in self.param_groups: + scale = group["rho"] / (grad_norm + 1e-12) + + for p in group["params"]: + if p.grad is None: continue + e_w = p.grad * scale.to(p) + p.add_(e_w) # climb to the local maximum "w + e(w)" + self.state[p]["e_w"] = e_w + + if zero_grad: self.zero_grad() + + @torch.no_grad() + def second_step(self, zero_grad=False): + for group in self.param_groups: + for p in group["params"]: + if p.grad is None: continue + p.sub_(self.state[p]["e_w"]) # get back to "w" from "w + e(w)" + + self.base_optimizer.step() # do the actual "sharpness-aware" update + + if zero_grad: self.zero_grad() + + @torch.no_grad() + def step(self, closure=None): + assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided" + closure = torch.enable_grad()(closure) # the closure should do a full forward-backward pass + + self.first_step(zero_grad=True) + closure() + self.second_step() + + def _grad_norm(self): + shared_device = self.param_groups[0]["params"][0].device # put everything on the same device, in case of model parallelism + norm = torch.norm( + torch.stack([ + p.grad.norm(p=2).to(shared_device) + for group in self.param_groups for p in group["params"] + if p.grad is not None + ]), + p=2 + ) + return norm diff --git a/qat_core/functions.py b/qat_core/functions.py new file mode 100644 index 0000000000000000000000000000000000000000..e5ce87f009a90fd00550bcd5a9ddaa629d4f2983 --- /dev/null +++ b/qat_core/functions.py @@ -0,0 +1,162 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai # +########################################################################### +from abc import ABC + +import torch, sys +import torch.nn as nn +from torch.autograd import Function + +################################################### +### Quantization Functions +### backward passes are straight through + +## Up-Down (ud) quantization for wide last layer ("bigdata"). Used in QAT +class Q_ud_wide(Function): + @staticmethod + def forward(_, x, xb, extrab): + up_factor = 2 ** (xb - extrab - 1) + down_factor = 2 ** (xb - 1) + return x.mul(up_factor).add(.5).floor().div(down_factor) + + @staticmethod + def backward(_, x): + return x, None, None + + +## Up-Down (ud) quantization. Used in QAT +class Q_ud(Function): + @staticmethod + def forward(_, x, xb): + updown_factor = 2 ** (xb - 1) + return x.mul(updown_factor).add(.5).floor().div(updown_factor) + + @staticmethod + def backward(_, x): + return x, None + + +## Up-Down (ud) quantization for antipodal binary. 
Used in qat-ap +class Q_ud_ap(Function): + @staticmethod + def forward(_, x): + x = torch.sign(x).div(2.0) # antipodal (-1,+1) weights @HW correspond to (-0.5,+0.5) in qat + mask = (x == 0) + return x - mask.float().div(2.0) + + @staticmethod + def backward(_, x): + return x + + +## Up (u) quantization. Used in Eval/hardware +class Q_u(Function): + @staticmethod + def forward(_, x, xb): + up_factor = 2 ** (8 - xb) + return x.mul(up_factor).add(.5).floor() ### Burak: maxim has a .add(0.5) at the beginning, I think that's wrong + + @staticmethod + def backward(_, x): + return x, None + + +## Down (d) quantization. Used in Eval/hardware +class Q_d(Function): + @staticmethod + def forward(_, x, xb): + down_factor = 2 ** (xb-1) + return x.div(down_factor).add(.5).floor() ### Burak: maxim has a .add(0.5) at the beginning, I think that's wrong + + @staticmethod + def backward(_, x): + return x, None + +################################################### +### Quantization module +### ("umbrella" for Functions) +class quantization(nn.Module): + def __init__(self, xb=8, mode='updown', wide=False, m=None, g=None): + super().__init__() + self.xb = xb + self.mode = mode + self.wide = wide + self.m = m + self.g = g + + def forward(self, x): + ### Deniz: Wide mode was not functioning as expected, so changed with the code from older repo + if(self.mode=='updown'): + if(self.wide): + ### Burak: maxim's implementation had the third argument as +1, which was wrong. + ### the chip seems to be adding 5 more bits to the fractional part + return Q_ud_wide.apply(x, self.xb, -5) + else: + return Q_ud.apply(x, self.xb) + elif(self.mode=='down'): + if(self.wide): + ### Burak: maxim's implementation had the second argument as (self.xb + 1), which was wrong. + ### the chip seems to be adding 5 more bits to the fractional part + return Q_d.apply(x, self.xb - 5) + else: + return Q_d.apply(x, self.xb) + elif (self.mode == 'up'): + return Q_u.apply(x, self.xb) + elif (self.mode == 'updown_ap'): + return Q_ud_ap.apply(x) + else: + print('wrong quantization mode. 
exiting') + sys.exit() + + +################################################### +### Clamping modules +### (doesn't need Functions since backward passes are well-defined) +class clamping_qa(nn.Module): + def __init__(self, xb=8, wide=False): + super().__init__() + if (wide): + self.min_val = -16384.0 ### Burak: this is wrong, but it's how maxim currently does it, so we play along + self.max_val = 16383.0 ### Burak: this is wrong, but it's how maxim currently does it, so we play along + else: + self.min_val = -1.0 + self.max_val = (2 ** (xb - 1) - 1) / (2 ** (xb - 1)) + + def forward(self, x): + return x.clamp(min=self.min_val, max=self.max_val) + + +class clamping_hw(nn.Module): + def __init__(self, xb=8, wide=False): + super().__init__() + if(wide): + self.min_val = -2 ** (30-1) ### Burak: this is wrong, but it's how maxim currently does it, so we play along + self.max_val = 2 ** (30-1)-1 ### Burak: this is wrong, but it's how maxim currently does it, so we play along + else: + self.min_val = -2 ** (xb - 1) + self.max_val = 2 ** (xb - 1) - 1 + + def forward(self, x): + return x.clamp(min=self.min_val, max=self.max_val) + + +################################################### +### Computing output_shift, i.e., "los" +def calc_out_shift(weight, bias, shift_quantile): + weight_r = torch.flatten(weight) + if bias is not None: + bias_r = torch.flatten(bias) + params_r = torch.cat((weight_r, bias_r)) + else: + params_r = weight_r + limit = torch.quantile(params_r.abs(), shift_quantile) + return -(1. / limit).log2().floor().clamp(min=-15., max=15.) + +def calc_out_shift_rho(W): + # eqn. 22 in the AHA report + + # this code segment is taken from the v1 repo, duygu-yavuz-dev branch + # layers.py shift_amount_1bit function + limit = torch.quantile(W.abs(), 1.0) + return - (1. / limit).log2().ceil().clamp(min=-15., max=15.) diff --git a/qat_core/layers.py b/qat_core/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..4aca30e054c92b8797e1d58c2f7c81c29c13fc09 --- /dev/null +++ b/qat_core/layers.py @@ -0,0 +1,840 @@ +########################################################################### +# Computer vision - Embedded person tracking demo software by HyperbeeAI. # +# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. 
main@shallow.ai # +########################################################################### +import torch, sys +import torch.nn as nn +import numpy as np +from torch.autograd import Function +from qat_core.functions import quantization, clamping_qa, clamping_hw, calc_out_shift, calc_out_shift_rho +import torch.nn.functional as F + +################################################### +### Base layer for conv/linear, +### enabling quantization-related mechanisms +class shallow_base_layer(nn.Module): + def __init__( + self, + quantization_mode = 'fpt', # 'fpt', 'fpt_unconstrained', 'qat', 'qat_ap' and 'eval' + pooling_tuple = False, # boolean flag, none or tuple (kernel,stride,padding) + # if it is a tuple with (kernel_size, stride, padding) arguments it sets the pooling with these parameters + # if it is True, then it sets kernel_size = 2, stride = 2, padding = 0 + # if it is False, then it sets the pooling None + # if it is None, it sets the pooling None + operation_module = None, # torch nn module for keeping and updating conv/linear parameters + operation_fcnl = None, # torch nn.functional for actually doing the operation + activation_module = None, # torch nn module for relu/abs + batchnorm_module = None, # torch nn module for batchnorm, see super + conv_groups = None, # we use this to do only depthwise for now. so grouped conv only possible with num_groups=C_in_channels + output_width_30b = False, # boolean flag that chooses between "bigdata" (32b) and normal (8b) activation modes for MAX78000 + padding_mode = "zeros", # used to decide which type of padding operation among "zeros", "reflect", "replicate" and "circular" is to be performed. default with mode "zeros" and padding value 0 corresponds to no padding + transposed = False # either the operation is convtransposed or not + ): + super().__init__() + + ############################################################################### + # Initialize stuff that won't change throughout the model's lifetime here + # since this place will only be run once (first time the model is declared) + if isinstance(pooling_tuple, tuple) and (len(pooling_tuple) == 3): + self.pool = nn.MaxPool2d(kernel_size=pooling_tuple[0], stride=pooling_tuple[1], padding=pooling_tuple[2]) + elif (type(pooling_tuple) == type(True)): + if(pooling_tuple==True): + self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) + else: + self.pool = None + elif pooling_tuple == None: + self.pool = None + else: + print('wrong pooling type in model. (kernel,stride,padding) as a tuple are acceptable. exiting') + sys.exit() + + + ### Burak: we have to access and change (forward pass) and also train (backward pass) parameters .weight and .bias for the operations + ### therefore we keep both a functional and a module for Conv2d/Linear. The name "op" is mandatory for keeping params in Maxim + ### experiment format. + self.op = operation_module + self.op_fcn = operation_fcnl + self.act = activation_module + self.bn = batchnorm_module + self.wide = output_width_30b + self.dw_groups = conv_groups + self.padding_mode = padding_mode + self.transposed = transposed + + ############################################################################### + # Initialize stuff that will change during mode progression (FPT->QAT->Eval/HW). 
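+        # Expected progression is fpt_unconstrained -> fpt -> qat / qat_ap -> eval;
+        # the quantizer/clamp members declared below are (re)built by configure_layer_base().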
+ self.mode = quantization_mode; + self.quantize_Q_ud_8b = None + self.quantize_Q_ud_wb = None + self.quantize_Q_ud_bb = None + self.quantize_Q_ud_ap = None + self.quantize_Q_d_8b = None + self.quantize_Q_u_wb = None + self.quantize_Q_ud_wide = None + self.quantize_Q_d_wide = None + self.clamp_C_qa_8b = None + self.clamp_C_qa_bb = None + self.clamp_C_qa_wb = None + self.clamp_C_hw_8b = None + self.clamp_C_qa_wide = None + self.clamp_C_hw_wide = None + + ### Burak: these aren't really trainable parameters, but they're logged in the Maxim experiment format. It seems they marked + ### them as "non-trainable parameters" to get them automatically saved in the state_dict + self.output_shift = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False) ### Burak: we called this los, this varies, default:0 + self.weight_bits = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False) ### Burak: we called this wb, this varies, default:8 + self.bias_bits = nn.Parameter(torch.Tensor([ 8 ]), requires_grad=False) ### Burak: this is always 8 + self.quantize_activation = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False) ### Burak: this is 0 in FPT, 1 in QAT & eval/hardware, default: fpt + self.adjust_output_shift = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this is 1 in FPT & QAT, 0 in eval/hardware, default: fpt + self.shift_quantile = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this varies, default:1 (naive) + + ############################################################################### + # Do first mode progression (to the default) + ### Burak: this recognizes that layer configuration is done via a function, + ### thus, can be done again in training time for mode progression + weight_bits = self.weight_bits + bias_bits = self.bias_bits + shift_quantile = self.shift_quantile + self.configure_layer_base( weight_bits, bias_bits, shift_quantile ) + + + # This will be called during mode progression to set fields, + # check workflow-training-modes.png in doc for further info. 
+ # sets functions for all modes though, not just the selected mode + def configure_layer_base(self, weight_bits, bias_bits, shift_quantile): + # quantization operators + self.quantize_Q_ud_8b = quantization(xb=8, mode='updown', wide=False, m=None, g=None) # 8 here is activation bits + self.quantize_Q_ud_wb = quantization(xb=weight_bits, mode='updown', wide=False, m=None, g=None) + self.quantize_Q_ud_bb = quantization(xb=bias_bits, mode='updown', wide=False, m=None, g=None) + self.quantize_Q_ud_ap = quantization(xb=2, mode='updown_ap', wide=False, m=None, g=None) # 2 here is dummy, mode antipodal overrides xb + self.quantize_Q_d_8b = quantization(xb=8, mode='down', wide=False, m=None, g=None) # 8 here is activation bits + self.quantize_Q_u_wb = quantization(xb=weight_bits, mode='up', wide=False, m=None, g=None) + self.quantize_Q_ud_wide = quantization(xb=8, mode='updown', wide=True, m=None, g=None) # 8 here is activation bits, but its wide, so check inside + self.quantize_Q_d_wide = quantization(xb=8, mode='down', wide=True, m=None, g=None) # 8 here is activation bits, but its wide, so check inside + + # clamping operators + self.clamp_C_qa_8b = clamping_qa(xb = 8, wide=False) # 8 here is activation bits + self.clamp_C_qa_bb = clamping_qa(xb = bias_bits, wide=False) + self.clamp_C_qa_wb = clamping_qa(xb = weight_bits, wide=False) + self.clamp_C_hw_8b = clamping_hw(xb = 8, wide=False) # 8 here is activation bits + self.clamp_C_qa_wide = clamping_qa(xb = None, wide=True) # None to avoid misleading info on the # of bits, check inside + self.clamp_C_hw_wide = clamping_hw(xb = None, wide=True) # None to avoid misleading info on the # of bits, check inside + + # state variables + self.weight_bits = nn.Parameter(torch.Tensor([weight_bits]), requires_grad=False) + self.bias_bits = nn.Parameter(torch.Tensor([bias_bits]), requires_grad=False) + self.shift_quantile = nn.Parameter(torch.Tensor([shift_quantile]), requires_grad=False) + + # This will be called during mode progression, during training (scale bn parameters with 4) + def mode_fptunconstrained2fpt(self, quantization_mode): + if(self.bn is not None): + weightsExist = (self.bn.weight != None) + biasExist = (self.bn.bias != None) + + # BN outputs multiplied by 4 to compensate effect of dividing 4 at forward pass + if(weightsExist and biasExist): + self.bn.weight.data = self.bn.weight.data * 4.0 + self.bn.bias.data = self.bn.bias.data * 4.0 + else: + # batchnorm affine=False + self.bn.running_var = self.bn.running_var / 4.0 + else: + pass + #print('This layer does not have batchnorm') + + self.mode = quantization_mode; + self.quantize_activation = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False) ### Burak: this is 0 in FPT, 1 in QAT & eval/hardware + self.adjust_output_shift = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this is 1 in FPT & QAT, 0 in eval/hardware + + + # This will be called during mode progression, during training + def mode_fpt2qat(self, quantization_mode): + # just fold batchnorms + if(self.bn is not None): + weightsExist = (self.bn.weight != None) + biasExist = (self.bn.bias != None) + if(weightsExist and biasExist): + # batchnorm affine=True + w_fp = self.op.weight.data + + scale_factor = self.bn.weight.data + shift_factor = self.bn.bias.data + + running_mean_mu = self.bn.running_mean + running_var = self.bn.running_var + running_stdev_sigma = torch.sqrt(running_var + 1e-5) + + w_hat = scale_factor.reshape(-1,1,1,1) * w_fp * (1.0 / (running_stdev_sigma*4.0)).reshape((w_fp.shape[0],) + (1,) * 
(len(w_fp.shape) - 1)) + self.op.weight.data = w_hat + + if (self.op.bias != None): + b_fp = self.op.bias.data + b_hat = scale_factor * (b_fp - running_mean_mu)/(running_stdev_sigma*4.0) + shift_factor/4.0 + self.op.bias.data = b_hat + else: + b_fp = torch.zeros_like(shift_factor).to(self.op.weight.data.device) + b_hat = scale_factor * (b_fp - running_mean_mu)/(running_stdev_sigma*4.0) + shift_factor/4.0 + self.op.register_parameter(name='bias', param=nn.Parameter(torch.zeros_like(b_hat), requires_grad=True)) + self.op.bias.data = b_hat + + self.bn = None + + else: + # batchnorm affine=False + w_fp = self.op.weight.data + + running_mean_mu = self.bn.running_mean + running_var = self.bn.running_var + running_stdev_sigma = torch.sqrt(running_var + 1e-5) + + w_hat = w_fp * (1.0 / (running_stdev_sigma*4.0)).reshape((w_fp.shape[0],) + (1,) * (len(w_fp.shape) - 1)) + self.op.weight.data = w_hat + + if (self.op.bias != None): + b_fp = self.op.bias.data + b_hat = (b_fp - running_mean_mu)/(running_stdev_sigma*4.0) + self.op.bias.data = b_hat + else: + b_fp = torch.zeros(self.op.weight.data.shape[0]).to(self.op.weight.data.device) + b_hat = (b_fp - running_mean_mu)/(running_stdev_sigma*4.0) + self.op.register_parameter(name='bias', param=nn.Parameter(torch.zeros_like(b_hat), requires_grad=True)) + self.op.bias.data = b_hat + + self.bn = None + else: + pass + #print('This layer does not have batchnorm') + self.mode = quantization_mode; + self.quantize_activation = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this is 0 in FPT, 1 in QAT & eval/hardware + self.adjust_output_shift = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this is 1 in FPT & QAT, 0 in eval/hardware + + # This will be called during mode progression after training, for eval + def mode_qat2hw(self, quantization_mode): + w_hat = self.op.weight.data + if self.op.bias is not None: + b_hat = self.op.bias.data + else: + b_hat = None + + shift = -self.output_shift.data; + s_o = 2**(shift) + wb = self.weight_bits.data.cpu().numpy()[0] + + w_clamp = [-2**(wb-1) , 2**(wb-1)-1 ] + b_clamp = [-2**(wb+8-2), 2**(wb+8-2)-1] # 8 here is activation bits + + w = w_hat.mul(2**(wb -1)).mul(s_o).add(0.5).floor() + w = w.clamp(min=w_clamp[0],max=w_clamp[1]) + + if b_hat is not None: + b = b_hat.mul(2**(wb -1 + 7)).mul(s_o).add(0.5).floor() + b = b.clamp(min=b_clamp[0],max=b_clamp[1]) + self.op.bias.data = b + else: + self.op.bias = None + + self.op.weight.data = w + self.mode = quantization_mode; + self.quantize_activation = nn.Parameter(torch.Tensor([ 1 ]), requires_grad=False) ### Burak: this is 0 in FPT, 1 in QAT & eval/hardware + self.adjust_output_shift = nn.Parameter(torch.Tensor([ 0 ]), requires_grad=False) ### Burak: this is 1 in FPT & QAT, 0 in eval/hardware + + def mode_qat_ap2hw(self, quantization_mode): + w_hat = self.op.weight.data + b_hat = self.op.bias.data + + shift = -self.output_shift.data; + s_o = 2 ** (shift) + wb = self.weight_bits.data.cpu().numpy()[0] + + if (wb == 2): + w = self.quantize_Q_ud_ap(w_hat).mul(2.0) + else: + w_clamp = [-2 ** (wb - 1), 2 ** (wb - 1) - 1] + w = w_hat.mul(2 ** (wb - 1)).mul(s_o).add(0.5).floor() + w = w.clamp(min=w_clamp[0], max=w_clamp[1]) + + b = b_hat.mul(2 ** (wb - 1 + 7)).mul(s_o).add(0.5).floor() + + b_clamp = [-2 ** (wb + 8 - 2), 2 ** (wb + 8 - 2) - 1] # 8 here is activation bits + b = b.clamp(min=b_clamp[0], max=b_clamp[1]) + + self.op.weight.data = w + self.op.bias.data = b + self.mode = quantization_mode; + self.quantize_activation = 
nn.Parameter(torch.Tensor([1]), + requires_grad=False) ### Burak: this is 0 in FPT, 1 in QAT & eval/hardware + + + self.adjust_output_shift = nn.Parameter(torch.Tensor([self.output_shift.data]), + requires_grad=False) ### Burak: this is 1 in FPT & QAT, 0 in eval/hardware + + def forward(self, x): + if (self.pool is not None): + x = self.pool(x) + + if (self.mode == 'fpt'): + # pre-compute stuff + w_fp = self.op.weight + b_fp = self.op.bias + + if not isinstance(self, fullyconnected): + # actual forward pass + # Deniz: nn.functional.conv's are not supporting padding modes, so had to add this nn.functional.pad manually. + # Also, default padding mode names are different for nn.func.pad and nn.conv. Related links: + # https://discuss.pytorch.org/t/torch-nn-functional-conv1d-padding-like-torch-nn-conv1d/119489 + # https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + if self.op.padding_mode == "zeros": + self.op.padding_mode = "constant" + if self.transposed: + x = self.op_fcn(x, w_fp, b_fp, self.op.stride, self.op.padding, self.op.output_padding); + else: + if (self.dw_groups is None): + # Note that pad=self.op.padding is just a container + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_fp, b_fp, + self.op.stride, 0, self.op.dilation) + else: + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_fp, b_fp, + self.op.stride, 0, self.op.dilation, groups=self.dw_groups) + else: + x = self.op_fcn(x, w_fp, b_fp, None, None) + + if (self.bn is not None): + x = self.bn(x) # make sure var=1 and mean=0 + x = x / 4.0 # since BN is only making sure var=1 and mean=0, 1/4 is to keep everything within [-1,1] w/ hi prob. + if (self.act is not None): + x = self.act(x) + if ((self.wide) and (self.act is None)): + x = self.clamp_C_qa_wide(x) + else: + x = self.clamp_C_qa_8b(x) + + # save stuff (los is deactivated in fpt) + self.output_shift = nn.Parameter(torch.Tensor([0]), requires_grad=False) # functional, used in Maxim-friendly experiments + self.quantize_activation = nn.Parameter(torch.Tensor([0]), requires_grad=False) # ceremonial, for Maxim-friendly experiments + self.adjust_output_shift = nn.Parameter(torch.Tensor([1]), requires_grad=False) # ceremonial, for Maxim-friendly experiments + + elif (self.mode == 'fpt_unconstrained'): + # pre-compute stuff + w_fp = self.op.weight + b_fp = self.op.bias + + if not isinstance(self, fullyconnected): + # actual forward pass + # Deniz: nn.functional.conv's are not supporting padding modes, so had to add this nn.functional.pad manually. + # Also, default padding mode names are different for nn.func.pad and nn.conv. 
Related links: + # https://discuss.pytorch.org/t/torch-nn-functional-conv1d-padding-like-torch-nn-conv1d/119489 + # https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + if self.op.padding_mode == "zeros": + self.op.padding_mode = "constant" + if self.transposed: + x = self.op_fcn(x, w_fp, b_fp, self.op.stride, self.op.padding, self.op.output_padding); + else: + if (self.dw_groups is None): + # Note that pad=self.op.padding is just a container + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_fp, b_fp, + self.op.stride, 0, self.op.dilation) + else: + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_fp, b_fp, + self.op.stride, 0, self.op.dilation, groups=self.dw_groups) + else: + x = self.op_fcn(x, w_fp, b_fp, None, None) + + if (self.bn is not None): + x = self.bn(x) # make sure var=1 and mean=0 + if (self.act is not None): + x = self.act(x) + # save stuff (los is deactivated in fpt) + self.output_shift = nn.Parameter(torch.Tensor([0]), requires_grad=False) # functional, used in Maxim-friendly experiments + self.quantize_activation = nn.Parameter(torch.Tensor([0]), requires_grad=False) # ceremonial, for Maxim-friendly experiments + self.adjust_output_shift = nn.Parameter(torch.Tensor([1]), requires_grad=False) # ceremonial, for Maxim-friendly experiments + + elif (self.mode == 'qat'): + ############################################################################### + ## ASSUMPTION: batchnorms are already folded before coming here. Check doc, ## + ## the parameters with _fp and with _hat are of different magnitude ## + ############################################################################### + + # pre-compute stuff + w_hat = self.op.weight + b_hat = self.op.bias + + if b_hat is not None: + los = calc_out_shift(w_hat.detach(), b_hat.detach(), self.shift_quantile.detach()) + else: + los = calc_out_shift(w_hat.detach(), None, self.shift_quantile.detach()) + + s_w = 2 ** (-los) + s_o = 2 ** (los) + w_hat_q = self.clamp_C_qa_wb(self.quantize_Q_ud_wb(w_hat * s_w)); + + if b_hat is not None: + b_hat_q = self.clamp_C_qa_bb(self.quantize_Q_ud_bb(b_hat * s_w)); + else: + b_hat_q = None + + if not isinstance(self, fullyconnected): + # actual forward pass + # Deniz: nn.functional.conv's are not supporting padding modes, so had to add this nn.functional.pad manually. + # Also, default padding mode names are different for nn.func.pad and nn.conv. 
Related links: + # https://discuss.pytorch.org/t/torch-nn-functional-conv1d-padding-like-torch-nn-conv1d/119489 + # https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + if self.op.padding_mode == "zeros": + self.op.padding_mode = "constant" + if self.transposed: + x = self.op_fcn(x, w_hat, b_hat, self.op.stride, self.op.padding, self.op.output_padding); + else: + if (self.dw_groups is None): + # Note that pad=self.op.padding is just a container + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_hat_q, b_hat_q, + self.op.stride, 0, self.op.dilation) + else: + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_hat_q, b_hat_q, + self.op.stride, 0, self.op.dilation, groups=self.dw_groups) + else: + x = self.op_fcn(x, w_hat_q, b_hat_q, None, None) + + x = x * s_o + if (self.act is not None): + x = self.act(x) + if ((self.wide) and (self.act is None)): + x = self.quantize_Q_ud_wide(x) + x = self.clamp_C_qa_wide(x) + ### Deniz: This addition is needed for wide layers to work as expected + x = x / (2 ** (5)); + else: + x = self.quantize_Q_ud_8b(x) + x = self.clamp_C_qa_8b(x) + + # save stuff + self.output_shift = nn.Parameter(torch.Tensor([los]), + requires_grad=False) # functional, used in Maxim-friendly checkpoints + + elif (self.mode == 'qat_ap'): + ############################################################################### + ## ASSUMPTION: batchnorms are already folded before coming here. Check doc, ## + ## the parameters with _fp and with _hat are of different magnitude ## + ############################################################################### + + # pre-compute stuff + w_hat = self.op.weight + b_hat = self.op.bias + + if b_hat is not None: + los = calc_out_shift(w_hat.detach(), b_hat.detach(), self.shift_quantile.detach()) + else: + los = calc_out_shift(w_hat.detach(), None, self.shift_quantile.detach()) + + s_w = 2 ** (-los) + s_o = 2 ** (los) + ############################################## + # This is the only difference from qat + if (self.weight_bits.data == 2): + w_hat_q = self.quantize_Q_ud_ap(w_hat * s_w); + else: + w_hat_q = self.clamp_C_qa_wb(self.quantize_Q_ud_wb(w_hat * s_w)); + ############################################## + + if b_hat is not None: + b_hat_q = self.clamp_C_qa_bb(self.quantize_Q_ud_bb(b_hat * s_w)); + else: + b_hat_q = None + + if not isinstance(self, fullyconnected): + # actual forward pass + # Deniz: nn.functional.conv's are not supporting padding modes, so had to add this nn.functional.pad manually. + # Also, default padding mode names are different for nn.func.pad and nn.conv. 
Related links: + # https://discuss.pytorch.org/t/torch-nn-functional-conv1d-padding-like-torch-nn-conv1d/119489 + # https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + if self.op.padding_mode == "zeros": + self.op.padding_mode = "constant" + if self.transposed: + x = self.op_fcn(x, w_hat, b_hat, self.op.stride, self.op.padding, self.op.output_padding); + else: + if (self.dw_groups is None): + # Note that pad=self.op.padding is just a container + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_hat_q, b_hat_q, + self.op.stride, 0, self.op.dilation) + else: + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_hat_q, b_hat_q, + self.op.stride, 0, self.op.dilation, groups=self.dw_groups) + else: + x = self.op_fcn(x, w_hat_q, b_hat_q, None, None) + + x = x * s_o + if (self.act is not None): + x = self.act(x) + if ((self.wide) and (self.act is None)): + x = self.quantize_Q_ud_wide(x) + x = self.clamp_C_qa_wide(x) + x = x / (2 ** (5)); + else: + x = self.quantize_Q_ud_8b(x) + x = self.clamp_C_qa_8b(x) + + # save stuff + self.output_shift = nn.Parameter(torch.Tensor([los]), + requires_grad=False) # functional, used in Maxim-friendly checkpoints + + elif self.mode == 'eval': + ##################################################################################### + ## ASSUMPTION: parameters are already converted to HW before coming here.Check doc ## + ##################################################################################### + + # pre-compute stuff + w = self.op.weight + b = self.op.bias + los = self.output_shift + s_o = 2 ** los + w_q = self.quantize_Q_u_wb(w); + if b is not None: + b_q = self.quantize_Q_u_wb(b); # yes, wb, not a typo, they need to be on the same scale + else: + b_q = None + + if not isinstance(self, fullyconnected): + # actual forward pass + # Deniz: nn.functional.conv's are not supporting padding modes, so had to add this nn.functional.pad manually. + # Also, default padding mode names are different for nn.func.pad and nn.conv. Related links: + # https://discuss.pytorch.org/t/torch-nn-functional-conv1d-padding-like-torch-nn-conv1d/119489 + # https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + if self.op.padding_mode == "zeros": + self.op.padding_mode = "constant" + if self.transposed: + x = self.op_fcn(x, w, b, self.op.stride, self.op.padding, self.op.output_padding); + else: + if (self.dw_groups is None): + # Note that pad=self.op.padding is just a container + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_q, b_q, + self.op.stride, 0, self.op.dilation) + else: + x = self.op_fcn(F.pad(x, pad=self.op.padding, mode=self.op.padding_mode), w_q, b_q, + self.op.stride, 0, self.op.dilation, groups=self.dw_groups) + else: + x = self.op_fcn(x, w_q, b_q, None, None) + + x = x * s_o + if (self.act is not None): + x = self.act(x) + if ((self.wide) and (self.act is None)): + x = self.quantize_Q_d_wide(x) + x = self.clamp_C_hw_wide(x) + ### Deniz: This addition is needed for wide layers to work as expected + x = x / (2 ** (5)); + else: + x = self.quantize_Q_d_8b(x) + x = self.clamp_C_hw_8b(x) + + # nothing to save, this was a hardware-emulated evaluation pass + else: + print('wrong quantization mode. should have been one of {fpt, qat, eval}. 
exiting') + sys.exit() + + return x + + +class conv(shallow_base_layer): + def __init__( + self, + C_in_channels = None, # number of input channels + D_out_channels = None, # number of output channels + K_kernel_dimension = None, # square kernel dimension + padding = (1,1,1,1), # (padding_left, padding_right, padding_top, padding_bottom) + stride = (1,1), # controls the stride of the kernel for width and height + pooling = False, # boolean flag, none or tuple (kernel,stride,padding) + # if it is a tuple with (kernel_size, stride, padding) arguments it sets the pooling with these parameters + # if it is True, then it sets kernel_size = 2, stride = 2, padding = 0 + # if it is False, then it sets the pooling None + # if it is None, it sets the pooling None + batchnorm = False, # boolean flag for now, no trainable affine parameters + batchnorm_affine = False, # boolean flag for now, to do/do not make affine batchnorm operation + batch_momentum = 0.05, # momentum parameter for batchnorm + num_groups = None, # we use this to do only depthwise for now. so grouped conv only possible with num_groups=C_in_channels + activation = None, # 'relu' and 'relu6' are the only choices for now + bias = True, # adds a learnable bias to the output. Default: True + transposed = False, # either conv2d or conv2dtranspose + output_width_30b = False, # boolean flag that chooses between "bigdata" (32b) and normal (8b) activation modes for MAX78000 + weight_initialization=None, + quantization_mode = 'fpt' + ): + if(activation is None): + activation_fcn = None; + elif(activation == 'relu'): + activation_fcn = nn.ReLU(inplace=True); + elif(activation == 'relu6'): + # Clamping limits get scaled in hw mode, but relu6 cannot be scaled that way. + print('Warning!!! Relu6 activation is selected for a layer, note that it is only supported for fpt unconstrained mode, it causes unexpected behavior in other modes') + activation_fcn = nn.ReLU6(inplace=True); + elif(activation == 'sigmoid'): + activation_fcn = nn.Sigmoid(); + else: + print('wrong activation type in model. only {relu and relu6} are acceptable. exiting') + sys.exit() + + ### Burak: only a module is enough for BN since we neither need to access internals in forward pass, nor train anything (affine=False) + if(batchnorm): + if(batchnorm_affine): + batchnorm_mdl = nn.BatchNorm2d(D_out_channels, eps=1e-05, momentum=batch_momentum, affine=True) + else: + batchnorm_mdl = nn.BatchNorm2d(D_out_channels, eps=1e-05, momentum=batch_momentum, affine=False) + else: + batchnorm_mdl = None; + + ''' + Groups = 1 + This setting is the default setting. Under this setting, all inputs are convolved to all outputs. + Groups ≠ 1 + Must be an integer such that the number of input channels and the number of output channels are both divisible by this number. + A non-default groups value allows us to create multiple paths where each path connects only a subset of input channels to the output channels. + For details see : https://iksinc.online/2020/05/10/groups-parameter-of-the-convolution-layer/ + ''' + + if transposed: + if(num_groups is not None): + print('convtranspose function does not accept groups option. 
exiting') + sys.exit() + else: + operation_mdl = nn.ConvTranspose2d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension, stride=stride, padding=padding, bias=bias); # default is group=1 + operation_fcn = nn.functional.conv_transpose2d + else: + if(num_groups is not None): + operation_mdl = nn.Conv2d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension, stride=stride, padding=padding, bias=bias, groups=num_groups); + else: + operation_mdl = nn.Conv2d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension, stride=stride, padding=padding, bias=bias); # default is group=1 + operation_fcn = nn.functional.conv2d + + if weight_initialization is not None: + weight_initialization(operation_mdl.weight) + + + super().__init__( + pooling_tuple = pooling, + activation_module = activation_fcn, + operation_module = operation_mdl, + operation_fcnl = operation_fcn, + batchnorm_module = batchnorm_mdl, + conv_groups = num_groups, + output_width_30b = output_width_30b, + quantization_mode = quantization_mode, + transposed = transposed + ) + + +def linear_functional(x, weight, bias, _stride, _padding): + # dummy linear function that has same arguments as conv + return nn.functional.linear(x, weight, bias) + + +class fullyconnected(shallow_base_layer): + def __init__( + # This must be updated, batch norm and ReLU6 issues + self, + in_features = None, # number of output features + out_features = None, # number of output features + pooling = False, # boolean flag, none or tuple (kernel,stride,padding) + # if it is a tuple with (kernel_size, stride, padding) arguments it sets the pooling with these parameters + # if it is True, then it sets kernel_size = 2, stride = 2, padding = 0 + # if it is False, then it sets the pooling None + # if it is None, it sets the pooling None + batchnorm = False, # boolean flag for now, no trainable affine parameters + activation = None, # 'relu' is the only choice for now + output_width_30b = False, # boolean flag that chooses between "bigdata" (32b) and normal (8b) activation modes for MAX78000 + quantization_mode = 'fpt' + ): + if(activation is None): + activation_fcn = None; + elif(activation == 'relu'): + activation_fcn = nn.ReLU(inplace=True); + else: + print('wrong activation type in model. only {relu} is acceptable. exiting') + sys.exit() + + ### Burak: only a module is enough for BN since we neither need to access internals in forward pass, nor train anything (affine=False) + if(batchnorm): + batchnorm_mdl = nn.BatchNorm2d(out_features, eps=1e-05, momentum=0.05, affine=False) + else: + batchnorm_mdl = None; + + operation_mdl = nn.Linear(in_features, out_features, bias=True); + operation_fcn = linear_functional + + super().__init__( + pooling_tuple = pooling, + activation_module = activation_fcn, + operation_module = operation_mdl, + operation_fcnl = operation_fcn, + batchnorm_module = batchnorm_mdl, + output_width_30b = output_width_30b, + quantization_mode = quantization_mode + ) + + # Define dummy arguments to make Linear and conv compatible in shallow_base_layer. 
+ # the name "op" here refers to op in super, i.e., in base_layer + self.op.stride = None + self.op.padding = None + +class add_residual(nn.Module): + def __init__(self, quantization_mode='fpt', activation=None): + super().__init__() + self.mode = quantization_mode; + self.clamp_C_qa_8b = clamping_qa( xb = 8, wide=False) # 8 here is activation bits + self.clamp_C_hw_8b = clamping_hw( xb = 8, wide=False) # 8 here is activation bits + if(activation is None): + self.activation_fcn = nn.Identity(); + elif(activation == 'relu'): + self.activation_fcn = nn.ReLU(inplace=True); + elif(activation == 'relu6'): + # Clamping limits get scaled in hw mode, but relu6 cannot be scaled that way. + print('Warning!!! Relu6 activation is selected for a layer, note that it is only supported for fpt unconstrained mode, it causes unexpected behavior in other modes') + self.activation_fcn = nn.ReLU6(inplace=True); + elif(activation == 'sigmoid'): + self.activation_fcn = nn.Sigmoid(); + else: + print('wrong activation type in model. only {relu and relu6 and sigmoid} are acceptable. exiting') + sys.exit() + def mode_fptunconstrained2fpt(self, quantization_mode): + self.mode = 'fpt' + + def mode_fpt2qat(self, quantization_mode): + self.mode = 'qat' + + def mode_qat2hw(self, quantization_mode): + self.mode = 'eval' + + def forward(self, x, res): + x = self.activation_fcn(x+res) + if(self.mode == 'fpt_unconstrained'): + pass + elif(self.mode == 'fpt'): + x = self.clamp_C_qa_8b(x) + elif(self.mode == 'qat'): + x = self.clamp_C_qa_8b(x) + elif(self.mode == 'eval'): + x = self.clamp_C_hw_8b(x) + else: + print('wrong quantization mode. should have been one of {fpt_unconstrained, fpt, qat, eval}. exiting') + sys.exit() + return x + + +class conv1d(shallow_base_layer): + def __init__( + self, + C_in_channels = None, # number of input channels + D_out_channels = None, # number of output channels + K_kernel_dimension = None, # kernel size + padding = (0,0), # (padding_left, padding_right) + stride = 1, # stride + pooling = False, # boolean flag, none or tuple (kernel,stride,padding) + # if it is a tuple with (kernel_size, stride, padding) arguments it sets the pooling with these parameters + # if it is True, then it sets kernel_size = 2, stride = 2, padding = 0 + # if it is False, then it sets the pooling None + # if it is None, it sets the pooling None + batchnorm = False, # boolean flag for now, no trainable affine parameters + batchnorm_affine = False, # boolean flag for now, to do/do not make affine batchnorm operation + num_groups = None, # we use this to do only depthwise for now. so grouped conv only possible with num_groups=C_in_channels + activation = None, # 'relu' is the only choice for now + bias = True, # adds a learnable bias to the output. Default: True + output_width_30b = False, # boolean flag that chooses between "bigdata" (32b) and normal (8b) activation modes for MAX78000 + weight_initialization = None, + quantization_mode = 'fpt', + dilation = 1, # dilation + padding_mode = "zeros" # used to decide which type of padding operation among "zeros", "reflect", "replicate" and "circular" is to be performed. default with mode "zeros" and padding value 0 corresponds to no padding + ): + + if(activation is None): + activation_fcn = None; + elif(activation == 'relu'): + activation_fcn = nn.ReLU(inplace=True); + elif(activation == 'relu6'): + # Clamping limits get scaled in hw mode, but relu6 cannot be scaled that way. + print('Warning!!! 
Relu6 activation is selected for a layer, note that it is only supported for fpt unconstrained mode, it causes unexpected behavior in other modes')
+            activation_fcn = nn.ReLU6(inplace=True);
+        elif(activation == 'sigmoid'):
+            activation_fcn = nn.Sigmoid();
+        else:
+            print('wrong activation type in model. only {relu and relu6 and sigmoid} are acceptable. exiting')
+            sys.exit()
+
+        if(batchnorm):
+            if(batchnorm_affine):
+                batchnorm_mdl = nn.BatchNorm1d(D_out_channels, eps=1e-05, momentum=0.05, affine=True)
+            else:
+                batchnorm_mdl = nn.BatchNorm1d(D_out_channels, eps=1e-05, momentum=0.05, affine=False)
+        else:
+            batchnorm_mdl = None;
+
+        if(num_groups is not None):
+            if(num_groups != C_in_channels):
+                print("only num_groups=C_in_channels (i.e., depthwise) is supported for now, exiting")
+                sys.exit()
+            if(C_in_channels != D_out_channels): # let's not ignore this even though D_out_channels is redundant here
+                print('num_in_channels needs to be equal to num_out_channels for depthwise conv layers, exiting')
+                sys.exit()
+            operation_mdl = nn.Conv1d(C_in_channels, C_in_channels, kernel_size=K_kernel_dimension, stride=stride, padding=padding, bias=bias, groups=C_in_channels, dilation=dilation, padding_mode=padding_mode);
+        else:
+            operation_mdl = nn.Conv1d(C_in_channels, D_out_channels, kernel_size=K_kernel_dimension, stride=stride, padding=padding, bias=bias, dilation=dilation, padding_mode=padding_mode); # default is group=1
+        operation_fcn = nn.functional.conv1d
+
+        if weight_initialization is not None:
+            weight_initialization(operation_mdl.weight)
+
+        super().__init__(
+            pooling_tuple     = pooling,
+            activation_module = activation_fcn,
+            operation_module  = operation_mdl,
+            operation_fcnl    = operation_fcn,
+            batchnorm_module  = batchnorm_mdl,
+            output_width_30b  = output_width_30b,
+            quantization_mode = quantization_mode,
+            conv_groups       = num_groups,
+            padding_mode      = padding_mode
+        )
+
+class concatenate(nn.Module):
+    def __init__(self, quantization_mode='fpt', dim=0):
+        super().__init__()
+        self.dim  = dim
+        self.mode = quantization_mode;
+        self.clamp_C_qa_8b = clamping_qa(xb=8, wide=False) # 8 here is activation bits
+        self.clamp_C_hw_8b = clamping_hw(xb=8, wide=False) # 8 here is activation bits
+
+    def mode_fptunconstrained2fpt(self, quantization_mode):
+        self.mode = 'fpt'
+
+    def mode_fpt2qat(self, quantization_mode):
+        self.mode = 'qat'
+
+    def mode_qat2hw(self, quantization_mode):
+        self.mode = 'eval'
+
+    def forward(self, x1, x2):
+        if(self.mode == 'fpt_unconstrained'):
+            x = torch.cat([x1, x2], dim=self.dim)
+        elif(self.mode == 'fpt'):
+            x = self.clamp_C_qa_8b(torch.cat([x1, x2], dim=self.dim))
+        elif(self.mode == 'qat'):
+            x = self.clamp_C_qa_8b(torch.cat([x1, x2], dim=self.dim))
+        elif(self.mode == 'eval'):
+            x = self.clamp_C_hw_8b(torch.cat([x1, x2], dim=self.dim))
+        else:
+            print('wrong quantization mode. should have been one of {fpt_unconstrained, fpt, qat, eval}. exiting')
+            sys.exit()
+        return x
+
+# IMPORTANT: This block does not apply quantization yet; it still needs to be made quantization-aware.
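+# One possible direction (a sketch only, not implemented here): follow the pattern of the concatenate
+# layer above, i.e. track a quantization mode and clamp the upsampled activations with clamping_qa in
+# 'fpt'/'qat' and clamping_hw in 'eval', so the output stays within the 8-bit activation range that the
+# other layers expect.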
+class Upsample(nn.Module):
+    def __init__(self,
+                 size=None,
+                 scale_factor=None,
+                 mode='nearest',
+                 align_corners=None,
+                 recompute_scale_factor=None
+                ):
+
+
+        super().__init__()
+        self.upsample = nn.Upsample(size=size, scale_factor=scale_factor, mode=mode, align_corners=align_corners, recompute_scale_factor=recompute_scale_factor)
+
+    def forward(self, x):
+        x = self.upsample(x)
+        return x
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fa11966b959bcf3989e346b940611d74126d353
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,27 @@
+###########################################################################
+# Computer vision - Embedded person tracking demo software by HyperbeeAI. #
+# Copyrights © 2023 Hyperbee.AI Inc. All rights reserved. main@shallow.ai #
+###########################################################################
+import torch
+
+def compute_batch_accuracy(pred, label):
+    correct = (pred == label).sum()
+    return correct, label.size(0)
+
+def compute_set_accuracy(model, test_loader):
+    device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    correct = 0
+    total   = 0
+    with torch.no_grad():
+        for data in test_loader:
+            inputs, labels = data
+
+            inputs  = inputs.to(device)
+            labels  = labels.to(device)
+            outputs = model(inputs)
+
+            correct_batch, total_batch = compute_batch_accuracy(torch.argmax(outputs, dim=1), labels)
+            correct += correct_batch
+            total   += total_batch
+
+    return correct/total
\ No newline at end of file
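
For reference, a minimal usage sketch of the quantization-aware layers in qat_core/layers.py. The toy model, its tensor sizes and the surrounding loop are hypothetical and only for illustration; the conv/fullyconnected constructor arguments and the mode-progression methods (mode_fpt2qat, mode_qat2hw) are the ones defined above, and compute_set_accuracy comes from utils.py.

# Minimal usage sketch (hypothetical model and sizes, for illustration only).
import torch
import torch.nn as nn
from qat_core.layers import conv, fullyconnected

class TinyNet(nn.Module):
    def __init__(self, quantization_mode='fpt'):
        super().__init__()
        # pooling=True places a MaxPool2d(2,2,0) in front of each conv, so 32x32 -> 16x16 -> 8x8
        self.c1 = conv(C_in_channels=3,  D_out_channels=8,  K_kernel_dimension=3,
                       batchnorm=True, activation='relu', pooling=True,
                       quantization_mode=quantization_mode)
        self.c2 = conv(C_in_channels=8,  D_out_channels=16, K_kernel_dimension=3,
                       batchnorm=True, activation='relu', pooling=True,
                       quantization_mode=quantization_mode)
        self.fc = fullyconnected(in_features=16 * 8 * 8, out_features=2,
                                 quantization_mode=quantization_mode)

    def forward(self, x):
        x = self.c2(self.c1(x))
        return self.fc(x.flatten(1))

model = TinyNet()                        # all layers start in 'fpt' (full-precision training) mode
x = torch.rand(2, 3, 32, 32) - 0.5       # activations are kept roughly inside [-1, 1) in 'fpt'
y = model(x)

# ... train in 'fpt', then fold batchnorms and switch to quantization-aware training:
for m in model.modules():
    if hasattr(m, 'mode_fpt2qat'):
        m.mode_fpt2qat('qat')
y = model(x)                             # QAT forward pass (weights and activations fake-quantized)

# ... fine-tune in 'qat', then convert parameters to hardware integers for evaluation:
for m in model.modules():
    if hasattr(m, 'mode_qat2hw'):
        m.mode_qat2hw('eval')
# In 'eval' mode the model expects inputs scaled to the hardware 8-bit range rather than [-1, 1).
# Given a test DataLoader, utils.compute_set_accuracy(model, test_loader) reports top-1 accuracy.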