import os import json import torch import torchvision.transforms as transforms import os.path import numpy as np import cv2 from torch.utils.data import Dataset import random from .__base_dataset__ import BaseDataset class IBIMSDataset(BaseDataset): def __init__(self, cfg, phase, **kwargs): super(IBIMSDataset, self).__init__( cfg=cfg, phase=phase, **kwargs) self.metric_scale = cfg.metric_scale self.avg = torch.nn.AvgPool2d(kernel_size=7, stride=1, ceil_mode=False, count_include_pad=True, divisor_override=None) self.unfold = torch.nn.Unfold(kernel_size=7, dilation=1, padding=0, stride=1) self.pad = torch.nn.ZeroPad2d(3) def process_depth(self, depth, rgb): depth[depth>50000] = 0 depth /= self.metric_scale return depth def load_batch(self, meta_data, data_path): curr_intrinsic = meta_data['cam_in'] # load rgb/depth curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path']) # get semantic labels curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth) # create camera model curr_cam_model = self.create_cam_model(curr_rgb.shape[0], curr_rgb.shape[1], curr_intrinsic) # get normal labels curr_normal = self.load_norm_label(data_path['normal_path'], H=curr_rgb.shape[0], W=curr_rgb.shape[1], depth=curr_depth, K=curr_intrinsic) # !!! this is diff of BaseDataset # get depth mask depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path']) curr_depth[~depth_mask] = -1 data_batch = dict( curr_rgb = curr_rgb, curr_depth = curr_depth, curr_sem = curr_sem, curr_normal = curr_normal, curr_cam_model=curr_cam_model, ) return data_batch def load_norm_label(self, norm_path, H, W, depth, K): depth = torch.from_numpy(depth).squeeze() K = torch.Tensor([[K[0], 0 ,K[2]], [0, K[1], K[3]], [0, 0, 1]]) K_inv = K.inverse() y, x = torch.meshgrid([torch.arange(0, 480, dtype=torch.float32), torch.arange(0, 640, dtype=torch.float32)], indexing='ij') x = x.reshape(1, 480*640) y = y.reshape(1, 480*640) ones = torch.ones_like(x) coord_2d = torch.cat((x, y, ones), dim=0) coord_3d = torch.matmul(K_inv, coord_2d).view(3, 480, 640) coord_3d = (coord_3d * depth[None, :])[None, :] coord_3d_mean = self.avg(coord_3d) uf_coord_3d = self.unfold(coord_3d.permute(1, 0, 2, 3)) coord_3d_decenter = uf_coord_3d - coord_3d_mean.view(3, 1, (480-6)*(640-6)) coord_3d_decenter = coord_3d_decenter.permute(2, 0, 1) cov = torch.bmm(coord_3d_decenter, coord_3d_decenter.permute(0, 2, 1)) eig = torch.linalg.eigh(cov) #svd = torch.linalg.svd(coord_3d_decenter) normal = (eig[1])[:, :, 0].float() #normal = (svd[1])[:, 2, :] normal = self.pad(normal.permute(1, 0).view(1, 3, (480-6), (640-6))) orient_mask = (torch.sum(normal * coord_3d, axis=1) < 0).unsqueeze(1) normal = normal * orient_mask - normal * (~orient_mask) gt_normal = normal.squeeze().permute(1, 2, 0).numpy() return gt_normal if __name__ == '__main__': from mmcv.utils import Config cfg = Config.fromfile('mono/configs/Apolloscape_DDAD/convnext_base.cascade.1m.sgd.mae.py') dataset_i = IBIMSDataset(cfg['Apolloscape'], 'train', **cfg.data_basic) print(dataset_i)