import os
import json
import torch
import torchvision.transforms as transforms
import os.path
import numpy as np
import cv2
from PIL import Image
from torch.utils.data import Dataset
import random
from .__base_dataset__ import BaseDataset
import h5py


def creat_uv_mesh(H, W):
    """Build homogeneous pixel coordinates for an H x W image.

    Args:
        H: image height in pixels.
        W: image width in pixels.

    Returns:
        A float64 array of shape (3, H*W) whose rows are the x (column)
        coordinate, the y (row) coordinate and a constant 1, enumerated in
        row-major pixel order.
    """
    # NOTE: `np.float` was removed in NumPy 1.24; it was an alias of the
    # builtin `float`, i.e. float64, so `np.float64` keeps the same dtype.
    y, x = np.meshgrid(
        np.arange(0, H, dtype=np.float64),
        np.arange(0, W, dtype=np.float64),
        indexing='ij',
    )
    xy = np.stack((x, y)).reshape(2, -1)
    ones = np.ones((1, H * W), dtype=np.float64)
    return np.concatenate([xy, ones], axis=0)


class HypersimDataset(BaseDataset):
    """Dataset wrapper for Hypersim samples.

    Depth and normal maps are read from HDF5 files (key ``"dataset"``), and
    the normals are re-oriented against the back-projected depth points.
    """

    def __init__(self, cfg, phase, **kwargs):
        """Initialise the base dataset and cache the default pixel grid.

        Args:
            cfg: configuration object; ``cfg.metric_scale`` is read here.
            phase: forwarded unchanged to ``BaseDataset``.
            **kwargs: forwarded unchanged to ``BaseDataset``.
        """
        super().__init__(cfg=cfg, phase=phase, **kwargs)
        self.metric_scale = cfg.metric_scale
        #self.cap_range = self.depth_range # in meter

        # init uv
        # meshgrid for depth reprojection, precomputed for 768x1024 images so
        # align_normal() does not rebuild it for every sample of that size.
        self.xy = creat_uv_mesh(768, 1024)

    def load_batch(self, meta_data, data_path):
        """Load every modality of one sample.

        Args:
            meta_data: per-sample annotation dict; ``meta_data['cam_in']`` is
                used as the camera intrinsics.
            data_path: dict of paths as returned by ``load_data_path``.

        Returns:
            dict with keys ``curr_rgb``, ``curr_depth``, ``curr_sem``,
            ``curr_normal`` and ``curr_cam_model``.
        """
        curr_intrinsic = meta_data['cam_in']
        # load rgb/depth
        curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path'])
        # get semantic labels
        curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth)
        # create camera model
        curr_cam_model = self.create_cam_model(curr_rgb.shape[0], curr_rgb.shape[1], curr_intrinsic)
        # get normal labels
        # !!! this is diff of BaseDataset: depth and intrinsics are passed so
        # the normals can be re-oriented against the back-projected points.
        curr_normal = self.load_norm_label(
            data_path['normal_path'],
            H=curr_rgb.shape[0],
            W=curr_rgb.shape[1],
            depth=curr_depth,
            K=curr_intrinsic,
        )
        # get depth mask; pixels outside the mask are marked with -1.
        # This happens after the normals are aligned, so align_normal() sees
        # the unmasked depth.
        depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path'])
        curr_depth[~depth_mask] = -1
        data_batch = dict(
            curr_rgb=curr_rgb,
            curr_depth=curr_depth,
            curr_sem=curr_sem,
            curr_normal=curr_normal,
            curr_cam_model=curr_cam_model,
        )
        return data_batch

    def load_data_path(self, meta_data):
        """Resolve the on-disk paths of one sample.

        Side effect: sets ``meta_data['rgb']`` to
        ``meta_data['rgbs']['rgb_color']``.

        Args:
            meta_data: per-sample annotation dict. Must contain ``'rgbs'``
                and ``'depth'``; ``'sem'``, ``'normal'`` and ``'depth_mask'``
                are optional.

        Returns:
            dict with keys ``rgb_path``, ``depth_path``, ``sem_path``,
            ``normal_path`` and ``depth_mask_path``. The last three are
            ``None`` when the matching root is ``None`` or the annotation is
            missing or ``None``.
        """
        # 'rgbs': {'rgb_color': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.color.jpg',
        #          'rgb_gamma': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.gamma.jpg',
        #          'rgb_tonemap': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.tonemap.jpg',
        #          'rgb_raw': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_hdf5/frame.0008.color.hdf5'}
        meta_data['rgb'] = meta_data['rgbs']['rgb_color']  # this is diff of BaseDataset

        def optional_path(root, key):
            # Join only when both the root directory and the annotation exist.
            if root is None or meta_data.get(key) is None:
                return None
            return os.path.join(root, meta_data[key])

        data_path = dict(
            rgb_path=os.path.join(self.data_root, meta_data['rgb']),
            depth_path=os.path.join(self.depth_root, meta_data['depth']),
            sem_path=optional_path(self.sem_root, 'sem'),
            normal_path=optional_path(self.norm_root, 'normal'),
            depth_mask_path=optional_path(self.depth_mask_root, 'depth_mask'),
        )
        return data_path

    def load_rgb_depth(self, rgb_path: str, depth_path: str):
        """
        Load the rgb and depth map with the paths.

        Args:
            rgb_path: path handed to ``self.load_data(..., is_rgb_img=True)``.
            depth_path: path of an HDF5 file holding the depth map under the
                ``"dataset"`` key.

        Returns:
            Tuple ``(rgb, depth)``; ``depth`` is float64 and has already been
            passed through ``process_depth``.
        """
        rgb = self.load_data(rgb_path, is_rgb_img=True)
        if rgb is None:
            self.logger.info(f'>>>>{rgb_path} has errors.')

        # depth = self.load_data(depth_path)
        with h5py.File(depth_path, "r") as f:
            depth = f["dataset"][:]
        # fill nan in gt. The former `depth is None` check was removed: it
        # was unreachable, because a failed read raises inside the `with`
        # block before any such check could run.
        np.nan_to_num(depth, copy=False, nan=0)

        # `np.float` was removed in NumPy 1.24; float64 is the same dtype.
        depth = depth.astype(np.float64)
        depth = self.process_depth(depth, rgb)
        return rgb, depth

    def load_norm_label(self, norm_path, H, W, depth, K):
        """Load the normal map from HDF5 and align its orientation.

        Args:
            norm_path: path of an HDF5 file holding the normals under the
                ``"dataset"`` key.
            H: image height.
            W: image width.
            depth: depth map used by ``align_normal``.
            K: camera intrinsics, forwarded to ``align_normal``.

        Returns:
            The float64 normal map returned by ``align_normal``.
        """
        with h5py.File(norm_path, "r") as f:
            normal = f["dataset"][:]
        np.nan_to_num(normal, copy=False, nan=0)
        # Negate the 2nd and 3rd components.
        # NOTE(review): presumably a camera-convention change - confirm.
        normal[:, :, 1:] *= -1
        # `np.float` was removed in NumPy 1.24; float64 is the same dtype.
        normal = normal.astype(np.float64)

        return self.align_normal(normal, depth, K, H, W)

    def process_depth(self, depth: np.ndarray, rgb: np.ndarray) -> np.ndarray:
        """Zero out depths above 60000 and divide by ``self.metric_scale``.

        The thresholding modifies ``depth`` in place; the scaled result is a
        new array. ``rgb`` is not used here.
        """
        depth[depth > 60000] = 0
        depth = depth / self.metric_scale
        return depth

    def align_normal(self, normal, depth, K, H, W):
        '''
        Orientation of surface normals in hypersim is not always consistent
        see https://github.com/apple/ml-hypersim/issues/26

        Flips, in place, every normal whose dot product with the
        back-projected camera-space point is positive.

        Args:
            normal: array indexed as (H, W, 3); modified in place.
            depth: array broadcastable against a (3, H, W) point grid.
            K: intrinsics indexed as ``K[0], K[1]`` (diagonal entries) and
                ``K[2], K[3]`` (last-column entries) of the 3x3 matrix.
            H: image height.
            W: image width.

        Returns:
            ``normal`` with the offending vectors negated.
        '''
        # inv K
        intrinsic_mat = np.array([[K[0], 0, K[2]],
                                  [0, K[1], K[3]],
                                  [0, 0, 1]])
        inv_K = np.linalg.inv(intrinsic_mat)

        # reprojection depth to camera points
        if H == 768 and W == 1024:
            xy = self.xy  # reuse the grid cached in __init__
        else:
            print('img size no-equal 768x1024')
            xy = creat_uv_mesh(H, W)
        points = np.matmul(inv_K, xy).reshape(3, H, W)
        points = depth * points
        points = points.transpose((1, 2, 0))

        # align normal
        orient_mask = np.sum(normal * points, axis=2) > 0
        normal[orient_mask] *= -1

        return normal