import os

import numpy as np
import torch

from .__base_dataset__ import BaseDataset


def creat_uv_mesh(H, W):
    """Create homogeneous pixel coordinates of shape (3, H*W) for back-projection."""
    # np.float was removed in NumPy 1.24; use np.float64 explicitly
    y, x = np.meshgrid(np.arange(0, H, dtype=np.float64), np.arange(0, W, dtype=np.float64), indexing='ij')
    meshgrid = np.stack((x, y))
    ones = np.ones((1, H * W), dtype=np.float64)
    xy = meshgrid.reshape(2, -1)
    return np.concatenate([xy, ones], axis=0)
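
# Sketch of how the uv mesh is typically consumed (illustrative, not part of
# the original pipeline): back-project pixels to camera-space rays with a 3x3
# pinhole intrinsic matrix K, then scale by per-pixel depth. This mirrors what
# align_normal below does internally.
#
#   xy = creat_uv_mesh(H, W)                 # (3, H*W) homogeneous pixels
#   rays = np.linalg.inv(K) @ xy             # unit-depth camera rays
#   points = depth.reshape(1, -1) * rays     # metric 3D points, (3, H*W)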


class DIODEDataset(BaseDataset):
    def __init__(self, cfg, phase, **kwargs):
        super(DIODEDataset, self).__init__(
            cfg=cfg,
            phase=phase,
            **kwargs)
        self.metric_scale = cfg.metric_scale
        # pre-computed meshgrid for depth reprojection (DIODE images are 768x1024)
        self.xy = creat_uv_mesh(768, 1024)

    def get_data_for_test(self, idx: int):
        anno = self.annotations['files'][idx]
        meta_data = self.load_meta_data(anno)
        data_path = self.load_data_path(meta_data)
        data_batch = self.load_batch(meta_data, data_path)
        # load data
        curr_rgb, curr_depth, curr_normal, curr_cam_model = data_batch['curr_rgb'], data_batch['curr_depth'], data_batch['curr_normal'], data_batch['curr_cam_model']
        ori_curr_intrinsic = meta_data['cam_in']
        # get crop size
        transform_paras = dict()
        rgbs, depths, intrinsics, cam_models, _, other_labels, transform_paras = self.img_transforms(
            images=[curr_rgb, ],
            labels=[curr_depth, ],
            intrinsics=[ori_curr_intrinsic, ],
            cam_models=[curr_cam_model, ],
            transform_paras=transform_paras)
        # depth in the original size and original metric
        depth_out = self.clip_depth(curr_depth) * self.depth_range[1]
        # np.bool was removed in NumPy 1.24; use the builtin bool dtype
        inv_depth = self.depth2invdepth(depth_out, np.zeros_like(depth_out, dtype=bool))
        # swap the extension for the output filename
        filename = os.path.basename(meta_data['rgb'])[:-4] + '.jpg'
        curr_intrinsic_mat = self.intrinsics_list2mat(intrinsics[0])
        ori_curr_intrinsic_mat = self.intrinsics_list2mat(ori_curr_intrinsic)
        pad = transform_paras['pad'] if 'pad' in transform_paras else [0, 0, 0, 0]
        scale_ratio = transform_paras['label_scale_factor'] if 'label_scale_factor' in transform_paras else 1.0
        # camera-model pyramid at 1/2, 1/4, ..., 1/32 of the transformed resolution
        cam_models_stacks = [
            torch.nn.functional.interpolate(cam_models[0][None, :, :, :], size=(cam_models[0].shape[1] // i, cam_models[0].shape[2] // i), mode='bilinear', align_corners=False).squeeze()
            for i in [2, 4, 8, 16, 32]
        ]
        raw_rgb = torch.from_numpy(curr_rgb)
        curr_normal = torch.from_numpy(curr_normal.transpose((2, 0, 1)))
        data = dict(input=rgbs[0],
                    target=depth_out,
                    intrinsic=curr_intrinsic_mat,
                    filename=filename,
                    dataset=self.data_name,
                    cam_model=cam_models_stacks,
                    pad=pad,
                    scale=scale_ratio,
                    raw_rgb=raw_rgb,
                    sample_id=idx,
                    data_path=meta_data['rgb'],
                    inv_depth=inv_depth,
                    normal=curr_normal,
                    )
        return data

    # def get_data_for_trainval(self, idx: int):
    #     anno = self.annotations['files'][idx]
    #     meta_data = self.load_meta_data(anno)
    #     data_path = self.load_data_path(meta_data)
    #     data_batch = self.load_batch(meta_data, data_path)
    #     # load data
    #     curr_rgb, curr_depth, curr_normal, curr_sem, curr_cam_model = data_batch['curr_rgb'], data_batch['curr_depth'], data_batch['curr_normal'], data_batch['curr_sem'], data_batch['curr_cam_model']
    #     curr_intrinsic = meta_data['cam_in']
    #     # get crop size
    #     transform_paras = dict(random_crop_size=self.random_crop_size)
    #     rgbs, depths, intrinsics, cam_models, _, other_labels, transform_paras = self.img_transforms(
    #         images=[curr_rgb, ],
    #         labels=[curr_depth, ],
    #         intrinsics=[curr_intrinsic, ],
    #         cam_models=[curr_cam_model, ],
    #         other_labels=[curr_sem, ],
    #         transform_paras=transform_paras)
    #     # process sky masks
    #     sem_mask = other_labels[0].int()
    #     # clip depth map
    #     depth_out = self.normalize_depth(depths[0])
    #     # invalidate sky regions (alternatively, set them to the maximum depth)
    #     depth_out[sem_mask == 142] = -1
    #     filename = os.path.basename(meta_data['rgb'])
    #     curr_intrinsic_mat = self.intrinsics_list2mat(intrinsics[0])
    #     cam_models_stacks = [
    #         torch.nn.functional.interpolate(cam_models[0][None, :, :, :], size=(cam_models[0].shape[1] // i, cam_models[0].shape[2] // i), mode='bilinear', align_corners=False).squeeze()
    #         for i in [2, 4, 8, 16, 32]
    #     ]
    #     pad = transform_paras['pad'] if 'pad' in transform_paras else [0, 0, 0, 0]
    #     data = dict(input=rgbs[0],
    #                 target=depth_out,
    #                 intrinsic=curr_intrinsic_mat,
    #                 filename=filename,
    #                 dataset=self.data_name,
    #                 cam_model=cam_models_stacks,
    #                 pad=torch.tensor(pad),
    #                 data_type=[self.data_type, ],
    #                 sem_mask=sem_mask.int())
    #     return data

    def load_batch(self, meta_data, data_path):
        curr_intrinsic = meta_data['cam_in']
        # load rgb/depth
        curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path'])
        # get semantic labels
        curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth)
        # create camera model
        curr_cam_model = self.create_cam_model(curr_rgb.shape[0], curr_rgb.shape[1], curr_intrinsic)
        # get normal labels
        try:
            # NOTE: unlike BaseDataset, this variant also takes depth and K so the
            # loaded normals can be re-oriented toward the camera (see align_normal)
            curr_normal = self.load_norm_label(data_path['normal_path'], H=curr_rgb.shape[0], W=curr_rgb.shape[1], depth=curr_depth, K=curr_intrinsic)
        except Exception:
            # fall back to an all-zero normal map when no normal annotation exists
            curr_normal = np.zeros_like(curr_rgb)
        # mask out invalid depth values
        depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path'])
        curr_depth[~depth_mask] = -1
        data_batch = dict(
            curr_rgb=curr_rgb,
            curr_depth=curr_depth,
            curr_sem=curr_sem,
            curr_normal=curr_normal,
            curr_cam_model=curr_cam_model,
        )
        return data_batch

    def load_norm_label(self, norm_path, H, W, depth, K):
        normal = np.load(norm_path)
        # flip the y and z components to match this codebase's camera convention
        normal[:, :, 1:] *= -1
        normal = self.align_normal(normal, depth, K, H, W)
        return normal

    def process_depth(self, depth, rgb):
        # zero out implausible values, then rescale by the dataset's metric scale
        depth[depth > 150] = 0
        depth[depth < 0.1] = 0
        depth /= self.metric_scale
        return depth
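
    # Illustrative behaviour of process_depth (assuming metric_scale == 1.0,
    # so the rescale is a no-op):
    #
    #   process_depth(np.array([0.05, 1.0, 200.0]), rgb)  # -> array([0., 1., 0.])
    #
    # i.e. depths outside the (0.1, 150) window are zeroed as invalid.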

    def align_normal(self, normal, depth, K, H, W):
        """Orient normals so that they point toward the camera.

        normal: (H, W, 3) surface normals in camera space
        depth:  (H, W) metric depth
        K:      intrinsics as a list [fx, fy, cx, cy]
        """
        # build the 3x3 intrinsic matrix and invert it
        K = np.array([[K[0], 0, K[2]],
                      [0, K[1], K[3]],
                      [0, 0, 1]])
        inv_K = np.linalg.inv(K)
        # reproject depth to camera-space points
        if H == 768 and W == 1024:
            # reuse the pre-computed mesh for DIODE's native resolution
            xy = self.xy
        else:
            print('image size is not 768x1024, rebuilding the uv mesh')
            xy = creat_uv_mesh(H, W)
        points = np.matmul(inv_K[:3, :3], xy).reshape(3, H, W)
        points = depth * points
        points = points.transpose((1, 2, 0))
        # flip normals that face away from the camera: for a visible surface,
        # the normal and the viewing ray should have a non-positive dot product
        orient_mask = np.sum(normal * points, axis=2) > 0
        normal[orient_mask] *= -1
        return normal
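
    # Minimal sketch of the orientation test above (illustrative): a surface
    # point p = depth * inv(K) @ [u, v, 1] also gives the viewing ray from the
    # camera, so n . p > 0 means the normal n points away from the camera and
    # must be flipped:
    #
    #   p = np.array([0.0, 0.0, 2.0])   # point 2 m straight ahead
    #   n = np.array([0.0, 0.0, 1.0])   # normal pointing away from the camera
    #   (n * p).sum() > 0               # True -> flip n to [0, 0, -1]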


if __name__ == '__main__':
    # smoke test: build the dataset from a sample config and print it
    from mmcv.utils import Config
    cfg = Config.fromfile('mono/configs/Apolloscape_DDAD/convnext_base.cascade.1m.sgd.mae.py')
    dataset_i = DIODEDataset(cfg['Apolloscape'], 'train', **cfg.data_basic)
    print(dataset_i)