Spaces:
Running
Running
import os | |
import json | |
import torch | |
import torchvision.transforms as transforms | |
import os.path | |
import numpy as np | |
import cv2 | |
from PIL import Image | |
from torch.utils.data import Dataset | |
import random | |
from .__base_dataset__ import BaseDataset | |
import h5py | |
def creat_uv_mesh(H, W): | |
y, x = np.meshgrid(np.arange(0, H, dtype=np.float), np.arange(0, W, dtype=np.float), indexing='ij') | |
meshgrid = np.stack((x,y)) | |
ones = np.ones((1,H*W), dtype=np.float) | |
xy = meshgrid.reshape(2, -1) | |
return np.concatenate([xy, ones], axis=0) | |
class HypersimDataset(BaseDataset): | |
def __init__(self, cfg, phase, **kwargs): | |
super(HypersimDataset, self).__init__( | |
cfg=cfg, | |
phase=phase, | |
**kwargs) | |
self.metric_scale = cfg.metric_scale | |
#self.cap_range = self.depth_range # in meter | |
# init uv | |
# meshgrid for depth reprojection | |
self.xy = creat_uv_mesh(768, 1024) | |
def load_batch(self, meta_data, data_path): | |
curr_intrinsic = meta_data['cam_in'] | |
# load rgb/depth | |
curr_rgb, curr_depth = self.load_rgb_depth(data_path['rgb_path'], data_path['depth_path']) | |
# get semantic labels | |
curr_sem = self.load_sem_label(data_path['sem_path'], curr_depth) | |
# create camera model | |
curr_cam_model = self.create_cam_model(curr_rgb.shape[0], curr_rgb.shape[1], curr_intrinsic) | |
# get normal labels | |
curr_normal = self.load_norm_label(data_path['normal_path'], H=curr_rgb.shape[0], W=curr_rgb.shape[1], depth=curr_depth, K=curr_intrinsic) # !!! this is diff of BaseDataset | |
# get depth mask | |
depth_mask = self.load_depth_valid_mask(data_path['depth_mask_path']) | |
curr_depth[~depth_mask] = -1 | |
data_batch = dict( | |
curr_rgb = curr_rgb, | |
curr_depth = curr_depth, | |
curr_sem = curr_sem, | |
curr_normal = curr_normal, | |
curr_cam_model=curr_cam_model, | |
) | |
return data_batch | |
def load_data_path(self, meta_data): | |
# 'rgbs': {'rgb_color': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.color.jpg', | |
# 'rgb_gamma': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.gamma.jpg', | |
# 'rgb_tonemap': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_preview/frame.0008.tonemap.jpg', | |
# 'rgb_raw': 'Hypersim/data/ai_001_001/images/scene_cam_00_final_hdf5/frame.0008.color.hdf5'} | |
meta_data['rgb'] = meta_data['rgbs']['rgb_color'] # this is diff of BaseDataset | |
curr_rgb_path = os.path.join(self.data_root, meta_data['rgb']) | |
curr_depth_path = os.path.join(self.depth_root, meta_data['depth']) | |
curr_sem_path = os.path.join(self.sem_root, meta_data['sem']) \ | |
if self.sem_root is not None and ('sem' in meta_data) and (meta_data['sem'] is not None) \ | |
else None | |
curr_norm_path = os.path.join(self.norm_root, meta_data['normal']) \ | |
if ('normal' in meta_data) and (meta_data['normal'] is not None) and (self.norm_root is not None) \ | |
else None | |
curr_depth_mask_path = os.path.join(self.depth_mask_root, meta_data['depth_mask']) \ | |
if self.depth_mask_root is not None and ('depth_mask' in meta_data) and (meta_data['depth_mask'] is not None) \ | |
else None | |
data_path=dict( | |
rgb_path=curr_rgb_path, | |
depth_path=curr_depth_path, | |
sem_path=curr_sem_path, | |
normal_path=curr_norm_path, | |
depth_mask_path=curr_depth_mask_path, | |
) | |
return data_path | |
def load_rgb_depth(self, rgb_path: str, depth_path: str): | |
""" | |
Load the rgb and depth map with the paths. | |
""" | |
rgb = self.load_data(rgb_path, is_rgb_img=True) | |
if rgb is None: | |
self.logger.info(f'>>>>{rgb_path} has errors.') | |
# depth = self.load_data(depth_path) | |
with h5py.File(depth_path, "r") as f: depth = f["dataset"][:] | |
np.nan_to_num(depth, copy=False, nan=0) # fill nan in gt | |
if depth is None: | |
self.logger.info(f'{depth_path} has errors.') | |
depth = depth.astype(np.float) | |
depth = self.process_depth(depth, rgb) | |
return rgb, depth | |
def load_norm_label(self, norm_path, H, W, depth, K): | |
with h5py.File(norm_path, "r") as f: | |
normal = f["dataset"][:] | |
np.nan_to_num(normal, copy=False, nan=0) | |
normal[:,:,1:] *= -1 | |
normal = normal.astype(np.float) | |
return self.align_normal(normal, depth, K, H, W) | |
def process_depth(self, depth: np.array, rgb: np.array) -> np.array: | |
depth[depth>60000] = 0 | |
depth = depth / self.metric_scale | |
return depth | |
def align_normal(self, normal, depth, K, H, W): | |
''' | |
Orientation of surface normals in hypersim is not always consistent | |
see https://github.com/apple/ml-hypersim/issues/26 | |
''' | |
# inv K | |
K = np.array([[K[0], 0 ,K[2]], | |
[0, K[1], K[3]], | |
[0, 0, 1]]) | |
inv_K = np.linalg.inv(K) | |
# reprojection depth to camera points | |
if H == 768 and W == 1024: | |
xy = self.xy | |
else: | |
print('img size no-equal 768x1024') | |
xy = creat_uv_mesh(H, W) | |
points = np.matmul(inv_K[:3, :3], xy).reshape(3, H, W) | |
points = depth * points | |
points = points.transpose((1,2,0)) | |
# align normal | |
orient_mask = np.sum(normal * points, axis=2) > 0 | |
normal[orient_mask] *= -1 | |
return normal |