Spaces:

yushihu
/

depthanything

Runtime error

App Files Files Community

depthanything / metric_depth /depth_to_pointcloud.py

yushihu

Upload folder using huggingface_hub

6bb1ad5 verified 11 months ago

raw

history blame

3.46 kB

	# Born out of Issue 36.
	# Allows the user to set up their own test files to run inference on. Before running this script, create a folder my_test in the metric_depth directory and add the subfolders input and output to it.
	# Make sure you have the necessary libraries
	# Code by @1ssb

	import argparse
	import os
	import glob
	import torch
	import numpy as np
	from PIL import Image
	import torchvision.transforms as transforms
	import open3d as o3d
	from tqdm import tqdm
	from zoedepth.models.builder import build_model
	from zoedepth.utils.config import get_config

	# Global settings

	# Folder scanned for input images and folder receiving the generated .ply files.
	INPUT_DIR = './my_test/input'
	OUTPUT_DIR = './my_test/output'

	# Size (in pixels) that both the colour image and the predicted depth map
	# are resized to before they are turned into a point cloud.
	FINAL_WIDTH = 256
	FINAL_HEIGHT = 256

	# Focal lengths used for back-projection. FX / FY are used while NYU_DATA is
	# False; when NYU_DATA is True, FL is used for both axes instead.
	NYU_DATA = False
	FX = 256 * 0.6
	FY = 256 * 0.6
	FL = 715.0873

	# Dataset name passed to get_config() and to the model call. Kept as 'nyu'
	# to stay compatible with the model's dataloader (per the original author's note).
	DATASET = 'nyu'

	def process_images(model):
	    """Write one coloured point cloud (.ply) per image found in INPUT_DIR.

	    Every ``*.png`` and ``*.jpg`` file directly inside INPUT_DIR is run through
	    ``model``. The predicted depth map and the colour image are resized to
	    FINAL_WIDTH x FINAL_HEIGHT, each pixel is back-projected to 3D using the
	    configured focal lengths, and the result is saved to OUTPUT_DIR as
	    ``<image stem>.ply``.

	    Args:
	        model: Callable invoked as ``model(image_tensor, dataset=DATASET)``.
	            It may return a tensor, a dict (the ``'metric_depth'`` entry is
	            used, falling back to ``'out'``), or a list/tuple (the last
	            element is used).

	    A failure on one image is printed and does not stop the remaining images
	    from being processed.
	    """
	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(OUTPUT_DIR, exist_ok=True)

	    # Loop-invariant: pick the device once instead of once per image.
	    device = 'cuda' if torch.cuda.is_available() else 'cpu'

	    # The '*' wildcard is essential: a bare '.png' pattern only matches a file
	    # literally named '.png', so no image would ever be picked up.
	    image_paths = sorted(
	        glob.glob(os.path.join(INPUT_DIR, '*.png'))
	        + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
	    )

	    # The normalised pixel grid depends only on the global settings, so it is
	    # built once and reused for every image.
	    focal_length_x, focal_length_y = (FL, FL) if NYU_DATA else (FX, FY)
	    grid_x, grid_y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
	    grid_x = (grid_x - FINAL_WIDTH / 2) / focal_length_x
	    grid_y = (grid_y - FINAL_HEIGHT / 2) / focal_length_y

	    to_tensor = transforms.ToTensor()

	    for image_path in tqdm(image_paths, desc="Processing Images"):
	        try:
	            # The context manager closes the file handle as soon as the
	            # pixels have been converted.
	            with Image.open(image_path) as source_image:
	                color_image = source_image.convert('RGB')
	            image_tensor = to_tensor(color_image).unsqueeze(0).to(device)

	            # Pure inference: no autograd bookkeeping is needed.
	            with torch.no_grad():
	                pred = model(image_tensor, dataset=DATASET)
	            if isinstance(pred, dict):
	                pred = pred.get('metric_depth', pred.get('out'))
	            elif isinstance(pred, (list, tuple)):
	                pred = pred[-1]
	            pred = pred.squeeze().detach().cpu().numpy()

	            # Resize color image and depth to final size
	            resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
	            resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)

	            # Back-project: scale the normalised grid by the per-pixel depth.
	            z = np.array(resized_pred)
	            points = np.stack((grid_x * z, grid_y * z, z), axis=-1).reshape(-1, 3)
	            colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

	            pcd = o3d.geometry.PointCloud()
	            pcd.points = o3d.utility.Vector3dVector(points)
	            pcd.colors = o3d.utility.Vector3dVector(colors)

	            stem = os.path.splitext(os.path.basename(image_path))[0]
	            o3d.io.write_point_cloud(os.path.join(OUTPUT_DIR, stem + ".ply"), pcd)
	        except Exception as e:
	            # Deliberate best-effort: report the failure and move on to the
	            # next image rather than aborting the whole batch.
	            print(f"Error processing {image_path}: {e}")

	def main(model_name, pretrained_resource):
	    """Build the requested model and run point-cloud generation with it.

	    Args:
	        model_name: Model name forwarded to ``get_config`` (together with the
	            "eval" mode and DATASET).
	        pretrained_resource: Value stored on the config as
	            ``pretrained_resource`` before the model is built.
	    """
	    cfg = get_config(model_name, "eval", DATASET)
	    cfg.pretrained_resource = pretrained_resource

	    net = build_model(cfg)
	    # Use the GPU when one is available, otherwise stay on the CPU.
	    if torch.cuda.is_available():
	        target_device = 'cuda'
	    else:
	        target_device = 'cpu'
	    net = net.to(target_device)
	    net.eval()

	    process_images(net)

	if __name__ == '__main__':
	    # Command-line entry point: read the model name and the weights resource,
	    # then hand both to main().
	    cli = argparse.ArgumentParser()
	    cli.add_argument(
	        "-m",
	        "--model",
	        type=str,
	        default='zoedepth',
	        help="Name of the model to test",
	    )
	    cli.add_argument(
	        "-p",
	        "--pretrained_resource",
	        type=str,
	        default='local::./checkpoints/depth_anything_metric_depth_indoor.pt',
	        help="Pretrained resource to use for fetching weights.",
	    )

	    options = cli.parse_args()
	    main(options.model, options.pretrained_resource)