# Born out of Issue 36.
# Allows the user to set up own test files to infer on (Create a folder
# my_test and add subfolders input and output in the metric_depth directory
# before running this script.)
# Make sure you have the necessary libraries
# Code by @1ssb
import argparse
import glob
import os

import numpy as np
import open3d as o3d
import torch
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm

from zoedepth.models.builder import build_model
from zoedepth.utils.config import get_config

# Global settings
FL = 715.0873            # NYU-style focal length, used for both axes when NYU_DATA
FY = 256 * 0.6
FX = 256 * 0.6
NYU_DATA = False
FINAL_HEIGHT = 256
FINAL_WIDTH = 256
INPUT_DIR = './my_test/input'
OUTPUT_DIR = './my_test/output'
DATASET = 'nyu'  # Lets not pick a fight with the model's dataloader

# Resolve the device once instead of re-evaluating it per image / per call.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'


def process_images(model):
    """Run metric-depth inference on every PNG/JPG in INPUT_DIR and write a
    colored point cloud (one .ply per image) into OUTPUT_DIR.

    Args:
        model: a ZoeDepth-style model; called as model(tensor, dataset=DATASET)
            and expected to return a tensor, a dict with 'metric_depth'/'out',
            or a list/tuple whose last element is the prediction.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    image_paths = glob.glob(os.path.join(INPUT_DIR, '*.png')) + glob.glob(os.path.join(INPUT_DIR, '*.jpg'))
    for image_path in tqdm(image_paths, desc="Processing Images"):
        try:
            color_image = Image.open(image_path).convert('RGB')
            image_tensor = transforms.ToTensor()(color_image).unsqueeze(0).to(DEVICE)

            # Pure inference: no_grad stops autograd from building a graph,
            # which otherwise wastes (and on CUDA accumulates) memory.
            with torch.no_grad():
                pred = model(image_tensor, dataset=DATASET)
            if isinstance(pred, dict):
                pred = pred.get('metric_depth', pred.get('out'))
            elif isinstance(pred, (list, tuple)):
                pred = pred[-1]
            pred = pred.squeeze().detach().cpu().numpy()

            # Resize color image and depth to final size. NEAREST for depth so
            # interpolation does not invent in-between depth values at edges.
            resized_color_image = color_image.resize((FINAL_WIDTH, FINAL_HEIGHT), Image.LANCZOS)
            resized_pred = Image.fromarray(pred).resize((FINAL_WIDTH, FINAL_HEIGHT), Image.NEAREST)

            # Back-project every pixel to 3-D with a pinhole camera model
            # centered at the image midpoint.
            focal_length_x, focal_length_y = (FX, FY) if not NYU_DATA else (FL, FL)
            x, y = np.meshgrid(np.arange(FINAL_WIDTH), np.arange(FINAL_HEIGHT))
            x = (x - FINAL_WIDTH / 2) / focal_length_x
            y = (y - FINAL_HEIGHT / 2) / focal_length_y
            z = np.array(resized_pred)
            points = np.stack((x * z, y * z, z), axis=-1).reshape(-1, 3)
            colors = np.array(resized_color_image).reshape(-1, 3) / 255.0

            pcd = o3d.geometry.PointCloud()
            pcd.points = o3d.utility.Vector3dVector(points)
            pcd.colors = o3d.utility.Vector3dVector(colors)
            o3d.io.write_point_cloud(
                os.path.join(OUTPUT_DIR, os.path.splitext(os.path.basename(image_path))[0] + ".ply"),
                pcd)
        except Exception as e:
            # Best-effort batch run: report the failing image and keep going
            # instead of aborting the remaining images.
            print(f"Error processing {image_path}: {e}")


def main(model_name, pretrained_resource):
    """Build the requested model, attach the pretrained weights, and run
    process_images over INPUT_DIR."""
    config = get_config(model_name, "eval", DATASET)
    config.pretrained_resource = pretrained_resource
    model = build_model(config).to(DEVICE)
    model.eval()  # disable dropout/batch-norm updates for inference
    process_images(model)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default='zoedepth',
                        help="Name of the model to test")
    parser.add_argument("-p", "--pretrained_resource", type=str,
                        default='local::./checkpoints/depth_anything_metric_depth_indoor.pt',
                        help="Pretrained resource to use for fetching weights.")
    args = parser.parse_args()
    main(args.model, args.pretrained_resource)