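# ONNX Runtime inference demo for the Metric3D monocular depth model.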
from typing import Dict, List, Tuple

import cv2
import numpy as np
import onnxruntime as ort
from matplotlib import pyplot as plt


def prepare_input(
    rgb_image: np.ndarray, input_size: Tuple[int, int]
) -> Tuple[Dict[str, np.ndarray], List[int]]:
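    """Resize and pad an RGB image to the model's fixed input size.

    The image is scaled to fit inside input_size with its aspect ratio
    preserved, then padded to exactly input_size using the per-channel
    ImageNet mean as the border value. Returns the ONNX input dict and
    the padding amounts needed to undo the padding on the output.
    """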
    h, w = rgb_image.shape[:2]
    scale = min(input_size[0] / h, input_size[1] / w)
    rgb = cv2.resize(
        rgb_image, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR
    )
    # Pad with the per-channel ImageNet mean (RGB order).
    padding = [123.675, 116.28, 103.53]
    h, w = rgb.shape[:2]
    pad_h = input_size[0] - h
    pad_w = input_size[1] - w
    pad_h_half = pad_h // 2
    pad_w_half = pad_w // 2
    rgb = cv2.copyMakeBorder(
        rgb,
        pad_h_half,
        pad_h - pad_h_half,
        pad_w_half,
        pad_w - pad_w_half,
        cv2.BORDER_CONSTANT,
        value=padding,
    )
    pad_info = [pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
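    # pad_info = [top, bottom, left, right]; main() uses it to crop the
    # padding off the predicted depth map.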
    onnx_input = {
        "image": np.ascontiguousarray(
            np.transpose(rgb, (2, 0, 1))[None], dtype=np.float32
        ),  # 1, 3, H, W
    }
    return onnx_input, pad_info


def main(
    onnx_model="metric3d_vit_small.onnx",
    input_image="data/kitti_demo/rgb/0000000100.png",
):
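    """Run the ONNX model on a dummy tensor, then on a real image.

    Fire (see the __main__ block) exposes the keyword arguments as CLI
    flags, e.g. (the script name here is illustrative):
        python inference_onnx.py --onnx_model metric3d_vit_small.onnx
    """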
    ## Dummy Test
    B = 1
    if "vit" in onnx_model:
        input_size = (616, 1064)  # [H, W] for ViT models
    else:
        input_size = (544, 1216)  # [H, W] for non-ViT models
    dummy_image = np.zeros([B, 3, input_size[0], input_size[1]], dtype=np.float32)
    providers = [
        (
            "CUDAExecutionProvider",
            {"cudnn_conv_use_max_workspace": "0", "device_id": "0"},
        )
    ]
    # providers = [("TensorrtExecutionProvider", {'trt_engine_cache_enable': True, 'trt_fp16_enable': True, 'device_id': 0, 'trt_dla_enable': False})]
    ort_session = ort.InferenceSession(onnx_model, providers=providers)
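    # Sanity-check the session with an all-zeros input before the real image.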
    outputs = ort_session.run(None, {"image": dummy_image})
    print(
        f"Output of the onnxruntime session on the dummy input: outputs[0].shape={outputs[0].shape}"
    )
    ## Real Test
    rgb_image = cv2.imread(input_image)[:, :, ::-1]  # BGR to RGB
    original_shape = rgb_image.shape[:2]
    onnx_input, pad_info = prepare_input(rgb_image, input_size)
    outputs = ort_session.run(None, onnx_input)
    depth = outputs[0].squeeze()  # [H, W]
    # Crop off the padding added in prepare_input, then resize the depth map
    # back to the original image size.
    depth = depth[
        pad_info[0] : input_size[0] - pad_info[1],
        pad_info[2] : input_size[1] - pad_info[3],
    ]
    depth = cv2.resize(
        depth, (original_shape[1], original_shape[0]), interpolation=cv2.INTER_LINEAR
    )
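    # Side-by-side view: predicted depth (left) and input RGB (right).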
    plt.subplot(1, 2, 1)
    plt.imshow(depth)
    plt.subplot(1, 2, 2)
    plt.imshow(rgb_image)
    plt.show()


if __name__ == "__main__":
    from fire import Fire

    Fire(main)
    print("Done!")