import io
from collections import defaultdict, deque
from typing import Tuple, List

import cv2
import numpy as np
from PIL import Image as PImage
from scipy.optimize import minimize
from scipy.spatial.distance import cdist


def empty_solution():
    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
    return np.zeros((2, 3)), [(0, 1)]


def read_colmap_rec(colmap_data):
    '''Load a pycolmap.Reconstruction from zipped COLMAP data (bytes).'''
    import pycolmap
    import tempfile
    import zipfile

    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
            zf.extractall(tmpdir)
        rec = pycolmap.Reconstruction(tmpdir)
    return rec


def convert_entry_to_human_readable(entry):
    out = {}
    for k, v in entry.items():
        if 'colmap' in k:
            out[k] = read_colmap_rec(v)
        elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't']:
            out[k] = np.array(v)
        else:
            out[k] = v
    out['__key__'] = entry['order_id']
    return out
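

# Illustrative sketch (hypothetical entry, not part of the pipeline): colmap
# keys become reconstructions, known wireframe/camera keys become arrays, and
# everything else passes through unchanged.
def _demo_convert_entry():
    entry = {'K': [[1, 0, 0], [0, 1, 0], [0, 0, 1]], 'order_id': 'abc', 'foo': 7}
    out = convert_entry_to_human_readable(entry)
    assert isinstance(out['K'], np.ndarray)
    assert out['__key__'] == 'abc' and out['foo'] == 7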


def point_to_segment_dist(pt, seg_p1, seg_p2):
    """
    Computes the Euclidean distance from pt to the line segment seg_p1->seg_p2.
    pt, seg_p1, seg_p2: (x, y) as np.ndarray
    """
    if np.allclose(seg_p1, seg_p2):
        # Degenerate segment: fall back to point-to-point distance.
        return np.linalg.norm(pt - seg_p1)
    seg_vec = seg_p2 - seg_p1
    pt_vec = pt - seg_p1
    seg_len2 = seg_vec.dot(seg_vec)
    # Clamp the projection parameter to [0, 1] so the foot stays on the segment.
    t = max(0.0, min(1.0, pt_vec.dot(seg_vec) / seg_len2))
    proj = seg_p1 + t * seg_vec
    return np.linalg.norm(pt - proj)
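

# Illustrative sketch (synthetic points, not used by the pipeline): expected
# behaviour of point_to_segment_dist on a horizontal segment.
def _demo_point_to_segment_dist():
    p1, p2 = np.array([0.0, 0.0]), np.array([4.0, 0.0])
    # Foot of the perpendicular lies inside the segment.
    assert np.isclose(point_to_segment_dist(np.array([2.0, 3.0]), p1, p2), 3.0)
    # Foot lies beyond an endpoint -> distance to the nearest endpoint.
    assert np.isclose(point_to_segment_dist(np.array([6.0, 0.0]), p1, p2), 2.0)
    # Degenerate segment -> point-to-point distance.
    assert np.isclose(point_to_segment_dist(np.array([1.0, 1.0]), p1, p1), np.sqrt(2.0))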


def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
    """
    Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
    For each connected component, we do a line fit with cv2.fitLine, then measure
    segment endpoints more robustly. We then associate apex points that are within
    'edge_th' of the line segment. We record those apex-apex connections as edges
    if at least 2 apexes lie near the same component line.
    """
    from hoho.color_mappings import gestalt_color_mapping

    vertices = []

    # Apex vertices: centroids of connected components of the apex color.
    apex_color = np.array(gestalt_color_mapping['apex'])
    apex_mask = cv2.inRange(gest_seg_np, apex_color - 0.5, apex_color + 0.5)
    if apex_mask.sum() > 0:
        output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        stats, centroids = stats[1:], centroids[1:]  # drop background label 0
        for i in range(numLabels - 1):
            vertices.append({"xy": centroids[i], "type": "apex"})

    # Eave-end vertices, detected the same way.
    eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
    eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color - 0.5, eave_end_color + 0.5)
    if eave_end_mask.sum() > 0:
        output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        stats, centroids = stats[1:], centroids[1:]
        for i in range(numLabels - 1):
            vertices.append({"xy": centroids[i], "type": "eave_end_point"})

    apex_pts = []
    apex_idx_map = []
    for idx, v in enumerate(vertices):
        apex_pts.append(v['xy'])
        apex_idx_map.append(idx)
    apex_pts = np.array(apex_pts)

    connections = []
    edge_classes = ['eave', 'ridge', 'rake', 'valley']
    for edge_class in edge_classes:
        edge_color = np.array(gestalt_color_mapping[edge_class])
        mask_raw = cv2.inRange(gest_seg_np, edge_color - 0.5, edge_color + 0.5)

        # Close small gaps so a broken edge is treated as one component.
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, kernel)

        if mask.sum() == 0:
            continue

        output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        stats, centroids = stats[1:], centroids[1:]
        label_indices = range(1, numLabels)

        for lbl in label_indices:
            ys, xs = np.where(labels == lbl)
            if len(xs) < 2:
                continue

            pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
            line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
                                      param=0, reps=0.01, aeps=0.01)
            vx, vy, x0, y0 = line_params.ravel()

            # Segment endpoints: extreme projections of the component's
            # pixels onto the fitted direction.
            proj = (xs - x0) * vx + (ys - y0) * vy
            proj_min, proj_max = proj.min(), proj.max()
            p1 = np.array([x0 + proj_min * vx, y0 + proj_min * vy])
            p2 = np.array([x0 + proj_max * vx, y0 + proj_max * vy])

            if len(apex_pts) < 2:
                continue

            dists = np.array([
                point_to_segment_dist(apex_pts[i], p1, p2)
                for i in range(len(apex_pts))
            ])

            near_mask = (dists <= edge_th)
            near_indices = np.where(near_mask)[0]
            if len(near_indices) < 2:
                continue

            # Connect every pair of vertices lying near this component's line.
            for i in range(len(near_indices)):
                for j in range(i + 1, len(near_indices)):
                    vA = apex_idx_map[near_indices[i]]
                    vB = apex_idx_map[near_indices[j]]
                    connections.append(tuple(sorted((vA, vB))))

    return vertices, connections
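

# Minimal sketch of the line-fitting step above, on synthetic pixels: fit a
# direction with cv2.fitLine, then recover segment endpoints by projecting the
# pixels onto that direction. The data here is made up for illustration.
def _demo_fitline_endpoints():
    xs = np.arange(10, 50, dtype=np.float32)
    ys = 0.5 * xs + 3.0
    pts = np.column_stack([xs, ys]).astype(np.float32)
    vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).ravel()
    proj = (xs - x0) * vx + (ys - y0) * vy
    p1 = np.array([x0 + proj.min() * vx, y0 + proj.min() * vy])
    p2 = np.array([x0 + proj.max() * vx, y0 + proj.max() * vy])
    # Endpoints should sit at the extremes of the synthetic strip.
    assert np.allclose(sorted([p1[0], p2[0]]), [10.0, 49.0], atol=0.5)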


def get_uv_depth(vertices, depth_fitted, sparse_depth, search_radius=10):
    """
    For each vertex, returns a 2D array of (u,v) and a matching 1D array of depths.

    We attempt to use the sparse_depth if available in a local neighborhood:
    1. For each vertex coordinate (x, y), define a local window in sparse_depth
       of size (2*search_radius + 1).
    2. Collect all valid (nonzero) values in that window.
    3. If any exist, we take the median (robust) as the vertex depth.
    4. Otherwise, we use depth_fitted[y, x].

    Parameters
    ----------
    vertices : List[dict]
        Each dict must have "xy" at least, e.g. {"xy": (x, y), ...}
    depth_fitted : np.ndarray
        A 2D array (H, W), the dense (or corrected) depth for fallback.
    sparse_depth : np.ndarray
        A 2D array (H, W), mostly zeros except where accurate data is available.
    search_radius : int
        Pixel radius around the vertex in which to look for sparse depth values.

    Returns
    -------
    uv : np.ndarray of shape (N, 2)
        2D float coordinates of each vertex (x, y).
    vertex_depth : np.ndarray of shape (N,)
        Depth value chosen for each vertex.
    """
    uv = np.array([v['xy'] for v in vertices], dtype=np.float32)
    uv_int = np.round(uv).astype(np.int32)

    H, W = depth_fitted.shape[:2]
    uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
    uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)

    vertex_depth = np.zeros(len(vertices), dtype=np.float32)
    for i, (x_i, y_i) in enumerate(uv_int):
        # Clip the search window to the image bounds.
        x0 = max(0, x_i - search_radius)
        x1 = min(W, x_i + search_radius + 1)
        y0 = max(0, y_i - search_radius)
        y1 = min(H, y_i + search_radius + 1)

        region = sparse_depth[y0:y1, x0:x1]
        valid_vals = region[region > 0]
        if len(valid_vals) > 0:
            # Median of the nearby sparse samples is robust to outliers.
            vertex_depth[i] = np.median(valid_vals)
        else:
            # Fall back to the (scale-corrected) dense depth.
            vertex_depth[i] = depth_fitted[y_i, x_i]

    return uv, vertex_depth
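

# Illustrative sketch of the sparse-depth lookup (synthetic maps): one vertex
# has sparse support nearby (the median wins), the other falls back to the
# dense map.
def _demo_get_uv_depth():
    dense = np.full((20, 20), 5.0, dtype=np.float32)
    sparse = np.zeros((20, 20), dtype=np.float32)
    sparse[4, 4], sparse[5, 5], sparse[6, 4] = 2.0, 2.2, 2.4
    verts = [{"xy": (5.0, 5.0)}, {"xy": (15.0, 15.0)}]
    uv, d = get_uv_depth(verts, dense, sparse, search_radius=3)
    assert np.isclose(d[0], 2.2)   # median of the three sparse samples
    assert np.isclose(d[1], 5.0)   # no sparse support -> dense fallback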


def merge_vertices_3d(vert_edge_per_image, th=0.5):
    '''Merge vertices that are close to each other in 3D space and are of same types'''
    all_3d_vertices = []
    connections_3d = []
    cur_start = 0
    types = []
    # Concatenate per-image vertices, offsetting edge indices accordingly.
    for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
        types += [int(v['type'] == 'apex') for v in vertices]
        all_3d_vertices.append(vertices_3d)
        connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
        cur_start += len(vertices_3d)
    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)

    # Merge candidates: vertices close in 3D and of the same type.
    distmat = cdist(all_3d_vertices, all_3d_vertices)
    types = np.array(types).reshape(-1, 1)
    same_types = cdist(types, types)
    mask_to_merge = (distmat <= th) & (same_types == 0)
    new_vertices = []
    new_connections = []
    to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
    to_merge_final = defaultdict(list)
    for i in range(len(all_3d_vertices)):
        for j in to_merge:
            if i in j:
                to_merge_final[i] += j
    for k, v in to_merge_final.items():
        to_merge_final[k] = list(set(v))
    # Build disjoint merge groups; each vertex ends up in exactly one group.
    already_there = set()
    merged = []
    for k, v in to_merge_final.items():
        if k in already_there:
            continue
        merged.append(v)
        for vv in v:
            already_there.add(vv)
    # New vertex = mean of its group; remember the old->new index mapping.
    old_idx_to_new = {}
    count = 0
    for idxs in merged:
        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
        for idx in idxs:
            old_idx_to_new[idx] = count
        count += 1

    new_vertices = np.array(new_vertices)

    # Remap edges, dropping self-loops and duplicates.
    for conn in connections_3d:
        new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
        if new_con[0] == new_con[1]:
            continue
        if new_con not in new_connections:
            new_connections.append(new_con)

    return new_vertices, new_connections
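

# Sketch of the merge step with made-up data: two views each contribute two
# apexes; the two 3D points near the origin sit within `th` of each other, so
# they collapse to their mean and three vertices remain.
def _demo_merge_vertices_3d():
    per_image = {
        0: ([{"type": "apex", "xy": (0, 0)}, {"type": "apex", "xy": (1, 1)}],
            [(0, 1)],
            np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]])),
        1: ([{"type": "apex", "xy": (0, 0)}, {"type": "apex", "xy": (1, 1)}],
            [(0, 1)],
            np.array([[0.05, 0.0, 0.0], [1.0, 1.0, 0.0]])),
    }
    verts, conns = merge_vertices_3d(per_image, th=0.2)
    assert len(verts) == 3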


def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
    """
    Prune vertices not connected to anything. If keep_largest=True, also
    keep only the largest connected component in the graph.
    """
    if len(all_3d_vertices) == 0:
        return np.empty((0, 3)), []

    # Adjacency list of the wireframe graph.
    adj = defaultdict(set)
    for (i, j) in connections_3d:
        adj[i].add(j)
        adj[j].add(i)

    # Vertices that appear in at least one edge.
    used_idxs = set()
    for (i, j) in connections_3d:
        used_idxs.add(i)
        used_idxs.add(j)

    if not used_idxs:
        return np.empty((0, 3)), []

    if not keep_largest:
        # Keep every connected vertex, just re-index densely.
        used_list = sorted(used_idxs)
        new_map = {old_id: new_id for new_id, old_id in enumerate(used_list)}
        new_vertices = np.array([all_3d_vertices[old_id] for old_id in used_list])
        new_conns = []
        for (i, j) in connections_3d:
            if i in used_idxs and j in used_idxs:
                new_conns.append((new_map[i], new_map[j]))
        return new_vertices, new_conns

    # Collect connected components with a breadth-first search.
    visited = set()

    def bfs(start):
        queue = deque([start])
        comp = []
        visited.add(start)
        while queue:
            cur = queue.popleft()
            comp.append(cur)
            for neigh in adj[cur]:
                if neigh not in visited:
                    visited.add(neigh)
                    queue.append(neigh)
        return comp

    comps = []
    for idx in used_idxs:
        if idx not in visited:
            comps.append(bfs(idx))

    # Keep only the largest component.
    comps.sort(key=len, reverse=True)
    largest = set(comps[0]) if comps else set()

    new_map = {}
    for new_id, old_id in enumerate(sorted(largest)):
        new_map[old_id] = new_id

    new_vertices = np.array([all_3d_vertices[old_id] for old_id in sorted(largest)])
    new_conns = []
    for (i, j) in connections_3d:
        if i in largest and j in largest:
            new_conns.append((new_map[i], new_map[j]))

    # Deduplicate undirected edges.
    new_conns = list(set(tuple(sorted(c)) for c in new_conns))
    return new_vertices, new_conns
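

# Sketch on a toy graph: vertex 5 is isolated and always dropped; with
# keep_largest=True only the 3-vertex component {0, 1, 2} survives.
def _demo_prune_not_connected():
    verts = np.arange(18, dtype=float).reshape(6, 3)
    conns = [(0, 1), (1, 2), (3, 4)]
    v_all, c_all = prune_not_connected(verts, conns, keep_largest=False)
    assert len(v_all) == 5          # only the isolated vertex is dropped
    v_big, c_big = prune_not_connected(verts, conns, keep_largest=True)
    assert len(v_big) == 3          # only the largest component remains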


def get_sparse_depth(colmap_rec, img_id, K, R, t, depth):
    '''Splat the COLMAP 3D points visible in image `img_id` into a sparse
    depth map of the same shape as `depth`.'''
    H, W = depth.shape
    xyz = []
    rgb = []

    # Find the COLMAP image matching this image id.
    found = False
    for img_id_c, col_img in colmap_rec.images.items():
        if col_img.name == img_id:
            found = True
            break
    if not found:
        print(f"{img_id} not found, returning empty depth")
        return np.zeros((H, W), dtype=np.float32), False

    # 4x4 world-to-camera transform for this image.
    mat4x4 = np.eye(4)
    mat4x4[:3] = col_img.cam_from_world.matrix()

    # Collect the 3D points observed by this image.
    for pid, p in colmap_rec.points3D.items():
        if col_img.has_point3D(pid):
            xyz.append(p.xyz)
            rgb.append(p.color)
    xyz = np.array(xyz)
    rgb = np.array(rgb)

    # Transform into the camera frame, then project with the intrinsics.
    xyz_projected = cv2.transform(cv2.convertPointsToHomogeneous(xyz), mat4x4)
    xyz_projected = cv2.convertPointsFromHomogeneous(xyz_projected).reshape(-1, 3)
    uv, _ = cv2.projectPoints(xyz_projected, np.zeros(3), np.zeros(3), np.array(K), np.zeros(4))
    uv = uv.squeeze()
    u, v = uv[:, 0].astype(np.int32), uv[:, 1].astype(np.int32)

    # Keep only projections that land inside the image.
    mask = (u >= 0) & (u < W) & (v >= 0) & (v < H)
    u, v = u[mask], v[mask]
    xyz_projected, rgb = xyz_projected[mask], rgb[mask]

    # Write camera-frame z into the sparse depth map.
    depth = np.zeros((H, W), dtype=np.float32)
    depth[v, u] = xyz_projected[:, 2]
    return depth, True
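

# Sketch of the projection/splat step with synthetic data (no pycolmap):
# camera-frame points are projected with K and written into a depth image.
# The intrinsics and points below are made up for illustration.
def _demo_project_points_to_depth():
    K = np.array([[100.0, 0, 32], [0, 100.0, 32], [0, 0, 1]])
    pts_cam = np.array([[0.0, 0.0, 2.0], [0.32, 0.0, 4.0]])  # z > 0
    uv, _ = cv2.projectPoints(pts_cam, np.zeros(3), np.zeros(3), K, np.zeros(4))
    u, v = np.round(uv.squeeze(1)).astype(int).T
    depth = np.zeros((64, 64), dtype=np.float32)
    depth[v, u] = pts_cam[:, 2]
    assert depth[32, 32] == 2.0 and depth[32, 40] == 4.0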


def fit_scale_robust_median(depth, sparse_depth):
    """
    Fits the scale model sparse_depth ~ k * depth by minimizing the median
    of absolute residuals, i.e. median( |sparse_depth - k*depth| ).

    Parameters
    ----------
    depth : np.ndarray
        Array of depth estimates (same shape as sparse_depth).
    sparse_depth : np.ndarray
        Sparse array with precise depth at certain locations
        (0 where data is unavailable).

    Returns
    -------
    k : float
        The slope of the robust best-fit scaling.
    depth_fitted : np.ndarray
        The depth array adjusted by the fit: k * depth.
    """
    # Only fit against pixels where sparse data exists.
    mask = (sparse_depth != 0)
    X = depth[mask]
    Y = sparse_depth[mask]

    def median_abs_resid(params, xvals, yvals):
        (k,) = params
        return np.median(np.abs(yvals - k * xvals))

    res = minimize(
        fun=median_abs_resid,
        x0=[1.0],
        args=(X, Y),
        method='Nelder-Mead'
    )
    k_robust = res.x[0]

    depth_fitted = k_robust * depth
    return k_robust, depth_fitted
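

# Sketch: the robust fit should recover the true scale even with a grossly
# wrong sparse sample (synthetic numbers, for illustration only).
def _demo_fit_scale_robust_median():
    rng = np.random.default_rng(0)
    depth = rng.uniform(1.0, 10.0, size=(32, 32)).astype(np.float32)
    sparse = np.zeros_like(depth)
    sparse[::4, ::4] = 2.5 * depth[::4, ::4]   # true scale k = 2.5
    sparse[0, 0] = 100.0                        # one outlier
    k, fitted = fit_scale_robust_median(depth, sparse)
    assert abs(k - 2.5) < 0.05
    assert fitted.shape == depth.shape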


def predict(entry, visualize=False) -> Tuple[np.ndarray, List[Tuple[int, int]]]:
    good_entry = convert_entry_to_human_readable(entry)
    colmap_rec = good_entry['colmap_binary']
    vert_edge_per_image = {}
    for i, (gest, depth, K, R, t, img_id) in enumerate(zip(good_entry['gestalt'],
                                                           good_entry['depth'],
                                                           good_entry['K'],
                                                           good_entry['R'],
                                                           good_entry['t'],
                                                           good_entry['image_ids'])):
        K = np.array(K)
        R = np.array(R)
        t = np.array(t)
        # Resize the gestalt segmentation to the depth map's resolution.
        gest_seg = gest.resize(depth.size)
        gest_seg_np = np.array(gest_seg).astype(np.uint8)

        # Monodepth is in millimeters; convert to meters, then fit its scale
        # against the sparse COLMAP depth.
        depth_np = np.array(depth) / 1000.
        depth_sparse, found = get_sparse_depth(colmap_rec, img_id, K, R, t, depth_np)
        if not found:
            print(f'No sparse depth found for image {i}')
            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
            continue
        k, depth_fitted = fit_scale_robust_median(depth_np, depth_sparse)
        print(f'Fitted depth scale for image {i}: k={k}')

        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=50.)
        if (len(vertices) < 2) or (len(connections) < 1):
            print(f'Not enough vertices or connections in image {i}')
            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
            continue

        uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, 50)

        # Back-project pixels to camera coordinates with the pinhole model.
        X = (uv[:, 0] - K[0, 2]) / K[0, 0] * depth_vert
        Y = (uv[:, 1] - K[1, 2]) / K[1, 1] * depth_vert
        Z = depth_vert
        vertices_3d_local = np.column_stack([X, Y, Z])

        # Invert the world-to-camera extrinsics to get world coordinates.
        world_to_cam = np.eye(4)
        world_to_cam[:3, :3] = R
        world_to_cam[:3, 3] = t.reshape(-1)
        cam_to_world = np.linalg.inv(world_to_cam)
        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
        vert_edge_per_image[i] = vertices, connections, vertices_3d

    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.25)
    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(
        all_3d_vertices, connections_3d, keep_largest=False)
    if (len(all_3d_vertices_clean) < 2) or (len(connections_3d_clean) < 1):
        print('Not enough vertices or connections in the 3D vertices')
        return empty_solution()
    if visualize:
        from hoho.viz3d import plot_estimate_and_gt
        plot_estimate_and_gt(all_3d_vertices_clean,
                             connections_3d_clean,
                             good_entry['wf_vertices'],
                             good_entry['wf_edges'])
    return all_3d_vertices_clean, connections_3d_clean
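

# Sketch of the back-projection used in predict(): pixel (u, v) with depth z
# maps to camera coordinates via the pinhole model, then to world coordinates
# by inverting the [R|t] extrinsics. All numbers are synthetic.
def _demo_backproject():
    K = np.array([[100.0, 0, 32], [0, 100.0, 32], [0, 0, 1]])
    R, t = np.eye(3), np.array([0.0, 0.0, -1.0])   # world -> camera
    u, v, z = 40.0, 32.0, 4.0
    X = (u - K[0, 2]) / K[0, 0] * z
    Y = (v - K[1, 2]) / K[1, 1] * z
    p_cam = np.array([X, Y, z])
    p_world = R.T @ (p_cam - t)                     # invert cam = R @ world + t
    assert np.allclose(p_world, [0.32, 0.0, 5.0])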