Upload folder using huggingface_hub

- README.md +14 -0
- example_notebook.ipynb +0 -0
- example_on_training.ipynb +0 -0
- handcrafted_solution.py +513 -0
- params.json +23 -0
- script.py +132 -0
README.md
ADDED
@@ -0,0 +1,14 @@
# Empty solution example for the S23DR competition

This repo provides a minimalistic example of a valid, but empty, submission to the S23DR competition.
We recommend you take a look at [this example](https://huggingface.co/usm3d/handcrafted_baseline_submission),
which implements some primitive algorithms and provides useful I/O and visualization functions.

This example simply provides the minimal code that succeeds at reading the dataset and producing a solution (in this case, two vertices at the origin and a zero-length edge connecting them).

`script.py` is the main file run by the competition space. It must produce `submission.parquet` as the result of the run. Please see the additional comments in `script.py`.

---
license: apache-2.0
---
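For reference, here is a minimal sketch (not one of the uploaded files) of what the empty solution's `submission.parquet` contains, assuming only `numpy` and `pandas`; the `order_id` value below is a hypothetical placeholder:

```python
import numpy as np
import pandas as pd

# One row per scene: two vertices at the origin and a single zero-length edge.
rows = [{
    "order_id": "example_order",  # hypothetical id
    "wf_vertices": np.zeros((2, 3)).tolist(),
    "wf_edges": [(0, 1)],
}]
pd.DataFrame(rows, columns=["order_id", "wf_vertices", "wf_edges"]).to_parquet("submission.parquet")
```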
example_notebook.ipynb
ADDED
The diff for this file is too large to render.
example_on_training.ipynb
ADDED
The diff for this file is too large to render.
handcrafted_solution.py
ADDED
@@ -0,0 +1,513 @@
# Description: This file contains the handcrafted solution for the task of wireframe reconstruction

import io
from collections import defaultdict
from typing import Tuple, List

import cv2
import numpy as np
from PIL import Image as PImage
from scipy.optimize import minimize
from scipy.spatial.distance import cdist


def empty_solution():
    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
    return np.zeros((2, 3)), [(0, 1)]


def read_colmap_rec(colmap_data):
    '''Unzip an in-memory COLMAP archive and parse it with pycolmap.'''
    import pycolmap
    import tempfile
    import zipfile
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(io.BytesIO(colmap_data), "r") as zf:
            zf.extractall(tmpdir)  # unpacks cameras.txt, images.txt, etc. to tmpdir
        # Parse with pycolmap while the temporary directory still exists
        rec = pycolmap.Reconstruction(tmpdir)
        return rec

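# Usage sketch (assumes the entry carries the zipped reconstruction as raw bytes
# under a 'colmap'-named key, as in predict() below):
#   rec = read_colmap_rec(entry['colmap_binary'])
#   print(len(rec.images), 'registered images')
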
def convert_entry_to_human_readable(entry):
    out = {}
    for k, v in entry.items():
        if 'colmap' in k:
            out[k] = read_colmap_rec(v)
        elif k in ['wf_vertices', 'wf_edges', 'K', 'R', 't']:
            out[k] = np.array(v)
        else:
            out[k] = v
    out['__key__'] = entry['order_id']
    return out


def point_to_segment_dist(pt, seg_p1, seg_p2):
    """
    Computes the Euclidean distance from pt to the line segment seg_p1->seg_p2.
    pt, seg_p1, seg_p2: (x, y) as np.ndarray
    """
    # If both endpoints are the same, just return distance to one of them
    if np.allclose(seg_p1, seg_p2):
        return np.linalg.norm(pt - seg_p1)
    seg_vec = seg_p2 - seg_p1
    pt_vec = pt - seg_p1
    seg_len2 = seg_vec.dot(seg_vec)
    # Clamp the projection parameter to [0, 1] so the projection stays on the segment
    t = max(0, min(1, pt_vec.dot(seg_vec) / seg_len2))
    proj = seg_p1 + t * seg_vec
    return np.linalg.norm(pt - proj)

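# Worked example: the point (1, 1) projects onto the segment (0, 0)->(2, 0)
# at (1, 0), so the distance is 1.0:
#   point_to_segment_dist(np.array([1., 1.]), np.array([0., 0.]), np.array([2., 0.]))
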
def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=25.0):
    """
    Identify apex and eave-end vertices, then detect lines for eave/ridge/rake/valley.
    For each connected component, we do a line fit with cv2.fitLine, then measure
    segment endpoints more robustly. We then associate apex points that are within
    'edge_th' of the line segment. We record those apex-apex connections as edges
    if at least 2 apexes lie near the same component line.
    """
    from hoho.color_mappings import gestalt_color_mapping  # for apex, eave_end_point, etc.

    # ------------------------------------------------------------------
    # Step A: Collect apex and eave_end vertices
    # ------------------------------------------------------------------
    vertices = []
    # Apex
    apex_color = np.array(gestalt_color_mapping['apex'])
    apex_mask = cv2.inRange(gest_seg_np, apex_color - 0.5, apex_color + 0.5)
    if apex_mask.sum() > 0:
        output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        stats, centroids = stats[1:], centroids[1:]  # skip background
        for i in range(numLabels - 1):
            vert = {"xy": centroids[i], "type": "apex"}
            vertices.append(vert)

    # Eave end
    eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
    eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color - 0.5, eave_end_color + 0.5)
    if eave_end_mask.sum() > 0:
        output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        stats, centroids = stats[1:], centroids[1:]
        for i in range(numLabels - 1):
            vert = {"xy": centroids[i], "type": "eave_end_point"}
            vertices.append(vert)

    # Consolidate vertex points (apex and eave-end) as an array:
    apex_pts = []
    apex_idx_map = []  # keep track of index in 'vertices'
    for idx, v in enumerate(vertices):
        apex_pts.append(v['xy'])
        apex_idx_map.append(idx)
    apex_pts = np.array(apex_pts)

    # ------------------------------------------------------------------
    # Step B: Detect line segments for each edge class
    # ------------------------------------------------------------------
    connections = []
    edge_classes = ['eave', 'ridge', 'rake', 'valley']
    for edge_class in edge_classes:
        edge_color = np.array(gestalt_color_mapping[edge_class])
        mask_raw = cv2.inRange(gest_seg_np, edge_color - 0.5, edge_color + 0.5)

        # Morphological close to avoid small holes (a small kernel reduces over-merging)
        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask_raw, cv2.MORPH_CLOSE, kernel)

        if mask.sum() == 0:
            continue

        # Connected components
        output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
        (numLabels, labels, stats, centroids) = output
        # skip the background
        stats, centroids = stats[1:], centroids[1:]
        label_indices = range(1, numLabels)

        # For each connected component, do a line fit
        for lbl in label_indices:
            ys, xs = np.where(labels == lbl)
            if len(xs) < 2:
                continue
            # Fit a line using cv2.fitLine
            pts_for_fit = np.column_stack([xs, ys]).astype(np.float32)
            # (vx, vy, x0, y0) = direction + a point on the line
            line_params = cv2.fitLine(pts_for_fit, distType=cv2.DIST_L2,
                                      param=0, reps=0.01, aeps=0.01)
            vx, vy, x0, y0 = line_params.ravel()
            # Approximate segment endpoints by projecting (xs, ys) onto the line,
            # then taking min and max of the 1D parameter along the line:
            # param along the line = (x - x0)*vx + (y - y0)*vy
            proj = (xs - x0) * vx + (ys - y0) * vy
            proj_min, proj_max = proj.min(), proj.max()
            p1 = np.array([x0 + proj_min * vx, y0 + proj_min * vy])
            p2 = np.array([x0 + proj_max * vx, y0 + proj_max * vy])

            # ------------------------------------------------------------------
            # Step C: If apex points are within 'edge_th' of segment, they are connected
            # ------------------------------------------------------------------
            if len(apex_pts) < 2:
                continue

            # Distance from each apex to the line segment
            dists = np.array([
                point_to_segment_dist(apex_pts[i], p1, p2)
                for i in range(len(apex_pts))
            ])

            # Indices of apex points that are near
            near_mask = (dists <= edge_th)
            near_indices = np.where(near_mask)[0]
            if len(near_indices) < 2:
                continue

            # Connect each pair among these near apex points
            for i in range(len(near_indices)):
                for j in range(i + 1, len(near_indices)):
                    a_idx = near_indices[i]
                    b_idx = near_indices[j]
                    # 'a_idx' and 'b_idx' are indices in apex_pts / apex_idx_map
                    vA = apex_idx_map[a_idx]
                    vB = apex_idx_map[b_idx]
                    # Store the connection using sorted indexing
                    conn = tuple(sorted((vA, vB)))
                    connections.append(conn)

    return vertices, connections

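# Output sketch: 'vertices' is a list of dicts such as
#   {'xy': array([420.5, 310.0]), 'type': 'apex'}
# and 'connections' is a list of index pairs into that list, e.g. [(0, 2), (1, 2)].
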
def get_uv_depth(vertices, depth_fitted, sparse_depth, search_radius=10):
    """
    For each vertex, returns a 2D array of (u,v) and a matching 1D array of depths.

    We attempt to use the sparse_depth if available in a local neighborhood:
      1. For each vertex coordinate (x, y), define a local window in sparse_depth
         of size (2*search_radius + 1).
      2. Collect all valid (nonzero) values in that window.
      3. If any exist, we take the median (robust) as the vertex depth.
      4. Otherwise, we use depth_fitted[y, x].

    Parameters
    ----------
    vertices : List[dict]
        Each dict must have "xy" at least, e.g. {"xy": (x, y), ...}
    depth_fitted : np.ndarray
        A 2D array (H, W), the dense (or corrected) depth for fallback.
    sparse_depth : np.ndarray
        A 2D array (H, W), mostly zeros except where accurate data is available.
    search_radius : int
        Pixel radius around the vertex in which to look for sparse depth values.

    Returns
    -------
    uv : np.ndarray of shape (N, 2)
        2D float coordinates of each vertex (x, y).
    vertex_depth : np.ndarray of shape (N,)
        Depth value chosen for each vertex.
    """
    # Collect each vertex's (x, y)
    uv = np.array([v['xy'] for v in vertices], dtype=np.float32)
    # Convert to integer pixel coordinates
    uv_int = np.round(uv).astype(np.int32)

    H, W = depth_fitted.shape[:2]
    # Clip coordinates to stay within image bounds
    uv_int[:, 0] = np.clip(uv_int[:, 0], 0, W - 1)
    uv_int[:, 1] = np.clip(uv_int[:, 1], 0, H - 1)

    # Prepare output array of depths
    vertex_depth = np.zeros(len(vertices), dtype=np.float32)

    for i, (x_i, y_i) in enumerate(uv_int):
        # Local window in [x_i - search_radius, x_i + search_radius]
        x0 = max(0, x_i - search_radius)
        x1 = min(W, x_i + search_radius + 1)
        y0 = max(0, y_i - search_radius)
        y1 = min(H, y_i + search_radius + 1)

        region = sparse_depth[y0:y1, x0:x1]
        valid_vals = region[region > 0]
        if len(valid_vals) > 0:
            # Use median of valid sparse depth
            vertex_depth[i] = np.median(valid_vals)
        else:
            # Fallback to depth_fitted at this pixel
            vertex_depth[i] = depth_fitted[y_i, x_i]

    return uv, vertex_depth

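# Usage sketch: with search_radius=2, a vertex at pixel (x, y) takes the median of
# the nonzero sparse-depth values in the 5x5 window around it, and only falls back
# to depth_fitted[y, x] when that window contains no sparse measurements.
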
def merge_vertices_3d(vert_edge_per_image, th=0.5):
    '''Merge vertices that are close to each other in 3D space and are of same types'''
    all_3d_vertices = []
    connections_3d = []
    cur_start = 0
    types = []
    # Concatenate per-image vertices and re-index their edges into a global list
    for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
        types += [int(v['type'] == 'apex') for v in vertices]
        all_3d_vertices.append(vertices_3d)
        connections_3d += [(x + cur_start, y + cur_start) for (x, y) in connections]
        cur_start += len(vertices_3d)
    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
    distmat = cdist(all_3d_vertices, all_3d_vertices)
    types = np.array(types).reshape(-1, 1)
    same_types = cdist(types, types)
    # Merge vertices that are both close (<= th) and of the same type
    mask_to_merge = (distmat <= th) & (same_types == 0)
    new_vertices = []
    new_connections = []
    to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
    to_merge_final = defaultdict(list)
    for i in range(len(all_3d_vertices)):
        for j in to_merge:
            if i in j:
                to_merge_final[i] += j
    for k, v in to_merge_final.items():
        to_merge_final[k] = list(set(v))
    already_there = set()
    merged = []
    for k, v in to_merge_final.items():
        if k in already_there:
            continue
        merged.append(v)
        for vv in v:
            already_there.add(vv)
    # Each merged group becomes a single vertex at the group mean
    old_idx_to_new = {}
    count = 0
    for idxs in merged:
        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
        for idx in idxs:
            old_idx_to_new[idx] = count
        count += 1
    new_vertices = np.array(new_vertices)
    # Re-index edges, dropping self-loops and duplicates
    for conn in connections_3d:
        new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
        if new_con[0] == new_con[1]:
            continue
        if new_con not in new_connections:
            new_connections.append(new_con)
    return new_vertices, new_connections

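# Merging semantics: two vertices collapse into their mean when they are within
# `th` of each other AND share the same type (apex vs. non-apex); edges are then
# reindexed, and degenerate (self-loop) or duplicate edges are dropped.
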
def prune_not_connected(all_3d_vertices, connections_3d, keep_largest=True):
    """
    Prune vertices not connected to anything. If keep_largest=True, also
    keep only the largest connected component in the graph.
    """
    if len(all_3d_vertices) == 0:
        return np.empty((0, 3)), []

    # adjacency
    adj = defaultdict(set)
    for (i, j) in connections_3d:
        adj[i].add(j)
        adj[j].add(i)

    # keep only vertices that appear in at least one edge
    used_idxs = set()
    for (i, j) in connections_3d:
        used_idxs.add(i)
        used_idxs.add(j)

    if not used_idxs:
        return np.empty((0, 3)), []

    # If we only want to remove truly isolated points, but keep multiple subgraphs:
    if not keep_largest:
        new_map = {}
        used_list = sorted(list(used_idxs))
        for new_id, old_id in enumerate(used_list):
            new_map[old_id] = new_id
        new_vertices = np.array([all_3d_vertices[old_id] for old_id in used_list])
        new_conns = []
        for (i, j) in connections_3d:
            if i in used_idxs and j in used_idxs:
                new_conns.append((new_map[i], new_map[j]))
        return new_vertices, new_conns

    # Otherwise find the largest connected component:
    visited = set()

    def bfs(start):
        queue = [start]
        comp = []
        visited.add(start)
        while queue:
            cur = queue.pop()
            comp.append(cur)
            for neigh in adj[cur]:
                if neigh not in visited:
                    visited.add(neigh)
                    queue.append(neigh)
        return comp

    # Collect all subgraphs
    comps = []
    for idx in used_idxs:
        if idx not in visited:
            comps.append(bfs(idx))

    # pick the largest
    comps.sort(key=lambda c: len(c), reverse=True)
    largest = comps[0] if len(comps) > 0 else []

    # Remap old vertex indices to compact new ones
    new_map = {}
    for new_id, old_id in enumerate(largest):
        new_map[old_id] = new_id

    new_vertices = np.array([all_3d_vertices[old_id] for old_id in largest])
    largest_set = set(largest)
    new_conns = []
    for (i, j) in connections_3d:
        if i in largest_set and j in largest_set:
            new_conns.append((new_map[i], new_map[j]))

    # remove duplicates
    new_conns = list(set([tuple(sorted(c)) for c in new_conns]))
    return new_vertices, new_conns


def get_sparse_depth(colmap_rec, img_id, K, R, t, depth):
    '''Rasterize the COLMAP 3D points visible in image img_id into a sparse depth map.'''
    H, W = depth.shape
    xyz = []
    rgb = []
    found = False
    for img_id_c, col_img in colmap_rec.images.items():
        if col_img.name == img_id:
            found = True
            break
    if not found:
        return np.zeros((H, W), dtype=np.float32), False
    mat4x4 = np.eye(4)
    mat4x4[:3] = col_img.cam_from_world.matrix()
    for pid, p in colmap_rec.points3D.items():
        if col_img.has_point3D(pid):
            xyz.append(p.xyz)
            rgb.append(p.color)
    if len(xyz) == 0:
        # no 3D points observed in this image
        return np.zeros((H, W), dtype=np.float32), False
    xyz = np.array(xyz)
    rgb = np.array(rgb)
    # Move the points into the camera frame, then project with the intrinsics
    xyz_projected = cv2.transform(cv2.convertPointsToHomogeneous(xyz), mat4x4)
    xyz_projected = cv2.convertPointsFromHomogeneous(xyz_projected).reshape(-1, 3)
    uv, _ = cv2.projectPoints(xyz_projected, np.zeros(3), np.zeros(3), np.array(K), np.zeros(4))
    uv = uv.reshape(-1, 2)  # robust to the single-point case, unlike squeeze()
    u, v = uv[:, 0].astype(np.int32), uv[:, 1].astype(np.int32)
    mask = (u >= 0) & (u < W) & (v >= 0) & (v < H)
    u, v = u[mask], v[mask]
    xyz_projected, rgb = xyz_projected[mask], rgb[mask]
    # Write camera-frame Z at each projected pixel (avoid shadowing the input `depth`)
    depth_sparse = np.zeros((H, W), dtype=np.float32)
    depth_sparse[v, u] = xyz_projected[:, 2]
    return depth_sparse, True

def fit_scale_robust_median(depth, sparse_depth):
    """
    Fits the model sparse_depth ~ k * depth by minimizing the median of
    absolute residuals, i.e. median( |sparse_depth - k*depth| ), which is
    robust to outliers in the sparse depth.

    Parameters
    ----------
    depth : np.ndarray
        Array of depth estimates (same shape as sparse_depth).
    sparse_depth : np.ndarray
        Sparse array with precise depth at certain locations
        (0 where data is unavailable).

    Returns
    -------
    k : float
        The slope of the robust best-fit scaling.
    depth_fitted : np.ndarray
        The depth array adjusted by the fit: k * depth.
    """
    # 1. Create mask of valid (nonzero) locations in sparse_depth
    mask = (sparse_depth != 0)
    X = depth[mask]
    Y = sparse_depth[mask]

    # 2. Define the objective: median of absolute residuals (scale only, no intercept)
    def median_abs_resid(params, xvals, yvals):
        k = params[0]
        return np.median(np.abs(yvals - k * xvals))

    # 3. Get an initial guess from a standard least-squares fit
    #    (this helps the optimizer start in a reasonable region)
    k_init, _ = np.polyfit(X, Y, deg=1)

    # 4. Optimize using a derivative-free method (Nelder-Mead)
    res = minimize(
        fun=median_abs_resid,
        x0=[k_init],
        args=(X, Y),
        method='Nelder-Mead'
    )

    k_robust = res.x[0]

    # 5. Construct the fitted depth array
    depth_fitted = k_robust * depth

    return k_robust, depth_fitted

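# Synthetic check: if the sparse depth equals 2x the dense depth wherever it is
# defined, the recovered scale is ~2:
#   d = np.arange(1., 17.).reshape(4, 4)
#   s = np.zeros_like(d); s[0, 0] = 2. * d[0, 0]; s[2, 1] = 2. * d[2, 1]
#   k, fitted = fit_scale_robust_median(d, s)  # k ~= 2.0
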
def predict(entry, visualize=False) -> Tuple[np.ndarray, List[Tuple[int, int]]]:
    good_entry = convert_entry_to_human_readable(entry)
    vert_edge_per_image = {}
    for i, (gest, depth, K, R, t, img_id) in enumerate(zip(good_entry['gestalt'],
                                                           good_entry['depth'],
                                                           good_entry['K'],
                                                           good_entry['R'],
                                                           good_entry['t'],
                                                           good_entry['image_ids'])):
        colmap_rec = good_entry['colmap_binary']
        K = np.array(K)
        R = np.array(R)
        t = np.array(t)
        gest_seg = gest.resize(depth.size)
        gest_seg_np = np.array(gest_seg).astype(np.uint8)
        # Metric3D depth is stored in millimeters; convert to meters
        depth_np = np.array(depth) / 1000.
        depth_sparse, found = get_sparse_depth(colmap_rec, img_id, K, R, t, depth_np)
        if not found:
            print(f'No sparse depth found for image {i}')
            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
            continue
        k, depth_fitted = fit_scale_robust_median(depth_np, depth_sparse)
        print(k)
        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th=50.)
        if (len(vertices) < 2) or (len(connections) < 1):
            print(f'Not enough vertices or connections in image {i}')
            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
            continue

        uv, depth_vert = get_uv_depth(vertices, depth_fitted, depth_sparse, 50)
        # Back-project pixels to camera coordinates using the intrinsics
        X = (uv[:, 0] - K[0, 2]) / K[0, 0] * depth_vert
        Y = (uv[:, 1] - K[1, 2]) / K[1, 1] * depth_vert
        Z = depth_vert
        vertices_3d_local = np.column_stack([X, Y, Z])
        # Build the world->camera transform from R, t and invert it
        world_to_cam = np.eye(4)
        world_to_cam[:3, :3] = R
        world_to_cam[:3, 3] = t.reshape(-1)
        cam_to_world = np.linalg.inv(world_to_cam)
        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
        vert_edge_per_image[i] = vertices, connections, vertices_3d
    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 0.25)
    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d, keep_largest=False)
    if (len(all_3d_vertices_clean) < 2) or (len(connections_3d_clean) < 1):
        print('Not enough vertices or connections in the 3D vertices')
        return empty_solution()
    if visualize:
        from hoho.viz3d import plot_estimate_and_gt
        plot_estimate_and_gt(all_3d_vertices_clean,
                             connections_3d_clean,
                             good_entry['wf_vertices'],
                             good_entry['wf_edges'])
    return all_3d_vertices_clean, connections_3d_clean
params.json
ADDED
@@ -0,0 +1,23 @@
{
    "competition_id": "usm3d/S23DR2025",
    "competition_type": "script",
    "metric": "custom",
    "token": "hf_******",
    "team_id": "xxxxxxxxx_your_team_name_xxxxxxxxxx",
    "submission_id": "xxxxxxxxx_your_sub_id_xxxxxxxxxx",
    "submission_id_col": "order_id",
    "submission_cols": [
        "order_id",
        "wf_vertices",
        "wf_edges",
        "wf_classifications"
    ],
    "submission_rows": 267,
    "output_path": "/tmp/model",
    "submission_repo": "<your submission repo>",
    "time_limit": 7200,
    "dataset": "usm3d/hoho25k_test_x",
    "submission_filenames": [
        "submission.parquet"
    ]
}
script.py
ADDED
@@ -0,0 +1,132 @@
### This is an example of the script that will be run in the test environment.
### You can change the rest of the code to define and test your solution.
### However, you should not change the signature of the provided function.
### The script saves a "submission.parquet" file in the current directory.
### You can use any additional files and subdirectories to organize your code.

from pathlib import Path
from tqdm import tqdm
import pandas as pd
import numpy as np
from datasets import load_dataset
from typing import Dict

def empty_solution(sample):
    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
    return np.zeros((2, 3)), [(0, 1)]

from handcrafted_solution import predict

class Sample(Dict):
    def pick_repr_data(self, x):
        # Summarize large values (arrays, images, lists) instead of printing them
        if hasattr(x, 'shape'):
            return x.shape
        if isinstance(x, (str, float, int)):
            return x
        if isinstance(x, list):
            return [type(x[0])] if len(x) > 0 else []
        return type(x)

    def __repr__(self):
        return str({k: self.pick_repr_data(v) for k, v in self.items()})

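# Example (hypothetical keys and values): printing Sample(sample) logs a compact
# summary like
#   {'order_id': 'abc123', 'gestalt': [<class 'PIL.Image.Image'>], 'K': (3, 3)}
# instead of dumping full images and arrays into the run log.
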
import json

if __name__ == "__main__":
    print("------------ Loading dataset ------------")
    param_path = Path('params.json')
    print(param_path)
    with param_path.open() as f:
        params = json.load(f)
    print(params)

    import os
    # Inspect the working directory and the mounted test data (debug output)
    print('pwd:')
    os.system('pwd')
    print(os.system('ls -lahtr'))
    print('/tmp/data/')
    print(os.system('ls -lahtr /tmp/data/'))
    print('/tmp/data/data')
    print(os.system('ls -lahtrR /tmp/data/data'))

    data_path_test_server = Path('/tmp/data')
    data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'

    if data_path_test_server.exists():
        TEST_ENV = True
    else:
        # Running locally: download the dataset snapshot into /tmp/data first
        TEST_ENV = False
        from huggingface_hub import snapshot_download
        _ = snapshot_download(
            repo_id=params['dataset'],
            local_dir="/tmp/data",
            repo_type="dataset",
        )
    data_path = data_path_test_server

    print(data_path)

    data_files = {
        "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
        "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
    }
    print(data_files)
    dataset = load_dataset(
        str(data_path / 'hoho25k_test_x.py'),
        data_files=data_files,
        trust_remote_code=True,
        writer_batch_size=100
    )
    print('load with webdataset')

    print(dataset, flush=True)

    print('------------ Now you can do your solution ---------------')
    solution = []
    for subset_name in dataset:
        for i, sample in enumerate(tqdm(dataset[subset_name])):
            # replace this with your solution
            print(Sample(sample), flush=True)
            print('------')
            try:
                pred_vertices, pred_edges = predict(sample, visualize=False)
            except Exception:
                # fall back to the trivial solution so the row is still valid
                pred_vertices, pred_edges = empty_solution(sample)
            solution.append({
                'order_id': sample['order_id'],
                'wf_vertices': pred_vertices.tolist(),
                'wf_edges': pred_edges
            })

    print('------------ Saving results ---------------')
    sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
    sub.to_parquet("submission.parquet")
    print("------------ Done ------------")