dmytromishkin commited on
Commit
5081776
·
verified ·
1 Parent(s): 9e478c8

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. LICENSE.md +13 -0
  2. README.md +18 -2
  3. script.py +39 -20
LICENSE.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2025 Dmytro Mishkin, Jack Langerman
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
README.md CHANGED
@@ -1,4 +1,20 @@
1
- # My Cool Handcrafted Submission 2025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- This repo contains a submission to the [S23DR Challenge](https://huggingface.co/spaces/usm3d/S23DR) (part of the [USM3D](https://usm3d.github.io/) workshop at CVPR2025). It was prepared by [dmytromishkin](https://huggingface.co/dmytromishkin).
4
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ # Handcrafted solution example for the S23DR competition
5
+
6
+ This repo provides a minimalistic example of a wireframe estimation submission to the S23DR competition.
7
+ We recommend you take a look at [this example](https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py) for the detailed code of this submission. It also provides useful I/O and visualization functions.
8
+
9
+ This example seeks to simply provide minimal code which succeeds at reading the dataset and producing a solution (in this case two vertices at the origin and an edge of zero length connecting them).
10
+
11
+ `script.py` - is the main file which is run by the competition space. It should produce `submission.parquet` as the result of the run. Please see the additional comments in the `script.py` file.
12
+
13
+
14
+ # How to submit
15
+
16
+ Use the notebook [example_notebook.ipynb](example_notebook.ipynb)
17
+
18
+
19
 
 
20
 
script.py CHANGED
@@ -11,12 +11,15 @@ import pandas as pd
11
  import numpy as np
12
  from datasets import load_dataset
13
  from typing import Dict
14
- from tqdm import tqdm
 
 
 
 
15
 
16
  def empty_solution(sample):
17
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
18
  return np.zeros((2,3)), [(0, 1)]
19
- from handcrafted_solution import predict
20
 
21
  class Sample(Dict):
22
  def pick_repr_data(self, x):
@@ -49,6 +52,7 @@ if __name__ == "__main__":
49
  print(os.system('ls -lahtr /tmp/data/'))
50
  print('/tmp/data/data')
51
  print(os.system('ls -lahtrR /tmp/data/data'))
 
52
 
53
  data_path_test_server = Path('/tmp/data')
54
  data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
@@ -66,7 +70,15 @@ if __name__ == "__main__":
66
  repo_type="dataset",
67
  )
68
  data_path = data_path_test_server
 
 
69
  print(data_path)
 
 
 
 
 
 
70
  data_files = {
71
  "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
72
  "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
@@ -78,33 +90,40 @@ if __name__ == "__main__":
78
  trust_remote_code=True,
79
  writer_batch_size=100
80
  )
 
81
  print('load with webdataset')
 
 
82
  print(dataset, flush=True)
83
 
84
  print('------------ Now you can do your solution ---------------')
85
  solution = []
86
- num_fails = 0
 
 
 
 
 
 
 
 
 
 
 
87
  for subset_name in dataset:
88
- for i, sample in enumerate(tqdm(dataset[subset_name])):
89
- # replace this with your solution
 
90
  print(Sample(sample), flush=True)
91
  print('------')
92
- try:
93
- pred_vertices, pred_edges = predict(sample, visualize=False)
94
- except Exception as e:
95
- print (f"Faile due to {e}")
96
- pred_vertices, pred_edges = empty_solution(sample)
97
- num_fails+=1
98
- #print (f'{pred_vertices=}, {pred_edges=}')
99
- solution.append({
100
- 'order_id': sample['order_id'],
101
- 'wf_vertices': pred_vertices.tolist(),
102
- 'wf_edges': pred_edges
103
- })
104
  print('------------ Saving results ---------------')
105
- print (f"Processed {len(solution)} entries, get {num_fails} fails")
106
- from time import sleep
107
- sleep(30)
108
  sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
109
  sub.to_parquet("submission.parquet")
110
  print("------------ Done ------------ ")
 
11
  import numpy as np
12
  from datasets import load_dataset
13
  from typing import Dict
14
+ from joblib import Parallel, delayed
15
+ import os
16
+
17
+ from hoho2025.example_solutions import predict_wireframe
18
+ # check the https://github.com/s23dr/hoho2025/blob/main/hoho2025/example_solutions.py for the example solution
19
 
20
  def empty_solution(sample):
21
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
22
  return np.zeros((2,3)), [(0, 1)]
 
23
 
24
  class Sample(Dict):
25
  def pick_repr_data(self, x):
 
52
  print(os.system('ls -lahtr /tmp/data/'))
53
  print('/tmp/data/data')
54
  print(os.system('ls -lahtrR /tmp/data/data'))
55
+
56
 
57
  data_path_test_server = Path('/tmp/data')
58
  data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
 
70
  repo_type="dataset",
71
  )
72
  data_path = data_path_test_server
73
+
74
+
75
  print(data_path)
76
+
77
+ # dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
78
+ # data_files = {
79
+ # "validation": [str(p) for p in [*data_path.rglob('*validation*.arrow')]+[*data_path.rglob('*public*/**/*.tar')]],
80
+ # "test": [str(p) for p in [*data_path.rglob('*test*.arrow')]+[*data_path.rglob('*private*/**/*.tar')]],
81
+ # }
82
  data_files = {
83
  "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
84
  "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
 
90
  trust_remote_code=True,
91
  writer_batch_size=100
92
  )
93
+
94
  print('load with webdataset')
95
+
96
+
97
  print(dataset, flush=True)
98
 
99
  print('------------ Now you can do your solution ---------------')
100
  solution = []
101
+
102
+ def process_sample(sample):
103
+ try:
104
+ pred_vertices, pred_edges = predict_wireframe(sample)
105
+ except:
106
+ pred_vertices, pred_edges = empty_solution(sample)
107
+ return {
108
+ 'order_id': sample['order_id'],
109
+ 'wf_vertices': pred_vertices.tolist(),
110
+ 'wf_edges': pred_edges
111
+ }
112
+ num_cores = len(os.sched_getaffinity(0))
113
  for subset_name in dataset:
114
+ samples = list(dataset[subset_name])
115
+ # Print sample info for just a few samples to avoid clutter
116
+ for i, sample in enumerate(samples[:2]):
117
  print(Sample(sample), flush=True)
118
  print('------')
119
+
120
+ # Process samples in parallel with simple tqdm progress tracking
121
+ results = Parallel(n_jobs=num_cores)(
122
+ delayed(process_sample)(sample) for sample in tqdm(samples, desc=f"Processing {subset_name}")
123
+ )
124
+ solution.extend(results)
125
+
 
 
 
 
 
126
  print('------------ Saving results ---------------')
 
 
 
127
  sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
128
  sub.to_parquet("submission.parquet")
129
  print("------------ Done ------------ ")