dmytromishkin
/

my_cool_handcrafted_submission_2025

Model card · Files and versions · Community

dmytromishkin committed on Apr 8

Commit

98706c7

verified ·

1 Parent(s): c675034

Update script.py

Browse files

Files changed (1) hide show

script.py +5 -32

script.py CHANGED Viewed

@@ -11,6 +11,7 @@ import pandas as pd
 import numpy as np
 from datasets import load_dataset
 from typing import Dict
 def empty_solution(sample):
     '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
@@ -48,7 +49,6 @@ if __name__ == "__main__":
     print(os.system('ls -lahtr /tmp/data/'))
     print('/tmp/data/data')
     print(os.system('ls -lahtrR /tmp/data/data'))
     data_path_test_server = Path('/tmp/data')
     data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
@@ -66,15 +66,7 @@ if __name__ == "__main__":
             repo_type="dataset",
         )
     data_path = data_path_test_server
     print(data_path)
-    # dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
-    # data_files = {
-    #     "validation": [str(p) for p in [*data_path.rglob('*validation*.arrow')]+[*data_path.rglob('*public*/**/*.tar')]],
-    #     "test": [str(p) for p in [*data_path.rglob('*test*.arrow')]+[*data_path.rglob('*private*/**/*.tar')]],
-    # }
     data_files = {
         "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
         "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
@@ -86,31 +78,12 @@ if __name__ == "__main__":
         trust_remote_code=True,
         writer_batch_size=100
     )
-    # if TEST_ENV:
-    # dataset = load_dataset(
-    #     "webdataset",
-    #     data_files=data_files,
-    #     trust_remote_code=True,
-    #     # streaming=True
-    # )
     print('load with webdataset')
-    # else:
-    #     dataset = load_dataset(
-    #         "arrow",
-    #         data_files=data_files,
-    #         trust_remote_code=True,
-    #         # streaming=True
-    #     )
-    #     print('load with arrow')
     print(dataset, flush=True)
-    # dataset = load_dataset('webdataset', data_files={)
     print('------------ Now you can do your solution ---------------')
     solution = []
     for subset_name in dataset:
         for i, sample in enumerate(tqdm(dataset[subset_name])):
             # replace this with your solution
@@ -121,15 +94,15 @@ if __name__ == "__main__":
             except Exception as e:
                 print (f"Faile due to {e}")
                 pred_vertices, pred_edges = empty_solution(sample)
-            print (f'{pred_vertices=}, {pred_edges=}')
             solution.append({
                             'order_id': sample['order_id'],
                             'wf_vertices': pred_vertices.tolist(),
                             'wf_edges': pred_edges
                         })
     print('------------ Saving results ---------------')
-    print (len(solution))
     from time import sleep
     sleep(30)
     sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])

 import numpy as np
 from datasets import load_dataset
 from typing import Dict
+from tqdm import tqdm
 def empty_solution(sample):
     '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
     print(os.system('ls -lahtr /tmp/data/'))
     print('/tmp/data/data')
     print(os.system('ls -lahtrR /tmp/data/data'))
     data_path_test_server = Path('/tmp/data')
     data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
             repo_type="dataset",
         )
     data_path = data_path_test_server
     print(data_path)
     data_files = {
         "validation": [str(p) for p in data_path.rglob('*public*/**/*.tar')],
         "test": [str(p) for p in data_path.rglob('*private*/**/*.tar')],
         trust_remote_code=True,
         writer_batch_size=100
     )
     print('load with webdataset')
     print(dataset, flush=True)
     print('------------ Now you can do your solution ---------------')
     solution = []
+    num_fails = 0
     for subset_name in dataset:
         for i, sample in enumerate(tqdm(dataset[subset_name])):
             # replace this with your solution
             except Exception as e:
                 print (f"Faile due to {e}")
                 pred_vertices, pred_edges = empty_solution(sample)
+                num_fails+=1
+            #print (f'{pred_vertices=}, {pred_edges=}')
             solution.append({
                             'order_id': sample['order_id'],
                             'wf_vertices': pred_vertices.tolist(),
                             'wf_edges': pred_edges
                         })
     print('------------ Saving results ---------------')
+    print (f"Processed {len(solution)} entries, get {num_fails} fails")
     from time import sleep
     sleep(30)
     sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])