wayne-chi committed on
Commit
1525f6b
·
verified ·
1 Parent(s): c43791e

Upload inference.py

Browse files
Files changed (1) hide show
  1. inference.py +35 -24
inference.py CHANGED
@@ -1,34 +1,31 @@
1
  import pandas as pd
2
  import numpy as np
3
- # import torch
4
  import joblib
5
  import argparse
6
  import os
7
  import glob
8
  from sklearn.multioutput import MultiOutputRegressor
9
  from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNRegressor
 
10
 
11
 
12
- # print(os.environ.get("OMP_NUM_THREADS"))
13
- # os.environ["OMP_NUM_THREADS"] = os.cpu_count()
14
-
15
-
16
 
17
- import torch
 
 
18
 
19
# Pre-populate torch.classes.__path__ with an empty list so that Streamlit's
# lookup of that attribute does not fall through to torch.classes.__getattr__.
try:
    torch.classes.__path__ = []
except Exception:
    # Direct assignment was rejected: replace torch.classes with a thin proxy
    # that carries the empty __path__ itself and forwards every other lookup.
    class _TorchClassesWrapper:
        """Proxy exposing an empty ``__path__`` and delegating all other attributes."""

        def __init__(self, wrapped):
            self._obj = wrapped
            self.__path__ = []

        def __getattr__(self, attr):
            # Only reached for names not found on the proxy itself.
            return getattr(self._obj, attr)

    torch.classes = _TorchClassesWrapper(torch.classes)
31
 
 
 
 
 
 
 
32
 
33
  class TabPFNEnsemblePredictor:
34
  """
@@ -44,7 +41,7 @@ class TabPFNEnsemblePredictor:
44
  target_cols (list): The names of the target columns for the output DataFrame.
45
  """
46
 
47
- def __init__(self, model_dir: str, model_pattern: str = "Fold_*_best_model.tabpfn_fit"):
48
  """
49
  Initializes the predictor by finding and loading the ensemble of models.
50
 
@@ -80,15 +77,29 @@ class TabPFNEnsemblePredictor:
80
  # Move model components to CPU for inference to avoid potential CUDA errors
81
  # and ensure compatibility on machines without a GPU.
82
  if not torch.cuda.is_available():
 
 
 
 
 
 
 
 
83
  print("Cuda not available using cpu")
 
 
 
 
 
 
 
 
 
84
  model = joblib.load(model_path)
85
  for estimator in model.estimators_:
86
  if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
87
  for p in estimator.predictor_.predictors:
88
- p.to("cpu")
89
- else:
90
- print("Cuda is available")
91
- model = joblib.load(model_path)
92
 
93
  loaded_models.append(model)
94
  print(f"Successfully loaded {os.path.basename(model_path)}")
@@ -122,7 +133,7 @@ class TabPFNEnsemblePredictor:
122
 
123
  return df_featured
124
 
125
- def predict(self, input_data: pd.DataFrame or np.ndarray or str) -> (np.ndarray, pd.DataFrame):
126
  """
127
  Generates ensembled predictions for the given input data.
128
 
 
1
  import pandas as pd
2
  import numpy as np
3
+ import torch
4
  import joblib
5
  import argparse
6
  import os
7
  import glob
8
  from sklearn.multioutput import MultiOutputRegressor
9
  from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNRegressor
10
+ from tabpfn import TabPFNRegressor
11
 
12
 
13
+ os.environ["TABPFN_ALLOW_CPU_LARGE_DATASET"] = "true"
 
 
 
14
 
15
def joblib_load_cpu(path):
    """Load a joblib file while forcing every nested ``torch.load`` onto the CPU.

    For the duration of ``joblib.load(path)``, ``torch.load`` is replaced by a
    wrapper that overrides ``map_location`` with the CPU device. The original
    ``torch.load`` is restored afterwards, even if loading raises.

    Args:
        path: Passed unchanged to ``joblib.load``.

    Returns:
        The object returned by ``joblib.load(path)``.

    Raises:
        Whatever ``joblib.load`` raises; the exception propagates after
        ``torch.load`` has been restored.

    NOTE: the patch rebinds the module attribute ``torch.load``, so it is
    process-global for the duration of the call; any other thread calling
    ``torch.load`` in that window also gets the CPU override.
    NOTE(security): ``joblib.load`` unpickles its input, which can execute
    arbitrary code -- only call this on trusted model files.
    """
    original_load = torch.load
    cpu_device = torch.device('cpu')

    def cpu_loader(*args, **kwargs):
        if len(args) >= 2:
            # map_location is torch.load's second positional parameter; when it
            # was supplied positionally, override it in place. Adding it as a
            # keyword as well would raise "got multiple values for argument".
            args = (args[0], cpu_device) + tuple(args[2:])
        else:
            kwargs['map_location'] = cpu_device
        return original_load(*args, **kwargs)

    torch.load = cpu_loader
    try:
        model = joblib.load(path)
    finally:
        # Always undo the global patch, including on failure.
        torch.load = original_load
    return model
29
 
30
  class TabPFNEnsemblePredictor:
31
  """
 
41
  target_cols (list): The names of the target columns for the output DataFrame.
42
  """
43
 
44
+ def __init__(self, model_dir: str, model_pattern: str = "Fold_*_best_model.tabpfn_fit*"):
45
  """
46
  Initializes the predictor by finding and loading the ensemble of models.
47
 
 
77
  # Move model components to CPU for inference to avoid potential CUDA errors
78
  # and ensure compatibility on machines without a GPU.
79
  if not torch.cuda.is_available():
80
+ #torch.device("cpu") # Force default
81
+ #os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
82
+ #os.environ["CUDA_VISIBLE_DEVICES"] = ""
83
+ #os.environ["HSA_OVERRIDE_GFX_VERSION"] = "0"
84
+ model = joblib_load_cpu(model_path)
85
+ for estimator in model.estimators_:
86
+ estimator.device = "cpu"
87
+ estimator.max_time = 40
88
  print("Cuda not available using cpu")
89
+ #for estimator in model.estimators_:
90
+ # if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
91
+ # for p in estimator.predictor_.predictors:
92
+ # p.to("cpu")
93
+ # if hasattr(estimator.predictor_, 'to'):
94
+ # estimator.predictor_.to('cpu')
95
+
96
+ else:
97
+ print("Cuda is available")
98
  model = joblib.load(model_path)
99
  for estimator in model.estimators_:
100
  if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
101
  for p in estimator.predictor_.predictors:
102
+ p.to("cuda")
 
 
 
103
 
104
  loaded_models.append(model)
105
  print(f"Successfully loaded {os.path.basename(model_path)}")
 
133
 
134
  return df_featured
135
 
136
+ def custom_predict(self, input_data: pd.DataFrame or np.ndarray or str) -> (np.ndarray, pd.DataFrame):
137
  """
138
  Generates ensembled predictions for the given input data.
139