Spaces:

wayne-chi
/

Eagle

Paused

App Files Files Community

wayne-chi committed about 1 month ago

Commit

1525f6b

verified ·

1 Parent(s): c43791e

Upload inference.py

Browse files

Files changed (1) hide show

inference.py +35 -24

inference.py CHANGED Viewed

@@ -1,34 +1,31 @@
 import pandas as pd
 import numpy as np
-# import torch
 import joblib
 import argparse
 import os
 import glob
 from sklearn.multioutput import MultiOutputRegressor
 from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNRegressor
-# print(os.environ.get("OMP_NUM_THREADS"))
-# os.environ["OMP_NUM_THREADS"] = os.cpu_count()
-import torch
-# Give torch.classes a benign __path__ so Streamlit won't trigger __getattr__.
-try:
-    setattr(torch.classes, "__path__", [])
-except Exception:
-    # Fallback wrapper if direct setattr isn't allowed in your build
-    class _TorchClassesWrapper:
-        def __init__(self, obj):
-            self._obj = obj
-            self.__path__ = []
-        def __getattr__(self, name):
-            return getattr(self._obj, name)
-    torch.classes = _TorchClassesWrapper(torch.classes)
 class TabPFNEnsemblePredictor:
 	"""
@@ -44,7 +41,7 @@ class TabPFNEnsemblePredictor:
 		target_cols (list): The names of the target columns for the output DataFrame.
 	"""
-	def __init__(self, model_dir: str, model_pattern: str = "Fold_*_best_model.tabpfn_fit"):
 		"""
 		Initializes the predictor by finding and loading the ensemble of models.
@@ -80,15 +77,29 @@ class TabPFNEnsemblePredictor:
 				# Move model components to CPU for inference to avoid potential CUDA errors
 				# and ensure compatibility on machines without a GPU.
 				if not torch.cuda.is_available():
 					print("Cuda not available using cpu")
 					model = joblib.load(model_path)
 					for estimator in model.estimators_:
 						if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
 							for p in estimator.predictor_.predictors:
-								p.to("cpu")
-				else:
-					print("Cuda is available")
-					model = joblib.load(model_path)
 				loaded_models.append(model)
 				print(f"Successfully loaded {os.path.basename(model_path)}")
@@ -122,7 +133,7 @@ class TabPFNEnsemblePredictor:
 		return df_featured
-	def predict(self, input_data: pd.DataFrame or np.ndarray or str) -> (np.ndarray, pd.DataFrame):
 		"""
 		Generates ensembled predictions for the given input data.

 import pandas as pd
 import numpy as np
+import torch
 import joblib
 import argparse
 import os
 import glob
 from sklearn.multioutput import MultiOutputRegressor
 from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNRegressor
+from tabpfn import TabPFNRegressor
+os.environ["TABPFN_ALLOW_CPU_LARGE_DATASET"] = "true"
+def joblib_load_cpu(path):  # Load the joblib file at `path` while every torch.load call is forced to map_location=cpu; returns the loaded object.
+	# Temporarily swap the module-level torch.load for a wrapper so that any torch.load call made while joblib.load runs is mapped to the CPU. NOTE: the swap is on the torch module itself, so it is visible to all code that looks up torch.load until it is restored below.
+	original_load = torch.load  # keep a handle to the real loader: the wrapper delegates to it and it is put back afterwards
+	def cpu_loader(*args, **kwargs):  # drop-in stand-in for torch.load with the same call signature
+		kwargs['map_location'] = torch.device('cpu')  # unconditionally overrides any map_location the caller passed
+		return original_load(*args, **kwargs)  # delegate the actual load to the real torch.load
+	torch.load = cpu_loader  # install the wrapper before deserialization starts
+	try:
+		model = joblib.load(path)  # NOTE(review): joblib.load unpickles the file; only use on trusted model files
+	finally:
+		torch.load = original_load  # Restore the original torch.load even if joblib.load raised
+	return model  # whatever object joblib.load produced
 class TabPFNEnsemblePredictor:
 	"""
 		target_cols (list): The names of the target columns for the output DataFrame.
 	"""
+	def __init__(self, model_dir: str, model_pattern: str = "Fold_*_best_model.tabpfn_fit*"):
 		"""
 		Initializes the predictor by finding and loading the ensemble of models.
 				# Move model components to CPU for inference to avoid potential CUDA errors
 				# and ensure compatibility on machines without a GPU.
 				if not torch.cuda.is_available():
+					#torch.device("cpu")  # Force default
+					#os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"] = "1"
+					#os.environ["CUDA_VISIBLE_DEVICES"] = ""
+					#os.environ["HSA_OVERRIDE_GFX_VERSION"] = "0"
+					model = joblib_load_cpu(model_path)
+					for estimator in model.estimators_:
+						estimator.device = "cpu"
+						estimator.max_time = 40
 					print("Cuda not available using cpu")
+					#for estimator in model.estimators_:
+					#	if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
+					#		for p in estimator.predictor_.predictors:
+					#			p.to("cpu")
+					#	if hasattr(estimator.predictor_, 'to'):
+					#		estimator.predictor_.to('cpu')
+				else:
+					print("Cuda is available")
 					model = joblib.load(model_path)
 					for estimator in model.estimators_:
 						if hasattr(estimator, "predictor_") and hasattr(estimator.predictor_, "predictors"):
 							for p in estimator.predictor_.predictors:
+								p.to("cuda")
 				loaded_models.append(model)
 				print(f"Successfully loaded {os.path.basename(model_path)}")
 		return df_featured
+	def custom_predict(self, input_data: pd.DataFrame or np.ndarray or str) -> (np.ndarray, pd.DataFrame):
 		"""
 		Generates ensembled predictions for the given input data.