0408happyfeet committed on
Commit
798cf6f
·
verified ·
1 Parent(s): 546791b

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +77 -98
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os, shutil, zipfile, pickle
2
- from typing import List
3
  import pandas as pd
4
  import gradio as gr
5
 
@@ -31,100 +31,90 @@ def load_cheese_dataset(dataset_id: str) -> pd.DataFrame:
31
  return _safe_concat_splits(ds)
32
 
33
  def _find_dir_with_any_predictor_marker(start_dir: str) -> str:
34
- '''Find a directory containing either 'learner.pkl' (preferred) or 'predictor.pkl'.'''
35
  for root, dirs, files in os.walk(start_dir):
36
  if "learner.pkl" in files or "predictor.pkl" in files:
37
  return root
38
  return ""
39
 
40
- def _force_reset_predictor_paths(predictor: TabularPredictor, root_dir: str):
41
- '''Force-reset internal contexts/paths to avoid absolute Windows paths during inference.'''
 
42
  try:
43
- # predictor.path may be a read-only property on some versions; ignore if so
44
- try:
45
- if hasattr(predictor, "path"):
46
- predictor.path = root_dir
47
- except Exception:
48
- pass
49
- if hasattr(predictor, "_learner") and predictor._learner is not None:
50
- try:
51
- predictor._learner.set_contexts(path_context=root_dir)
52
- except Exception:
53
- pass
54
- tr = getattr(predictor, "_trainer", None)
55
- if tr is not None:
56
- try:
57
- if hasattr(tr, "path"):
58
- tr.path = root_dir
59
- except Exception:
60
- pass
61
- for attr in ("model_paths", "_model_paths", "path_dict", "model_path_dict"):
62
- if hasattr(tr, attr):
63
- d = getattr(tr, attr)
64
- if isinstance(d, dict):
65
- rebuilt = {name: os.path.join(root_dir, "models", name) for name in d.keys()}
66
- try:
67
- setattr(tr, attr, rebuilt)
68
- except Exception:
69
- pass
70
- except Exception as e:
71
- print("[paths] Warning: could not fully reset predictor paths:", e)
72
-
73
- def load_predictor_from_hub(repo_id: str) -> TabularPredictor:
74
- # 1) Try known zip names → extract → locate predictor dir → load with relaxed checks
75
- extract_dir = os.path.join(os.getcwd(), "ag_predictor_unpack")
76
- if os.path.exists(extract_dir):
77
- shutil.rmtree(extract_dir)
78
- os.makedirs(extract_dir, exist_ok=True)
79
 
80
  zip_candidates = ["cheese_texture_predictor_dir.zip", "predictor_dir.zip", "agModels-predictor.zip"]
 
81
  for fname in zip_candidates:
82
  try:
83
  zpath = hf_hub_download(repo_id=repo_id, filename=fname)
84
  with zipfile.ZipFile(zpath, "r") as zf:
85
- zf.extractall(extract_dir)
86
- candidate_dir = _find_dir_with_any_predictor_marker(extract_dir)
87
- if candidate_dir:
88
- pred = TabularPredictor.load(
89
- candidate_dir,
90
- require_version_match=False,
91
- require_py_version_match=False,
92
- check_packages=False,
93
- )
94
- _force_reset_predictor_paths(pred, candidate_dir)
95
- return pred
96
  except Exception as e:
97
  print(f"[loader] Zip candidate '{fname}' not usable: {e}")
98
 
99
- # 2) LAST RESORT: load a serialized predictor .pkl and rebase paths (may not work if artifacts missing)
100
- pkl_candidates = ["cheese_texture_predictor.pkl", "predictor.pkl"]
101
- for fname in pkl_candidates:
102
- try:
103
- pkl_path = hf_hub_download(repo_id=repo_id, filename=fname)
104
- with open(pkl_path, "rb") as f:
105
- obj = pickle.load(f)
106
- if isinstance(obj, TabularPredictor) or hasattr(obj, "predict"):
107
- local_dir = os.path.join(os.getcwd(), "ag_predictor_from_pkl")
108
- os.makedirs(local_dir, exist_ok=True)
109
- _force_reset_predictor_paths(obj, local_dir)
110
- return obj
111
- except Exception as e:
112
- print(f"[loader] PKL candidate '{fname}' not usable: {e}")
113
-
114
- # 3) Snapshot scan (last resort, same relaxed load flags)
115
- repo_path = snapshot_download(repo_id=repo_id)
116
- candidate_dir = _find_dir_with_any_predictor_marker(repo_path)
117
- if candidate_dir:
118
- pred = TabularPredictor.load(
119
- candidate_dir,
120
- require_version_match=False,
121
- require_py_version_match=False,
122
- check_packages=False,
123
- )
124
- _force_reset_predictor_paths(pred, candidate_dir)
125
- return pred
126
-
127
- raise FileNotFoundError("Could not load an AutoGluon predictor: no predictor directory or usable *.pkl found.")
 
 
 
 
128
 
129
  # ===== Load dataset & predictor on startup =====
130
  df_all = load_cheese_dataset(DATASET_ID)
@@ -138,10 +128,10 @@ PRICE_MIN, PRICE_MAX = float(df_all["price"].min()), float(df_all["price"].max()
138
  PROTEIN_MIN, PROTEIN_MAX = float(df_all["protein"].min()), float(df_all["protein"].max())
139
  ORIGINS: List[str] = sorted([o for o in df_all["origin"].dropna().unique().tolist() if isinstance(o, str)])
140
 
141
- PREDICTOR = load_predictor_from_hub(MODEL_REPO_ID)
142
  CLASSES = list(getattr(PREDICTOR, "class_labels", [])) or sorted(df_all["texture"].dropna().unique().tolist())
143
 
144
- # Build base-model name list robustly (avoid heavy ops if not necessary)
145
  MODEL_NAMES = ["best"]
146
  try:
147
  if hasattr(PREDICTOR, "get_model_names"):
@@ -207,40 +197,29 @@ def _coerce_and_validate(fat, origin, holed, price, protein, top_k):
207
  return fat_s, origin_s, bool(holed), price_s, protein_s, k
208
 
209
  def _predict_with_fallback(X, base_model):
210
- '''Predict with chosen model; if a dependency (e.g., torch) is missing, fall back to a non-NN model.'''
211
- # Try requested (or best/ensemble) first
212
  try_order = []
213
  if base_model in (None, "", "best"):
214
- try_order.append(None) # means default/best
215
  else:
216
  try_order.append(base_model)
217
-
218
- # Non-NN fallbacks (skip NN_* and WeightedEnsemble_*)
219
  non_nn = [m for m in MODEL_NAMES
220
  if m not in (None, "", "best")
221
  and not (m.lower().startswith("nn") or "neuralnet" in m.lower() or "weightedensemble" in m.lower())]
222
  try_order.extend([m for m in non_nn if m not in try_order])
223
-
224
  errors = []
225
  for m in try_order:
226
  try:
227
  if m is None:
228
  label_pred = PREDICTOR.predict(X).iloc[0]
229
- proba_df = PREDICTOR.predict_proba(X)
230
  else:
231
  label_pred = PREDICTOR.predict(X, model=m).iloc[0]
232
- proba_df = PREDICTOR.predict_proba(X, model=m)
233
  if m is not None:
234
  gr.Info(f"Using base model: {m}")
235
  return label_pred, proba_df
236
- except ModuleNotFoundError as e:
237
- msg = str(e)
238
- errors.append(msg)
239
- missing = getattr(e, "name", "")
240
- if missing:
241
- gr.Warning(f"Missing dependency '{missing}' for model '{m or 'best'}'; trying a fallback...")
242
- else:
243
- gr.Warning(f"Dependency missing for model '{m or 'best'}'; trying a fallback...")
244
  except Exception as e:
245
  errors.append(str(e))
246
  gr.Warning(f"Model '{m or 'best'}' failed; trying a fallback...")
 
1
  import os, shutil, zipfile, pickle
2
+ from typing import List, Tuple
3
  import pandas as pd
4
  import gradio as gr
5
 
 
31
  return _safe_concat_splits(ds)
32
 
33
  def _find_dir_with_any_predictor_marker(start_dir: str) -> str:
34
+ '''Return the first directory containing either 'learner.pkl' (preferred) or 'predictor.pkl'.'''
35
  for root, dirs, files in os.walk(start_dir):
36
  if "learner.pkl" in files or "predictor.pkl" in files:
37
  return root
38
  return ""
39
 
40
+ def _symlink_or_copytree(src: str, dst: str):
41
+ if os.path.exists(dst):
42
+ return
43
  try:
44
+ os.symlink(src, dst)
45
+ except Exception:
46
+ shutil.copytree(src, dst)
47
+
48
+ def _materialize_flat_model_layout(predictor_dir: str, extract_root: str):
49
+ '''Ensure model subdirs exist in both forms:
50
+ - predictor_dir/models/<name>/...
51
+ - extract_root/<name>/... (flat layout some predictors still reference)
52
+ - predictor_dir/<name>/... (defensive)
53
+ '''
54
+ models_dir = os.path.join(predictor_dir, "models")
55
+ if not os.path.isdir(models_dir):
56
+ return
57
+ for name in os.listdir(models_dir):
58
+ src = os.path.join(models_dir, name)
59
+ if not os.path.isdir(src):
60
+ continue
61
+ for base in (extract_root, predictor_dir):
62
+ dst = os.path.join(base, name)
63
+ _symlink_or_copytree(src, dst)
64
+
65
+ def load_predictor_from_hub(repo_id: str) -> Tuple[TabularPredictor, str, str]:
66
+ # Extract zip into a known root, then load from the inner predictor dir.
67
+ extract_root = os.path.join(os.getcwd(), "ag_predictor_unpack")
68
+ if os.path.exists(extract_root):
69
+ shutil.rmtree(extract_root)
70
+ os.makedirs(extract_root, exist_ok=True)
 
 
 
 
 
 
 
 
 
71
 
72
  zip_candidates = ["cheese_texture_predictor_dir.zip", "predictor_dir.zip", "agModels-predictor.zip"]
73
+ predictor_dir = ""
74
  for fname in zip_candidates:
75
  try:
76
  zpath = hf_hub_download(repo_id=repo_id, filename=fname)
77
  with zipfile.ZipFile(zpath, "r") as zf:
78
+ zf.extractall(extract_root)
79
+ predictor_dir = _find_dir_with_any_predictor_marker(extract_root)
80
+ if predictor_dir:
81
+ break
 
 
 
 
 
 
 
82
  except Exception as e:
83
  print(f"[loader] Zip candidate '{fname}' not usable: {e}")
84
 
85
+ if not predictor_dir:
86
+ # Snapshot and search
87
+ repo_path = snapshot_download(repo_id=repo_id)
88
+ predictor_dir = _find_dir_with_any_predictor_marker(repo_path)
89
+ if not predictor_dir:
90
+ # Try PKL fallback
91
+ for fname in ("cheese_texture_predictor.pkl", "predictor.pkl"):
92
+ try:
93
+ pkl_path = hf_hub_download(repo_id=repo_id, filename=fname)
94
+ with open(pkl_path, "rb") as f:
95
+ obj = pickle.load(f)
96
+ if isinstance(obj, TabularPredictor) or hasattr(obj, "predict"):
97
+ # Mirror structure under a local dir
98
+ predictor_dir = os.path.join(os.getcwd(), "ag_predictor_from_pkl")
99
+ os.makedirs(predictor_dir, exist_ok=True)
100
+ # Can't reconstruct 'models' tree automatically here; best effort only.
101
+ return obj, predictor_dir, extract_root
102
+ except Exception as e:
103
+ print(f"[loader] PKL candidate '{fname}' not usable: {e}")
104
+ raise FileNotFoundError("Could not locate an AutoGluon predictor directory.")
105
+
106
+ # Load predictor with relaxed checks
107
+ predictor = TabularPredictor.load(
108
+ predictor_dir,
109
+ require_version_match=False,
110
+ require_py_version_match=False,
111
+ check_packages=False,
112
+ )
113
+
114
+ # Create flat model layout to satisfy predictors that look for '/extract_root/<model>/model.pkl'
115
+ _materialize_flat_model_layout(predictor_dir, extract_root)
116
+
117
+ return predictor, predictor_dir, extract_root
118
 
119
  # ===== Load dataset & predictor on startup =====
120
  df_all = load_cheese_dataset(DATASET_ID)
 
128
  PROTEIN_MIN, PROTEIN_MAX = float(df_all["protein"].min()), float(df_all["protein"].max())
129
  ORIGINS: List[str] = sorted([o for o in df_all["origin"].dropna().unique().tolist() if isinstance(o, str)])
130
 
131
+ PREDICTOR, PREDICTOR_DIR, EXTRACT_ROOT = load_predictor_from_hub(MODEL_REPO_ID)
132
  CLASSES = list(getattr(PREDICTOR, "class_labels", [])) or sorted(df_all["texture"].dropna().unique().tolist())
133
 
134
+ # Build base-model name list robustly (reading leaderboard should not require loading submodels)
135
  MODEL_NAMES = ["best"]
136
  try:
137
  if hasattr(PREDICTOR, "get_model_names"):
 
197
  return fat_s, origin_s, bool(holed), price_s, protein_s, k
198
 
199
  def _predict_with_fallback(X, base_model):
200
+ '''Try requested/best, then non-NN models as fallback.'''
 
201
  try_order = []
202
  if base_model in (None, "", "best"):
203
+ try_order.append(None) # best/ensemble
204
  else:
205
  try_order.append(base_model)
206
+ # Build a non-NN list (skip NN/WeightedEnsemble)
 
207
  non_nn = [m for m in MODEL_NAMES
208
  if m not in (None, "", "best")
209
  and not (m.lower().startswith("nn") or "neuralnet" in m.lower() or "weightedensemble" in m.lower())]
210
  try_order.extend([m for m in non_nn if m not in try_order])
 
211
  errors = []
212
  for m in try_order:
213
  try:
214
  if m is None:
215
  label_pred = PREDICTOR.predict(X).iloc[0]
216
+ proba_df = PREDICTOR.predict_proba(X)
217
  else:
218
  label_pred = PREDICTOR.predict(X, model=m).iloc[0]
219
+ proba_df = PREDICTOR.predict_proba(X, model=m)
220
  if m is not None:
221
  gr.Info(f"Using base model: {m}")
222
  return label_pred, proba_df
 
 
 
 
 
 
 
 
223
  except Exception as e:
224
  errors.append(str(e))
225
  gr.Warning(f"Model '{m or 'best'}' failed; trying a fallback...")