Spaces:

0408happyfeet
/

cheese-texture-tabular-gradio

Sleeping

App Files Community

0408happyfeet committed on Oct 2

Commit

798cf6f

verified ·

1 Parent(s): 546791b

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +77 -98

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import os, shutil, zipfile, pickle
-from typing import List
 import pandas as pd
 import gradio as gr
@@ -31,100 +31,90 @@ def load_cheese_dataset(dataset_id: str) -> pd.DataFrame:
         return _safe_concat_splits(ds)
 def _find_dir_with_any_predictor_marker(start_dir: str) -> str:
-    '''Find a directory containing either 'learner.pkl' (preferred) or 'predictor.pkl'.'''
     for root, dirs, files in os.walk(start_dir):
         if "learner.pkl" in files or "predictor.pkl" in files:
             return root
     return ""
-def _force_reset_predictor_paths(predictor: TabularPredictor, root_dir: str):
-    '''Force-reset internal contexts/paths to avoid absolute Windows paths during inference.'''
     try:
-        # predictor.path may be a read-only property on some versions; ignore if so
-        try:
-            if hasattr(predictor, "path"):
-                predictor.path = root_dir
-        except Exception:
-            pass
-        if hasattr(predictor, "_learner") and predictor._learner is not None:
-            try:
-                predictor._learner.set_contexts(path_context=root_dir)
-            except Exception:
-                pass
-        tr = getattr(predictor, "_trainer", None)
-        if tr is not None:
-            try:
-                if hasattr(tr, "path"):
-                    tr.path = root_dir
-            except Exception:
-                pass
-            for attr in ("model_paths", "_model_paths", "path_dict", "model_path_dict"):
-                if hasattr(tr, attr):
-                    d = getattr(tr, attr)
-                    if isinstance(d, dict):
-                        rebuilt = {name: os.path.join(root_dir, "models", name) for name in d.keys()}
-                        try:
-                            setattr(tr, attr, rebuilt)
-                        except Exception:
-                            pass
-    except Exception as e:
-        print("[paths] Warning: could not fully reset predictor paths:", e)
-def load_predictor_from_hub(repo_id: str) -> TabularPredictor:
-    # 1) Try known zip names → extract → locate predictor dir → load with relaxed checks
-    extract_dir = os.path.join(os.getcwd(), "ag_predictor_unpack")
-    if os.path.exists(extract_dir):
-        shutil.rmtree(extract_dir)
-    os.makedirs(extract_dir, exist_ok=True)
     zip_candidates = ["cheese_texture_predictor_dir.zip", "predictor_dir.zip", "agModels-predictor.zip"]
     for fname in zip_candidates:
         try:
             zpath = hf_hub_download(repo_id=repo_id, filename=fname)
             with zipfile.ZipFile(zpath, "r") as zf:
-                zf.extractall(extract_dir)
-            candidate_dir = _find_dir_with_any_predictor_marker(extract_dir)
-            if candidate_dir:
-                pred = TabularPredictor.load(
-                    candidate_dir,
-                    require_version_match=False,
-                    require_py_version_match=False,
-                    check_packages=False,
-                )
-                _force_reset_predictor_paths(pred, candidate_dir)
-                return pred
         except Exception as e:
             print(f"[loader] Zip candidate '{fname}' not usable: {e}")
-    # 2) LAST RESORT: load a serialized predictor .pkl and rebase paths (may not work if artifacts missing)
-    pkl_candidates = ["cheese_texture_predictor.pkl", "predictor.pkl"]
-    for fname in pkl_candidates:
-        try:
-            pkl_path = hf_hub_download(repo_id=repo_id, filename=fname)
-            with open(pkl_path, "rb") as f:
-                obj = pickle.load(f)
-            if isinstance(obj, TabularPredictor) or hasattr(obj, "predict"):
-                local_dir = os.path.join(os.getcwd(), "ag_predictor_from_pkl")
-                os.makedirs(local_dir, exist_ok=True)
-                _force_reset_predictor_paths(obj, local_dir)
-                return obj
-        except Exception as e:
-            print(f"[loader] PKL candidate '{fname}' not usable: {e}")
-    # 3) Snapshot scan (last resort, same relaxed load flags)
-    repo_path = snapshot_download(repo_id=repo_id)
-    candidate_dir = _find_dir_with_any_predictor_marker(repo_path)
-    if candidate_dir:
-        pred = TabularPredictor.load(
-            candidate_dir,
-            require_version_match=False,
-            require_py_version_match=False,
-            check_packages=False,
-        )
-        _force_reset_predictor_paths(pred, candidate_dir)
-        return pred
-    raise FileNotFoundError("Could not load an AutoGluon predictor: no predictor directory or usable *.pkl found.")
 # ===== Load dataset & predictor on startup =====
 df_all = load_cheese_dataset(DATASET_ID)
@@ -138,10 +128,10 @@ PRICE_MIN, PRICE_MAX = float(df_all["price"].min()), float(df_all["price"].max()
 PROTEIN_MIN, PROTEIN_MAX = float(df_all["protein"].min()), float(df_all["protein"].max())
 ORIGINS: List[str] = sorted([o for o in df_all["origin"].dropna().unique().tolist() if isinstance(o, str)])
-PREDICTOR = load_predictor_from_hub(MODEL_REPO_ID)
 CLASSES = list(getattr(PREDICTOR, "class_labels", [])) or sorted(df_all["texture"].dropna().unique().tolist())
-# Build base-model name list robustly (avoid heavy ops if not necessary)
 MODEL_NAMES = ["best"]
 try:
     if hasattr(PREDICTOR, "get_model_names"):
@@ -207,40 +197,29 @@ def _coerce_and_validate(fat, origin, holed, price, protein, top_k):
     return fat_s, origin_s, bool(holed), price_s, protein_s, k
 def _predict_with_fallback(X, base_model):
-    '''Predict with chosen model; if a dependency (e.g., torch) is missing, fall back to a non-NN model.'''
-    # Try requested (or best/ensemble) first
     try_order = []
     if base_model in (None, "", "best"):
-        try_order.append(None)  # means default/best
     else:
         try_order.append(base_model)
-    # Non-NN fallbacks (skip NN_* and WeightedEnsemble_*)
     non_nn = [m for m in MODEL_NAMES
               if m not in (None, "", "best")
               and not (m.lower().startswith("nn") or "neuralnet" in m.lower() or "weightedensemble" in m.lower())]
     try_order.extend([m for m in non_nn if m not in try_order])
     errors = []
     for m in try_order:
         try:
             if m is None:
                 label_pred = PREDICTOR.predict(X).iloc[0]
-                proba_df = PREDICTOR.predict_proba(X)
             else:
                 label_pred = PREDICTOR.predict(X, model=m).iloc[0]
-                proba_df = PREDICTOR.predict_proba(X, model=m)
             if m is not None:
                 gr.Info(f"Using base model: {m}")
             return label_pred, proba_df
-        except ModuleNotFoundError as e:
-            msg = str(e)
-            errors.append(msg)
-            missing = getattr(e, "name", "")
-            if missing:
-                gr.Warning(f"Missing dependency '{missing}' for model '{m or 'best'}'; trying a fallback...")
-            else:
-                gr.Warning(f"Dependency missing for model '{m or 'best'}'; trying a fallback...")
         except Exception as e:
             errors.append(str(e))
             gr.Warning(f"Model '{m or 'best'}' failed; trying a fallback...")

 import os, shutil, zipfile, pickle
+from typing import List, Tuple
 import pandas as pd
 import gradio as gr
         return _safe_concat_splits(ds)
 def _find_dir_with_any_predictor_marker(start_dir: str) -> str:
+    '''Return the first directory containing either 'learner.pkl' (preferred) or 'predictor.pkl'.'''
     for root, dirs, files in os.walk(start_dir):
         if "learner.pkl" in files or "predictor.pkl" in files:
             return root
     return ""
+def _symlink_or_copytree(src: str, dst: str):
+    if os.path.exists(dst):
+        return
     try:
+        os.symlink(src, dst)
+    except Exception:
+        shutil.copytree(src, dst)
+def _materialize_flat_model_layout(predictor_dir: str, extract_root: str):
+    '''Ensure model subdirs exist in both forms:
+       - predictor_dir/models/<name>/...
+       - extract_root/<name>/...   (flat layout some predictors still reference)
+       - predictor_dir/<name>/...  (defensive)
+    '''
+    models_dir = os.path.join(predictor_dir, "models")
+    if not os.path.isdir(models_dir):
+        return
+    for name in os.listdir(models_dir):
+        src = os.path.join(models_dir, name)
+        if not os.path.isdir(src):
+            continue
+        for base in (extract_root, predictor_dir):
+            dst = os.path.join(base, name)
+            _symlink_or_copytree(src, dst)
+def load_predictor_from_hub(repo_id: str) -> Tuple[TabularPredictor, str, str]:
+    # Extract zip into a known root, then load from the inner predictor dir.
+    extract_root = os.path.join(os.getcwd(), "ag_predictor_unpack")
+    if os.path.exists(extract_root):
+        shutil.rmtree(extract_root)
+    os.makedirs(extract_root, exist_ok=True)
     zip_candidates = ["cheese_texture_predictor_dir.zip", "predictor_dir.zip", "agModels-predictor.zip"]
+    predictor_dir = ""
     for fname in zip_candidates:
         try:
             zpath = hf_hub_download(repo_id=repo_id, filename=fname)
             with zipfile.ZipFile(zpath, "r") as zf:
+                zf.extractall(extract_root)
+            predictor_dir = _find_dir_with_any_predictor_marker(extract_root)
+            if predictor_dir:
+                break
         except Exception as e:
             print(f"[loader] Zip candidate '{fname}' not usable: {e}")
+    if not predictor_dir:
+        # Snapshot and search
+        repo_path = snapshot_download(repo_id=repo_id)
+        predictor_dir = _find_dir_with_any_predictor_marker(repo_path)
+        if not predictor_dir:
+            # Try PKL fallback
+            for fname in ("cheese_texture_predictor.pkl", "predictor.pkl"):
+                try:
+                    pkl_path = hf_hub_download(repo_id=repo_id, filename=fname)
+                    with open(pkl_path, "rb") as f:
+                        obj = pickle.load(f)
+                    if isinstance(obj, TabularPredictor) or hasattr(obj, "predict"):
+                        # Mirror structure under a local dir
+                        predictor_dir = os.path.join(os.getcwd(), "ag_predictor_from_pkl")
+                        os.makedirs(predictor_dir, exist_ok=True)
+                        # Can't reconstruct 'models' tree automatically here; best effort only.
+                        return obj, predictor_dir, extract_root
+                except Exception as e:
+                    print(f"[loader] PKL candidate '{fname}' not usable: {e}")
+            raise FileNotFoundError("Could not locate an AutoGluon predictor directory.")
+    # Load predictor with relaxed checks
+    predictor = TabularPredictor.load(
+        predictor_dir,
+        require_version_match=False,
+        require_py_version_match=False,
+        check_packages=False,
+    )
+    # Create flat model layout to satisfy predictors that look for '/extract_root/<model>/model.pkl'
+    _materialize_flat_model_layout(predictor_dir, extract_root)
+    return predictor, predictor_dir, extract_root
 # ===== Load dataset & predictor on startup =====
 df_all = load_cheese_dataset(DATASET_ID)
 PROTEIN_MIN, PROTEIN_MAX = float(df_all["protein"].min()), float(df_all["protein"].max())
 ORIGINS: List[str] = sorted([o for o in df_all["origin"].dropna().unique().tolist() if isinstance(o, str)])
+PREDICTOR, PREDICTOR_DIR, EXTRACT_ROOT = load_predictor_from_hub(MODEL_REPO_ID)
 CLASSES = list(getattr(PREDICTOR, "class_labels", [])) or sorted(df_all["texture"].dropna().unique().tolist())
+# Build base-model name list robustly (reading leaderboard should not require loading submodels)
 MODEL_NAMES = ["best"]
 try:
     if hasattr(PREDICTOR, "get_model_names"):
     return fat_s, origin_s, bool(holed), price_s, protein_s, k
 def _predict_with_fallback(X, base_model):
+    '''Try requested/best, then non-NN models as fallback.'''
     try_order = []
     if base_model in (None, "", "best"):
+        try_order.append(None)  # best/ensemble
     else:
         try_order.append(base_model)
+    # Build a non-NN list (skip NN/WeightedEnsemble)
     non_nn = [m for m in MODEL_NAMES
               if m not in (None, "", "best")
               and not (m.lower().startswith("nn") or "neuralnet" in m.lower() or "weightedensemble" in m.lower())]
     try_order.extend([m for m in non_nn if m not in try_order])
     errors = []
     for m in try_order:
         try:
             if m is None:
                 label_pred = PREDICTOR.predict(X).iloc[0]
+                proba_df   = PREDICTOR.predict_proba(X)
             else:
                 label_pred = PREDICTOR.predict(X, model=m).iloc[0]
+                proba_df   = PREDICTOR.predict_proba(X, model=m)
             if m is not None:
                 gr.Info(f"Using base model: {m}")
             return label_pred, proba_df
         except Exception as e:
             errors.append(str(e))
             gr.Warning(f"Model '{m or 'best'}' failed; trying a fallback...")