Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -38,11 +38,13 @@ os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
|
|
38 |
os.environ["NUMBA_DISABLE_CACHE"] = "1"
|
39 |
|
40 |
# 2) Config from ENV
|
41 |
-
|
|
|
|
|
42 |
MIN_TOPIC = int(os.getenv("MIN_TOPIC_SIZE", "10"))
|
43 |
MAX_DOCS = int(os.getenv("MAX_DOCS", "5000"))
|
44 |
|
45 |
-
# 3) Set HF cache envs to a writeable folder (once at startup)
|
46 |
cache_dir = "/tmp/hfcache"
|
47 |
os.makedirs(cache_dir, exist_ok=True)
|
48 |
import stat
|
@@ -52,11 +54,17 @@ os.environ["TRANSFORMERS_CACHE"] = cache_dir
|
|
52 |
os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir
|
53 |
|
54 |
# 4) Initialise embeddings once
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
# Pre-initialize fallback global models for small-batch debugging
|
58 |
-
# Global UMAP: 2-neighbors, cosine space, random init
|
59 |
-
global_umap = UMAP(
|
60 |
n_neighbors=2,
|
61 |
metric="cosine",
|
62 |
init="random",
|
|
|
38 |
os.environ["NUMBA_DISABLE_CACHE"] = "1"
|
39 |
|
40 |
# 2) Config from ENV
|
41 |
+
# Read the embedding model name from env; it is used as the HF repo ID as-is (no case normalization is applied)
|
42 |
+
env_model = os.getenv("EMBED_MODEL", "Seznam/simcse-small-e-czech")
|
43 |
+
MODEL_NAME = env_model
|
44 |
MIN_TOPIC = int(os.getenv("MIN_TOPIC_SIZE", "10"))
|
45 |
MAX_DOCS = int(os.getenv("MAX_DOCS", "5000"))
|
46 |
|
47 |
+
# 3) Set HF cache envs to a writeable folder (once at startup)
|
48 |
cache_dir = "/tmp/hfcache"
|
49 |
os.makedirs(cache_dir, exist_ok=True)
|
50 |
import stat
|
|
|
54 |
os.environ["SENTENCE_TRANSFORMERS_HOME"] = cache_dir
|
55 |
|
56 |
# 4) Initialise embeddings once
|
57 |
+
# Use huggingface_hub to snapshot-download the model locally
|
58 |
+
from huggingface_hub import snapshot_download
|
59 |
+
print(f"Downloading model {MODEL_NAME} to {cache_dir}...")
|
60 |
+
sys.stdout.flush()
|
61 |
+
local_model_path = snapshot_download(repo_id=MODEL_NAME, cache_dir=cache_dir)
|
62 |
+
|
63 |
+
# Load SentenceTransformer from local path
|
64 |
+
embeddings = SentenceTransformer(local_model_path, cache_folder=cache_dir)
|
65 |
|
66 |
# Pre-initialize fallback global models for small-batch debugging
|
67 |
+
# Global UMAP: 2-neighbors, cosine space, random init(
|
|
|
68 |
n_neighbors=2,
|
69 |
metric="cosine",
|
70 |
init="random",
|