Yeetek committed on
Commit
dc9f788
·
verified ·
1 Parent(s): 8bb268e

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +26 -20
Dockerfile CHANGED
@@ -7,40 +7,46 @@ ENV PIP_NO_CACHE_DIR=1 \
7
  PYTHONUNBUFFERED=1 \
8
  EMBED_MODEL=Seznam/simcse-small-e-czech \
9
  MIN_TOPIC_SIZE=10 \
10
- MAX_DOCS=5000
11
-
12
- # ------------------------------------------------------------------
13
- # 2. Writable cache directories for Hugging Face & sentence-transformers
14
- # ------------------------------------------------------------------
15
- ENV HF_HOME=/tmp/hfcache \
16
  TRANSFORMERS_CACHE=/tmp/hfcache \
17
- SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache
18
- RUN mkdir -p /tmp/hfcache \
19
- && chmod -R 777 /tmp/hfcache
20
 
21
  # ------------------------------------------------------------------
22
- # 3. Writable cache dir for numba (already used in app.py)
23
  # ------------------------------------------------------------------
24
- ENV NUMBA_CACHE_DIR=/tmp/numba_cache
25
- RUN mkdir -p /tmp/numba_cache \
26
- && chmod -R 777 /tmp/numba_cache
27
 
28
  # ------------------------------------------------------------------
29
- # 4. Install Python deps and copy code
30
  # ------------------------------------------------------------------
31
  WORKDIR /code
32
  COPY requirements.txt .
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
- # ---- PRE-DOWNLOAD Czech SBERT so runtime never pulls ----
 
 
 
 
 
36
  RUN python - <<'PY'
37
- from sentence_transformers import SentenceTransformer
38
- SentenceTransformer(
39
- 'Seznam/simcse-small-e-czech',
40
- cache_folder='/tmp/hfcache'
41
- )
42
  PY
43
 
 
 
 
 
 
 
 
 
 
44
  COPY app.py .
45
 
46
  EXPOSE 7860
 
7
  PYTHONUNBUFFERED=1 \
8
  EMBED_MODEL=Seznam/simcse-small-e-czech \
9
  MIN_TOPIC_SIZE=10 \
10
+ MAX_DOCS=5000 \
11
+ HF_HOME=/tmp/hfcache \
 
 
 
 
12
  TRANSFORMERS_CACHE=/tmp/hfcache \
13
+ SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
14
+ NUMBA_CACHE_DIR=/tmp/numba_cache
 
15
 
16
  # ------------------------------------------------------------------
17
+ # 2. Make caches and give them world-writable permissions
18
  # ------------------------------------------------------------------
19
+ RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
20
+ && chmod -R 777 /tmp/hfcache /tmp/numba_cache
 
21
 
22
  # ------------------------------------------------------------------
23
+ # 3. Install Python deps
24
  # ------------------------------------------------------------------
25
  WORKDIR /code
26
  COPY requirements.txt .
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
+ # ------------------------------------------------------------------
30
+ # 4. Pre-download the model/tokenizer into HF cache
31
+ # Use transformers directly so the download populates
32
+ # the “models--…” layout that HF v4+ uses;
33
+ # a symlink is then added so SBERT’s fallback path works too
34
+ # ------------------------------------------------------------------
35
  RUN python - <<'PY'
36
+ from transformers import AutoTokenizer, AutoModel
37
+ AutoTokenizer.from_pretrained("Seznam/simcse-small-e-czech")
38
+ AutoModel.from_pretrained("Seznam/simcse-small-e-czech")
 
 
39
  PY
40
 
41
+ # ------------------------------------------------------------------
42
+ # 5. Symlink the two possible cache dirs so SBERT never misses the model
43
+ # ------------------------------------------------------------------
44
+ RUN ln -s /tmp/hfcache/models--Seznam--simcse-small-e-czech /tmp/hfcache/Seznam_simcse-small-e-czech \
45
+ && chmod -R 777 /tmp/hfcache/models--Seznam--simcse-small-e-czech /tmp/hfcache/Seznam_simcse-small-e-czech
46
+
47
+ # ------------------------------------------------------------------
48
+ # 6. Copy your app and expose
49
+ # ------------------------------------------------------------------
50
  COPY app.py .
51
 
52
  EXPOSE 7860