Yeetek committed on
Commit
5232e60
·
verified ·
1 Parent(s): dc9f788

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +20 -22
Dockerfile CHANGED
@@ -7,45 +7,43 @@ ENV PIP_NO_CACHE_DIR=1 \
7
  PYTHONUNBUFFERED=1 \
8
  EMBED_MODEL=Seznam/simcse-small-e-czech \
9
  MIN_TOPIC_SIZE=10 \
10
- MAX_DOCS=5000 \
11
- HF_HOME=/tmp/hfcache \
 
 
 
 
12
  TRANSFORMERS_CACHE=/tmp/hfcache \
13
- SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
14
- NUMBA_CACHE_DIR=/tmp/numba_cache
15
 
16
  # ------------------------------------------------------------------
17
- # 2. Make caches and give them world‐writable perms
18
  # ------------------------------------------------------------------
19
- RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
20
- && chmod -R 777 /tmp/hfcache /tmp/numba_cache
21
 
22
  # ------------------------------------------------------------------
23
- # 3. Install Python deps
24
  # ------------------------------------------------------------------
25
  WORKDIR /code
26
  COPY requirements.txt .
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
  # ------------------------------------------------------------------
30
- # 4. Pre-download the model/tokenizer into HF cache
31
- # Using transformers directly to populate both
32
- # – the “models--…” layout that HF v4+ uses
33
- # – and then we’ll symlink so SBERT’s fallback path works too
34
  # ------------------------------------------------------------------
35
  RUN python - <<'PY'
36
- from transformers import AutoTokenizer, AutoModel
37
- AutoTokenizer.from_pretrained("Seznam/simcse-small-e-czech")
38
- AutoModel.from_pretrained("Seznam/simcse-small-e-czech")
 
 
39
  PY
 
40
 
41
  # ------------------------------------------------------------------
42
- # 5. Symlink the two possible cache‐dirs so SBERT never misses it
43
- # ------------------------------------------------------------------
44
- RUN ln -s /tmp/hfcache/models--Seznam--simcse-small-e-czech /tmp/hfcache/Seznam_simcse-small-e-czech \
45
- && chmod -R 777 /tmp/hfcache/models--Seznam--simcse-small-e-czech /tmp/hfcache/Seznam_simcse-small-e-czech
46
-
47
- # ------------------------------------------------------------------
48
- # 6. Copy your app and expose
49
  # ------------------------------------------------------------------
50
  COPY app.py .
51
 
 
7
  PYTHONUNBUFFERED=1 \
8
  EMBED_MODEL=Seznam/simcse-small-e-czech \
9
  MIN_TOPIC_SIZE=10 \
10
+ MAX_DOCS=5000
11
+
12
+ # ------------------------------------------------------------------
13
+ # 2. Writable cache directories for Hugging Face & sentence-transformers
14
+ # ------------------------------------------------------------------
15
+ ENV HF_HOME=/tmp/hfcache \
16
  TRANSFORMERS_CACHE=/tmp/hfcache \
17
+ SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache
18
+ RUN mkdir -p /tmp/hfcache && chmod -R 777 /tmp/hfcache
19
 
20
  # ------------------------------------------------------------------
21
+ # 3. Writable cache dir for numba
22
  # ------------------------------------------------------------------
23
+ ENV NUMBA_CACHE_DIR=/tmp/numba_cache
24
+ RUN mkdir -p /tmp/numba_cache && chmod -R 777 /tmp/numba_cache
25
 
26
  # ------------------------------------------------------------------
27
+ # 4. Install Python deps
28
  # ------------------------------------------------------------------
29
  WORKDIR /code
30
  COPY requirements.txt .
31
  RUN pip install --no-cache-dir -r requirements.txt
32
 
33
  # ------------------------------------------------------------------
34
+ # 5. Pre-download the Czech SBERT and make that cache world-readable
 
 
 
35
  # ------------------------------------------------------------------
36
  RUN python - <<'PY'
37
+ from sentence_transformers import SentenceTransformer
38
+ SentenceTransformer(
39
+ 'Seznam/simcse-small-e-czech',
40
+ cache_folder='/tmp/hfcache'
41
+ )
42
  PY
43
+ RUN chmod -R 777 /tmp/hfcache
44
 
45
  # ------------------------------------------------------------------
46
+ # 6. Copy your app and run
 
 
 
 
 
 
47
  # ------------------------------------------------------------------
48
  COPY app.py .
49