Yeetek committed on
Commit
76e2de3
·
verified ·
1 Parent(s): a169ddf

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +25 -24
Dockerfile CHANGED
@@ -1,42 +1,43 @@
1
  FROM python:3.10-slim
2
 
3
- # 1) Install git (for VCS deps) + clean up
4
  RUN apt-get update \
5
  && apt-get install -y --no-install-recommends git \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
- # 2) Prep HF & numba caches and set runtime envs
9
- ENV PIP_NO_CACHE_DIR=1 \
10
- PYTHONUNBUFFERED=1 \
11
- EMBED_MODEL=Seznam/simcse-small-e-czech \
12
- MIN_TOPIC_SIZE=10 \
13
- MAX_DOCS=5000 \
14
- HF_HOME=/tmp/hfcache \
15
- TRANSFORMERS_CACHE=/tmp/hfcache \
16
- SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
17
- NUMBA_CACHE_DIR=/tmp/numba_cache
18
 
 
19
  RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
20
  && chmod -R 777 /tmp/hfcache /tmp/numba_cache
21
 
22
- # 3) Install Python dependencies (pins in requirements.txt)
23
  WORKDIR /code
24
- COPY requirements.txt ./
25
- RUN pip install --no-cache-dir -r requirements.txt
26
 
27
- # 4) Pre-download the Czech SBERT model into the cache
 
 
 
 
 
 
28
  RUN python - <<'PY'
29
  from sentence_transformers import SentenceTransformer
30
- SentenceTransformer(
31
- 'Seznam/simcse-small-e-czech',
32
- cache_folder='/tmp/hfcache'
33
- )
34
  PY
35
- RUN chmod -R 777 /tmp/hfcache
36
 
37
- # 5) Copy application code and expose port
38
- COPY app.py ./
39
  EXPOSE 7860
40
-
41
- # 6) Launch
42
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  FROM python:3.10-slim
2
 
3
+ # 1) Install git (for any VCS installs) + clean up
4
  RUN apt-get update \
5
  && apt-get install -y --no-install-recommends git \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
+ # 2) Set all of your HF and numba cache env vars in one place
9
+ ENV HF_HOME=/tmp/hfcache \
10
+ TRANSFORMERS_CACHE=/tmp/hfcache \
11
+ SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
12
+ NUMBA_CACHE_DIR=/tmp/numba_cache \
13
+ PIP_NO_CACHE_DIR=1 \
14
+ PYTHONUNBUFFERED=1 \
15
+ EMBED_MODEL=Seznam/simcse-small-e-czech \
16
+ MIN_TOPIC_SIZE=10 \
17
+ MAX_DOCS=5000
18
 
19
+ # 3) Create those cache dirs and make them fully readable/writable
20
  RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
21
  && chmod -R 777 /tmp/hfcache /tmp/numba_cache
22
 
23
+ # 4) Install Python deps
24
  WORKDIR /code
25
+ COPY requirements.txt .
 
26
 
27
+ # ⚠️ Pin sentence-transformers to 2.1.0 so that BERTopic’s StaticEmbedding import still exists
28
+ # (you can also just update your requirements.txt to say `sentence-transformers==2.1.0`)
29
+ RUN pip install --upgrade pip \
30
+ && pip install --no-cache-dir -r requirements.txt \
31
+ && pip install --no-cache-dir sentence-transformers==2.1.0
32
+
33
+ # 5) Pre-download the Czech SBERT into the cache and then chmod all the files so the runtime user can read them
34
  RUN python - <<'PY'
35
  from sentence_transformers import SentenceTransformer
36
+ SentenceTransformer('Seznam/simcse-small-e-czech', cache_folder='/tmp/hfcache')
 
 
 
37
  PY
38
+ RUN chmod -R a+rwX /tmp/hfcache
39
 
40
+ # 6) Copy in your app and expose
41
+ COPY app.py .
42
  EXPOSE 7860
 
 
43
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]