Yeetek committed on
Commit
fde8aa4
·
verified ·
1 Parent(s): 76e2de3

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +19 -20
Dockerfile CHANGED
@@ -1,43 +1,42 @@
1
  FROM python:3.10-slim
2
 
3
- # 1) Install git (for any VCS installs) + clean up
4
  RUN apt-get update \
5
  && apt-get install -y --no-install-recommends git \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
- # 2) Set all of your HF and numba cache env vars in one place
9
- ENV HF_HOME=/tmp/hfcache \
10
- TRANSFORMERS_CACHE=/tmp/hfcache \
11
- SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
12
- NUMBA_CACHE_DIR=/tmp/numba_cache \
13
- PIP_NO_CACHE_DIR=1 \
14
- PYTHONUNBUFFERED=1 \
15
- EMBED_MODEL=Seznam/simcse-small-e-czech \
16
- MIN_TOPIC_SIZE=10 \
17
- MAX_DOCS=5000
18
 
19
- # 3) Create those cache dirs and make them fully readable/writable
20
  RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
21
- && chmod -R 777 /tmp/hfcache /tmp/numba_cache
22
 
23
- # 4) Install Python deps
24
  WORKDIR /code
25
  COPY requirements.txt .
26
-
27
- # ⚠️ Pin sentence-transformers to 2.1.0 so that BERTopic’s StaticEmbedding import still exists
28
- # (you can also just update your requirements.txt to say `sentence-transformers==2.1.0`)
29
  RUN pip install --upgrade pip \
30
  && pip install --no-cache-dir -r requirements.txt \
31
  && pip install --no-cache-dir sentence-transformers==2.1.0
32
 
33
- # 5) Pre-download the Czech SBERT into the cache and then chmod all the files so the runtime user can read them
34
  RUN python - <<'PY'
35
  from sentence_transformers import SentenceTransformer
36
- SentenceTransformer('Seznam/simcse-small-e-czech', cache_folder='/tmp/hfcache')
 
37
  PY
38
  RUN chmod -R a+rwX /tmp/hfcache
39
 
40
- # 6) Copy in your app and expose
41
  COPY app.py .
42
  EXPOSE 7860
43
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  FROM python:3.10-slim
2
 
3
+ # 1) Install git (for any VCS-based deps) + clean up
4
  RUN apt-get update \
5
  && apt-get install -y --no-install-recommends git \
6
  && rm -rf /var/lib/apt/lists/*
7
 
8
+ # 2) Set HF & numba cache + runtime ENVs
9
+ ENV HF_HOME=/tmp/hfcache \
10
+ TRANSFORMERS_CACHE=/tmp/hfcache \
11
+ SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
12
+ NUMBA_CACHE_DIR=/tmp/numba_cache \
13
+ PIP_NO_CACHE_DIR=1 \
14
+ PYTHONUNBUFFERED=1 \
15
+ EMBED_MODEL=Seznam/simcse-small-e-czech \
16
+ MIN_TOPIC_SIZE=10 \
17
+ MAX_DOCS=5000
18
 
19
+ # 3) Create cache folders & open permissions
20
  RUN mkdir -p /tmp/hfcache /tmp/numba_cache \
21
+ && chmod -R a+rwX /tmp/hfcache /tmp/numba_cache
22
 
23
+ # 4) Copy & install Python deps (pin ST to 2.1.0!)
24
  WORKDIR /code
25
  COPY requirements.txt .
26
+ # override any ST version in requirements
 
 
27
  RUN pip install --upgrade pip \
28
  && pip install --no-cache-dir -r requirements.txt \
29
  && pip install --no-cache-dir sentence-transformers==2.1.0
30
 
31
+ # 5) Pre-download your Czech SBERT into that cache
32
  RUN python - <<'PY'
33
  from sentence_transformers import SentenceTransformer
34
+ SentenceTransformer('Seznam/simcse-small-e-czech',
35
+ cache_folder='/tmp/hfcache')
36
  PY
37
  RUN chmod -R a+rwX /tmp/hfcache
38
 
39
+ # 6) Copy in your app & run
40
  COPY app.py .
41
  EXPOSE 7860
42
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]