# syntax=docker/dockerfile:1
# The directive above must remain the very first line of the file. It selects
# a BuildKit Dockerfile frontend that understands the heredoc form of RUN,
# which the model pre-download step in this file relies on.
FROM python:3.10-slim

# 1) OS packages: git only (needed when a requirement is fetched from a VCS URL).
#    The apt package lists are removed in the same layer so they never end up
#    in the image.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# 2) Set HF & numba cache + runtime ENVs
#    - HF_HOME, TRANSFORMERS_CACHE and SENTENCE_TRANSFORMERS_HOME all point at
#      the same directory so every Hugging Face library shares one cache.
#      NOTE(review): recent transformers releases deprecate TRANSFORMERS_CACHE
#      in favour of HF_HOME; it is kept here for older versions - confirm
#      against the version pinned in requirements.txt.
#    - NUMBA_CACHE_DIR: where numba stores its compilation cache.
#    - PIP_NO_CACHE_DIR: disables pip's download/wheel cache.
#    - PYTHONUNBUFFERED: unbuffered stdout/stderr so logs appear immediately.
#    - EMBED_MODEL: use correct model ID casing to match the HF repo.
#    - MIN_TOPIC_SIZE / MAX_DOCS: presumably tuning knobs read by app.py -
#      verify against the application code.
#    Each variable is declared exactly once.
ENV HF_HOME=/tmp/hfcache \
    TRANSFORMERS_CACHE=/tmp/hfcache \
    SENTENCE_TRANSFORMERS_HOME=/tmp/hfcache \
    NUMBA_CACHE_DIR=/tmp/numba_cache \
    PIP_NO_CACHE_DIR=1 \
    PYTHONUNBUFFERED=1 \
    EMBED_MODEL=Seznam/simcse-small-e-czech \
    MIN_TOPIC_SIZE=10 \
    MAX_DOCS=5000

# 3) Make sure both cache directories exist and are readable/writable by any
#    user (a+rwX: read/write for everyone, execute/traverse on directories).
RUN set -eu; \
    for cache_dir in /tmp/hfcache /tmp/numba_cache; do \
        mkdir -p "$cache_dir"; \
        chmod -R a+rwX "$cache_dir"; \
    done

# 4) Python dependencies.
#    Only the requirements manifest is copied at this point, so this layer is
#    rebuilt only when requirements.txt itself changes.
WORKDIR /code
COPY requirements.txt ./
RUN pip install --upgrade pip && \
    pip install --no-cache-dir --requirement requirements.txt

# 5) Pre-download the Czech SBERT model into the shared cache at build time.
#    The model ID and cache path are read from the ENV values declared earlier
#    in this file (EMBED_MODEL, SENTENCE_TRANSFORMERS_HOME) instead of being
#    repeated here, so the baked-in model cannot drift from what those
#    variables name.
RUN python - <<'PY'
import os
import subprocess

from sentence_transformers import SentenceTransformer

cache_dir = os.environ["SENTENCE_TRANSFORMERS_HOME"]

# Instantiating the model is what triggers the download into cache_dir.
SentenceTransformer(os.environ["EMBED_MODEL"], cache_folder=cache_dir)

# The cache directory had its permissions opened before anything was
# downloaded; reapply the same mode so the files and directories created by
# this download are covered too. Doing it inside this RUN keeps the change in
# the same layer as the download.
subprocess.run(["chmod", "-R", "a+rwX", cache_dir], check=True)
PY

# 6) Application code and default command.
#    app.py is copied last so that editing it does not invalidate the
#    dependency and model layers built before this point.
COPY app.py ./
# EXPOSE is documentation only; the port is actually bound by uvicorn below.
EXPOSE 7860/tcp
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860"]