Spaces:
Runtime error
Runtime error
File size: 2,395 Bytes
e3d5c47 1dba1e4 915906a 4455993 8c9ed7b 4455993 6d20477 fa322c3 e65a602 e5ab26b f7ece44 fa322c3 e3d5c47 941538f e3d5c47 915906a fa322c3 23e52e6 e86c1e5 d98ecdc 941538f fa322c3 8c9ed7b dbcf40b 8c9ed7b a2a4283 230d1b0 fa322c3 e3d5c47 fa322c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# Base image: CUDA 12.4.1 with cuDNN development files on Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
# Keep apt/dpkg from prompting while packages are installed.
ENV DEBIAN_FRONTEND=noninteractive
# Install system packages (still running as root at this point). The apt
# package index is deleted in the same RUN so it never ends up in an image
# layer; removing it in a later instruction would not shrink the image.
#
# NOTE(review): both the distro default Go (golang-go) and golang-1.22-go are
# installed - confirm which `go` binary ends up first on PATH for the build.
# NOTE(review): nvidia-driver-550 is a host driver package; GPU containers
# normally receive driver libraries from the NVIDIA container runtime -
# confirm this package is really required inside the image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
        wget \
        curl \
        # python build dependencies
        build-essential \
        libssl-dev \
        zlib1g-dev \
        libbz2-dev \
        libreadline-dev \
        libsqlite3-dev \
        libncursesw5-dev \
        xz-utils \
        tk-dev \
        libxml2-dev \
        libxmlsec1-dev \
        libffi-dev \
        liblzma-dev \
        golang-go \
        golang-1.22-go \
        nvidia-driver-550 \
        ffmpeg && \
    rm -rf /var/lib/apt/lists/*
# Create an unprivileged account with UID 1000 and run every following
# instruction (and the container itself) as that account.
ENV USER='user'
RUN useradd -m -u 1000 ${USER}
USER ${USER}
# HOME is set in its own ENV instruction on purpose: within a single ENV
# instruction every ${VAR} is substituted with the value the variable had
# BEFORE that instruction, so PATH and APPDIR can only see the new HOME when
# they are assigned in a later instruction.
ENV HOME=/home/${USER}
ENV PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app
# Application directory; also the working directory for the steps below.
WORKDIR ${APPDIR}
# Make all GPUs visible to the container.
ENV NVIDIA_VISIBLE_DEVICES=all
# Install pyenv for the current (unprivileged) user.
# The installer is saved to a file before it is executed instead of being
# piped into bash: with a pipe, a failed or truncated download is not detected
# by the default shell (no pipefail) and the build would carry on regardless.
# -f makes curl fail on HTTP errors, -L follows redirects, -sS stays quiet but
# still prints errors.
RUN curl -fsSL https://pyenv.run -o /tmp/pyenv-installer.sh && \
    bash /tmp/pyenv-installer.sh && \
    rm -f /tmp/pyenv-installer.sh
# Put the pyenv shims and the pyenv executable ahead of everything else on PATH.
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
# Python version built by pyenv; override with --build-arg PYTHON_VERSION=...
ARG PYTHON_VERSION=3.10.13
# Build the requested interpreter, make it the global default, refresh the
# pyenv shims, then install the Python packages. Both pip invocations use
# --no-cache-dir so the download cache is not stored in the image layer.
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install --no-cache-dir \
        "huggingface-hub" \
        "hf-transfer" \
        "gradio[oauth]>=4.28.0" \
        "gradio_huggingfacehub_search==0.0.7" \
        "APScheduler"
# Copy the build context into the application directory, owned by UID 1000.
COPY --chown=1000 . ${APPDIR}
# Clone the ollama and llama.cpp sources into the current working directory
# (WORKDIR is ${APPDIR} at this point).
RUN git clone https://github.com/ollama/ollama
RUN git clone https://github.com/ggerganov/llama.cpp
# Put groups_merged.txt inside the llama.cpp checkout. The destination is
# expressed through ${APPDIR} - the directory the clone above was made in - so
# the file always lands next to the cloned sources, and it is owned by the
# same UID as the rest of the application files.
COPY --chown=1000 groups_merged.txt ${APPDIR}/llama.cpp/
# --- Runtime environment -----------------------------------------------------
# Python: import from the app directory and do not buffer stdout/stderr.
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=${APPDIR}

# Hugging Face Hub transfer setting and the SYSTEM marker.
ENV SYSTEM=spaces \
    HF_HUB_ENABLE_HF_TRANSFER=1

# Gradio server configuration.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_NUM_PORTS=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_THEME=huggingface

# tqdm progress-bar settings.
ENV TQDM_MININTERVAL=1 \
    TQDM_POSITION=-1

# NVIDIA / CUDA: library search path (prepended to the existing value) plus
# the device and capability selection.
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}

# ollama: listen on all interfaces.
ENV OLLAMA_HOST=0.0.0.0
# Build ollama from the cloned sources.
WORKDIR ${APPDIR}/ollama
# The three go commands are chained with && so each one actually runs (and the
# build stops at the first failure); with bare line continuations they would
# collapse into one `go generate` call that receives "go", "build", ... as
# arguments. Verbose output uses the go tool's own -v flag, which has to come
# before the package argument.
# OLLAMA_CUSTOM_CPU_DEFS is set only for the `go generate` step.
RUN OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate -v ./... && \
    go build -v . && \
    go install -v .
# `go install` writes the binary to $GOPATH/bin (default: $HOME/go/bin), which
# is not on PATH otherwise; add it so `ollama` can be started by name.
# NOTE(review): assumes GOPATH/GOBIN are left at their defaults - confirm.
ENV PATH=${HOME}/go/bin:${PATH}
# Install llama.cpp's Python requirements and build the llama-quantize tool.
WORKDIR ${APPDIR}/llama.cpp
# pip and make are separate commands joined with &&; without it the make
# invocation would be passed to pip as extra arguments. The trailing
# --verbose was dropped because GNU make has no such option, and -j is bounded
# to the number of available CPUs instead of spawning unlimited jobs.
# NOTE(review): confirm how the llama.cpp Makefile interprets LLAMA_CUDA=0.
RUN pip install --no-cache-dir -r requirements.txt && \
    LLAMA_CUDA=0 make -j"$(nproc)" llama-quantize
# Run the application from the app directory.
WORKDIR ${APPDIR}
# EXPOSE map[11434/tcp:{}]
# Start the ollama server in the background, wait five seconds, then run the
# Python app in the foreground. Exec (JSON) form with an explicit `sh -c`
# keeps the shell features that are needed (`&`, `&&`), and the final `exec`
# replaces the shell with the Python process so it receives container stop
# signals directly.
# NOTE(review): confirm that `ollama serve` and app.py both accept --verbose.
ENTRYPOINT ["/bin/sh", "-c", "ollama serve --verbose & sleep 5 && exec python app.py --verbose"]