File size: 2,395 Bytes
e3d5c47
1dba1e4
915906a
4455993
 
8c9ed7b
4455993
 
 
 
 
6d20477
 
 
 
 
 
 
 
 
 
 
 
 
fa322c3
e65a602
 
e5ab26b
 
 
 
 
f7ece44
fa322c3
 
 
 
e3d5c47
 
941538f
 
 
 
 
 
 
e3d5c47
915906a
fa322c3
23e52e6
e86c1e5
d98ecdc
941538f
fa322c3
8c9ed7b
 
 
 
 
 
 
 
 
 
dbcf40b
8c9ed7b
a2a4283
230d1b0
fa322c3
 
 
 
 
 
 
 
 
 
 
e3d5c47
fa322c3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# CUDA 12.4 devel base: compilers + cuDNN headers needed to build llama.cpp/ollama.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt from prompting during `docker build`.
# ARG (not ENV) so DEBIAN_FRONTEND does not leak into the runtime environment
# of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# OS packages: Python build dependencies (for pyenv-compiled CPython), Go
# toolchain (for ollama), git/curl/wget for fetching sources, ffmpeg for media.
# update + install + list cleanup happen in ONE layer so the apt cache is never
# stale and never ships in the image. Blanket `apt-get upgrade` removed — pin a
# newer base-image tag instead (hadolint DL3005).
# NOTE(review): nvidia-driver-550 removed — the NVIDIA kernel driver must come
# from the host via nvidia-container-toolkit; installing it inside the image is
# wasted space and can conflict with the host driver.
# NOTE(review): golang-go (distro default) and golang-1.22-go are both kept as
# in the original; confirm which `go` binary the build actually resolves.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    ffmpeg \
    git \
    git-lfs \
    golang-1.22-go \
    golang-go \
    libbz2-dev \
    libffi-dev \
    liblzma-dev \
    libncursesw5-dev \
    libreadline-dev \
    libsqlite3-dev \
    libssl-dev \
    libxml2-dev \
    libxmlsec1-dev \
    tk-dev \
    wget \
    xz-utils \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Run everything from here on as an unprivileged user (uid 1000, the uid
# Hugging Face Spaces expects).
ENV USER=user
RUN useradd -m -u 1000 ${USER}
USER ${USER}

# HOME must be set in its OWN instruction: values defined in an ENV instruction
# are not visible to other keys of that same instruction (Docker substitutes the
# pre-instruction value), so the original single-ENV form expanded ${HOME} to ""
# inside PATH and APPDIR.
ENV HOME=/home/${USER}
ENV PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app

WORKDIR ${APPDIR}

# Install a pinned CPython via pyenv, then the app's Python dependencies.
# (NVIDIA_VISIBLE_DEVICES is already set in the runtime ENV block further down;
# the duplicate here was removed.)
# -fsSL: fail on HTTP errors instead of piping an error page into bash.
# NOTE(review): `curl | bash` is unpinned/unchecksummed — consider pinning a
# pyenv-installer release and verifying it.
RUN curl -fsSL https://pyenv.run | bash
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
ARG PYTHON_VERSION=3.10.13
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install --no-cache-dir \
    "APScheduler" \
    "gradio[oauth]>=4.28.0" \
    "gradio_huggingfacehub_search==0.0.7" \
    "hf-transfer" \
    "huggingface-hub"
    
# App code first, then the third-party sources that get built below.
COPY --chown=1000 . ${APPDIR}
# NOTE(review): both clones are unpinned (moving targets) — builds are not
# reproducible; consider checking out a specific tag or commit SHA.
RUN git clone https://github.com/ollama/ollama && \
    git clone https://github.com/ggerganov/llama.cpp
# quantization importance-matrix groups used by the conversion scripts;
# ${APPDIR} used instead of the original's hard-coded ${HOME}/app for consistency.
COPY groups_merged.txt ${APPDIR}/llama.cpp/
    
# Runtime environment for the Gradio app and the ollama server.
# - HF_HUB_ENABLE_HF_TRANSFER=1 pairs with the hf-transfer package installed above.
# - GRADIO_SERVER_NAME=0.0.0.0 binds the UI on all interfaces (required in containers).
# - LD_LIBRARY_PATH appends the base image's value; /usr/local/nvidia/* are the
#   paths the NVIDIA container runtime mounts host libraries into.
# - OLLAMA_HOST=0.0.0.0 makes the ollama API reachable from outside the container.
# NOTE(review): SYSTEM=spaces / TQDM_* look Hugging Face Spaces-specific — confirm
# against app.py before changing.
ENV PYTHONPATH=${APPDIR} \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces \
    LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all \
    OLLAMA_HOST=0.0.0.0

# Build and install ollama with explicit CPU feature flags.
# BUG FIX: the original chained the three go commands with bare `\` line
# continuations and no `&&`, which merged them into a single `go generate`
# invocation (`go build . --verbose go install .` all became arguments).
# Also: the go tool's verbosity flags are -v / -x, not --verbose.
# `export` keeps OLLAMA_CUSTOM_CPU_DEFS visible to every chained command.
WORKDIR ${APPDIR}/ollama
RUN export OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" && \
    go generate -x ./... && \
    go build -v . && \
    go install -v .
    
# Install llama.cpp's conversion-script dependencies, then build the CPU-only
# quantize tool.
# BUG FIX: the original used a bare `\` continuation with no `&&`, so
# `LLAMA_CUDA=0 make -j llama-quantize --verbose` was passed to pip as bogus
# requirement arguments instead of being run. GNU make also has no --verbose
# option, so that flag is dropped.
WORKDIR ${APPDIR}/llama.cpp
RUN pip install --no-cache-dir -r requirements.txt && \
    LLAMA_CUDA=0 make -j llama-quantize
    
WORKDIR ${APPDIR}

# Documentation only (does not publish the port): ollama's default API port.
# Restored from the garbled generated comment `# EXPOSE map[11434/tcp:{}]`.
EXPOSE 11434

# Shell form is deliberate here: two processes are launched — ollama in the
# background, then the Gradio app after a short warm-up wait. `exec` replaces
# the shell so python becomes PID 1's process and receives SIGTERM on
# `docker stop`. `--verbose` was dropped from `ollama serve`, which does not
# accept that flag (verbosity is controlled via OLLAMA_DEBUG).
# NOTE(review): two processes in one container with no supervisor — if ollama
# dies, nothing restarts it; confirm this matches the Space's expectations.
ENTRYPOINT ollama serve & sleep 5 && exec python app.py --verbose