Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	# Base image: CUDA 12.4.1 + cuDNN development image on Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Keep apt/dpkg from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Currently disabled packages (kept for reference, deliberately outside the
# RUN continuation so the install command reads as one unbroken list):
#   python build dependencies:
#     build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev
#     libsqlite3-dev libncursesw5-dev xz-utils tk-dev libxml2-dev
#     libxmlsec1-dev libffi-dev liblzma-dev
#   other:
#     ffmpeg software-properties-common
#
# NOTE(review): nvidia-driver-550 is installed inside the image. GPU containers
# normally receive the driver from the host through the NVIDIA container
# runtime - confirm this package is really required.
#
# The apt package lists are deleted in the same layer so they do not end up in
# the final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
        wget \
        curl \
        nvidia-driver-550 \
        python3.10 \
        python3.10-venv \
        python3-pip \
        python-is-python3 && \
    rm -rf /var/lib/apt/lists/*

# Disabled: manual installation of the CUDA toolkit from NVIDIA's apt repository.
# RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
# RUN dpkg -i cuda-keyring_1.1-1_all.deb
# RUN apt-get update
# RUN apt-get -y install cuda-toolkit-12-4
# Download the Ollama binary into /usr/bin and make it executable.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the "binary"; -sS hides the progress meter but still prints errors.
# Download and chmod share one layer so the file is stored only once.
# NOTE(review): confirm this URL still serves a bare executable.
RUN curl -fsSL https://ollama.com/download/ollama-linux-amd64 -o /usr/bin/ollama && \
    chmod +x /usr/bin/ollama
# Create an unprivileged account with UID 1000 and run every following
# instruction as that account.
ENV USER='user'
RUN useradd -m -u 1000 ${USER}
USER ${USER}

# HOME must be set in its own ENV instruction: variable substitution inside a
# single ENV uses the values from *before* that instruction, so defining HOME
# together with PATH/APPDIR would expand ${HOME} to its previous (unset) value
# and yield PATH=/.local/bin:... and APPDIR=/app.
ENV HOME=/home/${USER}
ENV PATH=${HOME}/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:${PATH} \
    APPDIR=${HOME}/app

# Application directory; the build context is copied in owned by UID 1000.
WORKDIR ${APPDIR}
COPY --chown=1000 . ${APPDIR}
# --- Disabled experiments (kept for reference) -------------------------------
# ENV NVIDIA_VISIBLE_DEVICES=all
#
# pyenv-managed interpreter:
# RUN curl https://pyenv.run | bash
# ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
# ARG PYTHON_VERSION=3.10.13
# RUN pyenv install ${PYTHON_VERSION} && \
# pyenv global ${PYTHON_VERSION} && \
# pyenv rehash && \
#
# virtual environment:
# RUN python3.10 -m venv .venv
# RUN . .venv/bin/activate
# -----------------------------------------------------------------------------

# Bring the packaging tools up to date first.
RUN python -m pip install --no-cache-dir --upgrade \
        pip \
        setuptools \
        wheel

# Python packages needed by the application.
RUN python -m pip install \
        "huggingface-hub" \
        "hf-transfer" \
        "gradio[oauth]>=4.28.0" \
        "gradio_huggingfacehub_search==0.0.7" \
        "APScheduler"

# --- Disabled: leave the venv / build Ollama from source with Go -------------
# RUN deactivate
# RUN go install golang.org/x/tools/gopls@latest
# RUN git clone https://github.com/ollama/ollama
# WORKDIR ${APPDIR}/ollama
# RUN OLLAMA_CUSTOM_CPU_DEFS="-DGGML_AVX=on -DGGML_AVX2=on -DGGML_F16C=on -DGGML_FMA=on" go generate ./... --verbose \
# go build . --verbose \
# go install . --verbose
# -----------------------------------------------------------------------------
# Fetch llama.cpp and build its llama-quantize tool with CUDA enabled.
# A fresh clone is already at the remote head, so no follow-up `git pull` is
# needed.
RUN git clone https://github.com/ggerganov/llama.cpp

# Place the groups_merged.txt file from the build context inside the checkout.
# --chown keeps it owned by the same UID as the rest of the tree.
COPY --chown=1000 groups_merged.txt llama.cpp/

WORKDIR ${APPDIR}/llama.cpp
RUN python -m pip install --no-cache-dir -r requirements.txt

# -j is capped at the number of available CPUs (a bare -j places no limit on
# parallel jobs). GNU make has no --verbose option, so none is passed.
# NOTE(review): assumes the checked-out llama.cpp revision still ships a
# Makefile with a llama-quantize target - confirm against upstream.
RUN GGML_CUDA=1 make -j "$(nproc)" llama-quantize
# Runtime environment, grouped by the component each variable belongs to.

# Disabled:
# ENV PYTHONPATH=${APPDIR}/.venv/bin \

# Python / Hugging Face Hub
ENV PYTHONUNBUFFERED=1
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Gradio
ENV GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface

# tqdm
ENV TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1

# Platform marker
ENV SYSTEM=spaces

# NVIDIA / CUDA: library directories are put in front of any existing
# LD_LIBRARY_PATH value.
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_VISIBLE_DEVICES=all

# Ollama
ENV OLLAMA_HOST=0.0.0.0
# Run from the application directory.
WORKDIR ${APPDIR}

# Disabled: publish the Ollama API port.
# EXPOSE 11434

# Disabled:
# RUN . .venv/bin/activate

# Start the Ollama server when the container starts, not in a build-time RUN:
# a process backgrounded inside RUN ends with that build step and is not
# running in the final container. The server gets the same 5 second head start
# as before, then `exec` replaces the shell with the Python app so the app
# receives container signals directly.
# NOTE(review): `--verbose` is no longer passed to `ollama serve`; if extra
# server logging is wanted, OLLAMA_DEBUG=1 is presumably the supported switch -
# confirm against the Ollama docs.
ENTRYPOINT ["/bin/sh", "-c", "ollama serve & sleep 5; exec python app.py --verbose"]
