Peter Larnholt committed on
Commit
e48919a
·
1 Parent(s): 275a99c

Upgrade to vLLM 0.6.3.post1 and remove pyairports workarounds

Browse files

- Upgrade vllm from 0.5.5 to 0.6.3.post1 (stable version with proper dependency management)
- Upgrade torch from 2.4.0 to 2.5.0 (CUDA 12.1 compatible)
- Remove all pyairports/outlines workarounds (handled natively by newer vLLM)
- Delete sitecustomize.py (no longer needed)
- Simplify Dockerfile (remove cache directories and patches)
- Clean up app.py (remove VLLM_USE_OUTLINES environment variable manipulation)

Files changed (4) hide show
  1. Dockerfile +1 -11
  2. app.py +1 -5
  3. requirements.txt +2 -6
  4. sitecustomize.py +0 -14
Dockerfile CHANGED
@@ -3,11 +3,7 @@ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  PYTHONUNBUFFERED=1 \
5
  PIP_NO_CACHE_DIR=1 \
6
- HF_HUB_ENABLE_HF_TRANSFER=1 \
7
- # Give numba/outlines a writable cache in Spaces runtime
8
- NUMBA_CACHE_DIR=/tmp/numba_cache \
9
- OUTLINES_CACHE_DIR=/tmp/outlines_cache
10
- # If issues persist, add: NUMBA_DISABLE_FILE_SYSTEM_CACHING=1
11
 
12
  RUN apt-get update && apt-get install -y python3 python3-pip git && rm -rf /var/lib/apt/lists/*
13
  WORKDIR /app
@@ -15,12 +11,6 @@ WORKDIR /app
15
  COPY requirements.txt /app/
16
  RUN python3 -m pip install --upgrade pip && pip3 install -r requirements.txt
17
 
18
- # Copy sitecustomize.py to Python's site-packages to patch pyairports import globally
19
- COPY sitecustomize.py /usr/local/lib/python3.10/dist-packages/
20
-
21
- # ensure caches exist & are writable in Spaces container
22
- RUN mkdir -p /tmp/numba_cache /tmp/outlines_cache && chmod -R 777 /tmp/numba_cache /tmp/outlines_cache
23
-
24
  COPY app.py /app/
25
 
26
  # Spaces exposes the app on $PORT
 
3
  ENV DEBIAN_FRONTEND=noninteractive \
4
  PYTHONUNBUFFERED=1 \
5
  PIP_NO_CACHE_DIR=1 \
6
+ HF_HUB_ENABLE_HF_TRANSFER=1
 
 
 
 
7
 
8
  RUN apt-get update && apt-get install -y python3 python3-pip git && rm -rf /var/lib/apt/lists/*
9
  WORKDIR /app
 
11
  COPY requirements.txt /app/
12
  RUN python3 -m pip install --upgrade pip && pip3 install -r requirements.txt
13
 
 
 
 
 
 
 
14
  COPY app.py /app/
15
 
16
  # Spaces exposes the app on $PORT
app.py CHANGED
@@ -10,8 +10,6 @@ import os, time, threading, subprocess, requests
10
  from fastapi import FastAPI, Request, Response
11
  import gradio as gr
12
 
13
- os.environ["VLLM_USE_OUTLINES"] = "0" # turn off outlines (pyairports patched via sitecustomize.py)
14
-
15
  MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-14B-Instruct-AWQ")
16
  API_PORT = int(os.environ.get("API_PORT", "8000")) # vLLM internal port
17
  SYSTEM_PROMPT = os.environ.get(
@@ -34,9 +32,7 @@ if "AWQ" in MODEL_ID.upper():
34
 
35
  def launch_vllm():
36
  print(f"[vLLM] Launch: {MODEL_ID}")
37
- env = os.environ.copy()
38
- env["VLLM_USE_OUTLINES"] = "0" # disable outlines
39
- subprocess.Popen(VLLM_ARGS, env=env)
40
 
41
  def wait_vllm_ready(timeout=900, interval=3):
42
  url = f"http://127.0.0.1:{API_PORT}/v1/models"
 
10
  from fastapi import FastAPI, Request, Response
11
  import gradio as gr
12
 
 
 
13
  MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-14B-Instruct-AWQ")
14
  API_PORT = int(os.environ.get("API_PORT", "8000")) # vLLM internal port
15
  SYSTEM_PROMPT = os.environ.get(
 
32
 
33
  def launch_vllm():
34
  print(f"[vLLM] Launch: {MODEL_ID}")
35
+ subprocess.Popen(VLLM_ARGS)
 
 
36
 
37
  def wait_vllm_ready(timeout=900, interval=3):
38
  url = f"http://127.0.0.1:{API_PORT}/v1/models"
requirements.txt CHANGED
@@ -4,12 +4,8 @@ gradio>=4.38
4
  requests>=2.31
5
 
6
  # vLLM + CUDA 12.1
7
- vllm==0.5.5
8
  --extra-index-url https://download.pytorch.org/whl/cu121
9
- torch==2.4.0
10
  transformers>=4.44
11
  accelerate>=0.30
12
-
13
- # Structured outputs stack used by vLLM
14
- # outlines is imported by vLLM 0.5.5 even with VLLM_USE_OUTLINES=0
15
- # We skip outlines dependencies and will patch the import
 
4
  requests>=2.31
5
 
6
  # vLLM + CUDA 12.1
7
+ vllm==0.6.3.post1
8
  --extra-index-url https://download.pytorch.org/whl/cu121
9
+ torch==2.5.0
10
  transformers>=4.44
11
  accelerate>=0.30
 
 
 
 
sitecustomize.py DELETED
@@ -1,14 +0,0 @@
1
- """
2
- Sitecustomize to patch pyairports module before any imports.
3
- This runs automatically for all Python processes.
4
- """
5
- import sys
6
- from types import ModuleType
7
-
8
- # Create fake pyairports module to satisfy outlines import
9
- # vLLM 0.5.5 imports outlines even when VLLM_USE_OUTLINES=0
10
- pyairports = ModuleType('pyairports')
11
- pyairports.airports = ModuleType('pyairports.airports')
12
- pyairports.airports.AIRPORT_LIST = []
13
- sys.modules['pyairports'] = pyairports
14
- sys.modules['pyairports.airports'] = pyairports.airports