Spaces:

plarnholt
/

excom-ai-demo

Paused

Peter Larnholt committed on Oct 9

Commit

e48919a

1 Parent(s): 275a99c

Upgrade to vLLM 0.6.3.post1 and remove pyairports workarounds

- Upgrade vllm from 0.5.5 to 0.6.3.post1 (stable version with proper dependency management)
- Upgrade torch from 2.4.0 to 2.5.0 (CUDA 12.1 compatible)
- Remove all pyairports/outlines workarounds (handled natively by newer vLLM)
- Delete sitecustomize.py (no longer needed)
- Simplify Dockerfile (remove cache directories and patches)
- Clean up app.py (remove VLLM_USE_OUTLINES environment variable manipulation)

Files changed (4) hide show

Dockerfile +1 -11
app.py +1 -5
requirements.txt +2 -6
sitecustomize.py +0 -14

Dockerfile CHANGED Viewed

@@ -3,11 +3,7 @@ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1 \
     PIP_NO_CACHE_DIR=1 \
-    HF_HUB_ENABLE_HF_TRANSFER=1 \
-    # Give numba/outlines a writable cache in Spaces runtime
-    NUMBA_CACHE_DIR=/tmp/numba_cache \
-    OUTLINES_CACHE_DIR=/tmp/outlines_cache
-    # If issues persist, add: NUMBA_DISABLE_FILE_SYSTEM_CACHING=1
 RUN apt-get update && apt-get install -y python3 python3-pip git && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
@@ -15,12 +11,6 @@ WORKDIR /app
 COPY requirements.txt /app/
 RUN python3 -m pip install --upgrade pip && pip3 install -r requirements.txt
-# Copy sitecustomize.py to Python's site-packages to patch pyairports import globally
-COPY sitecustomize.py /usr/local/lib/python3.10/dist-packages/
-# ensure caches exist & are writable in Spaces container
-RUN mkdir -p /tmp/numba_cache /tmp/outlines_cache && chmod -R 777 /tmp/numba_cache /tmp/outlines_cache
 COPY app.py /app/
 # Spaces exposes the app on $PORT

 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1 \
     PIP_NO_CACHE_DIR=1 \
+    HF_HUB_ENABLE_HF_TRANSFER=1
 RUN apt-get update && apt-get install -y python3 python3-pip git && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY requirements.txt /app/
 RUN python3 -m pip install --upgrade pip && pip3 install -r requirements.txt
 COPY app.py /app/
 # Spaces exposes the app on $PORT

app.py CHANGED Viewed

@@ -10,8 +10,6 @@ import os, time, threading, subprocess, requests
 from fastapi import FastAPI, Request, Response
 import gradio as gr
-os.environ["VLLM_USE_OUTLINES"] = "0"   # turn off outlines (pyairports patched via sitecustomize.py)
 MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-14B-Instruct-AWQ")
 API_PORT = int(os.environ.get("API_PORT", "8000"))  # vLLM internal port
 SYSTEM_PROMPT = os.environ.get(
@@ -34,9 +32,7 @@ if "AWQ" in MODEL_ID.upper():
 def launch_vllm():
     print(f"[vLLM] Launch: {MODEL_ID}")
-    env = os.environ.copy()
-    env["VLLM_USE_OUTLINES"] = "0"  # disable outlines
-    subprocess.Popen(VLLM_ARGS, env=env)
 def wait_vllm_ready(timeout=900, interval=3):
     url = f"http://127.0.0.1:{API_PORT}/v1/models"

 from fastapi import FastAPI, Request, Response
 import gradio as gr
 MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-14B-Instruct-AWQ")
 API_PORT = int(os.environ.get("API_PORT", "8000"))  # vLLM internal port
 SYSTEM_PROMPT = os.environ.get(
 def launch_vllm():
     print(f"[vLLM] Launch: {MODEL_ID}")
+    subprocess.Popen(VLLM_ARGS)
 def wait_vllm_ready(timeout=900, interval=3):
     url = f"http://127.0.0.1:{API_PORT}/v1/models"

requirements.txt CHANGED Viewed

@@ -4,12 +4,8 @@ gradio>=4.38
 requests>=2.31
 # vLLM + CUDA 12.1
-vllm==0.5.5
 --extra-index-url https://download.pytorch.org/whl/cu121
-torch==2.4.0
 transformers>=4.44
 accelerate>=0.30
-# Structured outputs stack used by vLLM
-# outlines is imported by vLLM 0.5.5 even with VLLM_USE_OUTLINES=0
-# We skip outlines dependencies and will patch the import

 requests>=2.31
 # vLLM + CUDA 12.1
+vllm==0.6.3.post1
 --extra-index-url https://download.pytorch.org/whl/cu121
+torch==2.5.0
 transformers>=4.44
 accelerate>=0.30

sitecustomize.py DELETED Viewed

@@ -1,14 +0,0 @@
-"""
-Sitecustomize to patch pyairports module before any imports.
-This runs automatically for all Python processes.
-"""
-import sys
-from types import ModuleType
-# Create fake pyairports module to satisfy outlines import
-# vLLM 0.5.5 imports outlines even when VLLM_USE_OUTLINES=0
-pyairports = ModuleType('pyairports')
-pyairports.airports = ModuleType('pyairports.airports')
-pyairports.airports.AIRPORT_LIST = []
-sys.modules['pyairports'] = pyairports
-sys.modules['pyairports.airports'] = pyairports.airports