fmab777 committed on
Commit
5999508
·
verified ·
1 Parent(s): d8e5fbc

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +36 -15
Dockerfile CHANGED
@@ -4,34 +4,55 @@ FROM python:3.10-slim
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  # Copy the requirements file into the container at /app
8
  COPY requirements.txt .
9
 
10
- # Install any needed system dependencies (if any - bs4, requests usually don't need much)
11
- # RUN apt-get update && apt-get install -y --no-install-recommends some-package && rm -rf /var/lib/apt/lists/*
12
- # For this bot, we likely don't need extra apt packages currently.
13
-
14
  # Install Python dependencies
15
  # Using --no-cache-dir reduces image size slightly
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
- # Install crawl4ai dependencies (Playwright browsers)
19
- # Needs to run *after* pip install and requires system dependencies installed by playwright itself
20
- # Using --with-deps installs necessary browser dependencies like fonts, libs etc.
21
  RUN crawl4ai-setup
22
- RUN python -m playwright install --with-deps chromium
23
 
24
  # Copy the rest of the application code into the container at /app
25
  COPY . .
26
 
27
- # Make port defined by PORT env var (default 7860) available
28
- # Hugging Face Spaces typically expect apps to run on port 7860
29
- EXPOSE ${PORT:-7860}
 
30
 
31
  # Define environment variable to ensure Python output is sent straight to logs
32
  ENV PYTHONUNBUFFERED=1
33
 
34
- # Command to run the application using Gunicorn (SHELL FORM for variable expansion)
35
- # It will run the Starlette 'app' object found in the 'main' module (main.py)
36
- # Listen on all interfaces (0.0.0.0) on the port specified by HF/env var (usually 7860)
37
- CMD gunicorn -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:${PORT:-7860} main:app
 
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
+ # Install system dependencies required by Playwright/crawl4ai
8
+ # Based on Playwright's recommendations for Debian/Ubuntu
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ # Base dependencies
11
+ wget \
12
+ gnupg \
13
+ # Playwright browser dependencies (Chromium)
14
+ libnss3 \
15
+ libatk1.0-0 \
16
+ libatk-bridge2.0-0 \
17
+ libcups2 \
18
+ libdrm2 \
19
+ libdbus-1-3 \
20
+ libxkbcommon0 \
21
+ libatspi2.0-0 \
22
+ libx11-6 \
23
+ libxcomposite1 \
24
+ libxdamage1 \
25
+ libxext6 \
26
+ libxfixes3 \
27
+ libxrandr2 \
28
+ libgbm1 \
29
+ libpango-1.0-0 \
30
+ libcairo2 \
31
+ libasound2 \
32
+ # Clean up
33
+ && rm -rf /var/lib/apt/lists/*
34
+
35
  # Copy the requirements file into the container at /app
36
  COPY requirements.txt .
37
 
 
 
 
 
38
  # Install Python dependencies
39
  # Using --no-cache-dir reduces image size slightly
40
  RUN pip install --no-cache-dir -r requirements.txt
41
 
42
+ # Run crawl4ai post-installation setup (installs browser binaries)
 
 
43
  RUN crawl4ai-setup
 
44
 
45
  # Copy the rest of the application code into the container at /app
46
  COPY . .
47
 
48
+ # Declare the port specified by Hugging Face (usually 7860 or 8080); EXPOSE only documents the port, it does not publish it
49
+ # PORT is a build argument (override with --build-arg PORT=...), defaulting to 7860 if not set
50
+ ARG PORT=7860
51
+ EXPOSE ${PORT}
52
 
53
  # Define environment variable to ensure Python output is sent straight to logs
54
  ENV PYTHONUNBUFFERED=1
55
 
56
+ # Command to run the application using Gunicorn
57
+ # The bind address (all interfaces, 0.0.0.0) and the port specified by HF are expected to be set in gunicorn.conf.py
58
+ CMD ["gunicorn", "--conf", "gunicorn.conf.py", "main:app"]