Spaces:
Running
Running
Update Dockerfile
Browse files - Dockerfile +36 -15
Dockerfile
CHANGED
@@ -4,34 +4,55 @@ FROM python:3.10-slim
|
|
4 |
# Set the working directory in the container
|
5 |
WORKDIR /app
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# Copy the requirements file into the container at /app
|
8 |
COPY requirements.txt .
|
9 |
|
10 |
-
# Install any needed system dependencies (if any - bs4, requests usually don't need much)
|
11 |
-
# RUN apt-get update && apt-get install -y --no-install-recommends some-package && rm -rf /var/lib/apt/lists/*
|
12 |
-
# For this bot, we likely don't need extra apt packages currently.
|
13 |
-
|
14 |
# Install Python dependencies
|
15 |
# Using --no-cache-dir reduces image size slightly
|
16 |
RUN pip install --no-cache-dir -r requirements.txt
|
17 |
|
18 |
-
#
|
19 |
-
# Needs to run *after* pip install and requires system dependencies installed by playwright itself
|
20 |
-
# Using --with-deps installs necessary browser dependencies like fonts, libs etc.
|
21 |
RUN crawl4ai-setup
|
22 |
-
RUN python -m playwright install --with-deps chromium
|
23 |
|
24 |
# Copy the rest of the application code into the container at /app
|
25 |
COPY . .
|
26 |
|
27 |
-
# Make port
|
28 |
-
#
|
29 |
-
|
|
|
30 |
|
31 |
# Define environment variable to ensure Python output is sent straight to logs
|
32 |
ENV PYTHONUNBUFFERED=1
|
33 |
|
34 |
-
# Command to run the application using Gunicorn
|
35 |
-
#
|
36 |
-
|
37 |
-
CMD gunicorn -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:${PORT:-7860} main:app
|
|
|
4 |
# Set the working directory in the container
|
5 |
WORKDIR /app
|
6 |
|
7 |
+
# Install system dependencies required by Playwright/crawl4ai
|
8 |
+
# Based on Playwright's recommendations for Debian/Ubuntu
|
9 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
10 |
+
# Base dependencies
|
11 |
+
wget \
|
12 |
+
gnupg \
|
13 |
+
# Playwright browser dependencies (Chromium)
|
14 |
+
libnss3 \
|
15 |
+
libatk1.0-0 \
|
16 |
+
libatk-bridge2.0-0 \
|
17 |
+
libcups2 \
|
18 |
+
libdrm2 \
|
19 |
+
libdbus-1-3 \
|
20 |
+
libxkbcommon0 \
|
21 |
+
libatspi2.0-0 \
|
22 |
+
libx11-6 \
|
23 |
+
libxcomposite1 \
|
24 |
+
libxdamage1 \
|
25 |
+
libxext6 \
|
26 |
+
libxfixes3 \
|
27 |
+
libxrandr2 \
|
28 |
+
libgbm1 \
|
29 |
+
libpango-1.0-0 \
|
30 |
+
libcairo2 \
|
31 |
+
libasound2 \
|
32 |
+
# Clean up
|
33 |
+
&& rm -rf /var/lib/apt/lists/*
|
34 |
+
|
35 |
# Copy the requirements file into the container at /app
|
36 |
COPY requirements.txt .
|
37 |
|
|
|
|
|
|
|
|
|
38 |
# Install Python dependencies
|
39 |
# Using --no-cache-dir reduces image size slightly
|
40 |
RUN pip install --no-cache-dir -r requirements.txt
|
41 |
|
42 |
+
# Run crawl4ai post-installation setup (installs browser binaries)
|
|
|
|
|
43 |
RUN crawl4ai-setup
|
|
|
44 |
|
45 |
# Copy the rest of the application code into the container at /app
|
46 |
COPY . .
|
47 |
|
48 |
+
# Document the port the app listens on, as specified by Hugging Face (usually 7860 or 8080); EXPOSE does not publish it
|
49 |
+
# Use the PORT build argument (build-time only), defaulting to 7860 if not set
|
50 |
+
ARG PORT=7860
|
51 |
+
EXPOSE ${PORT}
|
52 |
|
53 |
# Define environment variable to ensure Python output is sent straight to logs
|
54 |
ENV PYTHONUNBUFFERED=1
|
55 |
|
56 |
+
# Command to run the application using Gunicorn
|
57 |
+
# The bind address and port (0.0.0.0 on the port specified by HF) are expected to be set in gunicorn.conf.py
|
58 |
+
CMD ["gunicorn", "--conf", "gunicorn.conf.py", "main:app"]
|
|