# Container image for a Python web application served by Gunicorn, bundling
# the system libraries and browser binaries needed by crawl4ai/Playwright
# (Chromium).
#
# Every instruction sits on its own line: a Dockerfile line that begins with
# `#` is a comment in its entirety, so an instruction must never share a line
# with a comment.

# Official slim Python runtime. The Debian release is pinned explicitly because
# the apt package names installed below are specific to a Debian release.
FROM python:3.10-slim-bookworm

# Working directory inside the container; relative paths below resolve here.
WORKDIR /app

# System dependencies, based on Playwright's recommendations for Debian/Ubuntu:
#   - gnupg, wget: base tooling
#   - all remaining packages: shared libraries required by Chromium
# The apt package lists are deleted in the same layer so they never end up in
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        gnupg \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the install layer below is reused
# from cache until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies; --no-cache-dir keeps pip's download cache out of
# the image.
RUN pip install --no-cache-dir -r requirements.txt

# crawl4ai post-installation setup (installs browser binaries).
RUN crawl4ai-setup

# Copy the rest of the application code into /app. Done last because source
# changes most often and would otherwise invalidate the layers above.
COPY . .

# Port the application is expected to listen on (Hugging Face usually uses
# 7860 or 8080). ARG is a build-time value only: override it with
# `--build-arg PORT=<n>`. EXPOSE documents the port; it does not publish it.
# NOTE(review): the address/port actually bound at runtime is presumably set in
# gunicorn.conf.py - confirm it agrees with this value.
ARG PORT=7860
EXPOSE ${PORT}

# Send Python output straight to the container logs instead of buffering it.
ENV PYTHONUNBUFFERED=1

# NOTE(review): no USER instruction is set, so the process runs as the base
# image's default user. Before switching to a non-root user, verify that the
# browser binaries installed by crawl4ai-setup are reachable by that user.

# Start the application with Gunicorn (exec form, so Gunicorn receives signals
# directly). Settings are read from gunicorn.conf.py; `--config` is Gunicorn's
# documented long option for the configuration file.
CMD ["gunicorn", "--config", "gunicorn.conf.py", "main:app"]