# Image for the web application served by Gunicorn on port 7860, with the
# Playwright browsers (Chromium, Firefox, WebKit) installed at build time.
#
# NOTE(review): no USER instruction is set, so the container runs as root and
# the browsers live under /root. Moving to a non-root user would also require
# relocating PLAYWRIGHT_BROWSERS_PATH to a directory that user can read.

# Use an official Python runtime as a parent image.
FROM python:3.10-slim

# Runtime/build environment:
# - PYTHONUNBUFFERED sends Python output straight to the container logs.
# - PLAYWRIGHT_BROWSERS_PATH is declared *before* `playwright install` so the
#   build-time download location and the runtime lookup location always match.
ENV PYTHONUNBUFFERED=1 \
    PLAYWRIGHT_BROWSERS_PATH=/root/.cache/ms-playwright

# Set the working directory in the container.
WORKDIR /app

# Install system libraries needed by the Playwright browsers.
# Reference: https://playwright.dev/docs/docker#python
# `apt-get update` and the list cleanup share one layer so the package index
# never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-glib-1-2 \
        libdrm2 \
        libgbm1 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libxcomposite1 \
        libxdamage1 \
        libxfixes3 \
        libxrandr2 \
        libxshmfence1 \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the dependency layers below stay
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies; --no-cache-dir keeps pip's download cache out
# of the layer.
RUN pip install --no-cache-dir -r requirements.txt

# Install the Playwright browsers together with any OS packages they still
# need. This must run after the pip install above, which is expected to
# provide the `playwright` CLI (via crawl4ai, per the project's requirements).
# --with-deps invokes apt, so the package lists are removed again in this
# same layer.
RUN playwright install --with-deps chromium firefox webkit \
    && rm -rf /var/lib/apt/lists/*

# Copy the rest of the application code into /app. Done last because it is
# the most frequently changing input.
COPY . .

# Document the listening port. Hugging Face Spaces typically expect apps to
# run on port 7860; the actual bind address comes from gunicorn.conf.py.
EXPOSE 7860

# Run the application with Gunicorn (exec form, so Gunicorn is PID 1 and
# receives stop signals directly). Port, workers and timeout are read from
# the gunicorn config file.
CMD ["gunicorn", "-c", "gunicorn.conf.py", "main:app"]