🦶 Faster R-CNN Foot Detection Model

This model detects feet or shoes in an image using a fine-tuned Faster R-CNN model from Torchvision.

It was trained on a small custom dataset of foot annotations and is intended as a starting point for foot/shoe detection in street, fashion, or movement-based applications.

Web demo:

🧠 Model Details

Base model: fasterrcnn_resnet50_fpn (pretrained on COCO)
Fine-tuned on 40 images of feet/shoes
Class labels:
- 1: foot/shoe
Bounding box outputs with confidence scores
Optimized for CPU, but also works with MPS (Apple Silicon) and CUDA (GPU)

⚡️ Quick Start

To download this repository:

git clone https://github.com/tonyassi/FootDetection.git
cd FootDetection

Install:

pip install -r requirements.txt

Usage:

from FootDetection import FootDetection
from PIL import Image

# Initialize model (first run will auto-download weights)
foot_detection = FootDetection("cpu")  # "cuda" for GPU or "mps" for Apple Silicon

# Load image (converted to RGB mode before being passed to the detector)
img = Image.open("image.jpg").convert("RGB")

# Run detection
# NOTE(review): threshold is presumably the minimum confidence score a
# detection needs to be kept -- confirm against FootDetection.detect.
results = foot_detection.detect(img, threshold=0.1)
print(results)

# Draw boxes
# NOTE(review): draw_boxes receives only the image; presumably it reuses the
# results of the preceding detect() call -- confirm against FootDetection.
img_with_boxes = foot_detection.draw_boxes(img)
img_with_boxes.show()
img_with_boxes.save("annotated_image.jpg")

📦 Usage

pip install torch torchvision pillow huggingface_hub

import os
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image, ImageDraw
from torchvision.transforms import functional as F
from huggingface_hub import hf_hub_download

# ===== CONFIG =====
# Device used both as map_location when loading the checkpoint and for inference.
device = torch.device("cpu")  # or "mps" if stable
# The fine-tuned checkpoint is looked up at (and downloaded into)
# checkpoints/fasterrcnn_foot.pth, relative to the current working directory.
checkpoint_dir = "checkpoints"
checkpoint_file = "fasterrcnn_foot.pth"
local_path = os.path.join(checkpoint_dir, checkpoint_file)

# ===== Ensure Checkpoint Exists =====
# Download from the Hugging Face Hub only when the file is not already on disk.
if not os.path.exists(local_path):
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("Downloading model from Hugging Face...")
    # local_path is rebound to whatever path hf_hub_download returns.
    local_path = hf_hub_download(
        repo_id="tonyassi/foot-detection",
        filename=checkpoint_file,
        local_dir=checkpoint_dir
    )

# ===== Load Model =====
# Build torchvision's Faster R-CNN (ResNet-50 FPN) with its default weights,
# then replace the box predictor with a 2-output head. The model card lists a
# single label (1: foot/shoe); index 0 is presumably background -- confirm.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
# NOTE(review): torch.load unpickles the downloaded file. On torch versions
# where weights_only is not the default, a malicious checkpoint could execute
# code; consider passing weights_only=True -- confirm the installed torch
# version supports it.
model.load_state_dict(torch.load(local_path, map_location=device))
model.to(device)
# Switch to evaluation mode before running inference.
model.eval()

# ===== Function: Foot Detection =====
def foot_detection(image, threshold=0.1):
    """Run the detector on a single PIL image.

    Args:
        image: PIL image to analyse; it is converted to a tensor and sent to
            the module-level ``device`` as a batch of one.
        threshold: Minimum score (inclusive) a detection needs to be kept.

    Returns:
        A dict with two parallel lists: ``"boxes"`` (each box a list of four
        floats, unpacked elsewhere as x0, y0, x1, y1) and ``"scores"``
        (plain Python floats).
    """
    batch = F.to_tensor(image).unsqueeze(0).to(device)
    with torch.no_grad():
        prediction = model(batch)[0]

    # One boolean mask selects the surviving detections for both outputs, so
    # boxes and scores stay aligned and keep the model's original ordering.
    keep = prediction["scores"] >= threshold
    return {
        "boxes": prediction["boxes"][keep].tolist(),
        "scores": prediction["scores"][keep].tolist(),
    }

# ===== Function: Draw Bounding Boxes =====
def draw_bounding_box(image, detection):
    """Return a copy of ``image`` annotated with the detected boxes.

    Args:
        image: PIL image to annotate; the input itself is not modified
            because all drawing happens on a copy.
        detection: Dict with parallel ``"boxes"`` and ``"scores"`` lists, as
            returned by ``foot_detection``.

    Returns:
        The annotated copy, with a red rectangle per box and the score
        (two decimals) written at the box's top-left corner.
    """
    annotated = image.copy()
    canvas = ImageDraw.Draw(annotated)

    pairs = zip(detection["boxes"], detection["scores"])
    for (left, top, right, bottom), confidence in pairs:
        canvas.rectangle([left, top, right, bottom], outline="red", width=3)
        canvas.text((left, top), f"{confidence:.2f}", fill="red")

    return annotated


from PIL import Image

# ==== Load and prepare image ====
image_path = "test.jpg"  # replace with your image path
# Convert to RGB mode before handing the image to the detector.
image = Image.open(image_path).convert("RGB")

# ==== Run detection ====
# Keep only detections scoring at least 0.3 (the function's default is 0.1).
detections = foot_detection(image, threshold=0.3)

# ==== Draw results ====
# draw_bounding_box draws on a copy, so `image` itself is left unmodified.
result_image = draw_bounding_box(image, detections)
result_image.show()  # or result_image.save("output.jpg")

tonyassi
/

foot-detection

🦶 Faster R-CNN Foot Detection Model

🧠 Model Details

⚡️ Quick Start

📦 Usage

Space using tonyassi/foot-detection 1

Evaluation results