# 🦶 Faster R-CNN Foot Detection Model
by Tony Assi
This model detects feet or shoes in an image using a fine-tuned Faster R-CNN model from Torchvision.
It was trained on a small custom dataset of foot annotations and is intended as a starting point for foot/shoe detection in street, fashion, or movement-based applications.
## 🧠 Model Details
- Base model: `fasterrcnn_resnet50_fpn` (pretrained on COCO)
- Fine-tuned on 40 images of feet/shoes
- Class labels: `1`: foot/shoe
- Bounding box outputs with confidence scores
- Optimized for CPU (but works with MPS and CUDA); see the sketch below for device selection
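The detector uses the standard torchvision two-class head: label `0` is the implicit background class and label `1` is foot/shoe. As a minimal sketch (mirroring the loading code in the Usage section below, but with the device chosen automatically rather than hard-coded to CPU), the head swap and device selection look like this:

```python
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Pick the best available device; CPU is the default target for this model.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# COCO-pretrained Faster R-CNN with the box predictor swapped for 2 classes:
# 0 = background (implicit), 1 = foot/shoe.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)
model.to(device).eval()
```

The fine-tuned weights are then loaded on top of this architecture, as shown in the full Usage code below.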
## ⚡️ Quick Start
Clone the repository:

```bash
git clone https://github.com/tonyassi/FootDetection.git
cd FootDetection
```

Install the dependencies:

```bash
pip install -r requirements.txt
```
Usage:

```python
from FootDetection import FootDetection
from PIL import Image

# Initialize model (first run will auto-download weights)
foot_detection = FootDetection("cpu")  # "cuda" for GPU or "mps" for Apple Silicon

# Load image
img = Image.open("image.jpg").convert("RGB")

# Run detection
results = foot_detection.detect(img, threshold=0.1)
print(results)

# Draw boxes
img_with_boxes = foot_detection.draw_boxes(img)
img_with_boxes.show()
img_with_boxes.save("annotated_image.jpg")
```
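For reference, `detect()` returns plain Python lists. Assuming the wrapper mirrors the standalone `foot_detection` function in the Usage section below, `results` has this shape (the coordinates and scores here are illustrative, not real output):

```python
# Each box is [x0, y0, x1, y1] in pixel coordinates, paired with the
# confidence score at the same index.
{
    "boxes": [[412.3, 655.1, 589.7, 802.4], [120.0, 640.2, 298.5, 795.9]],
    "scores": [0.87, 0.64]
}
```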
## 📦 Usage

To run the model without cloning the repository, install the dependencies and load the checkpoint directly from the Hugging Face Hub:

```bash
pip install torch torchvision pillow huggingface_hub
```
```python
import os
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image, ImageDraw
from torchvision.transforms import functional as F
from huggingface_hub import hf_hub_download

# ===== CONFIG =====
device = torch.device("cpu")  # or "mps" if stable
checkpoint_dir = "checkpoints"
checkpoint_file = "fasterrcnn_foot.pth"
local_path = os.path.join(checkpoint_dir, checkpoint_file)

# ===== Ensure Checkpoint Exists =====
if not os.path.exists(local_path):
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("Downloading model from Hugging Face...")
    local_path = hf_hub_download(
        repo_id="tonyassi/foot-detection",
        filename=checkpoint_file,
        local_dir=checkpoint_dir
    )

# ===== Load Model =====
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
model.load_state_dict(torch.load(local_path, map_location=device))
model.to(device)
model.eval()

# ===== Function: Foot Detection =====
def foot_detection(image, threshold=0.1):
    """Takes a PIL image, returns bounding boxes + scores above threshold"""
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model(image_tensor)[0]
    boxes = []
    scores = []
    for box, score in zip(outputs["boxes"], outputs["scores"]):
        if score >= threshold:
            boxes.append(box.tolist())
            scores.append(score.item())
    return {
        "boxes": boxes,
        "scores": scores
    }

# ===== Function: Draw Bounding Boxes =====
def draw_bounding_box(image, detection):
    """Draws boxes and scores on a copy of the image"""
    image_copy = image.copy()
    draw = ImageDraw.Draw(image_copy)
    for box, score in zip(detection["boxes"], detection["scores"]):
        x0, y0, x1, y1 = box
        draw.rectangle([x0, y0, x1, y1], outline="red", width=3)
        draw.text((x0, y0), f"{score:.2f}", fill="red")
    return image_copy
```
Run inference on a single image:

```python
from PIL import Image

# ==== Load and prepare image ====
image_path = "test.jpg"  # replace with your image path
image = Image.open(image_path).convert("RGB")

# ==== Run detection ====
detections = foot_detection(image, threshold=0.3)

# ==== Draw results ====
result_image = draw_bounding_box(image, detections)
result_image.show()  # or result_image.save("output.jpg")
```
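To annotate a whole folder, the two functions above can be reused in a small loop. This is a sketch with hypothetical directory names (`images/` and `annotated/`), not part of the original script:

```python
import os
from PIL import Image

input_dir = "images"      # hypothetical input folder
output_dir = "annotated"  # hypothetical output folder
os.makedirs(output_dir, exist_ok=True)

for name in os.listdir(input_dir):
    if not name.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    image = Image.open(os.path.join(input_dir, name)).convert("RGB")
    detections = foot_detection(image, threshold=0.3)
    draw_bounding_box(image, detections).save(os.path.join(output_dir, name))
```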