|
--- |
|
license: mit |
|
language: |
|
- en |
|
metrics: |
|
- accuracy |
|
tags: |
|
- code |
|
--- |
|
|
|
|
|
# ResNet Cat-Dog Classifier |
|
|
|
This repository contains a ResNet-based convolutional neural network trained to classify images as either cats or dogs. The model achieves an accuracy of 90.27% on a test dataset and is fine-tuned from ImageNet-pretrained weights using transfer learning. It uses PyTorch for training and inference.
|
|
|
## Model Details |
|
|
|
### Architecture: |
|
- Backbone: ResNet-18 |
|
- Input Size: 128x128 RGB images |
|
- Output: Binary classification (Cat or Dog) |
|
|
|
### Training Details: |
|
- Dataset: Kaggle Cats and Dogs dataset |
|
- Loss Function: Cross-entropy loss |
|
- Optimizer: Adam optimizer |
|
- Learning Rate: 0.001 |
|
- Epochs: 15 |
|
- Batch Size: 32 |
|
|
|
### Performance: |
|
- Accuracy: 90.27% on test images |
|
- Training Time: Approximately 1 hour on NVIDIA RTX 3050 Ti |
|
|
|
|
|
## Results: |
|
 |
|
## Usage |
|
|
|
### Installation: |
|
- Dependencies: PyTorch, TorchVision, matplotlib, Pillow
|
|
|
|
|
|
|
### Inference: |
|
```python |
|
import torch |
|
from torchvision.models import resnet18 |
|
from PIL import Image |
|
import torchvision.transforms as transforms |
|
import matplotlib.pyplot as plt |
|
# Rebuild the ResNet-18 skeleton with a 2-way head so the fine-tuned
# checkpoint's state_dict matches the module structure exactly.
model = resnet18(weights=None)  # modern API; `pretrained=False` is deprecated
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)  # binary head: index 0 = Cat, 1 = Dog

# Load the trained model state_dict
model_path = 'cat_dog_classifier.pth'
# map_location="cpu" lets a GPU-saved checkpoint load on CPU-only machines.
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()  # inference mode: frozen BatchNorm statistics, dropout disabled
|
|
|
```

<!-- Full print(model) output, kept for reference:
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=2, bias=True)
)
-->

```python
|
# Preprocessing pipeline — must mirror the transforms used at training time.
_preprocess_steps = [
    transforms.Resize((128, 128)),   # model was trained on 128x128 inputs
    transforms.ToTensor(),           # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(            # ImageNet channel statistics
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(_preprocess_steps)
|
|
|
def load_image(image_path):
    """Load the image at *image_path* and return a preprocessed 1x3x128x128 tensor.

    The image is converted to RGB first: without this, grayscale, RGBA, or
    palette-mode files produce the wrong channel count and the 3-channel
    Normalize step fails.
    """
    image = Image.open(image_path).convert("RGB")  # force exactly 3 channels
    image = transform(image)
    image = image.unsqueeze(0)  # add batch dimension: (3,128,128) -> (1,3,128,128)
    return image
|
|
|
def predict_image(model, image_path):
    """Run the classifier on one image file and return "Cat" or "Dog"."""
    batch = load_image(image_path)
    model.eval()  # inference mode: fixed BatchNorm stats, dropout off
    with torch.no_grad():  # no gradient bookkeeping needed for inference
        logits = model(batch)
        predicted = logits.argmax(dim=1)  # index of the highest-scoring class
    if predicted.item() == 0:
        return "Cat"
    return "Dog"
|
|
|
def plot_image(image_path, prediction):
    """Show the image with the predicted class as the plot title."""
    img = Image.open(image_path)
    plt.imshow(img)
    plt.title(f'Predicted: {prediction}')
    plt.axis('off')  # hide pixel-coordinate axes
    plt.show()
|
|
|
# Example usage: classify a single image and display it with its label.
image_path = "path.jpeg"  # replace with the path to your own image
prediction = predict_image(model, image_path)
print(f'The predicted class for the image is: {prediction}')
plot_image(image_path, prediction)
|
```

Expected output:

```text
The predicted class for the image is: Cat
```