Upload ComplexUNet for CIFAR-10 inpainting
- README.md +94 -0
- inpainting_transformer_weights.pth +3 -0
- model.py +53 -0
README.md
ADDED
@@ -0,0 +1,94 @@
---
license: mit
language: en
library_name: pytorch
tags:
- image-inpainting
- computer-vision
- pytorch
- unet
- cifar-10
datasets:
- cifar10
---

# U-Net for Image Inpainting on CIFAR-10

This repository contains a PyTorch implementation of a deep U-Net with residual blocks, trained to perform image inpainting on the CIFAR-10 dataset. The model takes an image with a masked (blacked-out) region and reconstructs the missing content.

## Model Description

The model is a `ComplexUNet` architecture, a variant of the standard U-Net. It features:
- **Deeper Architecture**: 4 downsampling and 4 upsampling stages.
- **Residual Blocks**: Each stage uses residual blocks instead of plain convolutional layers.
- **Increased Width**: The model was trained with `base_channels=96`.
- **Total Parameters**: 73,148,259 (see the quick check below).
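
The upload does not include a parameter-count script; the figure above can be checked in a few lines (this only assumes that `model.py` from this repo is importable):

```python
from model import ComplexUNet

model = ComplexUNet(base_channels=96)
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params:,}")  # expected: 73,148,259
```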

## How to Use

First, install the required libraries:

```bash
pip install torch torchvision numpy Pillow
```

Then load the model and perform inpainting on an image tensor.

```python
import torch
from torchvision import transforms as T
from torchvision.transforms.functional import to_pil_image
from PIL import Image
from model import ComplexUNet  # Import the class from model.py

# --- Setup ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Download the .pth file from the 'Files and versions' tab of this repo.
MODEL_PATH = "inpainting_transformer_weights.pth"

# --- Load Model ---
model = ComplexUNet(base_channels=96)
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model.to(DEVICE)
model.eval()

# --- Load and Preprocess Image ---
# For a real image:
#   image = Image.open("your_image.png").convert("RGB")
#   transform = T.Compose([T.Resize((32, 32)), T.ToTensor()])
#   image_tensor = transform(image)
# For demonstration, use a random tensor instead:
image_tensor = torch.rand(3, 32, 32)

# --- Create a Mask ---
masked_tensor = image_tensor.clone()
masked_tensor[:, 8:24, 8:24] = 0  # Black out a 16x16 square in the center

# --- Perform Inpainting ---
with torch.no_grad():
    input_tensor = masked_tensor.unsqueeze(0).to(DEVICE)  # add batch dimension
    reconstructed_tensor = model(input_tensor).squeeze(0).cpu()

# 'reconstructed_tensor' now holds the inpainted image.
reconstructed_image = to_pil_image(reconstructed_tensor)
reconstructed_image.save("reconstructed_image.png")
print("Saved reconstructed_image.png")
```
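
Rather than downloading manually, the weights can also be fetched with `huggingface_hub`. This is a sketch; the `repo_id` placeholder must be replaced with this repository's actual id:

```python
from huggingface_hub import hf_hub_download

# Placeholder repo id; substitute this repository's actual "user/name".
weights_path = hf_hub_download(
    repo_id="<user>/<repo>",
    filename="inpainting_transformer_weights.pth",
)
```

The returned `weights_path` can then be used as `MODEL_PATH` in the snippet above.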

## Training Data

The model was trained on the **CIFAR-10** dataset.
- **Preprocessing**: Images were used at their native **32x32 pixel** resolution.
- **Augmentation**: For each training image, a random rectangular mask was applied; one possible sampler is sketched below.
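
The mask-sampling code was not part of this upload. The following is a minimal sketch of one plausible sampler; the size bounds (8 to 16 pixels, matching the 16x16 example mask in the usage snippet) are assumptions, not the training script's actual values:

```python
import torch

def random_rect_mask(img, min_size=8, max_size=16):
    """Zero out a randomly placed, randomly sized rectangle in a CHW image tensor.

    min_size/max_size are assumed bounds, not the training script's actual values.
    """
    _, h, w = img.shape
    mh = torch.randint(min_size, max_size + 1, (1,)).item()
    mw = torch.randint(min_size, max_size + 1, (1,)).item()
    top = torch.randint(0, h - mh + 1, (1,)).item()
    left = torch.randint(0, w - mw + 1, (1,)).item()
    masked = img.clone()
    masked[:, top:top + mh, left:left + mw] = 0
    return masked
```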

## Training Procedure

- **Framework**: PyTorch
- **Optimizer**: Adam
- **Learning Rate**: 0.001
- **Epochs**: 50
- **Batch Size**: 128
- **Loss Function**: Mean Squared Error (MSE)

The training script itself was not uploaded; the loop these settings imply is sketched below.
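
This is a minimal reconstruction from the hyperparameters above, reusing `random_rect_mask` from the previous sketch; the plain `ToTensor` pipeline and per-image masking are assumptions:

```python
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from model import ComplexUNet

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_set = datasets.CIFAR10(root="data", train=True, download=True,
                             transform=transforms.ToTensor())
loader = DataLoader(train_set, batch_size=128, shuffle=True)

model = ComplexUNet(base_channels=96).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

for epoch in range(50):
    for images, _ in loader:
        images = images.to(device)
        # Mask each image, then train the model to reconstruct the original.
        masked = torch.stack([random_rect_mask(img) for img in images])
        output = model(masked)
        loss = criterion(output, images)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
```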

## Evaluation

Evaluation metrics were not saved by the training script. To obtain PSNR and SSIM, run the `evaluate_model` function from the training script.
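
If the training script is not at hand, PSNR for images scaled to `[0, 1]` is straightforward to compute directly (a minimal sketch; `original` and `reconstructed` would be tensors like those in the usage example):

```python
import torch

def psnr(original, reconstructed, max_val=1.0):
    """Peak signal-to-noise ratio in dB for tensors scaled to [0, max_val]."""
    mse = torch.mean((original - reconstructed) ** 2)
    return 10 * torch.log10(max_val ** 2 / mse)
```

For SSIM, `skimage.metrics.structural_similarity` from `scikit-image` is one common choice.
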
inpainting_transformer_weights.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e8f1afbf67c0f3de4261cb41d4083d238bbfd51d54f9ed9e67b7d3faa3183d2d
size 7928446
model.py
ADDED
@@ -0,0 +1,53 @@
import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Two 3x3 conv + BN layers with a (projected) identity shortcut."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if in_channels != out_channels:
            # 1x1 projection so the shortcut matches the output channel count.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        residual = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + residual
        return self.relu(out)


class ComplexUNet(nn.Module):
    """U-Net with residual blocks: 4 encoder stages, a bottleneck, and 4 decoder stages."""

    def __init__(self, base_channels=96):  # Default matches the trained architecture.
        super().__init__()
        c = base_channels
        self.pool = nn.MaxPool2d(2, 2)
        # Encoder: channel width doubles at each stage.
        self.enc1 = ResidualBlock(3, c)
        self.enc2 = ResidualBlock(c, c * 2)
        self.enc3 = ResidualBlock(c * 2, c * 4)
        self.enc4 = ResidualBlock(c * 4, c * 8)
        self.bottleneck = ResidualBlock(c * 8, c * 16)
        # Decoder: transposed convs upsample, residual blocks fuse the skip connections.
        self.upconv1 = nn.ConvTranspose2d(c * 16, c * 8, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose2d(c * 8, c * 4, kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose2d(c * 4, c * 2, kernel_size=2, stride=2)
        self.upconv4 = nn.ConvTranspose2d(c * 2, c, kernel_size=2, stride=2)
        self.dec_conv1 = ResidualBlock(c * 16, c * 8)
        self.dec_conv2 = ResidualBlock(c * 8, c * 4)
        self.dec_conv3 = ResidualBlock(c * 4, c * 2)
        self.dec_conv4 = ResidualBlock(c * 2, c)
        self.final_conv = nn.Conv2d(c, 3, kernel_size=1)

    def forward(self, x):
        # Encoder path with max-pool downsampling.
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))
        b = self.bottleneck(self.pool(e4))
        # Decoder path: upsample, concatenate the matching encoder output, refine.
        d1 = self.dec_conv1(torch.cat([self.upconv1(b), e4], dim=1))
        d2 = self.dec_conv2(torch.cat([self.upconv2(d1), e3], dim=1))
        d3 = self.dec_conv3(torch.cat([self.upconv3(d2), e2], dim=1))
        d4 = self.dec_conv4(torch.cat([self.upconv4(d3), e1], dim=1))
        # Sigmoid keeps outputs in [0, 1] to match ToTensor-scaled images.
        return torch.sigmoid(self.final_conv(d4))
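
A quick sanity check that the architecture round-trips CIFAR-10-sized inputs (a minimal sketch, not part of the uploaded file):

```python
import torch
from model import ComplexUNet

model = ComplexUNet(base_channels=96)
x = torch.rand(1, 3, 32, 32)  # one CIFAR-10-sized RGB image
with torch.no_grad():
    y = model(x)
print(y.shape)  # torch.Size([1, 3, 32, 32])
```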