54 lines
2 KiB
Python
54 lines
2 KiB
Python
import torch.nn as nn
|
|
|
|
class DogCatClassifier(nn.Module):
    """Small convolutional network for binary (dog vs. cat) image classification.

    Architecture: three ``Conv2d -> ReLU -> MaxPool2d(2) -> BatchNorm2d``
    stages (3 -> 32 -> 64 -> 128 channels), followed by a two-layer fully
    connected head with dropout in between and a sigmoid on the output.

    Input:  float tensor of shape (B, 3, H, W). The convolutions preserve
            H and W (3x3 kernel, padding 1) and each max-pool halves them, so
            the flattened feature size is 128 * (H // 8) * (W // 8). ``fc1``
            expects 128 * 4 * 4 = 2048 features, e.g. 32x32 images.
    Output: tensor of shape (B, 1) with values in [0, 1] (sigmoid applied).
            Because the sigmoid is already applied, pair this model with
            ``nn.BCELoss`` rather than ``nn.BCEWithLogitsLoss``.
            NOTE(review): which class maps to 1 depends on the dataset
            labels -- confirm against the training code.
    """

    def __init__(self):
        super().__init__()

        # 3 color (RGB) image, so the input tensor has shape (B x 3 x H x W)
        self.conv1 = nn.Sequential(
            # 3x3 kernel over the batch; channels go from 3 (RGB) to 32,
            # padding=1 keeps H and W unchanged
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(inplace=True),  # relu to add nonlinearity
            nn.MaxPool2d(2),  # reduces h and w of img by a factor of 2
            # normalizes each channel over the batch,
            # see https://arxiv.org/abs/1502.03167
            nn.BatchNorm2d(32),
        )

        # tensor size is now (B x 32 x h/2 x w/2)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),  # 32 channels to 64 ch with 3x3 kernel
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # reduces h and w of img by a factor of 2
            nn.BatchNorm2d(64),  # batch norm, https://arxiv.org/abs/1502.03167
        )

        # tensor size is now (B x 64 x h/4 x w/4)
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),  # 64 channels to 128 ch with 3x3 kernel
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # reduces h and w of img by a factor of 2
            nn.BatchNorm2d(128),  # batch norm, https://arxiv.org/abs/1502.03167
        )

        # tensor size is now (B x 128 x h/8 x w/8)

        # 128 * 4 * 4 = 2048 flattened features, i.e. h/8 * w/8 must equal 16
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        # Tunable. During training, zeroes each activation with probability
        # 0.5 (and rescales the rest); it is a no-op in eval mode. This must be
        # an nn.Dropout module -- a bare float would never be applied.
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, 1)  # 512 features to 1 output

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: tensor of shape (B, 3, H, W); see the class docstring for the
               spatial size required by ``fc1``.

        Returns:
            Tensor of shape (B, 1) holding sigmoid outputs in [0, 1].
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        # flatten (B x 128 x h/8 x w/8) to (B x features) for the linear layers
        x = x.view(x.size(0), -1)

        x = self.fc1(x)
        x = nn.functional.relu(x)  # relu to add nonlinearity
        x = self.dropout(x)  # regularization; only active in train() mode
        x = self.fc2(x)
        x = nn.functional.sigmoid(x)  # 1 / (1 + e^(-x))
        return x
|
|
|
|
|
|
|
|
|
|
|