more documentation

This commit is contained in:
Artem 2025-03-26 15:39:22 -04:00
parent c72a00dc0d
commit 7d9454b288
3 changed files with 46 additions and 40 deletions

View file

@ -4,40 +4,48 @@ class DogCatClassifier(nn.Module):
def __init__(self):
    """Build three conv stages followed by a two-layer fully connected head."""
    super().__init__()

    def conv_stage(in_channels, out_channels):
        # One feature-extraction stage:
        #   3x3 convolution (padding 1 keeps H and W unchanged)
        #   -> ReLU non-linearity
        #   -> 2x2 max-pool (halves H and W)
        #   -> batch normalisation of each output channel
        #      (https://arxiv.org/abs/1502.03167)
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.BatchNorm2d(out_channels),
        )

    # Input is a 3-channel (RGB) image batch: (B x 3 x H x W).
    self.conv1 = conv_stage(3, 32)     # -> (B x 32 x H/2 x W/2)
    self.conv2 = conv_stage(32, 64)    # -> (B x 64 x H/4 x W/4)
    self.conv3 = conv_stage(64, 128)   # -> (B x 128 x H/8 x W/8)

    # The head expects 128 * 4 * 4 = 2048 flattened features, i.e. a 4x4
    # feature map after conv3 (e.g. 32x32 input images).
    self.fc1 = nn.Linear(128 * 4 * 4, 512)
    self.dropout = 0.5  # dropout probability (tunable hyperparameter)
    self.fc2 = nn.Linear(512, 1)  # 512 features -> a single output value
def forward(self, x):
    """Run a batch of images through the conv stack and classifier head.

    Args:
        x: batch of images; after the three conv stages it must flatten
            to the number of input features fc1 was built with.

    Returns:
        Tensor with one sigmoid-activated value per sample, shape (B, 1).
    """
    x = self.conv1(x)
    x = self.conv2(x)
    x = self.conv3(x)
    # Flatten everything except the batch dimension for the linear layers.
    x = x.view(x.size(0), -1)
    x = nn.functional.relu(self.fc1(x))  # ReLU adds non-linearity
    # self.dropout holds the drop probability; it was previously stored but
    # never applied. Dropout is only active while the module is in training
    # mode, so evaluation/inference output is unaffected.
    x = nn.functional.dropout(x, p=self.dropout, training=self.training)
    x = self.fc2(x)
    return nn.functional.sigmoid(x)  # 1 / (1 + e^(-x))

16
test.py
View file

@ -4,23 +4,21 @@ from torch.utils.data import DataLoader
from dogs_cats_ds import DogCatDataset from dogs_cats_ds import DogCatDataset
from model import DogCatClassifier from model import DogCatClassifier
from consts import TEST_DATA from consts import TEST_DATA
import torch.optim as optim
def test(model: nn.Module, test_loader: DataLoader, criterion, device): def test(model: nn.Module, test_loader: DataLoader, criterion, device):
model.eval() model.eval()
test_loss = 0 test_loss = 0
correct = 0 correct = 0
total = 0 total = 0
with torch.no_grad(): with torch.no_grad(): # do not update gradients
for img, lab in test_loader: for img, lab in test_loader:
img, lab = img.to(device), lab.to(device).float().view(-1, 1) img, lab = img.to(device), lab.to(device).float().view(-1, 1) # similar to how we did it in train, offset both to a gpu for better perf
out = model(img) out = model(img)
loss = criterion(out, lab) loss = criterion(out, lab) # evaluate loss
test_loss += loss.item() test_loss += loss.item()
pred = (out > 0.5).float() pred = (out > 0.5).float() #same as with train, if its < 0.5 return 0 else 1
total += lab.size(0) total += lab.size(0) # total is increased by batch size
correct += (pred == lab).sum().item() correct += (pred == lab).sum().item() # correct only += 1 if the prediction matches the label
print(f'test loss: {test_loss / len(test_loader):.4f}, test_acc: {100*correct/total:.2f}%') print(f'test loss: {test_loss / len(test_loader):.4f}, test_acc: {100*correct/total:.2f}%')
model.train() model.train()
@ -33,6 +31,6 @@ if __name__ == "__main__":
dog_test_loader = DataLoader(dog_test_dataset, batch_size = 32, shuffle = False) # since its test, bad to shuffle dog_test_loader = DataLoader(dog_test_dataset, batch_size = 32, shuffle = False) # since its test, bad to shuffle
model = DogCatClassifier() model = DogCatClassifier()
criterion = nn.BCELoss() criterion = nn.BCELoss()
model.load_state_dict(torch.load('dog_cat_classifier.pth', map_location = device, weights_only = True)) model.load_state_dict(torch.load('dog_cat_classifier.pth', map_location = device, weights_only = True)) # loads what we trained in train.py
test(model, dog_test_loader, criterion, device) test(model, dog_test_loader, criterion, device)

View file

@ -1,33 +1,33 @@
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
from torch.utils.data import DataLoader, Dataset from torch.utils.data import DataLoader
from consts import TRAIN_DATA from consts import TRAIN_DATA
from tqdm import tqdm from tqdm import tqdm
from model import DogCatClassifier from model import DogCatClassifier
from dogs_cats_ds import DogCatDataset from dogs_cats_ds import DogCatDataset
def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs):
    """Train ``model`` on ``train_loader``, logging progress every 50 minibatches.

    Args:
        model: network to optimise; moved to ``device`` before training.
        train_loader: iterable of ``(images, labels)`` batches.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer holding the model's parameters.
        device: device the model and each batch are moved to.
        epochs: number of passes over ``train_loader``.
    """
    model.to(device)
    model.train()  # training-mode behaviour for layers such as batch norm
    for epoch in tqdm(range(epochs)):  # tqdm wraps the loop in a progress bar
        # Per-epoch accumulators: summed loss, correct predictions, samples seen.
        running_loss = 0.0
        correct = 0
        total = 0
        for i, (img, lab) in enumerate(train_loader):
            img = img.to(device)
            # Labels become a float column so they can be compared
            # elementwise with the outputs (assumes outputs are (N, 1)).
            lab = lab.to(device).float().view(-1, 1)
            optimizer.zero_grad()  # clear gradients left from the previous step
            out = model(img)
            loss = criterion(out, lab)
            loss.backward()  # backpropagate through autograd
            optimizer.step()  # update the model parameters
            running_loss += loss.item()
            pred = (out > 0.5).float()  # 1.0 if output > 0.5, else 0.0
            total += lab.size(0)  # number of samples in this batch
            correct += (pred == lab).sum().item()
            # Log on every 50th minibatch. The previous check `(i + 1) % 50`
            # was truthy on every minibatch EXCEPT multiples of 50.
            if (i + 1) % 50 == 0:
                # NOTE(review): the divisor 100 does not match the 50-batch
                # logging interval; left unchanged because it depends on
                # whether running_loss is reset elsewhere - confirm.
                print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss/100:.4f}, acc lookin like {100 * correct / total :.2f}%')
@ -47,6 +47,6 @@ if __name__ == "__main__":
print(model) print(model)
train(model = model, train_loader = dog_train_loader, criterion = criterion, optimizer = optimizer, device = device, epochs = 10) train(model = model, train_loader = dog_train_loader, criterion = criterion, optimizer = optimizer, device = device, epochs = 10)
torch.save(model.state_dict(), 'dog_cat_classifier.pth') torch.save(model.state_dict(), 'dog_cat_classifier.pth') # saves model to pth file
print('done w train, model saved') print('done w train, model saved')