From b723f0f6b4170962b0fc70f2d9d99e5a6536e65d Mon Sep 17 00:00:00 2001 From: Artem Date: Tue, 1 Apr 2025 16:41:45 -0400 Subject: [PATCH] need to do test then we done --- dogs_cats_ds.py | 2 +- model.py | 54 ++++++++++++++++-------------------------- test.py | 23 +++++++++++++++++- train.py | 63 +++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 93 insertions(+), 49 deletions(-) diff --git a/dogs_cats_ds.py b/dogs_cats_ds.py index 1621805..4ca7994 100644 --- a/dogs_cats_ds.py +++ b/dogs_cats_ds.py @@ -1,6 +1,6 @@ from torch.utils.data import Dataset class DogCatDataset(Dataset): - def __init__(self, ds, dog=[5], cat = [3]): + def __init__(self, ds:Dataset, dog=[5], cat = [3]): self.ds = ds self.idx = [] for i in range(len(ds)): diff --git a/model.py b/model.py index 153508f..d0124c0 100644 --- a/model.py +++ b/model.py @@ -5,50 +5,36 @@ class DogCatClassifier(nn.Module): super().__init__() # 3 color (RGB) image, so tensor is of shape (B x 3 x H X W) - self.conv1 = nn.Sequential( - nn.Conv2d(3, 32, 3, padding = 1), # passes conv kernel over batch and increases num channels from 3 (for RBG) to 32 - nn.ReLU(inplace = True), # relu to add nonlinearity - nn.MaxPool2d(2), # reduces h and w of img by a factor of 2 - nn.BatchNorm2d(32) #normalizes over z distribution https://arxiv.org/abs/1502.03167 - ) + # if we take a look, we can see that the images are of size 32 * 32 if we look at them in a file explorer, so our H and W are 32 in this case + self.conv1 = nn.Conv2d(3, 32, 3, padding = 1) # passes conv kernel over batch and increases num channels from 3 (for RBG) to 32 + self.relu = nn.ReLU(inplace = True) # relu to add nonlinearity + self.mp = nn.MaxPool2d(2) # reduces h and w of img by a factor of 2 + self.bn1 = nn.BatchNorm2d(32) #normalizes over z distribution https://arxiv.org/abs/1502.03167 - # tensor size is now (B x 32 x h/2 x w/2) + # tensor size is now (B x 32 x 32/2 = 16 x 32/2 = 16) - self.conv2 = nn.Sequential( - nn.Conv2d(32, 64, 3, padding = 1), # 32 channels to 64 ch with 3x3 kernel - nn.ReLU(inplace = True), - nn.MaxPool2d(2), # reduces # reduces h and w of img by a factor of 2 - nn.BatchNorm2d(64) #normalizes over z distribution https://arxiv.org/abs/1502.03167 - ) + self.conv2 = nn.Conv2d(32, 64, 3, padding = 1) # 32 channels to 64 ch with 3x3 kernel + self.bn2 = nn.BatchNorm2d(64) #normalizes over z distribution https://arxiv.org/abs/1502.03167 - # tensor size is now (B x 64 x h/4 x w/4) + # tensor size is now (B x 64 x 32/4 = 8 x 32/4 = 8) - self.conv3 = nn.Sequential( - nn.Conv2d(64, 128, 3, padding = 1), # 64 channels to 128 ch with 3x3 kernel - nn.ReLU(inplace = True), - nn.MaxPool2d(2), # reduces # reduces h and w of img by a factor of 2 - nn.BatchNorm2d(128) # normalizes over z distribution https://arxiv.org/abs/1502.03167 - ) + self.conv3 = nn.Conv2d(64, 128, 3, padding = 1) # 64 channels to 128 ch with 3x3 kernelnn + self.bn3= nn.BatchNorm2d(128) # normalizes over z distribution https://arxiv.org/abs/1502.03167 - # tensor size is now (B x 128 x h/8 x w/8) - - self.fc1 = nn.Linear(128 * 4 * 4 , 512)# 2048, lowkey had to calculator it lol + # tensor size is now (B x 128 x 32/8 = 4 x 32/8 = 4) + # basically, we have B batches, 128 channels, and a 4x4 pixel representation of our initial image + self.fc1 = nn.Linear(128 * 4 * 4 , 512)# 2048 feats (ch x h x w), lowkey had to calculator it lol self.dropout = 0.5 # tunable, removes half of the values and replaces them with 0s - self.fc2 = nn.Linear(512, 1) # 512 ch to 1 ch output - + self.fc2 = nn.Linear(512, 1) # 512 feats to 1 scalar output + def forward(self, x): - x = self.conv1(x) - x = self.conv2(x) - x = self.conv3(x) - x = x.view(x.size(0), -1) + x = self.bn1(self.mp(self.relu(self.conv1(x)))) + x = self.bn2(self.mp(self.relu(self.conv2(x)))) + x = self.bn3(self.mp(self.relu(self.conv3(x)))) + x = x.view(x.size(0), -1) # see model_ez for why we do this before linear layers # reformats for use in linear layer x = self.fc1(x) x = nn.functional.relu(x) # relu to add nonlinearity x = self.fc2(x) x = nn.functional.sigmoid(x) # 1 / 1 + e ^(-x) return x - - - - - diff --git a/test.py b/test.py index 4225af0..415b71c 100644 --- a/test.py +++ b/test.py @@ -5,11 +5,31 @@ from dogs_cats_ds import DogCatDataset from model import DogCatClassifier from consts import TEST_DATA -def test(model: nn.Module, test_loader: DataLoader, criterion, device): + +def test(model: nn.Module, test_images: torch.Tensor, test_labels: torch.Tensor, criterion): model.eval() test_loss = 0 correct = 0 total = 0 + with torch.no_grad(): + for img in test_images: + for label in test_labels: + out = model(img) + loss = criterion(label) + test_loss += loss.item() + pred = (out <= 0.5).float() + total += label.size(0) + correct += (pred == lab).sum().item() + + + + + +def test(model: nn.Module, test_loader: DataLoader, criterion, device): + model.eval() # this mode will disable the backward funcitonality for all tensors, and only perform the fwd pass + test_loss = 0 + correct = 0 + total = 0 with torch.no_grad(): # do not update gradients for img, lab in test_loader: img, lab = img.to(device), lab.to(device).float().view(-1, 1) # similar to how we did it in train, offset both to a gpu for better perf @@ -32,5 +52,6 @@ if __name__ == "__main__": model = DogCatClassifier() criterion = nn.BCELoss() model.load_state_dict(torch.load('dog_cat_classifier.pth', map_location = device, weights_only = True)) # loads what we trained in train.py + print(model) test(model, dog_test_loader, criterion, device) diff --git a/train.py b/train.py index e710f50..cc6abdb 100644 --- a/train.py +++ b/train.py @@ -2,11 +2,37 @@ import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader +from torch.utils.data.dataset import Dataset from consts import TRAIN_DATA from tqdm import tqdm from model import DogCatClassifier from dogs_cats_ds import DogCatDataset + + +# def train_sgd(model: nn.Module, +# imgs: torch.Tensor, +# labels: torch.Tensor, +# batch_size: int, +# criterion: nn.Module, +# optimizer: optim.Optimizer): +# losses: list = [] +# shuffle: torch.Tensor = torch.randperm(imgs.size(0)) +# images_shuffled: torch.Tensor = imgs[shuffle] +# labels_shuffled: torch.Tensor = labels[shuffle] +# for i in range(0, imgs.size(0), batch_size): +# batched_images: torch.Tensor = images_shuffled[i:i+batch_size] +# batched_labels: torch.Tensor = labels_shuffled[i:i+batch_size] +# outputs = model(batched_images) +# loss = criterion(outputs, batched_labels) +# loss.backward() +# optimizer.step() +# +# losses.append(loss.item()) +# return losses + + + def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs): model.to(device) # send to gpu if there is one, otherwise toss it over to cpu model.train() #train mode means that all gradients are active and modifiable @@ -19,18 +45,27 @@ def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, devi img, lab = img.to(device), lab.to(device).float().view(-1, 1) # send the image and label to the gpu if there is one else send to cpu, .view(-1, 1) returns the same tensor data but with the shape of the last dimension optimizer.zero_grad() # resets gradients to zero when we initialize. - out = model(img) # outputs are the results of our model on the image (sigmoid) - loss = criterion(out, lab) # loss is difference between expected and real label from prediction - loss.backward() # backprop using autograd + # get a prediction here + # + # calculate the loss here + + # perform backpropogation here + + + # optimizer.step() # update optimizer running_loss += loss.item() # loss in epoch updated with loss - pred = (out > 0.5).float() # prediction is 0 if less than 0.5 else 1 - total += lab.size(0) #total samples is increased by the 0th dim of the tensor(batch size) - correct += (pred == lab).sum().item() # only add 1 to the correct count if the actual label (dog) = the predicted label(dog) + #calculate accuracy - if (i + 1) % 50: - print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss/100:.4f}, acc lookin like {100 * correct / total :.2f}%') + + #hint: what kind of values is the accuracy outputing? What kind of values do we want? + + total += lab.size(0) #total samples is increased by the 0th dim of the tensor(batch size) + # only add 1 to the correct count if the actual label (dog) = the predicted label(dog) + + if (i + 1) % 50 == 0: + print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss/(i+1):.4f}, acc lookin like {100 * correct / total :.2f}%') running_loss = 0.0 total = 0 correct = 0 @@ -39,14 +74,16 @@ def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, devi if __name__ == "__main__": device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(f'Using device: {device}') + dog_train_dataset = DogCatDataset(TRAIN_DATA) dog_train_loader = DataLoader(dog_train_dataset, batch_size = 32, shuffle = True) # since its train, ok to shuffle - model = DogCatClassifier() - criterion = nn.BCELoss() - optimizer = optim.Adam(model.parameters(), lr = 0.001) - print(model) + + model = DogCatClassifier() # black box for now + criterion = nn.BCELoss() # cross entropy loss, feel free to experiment with others + optimizer = optim.Adam(model.parameters(), lr = 0.001) # feel free to mess around with other optimizers as well train(model = model, train_loader = dog_train_loader, criterion = criterion, optimizer = optimizer, device = device, epochs = 10) - torch.save(model.state_dict(), 'dog_cat_classifier.pth') # saves model to pth file + torch.save(model.state_dict(), 'dog_cat_classifier.pth') # saves model to pth file, which can be read by pytorch print('done w train, model saved') +