need to do test then we done

This commit is contained in:
Artem 2025-04-01 16:41:45 -04:00
parent 7d9454b288
commit b723f0f6b4
4 changed files with 93 additions and 49 deletions

View file

@ -1,6 +1,6 @@
from torch.utils.data import Dataset from torch.utils.data import Dataset
class DogCatDataset(Dataset): class DogCatDataset(Dataset):
def __init__(self, ds, dog=[5], cat = [3]): def __init__(self, ds:Dataset, dog=[5], cat = [3]):
self.ds = ds self.ds = ds
self.idx = [] self.idx = []
for i in range(len(ds)): for i in range(len(ds)):

View file

@ -5,50 +5,36 @@ class DogCatClassifier(nn.Module):
super().__init__() super().__init__()
# 3 color (RGB) image, so tensor is of shape (B x 3 x H X W) # 3 color (RGB) image, so tensor is of shape (B x 3 x H X W)
self.conv1 = nn.Sequential( # if we take a look, we can see that the images are of size 32 * 32 if we look at them in a file explorer, so our H and W are 32 in this case
nn.Conv2d(3, 32, 3, padding = 1), # passes conv kernel over batch and increases num channels from 3 (for RGB) to 32 self.conv1 = nn.Conv2d(3, 32, 3, padding = 1) # passes conv kernel over batch and increases num channels from 3 (for RGB) to 32
nn.ReLU(inplace = True), # relu to add nonlinearity self.relu = nn.ReLU(inplace = True) # relu to add nonlinearity
nn.MaxPool2d(2), # reduces h and w of img by a factor of 2 self.mp = nn.MaxPool2d(2) # reduces h and w of img by a factor of 2
nn.BatchNorm2d(32) #normalizes over z distribution https://arxiv.org/abs/1502.03167 self.bn1 = nn.BatchNorm2d(32) #normalizes over z distribution https://arxiv.org/abs/1502.03167
)
# tensor size is now (B x 32 x h/2 x w/2) # tensor size is now (B x 32 x 32/2 = 16 x 32/2 = 16)
self.conv2 = nn.Sequential( self.conv2 = nn.Conv2d(32, 64, 3, padding = 1) # 32 channels to 64 ch with 3x3 kernel
nn.Conv2d(32, 64, 3, padding = 1), # 32 channels to 64 ch with 3x3 kernel self.bn2 = nn.BatchNorm2d(64) #normalizes over z distribution https://arxiv.org/abs/1502.03167
nn.ReLU(inplace = True),
nn.MaxPool2d(2), # reduces # reduces h and w of img by a factor of 2
nn.BatchNorm2d(64) #normalizes over z distribution https://arxiv.org/abs/1502.03167
)
# tensor size is now (B x 64 x h/4 x w/4) # tensor size is now (B x 64 x 32/4 = 8 x 32/4 = 8)
self.conv3 = nn.Sequential( self.conv3 = nn.Conv2d(64, 128, 3, padding = 1) # 64 channels to 128 ch with 3x3 kernel
nn.Conv2d(64, 128, 3, padding = 1), # 64 channels to 128 ch with 3x3 kernel self.bn3= nn.BatchNorm2d(128) # normalizes over z distribution https://arxiv.org/abs/1502.03167
nn.ReLU(inplace = True),
nn.MaxPool2d(2), # reduces # reduces h and w of img by a factor of 2
nn.BatchNorm2d(128) # normalizes over z distribution https://arxiv.org/abs/1502.03167
)
# tensor size is now (B x 128 x h/8 x w/8) # tensor size is now (B x 128 x 32/8 = 4 x 32/8 = 4)
# basically, we have B batches, 128 channels, and a 4x4 pixel representation of our initial image
self.fc1 = nn.Linear(128 * 4 * 4 , 512)# 2048, lowkey had to calculator it lol self.fc1 = nn.Linear(128 * 4 * 4 , 512)# 2048 feats (ch x h x w), lowkey had to calculator it lol
self.dropout = 0.5 # tunable, removes half of the values and replaces them with 0s self.dropout = 0.5 # tunable, removes half of the values and replaces them with 0s
self.fc2 = nn.Linear(512, 1) # 512 ch to 1 ch output self.fc2 = nn.Linear(512, 1) # 512 feats to 1 scalar output
def forward(self, x): def forward(self, x):
x = self.conv1(x) x = self.bn1(self.mp(self.relu(self.conv1(x))))
x = self.conv2(x) x = self.bn2(self.mp(self.relu(self.conv2(x))))
x = self.conv3(x) x = self.bn3(self.mp(self.relu(self.conv3(x))))
x = x.view(x.size(0), -1) x = x.view(x.size(0), -1) # see model_ez for why we do this before linear layers
# reformats for use in linear layer # reformats for use in linear layer
x = self.fc1(x) x = self.fc1(x)
x = nn.functional.relu(x) # relu to add nonlinearity x = nn.functional.relu(x) # relu to add nonlinearity
x = self.fc2(x) x = self.fc2(x)
x = nn.functional.sigmoid(x) # 1 / (1 + e^(-x)) x = nn.functional.sigmoid(x) # 1 / (1 + e^(-x))
return x return x

23
test.py
View file

@ -5,11 +5,31 @@ from dogs_cats_ds import DogCatDataset
from model import DogCatClassifier from model import DogCatClassifier
from consts import TEST_DATA from consts import TEST_DATA
def test(model: nn.Module, test_loader: DataLoader, criterion, device):
def test(model: nn.Module, test_images: torch.Tensor, test_labels: torch.Tensor, criterion):
model.eval() model.eval()
test_loss = 0 test_loss = 0
correct = 0 correct = 0
total = 0 total = 0
with torch.no_grad():
for img in test_images:
for label in test_labels:
out = model(img)
loss = criterion(label)
test_loss += loss.item()
pred = (out <= 0.5).float()
total += label.size(0)
correct += (pred == lab).sum().item()
def test(model: nn.Module, test_loader: DataLoader, criterion, device):
model.eval() # eval mode switches layers such as BatchNorm to their inference behavior; it does not disable gradients by itself (torch.no_grad() below does that)
test_loss = 0
correct = 0
total = 0
with torch.no_grad(): # disable gradient tracking, since we only need the forward pass here with torch.no_grad(): # disable gradient tracking, since we only need the forward pass here
for img, lab in test_loader: for img, lab in test_loader:
img, lab = img.to(device), lab.to(device).float().view(-1, 1) # similar to how we did it in train, move both to the gpu (if there is one) for better perf img, lab = img.to(device), lab.to(device).float().view(-1, 1) # similar to how we did it in train, move both to the gpu (if there is one) for better perf
@ -32,5 +52,6 @@ if __name__ == "__main__":
model = DogCatClassifier() model = DogCatClassifier()
criterion = nn.BCELoss() criterion = nn.BCELoss()
model.load_state_dict(torch.load('dog_cat_classifier.pth', map_location = device, weights_only = True)) # loads what we trained in train.py model.load_state_dict(torch.load('dog_cat_classifier.pth', map_location = device, weights_only = True)) # loads what we trained in train.py
print(model)
test(model, dog_test_loader, criterion, device) test(model, dog_test_loader, criterion, device)

View file

@ -2,11 +2,37 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
from consts import TRAIN_DATA from consts import TRAIN_DATA
from tqdm import tqdm from tqdm import tqdm
from model import DogCatClassifier from model import DogCatClassifier
from dogs_cats_ds import DogCatDataset from dogs_cats_ds import DogCatDataset
# def train_sgd(model: nn.Module,
# imgs: torch.Tensor,
# labels: torch.Tensor,
# batch_size: int,
# criterion: nn.Module,
# optimizer: optim.Optimizer):
# losses: list = []
# shuffle: torch.Tensor = torch.randperm(imgs.size(0))
# images_shuffled: torch.Tensor = imgs[shuffle]
# labels_shuffled: torch.Tensor = labels[shuffle]
# for i in range(0, imgs.size(0), batch_size):
# batched_images: torch.Tensor = images_shuffled[i:i+batch_size]
# batched_labels: torch.Tensor = labels_shuffled[i:i+batch_size]
# outputs = model(batched_images)
# loss = criterion(outputs, batched_labels)
# loss.backward()
# optimizer.step()
#
# losses.append(loss.item())
# return losses
def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs): def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs):
model.to(device) # send to gpu if there is one, otherwise toss it over to cpu model.to(device) # send to gpu if there is one, otherwise toss it over to cpu
model.train() #train mode puts layers such as BatchNorm into their training behavior (batch statistics are used and updated) model.train() #train mode puts layers such as BatchNorm into their training behavior (batch statistics are used and updated)
@ -19,18 +45,27 @@ def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, devi
img, lab = img.to(device), lab.to(device).float().view(-1, 1) # send the image and label to the gpu if there is one else send to cpu, .view(-1, 1) reshapes the labels to (batch, 1) so they match the shape of the model output img, lab = img.to(device), lab.to(device).float().view(-1, 1) # send the image and label to the gpu if there is one else send to cpu, .view(-1, 1) reshapes the labels to (batch, 1) so they match the shape of the model output
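optimizer.zero_grad() # resets gradients to zero at the start of each minibatch so they do not accumulate across batches. optimizer.zero_grad() # resets gradients to zero at the start of each minibatch so they do not accumulate across batches.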
out = model(img) # outputs are the results of our model on the image (sigmoid) # get a prediction here
loss = criterion(out, lab) # loss is difference between expected and real label from prediction #
loss.backward() # backprop using autograd # calculate the loss here
# perform backpropagation here
#
optimizer.step() # update optimizer optimizer.step() # update optimizer
running_loss += loss.item() # loss in epoch updated with loss running_loss += loss.item() # loss in epoch updated with loss
pred = (out > 0.5).float() # prediction is 0 if less than 0.5 else 1 #calculate accuracy
total += lab.size(0) #total samples is increased by the 0th dim of the tensor(batch size)
correct += (pred == lab).sum().item() # only add 1 to the correct count if the actual label (dog) = the predicted label(dog)
if (i + 1) % 50:
print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss/100:.4f}, acc lookin like {100 * correct / total :.2f}%') #hint: what kind of values is the accuracy outputing? What kind of values do we want?
total += lab.size(0) #total samples is increased by the 0th dim of the tensor(batch size)
# only add 1 to the correct count if the actual label (dog) = the predicted label(dog)
if (i + 1) % 50 == 0:
print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss/(i+1):.4f}, acc lookin like {100 * correct / total :.2f}%')
running_loss = 0.0 running_loss = 0.0
total = 0 total = 0
correct = 0 correct = 0
@ -39,14 +74,16 @@ def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, devi
if __name__ == "__main__": if __name__ == "__main__":
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}') print(f'Using device: {device}')
dog_train_dataset = DogCatDataset(TRAIN_DATA) dog_train_dataset = DogCatDataset(TRAIN_DATA)
dog_train_loader = DataLoader(dog_train_dataset, batch_size = 32, shuffle = True) # since its train, ok to shuffle dog_train_loader = DataLoader(dog_train_dataset, batch_size = 32, shuffle = True) # since its train, ok to shuffle
model = DogCatClassifier()
criterion = nn.BCELoss() model = DogCatClassifier() # black box for now
optimizer = optim.Adam(model.parameters(), lr = 0.001) criterion = nn.BCELoss() # binary cross entropy loss (expects probabilities in [0, 1], which the final sigmoid provides), feel free to experiment with others
print(model) optimizer = optim.Adam(model.parameters(), lr = 0.001) # feel free to mess around with other optimizers as well
train(model = model, train_loader = dog_train_loader, criterion = criterion, optimizer = optimizer, device = device, epochs = 10) train(model = model, train_loader = dog_train_loader, criterion = criterion, optimizer = optimizer, device = device, epochs = 10)
torch.save(model.state_dict(), 'dog_cat_classifier.pth') # saves model to pth file torch.save(model.state_dict(), 'dog_cat_classifier.pth') # saves model to pth file, which can be read by pytorch
print('done w train, model saved') print('done w train, model saved')