beginning of clusterfreak

2025-03-20 12:45:57 -04:00 · 2025-03-20 12:45:57 -04:00 · 0e6bd00886
commit 0e6bd00886
parent a74fb9f90a
8 changed files with 173 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -176,3 +176,5 @@ ipython_config.py
 # Remove previous ipynb_checkpoints
 #   git rm -r .ipynb_checkpoints/
 Data/
 *.pth
--- a/consts.py
+++ b/consts.py
@ -0,0 +1,19 @@
 from pathlib import Path
 import torchvision
 import torchvision.transforms as transforms
 # Folder that holds the CIFAR-10 download, relative to the current working directory.
 CIFAR_DIR = Path('Data/CIFAR10')
 # parents = True: on a fresh checkout the intermediate Data/ folder does not exist yet,
 # and mkdir would otherwise raise FileNotFoundError at import time.
 CIFAR_DIR.mkdir(parents = True, exist_ok = True)
 # Preprocessing shared by both splits: convert each image to a tensor, then
 # normalize every one of the three channels with mean 0.5 and std 0.5.
 normalize = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = (0.5,) * 3, std = (0.5,) * 3),
 ])
 # The train and test splits are built identically apart from the `train` flag.
 # Both are downloaded into CIFAR_DIR if they are not already there.
 TRAIN_DATA, TEST_DATA = (
    torchvision.datasets.CIFAR10(root = CIFAR_DIR, train = is_train, transform = normalize, download = True)
    for is_train in (True, False)
 )
 # Human-readable class names; presumably indexed by the CIFAR-10 label id
 # (e.g. position 3 is 'cat' and position 5 is 'dog') -- confirm against the dataset.
 CLASSES = 'airplane automobile bird cat deer dog frog horse ship truck'.split()
--- a/dataset.py
+++ b/dataset.py
@ -0,0 +1,24 @@
 import torchvision
 from consts import CIFAR_DIR
 # Quick look at one raw CIFAR-10 training sample.
 # Optional transforms that could be passed instead of None are listed at:
 # https://pytorch.org/vision/0.11/transforms.html
 # NOTE: this is the complete CIFAR-10 training split; nothing in this script
 # filters it down to particular classes.
 cifar_data_train = torchvision.datasets.CIFAR10(
    root = CIFAR_DIR,
    train = True,
    transform = None,
    download = True,
 )
 # A sample is an (image, label) pair. With transform = None the image is
 # presumably a PIL image (it is displayed with .show() below).
 example_data = cifar_data_train[0]
 print(f'items in an instance of cifar10: {len(example_data)}')
 example_image, example_label = example_data
 # Opens the image in the system's default viewer.
 example_image.show()
 print(f'class corresponding to image: {example_label}')
--- a/dogs_cats_ds.py
+++ b/dogs_cats_ds.py
@ -0,0 +1,26 @@
 from torch.utils.data import Dataset
 class DogCatDataset(Dataset):
    """Binary cat-vs-dog view over a labelled dataset.

    Wraps ``ds`` (anything supporting ``len()`` and integer indexing that
    yields ``(image, label)`` pairs) and exposes only the samples whose label
    is listed in ``dog`` or ``cat``. Dog samples are relabelled 1 and cat
    samples 0.

    Args:
        ds: underlying dataset of ``(image, label)`` pairs.
        dog: labels of ``ds`` that count as "dog". Defaults to ``(5,)``.
        cat: labels of ``ds`` that count as "cat". Defaults to ``(3,)``.

    Attributes:
        ds: the wrapped dataset.
        dog, cat: the accepted labels, stored as tuples.
        idx: positions in ``ds`` of the kept samples, in original order.
    """
    def __init__(self, ds, dog=(5,), cat=(3,)):
        self.ds = ds
        # Immutable copies: the defaults are no longer shared mutable lists,
        # and later changes to a caller's list cannot desync idx from the labels.
        self.dog = tuple(dog)
        self.cat = tuple(cat)
        wanted = self.dog + self.cat
        # Every sample is fetched once just to read its label.
        self.idx = [i for i in range(len(ds)) if ds[i][1] in wanted]

    def __len__(self):
        """Return the number of dog and cat samples kept from ``ds``."""
        return len(self.idx)

    def __getitem__(self, idx):
        """Return ``(image, binary_label)`` for the ``idx``-th kept sample.

        The binary label is 1 for a dog label and 0 for a cat label.

        Raises:
            IndexError: if ``idx`` is outside the kept samples.
            ValueError: if the underlying sample's label is neither a dog nor
                a cat label (e.g. ``ds`` changed after construction).
        """
        orig_idx = self.idx[idx]
        img, lab = self.ds[orig_idx]
        # Compare against the configured labels, not hard-coded 5 / 3, so
        # custom dog / cat arguments are honoured.
        if lab in self.dog:
            return img, 1
        if lab in self.cat:
            return img, 0
        raise ValueError(f'we got a non dog or cat label: {lab!r} at index {orig_idx}')
--- a/model.py
+++ b/model.py
@ -0,0 +1,46 @@
 import torch.nn as nn
 class DogCatClassifier(nn.Module):
    """Small CNN that outputs one probability per image.

    Three conv stages (Conv2d -> ReLU -> MaxPool2d(2) -> BatchNorm2d) with
    32, 64 and 128 channels, followed by a 512-unit fully connected layer,
    dropout, and a single sigmoid output.

    Args:
        dropout: probability of zeroing each fc1 activation during training.
            Stored on ``self.dropout`` and read on every forward pass, so it
            can be tuned after construction.
    """
    def __init__ (self, dropout = 0.5):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(32)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(64)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding = 1),
            nn.ReLU(inplace = True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(128)
        )
        # 128 channels * 4 * 4 = 2048 features; assumes 32x32 inputs, which
        # the three 2x poolings reduce to 4x4.
        self.fc1 = nn.Linear(128 * 4 * 4 , 512)
        self.dropout = dropout # tunable
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs for image batch ``x``."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        # Dropout was previously configured but never applied; it is only
        # active in training mode, so eval-mode outputs are unchanged.
        x = nn.functional.dropout(x, p = self.dropout, training = self.training)
        x = self.fc2(x)
        x = nn.functional.sigmoid(x)
        return x
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,4 @@
 numpy
 torchvision
 torch
 tqdm
--- a/test.py
+++ b/test.py
--- a/train.py
+++ b/train.py
@ -0,0 +1,52 @@
 import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader, Dataset
 from consts import TRAIN_DATA
 from tqdm import tqdm
 from model import DogCatClassifier
 from dogs_cats_ds import DogCatDataset
 def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs, log_every: int = 50):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Args:
        model: network whose output is compared to the labels by ``criterion``
            and thresholded at 0.5 for the accuracy readout.
        train_loader: yields ``(images, labels)`` batches.
        criterion: loss called as ``criterion(output, labels)``; labels are
            cast to float and reshaped to ``(batch, 1)`` first.
        optimizer: optimizer over ``model``'s parameters.
        device: device the model and every batch are moved to.
        epochs: number of passes over ``train_loader``.
        log_every: print the mean loss and accuracy of the last ``log_every``
            minibatches each time that many have been processed.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f'log_every must be >= 1, got {log_every}')
    model.to(device)
    model.train()
    for epoch in tqdm(range(epochs)):
        # Running statistics over the current logging window.
        running_loss = 0.0
        correct = 0
        total = 0
        for i, (img, lab) in enumerate(train_loader):
            img, lab = img.to(device), lab.to(device).float().view(-1, 1)
            optimizer.zero_grad()
            out = model(img)
            loss = criterion(out, lab)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            pred = (out > 0.5).float()
            total += lab.size(0)
            correct += (pred == lab).sum().item()
            # Report once per full window. The previous truthiness test on the
            # remainder fired on every batch except multiples of the window,
            # and the loss was averaged over the wrong count.
            if (i + 1) % log_every == 0:
                print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss / log_every:.4f}, acc lookin like {100 * correct / total :.2f}%')
                running_loss = 0.0
                total = 0
                correct = 0
 if __name__ == "__main__":
    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print(f'Using device: {device}')

    # Keep only the cat and dog samples of the training split; shuffle each epoch.
    cat_dog_train = DogCatDataset(TRAIN_DATA)
    cat_dog_loader = DataLoader(cat_dog_train, shuffle = True, batch_size = 32)

    # Sigmoid-output model paired with binary cross-entropy and Adam.
    model = DogCatClassifier()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr = 1e-3)
    print(model)

    train(model, cat_dog_loader, criterion, optimizer, device, 10)

    # Persist only the learned weights.
    torch.save(model.state_dict(), 'dog_cat_classifier.pth')
    print('done w train, model saved')