beginning of clusterfreak

This commit is contained in:
Artem 2025-03-20 12:45:57 -04:00
parent a74fb9f90a
commit 0e6bd00886
8 changed files with 173 additions and 0 deletions

2
.gitignore vendored
View file

@ -176,3 +176,5 @@ ipython_config.py
# Remove previous ipynb_checkpoints # Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/ # git rm -r .ipynb_checkpoints/
Data/
*.pth

19
consts.py Normal file
View file

@ -0,0 +1,19 @@
from pathlib import Path
import torchvision
import torchvision.transforms as transforms
# Directory where torchvision stores (and looks for) the CIFAR-10 files.
CIFAR_DIR = Path('Data/CIFAR10')
# parents=True is required: 'Data/' is not tracked by git, so on a fresh
# checkout the parent directory does not exist yet and a plain mkdir() would
# raise FileNotFoundError.
CIFAR_DIR.mkdir(parents=True, exist_ok=True)

# Preprocessing applied to every image: convert the PIL image to a tensor,
# then normalize each of the three channels with mean 0.5 and std 0.5.
normalize = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Train / test splits of CIFAR-10, downloaded into CIFAR_DIR when missing.
# NOTE: these are built at import time, so importing this module may trigger
# a download.
TRAIN_DATA = torchvision.datasets.CIFAR10(
    root=CIFAR_DIR, train=True, transform=normalize, download=True
)
TEST_DATA = torchvision.datasets.CIFAR10(
    root=CIFAR_DIR, train=False, transform=normalize, download=True
)

# Human-readable class names; the list index is the integer label
# (e.g. 3 -> 'cat', 5 -> 'dog').
CLASSES = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

24
dataset.py Normal file
View file

@ -0,0 +1,24 @@
import torchvision
from consts import CIFAR_DIR
# Optional transformations are documented at:
#   https://pytorch.org/vision/0.11/transforms.html
# Training split of CIFAR-10 via torchvision. No transform is passed here
# (transform = None), so samples are returned as-is.
cifar_data_train = torchvision.datasets.CIFAR10(
    root=CIFAR_DIR,
    train=True,
    transform=None,
    download=True,
)

# Peek at one sample. Each sample is an (image, class) pair: the image is a
# PIL (Pillow) image and the second item is its label (per the original note,
# the first sample is a frog).
# NOTE(review): the original comment also said the dataset was chopped down to
# cats and dogs only; nothing in this script does that filtering -- confirm
# whether that refers to the DogCatDataset wrapper instead.
example_data = cifar_data_train[0]
sample_image, sample_label = example_data[0], example_data[1]
print(f'items in an instance of cifar10: {len(example_data)}')
sample_image.show()
print(f'class corresponding to image: {sample_label}')

26
dogs_cats_ds.py Normal file
View file

@ -0,0 +1,26 @@
from torch.utils.data import Dataset
class DogCatDataset(Dataset):
    """Binary dog-vs-cat view over a labelled image dataset.

    Wraps an indexable dataset whose items are ``(image, label)`` pairs and
    exposes only the items whose label is listed in ``dog`` or ``cat``.
    Labels are remapped to ``1`` for dogs and ``0`` for cats.

    Args:
        ds: Underlying dataset; must support ``len(ds)`` and ``ds[i]``
            returning ``(image, label)``.
        dog: Iterable of original labels to treat as "dog" (default ``(5,)``).
        cat: Iterable of original labels to treat as "cat" (default ``(3,)``).
    """

    def __init__(self, ds, dog=(5,), cat=(3,)):
        self.ds = ds
        # Copy into tuples so the instance never shares (or mutates) a
        # caller-owned / default container.
        self.dog = tuple(dog)
        self.cat = tuple(cat)
        # Positions in `ds` of every sample we keep. Note this walks the whole
        # dataset once, which also runs any transform attached to `ds`.
        self.idx = []
        for i in range(len(ds)):
            _, lab = ds[i]
            if lab in self.dog or lab in self.cat:
                self.idx.append(i)

    def __len__(self):
        """Return the number of dog/cat samples kept."""
        return len(self.idx)

    def __getitem__(self, idx):
        """Return ``(image, binary_label)`` for the ``idx``-th kept sample.

        Raises:
            ValueError: if the underlying sample's label is neither a dog nor
                a cat label (should not happen for a stable dataset).
        """
        orig_idx = self.idx[idx]
        img, lab = self.ds[orig_idx]
        # Use the configured label sets rather than hard-coded 5 / 3 so that
        # custom `dog` / `cat` arguments are honoured.
        if lab in self.dog:
            return img, 1
        if lab in self.cat:
            return img, 0
        raise ValueError(
            f'we got a non dog or cat label: {lab!r} at index {orig_idx}'
        )

46
model.py Normal file
View file

@ -0,0 +1,46 @@
import torch.nn as nn
class DogCatClassifier(nn.Module):
    """Small CNN that outputs one sigmoid probability per input image.

    Three conv stages (Conv -> ReLU -> MaxPool(2) -> BatchNorm) are followed
    by a two-layer fully connected head with dropout in between. The first
    linear layer expects 128 * 4 * 4 features, i.e. a 4x4 map after the three
    2x poolings, which corresponds to 32x32 inputs.

    Args:
        dropout: Drop probability applied to the hidden fully connected
            activations during training (tunable, default 0.5).
    """

    def __init__(self, dropout: float = 0.5):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(32),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(64),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(128),
        )
        # 128 channels * 4 * 4 spatial positions = 2048 flattened features.
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        # A real Dropout layer (not a bare float) so it is actually applied in
        # forward() and automatically disabled by model.eval().
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of values in [0, 1]."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x.sigmoid()

4
requirements.txt Normal file
View file

@ -0,0 +1,4 @@
numpy
torchvision
torch
tqdm

0
test.py Normal file
View file

52
train.py Normal file
View file

@ -0,0 +1,52 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from consts import TRAIN_DATA
from tqdm import tqdm
from model import DogCatClassifier
from dogs_cats_ds import DogCatDataset
def train(model: nn.Module, train_loader: DataLoader, criterion, optimizer, device, epochs, log_every: int = 50):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Every ``log_every`` mini-batches the mean loss and the accuracy over that
    window are printed, then the running statistics are reset.

    Args:
        model: Network to optimise; moved to ``device`` and put in train mode.
        train_loader: Yields ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device on which to run the batches.
        epochs: Number of passes over ``train_loader``.
        log_every: Size (in mini-batches) of the reporting window.

    Raises:
        ValueError: if ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f'log_every must be >= 1, got {log_every}')

    model.to(device)
    model.train()
    for epoch in tqdm(range(epochs)):
        running_loss = 0.0
        correct = 0
        total = 0
        for i, (img, lab) in enumerate(train_loader):
            img = img.to(device)
            # Labels become a float column vector so they line up with the
            # (batch, 1) output the loss is computed against.
            lab = lab.to(device).float().view(-1, 1)

            optimizer.zero_grad()
            out = model(img)
            loss = criterion(out, lab)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Threshold at 0.5 -- assumes the model outputs probabilities.
            pred = (out > 0.5).float()
            total += lab.size(0)
            correct += (pred == lab).sum().item()

            # Report once per full window (the original condition was inverted
            # and fired on every batch *except* multiples of 50), and average
            # the loss over the actual window length.
            if (i + 1) % log_every == 0:
                print(f'yo its epoch {epoch + 1} out of {epochs} and we on minibatch {i + 1} / {len(train_loader)}. Loss lookin like: {running_loss / log_every:.4f}, acc lookin like {100 * correct / total :.2f}%')
                running_loss = 0.0
                total = 0
                correct = 0
if __name__ == "__main__":
    # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    print(f'Using device: {device}')

    # Dog/cat-only view of the CIFAR-10 training split; shuffling is fine
    # because this loader is only used for training.
    dog_train_dataset = DogCatDataset(TRAIN_DATA)
    dog_train_loader = DataLoader(
        dog_train_dataset,
        batch_size=32,
        shuffle=True,
    )

    # Model, binary cross-entropy loss, and Adam optimizer.
    model = DogCatClassifier()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    print(model)

    train(
        model=model,
        train_loader=dog_train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        epochs=10,
    )

    # Persist only the learned weights (state dict), not the whole module.
    weights_path = 'dog_cat_classifier.pth'
    torch.save(model.state_dict(), weights_path)
    print('done w train, model saved')