import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

REBUILD_DATA = False # set to true to one once, then back to false unless you want to change something in your training data.

class DogsVSCats():
    IMG_SIZE = 50
    CATS = "PetImages/Cat"
    DOGS = "PetImages/Dog"
    TESTING = "PetImages/Testing"
    LABELS = {CATS: 0, DOGS: 1}
    training_data = []

    catcount = 0
    dogcount = 0

    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for f in tqdm(os.listdir(label)):
                if "jpg" in f:
                    try:
                        path = os.path.join(label, f)
                        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                        img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                        self.training_data.append([np.array(img), np.eye(2)[self.LABELS[label]]])  # do something like print(np.eye(2)[1]), just makes one_hot 
                        #print(np.eye(2)[self.LABELS[label]])

                        if label == self.CATS:
                            self.catcount += 1
                        elif label == self.DOGS:
                            self.dogcount += 1

                    except Exception as e:
                        pass
                        #print(label, f, str(e))

        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Cats:',dogsvcats.catcount)
        print('Dogs:',dogsvcats.dogcount)


class Net(nn.Module):
    def __init__(self):
        super().__init__() # just run the init of parent class (nn.Module)
        self.conv1 = nn.Conv2d(1, 32, 5) # input is 1 image, 32 output channels, 5x5 kernel / window
        self.conv2 = nn.Conv2d(32, 64, 5) # input is 32, bc the first layer output 32. Then we say the output will be 64 channels, 5x5 kernel / window
        self.conv3 = nn.Conv2d(64, 128, 5)

        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)

        self.fc1 = nn.Linear(self._to_linear, 512) #flattening.
        self.fc2 = nn.Linear(512, 2) # 512 in, 2 out bc we're doing 2 classes (dog vs cat).

    def convs(self, x):
        # max pooling over 2x2
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))

        if self._to_linear is None:
            self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
        return x

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self._to_linear)  # .view is reshape ... this flattens X before 
        x = F.relu(self.fc1(x))
        x = self.fc2(x) # bc this is our output layer. No activation here.
        return F.softmax(x, dim=1)


net = Net()
print(net)

if REBUILD_DATA:
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()

training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])

VAL_PCT = 0.1  # lets reserve 10% of our data for validation
val_size = int(len(X)*VAL_PCT)

train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]

BATCH_SIZE = 100
EPOCHS = 1


def train(net):
    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
            #print(f"{i}:{i+BATCH_SIZE}")
            batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
            batch_y = train_y[i:i+BATCH_SIZE]

            net.zero_grad()

            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()    # Does the update

        print(f"Epoch: {epoch}. Loss: {loss}")


def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i])
            net_out = net(test_X[i].view(-1, 1, 50, 50))[0]  # returns a list, 
            predicted_class = torch.argmax(net_out)

            if predicted_class == real_class:
                correct += 1
            total += 1

    print("Accuracy: ", round(correct/total, 3))

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)
24946

I went ahead and made a quick function to handle the training, mostly since I didn't want to run the training bit again just yet. What I want to talk about now instead is how we go about running things on the GPU.

To start, you will need the GPU version of Pytorch. In order to use Pytorch on the GPU, you need a higher end NVIDIA GPU that is CUDA enabled.

If you do not have one, there are cloud providers. Linode is both a sponsor of this series as well as they simply have the best prices at the moment on cloud GPUs, by far.

Here's a Tutorial for setting up cloud GPUs. You could use the same commands from that tutorial if you're running Ubuntu 16.04 locally.

If you're on Windows, or some other OS, the requirements to getting CUDA setup are the same.

You need to install the CUDA toolkit.

After that, you need to download and extract CuDNN, moving the CuDNN contents into your Cuda Toolkit directory. When you've extracted the CuDNN download, you will have 3 directories inside of a directory called cuda. You just need to move the bin, include, and lib directories and merge them into your Cuda Toolkit directory. For example, for me, my CUDA toolkit directory is: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0, so this is where I would merge those CuDNN directories too.

Once you've done that, make sure you have the GPU version of Pytorch too, of course. When you go to the get started page, you can find the topin for choosing a CUDA version.

I believe you can also use Anaconda to install both the GPU version of Pytorch as well as the required CUDA packages. I personally don't enjoy using the Conda environment, but this is also an option.

Finally, if you're having trouble, come join us in the Sentdex discord. It's really quite simple (download/install CUDA Toolkit and drag and drop the CuDNN files) ... but this can still be daunting to someone unfamiliar with this, as well as certain issues that can still arise. We'd be happy to help you out in our community discord!

Once you think you've got everything setup, make sure CUDA is available:

torch.cuda.is_available()

True

Now we're ready to decide what we want to do on the GPU. We know at the very least we want our model and its calculations to be done on the GPU.

If your model is on the GPU, this means, in order to pass data through it, we also want our data on the GPU.

Thus, we want not only the model, but also the training data (if it can be fit), all on the GPU.

To start, we can put our network on our GPU. To do this, we can just set a flag like:

device = torch.device("cuda:0")
device

device(type='cuda', index=0)

Often, however, we want to write code that allows for a variety of people to use our code, including those who may not have a GPU available. To handle for this, we can use the above torch.cuda.is_available() and do:

if torch.cuda.is_available():
    device = torch.device("cuda:0")  # you can continue going on here, like cuda:1 cuda:2....etc. 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

Running on the GPU

Most basic neural networks wont benefit much from multiple GPUs, but, as you progress, you may find that you'd like to use multiple GPUs for your task. Again, to write code that can logically use what's available, you can get how many GPUs are available by doing:

torch.cuda.device_count()

1

From here, we could extrapolate out index numbers and assign specific layers to specific GPUs.

For now, we're writing code that really just needs either one GPU or CPU, so we'll just use a single device. Now that we have figured out the best device to use, we can start setting things to that device. For example, setting our neural network to that device is as easy as:

net.to(device)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)

We alread had our net defined above, but, usually you'd just immediately define and send it to the device, like:

net = Net().to(device)

Now we can go to train, but this time, let's put our batches on the GPU. In this example, we could actually have put all of our data on the GPU, since it's not a huge dataset. This would save with some IO time moving things from RAM to VRAM, But this wont be a very common, so I'd rather just show the way you're going to normally have to most likely do it.

I am going to copy the above train function and then make really just one quick modification, which is, after we've defined our batches, we can move them to the GPU by doing: batch_X, batch_y = batch_X.to(device), batch_y.to(device). So now our train function is:

EPOCHS = 3

def train(net):
    optimizer = optim.Adam(net.parameters(), lr=0.001)
    BATCH_SIZE = 100
    EPOCHS = 3
    for epoch in range(EPOCHS):
        for i in range(0, len(train_X), BATCH_SIZE): # from 0, to the len of x, stepping BATCH_SIZE at a time. [:50] ..for now just to dev
            #print(f"{i}:{i+BATCH_SIZE}")
            batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
            batch_y = train_y[i:i+BATCH_SIZE]

            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            net.zero_grad()

            optimizer.zero_grad()   # zero the gradient buffers
            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()    # Does the update

        print(f"Epoch: {epoch}. Loss: {loss}")

train(net)

Epoch: 0. Loss: 0.23834262788295746
Epoch: 1. Loss: 0.20373524725437164
Epoch: 2. Loss: 0.1704103648662567

As you can see by running times, this is much faster. Now we can also test on either the GPU or CPU. Since we're testing on quite a few samples, we can also do this on the GPU:

test_X.to(device)
test_y.to(device)

def test(net):
    correct = 0
    total = 0
    with torch.no_grad():
        for i in tqdm(range(len(test_X))):
            real_class = torch.argmax(test_y[i]).to(device)
            net_out = net(test_X[i].view(-1, 1, 50, 50).to(device))[0]  # returns a list, 
            predicted_class = torch.argmax(net_out)

            if predicted_class == real_class:
                correct += 1
            total += 1

    print("Accuracy: ", round(correct/total, 3))

test(net)

100%|a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^| 2494/2494 [00:08<00:00, 299.45it/s]

Accuracy:  0.706

Test in batches faster:

correct = 0
total = 0
for i in tqdm(range(0, len(test_X), BATCH_SIZE)):

    batch_X = test_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50).to(device)
    batch_y = test_y[i:i+BATCH_SIZE].to(device)
    batch_out = net(batch_X)

    out_maxes = [torch.argmax(i) for i in batch_out]
    target_maxes = [torch.argmax(i) for i in batch_y]
    for i,j in zip(out_maxes, target_maxes):
        if i == j:
            correct += 1
        total += 1
print("Accuracy: ", round(correct/total, 3))

100%|a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^a-^| 25/25 [00:01<00:00, 19.49it/s]

Accuracy:  0.706

Okay, so we've made quite a bit of progress from the beginning.

We've learned to make neural networks and how to train them, and we've seen good results already.

That said, can we do better than accuracy in the 70%s? Should we just keep doing more and more epochs? When do we stop?

What if we have multiple models? Is the only way to compare them to train them to completion? ...but we don't know when that is!

In the next tutorial, we're going to cover some basic analysis, visualization, and concepts to consider when analyzing a model's effectiveness.

Running on the GPU - Deep Learning and Neural Networks with Python and Pytorch p.7