from IPython.display import Image
from IPython.core.display import HTML

Image(url= "https://miro.medium.com/max/3200/1*QQVbuP2SEasB0XAmvjW0AA.jpeg", width=400, height=200)

%matplotlib inline
%config InlineBackend.figure_format = 'retina'  #To get figures with high quality!

import numpy as np
import torch
from torch import nn
from torch import optim
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
# Download and load the training data
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz

  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz

  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz

  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz

  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /root/.pytorch/F_MNIST_data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/F_MNIST_data/FashionMNIST/raw

dataiter = iter(trainloader)   #To iterate through the dataset

images, labels = dataiter.next()
print(type(images))
print(images.shape)
print(labels.shape)

<class 'torch.Tensor'>
torch.Size([64, 1, 28, 28])
torch.Size([64])

plt.imshow(images[1].numpy().reshape([28,28]), cmap='Greys_r')

<matplotlib.image.AxesImage at 0x7fc8d7adbd90>

class MLP(nn.Module):
    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels):
        super().__init__()
        self.L1 = nn.Linear(dimx,hidden1)
        self.L2 = nn.Linear(hidden1,hidden2)
        self.L3 = nn.Linear(hidden2,hidden3)
        self.L4 = nn.Linear(hidden3,nlabels)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      x = self.L1(x)
      x = self.relu(x)
      x = self.L2(x)
      x = self.relu(x)
      x = self.L3(x)
      x = self.relu(x)
      x = self.L4(x)
      x = self.softmax(x)
      return x

class MLP_extended(MLP):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels,epochs=30,lr=0.001):

        super().__init__(dimx,hidden1,hidden2,hidden3,nlabels)  # initialize the `MLP`!

        self.lr = lr

        self.optim = optim.Adam(self.parameters(), self.lr)

        self.epochs = epochs

        self.criterion = nn.NLLLoss()

        # List to store the loss evolution along training
        self.loss_during_training = []

    # extend the class to incorporate a training method
    def trainloop(self,trainloader):
            for e in range(int(self.epochs)):
                running_loss = 0.
                for images, labels in trainloader:
                    self.optim.zero_grad()  # RESET GRADIENTS!

                    # forward pass
                    out = self.forward(images.view(images.shape[0], -1)) # -1 reshape image in a vector, 0th column refers to batch size
                    loss = self.criterion(out,labels) # compare output with the labels
                    running_loss += loss.item() # update the loss

                    # backwardpass
                    loss.backward() # do backpropagation
                    self.optim.step() # update parameters

                self.loss_during_training.append(running_loss/len(trainloader))

                if(e % 1 == 0): # Every 10 epochs
                    print("Training loss after %d epochs: %f"
                          %(e,self.loss_during_training[-1]))

    def eval_performance(self,loader):
        loss = 0
        accuracy = 0

        # Turn off gradients
        with torch.no_grad():
            for images,labels in loader:
                logprobs = my_newest_MLP.forward(images.view(images.shape[0], -1)) # softmax for logprobabilities

                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(images.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))

        return accuracy/len(loader)

my_newest_MLP = MLP_extended(dimx=784,hidden1=256, hidden2=128, hidden3=64, nlabels=10,epochs=10,lr=1e-3)
my_newest_MLP.trainloop(trainloader)

Training loss after 0 epochs: -0.536915
Training loss after 1 epochs: -0.563040
Training loss after 2 epochs: -0.583778
Training loss after 3 epochs: -0.617179
Training loss after 4 epochs: -0.647763
Training loss after 5 epochs: -0.649312
Training loss after 6 epochs: -0.653856
Training loss after 7 epochs: -0.653800
Training loss after 8 epochs: -0.660971
Training loss after 9 epochs: -0.657742

# evaluate train and test performance
train_accuracy = my_newest_MLP.eval_performance(trainloader)
test_accuracy = my_newest_MLP.eval_performance(testloader)
print("Training performance accuracy for this model is", float(train_accuracy))
print("Testing performance accuracy for this model is", float(test_accuracy))

Training performance accuracy for this model is 0.6674273610115051
Testing performance accuracy for this model is 0.6532643437385559

import copy

validloader = copy.deepcopy(trainloader)  # copy the object

#We take the first 45k images for training
trainloader.dataset.data = trainloader.dataset.data[:45000,:,:]
trainloader.dataset.targets = trainloader.dataset.targets[:45000]

#And the rest for validation
validloader.dataset.data = validloader.dataset.data[45000:,:,:]
validloader.dataset.targets = validloader.dataset.targets[45000:]

class MLP_extended(MLP):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels,epochs=30,lr=0.001):

        super().__init__(dimx,hidden1,hidden2,hidden3,nlabels)  # initialize the `MLP`!

        self.lr = lr

        self.optim = optim.Adam(self.parameters(), self.lr)

        self.epochs = epochs

        self.criterion = nn.NLLLoss()

        # List to store the loss evolution along training and validation
        self.loss_during_training = []
        self.valid_loss_during_training = []

    def trainloop(self,trainloader,validloader):
                for e in range(int(self.epochs)):
                    running_loss = 0.
                    for images, labels in trainloader:
                        self.optim.zero_grad() # reset gradients

                        # forward pass
                        out = self.forward(images.view(images.shape[0], -1)) # -1 reshapes the image in a vector, 0th column refers to batch size
                        loss = self.criterion(out,labels) # compare output with the labels
                        running_loss += loss.item() # update the loss

                        # backwardpass
                        loss.backward() # do backpropagation
                        self.optim.step() # update parameters

                    self.loss_during_training.append(running_loss/len(trainloader))
                    with torch.no_grad():
                        valid_running_loss = 0.
                        for images, labels in validloader:

                            # forward pass
                            out = self.forward(images.view(images.shape[0], -1)) # -1 reshapes the image in a vector, 0th column refers to batch size
                            loss = self.criterion(out,labels) # compare output with the labels
                            valid_running_loss += loss.item() # update the loss
                        self.valid_loss_during_training.append(valid_running_loss/len(validloader))

                        if e in range(int(self.epochs)): #if(e % 1 == 0): # Every 10 epochs
                            print("Training loss after %d epochs: %f"
                                %(e,self.loss_during_training[-1]))

    def eval_performance(self,loader):
        loss = 0
        accuracy = 0

        # Turn off gradients for evaluation
        with torch.no_grad():
            for images,labels in loader:
                logprobs = my_newest_MLP.forward(images.view(images.shape[0], -1)) # reshape again before forward pass

                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(images.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))

        return accuracy/len(loader)

# now, repreat training and plot train/validation loss along epochs
my_newest_MLP = MLP_extended(dimx=28*28,hidden1=256, hidden2=128, hidden3=64, nlabels=10,epochs=30,lr=1e-3)
train_accuracy = my_newest_MLP.trainloop(trainloader, validloader)

Training loss after 0 epochs: -0.655320
Training loss after 1 epochs: -0.723569
Training loss after 2 epochs: -0.727752
Training loss after 3 epochs: -0.729491
Training loss after 4 epochs: -0.729023
Training loss after 5 epochs: -0.729958
Training loss after 6 epochs: -0.734956
Training loss after 7 epochs: -0.733417
Training loss after 8 epochs: -0.753747
Training loss after 9 epochs: -0.782341
Training loss after 10 epochs: -0.792284
Training loss after 11 epochs: -0.793828
Training loss after 12 epochs: -0.796047
Training loss after 13 epochs: -0.792448
Training loss after 14 epochs: -0.799183
Training loss after 15 epochs: -0.795375
Training loss after 16 epochs: -0.799945
Training loss after 17 epochs: -0.792362
Training loss after 18 epochs: -0.797146
Training loss after 19 epochs: -0.817324
Training loss after 20 epochs: -0.826835
Training loss after 21 epochs: -0.832243
Training loss after 22 epochs: -0.834970
Training loss after 23 epochs: -0.832149
Training loss after 24 epochs: -0.839396
Training loss after 25 epochs: -0.830845
Training loss after 26 epochs: -0.835662
Training loss after 27 epochs: -0.842711
Training loss after 28 epochs: -0.833575
Training loss after 29 epochs: -0.829943

# training loss
plt.plot(my_newest_MLP.loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

# validation loss
plt.plot(my_newest_MLP.valid_loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

Text(0, 0.5, 'Loss')

# now we must compute train/test accuracy
train_accuracy = my_newest_MLP.eval_performance(trainloader)
valid_accuracy = my_newest_MLP.eval_performance(validloader)
test_accuracy = my_newest_MLP.eval_performance(testloader)
print("The training accuracy for the extended MLP class is ", float(train_accuracy))
print("The validation accuracy is ", float(valid_accuracy))
print("The testing accuracy is ", float(test_accuracy))

The training accuracy for the extended MLP class is  0.8030894994735718
The validation accuracy is  0.7977393865585327
The testing accuracy is  0.7890127301216125

# plot the loss along epochs
plt.plot(my_newest_MLP.valid_loss_during_training,'-b',label='Cross Entropy Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.grid()

# avoid over-fitting with early stopping
# see this by watching increasing validation loss
# use the version of the model with lowest validation loss

# repeat training for 8 epochs, as after this, the loss increases again
my_newest_MLP = MLP_extended(dimx=784,hidden1=256, hidden2=128, hidden3=64, nlabels=10, epochs=8, lr=1e-3)
my_newest_MLP.trainloop(trainloader,validloader)

Training loss after 0 epochs: -0.702211
Training loss after 1 epochs: -0.808014
Training loss after 2 epochs: -0.823421
Training loss after 3 epochs: -0.828093
Training loss after 4 epochs: -0.831531
Training loss after 5 epochs: -0.833801
Training loss after 6 epochs: -0.838100
Training loss after 7 epochs: -0.837192

# plot train/validation loss along epochs
plt.plot(my_newest_MLP.valid_loss_during_training,'-b',label='Cross Entropy Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.grid()

# evaluate train, validation, and test performance

train_accuracy = my_newest_MLP.eval_performance(trainloader)
valid_accuracy = my_newest_MLP.eval_performance(validloader)
test_accuracy = my_newest_MLP.eval_performance(testloader)
print("Training performance accuracy for this model is", float(train_accuracy))
print("Validation performance accuracy is", float(train_accuracy))
print("Testing performance accuracy is", float(test_accuracy))

Training performance accuracy for this model is 0.851318359375
Validation performance accuracy is 0.851318359375
Testing performance accuracy is 0.8324044346809387

class MLP_dropout(nn.Module):
    def __init__(self,dimx,hidden1,nlabels): #Nlabels will be 10 in our case
        super().__init__()
        
        self.output1 = nn.Linear(dimx,hidden1)  
        
        self.output2 = nn.Linear(hidden1,nlabels)
    
        self.relu = nn.ReLU()
        
        self.logsoftmax = nn.LogSoftmax(dim=1)  
        
        # Dropout module with 0.2 drop probability
        self.dropout = nn.Dropout(p=0.2)
        
    def forward(self, x):
        # Pass the input tensor through each of our operations
        x = self.output1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.output2(x)
        x = self.logsoftmax(x)
        return x

class MLPdrop(nn.Module):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels):
        super().init()
        self.L1 = nn.Linear(dimx,hidden1)
        self.L2 = nn.Linear(hidden1,hidden2)
        self.L3 = nn.Linear(hidden2,hidden3)
        self.L4 = nn.Linear(hidden3,nlabels)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2) # DROPOUT PROBABILITY OF 0.2
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      # pass input tensor again through each operation above
      x = self.L1(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L2(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L3(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L4(x)
      x = self.softmax(x)
      return x

# modify the extended class to set model.eval() when appropiate

class MLPdrop_extended(MLP):
    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels,epochs=30,lr=0.001):

        super().__init__(dimx,hidden1,hidden2,hidden3,nlabels)  #To initialize `MLP`!

        self.lr = lr #Learning Rate

        self.optim = optim.Adam(self.parameters(), self.lr)

        self.epochs = epochs

        self.criterion = nn.NLLLoss()

        # List to store the loss evolution along training
        self.loss_during_training = []
        self.valid_loss_during_training = []


    def trainloop(self,trainloader,validloader):
            for e in range(int(self.epochs)):
                running_loss = 0.
                for images, labels in trainloader:
                    self.optim.zero_grad()

                    # forward pass
                    out = self.forward(images.view(images.shape[0], -1)) # -1 reshapes the image in a vector, 0th column refers to batch size
                    loss = self.criterion(out,labels) # compare output with the labels
                    running_loss += loss.item() # update the loss

                    # backwardpass
                    loss.backward() # do backpropagation
                    self.optim.step() #update parameters

                self.loss_during_training.append(running_loss/len(trainloader))
                self.valid_loss_during_training.append(running_loss/len(validloader))

                if(e % 1 == 0): # Every 10 epochs
                    print("Training loss after %d epochs: %f"
                          %(e,self.loss_during_training[-1]))

    def eval_performance(self,loader):
        loss = 0
        accuracy = 0

        # Turn off gradients for validation
        with torch.no_grad():

            # set model to eval mode (turn off dropout)
            self.eval()

            #validation pass
            for images,labels in loader:
                logprobs = self.forward(images.view(images.shape[0], -1)) # We use a log-softmax, so what we get are log-probabilities

                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(images.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))

        # set model back to train mode (turn dropout back on)
        self.train()

        return accuracy/len(loader)

# plot evolution for training and validation models
MLPdrop_extended = MLPdrop_extended(dimx=28*28,hidden1=256, hidden2=128, hidden3=64, nlabels=10,epochs=10,lr=1e-3)
MLPdrop_extended.trainloop(trainloader,validloader)

plt.plot(MLPdrop_extended.loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

plt.plot(MLPdrop_extended.valid_loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

Training loss after 0 epochs: -0.689089
Training loss after 1 epochs: -0.763342
Training loss after 2 epochs: -0.819389
Training loss after 3 epochs: -0.824464
Training loss after 4 epochs: -0.829106
Training loss after 5 epochs: -0.835652
Training loss after 6 epochs: -0.833999
Training loss after 7 epochs: -0.840516
Training loss after 8 epochs: -0.842713
Training loss after 9 epochs: -0.847611

Text(0, 0.5, 'Loss')

train_accuracy = MLPdrop_extended.eval_performance(trainloader)
valid_accuracy = MLPdrop_extended.eval_performance(validloader)
test_accuracy = MLPdrop_extended.eval_performance(testloader)
print("Training performance accuracy for this model is", float(train_accuracy))
print("Validation performance accuracy is", float(train_accuracy))
print("Testing performance accuracy is", float(test_accuracy))

Training performance accuracy for this model is 0.8526500463485718
Validation performance accuracy is 0.8526500463485718
Testing performance accuracy is 0.8341958522796631

class MLPdrop_1(nn.Module):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels):
        super().init()
        self.L1 = nn.Linear(dimx,hidden1)
        self.L2 = nn.Linear(hidden1,hidden2)
        self.L3 = nn.Linear(hidden2,hidden3)
        self.L4 = nn.Linear(hidden3,nlabels)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1) # DROPOUT PROBABILITY 0.1
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      # pass input tensor again through each operation above
      x = self.L1(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L2(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L3(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L4(x)
      x = self.softmax(x)
      return x

class MLPdrop_1_extended(MLP):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels,epochs=30,lr=0.001):

        super().__init__(dimx,hidden1,hidden2,hidden3,nlabels)

        self.lr = lr

        self.optim = optim.Adam(self.parameters(), self.lr)

        self.epochs = epochs

        self.criterion = nn.NLLLoss()

        self.loss_during_training = []
        self.valid_loss_during_training = []


    def trainloop(self,trainloader,validloader):
            for e in range(int(self.epochs)):
                running_loss = 0.
                for images, labels in trainloader:
                    self.optim.zero_grad()

                    # forward pass
                    out = self.forward(images.view(images.shape[0], -1))
                    loss = self.criterion(out,labels)
                    running_loss += loss.item()

                    # backwardpass
                    loss.backward() # do backpropagation
                    self.optim.step()

                self.loss_during_training.append(running_loss/len(trainloader))
                self.valid_loss_during_training.append(running_loss/len(validloader))

                if(e % 1 == 0):
                    print("Training loss after %d epochs: %f"
                          %(e,self.loss_during_training[-1]))

    def eval_performance(self,loader):
        loss = 0
        accuracy = 0

        # Turn off gradients for validation
        with torch.no_grad():

            # set model to eval mode (turn off dropout mode)
            self.eval()

            #validation pass
            for images,labels in loader:
                logprobs = self.forward(images.view(images.shape[0], -1))

                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(images.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))

        # set model back to train mode (turn dropout mode back on)
        self.train()

        return accuracy/len(loader)

# plot evolution for training and validation models
MLPdrop_first = MLPdrop_1_extended(dimx=28*28,hidden1=256, hidden2=128, hidden3=64, nlabels=10,epochs=10,lr=1e-3)
MLPdrop_first.trainloop(trainloader,validloader)

plt.plot(MLPdrop_first.loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

Training loss after 0 epochs: -0.665202
Training loss after 1 epochs: -0.709505
Training loss after 2 epochs: -0.712515
Training loss after 3 epochs: -0.713740
Training loss after 4 epochs: -0.727236
Training loss after 5 epochs: -0.738436
Training loss after 6 epochs: -0.743407
Training loss after 7 epochs: -0.750551
Training loss after 8 epochs: -0.751758
Training loss after 9 epochs: -0.755890

Text(0, 0.5, 'Loss')

# compute train, validation, and test performance
# evaluate train, validation, and test performance

train_accuracy = MLPdrop_first.eval_performance(trainloader)
valid_accuracy = MLPdrop_first.eval_performance(validloader)
test_accuracy = MLPdrop_first.eval_performance(testloader)
print("Training performance accuracy for this model is", float(train_accuracy))
print("Validation performance accuracy is", float(train_accuracy))
print("Testing performance accuracy is", float(test_accuracy))

Training performance accuracy for this model is 0.7559259533882141
Validation performance accuracy is 0.7559259533882141
Testing performance accuracy is 0.7434315085411072

class MLPdrop_2(nn.Module):

    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels):
        super().init()
        self.L1 = nn.Linear(dimx,hidden1)
        self.L2 = nn.Linear(hidden1,hidden2)
        self.L3 = nn.Linear(hidden2,hidden3)
        self.L4 = nn.Linear(hidden3,nlabels)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3) # DROPOUT PROBABILITY 0.3
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      x = self.L1(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L2(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L3(x)
      x = self.relu(x)
      x = self.dropout(x)
      x = self.L4(x)
      x = self.softmax(x)
      return x

class MLPdrop_2_extended(MLP):
    def __init__(self,dimx,hidden1,hidden2,hidden3,nlabels,epochs=30,lr=0.001):

        super().__init__(dimx,hidden1,hidden2,hidden3,nlabels)

        self.lr = lr #Learning Rate

        self.optim = optim.Adam(self.parameters(), self.lr)

        self.epochs = epochs

        self.criterion = nn.NLLLoss()

        self.loss_during_training = []
        self.valid_loss_during_training = []

    def trainloop(self,trainloader,validloader):
            for e in range(int(self.epochs)):
                running_loss = 0.
                for images, labels in trainloader:
                    self.optim.zero_grad()

                    # forward pass
                    out = self.forward(images.view(images.shape[0], -1))
                    loss = self.criterion(out,labels) # compare output with the labels
                    running_loss += loss.item() # update the loss

                    # backwardpass
                    loss.backward() # do backpropagation
                    self.optim.step()

                self.loss_during_training.append(running_loss/len(trainloader))
                self.valid_loss_during_training.append(running_loss/len(validloader))
                if(e % 1 == 0):
                    print("Training loss after %d epochs: %f"
                          %(e,self.loss_during_training[-1]))

    def eval_performance(self,loader):
        loss = 0
        accuracy = 0

        # Turn off gradients for validation
        with torch.no_grad():

            # set model to eval mode
            self.eval()

            #validation pass
            for images,labels in loader:
                logprobs = self.forward(images.view(images.shape[0], -1))

                top_p, top_class = logprobs.topk(1, dim=1)
                equals = (top_class == labels.view(images.shape[0], 1))
                accuracy += torch.mean(equals.type(torch.FloatTensor))

        # set model back to train mode
        self.train()

        return accuracy/len(loader)

# plot evolution for training and validation models
MLPdrop_second = MLPdrop_2_extended(dimx=28*28,hidden1=256, hidden2=128, hidden3=64, nlabels=10,epochs=10,lr=1e-3)
MLPdrop_second.trainloop(trainloader,validloader)

plt.plot(MLPdrop_second.loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

plt.plot(MLPdrop_second.valid_loss_during_training,'-b',label='BCE Loss')
plt.xlabel('Iterations')
plt.ylabel('Loss')

Training loss after 0 epochs: -0.696890
Training loss after 1 epochs: -0.792698
Training loss after 2 epochs: -0.815323
Training loss after 3 epochs: -0.825103
Training loss after 4 epochs: -0.834248
Training loss after 5 epochs: -0.838383
Training loss after 6 epochs: -0.834520
Training loss after 7 epochs: -0.835742
Training loss after 8 epochs: -0.839225
Training loss after 9 epochs: -0.834637

Text(0, 0.5, 'Loss')

# compute train, validation, and test performance
# evaluate train, validation, and test performance
train_accuracy = MLPdrop_second.eval_performance(trainloader)
valid_accuracy = MLPdrop_second.eval_performance(validloader)
test_accuracy = MLPdrop_second.eval_performance(testloader)
print("Training performance accuracy for this model is", float(train_accuracy))
print("Validation performance accuracy is", float(train_accuracy))
print("Testing performance accuracy is", float(test_accuracy))

Training performance accuracy for this model is 0.8151189684867859
Validation performance accuracy is 0.8151189684867859
Testing performance accuracy is 0.8016520738601685

Lab 2 (Part 2): Regularizing MLPs¶

Part I. Download FMNIST with `torchvision`¶

Part II. Visualize overfiting¶

Implement Early Stopping¶

Part III. Using Dropout Regularization¶

Lab 2 (Part 2): Regularizing MLPs¶

Part I. Download FMNIST with torchvision¶

Part II. Visualize overfiting¶

Implement Early Stopping¶

Part III. Using Dropout Regularization¶

Part I. Download FMNIST with `torchvision`¶