Understanding Variational Autoencoders (VAE)
Emilie Naples - Master in Big Data Analytics
In this notebook, I train a VAE on a dataset of images of celebrities, CelebA. I will use a reduced set of 11,000 images to speed things up.
Probabilistic model
Let $\mathbf{x}^{(i)}$ be the training images and $\mathbf{z}^{(i)}$ the corresponding latent (unobserved) embeddings of dimension $k$, for $i=1,\ldots,11{,}000$. Then ...
\begin{align} p(\mathbf{z}) &= \mathcal{N}(\mathbf{0},\mathbf{I})\\ p(\mathbf{x}|\mathbf{z}) &= \mathcal{N}(\mu_\theta(\mathbf{z}),\sigma_x\mathbf{I}) \end{align} where $\mu_\theta(\mathbf{z})$ is a Neural Network with input $\mathbf{z}$ and $\sigma_x$ is a reconstruction noise power that we won't train. By default, we will take the value $\sigma_x=0.1$.
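As a quick illustration (a sketch, not part of the original notebook), this generative story can be written with torch.distributions; the linear layer standing in for $\mu_\theta(\mathbf{z})$ and the dimensions below are placeholders for the CNN decoder defined later, and $\sigma_x$ is treated as a variance, so the Gaussian scale is its square root:
import torch
from torch.distributions import Independent, Normal

k, D = 2, 3 * 64 * 64                  # latent dimension and flattened image dimension
var_x = 0.1                            # fixed reconstruction noise (a variance)
mu_theta = torch.nn.Linear(k, D)       # placeholder for the decoder network mu_theta(z)

p_z = Independent(Normal(torch.zeros(k), torch.ones(k)), 1)        # p(z) = N(0, I)
z = p_z.sample()                                                   # z ~ p(z)
p_x_given_z = Independent(Normal(mu_theta(z), var_x ** 0.5), 1)    # p(x|z)
x = p_x_given_z.sample()                                           # a generated (flattened) image
print(x.shape)                                                     # torch.Size([12288])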
Inference model
The goal is to approximate the posterior distribution $p(\mathbf{z}|\mathbf{x})$ by the following parametric model
$$p(\mathbf{z}|\mathbf{x})\approx q(\mathbf{z}|\mathbf{x}) = \mathcal{N}\big(\mu_\eta(\mathbf{x}),\text{diag}(\sigma_\eta(\mathbf{x}))\big)$$
where both $\mu_\eta(\mathbf{x})$ and $\sigma_\eta(\mathbf{x})$ are neural networks with input $\mathbf{x}$.
Training loss: the evidence lower bound (ELBO)
Since exact inference is intractable in this model, we train the parameters of the probabilistic and inference models by maximizing the evidence lower bound (ELBO)
$$\log p(\mathbf{x}) \geq \mathbb{E}_{q(\mathbf{z}|\mathbf{x})}\left[\log p(\mathbf{x}|\mathbf{z})\right] - D_{KL}\big(q(\mathbf{z}|\mathbf{x})\|p(\mathbf{z})\big)$$
Since $p(\mathbf{z})=\mathcal{N}(\mathbf{0},\mathbf{I})$ and $q(\mathbf{z}|\mathbf{x})$ are both Gaussian then
$$D_{KL}\big(q(\mathbf{z}|\mathbf{x})\|p(\mathbf{z})\big) = \frac{1}{2}\left(-k+\sum_{j=1}^k \left(\sigma_{\eta,j}-\log\sigma_{\eta,j}+\mu_{\eta,j}^2\right)\right)$$
During training, the first expectation will be replaced by a one-sample Monte Carlo estimate for every data point. Overall, the ELBO to be optimized using SGD is
$$\text{ELBO}(\theta,\eta,\sigma_x) = \frac{1}{N} \sum_{i=1}^{N} \left(\log p(\mathbf{x}^{(i)}|\tilde{\mathbf{z}}^{(i)}) - D_{KL}\big(q(\mathbf{z}|\mathbf{x}^{(i)})\|p(\mathbf{z})\big)\right)$$
where $\tilde{\mathbf{z}}^{(i)}$ is a sample from $q(\mathbf{z}|\mathbf{x}^{(i)})$ computed as
$$\tilde{\mathbf{z}}^{(i)} = \mu_\eta(\mathbf{x}^{(i)}) + \sqrt{\sigma_\eta(\mathbf{x}^{(i)})} \odot \epsilon$$ and $\epsilon$ is a sample from a $k$-dimensional standard Gaussian distribution. This is the reparameterization trick: the samples are differentiable w.r.t. the parameters of both $\mu_\eta(\mathbf{x})$ and $\sigma_\eta(\mathbf{x})$.
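Putting these pieces together, here is a minimal numerical sketch of the one-sample ELBO estimate for a single image (mu_eta, var_eta and the linear mu_theta below are placeholder stand-ins for the encoder and decoder networks implemented later in the notebook):
import numpy as np
import torch

k, D, var_x = 2, 3 * 64 * 64, 0.1
x = torch.randn(D)                              # a (flattened) training image
mu_eta = torch.zeros(k, requires_grad=True)     # encoder mean mu_eta(x)
var_eta = torch.ones(k, requires_grad=True)     # encoder variance sigma_eta(x)

# Reparameterization trick: a differentiable sample from q(z|x)
eps = torch.randn(k)
z_tilde = mu_eta + var_eta.sqrt() * eps

mu_theta = torch.nn.Linear(k, D)                # placeholder decoder mean mu_theta(z)
mu_x = mu_theta(z_tilde)

# Gaussian log-likelihood log p(x|z) with fixed variance var_x
logp = -0.5 * (D * np.log(2 * np.pi * var_x) + ((x - mu_x) ** 2).sum() / var_x)
# Closed-form KL between q(z|x) and N(0, I)
kl = 0.5 * (-k + (var_eta - torch.log(var_eta) + mu_eta ** 2).sum())

elbo = logp - kl
elbo.backward()   # gradients flow into mu_eta and var_eta through z_tilde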
import torch
from torch import nn
import numpy as np
import torchvision
from torchvision import datasets
import torchvision.transforms as transforms
from torch import optim
import time
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Part I: Loading the reduced CelebA dataset
Connect to Google Drive
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
path_to_folder = '/content/drive/My Drive/MM + PPCA/reduced_celebA/'
The following function imports the database using Pytorch dataloaders ...
def get_celeba(batch_size, dataset_directory):
train_transformation = transforms.Compose([
transforms.Resize((64, 64)), # THE IMAGES ARE RE-SCALED TO 64x64
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
train_dataset = torchvision.datasets.ImageFolder(dataset_directory + 'celeba', train_transformation)
# Use sampler for randomization
training_sampler = torch.utils.data.SubsetRandomSampler(range(len(train_dataset)))
# Prepare Data Loaders for training and validation
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=training_sampler)
return trainloader
# load the database in a single trainloader
trainloader = get_celeba(32,path_to_folder)
Read one mini-batch of images and plot some of them.
def imshow(img):
img = img / 2 + 0.5 # unnormalize to plot
npimg = img.numpy()
plt.imshow(np.transpose(npimg, (1, 2, 0)))
plt.show()
train = iter(trainloader)
i,l = next(train)
imshow(i[0,:,:,:])
imshow(torchvision.utils.make_grid(i))
A class to implement the encoder mean and variance
The following class implements the CNN network to obtain both $\mu_\eta(\mathbf{x}), \sigma_\eta(\mathbf{x})$, which determine the moments of the approximate posterior distribution $q(\mathbf{z}|\mathbf{x})$.
We use a single CNN with input $\mathbf{x}$; its output vector of dimension $2k$ is divided into two parts: the first $k$ elements represent the mean, and the variance is computed from the last $k$ elements using a softplus.
The actual design of this network is borrowed from this paper. It could be improved by adding batch norm layers, but it does perform well already.
Question: What are the spatial dimensions of the 256 feature maps at the output of the conv5 convolutional layer?
The tensor at the output of conv5 has shape [32, 256, 1, 1] (a mini-batch of 32 images), so the spatial dimensions of each of the 256 feature maps are 1×1.
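As a quick check (a sketch, not part of the original notebook), the standard Conv2d output-size formula reproduces this: every stride-2 layer halves the spatial size of the 64×64 input, and conv5 collapses the remaining 4×4 maps to 1×1.
def conv_out(size, kernel, stride, padding):
    # Standard output-size formula for nn.Conv2d
    return (size + 2 * padding - kernel) // stride + 1

size = 64
for name, (kernel, stride, padding) in [('conv1', (4, 2, 1)), ('conv2', (4, 2, 1)),
                                        ('conv3', (4, 2, 1)), ('conv4', (4, 2, 1)),
                                        ('conv5', (4, 1, 0))]:
    size = conv_out(size, kernel, stride, padding)
    print(name, size)   # 32, 16, 8, 4 and finally 1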
class encoder(nn.Module):
def __init__(self,dimz,channels=3,var_x=0.1):
super().__init__()
self.dimz = dimz #dimz is k, the dimension of the latent space
# self.conv1 is a convolutional layer, with 32 output channels, kernel size 4, stride 2,
# and padding 1
self.conv1 = nn.Conv2d(in_channels=channels, out_channels=32,
kernel_size=4, stride=2, padding=1)
self.relu = nn.ReLU()
# self.conv2 is a convolutional layer, with 32 output channels, kernel size 4, stride 2,
# and padding 1
self.conv2 = nn.Conv2d(in_channels=32, out_channels=32,
kernel_size=4, stride=2, padding=1)
# self.conv3 is a convolutional layer, with 64 output channels, kernel size 4, stride 2,
# and padding 1
self.conv3 = nn.Conv2d(in_channels=32, out_channels=64,
kernel_size=4, stride=2, padding=1)
# self.conv4 is a convolutional layer, with 64 output channels, kernel size 4, stride 2,
# and padding 1
self.conv4 = nn.Conv2d(in_channels=64, out_channels=64,
kernel_size=4, stride=2, padding=1)
# self.conv5 is a convolutional layer, with 256 output channels, kernel size 4, stride 1,
# and padding 0
self.conv5 = nn.Conv2d(in_channels=64, out_channels=256,
kernel_size=4, stride=1, padding=0)
# self.linear is a linear layer with dimz*2 outputs
self.linear = nn.Linear(in_features = 256, out_features = self.dimz * 2)
self.softplus = nn.Softplus()
def forward(self,x):
# The forward method projects an image into a 2*dimz-dimensional vector
z = self.relu(self.conv1(x))
z = self.relu(self.conv2(z))
z = self.relu(self.conv3(z))
z = self.relu(self.conv4(z))
z = self.relu(self.conv5(z))
# Flatten z from [batch, 256, 1, 1] into a [batch, 256] vector
z = z.view(-1, 256)
z = self.linear(z)
return z
def encode_and_sample(self,x,flag_sample=True):
# This method computes both the posterior mean and variance.
# It also obtains a sample from the posterior using the
# reparameterization trick.
# We obtain the encoder projection using the forward method
z = self.forward(x)
# The mean is the first dimz components of the forward output
mu = z[:, :self.dimz]
# compute the variance from the last dimz components using a
# soft plus
var = self.softplus(0.5 * z[:, self.dimz:])
sample = None
if(flag_sample==True):
eps = torch.randn_like(var)
sample = mu + eps*(var**0.5)
return mu,var,sample
Using dimz=2, create an encoder object and print the mean mu_z and variance var_z in the z space for one image.
encoder_obj = encoder(dimz = 2)
mu_z, var_z, sample = encoder_obj.encode_and_sample(i)
print("mu_z shape: {}".format(mu_z.detach().numpy().shape))
print("mu_z of image 0:")
print(mu_z[0, :])
print("\nvar_z shape: {}".format(var_z.detach().numpy().shape))
print("var_z of image 0:")
print(var_z[0, :])
print("\nSample shape: {}".format(sample.detach().numpy().shape))
mu_z shape: (32, 2)
mu_z of image 0:
tensor([-0.0174, -0.0021], grad_fn=<SliceBackward0>)
var_z shape: (32, 2)
var_z of image 0:
tensor([0.6916, 0.7035], grad_fn=<SliceBackward0>)
Sample shape: (32, 2)
A class to implement the decoder mean
Given a sample of $\mathbf{z}$, we generate images by sampling from $p(\mathbf{x}|\mathbf{z}) = \mathcal{N}(\mu_\theta(\mathbf{z}),\sigma_x\mathbf{I})$, where we take $\sigma_x=0.1$ as the reconstruction noise. The following class implements the mean NN $\mu_\theta(\mathbf{z})$ using transposed convolutions. Again, the network design comes from the same paper.
Why do we use a hyperbolic tangent output activation? Because we want the output pixel values to lie in $(-1,1)$, matching the range of the normalized input images, rather than the $(0,1)$ range we would obtain with a sigmoid (or the unbounded output of a ReLU). The dataloader normalizes each color channel to $[-1,1]$, and our own version of imshow un-normalizes by dividing each pixel by $2$ and adding $0.5$.
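A small numerical check of the ranges involved (illustrative; it assumes the Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) transform used in the dataloader above):
import torch

pixels = torch.tensor([0.0, 0.5, 1.0])       # raw pixel values in [0, 1]
normalized = (pixels - 0.5) / 0.5            # what transforms.Normalize does per channel
print(normalized)                            # tensor([-1., 0., 1.]) -- the range of tanh
print(normalized / 2 + 0.5)                  # imshow's un-normalization recovers [0, 0.5, 1]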
class decoder(nn.Module):
def __init__(self,dimz,channels=3,var_x=0.1):
super().__init__()
# We expand z into a 256 dimensional vector
self.linear = nn.Linear(dimz,256)
self.relu = nn.ReLU()
self.tanh = nn.Tanh()
# self.tconv1 is a transposed convolutional layer, with 64 output channels, kernel size 4,
# stride 1, and padding 0
self.tconv1 = nn.ConvTranspose2d(256, 64, kernel_size=4, stride=1, padding=0)
# self.tconv2 is a transposed convolutional layer, with 64 output channels, kernel size 4,
# stride 2, and padding 1
self.tconv2 = nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1)
# self.tconv3 is a transposed convolutional layer, with 32 output channels, kernel size 4,
# stride 2, and padding 1
self.tconv3 = nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1)
# self.tconv4 is a transposed convolutional layer, with 32 output channels, kernel size 4,
# stride 2, and padding 1
self.tconv4 = nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1)
# self.tconv5 is a transposed convolutional layer, with `channels` output channels, kernel size 4,
# stride 2, and padding 1
self.tconv5 = nn.ConvTranspose2d(32, channels, kernel_size=4, stride=2, padding=1)
def forward(self,z):
x = self.relu(self.linear(z).view(-1,256,1,1))
x = self.relu(self.tconv1(x))
x = self.relu(self.tconv2(x))
x = self.relu(self.tconv3(x))
x = self.relu(self.tconv4(x))
x = self.tanh(self.tconv5(x))
return x
def decode(self,z):
# This function simply calls the forward method
return self.forward(z)
Create a decoder object, again using dimz=2. Given the mean projection mu_z of an image obtained before, use the decoder to obtain the mean x_mean of $p(\mathbf{x}|\mathbf{z})$. Represent one original image versus the reconstruction. Obviously, since we have not trained the model yet, don't expect too much.
my_dec = decoder(dimz=2)
x_mean = my_dec.decode(mu_z).detach()
imshow(i[0,:,:,:])
imshow(x_mean[0,:,:,:])
Observe that, since the mean x_mean is very different from the real image, the real image is extremely unlikely under the distribution $p(\mathbf{x}|\mathbf{z})$: the model will essentially never generate it by sampling.
For a mini-batch of images x, the following function evaluates the log-likelihood of an independent Gaussian distribution given the mean and the diagonal covariance matrix. In the function, both x and mu_x are passed as images and internally flattened into vectors, and var_x is a scalar constant (0.1) that defines the constant diagonal covariance.
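Concretely, for a flattened image $\mathbf{x}\in\mathbb{R}^D$ with mean $\mu_x$ and the same variance $\sigma_x$ in every dimension, the per-image term computed below is
$$\log p(\mathbf{x}|\mathbf{z}) = -\frac{1}{2}\left(D\log(2\pi) + D\log\sigma_x + \frac{1}{\sigma_x}\left\|\mathbf{x}-\mu_x\right\|^2\right)$$
and the function also returns this quantity summed over the whole mini-batch.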
def eval_Gaussian_LL(x,mu_x,var_x):
# x is a mini-batch of images. It has dimension [Batch,3,dimx,dimx]
# mu_x is a mini-batch of reconstructed images. It has dimension [Batch,3,dimx,dimx]
# var_x is a torch constant
D = x.shape[1] * x.shape[2] * x.shape[3] # Dimension of the image
x = x.reshape(-1, D)
mu_x = mu_x.reshape(-1, D)
var_x = torch.ones_like(mu_x) * var_x
# Constant term in the gaussian distribution
cnt = D * np.log(2 * np.pi) + torch.sum(torch.log(var_x), dim=-1)
# log-likelihood per datapoint
logp_data = -0.5 * (cnt + torch.sum((x - mu_x) * var_x ** -1 * (x - mu_x), dim=-1))
# Accumulated Gaussian log-likelihood for all datapoints in the batch
logp = torch.sum(logp_data)
return logp,logp_data
Get the log-likelihood of one real image given the x_mean computed above and var_x=0.1.
var_x = 0.1
logp,logp_data = eval_Gaussian_LL(i,x_mean,var_x)
print(logp)
plt.plot(np.arange(0,32),logp_data)
tensor(-653016.)
The variational autoencoder class
The following class puts together the VAE encoder and decoder and also defines the ELBO-based loss. We will extend it later to incorporate a training method.
class VAE(nn.Module):
def __init__(self,dimz,channels=3,var_x=0.1):
super().__init__()
self.var_x = var_x
self.dimz = dimz
# We create an encoder network
self.encoder = encoder(self.dimz)
# We create a decoder network
self.decoder = decoder(self.dimz)
def forward(self,x):
# In the forward method, we return the mean and variance
# given by the encoder network and also the reconstruction mean
# given by the decoder network using a sample from the
# encoder's posterior distribution.
mu_z,var_z,sample_z = self.encoder.encode_and_sample(x)
# Decoder provides the mean of the reconstruction
mu_x = self.decoder.decode(sample_z)
return mu_x,mu_z,var_z
# Reconstruction + KL divergence losses summed over all elements and batch
def loss_function(self, x, mu_x, mu_z, var_z):
# We evaluate the loglikelihood in the batch using the function provided above
logp,_ = eval_Gaussian_LL(x, mu_x, self.var_x)
# KL divergence between q(z|x) and N(0, I)
# see Appendix B from VAE paper:
# Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
# https://arxiv.org/abs/1312.6114
KLz = -0.5 * torch.sum(1 + torch.log(var_z) - mu_z.pow(2) - var_z)
# To maximize ELBO we minimize loss (-ELBO)
return -logp + KLz, -logp, KLz
Create a VAE object for dimz=2 and evaluate its loss (the negative ELBO) using the mu_z, var_z, and x_mean computed above.
my_vae = VAE(dimz = 2)
print(my_vae.loss_function(i,x_mean,mu_z,var_z))
(tensor(653017.8750, grad_fn=<AddBackward0>), tensor(653016.), tensor(1.8520, grad_fn=<MulBackward0>))
Incorporating a training method
The following class completes the implementation of the VAE.
- Since training is pretty slow (every epoch can take a few minutes), the model is saved every few minibatches. The user indicates the saving path using the save_folder argument.
- I have introduced a restore flag. When set to True, it loads the model parameters from the file saved in the save_folder folder.
- The class also incorporates a method to sample from the generative model, i.e. to create new images. To this end, we sample $\mathbf{z}$ from $p(\mathbf{z}) = \mathcal{N}(\mathbf{0},\mathbf{I})$ and then return the mean of $p(\mathbf{x}|\mathbf{z})$.
class VAE_extended(VAE):
def __init__(self, dimz=2, channels=3, var_x=0.1,lr=1e-3,epochs=20,save_folder='./',restore=False):
super().__init__(dimz, channels=channels, var_x=var_x)
self.lr = lr
self.optim = optim.Adam(self.parameters(), self.lr)
self.epochs = epochs
self.save_folder = save_folder
if(restore==True):
state_dict = torch.load(self.save_folder+'VAE_checkpoint.pth')
self.load_state_dict(state_dict)
self.loss_during_training = []
self.reconstruc_during_training = []
self.KL_during_training = []
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.to(self.device)
def trainloop(self,trainloader):
nims = len(trainloader.dataset)
self.train()
for e in range(int(self.epochs)):
train_loss = 0
train_rec = 0
train_kl_l = 0
idx_batch = 0
for images, labels in trainloader:
images = images.to(self.device)
self.optim.zero_grad()
mu_x, mu_z, var_z = self.forward(images)
loss, rec, kl_l = self.loss_function(images,mu_x, mu_z, var_z)
loss.backward()
train_loss += loss.item()
train_rec += rec.item()
train_kl_l += kl_l.item()
self.optim.step()
if(idx_batch%10==0):
torch.save(self.state_dict(), self.save_folder + 'VAE_checkpoint.pth')
idx_batch += 1
self.loss_during_training.append(train_loss/len(trainloader))
self.reconstruc_during_training.append(train_rec/len(trainloader))
self.KL_during_training.append(train_kl_l/len(trainloader))
if(e%5==0):
torch.save(self.state_dict(), self.save_folder + 'VAE_checkpoint.pth')
print('Train Epoch: {} \tLoss: {:.6f}'.format(e,self.loss_during_training[-1]))
def sample(self,num_imgs):
with torch.no_grad():
eps = torch.randn([num_imgs,self.dimz]).to(self.device)
x_sample = self.decoder.decode(eps)
return x_sample.to("cpu").detach()
Validate a pre-trained model
First, we're going to see a VAE model in action. The file VAE_checkpoint.pth, linked in Aula Global, contains the parameters of a VAE with dimz=50 trained for 200 epochs with var_x=0.1 and lr=1e-3.
The goal is now to create a VAE with the parameters contained in the file VAE_checkpoint.pth. To do so, instantiate the class VAE_extended with the restore argument set to True.
path_to_folder = '/content/drive/My Drive/MM + PPCA/'
my_vae = VAE_extended(dimz = 50, var_x= 0.1, lr = 1e-3, restore = True ,save_folder = path_to_folder)
With the method sample from the class VAE_extended, generate 20 images from the probabilistic model. Note that these are images created by the model: they correspond to people that do not exist!
There are many variants of VAEs that improve on these results, but even the vanilla VAE implemented here does a pretty good job! Although the images contain lots of artifacts, they also contain lots of very realistic details. Also note how the model varies the features of the images (hair, background, smiles, etc.).
x = my_vae.sample(20)
imshow(x[0,:,:,:])
imshow(torchvision.utils.make_grid(x))
Compare a real image versus its reconstruction. Note that the model is placed on a GPU by default if one is available, so we must move the images there. To visualize the reconstructed images, move the result of the decoder back to the CPU.
z_batch,_,_ = my_vae.encoder.encode_and_sample(i.to(my_vae.device))
x_reconstructed = my_vae.decoder.decode(z_batch).detach().to('cpu')
# We plot the original image
imshow(i[0,:,:,:])
# And the reconstruction
imshow(x_reconstructed[0,:,:,:])
Using the VAE, we can also interpolate between images. Given the latent representations $\mathbf{z}_1$ and $\mathbf{z}_2$ of two images, we can visualize the images that correspond to different linear interpolations between the two latent points. To this end, given $w=[0,0.1,0.2,\ldots,1]$, we will visualize the mean of $p(\mathbf{x}|\mathbf{z})$ for
$$z = (1-w)\mathbf{z}_1+w\mathbf{z}_2$$
Interpolate between two images:
# visualize the images to interpolate
img_1 = i[0,:,:,:]
img_2 = i[1,:,:,:]
imshow(img_1)
imshow(img_2)
# z_1 and z_2 are saved in z_batch, computed previously
z_1 = z_batch[0,:]
z_2 = z_batch[1,:]
# Linear interpolation weights w = 0, 0.1, ..., 1
weight = torch.linspace(0, 1, 11)
reconstructed_image = [my_vae.decoder.decode((1 - w) * z_1 + w * z_2).to('cpu').detach() for w in weight]
# Visualize all images in the reconstructed_image list
for im in reconstructed_image:
imshow(im.squeeze())
Using TSNE over `z_batch`, show how real images are projected in a 2D space.
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
def rescale(img):
img = img / 2 + 0.5 # unnormalize to plot
npimg = img.numpy()
return np.transpose(npimg, (1, 2, 0))
def plot_latent_space_with_images(images,latent,xmin=-150,xmax=150,ymin=-150,ymax=150):
# images --> Minibatch of images (numpy array!)
# latent --> Matrix of 2D representations (numpy array!)
f, ax = plt.subplots(1,1,figsize=(8, 8))
# ax is a figure handle
ax.clear()
for i in range(len(images)):
im = OffsetImage(rescale(images[i,:,:,:]))
ab = AnnotationBbox(im, latent[i,:],frameon=False)
ax.add_artist(ab)
#We set the limits according to the maximum and minimum values found for the latent projections
ax.set_xlim(xmin,xmax)
ax.set_ylim(ymin,ymax)
ax.set_title('Latent space Z with Images')
from sklearn.manifold import TSNE
# Apply TSNE over z_batch
latent_tsne = TSNE(n_components=2).fit_transform(z_batch.detach().to('cpu'))
plot_latent_space_with_images(i, latent_tsne)
Train the VAE from scratch
Let's see if we can train a VAE from nothing for a few epochs and generate images from the model.
# Create the model (default var_x and lr)
my_vae_trained = VAE_extended(dimz = 100, epochs = 40)
# Train the model
my_vae_trained.trainloop(trainloader)
Train Epoch: 0 	Loss: 483821.523888
Train Epoch: 5 	Loss: 128573.141827
Train Epoch: 10 	Loss: 85271.208909
Train Epoch: 15 	Loss: 65216.461050
Train Epoch: 20 	Loss: 51617.274564
Train Epoch: 25 	Loss: 42146.586013
Train Epoch: 30 	Loss: 30253.109544
Train Epoch: 35 	Loss: 19972.311340
x = my_vae_trained.sample(20)
imshow(x[0,:,:,:])
imshow(torchvision.utils.make_grid(x))