# -*- coding: utf-8 -*-
"""cWGAN_GP_mnist.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1hRdREHdQEc_eWi9xmFYA2aLRxChMtbjb
"""
from google.colab import drive
drive.mount('/content/gdrive')

# Quick sanity check that Drive is mounted and writable ('!' lines are
# Colab/IPython shell commands, not plain Python)
with open('/content/gdrive/My Drive/Shoes_Generator/cWGAN/foo.txt', 'w') as f:
    f.write('Hello Google Drive!')
!cat /content/gdrive/My\ Drive/Shoes_Generator/cWGAN/foo.txt
# Large amount of credit goes to:
# https://github.com/eriklindernoren/Keras-GAN/blob/master/wgan_gp/wgan_gp.py and
# https://github.com/eriklindernoren/Keras-GAN/blob/master/cgan/cgan.py
# which I've used as a reference for this implementation
# Author: Hanling Wang
# Date: 2018-11-21
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers.merge import _Merge
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply
from keras.layers import BatchNormalization, Activation, ZeroPadding2D, Embedding
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D, Conv2DTranspose
from keras.models import Sequential, Model, load_model  # load_model is needed by test()
from keras.optimizers import RMSprop
from functools import partial
import tensorflow as tf
import keras.backend as K
import glob
import cv2
import time
import os
import matplotlib.pyplot as plt
import argparse
import math
import numpy as np
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1";
os.environ["CUDA_VISIBLE_DEVICES"]="0"
config = tf.ConfigProto(device_count = {'GPU': 1 , 'CPU': 1})
sess = tf.Session(config=config)
K.set_session(sess)
class RandomWeightedAverage(_Merge):
    """Provides a (random) weighted average between real and generated image samples"""
    def _merge_function(self, inputs):
        # Relies on the module-level batch_size defined in the __main__ block
        global batch_size
        alpha = K.random_uniform((batch_size, 1, 1, 1))
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])
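
# WGAN-GP enforces the critic's Lipschitz constraint by penalising its gradient
# norm at random points on straight lines between real and generated samples
# (Gulrajani et al., 2017, "Improved Training of Wasserstein GANs"). The class
# above produces exactly those interpolates:
# x_hat = alpha * x_real + (1 - alpha) * x_fake, with alpha ~ U[0, 1].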
class CWGANGP():
    def __init__(self, nb_iter=30000, batch_size=32, sample_interval=50, dataset_size=10000):
        self.img_rows = 128
        self.img_cols = 128
        self.channels = 3
        self.nclasses = 10
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 100
        self.losslog = []
        self.nb_iter = nb_iter
        self.batch_size = batch_size
        self.sample_interval = sample_interval
        self.dataset_size = dataset_size
        self.labels = ['boots-ankle', 'boots-kneehigh', 'boots-midcalf', 'sandals-flats',
                       'shoes-athletic', 'shoes-flats', 'shoes-heels', 'shoes-loafers',
                       'shoes-oxfords', 'slippers-flats']

        # Following parameter and optimizer set as recommended in paper
        self.n_critic = 5
        optimizer = RMSprop(lr=0.00005)

        # Build the generator and critic
        self.generator = self.build_generator()
        self.critic = self.build_critic()

        #-------------------------------
        # Construct Computational Graph
        #       for the Critic
        #-------------------------------

        # Freeze generator's layers while training the critic
        self.generator.trainable = False

        # Image input (real sample)
        real_img = Input(shape=self.img_shape)

        # Noise input
        z_disc = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,))

        # Generate image based on noise (fake sample)
        fake_img = self.generator([z_disc, label])

        # Critic determines validity of the real and fake images
        fake = self.critic([fake_img, label])
        valid = self.critic([real_img, label])

        # Construct weighted average between real and fake images
        interpolated_img = RandomWeightedAverage()([real_img, fake_img])
        # Determine validity of weighted sample
        validity_interpolated = self.critic([interpolated_img, label])

        # Use Python partial to provide loss function with additional
        # 'averaged_samples' argument
        partial_gp_loss = partial(self.gradient_penalty_loss,
                                  averaged_samples=interpolated_img)
        partial_gp_loss.__name__ = 'gradient_penalty'  # Keras requires function names

        self.critic_model = Model(inputs=[real_img, label, z_disc],
                                  outputs=[valid, fake, validity_interpolated])
        self.critic_model.compile(loss=[self.wasserstein_loss,
                                        self.wasserstein_loss,
                                        partial_gp_loss],
                                  optimizer=optimizer,
                                  loss_weights=[1, 1, 10])
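
        # The loss weights pair with [valid, fake, validity_interpolated]:
        # unit weights on the two Wasserstein terms and lambda = 10 on the
        # gradient penalty, the coefficient recommended in the WGAN-GP paper.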

        #-------------------------------
        # Construct Computational Graph
        #       for Generator
        #-------------------------------

        # For the generator we freeze the critic's layers
        self.critic.trainable = False
        self.generator.trainable = True

        # Sampled noise for input to generator
        z_gen = Input(shape=(self.latent_dim,))
        # Add label to the input
        label = Input(shape=(1,))
        # Generate images based on noise
        img = self.generator([z_gen, label])
        # Critic determines validity
        valid = self.critic([img, label])
        # Define the generator model
        self.generator_model = Model([z_gen, label], valid)
        self.generator_model.compile(loss=self.wasserstein_loss, optimizer=optimizer)
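
    # Note on the trainable flags above: Keras snapshots `trainable` at compile
    # time, so critic_model updates only the critic (generator frozen) and
    # generator_model updates only the generator (critic frozen), even though
    # the two wrappers share the same underlying layers.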
    def gradient_penalty_loss(self, y_true, y_pred, averaged_samples):
        """
        Computes the gradient penalty on the weighted real / fake samples
        """
        gradients = K.gradients(y_pred, averaged_samples)[0]
        # compute the euclidean norm by squaring ...
        gradients_sqr = K.square(gradients)
        # ... summing over all non-batch axes ...
        gradients_sqr_sum = K.sum(gradients_sqr,
                                  axis=np.arange(1, len(gradients_sqr.shape)))
        # ... and taking the square root
        gradient_l2_norm = K.sqrt(gradients_sqr_sum)
        # compute (1 - ||grad||)^2 for each single sample
        gradient_penalty = K.square(1 - gradient_l2_norm)
        # return the mean as loss over all the batch samples
        return K.mean(gradient_penalty)
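
    # In formula terms this is E[(||grad_{x_hat} D(x_hat)||_2 - 1)^2]; the
    # lambda factor is applied outside, via loss_weights in compile().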
    def wasserstein_loss(self, y_true, y_pred):
        return K.mean(y_true * y_pred)
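
    # With y_true = -1 for real and +1 for fake (see train()), minimising
    # K.mean(y_true * y_pred) drives the critic score up on real samples and
    # down on fakes, i.e. it maximises the usual Wasserstein estimate
    # E[D(real)] - E[D(fake)].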
    def build_generator(self):
        generator = Sequential()
        depth = 816
        dim = 2
        dropout_rate = 0.5

        # In: 100 noise variables
        # Out: dim x dim x depth
        generator.add(Dense(dim*dim*depth, input_dim=self.latent_dim))
        generator.add(Reshape((dim, dim, depth)))
        generator.add(BatchNormalization(momentum=0.9))
        generator.add(LeakyReLU(alpha=0.2))

        # Each block below upsamples by 4x: UpSampling2D doubles the spatial
        # size and the strided Conv2DTranspose doubles it again.
        # In: dim x dim x depth
        # Out: 4*dim x 4*dim x depth/2
        generator.add(UpSampling2D())
        generator.add(Conv2DTranspose(filters=int(depth/2), kernel_size=5, strides=2, padding='same'))
        generator.add(BatchNormalization(momentum=0.9))
        generator.add(Dropout(rate=dropout_rate))
        generator.add(LeakyReLU(alpha=0.2))

        # In: 4*dim x 4*dim x depth/2
        # Out: 16*dim x 16*dim x depth/4
        generator.add(UpSampling2D())
        generator.add(Conv2DTranspose(filters=int(depth/4), kernel_size=5, strides=2, padding='same'))
        generator.add(BatchNormalization(momentum=0.9))
        generator.add(Dropout(rate=dropout_rate))
        generator.add(LeakyReLU(alpha=0.2))

        # In: 16*dim x 16*dim x depth/4
        # Out: 64*dim x 64*dim x depth/8 = 128 x 128 x 102
        generator.add(UpSampling2D())
        generator.add(Conv2DTranspose(filters=int(depth/8), kernel_size=5, strides=2, padding='same'))
        generator.add(BatchNormalization(momentum=0.9))
        generator.add(Dropout(rate=dropout_rate))
        generator.add(LeakyReLU(alpha=0.2))

        # Out: 128 x 128 x 3 color image
        generator.add(Conv2DTranspose(filters=3, kernel_size=5, padding='same'))
        generator.add(Activation('tanh'))
        print("GENERATOR NETWORK SHAPE")
        generator.summary()

        # Condition on the class label by multiplying the noise vector with a
        # learned label embedding of the same dimensionality
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.nclasses, self.latent_dim)(label))
        model_input = multiply([noise, label_embedding])
        img = generator(model_input)
        return Model([noise, label], img)
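
    # A hedged usage sketch (hypothetical, not part of the training flow):
    # sampling one image for class index 6 ('shoes-heels') from an untrained
    # generator, just to verify the output shape:
    #
    #   g = CWGANGP().generator
    #   z = np.random.normal(0, 1, (1, 100))
    #   img = g.predict([z, np.array([[6]])])   # img.shape == (1, 128, 128, 3)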
    def build_critic(self):
        discr = Sequential()
        depth = 32

        # In: 128 x 128 x 3
        discr.add(Conv2D(filters=depth*1, kernel_size=5, strides=2, data_format='channels_last', padding='same', input_shape=self.img_shape))
        discr.add(LeakyReLU(alpha=0.2))
        discr.add(Conv2D(filters=depth*2, kernel_size=5, strides=2, padding='same'))
        #discr.add(BatchNormalization(momentum=0.9))
        discr.add(LeakyReLU(alpha=0.2))
        discr.add(Conv2D(filters=depth*4, kernel_size=5, strides=2, padding='same'))
        #discr.add(BatchNormalization(momentum=0.9))
        discr.add(LeakyReLU(alpha=0.2))
        discr.add(Conv2D(filters=depth*8, kernel_size=5, strides=2, padding='same'))
        #discr.add(BatchNormalization(momentum=0.9))
        discr.add(LeakyReLU(alpha=0.2))
        # Shape after flattening: depth*8 * 8 * 8
        discr.add(Flatten())
        print("DISCRIMINATOR NETWORK SHAPE")
        discr.summary()

        img = Input(shape=self.img_shape)
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.nclasses, depth*8*8*8)(label))
        flat_img = discr(img)
        dense_input = multiply([flat_img, label_embedding])
        validity = Dense(1)(dense_input)

        critic = Model([img, label], validity)
        critic.summary()
        return critic
    def createTS(self):
        print("Loading images ... \n")
        images = np.zeros((self.dataset_size, self.img_rows, self.img_cols, self.channels), dtype=np.float32)
        y = np.zeros((self.dataset_size, 1), dtype=np.float32)
        input_directory = '/content/gdrive/My Drive/Shoes_Generator/dataset/'
        print("Pre-processing images...")
        j = 0
        for k, label in enumerate(self.labels):
            print("Loading {} ...".format(label))
            label_directory = input_directory + label
            i = 0
            for img in glob.glob("{}/*.jpg".format(label_directory)):
                try:
                    shoe = cv2.imread(img)
                    shoe = cv2.resize(shoe, (128, 128))
                    # Normalize image between -1 and 1
                    channel_0 = (shoe[:,:,0].astype('float32') - 255/2)/(255/2)
                    channel_1 = (shoe[:,:,1].astype('float32') - 255/2)/(255/2)
                    channel_2 = (shoe[:,:,2].astype('float32') - 255/2)/(255/2)
                    norm_shoe = np.stack([channel_0, channel_1, channel_2], axis=-1)
                    images[j,:,:,:] = norm_shoe
                    y[j] = k
                    i += 1
                    j += 1
                    if j % 200 == 0:
                        print('Loaded {} images out of {}'.format(j, self.dataset_size))
                except Exception:
                    print("Passed: ", j)
                if i == int(self.dataset_size/self.nclasses):
                    break
            print('{} images loaded from {}'.format(i, label))
        #print("Image size: ", images[10,:,:,:].shape)
        #print("Image example: ", images[10,30:40,30:40,0])
        #print("Rescaled image", images[10,30:40,30:40,0] * 255/2 + 255/2)
        return images, y
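
    # Caveat carried over from the original pipeline: cv2.imread returns
    # channels in BGR order, so the network is trained on BGR arrays, while
    # plot_images() below hands generated arrays straight to plt.imshow, which
    # interprets them as RGB; displayed colours may therefore appear swapped.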
    def train(self):
        X_train, y_train = self.createTS()
        t = time.time()

        # Adversarial ground truths
        valid = -np.ones((self.batch_size, 1))
        fake = np.ones((self.batch_size, 1))
        dummy = np.zeros((self.batch_size, 1))  # Dummy gt for gradient penalty
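
        # The -1/+1 targets feed wasserstein_loss above; 'dummy' exists only
        # because Keras requires a y_true for every model output -- the
        # gradient penalty term ignores it.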
        for iteration in range(self.nb_iter):
            for _ in range(self.n_critic):
                # ---------------------
                #  Train Discriminator
                # ---------------------

                # Select a random batch of images
                idx = np.random.randint(0, X_train.shape[0], self.batch_size)
                imgs, label = X_train[idx,:,:,:], y_train[idx]
                # Sample generator input
                noise = np.random.normal(0, 1, (self.batch_size, self.latent_dim))
                with tf.device('/device:GPU:0'):
                    # Train the critic
                    d_loss = self.critic_model.train_on_batch([imgs, label, noise], [valid, fake, dummy])

            # ---------------------
            #  Train Generator
            # ---------------------
            # sampled_labels = np.zeros((self.batch_size, 1))
            sampled_labels = np.random.randint(0, self.nclasses, self.batch_size).reshape(-1, 1)
            with tf.device('/device:GPU:0'):
                g_loss = self.generator_model.train_on_batch([noise, sampled_labels], valid)

            # Plot the progress
            print("%d [D loss: %f] [G loss: %f]" % (iteration, d_loss[0], g_loss))
            self.losslog.append([d_loss[0], g_loss])

            # If at save interval => save generated image samples
            if iteration % self.sample_interval == 0:
                self.plot_images(iteration, t)
                self.generator.save('/content/gdrive/My Drive/Shoes_Generator/cWGAN/models/generator_{}.h5'.format(iteration))
                self.critic.save('/content/gdrive/My Drive/Shoes_Generator/cWGAN/models/critic_{}.h5'.format(iteration))
                with open('/content/gdrive/My Drive/Shoes_Generator/cWGAN/loss.log', 'w') as f:
                    f.write('d_loss, g_loss\n')
                    for each in self.losslog:
                        f.write('%s, %s\n' % (each[0], each[1]))
    def plot_images(self, iteration, time, label=-1, show=False):
        samples = 4
        image_dir = "/content/gdrive/My Drive/Shoes_Generator/cWGAN/images/TEST{}".format(time)
        if not os.path.exists(image_dir):
            os.makedirs(image_dir)
        filename = image_dir + "/shoes_{}.png".format(iteration)

        # Generate noise and create new fake images
        noise = np.random.standard_normal(size=[samples, 100])
        if label == -1:
            sampled_labels = np.array([1,3,4,8]).reshape(-1, 1)
        else:
            sampled_labels = np.ones((1, samples)) * label
            sampled_labels = sampled_labels.reshape(-1, 1)
        images = self.generator.predict([noise, sampled_labels])

        plt.figure(figsize=(10,10))
        side = int(math.sqrt(samples))  # samples must be a perfect square
        for i in range(images.shape[0]):
            plt.subplot(side, side, i+1)
            image = images[i, :, :, :]
            image = np.reshape(image, [self.img_rows, self.img_cols, 3])
            image = image * 255/2 + 255/2  # Rescale pixel values from [-1, 1] to [0, 255]
            plt.imshow(image.astype(np.uint8))
            plt.axis('off')
        plt.tight_layout()
        plt.savefig(filename)
        if show:
            plt.show()
        plt.close('all')
    def test(self, model_name, label):
        file_name = '/content/gdrive/My Drive/Shoes_Generator/cWGAN/models/' + model_name
        self.generator = load_model(file_name)
        for i, l in enumerate(self.labels):
            if label == l:
                self.plot_images(1, int(time.time()), label=i, show=True)
                return
        print("Please provide a valid label")
if __name__ == '__main__':
    iterations = 30000
    batch_size = 32
    sample_interval = 50

    parser = argparse.ArgumentParser(description='Conditional WGAN-GP to create shoes')
    parser.add_argument("--mode", type=str, default='train',
                        help="Mode: test or train")
    parser.add_argument("--model", type=str, default=None,
                        help="In test mode, path to the saved model (e.g. generator_xx.h5)")
    parser.add_argument("--label", type=str, default=None,
                        help="In test mode, label of the shoes to be created; choose among: "
                             "boots-ankle, boots-kneehigh, boots-midcalf, sandals-flats, "
                             "shoes-athletic, shoes-flats, shoes-heels, shoes-loafers, "
                             "shoes-oxfords, slippers-flats")
    args = parser.parse_args()

    wgan = CWGANGP(iterations, batch_size, sample_interval)
    if args.mode == 'train':
        wgan.train()
    else:
        print("Testing")
        wgan.test(args.model, args.label)
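
# Example invocations (hedged; paths assume the Drive layout used above):
#   python cWGAN_gp.py --mode train
#   python cWGAN_gp.py --mode test --model generator_xx.h5 --label shoes-heels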