This example is based on “Generative Adversarial Networks (GANs) with R” by Dr. Bharatendra Rai; the code is available on GitHub and YouTube.

1. Import Libraries

library(keras) #Used a lot for Deep learning
library(EBImage) 

2. Import MNist dataset

# Load the MNIST handwritten-digit dataset shipped with keras:
# a list of train/test sets, each with x (images) and y (labels).
mnist <- dataset_mnist()
str(mnist)
## List of 2
##  $ train:List of 2
##   ..$ x: int [1:60000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ y: int [1:60000(1d)] 5 0 4 1 9 2 1 3 1 4 ...
##  $ test :List of 2
##   ..$ x: int [1:10000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ y: int [1:10000(1d)] 7 2 1 0 4 1 4 9 5 9 ...
  • mnist contains 60,000 images in the training data and 10,000 images in the test data;
  • mnist data contains representations of numbers from 0 to 9;
  • The height and width of these images are 28 per 28.
# Destructure mnist into four objects in one step with the %<-%
# multi-assignment operator made available by keras.
c(c(trainx, trainy), c(testx, testy)) %<-% mnist # trainy and testy hold the digit labels
summary(trainx)  # pixel intensities are integers in [0, 255]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   33.32    0.00  255.00
  • The images have values between 0 and 255, with a mean of 33.
# Frequency of each digit 0-9 among the 60,000 training labels.
table(trainy)
## trainy
##    0    1    2    3    4    5    6    7    8    9 
## 5923 6742 5958 6131 5842 5421 5918 6265 5851 5949
  • There are 5923 images of digit “0”, and 5918 of digit “6”.
# Confirm trainx dimensions: 60,000 images of 28 x 28 pixels.
str(trainx)
##  int [1:60000, 1:28, 1:28] 0 0 0 0 0 0 0 0 0 0 ...
In this example, we will use only digit “5”, therefore:
# Keep only the images labelled "5" (5421 of them, per the table above);
# the two empty indices retain the full 28 x 28 height and width.
trainx <- trainx[trainy == 5,,]

3. Plot the first 64 images of digit 5

# Display the first 64 training digits of "5" in an 8 x 8 grid with no
# margins; as.raster() renders each 28 x 28 matrix as a grayscale image.
par(mfrow = c(8, 8), mar = rep(0, 4))
for (img_idx in seq_len(64)) {
  plot(as.raster(trainx[img_idx, , ], max = 255))
}

  • These are real images of handwriting that differs from person to person.

4. Data normalization (rescaling pixel values to [0, 1])

a) Return to 1 line/1 column format.
# Reset the plotting grid to a single panel.
par(mfrow = c(1,1)) 
# Add a trailing channel dimension, (n, 28, 28) -> (n, 28, 28, 1):
# the 4D shape the convolutional layers below expect.
trainx <- array_reshape(trainx, 
                        c(nrow(trainx), 28, 28, 1))
b) Reshape the dataset to the format that is needed for the model.
str(trainx) # dimensions of trainx are now 5421 x 28 x 28 x 1
##  num [1:5421, 1:28, 1:28, 1] 0 0 0 0 0 0 0 0 0 0 ...
c) Divide by 255, so the new trainx ranges between 0 and 1.
# Scale pixel values from [0, 255] down to [0, 1].
trainx <- trainx/255
summary(trainx)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1287  0.0000  1.0000

5. Generator Network

h <- 28  #height of each image in pixels
w <- 28  #width of each image in pixels
c <- 1   #Number of channels (grayscale)
         # NOTE(review): this shadows base::c for variables, but calls such
         # as c(h, w, c) still resolve to the base function in R.
l <- 28  #Latent dimension: length of the random noise vector fed to the generator
a) Define generated input and output
# Generator input: a latent noise vector of length l = 28.
gi <- layer_input(shape = l)  #Generated input

# Generator output: project the noise to 32*14*14 units, reshape to a
# 14x14x32 feature map, upsample to 28x28 with a strided transposed
# convolution, and collapse to one channel -> a 28x28x1 fake image.
# NOTE(review): tanh outputs values in (-1, 1) while the real images were
# rescaled to [0, 1] -- confirm this mismatch is intended.
go <- gi %>% layer_dense(units = 32 * 14 * 14) %>%
layer_activation_leaky_relu() %>%
layer_reshape(target_shape = c(14, 14, 32)) %>%
layer_conv_2d(filters = 32, 
                kernel_size = 5,
                padding = "same") %>%
layer_activation_leaky_relu() %>%
layer_conv_2d_transpose(filters = 32,
                          kernel_size = 4,
                          strides = 2,   # doubles spatial size: 14 -> 28
                          padding = "same") %>%
layer_activation_leaky_relu() %>%
layer_conv_2d(filters = 1,
              kernel_size = 5,
                activation = "tanh",
                padding = "same")
b) Define generated model
# Build the generator model mapping noise (28) -> image (28, 28, 1).
g <- keras_model(gi,go)
summary(g)
## Model: "model"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  input_1 (InputLayer)               [(None, 28)]                    0           
##                                                                                 
##  dense (Dense)                      (None, 6272)                    181888      
##                                                                                 
##  leaky_re_lu_2 (LeakyReLU)          (None, 6272)                    0           
##                                                                                 
##  reshape (Reshape)                  (None, 14, 14, 32)              0           
##                                                                                 
##  conv2d_1 (Conv2D)                  (None, 14, 14, 32)              25632       
##                                                                                 
##  leaky_re_lu_1 (LeakyReLU)          (None, 14, 14, 32)              0           
##                                                                                 
##  conv2d_transpose (Conv2DTranspose)  (None, 28, 28, 32)             16416       
##                                                                                 
##  leaky_re_lu (LeakyReLU)            (None, 28, 28, 32)              0           
##                                                                                 
##  conv2d (Conv2D)                    (None, 28, 28, 1)               801         
##                                                                                 
## ================================================================================
## Total params: 224,737
## Trainable params: 224,737
## Non-trainable params: 0
## ________________________________________________________________________________
  • The generator network will create fake images;
  • The final shape of the fake images is (28, 28, 1).

6. Discriminator network

# Discriminator input: a 28 x 28 x 1 image (real or generated).
di <- layer_input(shape = c(h, w, c)) #discriminator input

do <- di %>%    #discriminator output
  layer_conv_2d(filters = 64,
                kernel_size = 4) %>%  # no padding: spatial size 28 -> 25
  layer_activation_leaky_relu() %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.3) %>%  # regularization for the classifier head
  layer_dense(units = 1,    # That is a classification layer that classifies the images in real or fake. 
              activation = "sigmoid") 
a) Define discriminator model
# Build the discriminator model: image -> probability in (0, 1).
# With the labels used below, values near 1 mean "fake", near 0 "real".
d <- keras_model(di, do)
summary(d)
## Model: "model_1"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  input_2 (InputLayer)               [(None, 28, 28, 1)]             0           
##                                                                                 
##  conv2d_2 (Conv2D)                  (None, 25, 25, 64)              1088        
##                                                                                 
##  leaky_re_lu_3 (LeakyReLU)          (None, 25, 25, 64)              0           
##                                                                                 
##  flatten (Flatten)                  (None, 40000)                   0           
##                                                                                 
##  dropout (Dropout)                  (None, 40000)                   0           
##                                                                                 
##  dense_1 (Dense)                    (None, 1)                       40001       
##                                                                                 
## ================================================================================
## Total params: 41,089
## Trainable params: 41,089
## Non-trainable params: 0
## ________________________________________________________________________________
  • input_2 (InputLayer) has the same dimensions as the real and fake images.
b) Compile discriminator network
# Compile the discriminator on its own; binary cross-entropy matches the
# single sigmoid real-vs-fake output.
d %>% compile(optimizer = 'rmsprop',
              loss = 'binary_crossentropy')
c) Freeze weights and compile
# Freeze the discriminator's weights so that training the combined GAN
# updates only the generator.
freeze_weights(d)
gani <- layer_input(shape = l)     #GAN input
# Chain generator then (frozen) discriminator: noise -> image -> score.
gano <- gani %>% g %>% d      #GAN output

gan <- keras_model(gani, gano)

gan %>% compile(optimizer = 'rmsprop',
                loss = 'binary_crossentropy')
d) Training the network
b <- 50              # batch size: real/fake images per training step
iterations <- 100    # number of adversarial training steps

# NOTE(review): setwd() with a machine-specific absolute path is fragile;
# prefer relative paths or a project-root helper.
setwd("G:/O meu disco/Thesis/Software simulacao Carlos/Generative Modeling")

dir <- "gan_img"
if (!dir.exists(dir)) dir.create(dir)   # directory for the saved fake images

start <- 1                      # index of the first real image in the window
dloss <- numeric(iterations)    # discriminator loss per iteration (preallocated)
gloss <- numeric(iterations)    # generator (GAN) loss per iteration (preallocated)

for (i in seq_len(iterations)) {
  # a) Generate b fake images from random standard-normal latent vectors.
  noise <- matrix(rnorm(b * l), nrow = b, ncol = l)
  fake <- g %>% predict(noise)

  # b) Take the next window of b real images; drop = FALSE keeps the
  #    required 4D shape (b, 28, 28, 1) without a separate reshape.
  stop <- start + b - 1
  real <- trainx[start:stop, , , , drop = FALSE]

  # Stack fake and real images into one training array.
  rows <- nrow(real)
  both <- array(0, dim = c(rows * 2, dim(real)[-1]))
  both[1:rows, , , ] <- fake                  # fake images fill rows 1..b
  both[(rows + 1):(rows * 2), , , ] <- real   # real images fill rows b+1..2b

  # Smoothed noisy labels: fake in [0.9, 1], real in [0, 0.1].
  labels <- rbind(matrix(runif(b, 0.9, 1), nrow = b, ncol = 1),
                  matrix(runif(b, 0, 0.1), nrow = b, ncol = 1))

  # c) Train the discriminator on the mixed batch, then advance the
  #    window over the real images for the next iteration.
  start <- start + b
  dloss[i] <- d %>% train_on_batch(both, labels)

  # d) Train the generator through the frozen-discriminator GAN by
  #    labelling its fake images as if they were real.
  fakeAsReal <- array(runif(b, 0, 0.1), dim = c(b, 1))
  gloss[i] <- gan %>% train_on_batch(noise, fakeAsReal)

  # e) Save the first fake image of each iteration as f<i>.png.
  f <- fake[1, , , ]         # drops to 28 x 28
  dim(f) <- c(28, 28, 1)     # restore the channel dimension for saving
  image_array_save(f, path = file.path(dir, paste0("f", i, ".png")))
}
  • In each iteration, 50 images are being created.
  • If the discriminator learns to discriminate well between real and fake images, then the discriminator loss (dloss) will be low.

7. Plot loss for 100 iterations

# Plot discriminator loss (red) and GAN/generator loss (black) against
# the iteration index; seq_along adapts if the iteration count changes.
x <- seq_along(dloss)

plot(x, dloss, col = 'red', type = 'l',
     ylim = c(0,3),
     xlab = 'Iterations',
     ylab = 'Loss')
lines(x, gloss, col = 'black', type = 'l')
# Both curves are drawn solid, so the legend uses lty = 1 for both
# (the previous lty = 1:2 showed a dashed entry no curve had).
legend('topright', 
       legend = c("Discriminator Loss", "GAN Loss"),
       col = c("red", "black"), lty = 1, cex = 1)

  • IMP: The discriminator loss slightly increases with the iterations, indicating that it becomes more difficult for the discriminator to differentiate a real image from a fake one. Therefore, it is expected that both losses come closer and closer after many iterations.

8. Fake images

# Read the saved fake images back and display them in a 10 x 10 grid,
# ordered by iteration number.
setwd("G:/O meu disco/Thesis/Software simulacao Carlos/Generative Modeling/gan_img")

# list.files() interprets pattern as a regular expression; "\\.png$"
# anchors the extension (the previous glob-style "*.png" was not a
# valid regex).
temp <- list.files(pattern = "\\.png$")
# list.files() sorts lexically (f1, f10, f100, f11, ...); reorder by the
# numeric iteration index embedded in each file name so the grid reads
# in training order.
temp <- temp[order(as.numeric(gsub("\\D", "", temp)))]

mypic <- vector("list", length(temp))   # preallocate instead of growing
for (i in seq_along(temp)) {
  mypic[[i]] <- readImage(temp[[i]])
}
par(mfrow = c(10, 10))
for (i in seq_along(temp)) plot(mypic[[i]])

  • In the 10 x 10 grid, cell (1,1) shows the first iteration and cell (1,2) the second; cell (2,1) corresponds to iteration 11. The last row therefore starts at iteration 91 (cell (10,1)), covering all 100 iterations.