**Exercise 8**

Train a deep neural network on the CIFAR10 image dataset.

In [8]:
import tensorflow_gpu as tf
import numpy as np
import keras
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, AlphaDropout
from sklearn.model_selection import train_test_split
import os

SyntaxError: invalid syntax (<ipython-input-8-dc5907620648>, line 1)

In [2]:
class config:
    """Hyperparameters shared by every model built in this notebook.

    Args:
        n_classes: number of output classes.
        n_epochs: number of training epochs passed to ``model.fit``.
        n_hidden: number of hidden Dense layers.
        n_neurons: units per hidden Dense layer.
        kernel_initializer: initializer name for the hidden layers.
        optimizer: optimizer handed to ``model.compile``. Defaults to the
            string identifier ``'nadam'`` rather than an optimizer instance.
        activation: activation name for the hidden layers.
    """
    def __init__(self, n_classes = 10, n_epochs = 5, n_hidden = 20, 
                 n_neurons = 100, kernel_initializer = "he_normal", 
                 optimizer = None, 
                 activation='elu'):
        self.n_classes = n_classes
        self.n_epochs = n_epochs
        self.n_hidden = n_hidden
        self.n_neurons = n_neurons
        self.kernel_initializer = kernel_initializer
        self.activation = activation
        # A default of `keras.optimizers.Nadam()` in the signature is evaluated
        # once, so the very same optimizer object (and its internal state) would
        # be reused by every model compiled from this config. A string
        # identifier lets each `compile()` call build its own optimizer.
        self.optimizer = optimizer if optimizer is not None else 'nadam'
        self.input_shape = [32, 32, 3]
        self.loss = 'sparse_categorical_crossentropy'
        self.metrics = ['accuracy']
        
cfg = config()

# Note that n_epochs is low (only 5) to keep this exercise within a reasonable time frame.
# To achieve better accuracy, raise the number of epochs to a large amount
# and let the Early Stopping callback do its work.

In [3]:
# The data, split between train and test sets:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert to float32 and rescale by 1/255. New arrays are built (no in-place
# `/=`) so the raw arrays returned by load_data() stay untouched.
X_train_full = x_train.astype('float32') / 255
X_test = x_test.astype('float32') / 255

# Split the *rescaled* training data into train and validation sets.
# The split must use X_train_full, not the raw x_train: otherwise train/val
# stay on the raw scale while the test set is divided by 255, and the
# standardisation below would be applied to mismatched scales.
# random_state is fixed so the split is reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train, test_size=0.1, random_state=42)

# Per-pixel standardisation using statistics of the training split only
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)
X_train_scaled = (X_train - pixel_means) / pixel_stds
X_val_scaled = (X_val - pixel_means) / pixel_stds
X_test_scaled = (X_test - pixel_means) / pixel_stds

x_train shape: (50000, 32, 32, 3)
50000 train samples
10000 test samples


In [4]:
# All TensorBoard runs are logged underneath this directory
root_logdir = os.path.join(os.curdir, 'my_logs')


def get_run_logdir():
    """Build the log directory path for the current run.

    The path is ``root_logdir`` joined with a name derived from the current
    local time. Only the path string is built here; nothing is created on disk.
    """
    from time import strftime
    return os.path.join(root_logdir, strftime('run_%Y_%m_%d_%H_%M_%S'))


run_logdir = get_run_logdir()

In [5]:
# Checkpoint callback in case of a crash: writes to 'simple_mlp.h5'
# with save_best_only enabled.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    'simple_mlp.h5',
    save_best_only=True,
)

# Early stopping callback: patience of 5 epochs, restoring the best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# TensorBoard callback writing into the current run's log directory.
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

In [6]:
# Create baseline model with He Initialization, elu activation, and Nadam optimization
# (initializer, activation and optimizer all come from `cfg`).

model = Sequential()
model.add(Flatten(input_shape=cfg.input_shape))
# Stack cfg.n_hidden identical hidden Dense layers
for _ in range(cfg.n_hidden):
    model.add(Dense(cfg.n_neurons, activation=cfg.activation, 
                    kernel_initializer=cfg.kernel_initializer))
# Output width comes from the config instead of a hard-coded 10, so changing
# cfg.n_classes is enough to retarget the model.
model.add(Dense(cfg.n_classes, activation='softmax'))
model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)
model.summary()







_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               307300    
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_6 (Dense)              (None, 100)               10100     
____

In [7]:
# Train the baseline model on the standardised training split, validating on
# the held-out validation split, with all three callbacks attached.
model.fit(
    X_train_scaled,
    y_train,
    epochs=cfg.n_epochs,
    validation_data=(X_val_scaled, y_val),
    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb],
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 45000 samples, validate on 5000 samples




InternalError: cudaGetDevice() failed. Status: CUDA driver version is insufficient for CUDA runtime version

In [8]:
# Load TensorBoard to port 6006
# `%load_ext` loads the TensorBoard notebook extension; `%tensorboard` then
# starts it reading from ./my_logs (the directory the run log dirs are created
# under) on port 6006.

%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 5612.

In [9]:
# Create same model, this time adding Batch Normalization after each hidden layer
# (each BatchNormalization layer follows a Dense layer and its activation).

model = Sequential()
model.add(Flatten(input_shape=cfg.input_shape))
for _ in range(cfg.n_hidden):
    model.add(Dense(cfg.n_neurons, activation=cfg.activation, 
                    kernel_initializer=cfg.kernel_initializer))
    model.add(BatchNormalization())
# Output width comes from the config instead of a hard-coded 10
model.add(Dense(cfg.n_classes, activation='softmax'))
model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_22 (Dense)             (None, 100)               307300    
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dense_23 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_2 (Batch (None, 100)               400       
_________________________________________________________________
dense_24 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_3 (Batch (None, 100)              

In [10]:
# New log directory and TensorBoard callback for the next training run,
# so its curves are logged separately from earlier runs.
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

In [11]:
# Train the current model with checkpointing, early stopping and
# TensorBoard logging.
model.fit(
    X_train_scaled,
    y_train,
    epochs=cfg.n_epochs,
    validation_data=(X_val_scaled, y_val),
    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb],
)

Train on 45000 samples, validate on 5000 samples
Epoch 1/5
  224/45000 [..............................] - ETA: 29:09 - loss: 3.3145 - accuracy: 0.0670 



Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x16288ae3348>

In [12]:
# Load TensorBoard to port 6006

%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 11252), started 0:05:18 ago. (Use '!kill 11252' to kill it.)

In [13]:
# Fresh log directory and TensorBoard callback for the SELU run
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

# Switch the shared config to a SELU setup: selu activation with
# LeCun normal initialization (needs inputs with mean 0 and standard deviation 1).
cfg.activation = 'selu'
cfg.kernel_initializer = 'lecun_normal'

In [14]:
# Construct Selu model (note that there are no Batch Normalization layers this time)
model = Sequential()
model.add(Flatten(input_shape=cfg.input_shape))
for _ in range(cfg.n_hidden):
    # The initializer must be passed explicitly: without it Dense falls back to
    # its default initializer and the 'lecun_normal' value stored in cfg is
    # silently ignored.
    model.add(Dense(cfg.n_neurons, activation=cfg.activation,
                    kernel_initializer=cfg.kernel_initializer))
# Output width comes from the config instead of a hard-coded 10
model.add(Dense(cfg.n_classes, activation='softmax'))
model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_43 (Dense)             (None, 100)               307300    
_________________________________________________________________
dense_44 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_45 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_46 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_47 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_48 (Dense)             (None, 100)              

In [15]:
# Train the SELU model; same data, epochs and callbacks as the earlier runs.
model.fit(
    X_train_scaled,
    y_train,
    epochs=cfg.n_epochs,
    validation_data=(X_val_scaled, y_val),
    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb],
)

Train on 45000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1629adb5c48>

In [16]:
# Fresh log directory and TensorBoard callback for the SELU + alpha dropout run
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

# SELU configuration (needs inputs with mean 0 and standard deviation 1):
# selu activation together with LeCun normal initialization.
cfg.activation = 'selu'
cfg.kernel_initializer = 'lecun_normal'

In [17]:
# Construct Selu model (note that there are no Batch Normalization layers this time)
# Now with alpha dropout: an AlphaDropout layer (rate 0.2) is placed in front of
# every hidden Dense layer, including the first one.
model = Sequential()
model.add(Flatten(input_shape=cfg.input_shape))
for _ in range(cfg.n_hidden):
    model.add(AlphaDropout(rate=0.2))
    model.add(Dense(cfg.n_neurons, activation=cfg.activation, kernel_initializer=cfg.kernel_initializer))
# Output width comes from the config instead of a hard-coded 10
model.add(Dense(cfg.n_classes, activation='softmax'))
model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 3072)              0         
_________________________________________________________________
alpha_dropout_1 (AlphaDropou (None, 3072)              0         
_________________________________________________________________
dense_64 (Dense)             (None, 100)               307300    
_________________________________________________________________
alpha_dropout_2 (AlphaDropou (None, 100)               0         
_________________________________________________________________
dense_65 (Dense)             (None, 100)               10100     
_________________________________________________________________
alpha_dropout_3 (AlphaDropou (None, 100)               0         
_________________________________________________________________
dense_66 (Dense)             (None, 100)              

In [18]:
# Train the SELU + alpha dropout model with the shared callbacks.
model.fit(
    X_train_scaled,
    y_train,
    epochs=cfg.n_epochs,
    validation_data=(X_val_scaled, y_val),
    callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb],
)

Train on 45000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1631fa52d48>

In [19]:
# Load TensorBoard to port 6006

%load_ext tensorboard
%tensorboard --logdir=./my_logs --port=6006

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 11252), started 0:13:58 ago. (Use '!kill 11252' to kill it.)

In [20]:
# Implement 1 Cycle Scheduling
# Thanks to https://github.com/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb

batch_size = 128
K = keras.backend

class OneCycleScheduler(keras.callbacks.Callback):
    """Callback that sets the optimizer learning rate before every batch.

    The rate follows three linear segments over ``iterations`` batches:
    ``start_rate`` up to ``max_rate`` during the first ``half_iteration``
    batches, back down to ``start_rate`` during the next ``half_iteration``
    batches, then down to ``last_rate`` over the remaining batches.

    Args:
        iterations: total number of batches the schedule spans.
        max_rate: peak learning rate.
        start_rate: initial rate; falls back to ``max_rate / 10`` when falsy.
        last_iterations: length of the final segment; falls back to
            ``iterations // 10 + 1`` when falsy.
        last_rate: final rate; falls back to ``start_rate / 1000`` when falsy.
    """
    def __init__(self, iterations, max_rate, start_rate=None,
                 last_iterations=None, last_rate=None):
        # Run the base Callback initialiser so whatever attributes it sets up
        # exist on this instance before training starts.
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        # Number of batches seen so far
        self.iteration = 0
    def _interpolate(self, iter1, iter2, rate1, rate2):
        """Linearly interpolate between (iter1, rate1) and (iter2, rate2) at self.iteration."""
        return ((rate2 - rate1) * (self.iteration - iter1)
                / (iter2 - iter1) + rate1)
    def on_batch_begin(self, batch, logs=None):
        """Compute the rate for the current iteration and write it to the optimizer.

        ``logs`` defaults to None so the hook can also be called without it.
        """
        if self.iteration < self.half_iteration:
            # Segment 1: ramp up from start_rate to max_rate
            rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < 2 * self.half_iteration:
            # Segment 2: ramp back down from max_rate to start_rate
            rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                     self.max_rate, self.start_rate)
        else:
            # Segment 3: decay from start_rate towards last_rate
            rate = self._interpolate(2 * self.half_iteration, self.iterations,
                                     self.start_rate, self.last_rate)
            # Never go below last_rate, even if training runs past `iterations`
            rate = max(rate, self.last_rate)
        self.iteration += 1
        K.set_value(self.model.optimizer.lr, rate)

In [21]:
# New log directory and TensorBoard callback for the next training run
run_logdir = get_run_logdir()
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

In [22]:
# Restore the ELU / He-normal configuration (cfg was switched to SELU earlier)
cfg.kernel_initializer = "he_normal"
cfg.activation='elu'

# Dense + BatchNormalization stack, to be trained with 1cycle scheduling
model = Sequential()
model.add(Flatten(input_shape=cfg.input_shape))
for _ in range(cfg.n_hidden):
    model.add(Dense(cfg.n_neurons, activation=cfg.activation, 
                    kernel_initializer=cfg.kernel_initializer))
    model.add(BatchNormalization())
# Output width comes from the config instead of a hard-coded 10
model.add(Dense(cfg.n_classes, activation='softmax'))
model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_5 (Flatten)          (None, 3072)              0         
_________________________________________________________________
dense_85 (Dense)             (None, 100)               307300    
_________________________________________________________________
batch_normalization_21 (Batc (None, 100)               400       
_________________________________________________________________
dense_86 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_22 (Batc (None, 100)               400       
_________________________________________________________________
dense_87 (Dense)             (None, 100)               10100     
_________________________________________________________________
batch_normalization_23 (Batc (None, 100)              

In [23]:
# Batches per epoch, rounded UP: the last, smaller batch of each epoch is still
# a training step (45000 samples / 128 -> 352 steps, not 351). With floor
# division the schedule would be planned for fewer iterations than fit() runs.
steps_per_epoch = (len(X_train_scaled) + batch_size - 1) // batch_size
onecycle = OneCycleScheduler(steps_per_epoch * cfg.n_epochs, max_rate=0.05)

# tensorboard_cb is included so this run is logged to the run directory that
# was prepared for it; previously the callback was created but never used.
history = model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, batch_size=batch_size,
                    validation_data=(X_val_scaled, y_val),
                    callbacks=[onecycle, tensorboard_cb])

Train on 45000 samples, validate on 5000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
