GeronBook/Ch11/Exercises.ipynb

960 lines
45 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Exercise 8**\n",
"\n",
"Train a Deep Neural Network on CIFAR10 image dataset"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import keras\n",
"from keras.datasets import cifar10\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, AlphaDropout\n",
"from sklearn.model_selection import train_test_split\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class config:\n",
"    \"\"\"Hyperparameter container for the CIFAR10 MLP experiments.\"\"\"\n",
"    def __init__(self, n_classes=10, n_epochs=5, n_hidden=20,\n",
"                 n_neurons=100, kernel_initializer='he_normal',\n",
"                 optimizer=None,\n",
"                 activation='elu'):\n",
"        self.n_classes = n_classes\n",
"        self.n_epochs = n_epochs\n",
"        self.n_hidden = n_hidden\n",
"        self.n_neurons = n_neurons\n",
"        self.kernel_initializer = kernel_initializer\n",
"        self.activation = activation\n",
"        # Build the optimizer here rather than as a default argument:\n",
"        # a default of keras.optimizers.Nadam() is evaluated once at class\n",
"        # definition time, so the same stateful optimizer instance would be\n",
"        # shared by every model compiled in this notebook.\n",
"        self.optimizer = optimizer if optimizer is not None else keras.optimizers.Nadam()\n",
"        self.input_shape = [32, 32, 3]\n",
"        self.loss = 'sparse_categorical_crossentropy'\n",
"        self.metrics = ['accuracy']\n",
"\n",
"cfg = config()\n",
"\n",
"# Note that our n_epochs is low (only 5), this is to keep this exercise to a reasonable time frame\n",
"# To achieve better accuracy you can raise the epochs to a large amount\n",
"# and let our Early Stopping callback do its work"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"x_train shape: (50000, 32, 32, 3)\n",
"50000 train samples\n",
"10000 test samples\n"
]
}
],
"source": [
"# The data, split between train and test sets:\n",
"(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
"print('x_train shape:', x_train.shape)\n",
"print(x_train.shape[0], 'train samples')\n",
"print(x_test.shape[0], 'test samples')\n",
"\n",
"X_train = x_train.astype('float32')\n",
"X_test = x_test.astype('float32')\n",
"X_train /= 255\n",
"X_test /= 255\n",
"\n",
"# Split X_train into X_train and X_val.\n",
"# Use the normalized X_train here (NOT the raw x_train): splitting x_train\n",
"# would leave train/val pixels in 0-255 while X_test is in 0-1, so the\n",
"# standardization below would be computed and applied on mismatched scales.\n",
"X_train, X_val, y_train, y_val = train_test_split(\n",
"    X_train, y_train, test_size=0.1, random_state=42)\n",
"\n",
"# Standardize with statistics computed on the training split only,\n",
"# then apply the same statistics to the validation and test sets.\n",
"pixel_means = X_train.mean(axis=0, keepdims=True)\n",
"pixel_stds = X_train.std(axis=0, keepdims=True)\n",
"X_train_scaled = (X_train - pixel_means) / pixel_stds\n",
"X_val_scaled = (X_val - pixel_means) / pixel_stds\n",
"X_test_scaled = (X_test - pixel_means) / pixel_stds"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Setup logdir for TensorBoard\n",
"root_logdir = os.path.join(os.curdir, 'my_logs')\n",
"\n",
"# Setup function to get directory for logging our current run\n",
"def get_run_logdir():\n",
" import time\n",
" run_id = time.strftime('run_%Y_%m_%d_%H_%M_%S')\n",
" return os.path.join(root_logdir, run_id)\n",
"\n",
"run_logdir = get_run_logdir() "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Model Checkpoint callback in case of crash\n",
"checkpoint_cb = keras.callbacks.ModelCheckpoint('simple_mlp.h5',\n",
" save_best_only=True)\n",
"\n",
"# Early Stopping callback\n",
"early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,\n",
" restore_best_weights=True)\n",
"\n",
"# TensorBoard callback\n",
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:3341: The name tf.log is deprecated. Please use tf.math.log instead.\n",
"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_1 (Flatten) (None, 3072) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 100) 307300 \n",
"_________________________________________________________________\n",
"dense_2 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_4 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_5 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_6 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_7 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_8 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_9 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_10 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_11 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_12 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_13 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_14 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_15 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_16 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_17 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_18 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_19 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_20 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_21 (Dense) (None, 10) 1010 \n",
"=================================================================\n",
"Total params: 500,210\n",
"Trainable params: 500,210\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Create baseline model with He Initialization, elu activation, and Nadam optimization\n",
"\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=cfg.input_shape))\n",
"for layer in range(cfg.n_hidden):\n",
"    model.add(Dense(cfg.n_neurons, activation=cfg.activation,\n",
"                    kernel_initializer=cfg.kernel_initializer))\n",
"# Output layer: one unit per class (cfg.n_classes, not a hard-coded 10)\n",
"model.add(Dense(cfg.n_classes, activation='softmax'))\n",
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\tensorflow_core\\python\\ops\\math_grad.py:1424: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:2741: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n",
"\n",
"Train on 45000 samples, validate on 5000 samples\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:850: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.\n",
"\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:853: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n",
"\n",
"Epoch 1/5\n",
"45000/45000 [==============================] - 76s 2ms/step - loss: 1.9980 - acc: 0.2697 - val_loss: 2.0319 - val_acc: 0.2578\n",
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:995: The name tf.Summary is deprecated. Please use tf.compat.v1.Summary instead.\n",
"\n",
"Epoch 2/5\n",
"45000/45000 [==============================] - 68s 2ms/step - loss: 1.8190 - acc: 0.3308 - val_loss: 1.8712 - val_acc: 0.3270\n",
"Epoch 3/5\n",
"45000/45000 [==============================] - 73s 2ms/step - loss: 1.7708 - acc: 0.3539 - val_loss: 1.8496 - val_acc: 0.3278\n",
"Epoch 4/5\n",
"45000/45000 [==============================] - 61s 1ms/step - loss: 1.7449 - acc: 0.3714 - val_loss: 1.7296 - val_acc: 0.3866\n",
"Epoch 5/5\n",
"45000/45000 [==============================] - 41s 919us/step - loss: 1.7015 - acc: 0.3888 - val_loss: 1.7038 - val_acc: 0.3828\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x1e301821f08>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
" validation_data = (X_val_scaled, y_val),\n",
" callbacks = [checkpoint_cb, \n",
" early_stopping_cb, \n",
" tensorboard_cb])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Launching TensorBoard..."
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Load TensorBoard to port 6006\n",
"\n",
"%load_ext tensorboard\n",
"%tensorboard --logdir=./my_logs --port=6006"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create same model, this time adding Batch Normalization after each hidden layer\n",
"# (the layer order below is Dense -> BN, matching the summary output)\n",
"\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=cfg.input_shape))\n",
"for layer in range(cfg.n_hidden):\n",
"    model.add(Dense(cfg.n_neurons, activation=cfg.activation,\n",
"                    kernel_initializer=cfg.kernel_initializer))\n",
"    model.add(BatchNormalization())\n",
"model.add(Dense(cfg.n_classes, activation='softmax'))\n",
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_logdir = get_run_logdir() \n",
"\n",
"# TensorBoard callback\n",
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
" validation_data = (X_val_scaled, y_val),\n",
" callbacks = [checkpoint_cb, \n",
" early_stopping_cb, \n",
" tensorboard_cb])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load TensorBoard to port 6006\n",
"\n",
"%load_ext tensorboard\n",
"%tensorboard --logdir=./my_logs --port=6006"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_logdir = get_run_logdir() \n",
"\n",
"# TensorBoard callback\n",
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
"\n",
"# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n",
"cfg.kernel_initializer='lecun_normal'\n",
"cfg.activation='selu'"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential_3\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_3 (Flatten) (None, 3072) 0 \n",
"_________________________________________________________________\n",
"dense_43 (Dense) (None, 100) 307300 \n",
"_________________________________________________________________\n",
"dense_44 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_45 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_46 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_47 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_48 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_49 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_50 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_51 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_52 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_53 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_54 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_55 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_56 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_57 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_58 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_59 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_60 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_61 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_62 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_63 (Dense) (None, 10) 1010 \n",
"=================================================================\n",
"Total params: 500,210\n",
"Trainable params: 500,210\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Construct Selu model (note that there are no Batch Normalization layers this time)\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=cfg.input_shape))\n",
"for layer in range(cfg.n_hidden):\n",
"    # selu relies on lecun_normal initialization (set in cfg above) to be\n",
"    # self-normalizing; the original call omitted kernel_initializer and\n",
"    # silently fell back to the glorot_uniform default.\n",
"    model.add(Dense(cfg.n_neurons, activation=cfg.activation,\n",
"                    kernel_initializer=cfg.kernel_initializer))\n",
"model.add(Dense(cfg.n_classes, activation='softmax'))\n",
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 45000 samples, validate on 5000 samples\n",
"Epoch 1/5\n",
"45000/45000 [==============================] - 40s 883us/step - loss: 2.1985 - accuracy: 0.2347 - val_loss: 1.8834 - val_accuracy: 0.2900\n",
"Epoch 2/5\n",
"45000/45000 [==============================] - 42s 923us/step - loss: 1.8693 - accuracy: 0.2965 - val_loss: 1.8639 - val_accuracy: 0.2964\n",
"Epoch 3/5\n",
"45000/45000 [==============================] - 39s 876us/step - loss: 1.8027 - accuracy: 0.3231 - val_loss: 1.7998 - val_accuracy: 0.3196\n",
"Epoch 4/5\n",
"45000/45000 [==============================] - 39s 859us/step - loss: 1.7604 - accuracy: 0.3451 - val_loss: 1.8129 - val_accuracy: 0.3088\n",
"Epoch 5/5\n",
"45000/45000 [==============================] - 38s 852us/step - loss: 1.7392 - accuracy: 0.3527 - val_loss: 1.6917 - val_accuracy: 0.3714\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.callbacks.History at 0x1629adb5c48>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
" validation_data = (X_val_scaled, y_val),\n",
" callbacks = [checkpoint_cb, \n",
" early_stopping_cb, \n",
" tensorboard_cb])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"run_logdir = get_run_logdir() \n",
"\n",
"# TensorBoard callback\n",
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
"\n",
"# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n",
"cfg.kernel_initializer='lecun_normal'\n",
"cfg.activation='selu'"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential_4\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_4 (Flatten) (None, 3072) 0 \n",
"_________________________________________________________________\n",
"alpha_dropout_1 (AlphaDropou (None, 3072) 0 \n",
"_________________________________________________________________\n",
"dense_64 (Dense) (None, 100) 307300 \n",
"_________________________________________________________________\n",
"alpha_dropout_2 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_65 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_3 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_66 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_4 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_67 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_5 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_68 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_6 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_69 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_7 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_70 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_8 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_71 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_9 (AlphaDropou (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_72 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_10 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_73 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_11 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_74 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_12 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_75 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_13 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_76 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_14 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_77 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_15 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_78 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_16 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_79 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_17 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_80 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_18 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_81 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_19 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_82 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"alpha_dropout_20 (AlphaDropo (None, 100) 0 \n",
"_________________________________________________________________\n",
"dense_83 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"dense_84 (Dense) (None, 10) 1010 \n",
"=================================================================\n",
"Total params: 500,210\n",
"Trainable params: 500,210\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# Construct Selu model (note that there are no Batch Normalization layers this time)\n",
"# Now with alpha dropout AFTER each hidden layer. Placing AlphaDropout before\n",
"# the first Dense (as originally written) drops 20% of the raw input pixels,\n",
"# which destroys information rather than regularizing hidden activations --\n",
"# consistent with the diverging validation loss seen in the recorded run.\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=cfg.input_shape))\n",
"for layer in range(cfg.n_hidden):\n",
"    model.add(Dense(cfg.n_neurons, activation=cfg.activation, kernel_initializer=cfg.kernel_initializer))\n",
"    model.add(AlphaDropout(rate=0.2))\n",
"model.add(Dense(cfg.n_classes, activation='softmax'))\n",
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 45000 samples, validate on 5000 samples\n",
"Epoch 1/5\n",
"45000/45000 [==============================] - 57s 1ms/step - loss: 2.2238 - accuracy: 0.1515 - val_loss: 92.7995 - val_accuracy: 0.1574\n",
"Epoch 2/5\n",
"45000/45000 [==============================] - 54s 1ms/step - loss: 2.1047 - accuracy: 0.1786 - val_loss: 35.0895 - val_accuracy: 0.1534\n",
"Epoch 3/5\n",
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0622 - accuracy: 0.1984 - val_loss: 26.8686 - val_accuracy: 0.1080\n",
"Epoch 4/5\n",
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0472 - accuracy: 0.2090 - val_loss: 12.6033 - val_accuracy: 0.1708\n",
"Epoch 5/5\n",
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0519 - accuracy: 0.2057 - val_loss: 17.4287 - val_accuracy: 0.1782\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.callbacks.History at 0x1631fa52d48>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
" validation_data = (X_val_scaled, y_val),\n",
" callbacks = [checkpoint_cb, \n",
" early_stopping_cb, \n",
" tensorboard_cb])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The tensorboard extension is already loaded. To reload it, use:\n",
" %reload_ext tensorboard\n"
]
},
{
"data": {
"text/plain": [
"Reusing TensorBoard on port 6006 (pid 11252), started 0:13:58 ago. (Use '!kill 11252' to kill it.)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" <iframe id=\"tensorboard-frame-50d3e8cf3006bcb9\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
" </iframe>\n",
" <script>\n",
" (function() {\n",
" const frame = document.getElementById(\"tensorboard-frame-50d3e8cf3006bcb9\");\n",
" const url = new URL(\"/\", window.location);\n",
" url.port = 6006;\n",
" frame.src = url;\n",
" })();\n",
" </script>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Load TensorBoard to port 6006\n",
"\n",
"%load_ext tensorboard\n",
"%tensorboard --logdir=./my_logs --port=6006"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Implement 1 Cycle Scheduling\n",
"# Thanks to https://github.com/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb\n",
"\n",
"batch_size = 128\n",
"K = keras.backend\n",
"\n",
"class OneCycleScheduler(keras.callbacks.Callback):\n",
" def __init__(self, iterations, max_rate, start_rate=None,\n",
" last_iterations=None, last_rate=None):\n",
" self.iterations = iterations\n",
" self.max_rate = max_rate\n",
" self.start_rate = start_rate or max_rate / 10\n",
" self.last_iterations = last_iterations or iterations // 10 + 1\n",
" self.half_iteration = (iterations - self.last_iterations) // 2\n",
" self.last_rate = last_rate or self.start_rate / 1000\n",
" self.iteration = 0\n",
" def _interpolate(self, iter1, iter2, rate1, rate2):\n",
" return ((rate2 - rate1) * (self.iteration - iter1)\n",
" / (iter2 - iter1) + rate1)\n",
" def on_batch_begin(self, batch, logs):\n",
" if self.iteration < self.half_iteration:\n",
" rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n",
" elif self.iteration < 2 * self.half_iteration:\n",
" rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n",
" self.max_rate, self.start_rate)\n",
" else:\n",
" rate = self._interpolate(2 * self.half_iteration, self.iterations,\n",
" self.start_rate, self.last_rate)\n",
" rate = max(rate, self.last_rate)\n",
" self.iteration += 1\n",
" K.set_value(self.model.optimizer.lr, rate)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"run_logdir = get_run_logdir() \n",
"\n",
"# TensorBoard callback\n",
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential_5\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_5 (Flatten) (None, 3072) 0 \n",
"_________________________________________________________________\n",
"dense_85 (Dense) (None, 100) 307300 \n",
"_________________________________________________________________\n",
"batch_normalization_21 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_86 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_22 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_87 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_23 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_88 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_24 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_89 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_25 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_90 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_26 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_91 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_27 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_92 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_28 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_93 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_29 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_94 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_30 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_95 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_31 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_96 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_32 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_97 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_33 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_98 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_34 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_99 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_35 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_100 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_36 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_101 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_37 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_102 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_38 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_103 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_39 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_104 (Dense) (None, 100) 10100 \n",
"_________________________________________________________________\n",
"batch_normalization_40 (Batc (None, 100) 400 \n",
"_________________________________________________________________\n",
"dense_105 (Dense) (None, 10) 1010 \n",
"=================================================================\n",
"Total params: 508,210\n",
"Trainable params: 504,210\n",
"Non-trainable params: 4,000\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"cfg.kernel_initializer = \"he_normal\"\n",
"cfg.activation='elu'\n",
"\n",
"model = Sequential()\n",
"model.add(Flatten(input_shape=cfg.input_shape))\n",
"for layer in range(cfg.n_hidden):\n",
" model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n",
" kernel_initializer=cfg.kernel_initializer))\n",
" model.add(BatchNormalization())\n",
"model.add(Dense(10, activation='softmax'))\n",
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 45000 samples, validate on 5000 samples\n",
"Epoch 1/5\n",
"45000/45000 [==============================] - 39s 867us/step - loss: 1.9622 - accuracy: 0.2782 - val_loss: 4.5324 - val_accuracy: 0.2428\n",
"Epoch 2/5\n",
"45000/45000 [==============================] - 31s 680us/step - loss: 1.7844 - accuracy: 0.3499 - val_loss: 9.2643 - val_accuracy: 0.3186\n",
"Epoch 3/5\n",
"45000/45000 [==============================] - 31s 690us/step - loss: 1.6917 - accuracy: 0.3895 - val_loss: 3.3850 - val_accuracy: 0.4072\n",
"Epoch 4/5\n",
"45000/45000 [==============================] - 25s 563us/step - loss: 1.5552 - accuracy: 0.4431 - val_loss: 1.7749 - val_accuracy: 0.4264\n",
"Epoch 5/5\n",
"45000/45000 [==============================] - 28s 625us/step - loss: 1.4320 - accuracy: 0.4866 - val_loss: 1.4649 - val_accuracy: 0.4712\n"
]
}
],
"source": [
"onecycle = OneCycleScheduler(len(X_train) // batch_size * cfg.n_epochs, max_rate=0.05)\n",
"history = model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, batch_size=batch_size,\n",
" validation_data=(X_val_scaled, y_val),\n",
" callbacks=[onecycle])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}