{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**Exercise 8**\n", "\n", "Train a Deep Neural Network on CIFAR10 image dataset" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "import tensorflow as tf\n", "import numpy as np\n", "import keras\n", "from keras.datasets import cifar10\n", "from keras.models import Sequential\n", "from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, AlphaDropout\n", "from sklearn.model_selection import train_test_split\n", "import os" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "class config:\n", " def __init__(self, n_classes = 10, n_epochs = 5, n_hidden = 20, \n", " n_neurons = 100, kernel_initializer = \"he_normal\", \n", " optimizer = keras.optimizers.Nadam(), \n", " activation='elu'):\n", " self.n_classes = n_classes\n", " self.n_epochs = n_epochs\n", " self.n_hidden = n_hidden\n", " self.n_neurons = n_neurons\n", " self.kernel_initializer = kernel_initializer\n", " self.activation = activation\n", " self.optimizer = optimizer\n", " self.input_shape = [32, 32, 3]\n", " self.loss = 'sparse_categorical_crossentropy'\n", " self.metrics = ['accuracy']\n", " \n", "cfg = config()\n", "\n", "# Note that our n_epochs is low (only 5), this is to keep this exercise to a reasonable time frame\n", "# To achieve better accuracy you can raise the epochs to a large armount\n", "# and let our Early Stopping callback do its work" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x_train shape: (50000, 32, 32, 3)\n", "50000 train samples\n", "10000 test samples\n" ] } ], "source": [ "# The data, split between train and test sets:\n", "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", "print('x_train shape:', x_train.shape)\n", "print(x_train.shape[0], 'train samples')\n", "print(x_test.shape[0], 'test samples')\n", "\n", "X_train = x_train.astype('float32')\n", "X_test = x_test.astype('float32')\n", "X_train /= 255\n", "X_test /= 255\n", "\n", "# Split X_train into X_train and X_val\n", "X_train, X_val, y_train, y_val = train_test_split(\n", " x_train, y_train, test_size=0.1)\n", "\n", "pixel_means = X_train.mean(axis=0, keepdims=True)\n", "pixel_stds = X_train.std(axis=0, keepdims=True)\n", "X_train_scaled = (X_train - pixel_means) / pixel_stds\n", "X_val_scaled = (X_val - pixel_means) / pixel_stds\n", "X_test_scaled = (X_test - pixel_means) / pixel_stds" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Setup logdir for TensorBoard\n", "root_logdir = os.path.join(os.curdir, 'my_logs')\n", "\n", "# Setup function to get directory for logging our current run\n", "def get_run_logdir():\n", " import time\n", " run_id = time.strftime('run_%Y_%m_%d_%H_%M_%S')\n", " return os.path.join(root_logdir, run_id)\n", "\n", "run_logdir = get_run_logdir() " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Model Checkpoint callback incase of crash\n", "checkpoint_cb = keras.callbacks.ModelCheckpoint('simple_mlp.h5',\n", " save_best_only=True)\n", "\n", "# Early Stopping callback\n", "early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,\n", " restore_best_weights=True)\n", "\n", "# TensorBoard callback\n", "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:3341: The name tf.log is deprecated. Please use tf.math.log instead.\n", "\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_1 (Flatten) (None, 3072) 0 \n", "_________________________________________________________________\n", "dense_1 (Dense) (None, 100) 307300 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_3 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_4 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_5 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_7 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_8 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_9 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_10 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_11 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_12 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_13 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_14 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_15 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_16 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_17 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_18 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_19 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_20 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_21 (Dense) (None, 10) 1010 \n", "=================================================================\n", "Total params: 500,210\n", "Trainable params: 500,210\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "# Create baseline model with He Initialization, elu activation, and Nadam optimization\n", "\n", "model = Sequential()\n", "model.add(Flatten(input_shape=cfg.input_shape))\n", "for layer in range(cfg.n_hidden):\n", " model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n", " kernel_initializer=cfg.kernel_initializer))\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\tensorflow_core\\python\\ops\\math_grad.py:1424: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use tf.where in 2.0, which has the same broadcast rule as np.where\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:2741: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", "\n", "Train on 45000 samples, validate on 5000 samples\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:850: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.\n", "\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:853: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n", "\n", "Epoch 1/5\n", "45000/45000 [==============================] - 76s 2ms/step - loss: 1.9980 - acc: 0.2697 - val_loss: 2.0319 - val_acc: 0.2578\n", "WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py:995: The name tf.Summary is deprecated. Please use tf.compat.v1.Summary instead.\n", "\n", "Epoch 2/5\n", "45000/45000 [==============================] - 68s 2ms/step - loss: 1.8190 - acc: 0.3308 - val_loss: 1.8712 - val_acc: 0.3270\n", "Epoch 3/5\n", "45000/45000 [==============================] - 73s 2ms/step - loss: 1.7708 - acc: 0.3539 - val_loss: 1.8496 - val_acc: 0.3278\n", "Epoch 4/5\n", "45000/45000 [==============================] - 61s 1ms/step - loss: 1.7449 - acc: 0.3714 - val_loss: 1.7296 - val_acc: 0.3866\n", "Epoch 5/5\n", "45000/45000 [==============================] - 41s 919us/step - loss: 1.7015 - acc: 0.3888 - val_loss: 1.7038 - val_acc: 0.3828\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n", " validation_data = (X_val_scaled, y_val),\n", " callbacks = [checkpoint_cb, \n", " early_stopping_cb, \n", " tensorboard_cb])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Launching TensorBoard..." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load TensorBoard to port 6006\n", "\n", "%load_ext tensorboard\n", "%tensorboard --logdir=./my_logs --port=6006" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create same model, this time adding Batch Normalization before each hidden layer\n", "\n", "model = Sequential()\n", "model.add(Flatten(input_shape=cfg.input_shape))\n", "for layer in range(cfg.n_hidden):\n", " model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n", " kernel_initializer=cfg.kernel_initializer))\n", " model.add(BatchNormalization())\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "run_logdir = get_run_logdir() \n", "\n", "# TensorBoard callback\n", "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n", " validation_data = (X_val_scaled, y_val),\n", " callbacks = [checkpoint_cb, \n", " early_stopping_cb, \n", " tensorboard_cb])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load TensorBoard to port 6006\n", "\n", "%load_ext tensorboard\n", "%tensorboard --logdir=./my_logs --port=6006" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "run_logdir = get_run_logdir() \n", "\n", "# TensorBoard callback\n", "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", "\n", "# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n", "cfg.kernel_initializer='lecun_normal'\n", "cfg.activation='selu'" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_3\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_3 (Flatten) (None, 3072) 0 \n", "_________________________________________________________________\n", "dense_43 (Dense) (None, 100) 307300 \n", "_________________________________________________________________\n", "dense_44 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_45 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_46 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_47 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_48 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_49 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_50 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_51 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_52 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_53 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_54 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_55 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_56 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_57 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_58 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_59 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_60 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_61 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_62 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_63 (Dense) (None, 10) 1010 \n", "=================================================================\n", "Total params: 500,210\n", "Trainable params: 500,210\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "# Construct Selu model (note that there are no Batch Normalization layers this time)\n", "model = Sequential()\n", "model.add(Flatten(input_shape=cfg.input_shape))\n", "for layer in range(cfg.n_hidden):\n", " model.add(Dense(cfg.n_neurons, activation=cfg.activation))\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 45000 samples, validate on 5000 samples\n", "Epoch 1/5\n", "45000/45000 [==============================] - 40s 883us/step - loss: 2.1985 - accuracy: 0.2347 - val_loss: 1.8834 - val_accuracy: 0.2900\n", "Epoch 2/5\n", "45000/45000 [==============================] - 42s 923us/step - loss: 1.8693 - accuracy: 0.2965 - val_loss: 1.8639 - val_accuracy: 0.2964\n", "Epoch 3/5\n", "45000/45000 [==============================] - 39s 876us/step - loss: 1.8027 - accuracy: 0.3231 - val_loss: 1.7998 - val_accuracy: 0.3196\n", "Epoch 4/5\n", "45000/45000 [==============================] - 39s 859us/step - loss: 1.7604 - accuracy: 0.3451 - val_loss: 1.8129 - val_accuracy: 0.3088\n", "Epoch 5/5\n", "45000/45000 [==============================] - 38s 852us/step - loss: 1.7392 - accuracy: 0.3527 - val_loss: 1.6917 - val_accuracy: 0.3714\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n", " validation_data = (X_val_scaled, y_val),\n", " callbacks = [checkpoint_cb, \n", " early_stopping_cb, \n", " tensorboard_cb])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "run_logdir = get_run_logdir() \n", "\n", "# TensorBoard callback\n", "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n", "\n", "# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n", "cfg.kernel_initializer='lecun_normal'\n", "cfg.activation='selu'" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_4\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_4 (Flatten) (None, 3072) 0 \n", "_________________________________________________________________\n", "alpha_dropout_1 (AlphaDropou (None, 3072) 0 \n", "_________________________________________________________________\n", "dense_64 (Dense) (None, 100) 307300 \n", "_________________________________________________________________\n", "alpha_dropout_2 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_65 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_3 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_66 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_4 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_67 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_5 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_68 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_6 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_69 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_7 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_70 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_8 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_71 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_9 (AlphaDropou (None, 100) 0 \n", "_________________________________________________________________\n", "dense_72 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_10 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_73 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_11 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_74 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_12 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_75 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_13 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_76 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_14 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_77 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_15 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_78 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_16 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_79 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_17 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_80 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_18 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_81 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_19 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_82 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "alpha_dropout_20 (AlphaDropo (None, 100) 0 \n", "_________________________________________________________________\n", "dense_83 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "dense_84 (Dense) (None, 10) 1010 \n", "=================================================================\n", "Total params: 500,210\n", "Trainable params: 500,210\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "# Construct Selu model (note that there are no Batch Normalization layers this time)\n", "# Now with alpha dropout\n", "model = Sequential()\n", "model.add(Flatten(input_shape=cfg.input_shape))\n", "for layer in range(cfg.n_hidden):\n", " model.add(AlphaDropout(rate=0.2))\n", " model.add(Dense(cfg.n_neurons, activation=cfg.activation, kernel_initializer=cfg.kernel_initializer))\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 45000 samples, validate on 5000 samples\n", "Epoch 1/5\n", "45000/45000 [==============================] - 57s 1ms/step - loss: 2.2238 - accuracy: 0.1515 - val_loss: 92.7995 - val_accuracy: 0.1574\n", "Epoch 2/5\n", "45000/45000 [==============================] - 54s 1ms/step - loss: 2.1047 - accuracy: 0.1786 - val_loss: 35.0895 - val_accuracy: 0.1534\n", "Epoch 3/5\n", "45000/45000 [==============================] - 58s 1ms/step - loss: 2.0622 - accuracy: 0.1984 - val_loss: 26.8686 - val_accuracy: 0.1080\n", "Epoch 4/5\n", "45000/45000 [==============================] - 58s 1ms/step - loss: 2.0472 - accuracy: 0.2090 - val_loss: 12.6033 - val_accuracy: 0.1708\n", "Epoch 5/5\n", "45000/45000 [==============================] - 58s 1ms/step - loss: 2.0519 - accuracy: 0.2057 - val_loss: 17.4287 - val_accuracy: 0.1782\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n", " validation_data = (X_val_scaled, y_val),\n", " callbacks = [checkpoint_cb, \n", " early_stopping_cb, \n", " tensorboard_cb])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The tensorboard extension is already loaded. To reload it, use:\n", " %reload_ext tensorboard\n" ] }, { "data": { "text/plain": [ "Reusing TensorBoard on port 6006 (pid 11252), started 0:13:58 ago. (Use '!kill 11252' to kill it.)" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load TensorBoard to port 6006\n", "\n", "%load_ext tensorboard\n", "%tensorboard --logdir=./my_logs --port=6006" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Implement 1 Cycle Scheduling\n", "# Thanks to https://github.com/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb\n", "\n", "batch_size = 128\n", "K = keras.backend\n", "\n", "class OneCycleScheduler(keras.callbacks.Callback):\n", " def __init__(self, iterations, max_rate, start_rate=None,\n", " last_iterations=None, last_rate=None):\n", " self.iterations = iterations\n", " self.max_rate = max_rate\n", " self.start_rate = start_rate or max_rate / 10\n", " self.last_iterations = last_iterations or iterations // 10 + 1\n", " self.half_iteration = (iterations - self.last_iterations) // 2\n", " self.last_rate = last_rate or self.start_rate / 1000\n", " self.iteration = 0\n", " def _interpolate(self, iter1, iter2, rate1, rate2):\n", " return ((rate2 - rate1) * (self.iteration - iter1)\n", " / (iter2 - iter1) + rate1)\n", " def on_batch_begin(self, batch, logs):\n", " if self.iteration < self.half_iteration:\n", " rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n", " elif self.iteration < 2 * self.half_iteration:\n", " rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n", " self.max_rate, self.start_rate)\n", " else:\n", " rate = self._interpolate(2 * self.half_iteration, self.iterations,\n", " self.start_rate, self.last_rate)\n", " rate = max(rate, self.last_rate)\n", " self.iteration += 1\n", " K.set_value(self.model.optimizer.lr, rate)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "run_logdir = get_run_logdir() \n", "\n", "# TensorBoard callback\n", "tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_5\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_5 (Flatten) (None, 3072) 0 \n", "_________________________________________________________________\n", "dense_85 (Dense) (None, 100) 307300 \n", "_________________________________________________________________\n", "batch_normalization_21 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_86 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_22 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_87 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_23 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_88 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_24 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_89 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_25 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_90 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_26 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_91 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_27 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_92 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_28 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_93 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_29 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_94 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_30 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_95 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_31 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_96 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_32 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_97 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_33 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_98 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_34 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_99 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_35 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_100 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_36 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_101 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_37 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_102 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_38 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_103 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_39 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_104 (Dense) (None, 100) 10100 \n", "_________________________________________________________________\n", "batch_normalization_40 (Batc (None, 100) 400 \n", "_________________________________________________________________\n", "dense_105 (Dense) (None, 10) 1010 \n", "=================================================================\n", "Total params: 508,210\n", "Trainable params: 504,210\n", "Non-trainable params: 4,000\n", "_________________________________________________________________\n" ] } ], "source": [ "cfg.kernel_initializer = \"he_normal\"\n", "cfg.activation='elu'\n", "\n", "model = Sequential()\n", "model.add(Flatten(input_shape=cfg.input_shape))\n", "for layer in range(cfg.n_hidden):\n", " model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n", " kernel_initializer=cfg.kernel_initializer))\n", " model.add(BatchNormalization())\n", "model.add(Dense(10, activation='softmax'))\n", "model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n", "model.summary()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train on 45000 samples, validate on 5000 samples\n", "Epoch 1/5\n", "45000/45000 [==============================] - 39s 867us/step - loss: 1.9622 - accuracy: 0.2782 - val_loss: 4.5324 - val_accuracy: 0.2428\n", "Epoch 2/5\n", "45000/45000 [==============================] - 31s 680us/step - loss: 1.7844 - accuracy: 0.3499 - val_loss: 9.2643 - val_accuracy: 0.3186\n", "Epoch 3/5\n", "45000/45000 [==============================] - 31s 690us/step - loss: 1.6917 - accuracy: 0.3895 - val_loss: 3.3850 - val_accuracy: 0.4072\n", "Epoch 4/5\n", "45000/45000 [==============================] - 25s 563us/step - loss: 1.5552 - accuracy: 0.4431 - val_loss: 1.7749 - val_accuracy: 0.4264\n", "Epoch 5/5\n", "45000/45000 [==============================] - 28s 625us/step - loss: 1.4320 - accuracy: 0.4866 - val_loss: 1.4649 - val_accuracy: 0.4712\n" ] } ], "source": [ "onecycle = OneCycleScheduler(len(X_train) // batch_size * cfg.n_epochs, max_rate=0.05)\n", "history = model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, batch_size=batch_size,\n", " validation_data=(X_val_scaled, y_val),\n", " callbacks=[onecycle])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 2 }