1130 lines
63 KiB
Plaintext
1130 lines
63 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Exercise 8**\n",
|
|
"\n",
|
|
"Train a Deep Neural Network on CIFAR10 image dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "SyntaxError",
|
|
"evalue": "invalid syntax (<ipython-input-8-dc5907620648>, line 1)",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-8-dc5907620648>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m import tensorflow-gpu as tf\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import tensorflow_gpu as tf\n",
|
|
"import numpy as np\n",
|
|
"import keras\n",
|
|
"from keras.datasets import cifar10\n",
|
|
"from keras.models import Sequential\n",
|
|
"from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, AlphaDropout\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"import os"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class config:\n",
|
|
" def __init__(self, n_classes = 10, n_epochs = 5, n_hidden = 20, \n",
|
|
" n_neurons = 100, kernel_initializer = \"he_normal\", \n",
|
|
" optimizer = keras.optimizers.Nadam(), \n",
|
|
" activation='elu'):\n",
|
|
" self.n_classes = n_classes\n",
|
|
" self.n_epochs = n_epochs\n",
|
|
" self.n_hidden = n_hidden\n",
|
|
" self.n_neurons = n_neurons\n",
|
|
" self.kernel_initializer = kernel_initializer\n",
|
|
" self.activation = activation\n",
|
|
" self.optimizer = optimizer\n",
|
|
" self.input_shape = [32, 32, 3]\n",
|
|
" self.loss = 'sparse_categorical_crossentropy'\n",
|
|
" self.metrics = ['accuracy']\n",
|
|
" \n",
|
|
"cfg = config()\n",
|
|
"\n",
|
|
"# Note that our n_epochs is low (only 5), this is to keep this exercise to a reasonable time frame\n",
|
|
"# To achieve better accuracy you can raise the epochs to a large amount\n",
|
|
"# and let our Early Stopping callback do its work"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"x_train shape: (50000, 32, 32, 3)\n",
|
|
"50000 train samples\n",
|
|
"10000 test samples\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# The data, split between train and test sets:\n",
|
|
"(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n",
|
|
"print('x_train shape:', x_train.shape)\n",
|
|
"print(x_train.shape[0], 'train samples')\n",
|
|
"print(x_test.shape[0], 'test samples')\n",
|
|
"\n",
|
|
"X_train = x_train.astype('float32')\n",
|
|
"X_test = x_test.astype('float32')\n",
|
|
"X_train /= 255\n",
|
|
"X_test /= 255\n",
|
|
"\n",
|
|
"# Split X_train into X_train and X_val\n",
|
|
"X_train, X_val, y_train, y_val = train_test_split(\n",
|
|
" x_train, y_train, test_size=0.1)\n",
|
|
"\n",
|
|
"pixel_means = X_train.mean(axis=0, keepdims=True)\n",
|
|
"pixel_stds = X_train.std(axis=0, keepdims=True)\n",
|
|
"X_train_scaled = (X_train - pixel_means) / pixel_stds\n",
|
|
"X_val_scaled = (X_val - pixel_means) / pixel_stds\n",
|
|
"X_test_scaled = (X_test - pixel_means) / pixel_stds"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Setup logdir for TensorBoard\n",
|
|
"root_logdir = os.path.join(os.curdir, 'my_logs')\n",
|
|
"\n",
|
|
"# Setup function to get directory for logging our current run\n",
|
|
"def get_run_logdir():\n",
|
|
" import time\n",
|
|
" run_id = time.strftime('run_%Y_%m_%d_%H_%M_%S')\n",
|
|
" return os.path.join(root_logdir, run_id)\n",
|
|
"\n",
|
|
"run_logdir = get_run_logdir() "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Model Checkpoint callback in case of crash\n",
|
|
"checkpoint_cb = keras.callbacks.ModelCheckpoint('simple_mlp.h5',\n",
|
|
" save_best_only=True)\n",
|
|
"\n",
|
|
"# Early Stopping callback\n",
|
|
"early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,\n",
|
|
" restore_best_weights=True)\n",
|
|
"\n",
|
|
"# TensorBoard callback\n",
|
|
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4185: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:3341: The name tf.log is deprecated. Please use tf.math.log instead.\n",
|
|
"\n",
|
|
"_________________________________________________________________\n",
|
|
"Layer (type) Output Shape Param # \n",
|
|
"=================================================================\n",
|
|
"flatten_1 (Flatten) (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_1 (Dense) (None, 100) 307300 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_2 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_3 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_4 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_5 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_6 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_7 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_8 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_9 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_10 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_11 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_12 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_13 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_14 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_15 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_16 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_17 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_18 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_19 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_20 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_21 (Dense) (None, 10) 1010 \n",
|
|
"=================================================================\n",
|
|
"Total params: 500,210\n",
|
|
"Trainable params: 500,210\n",
|
|
"Non-trainable params: 0\n",
|
|
"_________________________________________________________________\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Create baseline model with He Initialization, elu activation, and Nadam optimization\n",
|
|
"\n",
|
|
"model = Sequential()\n",
|
|
"model.add(Flatten(input_shape=cfg.input_shape))\n",
|
|
"for layer in range(cfg.n_hidden):\n",
|
|
" model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n",
|
|
" kernel_initializer=cfg.kernel_initializer))\n",
|
|
"model.add(Dense(10, activation='softmax'))\n",
|
|
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\tensorflow_core\\python\\ops\\math_grad.py:1424: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
|
|
"Instructions for updating:\n",
|
|
"Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:2741: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n",
|
|
"\n",
|
|
"Train on 45000 samples, validate on 5000 samples\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n",
|
|
"\n",
|
|
"WARNING:tensorflow:From C:\\Users\\TSB\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"ename": "InternalError",
|
|
"evalue": "cudaGetDevice() failed. Status: CUDA driver version is insufficient for CUDA runtime version",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[1;31mInternalError\u001b[0m Traceback (most recent call last)",
|
|
"\u001b[1;32m<ipython-input-7-6c9cdcf16f7a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 3\u001b[0m callbacks = [checkpoint_cb, \n\u001b[0;32m 4\u001b[0m \u001b[0mearly_stopping_cb\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m tensorboard_cb])\n\u001b[0m",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[0;32m 1037\u001b[0m \u001b[0minitial_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1038\u001b[0m \u001b[0msteps_per_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1039\u001b[1;33m validation_steps=validation_steps)\n\u001b[0m\u001b[0;32m 1040\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1041\u001b[0m def evaluate(self, x=None, y=None,\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\engine\\training_arrays.py\u001b[0m in \u001b[0;36mfit_loop\u001b[1;34m(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[0;32m 115\u001b[0m \u001b[0mcallback_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 116\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 117\u001b[1;33m \u001b[0mcallbacks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcallback_model\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 118\u001b[0m callbacks.set_params({\n\u001b[0;32m 119\u001b[0m \u001b[1;34m'batch_size'\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py\u001b[0m in \u001b[0;36mset_model\u001b[1;34m(self, model)\u001b[0m\n\u001b[0;32m 52\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mset_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcallback\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcallbacks\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 54\u001b[1;33m \u001b[0mcallback\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset_model\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 55\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mon_epoch_begin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlogs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\callbacks.py\u001b[0m in \u001b[0;36mset_model\u001b[1;34m(self, model)\u001b[0m\n\u001b[0;32m 788\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 789\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mK\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'tensorflow'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 790\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msess\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mK\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_session\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 791\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhistogram_freq\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmerged\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 792\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mlayer\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py\u001b[0m in \u001b[0;36mget_session\u001b[1;34m()\u001b[0m\n\u001b[0;32m 184\u001b[0m config = tf.ConfigProto(intra_op_parallelism_threads=num_thread,\n\u001b[0;32m 185\u001b[0m allow_soft_placement=True)\n\u001b[1;32m--> 186\u001b[1;33m \u001b[0m_SESSION\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 187\u001b[0m \u001b[0msession\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_SESSION\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 188\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0m_MANUAL_VAR_INIT\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\tensorflow_core\\python\\client\\session.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, target, graph, config)\u001b[0m\n\u001b[0;32m 1583\u001b[0m \u001b[0mprotocol\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mconfiguration\u001b[0m \u001b[0moptions\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mthe\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1584\u001b[0m \"\"\"\n\u001b[1;32m-> 1585\u001b[1;33m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgraph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1586\u001b[0m \u001b[1;31m# NOTE(mrry): Create these on first `__enter__` to avoid a reference cycle.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1587\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_default_graph_context_manager\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;32m~\\Miniconda3\\envs\\geron_env\\lib\\site-packages\\tensorflow_core\\python\\client\\session.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, target, graph, config)\u001b[0m\n\u001b[0;32m 697\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 698\u001b[0m \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 699\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_session\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtf_session\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTF_NewSessionRef\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_graph\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_c_graph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mopts\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 700\u001b[0m \u001b[1;31m# pylint: enable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 701\u001b[0m \u001b[1;32mfinally\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
|
"\u001b[1;31mInternalError\u001b[0m: cudaGetDevice() failed. Status: CUDA driver version is insufficient for CUDA runtime version"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
|
|
" validation_data = (X_val_scaled, y_val),\n",
|
|
" callbacks = [checkpoint_cb, \n",
|
|
" early_stopping_cb, \n",
|
|
" tensorboard_cb])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 5612."
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load TensorBoard to port 6006\n",
|
|
"\n",
|
|
"%load_ext tensorboard\n",
|
|
"%tensorboard --logdir=./my_logs --port=6006"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Model: \"sequential_2\"\n",
|
|
"_________________________________________________________________\n",
|
|
"Layer (type) Output Shape Param # \n",
|
|
"=================================================================\n",
|
|
"flatten_2 (Flatten) (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_22 (Dense) (None, 100) 307300 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_1 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_23 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_2 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_24 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_3 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_25 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_4 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_26 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_5 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_27 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_6 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_28 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_7 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_29 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_8 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_30 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_9 (Batch (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_31 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_10 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_32 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_11 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_33 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_12 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_34 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_13 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_35 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_14 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_36 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_15 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_37 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_16 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_38 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_17 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_39 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_18 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_40 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_19 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_41 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_20 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_42 (Dense) (None, 10) 1010 \n",
|
|
"=================================================================\n",
|
|
"Total params: 508,210\n",
|
|
"Trainable params: 504,210\n",
|
|
"Non-trainable params: 4,000\n",
|
|
"_________________________________________________________________\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Create same model, this time adding Batch Normalization after each hidden layer\n",
|
|
"\n",
|
|
"model = Sequential()\n",
|
|
"model.add(Flatten(input_shape=cfg.input_shape))\n",
|
|
"for layer in range(cfg.n_hidden):\n",
|
|
" model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n",
|
|
" kernel_initializer=cfg.kernel_initializer))\n",
|
|
" model.add(BatchNormalization())\n",
|
|
"model.add(Dense(10, activation='softmax'))\n",
|
|
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_logdir = get_run_logdir() \n",
|
|
"\n",
|
|
"# TensorBoard callback\n",
|
|
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Train on 45000 samples, validate on 5000 samples\n",
|
|
"Epoch 1/5\n",
|
|
" 224/45000 [..............................] - ETA: 29:09 - loss: 3.3145 - accuracy: 0.0670 "
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\keras\\callbacks\\callbacks.py:95: RuntimeWarning: Method (on_train_batch_end) is slow compared to the batch update (0.137934). Check your callbacks.\n",
|
|
" % (hook_name, delta_t_median), RuntimeWarning)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"45000/45000 [==============================] - 56s 1ms/step - loss: 1.9594 - accuracy: 0.2809 - val_loss: 1.8254 - val_accuracy: 0.3464\n",
|
|
"Epoch 2/5\n",
|
|
"45000/45000 [==============================] - 45s 1ms/step - loss: 1.7866 - accuracy: 0.3526 - val_loss: 1.6686 - val_accuracy: 0.3880\n",
|
|
"Epoch 3/5\n",
|
|
"45000/45000 [==============================] - 44s 981us/step - loss: 1.6870 - accuracy: 0.3946 - val_loss: 1.6479 - val_accuracy: 0.4138\n",
|
|
"Epoch 4/5\n",
|
|
"45000/45000 [==============================] - 44s 983us/step - loss: 1.6167 - accuracy: 0.4242 - val_loss: 1.6013 - val_accuracy: 0.4314\n",
|
|
"Epoch 5/5\n",
|
|
"45000/45000 [==============================] - 45s 1ms/step - loss: 1.5659 - accuracy: 0.4438 - val_loss: 1.5529 - val_accuracy: 0.4552\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<keras.callbacks.callbacks.History at 0x16288ae3348>"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
|
|
" validation_data = (X_val_scaled, y_val),\n",
|
|
" callbacks = [checkpoint_cb, \n",
|
|
" early_stopping_cb, \n",
|
|
" tensorboard_cb])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The tensorboard extension is already loaded. To reload it, use:\n",
|
|
" %reload_ext tensorboard\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Reusing TensorBoard on port 6006 (pid 11252), started 0:05:18 ago. (Use '!kill 11252' to kill it.)"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"\n",
|
|
" <iframe id=\"tensorboard-frame-26121546b6ef8b90\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
|
|
" </iframe>\n",
|
|
" <script>\n",
|
|
" (function() {\n",
|
|
" const frame = document.getElementById(\"tensorboard-frame-26121546b6ef8b90\");\n",
|
|
" const url = new URL(\"/\", window.location);\n",
|
|
" url.port = 6006;\n",
|
|
" frame.src = url;\n",
|
|
" })();\n",
|
|
" </script>\n",
|
|
" "
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load TensorBoard to port 6006\n",
|
|
"\n",
|
|
"%load_ext tensorboard\n",
|
|
"%tensorboard --logdir=./my_logs --port=6006"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_logdir = get_run_logdir() \n",
|
|
"\n",
|
|
"# TensorBoard callback\n",
|
|
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
|
|
"\n",
|
|
"# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n",
|
|
"cfg.kernel_initializer='lecun_normal'\n",
|
|
"cfg.activation='selu'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Model: \"sequential_3\"\n",
|
|
"_________________________________________________________________\n",
|
|
"Layer (type) Output Shape Param # \n",
|
|
"=================================================================\n",
|
|
"flatten_3 (Flatten) (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_43 (Dense) (None, 100) 307300 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_44 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_45 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_46 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_47 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_48 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_49 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_50 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_51 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_52 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_53 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_54 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_55 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_56 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_57 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_58 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_59 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_60 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_61 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_62 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_63 (Dense) (None, 10) 1010 \n",
|
|
"=================================================================\n",
|
|
"Total params: 500,210\n",
|
|
"Trainable params: 500,210\n",
|
|
"Non-trainable params: 0\n",
|
|
"_________________________________________________________________\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Construct Selu model (note that there are no Batch Normalization layers this time)\n",
|
|
"model = Sequential()\n",
|
|
"model.add(Flatten(input_shape=cfg.input_shape))\n",
|
|
"for layer in range(cfg.n_hidden):\n",
|
|
" model.add(Dense(cfg.n_neurons, activation=cfg.activation))\n",
|
|
"model.add(Dense(10, activation='softmax'))\n",
|
|
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Train on 45000 samples, validate on 5000 samples\n",
|
|
"Epoch 1/5\n",
|
|
"45000/45000 [==============================] - 40s 883us/step - loss: 2.1985 - accuracy: 0.2347 - val_loss: 1.8834 - val_accuracy: 0.2900\n",
|
|
"Epoch 2/5\n",
|
|
"45000/45000 [==============================] - 42s 923us/step - loss: 1.8693 - accuracy: 0.2965 - val_loss: 1.8639 - val_accuracy: 0.2964\n",
|
|
"Epoch 3/5\n",
|
|
"45000/45000 [==============================] - 39s 876us/step - loss: 1.8027 - accuracy: 0.3231 - val_loss: 1.7998 - val_accuracy: 0.3196\n",
|
|
"Epoch 4/5\n",
|
|
"45000/45000 [==============================] - 39s 859us/step - loss: 1.7604 - accuracy: 0.3451 - val_loss: 1.8129 - val_accuracy: 0.3088\n",
|
|
"Epoch 5/5\n",
|
|
"45000/45000 [==============================] - 38s 852us/step - loss: 1.7392 - accuracy: 0.3527 - val_loss: 1.6917 - val_accuracy: 0.3714\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<keras.callbacks.callbacks.History at 0x1629adb5c48>"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
|
|
" validation_data = (X_val_scaled, y_val),\n",
|
|
" callbacks = [checkpoint_cb, \n",
|
|
" early_stopping_cb, \n",
|
|
" tensorboard_cb])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_logdir = get_run_logdir() \n",
|
|
"\n",
|
|
"# TensorBoard callback\n",
|
|
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)\n",
|
|
"\n",
|
|
"# Prepare configuration for a Selu based model (needs mean of 0 and standard deviation of 1)\n",
|
|
"cfg.kernel_initializer='lecun_normal'\n",
|
|
"cfg.activation='selu'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Model: \"sequential_4\"\n",
|
|
"_________________________________________________________________\n",
|
|
"Layer (type) Output Shape Param # \n",
|
|
"=================================================================\n",
|
|
"flatten_4 (Flatten) (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_1 (AlphaDropou (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_64 (Dense) (None, 100) 307300 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_2 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_65 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_3 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_66 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_4 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_67 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_5 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_68 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_6 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_69 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_7 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_70 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_8 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_71 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_9 (AlphaDropou (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_72 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_10 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_73 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_11 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_74 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_12 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_75 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_13 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_76 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_14 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_77 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_15 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_78 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_16 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_79 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_17 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_80 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_18 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_81 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_19 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_82 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"alpha_dropout_20 (AlphaDropo (None, 100) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_83 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_84 (Dense) (None, 10) 1010 \n",
|
|
"=================================================================\n",
|
|
"Total params: 500,210\n",
|
|
"Trainable params: 500,210\n",
|
|
"Non-trainable params: 0\n",
|
|
"_________________________________________________________________\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Construct Selu model (note that there are no Batch Normalization layers this time)\n",
|
|
"# Now with alpha dropout\n",
|
|
"model = Sequential()\n",
|
|
"model.add(Flatten(input_shape=cfg.input_shape))\n",
|
|
"for layer in range(cfg.n_hidden):\n",
|
|
" model.add(AlphaDropout(rate=0.2))\n",
|
|
" model.add(Dense(cfg.n_neurons, activation=cfg.activation, kernel_initializer=cfg.kernel_initializer))\n",
|
|
"model.add(Dense(10, activation='softmax'))\n",
|
|
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Train on 45000 samples, validate on 5000 samples\n",
|
|
"Epoch 1/5\n",
|
|
"45000/45000 [==============================] - 57s 1ms/step - loss: 2.2238 - accuracy: 0.1515 - val_loss: 92.7995 - val_accuracy: 0.1574\n",
|
|
"Epoch 2/5\n",
|
|
"45000/45000 [==============================] - 54s 1ms/step - loss: 2.1047 - accuracy: 0.1786 - val_loss: 35.0895 - val_accuracy: 0.1534\n",
|
|
"Epoch 3/5\n",
|
|
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0622 - accuracy: 0.1984 - val_loss: 26.8686 - val_accuracy: 0.1080\n",
|
|
"Epoch 4/5\n",
|
|
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0472 - accuracy: 0.2090 - val_loss: 12.6033 - val_accuracy: 0.1708\n",
|
|
"Epoch 5/5\n",
|
|
"45000/45000 [==============================] - 58s 1ms/step - loss: 2.0519 - accuracy: 0.2057 - val_loss: 17.4287 - val_accuracy: 0.1782\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<keras.callbacks.callbacks.History at 0x1631fa52d48>"
|
|
]
|
|
},
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, \n",
|
|
" validation_data = (X_val_scaled, y_val),\n",
|
|
" callbacks = [checkpoint_cb, \n",
|
|
" early_stopping_cb, \n",
|
|
" tensorboard_cb])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"The tensorboard extension is already loaded. To reload it, use:\n",
|
|
" %reload_ext tensorboard\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Reusing TensorBoard on port 6006 (pid 11252), started 0:13:58 ago. (Use '!kill 11252' to kill it.)"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"\n",
|
|
" <iframe id=\"tensorboard-frame-50d3e8cf3006bcb9\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
|
|
" </iframe>\n",
|
|
" <script>\n",
|
|
" (function() {\n",
|
|
" const frame = document.getElementById(\"tensorboard-frame-50d3e8cf3006bcb9\");\n",
|
|
" const url = new URL(\"/\", window.location);\n",
|
|
" url.port = 6006;\n",
|
|
" frame.src = url;\n",
|
|
" })();\n",
|
|
" </script>\n",
|
|
" "
|
|
],
|
|
"text/plain": [
|
|
"<IPython.core.display.HTML object>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load TensorBoard to port 6006\n",
|
|
"\n",
|
|
"%load_ext tensorboard\n",
|
|
"%tensorboard --logdir=./my_logs --port=6006"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Implement 1 Cycle Scheduling\n",
|
|
"# Thanks to https://github.com/ageron/handson-ml2/blob/master/11_training_deep_neural_networks.ipynb\n",
|
|
"\n",
|
|
"batch_size = 128\n",
|
|
"K = keras.backend\n",
|
|
"\n",
|
|
"class OneCycleScheduler(keras.callbacks.Callback):\n",
|
|
" def __init__(self, iterations, max_rate, start_rate=None,\n",
|
|
" last_iterations=None, last_rate=None):\n",
|
|
" self.iterations = iterations\n",
|
|
" self.max_rate = max_rate\n",
|
|
" self.start_rate = start_rate or max_rate / 10\n",
|
|
" self.last_iterations = last_iterations or iterations // 10 + 1\n",
|
|
" self.half_iteration = (iterations - self.last_iterations) // 2\n",
|
|
" self.last_rate = last_rate or self.start_rate / 1000\n",
|
|
" self.iteration = 0\n",
|
|
" def _interpolate(self, iter1, iter2, rate1, rate2):\n",
|
|
" return ((rate2 - rate1) * (self.iteration - iter1)\n",
|
|
" / (iter2 - iter1) + rate1)\n",
|
|
" def on_batch_begin(self, batch, logs):\n",
|
|
" if self.iteration < self.half_iteration:\n",
|
|
" rate = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)\n",
|
|
" elif self.iteration < 2 * self.half_iteration:\n",
|
|
" rate = self._interpolate(self.half_iteration, 2 * self.half_iteration,\n",
|
|
" self.max_rate, self.start_rate)\n",
|
|
" else:\n",
|
|
" rate = self._interpolate(2 * self.half_iteration, self.iterations,\n",
|
|
" self.start_rate, self.last_rate)\n",
|
|
" rate = max(rate, self.last_rate)\n",
|
|
" self.iteration += 1\n",
|
|
" K.set_value(self.model.optimizer.lr, rate)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_logdir = get_run_logdir() \n",
|
|
"\n",
|
|
"# TensorBoard callback\n",
|
|
"tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Model: \"sequential_5\"\n",
|
|
"_________________________________________________________________\n",
|
|
"Layer (type) Output Shape Param # \n",
|
|
"=================================================================\n",
|
|
"flatten_5 (Flatten) (None, 3072) 0 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_85 (Dense) (None, 100) 307300 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_21 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_86 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_22 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_87 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_23 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_88 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_24 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_89 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_25 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_90 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_26 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_91 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_27 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_92 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_28 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_93 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_29 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_94 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_30 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_95 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_31 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_96 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_32 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_97 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_33 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_98 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_34 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_99 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_35 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_100 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_36 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_101 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_37 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_102 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_38 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_103 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_39 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_104 (Dense) (None, 100) 10100 \n",
|
|
"_________________________________________________________________\n",
|
|
"batch_normalization_40 (Batc (None, 100) 400 \n",
|
|
"_________________________________________________________________\n",
|
|
"dense_105 (Dense) (None, 10) 1010 \n",
|
|
"=================================================================\n",
|
|
"Total params: 508,210\n",
|
|
"Trainable params: 504,210\n",
|
|
"Non-trainable params: 4,000\n",
|
|
"_________________________________________________________________\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cfg.kernel_initializer = \"he_normal\"\n",
|
|
"cfg.activation='elu'\n",
|
|
"\n",
|
|
"model = Sequential()\n",
|
|
"model.add(Flatten(input_shape=cfg.input_shape))\n",
|
|
"for layer in range(cfg.n_hidden):\n",
|
|
" model.add(Dense(cfg.n_neurons, activation=cfg.activation, \n",
|
|
" kernel_initializer=cfg.kernel_initializer))\n",
|
|
" model.add(BatchNormalization())\n",
|
|
"model.add(Dense(10, activation='softmax'))\n",
|
|
"model.compile(loss=cfg.loss, optimizer=cfg.optimizer, metrics=cfg.metrics)\n",
|
|
"model.summary()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Train on 45000 samples, validate on 5000 samples\n",
|
|
"Epoch 1/5\n",
|
|
"45000/45000 [==============================] - 39s 867us/step - loss: 1.9622 - accuracy: 0.2782 - val_loss: 4.5324 - val_accuracy: 0.2428\n",
|
|
"Epoch 2/5\n",
|
|
"45000/45000 [==============================] - 31s 680us/step - loss: 1.7844 - accuracy: 0.3499 - val_loss: 9.2643 - val_accuracy: 0.3186\n",
|
|
"Epoch 3/5\n",
|
|
"45000/45000 [==============================] - 31s 690us/step - loss: 1.6917 - accuracy: 0.3895 - val_loss: 3.3850 - val_accuracy: 0.4072\n",
|
|
"Epoch 4/5\n",
|
|
"45000/45000 [==============================] - 25s 563us/step - loss: 1.5552 - accuracy: 0.4431 - val_loss: 1.7749 - val_accuracy: 0.4264\n",
|
|
"Epoch 5/5\n",
|
|
"45000/45000 [==============================] - 28s 625us/step - loss: 1.4320 - accuracy: 0.4866 - val_loss: 1.4649 - val_accuracy: 0.4712\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"onecycle = OneCycleScheduler(len(X_train) // batch_size * cfg.n_epochs, max_rate=0.05)\n",
|
|
"history = model.fit(X_train_scaled, y_train, epochs=cfg.n_epochs, batch_size=batch_size,\n",
|
|
" validation_data=(X_val_scaled, y_val),\n",
|
|
" callbacks=[onecycle])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|