add some chapter exercises

2019-12-20 10:11:36 -08:00 · 2019-12-20 10:11:36 -08:00 · e80eae2468
parent ce2759452b
commit e80eae2468
10 changed files with 6478 additions and 0 deletions
--- a/Ch4/.ipynb_checkpoints/exercises-checkpoint.ipynb
+++ b/Ch4/.ipynb_checkpoints/exercises-checkpoint.ipynb
@ -0,0 +1,437 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 12**\n",
+    "\n",
+    "Implement batch gradient descent from scratch (no SKLearn!)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "from matplotlib import pyplot as plt\n",
+    "from sklearn import datasets\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "iris = datasets.load_iris()\n",
+    "list(iris.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _iris_dataset:\n",
+      "\n",
+      "Iris plants dataset\n",
+      "--------------------\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "    :Number of Instances: 150 (50 in each of three classes)\n",
+      "    :Number of Attributes: 4 numeric, predictive attributes and the class\n",
+      "    :Attribute Information:\n",
+      "        - sepal length in cm\n",
+      "        - sepal width in cm\n",
+      "        - petal length in cm\n",
+      "        - petal width in cm\n",
+      "        - class:\n",
+      "                - Iris-Setosa\n",
+      "                - Iris-Versicolour\n",
+      "                - Iris-Virginica\n",
+      "                \n",
+      "    :Summary Statistics:\n",
+      "\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "                    Min  Max   Mean    SD   Class Correlation\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "    sepal length:   4.3  7.9   5.84   0.83    0.7826\n",
+      "    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n",
+      "    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n",
+      "    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "\n",
+      "    :Missing Attribute Values: None\n",
+      "    :Class Distribution: 33.3% for each of 3 classes.\n",
+      "    :Creator: R.A. Fisher\n",
+      "    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n",
+      "    :Date: July, 1988\n",
+      "\n",
+      "The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n",
+      "from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n",
+      "Machine Learning Repository, which has two wrong data points.\n",
+      "\n",
+      "This is perhaps the best known database to be found in the\n",
+      "pattern recognition literature.  Fisher's paper is a classic in the field and\n",
+      "is referenced frequently to this day.  (See Duda & Hart, for example.)  The\n",
+      "data set contains 3 classes of 50 instances each, where each class refers to a\n",
+      "type of iris plant.  One class is linearly separable from the other 2; the\n",
+      "latter are NOT linearly separable from each other.\n",
+      "\n",
+      ".. topic:: References\n",
+      "\n",
+      "   - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
+      "     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
+      "     Mathematical Statistics\" (John Wiley, NY, 1950).\n",
+      "   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
+      "     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n",
+      "   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
+      "     Structure and Classification Rule for Recognition in Partially Exposed\n",
+      "     Environments\".  IEEE Transactions on Pattern Analysis and Machine\n",
+      "     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
+      "   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\n",
+      "     on Information Theory, May 1972, 431-433.\n",
+      "   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\n",
+      "     conceptual clustering system finds 3 classes in the data.\n",
+      "   - Many, many more ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(iris.DESCR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = iris[\"data\"][:, (2,3)]  # petal length and width\n",
+    "y = (iris[\"target\"])  # integer class labels 0, 1, 2 (one per iris species)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(150, 2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Important variables\n",
+    "\n",
+    "X_with_bias = np.c_[np.ones([len(X), 1]), X] # Add column of ones for theta intercept term\n",
+    "alpha = 0.1\n",
+    "iterations=1500\n",
+    "\n",
+    "print(X.shape)\n",
+    "\n",
+    "# NOTE: If ValueError: all input arrays must have the same shape appears then you may have run this cell multiple times\n",
+    "#    which will have added multiple columns of ones to the matrix X"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup our proportions\n",
+    "\n",
+    "test_ratio = .2\n",
+    "val_ratio = .2\n",
+    "total_size = len(X)\n",
+    "\n",
+    "# Calculate size of our splits\n",
+    "\n",
+    "test_size = int(test_ratio*total_size)\n",
+    "val_size = int(val_ratio*total_size)\n",
+    "train_size = total_size - test_size - val_size\n",
+    "\n",
+    "# Split our data\n",
+    "\n",
+    "np.random.seed(42) # Fixed seed so the shuffle (and therefore the split) is reproducible\n",
+    "rnd_indices = np.random.permutation(total_size) # Shuffled row indices\n",
+    "\n",
+    "# Slice with explicit boundaries: negative slices such as [-test_size:]\n",
+    "# would select every row when test_size is 0\n",
+    "val_end = train_size + val_size\n",
+    "train_idx = rnd_indices[:train_size]\n",
+    "valid_idx = rnd_indices[train_size:val_end]\n",
+    "test_idx = rnd_indices[val_end:]\n",
+    "\n",
+    "X_train = X_with_bias[train_idx]\n",
+    "y_train = y[train_idx]\n",
+    "X_valid = X_with_bias[valid_idx]\n",
+    "y_valid = y[valid_idx]\n",
+    "X_test = X_with_bias[test_idx]\n",
+    "y_test = y[test_idx]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(90, 3)\n",
+      "(30, 2)\n",
+      "(30, 3)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity check: every split keeps the bias column, so each has 3 columns\n",
+    "print(X_train.shape)\n",
+    "print(X_valid.shape)\n",
+    "print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def to_one_hot(y, n_classes=None):\n",
+    "    \"\"\"One-hot encode a 1-D array of non-negative integer class labels.\n",
+    "\n",
+    "    y: integer labels; each value selects the column set to 1 in its row.\n",
+    "    n_classes: number of columns in the result. Defaults to y.max() + 1,\n",
+    "        which is only consistent across data splits when every split\n",
+    "        contains the highest label, so pass it explicitly when encoding\n",
+    "        several splits of the same dataset.\n",
+    "\n",
+    "    Returns a float array of shape (len(y), n_classes).\n",
+    "    \"\"\"\n",
+    "    y = np.asarray(y)\n",
+    "    if n_classes is None:\n",
+    "        n_classes = y.max() + 1\n",
+    "    m = len(y)\n",
+    "    Y_one_hot = np.zeros((m, n_classes)) # Zero matrix with m rows and a column for each class\n",
+    "    Y_one_hot[np.arange(m), y] = 1 # Put a 1 in the column matching each label\n",
+    "    return Y_one_hot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([2, 2, 2, 0, 0, 0, 1, 2, 0, 2])"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_train[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 1.],\n",
+       "       [0., 0., 1.],\n",
+       "       [0., 0., 1.],\n",
+       "       [1., 0., 0.],\n",
+       "       [1., 0., 0.],\n",
+       "       [1., 0., 0.],\n",
+       "       [0., 1., 0.],\n",
+       "       [0., 0., 1.],\n",
+       "       [1., 0., 0.],\n",
+       "       [0., 0., 1.]])"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "to_one_hot(y_train[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-hot encode the labels of each split (the validation labels are named y_valid)\n",
+    "Y_train_one_hot = to_one_hot(y_train)\n",
+    "Y_test_one_hot = to_one_hot(y_test)\n",
+    "Y_val_one_hot = to_one_hot(y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Softmax function = exp(X) / (sum of exp(X))\n",
+    "\n",
+    "def softmax(logits):\n",
+    "    \"\"\"Row-wise softmax of a 2-D array of logits.\n",
+    "\n",
+    "    Each row's maximum is subtracted before exponentiating. This leaves the\n",
+    "    result mathematically unchanged but keeps np.exp from overflowing to inf\n",
+    "    (which would turn the probabilities into nan) when logits are large.\n",
+    "\n",
+    "    Returns an array of the same shape whose rows each sum to 1.\n",
+    "    \"\"\"\n",
+    "    shifted = logits - np.max(logits, axis=1, keepdims=True)\n",
+    "    exps = np.exp(shifted)\n",
+    "    exp_sums = np.sum(exps, axis=1, keepdims=True)\n",
+    "    return exps / exp_sums"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_inputs = X_train.shape[1] # Number of input columns (the features plus the bias column)\n",
+    "n_outputs = len(np.unique(y_train)) # 3 unique values which will each be a possible output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 1.4567897105648775\n",
+      "500 0.7451993577978241\n",
+      "1000 0.6279369677273878\n",
+      "1500 0.5572702696067121\n",
+      "2000 0.5111859948576022\n",
+      "2500 0.47856473219026296\n",
+      "3000 0.45387932862540925\n",
+      "3500 0.43422780377165426\n",
+      "4000 0.41797875623202274\n",
+      "4500 0.4041537521442775\n",
+      "5000 0.39213163561158126\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Hyperparameters for batch gradient descent\n",
+    "eta = 0.01           # learning rate\n",
+    "n_iterations = 5001  # number of full-batch updates\n",
+    "m = len(X_train)     # number of training instances\n",
+    "epsilon = 1e-7       # added inside the log so log(0) is never evaluated\n",
+    "\n",
+    "# Random starting weights: one row per input column, one column per class\n",
+    "Theta = np.random.randn(n_inputs, n_outputs)\n",
+    "\n",
+    "for iteration in range(n_iterations):\n",
+    "    logits = X_train @ Theta # Raw class scores for every training instance\n",
+    "    p_hat = softmax(logits) # Scores turned into class probabilities\n",
+    "    log_likelihood = (Y_train_one_hot * np.log(p_hat + epsilon)).sum(axis=1)\n",
+    "    loss = -log_likelihood.mean() # Cross-entropy averaged over the batch\n",
+    "    if iteration % 500 == 0:\n",
+    "        print(iteration, loss)\n",
+    "    error = p_hat - Y_train_one_hot # Predicted minus target probabilities\n",
+    "    Grad = 1/m * (X_train.T @ error)\n",
+    "    Theta = Theta - eta * Grad"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 3.61613128,  0.06856255, -2.86225561],\n",
+       "       [-0.2597962 ,  0.80558911,  0.70553675],\n",
+       "       [-0.90831271,  0.18903751,  2.43558706]])"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Theta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9666666666666667"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Evaluate on the validation split: predict the class with the highest probability\n",
+    "\n",
+    "logits = X_valid @ Theta\n",
+    "p_hat = softmax(logits)\n",
+    "y_pred = p_hat.argmax(axis=1)\n",
+    "\n",
+    "accuracy_score = (y_pred == y_valid).mean() # Fraction of correct predictions\n",
+    "accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Ch4/datasets/housing/ex1data1.txt
+++ b/Ch4/datasets/housing/ex1data1.txt
@ -0,0 +1,97 @@
+6.1101,17.592
+5.5277,9.1302
+8.5186,13.662
+7.0032,11.854
+5.8598,6.8233
+8.3829,11.886
+7.4764,4.3483
+8.5781,12
+6.4862,6.5987
+5.0546,3.8166
+5.7107,3.2522
+14.164,15.505
+5.734,3.1551
+8.4084,7.2258
+5.6407,0.71618
+5.3794,3.5129
+6.3654,5.3048
+5.1301,0.56077
+6.4296,3.6518
+7.0708,5.3893
+6.1891,3.1386
+20.27,21.767
+5.4901,4.263
+6.3261,5.1875
+5.5649,3.0825
+18.945,22.638
+12.828,13.501
+10.957,7.0467
+13.176,14.692
+22.203,24.147
+5.2524,-1.22
+6.5894,5.9966
+9.2482,12.134
+5.8918,1.8495
+8.2111,6.5426
+7.9334,4.5623
+8.0959,4.1164
+5.6063,3.3928
+12.836,10.117
+6.3534,5.4974
+5.4069,0.55657
+6.8825,3.9115
+11.708,5.3854
+5.7737,2.4406
+7.8247,6.7318
+7.0931,1.0463
+5.0702,5.1337
+5.8014,1.844
+11.7,8.0043
+5.5416,1.0179
+7.5402,6.7504
+5.3077,1.8396
+7.4239,4.2885
+7.6031,4.9981
+6.3328,1.4233
+6.3589,-1.4211
+6.2742,2.4756
+5.6397,4.6042
+9.3102,3.9624
+9.4536,5.4141
+8.8254,5.1694
+5.1793,-0.74279
+21.279,17.929
+14.908,12.054
+18.959,17.054
+7.2182,4.8852
+8.2951,5.7442
+10.236,7.7754
+5.4994,1.0173
+20.341,20.992
+10.136,6.6799
+7.3345,4.0259
+6.0062,1.2784
+7.2259,3.3411
+5.0269,-2.6807
+6.5479,0.29678
+7.5386,3.8845
+5.0365,5.7014
+10.274,6.7526
+5.1077,2.0576
+5.7292,0.47953
+5.1884,0.20421
+6.3557,0.67861
+9.7687,7.5435
+6.5159,5.3436
+8.5172,4.2415
+9.1802,6.7981
+6.002,0.92695
+5.5204,0.152
+5.0594,2.8214
+5.7077,1.8451
+7.6366,4.2959
+5.8707,7.2029
+5.3054,1.9869
+8.2934,0.14454
+13.394,9.0551
+5.4369,0.61705
--- a/Ch4/datasets/housing/ex1data2.txt
+++ b/Ch4/datasets/housing/ex1data2.txt
@ -0,0 +1,47 @@
+2104,3,399900
+1600,3,329900
+2400,3,369000
+1416,2,232000
+3000,4,539900
+1985,4,299900
+1534,3,314900
+1427,3,198999
+1380,3,212000
+1494,3,242500
+1940,4,239999
+2000,3,347000
+1890,3,329999
+4478,5,699900
+1268,3,259900
+2300,4,449900
+1320,2,299900
+1236,3,199900
+2609,4,499998
+3031,4,599000
+1767,3,252900
+1888,2,255000
+1604,3,242900
+1962,4,259900
+3890,3,573900
+1100,3,249900
+1458,3,464500
+2526,3,469000
+2200,3,475000
+2637,3,299900
+1839,2,349900
+1000,1,169900
+2040,4,314900
+3137,3,579900
+1811,4,285900
+1437,3,249900
+1239,3,229900
+2132,4,345000
+4215,4,549000
+2162,4,287000
+1664,2,368500
+2238,3,329900
+2567,4,314000
+1200,3,299000
+852,2,179900
+1852,4,299900
+1203,3,239500
--- a/Ch4/exercises.ipynb
+++ b/Ch4/exercises.ipynb
@ -0,0 +1,437 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 12**\n",
+    "\n",
+    "Implement batch gradient descent from scratch (no SKLearn!)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "from matplotlib import pyplot as plt\n",
+    "from sklearn import datasets\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "iris = datasets.load_iris()\n",
+    "list(iris.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _iris_dataset:\n",
+      "\n",
+      "Iris plants dataset\n",
+      "--------------------\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "    :Number of Instances: 150 (50 in each of three classes)\n",
+      "    :Number of Attributes: 4 numeric, predictive attributes and the class\n",
+      "    :Attribute Information:\n",
+      "        - sepal length in cm\n",
+      "        - sepal width in cm\n",
+      "        - petal length in cm\n",
+      "        - petal width in cm\n",
+      "        - class:\n",
+      "                - Iris-Setosa\n",
+      "                - Iris-Versicolour\n",
+      "                - Iris-Virginica\n",
+      "                \n",
+      "    :Summary Statistics:\n",
+      "\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "                    Min  Max   Mean    SD   Class Correlation\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "    sepal length:   4.3  7.9   5.84   0.83    0.7826\n",
+      "    sepal width:    2.0  4.4   3.05   0.43   -0.4194\n",
+      "    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)\n",
+      "    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)\n",
+      "    ============== ==== ==== ======= ===== ====================\n",
+      "\n",
+      "    :Missing Attribute Values: None\n",
+      "    :Class Distribution: 33.3% for each of 3 classes.\n",
+      "    :Creator: R.A. Fisher\n",
+      "    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n",
+      "    :Date: July, 1988\n",
+      "\n",
+      "The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken\n",
+      "from Fisher's paper. Note that it's the same as in R, but not as in the UCI\n",
+      "Machine Learning Repository, which has two wrong data points.\n",
+      "\n",
+      "This is perhaps the best known database to be found in the\n",
+      "pattern recognition literature.  Fisher's paper is a classic in the field and\n",
+      "is referenced frequently to this day.  (See Duda & Hart, for example.)  The\n",
+      "data set contains 3 classes of 50 instances each, where each class refers to a\n",
+      "type of iris plant.  One class is linearly separable from the other 2; the\n",
+      "latter are NOT linearly separable from each other.\n",
+      "\n",
+      ".. topic:: References\n",
+      "\n",
+      "   - Fisher, R.A. \"The use of multiple measurements in taxonomic problems\"\n",
+      "     Annual Eugenics, 7, Part II, 179-188 (1936); also in \"Contributions to\n",
+      "     Mathematical Statistics\" (John Wiley, NY, 1950).\n",
+      "   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.\n",
+      "     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.\n",
+      "   - Dasarathy, B.V. (1980) \"Nosing Around the Neighborhood: A New System\n",
+      "     Structure and Classification Rule for Recognition in Partially Exposed\n",
+      "     Environments\".  IEEE Transactions on Pattern Analysis and Machine\n",
+      "     Intelligence, Vol. PAMI-2, No. 1, 67-71.\n",
+      "   - Gates, G.W. (1972) \"The Reduced Nearest Neighbor Rule\".  IEEE Transactions\n",
+      "     on Information Theory, May 1972, 431-433.\n",
+      "   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al\"s AUTOCLASS II\n",
+      "     conceptual clustering system finds 3 classes in the data.\n",
+      "   - Many, many more ...\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(iris.DESCR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = iris[\"data\"][:, (2,3)]  # petal length and width\n",
+    "y = (iris[\"target\"])  # integer class labels 0, 1, 2 (one per iris species)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(150, 2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Important variables\n",
+    "\n",
+    "X_with_bias = np.c_[np.ones([len(X), 1]), X] # Add column of ones for theta intercept term\n",
+    "alpha = 0.1\n",
+    "iterations=1500\n",
+    "\n",
+    "print(X.shape)\n",
+    "\n",
+    "# NOTE: If ValueError: all input arrays must have the same shape appears then you may have run this cell multiple times\n",
+    "#    which will have added multiple columns of ones to the matrix X"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Setup our proportions\n",
+    "\n",
+    "test_ratio = .2\n",
+    "val_ratio = .2\n",
+    "total_size = len(X)\n",
+    "\n",
+    "# Calculate size of our splits\n",
+    "\n",
+    "test_size = int(test_ratio*total_size)\n",
+    "val_size = int(val_ratio*total_size)\n",
+    "train_size = total_size - test_size - val_size\n",
+    "\n",
+    "# Split our data\n",
+    "\n",
+    "np.random.seed(42) # Fixed seed so the shuffle (and therefore the split) is reproducible\n",
+    "rnd_indices = np.random.permutation(total_size) # Shuffled row indices\n",
+    "\n",
+    "# Slice with explicit boundaries: negative slices such as [-test_size:]\n",
+    "# would select every row when test_size is 0\n",
+    "val_end = train_size + val_size\n",
+    "train_idx = rnd_indices[:train_size]\n",
+    "valid_idx = rnd_indices[train_size:val_end]\n",
+    "test_idx = rnd_indices[val_end:]\n",
+    "\n",
+    "X_train = X_with_bias[train_idx]\n",
+    "y_train = y[train_idx]\n",
+    "X_valid = X_with_bias[valid_idx]\n",
+    "y_valid = y[valid_idx]\n",
+    "X_test = X_with_bias[test_idx]\n",
+    "y_test = y[test_idx]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(90, 3)\n",
+      "(30, 2)\n",
+      "(30, 3)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity check: every split keeps the bias column, so each has 3 columns\n",
+    "print(X_train.shape)\n",
+    "print(X_valid.shape)\n",
+    "print(X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def to_one_hot(y, n_classes=None):\n",
+    "    \"\"\"One-hot encode a 1-D array of non-negative integer class labels.\n",
+    "\n",
+    "    y: integer labels; each value selects the column set to 1 in its row.\n",
+    "    n_classes: number of columns in the result. Defaults to y.max() + 1,\n",
+    "        which is only consistent across data splits when every split\n",
+    "        contains the highest label, so pass it explicitly when encoding\n",
+    "        several splits of the same dataset.\n",
+    "\n",
+    "    Returns a float array of shape (len(y), n_classes).\n",
+    "    \"\"\"\n",
+    "    y = np.asarray(y)\n",
+    "    if n_classes is None:\n",
+    "        n_classes = y.max() + 1\n",
+    "    m = len(y)\n",
+    "    Y_one_hot = np.zeros((m, n_classes)) # Zero matrix with m rows and a column for each class\n",
+    "    Y_one_hot[np.arange(m), y] = 1 # Put a 1 in the column matching each label\n",
+    "    return Y_one_hot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([2, 2, 2, 0, 0, 0, 1, 2, 0, 2])"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "y_train[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0., 0., 1.],\n",
+       "       [0., 0., 1.],\n",
+       "       [0., 0., 1.],\n",
+       "       [1., 0., 0.],\n",
+       "       [1., 0., 0.],\n",
+       "       [1., 0., 0.],\n",
+       "       [0., 1., 0.],\n",
+       "       [0., 0., 1.],\n",
+       "       [1., 0., 0.],\n",
+       "       [0., 0., 1.]])"
+      ]
+     },
+     "execution_count": 74,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "to_one_hot(y_train[:10])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-hot encode the labels of each split (the validation labels are named y_valid)\n",
+    "Y_train_one_hot = to_one_hot(y_train)\n",
+    "Y_test_one_hot = to_one_hot(y_test)\n",
+    "Y_val_one_hot = to_one_hot(y_valid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Softmax function = exp(X) / (sum of exp(X))\n",
+    "\n",
+    "def softmax(logits):\n",
+    "    \"\"\"Row-wise softmax of a 2-D array of logits.\n",
+    "\n",
+    "    Each row's maximum is subtracted before exponentiating. This leaves the\n",
+    "    result mathematically unchanged but keeps np.exp from overflowing to inf\n",
+    "    (which would turn the probabilities into nan) when logits are large.\n",
+    "\n",
+    "    Returns an array of the same shape whose rows each sum to 1.\n",
+    "    \"\"\"\n",
+    "    shifted = logits - np.max(logits, axis=1, keepdims=True)\n",
+    "    exps = np.exp(shifted)\n",
+    "    exp_sums = np.sum(exps, axis=1, keepdims=True)\n",
+    "    return exps / exp_sums"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n_inputs = X_train.shape[1] # Number of input columns (the features plus the bias column)\n",
+    "n_outputs = len(np.unique(y_train)) # 3 unique values which will each be a possible output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 1.4567897105648775\n",
+      "500 0.7451993577978241\n",
+      "1000 0.6279369677273878\n",
+      "1500 0.5572702696067121\n",
+      "2000 0.5111859948576022\n",
+      "2500 0.47856473219026296\n",
+      "3000 0.45387932862540925\n",
+      "3500 0.43422780377165426\n",
+      "4000 0.41797875623202274\n",
+      "4500 0.4041537521442775\n",
+      "5000 0.39213163561158126\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Hyperparameters for batch gradient descent\n",
+    "eta = 0.01           # learning rate\n",
+    "n_iterations = 5001  # number of full-batch updates\n",
+    "m = len(X_train)     # number of training instances\n",
+    "epsilon = 1e-7       # added inside the log so log(0) is never evaluated\n",
+    "\n",
+    "# Random starting weights: one row per input column, one column per class\n",
+    "Theta = np.random.randn(n_inputs, n_outputs)\n",
+    "\n",
+    "for iteration in range(n_iterations):\n",
+    "    logits = X_train @ Theta # Raw class scores for every training instance\n",
+    "    p_hat = softmax(logits) # Scores turned into class probabilities\n",
+    "    log_likelihood = (Y_train_one_hot * np.log(p_hat + epsilon)).sum(axis=1)\n",
+    "    loss = -log_likelihood.mean() # Cross-entropy averaged over the batch\n",
+    "    if iteration % 500 == 0:\n",
+    "        print(iteration, loss)\n",
+    "    error = p_hat - Y_train_one_hot # Predicted minus target probabilities\n",
+    "    Grad = 1/m * (X_train.T @ error)\n",
+    "    Theta = Theta - eta * Grad"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 3.61613128,  0.06856255, -2.86225561],\n",
+       "       [-0.2597962 ,  0.80558911,  0.70553675],\n",
+       "       [-0.90831271,  0.18903751,  2.43558706]])"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Theta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9666666666666667"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Evaluate on the validation split: predict the class with the highest probability\n",
+    "\n",
+    "logits = X_valid @ Theta\n",
+    "p_hat = softmax(logits)\n",
+    "y_pred = p_hat.argmax(axis=1)\n",
+    "\n",
+    "accuracy_score = (y_pred == y_valid).mean() # Fraction of correct predictions\n",
+    "accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Ch5/.ipynb_checkpoints/Exercises-checkpoint.ipynb
+++ b/Ch5/.ipynb_checkpoints/Exercises-checkpoint.ipynb
--- a/Ch5/Exercises.ipynb
+++ b/Ch5/Exercises.ipynb
--- a/Ch6/.ipynb_checkpoints/Exercises-checkpoint.ipynb
+++ b/Ch6/.ipynb_checkpoints/Exercises-checkpoint.ipynb
--- a/Ch6/Exercises.ipynb
+++ b/Ch6/Exercises.ipynb
--- a/Ch7/.ipynb_checkpoints/Exercises-checkpoint.ipynb
+++ b/Ch7/.ipynb_checkpoints/Exercises-checkpoint.ipynb
@ -0,0 +1,521 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from matplotlib import pyplot as plt\n",
+    "import os\n",
+    "from sklearn.datasets import fetch_openml\n",
+    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
+    "from sklearn.svm import LinearSVC\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.preprocessing import normalize"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 8**\n",
+    "\n",
+    "Create hard- and soft-voting ensembles on MNIST."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mnist = fetch_openml('mnist_784', version=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url'])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mnist.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(70000, 784)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X, y = mnist['data'], mnist['target']\n",
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split into train, val, test sets of size 50k, 10k, 10k and scale the\n",
+    "# pixel values by 1/255 in the same step.\n",
+    "#\n",
+    "# The division is done out of place on purpose: X[a:b] is a view of X, so\n",
+    "# an in-place `X_train /= 255.0` would rewrite X itself, and re-running this\n",
+    "# cell would then divide the already-scaled pixels by 255 a second time.\n",
+    "\n",
+    "X_train = X[:50000] / 255.0\n",
+    "y_train = y[:50000]\n",
+    "X_val = X[50000:60000] / 255.0\n",
+    "y_val = y[50000:60000]\n",
+    "X_test = X[60000:] / 255.0\n",
+    "y_test = y[60000:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X_test.max())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training our  RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
+      "                       criterion='gini', max_depth=None, max_features='auto',\n",
+      "                       max_leaf_nodes=None, max_samples=None,\n",
+      "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "                       min_samples_leaf=1, min_samples_split=2,\n",
+      "                       min_weight_fraction_leaf=0.0, n_estimators=100,\n",
+      "                       n_jobs=None, oob_score=False, random_state=None,\n",
+      "                       verbose=0, warm_start=False)\n",
+      "Training our  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,\n",
+      "                     criterion='gini', max_depth=None, max_features='auto',\n",
+      "                     max_leaf_nodes=None, max_samples=None,\n",
+      "                     min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "                     min_samples_leaf=1, min_samples_split=2,\n",
+      "                     min_weight_fraction_leaf=0.0, n_estimators=100,\n",
+      "                     n_jobs=None, oob_score=False, random_state=None, verbose=0,\n",
+      "                     warm_start=False)\n",
+      "Training our  LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
+      "          intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
+      "          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
+      "          verbose=0)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\svm\\_base.py:947: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "  \"the number of iterations.\", ConvergenceWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "rfc = RandomForestClassifier()\n",
+    "etc = ExtraTreesClassifier()\n",
+    "svc = LinearSVC()\n",
+    "\n",
+    "classifiers = [rfc, etc, svc]\n",
+    "scores = []\n",
+    "\n",
+    "# Fit each classifier to the training set and predict on X_val\n",
+    "for clf in classifiers:\n",
+    "    print('Training our ', clf)\n",
+    "    clf.fit(X_train, y_train)\n",
+    "    score = clf.score(X_val, y_val)\n",
+    "    scores.append(score)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.9719, 0.9741, 0.9208]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\svm\\_base.py:947: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "  \"the number of iterations.\", ConvergenceWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "VotingClassifier(estimators=[('rf',\n",
+       "                              RandomForestClassifier(bootstrap=True,\n",
+       "                                                     ccp_alpha=0.0,\n",
+       "                                                     class_weight=None,\n",
+       "                                                     criterion='gini',\n",
+       "                                                     max_depth=None,\n",
+       "                                                     max_features='auto',\n",
+       "                                                     max_leaf_nodes=None,\n",
+       "                                                     max_samples=None,\n",
+       "                                                     min_impurity_decrease=0.0,\n",
+       "                                                     min_impurity_split=None,\n",
+       "                                                     min_samples_leaf=1,\n",
+       "                                                     min_samples_split=2,\n",
+       "                                                     min_weight_fraction_leaf=0.0,\n",
+       "                                                     n_estimators=100,\n",
+       "                                                     n_jobs=None,\n",
+       "                                                     oob_score...\n",
+       "                                                   n_estimators=100,\n",
+       "                                                   n_jobs=None, oob_score=False,\n",
+       "                                                   random_state=None, verbose=0,\n",
+       "                                                   warm_start=False)),\n",
+       "                             ('sv',\n",
+       "                              LinearSVC(C=1.0, class_weight=None, dual=True,\n",
+       "                                        fit_intercept=True, intercept_scaling=1,\n",
+       "                                        loss='squared_hinge', max_iter=1000,\n",
+       "                                        multi_class='ovr', penalty='l2',\n",
+       "                                        random_state=None, tol=0.0001,\n",
+       "                                        verbose=0))],\n",
+       "                 flatten_transform=True, n_jobs=None, voting='hard',\n",
+       "                 weights=None)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import VotingClassifier\n",
+    "\n",
+    "# Hard-voting ensemble\n",
+    "voting_clf = VotingClassifier(\n",
+    "    estimators=[('rf', rfc), ('et', etc), ('sv', svc)],\n",
+    "    voting='hard'\n",
+    ")\n",
+    "\n",
+    "voting_clf.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9719"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try without SVC\n",
+    "\n",
+    "del voting_clf.estimators_[2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9732"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9752"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Set to soft voting and check if better\n",
+    "\n",
+    "voting_clf.voting='soft'\n",
+    "\n",
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9707"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Check on Test Set\n",
+    "\n",
+    "voting_clf.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 9**\n",
+    "\n",
+    "Train a stacking ensemble on our previous classifiers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Round up our predictions\n",
+    "\n",
+    "X_val_predictions = np.empty((len(X_val), len(classifiers)), dtype=np.float32)\n",
+    "\n",
+    "for index, clf in enumerate(classifiers):\n",
+    "    X_val_predictions[:, index] = clf.predict(X_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[3. 3. 3.]\n",
+      " [8. 8. 8.]\n",
+      " [6. 6. 6.]\n",
+      " ...\n",
+      " [5. 5. 5.]\n",
+      " [6. 6. 6.]\n",
+      " [8. 8. 8.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X_val_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
+       "                       criterion='gini', max_depth=None, max_features='auto',\n",
+       "                       max_leaf_nodes=None, max_samples=None,\n",
+       "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "                       min_samples_leaf=1, min_samples_split=2,\n",
+       "                       min_weight_fraction_leaf=0.0, n_estimators=200,\n",
+       "                       n_jobs=None, oob_score=True, random_state=None,\n",
+       "                       verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Train a classifier which will take as input our predictions matrix\n",
+    "blender = RandomForestClassifier(n_estimators=200, oob_score=True)\n",
+    "blender.fit(X_val_predictions, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9727"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Check our out of bag score to get an idea of accuracy\n",
+    "blender.oob_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Collect each base classifier's predictions on X_test, one column per classifier.\n",
+    "# The matrix is sized from X_test itself (not X_val) so the rows always match\n",
+    "# what clf.predict(X_test) returns, even if the two sets differ in length.\n",
+    "X_test_predictions = np.empty((len(X_test), len(classifiers)), dtype=np.float32)\n",
+    "\n",
+    "for index, clf in enumerate(classifiers):\n",
+    "    X_test_predictions[:, index] = clf.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use our blender to predict based on our predictions matrix\n",
+    "y_pred = blender.predict(X_test_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.968"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Accuracy of the blender on the test set; accuracy_score takes (y_true, y_pred)\n",
+    "accuracy_score(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Ch7/Exercises.ipynb
+++ b/Ch7/Exercises.ipynb
@ -0,0 +1,521 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from matplotlib import pyplot as plt\n",
+    "import os\n",
+    "from sklearn.datasets import fetch_openml\n",
+    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
+    "from sklearn.svm import LinearSVC\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.preprocessing import normalize"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 8**\n",
+    "\n",
+    "Create hard- and soft-voting ensembles on MNIST."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mnist = fetch_openml('mnist_784', version=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url'])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mnist.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(70000, 784)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X, y = mnist['data'], mnist['target']\n",
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Split into train, val, test sets of size 50k, 10k, 10k and scale the\n",
+    "# pixel values by 1/255 in the same step.\n",
+    "#\n",
+    "# The division is done out of place on purpose: X[a:b] is a view of X, so\n",
+    "# an in-place `X_train /= 255.0` would rewrite X itself, and re-running this\n",
+    "# cell would then divide the already-scaled pixels by 255 a second time.\n",
+    "\n",
+    "X_train = X[:50000] / 255.0\n",
+    "y_train = y[:50000]\n",
+    "X_val = X[50000:60000] / 255.0\n",
+    "y_val = y[50000:60000]\n",
+    "X_test = X[60000:] / 255.0\n",
+    "y_test = y[60000:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X_test.max())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training our  RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
+      "                       criterion='gini', max_depth=None, max_features='auto',\n",
+      "                       max_leaf_nodes=None, max_samples=None,\n",
+      "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "                       min_samples_leaf=1, min_samples_split=2,\n",
+      "                       min_weight_fraction_leaf=0.0, n_estimators=100,\n",
+      "                       n_jobs=None, oob_score=False, random_state=None,\n",
+      "                       verbose=0, warm_start=False)\n",
+      "Training our  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,\n",
+      "                     criterion='gini', max_depth=None, max_features='auto',\n",
+      "                     max_leaf_nodes=None, max_samples=None,\n",
+      "                     min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "                     min_samples_leaf=1, min_samples_split=2,\n",
+      "                     min_weight_fraction_leaf=0.0, n_estimators=100,\n",
+      "                     n_jobs=None, oob_score=False, random_state=None, verbose=0,\n",
+      "                     warm_start=False)\n",
+      "Training our  LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
+      "          intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
+      "          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
+      "          verbose=0)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\svm\\_base.py:947: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "  \"the number of iterations.\", ConvergenceWarning)\n"
+     ]
+    }
+   ],
+   "source": [
+    "rfc = RandomForestClassifier()\n",
+    "etc = ExtraTreesClassifier()\n",
+    "svc = LinearSVC()\n",
+    "\n",
+    "classifiers = [rfc, etc, svc]\n",
+    "scores = []\n",
+    "\n",
+    "# Fit each classifier to the training set and predict on X_val\n",
+    "for clf in classifiers:\n",
+    "    print('Training our ', clf)\n",
+    "    clf.fit(X_train, y_train)\n",
+    "    score = clf.score(X_val, y_val)\n",
+    "    scores.append(score)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.9719, 0.9741, 0.9208]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\svm\\_base.py:947: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n",
+      "  \"the number of iterations.\", ConvergenceWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "VotingClassifier(estimators=[('rf',\n",
+       "                              RandomForestClassifier(bootstrap=True,\n",
+       "                                                     ccp_alpha=0.0,\n",
+       "                                                     class_weight=None,\n",
+       "                                                     criterion='gini',\n",
+       "                                                     max_depth=None,\n",
+       "                                                     max_features='auto',\n",
+       "                                                     max_leaf_nodes=None,\n",
+       "                                                     max_samples=None,\n",
+       "                                                     min_impurity_decrease=0.0,\n",
+       "                                                     min_impurity_split=None,\n",
+       "                                                     min_samples_leaf=1,\n",
+       "                                                     min_samples_split=2,\n",
+       "                                                     min_weight_fraction_leaf=0.0,\n",
+       "                                                     n_estimators=100,\n",
+       "                                                     n_jobs=None,\n",
+       "                                                     oob_score...\n",
+       "                                                   n_estimators=100,\n",
+       "                                                   n_jobs=None, oob_score=False,\n",
+       "                                                   random_state=None, verbose=0,\n",
+       "                                                   warm_start=False)),\n",
+       "                             ('sv',\n",
+       "                              LinearSVC(C=1.0, class_weight=None, dual=True,\n",
+       "                                        fit_intercept=True, intercept_scaling=1,\n",
+       "                                        loss='squared_hinge', max_iter=1000,\n",
+       "                                        multi_class='ovr', penalty='l2',\n",
+       "                                        random_state=None, tol=0.0001,\n",
+       "                                        verbose=0))],\n",
+       "                 flatten_transform=True, n_jobs=None, voting='hard',\n",
+       "                 weights=None)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.ensemble import VotingClassifier\n",
+    "\n",
+    "# Hard-voting ensemble\n",
+    "voting_clf = VotingClassifier(\n",
+    "    estimators=[('rf', rfc), ('et', etc), ('sv', svc)],\n",
+    "    voting='hard'\n",
+    ")\n",
+    "\n",
+    "voting_clf.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9719"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Try without SVC\n",
+    "\n",
+    "del voting_clf.estimators_[2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9732"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9752"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Set to soft voting and check if better\n",
+    "\n",
+    "voting_clf.voting='soft'\n",
+    "\n",
+    "voting_clf.score(X_val, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9707"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Check on Test Set\n",
+    "\n",
+    "voting_clf.score(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Exercise 9**\n",
+    "\n",
+    "Train a stacking ensemble on our previous classifiers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Round up our predictions\n",
+    "\n",
+    "X_val_predictions = np.empty((len(X_val), len(classifiers)), dtype=np.float32)\n",
+    "\n",
+    "for index, clf in enumerate(classifiers):\n",
+    "    X_val_predictions[:, index] = clf.predict(X_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[3. 3. 3.]\n",
+      " [8. 8. 8.]\n",
+      " [6. 6. 6.]\n",
+      " ...\n",
+      " [5. 5. 5.]\n",
+      " [6. 6. 6.]\n",
+      " [8. 8. 8.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X_val_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
+       "                       criterion='gini', max_depth=None, max_features='auto',\n",
+       "                       max_leaf_nodes=None, max_samples=None,\n",
+       "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "                       min_samples_leaf=1, min_samples_split=2,\n",
+       "                       min_weight_fraction_leaf=0.0, n_estimators=200,\n",
+       "                       n_jobs=None, oob_score=True, random_state=None,\n",
+       "                       verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Train a classifier which will take as input our predictions matrix\n",
+    "blender = RandomForestClassifier(n_estimators=200, oob_score=True)\n",
+    "blender.fit(X_val_predictions, y_val)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.9727"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Check our out of bag score to get an idea of accuracy\n",
+    "blender.oob_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Collect each base classifier's predictions on X_test, one column per classifier.\n",
+    "# The matrix is sized from X_test itself (not X_val) so the rows always match\n",
+    "# what clf.predict(X_test) returns, even if the two sets differ in length.\n",
+    "X_test_predictions = np.empty((len(X_test), len(classifiers)), dtype=np.float32)\n",
+    "\n",
+    "for index, clf in enumerate(classifiers):\n",
+    "    X_test_predictions[:, index] = clf.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use our blender to predict based on our predictions matrix\n",
+    "y_pred = blender.predict(X_test_predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.968"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Accuracy of the blender on the test set; accuracy_score takes (y_true, y_pred)\n",
+    "accuracy_score(y_test, y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}