{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from math import sqrt\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.metrics import mean_absolute_error\n",
"import tensorflow as tf\n",
"import keras"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the preprocessed train/test sets.\n",
"# The first CSV column is the row index written out when the files were saved\n",
"# (it shows up as 'Unnamed: 0'); read it as the index so it is not used as a feature.\n",
"train = pd.read_csv(os.path.join('data', 'clean_train.csv'), index_col=0)\n",
"test = pd.read_csv(os.path.join('data', 'clean_test.csv'), index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" PCA0 | \n",
" PCA1 | \n",
" PCA2 | \n",
" PCA3 | \n",
" PCA4 | \n",
" PCA5 | \n",
" PCA6 | \n",
" PCA7 | \n",
" PCA8 | \n",
" ... | \n",
" PCA164 | \n",
" PCA165 | \n",
" PCA166 | \n",
" PCA167 | \n",
" PCA168 | \n",
" PCA169 | \n",
" PCA170 | \n",
" PCA171 | \n",
" Id | \n",
" SalePrice | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 4.345109 | \n",
" 1.619386 | \n",
" -0.739617 | \n",
" -2.080179 | \n",
" -0.985088 | \n",
" 1.999117 | \n",
" -1.231870 | \n",
" -0.131782 | \n",
" 1.316470 | \n",
" ... | \n",
" 0.160733 | \n",
" 0.071333 | \n",
" 0.155468 | \n",
" 0.172801 | \n",
" -0.169568 | \n",
" -0.144326 | \n",
" 0.391713 | \n",
" -0.013357 | \n",
" 1 | \n",
" 208500 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 0.019142 | \n",
" -3.106959 | \n",
" 0.168223 | \n",
" -0.553341 | \n",
" 0.940712 | \n",
" 0.200719 | \n",
" -0.468954 | \n",
" 0.235082 | \n",
" -0.838022 | \n",
" ... | \n",
" -1.063234 | \n",
" -0.334556 | \n",
" 0.361166 | \n",
" -1.218397 | \n",
" -0.346191 | \n",
" -0.962753 | \n",
" -0.138863 | \n",
" 1.083103 | \n",
" 2 | \n",
" 181500 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" 4.851149 | \n",
" 1.242811 | \n",
" -0.351815 | \n",
" -1.484957 | \n",
" -0.758200 | \n",
" 2.181179 | \n",
" -1.843949 | \n",
" 0.296194 | \n",
" 1.299142 | \n",
" ... | \n",
" 0.088334 | \n",
" 0.238624 | \n",
" 0.327280 | \n",
" 0.325285 | \n",
" -0.704900 | \n",
" -0.036388 | \n",
" -0.540516 | \n",
" 0.021711 | \n",
" 3 | \n",
" 223500 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3 | \n",
" -1.771641 | \n",
" 0.039500 | \n",
" -1.358623 | \n",
" 1.920760 | \n",
" -2.550817 | \n",
" 0.209519 | \n",
" -0.756387 | \n",
" 0.700109 | \n",
" -1.408543 | \n",
" ... | \n",
" -0.172186 | \n",
" -0.518922 | \n",
" 0.231498 | \n",
" -0.074296 | \n",
" -0.034287 | \n",
" -0.877735 | \n",
" 0.028065 | \n",
" -0.321009 | \n",
" 4 | \n",
" 140000 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4 | \n",
" 6.463747 | \n",
" 1.064473 | \n",
" 0.209472 | \n",
" 0.448906 | \n",
" -1.555301 | \n",
" 3.215822 | \n",
" -0.946356 | \n",
" -0.805204 | \n",
" 2.112526 | \n",
" ... | \n",
" -0.270189 | \n",
" 0.375297 | \n",
" -0.396732 | \n",
" -0.109084 | \n",
" 0.317305 | \n",
" -0.145975 | \n",
" -0.674692 | \n",
" -0.378458 | \n",
" 5 | \n",
" 250000 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 175 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
"\n",
" PCA6 PCA7 PCA8 ... PCA164 PCA165 PCA166 PCA167 \\\n",
"0 -1.231870 -0.131782 1.316470 ... 0.160733 0.071333 0.155468 0.172801 \n",
"1 -0.468954 0.235082 -0.838022 ... -1.063234 -0.334556 0.361166 -1.218397 \n",
"2 -1.843949 0.296194 1.299142 ... 0.088334 0.238624 0.327280 0.325285 \n",
"3 -0.756387 0.700109 -1.408543 ... -0.172186 -0.518922 0.231498 -0.074296 \n",
"4 -0.946356 -0.805204 2.112526 ... -0.270189 0.375297 -0.396732 -0.109084 \n",
"\n",
" PCA168 PCA169 PCA170 PCA171 Id SalePrice \n",
"0 -0.169568 -0.144326 0.391713 -0.013357 1 208500 \n",
"1 -0.346191 -0.962753 -0.138863 1.083103 2 181500 \n",
"2 -0.704900 -0.036388 -0.540516 0.021711 3 223500 \n",
"3 -0.034287 -0.877735 0.028065 -0.321009 4 140000 \n",
"4 0.317305 -0.145975 -0.674692 -0.378458 5 250000 \n",
"\n",
"[5 rows x 175 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five training rows\n",
"train.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" PCA0 | \n",
" PCA1 | \n",
" PCA2 | \n",
" PCA3 | \n",
" PCA4 | \n",
" PCA5 | \n",
" PCA6 | \n",
" PCA7 | \n",
" PCA8 | \n",
" ... | \n",
" PCA163 | \n",
" PCA164 | \n",
" PCA165 | \n",
" PCA166 | \n",
" PCA167 | \n",
" PCA168 | \n",
" PCA169 | \n",
" PCA170 | \n",
" PCA171 | \n",
" Id | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" -3.208086 | \n",
" -2.987338 | \n",
" -0.327066 | \n",
" -1.609206 | \n",
" 0.016879 | \n",
" -1.514939 | \n",
" -0.417889 | \n",
" -0.988173 | \n",
" -0.653363 | \n",
" ... | \n",
" -0.027364 | \n",
" 0.653222 | \n",
" -0.201973 | \n",
" -0.769946 | \n",
" -0.344834 | \n",
" 0.514257 | \n",
" 1.114106 | \n",
" 0.337765 | \n",
" -0.639617 | \n",
" 1461 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" -1.403753 | \n",
" -4.261851 | \n",
" 0.107527 | \n",
" 0.935981 | \n",
" 0.165777 | \n",
" -0.299485 | \n",
" -0.524918 | \n",
" -2.332121 | \n",
" 0.031044 | \n",
" ... | \n",
" 3.856117 | \n",
" 0.787996 | \n",
" 0.215221 | \n",
" 0.458275 | \n",
" 1.135109 | \n",
" 0.378972 | \n",
" 0.953559 | \n",
" -1.008240 | \n",
" 4.445435 | \n",
" 1462 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" 2.257002 | \n",
" 0.427951 | \n",
" -0.610464 | \n",
" -1.301125 | \n",
" -1.058327 | \n",
" 2.674177 | \n",
" -1.500824 | \n",
" -0.223999 | \n",
" 0.403440 | \n",
" ... | \n",
" -0.117138 | \n",
" -0.378473 | \n",
" -0.031613 | \n",
" 0.090593 | \n",
" -0.173914 | \n",
" -0.150098 | \n",
" -0.006612 | \n",
" 0.190780 | \n",
" -0.152486 | \n",
" 1463 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3 | \n",
" 3.253618 | \n",
" 0.537318 | \n",
" -0.796079 | \n",
" -0.851716 | \n",
" -1.209643 | \n",
" 2.388795 | \n",
" -1.340676 | \n",
" -0.876322 | \n",
" 0.421183 | \n",
" ... | \n",
" -0.441586 | \n",
" 0.020066 | \n",
" -0.151709 | \n",
" 0.444826 | \n",
" 0.008218 | \n",
" -0.161705 | \n",
" -0.453482 | \n",
" 0.472352 | \n",
" 0.046141 | \n",
" 1464 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4 | \n",
" 2.876409 | \n",
" -0.075909 | \n",
" -0.154959 | \n",
" -2.469870 | \n",
" 1.407820 | \n",
" 0.487532 | \n",
" 0.072190 | \n",
" 2.414446 | \n",
" 1.667224 | \n",
" ... | \n",
" 0.269062 | \n",
" 0.651172 | \n",
" -0.050461 | \n",
" -0.526448 | \n",
" -0.843701 | \n",
" 0.574770 | \n",
" -0.227828 | \n",
" 1.071423 | \n",
" 1.362638 | \n",
" 1465 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 174 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
"0 0 -3.208086 -2.987338 -0.327066 -1.609206 0.016879 -1.514939 \n",
"1 1 -1.403753 -4.261851 0.107527 0.935981 0.165777 -0.299485 \n",
"2 2 2.257002 0.427951 -0.610464 -1.301125 -1.058327 2.674177 \n",
"3 3 3.253618 0.537318 -0.796079 -0.851716 -1.209643 2.388795 \n",
"4 4 2.876409 -0.075909 -0.154959 -2.469870 1.407820 0.487532 \n",
"\n",
" PCA6 PCA7 PCA8 ... PCA163 PCA164 PCA165 PCA166 \\\n",
"0 -0.417889 -0.988173 -0.653363 ... -0.027364 0.653222 -0.201973 -0.769946 \n",
"1 -0.524918 -2.332121 0.031044 ... 3.856117 0.787996 0.215221 0.458275 \n",
"2 -1.500824 -0.223999 0.403440 ... -0.117138 -0.378473 -0.031613 0.090593 \n",
"3 -1.340676 -0.876322 0.421183 ... -0.441586 0.020066 -0.151709 0.444826 \n",
"4 0.072190 2.414446 1.667224 ... 0.269062 0.651172 -0.050461 -0.526448 \n",
"\n",
" PCA167 PCA168 PCA169 PCA170 PCA171 Id \n",
"0 -0.344834 0.514257 1.114106 0.337765 -0.639617 1461 \n",
"1 1.135109 0.378972 0.953559 -1.008240 4.445435 1462 \n",
"2 -0.173914 -0.150098 -0.006612 0.190780 -0.152486 1463 \n",
"3 0.008218 -0.161705 -0.453482 0.472352 0.046141 1464 \n",
"4 -0.843701 0.574770 -0.227828 1.071423 1.362638 1465 \n",
"\n",
"[5 rows x 174 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five test rows\n",
"test.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep the Id columns aside as integers (the test ids are reused when the\n",
"# submission file is written), then remove Id from both frames.\n",
"\n",
"trainId, testId = (frame['Id'].astype(int) for frame in (train, test))\n",
"\n",
"train, test = (frame.drop(columns='Id') for frame in (train, test))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Separate the target column from the training features; the test frame is used as-is\n",
"y = train['SalePrice']\n",
"X = train.drop(columns='SalePrice')\n",
"X_test = test"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 208500\n",
"1 181500\n",
"2 223500\n",
"3 140000\n",
"4 250000\n",
" ... \n",
"1455 175000\n",
"1456 210000\n",
"1457 266500\n",
"1458 142125\n",
"1459 147500\n",
"Name: SalePrice, Length: 1460, dtype: int64\n",
"(1460, 173)\n",
"(1459, 173)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" PCA0 | \n",
" PCA1 | \n",
" PCA2 | \n",
" PCA3 | \n",
" PCA4 | \n",
" PCA5 | \n",
" PCA6 | \n",
" PCA7 | \n",
" PCA8 | \n",
" ... | \n",
" PCA162 | \n",
" PCA163 | \n",
" PCA164 | \n",
" PCA165 | \n",
" PCA166 | \n",
" PCA167 | \n",
" PCA168 | \n",
" PCA169 | \n",
" PCA170 | \n",
" PCA171 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 4.345109 | \n",
" 1.619386 | \n",
" -0.739617 | \n",
" -2.080179 | \n",
" -0.985088 | \n",
" 1.999117 | \n",
" -1.231870 | \n",
" -0.131782 | \n",
" 1.316470 | \n",
" ... | \n",
" -0.276936 | \n",
" -0.128260 | \n",
" 0.160733 | \n",
" 0.071333 | \n",
" 0.155468 | \n",
" 0.172801 | \n",
" -0.169568 | \n",
" -0.144326 | \n",
" 0.391713 | \n",
" -0.013357 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 0.019142 | \n",
" -3.106959 | \n",
" 0.168223 | \n",
" -0.553341 | \n",
" 0.940712 | \n",
" 0.200719 | \n",
" -0.468954 | \n",
" 0.235082 | \n",
" -0.838022 | \n",
" ... | \n",
" 0.140974 | \n",
" -0.224535 | \n",
" -1.063234 | \n",
" -0.334556 | \n",
" 0.361166 | \n",
" -1.218397 | \n",
" -0.346191 | \n",
" -0.962753 | \n",
" -0.138863 | \n",
" 1.083103 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" 4.851149 | \n",
" 1.242811 | \n",
" -0.351815 | \n",
" -1.484957 | \n",
" -0.758200 | \n",
" 2.181179 | \n",
" -1.843949 | \n",
" 0.296194 | \n",
" 1.299142 | \n",
" ... | \n",
" -0.289024 | \n",
" -0.282563 | \n",
" 0.088334 | \n",
" 0.238624 | \n",
" 0.327280 | \n",
" 0.325285 | \n",
" -0.704900 | \n",
" -0.036388 | \n",
" -0.540516 | \n",
" 0.021711 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3 | \n",
" -1.771641 | \n",
" 0.039500 | \n",
" -1.358623 | \n",
" 1.920760 | \n",
" -2.550817 | \n",
" 0.209519 | \n",
" -0.756387 | \n",
" 0.700109 | \n",
" -1.408543 | \n",
" ... | \n",
" 0.286790 | \n",
" 0.672251 | \n",
" -0.172186 | \n",
" -0.518922 | \n",
" 0.231498 | \n",
" -0.074296 | \n",
" -0.034287 | \n",
" -0.877735 | \n",
" 0.028065 | \n",
" -0.321009 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4 | \n",
" 6.463747 | \n",
" 1.064473 | \n",
" 0.209472 | \n",
" 0.448906 | \n",
" -1.555301 | \n",
" 3.215822 | \n",
" -0.946356 | \n",
" -0.805204 | \n",
" 2.112526 | \n",
" ... | \n",
" -0.235585 | \n",
" 0.019570 | \n",
" -0.270189 | \n",
" 0.375297 | \n",
" -0.396732 | \n",
" -0.109084 | \n",
" 0.317305 | \n",
" -0.145975 | \n",
" -0.674692 | \n",
" -0.378458 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 173 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
"\n",
" PCA6 PCA7 PCA8 ... PCA162 PCA163 PCA164 PCA165 \\\n",
"0 -1.231870 -0.131782 1.316470 ... -0.276936 -0.128260 0.160733 0.071333 \n",
"1 -0.468954 0.235082 -0.838022 ... 0.140974 -0.224535 -1.063234 -0.334556 \n",
"2 -1.843949 0.296194 1.299142 ... -0.289024 -0.282563 0.088334 0.238624 \n",
"3 -0.756387 0.700109 -1.408543 ... 0.286790 0.672251 -0.172186 -0.518922 \n",
"4 -0.946356 -0.805204 2.112526 ... -0.235585 0.019570 -0.270189 0.375297 \n",
"\n",
" PCA166 PCA167 PCA168 PCA169 PCA170 PCA171 \n",
"0 0.155468 0.172801 -0.169568 -0.144326 0.391713 -0.013357 \n",
"1 0.361166 -1.218397 -0.346191 -0.962753 -0.138863 1.083103 \n",
"2 0.327280 0.325285 -0.704900 -0.036388 -0.540516 0.021711 \n",
"3 0.231498 -0.074296 -0.034287 -0.877735 0.028065 -0.321009 \n",
"4 -0.396732 -0.109084 0.317305 -0.145975 -0.674692 -0.378458 \n",
"\n",
"[5 rows x 173 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Echo the target and the shapes of both feature matrices, then preview the training features\n",
"print(y, X.shape, X_test.shape, sep='\\n')\n",
"X.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 1460 samples\n",
"Epoch 1/20\n",
"1460/1460 [==============================] - 8s 5ms/sample - loss: 21378772287.8247 - mse: 21378768896.0000\n",
"Epoch 2/20\n",
"1460/1460 [==============================] - 7s 4ms/sample - loss: 14424159683.6822 - mse: 14424160256.0000\n",
"Epoch 3/20\n",
"1460/1460 [==============================] - 7s 4ms/sample - loss: 9813772273.9726 - mse: 9813772288.0000\n",
"Epoch 4/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 4094837492.7781 - mse: 4094837504.0000\n",
"Epoch 5/20\n",
"1460/1460 [==============================] - 7s 5ms/sample - loss: 2989677719.4959 - mse: 2989677568.0000 3s - loss: 3902894729.6\n",
"Epoch 6/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 1279209371.7041 - mse: 1279209088.0000\n",
"Epoch 7/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 1186372414.0712 - mse: 1186372224.0000\n",
"Epoch 8/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 929078176.2630 - mse: 929078208.0000\n",
"Epoch 9/20\n",
"1460/1460 [==============================] - 7s 5ms/sample - loss: 1044272466.4110 - mse: 1044272320.0000\n",
"Epoch 10/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 521697620.8658 - mse: 521697728.0000\n",
"Epoch 11/20\n",
"1460/1460 [==============================] - 7s 4ms/sample - loss: 544022221.8521 - mse: 544022208.0000\n",
"Epoch 12/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 468429404.4932 - mse: 468429408.0000\n",
"Epoch 13/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 580859017.9945 - mse: 580859008.0000\n",
"Epoch 14/20\n",
"1460/1460 [==============================] - 7s 4ms/sample - loss: 477522669.0630 - mse: 477522624.0000\n",
"Epoch 15/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 336378193.0959 - mse: 336378176.0000\n",
"Epoch 16/20\n",
"1460/1460 [==============================] - 7s 5ms/sample - loss: 307777051.1781 - mse: 307777088.0000\n",
"Epoch 17/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 348827916.4932 - mse: 348827936.0000\n",
"Epoch 18/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 375509386.9151 - mse: 375509312.0000\n",
"Epoch 19/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 452619568.2192 - mse: 452619552.0000\n",
"Epoch 20/20\n",
"1460/1460 [==============================] - 6s 4ms/sample - loss: 348767770.2137 - mse: 348767808.0000\n",
"Model: \"sequential_7\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"flatten_7 (Flatten) multiple 0 \n",
"_________________________________________________________________\n",
"dense_48 (Dense) multiple 178176 \n",
"_________________________________________________________________\n",
"dense_49 (Dense) multiple 1049600 \n",
"_________________________________________________________________\n",
"dense_50 (Dense) multiple 2099200 \n",
"_________________________________________________________________\n",
"dense_51 (Dense) multiple 4196352 \n",
"_________________________________________________________________\n",
"dense_52 (Dense) multiple 2098176 \n",
"_________________________________________________________________\n",
"dense_53 (Dense) multiple 1049600 \n",
"_________________________________________________________________\n",
"dense_54 (Dense) multiple 524800 \n",
"_________________________________________________________________\n",
"dense_55 (Dense) multiple 131328 \n",
"_________________________________________________________________\n",
"dense_56 (Dense) multiple 32896 \n",
"_________________________________________________________________\n",
"dense_57 (Dense) multiple 8256 \n",
"_________________________________________________________________\n",
"dense_58 (Dense) multiple 2080 \n",
"_________________________________________________________________\n",
"dense_59 (Dense) multiple 33 \n",
"=================================================================\n",
"Total params: 11,370,497\n",
"Trainable params: 11,370,497\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"None\n"
]
}
],
"source": [
"# Fully connected regression network: feature columns in, one SalePrice value out\n",
"\n",
"# Width of each hidden Dense layer, in order\n",
"hidden_units = [1024, 1024, 2048, 2048, 1024, 1024, 512, 256, 128, 64, 32]\n",
"\n",
"model = tf.keras.models.Sequential(\n",
"    # Flatten the input (kept from the original layer stack)\n",
"    [tf.keras.layers.Flatten()]\n",
"    # Hidden layers, all ReLU\n",
"    + [tf.keras.layers.Dense(units, activation=tf.nn.relu) for units in hidden_units]\n",
"    # Output layer: linear activation for regression. A ReLU here clamps the\n",
"    # prediction at 0 and passes no gradient whenever the pre-activation is negative.\n",
"    + [tf.keras.layers.Dense(1, activation='linear')]\n",
")\n",
"\n",
"# Compile our model\n",
"model.compile(optimizer='adam',\n",
"              loss='mean_squared_error',\n",
"              metrics=['mse'])\n",
"\n",
"# Fit on the full training set (no validation split is held out here)\n",
"history = model.fit(\n",
"    X.values,\n",
"    y.values,\n",
"    epochs=20\n",
")\n",
"\n",
"# summary() prints the table itself and returns None, so wrapping it in print()\n",
"# only adds a stray 'None' line to the output\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Errors on the training data itself (the model was fit on these rows, so this is\n",
"# not a held-out estimate). Predict once and reuse the result for both metrics.\n",
"train_pred = model.predict(X.values)\n",
"mse = mean_squared_error(y.values, train_pred)\n",
"mae = mean_absolute_error(y.values, train_pred)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training mse = 206305033.33630806 & mae = 9953.949831442636 & rmse = 14363.322503387162\n"
]
}
],
"source": [
"# Report the training errors; rmse is the square root of mse\n",
"rmse = sqrt(mse)\n",
"print(\"Training mse = \", mse, \" & mae = \", mae, \" & rmse = \", rmse)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" SalePrice | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1461 | \n",
" 115729.328125 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1462 | \n",
" 148874.625000 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1463 | \n",
" 154782.968750 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1464 | \n",
" 159385.453125 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1465 | \n",
" 170496.734375 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id SalePrice\n",
"0 1461 115729.328125\n",
"1 1462 148874.625000\n",
"2 1463 154782.968750\n",
"3 1464 159385.453125\n",
"4 1465 170496.734375"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build and save the neural-network submission\n",
"\n",
"# The model is fed the underlying array of the X_test DataFrame\n",
"test_pred = model.predict(X_test.values)\n",
"\n",
"# The network has a single output unit, so flatten the predictions to one value per row.\n",
"# Building the frame column-by-column keeps Id as an integer instead of round-tripping it\n",
"# through a float array; SalePrice is stored as float64, as it was before.\n",
"submission = pd.DataFrame({\n",
"    'Id': testId.values,\n",
"    'SalePrice': test_pred.ravel().astype('float64')\n",
"})\n",
"submission.to_csv(os.path.join('submissions', 'NNSubmission.csv'), index=False)\n",
"submission.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Load the earlier ensemble submission (tree/forest models) from disk\n",
"\n",
"ensemble_csv = os.path.join('submissions', 'EnsembleSubmission.csv')\n",
"Ensemble_prediction = pd.read_csv(ensemble_csv)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Average the tree/forest ensemble with the NN model\n",
"\n",
"# Start from the file on disk rather than from the current Ensemble_prediction:\n",
"# the result is stored back under that name, so re-running this cell would\n",
"# otherwise average an already-averaged SalePrice with the NN output again.\n",
"tree_ensemble = pd.read_csv(os.path.join('submissions', 'EnsembleSubmission.csv'))\n",
"\n",
"# Pair the two predictions by Id instead of by row position\n",
"nn_sale = submission[['Id', 'SalePrice']].rename(columns={'SalePrice': 'NNSale'})\n",
"blended = tree_ensemble.merge(nn_sale, on='Id', how='left', validate='one_to_one')\n",
"\n",
"blended['SalePrice'] = blended[['SalePrice', 'NNSale']].mean(axis=1)\n",
"Ensemble_prediction = blended.drop('NNSale', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" SalePrice | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1461 | \n",
" 120161.371094 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1462 | \n",
" 152452.843750 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1463 | \n",
" 162175.976562 | \n",
"
\n",
" \n",
" | 3 | \n",
" 1464 | \n",
" 167157.189844 | \n",
"
\n",
" \n",
" | 4 | \n",
" 1465 | \n",
" 174259.088281 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id SalePrice\n",
"0 1461 120161.371094\n",
"1 1462 152452.843750\n",
"2 1463 162175.976562\n",
"3 1464 167157.189844\n",
"4 1465 174259.088281"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five blended predictions\n",
"Ensemble_prediction.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Write the blended predictions to disk without the index column\n",
"Ensemble_prediction.to_csv(path_or_buf='submissions/NNEnsembleSubmission.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}