1095 lines
37 KiB
Plaintext
1095 lines
37 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Using TensorFlow backend.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import os\n",
|
||
"from math import sqrt\n",
|
||
"from sklearn.metrics import mean_squared_error\n",
|
||
"from sklearn.metrics import mean_absolute_error\n",
|
||
"import tensorflow as tf\n",
|
||
"import keras"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"train = pd.read_csv(os.path.join('data', 'clean_train.csv'))\n",
|
||
"test = pd.read_csv(os.path.join('data', 'clean_test.csv'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>PCA0</th>\n",
|
||
" <th>PCA1</th>\n",
|
||
" <th>PCA2</th>\n",
|
||
" <th>PCA3</th>\n",
|
||
" <th>PCA4</th>\n",
|
||
" <th>PCA5</th>\n",
|
||
" <th>PCA6</th>\n",
|
||
" <th>PCA7</th>\n",
|
||
" <th>PCA8</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PCA164</th>\n",
|
||
" <th>PCA165</th>\n",
|
||
" <th>PCA166</th>\n",
|
||
" <th>PCA167</th>\n",
|
||
" <th>PCA168</th>\n",
|
||
" <th>PCA169</th>\n",
|
||
" <th>PCA170</th>\n",
|
||
" <th>PCA171</th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4.345109</td>\n",
|
||
" <td>1.619386</td>\n",
|
||
" <td>-0.739617</td>\n",
|
||
" <td>-2.080179</td>\n",
|
||
" <td>-0.985088</td>\n",
|
||
" <td>1.999117</td>\n",
|
||
" <td>-1.231870</td>\n",
|
||
" <td>-0.131782</td>\n",
|
||
" <td>1.316470</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.160733</td>\n",
|
||
" <td>0.071333</td>\n",
|
||
" <td>0.155468</td>\n",
|
||
" <td>0.172801</td>\n",
|
||
" <td>-0.169568</td>\n",
|
||
" <td>-0.144326</td>\n",
|
||
" <td>0.391713</td>\n",
|
||
" <td>-0.013357</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.019142</td>\n",
|
||
" <td>-3.106959</td>\n",
|
||
" <td>0.168223</td>\n",
|
||
" <td>-0.553341</td>\n",
|
||
" <td>0.940712</td>\n",
|
||
" <td>0.200719</td>\n",
|
||
" <td>-0.468954</td>\n",
|
||
" <td>0.235082</td>\n",
|
||
" <td>-0.838022</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-1.063234</td>\n",
|
||
" <td>-0.334556</td>\n",
|
||
" <td>0.361166</td>\n",
|
||
" <td>-1.218397</td>\n",
|
||
" <td>-0.346191</td>\n",
|
||
" <td>-0.962753</td>\n",
|
||
" <td>-0.138863</td>\n",
|
||
" <td>1.083103</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>181500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>4.851149</td>\n",
|
||
" <td>1.242811</td>\n",
|
||
" <td>-0.351815</td>\n",
|
||
" <td>-1.484957</td>\n",
|
||
" <td>-0.758200</td>\n",
|
||
" <td>2.181179</td>\n",
|
||
" <td>-1.843949</td>\n",
|
||
" <td>0.296194</td>\n",
|
||
" <td>1.299142</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.088334</td>\n",
|
||
" <td>0.238624</td>\n",
|
||
" <td>0.327280</td>\n",
|
||
" <td>0.325285</td>\n",
|
||
" <td>-0.704900</td>\n",
|
||
" <td>-0.036388</td>\n",
|
||
" <td>-0.540516</td>\n",
|
||
" <td>0.021711</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>223500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>-1.771641</td>\n",
|
||
" <td>0.039500</td>\n",
|
||
" <td>-1.358623</td>\n",
|
||
" <td>1.920760</td>\n",
|
||
" <td>-2.550817</td>\n",
|
||
" <td>0.209519</td>\n",
|
||
" <td>-0.756387</td>\n",
|
||
" <td>0.700109</td>\n",
|
||
" <td>-1.408543</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.172186</td>\n",
|
||
" <td>-0.518922</td>\n",
|
||
" <td>0.231498</td>\n",
|
||
" <td>-0.074296</td>\n",
|
||
" <td>-0.034287</td>\n",
|
||
" <td>-0.877735</td>\n",
|
||
" <td>0.028065</td>\n",
|
||
" <td>-0.321009</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>140000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6.463747</td>\n",
|
||
" <td>1.064473</td>\n",
|
||
" <td>0.209472</td>\n",
|
||
" <td>0.448906</td>\n",
|
||
" <td>-1.555301</td>\n",
|
||
" <td>3.215822</td>\n",
|
||
" <td>-0.946356</td>\n",
|
||
" <td>-0.805204</td>\n",
|
||
" <td>2.112526</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.270189</td>\n",
|
||
" <td>0.375297</td>\n",
|
||
" <td>-0.396732</td>\n",
|
||
" <td>-0.109084</td>\n",
|
||
" <td>0.317305</td>\n",
|
||
" <td>-0.145975</td>\n",
|
||
" <td>-0.674692</td>\n",
|
||
" <td>-0.378458</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>250000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 175 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||
"\n",
|
||
" PCA6 PCA7 PCA8 ... PCA164 PCA165 PCA166 PCA167 \\\n",
|
||
"0 -1.231870 -0.131782 1.316470 ... 0.160733 0.071333 0.155468 0.172801 \n",
|
||
"1 -0.468954 0.235082 -0.838022 ... -1.063234 -0.334556 0.361166 -1.218397 \n",
|
||
"2 -1.843949 0.296194 1.299142 ... 0.088334 0.238624 0.327280 0.325285 \n",
|
||
"3 -0.756387 0.700109 -1.408543 ... -0.172186 -0.518922 0.231498 -0.074296 \n",
|
||
"4 -0.946356 -0.805204 2.112526 ... -0.270189 0.375297 -0.396732 -0.109084 \n",
|
||
"\n",
|
||
" PCA168 PCA169 PCA170 PCA171 Id SalePrice \n",
|
||
"0 -0.169568 -0.144326 0.391713 -0.013357 1 208500 \n",
|
||
"1 -0.346191 -0.962753 -0.138863 1.083103 2 181500 \n",
|
||
"2 -0.704900 -0.036388 -0.540516 0.021711 3 223500 \n",
|
||
"3 -0.034287 -0.877735 0.028065 -0.321009 4 140000 \n",
|
||
"4 0.317305 -0.145975 -0.674692 -0.378458 5 250000 \n",
|
||
"\n",
|
||
"[5 rows x 175 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>PCA0</th>\n",
|
||
" <th>PCA1</th>\n",
|
||
" <th>PCA2</th>\n",
|
||
" <th>PCA3</th>\n",
|
||
" <th>PCA4</th>\n",
|
||
" <th>PCA5</th>\n",
|
||
" <th>PCA6</th>\n",
|
||
" <th>PCA7</th>\n",
|
||
" <th>PCA8</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PCA163</th>\n",
|
||
" <th>PCA164</th>\n",
|
||
" <th>PCA165</th>\n",
|
||
" <th>PCA166</th>\n",
|
||
" <th>PCA167</th>\n",
|
||
" <th>PCA168</th>\n",
|
||
" <th>PCA169</th>\n",
|
||
" <th>PCA170</th>\n",
|
||
" <th>PCA171</th>\n",
|
||
" <th>Id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>-3.208086</td>\n",
|
||
" <td>-2.987338</td>\n",
|
||
" <td>-0.327066</td>\n",
|
||
" <td>-1.609206</td>\n",
|
||
" <td>0.016879</td>\n",
|
||
" <td>-1.514939</td>\n",
|
||
" <td>-0.417889</td>\n",
|
||
" <td>-0.988173</td>\n",
|
||
" <td>-0.653363</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.027364</td>\n",
|
||
" <td>0.653222</td>\n",
|
||
" <td>-0.201973</td>\n",
|
||
" <td>-0.769946</td>\n",
|
||
" <td>-0.344834</td>\n",
|
||
" <td>0.514257</td>\n",
|
||
" <td>1.114106</td>\n",
|
||
" <td>0.337765</td>\n",
|
||
" <td>-0.639617</td>\n",
|
||
" <td>1461</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>-1.403753</td>\n",
|
||
" <td>-4.261851</td>\n",
|
||
" <td>0.107527</td>\n",
|
||
" <td>0.935981</td>\n",
|
||
" <td>0.165777</td>\n",
|
||
" <td>-0.299485</td>\n",
|
||
" <td>-0.524918</td>\n",
|
||
" <td>-2.332121</td>\n",
|
||
" <td>0.031044</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.856117</td>\n",
|
||
" <td>0.787996</td>\n",
|
||
" <td>0.215221</td>\n",
|
||
" <td>0.458275</td>\n",
|
||
" <td>1.135109</td>\n",
|
||
" <td>0.378972</td>\n",
|
||
" <td>0.953559</td>\n",
|
||
" <td>-1.008240</td>\n",
|
||
" <td>4.445435</td>\n",
|
||
" <td>1462</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2.257002</td>\n",
|
||
" <td>0.427951</td>\n",
|
||
" <td>-0.610464</td>\n",
|
||
" <td>-1.301125</td>\n",
|
||
" <td>-1.058327</td>\n",
|
||
" <td>2.674177</td>\n",
|
||
" <td>-1.500824</td>\n",
|
||
" <td>-0.223999</td>\n",
|
||
" <td>0.403440</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.117138</td>\n",
|
||
" <td>-0.378473</td>\n",
|
||
" <td>-0.031613</td>\n",
|
||
" <td>0.090593</td>\n",
|
||
" <td>-0.173914</td>\n",
|
||
" <td>-0.150098</td>\n",
|
||
" <td>-0.006612</td>\n",
|
||
" <td>0.190780</td>\n",
|
||
" <td>-0.152486</td>\n",
|
||
" <td>1463</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3.253618</td>\n",
|
||
" <td>0.537318</td>\n",
|
||
" <td>-0.796079</td>\n",
|
||
" <td>-0.851716</td>\n",
|
||
" <td>-1.209643</td>\n",
|
||
" <td>2.388795</td>\n",
|
||
" <td>-1.340676</td>\n",
|
||
" <td>-0.876322</td>\n",
|
||
" <td>0.421183</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.441586</td>\n",
|
||
" <td>0.020066</td>\n",
|
||
" <td>-0.151709</td>\n",
|
||
" <td>0.444826</td>\n",
|
||
" <td>0.008218</td>\n",
|
||
" <td>-0.161705</td>\n",
|
||
" <td>-0.453482</td>\n",
|
||
" <td>0.472352</td>\n",
|
||
" <td>0.046141</td>\n",
|
||
" <td>1464</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2.876409</td>\n",
|
||
" <td>-0.075909</td>\n",
|
||
" <td>-0.154959</td>\n",
|
||
" <td>-2.469870</td>\n",
|
||
" <td>1.407820</td>\n",
|
||
" <td>0.487532</td>\n",
|
||
" <td>0.072190</td>\n",
|
||
" <td>2.414446</td>\n",
|
||
" <td>1.667224</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.269062</td>\n",
|
||
" <td>0.651172</td>\n",
|
||
" <td>-0.050461</td>\n",
|
||
" <td>-0.526448</td>\n",
|
||
" <td>-0.843701</td>\n",
|
||
" <td>0.574770</td>\n",
|
||
" <td>-0.227828</td>\n",
|
||
" <td>1.071423</td>\n",
|
||
" <td>1.362638</td>\n",
|
||
" <td>1465</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 174 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||
"0 0 -3.208086 -2.987338 -0.327066 -1.609206 0.016879 -1.514939 \n",
|
||
"1 1 -1.403753 -4.261851 0.107527 0.935981 0.165777 -0.299485 \n",
|
||
"2 2 2.257002 0.427951 -0.610464 -1.301125 -1.058327 2.674177 \n",
|
||
"3 3 3.253618 0.537318 -0.796079 -0.851716 -1.209643 2.388795 \n",
|
||
"4 4 2.876409 -0.075909 -0.154959 -2.469870 1.407820 0.487532 \n",
|
||
"\n",
|
||
" PCA6 PCA7 PCA8 ... PCA163 PCA164 PCA165 PCA166 \\\n",
|
||
"0 -0.417889 -0.988173 -0.653363 ... -0.027364 0.653222 -0.201973 -0.769946 \n",
|
||
"1 -0.524918 -2.332121 0.031044 ... 3.856117 0.787996 0.215221 0.458275 \n",
|
||
"2 -1.500824 -0.223999 0.403440 ... -0.117138 -0.378473 -0.031613 0.090593 \n",
|
||
"3 -1.340676 -0.876322 0.421183 ... -0.441586 0.020066 -0.151709 0.444826 \n",
|
||
"4 0.072190 2.414446 1.667224 ... 0.269062 0.651172 -0.050461 -0.526448 \n",
|
||
"\n",
|
||
" PCA167 PCA168 PCA169 PCA170 PCA171 Id \n",
|
||
"0 -0.344834 0.514257 1.114106 0.337765 -0.639617 1461 \n",
|
||
"1 1.135109 0.378972 0.953559 -1.008240 4.445435 1462 \n",
|
||
"2 -0.173914 -0.150098 -0.006612 0.190780 -0.152486 1463 \n",
|
||
"3 0.008218 -0.161705 -0.453482 0.472352 0.046141 1464 \n",
|
||
"4 -0.843701 0.574770 -0.227828 1.071423 1.362638 1465 \n",
|
||
"\n",
|
||
"[5 rows x 174 columns]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Set aside unnecessary features\n",
|
||
"\n",
|
||
"trainId = train['Id'].astype(int)\n",
|
||
"testId = test['Id'].astype(int)\n",
|
||
"\n",
|
||
"train = train.drop('Id', axis=1)\n",
|
||
"test = test.drop('Id', axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"X = train.drop('SalePrice', axis=1)\n",
|
||
"y = train['SalePrice']\n",
|
||
"X_test = test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0 208500\n",
|
||
"1 181500\n",
|
||
"2 223500\n",
|
||
"3 140000\n",
|
||
"4 250000\n",
|
||
" ... \n",
|
||
"1455 175000\n",
|
||
"1456 210000\n",
|
||
"1457 266500\n",
|
||
"1458 142125\n",
|
||
"1459 147500\n",
|
||
"Name: SalePrice, Length: 1460, dtype: int64\n",
|
||
"(1460, 173)\n",
|
||
"(1459, 173)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>PCA0</th>\n",
|
||
" <th>PCA1</th>\n",
|
||
" <th>PCA2</th>\n",
|
||
" <th>PCA3</th>\n",
|
||
" <th>PCA4</th>\n",
|
||
" <th>PCA5</th>\n",
|
||
" <th>PCA6</th>\n",
|
||
" <th>PCA7</th>\n",
|
||
" <th>PCA8</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PCA162</th>\n",
|
||
" <th>PCA163</th>\n",
|
||
" <th>PCA164</th>\n",
|
||
" <th>PCA165</th>\n",
|
||
" <th>PCA166</th>\n",
|
||
" <th>PCA167</th>\n",
|
||
" <th>PCA168</th>\n",
|
||
" <th>PCA169</th>\n",
|
||
" <th>PCA170</th>\n",
|
||
" <th>PCA171</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4.345109</td>\n",
|
||
" <td>1.619386</td>\n",
|
||
" <td>-0.739617</td>\n",
|
||
" <td>-2.080179</td>\n",
|
||
" <td>-0.985088</td>\n",
|
||
" <td>1.999117</td>\n",
|
||
" <td>-1.231870</td>\n",
|
||
" <td>-0.131782</td>\n",
|
||
" <td>1.316470</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.276936</td>\n",
|
||
" <td>-0.128260</td>\n",
|
||
" <td>0.160733</td>\n",
|
||
" <td>0.071333</td>\n",
|
||
" <td>0.155468</td>\n",
|
||
" <td>0.172801</td>\n",
|
||
" <td>-0.169568</td>\n",
|
||
" <td>-0.144326</td>\n",
|
||
" <td>0.391713</td>\n",
|
||
" <td>-0.013357</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.019142</td>\n",
|
||
" <td>-3.106959</td>\n",
|
||
" <td>0.168223</td>\n",
|
||
" <td>-0.553341</td>\n",
|
||
" <td>0.940712</td>\n",
|
||
" <td>0.200719</td>\n",
|
||
" <td>-0.468954</td>\n",
|
||
" <td>0.235082</td>\n",
|
||
" <td>-0.838022</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.140974</td>\n",
|
||
" <td>-0.224535</td>\n",
|
||
" <td>-1.063234</td>\n",
|
||
" <td>-0.334556</td>\n",
|
||
" <td>0.361166</td>\n",
|
||
" <td>-1.218397</td>\n",
|
||
" <td>-0.346191</td>\n",
|
||
" <td>-0.962753</td>\n",
|
||
" <td>-0.138863</td>\n",
|
||
" <td>1.083103</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>4.851149</td>\n",
|
||
" <td>1.242811</td>\n",
|
||
" <td>-0.351815</td>\n",
|
||
" <td>-1.484957</td>\n",
|
||
" <td>-0.758200</td>\n",
|
||
" <td>2.181179</td>\n",
|
||
" <td>-1.843949</td>\n",
|
||
" <td>0.296194</td>\n",
|
||
" <td>1.299142</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.289024</td>\n",
|
||
" <td>-0.282563</td>\n",
|
||
" <td>0.088334</td>\n",
|
||
" <td>0.238624</td>\n",
|
||
" <td>0.327280</td>\n",
|
||
" <td>0.325285</td>\n",
|
||
" <td>-0.704900</td>\n",
|
||
" <td>-0.036388</td>\n",
|
||
" <td>-0.540516</td>\n",
|
||
" <td>0.021711</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>-1.771641</td>\n",
|
||
" <td>0.039500</td>\n",
|
||
" <td>-1.358623</td>\n",
|
||
" <td>1.920760</td>\n",
|
||
" <td>-2.550817</td>\n",
|
||
" <td>0.209519</td>\n",
|
||
" <td>-0.756387</td>\n",
|
||
" <td>0.700109</td>\n",
|
||
" <td>-1.408543</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.286790</td>\n",
|
||
" <td>0.672251</td>\n",
|
||
" <td>-0.172186</td>\n",
|
||
" <td>-0.518922</td>\n",
|
||
" <td>0.231498</td>\n",
|
||
" <td>-0.074296</td>\n",
|
||
" <td>-0.034287</td>\n",
|
||
" <td>-0.877735</td>\n",
|
||
" <td>0.028065</td>\n",
|
||
" <td>-0.321009</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>6.463747</td>\n",
|
||
" <td>1.064473</td>\n",
|
||
" <td>0.209472</td>\n",
|
||
" <td>0.448906</td>\n",
|
||
" <td>-1.555301</td>\n",
|
||
" <td>3.215822</td>\n",
|
||
" <td>-0.946356</td>\n",
|
||
" <td>-0.805204</td>\n",
|
||
" <td>2.112526</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>-0.235585</td>\n",
|
||
" <td>0.019570</td>\n",
|
||
" <td>-0.270189</td>\n",
|
||
" <td>0.375297</td>\n",
|
||
" <td>-0.396732</td>\n",
|
||
" <td>-0.109084</td>\n",
|
||
" <td>0.317305</td>\n",
|
||
" <td>-0.145975</td>\n",
|
||
" <td>-0.674692</td>\n",
|
||
" <td>-0.378458</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 173 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||
"\n",
|
||
" PCA6 PCA7 PCA8 ... PCA162 PCA163 PCA164 PCA165 \\\n",
|
||
"0 -1.231870 -0.131782 1.316470 ... -0.276936 -0.128260 0.160733 0.071333 \n",
|
||
"1 -0.468954 0.235082 -0.838022 ... 0.140974 -0.224535 -1.063234 -0.334556 \n",
|
||
"2 -1.843949 0.296194 1.299142 ... -0.289024 -0.282563 0.088334 0.238624 \n",
|
||
"3 -0.756387 0.700109 -1.408543 ... 0.286790 0.672251 -0.172186 -0.518922 \n",
|
||
"4 -0.946356 -0.805204 2.112526 ... -0.235585 0.019570 -0.270189 0.375297 \n",
|
||
"\n",
|
||
" PCA166 PCA167 PCA168 PCA169 PCA170 PCA171 \n",
|
||
"0 0.155468 0.172801 -0.169568 -0.144326 0.391713 -0.013357 \n",
|
||
"1 0.361166 -1.218397 -0.346191 -0.962753 -0.138863 1.083103 \n",
|
||
"2 0.327280 0.325285 -0.704900 -0.036388 -0.540516 0.021711 \n",
|
||
"3 0.231498 -0.074296 -0.034287 -0.877735 0.028065 -0.321009 \n",
|
||
"4 -0.396732 -0.109084 0.317305 -0.145975 -0.674692 -0.378458 \n",
|
||
"\n",
|
||
"[5 rows x 173 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"print(y)\n",
|
||
"print(X.shape)\n",
|
||
"print(X_test.shape)\n",
|
||
"X.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Train on 1460 samples\n",
|
||
"Epoch 1/20\n",
|
||
"1460/1460 [==============================] - 8s 5ms/sample - loss: 21378772287.8247 - mse: 21378768896.0000\n",
|
||
"Epoch 2/20\n",
|
||
"1460/1460 [==============================] - 7s 4ms/sample - loss: 14424159683.6822 - mse: 14424160256.0000\n",
|
||
"Epoch 3/20\n",
|
||
"1460/1460 [==============================] - 7s 4ms/sample - loss: 9813772273.9726 - mse: 9813772288.0000\n",
|
||
"Epoch 4/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 4094837492.7781 - mse: 4094837504.0000\n",
|
||
"Epoch 5/20\n",
|
||
"1460/1460 [==============================] - 7s 5ms/sample - loss: 2989677719.4959 - mse: 2989677568.0000 3s - loss: 3902894729.6\n",
|
||
"Epoch 6/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 1279209371.7041 - mse: 1279209088.0000\n",
|
||
"Epoch 7/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 1186372414.0712 - mse: 1186372224.0000\n",
|
||
"Epoch 8/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 929078176.2630 - mse: 929078208.0000\n",
|
||
"Epoch 9/20\n",
|
||
"1460/1460 [==============================] - 7s 5ms/sample - loss: 1044272466.4110 - mse: 1044272320.0000\n",
|
||
"Epoch 10/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 521697620.8658 - mse: 521697728.0000\n",
|
||
"Epoch 11/20\n",
|
||
"1460/1460 [==============================] - 7s 4ms/sample - loss: 544022221.8521 - mse: 544022208.0000\n",
|
||
"Epoch 12/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 468429404.4932 - mse: 468429408.0000\n",
|
||
"Epoch 13/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 580859017.9945 - mse: 580859008.0000\n",
|
||
"Epoch 14/20\n",
|
||
"1460/1460 [==============================] - 7s 4ms/sample - loss: 477522669.0630 - mse: 477522624.0000\n",
|
||
"Epoch 15/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 336378193.0959 - mse: 336378176.0000\n",
|
||
"Epoch 16/20\n",
|
||
"1460/1460 [==============================] - 7s 5ms/sample - loss: 307777051.1781 - mse: 307777088.0000\n",
|
||
"Epoch 17/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 348827916.4932 - mse: 348827936.0000\n",
|
||
"Epoch 18/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 375509386.9151 - mse: 375509312.0000\n",
|
||
"Epoch 19/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 452619568.2192 - mse: 452619552.0000\n",
|
||
"Epoch 20/20\n",
|
||
"1460/1460 [==============================] - 6s 4ms/sample - loss: 348767770.2137 - mse: 348767808.0000\n",
|
||
"Model: \"sequential_7\"\n",
|
||
"_________________________________________________________________\n",
|
||
"Layer (type) Output Shape Param # \n",
|
||
"=================================================================\n",
|
||
"flatten_7 (Flatten) multiple 0 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_48 (Dense) multiple 178176 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_49 (Dense) multiple 1049600 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_50 (Dense) multiple 2099200 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_51 (Dense) multiple 4196352 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_52 (Dense) multiple 2098176 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_53 (Dense) multiple 1049600 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_54 (Dense) multiple 524800 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_55 (Dense) multiple 131328 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_56 (Dense) multiple 32896 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_57 (Dense) multiple 8256 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_58 (Dense) multiple 2080 \n",
|
||
"_________________________________________________________________\n",
|
||
"dense_59 (Dense) multiple 33 \n",
|
||
"=================================================================\n",
|
||
"Total params: 11,370,497\n",
|
||
"Trainable params: 11,370,497\n",
|
||
"Non-trainable params: 0\n",
|
||
"_________________________________________________________________\n",
|
||
"None\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Setup our model\n",
|
||
"\n",
|
||
"model = tf.keras.models.Sequential([\n",
|
||
" # Flatten out our input\n",
|
||
" tf.keras.layers.Flatten(),\n",
|
||
" \n",
|
||
" # Setup our hidden layer\n",
|
||
" \n",
|
||
" tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(2048, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(2048, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(512, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(256, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(128, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(64, activation=tf.nn.relu),\n",
|
||
" tf.keras.layers.Dense(32, activation=tf.nn.relu),\n",
|
||
" \n",
|
||
" # Setup output layer\n",
|
||
" tf.keras.layers.Dense(1, activation=tf.nn.relu)\n",
|
||
"])\n",
|
||
"\n",
|
||
"# Compile our model\n",
|
||
"model.compile(optimizer='adam', \n",
|
||
" loss = 'mean_squared_error', \n",
|
||
" metrics=['mse'])\n",
|
||
"\n",
|
||
"# Fit model\n",
|
||
"history = model.fit(\n",
|
||
" X.values,\n",
|
||
" y.values,\n",
|
||
" epochs=20\n",
|
||
")\n",
|
||
"\n",
|
||
"# Model summary\n",
|
||
"print(model.summary())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"mse = mean_squared_error(y.values, model.predict(X.values))\n",
|
||
"mae = mean_absolute_error(y.values, model.predict(X.values))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Training mse = 206305033.33630806 & mae = 9953.949831442636 & rmse = 14363.322503387162\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Training mse = \",mse,\" & mae = \",mae,\" & rmse = \", sqrt(mse))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1461</td>\n",
|
||
" <td>115729.328125</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1462</td>\n",
|
||
" <td>148874.625000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1463</td>\n",
|
||
" <td>154782.968750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1464</td>\n",
|
||
" <td>159385.453125</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1465</td>\n",
|
||
" <td>170496.734375</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id SalePrice\n",
|
||
"0 1461 115729.328125\n",
|
||
"1 1462 148874.625000\n",
|
||
"2 1463 154782.968750\n",
|
||
"3 1464 159385.453125\n",
|
||
"4 1465 170496.734375"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Setup and save Tree prediction\n",
|
||
"\n",
|
||
"test_pred = model.predict(X_test.values) # Note that we need to feed our model the values or our dataframe X_test\n",
|
||
"predictions = np.c_[testId, test_pred] # Note that we take the argmax over the collumns to use our softmax output\n",
|
||
"submission = pd.DataFrame(predictions, columns = ['Id', 'SalePrice'])\n",
|
||
"submission['Id'] = submission['Id'].astype(int)\n",
|
||
"submission.to_csv(\"submissions/NNSubmission.csv\", index=False)\n",
|
||
"submission.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Import Ensemble prediction made from Tree/Forest models\n",
|
||
"\n",
|
||
"Ensemble_prediction = pd.read_csv(os.path.join('submissions', 'EnsembleSubmission.csv'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Average old ensemble with NN model\n",
|
||
"\n",
|
||
"Ensemble_prediction['NNSale'] = submission['SalePrice']\n",
|
||
"Ensemble_prediction['SalePrice'] = Ensemble_prediction[['SalePrice', 'NNSale']].mean(axis=1)\n",
|
||
"Ensemble_prediction = Ensemble_prediction.drop('NNSale', axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1461</td>\n",
|
||
" <td>120161.371094</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1462</td>\n",
|
||
" <td>152452.843750</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1463</td>\n",
|
||
" <td>162175.976562</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1464</td>\n",
|
||
" <td>167157.189844</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1465</td>\n",
|
||
" <td>174259.088281</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id SalePrice\n",
|
||
"0 1461 120161.371094\n",
|
||
"1 1462 152452.843750\n",
|
||
"2 1463 162175.976562\n",
|
||
"3 1464 167157.189844\n",
|
||
"4 1465 174259.088281"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"Ensemble_prediction.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"Ensemble_prediction.to_csv('submissions/NNEnsembleSubmission.csv', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.7.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|