Add kaggle_house_prices with random forests model
This commit is contained in:
parent
7cb532e1a8
commit
a0b052290c
|
|
@ -0,0 +1,994 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import os\n",
|
||||||
|
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||||
|
"from sklearn.ensemble import RandomForestRegressor\n",
|
||||||
|
"from math import sqrt\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"from sklearn.metrics import mean_absolute_error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"train = pd.read_csv(os.path.join('data', 'clean_train.csv'))\n",
|
||||||
|
"test = pd.read_csv(os.path.join('data', 'clean_test.csv'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" <th>SalePrice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>4.345109</td>\n",
|
||||||
|
" <td>1.619386</td>\n",
|
||||||
|
" <td>-0.739617</td>\n",
|
||||||
|
" <td>-2.080179</td>\n",
|
||||||
|
" <td>-0.985088</td>\n",
|
||||||
|
" <td>1.999117</td>\n",
|
||||||
|
" <td>-1.231870</td>\n",
|
||||||
|
" <td>-0.131782</td>\n",
|
||||||
|
" <td>1.316470</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.160733</td>\n",
|
||||||
|
" <td>0.071333</td>\n",
|
||||||
|
" <td>0.155468</td>\n",
|
||||||
|
" <td>0.172801</td>\n",
|
||||||
|
" <td>-0.169568</td>\n",
|
||||||
|
" <td>-0.144326</td>\n",
|
||||||
|
" <td>0.391713</td>\n",
|
||||||
|
" <td>-0.013357</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>208500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0.019142</td>\n",
|
||||||
|
" <td>-3.106959</td>\n",
|
||||||
|
" <td>0.168223</td>\n",
|
||||||
|
" <td>-0.553341</td>\n",
|
||||||
|
" <td>0.940712</td>\n",
|
||||||
|
" <td>0.200719</td>\n",
|
||||||
|
" <td>-0.468954</td>\n",
|
||||||
|
" <td>0.235082</td>\n",
|
||||||
|
" <td>-0.838022</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-1.063234</td>\n",
|
||||||
|
" <td>-0.334556</td>\n",
|
||||||
|
" <td>0.361166</td>\n",
|
||||||
|
" <td>-1.218397</td>\n",
|
||||||
|
" <td>-0.346191</td>\n",
|
||||||
|
" <td>-0.962753</td>\n",
|
||||||
|
" <td>-0.138863</td>\n",
|
||||||
|
" <td>1.083103</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>181500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>4.851149</td>\n",
|
||||||
|
" <td>1.242811</td>\n",
|
||||||
|
" <td>-0.351815</td>\n",
|
||||||
|
" <td>-1.484957</td>\n",
|
||||||
|
" <td>-0.758200</td>\n",
|
||||||
|
" <td>2.181179</td>\n",
|
||||||
|
" <td>-1.843949</td>\n",
|
||||||
|
" <td>0.296194</td>\n",
|
||||||
|
" <td>1.299142</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.088334</td>\n",
|
||||||
|
" <td>0.238624</td>\n",
|
||||||
|
" <td>0.327280</td>\n",
|
||||||
|
" <td>0.325285</td>\n",
|
||||||
|
" <td>-0.704900</td>\n",
|
||||||
|
" <td>-0.036388</td>\n",
|
||||||
|
" <td>-0.540516</td>\n",
|
||||||
|
" <td>0.021711</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>223500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>-1.771641</td>\n",
|
||||||
|
" <td>0.039500</td>\n",
|
||||||
|
" <td>-1.358623</td>\n",
|
||||||
|
" <td>1.920760</td>\n",
|
||||||
|
" <td>-2.550817</td>\n",
|
||||||
|
" <td>0.209519</td>\n",
|
||||||
|
" <td>-0.756387</td>\n",
|
||||||
|
" <td>0.700109</td>\n",
|
||||||
|
" <td>-1.408543</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.172186</td>\n",
|
||||||
|
" <td>-0.518922</td>\n",
|
||||||
|
" <td>0.231498</td>\n",
|
||||||
|
" <td>-0.074296</td>\n",
|
||||||
|
" <td>-0.034287</td>\n",
|
||||||
|
" <td>-0.877735</td>\n",
|
||||||
|
" <td>0.028065</td>\n",
|
||||||
|
" <td>-0.321009</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>140000</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>6.463747</td>\n",
|
||||||
|
" <td>1.064473</td>\n",
|
||||||
|
" <td>0.209472</td>\n",
|
||||||
|
" <td>0.448906</td>\n",
|
||||||
|
" <td>-1.555301</td>\n",
|
||||||
|
" <td>3.215822</td>\n",
|
||||||
|
" <td>-0.946356</td>\n",
|
||||||
|
" <td>-0.805204</td>\n",
|
||||||
|
" <td>2.112526</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.270189</td>\n",
|
||||||
|
" <td>0.375297</td>\n",
|
||||||
|
" <td>-0.396732</td>\n",
|
||||||
|
" <td>-0.109084</td>\n",
|
||||||
|
" <td>0.317305</td>\n",
|
||||||
|
" <td>-0.145975</td>\n",
|
||||||
|
" <td>-0.674692</td>\n",
|
||||||
|
" <td>-0.378458</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>250000</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 175 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||||||
|
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||||||
|
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||||||
|
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||||||
|
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA164 PCA165 PCA166 PCA167 \\\n",
|
||||||
|
"0 -1.231870 -0.131782 1.316470 ... 0.160733 0.071333 0.155468 0.172801 \n",
|
||||||
|
"1 -0.468954 0.235082 -0.838022 ... -1.063234 -0.334556 0.361166 -1.218397 \n",
|
||||||
|
"2 -1.843949 0.296194 1.299142 ... 0.088334 0.238624 0.327280 0.325285 \n",
|
||||||
|
"3 -0.756387 0.700109 -1.408543 ... -0.172186 -0.518922 0.231498 -0.074296 \n",
|
||||||
|
"4 -0.946356 -0.805204 2.112526 ... -0.270189 0.375297 -0.396732 -0.109084 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA168 PCA169 PCA170 PCA171 Id SalePrice \n",
|
||||||
|
"0 -0.169568 -0.144326 0.391713 -0.013357 1 208500 \n",
|
||||||
|
"1 -0.346191 -0.962753 -0.138863 1.083103 2 181500 \n",
|
||||||
|
"2 -0.704900 -0.036388 -0.540516 0.021711 3 223500 \n",
|
||||||
|
"3 -0.034287 -0.877735 0.028065 -0.321009 4 140000 \n",
|
||||||
|
"4 0.317305 -0.145975 -0.674692 -0.378458 5 250000 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 175 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"train.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA163</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>-3.208086</td>\n",
|
||||||
|
" <td>-2.987338</td>\n",
|
||||||
|
" <td>-0.327066</td>\n",
|
||||||
|
" <td>-1.609206</td>\n",
|
||||||
|
" <td>0.016879</td>\n",
|
||||||
|
" <td>-1.514939</td>\n",
|
||||||
|
" <td>-0.417889</td>\n",
|
||||||
|
" <td>-0.988173</td>\n",
|
||||||
|
" <td>-0.653363</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.027364</td>\n",
|
||||||
|
" <td>0.653222</td>\n",
|
||||||
|
" <td>-0.201973</td>\n",
|
||||||
|
" <td>-0.769946</td>\n",
|
||||||
|
" <td>-0.344834</td>\n",
|
||||||
|
" <td>0.514257</td>\n",
|
||||||
|
" <td>1.114106</td>\n",
|
||||||
|
" <td>0.337765</td>\n",
|
||||||
|
" <td>-0.639617</td>\n",
|
||||||
|
" <td>1461</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>-1.403753</td>\n",
|
||||||
|
" <td>-4.261851</td>\n",
|
||||||
|
" <td>0.107527</td>\n",
|
||||||
|
" <td>0.935981</td>\n",
|
||||||
|
" <td>0.165777</td>\n",
|
||||||
|
" <td>-0.299485</td>\n",
|
||||||
|
" <td>-0.524918</td>\n",
|
||||||
|
" <td>-2.332121</td>\n",
|
||||||
|
" <td>0.031044</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>3.856117</td>\n",
|
||||||
|
" <td>0.787996</td>\n",
|
||||||
|
" <td>0.215221</td>\n",
|
||||||
|
" <td>0.458275</td>\n",
|
||||||
|
" <td>1.135109</td>\n",
|
||||||
|
" <td>0.378972</td>\n",
|
||||||
|
" <td>0.953559</td>\n",
|
||||||
|
" <td>-1.008240</td>\n",
|
||||||
|
" <td>4.445435</td>\n",
|
||||||
|
" <td>1462</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2.257002</td>\n",
|
||||||
|
" <td>0.427951</td>\n",
|
||||||
|
" <td>-0.610464</td>\n",
|
||||||
|
" <td>-1.301125</td>\n",
|
||||||
|
" <td>-1.058327</td>\n",
|
||||||
|
" <td>2.674177</td>\n",
|
||||||
|
" <td>-1.500824</td>\n",
|
||||||
|
" <td>-0.223999</td>\n",
|
||||||
|
" <td>0.403440</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.117138</td>\n",
|
||||||
|
" <td>-0.378473</td>\n",
|
||||||
|
" <td>-0.031613</td>\n",
|
||||||
|
" <td>0.090593</td>\n",
|
||||||
|
" <td>-0.173914</td>\n",
|
||||||
|
" <td>-0.150098</td>\n",
|
||||||
|
" <td>-0.006612</td>\n",
|
||||||
|
" <td>0.190780</td>\n",
|
||||||
|
" <td>-0.152486</td>\n",
|
||||||
|
" <td>1463</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3.253618</td>\n",
|
||||||
|
" <td>0.537318</td>\n",
|
||||||
|
" <td>-0.796079</td>\n",
|
||||||
|
" <td>-0.851716</td>\n",
|
||||||
|
" <td>-1.209643</td>\n",
|
||||||
|
" <td>2.388795</td>\n",
|
||||||
|
" <td>-1.340676</td>\n",
|
||||||
|
" <td>-0.876322</td>\n",
|
||||||
|
" <td>0.421183</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.441586</td>\n",
|
||||||
|
" <td>0.020066</td>\n",
|
||||||
|
" <td>-0.151709</td>\n",
|
||||||
|
" <td>0.444826</td>\n",
|
||||||
|
" <td>0.008218</td>\n",
|
||||||
|
" <td>-0.161705</td>\n",
|
||||||
|
" <td>-0.453482</td>\n",
|
||||||
|
" <td>0.472352</td>\n",
|
||||||
|
" <td>0.046141</td>\n",
|
||||||
|
" <td>1464</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>2.876409</td>\n",
|
||||||
|
" <td>-0.075909</td>\n",
|
||||||
|
" <td>-0.154959</td>\n",
|
||||||
|
" <td>-2.469870</td>\n",
|
||||||
|
" <td>1.407820</td>\n",
|
||||||
|
" <td>0.487532</td>\n",
|
||||||
|
" <td>0.072190</td>\n",
|
||||||
|
" <td>2.414446</td>\n",
|
||||||
|
" <td>1.667224</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.269062</td>\n",
|
||||||
|
" <td>0.651172</td>\n",
|
||||||
|
" <td>-0.050461</td>\n",
|
||||||
|
" <td>-0.526448</td>\n",
|
||||||
|
" <td>-0.843701</td>\n",
|
||||||
|
" <td>0.574770</td>\n",
|
||||||
|
" <td>-0.227828</td>\n",
|
||||||
|
" <td>1.071423</td>\n",
|
||||||
|
" <td>1.362638</td>\n",
|
||||||
|
" <td>1465</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 174 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 -3.208086 -2.987338 -0.327066 -1.609206 0.016879 -1.514939 \n",
|
||||||
|
"1 1 -1.403753 -4.261851 0.107527 0.935981 0.165777 -0.299485 \n",
|
||||||
|
"2 2 2.257002 0.427951 -0.610464 -1.301125 -1.058327 2.674177 \n",
|
||||||
|
"3 3 3.253618 0.537318 -0.796079 -0.851716 -1.209643 2.388795 \n",
|
||||||
|
"4 4 2.876409 -0.075909 -0.154959 -2.469870 1.407820 0.487532 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA163 PCA164 PCA165 PCA166 \\\n",
|
||||||
|
"0 -0.417889 -0.988173 -0.653363 ... -0.027364 0.653222 -0.201973 -0.769946 \n",
|
||||||
|
"1 -0.524918 -2.332121 0.031044 ... 3.856117 0.787996 0.215221 0.458275 \n",
|
||||||
|
"2 -1.500824 -0.223999 0.403440 ... -0.117138 -0.378473 -0.031613 0.090593 \n",
|
||||||
|
"3 -1.340676 -0.876322 0.421183 ... -0.441586 0.020066 -0.151709 0.444826 \n",
|
||||||
|
"4 0.072190 2.414446 1.667224 ... 0.269062 0.651172 -0.050461 -0.526448 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA167 PCA168 PCA169 PCA170 PCA171 Id \n",
|
||||||
|
"0 -0.344834 0.514257 1.114106 0.337765 -0.639617 1461 \n",
|
||||||
|
"1 1.135109 0.378972 0.953559 -1.008240 4.445435 1462 \n",
|
||||||
|
"2 -0.173914 -0.150098 -0.006612 0.190780 -0.152486 1463 \n",
|
||||||
|
"3 0.008218 -0.161705 -0.453482 0.472352 0.046141 1464 \n",
|
||||||
|
"4 -0.843701 0.574770 -0.227828 1.071423 1.362638 1465 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 174 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"test.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Set aside unnecessary features\n",
|
||||||
|
"\n",
|
||||||
|
"trainId = train['Id'].astype(int)\n",
|
||||||
|
"testId = test['Id'].astype(int)\n",
|
||||||
|
"\n",
|
||||||
|
"train = train.drop('Id', axis=1)\n",
|
||||||
|
"test = test.drop('Id', axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X = train.drop('SalePrice', axis=1)\n",
|
||||||
|
"y = train['SalePrice']\n",
|
||||||
|
"X_test = test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0 208500\n",
|
||||||
|
"1 181500\n",
|
||||||
|
"2 223500\n",
|
||||||
|
"3 140000\n",
|
||||||
|
"4 250000\n",
|
||||||
|
" ... \n",
|
||||||
|
"1455 175000\n",
|
||||||
|
"1456 210000\n",
|
||||||
|
"1457 266500\n",
|
||||||
|
"1458 142125\n",
|
||||||
|
"1459 147500\n",
|
||||||
|
"Name: SalePrice, Length: 1460, dtype: int64\n",
|
||||||
|
"(1460, 173)\n",
|
||||||
|
"(1459, 173)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA162</th>\n",
|
||||||
|
" <th>PCA163</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>4.345109</td>\n",
|
||||||
|
" <td>1.619386</td>\n",
|
||||||
|
" <td>-0.739617</td>\n",
|
||||||
|
" <td>-2.080179</td>\n",
|
||||||
|
" <td>-0.985088</td>\n",
|
||||||
|
" <td>1.999117</td>\n",
|
||||||
|
" <td>-1.231870</td>\n",
|
||||||
|
" <td>-0.131782</td>\n",
|
||||||
|
" <td>1.316470</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.276936</td>\n",
|
||||||
|
" <td>-0.128260</td>\n",
|
||||||
|
" <td>0.160733</td>\n",
|
||||||
|
" <td>0.071333</td>\n",
|
||||||
|
" <td>0.155468</td>\n",
|
||||||
|
" <td>0.172801</td>\n",
|
||||||
|
" <td>-0.169568</td>\n",
|
||||||
|
" <td>-0.144326</td>\n",
|
||||||
|
" <td>0.391713</td>\n",
|
||||||
|
" <td>-0.013357</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0.019142</td>\n",
|
||||||
|
" <td>-3.106959</td>\n",
|
||||||
|
" <td>0.168223</td>\n",
|
||||||
|
" <td>-0.553341</td>\n",
|
||||||
|
" <td>0.940712</td>\n",
|
||||||
|
" <td>0.200719</td>\n",
|
||||||
|
" <td>-0.468954</td>\n",
|
||||||
|
" <td>0.235082</td>\n",
|
||||||
|
" <td>-0.838022</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.140974</td>\n",
|
||||||
|
" <td>-0.224535</td>\n",
|
||||||
|
" <td>-1.063234</td>\n",
|
||||||
|
" <td>-0.334556</td>\n",
|
||||||
|
" <td>0.361166</td>\n",
|
||||||
|
" <td>-1.218397</td>\n",
|
||||||
|
" <td>-0.346191</td>\n",
|
||||||
|
" <td>-0.962753</td>\n",
|
||||||
|
" <td>-0.138863</td>\n",
|
||||||
|
" <td>1.083103</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>4.851149</td>\n",
|
||||||
|
" <td>1.242811</td>\n",
|
||||||
|
" <td>-0.351815</td>\n",
|
||||||
|
" <td>-1.484957</td>\n",
|
||||||
|
" <td>-0.758200</td>\n",
|
||||||
|
" <td>2.181179</td>\n",
|
||||||
|
" <td>-1.843949</td>\n",
|
||||||
|
" <td>0.296194</td>\n",
|
||||||
|
" <td>1.299142</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.289024</td>\n",
|
||||||
|
" <td>-0.282563</td>\n",
|
||||||
|
" <td>0.088334</td>\n",
|
||||||
|
" <td>0.238624</td>\n",
|
||||||
|
" <td>0.327280</td>\n",
|
||||||
|
" <td>0.325285</td>\n",
|
||||||
|
" <td>-0.704900</td>\n",
|
||||||
|
" <td>-0.036388</td>\n",
|
||||||
|
" <td>-0.540516</td>\n",
|
||||||
|
" <td>0.021711</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>-1.771641</td>\n",
|
||||||
|
" <td>0.039500</td>\n",
|
||||||
|
" <td>-1.358623</td>\n",
|
||||||
|
" <td>1.920760</td>\n",
|
||||||
|
" <td>-2.550817</td>\n",
|
||||||
|
" <td>0.209519</td>\n",
|
||||||
|
" <td>-0.756387</td>\n",
|
||||||
|
" <td>0.700109</td>\n",
|
||||||
|
" <td>-1.408543</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.286790</td>\n",
|
||||||
|
" <td>0.672251</td>\n",
|
||||||
|
" <td>-0.172186</td>\n",
|
||||||
|
" <td>-0.518922</td>\n",
|
||||||
|
" <td>0.231498</td>\n",
|
||||||
|
" <td>-0.074296</td>\n",
|
||||||
|
" <td>-0.034287</td>\n",
|
||||||
|
" <td>-0.877735</td>\n",
|
||||||
|
" <td>0.028065</td>\n",
|
||||||
|
" <td>-0.321009</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>6.463747</td>\n",
|
||||||
|
" <td>1.064473</td>\n",
|
||||||
|
" <td>0.209472</td>\n",
|
||||||
|
" <td>0.448906</td>\n",
|
||||||
|
" <td>-1.555301</td>\n",
|
||||||
|
" <td>3.215822</td>\n",
|
||||||
|
" <td>-0.946356</td>\n",
|
||||||
|
" <td>-0.805204</td>\n",
|
||||||
|
" <td>2.112526</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.235585</td>\n",
|
||||||
|
" <td>0.019570</td>\n",
|
||||||
|
" <td>-0.270189</td>\n",
|
||||||
|
" <td>0.375297</td>\n",
|
||||||
|
" <td>-0.396732</td>\n",
|
||||||
|
" <td>-0.109084</td>\n",
|
||||||
|
" <td>0.317305</td>\n",
|
||||||
|
" <td>-0.145975</td>\n",
|
||||||
|
" <td>-0.674692</td>\n",
|
||||||
|
" <td>-0.378458</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 173 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||||||
|
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||||||
|
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||||||
|
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||||||
|
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA162 PCA163 PCA164 PCA165 \\\n",
|
||||||
|
"0 -1.231870 -0.131782 1.316470 ... -0.276936 -0.128260 0.160733 0.071333 \n",
|
||||||
|
"1 -0.468954 0.235082 -0.838022 ... 0.140974 -0.224535 -1.063234 -0.334556 \n",
|
||||||
|
"2 -1.843949 0.296194 1.299142 ... -0.289024 -0.282563 0.088334 0.238624 \n",
|
||||||
|
"3 -0.756387 0.700109 -1.408543 ... 0.286790 0.672251 -0.172186 -0.518922 \n",
|
||||||
|
"4 -0.946356 -0.805204 2.112526 ... -0.235585 0.019570 -0.270189 0.375297 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA166 PCA167 PCA168 PCA169 PCA170 PCA171 \n",
|
||||||
|
"0 0.155468 0.172801 -0.169568 -0.144326 0.391713 -0.013357 \n",
|
||||||
|
"1 0.361166 -1.218397 -0.346191 -0.962753 -0.138863 1.083103 \n",
|
||||||
|
"2 0.327280 0.325285 -0.704900 -0.036388 -0.540516 0.021711 \n",
|
||||||
|
"3 0.231498 -0.074296 -0.034287 -0.877735 0.028065 -0.321009 \n",
|
||||||
|
"4 -0.396732 -0.109084 0.317305 -0.145975 -0.674692 -0.378458 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 173 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(y)\n",
|
||||||
|
"print(X.shape)\n",
|
||||||
|
"print(X_test.shape)\n",
|
||||||
|
"X.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Initialize our models\n",
|
||||||
|
"\n",
|
||||||
|
"tree_model = DecisionTreeRegressor()\n",
|
||||||
|
"rf_model = RandomForestRegressor()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\ensemble\\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
|
||||||
|
" \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
|
||||||
|
" max_features='auto', max_leaf_nodes=None,\n",
|
||||||
|
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
|
||||||
|
" min_samples_leaf=1, min_samples_split=2,\n",
|
||||||
|
" min_weight_fraction_leaf=0.0, n_estimators=10,\n",
|
||||||
|
" n_jobs=None, oob_score=False, random_state=None,\n",
|
||||||
|
" verbose=0, warm_start=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Fit our models to the training data\n",
|
||||||
|
"\n",
|
||||||
|
"tree_model.fit(X, y)\n",
|
||||||
|
"rf_model.fit(X, y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Calculate error metrics for each model\n",
|
||||||
|
"\n",
|
||||||
|
"tree_mse = mean_squared_error(y, tree_model.predict(X))\n",
|
||||||
|
"tree_mae = mean_absolute_error(y, tree_model.predict(X))\n",
|
||||||
|
"rf_mse = mean_squared_error(y, rf_model.predict(X))\n",
|
||||||
|
"rf_mae = mean_absolute_error(y, rf_model.predict(X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Decision Tree training mse = 0.0 & mae = 0.0 & rmse = 0.0\n",
|
||||||
|
"Random Forest training mse = 202420995.41813016 & mae = 8265.452260273973 & rmse = 14227.473261901783\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(\"Decision Tree training mse = \",tree_mse,\" & mae = \",tree_mae,\" & rmse = \", sqrt(tree_mse))\n",
|
||||||
|
"print(\"Random Forest training mse = \",rf_mse,\" & mae = \",rf_mae,\" & rmse = \", sqrt(rf_mse))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"<bound method NDFrame.head of Id SalePrice\n",
|
||||||
|
"0 1461 125500.0\n",
|
||||||
|
"1 1462 149500.0\n",
|
||||||
|
"2 1463 185000.0\n",
|
||||||
|
"3 1464 201000.0\n",
|
||||||
|
"4 1465 176000.0\n",
|
||||||
|
"... ... ...\n",
|
||||||
|
"1454 2915 89000.0\n",
|
||||||
|
"1455 2916 80000.0\n",
|
||||||
|
"1456 2917 167900.0\n",
|
||||||
|
"1457 2918 135000.0\n",
|
||||||
|
"1458 2919 181000.0\n",
|
||||||
|
"\n",
|
||||||
|
"[1459 rows x 2 columns]>\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Setup and save Tree prediction\n",
|
||||||
|
"\n",
|
||||||
|
"test_pred = tree_model.predict(X_test.values) # Note that we need to feed our model the values of our dataframe X_test\n",
|
||||||
|
"tree_predictions = np.c_[testId, test_pred] # Stack the ids and the predicted prices side by side as two columns\n",
|
||||||
|
"tree_submission = pd.DataFrame(tree_predictions, columns = ['Id', 'SalePrice'])\n",
|
||||||
|
"tree_submission['Id'] = tree_submission['Id'].astype(int)\n",
|
||||||
|
"print(tree_submission.head)\n",
|
||||||
|
"tree_submission.to_csv(\"submissions/TreeSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"<bound method NDFrame.head of Id SalePrice\n",
|
||||||
|
"0 1461 136110.0\n",
|
||||||
|
"1 1462 167750.0\n",
|
||||||
|
"2 1463 184140.0\n",
|
||||||
|
"3 1464 186963.2\n",
|
||||||
|
"4 1465 188728.2\n",
|
||||||
|
"... ... ...\n",
|
||||||
|
"1454 2915 95030.0\n",
|
||||||
|
"1455 2916 99045.9\n",
|
||||||
|
"1456 2917 168480.0\n",
|
||||||
|
"1457 2918 114850.0\n",
|
||||||
|
"1458 2919 196190.0\n",
|
||||||
|
"\n",
|
||||||
|
"[1459 rows x 2 columns]>\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Setup and save Random Forest prediction\n",
|
||||||
|
"\n",
|
||||||
|
"test_pred = rf_model.predict(X_test.values) # Note that we need to feed our model the values of our dataframe X_test\n",
|
||||||
|
"rf_predictions = np.c_[testId, test_pred] # Stack the ids and the predicted prices side by side as two columns\n",
|
||||||
|
"rf_submission = pd.DataFrame(rf_predictions, columns = ['Id', 'SalePrice'])\n",
|
||||||
|
"rf_submission['Id'] = rf_submission['Id'].astype(int)\n",
|
||||||
|
"print(rf_submission.head)\n",
|
||||||
|
"rf_submission.to_csv(\"submissions/RFSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Set up and save Ensemble prediction\n",
|
||||||
|
"\n",
|
||||||
|
"ensemble_submission = tree_submission\n",
|
||||||
|
"ensemble_submission['TreeSalePrice'] = tree_submission['SalePrice']\n",
|
||||||
|
"ensemble_submission['ForestSalePrice'] = rf_submission['SalePrice']\n",
|
||||||
|
"ensemble_submission['SalePrice'] = ensemble_submission[['TreeSalePrice', 'ForestSalePrice']].mean(axis=1)\n",
|
||||||
|
"ensemble_submission = ensemble_submission.drop(['TreeSalePrice', 'ForestSalePrice'], axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" <th>SalePrice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>1461</td>\n",
|
||||||
|
" <td>133457.50</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1462</td>\n",
|
||||||
|
" <td>163187.50</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>1463</td>\n",
|
||||||
|
" <td>184355.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>1464</td>\n",
|
||||||
|
" <td>190472.40</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>1465</td>\n",
|
||||||
|
" <td>185546.15</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Id SalePrice\n",
|
||||||
|
"0 1461 133457.50\n",
|
||||||
|
"1 1462 163187.50\n",
|
||||||
|
"2 1463 184355.00\n",
|
||||||
|
"3 1464 190472.40\n",
|
||||||
|
"4 1465 185546.15"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"ensemble_submission.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 28,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ensemble_submission.to_csv(\"submissions/EnsembleSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,994 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import os\n",
|
||||||
|
"from sklearn.tree import DecisionTreeRegressor\n",
|
||||||
|
"from sklearn.ensemble import RandomForestRegressor\n",
|
||||||
|
"from math import sqrt\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"from sklearn.metrics import mean_absolute_error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"train = pd.read_csv(os.path.join('data', 'clean_train.csv'))\n",
|
||||||
|
"test = pd.read_csv(os.path.join('data', 'clean_test.csv'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" <th>SalePrice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>4.345109</td>\n",
|
||||||
|
" <td>1.619386</td>\n",
|
||||||
|
" <td>-0.739617</td>\n",
|
||||||
|
" <td>-2.080179</td>\n",
|
||||||
|
" <td>-0.985088</td>\n",
|
||||||
|
" <td>1.999117</td>\n",
|
||||||
|
" <td>-1.231870</td>\n",
|
||||||
|
" <td>-0.131782</td>\n",
|
||||||
|
" <td>1.316470</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.160733</td>\n",
|
||||||
|
" <td>0.071333</td>\n",
|
||||||
|
" <td>0.155468</td>\n",
|
||||||
|
" <td>0.172801</td>\n",
|
||||||
|
" <td>-0.169568</td>\n",
|
||||||
|
" <td>-0.144326</td>\n",
|
||||||
|
" <td>0.391713</td>\n",
|
||||||
|
" <td>-0.013357</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>208500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0.019142</td>\n",
|
||||||
|
" <td>-3.106959</td>\n",
|
||||||
|
" <td>0.168223</td>\n",
|
||||||
|
" <td>-0.553341</td>\n",
|
||||||
|
" <td>0.940712</td>\n",
|
||||||
|
" <td>0.200719</td>\n",
|
||||||
|
" <td>-0.468954</td>\n",
|
||||||
|
" <td>0.235082</td>\n",
|
||||||
|
" <td>-0.838022</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-1.063234</td>\n",
|
||||||
|
" <td>-0.334556</td>\n",
|
||||||
|
" <td>0.361166</td>\n",
|
||||||
|
" <td>-1.218397</td>\n",
|
||||||
|
" <td>-0.346191</td>\n",
|
||||||
|
" <td>-0.962753</td>\n",
|
||||||
|
" <td>-0.138863</td>\n",
|
||||||
|
" <td>1.083103</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>181500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>4.851149</td>\n",
|
||||||
|
" <td>1.242811</td>\n",
|
||||||
|
" <td>-0.351815</td>\n",
|
||||||
|
" <td>-1.484957</td>\n",
|
||||||
|
" <td>-0.758200</td>\n",
|
||||||
|
" <td>2.181179</td>\n",
|
||||||
|
" <td>-1.843949</td>\n",
|
||||||
|
" <td>0.296194</td>\n",
|
||||||
|
" <td>1.299142</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.088334</td>\n",
|
||||||
|
" <td>0.238624</td>\n",
|
||||||
|
" <td>0.327280</td>\n",
|
||||||
|
" <td>0.325285</td>\n",
|
||||||
|
" <td>-0.704900</td>\n",
|
||||||
|
" <td>-0.036388</td>\n",
|
||||||
|
" <td>-0.540516</td>\n",
|
||||||
|
" <td>0.021711</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>223500</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>-1.771641</td>\n",
|
||||||
|
" <td>0.039500</td>\n",
|
||||||
|
" <td>-1.358623</td>\n",
|
||||||
|
" <td>1.920760</td>\n",
|
||||||
|
" <td>-2.550817</td>\n",
|
||||||
|
" <td>0.209519</td>\n",
|
||||||
|
" <td>-0.756387</td>\n",
|
||||||
|
" <td>0.700109</td>\n",
|
||||||
|
" <td>-1.408543</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.172186</td>\n",
|
||||||
|
" <td>-0.518922</td>\n",
|
||||||
|
" <td>0.231498</td>\n",
|
||||||
|
" <td>-0.074296</td>\n",
|
||||||
|
" <td>-0.034287</td>\n",
|
||||||
|
" <td>-0.877735</td>\n",
|
||||||
|
" <td>0.028065</td>\n",
|
||||||
|
" <td>-0.321009</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>140000</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>6.463747</td>\n",
|
||||||
|
" <td>1.064473</td>\n",
|
||||||
|
" <td>0.209472</td>\n",
|
||||||
|
" <td>0.448906</td>\n",
|
||||||
|
" <td>-1.555301</td>\n",
|
||||||
|
" <td>3.215822</td>\n",
|
||||||
|
" <td>-0.946356</td>\n",
|
||||||
|
" <td>-0.805204</td>\n",
|
||||||
|
" <td>2.112526</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.270189</td>\n",
|
||||||
|
" <td>0.375297</td>\n",
|
||||||
|
" <td>-0.396732</td>\n",
|
||||||
|
" <td>-0.109084</td>\n",
|
||||||
|
" <td>0.317305</td>\n",
|
||||||
|
" <td>-0.145975</td>\n",
|
||||||
|
" <td>-0.674692</td>\n",
|
||||||
|
" <td>-0.378458</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>250000</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 175 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||||||
|
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||||||
|
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||||||
|
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||||||
|
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA164 PCA165 PCA166 PCA167 \\\n",
|
||||||
|
"0 -1.231870 -0.131782 1.316470 ... 0.160733 0.071333 0.155468 0.172801 \n",
|
||||||
|
"1 -0.468954 0.235082 -0.838022 ... -1.063234 -0.334556 0.361166 -1.218397 \n",
|
||||||
|
"2 -1.843949 0.296194 1.299142 ... 0.088334 0.238624 0.327280 0.325285 \n",
|
||||||
|
"3 -0.756387 0.700109 -1.408543 ... -0.172186 -0.518922 0.231498 -0.074296 \n",
|
||||||
|
"4 -0.946356 -0.805204 2.112526 ... -0.270189 0.375297 -0.396732 -0.109084 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA168 PCA169 PCA170 PCA171 Id SalePrice \n",
|
||||||
|
"0 -0.169568 -0.144326 0.391713 -0.013357 1 208500 \n",
|
||||||
|
"1 -0.346191 -0.962753 -0.138863 1.083103 2 181500 \n",
|
||||||
|
"2 -0.704900 -0.036388 -0.540516 0.021711 3 223500 \n",
|
||||||
|
"3 -0.034287 -0.877735 0.028065 -0.321009 4 140000 \n",
|
||||||
|
"4 0.317305 -0.145975 -0.674692 -0.378458 5 250000 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 175 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"train.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA163</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>-3.208086</td>\n",
|
||||||
|
" <td>-2.987338</td>\n",
|
||||||
|
" <td>-0.327066</td>\n",
|
||||||
|
" <td>-1.609206</td>\n",
|
||||||
|
" <td>0.016879</td>\n",
|
||||||
|
" <td>-1.514939</td>\n",
|
||||||
|
" <td>-0.417889</td>\n",
|
||||||
|
" <td>-0.988173</td>\n",
|
||||||
|
" <td>-0.653363</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.027364</td>\n",
|
||||||
|
" <td>0.653222</td>\n",
|
||||||
|
" <td>-0.201973</td>\n",
|
||||||
|
" <td>-0.769946</td>\n",
|
||||||
|
" <td>-0.344834</td>\n",
|
||||||
|
" <td>0.514257</td>\n",
|
||||||
|
" <td>1.114106</td>\n",
|
||||||
|
" <td>0.337765</td>\n",
|
||||||
|
" <td>-0.639617</td>\n",
|
||||||
|
" <td>1461</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>-1.403753</td>\n",
|
||||||
|
" <td>-4.261851</td>\n",
|
||||||
|
" <td>0.107527</td>\n",
|
||||||
|
" <td>0.935981</td>\n",
|
||||||
|
" <td>0.165777</td>\n",
|
||||||
|
" <td>-0.299485</td>\n",
|
||||||
|
" <td>-0.524918</td>\n",
|
||||||
|
" <td>-2.332121</td>\n",
|
||||||
|
" <td>0.031044</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>3.856117</td>\n",
|
||||||
|
" <td>0.787996</td>\n",
|
||||||
|
" <td>0.215221</td>\n",
|
||||||
|
" <td>0.458275</td>\n",
|
||||||
|
" <td>1.135109</td>\n",
|
||||||
|
" <td>0.378972</td>\n",
|
||||||
|
" <td>0.953559</td>\n",
|
||||||
|
" <td>-1.008240</td>\n",
|
||||||
|
" <td>4.445435</td>\n",
|
||||||
|
" <td>1462</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2.257002</td>\n",
|
||||||
|
" <td>0.427951</td>\n",
|
||||||
|
" <td>-0.610464</td>\n",
|
||||||
|
" <td>-1.301125</td>\n",
|
||||||
|
" <td>-1.058327</td>\n",
|
||||||
|
" <td>2.674177</td>\n",
|
||||||
|
" <td>-1.500824</td>\n",
|
||||||
|
" <td>-0.223999</td>\n",
|
||||||
|
" <td>0.403440</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.117138</td>\n",
|
||||||
|
" <td>-0.378473</td>\n",
|
||||||
|
" <td>-0.031613</td>\n",
|
||||||
|
" <td>0.090593</td>\n",
|
||||||
|
" <td>-0.173914</td>\n",
|
||||||
|
" <td>-0.150098</td>\n",
|
||||||
|
" <td>-0.006612</td>\n",
|
||||||
|
" <td>0.190780</td>\n",
|
||||||
|
" <td>-0.152486</td>\n",
|
||||||
|
" <td>1463</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3.253618</td>\n",
|
||||||
|
" <td>0.537318</td>\n",
|
||||||
|
" <td>-0.796079</td>\n",
|
||||||
|
" <td>-0.851716</td>\n",
|
||||||
|
" <td>-1.209643</td>\n",
|
||||||
|
" <td>2.388795</td>\n",
|
||||||
|
" <td>-1.340676</td>\n",
|
||||||
|
" <td>-0.876322</td>\n",
|
||||||
|
" <td>0.421183</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.441586</td>\n",
|
||||||
|
" <td>0.020066</td>\n",
|
||||||
|
" <td>-0.151709</td>\n",
|
||||||
|
" <td>0.444826</td>\n",
|
||||||
|
" <td>0.008218</td>\n",
|
||||||
|
" <td>-0.161705</td>\n",
|
||||||
|
" <td>-0.453482</td>\n",
|
||||||
|
" <td>0.472352</td>\n",
|
||||||
|
" <td>0.046141</td>\n",
|
||||||
|
" <td>1464</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>2.876409</td>\n",
|
||||||
|
" <td>-0.075909</td>\n",
|
||||||
|
" <td>-0.154959</td>\n",
|
||||||
|
" <td>-2.469870</td>\n",
|
||||||
|
" <td>1.407820</td>\n",
|
||||||
|
" <td>0.487532</td>\n",
|
||||||
|
" <td>0.072190</td>\n",
|
||||||
|
" <td>2.414446</td>\n",
|
||||||
|
" <td>1.667224</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.269062</td>\n",
|
||||||
|
" <td>0.651172</td>\n",
|
||||||
|
" <td>-0.050461</td>\n",
|
||||||
|
" <td>-0.526448</td>\n",
|
||||||
|
" <td>-0.843701</td>\n",
|
||||||
|
" <td>0.574770</td>\n",
|
||||||
|
" <td>-0.227828</td>\n",
|
||||||
|
" <td>1.071423</td>\n",
|
||||||
|
" <td>1.362638</td>\n",
|
||||||
|
" <td>1465</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 174 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 -3.208086 -2.987338 -0.327066 -1.609206 0.016879 -1.514939 \n",
|
||||||
|
"1 1 -1.403753 -4.261851 0.107527 0.935981 0.165777 -0.299485 \n",
|
||||||
|
"2 2 2.257002 0.427951 -0.610464 -1.301125 -1.058327 2.674177 \n",
|
||||||
|
"3 3 3.253618 0.537318 -0.796079 -0.851716 -1.209643 2.388795 \n",
|
||||||
|
"4 4 2.876409 -0.075909 -0.154959 -2.469870 1.407820 0.487532 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA163 PCA164 PCA165 PCA166 \\\n",
|
||||||
|
"0 -0.417889 -0.988173 -0.653363 ... -0.027364 0.653222 -0.201973 -0.769946 \n",
|
||||||
|
"1 -0.524918 -2.332121 0.031044 ... 3.856117 0.787996 0.215221 0.458275 \n",
|
||||||
|
"2 -1.500824 -0.223999 0.403440 ... -0.117138 -0.378473 -0.031613 0.090593 \n",
|
||||||
|
"3 -1.340676 -0.876322 0.421183 ... -0.441586 0.020066 -0.151709 0.444826 \n",
|
||||||
|
"4 0.072190 2.414446 1.667224 ... 0.269062 0.651172 -0.050461 -0.526448 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA167 PCA168 PCA169 PCA170 PCA171 Id \n",
|
||||||
|
"0 -0.344834 0.514257 1.114106 0.337765 -0.639617 1461 \n",
|
||||||
|
"1 1.135109 0.378972 0.953559 -1.008240 4.445435 1462 \n",
|
||||||
|
"2 -0.173914 -0.150098 -0.006612 0.190780 -0.152486 1463 \n",
|
||||||
|
"3 0.008218 -0.161705 -0.453482 0.472352 0.046141 1464 \n",
|
||||||
|
"4 -0.843701 0.574770 -0.227828 1.071423 1.362638 1465 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 174 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"test.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Set aside unnecessary features\n",
|
||||||
|
"\n",
|
||||||
|
"trainId = train['Id'].astype(int)\n",
|
||||||
|
"testId = test['Id'].astype(int)\n",
|
||||||
|
"\n",
|
||||||
|
"train = train.drop('Id', axis=1)\n",
|
||||||
|
"test = test.drop('Id', axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X = train.drop('SalePrice', axis=1)\n",
|
||||||
|
"y = train['SalePrice']\n",
|
||||||
|
"X_test = test"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0 208500\n",
|
||||||
|
"1 181500\n",
|
||||||
|
"2 223500\n",
|
||||||
|
"3 140000\n",
|
||||||
|
"4 250000\n",
|
||||||
|
" ... \n",
|
||||||
|
"1455 175000\n",
|
||||||
|
"1456 210000\n",
|
||||||
|
"1457 266500\n",
|
||||||
|
"1458 142125\n",
|
||||||
|
"1459 147500\n",
|
||||||
|
"Name: SalePrice, Length: 1460, dtype: int64\n",
|
||||||
|
"(1460, 173)\n",
|
||||||
|
"(1459, 173)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Unnamed: 0</th>\n",
|
||||||
|
" <th>PCA0</th>\n",
|
||||||
|
" <th>PCA1</th>\n",
|
||||||
|
" <th>PCA2</th>\n",
|
||||||
|
" <th>PCA3</th>\n",
|
||||||
|
" <th>PCA4</th>\n",
|
||||||
|
" <th>PCA5</th>\n",
|
||||||
|
" <th>PCA6</th>\n",
|
||||||
|
" <th>PCA7</th>\n",
|
||||||
|
" <th>PCA8</th>\n",
|
||||||
|
" <th>...</th>\n",
|
||||||
|
" <th>PCA162</th>\n",
|
||||||
|
" <th>PCA163</th>\n",
|
||||||
|
" <th>PCA164</th>\n",
|
||||||
|
" <th>PCA165</th>\n",
|
||||||
|
" <th>PCA166</th>\n",
|
||||||
|
" <th>PCA167</th>\n",
|
||||||
|
" <th>PCA168</th>\n",
|
||||||
|
" <th>PCA169</th>\n",
|
||||||
|
" <th>PCA170</th>\n",
|
||||||
|
" <th>PCA171</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>4.345109</td>\n",
|
||||||
|
" <td>1.619386</td>\n",
|
||||||
|
" <td>-0.739617</td>\n",
|
||||||
|
" <td>-2.080179</td>\n",
|
||||||
|
" <td>-0.985088</td>\n",
|
||||||
|
" <td>1.999117</td>\n",
|
||||||
|
" <td>-1.231870</td>\n",
|
||||||
|
" <td>-0.131782</td>\n",
|
||||||
|
" <td>1.316470</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.276936</td>\n",
|
||||||
|
" <td>-0.128260</td>\n",
|
||||||
|
" <td>0.160733</td>\n",
|
||||||
|
" <td>0.071333</td>\n",
|
||||||
|
" <td>0.155468</td>\n",
|
||||||
|
" <td>0.172801</td>\n",
|
||||||
|
" <td>-0.169568</td>\n",
|
||||||
|
" <td>-0.144326</td>\n",
|
||||||
|
" <td>0.391713</td>\n",
|
||||||
|
" <td>-0.013357</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>0.019142</td>\n",
|
||||||
|
" <td>-3.106959</td>\n",
|
||||||
|
" <td>0.168223</td>\n",
|
||||||
|
" <td>-0.553341</td>\n",
|
||||||
|
" <td>0.940712</td>\n",
|
||||||
|
" <td>0.200719</td>\n",
|
||||||
|
" <td>-0.468954</td>\n",
|
||||||
|
" <td>0.235082</td>\n",
|
||||||
|
" <td>-0.838022</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.140974</td>\n",
|
||||||
|
" <td>-0.224535</td>\n",
|
||||||
|
" <td>-1.063234</td>\n",
|
||||||
|
" <td>-0.334556</td>\n",
|
||||||
|
" <td>0.361166</td>\n",
|
||||||
|
" <td>-1.218397</td>\n",
|
||||||
|
" <td>-0.346191</td>\n",
|
||||||
|
" <td>-0.962753</td>\n",
|
||||||
|
" <td>-0.138863</td>\n",
|
||||||
|
" <td>1.083103</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>4.851149</td>\n",
|
||||||
|
" <td>1.242811</td>\n",
|
||||||
|
" <td>-0.351815</td>\n",
|
||||||
|
" <td>-1.484957</td>\n",
|
||||||
|
" <td>-0.758200</td>\n",
|
||||||
|
" <td>2.181179</td>\n",
|
||||||
|
" <td>-1.843949</td>\n",
|
||||||
|
" <td>0.296194</td>\n",
|
||||||
|
" <td>1.299142</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.289024</td>\n",
|
||||||
|
" <td>-0.282563</td>\n",
|
||||||
|
" <td>0.088334</td>\n",
|
||||||
|
" <td>0.238624</td>\n",
|
||||||
|
" <td>0.327280</td>\n",
|
||||||
|
" <td>0.325285</td>\n",
|
||||||
|
" <td>-0.704900</td>\n",
|
||||||
|
" <td>-0.036388</td>\n",
|
||||||
|
" <td>-0.540516</td>\n",
|
||||||
|
" <td>0.021711</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>-1.771641</td>\n",
|
||||||
|
" <td>0.039500</td>\n",
|
||||||
|
" <td>-1.358623</td>\n",
|
||||||
|
" <td>1.920760</td>\n",
|
||||||
|
" <td>-2.550817</td>\n",
|
||||||
|
" <td>0.209519</td>\n",
|
||||||
|
" <td>-0.756387</td>\n",
|
||||||
|
" <td>0.700109</td>\n",
|
||||||
|
" <td>-1.408543</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>0.286790</td>\n",
|
||||||
|
" <td>0.672251</td>\n",
|
||||||
|
" <td>-0.172186</td>\n",
|
||||||
|
" <td>-0.518922</td>\n",
|
||||||
|
" <td>0.231498</td>\n",
|
||||||
|
" <td>-0.074296</td>\n",
|
||||||
|
" <td>-0.034287</td>\n",
|
||||||
|
" <td>-0.877735</td>\n",
|
||||||
|
" <td>0.028065</td>\n",
|
||||||
|
" <td>-0.321009</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>6.463747</td>\n",
|
||||||
|
" <td>1.064473</td>\n",
|
||||||
|
" <td>0.209472</td>\n",
|
||||||
|
" <td>0.448906</td>\n",
|
||||||
|
" <td>-1.555301</td>\n",
|
||||||
|
" <td>3.215822</td>\n",
|
||||||
|
" <td>-0.946356</td>\n",
|
||||||
|
" <td>-0.805204</td>\n",
|
||||||
|
" <td>2.112526</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
|
" <td>-0.235585</td>\n",
|
||||||
|
" <td>0.019570</td>\n",
|
||||||
|
" <td>-0.270189</td>\n",
|
||||||
|
" <td>0.375297</td>\n",
|
||||||
|
" <td>-0.396732</td>\n",
|
||||||
|
" <td>-0.109084</td>\n",
|
||||||
|
" <td>0.317305</td>\n",
|
||||||
|
" <td>-0.145975</td>\n",
|
||||||
|
" <td>-0.674692</td>\n",
|
||||||
|
" <td>-0.378458</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"<p>5 rows × 173 columns</p>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Unnamed: 0 PCA0 PCA1 PCA2 PCA3 PCA4 PCA5 \\\n",
|
||||||
|
"0 0 4.345109 1.619386 -0.739617 -2.080179 -0.985088 1.999117 \n",
|
||||||
|
"1 1 0.019142 -3.106959 0.168223 -0.553341 0.940712 0.200719 \n",
|
||||||
|
"2 2 4.851149 1.242811 -0.351815 -1.484957 -0.758200 2.181179 \n",
|
||||||
|
"3 3 -1.771641 0.039500 -1.358623 1.920760 -2.550817 0.209519 \n",
|
||||||
|
"4 4 6.463747 1.064473 0.209472 0.448906 -1.555301 3.215822 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA6 PCA7 PCA8 ... PCA162 PCA163 PCA164 PCA165 \\\n",
|
||||||
|
"0 -1.231870 -0.131782 1.316470 ... -0.276936 -0.128260 0.160733 0.071333 \n",
|
||||||
|
"1 -0.468954 0.235082 -0.838022 ... 0.140974 -0.224535 -1.063234 -0.334556 \n",
|
||||||
|
"2 -1.843949 0.296194 1.299142 ... -0.289024 -0.282563 0.088334 0.238624 \n",
|
||||||
|
"3 -0.756387 0.700109 -1.408543 ... 0.286790 0.672251 -0.172186 -0.518922 \n",
|
||||||
|
"4 -0.946356 -0.805204 2.112526 ... -0.235585 0.019570 -0.270189 0.375297 \n",
|
||||||
|
"\n",
|
||||||
|
" PCA166 PCA167 PCA168 PCA169 PCA170 PCA171 \n",
|
||||||
|
"0 0.155468 0.172801 -0.169568 -0.144326 0.391713 -0.013357 \n",
|
||||||
|
"1 0.361166 -1.218397 -0.346191 -0.962753 -0.138863 1.083103 \n",
|
||||||
|
"2 0.327280 0.325285 -0.704900 -0.036388 -0.540516 0.021711 \n",
|
||||||
|
"3 0.231498 -0.074296 -0.034287 -0.877735 0.028065 -0.321009 \n",
|
||||||
|
"4 -0.396732 -0.109084 0.317305 -0.145975 -0.674692 -0.378458 \n",
|
||||||
|
"\n",
|
||||||
|
"[5 rows x 173 columns]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(y)\n",
|
||||||
|
"print(X.shape)\n",
|
||||||
|
"print(X_test.shape)\n",
|
||||||
|
"X.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Initialize our models\n",
|
||||||
|
"\n",
|
||||||
|
"tree_model = DecisionTreeRegressor()\n",
|
||||||
|
"rf_model = RandomForestRegressor()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\users\\tsb\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\ensemble\\forest.py:245: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
|
||||||
|
" \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
|
||||||
|
" max_features='auto', max_leaf_nodes=None,\n",
|
||||||
|
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
|
||||||
|
" min_samples_leaf=1, min_samples_split=2,\n",
|
||||||
|
" min_weight_fraction_leaf=0.0, n_estimators=10,\n",
|
||||||
|
" n_jobs=None, oob_score=False, random_state=None,\n",
|
||||||
|
" verbose=0, warm_start=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Fit our models to the training data\n",
|
||||||
|
"\n",
|
||||||
|
"tree_model.fit(X, y)\n",
|
||||||
|
"rf_model.fit(X, y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Calculate error metrics for each model\n",
|
||||||
|
"\n",
|
||||||
|
"tree_mse = mean_squared_error(y, tree_model.predict(X))\n",
|
||||||
|
"tree_mae = mean_absolute_error(y, tree_model.predict(X))\n",
|
||||||
|
"rf_mse = mean_squared_error(y, rf_model.predict(X))\n",
|
||||||
|
"rf_mae = mean_absolute_error(y, rf_model.predict(X))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Decision Tree training mse = 0.0 & mae = 0.0 & rmse = 0.0\n",
|
||||||
|
"Random Forest training mse = 202420995.41813016 & mae = 8265.452260273973 & rmse = 14227.473261901783\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(\"Decision Tree training mse = \",tree_mse,\" & mae = \",tree_mae,\" & rmse = \", sqrt(tree_mse))\n",
|
||||||
|
"print(\"Random Forest training mse = \",rf_mse,\" & mae = \",rf_mae,\" & rmse = \", sqrt(rf_mse))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"<bound method NDFrame.head of Id SalePrice\n",
|
||||||
|
"0 1461 125500.0\n",
|
||||||
|
"1 1462 149500.0\n",
|
||||||
|
"2 1463 185000.0\n",
|
||||||
|
"3 1464 201000.0\n",
|
||||||
|
"4 1465 176000.0\n",
|
||||||
|
"... ... ...\n",
|
||||||
|
"1454 2915 89000.0\n",
|
||||||
|
"1455 2916 80000.0\n",
|
||||||
|
"1456 2917 167900.0\n",
|
||||||
|
"1457 2918 135000.0\n",
|
||||||
|
"1458 2919 181000.0\n",
|
||||||
|
"\n",
|
||||||
|
"[1459 rows x 2 columns]>\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Setup and save Tree prediction\n",
|
||||||
|
"\n",
|
||||||
|
"test_pred = tree_model.predict(X_test.values) # Note that we need to feed our model the values or our dataframe X_test\n",
|
||||||
|
"tree_predictions = np.c_[testId, test_pred] # Note that we take the argmax over the collumns to use our softmax output\n",
|
||||||
|
"tree_submission = pd.DataFrame(tree_predictions, columns = ['Id', 'SalePrice'])\n",
|
||||||
|
"tree_submission['Id'] = tree_submission['Id'].astype(int)\n",
|
||||||
|
"print(tree_submission.head)\n",
|
||||||
|
"tree_submission.to_csv(\"submissions/TreeSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"<bound method NDFrame.head of Id SalePrice\n",
|
||||||
|
"0 1461 136110.0\n",
|
||||||
|
"1 1462 167750.0\n",
|
||||||
|
"2 1463 184140.0\n",
|
||||||
|
"3 1464 186963.2\n",
|
||||||
|
"4 1465 188728.2\n",
|
||||||
|
"... ... ...\n",
|
||||||
|
"1454 2915 95030.0\n",
|
||||||
|
"1455 2916 99045.9\n",
|
||||||
|
"1456 2917 168480.0\n",
|
||||||
|
"1457 2918 114850.0\n",
|
||||||
|
"1458 2919 196190.0\n",
|
||||||
|
"\n",
|
||||||
|
"[1459 rows x 2 columns]>\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Setup and save Random Forest prediction\n",
|
||||||
|
"\n",
|
||||||
|
"test_pred = rf_model.predict(X_test.values) # Note that we need to feed our model the values or our dataframe X_test\n",
|
||||||
|
"rf_predictions = np.c_[testId, test_pred] # Note that we take the argmax over the collumns to use our softmax output\n",
|
||||||
|
"rf_submission = pd.DataFrame(rf_predictions, columns = ['Id', 'SalePrice'])\n",
|
||||||
|
"rf_submission['Id'] = rf_submission['Id'].astype(int)\n",
|
||||||
|
"print(rf_submission.head)\n",
|
||||||
|
"rf_submission.to_csv(\"submissions/RFSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 26,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Set up and save Ensemble prediction\n",
|
||||||
|
"\n",
|
||||||
|
"ensemble_submission = tree_submission\n",
|
||||||
|
"ensemble_submission['TreeSalePrice'] = tree_submission['SalePrice']\n",
|
||||||
|
"ensemble_submission['ForestSalePrice'] = rf_submission['SalePrice']\n",
|
||||||
|
"ensemble_submission['SalePrice'] = ensemble_submission[['TreeSalePrice', 'ForestSalePrice']].mean(axis=1)\n",
|
||||||
|
"ensemble_submission = ensemble_submission.drop(['TreeSalePrice', 'ForestSalePrice'], axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>Id</th>\n",
|
||||||
|
" <th>SalePrice</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>0</td>\n",
|
||||||
|
" <td>1461</td>\n",
|
||||||
|
" <td>133457.50</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>1</td>\n",
|
||||||
|
" <td>1462</td>\n",
|
||||||
|
" <td>163187.50</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>1463</td>\n",
|
||||||
|
" <td>184355.00</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>1464</td>\n",
|
||||||
|
" <td>190472.40</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" <td>1465</td>\n",
|
||||||
|
" <td>185546.15</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" Id SalePrice\n",
|
||||||
|
"0 1461 133457.50\n",
|
||||||
|
"1 1462 163187.50\n",
|
||||||
|
"2 1463 184355.00\n",
|
||||||
|
"3 1464 190472.40\n",
|
||||||
|
"4 1465 185546.15"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"ensemble_submission.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 28,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ensemble_submission.to_csv(\"submissions/EnsembleSubmission.csv\", index=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,523 @@
|
||||||
|
MSSubClass: Identifies the type of dwelling involved in the sale.
|
||||||
|
|
||||||
|
20 1-STORY 1946 & NEWER ALL STYLES
|
||||||
|
30 1-STORY 1945 & OLDER
|
||||||
|
40 1-STORY W/FINISHED ATTIC ALL AGES
|
||||||
|
45 1-1/2 STORY - UNFINISHED ALL AGES
|
||||||
|
50 1-1/2 STORY FINISHED ALL AGES
|
||||||
|
60 2-STORY 1946 & NEWER
|
||||||
|
70 2-STORY 1945 & OLDER
|
||||||
|
75 2-1/2 STORY ALL AGES
|
||||||
|
80 SPLIT OR MULTI-LEVEL
|
||||||
|
85 SPLIT FOYER
|
||||||
|
90 DUPLEX - ALL STYLES AND AGES
|
||||||
|
120 1-STORY PUD (Planned Unit Development) - 1946 & NEWER
|
||||||
|
150 1-1/2 STORY PUD - ALL AGES
|
||||||
|
160 2-STORY PUD - 1946 & NEWER
|
||||||
|
180 PUD - MULTILEVEL - INCL SPLIT LEV/FOYER
|
||||||
|
190 2 FAMILY CONVERSION - ALL STYLES AND AGES
|
||||||
|
|
||||||
|
MSZoning: Identifies the general zoning classification of the sale.
|
||||||
|
|
||||||
|
A Agriculture
|
||||||
|
C Commercial
|
||||||
|
FV Floating Village Residential
|
||||||
|
I Industrial
|
||||||
|
RH Residential High Density
|
||||||
|
RL Residential Low Density
|
||||||
|
RP Residential Low Density Park
|
||||||
|
RM Residential Medium Density
|
||||||
|
|
||||||
|
LotFrontage: Linear feet of street connected to property
|
||||||
|
|
||||||
|
LotArea: Lot size in square feet
|
||||||
|
|
||||||
|
Street: Type of road access to property
|
||||||
|
|
||||||
|
Grvl Gravel
|
||||||
|
Pave Paved
|
||||||
|
|
||||||
|
Alley: Type of alley access to property
|
||||||
|
|
||||||
|
Grvl Gravel
|
||||||
|
Pave Paved
|
||||||
|
NA No alley access
|
||||||
|
|
||||||
|
LotShape: General shape of property
|
||||||
|
|
||||||
|
Reg Regular
|
||||||
|
IR1 Slightly irregular
|
||||||
|
IR2 Moderately Irregular
|
||||||
|
IR3 Irregular
|
||||||
|
|
||||||
|
LandContour: Flatness of the property
|
||||||
|
|
||||||
|
Lvl Near Flat/Level
|
||||||
|
Bnk Banked - Quick and significant rise from street grade to building
|
||||||
|
HLS Hillside - Significant slope from side to side
|
||||||
|
Low Depression
|
||||||
|
|
||||||
|
Utilities: Type of utilities available
|
||||||
|
|
||||||
|
AllPub All public Utilities (E,G,W,& S)
|
||||||
|
NoSewr Electricity, Gas, and Water (Septic Tank)
|
||||||
|
NoSeWa Electricity and Gas Only
|
||||||
|
ELO Electricity only
|
||||||
|
|
||||||
|
LotConfig: Lot configuration
|
||||||
|
|
||||||
|
Inside Inside lot
|
||||||
|
Corner Corner lot
|
||||||
|
CulDSac Cul-de-sac
|
||||||
|
FR2 Frontage on 2 sides of property
|
||||||
|
FR3 Frontage on 3 sides of property
|
||||||
|
|
||||||
|
LandSlope: Slope of property
|
||||||
|
|
||||||
|
Gtl Gentle slope
|
||||||
|
Mod Moderate Slope
|
||||||
|
Sev Severe Slope
|
||||||
|
|
||||||
|
Neighborhood: Physical locations within Ames city limits
|
||||||
|
|
||||||
|
Blmngtn Bloomington Heights
|
||||||
|
Blueste Bluestem
|
||||||
|
BrDale Briardale
|
||||||
|
BrkSide Brookside
|
||||||
|
ClearCr Clear Creek
|
||||||
|
CollgCr College Creek
|
||||||
|
Crawfor Crawford
|
||||||
|
Edwards Edwards
|
||||||
|
Gilbert Gilbert
|
||||||
|
IDOTRR Iowa DOT and Rail Road
|
||||||
|
MeadowV Meadow Village
|
||||||
|
Mitchel Mitchell
|
||||||
|
Names North Ames
|
||||||
|
NoRidge Northridge
|
||||||
|
NPkVill Northpark Villa
|
||||||
|
NridgHt Northridge Heights
|
||||||
|
NWAmes Northwest Ames
|
||||||
|
OldTown Old Town
|
||||||
|
SWISU South & West of Iowa State University
|
||||||
|
Sawyer Sawyer
|
||||||
|
SawyerW Sawyer West
|
||||||
|
Somerst Somerset
|
||||||
|
StoneBr Stone Brook
|
||||||
|
Timber Timberland
|
||||||
|
Veenker Veenker
|
||||||
|
|
||||||
|
Condition1: Proximity to various conditions
|
||||||
|
|
||||||
|
Artery Adjacent to arterial street
|
||||||
|
Feedr Adjacent to feeder street
|
||||||
|
Norm Normal
|
||||||
|
RRNn Within 200' of North-South Railroad
|
||||||
|
RRAn Adjacent to North-South Railroad
|
||||||
|
PosN Near positive off-site feature--park, greenbelt, etc.
|
||||||
|
PosA Adjacent to positive off-site feature
|
||||||
|
RRNe Within 200' of East-West Railroad
|
||||||
|
RRAe Adjacent to East-West Railroad
|
||||||
|
|
||||||
|
Condition2: Proximity to various conditions (if more than one is present)
|
||||||
|
|
||||||
|
Artery Adjacent to arterial street
|
||||||
|
Feedr Adjacent to feeder street
|
||||||
|
Norm Normal
|
||||||
|
RRNn Within 200' of North-South Railroad
|
||||||
|
RRAn Adjacent to North-South Railroad
|
||||||
|
PosN Near positive off-site feature--park, greenbelt, etc.
|
||||||
|
PosA Adjacent to positive off-site feature
|
||||||
|
RRNe Within 200' of East-West Railroad
|
||||||
|
RRAe Adjacent to East-West Railroad
|
||||||
|
|
||||||
|
BldgType: Type of dwelling
|
||||||
|
|
||||||
|
1Fam Single-family Detached
|
||||||
|
2FmCon Two-family Conversion; originally built as one-family dwelling
|
||||||
|
Duplx Duplex
|
||||||
|
TwnhsE Townhouse End Unit
|
||||||
|
TwnhsI Townhouse Inside Unit
|
||||||
|
|
||||||
|
HouseStyle: Style of dwelling
|
||||||
|
|
||||||
|
1Story One story
|
||||||
|
1.5Fin One and one-half story: 2nd level finished
|
||||||
|
1.5Unf One and one-half story: 2nd level unfinished
|
||||||
|
2Story Two story
|
||||||
|
2.5Fin Two and one-half story: 2nd level finished
|
||||||
|
2.5Unf Two and one-half story: 2nd level unfinished
|
||||||
|
SFoyer Split Foyer
|
||||||
|
SLvl Split Level
|
||||||
|
|
||||||
|
OverallQual: Rates the overall material and finish of the house
|
||||||
|
|
||||||
|
10 Very Excellent
|
||||||
|
9 Excellent
|
||||||
|
8 Very Good
|
||||||
|
7 Good
|
||||||
|
6 Above Average
|
||||||
|
5 Average
|
||||||
|
4 Below Average
|
||||||
|
3 Fair
|
||||||
|
2 Poor
|
||||||
|
1 Very Poor
|
||||||
|
|
||||||
|
OverallCond: Rates the overall condition of the house
|
||||||
|
|
||||||
|
10 Very Excellent
|
||||||
|
9 Excellent
|
||||||
|
8 Very Good
|
||||||
|
7 Good
|
||||||
|
6 Above Average
|
||||||
|
5 Average
|
||||||
|
4 Below Average
|
||||||
|
3 Fair
|
||||||
|
2 Poor
|
||||||
|
1 Very Poor
|
||||||
|
|
||||||
|
YearBuilt: Original construction date
|
||||||
|
|
||||||
|
YearRemodAdd: Remodel date (same as construction date if no remodeling or additions)
|
||||||
|
|
||||||
|
RoofStyle: Type of roof
|
||||||
|
|
||||||
|
Flat Flat
|
||||||
|
Gable Gable
|
||||||
|
Gambrel Gambrel (Barn)
|
||||||
|
Hip Hip
|
||||||
|
Mansard Mansard
|
||||||
|
Shed Shed
|
||||||
|
|
||||||
|
RoofMatl: Roof material
|
||||||
|
|
||||||
|
ClyTile Clay or Tile
|
||||||
|
CompShg Standard (Composite) Shingle
|
||||||
|
Membran Membrane
|
||||||
|
Metal Metal
|
||||||
|
Roll Roll
|
||||||
|
Tar&Grv Gravel & Tar
|
||||||
|
WdShake Wood Shakes
|
||||||
|
WdShngl Wood Shingles
|
||||||
|
|
||||||
|
Exterior1st: Exterior covering on house
|
||||||
|
|
||||||
|
AsbShng Asbestos Shingles
|
||||||
|
AsphShn Asphalt Shingles
|
||||||
|
BrkComm Brick Common
|
||||||
|
BrkFace Brick Face
|
||||||
|
CBlock Cinder Block
|
||||||
|
CemntBd Cement Board
|
||||||
|
HdBoard Hard Board
|
||||||
|
ImStucc Imitation Stucco
|
||||||
|
MetalSd Metal Siding
|
||||||
|
Other Other
|
||||||
|
Plywood Plywood
|
||||||
|
PreCast PreCast
|
||||||
|
Stone Stone
|
||||||
|
Stucco Stucco
|
||||||
|
VinylSd Vinyl Siding
|
||||||
|
Wd Sdng Wood Siding
|
||||||
|
WdShing Wood Shingles
|
||||||
|
|
||||||
|
Exterior2nd: Exterior covering on house (if more than one material)
|
||||||
|
|
||||||
|
AsbShng Asbestos Shingles
|
||||||
|
AsphShn Asphalt Shingles
|
||||||
|
BrkComm Brick Common
|
||||||
|
BrkFace Brick Face
|
||||||
|
CBlock Cinder Block
|
||||||
|
CemntBd Cement Board
|
||||||
|
HdBoard Hard Board
|
||||||
|
ImStucc Imitation Stucco
|
||||||
|
MetalSd Metal Siding
|
||||||
|
Other Other
|
||||||
|
Plywood Plywood
|
||||||
|
PreCast PreCast
|
||||||
|
Stone Stone
|
||||||
|
Stucco Stucco
|
||||||
|
VinylSd Vinyl Siding
|
||||||
|
Wd Sdng Wood Siding
|
||||||
|
WdShing Wood Shingles
|
||||||
|
|
||||||
|
MasVnrType: Masonry veneer type
|
||||||
|
|
||||||
|
BrkCmn Brick Common
|
||||||
|
BrkFace Brick Face
|
||||||
|
CBlock Cinder Block
|
||||||
|
None None
|
||||||
|
Stone Stone
|
||||||
|
|
||||||
|
MasVnrArea: Masonry veneer area in square feet
|
||||||
|
|
||||||
|
ExterQual: Evaluates the quality of the material on the exterior
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Average/Typical
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
|
||||||
|
ExterCond: Evaluates the present condition of the material on the exterior
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Average/Typical
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
|
||||||
|
Foundation: Type of foundation
|
||||||
|
|
||||||
|
BrkTil Brick & Tile
|
||||||
|
CBlock Cinder Block
|
||||||
|
PConc Poured Concrete
|
||||||
|
Slab Slab
|
||||||
|
Stone Stone
|
||||||
|
Wood Wood
|
||||||
|
|
||||||
|
BsmtQual: Evaluates the height of the basement
|
||||||
|
|
||||||
|
Ex Excellent (100+ inches)
|
||||||
|
Gd Good (90-99 inches)
|
||||||
|
TA Typical (80-89 inches)
|
||||||
|
Fa Fair (70-79 inches)
|
||||||
|
Po Poor (<70 inches)
|
||||||
|
NA No Basement
|
||||||
|
|
||||||
|
BsmtCond: Evaluates the general condition of the basement
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Typical - slight dampness allowed
|
||||||
|
Fa Fair - dampness or some cracking or settling
|
||||||
|
Po Poor - Severe cracking, settling, or wetness
|
||||||
|
NA No Basement
|
||||||
|
|
||||||
|
BsmtExposure: Refers to walkout or garden level walls
|
||||||
|
|
||||||
|
Gd Good Exposure
|
||||||
|
Av Average Exposure (split levels or foyers typically score average or above)
|
||||||
|
Mn Minimum Exposure
|
||||||
|
No No Exposure
|
||||||
|
NA No Basement
|
||||||
|
|
||||||
|
BsmtFinType1: Rating of basement finished area
|
||||||
|
|
||||||
|
GLQ Good Living Quarters
|
||||||
|
ALQ Average Living Quarters
|
||||||
|
BLQ Below Average Living Quarters
|
||||||
|
Rec Average Rec Room
|
||||||
|
LwQ Low Quality
|
||||||
|
Unf Unfinished
|
||||||
|
NA No Basement
|
||||||
|
|
||||||
|
BsmtFinSF1: Type 1 finished square feet
|
||||||
|
|
||||||
|
BsmtFinType2: Rating of basement finished area (if multiple types)
|
||||||
|
|
||||||
|
GLQ Good Living Quarters
|
||||||
|
ALQ Average Living Quarters
|
||||||
|
BLQ Below Average Living Quarters
|
||||||
|
Rec Average Rec Room
|
||||||
|
LwQ Low Quality
|
||||||
|
Unf Unfinished
|
||||||
|
NA No Basement
|
||||||
|
|
||||||
|
BsmtFinSF2: Type 2 finished square feet
|
||||||
|
|
||||||
|
BsmtUnfSF: Unfinished square feet of basement area
|
||||||
|
|
||||||
|
TotalBsmtSF: Total square feet of basement area
|
||||||
|
|
||||||
|
Heating: Type of heating
|
||||||
|
|
||||||
|
Floor Floor Furnace
|
||||||
|
GasA Gas forced warm air furnace
|
||||||
|
GasW Gas hot water or steam heat
|
||||||
|
Grav Gravity furnace
|
||||||
|
OthW Hot water or steam heat other than gas
|
||||||
|
Wall Wall furnace
|
||||||
|
|
||||||
|
HeatingQC: Heating quality and condition
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Average/Typical
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
|
||||||
|
CentralAir: Central air conditioning
|
||||||
|
|
||||||
|
N No
|
||||||
|
Y Yes
|
||||||
|
|
||||||
|
Electrical: Electrical system
|
||||||
|
|
||||||
|
SBrkr Standard Circuit Breakers & Romex
|
||||||
|
FuseA Fuse Box over 60 AMP and all Romex wiring (Average)
|
||||||
|
FuseF 60 AMP Fuse Box and mostly Romex wiring (Fair)
|
||||||
|
FuseP 60 AMP Fuse Box and mostly knob & tube wiring (poor)
|
||||||
|
Mix Mixed
|
||||||
|
|
||||||
|
1stFlrSF: First Floor square feet
|
||||||
|
|
||||||
|
2ndFlrSF: Second floor square feet
|
||||||
|
|
||||||
|
LowQualFinSF: Low quality finished square feet (all floors)
|
||||||
|
|
||||||
|
GrLivArea: Above grade (ground) living area square feet
|
||||||
|
|
||||||
|
BsmtFullBath: Basement full bathrooms
|
||||||
|
|
||||||
|
BsmtHalfBath: Basement half bathrooms
|
||||||
|
|
||||||
|
FullBath: Full bathrooms above grade
|
||||||
|
|
||||||
|
HalfBath: Half baths above grade
|
||||||
|
|
||||||
|
Bedroom: Bedrooms above grade (does NOT include basement bedrooms)
|
||||||
|
|
||||||
|
Kitchen: Kitchens above grade
|
||||||
|
|
||||||
|
KitchenQual: Kitchen quality
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Typical/Average
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
|
||||||
|
TotRmsAbvGrd: Total rooms above grade (does not include bathrooms)
|
||||||
|
|
||||||
|
Functional: Home functionality (Assume typical unless deductions are warranted)
|
||||||
|
|
||||||
|
Typ Typical Functionality
|
||||||
|
Min1 Minor Deductions 1
|
||||||
|
Min2 Minor Deductions 2
|
||||||
|
Mod Moderate Deductions
|
||||||
|
Maj1 Major Deductions 1
|
||||||
|
Maj2 Major Deductions 2
|
||||||
|
Sev Severely Damaged
|
||||||
|
Sal Salvage only
|
||||||
|
|
||||||
|
Fireplaces: Number of fireplaces
|
||||||
|
|
||||||
|
FireplaceQu: Fireplace quality
|
||||||
|
|
||||||
|
Ex Excellent - Exceptional Masonry Fireplace
|
||||||
|
Gd Good - Masonry Fireplace in main level
|
||||||
|
TA Average - Prefabricated Fireplace in main living area or Masonry Fireplace in basement
|
||||||
|
Fa Fair - Prefabricated Fireplace in basement
|
||||||
|
Po Poor - Ben Franklin Stove
|
||||||
|
NA No Fireplace
|
||||||
|
|
||||||
|
GarageType: Garage location
|
||||||
|
|
||||||
|
2Types More than one type of garage
|
||||||
|
Attchd Attached to home
|
||||||
|
Basment Basement Garage
|
||||||
|
BuiltIn Built-In (Garage part of house - typically has room above garage)
|
||||||
|
CarPort Car Port
|
||||||
|
Detchd Detached from home
|
||||||
|
NA No Garage
|
||||||
|
|
||||||
|
GarageYrBlt: Year garage was built
|
||||||
|
|
||||||
|
GarageFinish: Interior finish of the garage
|
||||||
|
|
||||||
|
Fin Finished
|
||||||
|
RFn Rough Finished
|
||||||
|
Unf Unfinished
|
||||||
|
NA No Garage
|
||||||
|
|
||||||
|
GarageCars: Size of garage in car capacity
|
||||||
|
|
||||||
|
GarageArea: Size of garage in square feet
|
||||||
|
|
||||||
|
GarageQual: Garage quality
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Typical/Average
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
NA No Garage
|
||||||
|
|
||||||
|
GarageCond: Garage condition
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Typical/Average
|
||||||
|
Fa Fair
|
||||||
|
Po Poor
|
||||||
|
NA No Garage
|
||||||
|
|
||||||
|
PavedDrive: Paved driveway
|
||||||
|
|
||||||
|
Y Paved
|
||||||
|
P Partial Pavement
|
||||||
|
N Dirt/Gravel
|
||||||
|
|
||||||
|
WoodDeckSF: Wood deck area in square feet
|
||||||
|
|
||||||
|
OpenPorchSF: Open porch area in square feet
|
||||||
|
|
||||||
|
EnclosedPorch: Enclosed porch area in square feet
|
||||||
|
|
||||||
|
3SsnPorch: Three season porch area in square feet
|
||||||
|
|
||||||
|
ScreenPorch: Screen porch area in square feet
|
||||||
|
|
||||||
|
PoolArea: Pool area in square feet
|
||||||
|
|
||||||
|
PoolQC: Pool quality
|
||||||
|
|
||||||
|
Ex Excellent
|
||||||
|
Gd Good
|
||||||
|
TA Average/Typical
|
||||||
|
Fa Fair
|
||||||
|
NA No Pool
|
||||||
|
|
||||||
|
Fence: Fence quality
|
||||||
|
|
||||||
|
GdPrv Good Privacy
|
||||||
|
MnPrv Minimum Privacy
|
||||||
|
GdWo Good Wood
|
||||||
|
MnWw Minimum Wood/Wire
|
||||||
|
NA No Fence
|
||||||
|
|
||||||
|
MiscFeature: Miscellaneous feature not covered in other categories
|
||||||
|
|
||||||
|
Elev Elevator
|
||||||
|
Gar2 2nd Garage (if not described in garage section)
|
||||||
|
Othr Other
|
||||||
|
Shed Shed (over 100 SF)
|
||||||
|
TenC Tennis Court
|
||||||
|
NA None
|
||||||
|
|
||||||
|
MiscVal: $Value of miscellaneous feature
|
||||||
|
|
||||||
|
MoSold: Month Sold (MM)
|
||||||
|
|
||||||
|
YrSold: Year Sold (YYYY)
|
||||||
|
|
||||||
|
SaleType: Type of sale
|
||||||
|
|
||||||
|
WD Warranty Deed - Conventional
|
||||||
|
CWD Warranty Deed - Cash
|
||||||
|
VWD Warranty Deed - VA Loan
|
||||||
|
New Home just constructed and sold
|
||||||
|
COD Court Officer Deed/Estate
|
||||||
|
Con Contract 15% Down payment regular terms
|
||||||
|
ConLw Contract Low Down payment and low interest
|
||||||
|
ConLI Contract Low Interest
|
||||||
|
ConLD Contract Low Down
|
||||||
|
Oth Other
|
||||||
|
|
||||||
|
SaleCondition: Condition of sale
|
||||||
|
|
||||||
|
Normal Normal Sale
|
||||||
|
Abnorml Abnormal Sale - trade, foreclosure, short sale
|
||||||
|
AdjLand Adjoining Land Purchase
|
||||||
|
Alloca Allocation - two linked properties with separate deeds, typically condo with a garage unit
|
||||||
|
Family Sale between family members
|
||||||
|
Partial Home was not completed when last assessed (associated with New Homes)
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue