{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "from sklearn.tree import DecisionTreeRegressor\n", "from sklearn.ensemble import RandomForestRegressor\n", "from math import sqrt\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.metrics import mean_absolute_error" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the pre-processed train/test tables from the local data/ directory.\n", "# Both CSVs carry an unnamed first column (pandas would label it 'Unnamed: 0') that\n", "# only repeats the row number; read it as the index so it is not kept as a data\n", "# column and cannot end up among the model features.\n", "train = pd.read_csv(os.path.join('data', 'clean_train.csv'), index_col=0)\n", "test = pd.read_csv(os.path.join('data', 'clean_test.csv'), index_col=0)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Unnamed: 0 | \n", "PCA0 | \n", "PCA1 | \n", "PCA2 | \n", "PCA3 | \n", "PCA4 | \n", "PCA5 | \n", "PCA6 | \n", "PCA7 | \n", "PCA8 | \n", "... | \n", "PCA164 | \n", "PCA165 | \n", "PCA166 | \n", "PCA167 | \n", "PCA168 | \n", "PCA169 | \n", "PCA170 | \n", "PCA171 | \n", "Id | \n", "SalePrice | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "4.345109 | \n", "1.619386 | \n", "-0.739617 | \n", "-2.080179 | \n", "-0.985088 | \n", "1.999117 | \n", "-1.231870 | \n", "-0.131782 | \n", "1.316470 | \n", "... | \n", "0.160733 | \n", "0.071333 | \n", "0.155468 | \n", "0.172801 | \n", "-0.169568 | \n", "-0.144326 | \n", "0.391713 | \n", "-0.013357 | \n", "1 | \n", "208500 | \n", "
| 1 | \n", "1 | \n", "0.019142 | \n", "-3.106959 | \n", "0.168223 | \n", "-0.553341 | \n", "0.940712 | \n", "0.200719 | \n", "-0.468954 | \n", "0.235082 | \n", "-0.838022 | \n", "... | \n", "-1.063234 | \n", "-0.334556 | \n", "0.361166 | \n", "-1.218397 | \n", "-0.346191 | \n", "-0.962753 | \n", "-0.138863 | \n", "1.083103 | \n", "2 | \n", "181500 | \n", "
| 2 | \n", "2 | \n", "4.851149 | \n", "1.242811 | \n", "-0.351815 | \n", "-1.484957 | \n", "-0.758200 | \n", "2.181179 | \n", "-1.843949 | \n", "0.296194 | \n", "1.299142 | \n", "... | \n", "0.088334 | \n", "0.238624 | \n", "0.327280 | \n", "0.325285 | \n", "-0.704900 | \n", "-0.036388 | \n", "-0.540516 | \n", "0.021711 | \n", "3 | \n", "223500 | \n", "
| 3 | \n", "3 | \n", "-1.771641 | \n", "0.039500 | \n", "-1.358623 | \n", "1.920760 | \n", "-2.550817 | \n", "0.209519 | \n", "-0.756387 | \n", "0.700109 | \n", "-1.408543 | \n", "... | \n", "-0.172186 | \n", "-0.518922 | \n", "0.231498 | \n", "-0.074296 | \n", "-0.034287 | \n", "-0.877735 | \n", "0.028065 | \n", "-0.321009 | \n", "4 | \n", "140000 | \n", "
| 4 | \n", "4 | \n", "6.463747 | \n", "1.064473 | \n", "0.209472 | \n", "0.448906 | \n", "-1.555301 | \n", "3.215822 | \n", "-0.946356 | \n", "-0.805204 | \n", "2.112526 | \n", "... | \n", "-0.270189 | \n", "0.375297 | \n", "-0.396732 | \n", "-0.109084 | \n", "0.317305 | \n", "-0.145975 | \n", "-0.674692 | \n", "-0.378458 | \n", "5 | \n", "250000 | \n", "
5 rows × 175 columns
\n", "| \n", " | Unnamed: 0 | \n", "PCA0 | \n", "PCA1 | \n", "PCA2 | \n", "PCA3 | \n", "PCA4 | \n", "PCA5 | \n", "PCA6 | \n", "PCA7 | \n", "PCA8 | \n", "... | \n", "PCA163 | \n", "PCA164 | \n", "PCA165 | \n", "PCA166 | \n", "PCA167 | \n", "PCA168 | \n", "PCA169 | \n", "PCA170 | \n", "PCA171 | \n", "Id | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "-3.208086 | \n", "-2.987338 | \n", "-0.327066 | \n", "-1.609206 | \n", "0.016879 | \n", "-1.514939 | \n", "-0.417889 | \n", "-0.988173 | \n", "-0.653363 | \n", "... | \n", "-0.027364 | \n", "0.653222 | \n", "-0.201973 | \n", "-0.769946 | \n", "-0.344834 | \n", "0.514257 | \n", "1.114106 | \n", "0.337765 | \n", "-0.639617 | \n", "1461 | \n", "
| 1 | \n", "1 | \n", "-1.403753 | \n", "-4.261851 | \n", "0.107527 | \n", "0.935981 | \n", "0.165777 | \n", "-0.299485 | \n", "-0.524918 | \n", "-2.332121 | \n", "0.031044 | \n", "... | \n", "3.856117 | \n", "0.787996 | \n", "0.215221 | \n", "0.458275 | \n", "1.135109 | \n", "0.378972 | \n", "0.953559 | \n", "-1.008240 | \n", "4.445435 | \n", "1462 | \n", "
| 2 | \n", "2 | \n", "2.257002 | \n", "0.427951 | \n", "-0.610464 | \n", "-1.301125 | \n", "-1.058327 | \n", "2.674177 | \n", "-1.500824 | \n", "-0.223999 | \n", "0.403440 | \n", "... | \n", "-0.117138 | \n", "-0.378473 | \n", "-0.031613 | \n", "0.090593 | \n", "-0.173914 | \n", "-0.150098 | \n", "-0.006612 | \n", "0.190780 | \n", "-0.152486 | \n", "1463 | \n", "
| 3 | \n", "3 | \n", "3.253618 | \n", "0.537318 | \n", "-0.796079 | \n", "-0.851716 | \n", "-1.209643 | \n", "2.388795 | \n", "-1.340676 | \n", "-0.876322 | \n", "0.421183 | \n", "... | \n", "-0.441586 | \n", "0.020066 | \n", "-0.151709 | \n", "0.444826 | \n", "0.008218 | \n", "-0.161705 | \n", "-0.453482 | \n", "0.472352 | \n", "0.046141 | \n", "1464 | \n", "
| 4 | \n", "4 | \n", "2.876409 | \n", "-0.075909 | \n", "-0.154959 | \n", "-2.469870 | \n", "1.407820 | \n", "0.487532 | \n", "0.072190 | \n", "2.414446 | \n", "1.667224 | \n", "... | \n", "0.269062 | \n", "0.651172 | \n", "-0.050461 | \n", "-0.526448 | \n", "-0.843701 | \n", "0.574770 | \n", "-0.227828 | \n", "1.071423 | \n", "1.362638 | \n", "1465 | \n", "
5 rows × 174 columns
\n", "| \n", " | Unnamed: 0 | \n", "PCA0 | \n", "PCA1 | \n", "PCA2 | \n", "PCA3 | \n", "PCA4 | \n", "PCA5 | \n", "PCA6 | \n", "PCA7 | \n", "PCA8 | \n", "... | \n", "PCA162 | \n", "PCA163 | \n", "PCA164 | \n", "PCA165 | \n", "PCA166 | \n", "PCA167 | \n", "PCA168 | \n", "PCA169 | \n", "PCA170 | \n", "PCA171 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "4.345109 | \n", "1.619386 | \n", "-0.739617 | \n", "-2.080179 | \n", "-0.985088 | \n", "1.999117 | \n", "-1.231870 | \n", "-0.131782 | \n", "1.316470 | \n", "... | \n", "-0.276936 | \n", "-0.128260 | \n", "0.160733 | \n", "0.071333 | \n", "0.155468 | \n", "0.172801 | \n", "-0.169568 | \n", "-0.144326 | \n", "0.391713 | \n", "-0.013357 | \n", "
| 1 | \n", "1 | \n", "0.019142 | \n", "-3.106959 | \n", "0.168223 | \n", "-0.553341 | \n", "0.940712 | \n", "0.200719 | \n", "-0.468954 | \n", "0.235082 | \n", "-0.838022 | \n", "... | \n", "0.140974 | \n", "-0.224535 | \n", "-1.063234 | \n", "-0.334556 | \n", "0.361166 | \n", "-1.218397 | \n", "-0.346191 | \n", "-0.962753 | \n", "-0.138863 | \n", "1.083103 | \n", "
| 2 | \n", "2 | \n", "4.851149 | \n", "1.242811 | \n", "-0.351815 | \n", "-1.484957 | \n", "-0.758200 | \n", "2.181179 | \n", "-1.843949 | \n", "0.296194 | \n", "1.299142 | \n", "... | \n", "-0.289024 | \n", "-0.282563 | \n", "0.088334 | \n", "0.238624 | \n", "0.327280 | \n", "0.325285 | \n", "-0.704900 | \n", "-0.036388 | \n", "-0.540516 | \n", "0.021711 | \n", "
| 3 | \n", "3 | \n", "-1.771641 | \n", "0.039500 | \n", "-1.358623 | \n", "1.920760 | \n", "-2.550817 | \n", "0.209519 | \n", "-0.756387 | \n", "0.700109 | \n", "-1.408543 | \n", "... | \n", "0.286790 | \n", "0.672251 | \n", "-0.172186 | \n", "-0.518922 | \n", "0.231498 | \n", "-0.074296 | \n", "-0.034287 | \n", "-0.877735 | \n", "0.028065 | \n", "-0.321009 | \n", "
| 4 | \n", "4 | \n", "6.463747 | \n", "1.064473 | \n", "0.209472 | \n", "0.448906 | \n", "-1.555301 | \n", "3.215822 | \n", "-0.946356 | \n", "-0.805204 | \n", "2.112526 | \n", "... | \n", "-0.235585 | \n", "0.019570 | \n", "-0.270189 | \n", "0.375297 | \n", "-0.396732 | \n", "-0.109084 | \n", "0.317305 | \n", "-0.145975 | \n", "-0.674692 | \n", "-0.378458 | \n", "
5 rows × 173 columns
\n", "| \n", " | Id | \n", "SalePrice | \n", "
|---|---|---|
| 0 | \n", "1461 | \n", "133457.50 | \n", "
| 1 | \n", "1462 | \n", "163187.50 | \n", "
| 2 | \n", "1463 | \n", "184355.00 | \n", "
| 3 | \n", "1464 | \n", "190472.40 | \n", "
| 4 | \n", "1465 | \n", "185546.15 | \n", "