260 lines
31 KiB
Plaintext
260 lines
31 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib as mpl\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import sklearn.linear_model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the country statistics table from the local CSV file\n",
|
|
"country_stats = pd.read_csv(filepath_or_buffer='data/country_stats.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Country</th>\n",
|
|
" <th>GDP per capita</th>\n",
|
|
" <th>Life satisfaction</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <td>0</td>\n",
|
|
" <td>South Africa</td>\n",
|
|
" <td>6100.354</td>\n",
|
|
" <td>4.7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <td>1</td>\n",
|
|
" <td>Turkey</td>\n",
|
|
" <td>8957.894</td>\n",
|
|
" <td>5.5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <td>2</td>\n",
|
|
" <td>Russia</td>\n",
|
|
" <td>11162.652</td>\n",
|
|
" <td>5.8</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <td>3</td>\n",
|
|
" <td>Poland</td>\n",
|
|
" <td>14901.547</td>\n",
|
|
" <td>6.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <td>4</td>\n",
|
|
" <td>Hungary</td>\n",
|
|
" <td>17463.284</td>\n",
|
|
" <td>5.6</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Country GDP per capita Life satisfaction\n",
|
|
"0 South Africa 6100.354 4.7\n",
|
|
"1 Turkey 8957.894 5.5\n",
|
|
"2 Russia 11162.652 5.8\n",
|
|
"3 Poland 14901.547 6.1\n",
|
|
"4 Hungary 17463.284 5.6"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Preview the first five rows of the loaded table\n",
|
|
"country_stats.head(n=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Build (n_samples, 1) column vectors: the feature X (GDP per capita) and the target y (life satisfaction)\n",
|
|
"X = np.asarray(country_stats[\"GDP per capita\"]).reshape(-1, 1)\n",
|
|
"y = np.asarray(country_stats[\"Life satisfaction\"]).reshape(-1, 1)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<function matplotlib.pyplot.show(*args, **kw)>"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Visualize the data; plt.show() is called (not merely referenced) so the figure is rendered\n",
|
|
"country_stats.plot(kind='scatter', x = \"GDP per capita\", y = \"Life satisfaction\")\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# The scatter plot suggests a roughly linear relationship, so a linear model should fit decently\n",
|
|
"\n",
|
|
"# Choose the model and train it in one step (fit returns the fitted estimator itself)\n",
|
|
"model = sklearn.linear_model.LinearRegression().fit(X, y)\n",
|
|
"\n",
|
|
"# Predictions for the same inputs the model was trained on\n",
|
|
"y_pred = model.predict(X)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.legend.Legend at 0x1db6af81948>"
|
|
]
|
|
},
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Plot the data points together with the line of best fit found by our model\n",
|
|
"\n",
|
|
"plt.plot(X, y_pred, 'r', label='Line of Best Fit')\n",
|
|
"plt.scatter(X, y, color='k', s=3.5)\n",
|
|
"plt.ylabel('Life Satisfaction')\n",
|
|
"plt.xlabel('GDP Per Capita')\n",
|
|
"plt.legend(loc='lower right')\n",
|
|
"# Render explicitly so the cell does not echo the Legend object's repr (which embeds a memory address)\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'For our line of best fit, the slope is 4.082156611743397e-05 and the intercept is 5.027635740227981'"
|
|
]
|
|
},
|
|
"execution_count": 48,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Report the fitted parameters; coef_ is 2-D and intercept_ is 1-D here because the model was fit on a column-vector y\n",
|
|
"slope, intercept = model.coef_[0][0], model.intercept_[0]\n",
|
|
"\"For our line of best fit, the slope is {} and the intercept is {}\".format(slope, intercept)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.4"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|