GeronBook/Ch1/.ipynb_checkpoints/cleanup-checkpoint.ipynb

1253 lines
44 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 127,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"# Forward slashes are accepted on every OS (Windows included) and avoid the\n",
"# invalid '\\B' / '\\W' escape sequences that a backslash path puts in a string literal.\n",
"BLI = pd.read_csv(\"data/BLI.csv\")\n",
"# WEO.xls is parsed as tab-delimited latin-1 text: ',' is the thousands\n",
"# separator and 'n/a' is read as a missing value.\n",
"WEO = pd.read_csv(\"data/WEO.xls\", thousands=',', delimiter='\\t', encoding='latin1', na_values='n/a')"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>WEO Country Code</th>\n",
" <th>Country</th>\n",
" <th>Units</th>\n",
" <th>Scale</th>\n",
" <th>2017</th>\n",
" <th>2018</th>\n",
" <th>2019</th>\n",
" <th>2020</th>\n",
" <th>2021</th>\n",
" <th>2022</th>\n",
" <th>2023</th>\n",
" <th>2024</th>\n",
" <th>Estimates Start After</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>512</td>\n",
" <td>Afghanistan</td>\n",
" <td>U.S. dollars</td>\n",
" <td>Units</td>\n",
" <td>569.531</td>\n",
" <td>544.983</td>\n",
" <td>513.108</td>\n",
" <td>509.759</td>\n",
" <td>533.089</td>\n",
" <td>566.416</td>\n",
" <td>602.884</td>\n",
" <td>644.950</td>\n",
" <td>2016.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>914</td>\n",
" <td>Albania</td>\n",
" <td>U.S. dollars</td>\n",
" <td>Units</td>\n",
" <td>4540.459</td>\n",
" <td>5239.212</td>\n",
" <td>5372.742</td>\n",
" <td>5847.056</td>\n",
" <td>6333.425</td>\n",
" <td>6876.566</td>\n",
" <td>7410.754</td>\n",
" <td>7993.468</td>\n",
" <td>2018.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>612</td>\n",
" <td>Algeria</td>\n",
" <td>U.S. dollars</td>\n",
" <td>Units</td>\n",
" <td>4012.134</td>\n",
" <td>4080.913</td>\n",
" <td>3980.118</td>\n",
" <td>4039.101</td>\n",
" <td>4032.707</td>\n",
" <td>4055.391</td>\n",
" <td>4047.724</td>\n",
" <td>3768.013</td>\n",
" <td>2017.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>614</td>\n",
" <td>Angola</td>\n",
" <td>U.S. dollars</td>\n",
" <td>Units</td>\n",
" <td>4303.693</td>\n",
" <td>3620.589</td>\n",
" <td>3037.976</td>\n",
" <td>2867.517</td>\n",
" <td>2897.032</td>\n",
" <td>2953.859</td>\n",
" <td>3045.260</td>\n",
" <td>3120.541</td>\n",
" <td>2017.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>311</td>\n",
" <td>Antigua and Barbuda</td>\n",
" <td>U.S. dollars</td>\n",
" <td>Units</td>\n",
" <td>16089.363</td>\n",
" <td>17464.336</td>\n",
" <td>18109.095</td>\n",
" <td>18887.448</td>\n",
" <td>19544.781</td>\n",
" <td>20124.740</td>\n",
" <td>20721.909</td>\n",
" <td>21336.798</td>\n",
" <td>2011.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" WEO Country Code Country Units Scale 2017 \\\n",
"0 512 Afghanistan U.S. dollars Units 569.531 \n",
"1 914 Albania U.S. dollars Units 4540.459 \n",
"2 612 Algeria U.S. dollars Units 4012.134 \n",
"3 614 Angola U.S. dollars Units 4303.693 \n",
"4 311 Antigua and Barbuda U.S. dollars Units 16089.363 \n",
"\n",
" 2018 2019 2020 2021 2022 2023 \\\n",
"0 544.983 513.108 509.759 533.089 566.416 602.884 \n",
"1 5239.212 5372.742 5847.056 6333.425 6876.566 7410.754 \n",
"2 4080.913 3980.118 4039.101 4032.707 4055.391 4047.724 \n",
"3 3620.589 3037.976 2867.517 2897.032 2953.859 3045.260 \n",
"4 17464.336 18109.095 18887.448 19544.781 20124.740 20721.909 \n",
"\n",
" 2024 Estimates Start After \n",
"0 644.950 2016.0 \n",
"1 7993.468 2018.0 \n",
"2 3768.013 2017.0 \n",
"3 3120.541 2017.0 \n",
"4 21336.798 2011.0 "
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the freshly loaded WEO table.\n",
"WEO.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>LOCATION</th>\n",
" <th>Country</th>\n",
" <th>INDICATOR</th>\n",
" <th>Indicator</th>\n",
" <th>MEASURE</th>\n",
" <th>Measure</th>\n",
" <th>INEQUALITY</th>\n",
" <th>Inequality</th>\n",
" <th>Unit Code</th>\n",
" <th>Unit</th>\n",
" <th>PowerCode Code</th>\n",
" <th>PowerCode</th>\n",
" <th>Reference Period Code</th>\n",
" <th>Reference Period</th>\n",
" <th>Value</th>\n",
" <th>Flag Codes</th>\n",
" <th>Flags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>AUS</td>\n",
" <td>Australia</td>\n",
" <td>JE_LMIS</td>\n",
" <td>Labour market insecurity</td>\n",
" <td>L</td>\n",
" <td>Value</td>\n",
" <td>TOT</td>\n",
" <td>Total</td>\n",
" <td>PC</td>\n",
" <td>Percentage</td>\n",
" <td>0</td>\n",
" <td>Units</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>AUT</td>\n",
" <td>Austria</td>\n",
" <td>JE_LMIS</td>\n",
" <td>Labour market insecurity</td>\n",
" <td>L</td>\n",
" <td>Value</td>\n",
" <td>TOT</td>\n",
" <td>Total</td>\n",
" <td>PC</td>\n",
" <td>Percentage</td>\n",
" <td>0</td>\n",
" <td>Units</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>BEL</td>\n",
" <td>Belgium</td>\n",
" <td>JE_LMIS</td>\n",
" <td>Labour market insecurity</td>\n",
" <td>L</td>\n",
" <td>Value</td>\n",
" <td>TOT</td>\n",
" <td>Total</td>\n",
" <td>PC</td>\n",
" <td>Percentage</td>\n",
" <td>0</td>\n",
" <td>Units</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>CAN</td>\n",
" <td>Canada</td>\n",
" <td>JE_LMIS</td>\n",
" <td>Labour market insecurity</td>\n",
" <td>L</td>\n",
" <td>Value</td>\n",
" <td>TOT</td>\n",
" <td>Total</td>\n",
" <td>PC</td>\n",
" <td>Percentage</td>\n",
" <td>0</td>\n",
" <td>Units</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>CZE</td>\n",
" <td>Czech Republic</td>\n",
" <td>JE_LMIS</td>\n",
" <td>Labour market insecurity</td>\n",
" <td>L</td>\n",
" <td>Value</td>\n",
" <td>TOT</td>\n",
" <td>Total</td>\n",
" <td>PC</td>\n",
" <td>Percentage</td>\n",
" <td>0</td>\n",
" <td>Units</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" LOCATION Country INDICATOR Indicator MEASURE \\\n",
"0 AUS Australia JE_LMIS Labour market insecurity L \n",
"1 AUT Austria JE_LMIS Labour market insecurity L \n",
"2 BEL Belgium JE_LMIS Labour market insecurity L \n",
"3 CAN Canada JE_LMIS Labour market insecurity L \n",
"4 CZE Czech Republic JE_LMIS Labour market insecurity L \n",
"\n",
" Measure INEQUALITY Inequality Unit Code Unit PowerCode Code \\\n",
"0 Value TOT Total PC Percentage 0 \n",
"1 Value TOT Total PC Percentage 0 \n",
"2 Value TOT Total PC Percentage 0 \n",
"3 Value TOT Total PC Percentage 0 \n",
"4 Value TOT Total PC Percentage 0 \n",
"\n",
" PowerCode Reference Period Code Reference Period Value Flag Codes Flags \n",
"0 Units NaN NaN 5.4 NaN NaN \n",
"1 Units NaN NaN 3.5 NaN NaN \n",
"2 Units NaN NaN 3.7 NaN NaN \n",
"3 Units NaN NaN 6.0 NaN NaN \n",
"4 Units NaN NaN 3.1 NaN NaN "
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the raw (long-format) BLI table.\n",
"BLI.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose INEQUALITY code is 'TOT', then reshape so that each\n",
"# country is a row and each indicator is a column holding its Value.\n",
"BLI = (\n",
"    BLI.loc[BLI[\"INEQUALITY\"] == \"TOT\"]\n",
"    .pivot(index=\"Country\", columns=\"Indicator\", values=\"Value\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Indicator</th>\n",
" <th>Air pollution</th>\n",
" <th>Dwellings without basic facilities</th>\n",
" <th>Educational attainment</th>\n",
" <th>Employees working very long hours</th>\n",
" <th>Employment rate</th>\n",
" <th>Feeling safe walking alone at night</th>\n",
" <th>Homicide rate</th>\n",
" <th>Household net adjusted disposable income</th>\n",
" <th>Household net wealth</th>\n",
" <th>Housing expenditure</th>\n",
" <th>...</th>\n",
" <th>Personal earnings</th>\n",
" <th>Quality of support network</th>\n",
" <th>Rooms per person</th>\n",
" <th>Self-reported health</th>\n",
" <th>Stakeholder engagement for developing regulations</th>\n",
" <th>Student skills</th>\n",
" <th>Time devoted to leisure and personal care</th>\n",
" <th>Voter turnout</th>\n",
" <th>Water quality</th>\n",
" <th>Years in education</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>Australia</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>81.0</td>\n",
" <td>13.04</td>\n",
" <td>73.0</td>\n",
" <td>63.5</td>\n",
" <td>1.1</td>\n",
" <td>32759.0</td>\n",
" <td>427064.0</td>\n",
" <td>20.0</td>\n",
" <td>...</td>\n",
" <td>49126.0</td>\n",
" <td>95.0</td>\n",
" <td>NaN</td>\n",
" <td>85.0</td>\n",
" <td>2.7</td>\n",
" <td>502.0</td>\n",
" <td>14.35</td>\n",
" <td>91.0</td>\n",
" <td>93.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Austria</td>\n",
" <td>16.0</td>\n",
" <td>0.9</td>\n",
" <td>85.0</td>\n",
" <td>6.66</td>\n",
" <td>72.0</td>\n",
" <td>80.6</td>\n",
" <td>0.5</td>\n",
" <td>33541.0</td>\n",
" <td>308325.0</td>\n",
" <td>21.0</td>\n",
" <td>...</td>\n",
" <td>50349.0</td>\n",
" <td>92.0</td>\n",
" <td>1.6</td>\n",
" <td>70.0</td>\n",
" <td>1.3</td>\n",
" <td>492.0</td>\n",
" <td>14.55</td>\n",
" <td>80.0</td>\n",
" <td>92.0</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Belgium</td>\n",
" <td>15.0</td>\n",
" <td>1.9</td>\n",
" <td>77.0</td>\n",
" <td>4.75</td>\n",
" <td>63.0</td>\n",
" <td>70.1</td>\n",
" <td>1.0</td>\n",
" <td>30364.0</td>\n",
" <td>386006.0</td>\n",
" <td>21.0</td>\n",
" <td>...</td>\n",
" <td>49675.0</td>\n",
" <td>91.0</td>\n",
" <td>2.2</td>\n",
" <td>74.0</td>\n",
" <td>2.0</td>\n",
" <td>503.0</td>\n",
" <td>15.70</td>\n",
" <td>89.0</td>\n",
" <td>84.0</td>\n",
" <td>19.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Brazil</td>\n",
" <td>10.0</td>\n",
" <td>6.7</td>\n",
" <td>49.0</td>\n",
" <td>7.13</td>\n",
" <td>61.0</td>\n",
" <td>35.6</td>\n",
" <td>26.7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>90.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.2</td>\n",
" <td>395.0</td>\n",
" <td>NaN</td>\n",
" <td>79.0</td>\n",
" <td>73.0</td>\n",
" <td>16.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Canada</td>\n",
" <td>7.0</td>\n",
" <td>0.2</td>\n",
" <td>91.0</td>\n",
" <td>3.69</td>\n",
" <td>73.0</td>\n",
" <td>82.2</td>\n",
" <td>1.3</td>\n",
" <td>30854.0</td>\n",
" <td>423849.0</td>\n",
" <td>22.0</td>\n",
" <td>...</td>\n",
" <td>47622.0</td>\n",
" <td>93.0</td>\n",
" <td>2.6</td>\n",
" <td>88.0</td>\n",
" <td>2.9</td>\n",
" <td>523.0</td>\n",
" <td>14.56</td>\n",
" <td>68.0</td>\n",
" <td>91.0</td>\n",
" <td>17.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
"Indicator Air pollution Dwellings without basic facilities \\\n",
"Country \n",
"Australia 5.0 NaN \n",
"Austria 16.0 0.9 \n",
"Belgium 15.0 1.9 \n",
"Brazil 10.0 6.7 \n",
"Canada 7.0 0.2 \n",
"\n",
"Indicator Educational attainment Employees working very long hours \\\n",
"Country \n",
"Australia 81.0 13.04 \n",
"Austria 85.0 6.66 \n",
"Belgium 77.0 4.75 \n",
"Brazil 49.0 7.13 \n",
"Canada 91.0 3.69 \n",
"\n",
"Indicator Employment rate Feeling safe walking alone at night \\\n",
"Country \n",
"Australia 73.0 63.5 \n",
"Austria 72.0 80.6 \n",
"Belgium 63.0 70.1 \n",
"Brazil 61.0 35.6 \n",
"Canada 73.0 82.2 \n",
"\n",
"Indicator Homicide rate Household net adjusted disposable income \\\n",
"Country \n",
"Australia 1.1 32759.0 \n",
"Austria 0.5 33541.0 \n",
"Belgium 1.0 30364.0 \n",
"Brazil 26.7 NaN \n",
"Canada 1.3 30854.0 \n",
"\n",
"Indicator Household net wealth Housing expenditure ... Personal earnings \\\n",
"Country ... \n",
"Australia 427064.0 20.0 ... 49126.0 \n",
"Austria 308325.0 21.0 ... 50349.0 \n",
"Belgium 386006.0 21.0 ... 49675.0 \n",
"Brazil NaN NaN ... NaN \n",
"Canada 423849.0 22.0 ... 47622.0 \n",
"\n",
"Indicator Quality of support network Rooms per person Self-reported health \\\n",
"Country \n",
"Australia 95.0 NaN 85.0 \n",
"Austria 92.0 1.6 70.0 \n",
"Belgium 91.0 2.2 74.0 \n",
"Brazil 90.0 NaN NaN \n",
"Canada 93.0 2.6 88.0 \n",
"\n",
"Indicator Stakeholder engagement for developing regulations Student skills \\\n",
"Country \n",
"Australia 2.7 502.0 \n",
"Austria 1.3 492.0 \n",
"Belgium 2.0 503.0 \n",
"Brazil 2.2 395.0 \n",
"Canada 2.9 523.0 \n",
"\n",
"Indicator Time devoted to leisure and personal care Voter turnout \\\n",
"Country \n",
"Australia 14.35 91.0 \n",
"Austria 14.55 80.0 \n",
"Belgium 15.70 89.0 \n",
"Brazil NaN 79.0 \n",
"Canada 14.56 68.0 \n",
"\n",
"Indicator Water quality Years in education \n",
"Country \n",
"Australia 93.0 21.0 \n",
"Austria 92.0 17.0 \n",
"Belgium 84.0 19.3 \n",
"Brazil 73.0 16.2 \n",
"Canada 91.0 17.3 \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the pivoted BLI table (one row per country).\n",
"BLI.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"# Use the 2019 column as \"GDP per capita\" and index the table by country name.\n",
"# The result is reassigned rather than mutated with inplace=True, so the cell\n",
"# reads as a single transformation of WEO.\n",
"WEO = (\n",
"    WEO.rename(columns={\"2019\": \"GDP per capita\"})\n",
"    .set_index(\"Country\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"# Join the two tables on their Country indexes (pd.merge's default join type)\n",
"# and order the rows by ascending GDP per capita.\n",
"full_country_stats = (\n",
"    pd.merge(BLI, WEO, left_index=True, right_index=True)\n",
"    .sort_values(by=\"GDP per capita\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Air pollution</th>\n",
" <th>Dwellings without basic facilities</th>\n",
" <th>Educational attainment</th>\n",
" <th>Employees working very long hours</th>\n",
" <th>Employment rate</th>\n",
" <th>Feeling safe walking alone at night</th>\n",
" <th>Homicide rate</th>\n",
" <th>Household net adjusted disposable income</th>\n",
" <th>Household net wealth</th>\n",
" <th>Housing expenditure</th>\n",
" <th>...</th>\n",
" <th>Scale</th>\n",
" <th>2017</th>\n",
" <th>2018</th>\n",
" <th>GDP per capita</th>\n",
" <th>2020</th>\n",
" <th>2021</th>\n",
" <th>2022</th>\n",
" <th>2023</th>\n",
" <th>2024</th>\n",
" <th>Estimates Start After</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>South Africa</td>\n",
" <td>22.0</td>\n",
" <td>37.0</td>\n",
" <td>73.0</td>\n",
" <td>18.12</td>\n",
" <td>43.0</td>\n",
" <td>36.1</td>\n",
" <td>13.7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>18.0</td>\n",
" <td>...</td>\n",
" <td>Units</td>\n",
" <td>6119.887</td>\n",
" <td>6353.846</td>\n",
" <td>6100.354</td>\n",
" <td>6193.171</td>\n",
" <td>6331.797</td>\n",
" <td>6493.317</td>\n",
" <td>6663.568</td>\n",
" <td>6846.991</td>\n",
" <td>2018.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Colombia</td>\n",
" <td>10.0</td>\n",
" <td>23.9</td>\n",
" <td>54.0</td>\n",
" <td>26.56</td>\n",
" <td>67.0</td>\n",
" <td>44.4</td>\n",
" <td>24.5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>17.0</td>\n",
" <td>...</td>\n",
" <td>Units</td>\n",
" <td>6325.486</td>\n",
" <td>6641.507</td>\n",
" <td>6508.127</td>\n",
" <td>6744.007</td>\n",
" <td>7053.528</td>\n",
" <td>7381.726</td>\n",
" <td>7729.015</td>\n",
" <td>8096.644</td>\n",
" <td>2018.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Brazil</td>\n",
" <td>10.0</td>\n",
" <td>6.7</td>\n",
" <td>49.0</td>\n",
" <td>7.13</td>\n",
" <td>61.0</td>\n",
" <td>35.6</td>\n",
" <td>26.7</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>Units</td>\n",
" <td>9926.291</td>\n",
" <td>8958.576</td>\n",
" <td>8796.909</td>\n",
" <td>8955.650</td>\n",
" <td>9344.111</td>\n",
" <td>9737.998</td>\n",
" <td>10167.442</td>\n",
" <td>10606.458</td>\n",
" <td>2016.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Turkey</td>\n",
" <td>20.0</td>\n",
" <td>8.0</td>\n",
" <td>39.0</td>\n",
" <td>32.64</td>\n",
" <td>52.0</td>\n",
" <td>59.8</td>\n",
" <td>1.4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>20.0</td>\n",
" <td>...</td>\n",
" <td>Units</td>\n",
" <td>10551.139</td>\n",
" <td>9405.321</td>\n",
" <td>8957.894</td>\n",
" <td>9683.565</td>\n",
" <td>10635.818</td>\n",
" <td>11373.637</td>\n",
" <td>11901.693</td>\n",
" <td>12489.904</td>\n",
" <td>2018.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Mexico</td>\n",
" <td>16.0</td>\n",
" <td>25.5</td>\n",
" <td>38.0</td>\n",
" <td>28.70</td>\n",
" <td>61.0</td>\n",
" <td>41.8</td>\n",
" <td>18.1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>20.0</td>\n",
" <td>...</td>\n",
" <td>Units</td>\n",
" <td>9366.655</td>\n",
" <td>9796.976</td>\n",
" <td>10118.167</td>\n",
" <td>10405.789</td>\n",
" <td>10767.497</td>\n",
" <td>11150.183</td>\n",
" <td>11563.558</td>\n",
" <td>12007.789</td>\n",
" <td>2018.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 36 columns</p>\n",
"</div>"
],
"text/plain": [
" Air pollution Dwellings without basic facilities \\\n",
"Country \n",
"South Africa 22.0 37.0 \n",
"Colombia 10.0 23.9 \n",
"Brazil 10.0 6.7 \n",
"Turkey 20.0 8.0 \n",
"Mexico 16.0 25.5 \n",
"\n",
" Educational attainment Employees working very long hours \\\n",
"Country \n",
"South Africa 73.0 18.12 \n",
"Colombia 54.0 26.56 \n",
"Brazil 49.0 7.13 \n",
"Turkey 39.0 32.64 \n",
"Mexico 38.0 28.70 \n",
"\n",
" Employment rate Feeling safe walking alone at night \\\n",
"Country \n",
"South Africa 43.0 36.1 \n",
"Colombia 67.0 44.4 \n",
"Brazil 61.0 35.6 \n",
"Turkey 52.0 59.8 \n",
"Mexico 61.0 41.8 \n",
"\n",
" Homicide rate Household net adjusted disposable income \\\n",
"Country \n",
"South Africa 13.7 NaN \n",
"Colombia 24.5 NaN \n",
"Brazil 26.7 NaN \n",
"Turkey 1.4 NaN \n",
"Mexico 18.1 NaN \n",
"\n",
" Household net wealth Housing expenditure ... Scale \\\n",
"Country ... \n",
"South Africa NaN 18.0 ... Units \n",
"Colombia NaN 17.0 ... Units \n",
"Brazil NaN NaN ... Units \n",
"Turkey NaN 20.0 ... Units \n",
"Mexico NaN 20.0 ... Units \n",
"\n",
" 2017 2018 GDP per capita 2020 2021 \\\n",
"Country \n",
"South Africa 6119.887 6353.846 6100.354 6193.171 6331.797 \n",
"Colombia 6325.486 6641.507 6508.127 6744.007 7053.528 \n",
"Brazil 9926.291 8958.576 8796.909 8955.650 9344.111 \n",
"Turkey 10551.139 9405.321 8957.894 9683.565 10635.818 \n",
"Mexico 9366.655 9796.976 10118.167 10405.789 10767.497 \n",
"\n",
" 2022 2023 2024 Estimates Start After \n",
"Country \n",
"South Africa 6493.317 6663.568 6846.991 2018.0 \n",
"Colombia 7381.726 7729.015 8096.644 2018.0 \n",
"Brazil 9737.998 10167.442 10606.458 2016.0 \n",
"Turkey 11373.637 11901.693 12489.904 2018.0 \n",
"Mexico 11150.183 11563.558 12007.789 2018.0 \n",
"\n",
"[5 rows x 36 columns]"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the five rows with the lowest GDP per capita in the merged table.\n",
"full_country_stats.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['South Africa', 'Colombia', 'Brazil', 'Turkey', 'Mexico', 'Russia',\n",
" 'Poland', 'Chile', 'Hungary', 'Latvia', 'Lithuania', 'Slovak Republic',\n",
" 'Greece', 'Portugal', 'Czech Republic', 'Estonia', 'Slovenia', 'Spain',\n",
" 'Korea', 'Italy', 'New Zealand', 'Japan', 'United Kingdom', 'France',\n",
" 'Israel', 'Belgium', 'Canada', 'Germany', 'Finland', 'Austria',\n",
" 'Sweden', 'Netherlands', 'Australia', 'Denmark', 'United States',\n",
" 'Iceland', 'Ireland', 'Norway', 'Switzerland', 'Luxembourg'],\n",
" dtype='object', name='Country')\n"
]
}
],
"source": [
"# List the row labels of the merged table in their current (GDP-sorted) order.\n",
"print(str(full_country_stats.index))"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GDP per capita</th>\n",
" <th>Life satisfaction</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Country</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>South Africa</td>\n",
" <td>6100.354</td>\n",
" <td>4.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Turkey</td>\n",
" <td>8957.894</td>\n",
" <td>5.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Russia</td>\n",
" <td>11162.652</td>\n",
" <td>5.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Poland</td>\n",
" <td>14901.547</td>\n",
" <td>6.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Hungary</td>\n",
" <td>17463.284</td>\n",
" <td>5.6</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Lithuania</td>\n",
" <td>19266.788</td>\n",
" <td>5.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Slovak Republic</td>\n",
" <td>19547.657</td>\n",
" <td>6.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Greece</td>\n",
" <td>19974.374</td>\n",
" <td>5.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Portugal</td>\n",
" <td>23030.786</td>\n",
" <td>5.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Czech Republic</td>\n",
" <td>23213.954</td>\n",
" <td>6.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Estonia</td>\n",
" <td>23523.596</td>\n",
" <td>5.7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Slovenia</td>\n",
" <td>26170.250</td>\n",
" <td>5.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Spain</td>\n",
" <td>29961.105</td>\n",
" <td>6.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Korea</td>\n",
" <td>31430.598</td>\n",
" <td>5.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Italy</td>\n",
" <td>32946.524</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>New Zealand</td>\n",
" <td>40634.137</td>\n",
" <td>7.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Japan</td>\n",
" <td>40846.777</td>\n",
" <td>5.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>United Kingdom</td>\n",
" <td>41030.232</td>\n",
" <td>6.8</td>\n",
" </tr>\n",
" <tr>\n",
" <td>France</td>\n",
" <td>41760.606</td>\n",
" <td>6.5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Israel</td>\n",
" <td>42823.307</td>\n",
" <td>7.2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Belgium</td>\n",
" <td>45175.585</td>\n",
" <td>6.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Canada</td>\n",
" <td>46212.842</td>\n",
" <td>7.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Germany</td>\n",
" <td>46563.989</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Finland</td>\n",
" <td>48868.742</td>\n",
" <td>7.6</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Austria</td>\n",
" <td>50022.612</td>\n",
" <td>7.1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Sweden</td>\n",
" <td>51241.914</td>\n",
" <td>7.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Netherlands</td>\n",
" <td>52367.849</td>\n",
" <td>7.4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Australia</td>\n",
" <td>53825.164</td>\n",
" <td>7.3</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Denmark</td>\n",
" <td>59795.269</td>\n",
" <td>7.6</td>\n",
" </tr>\n",
" <tr>\n",
" <td>United States</td>\n",
" <td>65111.596</td>\n",
" <td>6.9</td>\n",
" </tr>\n",
" <tr>\n",
" <td>Iceland</td>\n",
" <td>67037.340</td>\n",
" <td>7.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GDP per capita Life satisfaction\n",
"Country \n",
"South Africa 6100.354 4.7\n",
"Turkey 8957.894 5.5\n",
"Russia 11162.652 5.8\n",
"Poland 14901.547 6.1\n",
"Hungary 17463.284 5.6\n",
"Lithuania 19266.788 5.9\n",
"Slovak Republic 19547.657 6.2\n",
"Greece 19974.374 5.4\n",
"Portugal 23030.786 5.4\n",
"Czech Republic 23213.954 6.7\n",
"Estonia 23523.596 5.7\n",
"Slovenia 26170.250 5.9\n",
"Spain 29961.105 6.3\n",
"Korea 31430.598 5.9\n",
"Italy 32946.524 6.0\n",
"New Zealand 40634.137 7.3\n",
"Japan 40846.777 5.9\n",
"United Kingdom 41030.232 6.8\n",
"France 41760.606 6.5\n",
"Israel 42823.307 7.2\n",
"Belgium 45175.585 6.9\n",
"Canada 46212.842 7.4\n",
"Germany 46563.989 7.0\n",
"Finland 48868.742 7.6\n",
"Austria 50022.612 7.1\n",
"Sweden 51241.914 7.3\n",
"Netherlands 52367.849 7.4\n",
"Australia 53825.164 7.3\n",
"Denmark 59795.269 7.6\n",
"United States 65111.596 6.9\n",
"Iceland 67037.340 7.5"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row positions (in the GDP-sorted table) to exclude from the exported sample.\n",
"remove_indices = [9, 1, 2, 4, 7]\n",
"# NOTE(review): only positions 0-35 are considered, but the index printed by the\n",
"# previous cell lists 40 countries, so the last four rows are dropped as well.\n",
"# 36 matches the merged table's column count - confirm whether\n",
"# len(full_country_stats) was intended here.\n",
"# sorted() pins the row order explicitly: .iloc returns rows in the order given,\n",
"# and set iteration order is an implementation detail.\n",
"keep_indices = sorted(set(range(36)) - set(remove_indices))\n",
"clean_data = full_country_stats[[\"GDP per capita\", 'Life satisfaction']].iloc[keep_indices]\n",
"clean_data"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {},
"outputs": [],
"source": [
"# Persist the two-column sample; the Country index is written as the first CSV column.\n",
"clean_data.to_csv(path_or_buf='data/country_stats.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}