{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import sklearn\n",
    "import sklearn.model_selection\n",
    "from sklearn import metrics, preprocessing\n",
    "import pickle\n",
    "import math\n",
    "import tensorflow as tf\n",
    "from os import path, getcwd, chdir"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create dataframes from our csv files and set indeces\n",
    "\n",
    "df = pd.read_csv('data/train.csv')\n",
    "df.set_index('PassengerId', inplace=True)\n",
    "\n",
    "testdf = pd.read_csv('data/test.csv')\n",
    "PassengerId = testdf['PassengerId']\n",
    "testdf.set_index('PassengerId', inplace=True)\n",
    "\n",
    "data = [df, testdf]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<bound method NDFrame.head of              Survived  Pclass  \\\n",
      "PassengerId                     \n",
      "1                   0       3   \n",
      "2                   1       1   \n",
      "3                   1       3   \n",
      "4                   1       1   \n",
      "5                   0       3   \n",
      "...               ...     ...   \n",
      "887                 0       2   \n",
      "888                 1       1   \n",
      "889                 0       3   \n",
      "890                 1       1   \n",
      "891                 0       3   \n",
      "\n",
      "                                                          Name  Sex  \\\n",
      "PassengerId                                                           \n",
      "1                                      Braund, Mr. Owen Harris    1   \n",
      "2            Cumings, Mrs. John Bradley (Florence Briggs Th...    0   \n",
      "3                                       Heikkinen, Miss. Laina    0   \n",
      "4                 Futrelle, Mrs. Jacques Heath (Lily May Peel)    0   \n",
      "5                                     Allen, Mr. William Henry    1   \n",
      "...                                                        ...  ...   \n",
      "887                                      Montvila, Rev. Juozas    1   \n",
      "888                               Graham, Miss. Margaret Edith    0   \n",
      "889                   Johnston, Miss. Catherine Helen \"Carrie\"    0   \n",
      "890                                      Behr, Mr. Karl Howell    1   \n",
      "891                                        Dooley, Mr. Patrick    1   \n",
      "\n",
      "                   Age  SibSp  Parch            Ticket     Fare  Cabin  \\\n",
      "PassengerId                                                              \n",
      "1            22.000000      1      0         A/5 21171   7.2500    147   \n",
      "2            38.000000      1      0          PC 17599  71.2833     81   \n",
      "3            26.000000      0      0  STON/O2. 3101282   7.9250    147   \n",
      "4            35.000000      1      0            113803  53.1000     55   \n",
      "5            35.000000      0      0            373450   8.0500    147   \n",
      "...                ...    ...    ...               ...      ...    ...   \n",
      "887          27.000000      0      0            211536  13.0000    147   \n",
      "888          19.000000      0      0            112053  30.0000     30   \n",
      "889          29.699118      1      2        W./C. 6607  23.4500    147   \n",
      "890          26.000000      0      0            111369  30.0000     60   \n",
      "891          32.000000      0      0            370376   7.7500    147   \n",
      "\n",
      "             Embarked  \n",
      "PassengerId            \n",
      "1                   2  \n",
      "2                   0  \n",
      "3                   2  \n",
      "4                   2  \n",
      "5                   2  \n",
      "...               ...  \n",
      "887                 2  \n",
      "888                 2  \n",
      "889                 2  \n",
      "890                 0  \n",
      "891                 1  \n",
      "\n",
      "[891 rows x 11 columns]>\n"
     ]
    }
   ],
   "source": [
    "# Preprocess the data by converting non numerical features into numerical categorical features \n",
    "# and applying mean imputation to deal with NaN values\n",
    "\n",
    "for dataframe in data:\n",
    "    le = preprocessing.LabelEncoder()\n",
    "    dataframe[\"Sex\"] = le.fit_transform(list(dataframe[\"Sex\"]))\n",
    "    dataframe[\"Cabin\"] = le.fit_transform(list(dataframe[\"Cabin\"]))\n",
    "    dataframe[\"Embarked\"] = le.fit_transform(list(dataframe[\"Embarked\"]))\n",
    "    dataframe.fillna(dataframe.mean(), inplace=True)\n",
    "    \n",
    "print(df.head)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<bound method NDFrame.head of              Pclass  Sex        Age  SibSp  Parch     Fare  Cabin  Embarked\n",
      "PassengerId                                                                \n",
      "1                 3    1  22.000000      1      0   7.2500    147         2\n",
      "2                 1    0  38.000000      1      0  71.2833     81         0\n",
      "3                 3    0  26.000000      0      0   7.9250    147         2\n",
      "4                 1    0  35.000000      1      0  53.1000     55         2\n",
      "5                 3    1  35.000000      0      0   8.0500    147         2\n",
      "...             ...  ...        ...    ...    ...      ...    ...       ...\n",
      "887               2    1  27.000000      0      0  13.0000    147         2\n",
      "888               1    0  19.000000      0      0  30.0000     30         2\n",
      "889               3    0  29.699118      1      2  23.4500    147         2\n",
      "890               1    1  26.000000      0      0  30.0000     60         0\n",
      "891               3    1  32.000000      0      0   7.7500    147         1\n",
      "\n",
      "[891 rows x 8 columns]>\n"
     ]
    }
   ],
   "source": [
    "# Create our input matrix, label vector, and test input matrix\n",
    "\n",
    "X = df.drop(['Name', 'Survived', 'Ticket'], axis=1)\n",
    "y = df['Survived']\n",
    "X_test = testdf.drop(['Name', 'Ticket'], axis=1)\n",
    "print(X.head)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalize the data\n",
    "\n",
    "X=(X-X.mean())/X.std()\n",
    "X_test=(X_test-X_test.mean())/X_test.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup our data to be fed into our model\n",
    "\n",
    "dataset = tf.data.Dataset.from_tensor_slices((X.values, y.values))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:Layer sequential_11 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
      "\n",
      "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
      "\n",
      "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
      "\n",
      "Epoch 1/10\n",
      "891/891 [==============================] - 5s 5ms/step - loss: 0.4820 - accuracy: 0.7778\n",
      "Epoch 2/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4380 - accuracy: 0.8148\n",
      "Epoch 3/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4248 - accuracy: 0.8148\n",
      "Epoch 4/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4261 - accuracy: 0.8148\n",
      "Epoch 5/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4186 - accuracy: 0.8283\n",
      "Epoch 6/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4074 - accuracy: 0.8182\n",
      "Epoch 7/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4068 - accuracy: 0.8238\n",
      "Epoch 8/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.4059 - accuracy: 0.8249\n",
      "Epoch 9/10\n",
      "891/891 [==============================] - 2s 2ms/step - loss: 0.3979 - accuracy: 0.8272\n",
      "Epoch 10/10\n",
      "891/891 [==============================] - 3s 4ms/step - loss: 0.3924 - accuracy: 0.8272\n"
     ]
    }
   ],
   "source": [
    "# Setup our model\n",
    "\n",
    "model = tf.keras.models.Sequential([\n",
    "    # Flatten out our input\n",
    "    tf.keras.layers.Flatten(),\n",
    "    \n",
    "    # Setup our first layer\n",
    "    tf.keras.layers.Dense(1024, activation=tf.nn.relu),\n",
    "    \n",
    "    # Setup output layer\n",
    "    tf.keras.layers.Dense(2, activation=tf.nn.softmax)\n",
    "])\n",
    "\n",
    "# Compile our model\n",
    "model.compile(optimizer='adam', \n",
    "              loss = 'sparse_categorical_crossentropy', \n",
    "             metrics=['accuracy'])\n",
    "\n",
    "# Fit model\n",
    "history = model.fit(\n",
    "    train_dataset, \n",
    "    epochs=10\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<bound method NDFrame.head of      PassengerId  Survived\n",
      "0            892         0\n",
      "1            893         0\n",
      "2            894         0\n",
      "3            895         0\n",
      "4            896         0\n",
      "..           ...       ...\n",
      "413         1305         0\n",
      "414         1306         1\n",
      "415         1307         0\n",
      "416         1308         0\n",
      "417         1309         0\n",
      "\n",
      "[418 rows x 2 columns]>\n"
     ]
    }
   ],
   "source": [
    "test_pred = model.predict(X_test.values) # Note that we need to feed our model the values or our dataframe X_test\n",
    "predictions = np.c_[PassengerId, np.argmax(test_pred, axis=1)]\n",
    "submission = pd.DataFrame(predictions, columns = ['PassengerId', 'Survived'])\n",
    "print(submission.head)\n",
    "submission.to_csv(\"submissions/NNSubmission.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}