MLProjects/breastCancerML/.ipynb_checkpoints/breastCancerML-checkpoint.i...

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "import numpy as np\n",
    "import sklearn\n",
    "# `import sklearn` alone does not load submodules; import the one the\n",
    "# split cell uses explicitly instead of relying on transitive imports\n",
    "import sklearn.model_selection\n",
    "from sklearn import svm, datasets, metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load in our data via scikit-learn's datasets module.\n",
    "# Later cells read the .data, .target and .target_names attributes of `cancer`.\n",
    "cancer = datasets.load_breast_cancer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split our input and target features\n",
    "x = cancer.data\n",
    "y = cancer.target\n",
    "\n",
    "# Fixed seed so the partition (and every accuracy reported below) is\n",
    "# reproducible on Restart & Run All; change it to draw a different split\n",
    "SEED = 42\n",
    "\n",
    "# Split into train, dev, test sets with 90 / 5 / 5 split\n",
    "# Step 1: hold out 10% of the samples; stratify so the class balance of\n",
    "# the small hold-out matches the full dataset\n",
    "x_train, x_holdout, y_train, y_holdout = sklearn.model_selection.train_test_split(\n",
    "    x, y, test_size=0.1, random_state=SEED, stratify=y)\n",
    "\n",
    "# Step 2: cut the hold-out in half -> test set and dev (validation) set.\n",
    "# The intermediate hold-out keeps its own name so re-running or inspecting\n",
    "# this cell never confuses it with the final test set\n",
    "x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
    "    x_holdout, y_holdout, test_size=0.5, random_state=SEED, stratify=y_holdout)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9642857142857143\n"
     ]
    }
   ],
   "source": [
    "# Baseline: Support Vector Classifier with a linear kernel and default C\n",
    "clf = svm.SVC(kernel='linear', gamma='scale')\n",
    "clf.fit(x_train, y_train)\n",
    "\n",
    "# Make predictions on the test set and measure accuracy\n",
    "y_pred = clf.predict(x_test)\n",
    "acc = metrics.accuracy_score(y_test, y_pred)\n",
    "print('Baseline test accuracy:', acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9310344827586207\n",
      "0.9655172413793104\n",
      "0.9655172413793104\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.6206896551724138\n",
      "0.7931034482758621\n",
      "0.8620689655172413\n",
      "0.8620689655172413\n",
      "0.8275862068965517\n",
      "0.8620689655172413\n",
      "0.8620689655172413\n",
      "0.896551724137931\n",
      "0.9310344827586207\n",
      "0.9310344827586207\n",
      "0.6206896551724138\n",
      "0.6206896551724138\n",
      "0.6206896551724138\n",
      "0.5862068965517241\n",
      "0.5517241379310345\n",
      "0.4827586206896552\n",
      "0.4827586206896552\n",
      "0.4827586206896552\n",
      "0.4827586206896552\n",
      "0.4827586206896552\n"
     ]
    }
   ],
   "source": [
    "# Tune the kernel and the regularisation strength C on the dev set\n",
    "# (gamma is kept at 'scale' for every candidate, it is not searched)\n",
    "# Note: C values are spaced roughly logarithmically\n",
    "kernels = ['linear','rbf','sigmoid']\n",
    "C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
    "\n",
    "# Accuracy lies in [0, 1]; starting below 0 guarantees the first candidate\n",
    "# is always saved, so the next cell never loads a stale pickle from an\n",
    "# earlier run\n",
    "best = -1\n",
    "best_params = None\n",
    "for kernel in kernels:\n",
    "    for C in C_values:\n",
    "        clf = svm.SVC(kernel=kernel, C=C, gamma='scale')\n",
    "        clf.fit(x_train, y_train)\n",
    "        y_pred = clf.predict(x_dev)\n",
    "        acc = metrics.accuracy_score(y_dev, y_pred)\n",
    "        # Label each score so it can be matched to its configuration\n",
    "        print('kernel=%s C=%s dev_accuracy=%s' % (kernel, C, acc))\n",
    "        # Strict '>' keeps the earliest configuration when scores tie\n",
    "        if acc > best:\n",
    "            best = acc\n",
    "            best_params = (kernel, C)\n",
    "            # Persist the current best model; a later improvement overwrites it\n",
    "            with open('cancerModel.pickle','wb') as f:\n",
    "                pickle.dump(clf,f)\n",
    "\n",
    "print('Best dev accuracy:', best, 'with (kernel, C) =', best_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9642857142857143\n"
     ]
    }
   ],
   "source": [
    "# Load in our best model (according to Validation accuracy)\n",
    "# NOTE: pickle.load can execute arbitrary code, so only load a file that\n",
    "# this notebook itself wrote\n",
    "# The with-block closes the file handle even if unpickling fails\n",
    "with open('cancerModel.pickle','rb') as pickle_in:\n",
    "    clf = pickle.load(pickle_in)\n",
    "\n",
    "# Score the selected model on the held-out test set\n",
    "y_pred = clf.predict(x_test)\n",
    "acc = metrics.accuracy_score(y_test, y_pred)\n",
    "print(acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}