{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Breast cancer classification with a support vector classifier\n",
    "\n",
    "Trains `sklearn.svm.SVC` on scikit-learn's breast cancer dataset, tunes the kernel and `C` on a dev set, and reports the accuracy of the selected model on a held-out test set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "import numpy as np\n",
    "import sklearn\n",
    "from sklearn import svm, datasets, metrics\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "SEED = 42  # fixes the train / dev / test split so a fresh run reproduces the results\n",
    "MODEL_PATH = 'cancerModel.pickle'  # where the best model found during tuning is saved"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and split the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load in our data\n",
    "cancer = datasets.load_breast_cancer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split our input and target features\n",
    "x = cancer.data\n",
    "y = cancer.target\n",
    "\n",
    "# Split into train, dev, test sets with 90 / 5 / 5 split:\n",
    "# first hold out 10% of the data, then halve that holdout into test and dev.\n",
    "# random_state makes the split repeatable; stratify keeps the class balance\n",
    "# similar across the resulting sets.\n",
    "x_train, x_holdout, y_train, y_holdout = train_test_split(\n",
    "    x, y, test_size=0.1, random_state=SEED, stratify=y)\n",
    "\n",
    "x_test, x_dev, y_test, y_dev = train_test_split(\n",
    "    x_holdout, y_holdout, test_size=0.5, random_state=SEED, stratify=y_holdout)\n",
    "\n",
    "# Every sample must land in exactly one of the three sets\n",
    "assert len(x_train) + len(x_dev) + len(x_test) == len(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Baseline model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Setup Support Vector Classifier (linear kernel, C left at its default)\n",
    "classes = cancer.target_names\n",
    "clf = svm.SVC(kernel='linear', gamma='scale')\n",
    "clf.fit(x_train, y_train)\n",
    "\n",
    "# Make predictions and measure accuracy on the test set\n",
    "y_pred = clf.predict(x_test)\n",
    "acc = metrics.accuracy_score(y_test, y_pred)\n",
    "print(acc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tune kernel and C on the dev set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tune the kernel and C (gamma stays at 'scale' for every candidate).\n",
    "# Note: the C values are spaced roughly logarithmically\n",
    "kernels = ['linear', 'rbf', 'sigmoid']\n",
    "C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
    "\n",
    "best = 0\n",
    "best_clf = None\n",
    "best_params = None\n",
    "for kernel in kernels:\n",
    "    for C in C_values:\n",
    "        clf = svm.SVC(kernel=kernel, C=C, gamma='scale')\n",
    "        clf.fit(x_train, y_train)\n",
    "        y_pred = clf.predict(x_dev)\n",
    "        acc = metrics.accuracy_score(y_dev, y_pred)\n",
    "        print(f'kernel={kernel} C={C} dev accuracy={acc}')\n",
    "        # Strict '>' keeps the first configuration that reaches the best score\n",
    "        if acc > best:\n",
    "            best = acc\n",
    "            best_clf = clf\n",
    "            best_params = (kernel, C)\n",
    "\n",
    "# Fail loudly rather than let the next cell load a stale or missing file\n",
    "if best_clf is None:\n",
    "    raise RuntimeError('no configuration scored above 0 on the dev set')\n",
    "\n",
    "# Persist the winner once instead of rewriting the file on every improvement\n",
    "with open(MODEL_PATH, 'wb') as f:\n",
    "    pickle.dump(best_clf, f)\n",
    "print(f'best: kernel={best_params[0]} C={best_params[1]} dev accuracy={best}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate the selected model on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load in our best model (according to Validation accuracy)\n",
    "# NOTE: pickle.load can execute arbitrary code; only load files this notebook wrote\n",
    "with open(MODEL_PATH, 'rb') as pickle_in:\n",
    "    clf = pickle.load(pickle_in)\n",
    "y_pred = clf.predict(x_test)\n",
    "acc = metrics.accuracy_score(y_test, y_pred)\n",
    "print(acc)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}