In [1]:
import os
import pickle

import numpy as np
import sklearn
import sklearn.model_selection
from sklearn import svm, datasets, metrics

In [2]:
# Load the breast-cancer dataset bundled with scikit-learn.
# Later cells read its `data`, `target` and `target_names` attributes.
cancer = datasets.load_breast_cancer()

In [3]:
# Split our input and target features
x = cancer.data
y = cancer.target

# Fixed seed so the splits (and every score computed from them) are
# reproducible under Restart Kernel -> Run All.
RANDOM_STATE = 42

# Split into train, dev, test sets with 90 / 5 / 5 split.
# Step 1: hold out 10% of the samples. Stratifying keeps the class ratio of
# the small held-out set close to that of the full dataset.
x_train, x_holdout, y_train, y_holdout = sklearn.model_selection.train_test_split(
    x, y, test_size=0.1, random_state=RANDOM_STATE, stratify=y)

# Step 2: cut the held-out 10% in half -> test and dev. Separate hold-out
# names keep x_test / y_test from being both input and output of this call.
x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=RANDOM_STATE,
    stratify=y_holdout)

In [4]:
# Baseline model: a linear-kernel Support Vector Classifier
classes = cancer.target_names
# fit() returns the estimator itself, so construction and training can chain
clf = svm.SVC(gamma='scale', kernel='linear').fit(x_train, y_train)

# Score the baseline on the test split
y_pred = clf.predict(x_test)
acc = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(acc)

0.9642857142857143


In [5]:
# Tune the SVC hyper-parameters `kernel` and `C` with a grid search scored on
# the dev split. gamma is left at 'scale' for every candidate.
# Note: C spans several orders of magnitude (1e-3 .. 1e3) on a roughly
# logarithmic grid.
kernels = ['linear','rbf','sigmoid']
C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]

classes = cancer.target_names  # loop-invariant, so assigned once up front

best = 0            # best dev accuracy seen so far
best_clf = None     # fitted estimator that achieved `best`
best_params = None  # its kernel / C settings, kept for inspection
for kernel in kernels:
    for C in C_values:
        clf = svm.SVC(kernel=kernel, C=C, gamma='scale')
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_dev)
        acc = metrics.accuracy_score(y_dev, y_pred)
        print(acc)
        # Strict '>' keeps the first configuration on ties.
        if acc > best:
            best = acc
            # A new SVC object is built each iteration, so this reference
            # is not overwritten by later fits.
            best_clf = clf
            best_params = {'kernel': kernel, 'C': C}

# Persist the winner once, after the search, instead of rewriting the file on
# every improvement. Nothing is written if no candidate scored above 0.
if best_clf is not None:
    with open('cancerModel.pickle','wb') as f:
        pickle.dump(best_clf, f)

0.9310344827586207
0.9655172413793104
0.9655172413793104
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.6206896551724138
0.7931034482758621
0.8620689655172413
0.8620689655172413
0.8275862068965517
0.8620689655172413
0.8620689655172413
0.896551724137931
0.9310344827586207
0.9310344827586207
0.6206896551724138
0.6206896551724138
0.6206896551724138
0.5862068965517241
0.5517241379310345
0.4827586206896552
0.4827586206896552
0.4827586206896552
0.4827586206896552
0.4827586206896552


In [6]:
# Load in our best model (according to Validation accuracy).
# The context manager closes the file handle as soon as the model is
# unpickled; a bare open() would leave it open for the life of the kernel.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('cancerModel.pickle','rb') as pickle_in:
    clf = pickle.load(pickle_in)

# Score on the test split, which was not used to pick the hyper-parameters.
y_pred = clf.predict(x_test)
acc = metrics.accuracy_score(y_test, y_pred)
print(acc)

0.9642857142857143
