184 lines
4.4 KiB
Plaintext
184 lines
4.4 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import numpy as np\n",
|
|
"import sklearn\n",
|
|
"from sklearn import svm, datasets, metrics\n",
|
|
"import pickle"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Load the breast cancer dataset that ships with scikit-learn\n",
|
|
"cancer = datasets.load_breast_cancer()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Split our input and target features\n",
|
|
"x = cancer.data\n",
|
|
"y = cancer.target\n",
|
|
"\n",
|
|
"# Split into train, dev, test sets with 90 / 5 / 5 split\n",
|
|
"x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(\n",
|
|
" x,y,test_size=0.1)\n",
|
|
"\n",
|
|
"x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
|
|
" x_test, y_test, test_size=0.5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.9642857142857143\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Setup Support Vector Classifier\n",
|
|
"classes = cancer.target_names\n",
|
|
"clf = svm.SVC(kernel='linear', gamma='scale')\n",
|
|
"clf.fit(x_train, y_train)\n",
|
|
"\n",
|
|
"# Make predictions and measure accuracy\n",
|
|
"y_pred = clf.predict(x_test)\n",
|
|
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
|
"print(acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.9310344827586207\n",
|
|
"0.9655172413793104\n",
|
|
"0.9655172413793104\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.6206896551724138\n",
|
|
"0.7931034482758621\n",
|
|
"0.8620689655172413\n",
|
|
"0.8620689655172413\n",
|
|
"0.8275862068965517\n",
|
|
"0.8620689655172413\n",
|
|
"0.8620689655172413\n",
|
|
"0.896551724137931\n",
|
|
"0.9310344827586207\n",
|
|
"0.9310344827586207\n",
|
|
"0.6206896551724138\n",
|
|
"0.6206896551724138\n",
|
|
"0.6206896551724138\n",
|
|
"0.5862068965517241\n",
|
|
"0.5517241379310345\n",
|
|
"0.4827586206896552\n",
|
|
"0.4827586206896552\n",
|
|
"0.4827586206896552\n",
|
|
"0.4827586206896552\n",
|
|
"0.4827586206896552\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Tune parameters of kernel, C, gamma \n",
|
|
"# Note: use logarithmically scaled values for C\n",
|
|
"kernels = ['linear','rbf','sigmoid']\n",
|
|
"C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
|
|
"\n",
|
|
"best = 0\n",
|
|
"for kernel in kernels:\n",
|
|
" for C in C_values:\n",
|
|
" classes = cancer.target_names\n",
|
|
" clf = svm.SVC(kernel=kernel, C=C, gamma='scale')\n",
|
|
" clf.fit(x_train, y_train)\n",
|
|
" y_pred = clf.predict(x_dev)\n",
|
|
" acc = metrics.accuracy_score(y_dev, y_pred)\n",
|
|
" print(acc)\n",
|
|
" if acc > best:\n",
|
|
" best = acc\n",
|
|
" with open('cancerModel.pickle','wb') as f:\n",
|
|
" pickle.dump(clf,f)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"0.9642857142857143\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load in our best model (according to Validation accuracy)\n",
|
|
"pickle_in = open('cancerModel.pickle','rb')\n",
|
|
"clf = pickle.load(pickle_in)\n",
|
|
"y_pred = clf.predict(x_test)\n",
|
|
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
|
"print(acc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|