Clean up code and add comments
This commit is contained in:
parent
b8ba9c81e6
commit
34ea348f9c
|
|
@ -10,7 +10,6 @@
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import sklearn\n",
|
"import sklearn\n",
|
||||||
"from sklearn import svm, datasets, metrics\n",
|
"from sklearn import svm, datasets, metrics\n",
|
||||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
|
||||||
"import pickle"
|
"import pickle"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -18,93 +17,19 @@
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n",
|
|
||||||
" 'mean smoothness' 'mean compactness' 'mean concavity'\n",
|
|
||||||
" 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n",
|
|
||||||
" 'radius error' 'texture error' 'perimeter error' 'area error'\n",
|
|
||||||
" 'smoothness error' 'compactness error' 'concavity error'\n",
|
|
||||||
" 'concave points error' 'symmetry error' 'fractal dimension error'\n",
|
|
||||||
" 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n",
|
|
||||||
" 'worst smoothness' 'worst compactness' 'worst concavity'\n",
|
|
||||||
" 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n",
|
|
||||||
"labels: ['malignant' 'benign']\n",
|
|
||||||
"data: [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]\n",
|
|
||||||
" [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]\n",
|
|
||||||
" [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]\n",
|
|
||||||
" ...\n",
|
|
||||||
" [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]\n",
|
|
||||||
" [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]\n",
|
|
||||||
" [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]\n",
|
|
||||||
"target: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
||||||
" 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0\n",
|
|
||||||
" 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1\n",
|
|
||||||
" 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1\n",
|
|
||||||
" 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0\n",
|
|
||||||
" 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1\n",
|
|
||||||
" 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0\n",
|
|
||||||
" 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|
||||||
" 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1\n",
|
|
||||||
" 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0\n",
|
|
||||||
" 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1\n",
|
|
||||||
" 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1\n",
|
|
||||||
" 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1\n",
|
|
||||||
" 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0\n",
|
|
||||||
" 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|
||||||
" 1 1 1 1 1 1 1 0 0 0 0 0 0 1]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"cancer = datasets.load_breast_cancer()\n",
|
"# Load in our data\n",
|
||||||
"print(\"Features:\", cancer.feature_names)\n",
|
"cancer = datasets.load_breast_cancer()"
|
||||||
"print(\"labels:\", cancer.target_names)\n",
|
|
||||||
"print(\"data:\", cancer.data)\n",
|
|
||||||
"print(\"target:\", cancer.target)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"[[1.160e+01 2.449e+01 7.423e+01 4.172e+02 7.474e-02 5.688e-02 1.974e-02\n",
|
|
||||||
" 1.313e-02 1.935e-01 5.878e-02 2.512e-01 1.786e+00 1.961e+00 1.821e+01\n",
|
|
||||||
" 6.122e-03 2.337e-02 1.596e-02 6.998e-03 3.194e-02 2.211e-03 1.244e+01\n",
|
|
||||||
" 3.162e+01 8.139e+01 4.765e+02 9.545e-02 1.361e-01 7.239e-02 4.815e-02\n",
|
|
||||||
" 3.244e-01 6.745e-02]\n",
|
|
||||||
" [1.288e+01 1.822e+01 8.445e+01 4.931e+02 1.218e-01 1.661e-01 4.825e-02\n",
|
|
||||||
" 5.303e-02 1.709e-01 7.253e-02 4.426e-01 1.169e+00 3.176e+00 3.437e+01\n",
|
|
||||||
" 5.273e-03 2.329e-02 1.405e-02 1.244e-02 1.816e-02 3.299e-03 1.505e+01\n",
|
|
||||||
" 2.437e+01 9.931e+01 6.747e+02 1.456e-01 2.961e-01 1.246e-01 1.096e-01\n",
|
|
||||||
" 2.582e-01 8.893e-02]\n",
|
|
||||||
" [1.086e+01 2.148e+01 6.851e+01 3.605e+02 7.431e-02 4.227e-02 0.000e+00\n",
|
|
||||||
" 0.000e+00 1.661e-01 5.948e-02 3.163e-01 1.304e+00 2.115e+00 2.067e+01\n",
|
|
||||||
" 9.579e-03 1.104e-02 0.000e+00 0.000e+00 3.004e-02 2.228e-03 1.166e+01\n",
|
|
||||||
" 2.477e+01 7.408e+01 4.123e+02 1.001e-01 7.348e-02 0.000e+00 0.000e+00\n",
|
|
||||||
" 2.458e-01 6.592e-02]\n",
|
|
||||||
" [2.020e+01 2.683e+01 1.337e+02 1.234e+03 9.905e-02 1.669e-01 1.641e-01\n",
|
|
||||||
" 1.265e-01 1.875e-01 6.020e-02 9.761e-01 1.892e+00 7.128e+00 1.036e+02\n",
|
|
||||||
" 8.439e-03 4.674e-02 5.904e-02 2.536e-02 3.710e-02 4.286e-03 2.419e+01\n",
|
|
||||||
" 3.381e+01 1.600e+02 1.671e+03 1.278e-01 3.416e-01 3.703e-01 2.152e-01\n",
|
|
||||||
" 3.271e-01 7.632e-02]\n",
|
|
||||||
" [2.047e+01 2.067e+01 1.347e+02 1.299e+03 9.156e-02 1.313e-01 1.523e-01\n",
|
|
||||||
" 1.015e-01 2.166e-01 5.419e-02 8.336e-01 1.736e+00 5.168e+00 1.004e+02\n",
|
|
||||||
" 4.938e-03 3.089e-02 4.093e-02 1.699e-02 2.816e-02 2.719e-03 2.323e+01\n",
|
|
||||||
" 2.715e+01 1.520e+02 1.645e+03 1.097e-01 2.534e-01 3.092e-01 1.613e-01\n",
|
|
||||||
" 3.220e-01 6.386e-02]]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Split our input and target features\n",
|
||||||
"x = cancer.data\n",
|
"x = cancer.data\n",
|
||||||
"y = cancer.target\n",
|
"y = cancer.target\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -113,9 +38,7 @@
|
||||||
" x,y,test_size=0.1)\n",
|
" x,y,test_size=0.1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
|
"x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
|
||||||
" x_test, y_test, test_size=0.5)\n",
|
" x_test, y_test, test_size=0.5)"
|
||||||
"\n",
|
|
||||||
"print(x_train[:5])"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -127,15 +50,17 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.9285714285714286\n"
|
"0.9642857142857143\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Setup Support Vector Classifier\n",
|
||||||
"classes = cancer.target_names\n",
|
"classes = cancer.target_names\n",
|
||||||
"clf = svm.SVC(kernel='linear', gamma='scale')\n",
|
"clf = svm.SVC(kernel='linear', gamma='scale')\n",
|
||||||
"clf.fit(x_train, y_train)\n",
|
"clf.fit(x_train, y_train)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Make predictions and measure accuracy\n",
|
||||||
"y_pred = clf.predict(x_test)\n",
|
"y_pred = clf.predict(x_test)\n",
|
||||||
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
||||||
"print(acc)"
|
"print(acc)"
|
||||||
|
|
@ -152,45 +77,43 @@
|
||||||
"text": [
|
"text": [
|
||||||
"0.9310344827586207\n",
|
"0.9310344827586207\n",
|
||||||
"0.9655172413793104\n",
|
"0.9655172413793104\n",
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.6896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9655172413793104\n",
|
"0.9655172413793104\n",
|
||||||
"0.9655172413793104\n",
|
"0.9310344827586207\n",
|
||||||
"0.9655172413793104\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.7931034482758621\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8275862068965517\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.896551724137931\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.5862068965517241\n",
|
||||||
|
"0.5517241379310345\n",
|
||||||
"0.4827586206896552\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n"
|
||||||
"0.41379310344827586\n",
|
|
||||||
"0.41379310344827586\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Tune parameters of kernel, C, gamma \n",
|
"# Tune parameters of kernel, C, gamma \n",
|
||||||
"# Note: use logarithmic scale random values\n",
|
"# Note: use logarithmically scaled values for C\n",
|
||||||
"kernels = ['linear','rbf','sigmoid']\n",
|
"kernels = ['linear','rbf','sigmoid']\n",
|
||||||
"C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
|
"C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"best = 0\n",
|
"best = 0\n",
|
||||||
"for kernel in kernels:\n",
|
"for kernel in kernels:\n",
|
||||||
" for C in C_values:\n",
|
" for C in C_values:\n",
|
||||||
|
|
@ -203,25 +126,24 @@
|
||||||
" if acc > best:\n",
|
" if acc > best:\n",
|
||||||
" best = acc\n",
|
" best = acc\n",
|
||||||
" with open('cancerModel.pickle','wb') as f:\n",
|
" with open('cancerModel.pickle','wb') as f:\n",
|
||||||
" pickle.dump(clf,f)\n",
|
" pickle.dump(clf,f)"
|
||||||
" \n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.9285714285714286\n"
|
"0.9642857142857143\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Load in our best model (according to Validation accuracy)\n",
|
||||||
"pickle_in = open('cancerModel.pickle','rb')\n",
|
"pickle_in = open('cancerModel.pickle','rb')\n",
|
||||||
"clf = pickle.load(pickle_in)\n",
|
"clf = pickle.load(pickle_in)\n",
|
||||||
"y_pred = clf.predict(x_test)\n",
|
"y_pred = clf.predict(x_test)\n",
|
||||||
|
|
@ -253,7 +175,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.4"
|
"version": "3.7.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import sklearn\n",
|
"import sklearn\n",
|
||||||
"from sklearn import svm, datasets, metrics\n",
|
"from sklearn import svm, datasets, metrics\n",
|
||||||
"from sklearn.neighbors import KNeighborsClassifier\n",
|
|
||||||
"import pickle"
|
"import pickle"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
@ -18,93 +17,19 @@
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n",
|
|
||||||
" 'mean smoothness' 'mean compactness' 'mean concavity'\n",
|
|
||||||
" 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n",
|
|
||||||
" 'radius error' 'texture error' 'perimeter error' 'area error'\n",
|
|
||||||
" 'smoothness error' 'compactness error' 'concavity error'\n",
|
|
||||||
" 'concave points error' 'symmetry error' 'fractal dimension error'\n",
|
|
||||||
" 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n",
|
|
||||||
" 'worst smoothness' 'worst compactness' 'worst concavity'\n",
|
|
||||||
" 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n",
|
|
||||||
"labels: ['malignant' 'benign']\n",
|
|
||||||
"data: [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]\n",
|
|
||||||
" [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]\n",
|
|
||||||
" [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]\n",
|
|
||||||
" ...\n",
|
|
||||||
" [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]\n",
|
|
||||||
" [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]\n",
|
|
||||||
" [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]\n",
|
|
||||||
"target: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
||||||
" 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0\n",
|
|
||||||
" 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1\n",
|
|
||||||
" 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1\n",
|
|
||||||
" 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0\n",
|
|
||||||
" 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1\n",
|
|
||||||
" 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0\n",
|
|
||||||
" 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|
||||||
" 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1\n",
|
|
||||||
" 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0\n",
|
|
||||||
" 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1\n",
|
|
||||||
" 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1\n",
|
|
||||||
" 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1\n",
|
|
||||||
" 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0\n",
|
|
||||||
" 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|
||||||
" 1 1 1 1 1 1 1 0 0 0 0 0 0 1]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"cancer = datasets.load_breast_cancer()\n",
|
"# Load in our data\n",
|
||||||
"print(\"Features:\", cancer.feature_names)\n",
|
"cancer = datasets.load_breast_cancer()"
|
||||||
"print(\"labels:\", cancer.target_names)\n",
|
|
||||||
"print(\"data:\", cancer.data)\n",
|
|
||||||
"print(\"target:\", cancer.target)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"[[1.160e+01 2.449e+01 7.423e+01 4.172e+02 7.474e-02 5.688e-02 1.974e-02\n",
|
|
||||||
" 1.313e-02 1.935e-01 5.878e-02 2.512e-01 1.786e+00 1.961e+00 1.821e+01\n",
|
|
||||||
" 6.122e-03 2.337e-02 1.596e-02 6.998e-03 3.194e-02 2.211e-03 1.244e+01\n",
|
|
||||||
" 3.162e+01 8.139e+01 4.765e+02 9.545e-02 1.361e-01 7.239e-02 4.815e-02\n",
|
|
||||||
" 3.244e-01 6.745e-02]\n",
|
|
||||||
" [1.288e+01 1.822e+01 8.445e+01 4.931e+02 1.218e-01 1.661e-01 4.825e-02\n",
|
|
||||||
" 5.303e-02 1.709e-01 7.253e-02 4.426e-01 1.169e+00 3.176e+00 3.437e+01\n",
|
|
||||||
" 5.273e-03 2.329e-02 1.405e-02 1.244e-02 1.816e-02 3.299e-03 1.505e+01\n",
|
|
||||||
" 2.437e+01 9.931e+01 6.747e+02 1.456e-01 2.961e-01 1.246e-01 1.096e-01\n",
|
|
||||||
" 2.582e-01 8.893e-02]\n",
|
|
||||||
" [1.086e+01 2.148e+01 6.851e+01 3.605e+02 7.431e-02 4.227e-02 0.000e+00\n",
|
|
||||||
" 0.000e+00 1.661e-01 5.948e-02 3.163e-01 1.304e+00 2.115e+00 2.067e+01\n",
|
|
||||||
" 9.579e-03 1.104e-02 0.000e+00 0.000e+00 3.004e-02 2.228e-03 1.166e+01\n",
|
|
||||||
" 2.477e+01 7.408e+01 4.123e+02 1.001e-01 7.348e-02 0.000e+00 0.000e+00\n",
|
|
||||||
" 2.458e-01 6.592e-02]\n",
|
|
||||||
" [2.020e+01 2.683e+01 1.337e+02 1.234e+03 9.905e-02 1.669e-01 1.641e-01\n",
|
|
||||||
" 1.265e-01 1.875e-01 6.020e-02 9.761e-01 1.892e+00 7.128e+00 1.036e+02\n",
|
|
||||||
" 8.439e-03 4.674e-02 5.904e-02 2.536e-02 3.710e-02 4.286e-03 2.419e+01\n",
|
|
||||||
" 3.381e+01 1.600e+02 1.671e+03 1.278e-01 3.416e-01 3.703e-01 2.152e-01\n",
|
|
||||||
" 3.271e-01 7.632e-02]\n",
|
|
||||||
" [2.047e+01 2.067e+01 1.347e+02 1.299e+03 9.156e-02 1.313e-01 1.523e-01\n",
|
|
||||||
" 1.015e-01 2.166e-01 5.419e-02 8.336e-01 1.736e+00 5.168e+00 1.004e+02\n",
|
|
||||||
" 4.938e-03 3.089e-02 4.093e-02 1.699e-02 2.816e-02 2.719e-03 2.323e+01\n",
|
|
||||||
" 2.715e+01 1.520e+02 1.645e+03 1.097e-01 2.534e-01 3.092e-01 1.613e-01\n",
|
|
||||||
" 3.220e-01 6.386e-02]]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Split our input and target features\n",
|
||||||
"x = cancer.data\n",
|
"x = cancer.data\n",
|
||||||
"y = cancer.target\n",
|
"y = cancer.target\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -113,9 +38,7 @@
|
||||||
" x,y,test_size=0.1)\n",
|
" x,y,test_size=0.1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
|
"x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(\n",
|
||||||
" x_test, y_test, test_size=0.5)\n",
|
" x_test, y_test, test_size=0.5)"
|
||||||
"\n",
|
|
||||||
"print(x_train[:5])"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -127,15 +50,17 @@
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.9285714285714286\n"
|
"0.9642857142857143\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Setup Support Vector Classifier\n",
|
||||||
"classes = cancer.target_names\n",
|
"classes = cancer.target_names\n",
|
||||||
"clf = svm.SVC(kernel='linear', gamma='scale')\n",
|
"clf = svm.SVC(kernel='linear', gamma='scale')\n",
|
||||||
"clf.fit(x_train, y_train)\n",
|
"clf.fit(x_train, y_train)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Make predictions and measure accuracy\n",
|
||||||
"y_pred = clf.predict(x_test)\n",
|
"y_pred = clf.predict(x_test)\n",
|
||||||
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
"acc = metrics.accuracy_score(y_test, y_pred)\n",
|
||||||
"print(acc)"
|
"print(acc)"
|
||||||
|
|
@ -152,45 +77,43 @@
|
||||||
"text": [
|
"text": [
|
||||||
"0.9310344827586207\n",
|
"0.9310344827586207\n",
|
||||||
"0.9655172413793104\n",
|
"0.9655172413793104\n",
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.6896551724137931\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.896551724137931\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9310344827586207\n",
|
|
||||||
"0.9655172413793104\n",
|
"0.9655172413793104\n",
|
||||||
"0.9655172413793104\n",
|
"0.9310344827586207\n",
|
||||||
"0.9655172413793104\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
"0.6896551724137931\n",
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.7931034482758621\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8275862068965517\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.8620689655172413\n",
|
||||||
|
"0.896551724137931\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.9310344827586207\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.6206896551724138\n",
|
||||||
|
"0.5862068965517241\n",
|
||||||
|
"0.5517241379310345\n",
|
||||||
"0.4827586206896552\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n",
|
||||||
"0.41379310344827586\n",
|
"0.4827586206896552\n"
|
||||||
"0.41379310344827586\n",
|
|
||||||
"0.41379310344827586\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# Tune parameters of kernel, C, gamma \n",
|
"# Tune parameters of kernel, C, gamma \n",
|
||||||
"# Note: use logarithmic scale random values\n",
|
"# Note: use logarithmically scaled values for C\n",
|
||||||
"kernels = ['linear','rbf','sigmoid']\n",
|
"kernels = ['linear','rbf','sigmoid']\n",
|
||||||
"C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
|
"C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"best = 0\n",
|
"best = 0\n",
|
||||||
"for kernel in kernels:\n",
|
"for kernel in kernels:\n",
|
||||||
" for C in C_values:\n",
|
" for C in C_values:\n",
|
||||||
|
|
@ -203,25 +126,24 @@
|
||||||
" if acc > best:\n",
|
" if acc > best:\n",
|
||||||
" best = acc\n",
|
" best = acc\n",
|
||||||
" with open('cancerModel.pickle','wb') as f:\n",
|
" with open('cancerModel.pickle','wb') as f:\n",
|
||||||
" pickle.dump(clf,f)\n",
|
" pickle.dump(clf,f)"
|
||||||
" \n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.9285714285714286\n"
|
"0.9642857142857143\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# Load in our best model (according to Validation accuracy)\n",
|
||||||
"pickle_in = open('cancerModel.pickle','rb')\n",
|
"pickle_in = open('cancerModel.pickle','rb')\n",
|
||||||
"clf = pickle.load(pickle_in)\n",
|
"clf = pickle.load(pickle_in)\n",
|
||||||
"y_pred = clf.predict(x_test)\n",
|
"y_pred = clf.predict(x_test)\n",
|
||||||
|
|
@ -253,7 +175,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.4"
|
"version": "3.7.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,145 @@
|
||||||
|
Month,#Passengers
|
||||||
|
1949-01,112
|
||||||
|
1949-02,118
|
||||||
|
1949-03,132
|
||||||
|
1949-04,129
|
||||||
|
1949-05,121
|
||||||
|
1949-06,135
|
||||||
|
1949-07,148
|
||||||
|
1949-08,148
|
||||||
|
1949-09,136
|
||||||
|
1949-10,119
|
||||||
|
1949-11,104
|
||||||
|
1949-12,118
|
||||||
|
1950-01,115
|
||||||
|
1950-02,126
|
||||||
|
1950-03,141
|
||||||
|
1950-04,135
|
||||||
|
1950-05,125
|
||||||
|
1950-06,149
|
||||||
|
1950-07,170
|
||||||
|
1950-08,170
|
||||||
|
1950-09,158
|
||||||
|
1950-10,133
|
||||||
|
1950-11,114
|
||||||
|
1950-12,140
|
||||||
|
1951-01,145
|
||||||
|
1951-02,150
|
||||||
|
1951-03,178
|
||||||
|
1951-04,163
|
||||||
|
1951-05,172
|
||||||
|
1951-06,178
|
||||||
|
1951-07,199
|
||||||
|
1951-08,199
|
||||||
|
1951-09,184
|
||||||
|
1951-10,162
|
||||||
|
1951-11,146
|
||||||
|
1951-12,166
|
||||||
|
1952-01,171
|
||||||
|
1952-02,180
|
||||||
|
1952-03,193
|
||||||
|
1952-04,181
|
||||||
|
1952-05,183
|
||||||
|
1952-06,218
|
||||||
|
1952-07,230
|
||||||
|
1952-08,242
|
||||||
|
1952-09,209
|
||||||
|
1952-10,191
|
||||||
|
1952-11,172
|
||||||
|
1952-12,194
|
||||||
|
1953-01,196
|
||||||
|
1953-02,196
|
||||||
|
1953-03,236
|
||||||
|
1953-04,235
|
||||||
|
1953-05,229
|
||||||
|
1953-06,243
|
||||||
|
1953-07,264
|
||||||
|
1953-08,272
|
||||||
|
1953-09,237
|
||||||
|
1953-10,211
|
||||||
|
1953-11,180
|
||||||
|
1953-12,201
|
||||||
|
1954-01,204
|
||||||
|
1954-02,188
|
||||||
|
1954-03,235
|
||||||
|
1954-04,227
|
||||||
|
1954-05,234
|
||||||
|
1954-06,264
|
||||||
|
1954-07,302
|
||||||
|
1954-08,293
|
||||||
|
1954-09,259
|
||||||
|
1954-10,229
|
||||||
|
1954-11,203
|
||||||
|
1954-12,229
|
||||||
|
1955-01,242
|
||||||
|
1955-02,233
|
||||||
|
1955-03,267
|
||||||
|
1955-04,269
|
||||||
|
1955-05,270
|
||||||
|
1955-06,315
|
||||||
|
1955-07,364
|
||||||
|
1955-08,347
|
||||||
|
1955-09,312
|
||||||
|
1955-10,274
|
||||||
|
1955-11,237
|
||||||
|
1955-12,278
|
||||||
|
1956-01,284
|
||||||
|
1956-02,277
|
||||||
|
1956-03,317
|
||||||
|
1956-04,313
|
||||||
|
1956-05,318
|
||||||
|
1956-06,374
|
||||||
|
1956-07,413
|
||||||
|
1956-08,405
|
||||||
|
1956-09,355
|
||||||
|
1956-10,306
|
||||||
|
1956-11,271
|
||||||
|
1956-12,306
|
||||||
|
1957-01,315
|
||||||
|
1957-02,301
|
||||||
|
1957-03,356
|
||||||
|
1957-04,348
|
||||||
|
1957-05,355
|
||||||
|
1957-06,422
|
||||||
|
1957-07,465
|
||||||
|
1957-08,467
|
||||||
|
1957-09,404
|
||||||
|
1957-10,347
|
||||||
|
1957-11,305
|
||||||
|
1957-12,336
|
||||||
|
1958-01,340
|
||||||
|
1958-02,318
|
||||||
|
1958-03,362
|
||||||
|
1958-04,348
|
||||||
|
1958-05,363
|
||||||
|
1958-06,435
|
||||||
|
1958-07,491
|
||||||
|
1958-08,505
|
||||||
|
1958-09,404
|
||||||
|
1958-10,359
|
||||||
|
1958-11,310
|
||||||
|
1958-12,337
|
||||||
|
1959-01,360
|
||||||
|
1959-02,342
|
||||||
|
1959-03,406
|
||||||
|
1959-04,396
|
||||||
|
1959-05,420
|
||||||
|
1959-06,472
|
||||||
|
1959-07,548
|
||||||
|
1959-08,559
|
||||||
|
1959-09,463
|
||||||
|
1959-10,407
|
||||||
|
1959-11,362
|
||||||
|
1959-12,405
|
||||||
|
1960-01,417
|
||||||
|
1960-02,391
|
||||||
|
1960-03,419
|
||||||
|
1960-04,461
|
||||||
|
1960-05,472
|
||||||
|
1960-06,535
|
||||||
|
1960-07,622
|
||||||
|
1960-08,606
|
||||||
|
1960-09,508
|
||||||
|
1960-10,461
|
||||||
|
1960-11,390
|
||||||
|
1960-12,432
|
||||||
|
|
|
@ -0,0 +1,145 @@
|
||||||
|
Month,#Passengers
|
||||||
|
1949-01,112
|
||||||
|
1949-02,118
|
||||||
|
1949-03,132
|
||||||
|
1949-04,129
|
||||||
|
1949-05,121
|
||||||
|
1949-06,135
|
||||||
|
1949-07,148
|
||||||
|
1949-08,148
|
||||||
|
1949-09,136
|
||||||
|
1949-10,119
|
||||||
|
1949-11,104
|
||||||
|
1949-12,118
|
||||||
|
1950-01,115
|
||||||
|
1950-02,126
|
||||||
|
1950-03,141
|
||||||
|
1950-04,135
|
||||||
|
1950-05,125
|
||||||
|
1950-06,149
|
||||||
|
1950-07,170
|
||||||
|
1950-08,170
|
||||||
|
1950-09,158
|
||||||
|
1950-10,133
|
||||||
|
1950-11,114
|
||||||
|
1950-12,140
|
||||||
|
1951-01,145
|
||||||
|
1951-02,150
|
||||||
|
1951-03,178
|
||||||
|
1951-04,163
|
||||||
|
1951-05,172
|
||||||
|
1951-06,178
|
||||||
|
1951-07,199
|
||||||
|
1951-08,199
|
||||||
|
1951-09,184
|
||||||
|
1951-10,162
|
||||||
|
1951-11,146
|
||||||
|
1951-12,166
|
||||||
|
1952-01,171
|
||||||
|
1952-02,180
|
||||||
|
1952-03,193
|
||||||
|
1952-04,181
|
||||||
|
1952-05,183
|
||||||
|
1952-06,218
|
||||||
|
1952-07,230
|
||||||
|
1952-08,242
|
||||||
|
1952-09,209
|
||||||
|
1952-10,191
|
||||||
|
1952-11,172
|
||||||
|
1952-12,194
|
||||||
|
1953-01,196
|
||||||
|
1953-02,196
|
||||||
|
1953-03,236
|
||||||
|
1953-04,235
|
||||||
|
1953-05,229
|
||||||
|
1953-06,243
|
||||||
|
1953-07,264
|
||||||
|
1953-08,272
|
||||||
|
1953-09,237
|
||||||
|
1953-10,211
|
||||||
|
1953-11,180
|
||||||
|
1953-12,201
|
||||||
|
1954-01,204
|
||||||
|
1954-02,188
|
||||||
|
1954-03,235
|
||||||
|
1954-04,227
|
||||||
|
1954-05,234
|
||||||
|
1954-06,264
|
||||||
|
1954-07,302
|
||||||
|
1954-08,293
|
||||||
|
1954-09,259
|
||||||
|
1954-10,229
|
||||||
|
1954-11,203
|
||||||
|
1954-12,229
|
||||||
|
1955-01,242
|
||||||
|
1955-02,233
|
||||||
|
1955-03,267
|
||||||
|
1955-04,269
|
||||||
|
1955-05,270
|
||||||
|
1955-06,315
|
||||||
|
1955-07,364
|
||||||
|
1955-08,347
|
||||||
|
1955-09,312
|
||||||
|
1955-10,274
|
||||||
|
1955-11,237
|
||||||
|
1955-12,278
|
||||||
|
1956-01,284
|
||||||
|
1956-02,277
|
||||||
|
1956-03,317
|
||||||
|
1956-04,313
|
||||||
|
1956-05,318
|
||||||
|
1956-06,374
|
||||||
|
1956-07,413
|
||||||
|
1956-08,405
|
||||||
|
1956-09,355
|
||||||
|
1956-10,306
|
||||||
|
1956-11,271
|
||||||
|
1956-12,306
|
||||||
|
1957-01,315
|
||||||
|
1957-02,301
|
||||||
|
1957-03,356
|
||||||
|
1957-04,348
|
||||||
|
1957-05,355
|
||||||
|
1957-06,422
|
||||||
|
1957-07,465
|
||||||
|
1957-08,467
|
||||||
|
1957-09,404
|
||||||
|
1957-10,347
|
||||||
|
1957-11,305
|
||||||
|
1957-12,336
|
||||||
|
1958-01,340
|
||||||
|
1958-02,318
|
||||||
|
1958-03,362
|
||||||
|
1958-04,348
|
||||||
|
1958-05,363
|
||||||
|
1958-06,435
|
||||||
|
1958-07,491
|
||||||
|
1958-08,505
|
||||||
|
1958-09,404
|
||||||
|
1958-10,359
|
||||||
|
1958-11,310
|
||||||
|
1958-12,337
|
||||||
|
1959-01,360
|
||||||
|
1959-02,342
|
||||||
|
1959-03,406
|
||||||
|
1959-04,396
|
||||||
|
1959-05,420
|
||||||
|
1959-06,472
|
||||||
|
1959-07,548
|
||||||
|
1959-08,559
|
||||||
|
1959-09,463
|
||||||
|
1959-10,407
|
||||||
|
1959-11,362
|
||||||
|
1959-12,405
|
||||||
|
1960-01,417
|
||||||
|
1960-02,391
|
||||||
|
1960-03,419
|
||||||
|
1960-04,461
|
||||||
|
1960-05,472
|
||||||
|
1960-06,535
|
||||||
|
1960-07,622
|
||||||
|
1960-08,606
|
||||||
|
1960-09,508
|
||||||
|
1960-10,461
|
||||||
|
1960-11,390
|
||||||
|
1960-12,432
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue