Add breastCancerML, carML, and MNIST_Digit_Kmeans projects
This commit is contained in:
parent d1255eee54
commit d1cc480203
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,261 @@

In [1]:
import os
import numpy as np
import sklearn
from sklearn import svm, datasets, metrics
from sklearn.neighbors import KNeighborsClassifier
import pickle
In [2]:
cancer = datasets.load_breast_cancer()
print("Features:", cancer.feature_names)
print("labels:", cancer.target_names)
print("data:", cancer.data)
print("target:", cancer.target)

Features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
labels: ['malignant' 'benign']
data: [[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
target: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0
 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 0 0 0 0 1]
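The raw arrays above are hard to scan. A small optional sketch, not part of the original notebook, that puts the same data into a labelled pandas DataFrame (pandas is not imported in this notebook, so that import is an addition); it assumes only the cancer object loaded above.

# Sketch: view the same dataset with labelled columns.
import pandas as pd

df = pd.DataFrame(cancer.data, columns=cancer.feature_names)
df['target'] = cancer.target  # 0 = malignant, 1 = benign (see target_names above)
print(df.head())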
In [3]:
x = cancer.data
y = cancer.target

# Split into train, dev, test sets with 90 / 5 / 5 split
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x, y, test_size=0.1)

x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(
    x_test, y_test, test_size=0.5)

print(x_train[:5])

[[1.160e+01 2.449e+01 7.423e+01 4.172e+02 7.474e-02 5.688e-02 1.974e-02
 1.313e-02 1.935e-01 5.878e-02 2.512e-01 1.786e+00 1.961e+00 1.821e+01
 6.122e-03 2.337e-02 1.596e-02 6.998e-03 3.194e-02 2.211e-03 1.244e+01
 3.162e+01 8.139e+01 4.765e+02 9.545e-02 1.361e-01 7.239e-02 4.815e-02
 3.244e-01 6.745e-02]
 [1.288e+01 1.822e+01 8.445e+01 4.931e+02 1.218e-01 1.661e-01 4.825e-02
 5.303e-02 1.709e-01 7.253e-02 4.426e-01 1.169e+00 3.176e+00 3.437e+01
 5.273e-03 2.329e-02 1.405e-02 1.244e-02 1.816e-02 3.299e-03 1.505e+01
 2.437e+01 9.931e+01 6.747e+02 1.456e-01 2.961e-01 1.246e-01 1.096e-01
 2.582e-01 8.893e-02]
 [1.086e+01 2.148e+01 6.851e+01 3.605e+02 7.431e-02 4.227e-02 0.000e+00
 0.000e+00 1.661e-01 5.948e-02 3.163e-01 1.304e+00 2.115e+00 2.067e+01
 9.579e-03 1.104e-02 0.000e+00 0.000e+00 3.004e-02 2.228e-03 1.166e+01
 2.477e+01 7.408e+01 4.123e+02 1.001e-01 7.348e-02 0.000e+00 0.000e+00
 2.458e-01 6.592e-02]
 [2.020e+01 2.683e+01 1.337e+02 1.234e+03 9.905e-02 1.669e-01 1.641e-01
 1.265e-01 1.875e-01 6.020e-02 9.761e-01 1.892e+00 7.128e+00 1.036e+02
 8.439e-03 4.674e-02 5.904e-02 2.536e-02 3.710e-02 4.286e-03 2.419e+01
 3.381e+01 1.600e+02 1.671e+03 1.278e-01 3.416e-01 3.703e-01 2.152e-01
 3.271e-01 7.632e-02]
 [2.047e+01 2.067e+01 1.347e+02 1.299e+03 9.156e-02 1.313e-01 1.523e-01
 1.015e-01 2.166e-01 5.419e-02 8.336e-01 1.736e+00 5.168e+00 1.004e+02
 4.938e-03 3.089e-02 4.093e-02 1.699e-02 2.816e-02 2.719e-03 2.323e+01
 2.715e+01 1.520e+02 1.645e+03 1.097e-01 2.534e-01 3.092e-01 1.613e-01
 3.220e-01 6.386e-02]]
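The split above is random and unseeded, so accuracies reported later will move from run to run. A hedged variant of the same 90/5/5 split with a fixed seed and class stratification (random_state=42 is an arbitrary illustrative value; both keyword arguments are standard train_test_split options):

# Sketch: the same 90/5/5 split, but reproducible and stratified by class.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42, stratify=y)

x_test, x_dev, y_test, y_dev = train_test_split(
    x_test, y_test, test_size=0.5, random_state=42, stratify=y_test)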
In [4]:
classes = cancer.target_names
clf = svm.SVC(kernel='linear', gamma='scale')
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)
acc = metrics.accuracy_score(y_test, y_pred)
print(acc)

0.9285714285714286
In [5]:
# Tune parameters of kernel and C (gamma is left at 'scale')
# Note: the C values are spaced roughly on a logarithmic scale
kernels = ['linear', 'rbf', 'sigmoid']
C_values = [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000]

best = 0
for kernel in kernels:
    for C in C_values:
        classes = cancer.target_names
        clf = svm.SVC(kernel=kernel, C=C, gamma='scale')
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_dev)
        acc = metrics.accuracy_score(y_dev, y_pred)
        print(acc)
        if acc > best:
            best = acc
            # Keep the best model seen so far on disk
            with open('cancerModel.pickle', 'wb') as f:
                pickle.dump(clf, f)

0.9310344827586207
0.9655172413793104
0.9310344827586207
0.896551724137931
0.896551724137931
0.896551724137931
0.896551724137931
0.896551724137931
0.9310344827586207
0.9310344827586207
0.6896551724137931
0.896551724137931
0.9310344827586207
0.896551724137931
0.9310344827586207
0.9310344827586207
0.9310344827586207
0.9655172413793104
0.9655172413793104
0.9655172413793104
0.6896551724137931
0.6896551724137931
0.6896551724137931
0.4827586206896552
0.41379310344827586
0.41379310344827586
0.41379310344827586
0.41379310344827586
0.41379310344827586
0.41379310344827586
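The nested loop above is an exhaustive grid search written by hand, scored on the single dev split. A sketch of the same search using scikit-learn's GridSearchCV, which cross-validates on the training data instead; the parameter grid mirrors the kernels and C values used above, and cv=5 is an assumed choice:

# Sketch: equivalent grid search with built-in cross-validation.
from sklearn.model_selection import GridSearchCV
from sklearn import svm

param_grid = {
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'C': [0.001, 0.01, 0.1, 1, 5, 25, 50, 100, 500, 1000],
}
search = GridSearchCV(svm.SVC(gamma='scale'), param_grid, cv=5)
search.fit(x_train, y_train)

print(search.best_params_, search.best_score_)
clf = search.best_estimator_  # refit on all of x_train with the best settings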
In [7]:
pickle_in = open('cancerModel.pickle', 'rb')
clf = pickle.load(pickle_in)
y_pred = clf.predict(x_test)
acc = metrics.accuracy_score(y_test, y_pred)
print(acc)

0.9285714285714286
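pickle works here, but scikit-learn's documentation generally points to joblib for persisting fitted estimators that carry large NumPy arrays. A minimal sketch under that assumption, reusing the fitted clf from above (the file name is illustrative):

# Sketch: persist and reload the model with joblib instead of raw pickle.
from joblib import dump, load

dump(clf, 'cancerModel.joblib')   # write the fitted estimator to disk
clf = load('cancerModel.joblib')  # read it back later
print(clf.score(x_test, y_test))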
In [ ]:

(Notebook metadata: kernelspec "Python 3", language Python 3.7.4, nbformat 4, nbformat_minor 2)
Binary file not shown.
@@ -0,0 +1,398 @@

Here we implement K Nearest Neighbors on a car dataset to decide whether a given car will be unacceptable, acceptable, good, or very good. You can find the dataset [here](https://archive.ics.uci.edu/ml/datasets/Car+Evaluation)
In [2]:
# For accessing other folders
import os

import numpy as np

# For easy data loading
import pandas as pd

# For access to the KNN algorithm and relevant features
import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model, preprocessing

# For storing and loading models
import pickle
We begin by loading in and printing out our dataset. Note that before loading it in, we altered the dataset file so that it starts with a header row of the relevant column labels.
In [3]:
data = pd.read_csv(os.path.join('data', 'car.csv'))
print(data.head())

  buying  maint doors persons lug_boot safety  class
0  vhigh  vhigh     2       2    small    low  unacc
1  vhigh  vhigh     2       2    small    med  unacc
2  vhigh  vhigh     2       2    small   high  unacc
3  vhigh  vhigh     2       2      med    low  unacc
4  vhigh  vhigh     2       2      med    med  unacc
To make use of this data, we need to convert these strings to numbers. To do so, we use the Label Encoder from SKLearn before splitting into train, test, and validation sets.
In [4]:
# Object which will convert our labels into numbers
# Note that this takes in a list as input
le = preprocessing.LabelEncoder()

# Encode all our feature labels
buying = le.fit_transform(list(data["buying"]))
maint = le.fit_transform(list(data["maint"]))
doors = le.fit_transform(list(data["doors"]))
persons = le.fit_transform(list(data["persons"]))
lug_boot = le.fit_transform(list(data["lug_boot"]))
safety = le.fit_transform(list(data["safety"]))
cls = le.fit_transform(list(data["class"]))

# Set our prediction feature
predict = "class"

# Setup X and y by zipping together all these np arrays
x = list(zip(buying, maint, doors, persons, lug_boot, safety))
y = cls

# Split into train, test, validation sets with 80 / 10 / 10 split
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x, y, test_size=0.2)

x_test, x_dev, y_test, y_dev = sklearn.model_selection.train_test_split(
    x_test, y_test, test_size=0.5)
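One caveat worth noting: LabelEncoder assigns integer codes in sorted order of the strings, so ordered categories such as low < med < high < vhigh do not get codes that reflect that order, which matters for a distance-based model like KNN. A hedged sketch, not from the original notebook, that encodes each column with an explicit category order taken from the dataset description (OrdinalEncoder is a standard sklearn.preprocessing class; the data frame is the one loaded above):

# Sketch: encode features so integer codes respect the natural category order.
from sklearn.preprocessing import OrdinalEncoder

orders = {
    "buying":   ["low", "med", "high", "vhigh"],
    "maint":    ["low", "med", "high", "vhigh"],
    "doors":    ["2", "3", "4", "5more"],
    "persons":  ["2", "4", "more"],
    "lug_boot": ["small", "med", "big"],
    "safety":   ["low", "med", "high"],
}

features = list(orders.keys())
enc = OrdinalEncoder(categories=[orders[c] for c in features])
# astype(str) guards against pandas having read the numeric-looking values as ints
x_ordered = enc.fit_transform(data[features].astype(str))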
In [5]:
model = KNeighborsClassifier(n_neighbors=7)
model.fit(x_train, y_train)
acc = model.score(x_dev, y_dev)
print(acc)

0.9421965317919075
In [6]:
# Class names listed in the order LabelEncoder assigns codes (sorted alphabetically),
# so that names[code] maps each integer back to its original label
names = ['acc', 'good', 'unacc', 'vgood']
predicted = model.predict(x_dev)
for i in range(len(x_dev)):
    # Compare against the dev labels that x_dev was split with
    print("Predicted:", names[predicted[i]], "Actual:", names[y_dev[i]])

Predicted: good Actual: unacc
Predicted: vgood Actual: unacc
Predicted: good Actual: unacc
Predicted: good Actual: good
Predicted: good Actual: good
Predicted: unacc Actual: good
Predicted: good Actual: good
Predicted: good Actual: good
Predicted: good Actual: good
Predicted: good Actual: unacc
...
Predicted: good Actual: vgood
Predicted: good Actual: unacc
Predicted: good Actual: good
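The line-by-line dump above is hard to read at a glance. A minimal sketch of a more compact summary, assuming the model, x_dev, and y_dev built in the cells above (confusion_matrix and classification_report are standard sklearn.metrics utilities):

# Sketch: summarize dev-set predictions instead of printing every pair.
from sklearn.metrics import confusion_matrix, classification_report

y_pred = model.predict(x_dev)

# Rows are actual classes, columns are predicted classes,
# in the integer-code order produced by the encoding step.
print(confusion_matrix(y_dev, y_pred))
print(classification_report(y_dev, y_pred))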
Now we can tune our k parameter over a sequence of odd integers, keeping whichever value scores best on the dev (validation) set. To approximate the real-world accuracy of the chosen model, we then look at the held-out test accuracy.
In [7]:
# Candidate k values: the odd integers below 100
temp = []
for i in range(100):
    if i % 2 != 0:
        temp.append(i)

best = 0
for k in temp:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(x_train, y_train)
    acc = model.score(x_dev, y_dev)
    if acc > best:
        best = acc
        # Keep the best model seen so far on disk
        with open('carmodel.pickle', 'wb') as f:
            pickle.dump(model, f)
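The loop above scores each k on a single dev split, so the chosen k depends on how that one split happened to fall. A hedged alternative sketch that picks k by k-fold cross-validation on the training set (cross_val_score is part of sklearn.model_selection; x_train and y_train are the ones built above, and cv=5 is an assumed choice):

# Sketch: pick k by 5-fold cross-validation instead of a single dev split.
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

candidate_ks = range(1, 100, 2)
cv_means = []
for k in candidate_ks:
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k),
                             x_train, y_train, cv=5)
    cv_means.append(scores.mean())

best_k = list(candidate_ks)[int(np.argmax(cv_means))]
print("best k:", best_k)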
In [8]:
pickle_in = open('carmodel.pickle', 'rb')
model = pickle.load(pickle_in)
print("Training accuracy is:", model.score(x_train, y_train))
print("Dev accuracy is:", model.score(x_dev, y_dev))
print("Test accuracy is:", model.score(x_test, y_test))

Training accuracy is: 0.9580318379160637
Dev accuracy is: 0.9421965317919075
Test accuracy is: 0.9132947976878613
In [ ]:

(Notebook metadata: kernelspec "Python 3", language Python 3.7.4, nbformat 4, nbformat_minor 2)
Binary file not shown.
@@ -0,0 +1,14 @@
| names file (C4.5 format) for car evaluation domain
|
| class values
|
unacc, acc, good, vgood
|
| attributes
|
buying: vhigh, high, med, low.
maint: vhigh, high, med, low.
doors: 2, 3, 4, 5more.
persons: 2, 4, more.
lug_boot: small, med, big.
safety: low, med, high.
File diff suppressed because it is too large
@@ -0,0 +1,82 @@
1. Title: Car Evaluation Database

2. Sources:
   (a) Creator: Marko Bohanec
   (b) Donors: Marko Bohanec (marko.bohanec@ijs.si)
               Blaz Zupan   (blaz.zupan@ijs.si)
   (c) Date: June, 1997

3. Past Usage:

   The hierarchical decision model, from which this dataset is
   derived, was first presented in

   M. Bohanec and V. Rajkovic: Knowledge acquisition and explanation for
   multi-attribute decision making. In 8th Intl Workshop on Expert
   Systems and their Applications, Avignon, France. pages 59-78, 1988.

   Within machine-learning, this dataset was used for the evaluation
   of HINT (Hierarchy INduction Tool), which was proved to be able to
   completely reconstruct the original hierarchical model. This,
   together with a comparison with C4.5, is presented in

   B. Zupan, M. Bohanec, I. Bratko, J. Demsar: Machine learning by
   function decomposition. ICML-97, Nashville, TN. 1997 (to appear)

4. Relevant Information Paragraph:

   Car Evaluation Database was derived from a simple hierarchical
   decision model originally developed for the demonstration of DEX
   (M. Bohanec, V. Rajkovic: Expert system for decision
   making. Sistemica 1(1), pp. 145-157, 1990.). The model evaluates
   cars according to the following concept structure:

   CAR              car acceptability
   . PRICE          overall price
   . . buying       buying price
   . . maint        price of the maintenance
   . TECH           technical characteristics
   . . COMFORT      comfort
   . . . doors      number of doors
   . . . persons    capacity in terms of persons to carry
   . . . lug_boot   the size of luggage boot
   . . safety       estimated safety of the car

   Input attributes are printed in lowercase. Besides the target
   concept (CAR), the model includes three intermediate concepts:
   PRICE, TECH, COMFORT. Every concept is in the original model
   related to its lower level descendants by a set of examples (for
   these examples sets see http://www-ai.ijs.si/BlazZupan/car.html).

   The Car Evaluation Database contains examples with the structural
   information removed, i.e., directly relates CAR to the six input
   attributes: buying, maint, doors, persons, lug_boot, safety.

   Because of known underlying concept structure, this database may be
   particularly useful for testing constructive induction and
   structure discovery methods.

5. Number of Instances: 1728
   (instances completely cover the attribute space)

6. Number of Attributes: 6

7. Attribute Values:

   buying     v-high, high, med, low
   maint      v-high, high, med, low
   doors      2, 3, 4, 5-more
   persons    2, 4, more
   lug_boot   small, med, big
   safety     low, med, high

8. Missing Attribute Values: none

9. Class Distribution (number of instances per class)

   class     N         N[%]
   -----------------------------
   unacc    1210     (70.023 %)
   acc       384     (22.222 %)
   good       69     ( 3.993 %)
   v-good     65     ( 3.762 %)
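The class distribution table above can be checked directly against the CSV used in the carML notebook. A small hedged sketch, assuming the same data/car.csv file with a header row as loaded earlier:

# Sketch: reproduce the class distribution table from the CSV.
import os
import pandas as pd

data = pd.read_csv(os.path.join('data', 'car.csv'))

counts = data['class'].value_counts()
percents = (100 * counts / len(data)).round(3)
print(pd.DataFrame({'N': counts, 'N[%]': percents}))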