import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Compare k-nearest-neighbours accuracy on the training and test splits of the
# scikit-learn breast cancer dataset for several values of n_neighbors.
cancer = load_breast_cancer()

# Stratify on the target so both splits keep the dataset's class proportions;
# the fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=66,
    stratify=cancer.target,
)

# Accuracy per candidate n_neighbors, stored in the same order as
# neighbors_settings so the lists can be plotted against it directly.
training_accuracy = []
test_accuracy = []
neighbors_settings = range(1, 11)

for n_neighbors in neighbors_settings:
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(X_train, y_train)
    training_accuracy.append(clf.score(X_train, y_train))
    test_accuracy.append(clf.score(X_test, y_test))

# Map n_neighbors -> [training accuracy, test accuracy].
# Pairing the sequences with zip (rather than indexing the lists with
# `n_neighbors - 1`) keeps the mapping correct even if neighbors_settings is
# later changed to start at a value other than 1 or to use a step.
neighbor_dict = {
    k: [train_score, test_score]
    for k, train_score, test_score in zip(
        neighbors_settings, training_accuracy, test_accuracy
    )
}
print(neighbor_dict)

# Plot both accuracy curves against the number of neighbours.
plt.plot(neighbors_settings, training_accuracy, label='training_accuracy')
plt.plot(neighbors_settings, test_accuracy, label='test_accuracy')
plt.xlabel('n_neighbors')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# --------- Output of print(neighbor_dict): ---------
{1: [1.0, 0.9020979020979021],
2: [0.9765258215962441, 0.8881118881118881],
3: [0.9577464788732394, 0.9230769230769231],
4: [0.9553990610328639, 0.9230769230769231],
5: [0.9483568075117371, 0.9230769230769231],
6: [0.9460093896713615, 0.9370629370629371],
7: [0.9436619718309859, 0.9300699300699301],
8: [0.9413145539906104, 0.9300699300699301],
9: [0.9342723004694836, 0.916083916083916],
10: [0.9389671361502347, 0.916083916083916]}