# 鸢尾花数据分类 (Iris data classification)

# Iris classification with k-nearest neighbours: hold-out evaluation,
# cross-validation, a sweep over k, a plot of the sweep, and persistence
# of the tuned model.
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
# 'data' is the data to learn from, 'target' holds the classification labels.
X, y = iris.data, iris.target

# Train/test split.
# random_state seeds the shuffling so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4)

# Model training
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predict
y_pred = knn.predict(X_test)

# Score (mean accuracy on the held-out test set)
score = knn.score(X_test, y_test)
print(f'score: {score}')

# Cross validation (one accuracy value per fold)
score = cross_val_score(knn, X, y, cv=5, scoring='accuracy')
print(f'score: {score}')

# Parameter tuning: mean 10-fold accuracy for every k in 1..30.
k_range = range(1, 31)
k_scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy')
    k_scores.append(scores.mean())

print(f'k_range: {k_range}')
print(f'k_scores: {k_scores}')

# cross_val_score fits clones of the estimator, so the `knn` left over from
# the loop is an *unfitted* model with the last k tried. Rebuild the model
# with the best-scoring k (ties resolve to the smallest k) and fit it so that
# the object saved below is actually usable for prediction.
best_k = k_range[k_scores.index(max(k_scores))]
print(f'best_k: {best_k}')
knn = KNeighborsClassifier(n_neighbors=best_k)
# k was selected by cross-validation over the full dataset, so the final
# model is trained on the full dataset as well.
knn.fit(X, y)

# Plot mean cross-validated accuracy against k.
plt.plot(k_range, k_scores)
plt.show()

# Model save
model_path = Path('save/knn_iris.mdl')
# Create the output directory on first run instead of failing in open().
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open('wb') as f:
    pickle.dump(knn, f)