1. MNIST 데이터셋으로 분류기를 만들어 테스트 세트에서 97% 정확도를 달성해보세요.
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Fetch the MNIST data set (784 pixel features per image) from OpenML.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"]
# The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `int` is the exact drop-in replacement.
y = y.astype(int)

# Split into train and test sets: the first 60,000 samples are used for
# training, the remaining ones for testing.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]

# k-nearest neighbours with k=3; neighbours are weighted by inverse distance.
knn_clf = KNeighborsClassifier(n_neighbors=3, weights="distance")
knn_clf.fit(X_train, y_train)

# Report accuracy on the held-out test set.
y_pred = knn_clf.predict(X_test)
print(accuracy_score(y_test, y_pred))
KNeiborsClassifier๋ก ์ ์ ํ์๋ค.
n_neighbors = 2로 했을 때, 96.9% 정도의 성능이 나와 3으로 변경하니 0.9717에 도달하였다.
2. MNIST 이미지를 (왼, 오른, 위, 아래) 어느 방향으로든 한 픽셀 이동시킬 수 있는 함수를 만들어보세요. 그런 다음 훈련 세트에 있는 각 이미지에 대해 네 개의 이동된 복사본 (방향마다 한 개씩)을 만들어 훈련 세트에 추가하세요. 마지막으로 이 확장된 데이터셋에서 앞에서 찾은 최선의 모델을 훈련시키고 테스트 세트에서 정확도를 측정해보세요. 모델 성능이 더 높아졌는지 확인해보세요!
import numpy as np
from sklearn.datasets import fetch_openml

# Fetch the MNIST data set (784 pixel features per image) from OpenML.
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist["data"], mnist["target"]
# The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `int` is the exact drop-in replacement.
y = y.astype(int)

# Split into train and test sets: the first 60,000 samples are used for
# training, the remaining ones for testing.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
여기까지는 위와 동일하다.
# `scipy.ndimage.interpolation` is a deprecated namespace; `shift` is
# exported from `scipy.ndimage` directly.
from scipy.ndimage import shift


# Image pixel-shifting function
def shift_image(image, dx, dy):
    """Return a flattened copy of a 28x28 image translated by (dx, dy) pixels.

    Args:
        image: Array holding 784 pixel values; it is reshaped to 28x28.
        dx: Horizontal offset in pixels (positive moves content right).
        dy: Vertical offset in pixels (positive moves content down).

    Returns:
        A 1-D array of 784 values. Pixels shifted in from outside the
        image are filled with 0.
    """
    image = image.reshape((28, 28))
    # `shift` takes one offset per axis in axis order (rows, then columns),
    # hence [dy, dx] rather than [dx, dy].
    shifted_image = shift(image, [dy, dx], cval=0, mode="constant")
    return shifted_image.reshape([-1])
# Visualize the augmented (shifted) images
# `plt` is used below but was never imported in this snippet.
import matplotlib.pyplot as plt

# Take one training image and shift it by 5 pixels in each direction so the
# effect is clearly visible in the plot.
image = X_train.to_numpy()[1000]
shifted_image_down = shift_image(image, 0, 5)
shifted_image_left = shift_image(image, -5, 0)
shifted_image_right = shift_image(image, 5, 0)
shifted_image_up = shift_image(image, 0, -5)

# (title, pixels) pairs in the left-to-right order they are drawn.
panels = (
    ("Original", image),
    ("Shifted down", shifted_image_down),
    ("Shifted left", shifted_image_left),
    ("Shifted right", shifted_image_right),
    ("Shifted up", shifted_image_up),
)

plt.figure(figsize=(12, 5))
for position, (title, pixels) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    plt.title(title, fontsize=14)
    plt.imshow(pixels.reshape(28, 28), interpolation="nearest", cmap="Greys")
plt.show()
# Apply the augmentation
# Convert the training data to arrays once; the previous version called
# X_train.to_numpy() again for every single row.
X_train_values = X_train.to_numpy()
y_train_values = np.asarray(y_train)

# Start with the original images, then add one shifted copy of the whole
# training set per direction (right, left, down, up by one pixel).
X_parts = [X_train_values]
y_parts = [y_train_values]
for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
    X_parts.append(
        np.array([shift_image(image, dx, dy) for image in X_train_values])
    )
    # Shifting does not change the digit, so the labels are reused as-is.
    y_parts.append(y_train_values)

X_train_augmented = np.concatenate(X_parts)
y_train_augmented = np.concatenate(y_parts)
위 코드는 증강기법을 적용하여 데이터의 수를 불리는 과정이다.
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Fit the k-nearest-neighbours classifier on the augmented training set.
knn_clf = KNeighborsClassifier(
    n_neighbors=3,
    weights="distance",
)
knn_clf.fit(X_train_augmented, y_train_augmented)

# Predict the test set and print the resulting accuracy.
y_pred = knn_clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)
๋์ผํ๊ฒ KNeiborsClassifier๋ก ์งํํ๋ 0.9717 -> 0.9763 ์ผ๋ก ์ ์๋ฏธํ ์ฑ๋ฅ ์ฐจ์ด๋ฅผ ๊ฑฐ๋์๋ค.
'AI > Machine Learning' 카테고리의 다른 글
[핸즈온 머신러닝 2판] Chapter 10 요약 (0) | 2022.12.08 |
---|---|
[핸즈온 머신러닝 2판] Chapter 8 요약 (0) | 2022.12.03 |
[핸즈온 머신러닝 2판] Chapter 5 요약 (0) | 2022.11.29 |
[핸즈온 머신러닝 2판] Chapter 4 요약 (1) | 2022.10.27 |
[핸즈온 머신러닝 2판] Chapter 3 요약 (0) | 2022.10.26 |