Commit ac04cd68 by 20200318029

homework2

parent f400d5ad
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
class RBFKernel(object):
    """Gaussian (RBF) kernel.

    k(x, y) = exp(-gamma * ||x - y||^2)

    Parameters
    ----------
    gamma: float.
        Width parameter of the kernel; larger values make the kernel
        decay faster with the distance between x and y.
    """

    def __init__(self, gamma: float) -> None:
        self._gamma = gamma

    @property
    def gamma(self) -> float:
        """Current value of the kernel parameter gamma."""
        return self._gamma

    @gamma.setter
    def gamma(self, gamma: float) -> None:
        self._gamma = gamma

    def _rbf(self, x: np.ndarray, y: np.ndarray) -> float:
        """Evaluate the kernel on one pair of samples of equal shape."""
        # Cast to float before subtracting: with unsigned integer inputs
        # (e.g. uint8 pixel values) `x - y` would wrap around and yield a
        # wrong squared distance. This also accepts plain sequences.
        diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
        return np.exp(-self._gamma * np.sum(diff ** 2))

    def __call__(self, x: np.ndarray, y: np.ndarray) -> float:
        """Return k(x, y); makes the kernel object usable as a function."""
        return self._rbf(x, y)
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Kernel Discriminant Analysis for two classes, using an RBF kernel.

    Parameters
    ----------
    n_components: integer.
        The dimension after transform.
    gamma: float.
        Parameter to RBF Kernel
    lmb: float (>= 0.0), default=0.001.
        Regularization parameter; ``lmb * I`` is added to the matrix N.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training samples passed to fit()
        self.K = None  # kernel matrices [K_1, K_2] of the training data
        self.M = None  # matrix M of the Kernel LDA objective
        self.N = None  # matrix N of the Kernel LDA objective (regularized)
        # Generalized eigenvectors of (M, N); one eigenvector per column,
        # ordered by decreasing eigenvalue.
        self.EigenVectors = None
        self._rbf = RBFKernel(gamma)

    def fit(self, X: np.ndarray, y: np.ndarray) -> "KernelDiscriminantAnalysis":
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
            Training set.
        y: numpy array of shape [n_samples]
            Target values. Only works for 2 classes.

        Returns
        -------
        self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # np.unique returns the labels sorted, so the class order is
        # deterministic (iteration order of a set is not guaranteed).
        classes = np.unique(y)
        assert len(classes) == 2, "only works for 2 classes"

        # Rebuild the kernel so that a gamma changed after construction
        # (e.g. through set_params) is actually used.
        self._rbf = RBFKernel(self.gamma)
        self.X = X

        # K_i[n, m] = k(X[n], m-th sample of class i)
        K_1 = self._K_cls(X, X[y == classes[0], :])
        K_2 = self._K_cls(X, X[y == classes[1], :])
        self.K = [K_1, K_2]

        # M = (M_2 - M_1)(M_2 - M_1)^T, where M_i is the row-wise mean of
        # K_i; reusing K_i avoids evaluating the kernel a second time.
        mean_diff = (K_2.mean(axis=1, keepdims=True)
                     - K_1.mean(axis=1, keepdims=True))
        self.M = np.matmul(mean_diff, mean_diff.T)

        # N = sum_i K_i (I - 1/n_i 11^T) K_i^T + lmb * I.
        # The regularizer must scale the identity; adding `lmb` to it
        # would add lmb to every entry of N plus a full identity.
        self.N = self._N_cls(K_1) + self._N_cls(K_2)
        self.N += self.lmb * np.identity(X.shape[0])

        # linalg.eig gives no ordering guarantee, so sort explicitly by
        # decreasing eigenvalue before keeping the leading columns.
        eigvals, eigvecs = linalg.eig(self.M, self.N)
        order = np.argsort(-eigvals.real)
        # Keep the real part so that transform() yields real coordinates.
        self.EigenVectors = np.real(eigvecs[:, order[: self.n_components]])
        return self

    def transform(self, X_test: np.ndarray) -> np.ndarray:
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
            Transformations for X.
        """
        # Row i holds the kernel values between test sample i and every
        # training sample; projecting it on the eigenvectors gives the
        # embedding of sample i.
        K_test = self._K_cls(np.asarray(X_test), self.X)
        return np.matmul(K_test, self.EigenVectors)

    def _M_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
        """Return the column vector M_cls of shape (n_samples, 1).

        Entry j is the mean kernel value between X[j] and the samples
        in X_cls.
        """
        return self._K_cls(X, X_cls).mean(axis=1, keepdims=True)

    def _K_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
        """Return the kernel matrix with K[n, m] = k(X[n], X_cls[m])."""
        num_samples = X.shape[0]
        num_samples_cls = X_cls.shape[0]
        K_cls = np.zeros(shape=(num_samples, num_samples_cls))
        for n in range(num_samples):
            for m in range(num_samples_cls):
                K_cls[n, m] = self._rbf(X[n, :], X_cls[m, :])
        return K_cls

    @staticmethod
    def _N_cls(K_cls: np.ndarray) -> np.ndarray:
        """Return K_cls (I - 1/n 11^T) K_cls^T for one class."""
        # Multiplying by the centering matrix from the right is the same
        # as subtracting each row's mean from that row.
        centered = K_cls - K_cls.mean(axis=1, keepdims=True)
        return np.matmul(centered, K_cls.T)
################################################
# Number of nearest neighbours used by KNN (the value of k)
n_neighbors = 3
# Fixed random seed so that the experiment is reproducible
random_state = 0

# Load the face data set from the local scikit-learn data directory
# (download_if_missing=False: the cached file must already exist)
faces = fetch_olivetti_faces(download_if_missing=False)
targets = faces.target  # targets
features = faces.data  # features
# Images of the first two people only (labels 0 and 1)
images = faces.images[targets < 2]

# Show the sample images on a grid of 4 rows and 5 columns
fig = plt.figure()
for i in range(20):
    img_grid = fig.add_subplot(4, 5, i + 1)
    img_grid.imshow(images[i], cmap='gray')
plt.show()

# Prepare data: only the faces of classes 0 and 1 are used
X = faces.data[targets < 2]
y = faces.target[targets < 2]

# Split into train/test halves, keeping the class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=random_state)

# Reduce dimension to 2 with KernelDiscriminantAnalysis;
# the value of 'gamma' can be adjusted as needed.
kda = make_pipeline(
    StandardScaler(),
    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005),
)

# Nearest neighbour classifier used to evaluate the embedding
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

# Fit the dimensionality reduction on the training set
kda.fit(X_train, y_train)

# Fit the classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)

# Nearest neighbour accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)

# Embed the whole (two-person) data set in 2 dimensions
X_embedded = kda.transform(X)

# Plot the projected points and show the evaluation score
plt.figure()
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title(
    "{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda', n_neighbors, acc_knn)
)
plt.show()
from scipy.io.matlab import loadmat
from sklearn.datasets._base import _pkl_filepath
import joblib
import os
# Directory for scikit-learn data: $SCIKIT_LEARN_DATA if set,
# otherwise ~/scikit_learn_data (with '~' expanded).
data_home = os.path.expanduser(
    os.environ.get('SCIKIT_LEARN_DATA', os.path.join('~', 'scikit_learn_data'))
)

# Read the raw MATLAB file found in that directory.
mat_path = os.path.join(data_home, "olivettifaces.mat")
mfile = loadmat(file_name=mat_path)

# Transpose the 'faces' array and copy it before storing.
faces = mfile['faces'].T.copy()

# Store the array as a compressed joblib pickle named 'olivetti.pkz'.
filepath = _pkl_filepath(data_home, 'olivetti.pkz')
joblib.dump(faces, filepath, compress=6)
这个是咱们第一个项目,没有思路的可以找小徐班主任要Jerry老师的视频描述(建议大家先自己多理解看看,如果暂时没有思路可以看看之前的课程,也可以再找找资料)<br/><br/>
要求如下:<br/><br/>
作业截止时间:5月22日23:59,具体答案将会在本周末讲解并公布<br/><br/>
----基于 Kernel LDA + KNN 的人脸识别<br/>
----使用 Kernel Discriminant Analysis 做特征降维<br/>
----使用 K-Nearest-Neighbor 做分类<br/><br/><br/>
数据:<br/>
----人脸图像来自于 Olivetti faces data-set from AT&T (classification)<br/>
----数据集包含 40 个人的人脸图像, 每个人都有 10 张图像<br/>
----我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像<br/><br/><br/>
算法:<br/>
----需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维<br/>
----代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容<br/><br/><br/>
结果:<br/>
1.要求识别成功率:100%<br/>
2.达到如图所示效果图
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment