From a58d26aa4ce55903f80a8c0fd9eb02d9c9c74146 Mon Sep 17 00:00:00 2001 From: scxixixi <1310503779@qq.com> Date: Fri, 22 May 2020 22:08:01 +0800 Subject: [PATCH] First project --- README.md | 1 + face.rec.klda.homework.py | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+) create mode 100644 face.rec.klda.homework.py diff --git a/README.md b/README.md index e69de29..6d1e8b6 100644 --- a/README.md +++ b/README.md @@ -0,0 +1 @@ +#Homework is submit here diff --git a/face.rec.klda.homework.py b/face.rec.klda.homework.py new file mode 100644 index 0000000..290e4d4 --- /dev/null +++ b/face.rec.klda.homework.py @@ -0,0 +1,222 @@ +""" +============================================================== +基于 Kernel LDA + KNN 的人脸识别 +使用 Kernel Discriminant Analysis 做特征降维 +使用 K-Nearest-Neighbor 做分类 + +数据: + 人脸图像来自于 Olivetti faces data-set from AT&T (classification) + 数据集包含 40 个人的人脸图像, 每个人都有 10 张图像 + 我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像 + +算法: + 需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维 + 代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容 +============================================================== +""" + +# License: BSD 3 clause + +import numpy as np +import matplotlib.pyplot as plt +from sklearn import datasets +from sklearn.model_selection import train_test_split +from sklearn.datasets import fetch_olivetti_faces +from sklearn.neighbors import KNeighborsClassifier +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler + +print(__doc__) +################################################ +""" +Scikit-learn-compatible Kernel Discriminant Analysis. 
+""" + +import numpy as np +from scipy import linalg +from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin +from sklearn.preprocessing import OneHotEncoder +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y +from scipy.spatial.distance import pdist, squareform + +class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin, + TransformerMixin): + """Kernel Discriminant Analysis. + + Parameters + ---------- + n_components: integer. + The dimension after transform. + gamma: float. + Parameter to RBF Kernel + lmb: float (>= 0.0), default=0.001. + Regularization parameter + + """ + + def __init__(self, n_components, gamma, lmb=0.001): + self.n_components = n_components + self.gamma = gamma + self.lmb = lmb + self.X = None # 用于存放输入的训练数据的 X + self.K = None # 用于存放训练数据 X 产生的 Kernel Matrix + self.M = None # 用于存放 Kernel LDA 最优化公式中的 M + self.N = None # 用于存放 Kernel LDA 最优化公式中的 N + self.EigenVectors = None # 用于存放 Kernel LDA 最优化公式中的 M 对应的广义特征向量, 每一列为一个特征向量, 按照对应特征值大小排序 + + def fit(self, X, y): + """Fit KDA model. + + Parameters + ---------- + X: numpy array of shape [n_samples, n_features] + Training set. + y: numpy array of shape [n_samples] + Target values. Only works for 2 classes with label/target 0 and 1. 
+ + Returns + ------- + self + + """ + self.X = X + + # calculate the euclidean matrix + distance_matrix = self.find_distance_matrix(X) + self.K = np.exp(-self.gamma * distance_matrix) + + # calculate indexes of data points of two class + K1 = self.K[:,y==0] + K2 = self.K[:,y==1] + + # calculate A = I - 1_{l_j} for calc of N + l1, l2 = K1.shape[1], K2.shape[1] + A1 = np.identity(l1) - (1 / float(l1)) * np.ones((l1, l1)) + A2 = np.identity(l2) - (1 / float(l2)) * np.ones((l2, l2)) + + # calculate within calss scatter matrix N + N1 = np.dot(A1, K1.T) + N1 = np.dot(K1, N1) + + N2 = np.dot(A2, K2.T) + N2 = np.dot(K2, N2) + + self.N = N1 + N2 + self.lmb * np.identity(len(N1)) + + M1 = np.sum(K1,axis=1)/float(l1) + M2 = np.sum(K2,axis=1)/float(l2) + + # calculate alpha + M_diff = M1 - M2 + self.M = np.outer(M_diff.transpose(), M_diff) + + _, vecs = linalg.eig(self.M, self.N) + self.EigenVectors = vecs + + return self + + + def find_distance_matrix(self, data): + sq_dists = pdist(data.reshape(10,64*64), 'sqeuclidean') + mat_sq_dists = squareform(sq_dists) + return mat_sq_dists + + def transform(self, X_test): + """Transform data with the trained KernelLDA model. + + Parameters + ---------- + X_test: numpy array of shape [n_samples, n_features] + The input data. + + Returns + ------- + y_pred: array-like, shape (n_samples, n_components) + Transformations for X. 
+ + """ + test_kernel_matrix = [] + for test_item in X_test: + dist = np.array([np.sum((test_item - row)**2) for row in self.X]) + k = np.exp(-self.gamma * dist) + test_kernel_matrix.append(k) + + # print(len(test_kernel_matrix)) + + # projecting data + y_pred = np.zeros((len(X_test), self.n_components)) + for i in range(0, self.n_components): + alpha_i = self.EigenVectors[:, i] + for j in range(len(test_kernel_matrix)): + test_kernel = test_kernel_matrix[j] + y_pred[j, i] = np.dot(alpha_i.transpose(),test_kernel) + + return y_pred + + +################################################ + +# 指定 KNN 中最近邻的个数 (k 的值) +n_neighbors = 3 + +# 设置随机数种子让实验可以复现 +random_state = 0 + +# 现在人脸数据集 +faces = fetch_olivetti_faces() +targets = faces.target + +# show sample images +images = faces.images[targets < 2] # save images + +features = faces.data # features +targets = faces.target # targets + +fig = plt.figure() # create a new figure window +for i in range(20): # display 20 images + # subplot : 4 rows and 5 columns + img_grid = fig.add_subplot(4, 5, i+1) + # plot features as image + img_grid.imshow(images[i], cmap='gray') + +plt.show() + +# Prepare data, 只限于处理类别 0 和 1 的人脸 +X, y = faces.data[targets < 2], faces.target[targets < 2] + +# Split into train/test +X_train, X_test, y_train, y_test = \ + train_test_split(X, y, test_size=0.5, stratify=y, + random_state=random_state) + + +# Reduce dimension to 2 with KernelDiscriminantAnalysis +# can adjust the value of 'gamma' as needed. 
# ------------------------------------------------------------------
# Embed with Kernel LDA, then evaluate the embedding with a KNN model
# ------------------------------------------------------------------
kda = make_pipeline(StandardScaler(),
                    KernelDiscriminantAnalysis(n_components=2, gamma = 0.000005))

# Nearest-neighbour classifier used to score the embedding quality.
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

plt.figure()

# Learn the Kernel LDA embedding on the training split.
kda.fit(X_train, y_train)

# Project both splits into the low-dimensional space.
train_embedding = kda.transform(X_train)
test_embedding = kda.transform(X_test)

# Fit KNN on the embedded training set and score it on the embedded
# test set.
knn.fit(train_embedding, y_train)
acc_knn = knn.score(test_embedding, y_test)

# Embed the whole data set in 2 dimensions for visualisation.
X_embedded = kda.transform(X)

# Scatter the projected points, coloured by class, with the score in
# the title.
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                          n_neighbors,
                                                          acc_knn))
plt.show()