"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类

数据:
    人脸图像来自于 Olivetti faces data-set from AT&T (classification)
    数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
    我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像

算法:
    需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
    代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""

# English summary of the module docstring above.  The docstring itself is
# printed at start-up through ``print(__doc__)``, so its text is part of the
# program output and is intentionally left untouched.
#
#   Face recognition with Kernel LDA + KNN: Kernel Discriminant Analysis is
#   used for dimensionality reduction and K-Nearest-Neighbor for
#   classification.
#   Data: the Olivetti faces data set from AT&T (40 people, 10 images each);
#   only the first two people (labels 0 and 1) are used.
#   Algorithm: an RBF-kernel Kernel Discriminant Analysis for two classes,
#   implemented in KernelDiscriminantAnalysis.fit() / .transform().
#
# Provenance: this file was added as ``face.rec.klda.homework.py`` by commit
# aee8dc20e6e5c58f6cba3c99202017a057b48231 (subject "new", author
# corvettettt <corvettettt@gmail.com>, Thu, 30 Apr 2020 00:28:38 -0500).
# The same commit also added four binary attachments under ``homework2/``
# (one .docx and three .jpg files, 528387 / 101799 / 326256 / 251135 bytes)
# whose file names are mis-encoded in the patch and whose contents were not
# included in it.

# License: BSD 3 clause

import math

import matplotlib.pyplot as plt
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.datasets import fetch_olivetti_faces
from sklearn.exceptions import NotFittedError
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_array, check_X_y

# Imported by the original file but not needed by the current code; kept so
# that anything relying on these module-level names keeps working.
from numpy.linalg import eigh, inv, matrix_rank  # noqa: F401
from sklearn import datasets  # noqa: F401
from sklearn.preprocessing import OneHotEncoder  # noqa: F401
from sklearn.utils.validation import check_is_fitted  # noqa: F401

################################################
# Scikit-learn-compatible Kernel Discriminant Analysis.

# Fallback regularisation used only when ``N + lmb * I`` is still not
# positive definite (e.g. ``lmb == 0`` with a rank-deficient ``N``): the
# jitter starts at ``_MIN_JITTER`` and is multiplied by 10 per attempt.
_MIN_JITTER = 1e-10
_MAX_JITTER_STEPS = 15


# Mixins are listed before BaseEstimator, as the scikit-learn developer guide
# requires for a correct MRO.  ClassifierMixin is kept only for backward
# compatibility: the class defines no ``predict``, so the inherited ``score``
# is not usable.
class KernelDiscriminantAnalysis(ClassifierMixin, TransformerMixin,
                                 BaseEstimator):
    """Two-class Kernel (Fisher) Discriminant Analysis with an RBF kernel.

    The model looks for coefficient vectors ``a`` maximising the ratio
    ``(a.T M a) / (a.T N a)``, where ``M`` is the between-class scatter and
    ``N`` the within-class scatter, both expressed through the kernel matrix
    of the training data.  The maximisers are the generalized eigenvectors
    of ``M a = w (N + lmb * I) a``.

    Parameters
    ----------
    n_components: integer.
        The dimension after transform.  With two classes ``M`` has rank one,
        so only the first component carries discriminative information.
    gamma: float.
        Parameter of the RBF kernel ``exp(-gamma * ||x - y||^2)``.
    lmb: float (>= 0.0), default=0.001.
        Regularization parameter added to the diagonal of ``N``.

    Attributes
    ----------
    X: training data seen by ``fit`` (``None`` before fitting).
    K: list ``[K0, K1]`` with the kernel between all training samples
        (rows) and the samples of class 0 / class 1 (columns).
    M: between-class scatter matrix in kernel space.
    N: regularized within-class scatter matrix in kernel space.
    EigenVectors: generalized eigenvectors, one per column, sorted by
        decreasing eigenvalue.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training data passed to fit()
        self.K = None  # per-class kernel matrices built from X
        self.M = None  # M of the Kernel LDA objective
        self.N = None  # N of the Kernel LDA objective (regularized)
        # Generalized eigenvectors of (M, N); one eigenvector per column,
        # ordered by decreasing eigenvalue.
        self.EigenVectors = None

    def Kernal(self, x, y, gamma):
        """Return the RBF kernel ``exp(-gamma * ||x - y||^2)`` of two vectors."""
        diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
        return math.exp(-gamma * float(diff.dot(diff)))

    def Kernal_matrix(self, x1, x2, gamma):
        """Return the RBF kernel matrix between the rows of x1 and of x2.

        The result has shape ``(x1.shape[0], x2.shape[0])``.  The computation
        is delegated to the vectorised ``rbf_kernel`` instead of evaluating
        ``Kernal`` once per pair in Python.
        """
        return rbf_kernel(x1, x2, gamma=gamma)

    def _solve_discriminants(self, between, within):
        """Solve ``between @ a = w * (within + jitter * I) @ a``.

        Returns ``(eigenvalues, eigenvectors, regularized_within)``.  The
        jitter starts at ``self.lmb`` and is increased only if the solver
        reports that the regularized matrix is not positive definite.
        """
        identity = np.eye(within.shape[0])
        jitter = float(self.lmb)
        for _ in range(_MAX_JITTER_STEPS):
            regularized = within + jitter * identity
            try:
                # Both matrices are symmetric, so the generalized symmetric
                # solver applies; it needs `regularized` positive definite.
                eigvals, eigvecs = linalg.eigh(between, regularized)
            except linalg.LinAlgError:
                jitter = max(jitter * 10.0, _MIN_JITTER)
                continue
            return eigvals, eigvecs, regularized
        raise linalg.LinAlgError(
            "within-class scatter matrix could not be regularized into a "
            "positive definite matrix")

    def fit(self, X, y):
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
            Training set.
        y: numpy array of shape [n_samples]
            Target values. Only works for 2 classes with label/target 0 and 1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``lmb`` is negative, if ``y`` does not contain exactly the
            labels 0 and 1, or if ``n_components`` is not in
            ``[1, n_samples]``.
        """
        # float64 keeps the kernel arithmetic stable: with a small gamma all
        # kernel values are close to 1 and float32 loses their differences.
        X, y = check_X_y(X, y, dtype=np.float64)
        if self.lmb < 0:
            raise ValueError("lmb must be >= 0, got {!r}".format(self.lmb))
        if not np.array_equal(np.unique(y), [0, 1]):
            raise ValueError("y must contain exactly the two labels 0 and 1")
        n_samples = X.shape[0]
        if not 1 <= self.n_components <= n_samples:
            raise ValueError(
                "n_components must be between 1 and n_samples={}, got {!r}"
                .format(n_samples, self.n_components))

        # Kernel between every training sample (rows) and the samples of
        # each class (columns): K0 and K1.
        gram = rbf_kernel(X, X, gamma=self.gamma)
        class_kernels = [gram[:, y == label] for label in (0, 1)]

        # M = (M0 - M1) @ (M0 - M1).T, where Mj is the mean kernel column of
        # class j.  M is symmetric with rank one.
        class_means = [k.mean(axis=1, keepdims=True) for k in class_kernels]
        mean_gap = class_means[0] - class_means[1]
        between = mean_gap @ mean_gap.T

        # N = sum_j Kj @ (I - 1/l_j) @ Kj.T over BOTH classes.  Because
        # (I - 1/l_j) is a symmetric projection, this equals C @ C.T with
        # C = Kj minus its row means, which is symmetric PSD by construction.
        # N is rebuilt from zero on every call so that refitting does not
        # accumulate.
        within = np.zeros((n_samples, n_samples))
        for kernel in class_kernels:
            centred = kernel - kernel.mean(axis=1, keepdims=True)
            within += centred @ centred.T

        eigvals, eigvecs, within = self._solve_discriminants(between, within)

        # Largest eigenvalue first.
        order = np.argsort(eigvals)[::-1]

        self.X = X
        self.K = class_kernels
        self.M = between
        self.N = within
        self.EigenVectors = eigvecs[:, order]
        return self

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
            Transformations for X.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.EigenVectors is None:
            raise NotFittedError(
                "This KernelDiscriminantAnalysis instance is not fitted yet. "
                "Call 'fit' before using 'transform'.")
        X_test = check_array(X_test, dtype=np.float64)
        # Keep the eigenvectors of the n_components largest eigenvalues.
        projection = self.EigenVectors[:, :self.n_components]
        # Project the kernel between the new samples and the training set.
        return rbf_kernel(X_test, self.X, gamma=self.gamma) @ projection
################################################


# Number of nearest neighbours used by KNN (the value of k).
n_neighbors = 3

# Random seed so that the experiment is reproducible.
random_state = 0


def main():
    """Run the face-recognition experiment and show the two figures."""
    print(__doc__)

    # Download / load the face data set.
    faces = fetch_olivetti_faces()
    targets = faces.target
    first_two_people = targets < 2

    # Show the sample images of the two selected people in a 4 x 5 grid.
    images = faces.images[first_two_people]
    fig = plt.figure()
    for i, image in enumerate(images[:20]):
        img_grid = fig.add_subplot(4, 5, i + 1)
        img_grid.imshow(image, cmap='gray')
    plt.show()

    # Prepare data: only the faces of classes 0 and 1 are used.
    X, y = faces.data[first_two_people], targets[first_two_people]

    # Split into train/test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, stratify=y, random_state=random_state)

    # Reduce dimension to 2 with KernelDiscriminantAnalysis; adjust the
    # value of 'gamma' as needed.
    # NOTE: with two classes only the first component is discriminative;
    # the second one belongs to a zero eigenvalue and is kept for plotting.
    kda = make_pipeline(
        StandardScaler(),
        KernelDiscriminantAnalysis(n_components=2, gamma=0.000005))

    # Use a nearest neighbor classifier to evaluate the embedding.
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Fit the embedding, then the classifier on the embedded training set.
    kda.fit(X_train, y_train)
    knn.fit(kda.transform(X_train), y_train)

    # Nearest neighbor accuracy on the embedded test set.
    acc_knn = knn.score(kda.transform(X_test), y_test)

    # Embed the whole data set in 2 dimensions using the fitted model.
    X_embedded = kda.transform(X)

    # Plot the projected points and show the evaluation score.
    plt.figure()
    plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
    plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                              n_neighbors,
                                                              acc_knn))
    plt.show()


if __name__ == "__main__":
    main()