Commit aee8dc20 by 20200318093

new

parents
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Print the module docstring (the task description at the top of the file)
# as a banner when the script starts.
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
from sklearn.metrics.pairwise import rbf_kernel
from numpy.linalg import matrix_rank, eigh, inv
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Two-class Kernel Discriminant Analysis with an RBF kernel.

    The model looks for coefficient vectors ``a`` that maximise the kernel
    Fisher criterion ``(a.T @ M @ a) / (a.T @ N @ a)``, where ``M`` is the
    between-class scatter and ``N`` the within-class scatter, both expressed
    in terms of the kernel matrix of the training data.

    Because ``M`` is the outer product of a single vector it has rank one, so
    for two classes only the first component carries discriminative
    information; further components belong to (near-)zero eigenvalues.

    Parameters
    ----------
    n_components : int
        The dimension after transform.
    gamma : float
        Parameter of the RBF kernel ``exp(-gamma * ||x - y||^2)``.
    lmb : float (>= 0.0), default=0.001
        Regularization parameter; ``lmb * I`` is added to ``N`` so that the
        generalized eigenproblem is well posed.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training data X seen by fit()
        self.K = None  # kernel blocks [K(X, X0), K(X, X1)] of the training data
        self.M = None  # matrix M of the Kernel LDA objective
        self.N = 0  # matrix N of the Kernel LDA objective
        # Generalized eigenvectors of (M, N), one per column, ordered by
        # decreasing eigenvalue.
        self.EigenVectors = None

    def Kernal(self, x, y, gamma):
        """Return the RBF kernel value ``exp(-gamma * ||x - y||^2)``.

        Parameters
        ----------
        x, y : array-like of shape [n_features]
            The two vectors to compare.
        gamma : float
            Kernel width parameter.

        Returns
        -------
        float
        """
        diff = np.asarray(x, dtype=np.float64) - np.asarray(y, dtype=np.float64)
        return math.exp(-gamma * float(np.dot(diff, diff)))

    def Kernal_matrix(self, x1, x2, gamma):
        """Return the RBF kernel matrix between the rows of x1 and x2.

        Parameters
        ----------
        x1 : array-like of shape [n_samples_1, n_features]
        x2 : array-like of shape [n_samples_2, n_features]
        gamma : float
            Kernel width parameter.

        Returns
        -------
        numpy array of shape [n_samples_1, n_samples_2]
            Entry (i, j) is the kernel value of x1[i] and x2[j].
        """
        # Delegate to the vectorised pairwise routine used by fit() and
        # transform() instead of evaluating the kernel pair by pair.
        return rbf_kernel(np.asarray(x1, dtype=np.float64),
                          np.asarray(x2, dtype=np.float64),
                          gamma=gamma)

    def fit(self, X, y):
        """Fit KDA model.

        Parameters
        ----------
        X : numpy array of shape [n_samples, n_features]
            Training set.
        y : numpy array of shape [n_samples]
            Target values. Only works for 2 classes with label/target 0 and 1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``lmb`` is negative or one of the two classes has no samples.
        """
        if self.lmb < 0:
            raise ValueError("lmb must be >= 0.0, got {!r}".format(self.lmb))
        # Work in double precision so the scatter matrices stay well
        # conditioned even when the caller passes float32 data.
        X, y = check_X_y(X, y, dtype=np.float64)
        n_samples = X.shape[0]

        # Split the training data by label: index 0 -> class 0, 1 -> class 1.
        class_data = [X[y == label] for label in (0, 1)]
        if any(part.shape[0] == 0 for part in class_data):
            raise ValueError(
                "KernelDiscriminantAnalysis needs at least one sample for "
                "each of the labels 0 and 1.")

        self.X = X
        # Kernel block of every training sample against each class.
        self.K = [rbf_kernel(X, part, gamma=self.gamma) for part in class_data]

        # M = (M0 - M1) @ (M0 - M1).T, where Mj is the mean kernel column of
        # class j. M is symmetric.
        class_means = [block.mean(axis=1, keepdims=True) for block in self.K]
        mean_gap = class_means[0] - class_means[1]
        self.M = mean_gap @ mean_gap.T

        # N = sum_j Kj @ (I - 1/lj) @ Kj.T over BOTH classes. The centering
        # matrix (I - 1/lj) is a symmetric projector, so each term equals
        # Kc @ Kc.T with Kc = Kj minus its mean column. N is rebuilt from
        # scratch on every call so that refitting does not accumulate.
        within = np.zeros((n_samples, n_samples))
        for block, mean in zip(self.K, class_means):
            centered = block - mean
            within += centered @ centered.T

        # Regularize with the user-supplied lmb; if N is still rank
        # deficient (e.g. lmb == 0), keep adding an escalating ridge.
        within += self.lmb * np.eye(n_samples)
        ridge = 0.01
        while matrix_rank(within) != n_samples:
            within += ridge * np.eye(n_samples)
            ridge *= 5
        self.N = within

        # Solve the generalized symmetric problem M a = w N a directly.
        # inv(N) @ M is not symmetric in general, so it must not be handed
        # to a symmetric eigen-solver.
        eig_values, eig_vectors = linalg.eigh(self.M, self.N)

        # Store the eigenvectors ordered by decreasing eigenvalue.
        order = np.argsort(eig_values)[::-1]
        self.EigenVectors = eig_vectors[:, order]
        return self

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test : numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        X_transformed : numpy array of shape [n_samples, n_components]
            Projection of X_test onto the leading discriminant directions.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If fit() has not been called yet.
        """
        if self.EigenVectors is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This KernelDiscriminantAnalysis instance is not fitted yet. "
                "Call 'fit' before using 'transform'.")
        X_test = check_array(X_test, dtype=np.float64)
        # Keep the eigenvectors of the n_components largest eigenvalues.
        projection = self.EigenVectors[:, :self.n_components]
        # Kernel of each test sample against every training sample, projected.
        return rbf_kernel(X_test, self.X, gamma=self.gamma) @ projection
################################################
# Number of nearest neighbours used by KNN (the value of k)
n_neighbors = 3
# Fixed random seed so that the experiment is reproducible
random_state = 0

# Download / load the Olivetti faces data set
faces = fetch_olivetti_faces()
targets = faces.target

# Preview the images of the first two persons (labels 0 and 1)
images = faces.images[targets < 2]
features = faces.data
targets = faces.target
fig = plt.figure()
for slot in range(1, 21):
    # 4 x 5 grid, one cell per face image
    fig.add_subplot(4, 5, slot).imshow(images[slot - 1], cmap='gray')
plt.show()

# Keep only the samples that belong to classes 0 and 1
X = faces.data[targets < 2]
y = faces.target[targets < 2]

# Stratified 50/50 split into training and test halves
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=random_state)

# Standardise, then reduce to 2 dimensions with KernelDiscriminantAnalysis.
# 'gamma' can be tuned as needed.
kda = make_pipeline(
    StandardScaler(),
    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005),
)
# Nearest-neighbour classifier used to evaluate the embedding
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

# Learn the embedding on the training half
kda.fit(X_train, y_train)
# Train KNN on the embedded training samples
train_embedding = kda.transform(X_train)
knn.fit(train_embedding, y_train)
# Score KNN on the embedded test samples
test_embedding = kda.transform(X_test)
acc_knn = knn.score(test_embedding, y_test)

# Embed the whole two-class data set and plot it with the test accuracy
X_embedded = kda.transform(X)
plt.figure()
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
title_text = "{}, KNN (k={})\nTest accuracy = {:.2f}".format(
    'kda', n_neighbors, acc_knn)
plt.title(title_text)
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment