Commit 8021fe86 by 20200318111

kernel_lda是二分类,kernel_lda_three是多分类

parents
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="renderExternalDocumentation" value="true" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="pytest" />
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/kernelLDA_fac_reg.iml" filepath="$PROJECT_DIR$/.idea/kernelLDA_fac_reg.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW" value="true" />
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>
\ No newline at end of file
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Kernel Discriminant Analysis with an RBF kernel.

    The training samples are mapped implicitly through the kernel
    ``k(a, b) = exp(-gamma * ||a - b||^2)``.  ``fit`` builds the between-class
    matrix ``M`` and the regularised within-class matrix ``N`` and keeps the
    leading eigenvectors of ``N^-1 M``; ``transform`` projects kernel
    evaluations against the training set onto those eigenvectors.

    Parameters
    ----------
    n_components: integer.
        The dimension after transform.
    gamma: float.
        Parameter to RBF Kernel
    lmb: float (>= 0.0), default=0.001.
        Regularization parameter

    Notes
    -----
    NOTE(review): the class mixes in ``ClassifierMixin`` but defines no
    ``predict`` method, so it is only usable as a transformer.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training samples passed to fit()
        self.K = None  # kernel matrix of the training samples
        self.M = None  # matrix M of the kernel LDA objective
        self.N = None  # matrix N of the kernel LDA objective
        # Selected eigenvectors of N^-1 M: a list of (n_samples, 1) column
        # vectors ordered by decreasing eigenvalue magnitude.
        self.EigenVectors = None

    def rbfkernel(self, gamma, distance):
        """Return ``exp(gamma * distance)``.

        NOTE(review): the exponent carries no minus sign, unlike ``kernel``;
        nothing in this class calls this method.  Kept unchanged for
        backward compatibility.
        """
        return np.exp(gamma * distance)

    def kernel(self, X, X_c, gamma):
        """Return the RBF kernel value between the two samples X and X_c."""
        return np.exp(-gamma * np.sum((X - X_c) ** 2))

    def _kernel_matrix(self, A, B):
        """Return the RBF kernel matrix between the rows of A and the rows of B.

        Entry ``[i, j]`` equals ``exp(-self.gamma * ||A[i] - B[j]||^2)``.
        Distances are computed one row of ``A`` at a time so that memory use
        stays at ``O(len(B) * n_features)``.
        """
        A = np.asarray(A, dtype=float)
        B = np.asarray(B, dtype=float)
        sq_dist = np.empty((A.shape[0], B.shape[0]))
        for i, a in enumerate(A):
            sq_dist[i] = np.sum((B - a) ** 2, axis=1)
        return np.exp(-self.gamma * sq_dist)

    def fit(self, X, y):
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
            Training set.
        y: numpy array of shape [n_samples]
            Target values.  Any number of classes is supported and the labels
            may be arbitrary values (they are only compared for equality).

        Returns
        -------
        self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.X = X
        n_samples = X.shape[0]

        # The kernel matrix is computed once and reused for both M and N.
        K = self._kernel_matrix(X, X)
        self.K = K
        total_mean = K.mean(axis=1)

        M = np.zeros((n_samples, n_samples))
        N = np.zeros((n_samples, n_samples))
        ridge = self.lmb * np.eye(n_samples)
        for label in np.unique(y):
            K_c = K[:, y == label]  # kernel columns of this class
            n_c = K_c.shape[1]
            # Between-class term: n_c * (m_c - m)(m_c - m)^T
            diff = K_c.mean(axis=1) - total_mean
            M += n_c * np.outer(diff, diff)
            # Within-class term: K_c (I - 1/n_c) K_c^T.  The ridge term is
            # added once per class, exactly as the original code did, so the
            # effective regularisation is n_classes * lmb.
            centering = np.eye(n_c) - np.full((n_c, n_c), 1.0 / n_c)
            N += K_c.dot(centering).dot(K_c.T) + ridge
        self.M = M
        self.N = N

        # Eigen-decomposition of N^-1 M; solve() avoids forming the inverse.
        eigvals, eigvecs = np.linalg.eig(np.linalg.solve(N, M))
        # Stable sort keeps the original order among equal magnitudes.
        order = np.argsort(-np.abs(eigvals), kind="stable")
        # Keep the n_components leading directions as column vectors.
        self.EigenVectors = [eigvecs[:, order[i]][:, np.newaxis]
                             for i in range(self.n_components)]
        return self

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
            Transformations for X.
        """
        X_test = np.asarray(X_test)
        X_new = np.zeros((X_test.shape[0], self.n_components))
        if self.n_components:
            K_test = self._kernel_matrix(X_test, self.X)
            directions = np.hstack(self.EigenVectors[:self.n_components])
            # eig() may return complex vectors; only the real part is kept.
            X_new[:, :] = np.real(K_test.dot(directions))
        return X_new
################################################
# Number of nearest neighbours used by KNN (the value of k)
n_neighbors = 1
# Fixed random seed so that the experiment is reproducible
random_state = 0
# Download / load the Olivetti faces data set
faces = fetch_olivetti_faces()
features = faces.data  # features
targets = faces.target  # targets
# Show the sample images of the subjects used below (target ids 0-4)
images = faces.images[targets < 5]
n_cols = 5
# Enough rows to hold every selected image, instead of a hard-coded count
n_rows = (len(images) + n_cols - 1) // n_cols
fig = plt.figure()  # create a new figure window
for i, image in enumerate(images):
    # one grid cell per image, filled row by row
    img_grid = fig.add_subplot(n_rows, n_cols, i + 1)
    # plot features as image
    img_grid.imshow(image, cmap='gray')
plt.show()
# Multi-class problem: keep the five subjects with target ids 0-4
X, y = faces.data[targets < 5], faces.target[targets < 5]
# Split into train/test
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.5, stratify=y,
                     random_state=random_state)
# Reduce dimension to 4 with KernelDiscriminantAnalysis;
# adjust the value of 'gamma' as needed.
# The features are standardised first: the RBF kernel depends on squared
# Euclidean distances, so the scale of each feature affects the kernel values.
kda = make_pipeline(StandardScaler(),
                    KernelDiscriminantAnalysis(n_components=4, gamma=0.000005))
# Use a nearest neighbor classifier to evaluate the methods
knn = KNeighborsClassifier(n_neighbors=n_neighbors)
plt.figure()
# Fit the method's model
kda.fit(X_train, y_train)
# Fit a nearest neighbor classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)
# Compute the nearest neighbor accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)
print(acc_knn)
# Embed the whole data set using the fitted model
X_embedded = kda.transform(X)
# Plot the first two embedded components and show the evaluation score
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                          n_neighbors,
                                                          acc_knn))
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment