First project

a58d26aa · 20200318019 · 6b4d772a · a58d26aa · a58d26aa
Commit a58d26aa authored May 22, 2020 by 20200318019
Show whitespace changes
Inline Side-by-side

Showing with 223 additions and 0 deletions

README.md
+1 -0

face.rec.klda.homework.py
+222 -0

No files found.
--- a/README.md
+++ b/README.md
+# Homework is submitted here
--- a/face.rec.klda.homework.py
+++ b/face.rec.klda.homework.py
+"""
+==============================================================
+基于 Kernel LDA + KNN 的人脸识别
+使用 Kernel Discriminant Analysis 做特征降维
+使用 K-Nearest-Neighbor 做分类
+
+数据:
+    人脸图像来自于 Olivetti faces data-set from AT&T (classification)
+    数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
+    我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
+
+算法:
+    需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
+    代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
+==============================================================
+"""
+
+# License: BSD 3 clause
+
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import fetch_olivetti_faces
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+
+# Echo the module docstring (the assignment description at the top of this
+# file) to stdout when the script starts.
+print(__doc__)
+################################################
+"""
+Scikit-learn-compatible Kernel Discriminant Analysis.
+"""
+
+import numpy as np
+from scipy import linalg
+from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+from scipy.spatial.distance import pdist, squareform
+
+class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
+                                 TransformerMixin):
+    """Two-class Kernel Discriminant Analysis with an RBF kernel.
+
+    Samples are compared through the kernel
+    ``k(a, b) = exp(-gamma * ||a - b||^2)``. The projection directions are
+    the generalized eigenvectors of ``M alpha = lambda N alpha``, where ``M``
+    is built from the difference of the per-class kernel means and ``N`` is
+    the regularized within-class matrix computed in ``fit``.
+
+    Parameters
+    ----------
+    n_components: integer.
+                  The dimension after transform.
+    gamma: float.
+           Parameter to RBF Kernel
+    lmb: float (>= 0.0), default=0.001.
+         Regularization parameter
+
+    """
+
+    def __init__(self, n_components, gamma, lmb=0.001):
+        self.n_components = n_components
+        self.gamma = gamma
+        self.lmb = lmb
+        self.X = None  # training samples passed to fit()
+        self.K = None  # kernel matrix computed from the training samples
+        self.M = None  # matrix M of the Kernel LDA objective
+        self.N = None  # matrix N of the Kernel LDA objective
+        # Generalized eigenvectors of (M, N), one per column, ordered by
+        # decreasing eigenvalue.
+        self.EigenVectors = None
+
+    def fit(self, X, y):
+        """Fit KDA model.
+
+        Parameters
+        ----------
+        X: numpy array of shape [n_samples, n_features]
+           Training set.
+        y: numpy array of shape [n_samples]
+           Target values. Only works for 2 classes with label/target 0 and 1.
+
+        Returns
+        -------
+        self
+
+        Raises
+        ------
+        ValueError
+            If either class 0 or class 1 has no sample in ``y``.
+
+        """
+        X = np.asarray(X)
+        # A plain list would make ``y == 0`` a single bool instead of a mask.
+        y = np.asarray(y)
+        self.X = X
+
+        # RBF kernel matrix of the training samples
+        self.K = np.exp(-self.gamma * self.find_distance_matrix(X))
+
+        # Kernel columns that belong to each class
+        K1 = self.K[:, y == 0]
+        K2 = self.K[:, y == 1]
+        l1, l2 = K1.shape[1], K2.shape[1]
+        if l1 == 0 or l2 == 0:
+            raise ValueError(
+                "fit() needs at least one sample of class 0 and one of "
+                "class 1, got {} and {}".format(l1, l2))
+
+        # Centering matrices A_j = I - 1_{l_j}, where 1_{l_j} has every
+        # entry equal to 1 / l_j
+        A1 = np.identity(l1) - np.ones((l1, l1)) / float(l1)
+        A2 = np.identity(l2) - np.ones((l2, l2)) / float(l2)
+
+        # Within-class matrix N = sum_j K_j A_j K_j^T, plus lmb * I as
+        # regularization
+        N1 = np.dot(K1, np.dot(A1, K1.T))
+        N2 = np.dot(K2, np.dot(A2, K2.T))
+        self.N = N1 + N2 + self.lmb * np.identity(self.K.shape[0])
+
+        # Between-class matrix M from the difference of the class kernel means
+        M_diff = K1.mean(axis=1) - K2.mean(axis=1)
+        self.M = np.outer(M_diff, M_diff)
+
+        # linalg.eig returns complex eigenpairs in no particular order. Keep
+        # the real parts and put the largest eigenvalue first so that
+        # transform() can take the leading n_components columns.
+        vals, vecs = linalg.eig(self.M, self.N)
+        order = np.argsort(-vals.real)
+        self.EigenVectors = vecs[:, order].real
+
+        return self
+
+    def find_distance_matrix(self, data):
+        """Return the matrix of pairwise squared Euclidean distances.
+
+        Parameters
+        ----------
+        data: array-like whose first axis indexes the samples.
+              Each sample is flattened to one row before distances are taken.
+
+        Returns
+        -------
+        mat_sq_dists: numpy array of shape [n_samples, n_samples]
+
+        """
+        data = np.asarray(data)
+        # Flatten per sample instead of assuming 10 samples of 64*64 values.
+        flat = data.reshape(len(data), -1)
+        return squareform(pdist(flat, 'sqeuclidean'))
+
+    def transform(self, X_test):
+        """Transform data with the trained KernelLDA model.
+
+        Parameters
+        ----------
+        X_test: numpy array of shape [n_samples, n_features]
+           The input data.
+
+        Returns
+        -------
+        y_pred: array-like, shape (n_samples, n_components)
+                Transformations for X.
+
+        Raises
+        ------
+        ValueError
+            If ``n_components`` exceeds the number of stored eigenvectors.
+
+        """
+        X_test = np.asarray(X_test)
+        train = np.asarray(self.X)
+
+        if self.n_components > self.EigenVectors.shape[1]:
+            raise ValueError(
+                "n_components={} exceeds the {} available eigenvectors".format(
+                    self.n_components, self.EigenVectors.shape[1]))
+
+        # Squared distance from every test sample to every training sample.
+        # The reshape keeps a 2-D result when X_test is empty.
+        sq_dists = np.array(
+            [np.sum((train - sample) ** 2, axis=1) for sample in X_test]
+        ).reshape(len(X_test), len(train))
+        test_kernel = np.exp(-self.gamma * sq_dists)
+
+        # Project onto the leading directions. np.real keeps the output a
+        # float array even if EigenVectors holds complex values.
+        alphas = np.real(self.EigenVectors[:, :self.n_components])
+        return np.dot(test_kernel, alphas)
+
+
+################################################
+
+# Number of nearest neighbours used by KNN (the value of k)
+n_neighbors = 3
+
+# Fix the random seed so the experiment can be reproduced
+random_state = 0
+
+# Load the Olivetti faces data set
+faces = fetch_olivetti_faces()
+features = faces.data   # features
+targets = faces.target  # targets
+
+# Only the faces labelled 0 and 1 are used; build the mask once
+mask = targets < 2
+images = faces.images[mask]
+
+# Show the selected images on a grid of 4 rows and 5 columns (20 cells)
+fig = plt.figure()
+for i, image in enumerate(images[:20]):
+    img_grid = fig.add_subplot(4, 5, i + 1)
+    img_grid.imshow(image, cmap='gray')
+
+plt.show()
+
+# Prepare data, restricted to the faces of classes 0 and 1
+X, y = features[mask], targets[mask]
+
+# Split into train/test
+X_train, X_test, y_train, y_test = \
+    train_test_split(X, y, test_size=0.5, stratify=y,
+                     random_state=random_state)
+
+# Reduce dimension to 2 with KernelDiscriminantAnalysis
+# can adjust the value of 'gamma' as needed.
+kda = make_pipeline(StandardScaler(),
+                    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005))
+
+# Use a nearest neighbor classifier to evaluate the methods
+knn = KNeighborsClassifier(n_neighbors=n_neighbors)
+
+# Fit the method's model
+kda.fit(X_train, y_train)
+
+# Fit a nearest neighbor classifier on the embedded training set
+knn.fit(kda.transform(X_train), y_train)
+
+# Compute the nearest neighbor accuracy on the embedded test set
+acc_knn = knn.score(kda.transform(X_test), y_test)
+
+# Embed the data set in 2 dimensions using the fitted model
+X_embedded = kda.transform(X)
+
+# Plot the projected points and show the evaluation score
+plt.figure()
+plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
+plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
+                                                              n_neighbors,
+                                                              acc_knn))
+plt.show()