homework2

ac04cd68 · 20200318029 · f400d5ad · ac04cd68 · ac04cd68 · ac04cd68
Commit ac04cd68 authored Apr 29, 2020 by 20200318029
Hide whitespace changes
Inline Side-by-side

Showing with 295 additions and 0 deletions

homework2/face.rec.klda.homework.py
+266 -0

homework2/olivetti_faces.py
+13 -0

homework2/readme.md
+16 -0

No files found.
--- a/homework2/face.rec.klda.homework.py
+++ b/homework2/face.rec.klda.homework.py
+"""
+==============================================================
+基于 Kernel LDA + KNN 的人脸识别
+使用 Kernel Discriminant Analysis 做特征降维
+使用 K-Nearest-Neighbor 做分类
+数据:
+    人脸图像来自于 Olivetti faces data-set from AT&T (classification)
+    数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
+    我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
+算法:
+    需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
+    代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
+==============================================================
+"""
+# License: BSD 3 clause
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import fetch_olivetti_faces
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+print(__doc__)
+################################################
+"""
+Scikit-learn-compatible Kernel Discriminant Analysis.
+"""
+import numpy as np
+from scipy import linalg
+from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
+class RBFKernel(object):
+    """Gaussian (RBF) kernel between two samples.
+
+    k(x, y) = exp(-gamma * ||x - y||^2)
+
+    The width parameter is kept in ``_gamma`` and exposed through the
+    read/write ``gamma`` property. Instances are callable.
+    """
+    def __init__(self, gamma: float) -> None:
+        self._gamma = gamma
+    @property
+    def gamma(self) -> float:
+        """Current kernel width parameter."""
+        return self._gamma
+    @gamma.setter
+    def gamma(self, gamma: float) -> None:
+        self._gamma = gamma
+    def _rbf(self, x: np.ndarray, y: np.ndarray) -> float:
+        """Return exp(-gamma * sum((x - y) ** 2)) over all elements."""
+        squared_distance = np.sum((x - y) ** 2)
+        exponent = - self._gamma * squared_distance
+        return np.exp(exponent)
+    def __call__(self, x: np.ndarray, y: np.ndarray) -> float:
+        """Evaluate the kernel on ``x`` and ``y`` (delegates to ``_rbf``)."""
+        return self._rbf(x, y)
+class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
+                                 TransformerMixin):
+    """Two-class Kernel Discriminant Analysis with an RBF kernel.
+
+    ``fit`` builds, from kernel values between training samples, the
+    between-class matrix M and the regularized within-class matrix N, and
+    keeps the leading generalized eigenvectors of (M, N). ``transform``
+    projects samples by multiplying their kernel values against the
+    training set with those eigenvectors.
+
+    NOTE: no ``predict`` method is defined in this class.
+
+    Parameters
+    ----------
+    n_components: integer.
+                  The dimension after transform.
+    gamma: float.
+           Parameter to RBF Kernel
+    lmb: float (>= 0.0), default=0.001.
+         Regularization parameter; ``lmb * I`` is added to N.
+    """
+    def __init__(self, n_components, gamma, lmb=0.001):
+        self.n_components = n_components
+        self.gamma = gamma
+        self.lmb = lmb
+        self.X = None  # training data X passed to fit()
+        self.K = None  # [K_1, K_2]: per-class kernel matrices of the training data
+        self.M = None  # matrix M of the Kernel LDA objective
+        self.N = None  # matrix N of the Kernel LDA objective (regularized)
+        # Generalized eigenvectors of (M, N); one eigenvector per column,
+        # ordered by decreasing eigenvalue.
+        self.EigenVectors = None
+        self._rbf = RBFKernel(gamma)
+    def fit(self, X: np.ndarray, y: np.ndarray) -> "KernelDiscriminantAnalysis":
+        """Fit KDA model.
+
+        Parameters
+        ----------
+        X: numpy array of shape [n_samples, n_features]
+           Training set.
+        y: numpy array of shape [n_samples]
+           Target values. Must contain exactly two distinct labels.
+
+        Returns
+        -------
+        self
+
+        Raises
+        ------
+        ValueError
+            If ``y`` does not contain exactly two distinct labels.
+        """
+        X = np.asarray(X)
+        y = np.asarray(y)
+        # np.unique gives a deterministic (sorted) class order.
+        classes = np.unique(y)
+        if len(classes) != 2:
+            raise ValueError("only works for 2 classes")
+        # Use the current gamma even if it was changed after construction.
+        self._rbf.gamma = self.gamma
+        self.X = X
+        X_cls_1 = X[y == classes[0], :]
+        X_cls_2 = X[y == classes[1], :]
+        # K_j[n, m] = k(X[n], X_cls_j[m])
+        K_1 = self._K_cls(X, X_cls_1)
+        K_2 = self._K_cls(X, X_cls_2)
+        # M_j is the row-wise mean of K_j, so the kernel values are reused
+        # instead of being evaluated a second time.
+        M_1 = K_1.mean(axis=1, keepdims=True)
+        M_2 = K_2.mean(axis=1, keepdims=True)
+        mean_diff = M_2 - M_1
+        self.M = np.matmul(mean_diff, np.transpose(mean_diff))
+        # N = sum_j K_j (I - 1/l_j) K_j^T + lmb * I
+        self.N = self._within_class_scatter(K_1)
+        self.N += self._within_class_scatter(K_2)
+        # The regularizer goes on the diagonal only (lmb * I); adding the
+        # scalar lmb to the identity would shift every entry of N.
+        self.N += self.lmb * np.identity(X.shape[0])
+        self.K = [K_1, K_2]
+        # linalg.eig returns complex-typed, unordered results. M and N are
+        # symmetric, so the real parts are kept and the columns are ordered
+        # by decreasing eigenvalue before truncation.
+        eig_vals, eig_vecs = linalg.eig(self.M, self.N)
+        order = np.argsort(-np.real(eig_vals))
+        self.EigenVectors = np.real(eig_vecs[:, order[: self.n_components]])
+        return self
+    def transform(self, X_test: np.ndarray) -> np.ndarray:
+        """Transform data with the trained KernelLDA model.
+
+        Parameters
+        ----------
+        X_test: numpy array of shape [n_samples, n_features]
+                The input data.
+
+        Returns
+        -------
+        y_pred: array-like, shape (n_samples, n_components)
+                Transformations for X.
+        """
+        # Row i holds the kernel values between X_test[i] and every
+        # training sample stored by fit().
+        K_test = self._K_cls(np.asarray(X_test), self.X)
+        return np.matmul(K_test, self.EigenVectors)
+    @staticmethod
+    def _within_class_scatter(K_cls: np.ndarray) -> np.ndarray:
+        """Return K_cls (I - 1/l) K_cls^T, with l the number of columns."""
+        num_samples_cls = K_cls.shape[1]
+        centering = (np.identity(num_samples_cls)
+                     - np.full((num_samples_cls, num_samples_cls),
+                               1.0 / num_samples_cls))
+        return np.matmul(np.matmul(K_cls, centering), np.transpose(K_cls))
+    def _M_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
+        """Return the column vector of mean kernel values per row of X.
+
+        Entry j is the average of k(X[j], x) over all rows x of ``X_cls``;
+        the result has shape (n_samples, 1).
+        """
+        return self._K_cls(X, X_cls).mean(axis=1, keepdims=True)
+    def _K_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
+        """Return the kernel matrix with entries k(X[n], X_cls[m]).
+
+        The result has shape (X.shape[0], X_cls.shape[0]).
+        """
+        num_samples = X.shape[0]
+        num_samples_cls = X_cls.shape[0]
+        K_cls = np.zeros(shape=(num_samples, num_samples_cls))
+        for n in range(num_samples):
+            for m in range(num_samples_cls):
+                K_cls[n, m] = self._rbf(X[n, :], X_cls[m, :])
+        return K_cls
+################################################
+# Number of nearest neighbours used by KNN (the value of k)
+n_neighbors = 3
+# Fixed random seed so the experiment is reproducible
+random_state = 0
+# Load the faces data set from the local cache (no download is attempted)
+faces = fetch_olivetti_faces(download_if_missing=False)
+features = faces.data  # features
+targets = faces.target # targets
+# Boolean mask selecting the samples whose label is 0 or 1
+first_two_people = targets < 2
+# show sample images
+images = faces.images[first_two_people] # save images
+fig = plt.figure() # create a new figure window
+for i in range(20): # display 20 images
+    # subplot : 4 rows and 5 columns, features plotted as an image
+    fig.add_subplot(4, 5, i + 1).imshow(images[i], cmap='gray')
+plt.show()
+# Prepare data: only the faces of classes 0 and 1 are used
+X = faces.data[first_two_people]
+y = faces.target[first_two_people]
+# Split into train/test
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.5, stratify=y, random_state=random_state)
+# Reduce dimension to 2 with KernelDiscriminantAnalysis
+# can adjust the value of 'gamma' as needed.
+kda = make_pipeline(
+    StandardScaler(),
+    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005),
+)
+# Use a nearest neighbor classifier to evaluate the methods
+knn = KNeighborsClassifier(n_neighbors=n_neighbors)
+# Fit the method's model
+kda.fit(X_train, y_train)
+# Fit a nearest neighbor classifier on the embedded training set
+knn.fit(kda.transform(X_train), y_train)
+# Compute the nearest neighbor accuracy on the embedded test set
+acc_knn = knn.score(kda.transform(X_test), y_test)
+# Embed the data set in 2 dimensions using the fitted model
+X_embedded = kda.transform(X)
+plt.figure()
+# Plot the projected points and show the evaluation score
+plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
+title = "{}, KNN (k={})\nTest accuracy = {:.2f}".format(
+    'kda', n_neighbors, acc_knn)
+plt.title(title)
+plt.show()
--- a/homework2/olivetti_faces.py
+++ b/homework2/olivetti_faces.py
+from scipy.io.matlab import loadmat
+from sklearn.datasets._base import _pkl_filepath
+import joblib
+import os
+# Resolve the data directory: $SCIKIT_LEARN_DATA if set, otherwise
+# ~/scikit_learn_data, with '~' expanded to the user's home directory.
+default_home = os.path.join('~', 'scikit_learn_data')
+data_home = os.path.expanduser(
+    os.environ.get('SCIKIT_LEARN_DATA', default_home))
+# Read the MATLAB file from the data directory.
+mat_path = os.path.join(data_home, "olivettifaces.mat")
+mfile = loadmat(file_name=mat_path)
+# Transposed copy of the 'faces' array stored in the .mat file.
+faces = mfile['faces'].T.copy()
+# Write the array as a compressed joblib file named 'olivetti.pkz'.
+filepath = _pkl_filepath(data_home, 'olivetti.pkz')
+joblib.dump(faces, filepath, compress=6)
--- a/homework2/readme.md
+++ b/homework2/readme.md
+这个是咱们第一个项目，没有思路的可以找小徐班主任要Jerry老师的视频描述（建议大家先自己多理解看看，如果暂时没有思路可以看看之前的课程，也可以再找找资料）<br/><br/>
+要求如下：<br/><br/>
+作业截止时间：5月22日23：59，具体答案将会在本周末讲解并公布<br/><br/>
+----基于 Kernel LDA + KNN 的人脸识别<br/>
+----使用 Kernel Discriminant Analysis 做特征降维<br/>
+----使用 K-Nearest-Neighbor 做分类<br/><br/><br/>
+数据:<br/>
+----人脸图像来自于 Olivetti faces data-set from AT&T (classification)<br/>
+----数据集包含 40 个人的人脸图像, 每个人都有 10 张图像<br/>
+----我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像<br/><br/><br/>
+算法:<br/>
+----需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维<br/>
+----代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容<br/><br/><br/>
+结果：<br/>
+1.要求识别成功率：100%<br/>
+2.达到如图所示效果图