"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类

数据:
    人脸图像来自于 Olivetti faces data-set from AT&T (classification)
    数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
    我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像

算法:
    需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
    代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""

# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""

import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

class RBFKernel(object):
    """
    a gaussian kernel

    k(x, y) = exp( - gamma * || x - y ||^2)
    """
    def __init__(self, gamma: float) -> None:
        self._gamma = gamma

    @property
    def gamma(self) -> float:
        return self._gamma
    @gamma.setter
    def gamma(self, gamma: float) -> None:
        self._gamma = gamma

    def _rbf(self, x: np.array, y: np.array) -> float:
        return np.exp(- self._gamma * np.sum((x - y) ** 2))

    def __call__(self, x: np.array, y: np.array) -> float:
        return self._rbf(x, y)


class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Kernel Discriminant Analysis.

    Parameters
    ----------
    n_components: integer.
                  The dimension after transform.
    gamma: float.
           Parameter to RBF Kernel
    lmb: float (>= 0.0), default=0.001.
         Regularization parameter
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None # 用于存放输入的训练数据的 X
        self.K = None # 用于存放训练数据 X 产生的 Kernel Matrix
        self.M = None # 用于存放 Kernel LDA 最优化公式中的 M
        self.N = None # 用于存放 Kernel LDA 最优化公式中的 N
        self.EigenVectors = None # 用于存放 Kernel LDA 最优化公式中的 M 对应的广义特征向量, 每一列为一个特征向量, 按照对应特征值大小排序

        self._rbf = RBFKernel(gamma)

    def fit(self, X: np.array, y: np.array) -> None:
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
           Training set.
        y: numpy array of shape [n_samples]
           Target values. Only works for 2 classes with label/target 0 and 1.

        Returns
        -------
        self
        """
        self.X = X
        classes = list(set(y))
        assert len(classes) == 2, "only works for 2 classes"

        X_cls_1 = X[y == classes[0], :]
        X_cls_2 = X[y == classes[1], :]

        # M, M_1 & M_2
        M_1 = self._M_cls(X, X_cls_1)
        M_2 = self._M_cls(X, X_cls_2)
        self.M = np.matmul((M_2 - M_1), np.transpose(M_2 - M_1))

        # N, K_1 & K_2
        num_samples = X.shape[0]
        num_samples_cls_1 = X_cls_1.shape[0]
        num_samples_cls_2 = X_cls_2.shape[0]
        K_1 = self._K_cls(X, X_cls_1)
        K_2 = self._K_cls(X, X_cls_2)
        self.N = np.matmul(
            np.matmul(
                K_1,
                (np.identity(num_samples_cls_1) - 1 / num_samples_cls_1 * np.ones(shape=(num_samples_cls_1, num_samples_cls_1)))
            ),
            np.transpose(K_1)
        )
        self.N += np.matmul(
            np.matmul(
                K_2,
                (np.identity(num_samples_cls_2) - 1 / num_samples_cls_2 * np.ones(shape=(num_samples_cls_2, num_samples_cls_2)))
            ),
            np.transpose(K_2)
        )
        self.N += self.lmb + np.identity(num_samples)
        self.K = [K_1, K_2]

        _, vecs = linalg.eig(self.M, self.N)
        self.EigenVectors = vecs[:, : self.n_components]

    def transform(self, X_test: np.array) -> np.array:
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
                The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
                Transformations for X.
        """
        num_samples_test = X_test.shape[0]
        num_samples = self.X.shape[0]

        y_pred = np.zeros(shape=(num_samples_test, self.n_components))

        for i in range(num_samples_test):
            # kernel
            x = X_test[i, :]
            x_rbf = np.zeros(shape=(num_samples, ))
            for k in range(num_samples):
                x_rbf[k] = self._rbf(self.X[k, :], x)

            y_pred[i, :] = np.matmul(x_rbf, self.EigenVectors)

        return y_pred

    def _M_cls(self, X: np.array, X_cls: np.array) -> np.array:
        """
        """
        num_samples = X.shape[0]
        num_samples_cls = X_cls.shape[0]

        # M_cls
        M_cls = np.zeros(shape=(num_samples, 1))
        for j in range(num_samples):

            for k in range(num_samples_cls):
                M_cls[j] += self._rbf(X[j, :], X_cls[k, :])
        M_cls /= num_samples_cls

        return M_cls

    def _K_cls(self, X: np.array, X_cls: np.array) -> np.array:

        num_samples = X.shape[0]
        num_samples_cls = X_cls.shape[0]

        K_cls = np.zeros(shape=(num_samples, num_samples_cls))

        for n in range(num_samples):
            for m in range(num_samples_cls):
                K_cls[n, m] = self._rbf(X[n, :], X_cls[m, :])

        return K_cls


################################################

# 指定 KNN 中最近邻的个数 (k 的值)
n_neighbors = 3

# 设置随机数种子让实验可以复现
random_state = 0

# 现在人脸数据集
faces = fetch_olivetti_faces(download_if_missing=False)
targets = faces.target

# show sample images
images = faces.images[targets < 2] # save images

features = faces.data  # features
targets = faces.target # targets

fig = plt.figure() # create a new figure window
for i in range(20): # display 20 images
    # subplot : 4 rows and 5 columns
    img_grid = fig.add_subplot(4, 5, i+1)
    # plot features as image
    img_grid.imshow(images[i], cmap='gray')

plt.show()

# Prepare data, 只限于处理类别 0 和 1 的人脸
X, y = faces.data[targets < 2], faces.target[targets < 2]

# Split into train/test
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.5, stratify=y,
                     random_state=random_state)


# Reduce dimension to 2 with KernelDiscriminantAnalysis
# can adjust the value of 'gamma' as needed.
kda = make_pipeline(StandardScaler(),
                    KernelDiscriminantAnalysis(n_components=2, gamma = 0.000005))

# Use a nearest neighbor classifier to evaluate the methods
knn = KNeighborsClassifier(n_neighbors=n_neighbors)



# Fit the method's model
kda.fit(X_train, y_train)

# Fit a nearest neighbor classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)

# Compute the nearest neighbor accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)

# Embed the data set in 2 dimensions using the fitted model
X_embedded = kda.transform(X)

plt.figure()
# plt.subplot(1, 3, i + 1, aspect=1)
# Plot the projected points and show the evaluation score
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                              n_neighbors,
                                                              acc_knn))
plt.show()