Commit 8021fe86 by 20200318111

kernel_lda是二分类,kernel_lda_three是多分类

parents
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding" addBOMForNewFiles="with NO BOM" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="renderExternalDocumentation" value="true" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="pytest" />
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/kernelLDA_fac_reg.iml" filepath="$PROJECT_DIR$/.idea/kernelLDA_fac_reg.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PySciProjectComponent">
<option name="PY_SCI_VIEW" value="true" />
<option name="PY_SCI_VIEW_SUGGESTED" value="true" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="e652180e-c826-4109-9996-1e3e2b401bde" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="CoverageDataManager">
<SUITE FILE_PATH="coverage/kernelLDA_fac_reg$kernel_LDA_Three.coverage" NAME="kernel_LDA_Three Coverage Results" MODIFIED="1589811467943" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/kernelLDA_fac_reg$kernel_LDA.coverage" NAME="kernel_LDA Coverage Results" MODIFIED="1589811059757" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
<SUITE FILE_PATH="coverage/kernelLDA_fac_reg$codecs.coverage" NAME="codecs Coverage Results" MODIFIED="1588047377092" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="D:/AI/anaconda/Lib" />
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/kernel_LDA_Three.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="391">
<caret line="201" column="40" lean-forward="true" selection-start-line="201" selection-start-column="40" selection-end-line="201" selection-end-column="40" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/pipeline.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="7843">
<caret line="354" selection-start-line="354" selection-end-line="354" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/kernel_LDA.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-5960">
<caret line="37" selection-start-line="37" selection-end-line="37" />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/kernel_LDA.py" />
<option value="$PROJECT_DIR$/kernel_LDA_Three.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="191" />
<option name="y" value="100" />
<option name="width" value="1375" />
<option name="height" value="845" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="kernelLDA_fac_reg" type="b2602c69:ProjectViewProjectNode" />
<item name="kernelLDA_fac_reg" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="kernelLDA_fac_reg" type="b2602c69:ProjectViewProjectNode" />
<item name="External Libraries" type="cb654da1:ExternalLibrariesNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
<property name="nodejs_npm_path_reset_for_default_project" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.kernel_LDA_Three">
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="kernelLDA_fac_reg" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="true" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="codecs" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="kernelLDA_fac_reg" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="D:/AI/anaconda/Lib" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="D:/AI/anaconda/Lib/codecs.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="true" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="kernel_LDA" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="kernelLDA_fac_reg" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
<env name="PATH" value="D:\AI\anaconda\Library\bin" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/kernel_LDA.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="kernel_LDA_Three" type="PythonConfigurationType" factoryName="Python" temporary="true">
<module name="kernelLDA_fac_reg" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
<env name="PATH" value="D:\AI\anaconda\Library\bin" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/kernel_LDA_Three.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<list>
<item itemvalue="Python.kernel_LDA" />
<item itemvalue="Python.codecs" />
<item itemvalue="Python.kernel_LDA_Three" />
</list>
<recent_temporary>
<list>
<item itemvalue="Python.kernel_LDA_Three" />
<item itemvalue="Python.kernel_LDA" />
<item itemvalue="Python.codecs" />
</list>
</recent_temporary>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="e652180e-c826-4109-9996-1e3e2b401bde" name="Default Changelist" comment="" />
<created>1587567198801</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1587567198801</updated>
<workItem from="1587567199980" duration="778000" />
<workItem from="1587887401733" duration="3412000" />
<workItem from="1588028952020" duration="14259000" />
<workItem from="1588161185261" duration="1289000" />
<workItem from="1588205286079" duration="22439000" />
<workItem from="1588253383835" duration="5398000" />
<workItem from="1588261971641" duration="2716000" />
<workItem from="1588304858497" duration="3103000" />
<workItem from="1588577040978" duration="41000" />
<workItem from="1588672749621" duration="1063000" />
<workItem from="1588983713268" duration="18000" />
<workItem from="1589383271826" duration="893000" />
<workItem from="1589695054173" duration="684000" />
<workItem from="1589811028540" duration="456000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="56549000" />
</component>
<component name="TodoView">
<todo-panel id="selected-file">
<is-autoscroll-to-source value="true" />
</todo-panel>
<todo-panel id="all">
<are-packages-shown value="true" />
<is-autoscroll-to-source value="true" />
</todo-panel>
</component>
<component name="ToolWindowManager">
<frame x="-7" y="-7" width="1550" height="878" extended-state="6" />
<layout>
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.07506702" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.16733602" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.40026775" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" weight="0.32931727" />
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Version Control" order="8" />
<window_info anchor="bottom" id="Database Changes" order="9" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
<window_info anchor="bottom" id="Terminal" order="11" weight="0.32931727" />
<window_info anchor="bottom" id="Python Console" order="12" weight="0.3815261" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info active="true" anchor="right" id="SciView" order="3" sideWeight="0.91700137" visible="true" weight="0.30026808" />
<window_info anchor="right" id="Database" order="4" />
<window_info anchor="right" x="0" y="0" width="337" height="678" id="Documentation" order="5" sideWeight="0.08299866" side_tool="true" visible="true" weight="0.30026808" />
</layout>
<layout-to-restore>
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.13337801" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Favorites" order="2" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" weight="0.33467203" />
<window_info active="true" anchor="bottom" id="Debug" order="3" visible="true" weight="0.34672022" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" weight="0.32931727" />
<window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
<window_info anchor="bottom" id="Version Control" order="8" />
<window_info anchor="bottom" id="Database Changes" order="9" />
<window_info anchor="bottom" id="Event Log" order="10" side_tool="true" />
<window_info anchor="bottom" id="Terminal" order="11" weight="0.32931727" />
<window_info anchor="bottom" id="Python Console" order="12" weight="0.33065596" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info anchor="right" id="SciView" order="3" sideWeight="0.49795082" visible="true" weight="0.1816354" />
<window_info anchor="right" id="Database" order="4" />
<window_info anchor="right" x="0" y="0" width="337" height="678" id="Documentation" order="5" sideWeight="0.5020492" side_tool="true" visible="true" weight="0.1816354" />
</layout-to-restore>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<default-breakpoints>
<breakpoint type="python-exception">
<properties notifyOnTerminate="true" exception="BaseException">
<option name="notifyOnTerminate" value="true" />
</properties>
</breakpoint>
</default-breakpoints>
</breakpoint-manager>
</component>
<component name="editorHistoryManager">
<entry file="file://D:/AI/anaconda/Lib/site-packages/numpy/core/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="437">
<caret line="56" selection-start-line="56" selection-end-line="56" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/utils/validation.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-3863">
<caret line="600" column="55" lean-forward="true" selection-start-line="600" selection-start-column="55" selection-end-line="600" selection-end-column="55" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/matplotlib/pyplot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="889">
<caret line="506" column="22" lean-forward="true" selection-start-line="506" selection-start-column="22" selection-end-line="506" selection-end-column="22" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/preprocessing/_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="255">
<caret line="635" selection-start-line="635" selection-end-line="635" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/base.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="169">
<caret line="189" selection-start-line="189" selection-end-line="189" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/utils/metaestimators.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="146">
<caret line="65" selection-start-line="65" selection-end-line="65" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/codecs.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="287">
<caret line="314" column="67" lean-forward="true" selection-start-line="314" selection-start-column="67" selection-end-line="314" selection-end-column="67" />
</state>
</provider>
</entry>
<entry file="file://D:/AI/anaconda/Lib/site-packages/sklearn/pipeline.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="7843">
<caret line="354" selection-start-line="354" selection-end-line="354" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/kernel_LDA.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-5960">
<caret line="37" selection-start-line="37" selection-end-line="37" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/kernel_LDA_Three.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="391">
<caret line="201" column="40" lean-forward="true" selection-start-line="201" selection-start-column="40" selection-end-line="201" selection-end-column="40" />
</state>
</provider>
</entry>
</component>
</project>
\ No newline at end of file
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Two-class Kernel Discriminant Analysis with an RBF kernel.

    The model looks for coefficient vectors ``alpha`` that solve the
    generalized eigenproblem ``M alpha = lambda N alpha`` where ``M`` is the
    between-class matrix and ``N`` the regularized within-class matrix, both
    expressed through the kernel matrix of the training samples.

    Parameters
    ----------
    n_components: integer.
        The dimension after transform. For two classes ``M`` has rank one,
        so only the first component carries discriminative information.
    gamma: float.
        Parameter to RBF Kernel: ``k(a, b) = exp(-gamma * ||a - b||**2)``.
    lmb: float (>= 0.0), default=0.001.
        Regularization parameter, added once to the diagonal of ``N``.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training samples passed to fit()
        self.K = None  # kernel matrix built from the training samples
        self.M = None  # matrix M of the Kernel LDA objective
        self.N = None  # matrix N of the Kernel LDA objective (regularized)
        # Generalized eigenvectors of (M, N), one per column, ordered by
        # decreasing eigenvalue magnitude.
        self.EigenVectors = None

    def rbfkernel(self, gamma, distance):
        """Return the RBF kernel value(s) for squared distance(s) ``distance``.

        The exponent is negative so the result agrees with ``kernel`` and
        with the kernel evaluated in ``transform``.
        """
        return np.exp(-gamma * distance)

    def kernel(self, X, X_c, gamma):
        """Return the RBF kernel value between two single samples."""
        return np.exp(-gamma * np.sum((X - X_c) ** 2))

    def _gram(self, A, B):
        """Return the RBF kernel matrix with entry (i, j) = k(A[i], B[j])."""
        A = np.asarray(A)
        B = np.asarray(B)
        gram = np.empty((A.shape[0], B.shape[0]))
        for i, row in enumerate(A):
            # One vectorized pass per row instead of one Python call per pair.
            gram[i] = np.exp(-self.gamma * np.sum((B - row) ** 2, axis=1))
        return gram

    def fit(self, X, y):
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
            Training set.
        y: numpy array of shape [n_samples]
            Target values. Exactly two distinct labels are required
            (for example 0 and 1).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly two distinct labels.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        classes = np.unique(y)
        if classes.size != 2:
            raise ValueError(
                "KernelDiscriminantAnalysis (binary) needs exactly 2 classes, "
                "got {}".format(classes.size))

        self.X = X
        n_samples = X.shape[0]
        # The training kernel matrix is computed once and reused for M and N.
        self.K = self._gram(X, X)

        class_means = []
        # Regularize once; adding lmb inside the per-class loop would
        # silently multiply it by the number of classes.
        within = self.lmb * np.eye(n_samples)
        for label in classes:
            K_c = self.K[:, y == label]
            mean_c = K_c.mean(axis=1)
            class_means.append(mean_c)
            # K_c (I - 1/l) K_c^T equals the outer product of the
            # row-centered block with itself because (I - 1/l) is a
            # symmetric idempotent projection.
            centered = K_c - mean_c[:, np.newaxis]
            within += centered.dot(centered.T)

        mean_diff = class_means[0] - class_means[1]
        self.M = np.outer(mean_diff, mean_diff)
        self.N = within

        # Solve M a = w N a directly; no explicit inverse of N is formed.
        eigvals, eigvecs = linalg.eig(self.M, self.N)
        order = np.argsort(-np.abs(eigvals), kind="stable")
        # Only the real part is meaningful for the projection.
        self.EigenVectors = np.real(eigvecs[:, order[:self.n_components]])
        return self

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
            Transformations for X.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.EigenVectors is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This KernelDiscriminantAnalysis instance is not fitted yet. "
                "Call 'fit' before using 'transform'.")
        # Kernel between every test sample and every training sample,
        # projected on the stored coefficient vectors.
        K_test = self._gram(X_test, self.X)
        return K_test.dot(self.EigenVectors)
################################################
# Number of nearest neighbours (the k of KNN)
n_neighbors = 3

# Fixed seed so the experiment can be reproduced
random_state = 0

# Load the face data set
faces = fetch_olivetti_faces()
targets = faces.target  # class label of every image
features = faces.data  # flattened image features

# Show the sample images of the first two people
images = faces.images[targets < 2]
fig = plt.figure()  # create a new figure window
for position in range(1, 21):  # display 20 images
    # subplot grid: 4 rows and 5 columns
    axis = fig.add_subplot(4, 5, position)
    # plot features as image
    axis.imshow(images[position - 1], cmap='gray')
plt.show()

# Prepare data: only the faces of classes 0 and 1 are used
X = faces.data[targets < 2]
y = faces.target[targets < 2]

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.5,
    stratify=y,
    random_state=random_state,
)

# Reduce dimension to 2 with KernelDiscriminantAnalysis;
# the value of 'gamma' can be adjusted as needed.
# (exercise question) why do we standardize the data first?
scaler = StandardScaler()
reducer = KernelDiscriminantAnalysis(n_components=2, gamma=0.000005)
kda = make_pipeline(scaler, reducer)

# Use a nearest neighbor classifier to evaluate the method
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

plt.figure()

# Fit the dimensionality-reduction model
kda.fit(X_train, y_train)

# Fit a nearest neighbor classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)

# Compute the nearest neighbor accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)

# Embed the data set in 2 dimensions using the fitted model
X_embedded = kda.transform(X)

# Plot the projected points and show the evaluation score
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
title = "{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                       n_neighbors,
                                                       acc_knn)
plt.title(title)
plt.show()
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
print(__doc__)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, ClassifierMixin, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Multi-class Kernel Discriminant Analysis with an RBF kernel.

    The model looks for coefficient vectors ``alpha`` that solve the
    generalized eigenproblem ``M alpha = lambda N alpha`` where ``M`` is the
    between-class matrix and ``N`` the regularized within-class matrix, both
    expressed through the kernel matrix of the training samples.

    Parameters
    ----------
    n_components: integer.
        The dimension after transform. ``M`` has rank at most
        ``n_classes - 1``, so components beyond that carry no
        discriminative information.
    gamma: float.
        Parameter to RBF Kernel: ``k(a, b) = exp(-gamma * ||a - b||**2)``.
    lmb: float (>= 0.0), default=0.001.
        Regularization parameter, added once to the diagonal of ``N``.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training samples passed to fit()
        self.K = None  # kernel matrix built from the training samples
        self.M = None  # matrix M of the Kernel LDA objective
        self.N = None  # matrix N of the Kernel LDA objective (regularized)
        # Generalized eigenvectors of (M, N), one per column, ordered by
        # decreasing eigenvalue magnitude.
        self.EigenVectors = None

    def rbfkernel(self, gamma, distance):
        """Return the RBF kernel value(s) for squared distance(s) ``distance``.

        The exponent is negative so the result agrees with ``kernel`` and
        with the kernel evaluated in ``transform``.
        """
        return np.exp(-gamma * distance)

    def kernel(self, X, X_c, gamma):
        """Return the RBF kernel value between two single samples."""
        return np.exp(-gamma * np.sum((X - X_c) ** 2))

    def _gram(self, A, B):
        """Return the RBF kernel matrix with entry (i, j) = k(A[i], B[j])."""
        A = np.asarray(A)
        B = np.asarray(B)
        gram = np.empty((A.shape[0], B.shape[0]))
        for i, row in enumerate(A):
            # One vectorized pass per row instead of one Python call per pair.
            gram[i] = np.exp(-self.gamma * np.sum((B - row) ** 2, axis=1))
        return gram

    def fit(self, X, y):
        """Fit KDA model.

        Parameters
        ----------
        X: numpy array of shape [n_samples, n_features]
            Training set.
        y: numpy array of shape [n_samples]
            Target values. Any set of two or more distinct labels is
            accepted; labels do not have to be 0..k-1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` contains fewer than two distinct labels.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        classes, counts = np.unique(y, return_counts=True)
        if classes.size < 2:
            raise ValueError(
                "KernelDiscriminantAnalysis needs at least 2 classes, "
                "got {}".format(classes.size))

        self.X = X
        n_samples = X.shape[0]
        # The training kernel matrix is computed once and reused for M and N.
        self.K = self._gram(X, X)
        # Mean kernel vector over all training samples.
        overall_mean = self.K.mean(axis=1)

        between = np.zeros((n_samples, n_samples))
        # Regularize once; adding lmb inside the per-class loop would
        # silently multiply it by the number of classes.
        within = self.lmb * np.eye(n_samples)
        for label, count in zip(classes, counts):
            K_c = self.K[:, y == label]
            mean_c = K_c.mean(axis=1)
            deviation = mean_c - overall_mean
            # Each class contributes its size times the outer product of
            # its mean deviation.
            between += count * np.outer(deviation, deviation)
            # K_c (I - 1/l) K_c^T equals the outer product of the
            # row-centered block with itself because (I - 1/l) is a
            # symmetric idempotent projection.
            centered = K_c - mean_c[:, np.newaxis]
            within += centered.dot(centered.T)

        self.M = between
        self.N = within

        # Solve M a = w N a directly; no explicit inverse of N is formed.
        eigvals, eigvecs = linalg.eig(self.M, self.N)
        order = np.argsort(-np.abs(eigvals), kind="stable")
        # Keep the n_components leading directions, one per column; only
        # the real part is meaningful for the projection.
        self.EigenVectors = np.real(eigvecs[:, order[:self.n_components]])
        return self

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test: numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred: array-like, shape (n_samples, n_components)
            Transformations for X.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.EigenVectors is None:
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This KernelDiscriminantAnalysis instance is not fitted yet. "
                "Call 'fit' before using 'transform'.")
        # Kernel between every test sample and every training sample,
        # projected on the stored coefficient vectors.
        K_test = self._gram(X_test, self.X)
        return K_test.dot(self.EigenVectors)
################################################
# Number of nearest neighbours (the k of KNN)
n_neighbors = 1
# Fixed seed so the experiment can be reproduced
random_state = 0
# Number of people (classes) kept from the data set: labels 0 .. n_classes - 1.
# The image grid, the data selection and the number of KDA components are all
# derived from this single value so they cannot drift apart.
n_classes = 5
# Load the face data set
faces = fetch_olivetti_faces()
targets = faces.target  # class label of every image
features = faces.data  # flattened image features
# Show every image of the selected classes
images = faces.images[targets < n_classes]
fig = plt.figure()  # create a new figure window
n_cols = 5
n_rows = -(-len(images) // n_cols)  # ceiling division: enough rows for all images
for i in range(len(images)):
    # subplot grid: n_rows rows and n_cols columns
    img_grid = fig.add_subplot(n_rows, n_cols, i + 1)
    # plot features as image
    img_grid.imshow(images[i], cmap='gray')
plt.show()
# Multi-class problem: keep the first n_classes people
X, y = faces.data[targets < n_classes], faces.target[targets < n_classes]
# Split into train/test
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.5, stratify=y,
                     random_state=random_state)
# Reduce dimension to n_classes - 1 with KernelDiscriminantAnalysis;
# the value of 'gamma' can be adjusted as needed.
# (exercise question) why do we standardize the data first?
kda = make_pipeline(StandardScaler(),
                    KernelDiscriminantAnalysis(n_components=n_classes - 1,
                                               gamma=0.000005))
# Use a nearest neighbor classifier to evaluate the method
knn = KNeighborsClassifier(n_neighbors=n_neighbors)
plt.figure()
# Fit the dimensionality-reduction model
kda.fit(X_train, y_train)
# Fit a nearest neighbor classifier on the embedded training set
knn.fit(kda.transform(X_train), y_train)
# Compute the nearest neighbor accuracy on the embedded test set
acc_knn = knn.score(kda.transform(X_test), y_test)
print(acc_knn)
# Embed the whole data set using the fitted model
X_embedded = kda.transform(X)
# Plot the first two embedded components and show the evaluation score
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                          n_neighbors,
                                                          acc_knn))
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment