Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
H
Homework_Submit
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200318019
Homework_Submit
Commits
a58d26aa
Commit
a58d26aa
authored
May 22, 2020
by
20200318019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First project
parent
6b4d772a
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
223 additions
and
0 deletions
+223
-0
README.md
+1
-0
face.rec.klda.homework.py
+222
-0
No files found.
README.md
View file @
a58d26aa
# Homework is submitted here
face.rec.klda.homework.py
0 → 100644
View file @
a58d26aa
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
# License: BSD 3 clause
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
sklearn
import
datasets
from
sklearn.model_selection
import
train_test_split
from
sklearn.datasets
import
fetch_olivetti_faces
from
sklearn.neighbors
import
KNeighborsClassifier
from
sklearn.pipeline
import
make_pipeline
from
sklearn.preprocessing
import
StandardScaler
print
(
__doc__
)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import
numpy
as
np
from
scipy
import
linalg
from
sklearn.base
import
BaseEstimator
,
ClassifierMixin
,
TransformerMixin
from
sklearn.preprocessing
import
OneHotEncoder
from
sklearn.utils.validation
import
check_array
,
check_is_fitted
,
check_X_y
from
scipy.spatial.distance
import
pdist
,
squareform
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin,
                                 TransformerMixin):
    """Two-class Kernel Discriminant Analysis with an RBF kernel.

    The model solves the generalized eigenproblem ``M a = w N a`` built from
    the training kernel matrix and projects new samples onto the leading
    eigenvectors.

    Parameters
    ----------
    n_components : int
        Number of projection directions returned by ``transform``.
    gamma : float
        RBF kernel coefficient: ``k(a, b) = exp(-gamma * ||a - b||^2)``.
    lmb : float (>= 0.0), default=0.001
        Regularization added to the diagonal of ``N``. A strictly positive
        value keeps ``N`` invertible.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        # Training samples passed to fit().
        self.X = None
        # RBF kernel matrix of the training samples.
        self.K = None
        # Between-class matrix M of the Kernel LDA objective.
        self.M = None
        # Regularized within-class matrix N of the Kernel LDA objective.
        self.N = None
        # Generalized eigenvectors of (M, N), one per column, ordered by
        # descending eigenvalue.
        self.EigenVectors = None

    def fit(self, X, y):
        """Fit the KDA model.

        Parameters
        ----------
        X : numpy array of shape [n_samples, n_features]
            Training set.
        y : numpy array of shape [n_samples]
            Target values. Only works for 2 classes with label/target 0 and 1.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples, or if one
            of the two classes has no sample.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same number of samples "
                "(got {} and {})".format(len(X), len(y)))
        self.X = X

        # RBF kernel matrix from pairwise squared Euclidean distances.
        self.K = np.exp(-self.gamma * self.find_distance_matrix(X))

        # Kernel columns that belong to each of the two classes.
        K1 = self.K[:, y == 0]
        K2 = self.K[:, y == 1]
        l1, l2 = K1.shape[1], K2.shape[1]
        if l1 == 0 or l2 == 0:
            raise ValueError(
                "both classes 0 and 1 must be present in y "
                "(got {} and {} samples)".format(l1, l2))

        # Centering matrices A_j = I - (1 / l_j) * ones, used to build N.
        A1 = np.identity(l1) - np.ones((l1, l1)) / float(l1)
        A2 = np.identity(l2) - np.ones((l2, l2)) / float(l2)

        # Within-class scatter N = sum_j K_j A_j K_j^T, plus regularization.
        N1 = np.dot(K1, np.dot(A1, K1.T))
        N2 = np.dot(K2, np.dot(A2, K2.T))
        self.N = N1 + N2 + self.lmb * np.identity(len(N1))

        # Between-class scatter M from the per-class mean kernel columns.
        M1 = np.sum(K1, axis=1) / float(l1)
        M2 = np.sum(K2, axis=1) / float(l2)
        M_diff = M1 - M2
        self.M = np.outer(M_diff, M_diff)

        # linalg.eig returns eigenpairs in no particular order and may
        # return complex values with negligible imaginary parts. Keep the
        # real parts and order the columns by descending eigenvalue so that
        # the first n_components columns are the discriminative directions.
        eigvals, eigvecs = linalg.eig(self.M, self.N)
        ranking = np.real(eigvals)
        # Non-finite eigenvalues (singular N) are ranked last.
        ranking = np.where(np.isfinite(ranking), ranking, -np.inf)
        order = np.argsort(-ranking)
        self.EigenVectors = np.real(eigvecs[:, order])
        return self

    def find_distance_matrix(self, data):
        """Return the square matrix of pairwise squared Euclidean distances.

        Parameters
        ----------
        data : array-like with n_samples along the first axis
            Every sample is flattened to a vector before distances are
            computed.

        Returns
        -------
        numpy array of shape [n_samples, n_samples]
        """
        data = np.asarray(data)
        # Infer the shape from the data instead of assuming 10 samples of
        # 64 * 64 pixels.
        flat = data.reshape(len(data), -1)
        return squareform(pdist(flat, 'sqeuclidean'))

    def transform(self, X_test):
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test : numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred : numpy array of shape [n_samples, n_components]
            Projection of every input sample onto the leading eigenvectors.

        Raises
        ------
        ValueError
            If the model has not been fitted, or if ``n_components`` exceeds
            the number of available eigenvectors.
        """
        from scipy.spatial.distance import cdist

        if self.X is None or self.EigenVectors is None:
            raise ValueError(
                "this KernelDiscriminantAnalysis instance is not fitted yet; "
                "call fit() first")
        if self.n_components > self.EigenVectors.shape[1]:
            raise ValueError(
                "n_components={} exceeds the {} available eigenvectors".format(
                    self.n_components, self.EigenVectors.shape[1]))

        X_test = np.asarray(X_test)
        if len(X_test) == 0:
            return np.zeros((0, self.n_components))

        train = np.asarray(self.X)
        train = train.reshape(len(train), -1)
        test = X_test.reshape(len(X_test), -1)

        # Kernel between every test sample (rows) and training sample (cols).
        test_kernel_matrix = np.exp(
            -self.gamma * cdist(test, train, 'sqeuclidean'))

        # Project onto the first n_components eigenvectors in one product.
        alphas = self.EigenVectors[:, :self.n_components]
        y_pred = np.dot(test_kernel_matrix, alphas)
        return y_pred
################################################
# Number of nearest neighbours used by KNN (the value of k).
n_neighbors = 3
# Fixed random seed so the experiment is reproducible.
random_state = 0

# Download / load the Olivetti faces data set.
faces = fetch_olivetti_faces()
targets = faces.target   # class label of every image
features = faces.data    # flattened pixel features of every image

# Keep only the images of the first two persons (labels 0 and 1).
images = faces.images[targets < 2]

# Show the 20 selected sample images on a grid of 4 rows and 5 columns.
fig = plt.figure()
for i in range(20):
    img_grid = fig.add_subplot(4, 5, i + 1)
    img_grid.imshow(images[i], cmap='gray')
plt.show()

# Prepare the data: restricted to the faces of classes 0 and 1.
X = faces.data[targets < 2]
y = faces.target[targets < 2]

# Split into equally sized, stratified train and test halves.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, stratify=y, random_state=random_state)

# Reduce the dimension to 2 with KernelDiscriminantAnalysis;
# the value of 'gamma' can be adjusted as needed.
kda = make_pipeline(
    StandardScaler(),
    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005))

# A nearest neighbour classifier is used to evaluate the embedding.
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

plt.figure()

# Fit the dimensionality reduction pipeline on the training half.
kda.fit(X_train, y_train)

# Fit the nearest neighbour classifier on the embedded training set.
knn.fit(kda.transform(X_train), y_train)

# Nearest neighbour accuracy on the embedded test set.
acc_knn = knn.score(kda.transform(X_test), y_test)

# Embed the whole two-class data set in 2 dimensions with the fitted model.
X_embedded = kda.transform(X)

# Plot the projected points and show the evaluation score in the title.
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format(
    'kda', n_neighbors, acc_knn))
plt.show()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment