Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
ml2_MiniAssignments
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200318029
ml2_MiniAssignments
Commits
ac04cd68
Commit
ac04cd68
authored
Apr 29, 2020
by
20200318029
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
homework2
parent
f400d5ad
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
295 additions
and
0 deletions
+295
-0
homework2/face.rec.klda.homework.py
+266
-0
homework2/olivetti_faces.py
+13
-0
homework2/readme.md
+16
-0
No files found.
homework2/face.rec.klda.homework.py
0 → 100644
View file @
ac04cd68
"""
==============================================================
基于 Kernel LDA + KNN 的人脸识别
使用 Kernel Discriminant Analysis 做特征降维
使用 K-Nearest-Neighbor 做分类
数据:
人脸图像来自于 Olivetti faces data-set from AT&T (classification)
数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
算法:
需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
==============================================================
"""
# License: BSD 3 clause
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
sklearn
import
datasets
from
sklearn.model_selection
import
train_test_split
from
sklearn.datasets
import
fetch_olivetti_faces
from
sklearn.neighbors
import
KNeighborsClassifier
from
sklearn.pipeline
import
make_pipeline
from
sklearn.preprocessing
import
StandardScaler
print
(
__doc__
)
################################################
"""
Scikit-learn-compatible Kernel Discriminant Analysis.
"""
import
numpy
as
np
from
scipy
import
linalg
from
sklearn.base
import
BaseEstimator
,
ClassifierMixin
,
TransformerMixin
from
sklearn.preprocessing
import
OneHotEncoder
from
sklearn.utils.validation
import
check_array
,
check_is_fitted
,
check_X_y
class RBFKernel:
    """Gaussian (RBF) kernel.

    Evaluates ``k(x, y) = exp(-gamma * ||x - y||^2)`` for a pair of inputs.
    """

    def __init__(self, gamma: float) -> None:
        self._gamma = gamma

    @property
    def gamma(self) -> float:
        """Kernel parameter multiplying the squared distance."""
        return self._gamma

    @gamma.setter
    def gamma(self, gamma: float) -> None:
        self._gamma = gamma

    def _rbf(self, x: np.ndarray, y: np.ndarray) -> float:
        """Return the kernel value for ``x`` and ``y``.

        The squared differences are summed over every element of ``x - y``.
        """
        squared_distance = np.square(x - y).sum()
        return np.exp(-self._gamma * squared_distance)

    def __call__(self, x: np.ndarray, y: np.ndarray) -> float:
        """Make the kernel object callable: ``kernel(x, y)``."""
        return self._rbf(x, y)
class KernelDiscriminantAnalysis(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Two-class Kernel Discriminant Analysis with an RBF kernel.

    Parameters
    ----------
    n_components : int
        Number of discriminant directions kept by ``transform``.
    gamma : float
        Parameter of the RBF kernel ``k(x, y) = exp(-gamma * ||x - y||^2)``.
    lmb : float (>= 0.0), default=0.001
        Regularization parameter; ``lmb * I`` is added to N.
    """

    def __init__(self, n_components, gamma, lmb=0.001):
        self.n_components = n_components
        self.gamma = gamma
        self.lmb = lmb
        self.X = None  # training samples passed to fit()
        self.K = None  # [K_1, K_2]: per-class kernel matrices of the training data
        self.M = None  # matrix M of the Kernel LDA objective (between-class term)
        self.N = None  # matrix N of the Kernel LDA objective (within-class term, regularized)
        # Generalized eigenvectors of (M, N), one per column, ordered by
        # decreasing eigenvalue.
        self.EigenVectors = None
        self._rbf = RBFKernel(gamma)

    def fit(self, X: np.ndarray, y: np.ndarray) -> "KernelDiscriminantAnalysis":
        """Fit the KDA model.

        Parameters
        ----------
        X : numpy array of shape [n_samples, n_features]
            Training set.
        y : numpy array of shape [n_samples]
            Target values. Must contain exactly two distinct labels.

        Returns
        -------
        self
        """
        # Rebuild the kernel from the current parameter so that a gamma
        # changed after construction (e.g. via set_params) is honoured.
        self._rbf = RBFKernel(self.gamma)
        self.X = X

        # np.unique gives the distinct labels in a deterministic (sorted) order.
        classes = np.unique(y)
        assert len(classes) == 2, "only works for 2 classes"
        X_cls_1 = X[y == classes[0], :]
        X_cls_2 = X[y == classes[1], :]

        # K_j[n, m] = k(X[n], X_cls_j[m]); evaluated once per class and reused
        # for both M and N.
        K_1 = self._K_cls(X, X_cls_1)
        K_2 = self._K_cls(X, X_cls_2)
        self.K = [K_1, K_2]

        # M = (M_2 - M_1)(M_2 - M_1)^T, where M_j is the row-wise mean of K_j.
        M_1 = K_1.mean(axis=1, keepdims=True)
        M_2 = K_2.mean(axis=1, keepdims=True)
        mean_gap = M_2 - M_1
        self.M = np.matmul(mean_gap, np.transpose(mean_gap))

        # N = sum_j K_j (I - 1/l_j * ones) K_j^T + lmb * I.
        # The regularizer is lmb TIMES the identity (not lmb plus identity).
        num_samples = X.shape[0]
        self.N = self.lmb * np.identity(num_samples)
        for K_j in (K_1, K_2):
            l_j = K_j.shape[1]
            centering = np.identity(l_j) - np.ones(shape=(l_j, l_j)) / l_j
            self.N += np.matmul(np.matmul(K_j, centering), np.transpose(K_j))

        # linalg.eig does not order its output: sort by decreasing eigenvalue
        # before keeping the leading directions, and drop the (zero, for a
        # symmetric-definite problem) imaginary parts so the projection is real.
        vals, vecs = linalg.eig(self.M, self.N)
        order = np.argsort(-np.real(vals))
        self.EigenVectors = np.real(vecs[:, order[:self.n_components]])
        return self

    def transform(self, X_test: np.ndarray) -> np.ndarray:
        """Transform data with the trained KernelLDA model.

        Parameters
        ----------
        X_test : numpy array of shape [n_samples, n_features]
            The input data.

        Returns
        -------
        y_pred : array-like, shape (n_samples, n_components)
            Transformations for X.
        """
        # Row i holds the kernel values between X_test[i] and every training
        # sample; projecting it on the eigenvectors gives the embedding.
        K_test = self._K_cls(X_test, self.X)
        return np.matmul(K_test, self.EigenVectors)

    def _M_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
        """Return the column vector of mean kernel values against one class.

        Entry j is the average of ``k(X[j], x)`` over the rows ``x`` of
        ``X_cls``; the result has shape (n_samples, 1).
        """
        return self._K_cls(X, X_cls).mean(axis=1, keepdims=True)

    def _K_cls(self, X: np.ndarray, X_cls: np.ndarray) -> np.ndarray:
        """Return the kernel matrix between the rows of ``X`` and ``X_cls``.

        Entry [n, m] is ``k(X[n], X_cls[m])``; the result has shape
        (X.shape[0], X_cls.shape[0]).
        """
        num_samples = X.shape[0]
        num_samples_cls = X_cls.shape[0]
        K_cls = np.zeros(shape=(num_samples, num_samples_cls))
        for n in range(num_samples):
            for m in range(num_samples_cls):
                K_cls[n, m] = self._rbf(X[n, :], X_cls[m, :])
        return K_cls
################################################
# Number of nearest neighbours used by KNN (the value of k)
n_neighbors = 3
# Fix the random seed so the experiment can be reproduced
random_state = 0

# Load the face data set; download_if_missing=False means it is not downloaded here
faces = fetch_olivetti_faces(download_if_missing=False)
targets = faces.target
first_two = targets < 2  # mask selecting the samples labelled 0 or 1

# Sample images to display
images = faces.images[first_two]
features = faces.data  # features

# Show the first 20 selected images on a grid of 4 rows and 5 columns
fig = plt.figure()
for i in range(20):
    img_grid = fig.add_subplot(4, 5, i + 1)
    img_grid.imshow(images[i], cmap='gray')
plt.show()

# Prepare data: only the faces of classes 0 and 1 are handled
X = faces.data[first_two]
y = faces.target[first_two]

# Split into train/test halves, stratified on the label
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    stratify=y,
    random_state=random_state,
)

# Reduce dimension to 2 with KernelDiscriminantAnalysis after standardizing;
# the value of 'gamma' can be adjusted as needed.
kda = make_pipeline(
    StandardScaler(),
    KernelDiscriminantAnalysis(n_components=2, gamma=0.000005),
)

# A nearest neighbor classifier is used to evaluate the embedding
knn = KNeighborsClassifier(n_neighbors=n_neighbors)

# Fit the dimensionality-reduction pipeline
kda.fit(X_train, y_train)

# Train KNN on the embedded training set, then score it on the embedded test set
knn.fit(kda.transform(X_train), y_train)
acc_knn = knn.score(kda.transform(X_test), y_test)

# Embed the whole data set in 2 dimensions using the fitted model
X_embedded = kda.transform(X)

# Plot the projected points and show the evaluation score in the title
plt.figure()
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap='Set1')
plt.title("{}, KNN (k={})\nTest accuracy = {:.2f}".format('kda',
                                                          n_neighbors,
                                                          acc_knn))
plt.show()
homework2/olivetti_faces.py
0 → 100644
View file @
ac04cd68
from
scipy.io.matlab
import
loadmat
from
sklearn.datasets._base
import
_pkl_filepath
import
joblib
import
os
# Data directory: $SCIKIT_LEARN_DATA when set, otherwise ~/scikit_learn_data,
# with "~" expanded to the user's home directory.
data_home = os.path.expanduser(
    os.environ.get('SCIKIT_LEARN_DATA', os.path.join('~', 'scikit_learn_data'))
)

# Read the MATLAB file from the data directory and keep an independent,
# transposed copy of its 'faces' array.
mfile = loadmat(file_name=os.path.join(data_home, "olivettifaces.mat"))
faces = mfile['faces'].T.copy()

# Dump the array, compressed, to the 'olivetti.pkz' path built by _pkl_filepath.
# NOTE(review): presumably this is the cache file that
# fetch_olivetti_faces(download_if_missing=False) reads — confirm.
filepath = _pkl_filepath(data_home, 'olivetti.pkz')
joblib.dump(faces, filepath, compress=6)
homework2/readme.md
0 → 100644
View file @
ac04cd68
这个是咱们第一个项目,没有思路的可以找小徐班主任要Jerry老师的视频描述(建议大家先自己多理解看看,如果暂时没有思路可以看看之前的课程,也可以再找找资料)
<br/><br/>
要求如下:
<br/><br/>
作业截止时间:5月22日23:59,具体答案将会在本周末讲解并公布
<br/><br/>
----基于 Kernel LDA + KNN 的人脸识别
<br/>
----使用 Kernel Discriminant Analysis 做特征降维
<br/>
----使用 K-Nearest-Neighbor 做分类
<br/><br/><br/>
数据:
<br/>
----人脸图像来自于 Olivetti faces data-set from AT&T (classification)
<br/>
----数据集包含 40 个人的人脸图像, 每个人都有 10 张图像
<br/>
----我们只使用其中标签(label/target)为 0 和 1 的前 2 个人的图像
<br/><br/><br/>
算法:
<br/>
----需要自己实现基于 RBF Kernel 的 Kernel Discriminant Analysis 用于处理两个类别的数据的特征降维
<br/>
----代码的框架已经给出, 需要学生自己补充 KernelDiscriminantAnalysis 的 fit() 和 transform() 函数的内容
<br/><br/><br/>
结果:
<br/>
1.
要求识别成功率:100%
<br/>
2.
达到如图所示效果图
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment