Commit 40d04cf7 by 20200318029

homework4

parent a2a29945
import numpy as np
import time
import os
import cv2
import kmodel
from utils import transparentOverlay
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Load the pre-trained model
# my_model = kmodel.load_trained_model('yuan_model_mac')
# Load your own trained model (uncomment the line below when testing)
my_model = kmodel.load_trained_model('my_model')
# Create the face detector
face_cascade = cv2.CascadeClassifier('cascades/haarcascade_frontalface_default.xml')
# Open the camera
camera = cv2.VideoCapture(0)
# Load a sunglasses image (IMREAD_UNCHANGED keeps the alpha channel)
sunglasses = cv2.imread('sunglass.png', cv2.IMREAD_UNCHANGED)
# Main capture loop
while True:
    time.sleep(0.1)
    # Grab one frame from the camera
    (_, frame) = camera.read()
    frame = cv2.flip(frame, 1)
    frame2 = np.copy(frame)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Detect all faces
    faces = face_cascade.detectMultiScale(gray, 1.25, 6)
    # For each detected face
    for (x, y, w, h) in faces:
        # Crop out just the face
        gray_face = gray[y:y+h, x:x+w]
        color_face = frame[y:y+h, x:x+w]
        # Normalize the face image values to [0, 1]
        gray_normalized = gray_face / 255
        # Resize the grayscale face to the 96x96 network input
        original_shape = gray_face.shape  # keep a copy for later reference
        face_resized = cv2.resize(gray_normalized, (96, 96), interpolation=cv2.INTER_AREA)
        face_resized = face_resized.reshape(1, 96, 96, 1)
        # Predict the keypoint coordinates
        keypoints = my_model.predict(face_resized)
        # Map the keypoint values from [-1, 1] to [0, 96]
        keypoints = keypoints * 48 + 48
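        # (Note on the mapping above: the output Dense layer uses tanh, so each
        # predicted value lies in [-1, 1]; v * 48 + 48 sends -1 -> 0, 0 -> 48,
        # 1 -> 96, i.e. back to pixel coordinates in the 96x96 crop, inverting
        # the (y - 48) / 48 normalization applied in utils.load_data.)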
        # Resize the color face to 96x96 to match the keypoints
        face_resized_color = cv2.resize(color_face, (96, 96), interpolation=cv2.INTER_AREA)
        face_resized_color2 = np.copy(face_resized_color)
        # Pair the 30 network outputs into 15 (x, y) tuples
        points = list(zip(keypoints[0][0::2], keypoints[0][1::2]))
        # Glasses width from the keypoints left_eyebrow_outer_end_x (points[7])
        # and right_eyebrow_outer_end_x (points[9])
        sunglass_width = int((points[7][0] - points[9][0]) * 1.1)
        # Glasses height from the keypoints nose_tip_y (points[10]) and
        # right_eyebrow_inner_end_y (points[8])
        sunglass_height = int((points[10][1] - points[8][1]) / 1.1)
        sunglass_resized = cv2.resize(sunglasses, (sunglass_width, sunglass_height), interpolation=cv2.INTER_CUBIC)
        face_resized_color = transparentOverlay(face_resized_color, sunglass_resized, pos=(int(points[9][0]), int(points[9][1])), scale=1)
        # Resize the glasses-overlaid face_resized_color back to its original size
        # in the captured frame; cv2.resize expects (width, height), so reverse
        # the (height, width) numpy shape
        frame[y:y+h, x:x+w] = cv2.resize(face_resized_color, original_shape[::-1], interpolation=cv2.INTER_CUBIC)
        # Draw the keypoints on the face image
        for keypoint in points:
            cv2.circle(face_resized_color2, (int(keypoint[0]), int(keypoint[1])), 1, (0, 255, 0), 1)
        frame2[y:y+h, x:x+w] = cv2.resize(face_resized_color2, original_shape[::-1], interpolation=cv2.INTER_CUBIC)
    # Show the image with the glasses added
    cv2.imshow("With Glass", frame)
    # Show the image with the keypoints added
    cv2.imshow("With Keypoints", frame2)
    # Exit the loop when the 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
# Release the camera and close the windows
camera.release()
cv2.destroyAllWindows()
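For reference, the keypoint indices used above (7, 8, 9, 10) follow the column order of training.csv as read in utils.load_data. The full ordering below is an assumption based on the Kaggle Facial Keypoints Detection dataset the CSV format matches, not something recorded in this commit:

# Assumed keypoint column ordering (Kaggle Facial Keypoints Detection);
# points[i] in the loop above is the (x, y) pair for the i-th name here.
KEYPOINT_NAMES = [
    'left_eye_center',          # 0
    'right_eye_center',         # 1
    'left_eye_inner_corner',    # 2
    'left_eye_outer_corner',    # 3
    'right_eye_inner_corner',   # 4
    'right_eye_outer_corner',   # 5
    'left_eyebrow_inner_end',   # 6
    'left_eyebrow_outer_end',   # 7
    'right_eyebrow_inner_end',  # 8
    'right_eyebrow_outer_end',  # 9
    'nose_tip',                 # 10
    'mouth_left_corner',        # 11
    'mouth_right_corner',       # 12
    'mouth_center_top_lip',     # 13
    'mouth_center_bottom_lip',  # 14
]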
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Activation, BatchNormalization, Convolution2D, \
    MaxPooling2D, Dropout, Input
from keras.layers import Flatten, Dense
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
def create_model():
    '''
    The network takes a 96x96 single-channel grayscale image as input and
    outputs 30 values: the x and y coordinates of 15 keypoints.
    '''
    model = Sequential()
    # 96
    model.add(BatchNormalization(input_shape=(96, 96, 1)))
    model.add(Convolution2D(32, (5, 5), activation='relu', padding="same"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    planes = 64
    # 48 -> 24 -> 12 -> 6 -> 3
    for _ in range(4):
        model.add(Convolution2D(planes, (3, 3), padding="same"))
        model.add(BatchNormalization())
        model.add(Activation(activation="relu"))
        model.add(Convolution2D(planes, (3, 3), padding="same"))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        planes *= 2
    # 3 -> 1 (valid 3x3 convolution over the 3x3 feature map)
    model.add(Convolution2D(planes, (3, 3), padding="valid", activation="relu"))
    model.add(Convolution2D(planes // 2, (1, 1), padding="valid", activation="relu"))
    model.add(Flatten())
    model.add(Dense(planes // 4, activation="relu"))
    model.add(Dropout(rate=0.5))
    model.add(Dense(30, activation="tanh"))
    return model
def compile_model(model):
    optimizer = Adam(lr=1e-3)
    loss = "mean_squared_error"
    metrics = ['mae']
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
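# Note: the targets are scaled to [-1, 1] (see utils.load_data), so the reported
# 'mae' is in normalized units; multiplying by 48 converts it to pixels in the
# 96x96 input (e.g. a validation MAE of 0.05 is roughly 2.4 pixels).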
def train_model(model, X_train, y_train):
    return model.fit(X_train, y_train, epochs=500, batch_size=200, verbose=1, validation_split=0.2)

def save_model(model, fileName):
    model.save(fileName + '.h5')

def load_trained_model(fileName):
    return load_model(fileName + '.h5')
from utils import load_data
import kmodel
import sys
import pickle
# Load the training data
X_train, y_train = load_data()
# Create the network structure
my_model = kmodel.create_model()
with open("net.txt", mode="w") as f:
    orig_stdout = sys.stdout
    sys.stdout = f
    my_model.summary()
    sys.stdout = orig_stdout
# Compile the model
kmodel.compile_model(my_model)
# my_model = kmodel.load_trained_model('my_model')
# Train the model
history = kmodel.train_model(my_model, X_train, y_train)
with open("log.pickle", mode="wb") as f:
    pickle.dump(history.history, f)
# Save the model
kmodel.save_model(my_model, 'my_model')
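To inspect the pickled training log afterwards, a minimal sketch (assuming matplotlib is installed; exact metric key names vary across Keras versions, e.g. 'mean_absolute_error' vs 'mae'):

import pickle
import matplotlib.pyplot as plt

# history.history is a dict of per-epoch lists keyed by metric name
with open("log.pickle", mode="rb") as f:
    log = pickle.load(f)
plt.plot(log['loss'], label='training loss')
plt.plot(log['val_loss'], label='validation loss')
plt.xlabel('epoch')
plt.ylabel('mean squared error')
plt.legend()
plt.show()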
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
batch_normalization_1 (Batch (None, 96, 96, 1)         4
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 96, 96, 32)        832
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 48, 48, 32)        0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 64)        18496
_________________________________________________________________
batch_normalization_2 (Batch (None, 48, 48, 64)        256
_________________________________________________________________
activation_1 (Activation)    (None, 48, 48, 64)        0
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 48, 48, 64)        36928
_________________________________________________________________
batch_normalization_3 (Batch (None, 48, 48, 64)        256
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 24, 64)        0
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 128)       73856
_________________________________________________________________
batch_normalization_4 (Batch (None, 24, 24, 128)       512
_________________________________________________________________
activation_2 (Activation)    (None, 24, 24, 128)       0
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 24, 24, 128)       147584
_________________________________________________________________
batch_normalization_5 (Batch (None, 24, 24, 128)       512
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 12, 12, 128)       0
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 12, 12, 256)       295168
_________________________________________________________________
batch_normalization_6 (Batch (None, 12, 12, 256)       1024
_________________________________________________________________
activation_3 (Activation)    (None, 12, 12, 256)       0
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 12, 12, 256)       590080
_________________________________________________________________
batch_normalization_7 (Batch (None, 12, 12, 256)       1024
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 6, 6, 256)         0
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 6, 6, 512)         1180160
_________________________________________________________________
batch_normalization_8 (Batch (None, 6, 6, 512)         2048
_________________________________________________________________
activation_4 (Activation)    (None, 6, 6, 512)         0
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 6, 6, 512)         2359808
_________________________________________________________________
batch_normalization_9 (Batch (None, 6, 6, 512)         2048
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 3, 3, 512)         0
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 1, 1, 1024)        4719616
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 1, 1, 512)         524800
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0
_________________________________________________________________
dense_2 (Dense)              (None, 30)                7710
=================================================================
Total params: 10,094,050
Trainable params: 10,090,208
Non-trainable params: 3,842
_________________________________________________________________
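As a consistency check on the table: a Conv2D layer has kernel_h * kernel_w * in_channels * out_channels + out_channels parameters, so conv2d_1 has 5*5*1*32 + 32 = 832; each BatchNormalization layer has 4 parameters per channel (gamma and beta trainable, the moving mean and variance not), so batch_normalization_2 has 4*64 = 256, and the non-trainable total is 2*(1+64+64+128+128+256+256+512+512) = 3,842.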
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
import cv2
def load_data(test=False):
    """
    When test is True, load the test data; otherwise load the training data.
    """
    FTRAIN = './data/training.csv'
    FTEST = './data/test.csv'
    fname = FTEST if test else FTRAIN
    df = read_csv(fname)
    # Convert the space-separated numbers in the 'Image' column to a numpy array
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
    # Drop rows with missing values
    df = df.dropna()
    # Convert the pixel values from integers in [0, 255] to floats in [0, 1]
    X = np.vstack(df['Image'].values) / 255.
    X = X.astype(np.float32)
    # Reshape each row of X into a 96 x 96 x 1 array
    X = X.reshape(-1, 96, 96, 1)
    # Only FTRAIN contains the keypoint (target) values
    if not test:
        y = df[df.columns[:-1]].values
        # Normalize the keypoint values to [-1, 1]
        y = (y - 48) / 48
        # Shuffle the training data
        X, y = shuffle(X, y, random_state=42)
        y = y.astype(np.float32)
    else:
        y = None
    return X, y
def transparentOverlay(src, overlay, pos=(0, 0), scale=1):
    """
    Overlay an image with an alpha channel (a PNG) on top of the src image.
    :param src: background image
    :param overlay: image with an alpha channel (BGRA)
    :param pos: starting position of the overlay
    :param scale: scaling factor for the overlay image
    :return: resulting image
    """
    if scale != 1:
        overlay = cv2.resize(overlay, (0, 0), fx=scale, fy=scale)
    # Height and width of the overlay image
    h, w, _ = overlay.shape
    # Starting coordinates of the overlay (note the naming: below, x indexes
    # rows and y indexes columns)
    y, x = pos[0], pos[1]
    # The commented-out code below is the unoptimized version; it is easier to
    # follow and behaves the same as the vectorized version after it.
    """
    # Height and width of the src image
    rows, cols, _ = src.shape
    for i in range(h):
        for j in range(w):
            if x+i >= rows or y+j >= cols:
                continue
            alpha = float(overlay[i][j][3]/255.0)  # read the alpha channel value
            src[x+i][y+j] = alpha*overlay[i][j][:3] + (1-alpha)*src[x+i][y+j]
    return src """
    # Blend per pixel: dst = alpha * overlay + (1 - alpha) * src
    alpha = overlay[:, :, 3] / 255.0
    alpha = alpha[..., np.newaxis]
    src[x:x+h, y:y+w, :] = alpha * overlay[:, :, :3] + (1 - alpha) * src[x:x+h, y:y+w, :]
    return src
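
As a quick sanity check of load_data, an illustrative sketch (assumes the CSV files are in place; 'sample_keypoints.png' is just an example output name):

if __name__ == '__main__':
    # Visualize one training sample with its keypoints, undoing both
    # normalizations (image: * 255, keypoints: * 48 + 48).
    X, y = load_data()
    img = cv2.cvtColor((X[0, :, :, 0] * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
    pts = y[0] * 48 + 48
    for px, py in zip(pts[0::2], pts[1::2]):
        cv2.circle(img, (int(px), int(py)), 1, (0, 255, 0), 1)
    cv2.imwrite('sample_keypoints.png', img)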