Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
C
course-info
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20220312010
course-info
Commits
e93b7dad
Commit
e93b7dad
authored
Sep 21, 2020
by
TeacherZhu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Delete 0920聊天机器人代码.py
parent
6d4994d5
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
266 deletions
+0
-266
课件/0920聊天机器人代码.py
+0
-266
No files found.
课件/0920聊天机器人代码.py
deleted
100644 → 0
View file @
6d4994d5
# coding=utf-8
# coding=utf-8
import
pandas
as
pd
import
fool
import
re
import
random
from
sklearn.feature_extraction.text
import
TfidfVectorizer
from
sklearn.linear_model
import
LogisticRegression
# -----------------------------------------------------
# 加载停用词词典
stopwords
=
{}
with
open
(
r'stopword.txt'
,
'r'
,
encoding
=
'utf-8'
)
as
fr
:
for
word
in
fr
:
stopwords
[
word
.
strip
()]
=
0
# -----------------------------------------------------
# 加载同义词词典
simi
=
{}
with
open
(
r'simi.txt'
,
'r'
,
encoding
=
'utf-8'
)
as
sr
:
for
line
in
sr
:
items
=
line
.
strip
()
.
split
()
if
len
(
items
)
>=
2
:
stopwords
[
items
[
0
]]
=
items
[
1
]
# 定义类
class
CLF_MODEL
:
# 类目标:该类将所有模型训练、预测、数据预处理、意图识别的函数包括其中
# 初始化模块
def
__init__
(
self
):
self
.
model
=
LinearRegression
()
# 成员变量,用于存储模型
self
.
vectorizer
=
TfidfVectorizer
()
# 成员变量,用于存储tfidf统计值
# 训练模块
def
train
(
self
):
# 函数目标:读取训练数据,训练意图分类模型,并将训练好的分类模型赋值给成员变量self.model
# input:无
# output:无
# 从excel文件读取训练样本
d_train
=
pd
.
read_excel
(
"data_train.xlsx"
)
# 对训练数据进行预处理
d_train
.
sentence_train
=
d_train
.
sentence_train
.
apply
(
self
.
fun_clean
)
print
(
"训练样本 =
%
d"
%
len
(
d_train
))
"""
TODO:利用sklearn中的函数进行训练,将句子转化为特征features
"""
features
=
self
.
vectorizer
.
fit_transform
(
d_train
.
sentence_train
.
to_list
())
self
.
model
.
fit
(
features
,
d_train
.
label
)
# 预测模块(使用模型预测)
def
predict_model
(
self
,
sentence
):
# 函数目标:使用意图分类模型预测意图
# input:sentence(用户输入)
# output:clf_result(意图类别),score(意图分数)
# --------------
# 对样本中没有的特殊情况做特别判断
if
sentence
in
[
"好的"
,
"需要"
,
"是的"
,
"要的"
,
"好"
,
"要"
,
"是"
]:
return
1
,
0.8
# --------------
"""
TODO:利用已训练好的意图分类模型进行意图识别
"""
inputs
=
vectorizer
.
transform
(
fool
.
cut
(
self
.
fun_clean
(
sentence
)))
scores
=
self
.
model
.
predict_proba
(
inputs
)
clf_result
=
np
.
argmax
(
scores
,
axis
=
0
)
score
=
socres
[
clf_result
]
return
clf_result
,
score
# 预测模块(使用规则)
def
predict_rule
(
self
,
sentence
):
# 函数目标:如果模型训练出现异常,可以使用规则进行预测,同时也可以让学员融合"模型"及"规则"的预测方式
# input:sentence(用户输入)
# output:clf_result(意图类别),score(意图分数)
sentence
=
sentence
.
replace
(
' '
,
''
)
if
re
.
findall
(
r'不需要|不要|停止|终止|退出|不买|不定|不订'
,
sentence
):
return
2
,
0.8
elif
re
.
findall
(
r'订|定|预定|买|购'
,
sentence
)
or
sentence
in
[
"好的"
,
"需要"
,
"是的"
,
"要的"
,
"好"
,
"要"
,
"是"
]:
return
1
,
0.8
else
:
return
0
,
0.8
# 预处理函数
def
fun_clean
(
self
,
sentence
):
# 函数目标:预处理函数,将必要的实体转换成统一符号(利于分类准确),去除停用词等
# input:sentence(用户输入语句)
# output:sentence(预处理结果)
"""
TODO:预处理函数,将必要的实体转换成统一符号(利于分类准确),去除停用词等
"""
tokens
=
map
(
lambda
x
:
simi
.
get
(
x
,
x
),
sentence
.
split
())
tokens
=
filter
(
lambda
x
:
x
not
in
stopwords
,
tokens
)
sentence
=
' '
.
join
(
tokens
)
return
sentence
# 分类主函数
def
fun_clf
(
self
,
sentence
):
# 函数目标:意图识别主函数
# input:sentence( 用户输入语句)
# output:clf_result(意图类别),score(意图分数)
# 对用户输入进行预处理
sentence
=
self
.
fun_clean
(
sentence
)
# 得到意图分类结果(0为“查询”类别,1为“订票”类别,2为“终止服务”类别)
clf_result
,
score
=
self
.
predict_model
(
sentence
)
# 使用训练的模型进行意图预测
# clf_result, score = self.predict_rule(sentence) # 使用规则进行意图预测(可与用模型进行意图识别的方法二选一)
return
clf_result
,
score
def
fun_replace_num
(
sentence
):
# 函数目标:替换时间中的数字(目的是便于实体识别包fool对实体的识别)
# input:sentence
# output:sentence
# 定义要替换的数字
time_num
=
{
"一"
:
"1"
,
"二"
:
"2"
,
"三"
:
"3"
,
"四"
:
"4"
,
"五"
:
"5"
,
"六"
:
"6"
,
"七"
:
"7"
,
"八"
:
"8"
,
"九"
:
"9"
,
"十"
:
"10"
,
"十一"
:
"11"
,
"十二"
:
"12"
}
for
k
,
v
in
time_num
.
items
():
sentence
=
sentence
.
replace
(
k
,
v
)
return
sentence
def
slot_fill
(
sentence
,
key
=
None
):
# 函数目标:填槽函数(该函数从sentence中寻找需要的内容,完成填槽工作)
# input:sentence(用户输入), key(指定槽位,只对该句话提取指定槽位的信息)
# output:slot(返回填槽的结果,以json格式返回,key为槽位名,value为值)
slot
=
{}
# 进行实体识别
words
,
ners
=
fool
.
analysis
(
sentence
)
"""
TODO:从sentence中寻找需要的内容,完成填槽工作
"""
for
item
in
ners
:
name
,
value
=
item
[
2
],
item
[
3
]
if
name
==
'location'
:
if
'from_city'
in
slot
:
slot
[
'to_city'
]
=
value
else
:
slot
[
'from_city'
]
=
value
else
:
slot
[
name
]
=
value
return
slot
if
not
key
else
slot
.
get
(
key
,{})
def
fun_wait
(
clf_obj
):
# 函数目标:等待,获取用户输入问句
# input:CLF_MODEL类实例化对象
# output:clf_result(用户输入意图类别), score(意图识别分数), sentence(用户输入)
# 等待用户输入
print
(
"
\n\n\n
"
)
print
(
"-------------------------------------------------------------"
)
print
(
"----*------*-----*-----*----*-----*-----*-----*-----*------"
)
print
(
"Starting ..."
)
sentence
=
input
(
"客服:请问需要什么服务?(时间请用12小时制表示)
\n
"
)
# 对用户输入进行意图识别
clf_result
,
score
=
clf_obj
.
fun_clf
(
sentence
)
return
clf_result
,
score
,
sentence
def
fun_search
(
clf_result
,
sentence
):
# 函数目标:为用户查询余票
# input:clf_result(意图分类结果), sentence(用户输入问句)
# output:是否有票
# 定义槽存储空间
name
=
{
"time"
:
"出发时间"
,
"date"
:
"出发日期"
,
"from_city"
:
"出发城市"
,
"to_city"
:
"到达城市"
}
slot
=
{
"time"
:
""
,
"date"
:
""
,
"from_city"
:
""
,
"to_city"
:
""
}
# 使用用户第一句话进行填槽
sentence
=
fun_replace_num
(
sentence
)
slot_init
=
slot_fill
(
sentence
)
for
key
in
slot_init
.
keys
():
slot
[
key
]
=
slot_init
[
key
]
# 对未填充对槽位,向用户提问,进行针对性填槽
while
""
in
slot
.
values
():
for
key
in
slot
.
keys
():
if
slot
[
key
]
==
""
:
sentence
=
input
(
"客服:请问
%
s是?
\n
"
%
(
name
[
key
]))
sentence
=
fun_replace_num
(
sentence
)
slot_cur
=
slot_fill
(
sentence
,
key
)
for
key
in
slot_cur
.
keys
():
if
slot
[
key
]
==
""
:
slot
[
key
]
=
slot_cur
[
key
]
# 查询是否有票,并答复用户(本次查询是否有票使用随机数完成,实际情况可查询数据库返回)
if
random
.
random
()
>
0.5
:
print
(
"客服:
%
s
%
s从
%
s到
%
s的票充足"
%
(
slot
[
"date"
],
slot
[
"time"
],
slot
[
"from_city"
],
slot
[
"to_city"
]))
# 返回1表示有票
return
1
else
:
print
(
"客服:
%
s
%
s从
%
s到
%
s无票"
%
(
slot
[
"date"
],
slot
[
"time"
],
slot
[
"from_city"
],
slot
[
"to_city"
]))
print
(
"End !!!"
)
print
(
"----*------*-----*-----*----*-----*-----*-----*-----*------"
)
print
(
"-------------------------------------------------------------"
)
# 返回0表示无票
return
0
def
fun_book
():
# 函数目标:执行下单订票动作
# input:无
# output:无
print
(
"客服:已为您完成订票。
\n\n\n
"
)
print
(
"End !!!"
)
print
(
"----*------*-----*-----*----*-----*-----*-----*-----*------"
)
print
(
"-------------------------------------------------------------"
)
if
__name__
==
"__main__"
:
# 实例化对象
clf_obj
=
CLF_MODEL
()
# 完成意图识别模型的训练
clf_obj
.
train
()
# 用户定义阈值(当分类器分类的分数大于阈值才采纳本次意图分类结果,目的是排除分数过低的意图分类结果)
threshold
=
0.55
# 循环提供服务
while
1
:
clf_result
,
score
,
sentence
=
fun_wait
(
clf_obj
)
# -------------------------------------------------------------------------------
# 状态转移条件(等待-->等待):用户输入未达到“查询”、“订票”类别的阈值 OR 意图被分类为“终止服务”
# -------------------------------------------------------------------------------
if
score
<
threshold
or
clf_result
==
2
:
continue
# -------------------------------------------------------------------------------
# 状态转移条件(等待-->查询):用户输入分类为“查询” OR “订票”
# -------------------------------------------------------------------------------
else
:
# 收集订票细节信息
search_result
=
fun_search
(
clf_result
,
sentence
)
# 查询无票
# -------------------------------------------------------------------------------
# 状态转移条件(查询-->等待):FUN_SEARCH执行完后用户输入意图为“终止服务” OR FUN_SEARCH返回无票
# -------------------------------------------------------------------------------
if
search_result
==
0
:
continue
# 查询有票
else
:
# 等待用户输入
sentence
=
input
(
"客服:需要为您订票吗?
\n
"
)
# 对用户输入进行意图识别
clf_result
,
score
=
clf_obj
.
fun_clf
(
sentence
)
# -------------------------------------------------------------------------------
# 状态转移条件(查询-->订票):FUN_SEARCH返回有票 AND 用户输入意图为“订票”
# -------------------------------------------------------------------------------
if
clf_result
==
1
:
fun_book
()
continue
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment