From 1e8350963dc4dd790056f5053778215b8f4e20fe Mon Sep 17 00:00:00 2001
From: zeyu <zeyu@unifisoftware.com>
Date: Mon, 20 Apr 2020 20:09:42 -0700
Subject: [PATCH] initial commit

---
 .DS_Store                                                               | Bin 0 -> 6148 bytes
 .ipynb_checkpoints/Untitled-checkpoint.ipynb                            | 391 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 Readme.txt                                                              |   3 +++
 __MACOSX/._project2_greedy_upload                                       | Bin 0 -> 220 bytes
 __MACOSX/project2_greedy_upload/._.DS_Store                             | Bin 0 -> 120 bytes
 __MACOSX/project2_greedy_upload/._Readme.txt                            | Bin 0 -> 484 bytes
 __MACOSX/project2_greedy_upload/._data_train.xlsx                       | Bin 0 -> 334 bytes
 __MACOSX/project2_greedy_upload/._project2_main.py                      | Bin 0 -> 176 bytes
 __MACOSX/project2_greedy_upload/._票务对话机器人项目说明.pdf | Bin 0 -> 343 bytes
 data_train.xlsx                                                         | Bin 0 -> 17277 bytes
 project2_main.py                                                        | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 stopword.txt                                                            |  13 +++++++++++++
 12 files changed, 679 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb
 create mode 100644 Readme.txt
 create mode 100755 __MACOSX/._project2_greedy_upload
 create mode 100644 __MACOSX/project2_greedy_upload/._.DS_Store
 create mode 100644 __MACOSX/project2_greedy_upload/._Readme.txt
 create mode 100644 __MACOSX/project2_greedy_upload/._data_train.xlsx
 create mode 100644 __MACOSX/project2_greedy_upload/._project2_main.py
 create mode 100644 __MACOSX/project2_greedy_upload/._票务对话机器人项目说明.pdf
 create mode 100644 data_train.xlsx
 create mode 100644 project2_main.py
 create mode 100644 stopword.txt

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..0003bba
Binary files /dev/null and b/.DS_Store differ
diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000..a749903
--- /dev/null
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,391 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
+      "/Users/zeyusu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import fool\n",
+    "import time\n",
+    "import re\n",
+    "import random\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.linear_model import LogisticRegression"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "stopwords = {}\n",
+    "with open(r'stopword.txt', 'r', encoding='utf-8') as fr:\n",
+    "    for word in fr:\n",
+    "        stopwords[word.strip()] = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "class CLF_MODEL:\n",
+    "    # 类目标:该类将所有模型训练、预测、数据预处理、意图识别的函数包括其中\n",
+    "\n",
+    "    # 初始化模块\n",
+    "    def __init__(self):\n",
+    "        self.model = \"\"  # 成员变量,用于存储模型\n",
+    "        self.vectorizer = \"\"  # 成员变量,用于存储tfidf统计值\n",
+    "\n",
+    "    # 训练模块\n",
+    "    def train(self):\n",
+    "        # 函数目标:读取训练数据,训练意图分类模型,并将训练好的分类模型赋值给成员变量self.model\n",
+    "        # input:无\n",
+    "        # output:无\n",
+    "\n",
+    "        # 从excel文件读取训练样本\n",
+    "        d_train = pd.read_excel(\"data_train.xlsx\")\n",
+    "        # 对训练数据进行预处理\n",
+    "        d_train.sentence_train = d_train.sentence_train.apply(self.fun_clean)\n",
+    "        print(\"训练样本 = %d\" % len(d_train))\n",
+    "\n",
+    "        \"\"\"\n",
+    "        TODO:利用sklearn中的函数进行训练,将句子转化为特征features\n",
+    "        \"\"\"\n",
+    "        self.vectorizer = TfidfVectorizer()\n",
+    "        X = self.vectorizer.fit_transform(d_train.sentence_train)\n",
+    "        self.model = LogisticRegression(penalty='l1', solver='liblinear')\n",
+    "\n",
+    "        self.model.fit(X, d_train.label)\n",
+    "\n",
+    "    # 预测模块(使用模型预测)\n",
+    "    def predict_model(self, sentence):\n",
+    "        # 函数目标:使用意图分类模型预测意图\n",
+    "        #  input:sentence(用户输入)\n",
+    "        # output:clf_result(意图类别),score(意图分数)\n",
+    "\n",
+    "        # --------------\n",
+    "        # 对样本中没有的特殊情况做特别判断\n",
+    "        if sentence in [\"好的\", \"需要\", \"是的\", \"要的\", \"好\", \"要\", \"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        # --------------\n",
+    "\n",
+    "        \"\"\"\n",
+    "        TODO:利用已训练好的意图分类模型进行意图识别\n",
+    "        \"\"\"\n",
+    "        sentence = self.fun_clean(sentence)\n",
+    "        x = self.vectorizer.transform([sentence])\n",
+    "        clf_result = self.model.predict(x)[0]\n",
+    "        score = self.model.predict_proba(x)[0][clf_result]\n",
+    "\n",
+    "\n",
+    "        return clf_result, score\n",
+    "\n",
+    "    # 预测模块(使用规则)\n",
+    "    def predict_rule(self, sentence):\n",
+    "        # 函数目标:如果模型训练出现异常,可以使用规则进行预测,同时也可以让学员融合\"模型\"及\"规则\"的预测方式\n",
+    "        # input:sentence(用户输入)\n",
+    "        # output:clf_result(意图类别),score(意图分数)\n",
+    "\n",
+    "        sentence = sentence.replace(' ', '')\n",
+    "        if re.findall(r'不需要|不要|停止|终止|退出|不买|不定|不订', sentence):\n",
+    "            return 2, 0.8\n",
+    "        elif re.findall(r'订|定|预定|买|购', sentence) or sentence in [\"好的\",\"需要\",\"是的\",\"要的\",\"好\",\"要\",\"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        else:\n",
+    "            return 0, 0.8\n",
+    "\n",
+    "    # 预处理函数\n",
+    "    def fun_clean(self, sentence):\n",
+    "        # 函数目标:预处理函数,将必要的实体转换成统一符号(利于分类准确),去除停用词等\n",
+    "        # input:sentence(用户输入语句)\n",
+    "        # output:sentence(预处理结果)\n",
+    "\n",
+    "        \"\"\"\n",
+    "        TODO:预处理函数,将必要的实体转换成统一符号(利于分类准确),去除停用词等\n",
+    "        \"\"\"\n",
+    "        words, ners = fool.analysis(sentence)\n",
+    "\n",
+    "        res = []\n",
+    "        ner_idx = 0\n",
+    "        cur_idx = 0\n",
+    "        for word in words[0]:\n",
+    "            if ner_idx < len(ners[0]) and cur_idx == ners[0][ner_idx][0]:\n",
+    "                if ners[0][ner_idx][0] == 'time':\n",
+    "                    res.append('DATE')\n",
+    "                elif ners[0][ner_idx][0] == 'location':\n",
+    "                    res.append('CITY')\n",
+    "                elif word[0] not in stopwords:\n",
+    "                    res.append(word[0])\n",
+    "                ner_idx += 1\n",
+    "            elif word[0] not in stopwords:\n",
+    "                res.append(word[0])\n",
+    "            cur_idx += len(word)\n",
+    "                \n",
+    "        return ' '.join(res)\n",
+    "\n",
+    "    # 分类主函数\n",
+    "    def fun_clf(self, sentence):\n",
+    "        # 函数目标:意图识别主函数\n",
+    "        # input:sentence( 用户输入语句)\n",
+    "        # output:clf_result(意图类别),score(意图分数)\n",
+    "\n",
+    "        # 对用户输入进行预处理\n",
+    "        sentence = self.fun_clean(sentence)\n",
+    "        # 得到意图分类结果(0为“查询”类别,1为“订票”类别,2为“终止服务”类别)\n",
+    "        clf_result, score = self.predict_model(sentence)  # 使用训练的模型进行意图预测\n",
+    "        # clf_result, score = self.predict_rule(sentence)  # 使用规则进行意图预测(可与用模型进行意图识别的方法二选一)\n",
+    "        return clf_result, score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "sentence = '帮 我 查 一下 DATE 去 CITY 的 票'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "训练样本 = 99\n"
+     ]
+    }
+   ],
+   "source": [
+    "clf_obj = CLF_MODEL()\n",
+    "# 完成意图识别模型的训练\n",
+    "clf_obj.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "clf_result, score = clf_obj.predict_model(sentence)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def fun_replace_num(sentence):\n",
+    "    # 函数目标:替换时间中的数字(目的是便于实体识别包fool对实体的识别)\n",
+    "    # input:sentence\n",
+    "    # output:sentence\n",
+    "\n",
+    "    # 定义要替换的数字\n",
+    "    time_num = {\"一\":\"1\",\"二\":\"2\",\"三\":\"3\",\"四\":\"4\",\"五\":\"5\",\"六\":\"6\",\"七\":\"7\",\"八\":\"8\",\"九\":\"9\",\"十\":\"10\",\"十一\":\"11\",\"十二\":\"12\"}\n",
+    "    for k, v in time_num.items():\n",
+    "        sentence = sentence.replace(k, v)\n",
+    "    return sentence"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def slot_fill(sentence, key=None):\n",
+    "    # 函数目标:填槽函数(该函数从sentence中寻找需要的内容,完成填槽工作)\n",
+    "    # input:sentence(用户输入), key(指定槽位,只对该句话提取指定槽位的信息)\n",
+    "    # output:slot(返回填槽的结果,以json格式返回,key为槽位名,value为值)\n",
+    "\n",
+    "    slot = {}\n",
+    "    # 进行实体识别\n",
+    "    slot_lst = {\"time\":\"time\", \"date\":\"time\", \"from_city\":'location', \"to_city\":'location'}\n",
+    "    words, ners = fool.analysis(sentence)\n",
+    "    \"\"\"\n",
+    "    TODO:从sentence中寻找需要的内容,完成填槽工作\n",
+    "    \"\"\"\n",
+    "    if not key:\n",
+    "        keys = [\"time\", \"date\", \"from_city\", \"to_city\"]\n",
+    "    else:\n",
+    "        keys = [key]\n",
+    "    for key in keys:\n",
+    "        print(key)\n",
+    "        if ners != [[]]:\n",
+    "            for i in range(len(ners[0])):\n",
+    "                if ners[0][i][2] == slot_lst[key]:\n",
+    "                    if key == \"time\" and not isTimeFormat(ners[0][i][3]):\n",
+    "                        continue\n",
+    "                    slot[key] = ners[0][i][3]\n",
+    "                    del ners[0][i]\n",
+    "                    break\n",
+    "    return slot\n",
+    "\n",
+    "\n",
+    "def fun_wait(clf_obj):\n",
+    "    # 函数目标:等待,获取用户输入问句\n",
+    "    # input:CLF_MODEL类实例化对象\n",
+    "    # output:clf_result(用户输入意图类别), score(意图识别分数), sentence(用户输入)\n",
+    "\n",
+    "    # 等待用户输入\n",
+    "    print(\"\\n\\n\\n\")\n",
+    "    print(\"-------------------------------------------------------------\")\n",
+    "    print(\"----*------*-----*-----*----*-----*-----*-----*-----*------\")\n",
+    "    print(\"Starting ...\")\n",
+    "    sentence = input(\"客服:请问需要什么服务?(时间请用12小时制表示)\\n\")\n",
+    "    # 对用户输入进行意图识别\n",
+    "    clf_result, score = clf_obj.fun_clf(sentence)\n",
+    "    return clf_result, score, sentence\n",
+    "\n",
+    "\n",
+    "def fun_search(clf_result, sentence):\n",
+    "    # 函数目标:为用户查询余票\n",
+    "    # input:clf_result(意图分类结果), sentence(用户输入问句)\n",
+    "    # output:是否有票\n",
+    "\n",
+    "    # 定义槽存储空间\n",
+    "    name = {\"time\":\"出发时间\", \"date\":\"出发日期\", \"from_city\":\"出发城市\", \"to_city\":\"到达城市\"}\n",
+    "    slot = {\"time\":\"\", \"date\":\"\", \"from_city\":\"\", \"to_city\":\"\"}\n",
+    "    # 使用用户第一句话进行填槽\n",
+    "    sentence = fun_replace_num(sentence)\n",
+    "    slot_init = slot_fill(sentence)\n",
+    "    for key in slot_init.keys():\n",
+    "        slot[key] = slot_init[key]\n",
+    "    # 对未填充对槽位,向用户提问,进行针对性填槽\n",
+    "    while \"\" in slot.values():\n",
+    "        for key in slot.keys():\n",
+    "            if slot[key]==\"\":\n",
+    "                sentence = input(\"客服:请问%s是?\\n\"%(name[key]))\n",
+    "                sentence = fun_replace_num(sentence)\n",
+    "                slot_cur = slot_fill(sentence, key)\n",
+    "                for key in slot_cur.keys():\n",
+    "                    if slot[key]==\"\":\n",
+    "                        slot[key] = slot_cur[key]\n",
+    "\n",
+    "    # 查询是否有票,并答复用户(本次查询是否有票使用随机数完成,实际情况可查询数据库返回)\n",
+    "    if random.random()>0.5:\n",
+    "        print(\"客服:%s%s从%s到%s的票充足\"%(slot[\"date\"], slot[\"time\"], slot[\"from_city\"], slot[\"to_city\"]))\n",
+    "        # 返回1表示有票\n",
+    "        return 1\n",
+    "    else:\n",
+    "        print(\"客服:%s%s从%s到%s无票\" % (slot[\"date\"], slot[\"time\"], slot[\"from_city\"], slot[\"to_city\"]))\n",
+    "        print(\"End !!!\")\n",
+    "        print(\"----*------*-----*-----*----*-----*-----*-----*-----*------\")\n",
+    "        print(\"-------------------------------------------------------------\")\n",
+    "        # 返回0表示无票\n",
+    "        return 0\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<_sre.SRE_Match object; span=(0, 3), match='4pm'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "time='4pm' \n",
+    "print(re.match('^\\d{1,2}([:.]?\\d{1,2})?([ ]?[a|p]m)?$',time))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[('4', 'm'), ('pm', 'nx')]]\n",
+      "[[]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "s = '4pm'\n",
+    "words, ners = fool.analysis(s)\n",
+    "print(words)\n",
+    "print(ners)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Readme.txt b/Readme.txt
new file mode 100644
index 0000000..49258d5
--- /dev/null
+++ b/Readme.txt
@@ -0,0 +1,3 @@
+project2_main.py python执行脚本(所有的代码位置)
+data_train.xlsx 意图分类训练数据
+stopword.txt 停用词
diff --git a/__MACOSX/._project2_greedy_upload b/__MACOSX/._project2_greedy_upload
new file mode 100755
index 0000000..b5ea4b8
Binary files /dev/null and b/__MACOSX/._project2_greedy_upload differ
diff --git a/__MACOSX/project2_greedy_upload/._.DS_Store b/__MACOSX/project2_greedy_upload/._.DS_Store
new file mode 100644
index 0000000..a5b28df
Binary files /dev/null and b/__MACOSX/project2_greedy_upload/._.DS_Store differ
diff --git a/__MACOSX/project2_greedy_upload/._Readme.txt b/__MACOSX/project2_greedy_upload/._Readme.txt
new file mode 100644
index 0000000..bdaae4b
Binary files /dev/null and b/__MACOSX/project2_greedy_upload/._Readme.txt differ
diff --git a/__MACOSX/project2_greedy_upload/._data_train.xlsx b/__MACOSX/project2_greedy_upload/._data_train.xlsx
new file mode 100644
index 0000000..395d34b
Binary files /dev/null and b/__MACOSX/project2_greedy_upload/._data_train.xlsx differ
diff --git a/__MACOSX/project2_greedy_upload/._project2_main.py b/__MACOSX/project2_greedy_upload/._project2_main.py
new file mode 100644
index 0000000..ce1c222
Binary files /dev/null and b/__MACOSX/project2_greedy_upload/._project2_main.py differ
diff --git "a/__MACOSX/project2_greedy_upload/._\347\245\250\345\212\241\345\257\271\350\257\235\346\234\272\345\231\250\344\272\272\351\241\271\347\233\256\350\257\264\346\230\216.pdf" "b/__MACOSX/project2_greedy_upload/._\347\245\250\345\212\241\345\257\271\350\257\235\346\234\272\345\231\250\344\272\272\351\241\271\347\233\256\350\257\264\346\230\216.pdf"
new file mode 100644
index 0000000..d79570e
Binary files /dev/null and "b/__MACOSX/project2_greedy_upload/._\347\245\250\345\212\241\345\257\271\350\257\235\346\234\272\345\231\250\344\272\272\351\241\271\347\233\256\350\257\264\346\230\216.pdf" differ
diff --git a/data_train.xlsx b/data_train.xlsx
new file mode 100644
index 0000000..86f353b
Binary files /dev/null and b/data_train.xlsx differ
diff --git a/project2_main.py b/project2_main.py
new file mode 100644
index 0000000..c51a7df
--- /dev/null
+++ b/project2_main.py
@@ -0,0 +1,272 @@
+# coding=utf-8
+
+import pandas as pd
+import fool
+import re
+import random
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+
+
+# -----------------------------------------------------
+# Load the stop-word dictionary (one word per line in stopword.txt).
+# This file only tests key membership, so every value is a placeholder 0.
+with open(r'stopword.txt', 'r', encoding='utf-8') as fr:
+    # strip() removes the trailing newline of each line
+    stopwords = {line.strip(): 0 for line in fr}
+# -----------------------------------------------------
+
+
+# 定义类
+class CLF_MODEL:
+    # Intent classifier: bundles model training, prediction, text preprocessing and intent recognition.
+
+    # Initialisation
+    def __init__(self):
+        self.model = None  # trained classifier; assigned by train()
+        self.vectorizer = None  # fitted TF-IDF vectorizer; assigned by train()
+
+    # Training
+    def train(self):
+        # Read the training data, fit the TF-IDF features and the intent classifier, and store both on self.
+        # input: none (reads "data_train.xlsx" relative to the working directory)
+        # output: none
+
+        # Read the training samples from the Excel file (columns used below: sentence_train, label)
+        d_train = pd.read_excel("data_train.xlsx")
+        # Preprocess the training sentences with the same fun_clean() that fun_clf() applies to user input
+        d_train.sentence_train = d_train.sentence_train.apply(self.fun_clean)
+        print("训练样本 = %d" % len(d_train))
+
+        # fun_clean() returns space-separated tokens. The vectorizer's default token_pattern only keeps
+        # tokens of two or more characters, which would silently drop single-character Chinese words
+        # such as "订" or "买"; accept tokens of any length instead.
+        self.vectorizer = TfidfVectorizer(token_pattern=r'(?u)\b\w+\b')
+        X = self.vectorizer.fit_transform(d_train.sentence_train)
+        self.model = LogisticRegression(penalty='l1', solver='liblinear')
+
+        self.model.fit(X, d_train.label)
+
+    # Prediction (model based)
+    def predict_model(self, sentence):
+        # Predict the intent of a sentence with the trained classifier.
+        # input: sentence (user input; fun_clf() passes it already preprocessed)
+        # output: clf_result (intent label), score (predicted probability of that label)
+
+        # --------------
+        # Special case: short affirmative replies (noted as absent from the training samples) are intent 1
+        if sentence in ["好的", "需要", "是的", "要的", "好", "要", "是"]:
+            return 1, 0.8
+        # --------------
+
+        # predict_proba() columns are ordered like self.model.classes_, so the column of the predicted
+        # label is looked up there; using the label itself as the column index is only correct when
+        # the labels happen to be exactly 0..k-1.
+        x = self.vectorizer.transform([sentence])
+        clf_result = self.model.predict(x)[0]
+        score = self.model.predict_proba(x)[0][list(self.model.classes_).index(clf_result)]
+        return clf_result, score
+
+    # Prediction (rule based)
+    def predict_rule(self, sentence):
+        # Keyword-rule prediction, usable when model training fails or in combination with the model.
+        # input: sentence (user input)
+        # output: clf_result (intent label), score (always 0.8)
+
+        sentence = sentence.replace(' ', '')
+        if re.search(r'不需要|不要|停止|终止|退出|不买|不定|不订', sentence):  # refusals first: "不订" also contains "订"
+            return 2, 0.8
+        elif re.search(r'订|定|预定|买|购', sentence) or sentence in ["好的","需要","是的","要的","好","要","是"]:
+            return 1, 0.8
+        else:
+            return 0, 0.8
+
+    # Preprocessing
+    def fun_clean(self, sentence):
+        # Normalise a sentence for classification: time/location entities become placeholders, stop words are dropped.
+        # input: sentence (raw user input)
+        # output: sentence (space-separated tokens)
+
+        # fool.analysis() returns (words, ners); element 0 of each is used for this sentence. As indexed
+        # below, word[0] is the word text, entity[0] its start offset and entity[2] its type.
+        # NOTE(review): only the word that starts exactly at an entity's offset is replaced - confirm multi-word entities.
+        words, ners = fool.analysis(sentence)
+        res = []
+        ner_idx = 0  # index of the next entity that has not been matched yet
+        cur_idx = 0  # running total of the lengths of the words already visited
+        for word in words[0]:
+            if ner_idx < len(ners[0]) and cur_idx == ners[0][ner_idx][0]:
+                if ners[0][ner_idx][2] == 'time':
+                    res.append('DATE')
+                elif ners[0][ner_idx][2] == 'location':
+                    res.append('CITY')
+                elif word[0] not in stopwords:
+                    res.append(word[0])
+                ner_idx += 1
+            elif word[0] not in stopwords:
+                res.append(word[0])
+            cur_idx += len(word[0])
+        return ' '.join(res)
+
+    # Classification entry point
+    def fun_clf(self, sentence):
+        # Main intent-recognition entry point: preprocess, then classify.
+        # input: sentence (raw user input)
+        # output: clf_result (intent label), score (intent score)
+
+        # Preprocess the user input
+        sentence = self.fun_clean(sentence)
+        # Intent labels: 0 = "query", 1 = "book a ticket", 2 = "end service"
+        clf_result, score = self.predict_model(sentence)  # model-based prediction
+        # clf_result, score = self.predict_rule(sentence)  # rule-based alternative (use one or the other)
+        return clf_result, score
+
+
+def fun_replace_num(sentence):
+    # Convert Chinese numerals 1-99 (e.g. "五" -> "5", "十二" -> "12", "三十一" -> "31") to Arabic digits for the NER step.
+    # input: sentence / output: sentence with the numerals converted, all other text unchanged
+    digits = {ch: val for val, ch in enumerate("一二三四五六七八九", 1)}
+    pattern = r'([一二三四五六七八九]?)十([一二三四五六七八九]?)|([一二三四五六七八九])'
+    def to_arabic(m):
+        if m.group(3):  # a lone digit with no "十" next to it
+            return str(digits[m.group(3)])
+        return str(digits.get(m.group(1), 1) * 10 + digits.get(m.group(2), 0))  # no tens digit -> 1x, no ones digit -> x0
+    return re.sub(pattern, to_arabic, sentence)  # one pass, so "十一" is never split into "十" and "一"
+
+
+def slot_fill(sentence, key=None):
+    # Slot filling: extract slot values from `sentence` using the entities returned by fool.analysis().
+    # input: sentence (user input), key (optional slot name; when given, only that slot is extracted)
+    # output: slot (dict of slot name -> extracted text; slots without a match are absent)
+    slot = {}
+    # NER type expected for each slot: "time" and "date" both read 'time' entities, both cities 'location'
+    slot_lst = {"time":"time", "date":"time", "from_city":'location', "to_city":'location'}
+    words, ners = fool.analysis(sentence)
+    # 12-hour clock text such as "4", "4:30" or "4 pm" (the former class [a|p] also accepted a literal "|")
+    time_re = re.compile(r'^\d{1,2}([:.]?\d{1,2})?([ ]?[ap]m)?$')
+    entities = list(ners[0]) if ners else []  # tolerate an empty result; entities are consumed below
+    keys = [key] if key else ["time", "date", "from_city", "to_city"]
+    for name in keys:
+        for i, ent in enumerate(entities):
+            if ent[2] != slot_lst[name]:
+                continue
+            if name == "time" and not time_re.match(ent[3]):
+                continue
+            slot[name] = ent[3]
+            del entities[i]  # an entity may fill only one slot
+            break
+    # fool may return no entity at all for a reply like "4pm"; accept a reply that is nothing but a clock time
+    text = sentence.strip()
+    if "time" in keys and "time" not in slot and time_re.match(text):
+        slot["time"] = text
+    return slot
+
+
+def fun_wait(clf_obj):
+    # Wait state: print the session banner, read one user request and classify its intent.
+    # input: clf_obj (CLF_MODEL instance)
+    # output: clf_result (intent label), score (intent score), sentence (raw user input)
+    banner = (
+        "\n\n\n",
+        "-------------------------------------------------------------",
+        "----*------*-----*-----*----*-----*-----*-----*-----*------",
+        "Starting ...",
+    )
+    print(*banner, sep="\n")
+    sentence = input("客服:请问需要什么服务?(时间请用12小时制表示)\n")
+    intent, intent_score = clf_obj.fun_clf(sentence)
+    return intent, intent_score, sentence
+
+
+def fun_search(clf_result, sentence):
+    # Query state: collect the four trip slots from the user, then report ticket availability.
+    # input: clf_result (intent label; not used in this function), sentence (the user's first request)
+    # output: 1 when tickets are available, 0 when there are none
+
+    # Prompt wording for each slot, and the slot store ("" marks a slot that is still empty)
+    name = {"time":"出发时间", "date":"出发日期", "from_city":"出发城市", "to_city":"到达城市"}
+    slot = {"time":"", "date":"", "from_city":"", "to_city":""}
+
+    # First pass: take whatever the opening sentence already provides
+    first_pass = slot_fill(fun_replace_num(sentence))
+    slot.update(first_pass)
+
+    # Ask about each empty slot in turn, and repeat until none is left empty
+    while "" in slot.values():
+        for pending in slot:
+            if slot[pending] != "":
+                continue
+            reply = fun_replace_num(input("客服:请问%s是?\n"%(name[pending])))
+            for found, value in slot_fill(reply, pending).items():
+                if slot[found]=="":
+                    slot[found] = value
+
+    # Availability is simulated with a random draw; a real system would query a database
+    route = (slot["date"], slot["time"], slot["from_city"], slot["to_city"])
+    if random.random()>0.5:
+        print("客服:%s%s从%s到%s的票充足"%route)
+        # 1 means tickets are available
+        return 1
+
+    print("客服:%s%s从%s到%s无票" % route)
+    print("End !!!")
+    print("----*------*-----*-----*----*-----*-----*-----*-----*------")
+    print("-------------------------------------------------------------")
+    # 0 means no ticket
+    return 0
+
+
+def fun_book():
+    # Booking state: announce that the order was placed and print the closing banner (no input, no return value).
+    for closing_line in (
+        "客服:已为您完成订票。\n\n\n",
+        "End !!!",
+        "----*------*-----*-----*----*-----*-----*-----*-----*------",
+        "-------------------------------------------------------------",
+    ):
+        print(closing_line)
+
+
+
+if __name__ == "__main__":
+    # Create the intent classifier and train it before serving any user.
+    clf_obj = CLF_MODEL()
+    clf_obj.train()
+    # Minimum classifier score for a predicted intent to be acted upon;
+    # lower-scoring predictions are ignored and the user is prompted again.
+    threshold = 0.55
+
+    # Serve users forever; every pass through the loop starts in the wait state.
+    while True:
+        clf_result, score, sentence = fun_wait(clf_obj)
+
+        # ---------------------------------------------------------------
+        # Transition (wait --> wait): the score is below the threshold,
+        # or the intent is 2 ("end service").
+        # ---------------------------------------------------------------
+        if score < threshold or clf_result == 2:
+            continue
+
+        # ---------------------------------------------------------------
+        # Transition (wait --> search): the intent is "query" or "book".
+        # fun_search() gathers the trip details and reports availability.
+        # ---------------------------------------------------------------
+        search_result = fun_search(clf_result, sentence)
+
+        # ---------------------------------------------------------------
+        # Transition (search --> wait): fun_search() returned 0 (no ticket).
+        # ---------------------------------------------------------------
+        if search_result == 0:
+            continue
+
+        # Tickets are available: offer to book and classify the reply.
+        sentence = input("客服:需要为您订票吗?\n")
+        clf_result, score = clf_obj.fun_clf(sentence)
+
+        # ---------------------------------------------------------------
+        # Transition (search --> book): the reply is intent 1 ("book").
+        # Any other reply simply returns to the wait state.
+        # ---------------------------------------------------------------
+        if clf_result == 1:
+            fun_book()
+    
diff --git a/stopword.txt b/stopword.txt
new file mode 100644
index 0000000..a2664c4
--- /dev/null
+++ b/stopword.txt
@@ -0,0 +1,13 @@
+的
+到
+一下
+我
+,
+帮
+去
+了
+?
+吧
+。
+请
+么
--
libgit2 0.26.0