commit all file

00f55e40 · 20200913050 · 00f55e40 · 00f55e40 · 00f55e40 · 00f55e40
Commit 00f55e40 authored Jan 17, 2021 by 20200913050
12 changed files
--- a/.idea/encodings.xml
+++ b/.idea/encodings.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding" addBOMForNewFiles="with NO BOM" />
+</project>
\ No newline at end of file
--- a/.idea/greedy.iml
+++ b/.idea/greedy.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.6 (Workspace)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="projectConfiguration" value="pytest" />
+    <option name="PROJECT_TEST_RUNNER" value="pytest" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/project2_greedy.iml" filepath="$PROJECT_DIR$/.idea/project2_greedy.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="f0c9c573-b9ae-4bf1-8e72-d6720fc99052" name="Default Changelist" comment="" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="TRACKING_ENABLED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
+      <file leaf-file-name="project2_main.py" pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/project2_main.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="447">
+              <caret line="143" column="33" selection-start-line="143" selection-start-column="33" selection-end-line="143" selection-end-column="33" />
+              <folding>
+                <element signature="e#16#35#0" expanded="true" />
+              </folding>
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file leaf-file-name="project2_main_original.py" pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/project2_main_original.py">
+          <provider selected="true" editor-type-id="text-editor" />
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="FindInProjectRecents">
+    <findStrings>
+      <find>book</find>
+      <find>fun_replace_num</find>
+      <find>replace</find>
+      <find>clf_model</find>
+    </findStrings>
+    <replaceStrings>
+      <replace>CLF_MODEL</replace>
+    </replaceStrings>
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/project2_20190526.py" />
+        <option value="$PROJECT_DIR$/project2_main.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds" fullScreen="true">
+    <option name="y" value="23" />
+    <option name="width" value="1440" />
+    <option name="height" value="798" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="project2_greedy_upload的副本" type="b2602c69:ProjectViewProjectNode" />
+              <item name="project2_greedy_upload的副本" type="462c0819:PsiDirectoryNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+      <pane id="Scope" />
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+  </component>
+  <component name="RecentsManager">
+    <key name="CopyFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$" />
+    </key>
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="RunManager" selected="Python.project2_main">
+    <configuration name="project2_20190526" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="project2_greedy" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/project2_20190526.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <configuration name="project2_main" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="project2_greedy" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/project2_main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <configuration name="project2_main_original" type="PythonConfigurationType" factoryName="Python" temporary="true">
+      <module name="project2_greedy" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/project2_main_original.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+    </configuration>
+    <list>
+      <item itemvalue="Python.project2_20190526" />
+      <item itemvalue="Python.project2_main_original" />
+      <item itemvalue="Python.project2_main" />
+    </list>
+    <recent_temporary>
+      <list>
+        <item itemvalue="Python.project2_main" />
+        <item itemvalue="Python.project2_main_original" />
+        <item itemvalue="Python.project2_20190526" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="f0c9c573-b9ae-4bf1-8e72-d6720fc99052" name="Default Changelist" comment="" />
+      <created>1558967327224</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1558967327224</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="0" y="0" width="1440" height="900" extended-state="0" />
+    <editor active="true" />
+    <layout>
+      <window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.15694444" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="bottom" id="Event Log" order="7" side_tool="true" />
+      <window_info anchor="bottom" id="Run" order="2" />
+      <window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
+      <window_info anchor="bottom" id="Python Console" order="7" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info anchor="bottom" id="Terminal" order="7" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.3985849" />
+      <window_info id="Favorites" order="2" side_tool="true" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+      <window_info anchor="bottom" id="Find" order="1" />
+    </layout>
+  </component>
+  <component name="VcsContentAnnotationSettings">
+    <option name="myLimit" value="2678400000" />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager>
+      <breakpoints>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/project2_20190526.py</url>
+          <line>184</line>
+          <option name="timeStamp" value="6" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/project2_20190526.py</url>
+          <line>106</line>
+          <option name="timeStamp" value="13" />
+        </line-breakpoint>
+      </breakpoints>
+      <default-breakpoints>
+        <breakpoint type="python-exception">
+          <properties notifyOnTerminate="true" exception="BaseException">
+            <option name="notifyOnTerminate" value="true" />
+          </properties>
+        </breakpoint>
+      </default-breakpoints>
+    </breakpoint-manager>
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://E:/Program Files/Anaconda3/Lib/urllib/parse.py" />
+    <entry file="file://E:/Program Files/Anaconda3/Lib/codecs.py" />
+    <entry file="file://E:/Program Files/Anaconda3/Lib/site-packages/sklearn/feature_extraction/text.py" />
+    <entry file="file://$PROJECT_DIR$/project2_20190526.py" />
+    <entry file="file://$PROJECT_DIR$/对话示例.png">
+      <provider selected="true" editor-type-id="images" />
+    </entry>
+    <entry file="file://$PROJECT_DIR$/project2_main_original.py">
+      <provider selected="true" editor-type-id="text-editor" />
+    </entry>
+    <entry file="file://$PROJECT_DIR$/project2_main.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="447">
+          <caret line="143" column="33" selection-start-line="143" selection-start-column="33" selection-end-line="143" selection-end-column="33" />
+          <folding>
+            <element signature="e#16#35#0" expanded="true" />
+          </folding>
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>
\ No newline at end of file
--- a/.ipynb_checkpoints/main_playground-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main_playground-checkpoint.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
+      "  import pandas.util.testing as tm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "训练样本 = 99\n",
+      "训练样本特征表长度为 (99, 122)\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "-------------------------------------------------------------\n",
+      "-------------------------------------------------------------\n",
+      "Starting ...\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m    884\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 885\u001b[0;31m                 \u001b[0mident\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstdin_socket\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    886\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/jupyter_client/session.py\u001b[0m in \u001b[0;36mrecv\u001b[0;34m(self, socket, mode, content, copy)\u001b[0m\n\u001b[1;32m    802\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 803\u001b[0;31m             \u001b[0mmsg_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_multipart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    804\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mzmq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mZMQError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/zmq/sugar/socket.py\u001b[0m in \u001b[0;36mrecv_multipart\u001b[0;34m(self, flags, copy, track)\u001b[0m\n\u001b[1;32m    474\u001b[0m         \"\"\"\n\u001b[0;32m--> 475\u001b[0;31m         \u001b[0mparts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrack\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrack\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    476\u001b[0m         \u001b[0;31m# have first part already, only loop while more to receive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket.Socket.recv\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket.Socket.recv\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket._recv_copy\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/zmq/backend/cython/checkrc.pxd\u001b[0m in \u001b[0;36mzmq.backend.cython.checkrc._check_rc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-1-501d65ecb59f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m    246\u001b[0m     \u001b[0mthreshold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.55\u001b[0m  \u001b[0;31m# 用户定义阈值（当分类器分类的分数大于阈值才采纳本次意图分类结果，目的是排除分数过低的意图分类结果）\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    247\u001b[0m     \u001b[0;32mwhile\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m         \u001b[0mclf_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscore\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msentence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfun_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf_obj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    249\u001b[0m         \u001b[0;31m# -------------------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    250\u001b[0m         \u001b[0;31m# 状态转移条件（等待-->等待）：用户输入未达到“查询”、“订票”类别的阈值 OR 被分类为“终止服务”\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<ipython-input-1-501d65ecb59f>\u001b[0m in \u001b[0;36mfun_wait\u001b[0;34m(clf_obj)\u001b[0m\n\u001b[1;32m    184\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"-------------------------------------------------------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Starting ...\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m     \u001b[0msentence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"客服：请问需要什么服务？(时间请用12小时制表示）\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    187\u001b[0m     \u001b[0;31m# 对用户输入进行意图识别\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    188\u001b[0m     \u001b[0mclf_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf_obj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfun_clf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentence\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m    858\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    859\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_header\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 860\u001b[0;31m             \u001b[0mpassword\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    861\u001b[0m         )\n\u001b[1;32m    862\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m    888\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    889\u001b[0m                 \u001b[0;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 890\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    891\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    892\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# coding=utf-8\n",
+    "\n",
+    "import pandas as pd\n",
+    "import fool\n",
+    "import re\n",
+    "import random\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "# 加载停用词词典\n",
+    "stopwords = {}\n",
+    "with open(r'stopword.txt', 'r', encoding='utf-8') as fr:\n",
+    "    for word in fr:\n",
+    "        stopwords[word.strip()] = 0\n",
+    "\n",
+    "class clf_model:\n",
+    "    \"\"\"\n",
+    "    该类将所有模型训练、预测、数据预处理、意图识别的函数包括其中\n",
+    "    \"\"\"\n",
+    "    def __init__(self):\n",
+    "        self.model = \"\"  # 成员变量，用于存储模型\n",
+    "        self.vectorizer = \"\"  # 成员变量，用于存储tfidf统计值\n",
+    "\n",
+    "    # 训练模块\n",
+    "    def train(self):\n",
+    "        # 函数目标：读取训练数据，训练意图分类模型，并将训练好的分类模型赋值给成员变量self.model\n",
+    "        # input：无\n",
+    "        # output：无\n",
+    "        \n",
+    "        # 从excel文件读取训练样本 \n",
+    "        d_train = pd.read_excel(\"data_train.xlsx\")\n",
+    "        # 对训练数据进行预处理\n",
+    "        d_train.sentence_train = d_train.sentence_train.apply(self.fun_clean)\n",
+    "        print(\"训练样本 = %d\" % len(d_train))\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        TODO：利用sklearn中的函数进行训练，将句子转化为特征features\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        # get tfidf features\n",
+    "        self.vectorizer = TfidfVectorizer(analyzer=\"word\",\n",
+    "                                          token_pattern=r\"(?u)\\b\\w+\\b\")\n",
+    "        \n",
+    "        # 指定token_pattern，避免sklearn将一个字长度的单词过滤筛除\n",
+    "        features = self.vectorizer.fit_transform(d_train.sentence_train)\n",
+    "        print(\"训练样本特征表长度为 \" + str(features.shape))\n",
+    "        #\n",
+    "        self.model = LogisticRegression(C=10)\n",
+    "        self.model.fit(features, d_train.label)\n",
+    "\n",
+    "    # 预测模块（使用模型预测） 返回意图类别  和得分\n",
+    "    def predict_model(self, sentence):\n",
+    "        # 函数目标：使用意图分类模型预测意图\n",
+    "        #  input：sentence（用户输入）\n",
+    "        # output：clf_result（意图类别），score（意图分数）\n",
+    "        \n",
+    "        # --------------\n",
+    "        # 对样本中没有的特殊情况做特别判断\n",
+    "        if sentence in [\"好的\", \"需要\", \"是的\", \"要的\", \"好\", \"要\", \"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        # --------------\n",
+    "\n",
+    "        \"\"\"\n",
+    "        TODO：利用已训练好的意图分类模型进行意图识别\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        sent_features = self.vectorizer.transform([sentence])\n",
+    "        pre_test = self.model.predict_proba(sent_features).tolist()[0]\n",
+    "        clf_result = pre_test.index(max(pre_test))\n",
+    "        score = max(pre_test)\n",
+    "        return clf_result, score\n",
+    "\n",
+    "    # 预测模块（使用规则）\n",
+    "    def predict_rule(self, sentence):\n",
+    "        # 函数目标：如果模型训练出现异常，可以使用规则进行预测，同时也可以让学员融合\"模型\"及\"规则\"的预测方式\n",
+    "        # input：sentence（用户输入）\n",
+    "        # output：clf_result（意图类别），score（意图分数）\n",
+    "        \n",
+    "        sentence = sentence.replace(' ', '')\n",
+    "        if re.findall(r'不需要|不要|停止|终止|退出|不买|不定|不订', sentence):\n",
+    "            return 2, 0.8\n",
+    "        elif re.findall(r'订|定|预定|买|购', sentence) or sentence in [\"好的\", \"需要\", \"是的\", \"要的\", \"好\", \"要\", \"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        else:\n",
+    "            return 0, 0.8\n",
+    "\n",
+    "    # 预处理函数\n",
+    "    def fun_clean(self, sentence):\n",
+    "        # 函数目标：预处理函数，将必要的实体转换成统一符号（利于分类准确），去除停用词等\n",
+    "        # input：sentence（用户输入语句）\n",
+    "        # output：sentence（预处理结果）\n",
+    "        \"\"\"\n",
+    "        TODO：预处理函数，将必要的实体转换成统一符号（利于分类准确），去除停用词等\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        # 使用foolnltk进行实体识别\n",
+    "        words, ners = fool.analysis(sentence)\n",
+    "        # 对识别结果按长度倒序排序\n",
+    "        ners = ners[0].sort(key=lambda x: len(x[-1]), reverse=True)\n",
+    "        # 如果有实体被识别出来，就将实体的字符串替换成实体类别的字符串\n",
+    "        if ners:\n",
+    "            for ner in ners:\n",
+    "                sentence = sentence.replace(ner[-1], ' ' + ner[2] + ' ')\n",
+    "        # 分词，并去除停用词\n",
+    "        word_lst = [w for w in fool.cut(sentence)[0] if w not in stopwords]\n",
+    "        output_str = ' '.join(word_lst)\n",
+    "        output_str = re.sub(r'\\s+', ' ', output_str)\n",
+    "        return output_str.strip()\n",
+    "\n",
+    "    # 分类主函数\n",
+    "    def fun_clf(self, sentence):\n",
+    "        # 函数目标：意图识别主函数\n",
+    "        # input：sentence（ 用户输入语句）\n",
+    "        # output：clf_result（意图类别），score（意图分数）\n",
+    "        \n",
+    "        # 对用户输入进行预处理\n",
+    "        sentence = self.fun_clean(sentence)\n",
+    "        # 得到意图分类结果（0为“查询”类别，1为“订票”类别，2为“终止服务”类别）\n",
+    "        clf_result, score = self.predict_model(sentence)  \n",
+    "        return clf_result, score\n",
+    "\n",
+    "\n",
+    "def fun_replace_num(sentence):\n",
+    "    # 函数目标：替换时间中的数字（目的是便于实体识别包fool对实体的识别）\n",
+    "    # input：sentence\n",
+    "    # output：sentence\n",
+    "    # 定义要替换的数字\n",
+    "    \n",
+    "    time_num = {\"一\": \"1\", \"二\": \"2\", \"三\": \"3\", \"四\": \"4\", \"五\": \"5\", \"六\": \"6\", \"七\": \"7\", \"八\": \"8\", \"九\": \"9\", \"十\": \"10\",\n",
+    "                \"十一\": \"11\", \"十二\": \"12\"}\n",
+    "    for k, v in time_num.items():\n",
+    "        sentence = sentence.replace(k, v)\n",
+    "    return sentence\n",
+    "\n",
+    "\n",
+    "def slot_fill(sentence, key=None):\n",
+    "    # 函数目标：填槽函数（该函数从sentence中寻找需要的内容，完成填槽工作）\n",
+    "    # input：sentence（用户输入）, key（指定槽位，只对该句话提取指定槽位的信息）\n",
+    "    # output：slot（返回填槽的结果，以json格式返回，key为槽位名，value为值）\n",
+    "    slot = {}\n",
+    "    # 进行实体识别\n",
+    "    words, ners = fool.analysis(sentence)\n",
+    "    \n",
+    "    \"\"\"\n",
+    "    TODO：从sentence中寻找需要的内容，完成填槽工作\n",
+    "    \"\"\"\n",
+    "    to_city_flag = 0  \n",
+    "    # flag为1代表找到到达城市（作用：当找到到达城市时，默认句子中另一个城市信息是出发城市）\n",
+    "    for ner in ners[0]:\n",
+    "        # 首先对time类别的实体进行信息抽取填槽工作\n",
+    "        if ner[2] == 'time':\n",
+    "            # --------------------\n",
+    "            # 寻找日期的关键词\n",
+    "            date_content = re.findall(\n",
+    "                r'后天|明天|今天|大后天|周末|周一|周二|周三|周四|周五|周六|周日|本周一|本周二|本周三|本周四|本周五|本周六|本周日|下周一|下周二|下周三|下周四|下周五|下周六|下周日|这周一|这周二|这周三|这周四|这周五|这周六|这周日|\\d{,2}月\\d{,2}号|\\d{,2}月\\d{,2}日',\n",
+    "                ner[-1])\n",
+    "            slot[\"date\"] = date_content[0] if date_content else \"\"\n",
+    "            # 完成日期的填槽\n",
+    "            # --------------------\n",
+    "\n",
+    "            # --------------------\n",
+    "            # 寻找具体时间的关键词\n",
+    "            time_content = re.findall(r'\\d{,2}点\\d{,2}分|\\d{,2}点钟|\\d{,2}点', ner[-1])\n",
+    "            # 寻找上午下午的关键词\n",
+    "            pmam_content = re.findall(r'上午|下午|早上|晚上|中午|早晨', ner[-1])\n",
+    "            slot[\"time\"] = pmam_content[0] if pmam_content else \"\" + time_content[0] if time_content else \"\"\n",
+    "            # 完成时间的填槽\n",
+    "            # --------------------\n",
+    "        # 对location类别的实体进行信息抽取填槽工作\n",
+    "        if ner[2] == 'location':\n",
+    "            # --------------------\n",
+    "            # 开始对城市填槽\n",
+    "            # 如果没有指定槽位\n",
+    "            if key is None:\n",
+    "                if re.findall(r'(到|去|回|回去)%s' % (ner[-1]), sentence):\n",
+    "                    to_city_flag = 1\n",
+    "                    slot[\"to_city\"] = ner[-1]\n",
+    "                    continue\n",
+    "                if re.findall(r'从%s|%s出发' % (ner[-1], ner[-1]), sentence):\n",
+    "                    slot[\"from_city\"] = ner[-1]\n",
+    "                elif to_city_flag == 1:\n",
+    "                    slot[\"from_city\"] = ner[-1]\n",
+    "            # 如果指定了槽位\n",
+    "            elif key in [\"from_city\", \"to_city\"]:\n",
+    "                slot[key] = ner[-1]\n",
+    "            # 完成出发城市、到达城市的填槽工作\n",
+    "            # --------------------\n",
+    "\n",
+    "    return slot\n",
+    "\n",
+    "\n",
+    "def fun_wait(clf_obj):\n",
+    "    # 函数目标：等待，获取用户输入问句\n",
+    "    # input：CLF_MODEL类实例化对象\n",
+    "    # output：clf_result（用户输入意图类别）, score（意图识别分数）, sentence（用户输入）\n",
+    "    \n",
+    "    # 等待用户输入\n",
+    "    print(\"\\n\\n\\n\")\n",
+    "    print(\"-------------------------------------------------------------\")\n",
+    "    print(\"-------------------------------------------------------------\")\n",
+    "    print(\"Starting ...\")\n",
+    "    sentence = input(\"客服：请问需要什么服务？(时间请用12小时制表示）\\n\")\n",
+    "    # 对用户输入进行意图识别\n",
+    "    clf_result, score = clf_obj.fun_clf(sentence)\n",
+    "    return clf_result, score, sentence\n",
+    "\n",
+    "\n",
+    "def fun_search(clf_result, sentence):\n",
+    "    # 函数目标：为用户查询余票\n",
+    "    # input：clf_result（意图分类结果）, sentence（用户输入问句）\n",
+    "    # output：是否有票\n",
+    "    \n",
+    "    # 定义槽存储空间\n",
+    "    name = {\"time\": \"出发时间\", \"date\": \"出发日期\", \"from_city\": \"出发城市\", \"to_city\": \"到达城市\"}\n",
+    "    slot = {\"time\": \"\", \"date\": \"\", \"from_city\": \"\", \"to_city\": \"\"}\n",
+    "    # 使用用户第一句话进行填槽\n",
+    "    sentence = fun_replace_num(sentence)\n",
+    "    slot_init = slot_fill(sentence)\n",
+    "    for key in slot_init.keys():\n",
+    "        slot[key] = slot_init[key]\n",
+    "    # 对未填充对槽位，向用户提问，进行针对性填槽\n",
+    "    while \"\" in slot.values():\n",
+    "        for key in slot.keys():\n",
+    "            if slot[key] == \"\":\n",
+    "                sentence = input(\"客服：请问%s是？\\n\" % (name[key]))\n",
+    "                sentence = fun_replace_num(sentence)\n",
+    "                slot_cur = slot_fill(sentence, key)\n",
+    "                for key in slot_cur.keys():\n",
+    "                    if slot[key] == \"\":\n",
+    "                        slot[key] = slot_cur[key]\n",
+    "\n",
+    "    # 查询是否有票，并答复用户（本次查询是否有票使用随机数完成）\n",
+    "    if random.random() > 0.5:\n",
+    "        print(\"客服：%s%s从%s到%s的票充足\" % (slot[\"date\"], slot[\"time\"], slot[\"from_city\"], slot[\"to_city\"]))\n",
+    "        # 返回1表示有票\n",
+    "        return 1\n",
+    "    else:\n",
+    "        print(\"客服：%s%s从%s到%s无票\" % (slot[\"date\"], slot[\"time\"], slot[\"from_city\"], slot[\"to_city\"]))\n",
+    "        print(\"End !!!\")\n",
+    "        print(\"-------------------------------------------------------------\")\n",
+    "        print(\"-------------------------------------------------------------\")\n",
+    "        # 返回0表示无票\n",
+    "        return 0\n",
+    "\n",
+    "\n",
+    "def fun_book():\n",
+    "    # 函数目标：执行下单订票动作\n",
+    "    # input：无\n",
+    "    # output：无\n",
+    "    \n",
+    "    print(\"客服：已为您完成订票。\\n\\n\\n\")\n",
+    "    print(\"End !!!\")\n",
+    "    print(\"-------------------------------------------------------------\")\n",
+    "    print(\"-------------------------------------------------------------\")\n",
+    "\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    # 实例化对象\n",
+    "    clf_obj = clf_model()\n",
+    "    clf_obj.train()\n",
+    "    threshold = 0.55  # 用户定义阈值（当分类器分类的分数大于阈值才采纳本次意图分类结果，目的是排除分数过低的意图分类结果）\n",
+    "    while 1:\n",
+    "        clf_result, score, sentence = fun_wait(clf_obj)\n",
+    "        # -------------------------------------------------------------------------------\n",
+    "        # 状态转移条件（等待-->等待）：用户输入未达到“查询”、“订票”类别的阈值 OR 被分类为“终止服务”\n",
+    "        # -------------------------------------------------------------------------------\n",
+    "        if score < threshold or clf_result == 2:\n",
+    "            continue\n",
+    "\n",
+    "        # -------------------------------------------------------------------------------\n",
+    "        # 状态转移条件（等待-->查询）：用户输入分类为“查询” OR “订票”\n",
+    "        # -------------------------------------------------------------------------------\n",
+    "        else:\n",
+    "            search_result = fun_search(clf_result, sentence)\n",
+    "            if search_result == 0:\n",
+    "                continue\n",
+    "            else:\n",
+    "                # 等待用户输入\n",
+    "                sentence = input(\"客服：需要为您订票吗？\\n\")\n",
+    "                # 对用户输入进行意图识别\n",
+    "                clf_result, score = clf_obj.fun_clf(sentence)\n",
+    "                # -------------------------------------------------------------------------------\n",
+    "                # 状态转移条件（查询-->订票）：FUN_SEARCH返回有票 AND 用户输入分类为“订票”\n",
+    "                # -------------------------------------------------------------------------------\n",
+    "                if clf_result == 1:\n",
+    "                    fun_book()\n",
+    "                    continue"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Testing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[['一个', '傻子', '在', '北京']]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import fool\n",
+    "\n",
+    "text = \"一个傻子在北京\"\n",
+    "print(fool.cut(text))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.13.2'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import tensorflow as tf\n",
+    "tf.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Readme.txt
+++ b/Readme.txt
+project2_main.py python执行脚本（所有的代码位置）
+data_train.xlsx 意图分类训练数据
+stopword.txt 停用词
--- a/data_train.xlsx
+++ b/data_train.xlsx
--- a/main.py
+++ b/main.py
+# coding=utf-8
+import pandas as pd
+import fool
+import re
+import random
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+# Load the stop-word dictionary: one word per line of stopword.txt, stored as dict keys
+# (the value 0 is a dummy; only key membership is tested elsewhere in this file).
+with open(r'stopword.txt', 'r', encoding='utf-8') as fr:
+    stopwords = {line.strip(): 0 for line in fr}
+class clf_model:
+    """
+    Intent classifier for the ticket dialogue: preprocessing, training and prediction.
+    Training fits a TF-IDF vectorizer and a logistic-regression model; prediction is
+    available from that model (predict_model) or from keyword rules (predict_rule).
+    Intent labels as used in this file: 0 = query, 1 = book a ticket, 2 = stop the service.
+    """
+    def __init__(self):
+        self.model = ""  # placeholder; replaced by the fitted LogisticRegression in train()
+        self.vectorizer = ""  # placeholder; replaced by the fitted TfidfVectorizer in train()
+    # Training
+    def train(self):
+        """
+        Read the training data and fit the vectorizer and the intent classifier.
+        Reads "data_train.xlsx" from the working directory (columns ``sentence_train``
+        and ``label``), cleans every sentence with fun_clean, and stores the fitted
+        objects in self.vectorizer and self.model. Returns nothing.
+        """
+        # Load the training samples from the Excel file
+        d_train = pd.read_excel("data_train.xlsx")
+        # Apply the same preprocessing that is applied to user input at prediction time
+        d_train.sentence_train = d_train.sentence_train.apply(self.fun_clean)
+        print("训练样本 = %d" % len(d_train))
+        # Turn the sentences into TF-IDF features. The explicit token_pattern keeps
+        # single-character tokens, which sklearn's default pattern would filter out.
+        self.vectorizer = TfidfVectorizer(analyzer="word",
+                                          token_pattern=r"(?u)\b\w+\b")
+        features = self.vectorizer.fit_transform(d_train.sentence_train)
+        print("训练样本特征表长度为 " + str(features.shape))
+        self.model = LogisticRegression(C=10)
+        self.model.fit(features, d_train.label)
+    # Prediction (model based): returns the intent label and its score
+    def predict_model(self, sentence):
+        """
+        Predict the intent of a sentence with the trained model.
+        input: sentence (text in the form produced by fun_clean)
+        output: (clf_result, score) - the most probable intent label and its probability
+        """
+        # Short affirmative replies are mapped straight to intent 1 ("book") with a
+        # fixed score instead of being sent through the model.
+        if sentence in ["好的", "需要", "是的", "要的", "好", "要", "是"]:
+            return 1, 0.8
+        sent_features = self.vectorizer.transform([sentence])
+        probabilities = self.model.predict_proba(sent_features).tolist()[0]
+        score = max(probabilities)
+        # predict_proba columns are ordered like model.classes_, so translate the
+        # winning column back to its label rather than returning the column index.
+        clf_result = self.model.classes_.tolist()[probabilities.index(score)]
+        return clf_result, score
+    # Prediction (rule based)
+    def predict_rule(self, sentence):
+        """
+        Predict the intent with keyword rules (usable when the model is unavailable,
+        or to combine with the model prediction).
+        input: sentence (user input)
+        output: (clf_result, score) - the score is always the fixed value 0.8
+        """
+        sentence = sentence.replace(' ', '')
+        # Negations are tested first so that e.g. "不订" is not caught by the "订" rule below
+        if re.findall(r'不需要|不要|停止|终止|退出|不买|不定|不订', sentence):
+            return 2, 0.8
+        elif re.findall(r'订|定|预定|买|购', sentence) or sentence in ["好的", "需要", "是的", "要的", "好", "要", "是"]:
+            return 1, 0.8
+        else:
+            return 0, 0.8
+    # Preprocessing
+    def fun_clean(self, sentence):
+        """
+        Normalise a sentence for classification.
+        Every entity found by fool.analysis is replaced by its entity type, then the
+        sentence is segmented with fool.cut and stop words are removed.
+        input: sentence (raw user sentence)
+        output: space-separated tokens as a single stripped string
+        """
+        # Named-entity recognition with foolnltk; each entity is read as
+        # ner[2] = entity type and ner[-1] = entity text.
+        _, ners = fool.analysis(sentence)
+        # Longest entity text first, so an entity that is a substring of another one
+        # cannot break the longer match. sorted() is required here: list.sort()
+        # returns None, which would silently skip the replacement below.
+        entities = sorted(ners[0], key=lambda x: len(x[-1]), reverse=True)
+        for ner in entities:
+            sentence = sentence.replace(ner[-1], ' ' + ner[2] + ' ')
+        # Segment into words and drop stop words
+        word_lst = [w for w in fool.cut(sentence)[0] if w not in stopwords]
+        output_str = ' '.join(word_lst)
+        output_str = re.sub(r'\s+', ' ', output_str)
+        return output_str.strip()
+    # Classification entry point
+    def fun_clf(self, sentence):
+        """
+        Classify the intent of a raw user sentence.
+        input: sentence (raw user sentence)
+        output: (clf_result, score) as returned by predict_model
+        """
+        # Preprocess the user input
+        sentence = self.fun_clean(sentence)
+        # Intent labels: 0 = query, 1 = book a ticket, 2 = stop the service
+        clf_result, score = self.predict_model(sentence)
+        return clf_result, score
+def fun_replace_num(sentence):
+    """
+    Replace the Chinese numerals 一 ... 十二 in a sentence by Arabic digits
+    (done so that the fool entity recogniser handles time expressions better).
+    input: sentence
+    output: sentence with the numerals replaced
+    """
+    # Numerals to replace
+    time_num = {"一": "1", "二": "2", "三": "3", "四": "4", "五": "5", "六": "6", "七": "7", "八": "8", "九": "9", "十": "10",
+                "十一": "11", "十二": "12"}
+    # Replace the two-character numerals first; otherwise "十一" is rewritten piecewise
+    # ("一" -> "1", then "十" -> "10") and ends up as "101" instead of "11".
+    for k in sorted(time_num, key=len, reverse=True):
+        sentence = sentence.replace(k, time_num[k])
+    return sentence
+def slot_fill(sentence, key=None):
+    """
+    Slot filling: extract date, time, departure city and arrival city from a sentence.
+    input: sentence (user input); key (slot the user was asked about - when given,
+           every location entity in the sentence is stored under that slot if it is
+           "from_city" or "to_city")
+    output: slot (dict mapping slot name -> extracted text; may contain "date",
+            "time", "from_city", "to_city"; "date"/"time" are "" when a time entity
+            was found but held no matching text)
+    """
+    slot = {}
+    # Named-entity recognition; each entity is read as ner[2] = type, ner[-1] = text
+    _, ners = fool.analysis(sentence)
+    # Weekdays are accepted both as numerals and as digits, because callers run
+    # fun_replace_num first, which turns e.g. "周一" into "周1".
+    date_pattern = r'大后天|后天|明天|今天|周末|(?:本|下|这)?周[一二三四五六日1-6]|\d{1,2}月\d{1,2}[号日]'
+    time_pattern = r'\d{1,2}点(?:\d{1,2}分|钟)?'
+    pmam_pattern = r'上午|下午|早上|晚上|中午|早晨'
+    # 1 once the arrival city has been found; from then on any other city in the
+    # sentence is taken as the departure city.
+    to_city_flag = 0
+    # Cities seen before the arrival city that matched no departure/arrival wording
+    earlier_cities = []
+    for ner in ners[0]:
+        entity_type, entity_text = ner[2], ner[-1]
+        # Time entities feed the "date" and "time" slots
+        if entity_type == 'time':
+            date_content = re.findall(date_pattern, entity_text)
+            # Only overwrite with a real match, so a second time entity without a
+            # date cannot blank out a date found in an earlier entity.
+            if date_content:
+                slot["date"] = date_content[0]
+            else:
+                slot.setdefault("date", "")
+            time_content = re.findall(time_pattern, entity_text)
+            pmam_content = re.findall(pmam_pattern, entity_text)
+            # Parenthesised on purpose: without the parentheses the conditional
+            # expression swallows the concatenation and the clock time is lost
+            # whenever a day-period word (上午/下午/...) is present.
+            time_value = (pmam_content[0] if pmam_content else "") + (time_content[0] if time_content else "")
+            if time_value:
+                slot["time"] = time_value
+            else:
+                slot.setdefault("time", "")
+        # Location entities feed the "from_city" and "to_city" slots
+        if entity_type == 'location':
+            # The entity text is literal text, not a pattern
+            city = re.escape(entity_text)
+            # No slot requested: decide from the wording around the city
+            if key is None:
+                if re.search(r'(?:到|去|回去|回)%s' % city, sentence):
+                    to_city_flag = 1
+                    slot["to_city"] = entity_text
+                    continue
+                if re.search(r'从%s|%s出发' % (city, city), sentence):
+                    slot["from_city"] = entity_text
+                elif to_city_flag == 1:
+                    slot["from_city"] = entity_text
+                else:
+                    earlier_cities.append(entity_text)
+            # A city slot was requested: store the entity under it
+            elif key in ["from_city", "to_city"]:
+                slot[key] = entity_text
+    # "北京到上海": the departure city precedes the arrival city without "从"/"出发"
+    if to_city_flag == 1 and "from_city" not in slot and earlier_cities:
+        slot["from_city"] = earlier_cities[0]
+    return slot
+def fun_wait(clf_obj):
+    """
+    Waiting state: print the start banner, read one user request and classify it.
+    input: clf_obj (object providing fun_clf(sentence) -> (intent, score))
+    output: (clf_result, score, sentence) - intent label, its score, the raw user input
+    """
+    divider = "-------------------------------------------------------------"
+    # Banner shown before every new request
+    print("\n\n\n", divider, divider, "Starting ...", sep="\n")
+    user_text = input("客服：请问需要什么服务？(时间请用12小时制表示）\n")
+    # Intent recognition on the raw input
+    intent, confidence = clf_obj.fun_clf(user_text)
+    return intent, confidence, user_text
+def fun_search(clf_result, sentence):
+    """
+    Query state: collect all four slots from the user, then report ticket availability.
+    input: clf_result (intent classification result; not read by this function),
+           sentence (the user's first request)
+    output: 1 when tickets are available, 0 when they are not
+    """
+    # Prompt text for each slot
+    labels = {"time": "出发时间", "date": "出发日期", "from_city": "出发城市", "to_city": "到达城市"}
+    # Slot storage; "" marks a slot that is still unfilled
+    slot = dict.fromkeys(("time", "date", "from_city", "to_city"), "")
+    # First pass: fill whatever the opening sentence already provides
+    slot.update(slot_fill(fun_replace_num(sentence)))
+    # Keep asking for every empty slot until none is left
+    while "" in slot.values():
+        for pending in slot:
+            if slot[pending] != "":
+                continue
+            reply = fun_replace_num(input("客服：请问%s是？\n" % (labels[pending])))
+            # An answer may also carry other slots; only empty ones are filled
+            for filled, value in slot_fill(reply, pending).items():
+                if slot[filled] == "":
+                    slot[filled] = value
+    trip = (slot["date"], slot["time"], slot["from_city"], slot["to_city"])
+    # Availability is simulated with a random number
+    if random.random() > 0.5:
+        print("客服：%s%s从%s到%s的票充足" % trip)
+        # 1 = tickets available
+        return 1
+    divider = "-------------------------------------------------------------"
+    print("客服：%s%s从%s到%s无票" % trip)
+    print("End !!!")
+    print(divider)
+    print(divider)
+    # 0 = no tickets
+    return 0
+def fun_book():
+    """
+    Booking state: confirm the booking to the user and print the end banner.
+    input: none
+    output: none
+    """
+    divider = "-------------------------------------------------------------"
+    print("客服：已为您完成订票。\n\n\n", "End !!!", divider, divider, sep="\n")
+if __name__ == "__main__":
+    # Build and train the intent classifier once, before the dialogue loop starts
+    clf_obj = clf_model()
+    clf_obj.train()
+    # Minimum classifier score for an intent to be accepted; results scoring lower are ignored
+    threshold = 0.55
+    while True:
+        clf_result, score, sentence = fun_wait(clf_obj)
+        # waiting -> waiting: the score is below the threshold, or the intent is 2 (stop the service)
+        if score < threshold or clf_result == 2:
+            continue
+        # waiting -> query: the intent is "query" or "book"; back to waiting when there is no ticket
+        if fun_search(clf_result, sentence) == 0:
+            continue
+        # Tickets are available: ask whether to book, and classify the answer
+        reply = input("客服：需要为您订票吗？\n")
+        clf_result, score = clf_obj.fun_clf(reply)
+        # query -> booking: tickets are available AND the answer is classified as "book"
+        if clf_result == 1:
+            fun_book()
\ No newline at end of file
--- a/main_playground.ipynb
+++ b/main_playground.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
+      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
+      "/Users/andri.sumitro/opt/anaconda3/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
+      "  import pandas.util.testing as tm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "训练样本 = 99\n",
+      "训练样本特征表长度为 (99, 122)\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "-------------------------------------------------------------\n",
+      "-------------------------------------------------------------\n",
+      "Starting ...\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m    884\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 885\u001b[0;31m                 \u001b[0mident\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreply\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstdin_socket\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    886\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/jupyter_client/session.py\u001b[0m in \u001b[0;36mrecv\u001b[0;34m(self, socket, mode, content, copy)\u001b[0m\n\u001b[1;32m    802\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 803\u001b[0;31m             \u001b[0mmsg_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_multipart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    804\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mzmq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mZMQError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/zmq/sugar/socket.py\u001b[0m in \u001b[0;36mrecv_multipart\u001b[0;34m(self, flags, copy, track)\u001b[0m\n\u001b[1;32m    474\u001b[0m         \"\"\"\n\u001b[0;32m--> 475\u001b[0;31m         \u001b[0mparts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrack\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrack\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    476\u001b[0m         \u001b[0;31m# have first part already, only loop while more to receive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket.Socket.recv\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket.Socket.recv\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mzmq/backend/cython/socket.pyx\u001b[0m in \u001b[0;36mzmq.backend.cython.socket._recv_copy\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/zmq/backend/cython/checkrc.pxd\u001b[0m in \u001b[0;36mzmq.backend.cython.checkrc._check_rc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-1-501d65ecb59f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m    246\u001b[0m     \u001b[0mthreshold\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.55\u001b[0m  \u001b[0;31m# 用户定义阈值（当分类器分类的分数大于阈值才采纳本次意图分类结果，目的是排除分数过低的意图分类结果）\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    247\u001b[0m     \u001b[0;32mwhile\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 248\u001b[0;31m         \u001b[0mclf_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscore\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msentence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfun_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclf_obj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    249\u001b[0m         \u001b[0;31m# -------------------------------------------------------------------------------\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    250\u001b[0m         \u001b[0;31m# 状态转移条件（等待-->等待）：用户输入未达到“查询”、“订票”类别的阈值 OR 被分类为“终止服务”\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<ipython-input-1-501d65ecb59f>\u001b[0m in \u001b[0;36mfun_wait\u001b[0;34m(clf_obj)\u001b[0m\n\u001b[1;32m    184\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"-------------------------------------------------------------\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    185\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Starting ...\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m     \u001b[0msentence\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"客服：请问需要什么服务？(时间请用12小时制表示）\\n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    187\u001b[0m     \u001b[0;31m# 对用户输入进行意图识别\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    188\u001b[0m     \u001b[0mclf_result\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf_obj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfun_clf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentence\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m    858\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    859\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_parent_header\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 860\u001b[0;31m             \u001b[0mpassword\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    861\u001b[0m         )\n\u001b[1;32m    862\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/opt/anaconda3/lib/python3.7/site-packages/ipykernel/kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m    888\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    889\u001b[0m                 \u001b[0;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 890\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    891\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    892\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# coding=utf-8\n",
+    "\n",
+    "import pandas as pd\n",
+    "import fool\n",
+    "import re\n",
+    "import random\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "\n",
+    "# 加载停用词词典\n",
+    "stopwords = {}\n",
+    "with open(r'stopword.txt', 'r', encoding='utf-8') as fr:\n",
+    "    for word in fr:\n",
+    "        stopwords[word.strip()] = 0\n",
+    "\n",
+    "class clf_model:\n",
+    "    \"\"\"\n",
+    "    该类将所有模型训练、预测、数据预处理、意图识别的函数包括其中\n",
+    "    \"\"\"\n",
+    "    def __init__(self):\n",
+    "        self.model = \"\"  # 成员变量，用于存储模型\n",
+    "        self.vectorizer = \"\"  # 成员变量，用于存储tfidf统计值\n",
+    "\n",
+    "    # 训练模块\n",
+    "    def train(self):\n",
+    "        # 函数目标：读取训练数据，训练意图分类模型，并将训练好的分类模型赋值给成员变量self.model\n",
+    "        # input：无\n",
+    "        # output：无\n",
+    "        \n",
+    "        # 从excel文件读取训练样本 \n",
+    "        d_train = pd.read_excel(\"data_train.xlsx\")\n",
+    "        # 对训练数据进行预处理\n",
+    "        d_train.sentence_train = d_train.sentence_train.apply(self.fun_clean)\n",
+    "        print(\"训练样本 = %d\" % len(d_train))\n",
+    "        \n",
+    "        \"\"\"\n",
+    "        TODO：利用sklearn中的函数进行训练，将句子转化为特征features\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        # get tifidf features\n",
+    "        self.vectorizer = TfidfVectorizer(analyzer=\"word\",\n",
+    "                                          token_pattern=r\"(?u)\\b\\w+\\b\")\n",
+    "        \n",
+    "        # 指定token_pattern，避免sklearn将一个字长度的单词过滤筛除\n",
+    "        features = self.vectorizer.fit_transform(d_train.sentence_train)\n",
+    "        print(\"训练样本特征表长度为 \" + str(features.shape))\n",
+    "        #\n",
+    "        self.model = LogisticRegression(C=10)\n",
+    "        self.model.fit(features, d_train.label)\n",
+    "\n",
+    "    # 预测模块（使用模型预测） 返回意图类别  和得分\n",
+    "    def predict_model(self, sentence):\n",
+    "        # 函数目标：使用意图分类模型预测意图\n",
+    "        #  input：sentence（用户输入）\n",
+    "        # output：clf_result（意图类别），score（意图分数）\n",
+    "        \n",
+    "        # --------------\n",
+    "        # 对样本中没有点特殊情况做特别判断\n",
+    "        if sentence in [\"好的\", \"需要\", \"是的\", \"要的\", \"好\", \"要\", \"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        # --------------\n",
+    "\n",
+    "        \"\"\"\n",
+    "        TODO：利用已训练好的意图分类模型进行意图识别\n",
+    "        \"\"\"\n",
+    "        \n",
+    "        sent_features = self.vectorizer.transform([sentence])\n",
+    "        pre_test = self.model.predict_proba(sent_features).tolist()[0]\n",
+    "        clf_result = pre_test.index(max(pre_test))\n",
+    "        score = max(pre_test)\n",
+    "        return clf_result, score\n",
+    "\n",
+    "    # 预测模块（使用规则）\n",
+    "    def predict_rule(self, sentence):\n",
+    "        # 函数目标：如果模型训练出现异常，可以使用规则进行预测，同时也可以让学员融合\"模型\"及\"规则\"的预测方式\n",
+    "        # input：sentence（用户输入）\n",
+    "        # output：clf_result（意图类别），score（意图分数）\n",
+    "        \n",
+    "        sentence = sentence.replace(' ', '')\n",
+    "        if re.findall(r'不需要|不要|停止|终止|退出|不买|不定|不订', sentence):\n",
+    "            return 2, 0.8\n",
+    "        elif re.findall(r'订|定|预定|买|购', sentence) or sentence in [\"好的\", \"需要\", \"是的\", \"要的\", \"好\", \"要\", \"是\"]:\n",
+    "            return 1, 0.8\n",
+    "        else:\n",
+    "            return 0, 0.8\n",
+    "\n",
+    "    # 预处理函数\n",
+    "    def fun_clean(self, sentence):\n",
+    "        \"\"\"Pre-process user input before intent classification.\n",
+    "\n",
+    "        Recognised entities are replaced by their entity-type name, the text is\n",
+    "        segmented with fool.cut, stop words are removed and the remaining words\n",
+    "        are joined with single spaces.\n",
+    "\n",
+    "        Args:\n",
+    "            sentence: raw user input.\n",
+    "\n",
+    "        Returns:\n",
+    "            The cleaned, space-separated string.\n",
+    "        \"\"\"\n",
+    "        # Entity recognition with foolnltk; only the entity list is needed here.\n",
+    "        _, ners = fool.analysis(sentence)\n",
+    "        # Longest entity text first, so a short entity cannot break up a longer\n",
+    "        # one that contains it. sorted() is required: list.sort() returns None,\n",
+    "        # which previously made the replacement loop below unreachable.\n",
+    "        entities = sorted(ners[0] if ners else [], key=lambda x: len(x[-1]), reverse=True)\n",
+    "        # ner[-1] is the entity text and ner[2] the entity type.\n",
+    "        for ner in entities:\n",
+    "            sentence = sentence.replace(ner[-1], ' ' + ner[2] + ' ')\n",
+    "        # Segment the text and drop stop words.\n",
+    "        word_lst = [w for w in fool.cut(sentence)[0] if w not in stopwords]\n",
+    "        output_str = ' '.join(word_lst)\n",
+    "        # Collapse the whitespace runs introduced by the padded replacements.\n",
+    "        output_str = re.sub(r'\\s+', ' ', output_str)\n",
+    "        return output_str.strip()\n",
+    "\n",
+    "    # 分类主函数\n",
+    "    def fun_clf(self, sentence):\n",
+    "        \"\"\"Intent-recognition entry point: clean the input, then classify it.\n",
+    "\n",
+    "        Args:\n",
+    "            sentence: raw user input.\n",
+    "\n",
+    "        Returns:\n",
+    "            (clf_result, score) as produced by predict_model. Per the original\n",
+    "            note, 0 is the query intent, 1 the booking intent and 2 the\n",
+    "            end-of-service intent.\n",
+    "        \"\"\"\n",
+    "        cleaned = self.fun_clean(sentence)\n",
+    "        intent, confidence = self.predict_model(cleaned)\n",
+    "        return intent, confidence\n",
+    "\n",
+    "\n",
+    "def fun_replace_num(sentence):\n",
+    "    \"\"\"Rewrite Chinese numerals in ``sentence`` as Arabic digits.\n",
+    "\n",
+    "    The goal is to make times and dates easier for the fool entity recogniser.\n",
+    "    Numbers from 1 to 99 are converted as a whole ('十二' -> '12',\n",
+    "    '二十五' -> '25'); converting character by character turned '十一' into\n",
+    "    '101'. A numeral directly after '周' is left untouched, because slot_fill\n",
+    "    matches weekday names such as '周一' literally.\n",
+    "\n",
+    "    Args:\n",
+    "        sentence: user input text.\n",
+    "\n",
+    "    Returns:\n",
+    "        The sentence with the numerals replaced.\n",
+    "    \"\"\"\n",
+    "    digit_value = {\"一\": 1, \"二\": 2, \"三\": 3, \"四\": 4, \"五\": 5,\n",
+    "                   \"六\": 6, \"七\": 7, \"八\": 8, \"九\": 9}\n",
+    "\n",
+    "    def _to_arabic(match):\n",
+    "        # Split '<tens>十<ones>'; a missing tens digit means 1, missing ones means 0.\n",
+    "        tens, has_ten, ones = match.group(0).partition(\"十\")\n",
+    "        if not has_ten:\n",
+    "            return str(digit_value[tens])\n",
+    "        return str(digit_value.get(tens, 1) * 10 + digit_value.get(ones, 0))\n",
+    "\n",
+    "    # The compound form is tried first so '十二' is consumed as one number.\n",
+    "    pattern = r'(?<!周)(?:[一二三四五六七八九]?十[一二三四五六七八九]?|[一二三四五六七八九])'\n",
+    "    return re.sub(pattern, _to_arabic, sentence)\n",
+    "\n",
+    "\n",
+    "def slot_fill(sentence, key=None):\n",
+    "    \"\"\"Extract slot values (date, time, from_city, to_city) from ``sentence``.\n",
+    "\n",
+    "    Args:\n",
+    "        sentence: user input text.\n",
+    "        key: optional slot name. When it is 'from_city' or 'to_city', every\n",
+    "            location entity is assigned to that slot. Time entities are parsed\n",
+    "            regardless of ``key``.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping slot name to the extracted text. 'date' and 'time' are\n",
+    "        present (possibly as '') whenever a time entity was recognised; city\n",
+    "        slots are present only when a city was assigned.\n",
+    "    \"\"\"\n",
+    "    slot = {}\n",
+    "    # Entity recognition; only the entity list of the single input text is used.\n",
+    "    _, ners = fool.analysis(sentence)\n",
+    "    entities = ners[0] if ners else []\n",
+    "\n",
+    "    # Weekdays are accepted with Chinese or Arabic digits (周一 / 周1) because\n",
+    "    # callers may have normalised numerals before calling this function.\n",
+    "    # At least one digit is required around 月/点 so a bare '点' is not a match.\n",
+    "    date_pattern = (r'大后天|后天|明天|今天|(?:本|下|这)?周[一二三四五六日末1-6]'\n",
+    "                    r'|\\d{1,2}月\\d{1,2}[号日]')\n",
+    "    clock_pattern = r'\\d{1,2}点\\d{1,2}分|\\d{1,2}点钟|\\d{1,2}点'\n",
+    "    period_pattern = r'上午|下午|早上|晚上|中午|早晨'\n",
+    "\n",
+    "    # 1 once the arrival city is known: any other unmarked city is then taken\n",
+    "    # to be the departure city.\n",
+    "    to_city_flag = 0\n",
+    "    # First unmarked city seen before the arrival city was found.\n",
+    "    pending_city = None\n",
+    "    for ner in entities:\n",
+    "        # ner[2] is the entity type, ner[-1] the entity text.\n",
+    "        if ner[2] == 'time':\n",
+    "            date_content = re.findall(date_pattern, ner[-1])\n",
+    "            # Keep an earlier hit when this entity carries no date.\n",
+    "            slot[\"date\"] = date_content[0] if date_content else slot.get(\"date\", \"\")\n",
+    "\n",
+    "            time_content = re.findall(clock_pattern, ner[-1])\n",
+    "            pmam_content = re.findall(period_pattern, ner[-1])\n",
+    "            # Parenthesised on purpose: without the parentheses the conditional\n",
+    "            # expression discards the clock time whenever a period word exists.\n",
+    "            time_value = (pmam_content[0] if pmam_content else \"\") + \\\n",
+    "                         (time_content[0] if time_content else \"\")\n",
+    "            slot[\"time\"] = time_value or slot.get(\"time\", \"\")\n",
+    "\n",
+    "        if ner[2] == 'location':\n",
+    "            city = ner[-1]\n",
+    "            if key is None:\n",
+    "                # Escape the entity text so it is matched literally.\n",
+    "                escaped = re.escape(city)\n",
+    "                if re.search(r'(到|去|回|回去)%s' % escaped, sentence):\n",
+    "                    to_city_flag = 1\n",
+    "                    slot[\"to_city\"] = city\n",
+    "                    continue\n",
+    "                if re.search(r'从%s|%s出发' % (escaped, escaped), sentence):\n",
+    "                    slot[\"from_city\"] = city\n",
+    "                elif to_city_flag == 1:\n",
+    "                    slot[\"from_city\"] = city\n",
+    "                elif pending_city is None:\n",
+    "                    pending_city = city\n",
+    "            elif key in [\"from_city\", \"to_city\"]:\n",
+    "                # A specific slot was requested: assign the city to it directly.\n",
+    "                slot[key] = city\n",
+    "\n",
+    "    # Handles '北京到上海': the departure city comes first and has no marker.\n",
+    "    if \"to_city\" in slot and \"from_city\" not in slot and pending_city is not None:\n",
+    "        slot[\"from_city\"] = pending_city\n",
+    "\n",
+    "    return slot\n",
+    "\n",
+    "\n",
+    "def fun_wait(clf_obj):\n",
+    "    \"\"\"Waiting state: print the banner, read one request and classify it.\n",
+    "\n",
+    "    Args:\n",
+    "        clf_obj: classifier object exposing fun_clf(sentence).\n",
+    "\n",
+    "    Returns:\n",
+    "        (clf_result, score, sentence): intent class, intent score and the raw\n",
+    "        text typed by the user.\n",
+    "    \"\"\"\n",
+    "    divider = \"-------------------------------------------------------------\"\n",
+    "    for banner_line in (\"\\n\\n\\n\", divider, divider, \"Starting ...\"):\n",
+    "        print(banner_line)\n",
+    "\n",
+    "    # Prompt the user, then run intent recognition on the answer.\n",
+    "    sentence = input(\"客服：请问需要什么服务？(时间请用12小时制表示）\\n\")\n",
+    "    intent, confidence = clf_obj.fun_clf(sentence)\n",
+    "    return intent, confidence, sentence\n",
+    "\n",
+    "\n",
+    "def fun_search(clf_result, sentence):\n",
+    "    \"\"\"Fill the four travel slots, then report ticket availability.\n",
+    "\n",
+    "    Args:\n",
+    "        clf_result: intent class of the request (not used in the body).\n",
+    "        sentence: the user's first request.\n",
+    "\n",
+    "    Returns:\n",
+    "        1 when tickets are reported available, 0 otherwise. Availability is\n",
+    "        simulated with a random number.\n",
+    "    \"\"\"\n",
+    "    # Prompt label for each slot, and the slot storage itself.\n",
+    "    name = {\"time\": \"出发时间\", \"date\": \"出发日期\", \"from_city\": \"出发城市\", \"to_city\": \"到达城市\"}\n",
+    "    slot = {\"time\": \"\", \"date\": \"\", \"from_city\": \"\", \"to_city\": \"\"}\n",
+    "\n",
+    "    # First pass: fill whatever the opening sentence already provides.\n",
+    "    slot.update(slot_fill(fun_replace_num(sentence)))\n",
+    "\n",
+    "    # Keep asking about each empty slot until all of them are filled.\n",
+    "    while \"\" in slot.values():\n",
+    "        for key in slot.keys():\n",
+    "            if slot[key] != \"\":\n",
+    "                continue\n",
+    "            answer = input(\"客服：请问%s是？\\n\" % (name[key]))\n",
+    "            answer = fun_replace_num(answer)\n",
+    "            # An answer may fill other empty slots too; filled ones are kept.\n",
+    "            for found_key, found_value in slot_fill(answer, key).items():\n",
+    "                if slot[found_key] == \"\":\n",
+    "                    slot[found_key] = found_value\n",
+    "\n",
+    "    trip = (slot[\"date\"], slot[\"time\"], slot[\"from_city\"], slot[\"to_city\"])\n",
+    "    if random.random() > 0.5:\n",
+    "        # 1 means tickets are available.\n",
+    "        print(\"客服：%s%s从%s到%s的票充足\" % trip)\n",
+    "        return 1\n",
+    "\n",
+    "    # 0 means no tickets; the conversation ends here.\n",
+    "    divider = \"-------------------------------------------------------------\"\n",
+    "    print(\"客服：%s%s从%s到%s无票\" % trip)\n",
+    "    print(\"End !!!\")\n",
+    "    print(divider)\n",
+    "    print(divider)\n",
+    "    return 0\n",
+    "\n",
+    "\n",
+    "def fun_book():\n",
+    "    \"\"\"Booking state: confirm the booking and print the closing banner.\n",
+    "\n",
+    "    Takes no arguments and returns nothing; it only prints.\n",
+    "    \"\"\"\n",
+    "    divider = \"-------------------------------------------------------------\"\n",
+    "    for output_line in (\"客服：已为您完成订票。\\n\\n\\n\", \"End !!!\", divider, divider):\n",
+    "        print(output_line)\n",
+    "\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    # Build and train the intent classifier once, before the dialogue loop.\n",
+    "    clf_obj = clf_model()\n",
+    "    clf_obj.train()\n",
+    "    # An intent is accepted only when its score reaches this threshold, which\n",
+    "    # filters out low-confidence classifications.\n",
+    "    threshold = 0.55\n",
+    "    while True:\n",
+    "        clf_result, score, sentence = fun_wait(clf_obj)\n",
+    "\n",
+    "        # waiting -> waiting: score below the threshold, or the intent is\n",
+    "        # end-of-service (class 2).\n",
+    "        if score < threshold or clf_result == 2:\n",
+    "            continue\n",
+    "\n",
+    "        # waiting -> search: the intent is query or booking.\n",
+    "        search_result = fun_search(clf_result, sentence)\n",
+    "        if search_result == 0:\n",
+    "            # No tickets: go back to waiting.\n",
+    "            continue\n",
+    "\n",
+    "        # Tickets are available: ask whether to book and classify the answer.\n",
+    "        sentence = input(\"客服：需要为您订票吗？\\n\")\n",
+    "        clf_result, score = clf_obj.fun_clf(sentence)\n",
+    "\n",
+    "        # search -> booking: tickets available AND the answer is a booking intent.\n",
+    "        if clf_result == 1:\n",
+    "            fun_book()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Testing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[['一个', '傻子', '在', '北京']]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import fool\n",
+    "\n",
+    "text = \"一个傻子在北京\"\n",
+    "print(fool.cut(text))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.13.2'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import tensorflow as tf\n",
+    "tf.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/stopword.txt
+++ b/stopword.txt
+的
+到
+一下
+我
+，
+帮
+去
+了
+？
+吧
+。
+请
+么
--- a/票务对话机器人项目说明.pdf
+++ b/票务对话机器人项目说明.pdf