modify project3

7c638758 · 20200519016 · 0a92a1fa · 7c638758 · 7c638758
Commit 7c638758 authored Oct 30, 2020 by 20200519016
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 22 deletions

project_3/机器翻译项目/.ipynb_checkpoints/transformer_nmt_student-checkpoint.ipynb
+9 -9

project_3/机器翻译项目/transformer_nmt_student.ipynb
+26 -13

No files found.
--- a/project_3/机器翻译项目/.ipynb_checkpoints/transformer_nmt_student-checkpoint.ipynb
+++ b/project_3/机器翻译项目/.ipynb_checkpoints/transformer_nmt_student-checkpoint.ipynb
-{
+{
@@ -129,7 +129,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -158,13 +158,13 @@
    "        self.en_word_dict, self.en_total_words, self.en_index_dict = self.build_dict(self.train_en)\n",
    "        self.cn_word_dict, self.cn_total_words, self.cn_index_dict = self.build_dict(self.train_cn)\n",
    "\n",
-    "#         # id化\n",
-    "#         self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
-    "#         self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
+    "        # id化\n",
+    "        self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
+    "        self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
    "\n",
-    "#         # 划分batch + padding + mask\n",
-    "#         self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
-    "#         self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
+    "        # 划分batch + padding + mask\n",
+    "        self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
+    "        self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
    "\n",
    "    def load_data(self, path):\n",
    "        \"\"\"\n",
@@ -285,7 +285,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [

--- a/project_3/机器翻译项目/transformer_nmt_student.ipynb
+++ b/project_3/机器翻译项目/transformer_nmt_student.ipynb
-{
+{
@@ -129,7 +129,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -158,13 +158,13 @@
    "        self.en_word_dict, self.en_total_words, self.en_index_dict = self.build_dict(self.train_en)\n",
    "        self.cn_word_dict, self.cn_total_words, self.cn_index_dict = self.build_dict(self.train_cn)\n",
    "\n",
-    "#         # id化\n",
-    "#         self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
-    "#         self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
+    "        # id化\n",
+    "        self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
+    "        self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
    "\n",
-    "#         # 划分batch + padding + mask\n",
-    "#         self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
-    "#         self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
+    "        # 划分batch + padding + mask\n",
+    "        self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
+    "        self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
    "\n",
    "    def load_data(self, path):\n",
    "        \"\"\"\n",
@@ -219,8 +219,8 @@
    "        length = len(en)\n",
    "        \n",
    "        # TODO: 将翻译前(英文)数据和翻译后(中文)数据都转换为id表示的形式\n",
-    "        out_en_ids = [[en_dict.get(w, 0) for w in sent] for sent in en]\n",
-    "        out_cn_ids = [[cn_dict.get(w, 0) for w in sent] for sent in cn]\n",
+    "        out_en_ids = [[en_dict.get(w) for w in sent] for sent in en]\n",
+    "        out_cn_ids = [[cn_dict.get(w) for w in sent] for sent in cn]\n",
    "\n",
    "\n",
    "        # 构建一个按照句子长度排序的函数\n",
@@ -285,14 +285,27 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 31,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "# test load data\n",
    "prepare = PrepareData('nmt/en-cn/train_mini.txt','nmt/en-cn/train_mini.txt')\n",
    "# [en,cn] = prepare.load_data('nmt/en-cn/train_mini.txt')\n",
-    "# print(cn)"
+    "# print(cn)\n",
+    "d = {'a':3,'b':5}\n",
+    "d.get('a')"
   ]
  },
  {