Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
projects
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200519016
projects
Commits
7c638758
Commit
7c638758
authored
4 years ago
by
20200519016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify project3
parent
0a92a1fa
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
22 deletions
+35
-22
project_3/机器翻译项目/.ipynb_checkpoints/transformer_nmt_student-checkpoint.ipynb
+9
-9
project_3/机器翻译项目/transformer_nmt_student.ipynb
+26
-13
No files found.
project_3/机器翻译项目/.ipynb_checkpoints/transformer_nmt_student-checkpoint.ipynb
View file @
7c638758
{
{
...
...
@@ -129,7 +129,7 @@
},
{
"cell_type": "code",
"execution_count": 2
1
,
"execution_count": 2
3
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -158,13 +158,13 @@
" self.en_word_dict, self.en_total_words, self.en_index_dict = self.build_dict(self.train_en)\n",
" self.cn_word_dict, self.cn_total_words, self.cn_index_dict = self.build_dict(self.train_cn)\n",
"\n",
"
#
# id化\n",
"
#
self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
"
#
self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
" # id化\n",
" self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
" self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
"\n",
"
#
# 划分batch + padding + mask\n",
"
#
self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
"
#
self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
" # 划分batch + padding + mask\n",
" self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
" self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
"\n",
" def load_data(self, path):\n",
" \"\"\"\n",
...
...
@@ -285,7 +285,7 @@
},
{
"cell_type": "code",
"execution_count": 2
2
,
"execution_count": 2
4
,
"metadata": {},
"outputs": [],
"source": [
...
...
This diff is collapsed.
Click to expand it.
project_3/机器翻译项目/transformer_nmt_student.ipynb
View file @
7c638758
{
{
...
...
@@ -129,7 +129,7 @@
},
{
"cell_type": "code",
"execution_count":
21
,
"execution_count":
32
,
"metadata": {},
"outputs": [],
"source": [
...
...
@@ -158,13 +158,13 @@
" self.en_word_dict, self.en_total_words, self.en_index_dict = self.build_dict(self.train_en)\n",
" self.cn_word_dict, self.cn_total_words, self.cn_index_dict = self.build_dict(self.train_cn)\n",
"\n",
"
#
# id化\n",
"
#
self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
"
#
self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
" # id化\n",
" self.train_en, self.train_cn = self.wordToID(self.train_en, self.train_cn, self.en_word_dict, self.cn_word_dict)\n",
" self.dev_en, self.dev_cn = self.wordToID(self.dev_en, self.dev_cn, self.en_word_dict, self.cn_word_dict)\n",
"\n",
"
#
# 划分batch + padding + mask\n",
"
#
self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
"
#
self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
" # 划分batch + padding + mask\n",
" self.train_data = self.splitBatch(self.train_en, self.train_cn, BATCH_SIZE)\n",
" self.dev_data = self.splitBatch(self.dev_en, self.dev_cn, BATCH_SIZE)\n",
"\n",
" def load_data(self, path):\n",
" \"\"\"\n",
...
...
@@ -219,8 +219,8 @@
" length = len(en)\n",
" \n",
" # TODO: 将翻译前(英文)数据和翻译后(中文)数据都转换为id表示的形式\n",
" out_en_ids = [[en_dict.get(w
, 0
) for w in sent] for sent in en]\n",
" out_cn_ids = [[cn_dict.get(w
, 0
) for w in sent] for sent in cn]\n",
" out_en_ids = [[en_dict.get(w) for w in sent] for sent in en]\n",
" out_cn_ids = [[cn_dict.get(w) for w in sent] for sent in cn]\n",
"\n",
"\n",
" # 构建一个按照句子长度排序的函数\n",
...
...
@@ -285,14 +285,27 @@
},
{
"cell_type": "code",
"execution_count":
22
,
"execution_count":
31
,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"3"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# test load data\n",
"prepare = PrepareData('nmt/en-cn/train_mini.txt','nmt/en-cn/train_mini.txt')\n",
"# [en,cn] = prepare.load_data('nmt/en-cn/train_mini.txt')\n",
"# print(cn)"
"# print(cn)\n",
"d = {'a':3,'b':5}\n",
"d.get('a')"
]
},
{
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment