Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
Projects
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200519052
Projects
Commits
90fbfa9a
Commit
90fbfa9a
authored
Jul 13, 2020
by
20200519052
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加了代码文件
parent
1e077ad7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
0 deletions
+51
-0
Project1/related.py
+51
-0
Project1/starter_code.ipynb
+0
-0
No files found.
Project1/related.py
0 → 100644
View file @
90fbfa9a
import
heapq
import
numpy
as
np
# Solve the top-k problem with a priority queue (heap).
def topk(inputs, k):
    """Return the positions of the ``k`` largest values in ``inputs``.

    Args:
        inputs: Iterable of mutually comparable values (e.g. similarity
            scores).
        k: Maximum number of positions to return.

    Returns:
        A list of 0-based positions ordered from the largest value to the
        smallest. Equal values keep their original relative order. The list
        is shorter than ``k`` when ``inputs`` holds fewer than ``k`` items,
        and empty when ``k`` is not positive.
    """
    if k <= 0:
        return []
    # Each position must travel with its value: ordering positions in a heap
    # of their own would evict the smallest position rather than the position
    # of the smallest value. heapq.nlargest keeps a bounded heap internally.
    best = heapq.nlargest(k, enumerate(inputs), key=lambda pair: pair[1])
    return [position for position, _ in best]
# Score one word's GloVe vector against every candidate vector and keep the
# most similar words.
def get_top_glove_results(vec, emb, vocab, k=10):
    """Return the ``k`` entries of ``vocab`` most cosine-similar to ``vec``.

    Args:
        vec: 1-D query vector.
        emb: 2-D array whose columns are the candidate vectors (the script
            in this file passes the transposed embedding matrix).
        vocab: NumPy array of words; ``vocab[i]`` is looked up for column
            ``i`` of ``emb``.
        k: Number of words to return. Defaults to 10.

    Returns:
        The slice of ``vocab`` selected by the top-k column positions, in the
        order produced by ``topk``.
    """
    # Cosine similarity needs one norm per candidate column; a single norm
    # over the whole matrix would only rescale the raw dot products.
    dots = np.dot(vec, emb)
    norms = np.linalg.norm(vec) * np.linalg.norm(emb, axis=0)
    # A zero-length vector has a zero dot product as well, so swapping its
    # zero denominator for 1 yields a similarity of 0 instead of NaN.
    norms[norms == 0] = 1.0
    cos = dots / norms
    # top_idxs holds the column positions of the most similar vectors.
    top_idxs = topk(list(cos), k)
    return vocab[top_idxs]
# Load the word -> id mapping, which is stored as a Python literal.
with open('./data/word2id.txt', 'r') as f:
    # NOTE(review): eval() executes whatever code the file contains. If the
    # file is always a plain dict literal, ast.literal_eval would be a safer
    # drop-in replacement -- confirm the file format before switching.
    word2id = eval(f.read())
emb = np.loadtxt('./data/embedding.txt', delimiter=',')
res = {}
# Row ``word_id - 1`` of ``emb`` belongs to the word with that id, so the
# vocabulary is ordered by id to keep vocab[i] aligned with row i no matter
# what order the mapping was written in.
vocab = np.array(sorted(word2id, key=word2id.get))
for word, word_id in word2id.items():
    word_vec = emb[word_id - 1, :]
    # tolist() yields plain Python strings, so str(res) below writes ordinary
    # quoted words instead of NumPy scalar representations.
    res[word] = get_top_glove_results(word_vec, emb.T, vocab).tolist()
# Save the results.
with open('related_words.txt', 'w') as f:
    f.write(str(res))
\ No newline at end of file
Project1/starter_code.ipynb
0 → 100755
View file @
90fbfa9a
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment