Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
Project3
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200203063
Project3
Commits
cea76185
Commit
cea76185
authored
Sep 01, 2020
by
20200203063
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Replace business.py
parent
6251e257
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
95 additions
and
13 deletions
+95
-13
business.py
+95
-13
No files found.
business.py
View file @
cea76185
# encoding: utf-8
from
model_training
import
SentimentModel
from
gen_id2business
import
id2business
from
tqdm
import
tqdm
from
sentence
import
Sentence
import
json
import
pandas
as
pd
# Locations passed to ``SentimentModel.predict`` as ``model_path`` and
# ``vectorizer_path`` (presumably pickled artifacts, judging by the extension).
model_path = "model/model.pkl"
vector_path = "model/vector.pkl"

# Store the already-trained model in a file and load it here.
SENTIMENT_MODEL = SentimentModel()
class Business(object):
    """Variables and functions related to a single business.

    Attributes:
        review_df: the DataFrame of reviews given to the constructor.
        business_id: ``business_id`` of the first review row.
        business_name: ``id2business[business_id]["name"]``.
        review_nps: one list of noun phrases per review, in row order.
        aspects: the most frequent noun phrases across all reviews.
    """

    # Store the already-trained model in a file and load it here.
    # Shared by every instance; ``aspect_based_summary`` calls its ``predict``.
    SENTIMENT_MODEL = SentimentModel()

    def __init__(self, review_df):
        """Initialise the business from its reviews and extract its aspects.

        Args:
            review_df: pandas DataFrame of reviews. The class reads the
                ``business_id``, ``text`` and ``stars`` columns. Only the
                first row's ``business_id`` is read, so all rows presumably
                belong to the same business -- TODO confirm with callers.

        Raises:
            IndexError: if ``review_df`` has no rows.
            KeyError: if the business id is missing from ``id2business``.
        """
        self.review_df = review_df
        self.business_id = self.review_df.iloc[0].business_id
        self.business_name = id2business[self.business_id]["name"]
        self.review_nps, self.aspects = self.extract_aspects()

    def aspect_based_summary(self, threshold=0.5):
        """Return a summary of the business.

        For every aspect this computes its average star rating and picks
        sample positive and negative reviews. See the project document for
        the full specification.

        Args:
            threshold: a review counts as positive when ``score[0][1]``
                returned by the sentiment model is at least this value
                (presumably the positive-class probability -- TODO confirm).

        Returns:
            dict with keys ``business_id``, ``business_name``,
            ``business_rating`` and ``aspect_summary``. ``aspect_summary`` is
            a list with one dict per aspect holding ``aspect``, ``rating``
            (``None`` when no review mentions the aspect) and up to five
            review texts each under ``pos`` and ``neg``.
        """
        texts = self.review_df["text"].tolist()
        stars = self.review_df["stars"].tolist()

        aspect_info = [
            {"aspect": aspect, "stars": [], "pos": [], "neg": []}
            for aspect in self.aspects
        ]

        reviews = tqdm(zip(texts, stars), total=len(texts))
        for idx, (text, star) in enumerate(reviews):
            mentioned = set(self.review_nps[idx])
            matching = [
                info for info in aspect_info if info["aspect"] in mentioned
            ]
            if not matching:
                continue

            # The prediction depends only on the review text, so run the
            # model once per review instead of once per matching aspect.
            _, score = self.SENTIMENT_MODEL.predict(
                [text], model_path=model_path, vectorizer_path=vector_path
            )
            bucket = "pos" if score[0][1] >= threshold else "neg"
            for info in matching:
                info["stars"].append(star)
                info[bucket].append((idx, score[0]))

        def sample_texts(entries):
            """Return up to five review texts sampled from the sorted entries."""
            # tuple() keeps the element-wise ordering of a plain list while
            # also working when the score row is a NumPy array, whose direct
            # comparison inside sorted() would raise.
            ordered = sorted(entries, key=lambda entry: tuple(entry[1]))
            # With more than 100 entries, take every (n // 100)-th one.
            step = len(ordered) // 100 if len(ordered) > 100 else 1
            return [texts[review_idx] for review_idx, _ in ordered[::step][:5]]

        star_total = 0
        detail = []
        for info in aspect_info:
            aspect_stars = info["stars"]
            star_total += sum(aspect_stars)
            rating = sum(aspect_stars) / len(aspect_stars) if aspect_stars else None
            detail.append({
                "aspect": info["aspect"],
                "rating": rating,
                "pos": sample_texts(info["pos"]),
                "neg": sample_texts(info["neg"]),
            })

        # NOTE(review): a review mentioning several aspects is counted once
        # per aspect and reviews mentioning none are not counted, so this can
        # exceed the star scale. Kept as-is -- confirm the intended formula.
        business_rating = star_total / len(self.review_df)

        return {
            'business_id': self.business_id,
            'business_name': self.business_name,
            'business_rating': business_rating,
            'aspect_summary': detail,
        }

    def extract_aspects(self, top_k=5):
        """Extract aspects from the reviews of a business.

        Noun phrases are collected from every review with
        ``Sentence.extract_noun_phrase`` and counted; the most frequent ones
        become the aspects.

        Args:
            top_k: number of most frequent noun phrases to keep as aspects.

        Returns:
            tuple ``(review_nps, aspects)``: ``review_nps`` holds one list of
            noun phrases per review in row order, and ``aspects`` holds the
            ``top_k`` most frequent phrases, most frequent first.
        """
        phrase_counts = {}
        review_nps = []
        for text in tqdm(self.review_df["text"]):
            phrases = list(Sentence(text).extract_noun_phrase())
            review_nps.append(phrases)
            for phrase in phrases:
                phrase_counts[phrase] = phrase_counts.get(phrase, 0) + 1

        # Sort descending: an ascending sort followed by a head slice would
        # return the least frequent phrases instead of the top ones.
        ranked = sorted(
            phrase_counts.items(), key=lambda item: item[1], reverse=True
        )
        aspects = [phrase for phrase, _ in ranked[:top_k]]
        return review_nps, aspects
if __name__ == "__main__":
    # Demo: load the reviews, then print the aspects and the aspect-based
    # summary for each selected business.
    review_path = "data/review.json"

    # The file holds one JSON object per line. Iterate the file handle
    # directly so the raw text is never held in memory all at once.
    reviews = []
    with open(review_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:  # skip blank lines
                reviews.append(json.loads(line))

    review_df = pd.DataFrame(reviews)
    print(len(review_df))
    print(review_df.head())

    business_ids = ["ujmEBvifdJM6h6RLv4wQIg"]
    for business_id in business_ids:
        current_review_df = review_df[review_df.business_id == business_id]
        print(current_review_df.head())
        print(len(current_review_df))

        business = Business(current_review_df)
        print("Aspects", business.aspects)
        print(business.aspect_based_summary())
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment