Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
Project3
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200203063
Project3
Commits
cea76185
Commit
cea76185
authored
4 years ago
by
20200203063
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Replace business.py
parent
6251e257
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
97 additions
and
15 deletions
+97
-15
business.py
+97
-15
No files found.
business.py
View file @
cea76185
# encoding: utf-8
from
model_training
import
SentimentModel
from
gen_id2business
import
id2business
from
tqdm
import
tqdm
from
sentence
import
Sentence
import
json
import
pandas
as
pd
# Locations of the pickled sentiment classifier and its vectorizer; both are
# passed to SENTIMENT_MODEL.predict() by Business.aspect_based_summary().
model_path = "model/model.pkl"
vector_path = "model/vector.pkl"

# The already-trained model is stored in the files above and loaded from them.
SENTIMENT_MODEL = SentimentModel()
# 把已经训练好的模型存放在文件里,并导入进来
class Business(object):
    """Variables and functions related to a single business.

    An instance is built from a DataFrame holding the reviews of one business.
    On construction the noun phrases of every review are extracted and the
    most frequent ones become the business's aspects.
    """

    # The already-trained model is stored in a file and loaded from it.
    # NOTE(review): the methods below call the module-level SENTIMENT_MODEL,
    # not this attribute; it is kept so Business.SENTIMENT_MODEL stays available.
    SENTIMENT_MODEL = SentimentModel()

    def __init__(self, review_df):
        """Store the reviews and derive the business id, name and aspects.

        Args:
            review_df: DataFrame with the reviews of one business. The class
                reads its ``business_id``, ``text`` and ``stars`` columns. The
                business id is taken from the first row, so the frame must
                not be empty.
        """
        self.review_df = review_df
        self.business_id = self.review_df.iloc[0].business_id
        self.business_name = id2business[self.business_id]["name"]
        self.review_nps, self.aspects = self.extract_aspects()

    def aspect_based_summary(self, threshold=0.5):
        """Return a summary of the business.

        For every aspect this computes its average star rating and collects
        up to five positive and five negative review texts.

        Args:
            threshold: a review is filed as positive when ``score[0][1]`` of
                the sentiment prediction is at least this value, otherwise as
                negative.

        Returns:
            A dict with the keys ``business_id``, ``business_name``,
            ``business_rating`` (mean of the ``stars`` column) and
            ``aspect_summary`` (one dict per aspect with the keys ``aspect``,
            ``rating``, ``pos`` and ``neg``).
        """
        aspect_info = [
            {"aspect": aspect, "stars": [], "pos": [], "neg": []}
            for aspect in self.aspects
        ]

        for idx, current_review_nps in enumerate(tqdm(self.review_nps)):
            mentioned = [
                info for info in aspect_info
                if info["aspect"] in current_review_nps
            ]
            if not mentioned:
                continue

            review = self.review_df.iloc[idx]
            # The prediction only depends on the review text, so it is run once
            # per review instead of once per (review, aspect) pair.
            _, score = SENTIMENT_MODEL.predict(
                [review.text],
                model_path=model_path,
                vectorizer_path=vector_path,
            )
            # Stored as a tuple so the rows can be ordered by sorted() whatever
            # sequence type predict() returns (assumes score[0] is a sequence of
            # per-class scores - TODO confirm against SentimentModel.predict).
            score_row = tuple(score[0])
            bucket = "pos" if score[0][1] >= threshold else "neg"
            for info in mentioned:
                info["stars"].append(review.stars)
                info[bucket].append((idx, score_row))

        detail = []
        for info in aspect_info:
            stars = info["stars"]
            detail.append({
                "aspect": info["aspect"],
                "rating": sum(stars) / len(stars),
                "pos": self._sample_review_texts(info["pos"]),
                "neg": self._sample_review_texts(info["neg"]),
            })

        # Every review counts exactly once. Summing the per-aspect star lists
        # would count a review once per aspect it mentions and not at all when
        # it mentions none.
        business_rating = self.review_df["stars"].mean()

        return {
            'business_id': self.business_id,
            'business_name': self.business_name,
            'business_rating': business_rating,
            'aspect_summary': detail,
        }

    def _sample_review_texts(self, scored_reviews, limit=5):
        """Return up to ``limit`` review texts chosen from ``scored_reviews``.

        ``scored_reviews`` holds ``(review_position, score_row)`` pairs. They
        are sorted ascending by score row; when there are more than 100 pairs
        only every ``len // 100``-th one is considered, and the first
        ``limit`` of those are returned as texts.
        """
        count = len(scored_reviews)
        step = count // 100 if count > 100 else 1
        ranked = sorted(scored_reviews, key=lambda item: item[1])
        return [
            self.review_df.iloc[position].text
            for position, _ in ranked[::step][:limit]
        ]

    def extract_aspects(self):
        """Extract aspects from the reviews of a business.

        Returns:
            A ``(review_nps, aspects)`` tuple. ``review_nps[i]`` is the list of
            noun phrases found in the i-th review (positional order of
            ``self.review_df``); ``aspects`` holds the five most frequent noun
            phrases over all reviews.
        """
        phrase_counts = {}
        review_nps = []
        for text in tqdm(self.review_df["text"]):
            phrases = list(Sentence(text).extract_noun_phrase())
            for phrase in phrases:
                phrase_counts[phrase] = phrase_counts.get(phrase, 0) + 1
            review_nps.append(phrases)

        # Highest counts first; an ascending sort would select the five
        # rarest noun phrases instead of the five most frequent.
        top_five = sorted(
            phrase_counts.items(),
            key=lambda item: item[1],
            reverse=True,
        )[:5]
        aspects = [phrase for phrase, _ in top_five]
        return review_nps, aspects
if __name__ == "__main__":
    # Demo run: load the reviews, then print the aspects and the aspect-based
    # summary for each hard-coded business id.
    review_path = "data/review.json"

    reviews = []
    with open(review_path, "r", encoding="utf-8") as f:
        # One JSON document per line. Iterating the file object streams it
        # instead of first building a list of every line.
        for line in f:
            line = line.strip()
            if not line:
                continue
            reviews.append(json.loads(line))

    review_df = pd.DataFrame(reviews)
    print(len(review_df))
    print(review_df.head())

    business_ids = ["ujmEBvifdJM6h6RLv4wQIg"]
    for business_id in business_ids:
        current_review_df = review_df[review_df.business_id == business_id]
        print(current_review_df.head())
        print(len(current_review_df))
        if current_review_df.empty:
            # Business() reads the first row, so it cannot be built without reviews.
            print("No reviews found for business", business_id)
            continue
        business = Business(current_review_df)
        print("Aspects", business.aspects)
        print(business.aspect_based_summary())
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment