Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
Project3
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
20200203063
Project3
Commits
8e8f131f
Commit
8e8f131f
authored
4 years ago
by
20200203063
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
4d476bb4
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
0 deletions
+30
-0
gen_valid_business_id.py
+30
-0
No files found.
gen_valid_business_id.py
0 → 100644
View file @
8e8f131f
#encoding: utf-8
import
pandas
as
pd
import
json
from
tqdm
import
tqdm
# Input: line-delimited JSON file of reviews.
review_path = "data/review.json"
# Output: text file receiving one "<business_id>\t<count>" entry per line.
valid_business_id_path = "data/valid_business_id.txt"
def gen_valid_business_id(review_path, count_citerion=100):
    """Return the business ids that occur at least ``count_citerion`` times.

    The file at ``review_path`` is opened as UTF-8 text and read line by
    line. Blank lines are skipped; every other line is parsed as a JSON
    object and its ``"business_id"`` value is tallied. A one-line summary
    with the number of distinct business ids is printed to stdout.

    Args:
        review_path: Path of the line-delimited JSON review file.
        count_citerion: Inclusive minimum number of occurrences a business
            id needs in order to be returned. (The parameter name keeps its
            original spelling so existing keyword callers still work.)

    Returns:
        A list of strings, each the business id and its count separated by
        a tab character, in order of first appearance in the file (dict
        insertion order).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a parsed object has no ``"business_id"`` key.
    """
    count_business_id = {}

    with open(review_path, "r", encoding="utf-8") as f:
        # Iterate the file object directly so the whole file is never held
        # in memory; tqdm then shows a running count without a total.
        for line in tqdm(f):
            record = line.strip()
            if not record:
                continue
            business_id = json.loads(record)["business_id"]
            count_business_id[business_id] = (
                count_business_id.get(business_id, 0) + 1
            )

    # Path first, count second, matching the placeholders in the message.
    print(
        "total count of business id in {}: {}".format(
            review_path, len(count_business_id)
        )
    )

    return [
        "{}\t{}".format(str(key), str(value))
        for key, value in count_business_id.items()
        if value >= count_citerion
    ]
# Collect every business id with at least 100 reviews ...
valid_business = gen_valid_business_id(review_path, count_citerion=100)

# ... and write them out, one entry per line (no trailing newline).
with open(valid_business_id_path, mode='w', encoding='utf-8') as f:
    f.write("\n".join(valid_business))
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment