Commit 15fc74ca by 20200318029

homework9

parent e32c3ea5
......@@ -2,36 +2,37 @@
class Business(object):
"""
Holds the variables and functions related to a business.
"""
"""
Holds the variables and functions related to a business.
"""
SENTIMENT_MODEL = SentimentModel() # the already-trained model is stored in a file and loaded here
SENTIMENT_MODEL = SentimentModel() # the already-trained model is stored in a file and loaded here
def __init__(self, review_df):
# Initialize variables and functions
def __init__(self, review_df):
# Initialize variables and functions
pass
def aspect_based_summary(self):
"""
Return a summary of a business. For each aspect, compute its positive and
negative sentiment as well as the TOP reviews.
See the provided document for the details.

The returned dict has the keys business_id, business_name, business_rating
and aspect_summary; their values are left blank in this template.
"""
def aspect_based_summary(self):
"""
Return a summary of a business. For each aspect, compute its positive and
negative sentiment as well as the TOP reviews.
See the provided document for the details.

The returned dict has the keys business_id, business_name, business_rating
and aspect_summary; their values are left blank in this template.
"""
return {'business_id':
'business_name':
'business_rating':
'aspect_summary':
}
def extract_aspects(self):
"""
Extract aspects from the reviews of a business.
"""
return {'business_id':
'business_name':
'business_rating':
'aspect_summary':
}
def extract_aspects(self):
"""
Extract aspects from the reviews of a business.
"""
pass
def get_review_summary_for_business(biz_id):
# Get the review summary for each business
# Get the review summary for each business
def main():
bus_ids = []  # specify a few business ids
def main():
for bus_id in bus_ids:
print ("Working on biz_id %s" % bus_id)
start = time.time()
bus_ids = []  # specify a few business ids
summary = get_review_summary_for_business(bus_id)
# format and print....
for bus_id in bus_ids:
print ("Working on biz_id %s" % bus_id)
start = time.time()
summary = get_review_summary_for_business(bus_id)
# format and print....
if __name__ == "__main__":
main()
main()
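# This file contains the model training. Given a dataset, train a sentiment classification model and save the model file in the model folder.
# This file contains the model training. Given a dataset, train a sentiment classification model and save the model file in the model folder.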
class YelpDataset(object):
    """Skeleton for the dataset wrapper used by the training script.

    NOTE(review): presumably wraps the Yelp review data that the sentiment
    model is trained on; no loading logic exists yet, so confirm the intended
    contract before relying on it.
    """

    def __init__(self):
        """Create an empty dataset holder."""
        # Per-instance dict, empty until something populates it.
        # NOTE(review): presumably lookup tables built while loading; confirm.
        self._maps = {}

    def generator(self):
        """Placeholder: not implemented yet, currently returns None."""
\ No newline at end of file
......@@ -2,25 +2,28 @@
class Sentence(object):
WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)
LEMMATIZER = WordNetLemmatizer()
# Extract aspects for each sentence
ASP_EXTRACTOR =
WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)
def __init__(self):
LEMMATIZER = WordNetLemmatizer()
def word_tokenize(self):
# Extract aspects for each sentence
ASP_EXTRACTOR =
def pos_tag(self):
def __init__(self):
def lemmatize(self):
def contain_aspect(self):
\ No newline at end of file
def word_tokenize(self):
pass
def pos_tag(self):
pass
def lemmatize(self):
pass
def contain_aspect(self):
pass
business.json
Contains business data including location data, attributes, and categories.
{
// string, 22 character unique string business id
"business_id": "tnhfDv5Il8EaGSXZGiuQGg",
// string, the business's name
"name": "Garaje",
// string, the full address of the business
"address": "475 3rd St",
// string, the city
"city": "San Francisco",
// string, 2 character state code, if applicable
"state": "CA",
// string, the postal code
"postal_code": "94107",
// float, latitude
"latitude": 37.7817529521,
// float, longitude
"longitude": -122.39612197,
// float, star rating, rounded to half-stars
"stars": 4.5,
// integer, number of reviews
"review_count": 1198,
// integer, 0 or 1 for closed or open, respectively
"is_open": 1,
// object, business attributes to values. note: some attribute values might be objects
"attributes": {
"RestaurantsTakeOut": true,
"BusinessParking": {
"garage": false,
"street": true,
"validated": false,
"lot": false,
"valet": false
}
},
// an array of strings of business categories
"categories": [
"Mexican",
"Burgers",
"Gastropubs"
],
// an object of key day to value hours, hours are using a 24hr clock
"hours": {
"Monday": "10:00-21:00",
"Tuesday": "10:00-21:00",
"Friday": "10:00-21:00",
"Wednesday": "10:00-21:00",
"Thursday": "10:00-21:00",
"Sunday": "11:00-18:00",
"Saturday": "10:00-21:00"
}
}
review.json
Contains full review text data including the user_id that wrote the review and the business_id the review is written for.
{
// string, 22 character unique review id
"review_id": "zdSx_SD6obEhz9VrW9uAWA",
// string, 22 character unique user id, maps to the user in user.json
"user_id": "Ha3iJu77CxlrFm-vQRs_8g",
// string, 22 character business id, maps to business in business.json
"business_id": "tnhfDv5Il8EaGSXZGiuQGg",
// integer, star rating
"stars": 4,
// string, date formatted YYYY-MM-DD
"date": "2016-03-09",
// string, the review itself
"text": "Great place to hang out after work: the prices are decent, and the ambience is fun. It's a bit loud, but very lively. The staff is friendly, and the food is good. They have a good selection of drinks.",
// integer, number of useful votes received
"useful": 0,
// integer, number of funny votes received
"funny": 0,
// integer, number of cool votes received
"cool": 0
}
user.json
User data including the user's friend mapping and all the metadata associated with the user.
{
// string, 22 character unique user id, maps to the user in user.json
"user_id": "Ha3iJu77CxlrFm-vQRs_8g",
// string, the user's first name
"name": "Sebastien",
// integer, the number of reviews they've written
"review_count": 56,
// string, when the user joined Yelp, formatted like YYYY-MM-DD
"yelping_since": "2011-01-01",
// array of strings, an array of the user's friend as user_ids
"friends": [
"wqoXYLWmpkEH0YvTmHBsJQ",
"KUXLLiJGrjtSsapmxmpvTA",
"6e9rJKQC3n0RSKyHLViL-Q"
],
// integer, number of useful votes sent by the user
"useful": 21,
// integer, number of funny votes sent by the user
"funny": 88,
// integer, number of cool votes sent by the user
"cool": 15,
// integer, number of fans the user has
"fans": 1032,
// array of integers, the years the user was elite
"elite": [
2012,
2013
],
// float, average rating of all reviews
"average_stars": 4.31,
// integer, number of hot compliments received by the user
"compliment_hot": 339,
// integer, number of more compliments received by the user
"compliment_more": 668,
// integer, number of profile compliments received by the user
"compliment_profile": 42,
// integer, number of cute compliments received by the user
"compliment_cute": 62,
// integer, number of list compliments received by the user
"compliment_list": 37,
// integer, number of note compliments received by the user
"compliment_note": 356,
// integer, number of plain compliments received by the user
"compliment_plain": 68,
// integer, number of cool compliments received by the user
"compliment_cool": 91,
// integer, number of funny compliments received by the user
"compliment_funny": 99,
// integer, number of writer compliments received by the user
"compliment_writer": 95,
// integer, number of photo compliments received by the user
"compliment_photos": 50
}
checkin.json
Checkins on a business.
{
// string, 22 character business id, maps to business in business.json
"business_id": "tnhfDv5Il8EaGSXZGiuQGg",
// string which is a comma-separated list of timestamps for each checkin, each with format YYYY-MM-DD HH:MM:SS
"date": "2016-04-26 19:49:16, 2016-08-30 18:36:57, 2016-10-15 02:45:18, 2016-11-18 01:54:50, 2017-04-20 18:39:06, 2017-05-03 17:58:02"
}
tip.json
Tips written by a user on a business. Tips are shorter than reviews and tend to convey quick suggestions.
{
// string, text of the tip
"text": "Secret menu - fried chicken sando is da bombbbbbb Their zapatos are good too.",
// string, when the tip was written, formatted like YYYY-MM-DD
"date": "2013-09-20",
// integer, how many compliments it has
"compliment_count": 172,
// string, 22 character business id, maps to business in business.json
"business_id": "tnhfDv5Il8EaGSXZGiuQGg",
// string, 22 character unique user id, maps to the user in user.json
"user_id": "49JhAJh8vSQ-vM4Aourl0g"
}
photo.json
Contains photo data including the caption and classification (one of "food", "drink", "menu", "inside" or "outside").
{
// string, 22 character unique photo id
"photo_id": "_nN_DhLXkfwEkwPNxne9hw",
// string, 22 character business id, maps to business in business.json
"business_id" : "tnhfDv5Il8EaGSXZGiuQGg",
// string, the photo caption, if any
"caption" : "carne asada fries",
// string, the category the photo belongs to, if any
"label" : "food"
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment