homework9

15fc74ca · 20200318029 · e32c3ea5 · 15fc74ca · 15fc74ca · 15fc74ca
Commit 15fc74ca authored Sep 11, 2020 by 20200318029
Hide whitespace changes
Inline Side-by-side

Showing with 280 additions and 51 deletions

homework9/business.py
+23 -22

homework9/main.py
+11 -11

homework9/model_training.py
+9 -1

homework9/sentence.py
+19 -17

homework9/yelp
+218 -0

No files found.
--- a/homework9/business.py
+++ b/homework9/business.py
@@ -2,36 +2,37 @@


 class Business(object):
-	"""
-	用来表示跟business相关的变量和函数
-	"""
+    """
+    用来表示跟business相关的变量和函数
+    """

-	SENTIMENT_MODEL = SentimentModel() # 把已经训练好的模型存放在文件里，并导入进来
-	
+    SENTIMENT_MODEL = SentimentModel() # 把已经训练好的模型存放在文件里，并导入进来

-	def __init__(self, review_df):
-		# 初始化变量以及函数

+    def __init__(self, review_df):
+        # 初始化变量以及函数
+        pass

-	def aspect_based_summary(self):
-		"""
-		返回一个business的summary. 针对于每一个aspect计算出它的正面负面情感以及TOP reviews. 
-		具体细节请看给定的文档。 
-		"""

-		
+    def aspect_based_summary(self):
+        """
+        返回一个business的summary. 针对于每一个aspect计算出它的正面负面情感以及TOP reviews.
+        具体细节请看给定的文档。
+        """

-		return {'business_id': 
-				'business_name': 
-				'business_rating': 
-				'aspect_summary': 	
-				}


-	def extract_aspects(self):
-		"""
-		从一个business的review中抽取aspects
-		"""
+        return {'business_id':
+                'business_name':
+                'business_rating':
+                'aspect_summary':
+                }


+    def extract_aspects(self):
+        """
+        从一个business的review中抽取aspects
+        """
+        pass
+

--- a/homework9/main.py
+++ b/homework9/main.py
 def get_review_summary_for_business(biz_id):
-	# 获取每一个business的评论总结
-	
+    # 获取每一个business的评论总结

-def main(): 

-       	bus_ids = []  # 指定几个business ids
+def main():

-	for bus_id in bus_ids:
-		print ("Working on biz_id %s" % bus_id)
-		start = time.time()
+           bus_ids = []  # 指定几个business ids

-		summary = get_review_summary_for_business(bus_id)
-		
-		# format and print....
+    for bus_id in bus_ids:
+        print ("Working on biz_id %s" % bus_id)
+        start = time.time()
+
+        summary = get_review_summary_for_business(bus_id)
+
+        # format and print....

 if __name__ == "__main__":
-	main()
+    main()


--- a/homework9/model_training.py
+++ b/homework9/model_training.py

-# 此文件包含模型的训练。 给定数据集，训练出情感分类模型，并把模型文件存放在 model文件夹里。 
+# 此文件包含模型的训练。 给定数据集，训练出情感分类模型，并把模型文件存放在 model文件夹里。


+class YelpDataset(object):
+    def __init__(self):
+        self._maps = {}
+        pass
+
+    def generator(self):
+        pass
\ No newline at end of file
--- a/homework9/sentence.py
+++ b/homework9/sentence.py
@@ -2,25 +2,28 @@


 class Sentence(object):
-	
-	WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)
-	
-	LEMMATIZER = WordNetLemmatizer()

-	# 针对于每一句话抽取aspects
-	ASP_EXTRACTOR = 
+    WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)

-	def __init__(self):
-		
+    LEMMATIZER = WordNetLemmatizer()

-	def word_tokenize(self):
-	
+    # 针对于每一句话抽取aspects
+    ASP_EXTRACTOR =

-	def pos_tag(self):
-		
+    def __init__(self):

-	def lemmatize(self):
-		

-	def contain_aspect(self):
-		
\ No newline at end of file
+    def word_tokenize(self):
+        pass
+
+
+    def pos_tag(self):
+        pass
+
+
+    def lemmatize(self):
+        pass
+
+
+    def contain_aspect(self):
+        pass
--- a/homework9/yelp
+++ b/homework9/yelp
+business.json
+Contains business data including location data, attributes, and categories.
+
+{
+    // string, 22 character unique string business id
+    "business_id": "tnhfDv5Il8EaGSXZGiuQGg",
+
+    // string, the business's name
+    "name": "Garaje",
+
+    // string, the full address of the business
+    "address": "475 3rd St",
+
+    // string, the city
+    "city": "San Francisco",
+
+    // string, 2 character state code, if applicable
+    "state": "CA",
+
+    // string, the postal code
+    "postal_code": "94107",
+
+    // float, latitude
+    "latitude": 37.7817529521,
+
+    // float, longitude
+    "longitude": -122.39612197,
+
+    // float, star rating, rounded to half-stars
+    "stars": 4.5,
+
+    // integer, number of reviews
+    "review_count": 1198,
+
+    // integer, 0 or 1 for closed or open, respectively
+    "is_open": 1,
+
+    // object, business attributes to values. note: some attribute values might be objects
+    "attributes": {
+        "RestaurantsTakeOut": true,
+        "BusinessParking": {
+            "garage": false,
+            "street": true,
+            "validated": false,
+            "lot": false,
+            "valet": false
+        }
+    },
+
+    // an array of strings of business categories
+    "categories": [
+        "Mexican",
+        "Burgers",
+        "Gastropubs"
+    ],
+
+    // an object mapping each day of the week to its opening hours; hours use a 24hr clock
+    "hours": {
+        "Monday": "10:00-21:00",
+        "Tuesday": "10:00-21:00",
+        "Friday": "10:00-21:00",
+        "Wednesday": "10:00-21:00",
+        "Thursday": "10:00-21:00",
+        "Sunday": "11:00-18:00",
+        "Saturday": "10:00-21:00"
+    }
+}
+review.json
+Contains full review text data including the user_id that wrote the review and the business_id the review is written for.
+
+{
+    // string, 22 character unique review id
+    "review_id": "zdSx_SD6obEhz9VrW9uAWA",
+
+    // string, 22 character unique user id, maps to the user in user.json
+    "user_id": "Ha3iJu77CxlrFm-vQRs_8g",
+
+    // string, 22 character business id, maps to business in business.json
+    "business_id": "tnhfDv5Il8EaGSXZGiuQGg",
+
+    // integer, star rating
+    "stars": 4,
+
+    // string, date formatted YYYY-MM-DD
+    "date": "2016-03-09",
+
+    // string, the review itself
+    "text": "Great place to hang out after work: the prices are decent, and the ambience is fun. It's a bit loud, but very lively. The staff is friendly, and the food is good. They have a good selection of drinks.",
+
+    // integer, number of useful votes received
+    "useful": 0,
+
+    // integer, number of funny votes received
+    "funny": 0,
+
+    // integer, number of cool votes received
+    "cool": 0
+}
+user.json
+User data including the user's friend mapping and all the metadata associated with the user.
+
+{
+    // string, 22 character unique user id, maps to the user in user.json
+    "user_id": "Ha3iJu77CxlrFm-vQRs_8g",
+
+    // string, the user's first name
+    "name": "Sebastien",
+
+    // integer, the number of reviews they've written
+    "review_count": 56,
+
+    // string, when the user joined Yelp, formatted like YYYY-MM-DD
+    "yelping_since": "2011-01-01",
+
+    // array of strings, an array of the user's friends as user_ids
+    "friends": [
+        "wqoXYLWmpkEH0YvTmHBsJQ",
+        "KUXLLiJGrjtSsapmxmpvTA",
+        "6e9rJKQC3n0RSKyHLViL-Q"
+    ],
+
+    // integer, number of useful votes sent by the user
+    "useful": 21,
+
+    // integer, number of funny votes sent by the user
+    "funny": 88,
+
+    // integer, number of cool votes sent by the user
+    "cool": 15,
+
+    // integer, number of fans the user has
+    "fans": 1032,
+
+    // array of integers, the years the user was elite
+    "elite": [
+        2012,
+        2013
+    ],
+
+    // float, average rating of all reviews
+    "average_stars": 4.31,
+
+    // integer, number of hot compliments received by the user
+    "compliment_hot": 339,
+
+    // integer, number of more compliments received by the user
+    "compliment_more": 668,
+
+    // integer, number of profile compliments received by the user
+    "compliment_profile": 42,
+
+    // integer, number of cute compliments received by the user
+    "compliment_cute": 62,
+
+    // integer, number of list compliments received by the user
+    "compliment_list": 37,
+
+    // integer, number of note compliments received by the user
+    "compliment_note": 356,
+
+    // integer, number of plain compliments received by the user
+    "compliment_plain": 68,
+
+    // integer, number of cool compliments received by the user
+    "compliment_cool": 91,
+
+    // integer, number of funny compliments received by the user
+    "compliment_funny": 99,
+
+    // integer, number of writer compliments received by the user
+    "compliment_writer": 95,
+
+    // integer, number of photo compliments received by the user
+    "compliment_photos": 50
+}
+checkin.json
+Checkins on a business.
+
+{
+    // string, 22 character business id, maps to business in business.json
+    "business_id": "tnhfDv5Il8EaGSXZGiuQGg",
+
+    // string which is a comma-separated list of timestamps for each checkin, each with format YYYY-MM-DD HH:MM:SS
+    "date": "2016-04-26 19:49:16, 2016-08-30 18:36:57, 2016-10-15 02:45:18, 2016-11-18 01:54:50, 2017-04-20 18:39:06, 2017-05-03 17:58:02"
+}
+tip.json
+Tips written by a user on a business. Tips are shorter than reviews and tend to convey quick suggestions.
+
+{
+    // string, text of the tip
+    "text": "Secret menu - fried chicken sando is da bombbbbbb Their zapatos are good too.",
+
+    // string, when the tip was written, formatted like YYYY-MM-DD
+    "date": "2013-09-20",
+
+    // integer, how many compliments it has
+    "compliment_count": 172,
+
+    // string, 22 character business id, maps to business in business.json
+    "business_id": "tnhfDv5Il8EaGSXZGiuQGg",
+
+    // string, 22 character unique user id, maps to the user in user.json
+    "user_id": "49JhAJh8vSQ-vM4Aourl0g"
+}
+photo.json
+Contains photo data including the caption and classification (one of "food", "drink", "menu", "inside" or "outside").
+
+{
+    // string, 22 character unique photo id
+    "photo_id": "_nN_DhLXkfwEkwPNxne9hw",
+    // string, 22 character business id, maps to business in business.json
+    "business_id" : "tnhfDv5Il8EaGSXZGiuQGg",
+    // string, the photo caption, if any
+    "caption" : "carne asada fries",
+    // string, the category the photo belongs to, if any
+    "label" : "food"
+}
\ No newline at end of file