Commit 97a20a97 by 20200519047

Initial commit

parents
class Business(object):
    """Variables and functions related to a single business.

    This class is still a template: the original skeleton leaves the
    constructor body, the summary values and the aspect extraction blank.
    The placeholders below keep the module importable and are marked with
    TODO where real logic has to be supplied.
    """

    # Pre-trained sentiment model shared by every instance. Per the template
    # it is meant to be trained ahead of time, saved to a file and loaded here.
    SENTIMENT_MODEL = SentimentModel()

    def __init__(self, review_df):
        """Initialise the business from its reviews.

        Args:
            review_df: the reviews of this business (presumably a
                DataFrame, judging by the name -- TODO confirm).
        """
        # TODO: initialise the remaining variables the summary needs.
        self.review_df = review_df

    def aspect_based_summary(self):
        """Return the summary of this business.

        For every aspect the summary is meant to contain its positive and
        negative sentiment plus the top reviews; see the assignment document
        for the details.

        Returns:
            A dict with the keys 'business_id', 'business_name',
            'business_rating' and 'aspect_summary'. All values are None
            until the computation is implemented.
        """
        # TODO: the template gives the keys only; fill in the real values.
        return {
            'business_id': None,
            'business_name': None,
            'business_rating': None,
            'aspect_summary': None,
        }

    def extract_aspects(self):
        """Extract the aspects from the reviews of this business.

        Raises:
            NotImplementedError: always, until the extraction is written.
        """
        raise NotImplementedError("Business.extract_aspects is not implemented yet")
def get_review_summary_for_business(biz_id):
    """Return the review summary of one business.

    Args:
        biz_id: identifier of the business to summarise.

    Raises:
        NotImplementedError: always; the template provides the signature
            only, so the lookup of the reviews still has to be written.
    """
    # TODO: load the reviews for biz_id and build the summary from them.
    raise NotImplementedError(
        "get_review_summary_for_business is not implemented yet"
    )
def main():
    """Print the review summary of each configured business.

    For every id in ``bus_ids`` a progress line is printed, the summary is
    fetched with ``get_review_summary_for_business`` and then printed
    together with the time the call took. Nothing happens while the id
    list is empty.
    """
    # Imported here because no top-level import block is visible in this file.
    import time

    bus_ids = []  # TODO: list the business ids to work on
    for bus_id in bus_ids:
        print("Working on biz_id %s" % bus_id)
        start = time.time()
        summary = get_review_summary_for_business(bus_id)
        elapsed = time.time() - start
        # TODO: replace the raw dump with the final report layout.
        print(summary)
        print("Finished biz_id %s in %.2fs" % (bus_id, elapsed))


if __name__ == "__main__":
    main()
这里存放已经训练好的模型(情感分析模型或者其他模型)。请提前训练好模型,并将其序列化(serialize)到一个文件里,运行时直接加载使用即可。
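# This file contains the model training: given a dataset, it trains the sentiment classification model and saves the model file in the `model` folder.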
# dependency and version
class Sentence(object):
    """A single sentence and the processing steps applied to it.

    This class is still a template: the original skeleton names the
    methods but gives them no body, and leaves the aspect extractor
    unassigned. The stubs below keep the module importable and fail
    loudly when an unimplemented step is called.
    """

    # Tokenizer shared by all sentences; configured not to preserve case.
    WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)
    # Lemmatizer shared by all sentences.
    LEMMATIZER = WordNetLemmatizer()
    # Extracts the aspects of each individual sentence.
    # TODO: the template leaves this assignment blank; None is a placeholder.
    ASP_EXTRACTOR = None

    def __init__(self):
        """Create a sentence. TODO: initialise the sentence state."""

    def word_tokenize(self):
        """Split the sentence into word tokens (not implemented yet)."""
        raise NotImplementedError("Sentence.word_tokenize is not implemented yet")

    def pos_tag(self):
        """Tag the tokens with their part of speech (not implemented yet)."""
        raise NotImplementedError("Sentence.pos_tag is not implemented yet")

    def lemmatize(self):
        """Lemmatize the tokens of the sentence (not implemented yet)."""
        raise NotImplementedError("Sentence.lemmatize is not implemented yet")

    def contain_aspect(self):
        """Tell whether the sentence contains an aspect (not implemented yet)."""
        raise NotImplementedError("Sentence.contain_aspect is not implemented yet")
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment