#encoding: utf-8
"""Select the business ids that have enough reviews in a review dump.

The input file is read as one JSON object per line; every object must carry
a ``"business_id"`` key.  When run as a script, the ids whose review count
reaches the threshold are written to ``valid_business_id_path`` as
``"<business_id>\\t<count>"`` lines.
"""
import json
from collections import Counter

import pandas as pd  # not used below; kept so the file-level import set is unchanged

try:
    from tqdm import tqdm
except ImportError:  # tqdm only draws a progress bar, so degrade gracefully
    def tqdm(iterable, *args, **kwargs):
        """Fallback used when tqdm is not installed: plain iteration."""
        return iterable

review_path = "data/review.json"
valid_business_id_path = "data/valid_business_id.txt"


def gen_valid_business_id(review_path, count_citerion=100):
    """Return the businesses that have at least ``count_citerion`` reviews.

    Args:
        review_path: Path to a UTF-8 text file holding one JSON object per
            line.  Blank (whitespace-only) lines are skipped.
        count_citerion: Minimum number of reviews (inclusive) a business
            needs to be kept.  The misspelled name is part of the public
            signature and is kept for backward compatibility.

    Returns:
        A list of ``"<business_id>\\t<count>"`` strings, ordered by the first
        appearance of each business id in the file.

    Raises:
        KeyError: If a record has no ``"business_id"`` key.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    count_business_id = Counter()
    with open(review_path, "r", encoding="utf-8") as f:
        # Stream the file line by line: only the per-business counts are
        # needed, so neither the raw lines nor the parsed reviews are kept.
        for line in tqdm(f):
            line = line.strip()
            if not line:
                continue
            count_business_id[json.loads(line)["business_id"]] += 1

    # Path first, count second, matching the wording of the message.
    print("total count of business id in {}: {}".format(
        review_path, len(count_business_id)))

    return [
        "{}\t{}".format(business_id, count)
        for business_id, count in count_business_id.items()
        if count >= count_citerion
    ]


if __name__ == "__main__":
    valid_business = gen_valid_business_id(review_path, count_citerion=100)
    with open(valid_business_id_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(valid_business))