# encoding: utf-8
# Recovered from a git patch whose line breaks had been collapsed onto one line:
#   From 4d476bb47b55c68a057a9dc6475d8b5dca5360c5 Mon Sep 17 00:00:00 2001
#   From: 20200203063 <maplechen111@aliyun.com>
#   Date: Tue, 1 Sep 2020 00:20:51 +0800
#   Subject: [PATCH] Upload New File
#   (new file gen_id2business.py, mode 100644, blob 663a53b, libgit2 0.26.0)
"""Build, cache and load the mapping from ``business_id`` to business record.

The records are read from a JSON-lines file (one JSON object per line) and
the resulting dict is cached on disk as a pickle, so later imports load the
cache instead of re-parsing the source file.
"""
import json
import pickle
import os

# Input: one JSON object per line; each object must have a "business_id" key.
business_path = "data/business.json"
# Output / cache: pickled dict produced by load_id2business().
save_id2business_path = "model/id2business.pkl"


def _read_businesses(path):
    """Parse the JSON-lines file at *path* into a dict keyed by ``business_id``.

    Blank lines are skipped. When the same ``business_id`` appears more than
    once, the first record is kept and a message is printed for each repeat.

    Raises:
        KeyError: if a record has no ``"business_id"`` key.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    businesses = dict()
    with open(path, "r", encoding="utf-8") as f:
        # Iterate the file object directly so the whole file is never held
        # in memory as a list of lines.
        for line in f:
            record_text = line.strip()
            if not record_text:
                continue
            ele = json.loads(record_text)
            business_id = ele["business_id"]
            if business_id in businesses:
                print("{} duplicated".format(str(business_id)))
                continue
            businesses[business_id] = ele
    return businesses


def _save_cache(mapping, path):
    """Pickle *mapping* to *path*, creating the parent directory if needed."""
    cache_dir = os.path.dirname(path)
    if cache_dir:
        # Without this, a missing output directory makes open() fail only
        # after the whole input has been parsed.
        os.makedirs(cache_dir, exist_ok=True)

    # Write to a sibling temp file and rename it into place, so an
    # interrupted dump never leaves a truncated pickle at *path* that a
    # later run would try to load.
    tmp_path = path + ".tmp"
    with open(tmp_path, "wb") as f:
        pickle.dump(mapping, f)
    os.replace(tmp_path, path)


def load_id2business():
    """Return the ``business_id -> record`` dict, building the cache if absent.

    If ``save_id2business_path`` exists it is unpickled and returned.
    Otherwise ``business_path`` is parsed, the result is pickled to
    ``save_id2business_path`` and then returned.

    Returns:
        dict: maps each ``business_id`` value to its full parsed JSON record.
    """
    if os.path.exists(save_id2business_path):
        # SECURITY NOTE: pickle.load can execute arbitrary code. Only point
        # save_id2business_path at cache files produced by this module.
        with open(save_id2business_path, "rb") as f:
            return pickle.load(f)

    id2business = _read_businesses(business_path)
    _save_cache(id2business, save_id2business_path)
    return id2business


# Loaded once at import time so other modules can import `id2business` directly.
id2business = load_id2business()