Commit 89211377 by ligang

'z抓取京东信息'

parent e91d8789
#!/usr/bin/env python3
#-*-coding:utf-8 -*-
from selenium import webdriver
import time
import os
import json
# 鼠标事件依赖于ActionChains对象
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
# Location of the chromedriver executable. Written as a raw string so the
# backslashes of the Windows path are never parsed as escape sequences.
path = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
# Site home page; every navigation flow starts here.
target = "https://www.jd.com"
# Order-list page, requested after loading cookies to verify the login.
test_order = "https://order.jd.com/center/list.action"
# User name and password typed into the login form.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or from a file kept out of version control.
uname = "16601258428"
pwd = "123456qwert"
# Single browser instance shared by every function in this script.
driver = webdriver.Chrome(path)
def get_element_dict(info_element):
    """Convert one spec-table section into ``{section title: {name: value}}``.

    Args:
        info_element: element exposing the ``find_element(s)_by_*`` lookups;
            its ``h3`` supplies the section title, its ``dt`` elements the
            spec names, and its ``dd`` elements under ``dl`` (excluding those
            whose class contains ``Ptable-tips``) the spec values.

    Returns:
        A dict with a single key, the ``h3`` text, mapped to a dict of
        ``dt`` text -> ``dd`` text.

    Names and values are paired positionally. If the two lists differ in
    length the surplus entries are ignored, so one incomplete section does
    not abort the whole scrape with an IndexError.
    """
    heading = info_element.find_element_by_tag_name("h3")
    name_cells = info_element.find_elements_by_tag_name("dt")
    # The xpath skips the tooltip cells so only real values remain.
    value_cells = info_element.find_elements_by_xpath(
        "dl//dd[not(contains(@class,'Ptable-tips'))]"
    )
    specs = {
        name_cell.text: value_cell.text
        for name_cell, value_cell in zip(name_cells, value_cells)
    }
    return {heading.text: specs}
def save_goods_info(info_list):
    """Append scraped product info to ``goods_info/computer.infos``.

    The ``goods_info`` directory lives in the parent of the current working
    directory and is created when missing.

    Args:
        info_list: object whose ``str()`` form is written to the file.

    Each call appends one record followed by a newline, so records written
    by successive calls stay separable.
    """
    project_path = os.path.dirname(os.getcwd())
    file_path = project_path + '/goods_info/'
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(file_path, exist_ok=True)
    with open(file_path + "computer.infos", "a", encoding="utf-8") as f:
        f.write(str(info_list) + "\n")
def _switch_to_new_window(known_handles):
    """Focus the driver on a window whose handle is not in ``known_handles``.

    When several windows qualify, the last one listed by the driver wins.
    """
    for handle in driver.window_handles:
        if handle not in known_handles:
            driver.switch_to.window(handle)


def get_notebook_info():
    """Navigate from the home page to a notebook's detail page and save its specs.

    Flow: open ``target``, hover the "电脑" link, click "笔记本", follow the
    newly opened window, apply the brand, price and sort clicks, open the
    first listed product, follow its window, open the second tab of the
    detail panel, then turn every ``Ptable-item`` element into a dict with
    ``get_element_dict`` and hand the list to ``save_goods_info``.

    The fixed ``time.sleep`` pauses give each page time to load before the
    next lookup. Returns None.
    """
    driver.get(target)
    # Hover over the "电脑" entry so that its sub-menu is shown.
    computers_link = driver.find_element_by_link_text("电脑")
    ActionChains(driver).move_to_element(computers_link).perform()
    time.sleep(2)
    # Notebook category.
    driver.find_element_by_link_text("笔记本").click()
    time.sleep(2)

    # The click opened another window; move the driver's focus to it.
    index_handle = driver.current_window_handle
    _switch_to_new_window({index_handle})

    # Brand filter (ThinkPad, per the original note).
    driver.find_element_by_xpath(".//*[ @id =\"brand-11518\"]/a/img").click()
    time.sleep(3)
    # Price filter (the "above 7000" bucket, per the original note).
    driver.find_element_by_xpath("//*[@id=\"J_selectorPrice\"]/div/div[2]/div/ul/li[7]/a").click()
    time.sleep(2)
    # Sort by number of comments (per the original note).
    driver.find_element_by_xpath("//*[@id=\"J_filter\"]/div[1]/div[1]/a[3]").click()
    time.sleep(2)
    # Open the first product in the sorted list.
    driver.find_element_by_xpath("//*[@id =\"plist\"]/ul/li[1]/div/div[1]/a/img").click()
    time.sleep(3)

    # A third window now exists: pick the one that is neither the home page
    # nor the listing page. The handle list is fetched afresh by the helper.
    notebook_handle = driver.current_window_handle
    _switch_to_new_window({index_handle, notebook_handle})

    # Scroll down before clicking the tab.
    driver.execute_script("window.scrollTo(0,1500)")
    # Second tab of the detail panel (the specs/parameters tab per the
    # original note).
    driver.find_element_by_xpath("//*[@id=\"detail\"]/div[1]/ul/li[2]").click()
    sections = driver.find_elements_by_class_name("Ptable-item")
    # One dict per spec section, saved together as a single list.
    result_list = [get_element_dict(section) for section in sections]
    save_goods_info(result_list)
def _read_first_line(file_name):
    """Return the first line of ``file_name``, closing the file afterwards."""
    with open(file_name, "r") as f:
        return f.readline()


# Load the saved cookies into the driver.
def save_cookies_to_driver():
    """Install the cookies stored on disk into the shared driver.

    If the cookie file is missing, or its first line is empty, ``login()``
    is called to produce it and the file is read again, so the freshly
    written cookies are the ones that get parsed.

    Returns:
        The module-level ``driver`` with the saved cookies added.

    Raises:
        json.JSONDecodeError: if the file content is still not valid JSON
            after the login attempt.
    """
    cookies_file = get_cookies_file()
    if not os.path.exists(cookies_file):
        # No cookie file yet: logging in writes one.
        login()
    jd_cookies_str = _read_first_line(cookies_file)
    if not jd_cookies_str:
        # Empty file: log in, then pick up what the login just saved.
        login()
        jd_cookies_str = _read_first_line(cookies_file)
    saved_cookies = json.loads(jd_cookies_str)
    # Open the site, then clear the existing cookies before adding the
    # saved ones (the original note insists the old ones must be removed).
    driver.get(target)
    driver.delete_all_cookies()
    for cookie in saved_cookies:
        driver.add_cookie(cookie)
    return driver
def check_cookies():
    """Tell whether the saved cookies yield a logged-in session.

    Loads the cookies through ``save_cookies_to_driver``, requests the
    ``test_order`` page and compares the URL the browser ends up on.

    Returns:
        True when the browser is still on ``test_order`` after the request,
        False otherwise.
    """
    browser = save_cookies_to_driver()
    # Request the check page and see where the browser lands.
    browser.get(test_order)
    landed_on = browser.current_url
    return landed_on == test_order
# Directory in which the cookie file is stored.
def get_cookies_dir():
    """Return the ``cookies/`` directory next to the working directory.

    The directory sits in the parent of the current working directory and
    is created when it does not exist yet.

    Returns:
        The directory path as a string ending in ``/cookies/``.
    """
    project_path = os.path.dirname(os.getcwd())
    file_path = project_path + '/cookies/'
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(file_path, exist_ok=True)
    return file_path
def get_cookies_file():
    """Return the path of the ``jd.cookies`` file inside the cookie directory."""
    cookies_dir = get_cookies_dir()
    return cookies_dir + "jd.cookies"
def save_cookies_to_file(driver):
    """Write the driver's current cookies to ``jd.cookies`` as JSON.

    Args:
        driver: object whose ``get_cookies()`` result is serialised.

    The file is overwritten on every call.
    """
    cookie_path = get_cookies_dir() + "jd.cookies"
    # Fetch the cookies before opening the file for writing.
    current_cookies = driver.get_cookies()
    with open(cookie_path, "w") as out:
        # json.dump keeps the file readable by json.loads later on; the
        # original note warns that any other format would not load back.
        json.dump(current_cookies, out)
def login():
    """Log in through the account form and store the resulting cookies.

    Opens ``target``, goes to the login form, fills in ``uname`` and
    ``pwd``, submits, waits, and then passes the driver to
    ``save_cookies_to_file``. Returns None.
    """
    driver.get(target)
    driver.maximize_window()
    time.sleep(3)
    # Click the "link-login" element, then the "账户登录" link.
    driver.find_element_by_class_name("link-login").click()
    driver.find_element_by_link_text("账户登录").click()
    # Empty the user-name field before typing into it.
    username_box = driver.find_element_by_id("loginname")
    username_box.clear()
    username_box.send_keys(uname)
    password_box = driver.find_element_by_id("nloginpwd")
    password_box.send_keys(pwd)
    submit_button = driver.find_element_by_id("loginsubmit")
    submit_button.click()
    # Wait after submitting before the cookies are read.
    time.sleep(5)
    save_cookies_to_file(driver)
if __name__ == "__main__":
    try:
        # Repeat the login until the cookie check reports a valid session.
        while not check_cookies():
            login()
        time.sleep(2)
        get_notebook_info()
    finally:
        # Pause briefly, then close the browser whether or not the run failed.
        time.sleep(5)
        driver.quit()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment