From 8beded6435e41c74f3766f95f9302fcd159d56f9 Mon Sep 17 00:00:00 2001 From: za2016 <920082975@qq.com> Date: Fri, 21 Jul 2017 10:02:18 +0800 Subject: [PATCH 1/3] Update zhihu_picture_downloader.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 获取图片链接无需xsrf了 --- zhihu/zhihu_picture_downloader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zhihu/zhihu_picture_downloader.py b/zhihu/zhihu_picture_downloader.py index 6730ef9..2c1a0a1 100644 --- a/zhihu/zhihu_picture_downloader.py +++ b/zhihu/zhihu_picture_downloader.py @@ -99,7 +99,7 @@ def login(email,passwd): def get_pic_from_topic(id,offset): global session topicurl=topic_url+str(id) - _xsrf=get_xsrf(topicurl) + #_xsrf=get_xsrf(topicurl) pic_re=re.compile('data-actualsrc="(.*?)"') inner_data={"url_token":id ,"pagesize":10 @@ -111,7 +111,7 @@ def get_pic_from_topic(id,offset): session.headers['Referer']=topicurl session.headers['Host']='www.zhihu.com' session.headers['Origin']='https://www.zhihu.com' - session.headers['X-Xsrftoken']=_xsrf + #session.headers['X-Xsrftoken']=_xsrf js_data=session.post(api_url,data=data) dat=json.loads(js_data.content)['msg'] pictures=[] @@ -160,4 +160,4 @@ def downloader(url,path): for pic in pictures: downloader(pic,savepath) print u"=====下载完毕=====" - \ No newline at end of file + From e74365cdf4b1ea6920c4c524a881cecc2e482d34 Mon Sep 17 00:00:00 2001 From: za2016 <920082975@qq.com> Date: Thu, 14 Sep 2017 11:11:16 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=94=AF=E4=BB=98?= =?UTF-8?q?=E5=AE=9D=E7=99=BB=E5=BD=95=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + alipay/README.md | 11 +++ alipay/alipay.py | 181 ++++++++++++++++++++++++++++++++++++++++++++++ hostloc/README.md | 3 +- 4 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 alipay/README.md create mode 100644 alipay/alipay.py diff --git a/README.md 
b/README.md index 490e98b..10e9e8c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ 5. fuliba:福利吧签到脚本 6. ASCII-ART:图片转字符画 7. baidu:百度图床~~ +8. alipay: 支付宝登录&获取订单信息 > 注: diff --git a/alipay/README.md b/alipay/README.md new file mode 100644 index 0000000..604313d --- /dev/null +++ b/alipay/README.md @@ -0,0 +1,11 @@ +# 支付宝登录 & 获取订单信息脚本 + +使用前准备: +1. 安装selenium:`pip install selenium` +2. 下载webdriver。phantomjs无界面,适合linux;chromedriver方便调试。自行百度下载 +3. 修改**USERNMAE**和**PASSWD** + +运行: +`python alipay.py` + +pps. 脚本非原创。在v2ex一名v友的基础上修改的 diff --git a/alipay/alipay.py b/alipay/alipay.py new file mode 100644 index 0000000..78b9fa2 --- /dev/null +++ b/alipay/alipay.py @@ -0,0 +1,181 @@ +#-*- coding=utf-8 -*- +""" +支付宝登录获取订单信息脚本 +ps.没啥卵用,使用selenium不稳定,经常出现问题。 +使用前准备: +1.安装selenium:pip install selenium +2.下载webdriver。phantomjs无界面,适合linux;chromedriver方便调试。自行百度 +3.修改USERNMAE和PASSWD + +运行: +python alipay.py + +pps. 脚本非原创。在v2ex一名v友的基础上修改的 +""" +import requests +from selenium import webdriver +import time +import pickle +import re +requests.packages.urllib3.disable_warnings() + +# 登录 url +Login_Url = 'https://auth.alipay.com/login/index.htm?goto=https://consumeprod.alipay.com/record/advanced.htm' +# 账单 url +Bill_Url = 'https://consumeprod.alipay.com/record/advanced.htm' +# 登录用户名和密码 +USERNMAE = '' +PASSWD = '' +# 自定义 headers +HEADERS = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36', + 'Referer': 'https://consumeprod.alipay.com/record/advanced.htm', + 'Host': 'consumeprod.alipay.com', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', + 'Connection': 'keep-alive' +} + + +class Alipay_Bill_Info(object): + '''支付宝账单信息''' + + def __init__(self, headers, user, passwd): + ''' + 类的初始化 + headers:请求头 + cookies: 持久化访问 + info_list: 存储账单信息的列表 + ''' + self.headers = headers + # 初始化用户名和密码 + self.user = user + self.passwd = passwd + # 利用 
requests 库构造持久化请求 + self.session = requests.Session() + # 将请求头添加到缓存之中 + self.session.headers = self.headers + try: + cookies = pickle.load(open("cookies", "rb")) + for cookie in cookies: + self.session.cookies.set(cookie['name'], cookie['value']) + print u"获取cookies成功!" + except: + print u"未登陆过,需先登录" + self.get_cookies() + if not self.login_status(): + print u"cookies失效,重新登录" + self.get_cookies() + # 初始化存储列表 + self.info_list = [] + + def wait_input(self, ele, str): + '''减慢账号密码的输入速度''' + for i in str: + ele.send_keys(i) + time.sleep(0.5) + + def get_cookies(self): + '''获取 cookies''' + # 初始化浏览器对象 + # sel = webdriver.PhantomJS( + # executable_path='C:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe') + # sel = webdriver.PhantomJS( + # executable_path='/root/phantomjs/bin/phantomjs') + sel = webdriver.Chrome(executable_path='C:/chromedriver.exe') + sel.maximize_window() + sel.get(Login_Url) + sel.implicitly_wait(3) + # 找到用户名字输入框 + uname = sel.find_element_by_id('J-input-user') + uname.clear() + print u"正在输入账号....." + self.wait_input(uname, self.user) + time.sleep(1) + # 找到密码输入框 + upass = sel.find_element_by_id('password_rsainput') + upass.clear() + print u"正在输入密码...." + self.wait_input(upass, self.passwd) + # 截图查看 + sel.save_screenshot('1.png') + # 找到登录按钮 + button = sel.find_element_by_id('J-login-btn') + time.sleep(1) + print 1 + button.click() + print 2 + sel.save_screenshot('2.png') + if len(re.findall('checkSecurity', sel.current_url)) > 0: + riskackcode = sel.find_element_by_id('riskackcode') + riskackcode.clear() + print u"等待输入验证码:" + msgcode = raw_input() + self.wait_input(riskackcode, msgcode) + button = sel.find_element_by_xpath( + '//*[@id="J-submit"]/input') # ui-button + time.sleep(1) + button.click() + sel.save_screenshot('2.1.png') + print(sel.current_url) + # 跳转到账单页面 + print u"正在跳转页面...." 
+ sel.get(Bill_Url) + sel.implicitly_wait(3) + sel.save_screenshot('3.png') + # 获取 cookies 并转换为字典类型 + cookies = sel.get_cookies() + pickle.dump(cookies, open("cookies", "wb")) + for cookie in cookies: + self.session.cookies.set(cookie['name'], cookie['value']) + # 关闭浏览器 + sel.close() + + def set_cookies(self): + '''将获取到的 cookies 加入 session''' + self.get_cookies() + + def login_status(self): + '''判断登录状态''' + # 添加 cookies + status = self.session.get( + Bill_Url, timeout=5, allow_redirects=False, verify=False).status_code + print(status) + if status == 200: + return True + else: + return False + + def get_data(self): + ''' + 利用 正则表达式解析 html + 并抓取数据, + 数据以字典格式保存在列表里 + ''' + status = self.login_status() + if status: + html = self.session.get(Bill_Url, verify=False).text + # 抓取前五个交易记录 + trades = re.findall('
.*?(\d{4}\.\d{2}\.\d{2})', trade)[0] + time = re.findall( + '
.*?(\d{2}:\d{2})', trade)[0] + amount = re.findall( + '(.*?) ', trade)[0] + ddh = re.findall( + '