Python爬取12306登录 转载

/ 4评 / 0

本文为转载,目的是在学习时多积累一些思路。

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""12306登录"""
import base64
import os
import re

import requests
# Click coordinates ("x,y") for each numbered captcha cell, keyed by the cell
# number as the user types it. The values use four x positions and two y
# positions, i.e. two rows of four cells.
_CAPTCHA_POINTS = {
    '1': '37,46',
    '2': '110,46',
    '3': '181,46',
    '4': '253,46',
    '5': '37,116',
    '6': '110,116',
    '7': '181,116',
    '8': '253,116',
}


def get_point(index: str) -> str:
    """Translate captcha cell numbers into the matching click coordinates.

    Args:
        index: Comma-separated cell numbers, e.g. ``"1,3"``. Whitespace
            around each number and the full-width comma (U+FF0C) are
            tolerated.

    Returns:
        The "x,y" coordinates of every selected cell, joined with commas in
        the order given, e.g. ``"37,46,181,46"`` for ``"1,3"``.

    Raises:
        KeyError: If any entry is not one of ``'1'`` .. ``'8'``.
    """
    # Chinese input methods commonly produce the full-width comma; treat it
    # as a normal separator instead of failing the lookup.
    normalized = index.replace('\uff0c', ',')
    return ','.join(
        _CAPTCHA_POINTS[item.strip()] for item in normalized.split(',')
    )

# Login flow, run as a flat script. Each step depends on the previous one
# succeeding, so a failed step stops the script with a message instead of
# ending silently.

# A Session keeps cookies between requests, so cookies set by one step are
# sent automatically with the next.
session = requests.Session()
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}
# Present the script as a desktop browser on every request.
session.headers.update(headers)

# Seconds to wait for each HTTP request; requests has no default timeout and
# would otherwise block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# 1. Visit the login configuration endpoint to obtain the initial cookies.
cookie_url = 'https://kyfw.12306.cn/otn/login/conf'
response = session.get(cookie_url, timeout=REQUEST_TIMEOUT)

# 2. Download the captcha. The response body embeds the picture as a base64
#    string in an "image" field, which is pulled out with a regex.
captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-image64?login_site=E&module=login&rand=sjrand&1541686714134&callback=jQuery19109992892609908492_1541686371355&_=1541686371356'
response = session.get(captcha_url, timeout=REQUEST_TIMEOUT)
data = response.text
image_match = re.search(r'"image":"(.*?)"', data)
if image_match is None:
    raise SystemExit('获取验证码失败: %s' % data)
img_base64 = image_match.group(1)
# Decode to raw bytes and save next to the script so the user can open it.
img_bytes = base64.b64decode(img_base64)
with open('captcha.jpg', 'wb') as f:
    f.write(img_bytes)

# 3. Verify the captcha. The user types the numbers of the correct cells
#    (not coordinates), separated by commas; get_point converts them to the
#    coordinate string sent as the "answer" parameter.
check_captcha = 'https://kyfw.12306.cn/passport/captcha/captcha-check?callback=jQuery19109992892609908492_1541686371355&rand=sjrand&login_site=E&_=1541686371358'
answer = get_point(input('请输入正确的序号>>>:'))
response = session.get(
    check_captcha,
    params={'answer': answer},
    timeout=REQUEST_TIMEOUT,
)
res = response.text
code_match = re.search(r'"result_code":"(.*?)"', res)
code = code_match.group(1) if code_match else None
# This script treats result_code '4' as a successful captcha check.
if code != '4':
    raise SystemExit('验证码校验失败: %s' % res)
print('验证码校验成功')

# 4. Verify the username and password. Credentials are read from the
#    environment when set, so they need not be written into the source; the
#    original placeholders remain as the fallback and must otherwise be
#    replaced with a real account.
login_url = 'https://kyfw.12306.cn/passport/web/login'
form_data = {
    'username': os.environ.get('LOGIN_12306_USERNAME', '12306账号'),
    'password': os.environ.get('LOGIN_12306_PASSWORD', '12306密码'),
    'appid': 'otn',
}
response = session.post(login_url, data=form_data, timeout=REQUEST_TIMEOUT)
res = response.json()
if res.get("result_code") != 0:
    raise SystemExit('用户名密码校验失败: %s' % res)
print('用户名密码校验成功!')

# 5. Request the authorization token.
uamtk_url = 'https://kyfw.12306.cn/passport/web/auth/uamtk'
response = session.post(uamtk_url, data={'appid': 'otn'}, timeout=REQUEST_TIMEOUT)
res = response.json()
if res.get("result_code") != 0:
    raise SystemExit('获取token失败: %s' % res)
print('获取token成功')

# 6. Submit the token for validation and show the server's reply.
check_token_url = 'https://kyfw.12306.cn/otn/uamauthclient'
response = session.post(
    check_token_url,
    data={'tk': res['newapptk']},
    timeout=REQUEST_TIMEOUT,
)
print(response.text)

4条回应:“Python爬取12306登录 转载”

  1. หวย说道:

    Each Midi Baccarat table will probably be limited to 18 gamers.

  2. rickyhan.biz说道:

    Normally I do not read articles on blogs, but
    I wish to say that this write-up really compelled me to check it out
    and do it! Your writing style has surprised me. Thanks, very great post.

  3. If you want to get much from this paragraph then you
    have to apply such methods to your own webpage.

  4. tienda china说道:

    There is certainly a great deal to find out about this subject.

    I really like all the points you have made.

发表评论

电子邮件地址不会被公开。 必填项已用*标注