最新Python新浪微博爬虫2014-07

之前微博的登陆密码加密是三层sha1算法,现在更改为rsa2算法

下面是python脚本,并把首页账号首页微博抓取下来。

亲测可行。

 

 

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import urllib2
import urllib
import json
import re
import base64
import hashlib
import rsa
import binascii
import cookielib


body = {  
   __rnd:‘‘,  
   _k:‘‘,  
   _t:0,  
   count:50,  
   end_id:‘‘,  
   max_id:‘‘,  
   page:1,  
   pagebar:‘‘,  
   pre_page:0,  
   uid:1742439305  
} 

uuid = None
cj = cookielib.LWPCookieJar()
cookie_support = urllib2.HTTPCookieProcessor(cj)
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
urllib2.install_opener(opener)

#获取 servertime noce pubkey rsakv
def get_info():
    url = http://login.sina.com.cn/sso/prelogin.php?entry=sso&callback=sinaSSOController.preloginCallBack&su=[email protected]&rsakt=mod&client=ssologin.js(v1.4.4)
    data = urllib2.urlopen(url).read()
    p = re.compile(\((.*)\))
    try:
        json_data = p.search(data).group(1)
        data = json.loads(json_data)
        servertime = str(data[servertime])
        nonce = data[nonce]
        publicKey = data[pubkey]
        rsakey = data[rsakv]
        return servertime, nonce, publicKey, rsakey
    except:
        print error
        return None

st, non, pubkey, rsakv = get_info()

#用户名加密
def get_user(username):
    username_ = urllib.quote(username)
    username = base64.encodestring(username_)[:-1]
    return username

#密码加密
def get_pwd(pwd):
    rsaPublicKey = int(pubkey, 16)
    key = rsa.PublicKey(rsaPublicKey, 65537)
    message = str(st) + \t + str(non) + \n + str(pwd)
    pwd_1 = rsa.encrypt(message, key)
    pwd_2 = binascii.b2a_hex(pwd_1)
    return pwd_2

def login(username, pwd):
    url = http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.4)
    postdata = {  
        entry: weibo,  
        gateway: 1,  
        from: ‘‘,  
        savestate: 7,  
        userticket: 1,  
        ssosimplelogin: 1,  
        vsnf: 1,  
        vsnval: ‘‘,  
        su: ‘‘,  
        service: miniblog,  
        servertime: ‘‘,  
        nonce: ‘‘,  
        pwencode: rsa2,  
        sp: ‘‘,  
        encoding: UTF-8,  
        prelt:115,
        rsakv: rsakv,
        url: http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack,  
        returntype: META  
    }  
    postdata[servertime] = st
    postdata[nonce] = non
    postdata[su] = get_user(username)
    postdata[sp] = get_pwd(pwd)
    postdata_url = urllib.urlencode(postdata)
    headers = {User-Agent:Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0 Chrome/20.0.1132.57 Safari/536.11}
    req = urllib2.Request(url = url, data = postdata_url, headers = headers)
    result = urllib2.urlopen(req)
    text = result.read()
    p = re.compile(location\.replace\(\‘(.*?)\‘\))
    try:
    url_login = p.search(text).group(1)
    content = urllib2.urlopen(url_login).read()
        p = re.compile(uniqueid\"\:\"(.*?)\")
        uuid =  p.search(content).group(1)
    except:
    print error


if __name__ == __main__:

    login(账号, 密码)

    url = http://weibo.com/%s?from=otherprofile&wvr=3.6&loc=tagweibo % (uuid)
    url = url + urllib.urlencode(body)
    req = urllib2.Request(url)
    result = urllib2.urlopen(req).read()
    print result

引用并感谢http://blog.csdn.net/monsion/article/details/7981366


 

 

最新Python新浪微博爬虫2014-07,古老的榕树,5-wow.com

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。