# Download the images of an e-hentai gallery.


import urllib,urllib2,cookielibimport sys,re,timedef getehentai(url):    rule1=re.compile('<a\\shref="([^<>"]*)"><img[^<>]*><br[^<>]*>[0-9]+</a>')    rule2=re.compile('</iframe><a\\shref="[^<>]*"><img\\ssrc="([^<>]*)"\\sstyle=.*/></a><iframe')    proxy_support = urllib2.ProxyHandler({'http': 'http://127.0.0.1:8087'})    cookie_support= urllib2.HTTPCookieProcessor(cookielib.CookieJar())    opener=urllib2.build_opener(proxy_support,cookie_support,urllib2.HTTPHandler)    urllib2.install_opener(opener)    header={'User-Agent':'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11 QIHU 360EE',    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',    'Accept-Language':'zh-CN,zh;q=0.8',    'Accept-Charset':'utf-8;q=0.7,*;q=0.7',    'connection':'keep-alive'}    header2={'User-Agent':'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C;  QIHU 360EE)',    'Referer':'http://g.e-hentai.org/',    'Accept':'*/*',    'Accept-Language':'zh-cn',    'Accept-Charset':'utf-8;q=0.7,*;q=0.7',    'connection':'keep-alive'}    i=0    pick={}    fail={}    for j in xrange(12):        header['Referer']='http://g.e-hentai.org/'        req=urllib2.Request('%s?p=%d'%(url,j),headers=header)        data=urllib2.urlopen(req).read()        if data:            medi=rule1.findall(data)            for k in medi:                if not pick.has_key(k):                    i+=1                    name='%03d.jpg'%(i)                    pick[k]=name                    req=urllib2.Request(k,headers=header)                    data=urllib2.urlopen(req).read()                    if data:                        data=rule2.findall(data)[0]                        if data:                            file=open(name,'wb')                            print '[page]%s' %(k),                            header['Referer']=k                            
req=urllib2.Request(data,headers=header)                            file.write(urllib2.urlopen(req).read())                            if file.tell()<2048:                                fail[k]=name                                print '\\n'                            else:                                print '--> [File]%s\\n'% name                            file.close()    while fail:        header2,header=header,header2        cookie_support.close()        cookie_support= urllib2.HTTPCookieProcessor(cookielib.CookieJar())        opener=urllib2.build_opener(proxy_support,cookie_support,urllib2.HTTPHandler)        urllib2.install_opener(opener)        pick={}        print '\\n[Retry]\\n'        for i,j in fail.items():            req=urllib2.Request(i,headers=header)            data=urllib2.urlopen(req).read()            if data:                data=rule2.findall(data)[0]                if data:                    file=open(j,'wb')                    print '[page]%s' %(i),                    header['Referer']=i                    req=urllib2.Request(data,headers=header)                    file.write(urllib2.urlopen(req).read())                    if file.tell()<2048:                        pick[i]=j                        print '\\n'                    else:                        print '--> [File]%s\\n'% j                    file.close()        time.sleep(60)        fail=pickif __name__=='__main__':    url = raw_input('Input Url Address:')    getehentai(url)#该片段来自于http://byrx.net

# Comments are closed.