# Python 3.4 版本的抓取淘宝妹子图代码


import http.client
import os
import urllib.request

# Listing endpoint; getPageUrl appends the 1-based page number to this prefix.
# The separator must be a literal "&": an HTML-escaped "&amp;" would send a
# query parameter named "amp;page" instead of "page".
mmurl = "http://mm.taobao.com/json/request_top_list.htm?type=0&page="

# Running totals of downloaded / failed images, updated by getPicUrl.
SUCC = 0
FAIL = 0

# Errors that mean "this fetch failed, move on": network and file errors
# (URLError is an OSError), malformed URLs and GBK decode errors (ValueError),
# and broken HTTP responses.
_FETCH_ERRORS = (OSError, ValueError, http.client.HTTPException)


def findImageUrls(html):
    """Return every image URL found in *html*, in document order.

    An image URL is the text between ``src="`` and the next ``.jpg``
    (inclusive) for each occurrence of ``src="http://img0``.
    """
    marker = 'src="http://img0'
    suffix = ".jpg"
    skip = len('src="')
    urls = []
    pos = 0
    while True:
        start = html.find(marker, pos)
        if start == -1:
            break
        end = html.find(suffix, start)
        if end == -1:
            break
        urls.append(html[start + skip:end + len(suffix)])
        # Continue scanning after this match so each image is seen once.
        pos = end
    return urls


def findProfileUrls(html):
    """Return the profile-page URL of every ``<div class="pic s60">`` in *html*.

    For each such div, the URL is the text following the first ``<a href=``
    (and its opening quote) up to and including the next ``.htm``.  Divs
    without such a link are skipped.
    """
    div_open = '<div class="pic s60">'
    div_close = '</div>'
    href_open = "<a href="
    href_end = ".htm"
    links = []
    pos = 0
    while True:
        start = html.find(div_open, pos)
        if start == -1:
            break
        end = html.find(div_close, start)
        if end == -1:
            break
        fragment = html[start:end]
        a = fragment.find(href_open)
        b = fragment.find(href_end, a) if a != -1 else -1
        if b != -1:
            # +1 skips the quote character that follows "<a href=".
            links.append(fragment[a + len(href_open) + 1:b + len(href_end)])
        pos = end
    return links


def getPicUrl(htmlurl, num):
    """Download every image referenced by the profile page at *htmlurl*.

    Images are saved as ``beautiful/<num>-<index>.jpg``.  The module-level
    counters ``SUCC`` and ``FAIL`` are incremented per image; a failure to
    fetch the page itself counts as one failure and ends the call.

    Args:
        htmlurl: URL of the profile page (expected to be GBK-encoded).
        num: 1-based ordinal of the profile, used in file names and messages.
    """
    global SUCC, FAIL

    # Fetch the page exactly once; a failure here must not be retried forever.
    try:
        html = urllib.request.urlopen(htmlurl).read().decode('GBK')
    except _FETCH_ERRORS:
        print("图片获取失败,可能是服务器自动屏蔽掉了")
        FAIL += 1
        return

    images = findImageUrls(html)
    if images:
        try:
            os.makedirs("beautiful", exist_ok=True)
        except OSError:
            # Folder could not be created: each download below will fail
            # and be counted in FAIL, so there is nothing more to do here.
            pass

    for i, imgurl in enumerate(images, start=1):
        print("正在下载第", num, "个美女的第", i, "张图片")
        target = os.path.join("beautiful", str(num) + "-" + str(i) + ".jpg")
        try:
            urllib.request.urlretrieve(imgurl, target)
        except _FETCH_ERRORS:
            print("图片获取失败,可能是服务器自动屏蔽掉了")
            FAIL += 1
        else:
            SUCC += 1

    print("第", num, "个美女的图片下载完毕")


def getPageUrl(mmurl, pages=80, per_page=10):
    """Collect profile-page URLs from the paginated listing.

    Args:
        mmurl: URL prefix of the listing; the page number is appended to it.
        pages: Number of listing pages to read, starting at page 1.
        per_page: Maximum number of profile links taken from each page.

    Returns:
        A list of profile-page URLs in listing order.

    Raises:
        urllib.error.URLError: If a listing page cannot be fetched.
    """
    hreflist = []
    for i in range(1, pages + 1):
        cont = urllib.request.urlopen(mmurl + str(i)).read().decode('GBK')
        hreflist.extend(findProfileUrls(cont)[:per_page])
        print("正在读取第" + str(i) + "页的美女图片地址")
    return hreflist


def main():
    """Scrape all listing pages, download every image and print the totals."""
    hreflist = getPageUrl(mmurl)
    print("共有美女个人图片页面", len(hreflist))
    for num, pageurl in enumerate(hreflist, start=1):
        print("开始下载第", num, "个美女图")
        getPicUrl(pageurl, num)
    print("成功下载图片:", SUCC, "****下载图片失败:", FAIL)


if __name__ == "__main__":
    main()

# 评论关闭