抓取 tuigirl 图片(Python urllib 脚本)


# Crawl a numbered range of forum pages and save every image linked from
# each page into its own numbered directory.

import urllib.request
import re
import os
import sys
from collections import deque

# Crawl settings (previously hard-coded inside the script body).
BASE_URL = "http://www.tuigirl8.com/forum/view/"
FIRST_PAGE = 1
STOP_PAGE = 2000  # exclusive upper bound, as in range()
TARGET_ROOT = "D:/"
PAGE_TIMEOUT = 2  # seconds allowed for fetching one forum page

# Absolute http: URL with no whitespace, ending at the first jpg/gif/png.
# Compiled once here instead of once per crawled page.
IMAGE_LINK_RE = re.compile(r'(http:[^\s]*?(jpg|gif|png))')


def extract_image_links(html):
    """Return the image URLs found in *html*, de-duplicated, in page order.

    Args:
        html: Decoded page text (``str``), not the ``repr`` of a bytes object.

    Returns:
        A list of URL strings; empty when nothing matches.
    """
    links = (link for link, _ext in IMAGE_LINK_RE.findall(html))
    # dict.fromkeys keeps first-seen order while dropping repeated links,
    # so the same picture is not downloaded several times per page.
    return list(dict.fromkeys(links))


def local_filename(link):
    """Return the part of *link* after its last ``/`` (whole link if none)."""
    return link.rsplit('/', 1)[-1]


def fetch_page(url, timeout=PAGE_TIMEOUT):
    """Download *url* and return its text, or ``None`` if the fetch fails.

    The body is decoded as UTF-8 with undecodable bytes replaced, so the
    link regex sees real whitespace rather than escaped ``\\n`` sequences.
    """
    request = urllib.request.Request(url)
    try:
        with urllib.request.urlopen(request, timeout=timeout) as webpage:
            content = webpage.read()
    except Exception as exc:
        # Best-effort crawl: report the failure and let the caller move on.
        print("skip page %s: %s" % (url, exc), file=sys.stderr)
        return None
    return content.decode("utf-8", errors="replace")


def download_images(links, target_dir):
    """Save every URL in *links* under *target_dir*; return the success count.

    A failing download is reported on stderr and skipped, so one dead link
    does not abort the whole crawl.
    """
    saved = 0
    for link in links:
        print(link)
        destination = os.path.join(target_dir, local_filename(link))
        try:
            urllib.request.urlretrieve(link, destination)
        except Exception as exc:
            print("skip image %s: %s" % (link, exc), file=sys.stderr)
            continue
        saved += 1
    return saved


def main(base_url=BASE_URL, first_page=FIRST_PAGE, stop_page=STOP_PAGE,
         target_root=TARGET_ROOT, timeout=PAGE_TIMEOUT):
    """Crawl pages ``first_page`` .. ``stop_page - 1`` and store their images.

    For the n-th page visited (counting from 1) the images are written to
    ``<target_root>/<n>``. The directory is created before the page is
    fetched, so failed or image-less pages leave an empty directory.
    """
    for index, page in enumerate(range(first_page, stop_page)):
        url = base_url + str(page)
        print(str(index) + url)

        target_dir = os.path.join(target_root, str(index + 1))
        # makedirs also creates a missing parent and tolerates reruns.
        os.makedirs(target_dir, exist_ok=True)

        html = fetch_page(url, timeout=timeout)
        if html is None:
            continue
        download_images(extract_image_links(html), target_dir)


if __name__ == "__main__":
    main()

评论关闭