【Python】diao丝娱乐(终极版)-->从百度贴吧下载妹子图,pythondiao,# -*- coding
文章由Byrx.net分享于2019-03-23 07:03:20
【Python】diao丝娱乐(终极版)-->从百度贴吧下载妹子图,pythondiao,# -*- coding
# -*- coding: gbk -*-
"""Download the images posted in the threads of a Baidu Tieba forum page.

The script lists the threads on one forum page, then, for every thread,
collects the ``<img>`` tags whose ``src`` contains ``imgsrc`` from all of
the thread's pages and saves them into a directory named after the thread
title, created under the current working directory.

The code is written so that it runs on both Python 2.7 and Python 3.
"""
import os
import re
import threading

import requests
from bs4 import BeautifulSoup

baidu_base_url = 'http://tieba.baidu.com'
# Forum listing page whose threads are scanned for images.
baidu_forum_url = 'http://tieba.baidu.com/f?ie=utf-8&kw=%E5%A7%90%E8%84%B1'
# Seconds to wait for any single HTTP request; without a timeout a stalled
# connection would block its worker thread (and the final join) forever.
REQUEST_TIMEOUT = 30


def _gbk_safe(text):
    """Return *text* with every character that GBK cannot encode dropped."""
    return text.encode('gbk', 'ignore').decode('gbk')


def _imgs_in_soup(soup):
    """Return the ``<img>`` tags in *soup* whose ``src`` contains 'imgsrc'."""
    return soup.find_all('img', src=re.compile('imgsrc'))


def validate_title(title):
    """Return *title* without the characters / \\ : * ? " < > |."""
    return re.sub(r'[/\\:*?"<>|]', '', title)


def ensure_dir(f):
    """Print the directory path *f* and create it if it does not exist."""
    print(f)
    if not os.path.exists(f):
        os.makedirs(f)


def get_soup_from_url(url):
    """Fetch *url* and return its body parsed as a BeautifulSoup document."""
    page = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    return BeautifulSoup(page.content, 'html.parser')


def get_img_from_url(url):
    """Return the ``<img>`` tags on the page at *url* with 'imgsrc' in src."""
    return _imgs_in_soup(get_soup_from_url(url))


def down_links_to_folder(links, folder):
    """Download every URL in *links* into *folder* as ``<index>.jpg``.

    Responses whose status code is not 200 are skipped.  A failure on one
    link is reported on stdout and does not stop the remaining downloads.
    """
    for i, link in enumerate(links):
        try:
            r = requests.get(link, timeout=REQUEST_TIMEOUT)
            if r.status_code == 200:
                with open(os.path.join(folder, str(i) + '.jpg'), 'wb') as f:
                    # The body is already fully downloaded; write it in
                    # one call instead of one byte at a time.
                    f.write(r.content)
        except Exception as e:
            print('error.. %s' % e)


def get_tieba_img_url_from_url(url):
    """Collect the image URLs of the thread at *url* and start downloading.

    The images are saved by a background thread into a directory named
    after the page title (reduced to GBK-encodable characters and passed
    through ``validate_title``) under the current working directory.

    Returns the set of image URLs found, or None when an error occurred
    (the error is printed, not raised).
    """
    try:
        soup = get_soup_from_url(url)
        dirname = validate_title(_gbk_safe(soup.title.text))
        loc = os.path.join(os.getcwd(), dirname)

        # Page 1 is the document already fetched above.
        imgs = list(_imgs_in_soup(soup))

        # The page count is read from <span class="red">; when that
        # element is missing or not a number, treat the thread as having
        # a single page.
        try:
            total_page = int(soup.find('span', class_='red').text)
        except (AttributeError, ValueError):
            total_page = 1

        for page_no in range(2, total_page + 1):
            imgs.extend(get_img_from_url(url + '&pn=' + str(page_no)))

        links = {img.get('src') for img in imgs}
        if links:
            # Create the directory whenever any page has images, and hand
            # the downloader the same absolute path that was created.
            ensure_dir(loc)
            threading.Thread(target=down_links_to_folder,
                             args=(links, loc)).start()
        return links
    except Exception as e:
        print('error.. %s' % e)


def main():
    """List the forum's threads and process each one in its own thread."""
    baidu_homepage = requests.get(baidu_forum_url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(baidu_homepage.content, 'html.parser')
    titles = soup.find_all('a', target='_blank', class_='j_th_tit')

    urls = []
    for title in titles:
        url = baidu_base_url + title.get('href')
        urls.append(url)
        print('%s %s' % (url, _gbk_safe(title.text)))

    # '?see_lz=1' is appended to every thread URL, as in the original.
    workers = [
        threading.Thread(target=get_tieba_img_url_from_url,
                         args=(url + '?see_lz=1',))
        for url in urls
    ]
    for worker in workers:
        worker.start()
    # Wait for the workers; the download threads they start are regular
    # (non-daemon) threads, so the interpreter also waits for those
    # before exiting.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()

# This snippet comes from http://byrx.net
相关内容
- 糗百连连看,连连看,#!/usr/bin/p
- python, shell, golang获取本机IP地址,pythongolang,shell获取I
- 生成回文数,生成回文,生成10位以内的回文数不
- 输入地名获得当地天气预报,地名当地天气预报,#!/us
- python实现中文繁体和中文简体之间的相互转换,,# -*-
- python写的用WMI检测windows系统信息、硬盘信息、网卡信息
- python 基础学习第二弹:类属性和实例属性,python实例
- 获取博客园首页的博客列表,获取博客首页,#! /usr/bin
- python根据出生日期返回年龄,python出生日期返回, d
- python filter函数使用范例演示,pythonfilter,# Suppose yo
评论关闭