下载豆瓣友邻相册,豆瓣友邻相册,开发环境:WIn7,Py
下载豆瓣友邻相册,豆瓣友邻相册,开发环境:WIn7,Py
开发环境:Win7,Python 2.7
使用了第三方的类库:BeautifulSoup
后期还需要增加多线程处理。
图片会存放到程序运行时当前工作目录下的 DownFile 文件夹中(按相册标题分子目录)。
# coding:utf-8
"""wxPython GUI that downloads the photos of a Douban album, page by page.

Targets Python 2.7 (``urllib.urlopen``, ``reload(sys)``). Pictures are saved
under ``<current working directory>/DownFile/<album title>/``.
"""
__author__ = 'zz'

import os
import re
import sys
import urllib

import wx
from bs4 import BeautifulSoup

# Python 2 only: switch the implicit str <-> unicode codec to UTF-8.
# NOTE(review): presumably needed so unicode page titles can be mixed with
# byte-string paths and error messages -- confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')

# Matches the paging offset in album URLs such as ".../?start=18".
_START_PARAM = re.compile(r'start=(\d+)')

# Characters that are not allowed in Windows file names.
_INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _start_offset(url):
    """Return the integer ``start=`` paging offset found in *url*, or 0."""
    match = _START_PARAM.search(url)
    return int(match.group(1)) if match else 0


def _safe_dir_name(title):
    """Return *title* made safe for use as a single directory name.

    Path separators and other characters Windows rejects are replaced by
    ``_`` and surrounding dots are stripped, so a page title can never
    point outside of the download folder.
    """
    return _INVALID_NAME_CHARS.sub('_', title).strip('.')


class ImageOutMessage(object):
    """Forward picture-download messages to a widget exposing ``AppendText``."""

    def __init__(self, obj):
        self.Object = obj

    def emit(self, strMessage):
        """Append *strMessage* to the wrapped widget; empty messages are skipped."""
        if strMessage:
            self.Object.AppendText(strMessage)


class main_windows(wx.Frame):
    """Main frame: a URL field, a download button and a read-only log area."""

    def __init__(self):
        wx.Frame.__init__(self, None, -1, "Down DouBan Image", size=(450, 400))
        bkg = wx.Panel(self, -1)
        DownImageButton = wx.Button(bkg, label="DownImage")
        DownImageButton.Bind(wx.EVT_BUTTON, self.DownImage)
        self.UrlText = wx.TextCtrl(bkg)
        self.contents = wx.TextCtrl(bkg, style=wx.TE_MULTILINE)
        self.contents.SetEditable(False)

        # Top row: URL input stretches, button keeps its natural size.
        hbox = wx.BoxSizer()
        hbox.Add(self.UrlText, proportion=1, flag=wx.EXPAND)
        hbox.Add(DownImageButton, proportion=0, flag=wx.LEFT, border=5)

        # Log area takes all remaining vertical space.
        vbox = wx.BoxSizer(wx.VERTICAL)
        vbox.Add(hbox, proportion=0, flag=wx.EXPAND, border=5)
        vbox.Add(self.contents, proportion=1,
                 flag=wx.EXPAND | wx.LEFT | wx.BOTTOM | wx.RIGHT, border=5)
        bkg.SetSizer(vbox)

    def _log(self, text):
        """Append *text* to the log area."""
        self.contents.AppendText(text)

    def _report_error(self, ex):
        """Write the standard error line for exception *ex* to the log area."""
        self._log("STOP,ERROR:%s.\n" % (ex))

    def ReadHtml(self, src):
        """Fetch *src* and return it parsed by BeautifulSoup.

        Returns None (after logging the error) when the page cannot be
        fetched or parsed.
        """
        try:
            response = urllib.urlopen(src)
            try:
                content = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
            return BeautifulSoup(content)
        except Exception as ex:
            self._report_error(ex)
            return None

    def NextPage(self, strUrl):
        """Return the URL of the page following *strUrl*, or None on the last page.

        The address is taken from a ``<link rel="next">`` element whose href
        carries a ``start=`` offset.
        """
        try:
            content = self.ReadHtml(strUrl)
            if content is None:
                return None
            strHref = None
            for link in content('link'):
                # bs4 exposes ``rel`` as a list of values; test the tag's own
                # attribute instead of comparing a find_all() result to 0,
                # which was always true and also matched rel="prev" links.
                rel = link.get('rel') or []
                href = link.get('href') or ''
                if 'next' in rel and 'start=' in href:
                    strHref = href
            return strHref
        except Exception as ex:
            self._report_error(ex)
            return None

    def PicInfo(self, src):
        """Return the full-size picture URLs found on page *src*.

        Thumbnail addresses (containing ``thumb/public``) are rewritten to
        the original-picture address by replacing ``thumb`` with ``photo``.
        Returns None when the page has no such pictures or cannot be read.
        """
        try:
            content = self.ReadHtml(src)
            if content is None:
                return None
            lstPicHref = []
            for img in content('img'):
                # <img> tags without a src attribute are simply skipped.
                strSrc = img.get('src') or ''
                if 'thumb/public' in strSrc:
                    lstPicHref.append(strSrc.replace('thumb', 'photo'))
            return lstPicHref or None
        except Exception as ex:
            self._report_error(ex)
            return None

    def WritePic(self, HtmlTitle, listPicHref, FilePath):
        """Download every URL in *listPicHref* into ``FilePath/HtmlTitle``.

        HtmlTitle   -- album title, used (sanitised) as the folder name.
        listPicHref -- list of picture URLs; None is treated as empty.
        FilePath    -- parent download folder, with or without a trailing
                       separator.

        Progress and errors are written to the log area; nothing is returned.
        """
        try:
            strFilePath = os.path.join(FilePath, _safe_dir_name(HtmlTitle))
            # Create the per-album folder on first use.
            if not os.path.exists(strFilePath):
                os.mkdir(strFilePath)
            listPicHref = listPicHref or []
            PicLength = len(listPicHref)
            self._log("%s.\n" % (
                'Current page {} picture waiting for download...'.format(PicLength)))
            for i, item in enumerate(listPicHref, 1):
                # The file name is the last path segment of the URL; a fixed
                # segment index would fail on any other URL layout.
                strPicName = item.rsplit('/', 1)[-1]
                urllib.urlretrieve(item, os.path.join(strFilePath, strPicName))
                self._log("%s.\n" % (
                    'Download picture {}/{}:{}'.format(i, PicLength, strPicName)))
        except Exception as ex:
            self._report_error(ex)

    def DownImage(self, event):
        """Button handler: download all pages of the album typed in the URL field."""
        try:
            strUrl = self.UrlText.GetValue()
            if not strUrl:
                self._log("URL cannot be empty")
                return
            # The album path must be preceded by something (e.g. "http://"),
            # hence "> 0" rather than a plain containment test.
            if not strUrl.find("www.douban.com/photos/album/") > 0:
                self.contents.SetValue("")
                self._log("URL format is invalid, for example:\n %s" % (
                    "http://www.douban.com/photos/album/92848474/"))
                return

            soup = self.ReadHtml(strUrl)
            if soup is None:
                # ReadHtml has already logged the reason.
                return
            # The page title, with all whitespace removed, names the folder.
            strTitle = ''.join(soup.html.head.title.string.split())

            strFilePath = os.path.join(os.getcwd(), 'DownFile')
            if not os.path.exists(strFilePath):
                os.mkdir(strFilePath)

            while strUrl:
                nPrevOffset = _start_offset(strUrl)
                listPicHref = self.PicInfo(strUrl)
                self._log("%s.\n" % (strUrl))
                self.WritePic(strTitle, listPicHref, strFilePath)
                strUrl = self.NextPage(strUrl)
                if not strUrl:
                    break
                # Stop if paging does not advance; this also prevents an
                # endless loop when the "next" page is the current one.
                if nPrevOffset >= _start_offset(strUrl):
                    break
            self._log("Download complete")
        except Exception as ex:
            self._report_error(ex)


class App(wx.App):
    """Application object that creates and shows the main frame."""

    def OnInit(self):
        self.frame = main_windows()
        self.frame.Show(True)
        self.SetTopWindow(self.frame)
        return True


if __name__ == "__main__":
    app = App()
    app.MainLoop()
# This snippet comes from http://byrx.net
相关内容
- 话说python的数据结构真的很适合快速排序啊,python数据
- 随机生成中文验证码的python代码,中文验证码python,#
- 关于Tk中Treeview的使用方法,tktreeview,#python3.3关于
- 爬虫 爬取网址,爬虫网址,爬取网页上的链接 然后分
- 清空当前目录下,除本脚本文件的所有文本文件(点文件
- RSA算法的简单实现,RSA算法简单实现,#!/usr/bin/e
- Python 将DOC文档转换为PDF,,import sys,
- Django获取当前request,django当前request,django只能在vie
- python redis订阅发布示例代码,pythonredis,可以使用pip in
- 批量删除所下载的.git文件夹,批量删除.git文件夹,因经
评论关闭