人人网相册下载,人人网相册,本脚本用于下载人人网用户
人人网相册下载,人人网相册,本脚本用于下载人人网用户
本脚本用于下载人人网用户的整个相册,保存在脚本同级的albums目录中以album-<ALBUM_ID>命名的文件夹中。
运行脚本后,输入signin命令会提示输入用户名和密码
登录成功后,可以使用“save 相册地址”命令来保存该相册下的所有照片。
相册地址的格式如下:
http://photo.renren.com/photo/296349158/album-377670404?ref=hotnewsfeed&sfet=709&fin=16&ff_id=296349158#thumb
后面的查询串(“?”及其之后的部分)可以省略,例如:
http://photo.renren.com/photo/296349158/album-377670404
# coding: utf-8
#
# Renren album downloader: an interactive shell that signs into renren.com
# and saves every photo of an album into albums/<album name>/ below the
# current working directory.
#
# Written for Python 2 (2.6+); the import fallbacks below let the same code
# run on Python 3 as well.

import sys
import os
import re
import urllib
import cmd
import json
import getpass
# import simplejson as json  # < 2.6

try:  # Python 2
    import urllib2
    import cookielib
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.request as urllib2
    import http.cookiejar as cookielib
    from urllib.parse import urlencode

try:  # Python 2
    read_line = raw_input
except NameError:  # Python 3
    read_line = input


class RRAD(object):
    """Cookie-keeping HTTP session that signs into renren.com and saves albums."""

    # href of every anchor carrying class="picture" on an album page.
    # [^>]* / [^"]* keep each match inside a single tag, so several links on
    # the same line are all found.
    _PHOTO_LINK_RE = re.compile(r'<a[^>]*href="([^"]*)" class="picture">')
    # src attribute of the class="photo" image following the "large-con" div.
    _LARGE_PHOTO_RE = re.compile(
        r'<div id="large-con"(.*?)src="(?P<src>.*?)" class="photo"',
        flags=re.MULTILINE | re.DOTALL | re.IGNORECASE)
    # Everything from the first '?' or '#' to the end of the URL.
    _QUERY_OR_FRAGMENT_RE = re.compile(r'[?#].*$')

    def __init__(self):
        """Create the download directory if needed and build the session."""
        # Relative path: albums are stored below the current working directory.
        self.download_dir = 'albums'
        if not os.path.isdir(self.download_dir):
            os.mkdir(self.download_dir)
        # A single opener with a cookie jar, so cookies set by the login
        # request are sent with every later request.
        self.session = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(cookielib.CookieJar()))

    @staticmethod
    def _strip_query(url):
        """Return *url* without its query string and fragment."""
        return RRAD._QUERY_OR_FRAGMENT_RE.sub('', url)

    @staticmethod
    def _to_text(data):
        """Decode *data* to text when it is a distinct bytes type (Python 3).

        On Python 2 ``bytes`` is ``str``, so the value is returned unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode('utf-8', 'replace')
        return data

    @staticmethod
    def _parse_photo_links(content):
        """Return the query-less photo page URLs found in album page *content*."""
        return [RRAD._strip_query(link)
                for link in RRAD._PHOTO_LINK_RE.findall(content)]

    @staticmethod
    def _parse_large_src(content):
        """Return the image URL found in photo page *content*, or None."""
        match = RRAD._LARGE_PHOTO_RE.search(content)
        if match is None:
            return None
        return match.group('src')

    def signin(self):
        """Prompt for username and password and post them to the login URL."""
        username = read_line('username: ')
        password = getpass.getpass('password: ')
        data = (
            ('email', username),
            ('password', password),
            ('origURL', 'http://www.renren.com/Home.do'),
            ('domain', 'renren.com'),
        )
        # urlencode() output is pure ASCII; Python 3 requires a bytes body.
        body = urlencode(data).encode('ascii')
        page = self.session.open('http://www.renren.com/PLogin.do', body)
        page.close()

    def signout(self):
        """Request the logout URL with the current session."""
        self.session.open('http://www.renren.com/Logout.do').close()

    def do_post(self, url):
        """Fetch *url* with an empty POST body and return the raw response."""
        # An empty byte string (not a dict) is a valid body on Python 2 and 3.
        response = self.session.open(url, b'')
        try:
            return response.read()
        finally:
            response.close()

    def do_get(self, url):
        """Fetch *url* with GET and return the raw response body."""
        response = self.session.open(url)
        try:
            return response.read()
        finally:
            response.close()

    def get_album_info(self, album_url):
        """Return the album name and photo page links for *album_url*.

        Only the page at *album_url* itself is fetched.

        Returns:
            dict with 'album_name' (last path segment of the URL) and
            'photos' (list of photo page URLs without query strings).
        """
        album_name = album_url.rstrip('/').split('/')[-1]
        content = self._to_text(self.do_get(album_url))
        return {
            'album_name': album_name,
            'photos': self._parse_photo_links(content),
        }

    def get_photo_file(self, photo_url):
        """Return the image URL shown on the large view of *photo_url*.

        Returns None when the page does not contain the expected markup.
        """
        content = self._to_text(self.do_get(photo_url + '/large?xtype=album'))
        return self._parse_large_src(content)

    def save_photo_file(self, album_dir, photo_file):
        """Download *photo_file* into *album_dir*.

        Returns True on success and False on any failure (best effort, so a
        single broken photo does not abort the whole album).
        """
        if not photo_file:
            return False
        try:
            filename = photo_file.split('/')[-1]
            # Download first so a failed request leaves no empty file behind.
            data = self.do_get(photo_file)
            with open(os.path.join(album_dir, filename), 'wb') as f:
                f.write(data)
            return True
        except Exception:
            return False

    def save_album(self, url):
        """Download every photo of the album at *url*, printing progress.

        Errors while fetching the album page itself propagate to the caller;
        failures of individual photos are reported as 'failed.'.
        """
        album_url = self._strip_query(url.strip())
        album_info = self.get_album_info(album_url)

        # create the album directory if it does not exist
        album_dir = os.path.join(self.download_dir, album_info['album_name'])
        if not os.path.isdir(album_dir):
            os.mkdir(album_dir)

        print('saving album to %s' % album_dir)
        photos = album_info['photos']
        total = len(photos)
        for index, link in enumerate(photos):
            # Progress prefix stays on the same line as the result word.
            sys.stdout.write('(%d/%d) %s ' % (index + 1, total, link))
            sys.stdout.flush()
            try:
                photo_file = self.get_photo_file(link)
                saved = self.save_photo_file(album_dir, photo_file)
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                saved = False
            print('saved.' if saved else 'failed.')
        print('all downloads completed.')


class RRADCmd(cmd.Cmd):
    """Interactive shell exposing the signin, save and exit commands."""

    def __init__(self):
        cmd.Cmd.__init__(self)
        self.intro = '\n'.join([
            'Renren Album Downloader V0.1',
            '=========================================',
            'author : greatghoul',
            'email : greatghoul@gmail.com',
            'copyright : http://www.g2w.me',
        ])
        self.prompt = '> '
        self.rrad = RRAD()

    def help_signin(self):
        print('Sign into renren.com (prompts for username and password).\n'
              '    signin')

    def do_signin(self, null):
        """Run the interactive login; the command argument is ignored."""
        try:
            self.rrad.signin()
        except (IOError, OSError, ValueError) as error:
            # Keep the shell alive when the login request fails.
            print('sign in failed: %s' % error)

    def help_save(self):
        print('Save the given album.\n'
              'Example:\n'
              '    save http://photo.renren.com/photo/253423487/album-396516481')

    def do_save(self, album_url):
        """Save the album at *album_url*; show the help when it is missing."""
        if not album_url.strip():
            self.help_save()
            return
        try:
            self.rrad.save_album(album_url)
        except (IOError, OSError, ValueError) as error:
            # A malformed URL or a network error must not kill the shell.
            print('could not save album: %s' % error)

    def help_exit(self):
        print('Quit the application.')

    def do_exit(self, null):
        """Terminate the process with exit status 0."""
        sys.exit(0)


if __name__ == '__main__':
    rrad_cmd = RRADCmd()
    rrad_cmd.cmdloop()

# This snippet comes from http://byrx.net
相关内容
- Python 字符编码,python字符编码,#coding=utf-
- python在windows和linux下获得本机本地ip地址的方法汇总,
- python针对局域网的arp欺骗代码,pythonarp,#coding:utf-
- python判断指定的端口是否被占用,python判断指定端口
- 普通 IP 转换为十进制 IP,ip转换为十进制,Python语言:
- python在指定的目录下查找gif文件,python指定查找gif,#!
- python每隔N秒运行指定的函数程序,python每隔,import osi
- 一个简单的爬虫,简单爬虫,import urlli
- 生成4位字母数字组合密码字典,4位密码字典,f=open("
- 域名转IP 2种方法,域名转ip2种,#!/usr/local
评论关闭