批量下载腾讯图库图片,使用方法:-t targetUrl


使用方法:

-t targetUrl
网址类似: http://comic.qq.com/disppics.htm#did=202&tid=24&pid=15104

图库入口: http://comic.qq.com/pic/

# Bulk downloader for Tencent comic picture galleries.
#
# Usage: -t targetUrl
# where targetUrl looks like
# http://comic.qq.com/disppics.htm#did=202&tid=24&pid=15104
from optparse import OptionParser
from urllib.parse import urlparse
import json
import os
import urllib.request


def vaildChange(script, key):
    """Quote a bare JavaScript object key so the text becomes JSON-compatible.

    Every occurrence of ``key:`` in *script* is replaced with ``"key":``.

    Args:
        script: JavaScript object-literal source text.
        key: The unquoted property name to wrap in double quotes.

    Returns:
        The text with the key quoted.
    """
    # Plain double quotes only: emitting a backslash before the quote would
    # produce text that json.loads rejects.
    return script.replace(key + ':', '"' + key + '":')


def splitJavascript(script, key, end):
    """Extract the raw JSON text stored under *key* in the quoted script.

    The value is taken as everything between ``"<key> ":`` (the key followed
    by a space, as produced by quoting ``<key> :``) and the next ``"<end>``
    marker, with the trailing comma separator (and anything after it) dropped.

    Args:
        script: Script text whose keys have already been quoted.
        key: Name of the section to extract.
        end: Name of the section that follows *key*; marks where to stop.

    Returns:
        The extracted text, or ``'[]'`` when either marker is missing.
    """
    start_marker = '"' + key + ' ":'
    try:
        start = script.index(start_marker) + len(start_marker)
        stop = script.index('"' + end)
    except ValueError:
        # str.index raises ValueError when a marker is absent; treat the
        # section as an empty list so callers can still json.loads it.
        return '[]'
    section = script[start:stop]
    return section[:section.rfind(',')]


def dowmloadFile(url, foldname, filename):
    """Download *url* and write the bytes to ``foldname/filename``.

    Args:
        url: Address of the resource to fetch.
        foldname: Existing directory to write into.
        filename: Name of the file to create inside *foldname*.

    Returns:
        An empty string on success, otherwise the text of the error that
        occurred while fetching or writing.
    """
    # This function deliberately never raises: callers receive the failure
    # as a string, so a broad catch is kept at this boundary.
    try:
        with urllib.request.urlopen(url) as response:
            payload = response.read()
    except Exception as err:
        return str(err)
    try:
        with open(os.path.join(foldname, filename), "wb") as out:
            out.write(payload)
    except Exception as err:
        return str(err)
    return ""


def main():
    """Parse the command line and download every picture of one gallery."""
    parser = OptionParser(version="%prog 1.0")
    parser.add_option('-t', dest='targetUrl', help='要下载的图库网址')
    (options, args) = parser.parse_args()
    if not options.targetUrl:
        parser.error('没有指定网址!')  # exits the process

    o = urlparse(options.targetUrl)
    valid = (o.netloc == 'comic.qq.com' and o.path == '/disppics.htm'
             and o.scheme == 'http')
    fragment = o.fragment
    if valid:
        try:
            # The fragment looks like "did=202&tid=24&pid=15104"; the gallery
            # key is the text between "did=" and "&tid=".
            key = fragment[4:fragment.index('&tid=')]
        except ValueError:
            valid = False
    if not valid:
        parser.error('错误的网址,应该类似于http://comic.qq.com/disppics.htm#did=287&tid=25&pid=15169')

    print('开始运行')
    print('目标:' + options.targetUrl)
    structUrl = 'http://comic.qq.com/d/pic/1/{key}/plist.js'.replace('{key}', key)
    print('正在获取信息')
    with urllib.request.urlopen(structUrl) as response:
        jsonData = response.read().decode('gb2312', 'ignore')
    # Strip the "var oPiclib=" prefix and everything from the character
    # before ";/*" onwards, leaving only the object literal.
    jsonData = jsonData[len('var oPiclib='):jsonData.index(';/*') - 1]

    # Order matters: longer names are quoted before names they contain.
    bare_keys = ['nID', 'nDataID', 'nTypeID', 'sOriginalImgUrl', 'sZoomImgUrl',
                 'sDesc', 'sTheD', 'arrPic24', 'arrPic23', 'arrPic25',
                 'brandid', 'brandname', 'brandurlhead', 'oPicInfo',
                 'showinfo', 'dname', 'durl', 'typelist', 'typepnum',
                 'typename', 'stpicnum', 'id', 'name',
                 'arrPic23 ', 'arrPic24 ', 'arrPic25 ']
    for bare_key in bare_keys:
        jsonData = vaildChange(jsonData, bare_key)

    # Each section ends where the next one begins; 'oPicInfo' is only the
    # terminator of the last picture list.
    if 'arrPic25' in jsonData:
        sections = ['arrPic23', 'arrPic24', 'arrPic25', 'oPicInfo']
    else:
        sections = ['arrPic23', 'arrPic24', 'oPicInfo']
    targetPics = []
    for section, following in zip(sections, sections[1:]):
        targetPics.extend(json.loads(splitJavascript(jsonData, section, following)))

    # The gallery name doubles as the output directory name.
    name_marker = '"name":"'
    jsonData = jsonData[jsonData.index(name_marker) + len(name_marker):]
    name = jsonData[:jsonData.index('"')]

    print('共有 %s 张图片。即将开始下载' % len(targetPics))
    if not os.path.isdir(name):
        os.mkdir(name)
    baseUrl = 'http://img1.gtimg.com'
    for index, item in enumerate(targetPics, 1):
        url = baseUrl + item['sOriginalImgUrl']
        print('正在处理第%s张' % index)
        error = dowmloadFile(url, name, str(index) + '.jpg')
        if error:
            # Report the failure instead of silently skipping the picture.
            print('第%s张下载失败: %s' % (index, error))
    print('处理完成')


if __name__ == '__main__':
    main()
# This snippet comes from http://byrx.net

评论关闭