python之小说下载器version3.0


 我想了一下,每个版本打包exe 意义不大,如果是最终版什么的 打一个还行,所以暂时不打包了 有需要的可以留言,我再打包.

        这里使用了第三方库pyquery,安装方法见:1.0版本链接

        在上一版本的基础上做个改进.主要是,增加推荐榜,支持模糊查询,交互方式有一定优化.如上图:

 

 \\
 

 

        下面是代码:


[python] 
# -*- coding:gbk -*-  
'''''
file desc:novel downloader
author:kingviker
email:kingviker@163.com.kingviker88@gmail.com
date:2013-05-21
depends:python 2.7.4,pyquery
''' 
 
import os,codecs,urllib,urllib2,sys 
from pyquery import PyQuery as pq 
 
 
   
# POST endpoint of the novel site's title-search form (see searchNovel).
searchUrl = 'http://www.dushuge.net/modules/article/search.php'
# Local directory where each novel is saved as "<title>.txt".
baseSavePath="E:/enovel/"
   
def searchTopList():
    """Scrape the hot-novel ranking from the site's front page.

    Returns a list of [title, url] pairs.  The anchor at index 0 is
    deliberately skipped (presumably a non-entry link -- matches the
    original behavior).
    """
    homepage = pq(url="http://www.dushuge.net")
    anchors = homepage("#compZnNav").next().find("td").eq(0)("a")
    return [[anchors.eq(i).text(), anchors.eq(i).attr("href")]
            for i in range(1, len(anchors))]
 
def searchNovel(novelName):
    """POST a title search to the site and interpret the response page.

    Returns one of:
      * (title, url) tuple   -- exact hit: the server answered with the
                                book page itself;
      * list of [title, url] -- fuzzy hit: a result table of candidates;
      * None                 -- nothing found.
    """
    post_data = urllib.urlencode({'searchkey' : novelName,
              'searchtype' : 'articlename'})
    request = urllib2.Request(url=searchUrl, data=post_data)
    page = pq(urllib2.urlopen(request).read())

    exact = page("#content > .sf-mainbox > .head > h1 > a")
    if exact:
        # Direct hit -- the page header holds links about the book;
        # the third anchor is the one used throughout this script.
        return (exact.eq(2).text(), exact.eq(2).attr("href"))

    rows = page("#content > table > tr")
    if not rows:
        return None

    candidates = []
    # Row 0 is skipped (presumably the table header -- matches original).
    for i in range(1, len(rows)):
        link = rows.eq(i)("td").eq(0)("a")
        candidates.append([link.text(), link.attr("href")])
    return candidates
 
def analyzeNovel(url): 
    print "开始分析章节信息..." 
    #using pyquery to grub the webpage's content  
    html_pq = pq(url=url) 
    #print html_pq("div.book_article_texttable").find(".book_article_texttext")  
    totalChapters = len(html_pq("div.book_article_texttable").find(".book_article_listtext").find("a")) 
    print "总章节数:",totalChapters 
    return totalChapters 
 
def fetchDownloadLinks(url):
    """Walk a book's index page and collect per-piece chapter links.

    The page lays out sibling <div>s under div.book_article_texttable:
    a div with class "book_article_texttitle" starts a new piece
    (volume/section); any other div holds the chapter <a> links of the
    current piece.  Returns a flat list that alternates piece title and
    chapter list: [title0, [[name, href], ...], title1, [...], ...].
    """
    #using to save pieces and chapter lists
    pieceList=[]
    chapterList=[]

    html_pq = pq(url=url)
    #find the first piece of the novel.
    piece = html_pq("div.book_article_texttable > div").eq(0)

    isPiece = True
    if piece.attr("class")=="book_article_texttitle":
        #get the current piece's text
        pieceList.append(piece.text())
        #print "piece Text:", piece
    else:
        # No leading piece title -- use a placeholder ("none").
        isPiece = False
        pieceList.append("无")
    #scan out the piece and chapter lists
    nextPiece=False
    while nextPiece==False:
        if isPiece:
            # Normal step: advance to the next sibling div.
            chapterDiv = piece.next()
        else:
            # First iteration when there was no title div: the div we
            # already hold is itself a chapter list, so process it.
            isPiece = True
            chapterDiv = piece
        #print "chapter div length:",chapterDiv.length
        piece = chapterDiv
        if chapterDiv.length==0:
            # Ran past the last sibling: flush the final chapter list.
            pieceList.append(chapterList[:])
            del chapterList[:]
            nextPiece=True
        elif chapterDiv.attr("class")=="book_article_texttitle":
            # A new piece title: flush the previous piece's chapters
            # first, then record the new title.
            pieceList.append(chapterList[:])
            del chapterList[:]
            pieceList.append(piece.text())

        else:
            # A chapter-list div: harvest every anchor as [text, href].
            chapterUrls = chapterDiv.find("a");
            for urlA in chapterUrls:
                urlList_temp = [pq(urlA).text(),pq(urlA).attr("href")]
                chapterList.append(urlList_temp)

    print "下载列表收集完成",len(pieceList)
    return pieceList
     
def downloadNovel(novel,startChapterNum): 
   # if os.path.exists(baseSavePath+novel[0]) is not True:  
    #    os.mkdir(baseSavePath+novel[0])  
 
    #based on the piecelist,grub the special webpage's novel content and save them .  
    if os.path.exists(baseSavePath+novel[0]+".txt"):os.remove(baseSavePath+novel[0]+".txt") 
 
    #using codecs to create a file. write mode(w+) is appended.  
    novelFile = codecs.open(baseSavePath+novel[0]+".txt","wb+","utf-8") 
 
 
    pieceList = fetchDownloadLinks(novel[1]) 
    
     
    chapterTotal = 0; 
 
    print "从",startChapterNum,"章开始下载" 
    #just using two for loops to analyze the piecelist.  
    for pieceNum in range(0,len(pieceList),2): 
        piece = pieceList[pieceNum] 
        print "开始下载篇章",pieceList[pieceNum] 
        chapterList = pieceList[pieceNum+1] 
        for chapterNum in range(0,len(chapterList)): 
            chapterTotal +=1 
           # print chapterTotal,startChapterNum,startChapterNum>chapterTotal  
           # print type(startChapterNum),type(chapterTotal)  
            if startChapterNum > chapterTotal: 
                continue 
            
            chapter = chapterList[chapterNum] 
            print "开始下载章节",chapter[0] 
            chapterPage = pq(url=novel[1]+chapter[1]) 
 
            chapterContent = (piece+" "+chapter[0]+" \r") 
            
            #print chapterPage("#booktext").remove("strong").html()  
            chapterContent += chapterPage("#booktext").remove("strong").html().replace("<br />","\r") 
 
            print "小说内容:",len(chapterContent) 
            novelFile.write(chapterContent+"\r"+"\r") 
         
    novelFile.close() 
    print "下载完成,文件位置:",baseSavePath+novel[0]+".txt" 
     
 
 
print "小说下载器 by kingviker!,输入quit返回或退出:"             
#if the novel's file system  not exists,created.  
if os.path.exists(baseSavePath) is not True: 
        os.mkdir(baseSavePath) 
while(True): 
 
    searchToplist = searchTopList() 
    for index in range(len(searchToplist)): 
                print "(",index,")",searchToplist[index][0] 
    name = raw_input("输入序号直接下载热门小说,或者输入想要下载的小说名称搜索") 
    try: 
        if name =="quit": 
            print "bey" 
            break 
        elif int(name) in range(11): 
            print "选择小说:",searchToplist[int(name)][0] 
            totalChapters = analyzeNovel(searchToplist[int(name)][1]) 
             
            startChapterNum = raw_input("请输入起始章节,全部下载请按回车:") 
            if startChapterNum =="quit": 
                continue 
            if not startChapterNum : 
                startChapterNum=0 
            #print startChapterNum  
            downloadNovel(searchToplist[int(name)],int(startChapterNum))  
    except Exception ,e: 
         
        novel = searchNovel(name) 
        if not novel : 
            print "没有查找到小说",name,"或者小说名称输出错误!" 
        else: 
            if isinstance(novel,list): 
                print "以下是结果列表" 
                for index in range(len(novel)): 
                    print "(",index,")",novel[index][0] 
 
                novelIndex = raw_input("请输入序号选择:") 
                if novelIndex =="quit": 
                    continue 
                 
                novelIndex = int(novelIndex) 
                print"选定小说:",novel[novelIndex][0] 
                searchHtml_pq = pq(url=novel[index][1]) 
 
                novelUrlHtml = searchHtml_pq("#content > .sf-mainbox > .head > h1 > a") 
                novel[index][1] = novelUrlHtml.eq(2).attr("href") 
                totalChapters = analyzeNovel(novel[index][1]) 
                startChapterNum = raw_input("请输入起始章节,全部下载请按回车:") 
                if startChapterNum =="quit": 
                    continue 
                if not startChapterNum : 
                    startChapterNum=0 
                #print startChapterNum  
                downloadNovel(novel[index],int(startChapterNum))    
            elif isinstance(novel,tuple): 
                print "已找到小说:",novel[0] 
                result = raw_input("输入yes或回车选定小说,输入no重新输入小说名称:") 
                if not result or result=="yes": 
                     
                    totalChapters = analyzeNovel(novel[1]) 
                    startChapterNum = raw_input("请输入起始章节,全部下载请按回车:") 
                    if not startChapterNum : 
                        startChapterNum=0 
                    #print startChapterNum  
                    downloadNovel(novel,int(startChapterNum))    
                elif result =="no": 
                    pass 

# -*- coding:gbk -*-
'''
file desc:novel downloader
author:kingviker
email:kingviker@163.com.kingviker88@gmail.com
date:2013-05-21
depends:python 2.7.4,pyquery
'''

import os,codecs,urllib,urllib2,sys
from pyquery import PyQuery as pq


 
# POST endpoint of the novel site's title-search form (see searchNovel).
searchUrl = 'http://www.dushuge.net/modules/article/search.php'
# Local directory where each novel is saved as "<title>.txt".
baseSavePath="E:/enovel/"
 
def searchTopList():
    """Scrape the hot-novel ranking from the site's front page.

    Returns a list of [title, url] pairs.  The anchor at index 0 is
    deliberately skipped (presumably a non-entry link -- matches the
    original behavior).
    """
    homepage = pq(url="http://www.dushuge.net")
    anchors = homepage("#compZnNav").next().find("td").eq(0)("a")
    return [[anchors.eq(i).text(), anchors.eq(i).attr("href")]
            for i in range(1, len(anchors))]

def searchNovel(novelName):
    """POST a title search to the site and interpret the response page.

    Returns one of:
      * (title, url) tuple   -- exact hit: the server answered with the
                                book page itself;
      * list of [title, url] -- fuzzy hit: a result table of candidates;
      * None                 -- nothing found.
    """
    post_data = urllib.urlencode({'searchkey' : novelName,
              'searchtype' : 'articlename'})
    request = urllib2.Request(url=searchUrl, data=post_data)
    page = pq(urllib2.urlopen(request).read())

    exact = page("#content > .sf-mainbox > .head > h1 > a")
    if exact:
        # Direct hit -- the page header holds links about the book;
        # the third anchor is the one used throughout this script.
        return (exact.eq(2).text(), exact.eq(2).attr("href"))

    rows = page("#content > table > tr")
    if not rows:
        return None

    candidates = []
    # Row 0 is skipped (presumably the table header -- matches original).
    for i in range(1, len(rows)):
        link = rows.eq(i)("td").eq(0)("a")
        candidates.append([link.text(), link.attr("href")])
    return candidates

def analyzeNovel(url):
    print "开始分析章节信息..."
    #using pyquery to grub the webpage's content
    html_pq = pq(url=url)
    #print html_pq("div.book_article_texttable").find(".book_article_texttext")
    totalChapters = len(html_pq("div.book_article_texttable").find(".book_article_listtext").find("a"))
    print "总章节数:",totalChapters
    return totalChapters

def fetchDownloadLinks(url):
    """Walk a book's index page and collect per-piece chapter links.

    The page lays out sibling <div>s under div.book_article_texttable:
    a div with class "book_article_texttitle" starts a new piece
    (volume/section); any other div holds the chapter <a> links of the
    current piece.  Returns a flat list that alternates piece title and
    chapter list: [title0, [[name, href], ...], title1, [...], ...].
    """
    #using to save pieces and chapter lists
    pieceList=[]
    chapterList=[]

    html_pq = pq(url=url)
    #find the first piece of the novel.
    piece = html_pq("div.book_article_texttable > div").eq(0)

    isPiece = True
    if piece.attr("class")=="book_article_texttitle":
        #get the current piece's text
        pieceList.append(piece.text())
        #print "piece Text:", piece
    else:
        # No leading piece title -- use a placeholder ("none").
        isPiece = False
        pieceList.append("无")
    #scan out the piece and chapter lists
    nextPiece=False
    while nextPiece==False:
        if isPiece:
            # Normal step: advance to the next sibling div.
            chapterDiv = piece.next()
        else:
            # First iteration when there was no title div: the div we
            # already hold is itself a chapter list, so process it.
            isPiece = True
            chapterDiv = piece
        #print "chapter div length:",chapterDiv.length
        piece = chapterDiv
        if chapterDiv.length==0:
            # Ran past the last sibling: flush the final chapter list.
            pieceList.append(chapterList[:])
            del chapterList[:]
            nextPiece=True
        elif chapterDiv.attr("class")=="book_article_texttitle":
            # A new piece title: flush the previous piece's chapters
            # first, then record the new title.
            pieceList.append(chapterList[:])
            del chapterList[:]
            pieceList.append(piece.text())

        else:
            # A chapter-list div: harvest every anchor as [text, href].
            chapterUrls = chapterDiv.find("a");
            for urlA in chapterUrls:
                urlList_temp = [pq(urlA).text(),pq(urlA).attr("href")]
                chapterList.append(urlList_temp)

    print "下载列表收集完成",len(pieceList)
    return pieceList
   
def downloadNovel(novel,startChapterNum):
   # if os.path.exists(baseSavePath+novel[0]) is not True:
    #    os.mkdir(baseSavePath+novel[0])

    #based on the piecelist,grub the special webpage's novel content and save them .
    if os.path.exists(baseSavePath+novel[0]+".txt"):os.remove(baseSavePath+novel[0]+".txt")

    #using codecs to create a file. write mode(w+) is appended.
    novelFile = codecs.open(baseSavePath+novel[0]+".txt","wb+","utf-8")


    pieceList = fetchDownloadLinks(novel[1])
  
   
    chapterTotal = 0;

    print "从",startChapterNum,"章开始下载"
    #just using two for loops to analyze the piecelist.
    for pieceNum in range(0,len(pieceList),2):
        piece = pieceList[pieceNum]
        print "开始下载篇章",pieceList[pieceNum]
        chapterList = pieceList[pieceNum+1]
        for chapterNum in range(0,len(chapterList)):
            chapterTotal +=1
           # print chapterTotal,startChapterNum,startChapterNum>chapterTotal
           # print type(startChapterNum),type(chapterTotal)
            if startChapterNum > chapterTotal:
                continue
          
            chapter = chapterList[chapterNum]
            print "开始下载章节",chapter[0]
            chapterPage = pq(url=novel[1]+chapter[1])

            chapterContent = (piece+" "+chapter[0]+" \r")
          
            #print chapterPage("#booktext").remove("strong").html()
            chapterContent += chapterPage("#booktext").remove("strong").html().replace("<br />","\r")

            print "小说内容:",len(chapterContent)
            novelFile.write(chapterContent+"\r"+"\r")
       
    novelFile.close()
    print "下载完成,文件位置:",baseSavePath+novel[0]+".txt"
   


print "小说下载器 by kingviker!,输入quit返回或退出:"           
#if the novel's file system  not exists,created.
if os.path.exists(baseSavePath) is not True:
        os.mkdir(baseSavePath)
while(True):

    searchToplist = searchTopList()
    for index in range(len(searchToplist)):
                print "(",index,")",searchToplist[index][0]
    name = raw_input("输入序号直接下载热门小说,或者输入想要下载的小说名称搜索")
    try:
        if name =="quit":
            print "bey"
            break
        elif int(name) in range(11):
            print "选择小说:",searchToplist[int(name)][0]
            totalChapters = analyzeNovel(searchToplist[int(name)][1])
           
            startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
            if startChapterNum =="quit":
                continue
            if not startChapterNum :
                startChapterNum=0
            #print startChapterNum
            downloadNovel(searchToplist[int(name)],int(startChapterNum))
    except Exception ,e:
       
        novel = searchNovel(name)
        if not novel :
            print "没有查找到小说",name,"或者小说名称输出错误!"
        else:
            if isinstance(novel,list):
                print "以下是结果列表"
                for index in range(len(novel)):
                    print "(",index,")",novel[index][0]

                novelIndex = raw_input("请输入序号选择:")
                if novelIndex =="quit":
                    continue
               
                novelIndex = int(novelIndex)
                print"选定小说:",novel[novelIndex][0]
                searchHtml_pq = pq(url=novel[index][1])

                novelUrlHtml = searchHtml_pq("#content > .sf-mainbox > .head > h1 > a")
                novel[index][1] = novelUrlHtml.eq(2).attr("href")
                totalChapters = analyzeNovel(novel[index][1])
                startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
                if startChapterNum =="quit":
                    continue
                if not startChapterNum :
                    startChapterNum=0
                #print startChapterNum
                downloadNovel(novel[index],int(startChapterNum))  
            elif isinstance(novel,tuple):
                print "已找到小说:",novel[0]
                result = raw_input("输入yes或回车选定小说,输入no重新输入小说名称:")
                if not result or result=="yes":
                   
                    totalChapters = analyzeNovel(novel[1])
                    startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
                    if not startChapterNum :
                        startChapterNum=0
                    #print startChapterNum
                    downloadNovel(novel,int(startChapterNum))  
                elif result =="no":
                    pass

 

 

        这一版本主要更新如下:


1.增加推荐榜


2.改进用户交互界面.


3.支持模糊查询.

 
 

相关内容

    暂无相关文章

评论关闭