python之小说下载器 version 3.0
我想了一下,每个版本打包exe 意义不大,如果是最终版什么的 打一个还行,所以暂时不打包了 有需要的可以留言,我在打包.
这里使用了第三方库pyquery,安装方法见:1.0版本链接
在上一版本的基础上做个改进.主要是,增加推荐榜,支持模糊查询,交互方式有一定优化.如上图:
下面是代码:
[python]
# -*- coding:gbk -*-
'''''
file desc:novel downloader
author:kingviker
email:kingviker@163.com.kingviker88@gmail.com
date:2013-05-21
depends:python 2.7.4,pyquery
'''
import os,codecs,urllib,urllib2,sys
from pyquery import PyQuery as pq
searchUrl = 'http://www.dushuge.net/modules/article/search.php'
baseSavePath="E:/enovel/"
def searchTopList():
    """Fetch the recommendation ("hot") board from the site's front page.

    Returns:
        list of [title, url] pairs, one per novel on the board.
    """
    html = pq(url="http://www.dushuge.net")
    # The anchor list lives in the first <td> of the table after #compZnNav.
    anchors = html("#compZnNav").next().find("td").eq(0)("a")
    # Skip index 0: the first anchor is the board's header link, not a novel.
    # (Renamed the local from `searchTopList` — it shadowed the function name.)
    return [[anchors.eq(i).text(), anchors.eq(i).attr("href")]
            for i in range(1, len(anchors))]
def searchNovel(novelName):
    """Search the site for a novel by (possibly partial) name.

    Returns:
        (title, url) tuple  - when the search lands directly on a detail page;
        list of [title,url] - when the site shows a result listing;
        None                - when nothing matches.
    """
    postData = urllib.urlencode({'searchkey': novelName,
                                 'searchtype': 'articlename'})
    request = urllib2.Request(url=searchUrl, data=postData)
    page = pq(urllib2.urlopen(request).read())
    directHit = page("#content > .sf-mainbox > .head > h1 > a")
    if directHit:
        # Exact match: the site went straight to the novel's detail page.
        return (directHit.eq(2).text(), directHit.eq(2).attr("href"))
    rows = page("#content > table > tr")
    if not rows:
        return
    matches = []
    # Row 0 is the table header; data rows start at index 1.
    for rowIdx in range(1, len(rows)):
        link = rows.eq(rowIdx)("td").eq(0)("a")
        matches.append([link.text(), link.attr("href")])
    return matches
def analyzeNovel(url):
print "开始分析章节信息..."
#using pyquery to grub the webpage's content
html_pq = pq(url=url)
#print html_pq("div.book_article_texttable").find(".book_article_texttext")
totalChapters = len(html_pq("div.book_article_texttable").find(".book_article_listtext").find("a"))
print "总章节数:",totalChapters
return totalChapters
def fetchDownloadLinks(url):
    """Scan the chapter index page at *url* into a flat alternating list:
    [piece_title, [[chapter_title, chapter_href], ...], piece_title, ...].

    A "piece" is a volume/section heading div (class book_article_texttitle);
    the chapter links that follow it belong to that piece.  Novels without
    any heading get a single placeholder piece named "无" ("none").
    """
    #using to save pieces and chapter lists
    pieceList=[]
    chapterList=[]
    html_pq = pq(url=url)
    #find the first piece of the novel.
    piece = html_pq("div.book_article_texttable > div").eq(0)
    isPiece = True
    if piece.attr("class")=="book_article_texttitle":
        #get the current piece's text
        pieceList.append(piece.text())
        #print "piece Text:", piece
    else:
        # First div is already chapter links, not a heading: re-process it
        # below (isPiece=False skips the .next() advance once).
        isPiece = False
        pieceList.append("无")
    #scan out the piece and chapter lists
    nextPiece=False
    while nextPiece==False:
        if isPiece:
            chapterDiv = piece.next()
        else:
            isPiece = True
            chapterDiv = piece
        #print "chapter div length:",chapterDiv.length
        piece = chapterDiv
        if chapterDiv.length==0:
            # Ran past the last sibling div: flush pending chapters and stop.
            pieceList.append(chapterList[:])
            del chapterList[:]
            nextPiece=True
        elif chapterDiv.attr("class")=="book_article_texttitle":
            # A new piece heading: flush the previous piece's chapters first.
            pieceList.append(chapterList[:])
            del chapterList[:]
            pieceList.append(piece.text())
        else:
            # Ordinary div: collect all chapter anchors into the current piece.
            chapterUrls = chapterDiv.find("a");
            for urlA in chapterUrls:
                urlList_temp = [pq(urlA).text(),pq(urlA).attr("href")]
                chapterList.append(urlList_temp)
    print "下载列表收集完成",len(pieceList)
    return pieceList
def downloadNovel(novel,startChapterNum):
    """Download *novel* ([title, index_url]) into baseSavePath as a UTF-8
    text file, starting from 1-based chapter number *startChapterNum*
    (0 downloads everything).  Overwrites any previous file of the same name.
    """
    # if os.path.exists(baseSavePath+novel[0]) is not True:
    #     os.mkdir(baseSavePath+novel[0])
    #based on the piecelist,grub the special webpage's novel content and save them .
    if os.path.exists(baseSavePath+novel[0]+".txt"):os.remove(baseSavePath+novel[0]+".txt")
    #using codecs to create a file. write mode(w+) is appended.
    novelFile = codecs.open(baseSavePath+novel[0]+".txt","wb+","utf-8")
    pieceList = fetchDownloadLinks(novel[1])
    chapterTotal = 0;
    print "从",startChapterNum,"章开始下载"
    #just using two for loops to analyze the piecelist.
    # pieceList alternates [piece_title, chapter_list, piece_title, ...],
    # hence the step of 2: even index = title, odd index = its chapters.
    for pieceNum in range(0,len(pieceList),2):
        piece = pieceList[pieceNum]
        print "开始下载篇章",pieceList[pieceNum]
        chapterList = pieceList[pieceNum+1]
        for chapterNum in range(0,len(chapterList)):
            chapterTotal +=1
            # print chapterTotal,startChapterNum,startChapterNum>chapterTotal
            # print type(startChapterNum),type(chapterTotal)
            # Skip chapters before the requested starting point.
            if startChapterNum > chapterTotal:
                continue
            chapter = chapterList[chapterNum]
            print "开始下载章节",chapter[0]
            # Chapter hrefs are relative to the novel's index page URL.
            chapterPage = pq(url=novel[1]+chapter[1])
            chapterContent = (piece+" "+chapter[0]+" \r")
            #print chapterPage("#booktext").remove("strong").html()
            # Drop <strong> inserts and turn <br /> into carriage returns.
            chapterContent += chapterPage("#booktext").remove("strong").html().replace("<br />","\r")
            print "小说内容:",len(chapterContent)
            novelFile.write(chapterContent+"\r"+"\r")
    novelFile.close()
    print "下载完成,文件位置:",baseSavePath+novel[0]+".txt"
print "小说下载器 by kingviker!,输入quit返回或退出:"
#if the novel's file system not exists,created.
if os.path.exists(baseSavePath) is not True:
os.mkdir(baseSavePath)
while(True):
searchToplist = searchTopList()
for index in range(len(searchToplist)):
print "(",index,")",searchToplist[index][0]
name = raw_input("输入序号直接下载热门小说,或者输入想要下载的小说名称搜索")
try:
if name =="quit":
print "bey"
break
elif int(name) in range(11):
print "选择小说:",searchToplist[int(name)][0]
totalChapters = analyzeNovel(searchToplist[int(name)][1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if startChapterNum =="quit":
continue
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(searchToplist[int(name)],int(startChapterNum))
except Exception ,e:
novel = searchNovel(name)
if not novel :
print "没有查找到小说",name,"或者小说名称输出错误!"
else:
if isinstance(novel,list):
print "以下是结果列表"
for index in range(len(novel)):
print "(",index,")",novel[index][0]
novelIndex = raw_input("请输入序号选择:")
if novelIndex =="quit":
continue
novelIndex = int(novelIndex)
print"选定小说:",novel[novelIndex][0]
searchHtml_pq = pq(url=novel[index][1])
novelUrlHtml = searchHtml_pq("#content > .sf-mainbox > .head > h1 > a")
novel[index][1] = novelUrlHtml.eq(2).attr("href")
totalChapters = analyzeNovel(novel[index][1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if startChapterNum =="quit":
continue
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(novel[index],int(startChapterNum))
elif isinstance(novel,tuple):
print "已找到小说:",novel[0]
result = raw_input("输入yes或回车选定小说,输入no重新输入小说名称:")
if not result or result=="yes":
totalChapters = analyzeNovel(novel[1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(novel,int(startChapterNum))
elif result =="no":
pass
# -*- coding:gbk -*-
'''
file desc:novel downloader
author:kingviker
email:kingviker@163.com.kingviker88@gmail.com
date:2013-05-21
depends:python 2.7.4,pyquery
'''
import os,codecs,urllib,urllib2,sys
from pyquery import PyQuery as pq
searchUrl = 'http://www.dushuge.net/modules/article/search.php'
baseSavePath="E:/enovel/"
def searchTopList():
    """Fetch the recommendation ("hot") board from the site's front page.

    Returns:
        list of [title, url] pairs, one per novel on the board.
    """
    html = pq(url="http://www.dushuge.net")
    # The anchor list lives in the first <td> of the table after #compZnNav.
    anchors = html("#compZnNav").next().find("td").eq(0)("a")
    # Skip index 0: the first anchor is the board's header link, not a novel.
    # (Renamed the local from `searchTopList` — it shadowed the function name.)
    return [[anchors.eq(i).text(), anchors.eq(i).attr("href")]
            for i in range(1, len(anchors))]
def searchNovel(novelName):
    """Search the site for a novel by (possibly partial) name.

    Returns:
        (title, url) tuple  - when the search lands directly on a detail page;
        list of [title,url] - when the site shows a result listing;
        None                - when nothing matches.
    """
    postData = urllib.urlencode({'searchkey': novelName,
                                 'searchtype': 'articlename'})
    request = urllib2.Request(url=searchUrl, data=postData)
    page = pq(urllib2.urlopen(request).read())
    directHit = page("#content > .sf-mainbox > .head > h1 > a")
    if directHit:
        # Exact match: the site went straight to the novel's detail page.
        return (directHit.eq(2).text(), directHit.eq(2).attr("href"))
    rows = page("#content > table > tr")
    if not rows:
        return
    matches = []
    # Row 0 is the table header; data rows start at index 1.
    for rowIdx in range(1, len(rows)):
        link = rows.eq(rowIdx)("td").eq(0)("a")
        matches.append([link.text(), link.attr("href")])
    return matches
def analyzeNovel(url):
print "开始分析章节信息..."
#using pyquery to grub the webpage's content
html_pq = pq(url=url)
#print html_pq("div.book_article_texttable").find(".book_article_texttext")
totalChapters = len(html_pq("div.book_article_texttable").find(".book_article_listtext").find("a"))
print "总章节数:",totalChapters
return totalChapters
def fetchDownloadLinks(url):
    """Scan the chapter index page at *url* into a flat alternating list:
    [piece_title, [[chapter_title, chapter_href], ...], piece_title, ...].

    A "piece" is a volume/section heading div (class book_article_texttitle);
    the chapter links that follow it belong to that piece.  Novels without
    any heading get a single placeholder piece named "无" ("none").
    """
    #using to save pieces and chapter lists
    pieceList=[]
    chapterList=[]
    html_pq = pq(url=url)
    #find the first piece of the novel.
    piece = html_pq("div.book_article_texttable > div").eq(0)
    isPiece = True
    if piece.attr("class")=="book_article_texttitle":
        #get the current piece's text
        pieceList.append(piece.text())
        #print "piece Text:", piece
    else:
        # First div is already chapter links, not a heading: re-process it
        # below (isPiece=False skips the .next() advance once).
        isPiece = False
        pieceList.append("无")
    #scan out the piece and chapter lists
    nextPiece=False
    while nextPiece==False:
        if isPiece:
            chapterDiv = piece.next()
        else:
            isPiece = True
            chapterDiv = piece
        #print "chapter div length:",chapterDiv.length
        piece = chapterDiv
        if chapterDiv.length==0:
            # Ran past the last sibling div: flush pending chapters and stop.
            pieceList.append(chapterList[:])
            del chapterList[:]
            nextPiece=True
        elif chapterDiv.attr("class")=="book_article_texttitle":
            # A new piece heading: flush the previous piece's chapters first.
            pieceList.append(chapterList[:])
            del chapterList[:]
            pieceList.append(piece.text())
        else:
            # Ordinary div: collect all chapter anchors into the current piece.
            chapterUrls = chapterDiv.find("a");
            for urlA in chapterUrls:
                urlList_temp = [pq(urlA).text(),pq(urlA).attr("href")]
                chapterList.append(urlList_temp)
    print "下载列表收集完成",len(pieceList)
    return pieceList
def downloadNovel(novel,startChapterNum):
    """Download *novel* ([title, index_url]) into baseSavePath as a UTF-8
    text file, starting from 1-based chapter number *startChapterNum*
    (0 downloads everything).  Overwrites any previous file of the same name.
    """
    # if os.path.exists(baseSavePath+novel[0]) is not True:
    #     os.mkdir(baseSavePath+novel[0])
    #based on the piecelist,grub the special webpage's novel content and save them .
    if os.path.exists(baseSavePath+novel[0]+".txt"):os.remove(baseSavePath+novel[0]+".txt")
    #using codecs to create a file. write mode(w+) is appended.
    novelFile = codecs.open(baseSavePath+novel[0]+".txt","wb+","utf-8")
    pieceList = fetchDownloadLinks(novel[1])
    chapterTotal = 0;
    print "从",startChapterNum,"章开始下载"
    #just using two for loops to analyze the piecelist.
    # pieceList alternates [piece_title, chapter_list, piece_title, ...],
    # hence the step of 2: even index = title, odd index = its chapters.
    for pieceNum in range(0,len(pieceList),2):
        piece = pieceList[pieceNum]
        print "开始下载篇章",pieceList[pieceNum]
        chapterList = pieceList[pieceNum+1]
        for chapterNum in range(0,len(chapterList)):
            chapterTotal +=1
            # print chapterTotal,startChapterNum,startChapterNum>chapterTotal
            # print type(startChapterNum),type(chapterTotal)
            # Skip chapters before the requested starting point.
            if startChapterNum > chapterTotal:
                continue
            chapter = chapterList[chapterNum]
            print "开始下载章节",chapter[0]
            # Chapter hrefs are relative to the novel's index page URL.
            chapterPage = pq(url=novel[1]+chapter[1])
            chapterContent = (piece+" "+chapter[0]+" \r")
            #print chapterPage("#booktext").remove("strong").html()
            # Drop <strong> inserts and turn <br /> into carriage returns.
            chapterContent += chapterPage("#booktext").remove("strong").html().replace("<br />","\r")
            print "小说内容:",len(chapterContent)
            novelFile.write(chapterContent+"\r"+"\r")
    novelFile.close()
    print "下载完成,文件位置:",baseSavePath+novel[0]+".txt"
print "小说下载器 by kingviker!,输入quit返回或退出:"
#if the novel's file system not exists,created.
if os.path.exists(baseSavePath) is not True:
os.mkdir(baseSavePath)
while(True):
searchToplist = searchTopList()
for index in range(len(searchToplist)):
print "(",index,")",searchToplist[index][0]
name = raw_input("输入序号直接下载热门小说,或者输入想要下载的小说名称搜索")
try:
if name =="quit":
print "bey"
break
elif int(name) in range(11):
print "选择小说:",searchToplist[int(name)][0]
totalChapters = analyzeNovel(searchToplist[int(name)][1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if startChapterNum =="quit":
continue
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(searchToplist[int(name)],int(startChapterNum))
except Exception ,e:
novel = searchNovel(name)
if not novel :
print "没有查找到小说",name,"或者小说名称输出错误!"
else:
if isinstance(novel,list):
print "以下是结果列表"
for index in range(len(novel)):
print "(",index,")",novel[index][0]
novelIndex = raw_input("请输入序号选择:")
if novelIndex =="quit":
continue
novelIndex = int(novelIndex)
print"选定小说:",novel[novelIndex][0]
searchHtml_pq = pq(url=novel[index][1])
novelUrlHtml = searchHtml_pq("#content > .sf-mainbox > .head > h1 > a")
novel[index][1] = novelUrlHtml.eq(2).attr("href")
totalChapters = analyzeNovel(novel[index][1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if startChapterNum =="quit":
continue
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(novel[index],int(startChapterNum))
elif isinstance(novel,tuple):
print "已找到小说:",novel[0]
result = raw_input("输入yes或回车选定小说,输入no重新输入小说名称:")
if not result or result=="yes":
totalChapters = analyzeNovel(novel[1])
startChapterNum = raw_input("请输入起始章节,全部下载请按回车:")
if not startChapterNum :
startChapterNum=0
#print startChapterNum
downloadNovel(novel,int(startChapterNum))
elif result =="no":
pass
这一版本主要更新如下:
1. 增加推荐榜
2. 改进用户交互界面
3. 支持模糊查询
相关内容
- 暂无相关文章
评论关闭