网络小说下载器:只能用于飞库网,改动字符串后也能用于其他网站


只能用于飞库网,改动字符串后也能用于其他网站,学习spider时写的.

# coding=utf-8
# Feiku novel downloader.
#
# Reads a table-of-contents URL and a book name from standard input, follows
# every link found inside a <td> cell of that page, extracts the
# <div id="chcontent"> element of each linked page and appends the cleaned
# text to "<book name>.txt".
import io
import re
import sys

try:  # Python 2
    from urllib2 import urlopen
    from urlparse import urljoin
except ImportError:  # Python 3
    from urllib.request import urlopen
    from urllib.parse import urljoin

try:  # Python 2
    _read_line = raw_input
    _text_type = unicode
except NameError:  # Python 3
    _read_line = input
    _text_type = str

# One <br/>, or two <br/> separated by a newline, collapse to a single newline.
reobj1 = re.compile(r'(<br */> *\n *<br */>)|(<br */>)')
# A single space, an attribute-less <div> and a closing </div> become a comma.
# NOTE(review): the first alternative is reproduced as a plain space, exactly
# as the published snippet shows it; it may originally have been "&nbsp;" and
# been mangled when the page was rendered - confirm against the target site.
reobj2 = re.compile('( )|(<div *>)|(< */div>)')


def _to_text(value):
    """Return *value* as text, decoding byte strings as UTF-8."""
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return _text_type(value)


def _fetch_soup(url):
    """Download *url*, decode it as UTF-8 and return the parsed document.

    The HTTP response is always closed, even when reading fails.
    """
    # Imported lazily so the pure text helpers in this module can be used
    # without the third-party parser being installed.
    from bs4 import BeautifulSoup

    response = urlopen(url)
    try:
        markup = response.read().decode('utf-8', 'ignore')
    finally:
        response.close()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    return BeautifulSoup(markup, 'html.parser')


def getContents(url, title):
    """Fetch one chapter page and return its title plus raw chapter markup.

    Args:
        url: Address of the chapter page.
        title: Chapter title; it is echoed to stdout as progress output and
            placed on its own line in front of the chapter markup.

    Returns:
        A text string: newline, title, newline, then the serialized
        ``<div id="chcontent">`` element (still containing HTML tags; pass
        it through ``filter`` to clean it).

    Raises:
        ValueError: If the page has no ``<div id="chcontent">`` element.
            (Previously the text "None" was silently written to the book.)
    """
    soup = _fetch_soup(url)
    content = soup.find('div', id='chcontent')
    if content is None:
        raise ValueError('no <div id="chcontent"> found at %s' % url)
    print(title)
    # Real newlines: the published snippet had them double-escaped, which
    # wrote a literal backslash-n around every title.
    return '\n' + _to_text(title) + '\n' + _to_text(content)


def filter(txt):  # noqa: A001 - public name kept for backward compatibility
    """Strip the chapter markup that the site wraps around the text.

    ``<br/>`` tags (single, or doubled across a newline) become one newline;
    spaces, bare ``<div>`` and ``</div>`` tags become a comma.

    Args:
        txt: Chapter markup as returned by ``getContents``.

    Returns:
        The cleaned text.
    """
    without_breaks = reobj1.sub('\n', txt)
    return reobj2.sub(',', without_breaks)


def main():
    """Prompt for the index URL and book name, then download every chapter."""
    print(u'输入目录网址:')
    index_url = _read_line().strip()
    soup = _fetch_soup(index_url)

    print(u'输入书名:')
    # Used as typed: encoding the already-encoded input (as the snippet did)
    # fails on Python 2 for non-ASCII names.
    bookname = _read_line().strip()

    with io.open(bookname + '.txt', 'w', encoding='utf-8') as book:
        for cell in soup.findAll('td'):
            link = cell.a
            href = link.get('href') if link is not None else None
            if not href:
                # Table cells without a link are not chapters.
                continue
            # Resolve relative links against the index page; absolute links
            # pass through unchanged.
            chapter_url = urljoin(index_url, href)
            try:
                chapter = getContents(chapter_url, link.get_text())
            except Exception as exc:
                # Best effort: report the failed chapter and keep going
                # instead of silently dropping it.
                sys.stderr.write('skipped %r: %r\n' % (chapter_url, exc))
                continue
            book.write(filter(chapter))


if __name__ == '__main__':
    main()
# This snippet comes from http://byrx.net

评论关闭