Python爬取小说:运行结果与代码
Python爬取小说:运行结果与代码
运行结果:
代码:
import os

import requests
from bs4 import BeautifulSoup
from selenium import webdriver


class NovelSpider:
    """Download the chapters of one novel from biqukan.com into a single text file.

    The chapter index page is fetched with ``requests``; each chapter page is
    rendered with a PhantomJS browser and the text of its ``showtxt`` element
    is appended to ``凡人修仙传仙界篇.txt`` inside ``output_dir``.

    Args:
        start_url: URL of the novel's chapter index page.
        output_dir: Directory the output text file is written to.
        phantomjs_path: Path of the PhantomJS executable handed to Selenium.
    """

    # Chapter hrefs are appended to this prefix to build the chapter URLs.
    BASE_URL = 'https://www.biqukan.com'
    # Seconds to wait for the index page before giving up instead of hanging.
    REQUEST_TIMEOUT = 30

    def __init__(self,
                 start_url='https://www.biqukan.com/1_1680/',
                 output_dir=r'F:\Spider\novelSpider',
                 phantomjs_path=r'F:\Spider\novelSpider\phantomjs.exe'):
        self.start_url = start_url
        self.output_dir = output_dir
        self.phantomjs_path = phantomjs_path

    def get_novel(self):
        """Fetch the chapter index and download every unique chapter in order.

        Raises:
            requests.HTTPError: If the index page answers with an error status.
            RuntimeError: If the index page has no ``listmain`` element.
        """
        response = requests.get(self.start_url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        div_chapter = soup.find(class_="listmain")
        if div_chapter is None:
            raise RuntimeError(
                f"no element with class 'listmain' found at {self.start_url}"
            )

        # The first 12 links are skipped, as in the original script.
        # NOTE(review): presumably a "latest chapters" teaser that repeats
        # entries from the full list - confirm against the live page.
        links = div_chapter.find_all('a')[12:]

        # De-duplicate on (name, url) before downloading so that the progress
        # total reflects the number of chapters that will actually be fetched.
        chapters = []
        seen = set()
        for link in links:
            href = link.get('href')
            if not href:
                # An anchor without href cannot be turned into a chapter URL.
                continue
            entry = (link.get_text(), self.BASE_URL + href)
            if entry in seen:
                continue
            seen.add(entry)
            chapters.append({'name': entry[0], 'value': entry[1]})

        total = len(chapters)
        print('《凡人修仙传仙界篇》开始下载:')
        for done, chapter in enumerate(chapters, start=1):
            self.download_novel(chapter)
            # Report progress only after the chapter has really been written.
            print(f"已下载:{done}/{total}")

    def parse_novel(self, url):
        """Render one chapter page and return the text of its ``showtxt`` element.

        Args:
            url: Absolute URL of the chapter page.

        Returns:
            The chapter text as a string.

        Raises:
            RuntimeError: If the rendered page has no ``showtxt`` element.
        """
        # NOTE: Selenium deprecated its PhantomJS driver in 3.8 and removed it
        # in 4.0; this call needs an older Selenium release to work.
        browser = webdriver.PhantomJS(executable_path=self.phantomjs_path)
        try:
            browser.get(url)
            page_source = browser.page_source
        finally:
            # Always shut the browser down, otherwise every chapter leaves a
            # PhantomJS process behind.
            browser.quit()

        soup = BeautifulSoup(page_source, 'html.parser')
        find_txt = soup.find(class_='showtxt')
        if find_txt is None:
            raise RuntimeError(f"no element with class 'showtxt' found at {url}")
        return find_txt.get_text()

    def download_novel(self, data):
        """Fetch one chapter and append it to the novel's text file.

        Args:
            data: Mapping with the chapter title under ``'name'`` and the
                chapter URL under ``'value'``.
        """
        filename = data['name']
        txt = self.parse_novel(data['value'])

        # Creates missing parent directories and tolerates an existing one.
        os.makedirs(self.output_dir, exist_ok=True)
        target = os.path.join(self.output_dir, '凡人修仙传仙界篇.txt')

        # Append mode: every chapter is added to the end of the same file.
        with open(target, 'a', encoding='utf-8') as f:
            f.write(f'{filename}\n\n')
            f.write(txt)
            f.write('\n======\n\n')


if __name__ == '__main__':
    ns = NovelSpider()
    ns.get_novel()
python爬取小说
评论关闭