Python 爬取小说:运行结果与代码


运行结果:

技术图片

代码:

 1 import requests 2 from bs4 import BeautifulSoup 3 from selenium import webdriver 4 import os 5   6 class NovelSpider: 7     def __init__(self): 8         self.start_url = ‘https://www.biqukan.com/1_1680/‘ 9  10     def get_novel(self):11         response = requests.get(self.start_url)12         soup = BeautifulSoup(response.text, ‘html.parser‘)13         div_chapter = soup.find(class_="listmain")14         chapter_list = div_chapter.find_all(‘a‘)15         chapter_list = chapter_list[12:]16         chapter = []17         chapter_num = len(chapter_list)18         count = 019         print(‘《凡人修仙传仙界篇》开始下载:‘)20         for cl in chapter_list:21             chapter_dict = {}22             chapter_name = cl.get_text()23             chapter_dict[‘name‘] = chapter_name24             chapter_url = cl.get(‘href‘)25             chapter_dict[‘value‘] = ‘https://www.biqukan.com‘ + chapter_url26             if chapter_dict not in chapter:27                 chapter.append(chapter_dict)28             print(f"已下载:{count}/{chapter_num}")29             self.download_novel(chapter_dict)30             count += 131  32     def parse_novel(self, url):33         browser = webdriver.PhantomJS(executable_path=r‘F:\Spider\novelSpider\phantomjs.exe‘)34         browser.get(url)35         soup = BeautifulSoup(browser.page_source, ‘html.parser‘)36         find_txt = soup.find(class_=‘showtxt‘)37         # print(type(find_txt.get_text()))38         return find_txt.get_text()39  40     def download_novel(self, data): 41         filename = data[‘name‘]42         url = data[‘value‘]43         txt = self.parse_novel(url)44  45         path = r"F:\Spider\novelSpider"46         isExists = os.path.exists(path)47         if not isExists:48             os.mkdir(path)49         else:50             pass51  52         with open(path + f‘\凡人修仙传仙界篇.txt‘, ‘a‘, encoding=‘utf-8‘) as f:53             f.write(f‘{filename}\n\n‘)54             f.write(txt)55             f.write(‘\n======\n\n‘)56             
f.close()57  58 if __name__ == ‘__main__‘:59     ns = NovelSpider()60     ns.get_novel()

python爬取小说

评论关闭