Python 爬虫：爬取豆瓣 Top250 电影

文章由 Byrx.net 分享于 2019-03-22 02:03:31 · 评论（286）

Python 爬虫：爬取豆瓣 Top250 电影

import string
import re
import urllib2

class DouBanSpider(object):
    """Collect movie titles from the Douban Top 250 listing pages.

    Titles accumulate in ``datas`` as ``"Top<rank> <title>"`` strings, in
    the order they are found. ``_top_num`` holds the rank that will be
    given to the next title, and ``page`` is the next page to fetch.
    """

    # The ``start`` query parameter advances by this much per page.
    _PAGE_SIZE = 25

    # NOTE(review): assumes every title on the page is wrapped in
    # <span class="title">...</span> -- confirm against the live markup.
    # re.S lets ``.`` cross newlines inside a tag.
    _TITLE_RE = re.compile(r'<span.*?class="title">(.*?)</span>', re.S)

    # NOTE(review): presumably the alternate (foreign-language) title spans
    # contain a raw "&nbsp" entity while the primary title does not -- verify.
    _ALT_TITLE_MARK = "&nbsp"

    def __init__(self):
        self.page = 1
        self.cur_url = "http://movie.douban.com/top250?start={page}&filter=&type="
        self.datas = []
        self._top_num = 1

    def get_page(self, cur_page):
        """Download one listing page and return its body as text.

        Args:
            cur_page: 1-based page number; page 1 maps to ``start=0``.

        Returns:
            The response body decoded as UTF-8.
        """
        from contextlib import closing

        url = self.cur_url.format(page=(cur_page - 1) * self._PAGE_SIZE)
        # closing() releases the connection even when read()/decode() raises.
        with closing(urllib2.urlopen(url)) as response:
            return response.read().decode("utf-8")

    def find_title(self, my_page):
        """Extract titles from ``my_page`` and append them to ``datas``.

        Spans containing the alternate-title marker are skipped. Each kept
        title is stored as ``"Top<rank> <title>"`` and the running rank is
        advanced, so numbering continues across successive calls.

        Args:
            my_page: Page HTML as text.
        """
        for title in self._TITLE_RE.findall(my_page):
            if self._ALT_TITLE_MARK in title:
                continue
            self.datas.append("Top" + str(self._top_num) + " " + title)
            self._top_num += 1

    def start_spider(self, max_page=4):
        """Fetch and parse pages from ``self.page`` through ``max_page``.

        Args:
            max_page: Last page number to fetch (inclusive). The default of
                4 keeps the historical behaviour. NOTE(review): at 25
                entries per page, presumably 10 pages are needed to cover
                all 250 -- confirm.
        """
        while self.page <= max_page:
            self.find_title(self.get_page(self.page))
            self.page += 1
def main():
    """Run a DouBanSpider and print each collected entry on its own line."""
    spider = DouBanSpider()
    spider.start_spider()
    for entry in spider.datas:
        print(entry)


# Executed on load, exactly as before: the script starts crawling immediately.
main()

热门文章：

python爬虫,爬豆瓣top250电影

python爬虫,爬豆瓣top250电影

python爬虫,爬豆瓣top250电影

相关内容

最新python教程

python~HOT