Python 抓取天气预报


#!/usr/bin/env python
# weather html parser
"""Fetch a weather.com.cn forecast page and print a one-line summary.

The text of every ``<td>`` cell on the page is collected, split into
whitespace-separated tokens and cut into per-day groups at each token that
contains the CJK character U+65E5.  The first group is printed as-is, the
second and third groups are condensed to three fields each.
"""
import re
import string
import sys

try:
    from html.parser import HTMLParser
except ImportError:  # Python 2
    from HTMLParser import HTMLParser

try:
    import urllib.request as urllib2
except ImportError:  # Python 2
    import urllib2

# The original script matched whitespace on byte strings, where ``\s`` and
# ``str.split()`` only recognise these six ASCII characters.  Spelling the
# class out keeps the tokenisation identical now that the page is handled as
# text (where ``\s`` would also match e.g. U+3000 or U+00A0).
_ASCII_WS = re.compile(r'[ \t\n\r\f\v]+')

# U+65E5; the original searched for its UTF-8 encoding '\xe6\x97\xa5'.
DAY_MARKER = u'\u65e5'

# Number of day-marker tokens needed to delimit the three printed groups.
_MIN_MARKERS = 4
# Highest index read from a day group is 9, so ten tokens are required.
_MIN_DAY_FIELDS = 10


# define a class to parse a html
class HtmlParser(HTMLParser):
    """Accumulate the text found inside ``<td>`` elements."""

    def __init__(self):
        self.data = ''
        self.readingdata = 0
        try:
            # Keep character/entity references out of handle_data(), as the
            # Python 2 parser did; otherwise "&nbsp;" and friends would turn
            # into extra text and shift the token positions used below.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Parser without the convert_charrefs option (e.g. Python 2).
            HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``<td>`` opens."""
        if tag == 'td':
            self.readingdata = 1

    def handle_data(self, chars):
        """Append *chars* to the buffer while inside a ``<td>``."""
        if self.readingdata:
            self.data += chars

    def handle_endtag(self, tag):
        """Stop collecting text when a ``</td>`` is seen."""
        if tag == 'td':
            self.readingdata = 0

    def cleanse(self):
        """Collapse every run of ASCII whitespace in the buffer to one space."""
        self.data = _ASCII_WS.sub(' ', self.data)

    def getdata(self):
        """Return the collected text after whitespace normalisation."""
        self.cleanse()
        return self.data


# this url is a place where you want to know the weather forecast
url = "http://www.weather.com.cn/html/weather/101210501.shtml"


def _strip_utf8_tail(token, nbytes=6):
    """Return *token* without the last *nbytes* bytes of its UTF-8 encoding.

    The original script sliced ``[:-6]`` off a UTF-8 byte string; doing the
    cut in byte space reproduces that exactly for text input.
    NOTE(review): six bytes is presumably a two-character CJK suffix on the
    date label -- confirm against the live page.
    """
    return token.encode('utf-8')[:-nbytes].decode('utf-8', 'replace')


def _summarize_day(day):
    """Condense one day's token group to three fields.

    Field 1 gets ``' --> ' + field 7`` appended when the two differ, field 2
    becomes ``field 9 + ' --> ' + field 3`` and field 0 loses its six-byte
    tail.  (Presumably day/night conditions and low/high temperature --
    verify against the page layout.)

    Raises:
        ValueError: if the group has fewer than ten tokens.
    """
    if len(day) < _MIN_DAY_FIELDS:
        raise ValueError(
            'expected at least %d fields per day, got %d: %r'
            % (_MIN_DAY_FIELDS, len(day), day))
    conditions = day[1]
    if conditions != day[7]:
        conditions += ' --> ' + day[7]
    return [_strip_utf8_tail(day[0]), conditions, day[9] + ' --> ' + day[3]]


def format_forecast(text):
    """Build the summary line from the cell text returned by ``getdata()``.

    Args:
        text: whitespace-separated forecast text (unicode text).

    Returns:
        ``'    Weather: <first group> | <second day> | <third day>'``.

    Raises:
        ValueError: if fewer than four tokens contain ``DAY_MARKER`` or a
            summarised day group is too short.
    """
    tokens = [tok for tok in _ASCII_WS.split(text) if tok]
    # enumerate() gives each marker's own position; the original used
    # list.index(), which is quadratic and repeats the first position for
    # duplicate tokens.
    marks = [pos for pos, tok in enumerate(tokens) if DAY_MARKER in tok]
    if len(marks) < _MIN_MARKERS:
        raise ValueError(
            'expected at least %d day markers, found %d'
            % (_MIN_MARKERS, len(marks)))
    # Everything up to the second marker is passed through unchanged.
    first = tokens[:marks[1]]
    second = _summarize_day(tokens[marks[1]:marks[2]])
    third = _summarize_day(tokens[marks[2]:marks[3]])
    return ' '.join(['    Weather:'] + first + ['|'] + second + ['|'] + third)


def fetch_forecast_text(page_url=url):
    """Download *page_url* and return the normalised text of its ``<td>`` cells."""
    response = urllib2.urlopen(urllib2.Request(page_url))
    try:
        raw = response.read()
    finally:
        # The original never closed the connection.
        response.close()
    parser = HtmlParser()
    # The day marker the original looked for is UTF-8 encoded, so the page
    # is decoded as UTF-8.
    parser.feed(raw.decode('utf-8', 'replace'))
    return parser.getdata()


def main():
    """Fetch the forecast and write the summary line to standard output."""
    line = format_forecast(fetch_forecast_text()) + '\n'
    if sys.version_info[0] < 3:
        # Python 2 stdout expects bytes; emit UTF-8 like the original did.
        line = line.encode('utf-8')
    sys.stdout.write(line)


if __name__ == '__main__':
    main()

# This snippet comes from http://byrx.net

评论关闭