使用Python抓取58同城(南京站)的演出票信息
文章由Byrx.net分享于2019-03-23 05:03:35
使用Python抓取58同城(南京站)的演出票信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape show-ticket listings from 58.com (Nanjing) into per-page CSV files.

This is a Python 2 script: it relies on ``urllib2``, ``raw_input`` and
``sys.setdefaultencoding``.  Every listing page is downloaded, the ticket
title, link, price, original price and show date are extracted, and the
rows are written to ``./data/ticket_<page index>.csv``.
"""
import re
import urllib2
from bs4 import BeautifulSoup as bs
import csv
import os
import sys

# HACK: make UTF-8 the implicit str/unicode conversion codec.  The str()
# calls applied to scraped (unicode) text below depend on this.
reload(sys)
sys.setdefaultencoding('utf-8')

_DATA_DIR = './data/'
_FIRST_PAGE_URL = 'http://nj.58.com/piao/'
_NTH_PAGE_URL = 'http://nj.58.com/piao/pn%s/'
_USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) '
               'Gecko/20100101 Firefox/32.0')
_CSV_HEADER = ['标题', 'url', '售价', '原价', '演出时间']
# Placeholder written for a field that could not be found on the page.
_MISSING = '---'


def _texts(tags):
    """Return the text content of every tag in *tags*."""
    return [tag.get_text() for tag in tags]


def _padded(values, length):
    """Return *values* extended with the placeholder up to *length* items."""
    return values + [_MISSING] * (length - len(values))


def GetAllLink():
    """Prompt for a page count and scrape that many listing pages.

    Creates ``./data/`` when it does not exist yet, then calls
    :func:`GetPage` once per page with the zero-based page index.

    Raises:
        ValueError: if the text typed at the prompt is not an integer.
    """
    num = int(raw_input("爬取多少页:>"))
    if not os.path.exists(_DATA_DIR):
        os.mkdir(_DATA_DIR)
    for i in range(num):
        # The first page has no "pnN" suffix; later pages are numbered
        # from 2 upwards.
        if i == 0:
            url = _FIRST_PAGE_URL
        else:
            url = _NTH_PAGE_URL % (i + 1)
        GetPage(url, i)


def GetPage(url, num):
    """Download one listing page and save its tickets as a CSV file.

    Args:
        url: address of the listing page to fetch.
        num: zero-based page index; used in the output file name
            (``./data/ticket_<num>.csv``) and, plus one, in the progress
            message.

    Raises:
        ValueError: if the downloaded page contains no ``<table>``.
        urllib2.URLError: propagated from the download.
    """
    req = urllib2.Request(url, headers={'User-Agent': _USER_AGENT})
    response = urllib2.urlopen(req)
    try:
        page = response.read().decode('utf-8')
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()

    # NOTE(review): no parser is named, so bs4 picks whichever one is
    # installed; results may differ between machines.
    soup = bs(page)
    table = soup.table
    if table is None:
        raise ValueError('no <table> element found in %s' % url)

    # Re-parse only the table rows so the lookups below ignore the rest
    # of the page.
    soup2 = bs(str(table.find_all('tr')))

    links = soup2.find_all('a', 't')  # title and URL
    titles = _texts(links)
    hrefs = [link.get('href', _MISSING) for link in links]

    # Every record has a title, but price, original price and date may be
    # absent.  Pad each of those columns to the number of titles so that
    # building a row can never index past the end of a list.
    # NOTE(review): the columns are collected independently of each
    # other, so placeholders land at the end of a column rather than on
    # the record that actually lacks the value.
    count = len(titles)
    prices = _padded(_texts(soup2.find_all('b', 'pri')), count)
    fixed_prices = _padded(_texts(soup2.find_all('del')), count)
    dates = _padded(_texts(soup2.find_all('span', 'pr25')), count)

    # 'wb' is the mode the Python 2 csv module expects; text mode adds
    # blank lines between rows on Windows.
    with open('./data/ticket_%s.csv' % num, 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(_CSV_HEADER)
        # Hand the fields to the writer as a list instead of joining and
        # re-splitting on '|', which broke on values containing '|'.
        for row in zip(titles, hrefs, prices, fixed_prices, dates):
            writer.writerow([str(cell) for cell in row])

    print("[Result]:> 页面 %s 信息保存完毕!" % (num + 1))


if __name__ == '__main__':
    GetAllLink()
相关内容
- python使用Queue实现优先级队列,pythonqueue,使用Queue.Queu
- python循环监控远程端口的代码,python端口代码,在ip.tx
- python基础教程代码分享,python基础教程分享,people = {
- Python 扫描IP段 指定端口是否开放,pythonip,Python 扫描I
- 将十进制的数转换为任意进制,十进制数转换进制,#c
- python实现ssh批量登录并执行命令,pythonssh执行命令,局域
- 自己写的简单的类似minecraft的游戏,类似minecraft游戏
- 遍历制定目录下所有文件,制定目录,#_*_coding:u
- python获得本机机器名,python获得机器,import sys,
- 自动识别现接serial端口名称,识别serial端口名称,#!/us
评论关闭