提取双色球每期信息,提取双色球信息
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scrape the Shuangseqiu ("double colour ball") lottery draw history.

The script downloads the trend page from zx.caipiao.163.com, cuts the
``<tbody id="cpdata">`` section out of it and splits that section on
``</tr>``.  Every chunk is turned into one record string (the value of the
chunk's ``title`` attribute followed by the text of every ``chartBall``
cell).  Fixed-position slices of the record are inserted into the ``500wan``
table of the local ``caipiao`` MySQL database, echoed to stdout, and the
whole record is appended to a text file.

This snippet comes from http://byrx.net
"""
import io
import os
import re
import sys
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

SOURCE_URL = ('http://zx.caipiao.163.com/trend/ssq_basic.html'
              '?beginPeriod=2004001&endPeriod=2012149'
              '&historyPeriod=2012150&year=')
OUTPUT_PATH = '/home/liukai/python/data.txt'
DB_SETTINGS = {'host': 'localhost', 'user': 'root', 'passwd': '',
               'db': 'caipiao'}

# Chunk index at which processing stops.  Hard-coded in the original script;
# presumably the number of chunks produced by the period range requested in
# SOURCE_URL -- TODO confirm.
MAX_ROWS = 1591

# Chunks at index 5, 11, 17, ... are skipped.  Presumably these are spacer
# rows of the HTML table -- TODO confirm against the page.
SEPARATOR_OFFSET = 5
SEPARATOR_PERIOD = 6

# Everything between '<tbody id="cpdata"' + one character and '</tbody'.
# Applied to the raw (undecoded) page, hence a bytes pattern.
TBODY_RE = re.compile(b'(?<=<tbody id="cpdata".).+?(?=</tbody)', re.I | re.S)

# From 'title=' up to (not including) the next '">'.
TITLE_RE = re.compile('(?=title=).+?(?=">)', re.I | re.S)

# (start, end) offsets into a record, in column order of INSERT_SQL:
# data_time, first, second, third, fouth, five, six, seven.
# The ball fields start at offset 19, i.e. right after the title only when
# the title is exactly 19 characters long and every ball text is 2 characters
# long -- this is an assumption about the page format, TODO confirm.
FIELD_SLICES = ((7, 16), (19, 21), (21, 23), (23, 25),
                (25, 27), (27, 29), (29, 31), (31, 33))

# Parameterized on purpose: the values come from scraped page text.
# "fouth" is the column name used by the original statement.
INSERT_SQL = ("insert into 500wan "
              "(data_time,first,second,third,fouth,five,six,seven) "
              "values(%s,%s,%s,%s,%s,%s,%s,%s)")


def fetch_page(url):
    """Download *url* and return the raw response body."""
    pager = urlopen(url)
    try:
        return pager.read()
    finally:
        pager.close()


def extract_table_rows(page):
    """Return the ``</tr>``-separated chunks of the ``cpdata`` table body.

    *page* is the raw page as bytes.  The table body is located first and
    only that part is decoded as UTF-8.

    Raises:
        ValueError: the page contains no ``<tbody id="cpdata">`` section.
    """
    match = TBODY_RE.search(page)
    if match is None:
        raise ValueError('no <tbody id="cpdata"> section found in the page')
    return match.group().decode('utf-8').split('</tr>')


def is_separator_row(index):
    """Return True for the chunk indexes the script skips (5, 11, 17, ...)."""
    return (index - SEPARATOR_OFFSET) % SEPARATOR_PERIOD == 0


def build_record(row):
    """Build the record string for one table chunk.

    The record is the value of the chunk's ``title`` attribute followed by
    the text of every cell (except the first) whose markup mentions
    ``chartBall``.  The title is taken as empty when the matched attribute
    text does not split into exactly two parts on ``"``.

    Returns None when the chunk has no ``title=`` attribute at all, e.g. the
    trailing whitespace after the last ``</tr>``.
    """
    match = TITLE_RE.search(row)
    if match is None:
        return None
    parts = match.group().split('"')
    title = parts[1] if len(parts) == 2 else ""
    cells = row.split('</td>')
    # The text of a cell is whatever follows the first '>' of its markup.
    balls = [cell.split(">")[1] for cell in cells[1:] if 'chartBall' in cell]
    return title + "".join(balls)


def split_record(record):
    """Slice *record* into ``(date, first, ..., seven)`` per FIELD_SLICES."""
    return tuple(record[start:end] for start, end in FIELD_SLICES)


def _use_utf8_default_encoding():
    """On Python 2, make implicit unicode -> str conversions use UTF-8.

    Kept from the original script so that printing scraped text behaves as
    before.  Python 3 strings are already Unicode, so nothing is done there.
    """
    if sys.version_info[0] >= 3:
        return
    # reload() brings back sys.setdefaultencoding, which is removed from the
    # sys module at interpreter start-up.
    reload(sys)  # noqa: F821 (Python 2 builtin)
    sys.setdefaultencoding("utf-8")


def main():
    """Fetch the page, store every draw in MySQL and append it to the file."""
    _use_utf8_default_encoding()
    # Imported here so the parsing helpers above can be imported and used
    # without the MySQL driver being installed.
    import MySQLdb

    rows = extract_table_rows(fetch_page(SOURCE_URL))

    # One connection for the whole run, always closed, instead of one
    # connection per chunk that leaked whenever a chunk was skipped.
    con = MySQLdb.connect(**DB_SETTINGS)
    try:
        cursor = con.cursor()
        try:
            with io.open(OUTPUT_PATH, 'a', encoding='utf-8') as output:
                for index, row in enumerate(rows):
                    if index == MAX_ROWS:
                        break
                    if is_separator_row(index):
                        continue
                    record = build_record(row)
                    if record is None:
                        continue
                    fields = split_record(record)
                    cursor.execute(INSERT_SQL, fields)
                    # Commit per row so the database and the text file
                    # advance together if a later row fails.
                    con.commit()
                    print("_".join(fields[1:] + fields[:1]))
                    # A real line break; the original wrote a literal
                    # backslash followed by "n".
                    output.write(record + u'\n')
        finally:
            cursor.close()
    finally:
        con.close()


if __name__ == "__main__":
    main()
评论关闭