提取双色球每期信息


#!/usr/bin/python#coding = GBKimport sysimport osimport urllibimport reimport MySQLdb#con= MySQLdb.connect(host='localhost',user='root',passwd='',db='caipiao')#cursor =con.cursor()reload(sys)sys.setdefaultencoding( "utf-8" )pager=urllib.urlopen('http://zx.caipiao.163.com/trend/ssq_basic.html?beginPeriod=2004001&endPeriod=2012149&historyPeriod=2012150&year=')data=pager.read()pager.close()p=re.compile('(?<=<tbody id="cpdata".).+?(?=</tbody)',re.I|re.S)data = p.search(data).group()data = data.decode('utf-8')del pp=data.split('</tr>');output = open('/home/liukai/python/data.txt', 'a')for i in range(len(p)):  con= MySQLdb.connect(host='localhost',user='root',passwd='',db='caipiao')  cursor =con.cursor()  if(i == 1591):    break  pattern = re.compile('(?=title=).+?(?=">)',re.I|re.S)  if((i-5) ==0 or(i-5)%6==0):    continue  title = pattern.search(p[i]).group()  title = title.split('"')  if(len(title)==2):    title = title[1]  else:    title = ""  nums = p[i].split('</td>')  list = title  for j in range(len(nums)):    if(j==0):      continue#      print nums[j]#      patt = re.compile('(?=chartBall).+?',re.I|re.S)    flag = nums[j].find('chartBall')     if(flag != -1):      numList = nums[j].split(">");      list =list+ numList[1]  if(i != -1):    date = list[7:16]    first = list[19:21]    second = list[21:23]    third = list[23:25]    four = list[25:27]    five = list[27:29]    six = list[29:31]    seven = list[31:33]    sql = "insert into 500wan (data_time,first,second,third,fouth,five,six,seven) values('"+date+"','"+first+"','"+second+"','"+third+"','"+four+"','"+five+"','"+six+"','"+seven+"')"    cursor.execute(sql)    cursor.close()    con.close()    #print list+"\\n"    print first+"_"+second+"_"+third+"_"+four+"_"+five+"_"+six+"_"+seven+"_"+date    #break  #print list  output.write(list+'\\n')#该片段来自于http://byrx.net

评论关闭