批量查询网址是否备案,批量网址备案


# coding: utf-8
"""Bulk-check whether the sites listed in an Excel workbook have a filing record.

Every non-empty cell of worksheet ``Sheet1`` is posted to a lookup page and
one status line per cell is printed: ``已备案`` (filed) or ``未备案`` (not
filed).  The script is written so that it runs on both Python 2 and Python 3.
"""
import datetime
import sys
import threading

import requests
import xlrd
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: byte-string templates are %-formatted with unicode cell
    # values below, which needs a UTF-8 default encoding.  ``reload`` is not a
    # builtin on Python 3, hence the version guard.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("utf-8")

# Value of the ``guid`` form field sent with every lookup request.
LOOKUP_GUID = '1e4b4b3f-310f-4aaa-90f7-a552db48758d'


def beian(url, excel, timeout=10):
    """Look up every cell of ``Sheet1`` and print its filing status.

    Args:
        url: Address of the lookup page; each cell value is POSTed to it as
            form field ``s``.
        excel: Path of the workbook to read; it must contain a worksheet
            named ``Sheet1``.
        timeout: Seconds to wait for each HTTP request before giving up.

    The sheet is walked column by column.  For each ``div`` with id
    ``contenthtml`` in the response, ``已备案`` is printed when it contains a
    ``td`` with class ``tdright``; the first ``div`` without one prints
    ``未备案`` and ends the check for that cell.  A response without any such
    ``div`` prints nothing.
    """
    book = xlrd.open_workbook(excel)
    sheet1 = book.sheet_by_name('Sheet1')
    for curr_col in range(sheet1.ncols):
        for each in sheet1.col_values(curr_col):
            if each == '':
                # Columns shorter than the sheet are padded with empty
                # strings; there is nothing to look up for those cells.
                continue
            data = {'s': each, 'guid': LOOKUP_GUID}
            try:
                r = requests.post(url, data=data, timeout=timeout)
            except requests.RequestException as exc:
                # One unreachable lookup must not abort the remaining cells.
                print('%s 查询失败: %s' % (each, exc))
                continue
            soup = BeautifulSoup(r.content, 'html.parser')
            for tag in soup.find_all('div', id='contenthtml'):
                if tag.find('td', class_='tdright') is None:
                    print('%s 未备案' % each)
                    break
                print('%s 已备案' % each)


if __name__ == "__main__":
    url = 'http://tool.chinaz.com/beian.aspx'
    # NOTE(review): raw string with a doubled backslash, so the path literally
    # contains two backslashes -- confirm this is intended.
    excel = (r'C:\\1.xlsx')
    threads = []
    print("程序开始运行%s" % datetime.datetime.now())
    threads.append(threading.Thread(target=beian, args=(url, excel)))
    for th in threads:
        th.daemon = True
        th.start()
    # Wait for every worker, not only the last one started.
    for th in threads:
        th.join()
    print("程序结束时间%s" % datetime.datetime.now())

评论关闭