天涯帖子解放F5(只看楼主) — Tianya BBS thread live-follow script (author-only posts)


'''天涯帖子直播 20150426 — Tianya BBS thread live-follow (Python 3.4.3).

Polls a Tianya forum thread, extracts the posts written by the thread
starter ("楼主" / lou-zhu) and prints them to the console, refreshing
every ``refushtime`` seconds so you do not have to hammer F5 yourself.
'''
import os
import re
import time

# Request headers mimicking a desktop Chrome browser so the forum
# serves the normal HTML page instead of blocking the script.
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'bbs.tianya.cn',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36',
}

refushtime = 30  # seconds between refreshes (默认刷新时间)
lastpage = 1     # latest known page number of the thread (最新页面)
Furl = ''        # URL of the latest page, filled in by pagepro() (最新页面URL)
lzname = ''      # user name of the thread starter, filled in by main()
# Initial thread URL, page 1 (初始页面URL).
# Turl = 'http://bbs.tianya.cn/post-stocks-1131734-1.shtml'
Turl = 'http://bbs.tianya.cn/post-stocks-1345750-1.shtml'


def bsp(newurl):
    """Fetch *newurl* and return it parsed as a BeautifulSoup document.

    The third-party imports are local so the pure string helpers in this
    module stay importable without requests/bs4 installed.
    """
    import requests
    from bs4 import BeautifulSoup
    html = requests.get(newurl, headers=header, timeout=10)
    # .content.decode() so Chinese text is decoded correctly before parsing.
    return BeautifulSoup(html.content.decode())


def pagnum(soup):
    """Return the thread's total page count.

    The count is embedded in an inline ``<script>`` block as
    ``pageCount : NNN,`` — extract the digits with a capture group
    instead of fragile fixed-offset slicing.
    """
    tx = soup.find('script', {'type': 'text/javascript'}).text
    return int(re.search(r'pageCount : (\d+)', tx).group(1))


def pagepro(base_url=None, page=None):
    """Build and return the URL of thread page *page*.

    Defaults to the module-level ``Turl`` / ``lastpage`` and stores the
    result in the global ``Furl``, matching the original call pattern
    (``pagepro()`` with no arguments).  Passing explicit arguments is a
    backward-compatible generalisation.
    """
    global Furl
    if base_url is None:
        base_url = Turl
    if page is None:
        page = lastpage
    # 'http://bbs.tianya.cn/post-stocks-1131734-1.shtml'
    # -> ['http://bbs.tianya.cn/post', 'stocks', '1131734', '1.shtml']
    parts = base_url.split('-')
    Furl = '-'.join(parts[:3]) + '-%d.shtml' % page
    return Furl


def pagecollect():
    """Scrape the page at ``Furl`` and return the thread starter's posts.

    Returns a flat list alternating [timestamp, text, timestamp, text, ...];
    empty list when the starter has no posts on the page.  Posts by the
    starter are the ``<div>`` elements whose ``_host`` attribute equals
    ``lzname``.
    """
    soup = bsp(Furl)
    txt = []
    for post_div in soup.findAll('div', {'_host': lzname}):
        ntime = post_div.find('div', {'class': 'atl-info'}).text  # post timestamp
        content = post_div.find('div', {'class': 'atl-content'})  # outer wrapper
        body = content.find('div', {'class': 'bbs-content'}).text.strip()
        txt.append(ntime)
        txt.append(body)
    return txt


def formatprint(txt):
    """Pretty-print the [time, text, ...] list produced by pagecollect().

    Prints a placeholder banner for an empty list.  Does not mutate *txt*
    (the original modified entries in place as a side effect).
    """
    if not txt:
        print('===========None============')
        return
    for i in range(0, len(txt), 2):  # pairs of (timestamp, body)
        print('=' * 30)
        print(txt[i])
        # Tianya uses a 29-dash run as an in-post separator; shorten it.
        print(txt[i + 1].replace('-' * 29, '\n----------\n'))
        print('=' * 30)


def main():
    """Print the starter's posts on the latest page, then poll forever.

    Note: ``lzname`` must be declared global here — without it the
    assignment below would create a local and ``pagecollect()`` would
    match no posts (the bug in the original ``main``).
    """
    global refushtime, lastpage, Furl, lzname
    soup = bsp(Turl)
    title = re.sub('_.*', '=====', soup.title.text)
    print('=====', title)
    lastpage = pagnum(soup)
    print('LastPage:', lastpage)
    Furl = pagepro()  # compose URL of the newest page
    print('LastURL:', Furl)
    # The starter's user name is stored as an attribute on the action-menu div.
    lzname = soup.find('div', {'class': 'atl-menu clearfix js-bbs-act'})['js_activityusername']
    print('Lzname:', lzname)
    formatprint(pagecollect())  # first output
    while True:
        time.sleep(refushtime)
        soup = bsp(Turl)  # refresh
        newpage = pagnum(soup)
        if newpage > lastpage:
            # The thread grew a new page — follow it.
            print('LastPage:', newpage)
            lastpage = newpage
            Furl = pagepro()
            formatprint(pagecollect())
        else:
            os.system('cls')  # Windows-only console clear
            formatprint(pagecollect())
        print('==========Refush==========')


if __name__ == '__main__':
    # The original duplicated main()'s entire body here, leaving main()
    # dead code; calling it keeps one copy of the logic.
    main()

评论关闭