批量下载51voa的文本和MP3,51voa文本mp3,[Python]代码


[Python]代码

#! /usr/bin/env python
# Myvoa.py -- defines the Myvoa downloader class.
import os
import sys
import urllib.request as req
from urllib.parse import urljoin


class Myvoa:
    """Download article texts and MP3 audio from http://www.51voa.com.

    Attributes:
        basicurl: Root URL that relative links found in pages are resolved
            against.
        basicdir: Directory downloaded files are written to (defaults to the
            current directory).
        urls: Article page URLs collected by :meth:`geturls`.
    """

    def __init__(self):
        self.basicurl = "http://www.51voa.com"
        self.basicdir = os.curdir
        self.urls = []

    def _fetch_text(self, url):
        """Return the body of *url* decoded to ``str``."""
        with req.urlopen(url) as res:
            raw = res.read()
            charset = res.headers.get_content_charset() or "utf-8"
        # Decode for real instead of str(bytes): the latter produces the
        # "b'...'" repr, with escaped newlines and \xNN for non-ASCII text.
        try:
            return raw.decode(charset, errors="replace")
        except LookupError:
            # The server announced a charset Python does not know.
            return raw.decode("utf-8", errors="replace")

    @staticmethod
    def _paragraphs(data):
        """Return the inner text of every ``<p>...</p>`` element in *data*.

        ``<br />`` becomes a newline. An opening ``<p>`` without a matching
        ``</p>`` ends the scan; nothing is emitted for it.
        """
        # The literal backslash-n removal is kept for callers that still pass
        # the repr of a bytes object, as earlier versions of dlvoa did.
        data = data.replace(r"\n", '').replace("<br />", '\n')
        found = []
        pos = data.find('<p>')
        while pos != -1:
            # Look for the terminator *after* the opening tag, so a stray
            # "</p>" earlier in the text cannot produce an empty paragraph.
            end = data.find('</p>', pos + 3)
            if end == -1:
                break
            found.append(data[pos + 3:end])
            pos = data.find('<p>', end + 4)
        return found

    def _parse_article(self, page):
        """Split an article page into ``(title, mp3url, body)``.

        *body* is the HTML between the title ``<div>`` and the bottom
        marker with CR/LF removed. *mp3url* is ``None`` when the page has
        no recognisable MP3 link.

        Raises:
            ValueError: If the title markers are not present in *page*.
        """
        startag = '<div id="title">'
        endtag = '<div id="Bottom_VOA">'
        start = page.find(startag)
        if start == -1:
            raise ValueError("article title marker not found")
        start += len(startag)
        end = page.find(endtag, start)
        if end == -1:
            end = len(page)
        # Source line breaks carry no meaning here; only <br /> and <p> do.
        body = page[start:end].replace('\r', '').replace('\n', '')

        title_end = body.find("</div>")
        if title_end == -1:
            raise ValueError("article title is not terminated")
        title = body[:title_end]

        mp3url = None
        mp3_end = body.find('.mp3">')
        if mp3_end != -1:
            # Take the link that actually ends in .mp3, not the first link.
            href_start = body.rfind('<a href="', 0, mp3_end)
            if href_start != -1:
                href = body[href_start + 9:mp3_end + 4]
                if '"' not in href:
                    mp3url = urljoin(self.basicurl, href)
        return title, mp3url, body

    def _parse_index(self, page):
        """Return the absolute URLs of the links in the page's first list.

        Only the region between the first ``<ul><li>`` and the following
        ``</li></ul>`` is scanned. Each link is expected to look like
        ``<a href="..." target=``.
        """
        startag = '<ul><li>'
        endtag = '</li></ul>'
        start = page.find(startag)
        if start == -1:
            return []
        start += len(startag)
        end = page.find(endtag, start)
        if end == -1:
            end = len(page)
        listing = page[start:end]

        linestart = '<a href='
        lineend = 'target='
        links = []
        pos = listing.find(linestart)
        while pos != -1:
            stop = listing.find(lineend, pos)
            if stop == -1:
                break
            # pos + 9 skips '<a href="'; stop - 2 drops the closing '" '.
            links.append(urljoin(self.basicurl, listing[pos + 9:stop - 2]))
            pos = listing.find(linestart, stop + len(lineend))
        return links

    def savefile(self, name, url=-1, data=-1):
        """Save a download as *name* inside ``self.basicdir``.

        Nothing is done when the target file already exists.

        Args:
            name: File name; path separators are removed from it.
            url: When given (not ``-1``), the resource at this URL is
                downloaded and written as binary.
            data: When *url* is not given and this is (not ``-1``), an HTML
                string whose ``<p>`` paragraphs are written as UTF-8 text,
                one per line.
        """
        name = name.replace(os.sep, '')
        if os.altsep:
            name = name.replace(os.altsep, '')
        target = os.path.join(self.basicdir, name)
        print(target)
        if os.path.isfile(target):
            return
        if url != -1:
            # Download before creating the file: a failed request must not
            # leave an empty file behind that later runs would skip.
            with req.urlopen(url) as res:
                payload = res.read()
            with open(target, 'wb') as fp:
                fp.write(payload)
        elif data != -1:
            print(len(data))
            lines = [text + '\n' for text in self._paragraphs(data)]
            with open(target, 'w', encoding='utf-8') as fp:
                fp.writelines(lines)

    def dlvoa(self, url):
        """Download one article page: its MP3 (if linked) and its text.

        Raises:
            ValueError: If the page does not contain the title markers.
        """
        title, mp3url, body = self._parse_article(self._fetch_text(url))
        if mp3url is not None:
            self.savefile(title + '.mp3', url=mp3url)
        self.savefile(title + '.txt', data=body)

    def geturls(self):
        """Fetch the Standard English index and append its links to ``urls``."""
        exurl = "/VOA_Standard_English"
        page = self._fetch_text(self.basicurl + exurl)
        self.urls.extend(self._parse_index(page))

    def dlurls(self):
        """Download every article in ``urls``, fetching the index if empty."""
        if not self.urls:
            self.geturls()
        for url in self.urls:
            self.dlvoa(url)

[Python]代码

#! /usr/bin/env python
# test.py -- manual smoke test for the Myvoa downloader.
from Myvoa import Myvoa


def main():
    """Collect the article index, then download every listed article."""
    temp = Myvoa()
    temp.geturls()
    temp.dlurls()


# Guarded so that importing (or test-collecting) this file does not start
# network downloads; running it as a script behaves as before.
if __name__ == "__main__":
    main()

评论关闭