Python脚本提取谷歌音乐搜索结果，python谷歌,[Python]代码#!

文章由Byrx.net分享于2019-03-23 08:03:14评论（545）
Python脚本提取谷歌音乐搜索结果，python谷歌,[Python]代码#!

[Python]代码
#! /usr/bin/env python
#coding=utf-8
'''
Extract song search results from Google Music (google.cn) result pages.

The script prompts for a keyword, downloads the search result page(s),
and prints one ``[song, artist, album, download link]`` entry per result.
All parsing is done with regular expressions over the raw HTML.

Created on 2011-8-19
@author: yaoboyuan
'''
from urllib import request
from urllib import parse
import re
import sys

# Base search URL; the URL-quoted keyword is appended to it.
SEARCH_URL = 'http://www.google.cn/music/search?q='
# Step used for the ``start`` query parameter of follow-up result pages.
RESULTS_PER_PAGE = 20

# Patterns are compiled once because they are applied to every result row.
_TBODY_RE = re.compile(r'<tbody.*?</tbody>')
_NUMERIC_ENTITY_RE = re.compile(r'&#([0-9]+);')
_ANCHOR_TEXT_RE = re.compile(r'.+?<a.+?>(.*?)</a>')
_SONG_ID_RE = re.compile(r'<tbody id="([a-zA-Z0-9]+)"')
_DOWNLOAD_ONCLICK_RE = re.compile(
    r'<td class="Icon.*?(?=title="下载").*?onclick="(.*?)>')
_RAW_LINK_RE = re.compile(r'http.*?(?=\?)')


def extractSongRawData(text):
    '''Split a result page into the raw HTML of each song.

    Newlines are removed first so that ``.`` can span what were separate
    lines; every ``<tbody>...</tbody>`` element is then one song. The number
    of songs found is printed as a progress message.

    text -- decoded HTML of a search result page.
    Returns a list of ``<tbody>`` HTML fragments (possibly empty).
    '''
    text = re.sub(r'\n+', '', text)
    songList = _TBODY_RE.findall(text)
    print('search ' + str(len(songList)) + ' songs')
    return songList


def _decodeEntity(match):
    '''Return the character for one ``&#NNN;`` match, or the match unchanged
    when NNN is not a valid code point.'''
    try:
        return chr(int(match.group(1), 10))
    except (ValueError, OverflowError):
        return match.group(0)


def translate(text):
    '''Strip ``<b>``/``</b>`` markers and decode decimal numeric entities.

    Each ``&#NNN;`` is replaced in place by its character, so literal text
    that surrounds the entities is kept (the earlier implementation dropped
    every non-entity character as soon as one entity was present).

    text -- HTML fragment taken from inside an anchor.
    Returns the cleaned, decoded string.
    '''
    text = text.replace('<b>', '').replace('</b>', '')
    return _NUMERIC_ENTITY_RE.sub(_decodeEntity, text)


def _extractCellText(song, cellClass):
    '''Return the decoded text of the first anchor found from the
    ``<td class="<cellClass>...`` cell onwards, or '' when the cell or the
    anchor is missing.'''
    # Greedy on purpose (same as the original pattern): the match runs from
    # the requested cell to the last </td>; the first anchor inside it is used.
    cell = re.search('<td class="' + cellClass + '.*</td>', song)
    if cell is None:
        return ''
    anchor = _ANCHOR_TEXT_RE.search(cell.group(0))
    if anchor is None:
        return ''
    return translate(anchor.group(1))


def extractSongName(song):
    '''Return the song title of one result row ('' if it cannot be found).'''
    return _extractCellText(song, 'Title')


def extractAuthorName(song):
    '''Return the artist name of one result row ('' if it cannot be found).'''
    return _extractCellText(song, 'Artist')


def extrackAlbumName(song):
    '''Return the album name of one result row ('' if it cannot be found).

    The misspelled function name is kept so existing callers keep working.
    '''
    return _extractCellText(song, 'Album')


# Correctly spelled alias for new callers.
extractAlbumName = extrackAlbumName


def extractID(song):
    '''Return the song id taken from ``<tbody id="...">``.

    When no id attribute matches, the input is returned unchanged (this
    fallback is the original behaviour and is preserved).
    '''
    found = _SONG_ID_RE.search(song)
    if found is None:
        return song
    return found.group(1)


def extractLink(song):
    '''Return the download link of one result row.

    The link is read from the ``onclick`` attribute of the "Icon" cell whose
    title is "下载" (download). Return values:

    * 'NULL' when the row has no such cell;
    * the raw ``onclick`` text when it contains no ``http...?`` URL;
    * otherwise the URL up to (not including) the first '?', with '%3D'
      decoded to '=', followed by ``?id=<song id>``.
    '''
    onclick = _DOWNLOAD_ONCLICK_RE.search(song)
    if onclick is None:
        return 'NULL'
    handler = onclick.group(1)
    rawLink = _RAW_LINK_RE.search(handler)
    if rawLink is None:
        return handler
    link = rawLink.group(0).replace('%3D', '=')
    songId = extractID(song)  # not named ``id``: that would shadow the builtin
    return link + '?id=' + songId


def extractPageNums(text):
    '''Return how many times 'page_link' occurs in the page.

    main() treats this count as the number of additional result pages. The
    original note says at most 10 pages are expected; no cap is enforced here.
    '''
    return text.count('page_link')


def extractSongInfo(song):
    '''Turn raw song fragments into song records.

    song -- list of ``<tbody>`` fragments as returned by extractSongRawData.
    Returns a list of ``[songName, authorName, albumName, link]`` lists, in
    the same order as the input.
    '''
    return [
        [
            extractSongName(raw),
            extractAuthorName(raw),
            extrackAlbumName(raw),
            extractLink(raw),
        ]
        for raw in song
    ]


def _fetchPage(url):
    '''Download ``url`` and return its body decoded as UTF-8.'''
    # read() instead of readall(): current HTTPResponse objects do not
    # provide readall(). The with-block closes the connection.
    with request.urlopen(url) as response:
        return response.read().decode('utf-8')


def main():
    '''Prompt for keywords in a loop and print the songs found for each.

    The loop ends cleanly on end-of-input or Ctrl-C at the prompt.
    '''
    while True:
        try:
            key = input('请输入歌曲名字或关键字:')
        except (EOFError, KeyboardInterrupt):
            print()
            return
        # quote() percent-encodes the keyword as UTF-8.
        url = SEARCH_URL + parse.quote(key)
        page = _fetchPage(url)
        num = extractPageNums(page)
        print(str(num + 1) + ' pages found')
        songList = extractSongInfo(extractSongRawData(page))
        # Request the remaining pages. Each page URL is built from the base
        # search URL; appending to ``url`` itself would pile up one
        # ``&start=`` parameter per iteration.
        for pageIndex in range(num):
            start = (pageIndex + 1) * RESULTS_PER_PAGE
            pageUrl = url + '&cat=song&start=%d' % start
            page = _fetchPage(pageUrl)
            songList += extractSongInfo(extractSongRawData(page))
        # Print the results with a 1-based, zero-padded index.
        for position, item in enumerate(songList):
            print('%02d' % (position + 1) + '  ' + str(item))


if __name__ == '__main__':
    main()
热门文章：
Python脚本提取谷歌音乐搜索结果，python谷歌,[Python]代码#!

Python脚本提取谷歌音乐搜索结果，python谷歌,[Python]代码#!

相关内容

最新python源码实例

python~HOT