Python有道词典查询单词(在Ubuntu终端下)


在Ubuntu下用Python查询有道词典:因为想在终端里查询不认识的单词,所以利用Python语言,通过正则表达式等知识,从有道词典的查询页面中提取出翻译结果。

1.[文件] WebDict.py~4KB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#----------------------------------------------------------------
#   Author : Scharfsinnig
#   E-Mail : scharfsinnig@163.com
#   File   : WebDict.py
#
#   Introduction:
#   This script performs a simple information-extraction task on the
#   Youdao word-lookup web page to find a word's translations, so that
#   unfamiliar words can be looked up conveniently from the terminal.
#----------------------------------------------------------------
"""Look up words on dict.youdao.com from an interactive terminal session."""

import re
import time
import urllib

try:  # Python 2 module layout
    import thread
    import urllib2
    from urllib import quote
except ImportError:  # Python 3: same functionality under new module names
    import _thread as thread
    import urllib.request as urllib2
    from urllib.parse import quote

try:
    raw_input
except NameError:  # Python 3 renamed raw_input() to input()
    raw_input = input


class Spider_Youdao(object):
    """Interactive scraper that prints translations found on a Youdao page.

    Attributes:
        Trans_Youdao_Tag: compiled pattern matching one ``<li>...</li>``
            entry of the basic-translation list.
        Trans_Shiji_Tag: compiled pattern matching one
            ``<span ... class="def">...</span>`` entry (the section the
            script labels as the 21st Century dictionary).
        run: loop flag; ``Start`` keeps prompting while it is true.
    """

    # Horizontal rule printed between output sections.
    _SEPARATOR = "--------------------------------------------"

    # Seconds to wait for the server before giving up on a lookup.
    _TIMEOUT = 10

    # Captures everything between the opening "trans-container" div and
    # the start of the "webTrans" div; re.S lets it span several lines.
    _CONTAINER_RE = re.compile(
        r'<div.*?class="trans-container">(.*?)'
        r'<div id="webTrans" class="trans-wrapper trans-tab">',
        re.S)

    # Matches one markup tag plus at most one whitespace character before it.
    _MARKUP_RE = re.compile(r'\s?<.*?>')

    def __init__(self):
        """Compile the extraction patterns and arm the main-loop flag."""
        # Entries of the basic-translation list.
        self.Trans_Youdao_Tag = re.compile(r'\s?<li>.*?</li>\s?')
        # Entries of the 21st Century dictionary section.
        self.Trans_Shiji_Tag = re.compile(r'\s?<span.*?class="def">.*?</span>')
        # Exit flag.
        self.run = True

    def SearchWord(self):
        """Prompt on the terminal and return the text the user typed."""
        return raw_input("\n#[输入单词]\n>")

    def GetUrl(self):
        """Prompt for a word and return the lookup URL for it.

        The word is always percent-encoded.  Previously the URL was only
        returned when the word needed no encoding, so phrases containing
        spaces or non-ASCII input produced ``None`` and crashed the request.
        Surrounding whitespace is dropped because it is never part of the
        query.

        Returns:
            The URL string to request.
        """
        SWord = self.SearchWord().strip()
        return ("http://dict.youdao.com/search?len=eng&q=" + quote(SWord)
                + "&keyfrom=dict.top")

    def GetPage(self):
        """Download the lookup page for a word typed by the user.

        Returns:
            The response body decoded from UTF-8.

        Raises:
            IOError: on network failure or timeout (``urllib2.URLError``
                and socket errors both derive from it).
            UnicodeDecodeError: if the body is not valid UTF-8.
        """
        Youdao_Url = self.GetUrl()
        # Present a browser User-Agent with the request.
        headers = {
            'User-Agent': ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:32.0) '
                           'Gecko/20100101 Firefox/32.0'),
        }
        req = urllib2.Request(Youdao_Url, headers=headers)
        Res = urllib2.urlopen(req, timeout=self._TIMEOUT)
        try:
            # Convert the raw bytes to text once, at the I/O boundary.
            return Res.read().decode("utf-8")
        finally:
            # The Python 2 response object is not a context manager, so
            # release the connection explicitly.
            Res.close()

    def ExtractPage(self):
        """Fetch a page and print the translations found in it.

        Prints the basic-translation entries followed by the entries of
        the 21st Century dictionary section, with tags removed and a
        separator line around each section.  A section with no matches is
        printed with its heading only.
        """
        MyPage = self.GetPage()
        strip_markup = self._MARKUP_RE.sub

        print(self._SEPARATOR)
        print("#(翻译来自有道词典):")
        containers = self._CONTAINER_RE.findall(MyPage)
        if containers:
            # Only the last captured container is scanned, as before; the
            # guard covers pages where the container is missing entirely
            # (that case used to raise UnboundLocalError).
            for match in self.Trans_Youdao_Tag.finditer(containers[-1]):
                print(strip_markup('', match.group()))

        print(self._SEPARATOR)
        print("#(翻译来自21世纪大词典):")
        for match in self.Trans_Shiji_Tag.finditer(MyPage):
            print(strip_markup('', match.group()))
        print(self._SEPARATOR)

    def Start(self):
        """Run the interactive loop until the user enters ``!``.

        Each round first asks whether to continue (Enter) or quit (``!``),
        then performs one lookup.  A failed request is reported and the
        loop continues instead of ending the session with a traceback.
        """
        while self.run:
            S_Word = raw_input("\n#[\"!\"号退出.回车继续.]\n>")
            if S_Word == "!":
                self.run = False
                continue
            try:
                self.ExtractPage()
            except IOError as err:
                # User-facing "lookup failed" notice, in the UI's language.
                print("#[查询失败]: %s" % err)


if __name__ == '__main__':
    mydict = Spider_Youdao()
    mydict.Start()

2.[图片] 111.png


编橙之家文章,

评论关闭