Python viterbi算法,pythonviterbi算法,[Python]代码de


[Python]代码

def InitDicForViterbi(nodes,posw,posdi,n):       newWordList = []       # 解决未登录词       for i in nodes:           if not posw.has_key(i):               newWordList.append(i)       maxPosList = NPos(posdi,n)       print maxPosList       GenerateDicPos(newWordList,maxPosList,posw,posdi)   def InitViterbi(node,posw,posdi):       viStatePath = []       for i in posw[node]:           if i <> "@@@":               print "i:",i               viStatePath.append([posw[node][i]*posdi[i]["@@@"],[i]])       return viStatePath  

[Python]代码

""" nodes 就是分好的词, posw 是词转换为词性的概率,posdi是词性之间的转换概率,n 是n个最大的词性将此用于未登录词中,      weightNone是未出现的词性转移的概率      nodes format {word1,word2...} , posw format {word1:{pos1:fre,pos2:fre,..."@@@":totalnum},..."@@@":total}      posdi format {pos1:{pos2:fre,pos3:fre...."@@@":total},...."@@@":total}      """  def Viterbi(nodes,posw,posdi,n,weightNone):       InitDicForViterbi(nodes,posw,posdi,n)       viStatePath = InitViterbi(nodes[0],posw,posdi)       length = len(nodes)       currentNode = 1      while currentNode < length:           currentPosList = posw[nodes[currentNode]]           paths = []          # print "vstate:",viStatePath           for k in currentPosList:               if k <> "@@@":                   ajk = weightNone                   heap = []                   for j in xrange(len(viStatePath)):                       # compute every state j to every state k in ti            #          temppath = viStatePath[j]            #           print "lastpos:",temppath                       lastpos = viStatePath[j][1][-1]                       lastweight = viStatePath[j][0]                       lastposList = posdi[lastpos]                       if lastposList.has_key(k):                               ajk =lastposList[k]                       currentweight = lastweight * ajk * currentPosList[k]           #            print "viStatePath:",viStatePath[j][1]                       pathNew = [data for data in viStatePath[j][1]]                       pathNew.append(k)          #             print "pathNew:",pathNew                       heappush(heap,[currentweight,pathNew])                    # get the max possibility of state k in ti         #          print "path:",path                   paths.append(nlargest(1,heap)[0])           del viStatePath           viStatePath = paths       #    print "paths:",paths           currentNode = currentNode + 1      heap = []       # get the max possibility path       for i in viStatePath:           heappush(heap,i)       return nlargest(1,heap) 

[Python]代码

"""nodes format {word1,word2,...} path is [weight,[pos1,pos2....]]"""  def Result(nodes,path,edcode="utf-8"):       realPath = path[0][1]       ResultPrint(nodes,realPath,edcode)   """nodes format {word1,word2,...} path is [pos1,pos2....]"""  def ResultPrint(nodes,path,edcode="utf-8"):       for i in xrange(len(nodes)):           print nodes[i].decode(edcode),"/",path[i].decode(edcode)  

[Python]代码

aa = ConvertGBKtoUTF("球球")       bb = ConvertGBKtoUTF("娃娃")   cc = ConvertGBKtoUTF("吃饭")   dd = ConvertGBKtoUTF("好")   ee = ConvertGBKtoUTF("dddwieoewkem")   dictions = {aa:{bb:1,"@@@":4},bb:{cc:2,aa:3,"@@@":40},"@@@":400}   posdi = {"n":{"s":3,"v":3,"@@@":40},"s":{"v":2,"e":3,"@@@":33},"v":{"@@@":1},"@@@":100}   posw = {aa:{"n":1,"v":3,"@@@":29},bb:{"n":1,"s":1,"@@@":19},cc:{"n":1,"@@@":1},"@@@":10002}   nodes = [aa,bb,cc,ee,dd,bb,aa,cc]   path = Viterbi(nodes,posw,posdi,3,0.01)   print path   Result(nodes,path)  

评论关闭