Python 对 Xapian 的一个简单封装类(Python Xapian 封装)


import xapian, config
from mmseg.search import seg_txt_2_dict


class Xapian():
    """Thin convenience wrapper around a writable Xapian database.

    Text is segmented with ``seg_txt_2_dict`` and every resulting word is
    stored on the document as a plain term.  Write operations are
    best-effort: they report failure through their return value instead of
    raising.
    """

    def __init__(self):
        """Open (or create) the index and prepare a shared Enquire object.

        The database is opened at ``config.xapian_index_dir`` with
        ``DB_CREATE_OR_OPEN``.  Search results are sorted by value slot 1
        with the ``reverse`` flag set to ``True``.
        """
        self.db = xapian.WritableDatabase(
            config.xapian_index_dir, xapian.DB_CREATE_OR_OPEN)
        self.enquire = xapian.Enquire(self.db)
        self.enquire.set_sort_by_value(1, True)

    @staticmethod
    def _add_terms(doc, text):
        """Segment ``text`` and add every resulting word to ``doc`` as a term."""
        # Only the words (the dict keys) are indexed; the associated values
        # returned by seg_txt_2_dict are not used.
        for word in seg_txt_2_dict(text):
            doc.add_term(word)

    # NOTE: the parameter name ``id`` shadows the builtin, but it is part of
    # the public signature (callers may pass it by keyword), so it is kept.
    def get_document(self, id):
        """Return the document stored under ``id``.

        :id: document id
        :returns: xapian.Document
        Any error raised by the underlying database lookup propagates.
        """
        return self.db.get_document(id)

    def delete_document(self, id):
        """Delete the document stored under ``id``.

        :id: document id
        :returns: whatever ``db.delete_document`` returns, or None on failure
        """
        try:
            return self.db.delete_document(id)
        except Exception:
            # Best-effort delete: failures are reported as None.
            return None

    def update_index(self, id, text=None, values=None, data=None):
        """Update parts of an existing document in place.

        Only the pieces that are passed (and truthy) are replaced; the rest
        of the stored document is left untouched.

        :id: id of the document to update
        :text: new text; replaces all terms of the document
        :values: mapping of value slot -> value; replaces all values
        :data: new document data
        :returns: True on success, False if the document could not be
                  loaded or written back
        """
        try:
            doc = self.get_document(id)
        except Exception:
            return False

        if text:
            # Drop the old terms before indexing the new text.
            doc.clear_terms()
            self._add_terms(doc, text)
        if values:
            doc.clear_values()
            for key, value in values.items():
                doc.add_value(key, value)
        if data:
            doc.set_data(data)

        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True

    def index(self, id, text, values=None, data=''):
        """Index ``text`` as a new document stored under ``id``.

        Any document already stored under ``id`` is replaced.

        :id: document id
        :text: content to index (UTF-8)
        :values: optional mapping of value slot -> value
        :data: optional document data
        :returns: True on success, False if writing to the database failed
        """
        doc = xapian.Document()
        self._add_terms(doc, text)

        # Values are added so results can be sorted; the key apparently has
        # to be a number (the value slot).
        if values:
            for key, value in values.items():
                doc.add_value(key, value)
        if data:
            doc.set_data(data)

        try:
            self.db.replace_document(id, doc)
        except Exception:
            return False
        return True

    def search(self, keywords, offset=0, limit=10):
        """Search the index for documents containing all segmented keywords.

        :keywords: search string; text is encoded to UTF-8, byte strings
                   are used as they are
        :offset: index of the first match to return
        :limit: maximum number of matches to return
        :returns: the MSet returned by ``Enquire.get_mset``
        """
        # Encode only real text: a byte string is assumed to be UTF-8
        # already, and re-encoding it fails on Python 2 for non-ASCII input.
        if not isinstance(keywords, bytes):
            keywords = keywords.encode('utf-8')

        query_list = [xapian.Query(word) for word in seg_txt_2_dict(keywords)]
        if len(query_list) == 1:
            query = query_list[0]
        else:
            # Every segmented word has to match.
            query = xapian.Query(xapian.Query.OP_AND, query_list)

        self.enquire.set_query(query)
        # The third argument (10000) is passed through as get_mset's
        # check-at-least hint.
        return self.enquire.get_mset(offset, limit, 10000)

    def flush(self):
        """Flush pending changes to disk.

        :returns: result of ``db.flush()``
        """
        return self.db.flush()


search = Xapian()

评论关闭