Elasticsearch之Python API简单使用


elasticsearch自动补全建议功能

数据入库操作

ES mapping 要求
PUT music
{
    "mappings": {
        "_doc": {
            "properties": {
                "suggest": {
                    "type": "completion"
                },
                "title": {
                    "type": "keyword"
                }
            }
        }
    }
}

DocType类

from elasticsearch_dsl import (
    DocType, Date, Nested, Boolean,
    analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer,
)
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
from elasticsearch_dsl.connections import connections

# Create the connection to the local Elasticsearch node at import time.
connections.create_connection(hosts=["localhost"])


class CustomAnalyzer(_CustomAnalyzer):
    """Analyzer wrapper that avoids the error raised when ik_analyzer is
    passed as a field parameter.
    """

    def get_analysis_definition(self):
        # Always report an empty analysis definition.
        # NOTE(review): presumably this stops elasticsearch_dsl from trying to
        # declare "ik_max_word" in the index settings, since the analyzer is
        # provided by the IK plugin on the server -- confirm.
        return {}


ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])


class ArticleType(DocType):
    # Completion field that backs the auto-complete suggestions.
    suggest = Completion(analyzer=ik_analyzer)
    ...  # remaining fields are elided in the original snippet

Items类

from models.es_types import ArticleType
from elasticsearch_dsl.connections import connections

es = connections.create_connection(ArticleType._doc_type.using)


def gen_suggests(index, info_tuple):
    """Build the list of completion suggestions for one document.

    Args:
        index: Name of the index whose analyzer is used to tokenize the text.
        info_tuple: Iterable of ``(text, weight)`` pairs. Earlier pairs take
            precedence: a token already emitted for an earlier pair is not
            repeated for a later one.

    Returns:
        A list of ``{"input": [tokens...], "weight": weight}`` dicts, one per
        pair that contributed at least one new token (possibly empty).
    """
    used_words = set()
    suggests = []
    for text, weight in info_tuple:
        if not text:
            continue
        # Call the ES analyze API to tokenize the string.
        words = es.indices.analyze(
            index=index,
            analyzer="ik_max_word",
            params={'filter': ["lowercase"]},
            body=text,
        )
        # Single-character tokens are dropped.
        analyzed_words = {
            r["token"] for r in words["tokens"] if len(r["token"]) > 1
        }
        new_words = analyzed_words - used_words
        if new_words:
            # Remember what has been emitted so later (lower priority) texts
            # do not produce the same suggestion again.
            used_words |= new_words
            suggests.append({"input": list(new_words), "weight": weight})
    return suggests


class JobBoleArticleItem(scrapy.Item):
    ...  # item fields are elided in the original snippet

    def save_to_es(self):
        ...  # construction of ``article`` is elided in the original snippet
        # Title tokens are weighted 10, tag tokens 7.
        article.suggest = gen_suggests(
            ArticleType._doc_type.index,
            ((article.title, 10), (article.tags, 7)),
        )
        article.save()
        redis_cli.incr("jobbole_count")
        return

ES搜索语法

POST myindex/_search?pretty
{
    "suggest": {
        "my-suggest": {
            "text": "linux",
            "completion": {
                "field": "suggest",
                "fuzzy": {
                    "fuzziness": 2
                }
            }
        }
    },
    "_source": ["title"]
}

自动补全建议核心代码
# As written in the Django views module
from search.models import ArticleType


class SearchSuggest(View):
    """Return auto-complete suggestions for the ``s`` query parameter."""

    def get(self, request):
        """Respond with a JSON array of suggested titles.

        The array is empty when ``s`` is missing or blank.
        """
        key_words = request.GET.get('s', '')
        re_datas = []
        if key_words:
            s = ArticleType.search()
            s = s.suggest('my_suggest', key_words, completion={
                "field": "suggest",
                "fuzzy": {
                    "fuzziness": 2,
                },
                "size": 10,
            })
            suggestions = s.execute_suggest()
            # Only the first entry of the "my_suggest" result is read.
            for match in suggestions.my_suggest[0].options:
                source = match._source
                re_datas.append(source["title"])
        return HttpResponse(json.dumps(re_datas), content_type="application/json")

elasticsearch内容搜索功能

数据入库操作：和上面一样

搜索核心代码
# As written in the Django views module
from elasticsearch import Elasticsearch

client = Elasticsearch(hosts=["127.0.0.1"])


class SearchView(View):
    """Full-text search over the "jobbole" index, rendered as a result page."""

    def get(self, request):
        """Run the search for ``q`` and render page ``p`` of the results.

        Query parameters:
            q: Search keywords (defaults to an empty string).
            p: 1-based page number; non-numeric or non-positive values fall
               back to the first page.
        """
        page_size = 10
        key_words = request.GET.get("q", "")
        try:
            page = int(request.GET.get("p", "1"))
        except (TypeError, ValueError):
            page = 1
        # A page below 1 would yield a negative "from" offset.
        page = max(page, 1)

        start_time = datetime.now()
        response = client.search(
            index="jobbole",
            body={
                "query": {
                    "multi_match": {
                        "query": key_words,
                        "fields": ["tags", "title", "content"],
                    }
                },
                "from": (page - 1) * page_size,
                "size": page_size,
                "highlight": {
                    "pre_tags": ['<span class="keyWord">'],
                    "post_tags": ['</span>'],
                    "fields": {
                        "title": {},
                        "content": {},
                    },
                },
            },
        )
        end_time = datetime.now()
        last_seconds = (end_time - start_time).total_seconds()

        total_nums = response["hits"]["total"]
        if isinstance(total_nums, dict):
            # Elasticsearch 7+ reports the total as {"value": N, "relation": ...}.
            total_nums = total_nums["value"]
        # Ceiling division: a partially filled last page still counts.
        page_nums = (total_nums + page_size - 1) // page_size

        hit_list = []
        for hit in response["hits"]["hits"]:
            source = hit["_source"]
            # "highlight" is absent when none of the highlighted fields
            # matched (e.g. the hit matched only on "tags").
            highlight = hit.get("highlight", {})
            hit_dict = {}
            if "title" in highlight:
                hit_dict["title"] = "".join(highlight["title"])
            else:
                hit_dict["title"] = source["title"]
            if "content" in highlight:
                hit_dict["content"] = "".join(highlight["content"])[:500]
            else:
                hit_dict["content"] = source["content"][:500]
            hit_dict["create_date"] = source["create_date"]
            hit_dict["url"] = source["url"]
            hit_dict["score"] = hit["_score"]
            hit_list.append(hit_dict)

        return render(request, "result.html", {
            "page": page,
            "all_hits": hit_list,
            "key_words": key_words,
            "total_nums": total_nums,
            "page_nums": page_nums,
            "last_seconds": last_seconds,
        })

scrapy框架+django框架组合使用

github项目参考

https://github.com/holgerd77/django-dynamic-scraper

Elasticsearch之pythonAPI简单使用

评论关闭