Elasticsearch之pythonAPI简单使用,,elasticsea
Elasticsearch之pythonAPI简单使用,,elasticsea
elasticsearch自动补全建议功能
数据入库操作
ES mapping 要求
PUT music
{
  "mappings": {
    "_doc": {
      "properties": {
        "suggest": { "type": "completion" },
        "title": { "type": "keyword" }
      }
    }
  }
}
DocType类
from elasticsearch_dsl import DocType, Date, Nested, Boolean, analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
from elasticsearch_dsl.connections import connections

# Register the default connection used by the document class below.
connections.create_connection(hosts=["localhost"])


class CustomAnalyzer(_CustomAnalyzer):
    """Work around the error raised when ik_analyzer is passed as a parameter."""

    def get_analysis_definition(self):
        # Return an empty definition.
        # NOTE(review): presumably this keeps elasticsearch_dsl from emitting
        # a custom analyzer definition for "ik_max_word" -- confirm.
        return {}


ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])


class ArticleType(DocType):
    """Document type whose `suggest` field is a completion field."""

    suggest = Completion(analyzer=ik_analyzer)
    # The remaining fields are elided in the original excerpt.
    ...
Items类
from models.es_types import ArticleType
from elasticsearch_dsl.connections import connections

es = connections.create_connection(ArticleType._doc_type.using)


def gen_suggests(index, info_tuple):
    """Build the completion-suggester entries for a document.

    Args:
        index: Name of the index whose analyze API is called.
        info_tuple: Iterable of ``(text, weight)`` pairs. Empty texts are
            skipped.

    Returns:
        A list of ``{"input": [...], "weight": weight}`` dicts, one per pair
        that contributed at least one new token. A token already emitted for
        an earlier pair is not repeated for a later one.
    """
    used_words = set()
    suggests = []
    for text, weight in info_tuple:
        if text:
            # Call the ES analyze API to tokenize the string.
            words = es.indices.analyze(index=index, analyzer="ik_max_word", params={'filter': ["lowercase"]}, body=text)
            # Single-character tokens are dropped.
            analyzed_words = {r["token"] for r in words["tokens"] if len(r["token"]) > 1}
            new_words = analyzed_words - used_words
            # Remember what was emitted so later pairs do not repeat it;
            # without this the subtraction above never removes anything.
            used_words.update(new_words)
        else:
            new_words = set()
        if new_words:
            suggests.append({"input": list(new_words), "weight": weight})
    return suggests


# NOTE: scrapy, redis_cli and article are used below but are not defined in
# this excerpt; presumably they come from the parts elided with `...`.
class JobBoleArticleItem(scrapy.Item):
    ...

    def save_to_es(self):
        ...
        article.suggest = gen_suggests(ArticleType._doc_type.index, ((article.title, 10), (article.tags, 7)))
        article.save()
        redis_cli.incr("jobbole_count")
        return
ES搜索语法
POST myindex/_search?pretty
{
  "suggest": {
    "my-suggest": {
      "text": "linux",
      "completion": {
        "field": "suggest",
        "fuzzy": { "fuzziness": 2 }
      }
    }
  },
  "_source": ["title"]
}
自动补全建议核心代码
# As written in the Django views module.
# NOTE: View, HttpResponse and json are used below but are not imported in
# this excerpt; the surrounding module must provide them.
from search.models import ArticleType


class SearchSuggest(View):
    """Return autocomplete suggestions for the ``s`` query parameter as JSON."""

    def get(self, request):
        """Respond with a JSON array of the titles of the suggested documents.

        An empty or missing ``s`` parameter yields an empty array without
        running a query.
        """
        key_words = request.GET.get('s', '')
        re_datas = []
        if key_words:
            s = ArticleType.search()
            s = s.suggest('my_suggest', key_words, completion={
                "field": "suggest",
                "fuzzy": {
                    "fuzziness": 2
                },
                "size": 10
            })
            suggestions = s.execute_suggest()
            # Only the options of the first suggestion entry are read.
            for match in suggestions.my_suggest[0].options:
                source = match._source
                re_datas.append(source["title"])
        return HttpResponse(json.dumps(re_datas), content_type="application/json")
elasticsearch内容搜索功能
# Data ingestion: same as above.
# Core search code
# As written in the Django views module.
# NOTE: View, render and datetime are used below but are not imported in
# this excerpt; the surrounding module must provide them.
from elasticsearch import Elasticsearch

client = Elasticsearch(hosts=["127.0.0.1"])

# Number of hits requested per result page.
PAGE_SIZE = 10


class SearchView(View):
    """Run a full-text search against the "jobbole" index and render results."""

    def get(self, request):
        """Search for the ``q`` parameter and render ``result.html``.

        Query parameters read: ``q`` (search text), ``s_type`` (defaults to
        "article"), and ``p`` (1-based page number; a non-integer value or a
        value below 1 falls back to page 1).
        """
        key_words = request.GET.get("q", "")
        s_type = request.GET.get("s_type", "article")  # read but unused in this excerpt
        page = request.GET.get("p", "1")
        try:
            page = int(page)
        except ValueError:
            page = 1
        if page < 1:
            # A page below 1 would produce a negative "from" offset.
            page = 1

        start_time = datetime.now()
        response = client.search(
            index="jobbole",
            body={
                "query": {
                    "multi_match": {
                        "query": key_words,
                        "fields": ["tags", "title", "content"]
                    }
                },
                "from": (page - 1) * PAGE_SIZE,
                "size": PAGE_SIZE,
                "highlight": {
                    "pre_tags": ['<span class="keyWord">'],
                    "post_tags": ['</span>'],
                    "fields": {
                        "title": {},
                        "content": {},
                    }
                }
            }
        )
        end_time = datetime.now()
        last_seconds = (end_time - start_time).total_seconds()

        total_nums = response["hits"]["total"]
        if isinstance(total_nums, dict):
            # Elasticsearch 7+ reports the total as {"value": ..., "relation": ...}.
            total_nums = total_nums["value"]

        # Round the page count up based on the number of hits, not on the
        # current page number.
        page_nums, remainder = divmod(total_nums, PAGE_SIZE)
        if remainder:
            page_nums += 1

        hit_list = []
        for hit in response["hits"]["hits"]:
            source = hit["_source"]
            # A hit has no "highlight" key when no fragment was produced for it.
            highlight = hit.get("highlight", {})
            hit_dict = {}
            if "title" in highlight:
                hit_dict["title"] = "".join(highlight["title"])
            else:
                hit_dict["title"] = source["title"]
            if "content" in highlight:
                hit_dict["content"] = "".join(highlight["content"])[:500]
            else:
                hit_dict["content"] = source["content"][:500]
            hit_dict["create_date"] = source["create_date"]
            hit_dict["url"] = source["url"]
            hit_dict["score"] = hit["_score"]
            hit_list.append(hit_dict)

        return render(request, "result.html", {
            "page": page,
            "all_hits": hit_list,
            "key_words": key_words,
            "total_nums": total_nums,
            "page_nums": page_nums,
            "last_seconds": last_seconds,
        })
scrapy框架+django框架组合使用
github项目参考
https://github.com/holgerd77/django-dynamic-scraper
Elasticsearch之pythonAPI简单使用
相关内容
- python学习之-propetry装饰器,,propetry是一
- appnium定位+操作方式(python),,1.定位:A.利用A
- python3学习(3),,练习题:1.?26个
- python对kafka的基本操作,,-- coding:
- Python数据挖掘—分类—随机森林,,概念随机森林(Ran
- 9-->>python3,,判断一个整数是否是回
- python的return,,关于python的r
- Python的一些版本分析,,Python 2.6
- Python操作excel的方法总结(xlrd、xlwt、openpyxl),
- 选择 Python3.6 还是 Python 3.7,,转自 白月黑羽Pyt
评论关闭