es查询与聚合



"""

官方文档：https://www.elastic.co/guide/cn/elasticsearch/guide/current/aggregations.html

官方文档：https://elasticsearch-dsl.readthedocs.io/en/latest/search_dsl.html

参考（bug解决方案）：https://blog.csdn.net/hanyuyang19940104/article/details/81668880

可参考：https://blog.csdn.net/junfeng666/article/details/78251788

可参考： https://linux.ctolib.com/elasticsearch-dsl-py.html

"""

# metric的方法有sum、avg、max、min, value_count等等

import time

from elasticsearch import Elasticsearch

from elasticsearch_dsl import Search, Q, A

from elasticsearch.helpers import bulk

import requests

import json

# Module-level client shared by every helper in this script; it is configured
# for host "localhost" on port 9200.
es = Elasticsearch(['localhost'], port=9200)

# Single sample document; the nested ``address`` object is what q_search
# matches on (address.city).
dict_1 = dict(name="test", ac="bob", address=dict(city="shanghai"))

# Default batch of sample documents that bulk_data falls back to when it is
# called without data.
dict_2 = [
    dict(name='bob', age=100, ac="sssssss"),
    dict(name='marry', age=110, ac="i am marry"),
    dict(name='lili', age=155, ac="helloworld"),
]

def get_data_by_id():
    """Fetch the ``account`` document with id ``'qwe'`` from the ``bank`` index.

    Returns:
        Whatever ``es.get`` returns for that lookup.
    """
    lookup = {"index": "bank", "doc_type": "account", "id": 'qwe'}
    return es.get(**lookup)

def query_data():
    """Search ``bank``/``account`` without a request body.

    Returns:
        The raw response from ``es.search``.
    """
    return es.search(index="bank", doc_type="account")

def index_data():
    """Index the module-level sample document ``dict_1`` into ``bank``/``account``.

    Returns:
        The response from ``es.index``.
    """
    response = es.index(index="bank", doc_type="account", body=dict_1)
    return response

def bulk_data(data=None):
    """Bulk-index documents into the ``cars`` index under type ``transactions``.

    Args:
        data: Iterable of document dicts. Any falsy value (``None``, an empty
            list, ...) falls back to the module-level ``dict_2`` sample batch.

    Returns:
        The first element of the tuple returned by ``bulk`` (the success
        count). ``raise_on_error=True`` is passed to ``bulk``.
    """
    documents = data if data else dict_2

    # '_op_type' accepts: index, update, create, delete.
    # To target the bank/account sample index instead, swap '_index'/'_type'.
    actions = [
        {
            '_op_type': 'index',
            '_index': "cars",
            "_type": "transactions",
            "_source": document,
        }
        for document in documents
    ]

    success, _ = bulk(es, actions=actions, raise_on_error=True)
    return success

def Q_func():
    """Search ``bank`` for documents where both ``name`` and ``ac`` match "bob".

    Builds a ``bool`` query whose ``must`` list holds two ``match`` clauses,
    executes it, then prints the response and ``len()`` of the response.
    Nothing is returned.

    Reference: https://elasticsearch-dsl.readthedocs.io/en/latest/search_dsl.html
    """
    s = Search(using=es, index="bank")

    # Equivalent operator form: Q('match', name='bob') & Q('match', ac='bob').
    # A multi-field alternative would be:
    #   Q("multi_match", query="bob", fields=["name", "ac"])
    s.query = Q('bool', must=[Q('match', name='bob'), Q('match', ac='bob')])

    # The query is already attached to ``s``; execute it directly rather than
    # going through an extra argument-less ``s.query()`` call first.
    res_3 = s.execute()

    print(res_3)
    print(len(res_3))
    # Example of the printed response:
    # <Response: [<Hit(bank/account/a_AJWGYB6B4UEZt2YIRu): {'name': 'marry', 'age': 10, 'ac': 'i am marry'}>

def q_search():
    """Match on a second-level field (``address__city='shanghai'``) in ``bank``.

    The target document shape is the module-level ``dict_1``:
    ``{"name": "test", "ac": "bob", "address": {"city": "shanghai"}}``.
    Prints the response; returns nothing.

    Tips: chain ``.source(["address"])`` to restrict the returned fields, or
    use ``.filter('term', category__keyword='Python')`` for a term filter.
    """
    search = Search(using=es, index="bank").query('match', address__city='shanghai')
    response = search.execute()
    print(response)

# 聚合：

def A_func():
    """Aggregate the ``bank`` index: bucket by name and sum ``age`` per bucket.

    Prints every hit in the response followed by ``len()`` of the response.
    Nothing is returned.

    Fixes relative to the earlier version:
      * the bucket type was ``'match'``, which is a query type and not an
        aggregation; ``'terms'`` is used instead;
      * the metric script read ``doc['downFlux']`` / ``doc['upFlux']``, fields
        that none of this file's sample documents contain; the metric now sums
        the ``age`` field;
      * the chained call is wrapped in parentheses instead of relying on a
        backslash continuation.

    NOTE(review): ``name.keyword`` assumes the index uses dynamic mapping
    (text field with a ``keyword`` sub-field), matching the ``color.keyword``
    usage elsewhere in this file -- confirm against the real mapping.
    """
    s = Search(using=es, index="bank")

    # bucket(group_name, agg_type, **params) returns the new bucket, so the
    # chained metric(name, agg_type, **params) is nested inside it. This
    # produces a request shaped like:
    #   {'sum_age': {'terms': {'field': 'name.keyword'},
    #                'aggs': {'max_age': {'sum': {'field': 'age'}}}}}
    # A script-based metric is also possible, e.g.
    #   .metric("max_age", "sum", script="doc['a'].value+doc['b'].value")
    (
        s.aggs
        .bucket('sum_age', 'terms', field='name.keyword')
        .metric("max_age", "sum", field='age')
    )

    res = s.execute()

    # Iterating a response yields its hits (not the aggregation buckets).
    for i in res:
        print(i)
    print(len(res))

# index_data()

# q_search()

# A_func()

# print(bulk_data())

def curl_es():
    """Run a raw-body ``terms`` aggregation on ``color.keyword`` over ``cars``.

    Sends the request through the low-level client (``es.search``) with
    ``size: 0`` in the body and prints the raw response. Nothing is returned.

    The previous version built a local list of sample documents on every call
    without ever using it; it is kept below as a comment for reference only.
    """
    # Sample ``cars``/``transactions`` documents this aggregation was written
    # against (they can be loaded with ``bulk_data([...])``):
    #   {"price": 10000, "color": "red", "make": "honda", "sold": "2014-10-28"}
    #   {"price": 20000, "color": "red", "make": "honda", "sold": "2014-11-05"}
    #   {"price": 30000, "color": "green", "make": "ford", "sold": "2014-05-18"}
    #   {"price": 15000, "color": "blue", "make": "toyota", "sold": "2014-07-02"}
    #   {"price": 12000, "color": "green", "make": "toyota", "sold": "2014-08-19"}
    #   {"price": 20000, "color": "red", "make": "honda", "sold": "2014-11-05"}
    #   {"price": 80000, "color": "red", "make": "bmw", "sold": "2014-01-01"}
    #   {"price": 25000, "color": "blue", "make": "ford", "sold": "2014-02-12"}
    body = {
        "size": 0,
        "aggs": {
            "popular_colors": {
                "terms": {
                    "field": "color.keyword",
                },
            },
        },
    }

    res = es.search(index="cars", doc_type="transactions", body=body)
    print(res)

def agg_es():
    """Bucket ``cars`` documents by colour and count ``make`` values per bucket.

    Prints, in order: the request dict, the response, the response's
    aggregations, and the full response as a dict. Nothing is returned.

    Fix relative to the earlier version: the metric type was ``'count'``,
    which is not an aggregation type; the value-count aggregation is named
    ``'value_count'``. Other metric types include sum, avg, max and min.

    Each bucket in the response has the shape
    ``{'key': <colour>, 'doc_count': <n>, 'sum_test': {'value': <metric>}}``.
    ``doc_count`` is the number of documents in that bucket, which is not the
    same thing as the length of the ``buckets`` list. (The sample output
    previously pasted here showed per-colour price sums such as 130000.0, so
    it came from a ``sum`` metric run rather than from this request.)
    """
    # Appending .extra(size=0) to the Search would skip returning hits, which
    # speeds the query up when only the aggregations are needed.
    s = Search(using=es, index="cars", doc_type='transactions')

    # size=1000 on the terms bucket lifts the number of buckets returned
    # above the default of 10.
    s.aggs.bucket('test', 'terms', field='color.keyword', size=1000).metric(
        "sum_test", 'value_count', field='make.keyword'
    )

    print(s.to_dict(), '\n')

    res = s.execute()

    print(res)
    print(res.aggregations)
    print(res.to_dict())

# Script entry point: when run directly, execute the colour-bucket
# aggregation demo.
if __name__ == "__main__":

    agg_es()

# doc_count:查询出的记录条数,与聚合后的buckets的list 长度不同

es查询与聚合的更多相关文章

es查询，聚合、平均值、值范围、cardinality去重查询
原文:https://blog.csdn.net/sxf_123456/article/details/78195829 普通查询 GET ana-apk/_search { "query& ...
ElasticSearch 学习记录之ES查询添加排序字段和使用missing或existing字段查询
ES添加排序在默认的情况下,ES 是根据文档的得分score来进行文档额排序的.但是自己可以根据自己的针对一些字段进行排序.就像下面的查询脚本一样.下面的这个查询是根据productid这个值进行排 ...
Elasticsearch(8) --- 聚合查询(Metric聚合)
Elasticsearch(8) --- 聚合查询(Metric聚合) 在Mysql中,我们可以获取一组数据的最大值(Max).最小值(Min).同样我们能够对这组数据进行分组(Group).那么 ...
Elasticsearch(9) --- 聚合查询(Bucket聚合)
Elasticsearch(9) --- 聚合查询(Bucket聚合) 上一篇讲了Elasticsearch聚合查询中的Metric聚合:Elasticsearch(8) --- 聚合查询(Metri ...
Es查询工具使用
Kibana按照索引过滤数据 1.创建索引模式 2.查询索引中的数据 Es查询不返回数据创建索引的时候指定mapping mappings={ "mappings": { &qu ...
ES查询语句
记录常用的es 查询聚合 GET _cat / indices GET / p_ext_develop / _mapping / g GET / p_ext_develop / _analyze { ...
ElasticSearch的高级复杂查询：非聚合查询和聚合查询
一.非聚合复杂查询(这儿展示了非聚合复杂查询的常用流程) 查询条件QueryBuilder的构建方法 1.1 精确查询(必须完全匹配上,相当于SQL语句中的“=”) ① 单个匹配 termQuery ...
java查询elasticsearch聚合
java查es多分组聚合: SearchRequestBuilder requestBuilderOfLastMonth = transportClient.prepareSearch(TYPE_NA ...
Elasticsearch使用系列-基本查询和聚合查询+sql插件
Elasticsearch使用系列-ES简介和环境搭建 Elasticsearch使用系列-ES增删查改基本操作+ik分词 Elasticsearch使用系列-基本查询和聚合查询+sql插件 Elas ...

随机推荐

快速平方根算法的javascript实现
前几天看见了一个来自雷神之槌的平方根源码,原理多方有介绍,不赘述. 源码是c语言写的,我思考后发现这样的算法在javascript中也是可以完成的. function InvSqrt(x){ var ...
虚拟现实-VR-UE4-构建光照显示光照构建失败，Swarm启动失败
闲的无聊折腾,发现想构建光照的时候,总是显示失败如下图百度许久,有大神指出是我在编译源码的的时候没有将其中的某个模块编译进去,只需要重新编译摸个模块就好在UE4 的sln文件下,会看到一个Unr ...
常用模块（chardet）
作用:检测二进制的编码格式,不是百分百正确 import chardet f = open('test.txt', 'rb')data = f.read()print(data)result = ch ...
LeetCode 33——搜索旋转排序数组
1. 题目 2. 解答 2.1. 方法一直接进行二分查找,在判断查找方向的时候详细分类. 当 nums[mid] < target 时, 若 nums[left] <= nums[mid ...
linux消息队列通信
IPC机制进程间通信机制(Inter Process Communication,IPC),这些IPC机制的存在使UNIX在进程通信领域手段相当丰富,也使得程序员在开发一个由多个进程协作的任务组成的 ...
http长连接和短连接以及连接的本质
HTTP长连接和短连接原理浅析本文主要讲了,http长连接本质是tcp的长连接. 网络通信过程中,建立连接的本质是什么? 连接的本质建立连接这个词,是从早期的电话系统中来的,那个时候,“建立连接” ...
Mac下离线安装SDK
背景之前电脑上使用的是Android Studio,其sdk在Libarey下,最近需要在Eclipse下继续做之前的安卓项目,在配置sdk时eclipse自动选择了之前Android Studio ...
PTA循环，函数，数组作业
PTA循环实验作业题目一:统计素数并求和 ### 1.PTA提交列表 2.设计思路(+流程图) 先定义变量(包含素数区间,循环次数,除数,素数个数记录和和的记录) 输入范围一重循环:循环提取自然数 ...
C - 安装雷达
C - 安装雷达 Time Limit: 1000/1000MS (C++/Others) Memory Limit: 65536/65536KB (C++/Others) Problem Descr ...
hdu 1575 Tr A (二分矩阵)
Tr A Time Limit: 1000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)Total Submis ...

es查询与聚合

es查询与聚合的更多相关文章

随机推荐

热门专题