Elasticsearch: how to do a sub-aggregation in a sum aggregation (Python)

Hello, I have an index in Elasticsearch with the following fields:
Plant, Department, Date, Value
I am trying to write a query in Elasticsearch.
1) Group by Plant and Date for specific departments and sum Value:
es = Elasticsearch('elasticsearch:9200')
body = Dict({"query": {
"bool": {
"must_not": {
"match": {
"Department": "Indirect*"}}}},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword", "size":10000},
"aggs": {
"group_date": {
"terms": {
"field": "Date"},
"aggs": {
"group_value": {
"sum":{
"field": "Value"}}}}}}}})
2) Group by Plant and Range of Dates, and get avg and median:
es = Elasticsearch('elasticsearch:9200')
body = Dict(
{"query": {
"bool": {
"must_not": {
"match": {
"Department_Substrate": "Indirect*"}}}},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword",
"size": 10000},
"aggs": {
"group_date": {
"range": {
"field": "Date",
"ranges": datelist},
"aggs": {
"Median": {
"percentiles": {
"field": "Value",
"percents": [25]}},
"Mean": {
"avg": {
"field":
"Value}}}}}}}})
This works too, but in this case I did not do the grouping by Plant and Date beforehand, so combining both queries I have something like:
body = Dict({"query": {
"bool": {
"must_not": {
"match": {
"Department_Substrate": "Indirect*"}}}},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword", "size":10000},
"aggs": {
"group_date": {
"terms": {
"field": "Date"},
"aggs": {
"group_value": {
"sum":{
"field": "Value"},
"aggs": {
"group_date": {
"range": {
"field": "Date",
"ranges": datelist},
"aggs": {
"Median": {
"percentiles": {
"field": "Value",
"percents": [25]}},
"Mean": {
"avg": {
"field":
"Value"}}}}}}}}}}}})
res = es.search(index=self.index, doc_type='test', body=body)
I get this error:
TransportError: TransportError(500, 'aggregation_initialization_exception', 'Aggregator [group_value] of type [sum] cannot accept sub-aggregations')
So, is there a way to do this?
If it helps, my previous Python (pandas) code was:
data = test[~test.Department.str.startswith('Indirect')]
group1 = data.groupby(['Plant', 'Date'])['Value'].sum()
group2 = pd.DataFrame(group1.reset_index()).groupby(['Plant', pd.Grouper(key='Date', freq='W')])['Value'].median()

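The error is clear: "Aggregator [group_value] of type [sum] cannot accept sub-aggregations".
A 'sum' aggregation is a metrics aggregation: it produces a single value per bucket, so its result cannot be split any further and it cannot contain sub-aggregations.
So you need to change the position of the sum aggregation: put it inside the bucket aggregations as a leaf instead of wrapping other aggregations in it.
For example: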
{
"query": {
"bool": {
"must_not": {
"match": {
"Department_Substrate": "Indirect*"
}
}
}
},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword",
"size": 10000
},
"aggs": {
"group_date": {
"terms": {
"field": "Date"
},
"aggs": {
"group_date": {
"range": {
"field": "Date",
"ranges": "sdf"
},
"aggs": {
"Median": {
"percentiles": {
"field": "Value",
"percents": [
25
]
}
},
"aggs": {
"group_value": {
"sum": {
"field": "Value"
}
}
}
}
}
}
}
}
}

Related

elasticsearch doesn't raise score when matching on n-gram inside should clause

I'm trying to search for an item in Elasticsearch and raise the score when I also find an SKU or something similar.
This is my index configuration:
{
"settings": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 3,
"token_chars": [
"letter",
"digit",
"symbol",
"whitespace",
"punctuation"
]
}
}
}
},
"mappings": {
"properties": {
"job_desc": {
"type": "text",
"analyzer": "ngram_analyzer"
},
"sku": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_price": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_type": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
}
}
}
}
and this is my query to find the item:
{
"bool": {
"must": {
"multi_match": {
"query": "<name of item & possibly sku>",
"fields": [
"name",
"sku"
]
}
},
"should": {
"match": {
"job_desc": {
"query": "424241 (sku of item)",
"analyzer": "ngram_analyzer"
}
}
}
}
}
But for some reason, the "should" clause does not raise the score of the search results.
What is the issue here?
Thanks in advance,
Yaniv Akiva

Why am I getting a null value instead of an aggregated response?

I'm trying to perform a min aggregation inside a nested aggregation in Elasticsearch, but I am still getting null values.
GET /my_index/_search
{
"query": {
"match": {
"FirstName": "Cheryl"
}
},
"aggs": {
"art": {
"nested": {
"path": "art"
},
"aggs": {
"min_price": {
"min": {
"field": "art.Income"
}
}
}
}
}
}
Mappings :
{
"mappings": {
"properties": {
"art": {
"type": "nested",
"properties": {
"FirstName": {
"type": "text"
},
"Price": {
"type": "integer"
}
}
}
}
}
}

Unhashable type 'dict' when trying to send an Elasticsearch query

I keep getting the following error in Python:
Exception has occurred: TypeError unhashable type: 'dict'
on line 92:
"should": [],
"must_not": []
This is the query string
res = es.search(
scroll = '2m',
index = "logstash-*",
body = {
{
"aggs": {
"2": {
"terms": {
"field": "src_ip.keyword",
"size": 50,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"cardinality": {
"field": "src_ip.keyword"
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "flow.start",
"format": "date_time"
},
{
"field": "timestamp",
"format": "date_time"
},
{
"field": "tls.notafter",
"format": "date_time"
},
{
"field": "tls.notbefore",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": 1555777931992,
"lte": 1558369931992,
"format": "epoch_millis"
}
}
}
],
"filter": [
{
"match_all": {}
}
],
"should": [],
"must_not": []
}
}
}
})
The value of body is a set: braces without key-value pairs form a set literal (e.g., {1, 2} is a set), and inside this set you have placed a dictionary.
Items in a set have to be hashable, and a dictionary is not, which is why you get the TypeError.
As the comment from @Carcigenicate says, this looks like a typo: {{ }} was written instead of { } for the value of body.
The Elasticsearch documentation shows that body should be a dictionary.
See the Python docs for more about sets.

Build relation between elasticsearch aggregators - Nested groupings

I need to create nested groupings between fields.
Let us consider the example given below,
Documents:
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Chennai"
},
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Trichy"
},
{
"keyword": "abc",
"country": "IN",
"state": "KL",
"city": "TVM"
},
{
"keyword": "abc",
"country": "US",
"state": "Cal",
"city": "California"
}
Required output(Something like this):
{
"country": "IN",
"TN": [
"Chennai",
"Trichy"
],
"KL": [
"TVM"
]
},
{
"country": "US",
"Cal": [
"California"
]
}
Query used:
{
"from": 0,
"size": 1,
"aggs": {
"country": {
"terms": {
"field": "country.keyword",
"size": 50000
}
},
"state": {
"terms": {
"field": "state.keyword",
"size": 50000
}
},
"city": {
"terms": {
"field": "city.keyword",
"size": 50000
}
}
},
"query": {
"query_string": {
"query": "(keyword:abc) "
}
}
}
For this query I got separate buckets as output for city, state and country.
But what I need is for city to be grouped under state, and state to be grouped under country.
Thanks in advance.
The following query, which nests the aggregations inside one another, should work for you:
{
"query": {
"query_string": {
"query": "(keyword:abc)"
}
},
"size": 0,
"aggs": {
"country_agg": {
"terms": {
"field": "country.keyword",
"size": 10
},
"aggs": {
"state_agg": {
"terms": {
"field": "state.keyword",
"size": 10
},
"aggs": {
"city_agg": {
"terms": {
"field": "city.keyword",
"size": 10
}
}
}
}
}
}
}
}

Sum for Multiple Ranges on GroupBy Aggregations in Elasticsearch

Documents with the following mapping need to be aggregated on multiple levels, grouping them by one field and then by another.
Mapping:
{
'predictions': {
'properties': {
'Company':{'type':'string'},
'TxnsId':{'type':'string'},
'Emp':{'type':'string'},
'Amount':{'type':'float'},
'Cash/online':{'type':'string'},
'items':{'type':'float'},
'timestamp':{'type':'date'}
}
}
}
My requirement is a bit complex. I need to:
For each Emp (getting the distinct employees)
Check whether it is an online or a cash transaction
Group by items with ranges like 0-10, 11-20, 21-30, ...
Sum the Amount
The final output should look like:
>Emp-online-range-Amount
>a-online-(0-10)-1240$
>a-online-(21-30)-3543$
>b-online-(0-10)-2345$
>b-online-(11-20)-3456$
Something like this should do the job:
{
"size": 0,
"aggs": {
"by_emp": {
"terms": {
"field": "Emp"
},
"aggs": {
"cash_online": {
"filters": {
"filters": {
"cashed": {
"term": {
"Cash/online": "cached"
}
},
"online": {
"term": {
"Cash/online": "online"
}
}
}
},
"aggs": {
"ranges": {
"range": {
"field": "items",
"ranges": [
{
"from": 0,
"to": 11
},
{
"from": 11,
"to": 21
},
{
"from": 21,
"to": 31
}
]
},
"aggs": {
"total": {
"sum": {
"field": "Amount"
}
}
}
}
}
}
}
}
}
}

Categories