issue in Elastic Search Term Aggregation - python

In an Elasticsearch aggregation query, I need to get all the movies watched by users who watched the movie "Frozen". This is what one of my result documents looks like:
{
"_index": "user",
"_type": "user",
"_id": "ovUowmUBREWOv-CU-4RT",
"_version": 4,
"_score": 1,
"_source": {
"movies": [
"Angry birds 1",
"PINNOCCHIO",
"Frozen",
"Hotel Transylvania 3"
],
"user_id": 86
}
}
This is the query I'm using.
{
"query": {
"match": {
"movies": "Frozen"
}
},
"size": 0,
"aggregations": {
"movies_like_Frozen": {
"terms": {
"field": "movies",
"min_doc_count": 1
}
}
}
}
The results I got in the buckets are correct, but the movie names are split on whitespace, like this:
"buckets": [
{
"key": "3",
"doc_count": 2
},
{
"key": "hotel",
"doc_count": 2
},
{
"key": "transylvania",
"doc_count": 2
},
{
"key": "1",
"doc_count": 1
},
{
"key": "angry",
"doc_count": 1
},
{
"key": "birds",
"doc_count": 1
}
]
How can I get buckets with "Angry birds 1" and "Hotel Transylvania 3" as the result?
Please help.

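In Elasticsearch 6.x, every text field is analyzed implicitly, which is why the terms aggregation returns individual tokens instead of whole movie names. To override this, you need to create the index with an explicit mapping in which the field is not analyzed, and then insert the documents into it. (Note: the "index": "not_analyzed" setting is the Elasticsearch 2.x syntax; from 5.x onward the equivalent is a field of type "keyword", and the aggregation should target that keyword field.)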
In your case,
{
"mappings": {
"user": {
"properties": {
"movies": {
"type": "text",
"index": "not_analyzed",
"fields": {
"keyword": {
"type": "text",
"index": "not_analyzed"
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
Hope it works.

Related

A Python script that can navigate a .json and export a .csv based on a search term

I want to take a .json of "_PRESET..." items and their "code-state"s with "actions" that contain other "code-state"s, "appearance"s, and "switch"s and turn it into .csv produced from the actions under a given "_PRESET...", including the "code-state"s and the "actions" listed under their individual entries.
This would allow a user to enter the "_PRESET..." name and receive a 3-column .csv file containing each action's "type", name, and "value". There are of course ways to export the entire .json easily, but I can't fathom a way to navigate it in the way that is needed.
For example, the user enters "_PRESET_Config_A" for the following
input.json:
{
"abc_data": {
"_PRESET_Config_A": {
"properties": {
"category": "configuration",
"name": "_PRESET_Config_A",
"collection": null,
"description": ""
},
"actions": {
"EN-R9": {
"type": "code_state",
"value": "on"
}
}
},
"PN4FP": {
"properties": {
"category": "uncategorized",
"name": "PN4FP",
"collection": null,
"description": ""
},
"actions": {
"E_xxxxxx_Default": {
"type": "appearance",
"value": "M_Red"
}
}
},
"HEDIS": {
"properties": {
"category": "uncategorized",
"name": "HEDIS",
"collection": null,
"description": ""
},
"actions": {
"E_xxxxxx_Default": {
"type": "appearance",
"value": "M_Purple"
}
}
},
"_PRESET_Config_B": {
"properties": {
"category": "configuration",
"name": "_PRESET_Config_A",
"collection": null,
"description": ""
},
"actions": {
"HEDIS": {
"type": "code_state",
"value": "on"
}
}
},
"EN-R9": {
"properties": {
"category": "uncategorized",
"name": "EN-R9",
"collection": null,
"description": ""
},
"actions": {
"PN4FP": {
"type": "code_state",
"value": "on"
},
"switch_StorageBin": {
"type": "switch",
"value": "00_w_Storage_Bin_R9"
}
}
}
}
}
Desired output.csv
type,name,value
code_state,EN-R9,on
code_state,PN4FP,on
appearance,E_xxxxxx_Default,M_Red
switch,switch_StorageBin,00_w_Storage_Bin_R9

elasticsearch doesn't raise score when matching on n-gram inside should clause

I'm trying to search for an item in Elasticsearch, and raise the score in case I find an SKU or something like that.
This is my index configuration:
{
"settings": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 3,
"token_chars": [
"letter",
"digit",
"symbol",
"whitespace",
"punctuation"
]
}
}
}
},
"mappings": {
"properties": {
"job_desc": {
"type": "text",
"analyzer": "ngram_analyzer"
},
"sku": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_price": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_type": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
}
}
}
}
and this is my query to find the item:
{
"bool": {
"must": {
"multi_match": {
"query": "<name of item & possibly sku>",
"fields": [
"name",
"sku"
]
}
},
"should": {
"match": {
"job_desc": {
"query": "424241 (sku of item)",
"analyzer": "ngram_analyzer"
}
}
}
}
}
But for some reason, the "should" clause does not raise the score of the search.
What is the issue here?
Thanks in advance,
Yaniv Akiva

Keyword searching AND Filtering in Elasticsearch

I have to search for keywords in one field and for an exact match in a different field. I have tried something, but it does not seem to work at all.
I tried giving the full article together with the author, exactly as I have put them in AWS Elasticsearch, but it still won't retrieve anything.
query=json.dumps({
"query": {
"bool": {
"must": {
"match": {
"article": "man killed kim jones"
}
},
"filter": {
"term": {
"author": "Barbara Boyer"
}
}
}
}
})
response = requests.get(url-ES-domain/data/_search?",headers=headers,data=(query))
response.json()
Mapping details
{
"mappings": {
"article": {
"full_name": "article",
"mapping": {
"article": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
This is for the keyword in the article. Even if I give the full article as it is in the ES index, it still won't give any hits.
Try like this
Mappings
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 3,
"max_gram": 20,
"token_chars": [
"letter",
"digit"
]
}
},
"normalizer": {
"lc_normalizer": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"max_ngram_diff": 20
},
"mappings": {
"properties": {
"article": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lc_normalizer"
}
}
},
"key": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publication": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
Query
{
"query": {
"bool": {
"must" : {
"multi_match" : {
"query": "man killed kim jones",
"fields": [ "article", "title" ]
}
},
"filter": {
"term": {
"author.keyword": "Maddie Hanna"
}
}
}
}
}
The above query matches and returns the document you have added to the index.
When you are searching for a multi-word match, I suggest you use the match_phrase query. By default, Elasticsearch creates a keyword sub-field for dynamically mapped text fields.
Note: You can try these things using Kibana UI provided by the elastic team. It will save a lot of time.

Build relation between elasticsearch aggregators - Nested groupings

I need to create nested groupings between fields.
Let us consider the example given below,
Documents:
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Chennai"
},
{
"keyword": "abc",
"country": "IN",
"state": "TN",
"city": "Trichy"
},
{
"keyword": "abc",
"country": "IN",
"state": "KL",
"city": "TVM"
},
{
"keyword": "abc",
"country": "US",
"state": "Cal",
"city": "California"
}
Required output(Something like this):
{
"country": "IN",
"TN": [
"Chennai",
"Trichy"
],
"KL": [
"TVM"
]
},
{
"country": "US",
"Cal": [
"California"
]
}
Query used:
{
"from": 0,
"size": 1,
"aggs": {
"country": {
"terms": {
"field": "country.keyword",
"size": 50000
}
},
"state": {
"terms": {
"field": "state.keyword",
"size": 50000
}
},
"city": {
"terms": {
"field": "city.keyword",
"size": 50000
}
}
},
"query": {
"query_string": {
"query": "(keyword:abc) "
}
}
}
For this query I got separate buckets as output for city, state and country.
But what I need is city should be grouped under state and state should be grouped under country.
Thanks in advance.
Following query with aggregation should work for you
{
"query": {
"query_string": {
"query": "(keyword:abc)"
}
},
"size": 0,
"aggs": {
"country_agg": {
"terms": {
"field": "country.keyword",
"size": 10
},
"aggs": {
"state_agg": {
"terms": {
"field": "state.keyword",
"size": 10
},
"aggs": {
"city_agg": {
"terms": {
"field": "city.keyword",
"size": 10
}
}
}
}
}
}
}
}

"object mapping [prices] can't be changed from nested to non-nested" on Bulk Python

I'm trying to insert a doc into Elasticsearch, but every time I try to insert it from Python, it returns an error. If I insert it from Kibana or cURL, it succeeds.
I already tried elasticsearch-dsl, but I got the same error.
(Sorry for my bad English, I'm from Brazil :D)
Error i've got:
elasticsearch.helpers.BulkIndexError: ((...)'status': 400, 'error': {'type':
'illegal_argument_exception', 'reason': "object mapping [prices] can't be changed from nested to non-nested"}}}])
My code:
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
doc = [{
"_index": "products",
"_type": "test_products",
"_source": {
[...]
"prices": {
"latest": {
"value": 89,
"when": 1502795602848
},
"old": [
{
"value": 0,
"when": 1502795602848
}
]
},
"sizes": [
{
"name": "P",
"available": True
},
{
"name": "M",
"available": True
}
],
"created": "2017-08-15T08:13:22.848284"
}
}]
bulk(self.es, doc, index="products")
My ES mapping:
{
"test_products": {
"mappings": {
"products": {
"properties": {
"approved": {
"type": "boolean"
},
"available": {
"type": "boolean"
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"buyClicks": {
"type": "integer"
},
"category": {
"type": "keyword"
},
"code": {
"type": "keyword"
},
"color": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"created": {
"type": "date"
},
"description": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"gender": {
"type": "keyword"
},
"images": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"likes": {
"type": "integer"
},
"link": {
"type": "keyword"
},
"name": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"prices": {
"type": "nested",
"properties": {
"latest": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
},
"old": {
"type": "nested",
"properties": {
"value": {
"type": "long"
},
"when": {
"type": "date",
"format": "dd-MM-yyyy||epoch_millis"
}
}
}
}
},
"redirectClicks": {
"type": "integer"
},
"sizes": {
"type": "nested",
"properties": {
"available": {
"type": "boolean"
},
"name": {
"type": "keyword"
},
"quantity": {
"type": "integer"
}
}
},
"slug": {
"type": "keyword"
},
"store": {
"type": "keyword"
},
"subCategories": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"tags": {
"type": "text",
"fields": {
"raw": {
"type": "text",
"term_vector": "yes",
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
},
"thumbnails": {
"type": "keyword"
}
}
}
}
}
}

Categories