Related
I'm trying to search for an item in Elasticsearch and raise its score when the query also matches an SKU or a similar identifier.
This is my index configuration:
{
"settings": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "ngram",
"min_gram": 2,
"max_gram": 3,
"token_chars": [
"letter",
"digit",
"symbol",
"whitespace",
"punctuation"
]
}
}
}
},
"mappings": {
"properties": {
"job_desc": {
"type": "text",
"analyzer": "ngram_analyzer"
},
"sku": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"name": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_price": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
},
"unit_type": {
"type": "text",
"fields": {
"keyword": {"type": "keyword", "ignore_above": 256}
}
}
}
}
}
And this is my query to find the item:
{
"bool": {
"must": {
"multi_match": {
"query": "<name of item & possibly sku>",
"fields": [
"name",
"sku"
]
}
},
"should": {
"match": {
"job_desc": {
"query": "424241 (sku of item)",
"analyzer": "ngram_analyzer"
}
}
}
}
}
But for some reason, the "should" clause does not raise the score of the search results.
What is the issue here?
Thanks in advance,
Yaniv Akiva
I have a JSON file that is received from a REST API. An example of the return is like this:
{
"d": {
"results": [
{
"__metadata": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')",
"type": "EmpEmployment"
},
"personIdExternal": "60000033",
"userId": "60000033",
"hiringNotCompleted": false,
"isECRecord": true,
"lastModifiedDateTime": "/Date(1642917586000+0000)/",
"endDate": "/Date(1675123200000)/",
"createdDateTime": "/Date(1641473919000+0000)/",
"createdOn": "/Date(1641473919000)/",
"originalStartDate": "/Date(1501545600000)/",
"customDate1": "/Date(1501545600000)/",
"customString17": null,
"customString18": null,
"customString19": null,
"assignmentClass": "ST",
"lastModifiedBy": "This Dude",
"okToRehire": true,
"customString4": null,
"customString3": "3",
"customString2": null,
"assignmentIdExternal": "60000033",
"customString16": null,
"lastModifiedOn": "/Date(1642917586000)/",
"customString1": null,
"createdBy": "This Dudette",
"seniorityDate": "/Date(1501545600000)/",
"startDate": "/Date(1659398400000)/",
"customString16Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString16Nav"
}
},
"customString1Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString1Nav"
}
},
"customString18Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString18Nav"
}
},
"customString3Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString3Nav"
}
},
"paymentInformationNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/paymentInformationNav"
}
},
"empJobRelationshipNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empJobRelationshipNav"
}
},
"personNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/personNav"
}
},
"empWorkPermitNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empWorkPermitNav"
}
},
"photoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/photoNav"
}
},
"compInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/compInfoNav"
}
},
"userNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/userNav"
}
},
"customString2Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString2Nav"
}
},
"customString19Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/customString19Nav"
}
},
"jobInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/jobInfoNav"
}
},
"wfRequestNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/wfRequestNav"
}
},
"costDistributionNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/costDistributionNav"
}
},
"empPayCompNonRecurringNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='60000033',userId='60000033')/empPayCompNonRecurringNav"
}
}
},
{
"__metadata": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')",
"type": "EmpEmployment"
},
"personIdExternal": "100003",
"userId": "100003",
"hiringNotCompleted": false,
"isECRecord": true,
"lastModifiedDateTime": "/Date(1638051713000+0000)/",
"endDate": null,
"createdDateTime": "/Date(1638051713000+0000)/",
"createdOn": "/Date(1638051713000)/",
"originalStartDate": "/Date(1635724800000)/",
"customDate1": null,
"customString17": null,
"customString18": null,
"customString19": null,
"assignmentClass": "ST",
"lastModifiedBy": "This Dudette",
"okToRehire": null,
"customString4": null,
"customString3": null,
"customString2": null,
"assignmentIdExternal": "100003",
"customString16": null,
"lastModifiedOn": "/Date(1638051713000)/",
"customString1": null,
"createdBy": "This Dude",
"seniorityDate": "/Date(1635724800000)/",
"startDate": "/Date(1635724800000)/",
"customString16Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString16Nav"
}
},
"customString1Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString1Nav"
}
},
"customString18Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString18Nav"
}
},
"customString3Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString3Nav"
}
},
"paymentInformationNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/paymentInformationNav"
}
},
"empJobRelationshipNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empJobRelationshipNav"
}
},
"personNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/personNav"
}
},
"empWorkPermitNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empWorkPermitNav"
}
},
"photoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/photoNav"
}
},
"compInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/compInfoNav"
}
},
"userNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/userNav"
}
},
"customString2Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString2Nav"
}
},
"customString19Nav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/customString19Nav"
}
},
"jobInfoNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/jobInfoNav"
}
},
"wfRequestNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/wfRequestNav"
}
},
"costDistributionNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/costDistributionNav"
}
},
"empPayCompNonRecurringNav": {
"__deferred": {
"uri": "https://someapi.insomeplace.com/odata/v2/EmpEmployment(personIdExternal='100003',userId='100003')/empPayCompNonRecurringNav"
}
}
}
]
}
}
I'm at present just looking to pull the userId & startDate from the JSON. I've tried using the explode command as shown in this example.
https://adatis.co.uk/parsing-nested-json-lists-in-databricks-using-python/
But all I seem to be able to do is put the entire result into one column in the dataframe, or if I use the following:
Some help in being pointed in the right direction would be great please. Am I better just building a custom schema and trying to parse the JSON into that?
All I'm looking to do is return the result as per the image, but with each userId and startDate on its own row, since they relate to each other.
Explode the results array so that each element ends up in its own row:
# explode() and col() live in PySpark's SQL functions module and must be imported.
from pyspark.sql.functions import col, explode

# multiLine=True because the sample file is one JSON document spread over many lines.
df = spark.read.json("./sample.json", multiLine=True)
# Replace column 'd' with one row per element of the d.results array.
df2 = df.withColumn('d', explode(col('d.results')))
# Pick the two fields of interest from each exploded element; show up to 10 rows untruncated.
df2.select(df2.d.userId, df2.d.startDate).show(10, False)
+--------+---------------------+
|d.userId|d.startDate |
+--------+---------------------+
|60000033|/Date(1659398400000)/|
|100003 |/Date(1635724800000)/|
+--------+---------------------+
You can add as many attributes as required, e.g.:
# Explode d.results into rows, then select fields of each element through the
# exploded column, which selectExpr below refers to as "col".
df.select(explode(col('d.results'))).\
selectExpr("col.userId","col.startDate","col.lastModifiedBy").\
show(10,False)
+--------+---------------------+--------------+
|userId |startDate |lastModifiedBy|
+--------+---------------------+--------------+
|60000033|/Date(1659398400000)/|This Dude |
|100003 |/Date(1635724800000)/|This Dudette |
+--------+---------------------+--------------+
I have to search for keywords in one field and an exact match in a different field. I have tried something but it does not seem to work at all.
I tried giving the full article together with the author, exactly as I indexed them in AWS Elasticsearch, but it still won't retrieve anything.
# Request body: full-text match on "article", filtered by an exact term on "author".
query = json.dumps({
    "query": {
        "bool": {
            "must": {
                "match": {
                    "article": "man killed kim jones"
                }
            },
            "filter": {
                "term": {
                    "author": "Barbara Boyer"
                }
            }
        }
    }
})
# NOTE(review): "url-ES-domain" looks like a placeholder for the real Elasticsearch
# domain endpoint; the URL must be a quoted string for this call to parse.
response = requests.get("url-ES-domain/data/_search?", headers=headers, data=query)
response.json()
Mapping details
{
"mappings": {
"article": {
"full_name": "article",
"mapping": {
"article": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
This is for the keyword in the article. Even if I give the full article as it is in the ES index, it still won't give any hits.
Try like this
Mappings
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 3,
"max_gram": 20,
"token_chars": [
"letter",
"digit"
]
}
},
"normalizer": {
"lc_normalizer": {
"type": "custom",
"filter": ["lowercase"]
}
}
},
"max_ngram_diff": 20
},
"mappings": {
"properties": {
"article": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lc_normalizer"
}
}
},
"key": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publication": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
Query
{
"query": {
"bool": {
"must" : {
"multi_match" : {
"query": "man killed kim jones",
"fields": [ "article", "title" ]
}
},
"filter": {
"term": {
"author.keyword": "Maddie Hanna"
}
}
}
}
}
The above query matches and returns the document you have added to the index.
When you are searching for a multi-word match, I suggest you use the match_phrase query. By default, Elasticsearch's dynamic mapping creates a keyword sub-field for text fields.
Note: You can try these things using Kibana UI provided by the elastic team. It will save a lot of time.
Hello I have an index in ElasticSearch with:
Plant, Department, Date, Value
I am trying to do a query in elasticsearch
1) Group by Plant and Date in specific departments and sum Value:
es = Elasticsearch('elasticsearch:9200')
body = Dict({"query": {
"bool": {
"must_not": {
"match": {
"Department": "Indirect*"}}}},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword", "size":10000},
"aggs": {
"group_date": {
"terms": {
"field": "Date"},
"aggs": {
"group_value": {
"sum":{
"field": "Value"}}}}}}}})
2) Group by Plant and Range of Dates, and get avg and median:
es = Elasticsearch('elasticsearch:9200')
# Exclude documents matching "Indirect*", bucket by plant, then by the date ranges
# supplied in `datelist`, and compute a percentile and the average of Value per range.
body = Dict({
    "query": {
        "bool": {
            "must_not": {
                "match": {"Department_Substrate": "Indirect*"}
            }
        }
    },
    "aggs": {
        "group_code": {
            "terms": {"field": "Plant.keyword", "size": 10000},
            "aggs": {
                "group_date": {
                    "range": {"field": "Date", "ranges": datelist},
                    "aggs": {
                        # NOTE(review): percents [25] requests the 25th percentile;
                        # a true median would be [50] -- confirm the intent.
                        "Median": {
                            "percentiles": {"field": "Value", "percents": [25]}
                        },
                        "Mean": {
                            "avg": {"field": "Value"}
                        }
                    }
                }
            }
        }
    }
})
This works too, but in this case I didn't group by plant and date first. Combining both, I have something like:
body = Dict({"query": {
"bool": {
"must_not": {
"match": {
"Department_Substrate": "Indirect*"}}}},
"aggs": {
"group_code": {
"terms": {
"field": "Plant.keyword", "size":10000},
"aggs": {
"group_date": {
"terms": {
"field": "Date"},
"aggs": {
"group_value": {
"sum":{
"field": "Value"},
"aggs": {
"group_date": {
"range": {
"field": "Date",
"ranges": datelist},
"aggs": {
"Median": {
"percentiles": {
"field": "Value",
"percents": [25]}},
"Mean": {
"avg": {
"field":
"Value"}}}}}}}}}}}})
res = es.search(index=self.index, doc_type='test', body=body)
I get this error:
TransportError: TransportError(500, 'aggregation_initialization_exception', 'Aggregator [group_value] of type [sum] cannot accept sub-aggregations')
So is there a way to do this?
In case it helps, my previous Python code was:
# Keep only the rows whose Department does not start with 'Indirect'.
data = test[~test.Department.str.startswith('Indirect')]
# Sum Value for each (Plant, Date) pair.
group1 = data.groupby(['Plant', 'Date'])['Value'].sum()
# Regroup those sums by Plant and weekly Date bins (freq='W') and take the median of each group.
group2 = pd.DataFrame(group1.reset_index()).groupby(['Plant', pd.Grouper(key='Date', freq='W')])['Value'].median()
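The error is clear: "Aggregator [group_value] of type [sum] cannot accept sub-aggregations"
A 'sum' is a metric aggregation: it produces a single value rather than buckets, so its result cannot be split any further by sub-aggregations.
So you'd better change the position of the sum aggregation and make it a sibling of the other metrics,
i.e.: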
{
  "query": {
    "bool": {
      "must_not": {
        "match": {
          "Department_Substrate": "Indirect*"
        }
      }
    }
  },
  "aggs": {
    "group_code": {
      "terms": {
        "field": "Plant.keyword",
        "size": 10000
      },
      "aggs": {
        "group_date": {
          "terms": {
            "field": "Date"
          },
          "aggs": {
            "group_date": {
              "range": {
                "field": "Date",
                "ranges": "sdf"
              },
              "aggs": {
                "Median": {
                  "percentiles": {
                    "field": "Value",
                    "percents": [
                      25
                    ]
                  }
                },
                "group_value": {
                  "sum": {
                    "field": "Value"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
I need to aggregate documents with the following mapping on multiple levels, grouping them by one field and then by another.
Mapping:
{
'predictions': {
'properties': {
'Company':{'type':'string'},
'TxnsId':{'type':'string'},
'Emp':{'type':'string'},
'Amount':{'type':'float'},
'Cash/online':{'type':'string'},
'items':{'type':'float'},
'timestamp':{'type':'date'}
}
}
}
My requirement is a bit complex. I need to:
For each Emp (Getting the distinct employees)
Check whether it is online or cashed transaction
Group by items with the ranges like 0-10,11-20,21-30....
Sum the Amount
Final Output is like:
>Emp-online-range-Amount
>a-online-(0-10)-1240$
>a-online-(21-30)-3543$
>b-online-(0-10)-2345$
>b-online-(11-20)-3456$
Something like this should do the job:
{
"size": 0,
"aggs": {
"by_emp": {
"terms": {
"field": "Emp"
},
"aggs": {
"cash_online": {
"filters": {
"filters": {
"cashed": {
"term": {
"Cash/online": "cached"
}
},
"online": {
"term": {
"Cash/online": "online"
}
}
}
},
"aggs": {
"ranges": {
"range": {
"field": "items",
"ranges": [
{
"from": 0,
"to": 11
},
{
"from": 11,
"to": 21
},
{
"from": 21,
"to": 31
}
]
},
"aggs": {
"total": {
"sum": {
"field": "Amount"
}
}
}
}
}
}
}
}
}
}