I need to search multiple indices along with its field maps.
For example I want to query a string, in
field1 with index1
field2 with index2
from elasticsearch import Elasticsearch
es = Elasticsearch([eshost])
req_string = {
"size":1000,
"query": {
"query_string": {
"query": "string to be searched",
"fields": ["field1","field2"],
}
}
}
res = es.search(index='index1,index2', body=req_string)
Is it possible to do it ?
If yes please guide with some links. Thanks in Advance !
You can use _index field, when querying across multiple indexes.
The _index field allows matching on the index a document was indexed
into. Its value is accessible in certain queries and aggregations, and
when sorting or scripting
Adding a working example with index data,search query, and search result.
Index Data:
PUT/ index1/_doc/1
{
"name": "Hello"
}
PUT/ index2/_doc/1
{
"name": "Hello World"
}
Search Query:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"_index": [
"index1",
"index2"
]
}
}
],
"must": [
{
"simple_query_string": {
"query": "hello",
"fields": [
"name",
"title"
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "index2",
"_type": "_doc",
"_id": "1",
"_score": 0.4700036,
"_source": {
"name": "Hello World"
}
},
{
"_index": "index1",
"_type": "_doc",
"_id": "1",
"_score": 0.2876821,
"_source": {
"title": "Hello"
}
}
]
Updated Search Query:
The below search query will search for title field only in index1 and name field only in index2
{
"query": {
"bool": {
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_index": [
"index1"
]
}
}
],
"must": [
{
"query_string": {
"query": "hello",
"fields": [
"title"
]
}
}
]
}
},
{
"bool": {
"filter": [
{
"terms": {
"_index": [
"index2"
]
}
}
],
"must": [
{
"query_string": {
"query": "hello",
"fields": [
"name"
]
}
}
]
}
}
]
}
}
}
Related
generate unique id in nested document - Pymongo
my database looks like this...
{
"_id":"5ea661d6213894a6082af6d1",
"blog_id":"blog_one",
"comments": [
{
"user_id":"1",
"comment":"comment for blog one this is good"
},
{
"user_id":"2",
"comment":"other for blog one"
},
]
}
I want to add unique id in each and every comment,
I want it to output like this,
{
"_id":"5ea661d6213894a6082af6d1",
"blog_id":"blog_one",
"comments": [
{
"id" : "something" (auto generate unique),
"user_id":"1",
"comment":"comment for blog one this is good"
},
{
"id" : "something" (auto generate unique),
"user_id":"2",
"comment":"other for blog one"
},
]
}
I'm using PyMongo, is there a way to update this kind of document?
it's possible or not?
This update will add an unique id value to each of the comments array with nested documents. The id value is calculated based upon the present time as milliseconds. This value is incremented for each array element to get the new id value for the nested documents of the array.
The code runs with MongoDB version 4.2 and PyMongo 3.10.
pipeline = [
{
"$set": {
"comments": {
"$map": {
"input": { "$range": [ 0, { "$size": "$comments" } ] },
"in": {
"$mergeObjects": [
{ "id": { "$add": [ { "$toLong" : datetime.datetime.now() }, "$$this" ] } },
{ "$arrayElemAt": [ "$comments", "$$this" ] }
]
}
}
}
}
}
]
collection.update_one( { }, pipeline )
The updated document:
{
"_id" : "5ea661d6213894a6082af6d1",
"blog_id" : "blog_one",
"comments" : [
{
"id" : NumberLong("1588179349566"),
"user_id" : "1",
"comment" : "comment for blog one this is good"
},
{
"id" : NumberLong("1588179349567"),
"user_id" : "2",
"comment" : "other for blog one"
}
]
}
[ EDIT ADD ]
The following works from mongo shell. It adds unique id for the comments array's nested documents - unique across the documents.
db.collection.aggregate( [
{
"$unwind": "$comments" },
{
"$group": {
"_id": null,
"count": { "$sum": 1 },
"docs": { "$push": "$$ROOT" },
"now": { $first: "$$NOW" }
}
},
{
"$addFields": {
"docs": {
"$map": {
"input": { "$range": [ 0, "$count" ] },
"in": {
"$mergeObjects": [
{ "comments_id": { "$add": [ { "$toLong" : "$now" }, "$$this" ] } },
{ "$arrayElemAt": [ "$docs", "$$this" ] }
]
}
}
}
}
},
{
"$unwind": "$docs"
},
{
"$addFields": {
"docs.comments.comments_id": "$docs.comments_id"
}
},
{
"$replaceRoot": { "newRoot": "$docs" }
},
{
"$group": {
"_id": { "_id": "$_id", "blog_id": "$blog_id" },
"comments": { "$push": "$comments" }
}
},
{
$project: {
"_id": 0,
"_id": "$_id._id",
"blog_id": "$_id.blog_id",
"comments": 1
}
}
] ).forEach(doc => db.blogs.updateOne( { _id: doc._id }, { $set: { comments: doc.comments } } ) )
You can use ObjectId constructor to create the ids and place them in your nested documents.
I have a date field in my documents and I want to return only documents that have a date less than now - 5m, however not all my documents have the filed, they only recieve on the first time they get individually queried.
documents = es.search(index='index_name', size=10000, body={
"query": {
"bool": {
"must": [
{
"range": {
"time_lockout": {
"lt": "now-5m"
}
}
}
],
"filter": [
],
"should": [
],
"must_not": [
]
}
}})
So my pseudo code would be,
if `time_lockout` exists
give documents that are now-5 including all documents thats dont have `time_lockout`
Exclude documents that dat range falls withon that 5 minute window
Update the query to below:
{
"query": {
"bool": {
"should": [
{
"range": {
"time_lockout": {
"lt": "now-5m"
}
}
},
{
"bool": {
"must_not": [
{
"exists": {
"field": "time_lockout"
}
}
]
}
}
]
}
}
}
I keep on getting the following error in Python
Exception has occurred: TypeError unhashable type: 'dict'
on line 92
"should": [],
"must_not": []
This is the query string
res = es.search(
scroll = '2m',
index = "logstash-*",
body = {
{
"aggs": {
"2": {
"terms": {
"field": "src_ip.keyword",
"size": 50,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"cardinality": {
"field": "src_ip.keyword"
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "flow.start",
"format": "date_time"
},
{
"field": "timestamp",
"format": "date_time"
},
{
"field": "tls.notafter",
"format": "date_time"
},
{
"field": "tls.notbefore",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": 1555777931992,
"lte": 1558369931992,
"format": "epoch_millis"
}
}
}
],
"filter": [
{
"match_all": {}
}
],
"should": [],
"must_not": []
}
}
}
})
the value of body is a set ({ } without key-value is a set literal, e.g., {1,2} is a set). Inside this set you have a dictionary.
Items in a set have to be hashable, and dictionary isn't.
As the comment from #Carcigenicate says, it seems like a typo of having {{ }} instead of { } for the value of body.
Elasticsearch documentation shows that body should be a dictionary.
More about sets from python docs
I have a elastic search index collection like below,
"_index":"test",
"_type":"abc",
"_source":{
"file_name":"xyz.ex"
"metadata":{
"format":".ex"
"profile":[
{"date_value" : "2018-05-30T00:00:00",
"key_id" : "1",
"type" : "date",
"value" : [ "30-05-2018" ]
},
{
"key_id" : "2",
"type" : "freetext",
"value" : [ "New york" ]
}
}
Now I need to search for document by matching key_id to its value. (key_id is some field whose value is stored in "value")
Ex. For key_id='1'field, if it's value = "30-05-2018" it should match the above document.
I tried mapping this as a nested object, But I am not able to write query to search with 2 or more key_id matching its respective value.
This is how I would do it. You need to AND together via bool/filter (or bool/must) two nested queries for each of the condition pair, since you want to match two different nested elements from the same parent document.
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "metadata.profile",
"query": {
"bool": {
"filter": [
{
"term": {
"metadata.profile.f1": "a"
}
},
{
"term": {
"metadata.profile.f2": true
}
}
]
}
}
}
},
{
"nested": {
"path": "metadata.profile",
"query": {
"bool": {
"filter": [
{
"term": {
"metadata.profile.f1": "b"
}
},
{
"term": {
"metadata.profile.f2": false
}
}
]
}
}
}
}
]
}
}
}
The following JSON structure gives me an error when doing a query:
{
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "BRCA1",
"fuzziness": "AUTO",
"fields": [
"Long_Name",
"Short_Name",
"Uniprot_ID^10",
"Genes^2",
"Diseases^2",
"Function",
"Domains"
]
}
},
{
"term": {
"Is_Reviewed": true
}
},
{
"term": {
"Has_Function": true
}
}
]
}
}
},
"field_value_factor": {
"field": "Number_Of_Structures"
}
},
"size": 100
}
The error is:
[function_score] malformed query, expected [END_OBJECT] but found [FIELD_NAME]
The bool query on its own works perfectly, but as soon as I use function_score, it stops working. I have tried to follow this example: https://www.elastic.co/guide/en/elasticsearch/guide/master/boosting-by-popularity.html
Any ideas as to what I am doing wrong would be much appreciated!
You must put field_value_factor one level higher, inside function_score:
{
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "BRCA1",
"fuzziness": "AUTO",
"fields": [
"Long_Name",
"Short_Name",
"Uniprot_ID^10",
"Genes^2",
"Diseases^2",
"Function",
"Domains"
]
}
},
{
"term": {
"Is_Reviewed": true
}
},
{
"term": {
"Has_Function": true
}
}
]
}
},
"field_value_factor": {
"field": "Number_Of_Structures"
}
}
},
"size": 100
}