Finding ElasticSearch records matching empty and null values - python

I have some elasticsearch records that are being stored as either an empty string, or a null value. I am trying to develop a query that will allow me to return these from the index. I came up with:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
],
}
}
}
}
}
Which works as intended for my purpose, and returns the correct row. However, if I try and search for any more than a single field, the 'should' clause OR's the two fields together. This is a problem, because I want there to be an AND relationship:
{
'query': {
'filtered': {
'filter': {
'bool': {
'should': [
{'term': {'field1': ''}},
{"missing" : {"field": "field1"}},
# these fields should be AND but are OR
{'term': {'field2': ''}},
{"missing" : {"field": "field2"}},
],
}
}
}
}
}
Is there any way I can do the above with a single filter, or AND the two filters together?

You could use the `and` filter for that purpose and AND the two bool/should filters together, like this:
{
"query": {
"filtered": {
"filter": {
"and": [
{
"bool": {
"should": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
}
},
{
"bool": {
"should": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
}
]
}
}
}
}
Alternatively, you can wrap two `or` filters in a bool/must, like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"or": [
{
"term": {
"field1": ""
}
},
{
"missing": {
"field": "field1"
}
}
]
},
{
"or": [
{
"term": {
"field2": ""
}
},
{
"missing": {
"field": "field2"
}
}
]
}
]
}
}
}
}
}

Related

How can I write DSL in python when I want to query multiple "must_not"?

I want to query multiple "must_not" like this:
{
"query":{
"bool": {
"must": { "regexp": { "DstIP": "192.168.81.*" }},
"should": [
{ "bool": {
"must_not": { "regexp": { "sourceIP": "192.168.*" }},
"must_not": { "regexp": { "sourceIP": "10.[0-255]." }},
}}
],
"minimum_should_match": 1
}
}}
This means that I want to match documents whose DstIP is in 192.168.81.0-255, except those whose sourceIP matches 192.168.* or 10.*.
But Python dictionaries require unique keys, so the second "must_not" overwrites the first and the resulting query is:
{'query': {'bool': {'must': {'regexp': {'ciscoDstIP': '192.168.81.*'}},
'should': [{'bool': {'must_not': {'regexp': {'ciscoSrcIP': '10.[0-255].'}}}}],
'minimum_should_match': 1}}}
How can I change my DSL code to query multiple "must_not" ?
You should modify your query as follows, combining both patterns under a single "must_not" clause:
{
"query": {
"bool": {
"must": {
"regexp": {
"DstIP": "192.168.81.*"
}
},
"must_not": [
{
"bool": {
"should": [
{
"regexp": {
"sourceIP": "192.168.*"
}
},
{
"regexp": {
"sourceIP": "10.[0-255].*"
}
}
],
"minimum_should_match": 1
}
}
]
}
}
}

Construct a dictionary by using another dictionary keys and values

I have dictionary below.
my_d = {'country': ['Germany',"France"],
'games': ['Football,Motorsport'],
'bayern': ['Muller']}
I need to create a dictionary from the keys and values above.
Each key should have ".keyword" appended in the output (e.g. country.keyword):
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
},
{
"terms": {
"games.keyword": [
"Football",
"Motorsport"
]
}
},
{
"match": {
"bayern.keyword": ["Muller"]
}
}
]
}
}
}
if my_d = {'country': ['Germany',"France"]} or my_d = {'country': ['Germany',"France"],
'games': None,
'bayern':None}
{
"query": {
"bool": {
"must": [
{
"terms": {
"country.keyword": [
"Germany",
"France"
]
}
}
]
}
}
}
Generally I would recommend using a third-party Elasticsearch Python package to query Elasticsearch, but I believe this code should work (Python 3.6+, since it uses f-strings):
# Build one Elasticsearch "terms" clause per field of ``my_d``, targeting the
# "<field>.keyword" sub-field.
# Fields whose value is None are skipped: the expected output omits them, and
# a "terms" clause needs a list of values, so passing None would yield an
# invalid query.
must_clauses = [
    {f"{key}.keyword": value}
    for key, value in my_d.items()
    if value is not None
]
terms = [{"terms": must_clause} for must_clause in must_clauses]

# All clauses go under bool/must, so a document has to satisfy every field.
query_template = {
    "query": {
        "bool": {
            "must": terms,
        }
    }
}

MongoDB elemMatch in lookup pipeline?

I have a document that references another document, and I'd like to join these documents and filter based on the contents of an array in the child document:
deployment_machine document:
{
"_id": 1,
"name": "Test Machine",
"machine_status": 10,
"active": true
}
machine_status document:
{
"_id": 10,
"breakdown": [
{
"status_name": "Rollout",
"state": "complete"
},
{
"status_name": "Deploying",
"state": "complete"
}
]
}
I'm using Mongo 3.6 and am having mixed success with the lookup and pipeline. Here's the object I'm using in Python with MongoEngine, which is passed to the aggregate function:
pipeline = [
{'$match': {'breakdown': {'$elemMatch': {'status_name': 'Rollout'}}}},
{'$lookup':
{
'from': 'deployment_machine',
'let': {'status_id': '$_id'},
'pipeline': [
{'$match':
{'$expr':
{'$and': [
{'$eq': ['$machine_status', '$$status_id']},
]},
}
}
],
'as': 'result',
},
},
{'$project': {
'breakdown': {'$filter': {
'input': '$breakdown',
'as': 'breakdown',
'cond': {'$eq': ['$$breakdown.status_name', 'Rollout']}
}}
}},
]
result = list(MachineStatus.objects.aggregate(*pipeline))
This works well, but how can I exclude results where the Deployment Machine isn't active? I feel it must go in the project but can't find a condition that works. Any help appreciated.
You can add more conditions to the $lookup pipeline:
pipeline = [
{ $match: { breakdown: { $elemMatch: { status_name: "Rollout" } } } },
{
$lookup: {
from: "deployment_machine",
let: { status_id: "$_id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$machine_status", "$$status_id"] },
active: false
}
}
],
as: "result",
}
},
{
$project: {
breakdown: {
$filter: {
input: "$breakdown",
as: "breakdown",
cond: { $eq: ["$$breakdown.status_name", "Rollout"] },
}
}
}
}
];

Filter MongoDB query to find documents only if a field in a list of objects is not empty

I have a MongoDB document structure like following:
Structure
{
"stores": [
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": [],
"item_category": "101",
"item_id": "11"
}
]
},
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
},
{
"feedback": [],
"item_category": "101",
"item_id": "12"
},
{
"feedback": [],
"item_category": "102",
"item_id": "13"
},
{
"feedback": [],
"item_category": "102",
"item_id": "14"
}
],
"store_id": 500
}
]
}
This is a single document in a collection. Some fields have been removed to produce a minimal representation of the data.
What I want is to get items only if the feedback field in the items array is not empty. The expected result is:
Expected result
{
"stores": [
{
"items": [
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
}
],
"store_id": 500
}
]
}
This is what I tried, based on the examples in the linked documentation, which I think cover pretty much the same situation, but it didn't work. What's wrong with my query? Isn't it the same situation as the zipcode search example in the link? It returns everything, as in the first JSON code (Structure):
What I tried
query = {
'date': {'$gte': since, '$lte': until},
'stores.items': {"$elemMatch": {"feedback": {"$ne": []}}}
}
Thanks.
Please try this :
db.yourCollectionName.aggregate([
{ $match: { 'date': { '$gte': since, '$lte': until }, 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores' },
{ $match: { 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores.items' },
{ $match: { 'stores.items.feedback': { "$ne": [] } } },
{ $group: { _id: { _id: '$_id', store_id: '$stores.store_id' }, items: { $push: '$stores.items' } } },
{ $project: { _id: '$_id._id', store_id: '$_id.store_id', items: 1 } },
{ $group: { _id: '$_id', stores: { $push: '$$ROOT' } } },
{ $project: { 'stores._id': 0 } }
])
All of these stages are needed because you are operating on an array of arrays. This query is written assuming you are dealing with a large data set. Since you are already filtering on dates, if the number of documents remaining after the first $match is small, you can drop the second $match stage (the one between the two $unwind stages).
References:
$match,
$unwind,
$project,
$group
This aggregate query gets the needed result (using the provided sample document and run from the mongo shell):
db.stores.aggregate( [
{ $unwind: "$stores" },
{ $unwind: "$stores.items" },
{ $addFields: { feedbackExists: { $gt: [ { $size: "$stores.items.feedback" }, 0 ] } } },
{ $match: { feedbackExists: true } },
{ $project: { _id: 0, feedbackExists: 0 } }
] )

ordering json in python mapping object

I am using Elasticsearch, where the query has to be posted as JSON and should be in a standard order, or else the result will be wrong. The problem is that Python is changing the ordering of my JSON. My original JSON query is:
x= {
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*a*"
}
},
"filter": {
"and": {
"filters": [
{
"term": {
"city": "london"
}
},
{
"term": {
"industry.industry_not_analyed": "oil"
}
}
]
}
}
}
},
"facets": {
"industry": {
"terms": {
"field": "industry.industry_not_analyed"
}
},
"city": {
"terms": {
"field": "city.city_not_analyzed"
}
}
}
}
But the resulting Python object is as follows:
{
'query': {
'filtered': {
'filter': {
'and': {
'filters': [
{
'term': {
'city': 'london'
}
},
{
'term': {
'industry.industry_not_analyed': 'oil'
}
}
]
}
},
'query': {
'query_string': {
'query': '*a*'
}
}
}
},
'facets': {
'city': {
'terms': {
'field': 'city.city_not_analyzed'
}
},
'industry': {
'terms': {
'field': 'industry.industry_not_analyed'
}
}
}
}
The result is different from what I need. How do I solve this?
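Use OrderedDict() instead of {}. Note that you can't simply use OrderedDict(query=...) because that would create an unordered dict in the background (keyword arguments are collected into a plain dict, whose order was not guaranteed before Python 3.6). Use this code instead: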
x = OrderedDict()
x['query'] = OrderedDict()
...
I suggest implementing a builder for this:
x = Query().filtered().query_string("*a*").and()....

Categories