Search for data from a list in MongoDB - Python

My database looks like:
{'_id': ObjectId('3f05e2aa794e17504a6674a7'),
 'lt': [
     {'_id': ObjectId('6f05e2aa794e177b456674a9'), 'name': 'text1'},
     {'_id': ObjectId('2f05e2aa794e1765286674a8'), 'name': 'text3'}
 ]
}
{'_id': ObjectId('3f05e3aa791e17f23b6674aa'),
 'lt': [
     {'_id': ObjectId('7f05e2aa494e17f5b36674ac'), 'name': 'text12'},
     {'_id': ObjectId('5f05e2aa794e1707006674ab'), 'name': 'text2'}
 ]
}
I also have a list of ids:
lists = ["6f05e2aa794e177b456674a9", "2f05e2aa794e1765286674a8", "5f05e2aa794e1707006674ab"]
I need to find only those objects (and their names) whose _id is in the list:
{
 'lt': [
     {'_id': ObjectId('6f05e2aa794e177b456674a9'), 'name': 'text1'},
     {'_id': ObjectId('2f05e2aa794e1765286674a8'), 'name': 'text3'}
 ]
}
{
 'lt': [
     {'_id': ObjectId('5f05e2aa794e1707006674ab'), 'name': 'text2'}
 ]
}
I wrote a query that worked for a single value:
id = "6f05e2aa794e177b456674a9"
objInstance = ObjectId(id)
fx = mycol.find(
    {"lt": {"$elemMatch": {"_id": objInstance}}},
    {"lt": {"$elemMatch": {"_id": objInstance}},
     "_id": 0, "lt._id": 1, "lt.name": 1}
).limit(5)
maydata = []
for x in fx:
    print(x)
But when I rewrote it to search with the list:
lists = ["6f05e2aa794e177b456674a9", "2f05e2aa794e1765286674a8", "5f05e2aa794e1707006674ab"]
obj_ids = list(map(lambda x: ObjectId(x), lists))
fx = mycol.find(
    {"lt": {"$elemMatch": {"_id": {"$in": obj_ids}}}},
    {"lt": {"$elemMatch": {"_id": {"$in": obj_ids}}},
     "_id": 0, "lt._id": 1, "lt.name": 1}
).limit(5)
maydata = []
for x in fx:
    print(x)
it returned only one matching element per document:
{
 'lt': [
     {'_id': ObjectId('2f05e2aa794e1765286674a8'), 'name': 'text3'}
 ]
}
{
 'lt': [
     {'_id': ObjectId('5f05e2aa794e1707006674ab'), 'name': 'text2'}
 ]
}

You can use an aggregation pipeline with $filter. The $elemMatch projection operator only ever returns the first matching array element, which is why your find() returned a single element per document; $filter keeps every matching element. This query uses $project to show only the elements of lt whose _id is also in your array:
db.collection.aggregate([
    {
        "$project": {
            "_id": 0,
            "lt": {
                "$filter": {
                    "input": "$lt",
                    "cond": {
                        "$in": ["$$this._id", obj_ids]
                    }
                }
            }
        }
    }
])
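In PyMongo, a minimal sketch of the same pipeline, assuming the mycol collection and obj_ids list from the question:
from bson.objectid import ObjectId

lists = ["6f05e2aa794e177b456674a9", "2f05e2aa794e1765286674a8", "5f05e2aa794e1707006674ab"]
obj_ids = [ObjectId(x) for x in lists]

pipeline = [
    {
        "$project": {
            "_id": 0,
            "lt": {
                "$filter": {
                    "input": "$lt",
                    # keep only the embedded documents whose _id is in obj_ids
                    "cond": {"$in": ["$$this._id", obj_ids]},
                }
            }
        }
    }
]
for doc in mycol.aggregate(pipeline):
    print(doc)
Documents in which nothing matches will come back with an empty lt array; if you want to skip them entirely, put a $match stage such as {"lt._id": {"$in": obj_ids}} in front of the $project.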

Related

Adding unique key before duplicate JSON keys

I have the following JSON string:
[
{
"id":"1",
"comment":"hello"
},
{
"id":"2",
"comment":"hi"
}
]
I'm trying to make it like this:
[
{
"finding-1":{
"id":"1",
"comment":"hello"
}
},
{
"finding-2":{
"id":"2",
"comment":"hi"
}
}
]
What is the cleanest way to do this in Python?
j = [
{
"id":"1",
"comment":"hello"
},
{
"id":"2",
"comment":"hi"
}
]
n = [{f'finding-{d["id"]}': d} for d in j]
# [{'finding-1': {'comment': 'hello', 'id': '1'}}, {'finding-2': {'comment': 'hi', 'id': '2'}}]
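The question describes the input as a JSON string, while the snippet above starts from a Python list; if the data really arrives as a string, a minimal sketch is to parse it first and serialize the result back:
import json

s = '[{"id": "1", "comment": "hello"}, {"id": "2", "comment": "hi"}]'
j = json.loads(s)                           # parse the JSON string into a list of dicts
n = [{f'finding-{d["id"]}': d} for d in j]  # same comprehension as above
print(json.dumps(n, indent=2))              # back to a JSON string if needed
Note that the key is built from each item's id; if you want consecutive numbering regardless of the ids, use enumerate(j, 1) instead.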

MongoDB elemMatch in lookup pipeline?

I have a document that references another document, and I'd like to join these documents and filter based on the contents of an array in the child document:
deployment_machine document:
{
"_id": 1,
"name": "Test Machine",
"machine_status": 10,
"active": true
}
machine_status document:
{
"_id": 10,
"breakdown": [
{
"status_name": "Rollout",
"state": "complete"
},
{
"status_name": "Deploying",
"state": "complete"
}
]
}
I'm using Mongo 3.6 and am having mixed success with the lookup and pipeline. Here's the object I'm building in Python (MongoEngine) and passing to the aggregate function:
pipeline = [
{'$match': {'breakdown': {'$elemMatch': {'status_name': 'Rollout'}}}},
{'$lookup':
{
'from': 'deployment_machine',
'let': {'status_id': '$_id'},
'pipeline': [
{'$match':
{'$expr':
{'$and': [
{'$eq': ['$machine_status', '$$status_id']},
]},
}
}
],
'as': 'result',
},
},
{'$project': {
'breakdown': {'$filter': {
'input': '$breakdown',
'as': 'breakdown',
'cond': {'$eq': ['$$breakdown.status_name', 'Rollout']}
}}
}},
]
result = list(MachineStatus.objects.aggregate(*pipeline))
This works well, but how can I exclude results where the Deployment Machine isn't active? I feel it must go in the project but can't find a condition that works. Any help appreciated.
You can add more conditions in the $lookup pipeline:
pipeline = [
{ $match: { breakdown: { $elemMatch: { status_name: "Rollout" } } } },
{
$lookup: {
from: "deployment_machine",
let: { status_id: "$_id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$machine_status", "$$status_id"] },
active: false
}
}
],
as: "result",
}
},
{
$project: {
breakdown: {
$filter: {
input: "$breakdown",
as: "breakdown",
cond: { $eq: ["$$breakdown.status_name", "Rollout"] },
}
}
}
}
];
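If you are driving this from Python as in the question, the same idea in dict syntax might look like the sketch below. Two assumptions are baked in: 'active': True (the question asks to keep only active machines, so flip it to False if you want the opposite, as in the shell version above), and an extra $match stage that drops statuses whose lookup found no matching machine:
pipeline = [
    {'$match': {'breakdown': {'$elemMatch': {'status_name': 'Rollout'}}}},
    {'$lookup': {
        'from': 'deployment_machine',
        'let': {'status_id': '$_id'},
        'pipeline': [
            {'$match': {
                '$expr': {'$eq': ['$machine_status', '$$status_id']},
                # assumption: keep only active machines
                'active': True,
            }}
        ],
        'as': 'result',
    }},
    # drop status documents whose lookup came back empty
    {'$match': {'result': {'$ne': []}}},
    {'$project': {
        'breakdown': {'$filter': {
            'input': '$breakdown',
            'as': 'breakdown',
            'cond': {'$eq': ['$$breakdown.status_name', 'Rollout']},
        }}
    }},
]
result = list(MachineStatus.objects.aggregate(*pipeline))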

Do a pymongo Query with elemmatch and filter

I have the following data structure:
[
{
"site_id": ObjectId("5e85b9d20498abd407e9a030"),
"status": "ERROR"
},
{
"site_id": ObjectId("5e85b9d20498abd407e9a120"),
"status": "ERROR"
},
{
"site_id": ObjectId("5e85b9d20498abd407e9a030"),
"status": "OK",
"risk_categories": [
{
"position": 1,
"category_id": 1414,
},
{
"position": 2,
"category_id": 1402,
},
{
"position": 3,
"category_id": 1392,
}
]
}
]
I want to make a query with pymongo like this:
collection.find_one(filter=filter)
where:
filter = {'$and': [{'$and': [{'site_id': ObjectId('5e85b9d20498abd407e9a030')}, {'status': 'OK'}]}, {'risk_categories': {'$elemMatch': {'$or': [{'position': {'$eq': 1}}, {'position': {'$eq': 2}}]}}}]}
However, it returns the entire document, not only the risk_categories elements that I want.
What can I change in my filter to get that?
The following aggregation runs from the mongo shell:
db.collection.aggregate( [
{
$match: {
site_id: ObjectId('5e85b9d20498abd407e9a030'),
status: "OK"
}
},
{
$addFields: {
risk_categories: {
$filter: {
input: "$risk_categories",
as: "cat",
cond: {
$in: [ "$$cat.position", [ 1, 2 ] ] // this is equivalent to using the "$or"
}
}
}
}
},
] ).pretty()
The output:
{
"_id" : ObjectId("5e85c7b6724e461876467077"),
"site_id" : ObjectId("5e85b9d20498abd407e9a030"),
"status" : "OK",
"risk_categories" : [
{
"position" : 1,
"category_id" : 1414
},
{
"position" : 2,
"category_id" : 1402
}
]
}
Using PyMongo 3.9 and MongoDB 4.2, from the Python shell:
import pymongo
from pymongo import MongoClient
client = MongoClient()
db = client.test
collection = db.collection
import pprint
from bson.objectid import ObjectId
pipeline = [
{
'$match': {
'site_id': ObjectId('5e85b9d20498abd407e9a030'),
'status': 'OK'
}
},
{
'$addFields': {
'risk_categories': {
'$filter': {
'input': '$risk_categories',
'as': 'cat',
'cond': {
'$in': [ '$$cat.position', [ 1, 2 ] ]
}
}
}
}
},
]
pprint.pprint(list(collection.aggregate(pipeline)))
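Since the question uses find_one: aggregate always returns a cursor, so to get just the first matching document (or None when nothing matches) you can take a single item from it, for example:
doc = next(collection.aggregate(pipeline), None)  # behaves like find_one: None if there is no match
pprint.pprint(doc)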

Remove duplicate values from list of nested dictionaries

I have a list of dictionaries with a nested structure, and I need to remove all duplicate values. I'm a newbie in Python and can't solve this task. Can anyone help me?
My list looks like:
[
{
"task_id":123,
"results":[
{
"url":"site.com",
"date":"04.18.2019"
},
{
"url":"another_site.com",
"date":"04.18.2019"
},
{
"url":"site1.com",
"date":"04.18.2019"
}
]
},
{
"task_id":456,
"results":[
{
"url":"site3.com",
"date":"04.18.2019"
},
{
"url":"site.com",
"date":"04.18.2019"
}
]
},
{
"task_id":789,
"results":[
{
"url":"site7.com",
"date":"04.18.2019"
},
{
"url":"site9.com",
"date":"04.18.2019"
},
{
"url":"site.com",
"date":"04.18.2019"
}
]
}
]
I need site.com to appear only once; if a url value is duplicated, exclude it from the dict.
As a result:
task 123 with 3 dicts in results
task 456 with 1 dict in results (site.com excluded)
task 789 with 2 dicts in results (site.com excluded)
The desired output should look like:
[
{
"task_id":123,
"results":[
{
"url":"site.com",
"date":"04.18.2019"
},
{
"url":"another_site.com",
"date":"04.18.2019"
},
{
"url":"site1.com",
"date":"04.18.2019"
}
]
},
{
"task_id":456,
"results":[
{
"url":"site3.com",
"date":"04.18.2019"
}
]
},
{
"task_id":789,
"results":[
{
"url":"site7.com",
"date":"04.18.2019"
},
{
"url":"site9.com",
"date":"04.18.2019"
}
]
}
]
Let results be your list:
u = set()
final = []
for task in results:
    deduped = []
    for res in task["results"]:
        if res["url"] not in u:   # keep a url only the first time it is seen
            u.add(res["url"])
            deduped.append(res)
    final.append({"task_id": task["task_id"], "results": deduped})
print(final)
You can use a list comprehension:
d = [{'task_id': 123, 'results': [{'url': 'site.com', 'date': '04.18.2019'}, {'url': 'another_site.com', 'date': '04.18.2019'}, {'url': 'site1.com', 'date': '04.18.2019'}]}, {'task_id': 456, 'results': [{'url': 'site3.com', 'date': '04.18.2019'}, {'url': 'site.com', 'date': '04.18.2019'}]}, {'task_id': 789, 'results': [{'url': 'site7.com', 'date': '04.18.2019'}, {'url': 'site9.com', 'date': '04.18.2019'}, {'url': 'site.com', 'date': '04.18.2019'}]}]
new_d = [{**a, 'results':[c for c in a['results'] if all(c not in b['results'] for b in d[:i])]} for i, a in enumerate(d)]
Output:
[
{
"task_id": 123,
"results": [
{
"url": "site.com",
"date": "04.18.2019"
},
{
"url": "another_site.com",
"date": "04.18.2019"
},
{
"url": "site1.com",
"date": "04.18.2019"
}
]
},
{
"task_id": 456,
"results": [
{
"url": "site3.com",
"date": "04.18.2019"
}
]
},
{
"task_id": 789,
"results": [
{
"url": "site7.com",
"date": "04.18.2019"
},
{
"url": "site9.com",
"date": "04.18.2019"
}
]
}
]
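For readability, roughly the same across-task de-duplication can be written as an explicit loop (a sketch; an item is kept only if it did not already appear in an earlier task's results):
seen = []                                 # result dicts from earlier tasks
new_d = []
for task in d:
    kept = [r for r in task['results'] if r not in seen]
    seen.extend(task['results'])          # later tasks are compared against the original results
    new_d.append({**task, 'results': kept})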
A similar pattern works for de-duplicating the values of a plain dict; try this:
people = {
    1: {'name': 'John'},
    2: {'name': 'Marie'},
    3: {'name': 'Ann'},
    4: {'name': 'John'},
}
print(people)

unique = {}
for key, value in people.items():
    if value not in unique.values():
        unique[key] = value
print(unique)

Pymongo Aggregate with multiple conditions: lookup, unwind, redact, cond, sort and limit

done_status = ['BAD_PU', 'TO_WH', 'RCVDPORT', 'RCVD', 'BAD_DEL', 'MISSFLT', 'OFFLOAD']
shipments = db.db_shipment.aggregate([
    {
        "$lookup": {
            "from": "db_shipment_status_history",
            "localField": "_id",
            "foreignField": "fk_shipment_id",
            "as": "shipment_status_history_collection"
        }
    },
    {"$unwind": "$shipment_status_history_collection"},
    {"$redact": {
        "$cond": {"$if": {"status_value": {"$in": done_status}}},
        "$then": "$$KEEP",
        "$else": "$$PRUNE"
    }},
    {"$sort": {"shipment_status_history_collection.rec_timestamp": -1}},
    {"$limit": 1},
    {"$project": {"pkey": "$pkey", "_code": "$_code"}}
])
error:
pymongo.errors.OperationFailure: An object representing an expression must have exactly one field: { $cond: { $if: { status_value: { $in: [ "BAD_PU", "TO_WH", "RCVDPORT", "RCVD", "BAD_DEL", "MISSFLT", "OFFLOAD" ] } } }, $else: "$$PRUNE", $then: "$$KEEP" }
How can I fix this error? I'm trying to add the latest shipment status history to the shipment record, where the status value is in the given list of status values.
Update the $redact stage of your aggregation pipeline: if, then and else are parts of the $cond operator, not operators in themselves (there is no $if, $then or $else). Also, the aggregation $in operator takes an array of two items: the first item is checked for presence in the second, which must itself resolve to an array.
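For illustration only, a corrected $redact stage built from the one in the question might look like the sketch below; the field path assumes status_value lives inside the unwound shipment_status_history_collection documents, which is an assumption about your schema:
{"$redact": {
    "$cond": {
        # if/then/else live inside $cond, without dollar signs
        "if": {"$in": ["$shipment_status_history_collection.status_value", done_status]},
        "then": "$$KEEP",
        "else": "$$PRUNE",
    }
}}
The pipelines below take a different route and move these conditions into the $lookup instead.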
Mongo 3.6
messenger_pipeline_status = (
messenger_active_status['data']['pending']
+ messenger_active_status['data']['processing']
)
assigned_status = ['DEL_ASSIGNED','PU_ASSIGNED']
subpipeline = [
{
'$match': {
'$expr': {
'$and': [
{'$eq': ['$fk_shipment_id', '$$pkey']},
{'$eq': ['$fk_messenger_id', fk_user_id]},
{'$in': ['$status_value', assigned_status]}
]
}
}
},
{
'$sort': {
'rec_timestamp': -1
}
},
{
'$limit': 1
},
{
'$project': {
'fk_shipment_id': 1
}
}
]
pipeline = [
{
'$match': {
            'status_value': {'$in': messenger_pipeline_status},
'is_deleted': False,
'is_postponed': False,
'is_active': True,
}
},
{
'$lookup': {
'from': 'db_shipment_status_history',
'let': {'pkey': '$pkey'},
'pipeline': subpipeline,
'as': 'shipment_status_history'
}
},
{
'$match': {
'shipment_status_history': {
'$ne': []
}
}
},
{
'$unwind': '$shipment_status_history'
},
{
'$project': {
'_id': 1,
'pkey': 1,
'_code': 1,
'date_created': 1,
'sender_full_name': '$sender.full_name',
'sender_raw_address': '$sender.raw_address',
'sender_formatted_address': '$sender.formatted_address',
'receiver_full_name': '$receiver.full_name',
'receiver_raw_address': '$receiver.raw_address',
'receiver_formatted_address': '$receiver.formatted_address',
'status_name': 1,
'team_value': 1,
'cs_name': 1,
'fk_messenger_id': '$shipment_status_history.fk_shipment_id'
}
}
]
result = db.db_shipment.aggregate(pipeline)
print(list(result))
[Edit] Mongo 3.2
The following aggregation pipeline produces results similar to the one above and is a valid query for Mongo 3.2.
messenger_pipeline_status = ['MISSFLT', 'OFFLOAD']
pipeline = [
{
'$match': {
            'status_value': {'$in': messenger_pipeline_status},
'is_deleted': False,
'is_postponed': False,
'is_active': True,
}
},
{
"$lookup": {
'from': 'db_shipment_status_history',
'localField': 'pkey',
'foreignField': 'fk_shipment_id',
'as': 'shipment_status_history'
}
},
{
'$match': {
'shipment_status_history': {
'$ne': []
}
}
},
{
'$project': {
'_id': 1,
'pkey': 1,
'_code': 1,
'date_created': 1,
'sender_full_name': '$sender.full_name',
'sender_raw_address': '$sender.raw_address',
'sender_formatted_address': '$sender.formatted_address',
'receiver_full_name': '$receiver.full_name',
'receiver_raw_address': '$receiver.raw_address',
'receiver_formatted_address': '$receiver.formatted_address',
'status_name': 1,
'team_value': 1,
'cs_name': 1,
'shipment_status_history': {
'$filter': {
'input': '$shipment_status_history',
'as': 'shipment',
'cond': {
'$and': [
{'$eq': ['$$shipment.fk_shipment_id', fk_user_id]},
{'$in': ['$$shipment.status_value', assigned_status]},
]
}
}
},
}
},
{
'$unwind': '$shipment_status_history'
},
{
'$sort': {
'shipment_status_history.rec_timestamp': -1,
}
},
{
'$group': {
'_id': '$pkey',
'doc': {
'$first': '$$CURRENT'
}
}
},
{
'$unwind': '$doc'
},
{ # last projection, I promise
'$project': {
            '_id': '$doc._id',
'pkey': '$doc.pkey',
'_code': '$doc._code',
'date_created': '$doc.date_created',
'sender_full_name': '$doc.sender_full_name',
'sender_raw_address': '$doc.sender_raw_address',
'sender_formatted_address': '$doc.sender_formatted_address',
'receiver_full_name': '$doc.receiver_full_name',
'receiver_raw_address': '$doc.receiver_raw_address',
'receiver_formatted_address': '$doc.receiver_formatted_address',
'status_name': '$doc.status_name',
'team_value': '$doc.team_value',
'cs_name': '$doc.cs_name',
'fk_messenger_id': '$doc.shipment_status_history.fk_shipment_id'
}
},
]
res = db.db_shipment.aggregate(pipeline)
