Trouble understanding mongo aggregation - python

I am trying to list all the virtual machines (vms) in my mongo database that use a certain data store, EMC_123.
I have this script, but it lists VMs that do not use the data store EMC_123.
#!/usr/bin/env python
"""Print the VM names grouped out of data centers that reference data store EMC_123."""
import pprint
import pymongo


def run_query():
    """Run the aggregation, pretty-print each result, then print the explain plan.

    Connects to the MongoDB server on 127.0.0.1:27017 and queries the
    ``data_centers`` collection of the ``data_center_test`` database.
    """
    server = '127.0.0.1'
    client = pymongo.MongoClient("mongodb://%s:27017/" % server)
    db = client["data_center_test"]
    collection = db["data_centers"]
    # NOTE: $match keeps or drops whole documents; it does not filter the
    # elements of the nested arrays. A data center that contains at least one
    # VM on EMC_123 therefore passes through with all of its VMs, and $group
    # then emits every VM name in that document.
    pipeline = [
        {"$match": {"clusters.hosts.vms.data_stores.name": "EMC_123"}},
        {"$group": {"_id": "$clusters.hosts.vms.name"}},
    ]
    # Create the printer once, before the loop, so it is also available for
    # the explain output when the aggregation returns no documents.
    pp = pprint.PrettyPrinter()
    for doc in collection.aggregate(pipeline):
        pp.pprint(doc)
    pp.pprint(db.command('aggregate', 'data_centers', pipeline=pipeline, explain=True))


def main():
    """Run the query and return 0 as the exit status."""
    run_query()
    return 0


# Start program
if __name__ == "__main__":
    main()
I assume there is something wrong with my pipeline.
Here is the plan that gets printed out:
{u'ok': 1.0,
u'stages': [{u'$cursor': {u'fields': {u'_id': 0,
u'clusters.hosts.vms.name': 1},
u'query': {u'clusters.hosts.vms.data_stores.name': u'EMC_123'},
u'queryPlanner': {u'indexFilterSet': False,
u'namespace': u'data_center_test.data_centers',
u'parsedQuery': {u'clusters.hosts.vms.data_stores.name': {u'$eq': u'EMC_123'}},
u'plannerVersion': 1,
u'rejectedPlans': [],
u'winningPlan': {u'direction': u'forward',
u'filter': {u'clusters.hosts.vms.data_stores.name': {u'$eq': u'EMC_123'}},
u'stage': u'COLLSCAN'}}}},
{u'$group': {u'_id': u'$clusters.hosts.vms.name'}}]}
UPDATE:
Here is a skeleton of what the document looks like:
{
"name" : "data_center_name",
"clusters" : [
{
"hosts" : [
{
"name" : "esxi-hostname",
"vms" : [
{
"data_stores" : [ { "name" : "EMC_123" } ],
"name" : "vm-name1",
"networks" : [ { "name" : "vlan334" } ]
},
{
"data_stores" : [ { "name" : "some_other_data_store" } ],
"name" : "vm-name2",
"networks" : [ { "name" : "vlan334" } ]
}
]
}
],
"name" : "cluster_name"
}
]
}
The problem I am seeing is that vm-name2 shows up in the results when it doesn't have EMC_123 as a data store.
Update 2:
ok I am able to write a mongo shell query that does what I want. It is a little ugly:
db.data_centers.aggregate({$unwind: '$clusters'}, {$unwind: '$clusters.hosts'}, {$unwind: '$clusters.hosts.vms'}, {$unwind: '$clusters.hosts.vms.data_stores'}, {$match: {"clusters.hosts.vms.data_stores.name": "EMC_123"}})
I came across this in the second answer of this SO question: MongoDB Projection of Nested Arrays

Based on the answers in MongoDB Projection of Nested Arrays I had to change my pipeline to this:
# Flatten the nested arrays one level at a time (clusters -> hosts -> vms ->
# data_stores). Each $unwind outputs one document per array element, so after
# the four stages every document describes a single VM / data store pair and
# the final $match can keep only the pairs whose data store is EMC_123.
pipeline = [
{'$unwind': '$clusters'},
{'$unwind': '$clusters.hosts'},
{'$unwind': '$clusters.hosts.vms'},
{'$unwind': '$clusters.hosts.vms.data_stores'},
{'$match': {"clusters.hosts.vms.data_stores.name": "EMC_123"}}
]

Related

How to import JSON file with embedded array into Mongodb using Compass [duplicate]

I am working with MongoDB in Python [pymongo]. I want to insert an array of multiple fields into a document. For example, in the collection structure below, I want to insert an array of Places Visited into all documents. I do not know what this is called in Mongo terminology, so I cannot work out how to insert it. How do I insert an array into a document? Can someone help?
collectionName
{
"_id" : "4564345343",
"name": "Bunty",
"Basic Intro": "A.B.C.D.",
"Places Visited": [
"1" : "Palace of Dob",
"2" : "Palace of Victoria",
"3" : "Sahara Desert"
]
}
{
"_id" : "45657865745",
"name": "Humty",
"Basic Intro": "B.C.D.",
"Places Visited": [
"1" : "Palace of Pakistan",
"2" : "Palace of U.S.A."
"3" : "White House"
]
}
This should give you the idea how to do it
import pymongo

client = pymongo.MongoClient('yourHost', 30000)  # adjust to your needs
db = client.so
coll = db.yourcollection

# show initial data
for doc in coll.find():
    print(doc)

# update data
places_visited = [
    "Palace of Dob",
    "Palace of Victoria",
    "Sahara Desert",
]
# The empty filter {} matches every document in the collection.
# update_many replaces the legacy update(..., multi=True) call, which PyMongo
# deprecated in 3.0 and removed in 4.0.
coll.update_many({}, {"$set": {"Places Visited": places_visited}})

# show updated data
for doc in coll.find():
    print(doc)
which for your sample data should give output similar to this
daxaholic$ python3 main.py
{'name': 'Bunty', 'Basic Intro': 'A.B.C.D.', '_id': '4564345343'}
{'name': 'Humty', 'Basic Intro': 'B.C.D.', '_id': '45657865745'}
{'name': 'Bunty', 'Places Visited': ['Palace of Dob', 'Palace of Victoria', 'Sahara Desert'], 'Basic Intro': 'A.B.C.D.', '_id': '4564345343'}
{'name': 'Humty', 'Places Visited': ['Palace of Dob', 'Palace of Victoria', 'Sahara Desert'], 'Basic Intro': 'B.C.D.', '_id': '45657865745'}
For further information see the docs about update

MongoEngine not deleting all documents

I have some unit tests which submit some info to a server, which saves the info into a document using MongoEngine. At the end of the test, I want to delete all of the documents made by the test:
@router.delete("/all", summary="Delete all jobs in an organization")
async def delete_all_jobs(job_data: AuthorizedResource = Depends(CanActOnResource("delete", "jobs"))):
    """Delete every MongoJob document whose organization equals ``job_data.organization``."""
    MongoJob.objects(organization=job_data.organization).delete()
However when I run this endpoint, some of the documents are only partially deleted:
This is what the JSON looks like before being deleted:
{
"_id" : "242d07ac-eafb-4875-a8f4-8ec89c7bc21f",
"_cls" : "MongoJob",
"_created_by" : "tom.mclean",
"_date_created" : ISODate("2022-02-24T08:23:50.943Z"),
"_date_modified" : ISODate("2022-02-24T08:25:02.062Z"),
"_modified_by" : "tom.mclean",
"client_info" : {
"protocol" : "tcp",
"interface" : "0.0.0.0",
"port" : 0
},
"grib_data" : {
"grib_dir_clim" : "X:\\Weather_Files\\Climatology",
"grib_dir_wind" : "X:\\Weather_Files\\NOAA_Forcasts",
"grib_dir_wave" : "X:\\Weather_Files\\NOAA_Forcasts"
},
"organization" : "8b50d3f2-03fe-4aca-9cf6-9922854f2989",
"output_dir" : "C:\\Users\\Tom.Mclean\\src\\routingserver\\WeatherRouting\\WeatherRouting\\..\\output",
"polars" : [
"5d19d7d0-eba2-49e5-8719-760d352d50dc"
],
"result" : {
"costs" : {
"total_cost" : null,
"fuel_cost" : null,
"hire_cost" : null
}
},
"route_form" : {
"waypoints" : [
{
"type" : "Waypoint",
"lon" : -7.25,
"lat" : 49.42,
"normal_deviation" : 0.2
},
{
"type" : "Waypoint",
"lon" : -50.0,
"lat" : 40.0,
"normal_deviation" : 0.0
}
],
"start_time" : ISODate("2022-02-24T08:23:50.842Z"),
"arrival_window" : {
"early" : null,
"late" : null
},
"max_tws" : 40.0,
"max_lat" : 65.0,
"min_lat" : -40.0,
"max_speed" : 16.0,
"min_speed" : 8.0,
"great_circle" : false,
"objective_funcs" : [
{
"hire_cost" : 16000.0,
"fuel_cost" : 550.0
}
],
"decision_time" : 24.0,
"course_change_angle" : 15.0,
"speed_step" : 0.5
},
"ship" : "f2775ef8-c58d-4aa3-a6a0-b82539535e88",
"status" : "FAILED",
"wave_data" : false
}
And then after running that end point of the API, some of the documents are deleted however some are left with just three fields:
{
"_id" : "5f04ffc3-45a3-4652-a79d-68b37e737268",
"_date_modified" : ISODate("2022-02-24T15:13:28.013Z"),
"status" : "FAILED"
}
If I run the unit tests in debug mode and pause on the line which calls the delete endpoint and then run it later on, it safely deletes all the documents:
@classmethod
def tearDownClass(cls) -> None:
    """Call the organization's ``jobs/all`` delete endpoint once the tests finish.

    A fresh access token is requested first and passed to the delete call.
    """
    # TODO Once jobs can be deleted, clear test jobs from the routing server
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(cls.oauth.get_new_access_token())
        organization_path = cls.api._organization_path
        pathname = f"{organization_path}/jobs/all"
        loop.run_until_complete(cls.api.delete(pathname, token=cls.oauth.access_token))  # <- PAUSE HERE
    finally:
        # Close the loop created above so it is not leaked, even if a call fails.
        loop.close()
How can I safely ensure that all of the documents are deleted? I could add a pause to the unit test before calling the delete endpoint, but this does not feel right and I should just try and fix the issue first.

pymongo - Update a data and access the found value

I am trying to update a value of an array stored in a mongodb collection
any_collection: {
{
"_id": "asdw231231"
"values": [
{
"item" : "a"
},
{
"item" : "b"
}
],
"role": "role_one"
},
...many similar
}
The idea is that I want to access values and edit a value with the following code, which I found in the MongoDB documentation:
conn.any_collection.find_one_and_update(
{
"_id": any_id,
"values.item": "b"
},
{
"$set": {
"values.$.item": "new_value" # here the error, ".$."
}
}
)
This should work, but I can't understand what the error is or what is the correct syntax for pymongo. The error is generated when adding "$";
It works fine with my fastAPI.
@app.get("/find/{id}")
async def root(id: int):
    """Update the matching ``values`` element of one document and return the result.

    The ``id`` path parameter is accepted but not used: the filter below
    targets the hard-coded sample ``_id``.
    """
    db = get_database()
    # The filter must reference the array field ('values.item') so that the
    # positional '$' in the update refers to the element that matched.
    q = {'_id': 'asdw231231', 'values.item': 'b'}
    u = {'$set': {'values.$.item': 'new_value'}}
    c = db['any'].find_one_and_update(q, u)
    return {"message": c}
mongoplayground

MongoDB Python MongoEngine - Returning Document by filter of Embedded Documents Sum of Filtered property

I am using Python and MongoEngine to try and query the below Document in MongoDB.
I need a query to efficiently get the Documents only when they contain Embedded Documents 'Keywords' that match the following criteria:
Keywords Filtered where the Property 'SFR' is LTE '100000'
SUM the filtered keywords
Return the parent documents where SUM of the keywords matching the criteria is Greater than '9'
Example structure:
{
"_id" : ObjectId("5eae60e4055ef0e717f06a50"),
"registered_data" : ISODate("2020-05-03T16:12:51.999+0000"),
"UniqueName" : "SomeUniqueNameHere",
"keywords" : [
{
"keyword" : "carport",
"search_volume" : NumberInt(10532),
"sfr" : NumberInt(20127),
"percent_contribution" : 6.47,
"competing_product_count" : NumberInt(997),
"avg_review_count" : NumberInt(143),
"avg_review_score" : 4.05,
"avg_price" : 331.77,
"exact_ppc_bid" : 3.44,
"broad_ppc_bid" : 2.98,
"exact_hsa_bid" : 8.33,
"broad_hsa_bid" : 9.29
},
{
"keyword" : "party tent",
"search_volume" : NumberInt(6944),
"sfr" : NumberInt(35970),
"percent_contribution" : 4.27,
"competing_product_count" : NumberInt(2000),
"avg_review_count" : NumberInt(216),
"avg_review_score" : 3.72,
"avg_price" : 210.16,
"exact_ppc_bid" : 1.13,
"broad_ppc_bid" : 0.55,
"exact_hsa_bid" : 9.66,
"broad_hsa_bid" : 8.29
}
]
}
From the research I have been doing, I believe an Aggregate type query might do what I am attempting.
Unfortunately, being new to MongoDB / MongoEngine I am struggling to figure out how to structure the query and have failed in finding an example similar to what I am attempting to do (RED FLAG RIGHT????).
I did find an example of an aggregate, but I am unsure how to structure my criteria in it; maybe something like this is getting close, but it does not work.
pipeline = [
{
"$lte": {
"$sum" : {
"keywords" : {
"$lte": {
"keyword": 100000
}
}
}: 9
}
}
]
data = product.objects().aggregate(pipeline)
Any guidance would be greatly appreciated.
Thanks,
Ben
you can try something like this
db.collection.aggregate([
  {
    $project: { // the first project to filter the keywords array
      registered_data: 1,
      UniqueName: 1,
      keywords: {
        $filter: {
          input: "$keywords",
          as: "item",
          // keep only the keywords whose sfr is less than or equal to 100000
          cond: {
            $lte: [
              "$$item.sfr",
              100000
            ]
          }
        }
      }
    }
  },
  {
    $project: { // the second project to get the length of the keywords array
      registered_data: 1,
      UniqueName: 1,
      keywords: 1,
      keywordsLength: {
        $size: "$keywords"
      }
    }
  },
  {
    $match: { // then do the match
      keywordsLength: {
        // the requirement is "greater than 9", so the comparison is strict
        $gt: 9
      }
    }
  }
])
you can test it here Mongo Playground
hope it helps
Note, I used sfr property only from the keywords array for simplicity

PyMongo iterate through documents / arrays within documents, replace found values

I have the following data in Mongodb:
{ "_id" : 1, "items" : [ "apple", "orange", "plum" ] }
{ "_id" : 2, "items" : [ "orange", "apple", "pineapple" ] }
{ "_id" : 3, "items" : [ "cherry", "carrot", "apple" ] }
{ "_id" : 4, "items" : [ "sprouts", "pear", "lettuce" ] }
I am trying to make a function using Python / PyMongo that takes 2 arguments, an old and a new string. I would like to find all the "apple" in all of the arrays across all of the documents and replace them with the string "banana".
Below is the code I have so far:
def update(old, new):
for result in db.collection.find({"items" : old}):
for i in result["items"]:
if i == old:
db.collection.update({"_id": result["._id"]}, {"$set": {"items": new}})
Finally figured this out, it is actually really simple:
from pymongo import MongoClient

# The host must be passed as a string.
mongo_client = MongoClient('127.0.0.1', 27017)
db = mongo_client.my_databse


def update(old, new):
    """Replace ``old`` with ``new`` in the ``items`` array of each matching document.

    The positional ``$`` operator updates only the first array element that
    matched the filter, so one occurrence per document is replaced.
    """
    for result in db.collection.find({'items': old}):
        # 'items' is repeated in the filter so that 'items.$' has a matched
        # array position to refer to.
        db.collection.update_one({'_id': result['_id'], 'items': old}, {'$set': {'items.$': new}})
Use update_many() to update multiple documents in one query instead of looping through the documents.
from pymongo import MongoClient

# The host must be passed as a string.
mongo_client = MongoClient('127.0.0.1', 27017)
db = mongo_client.my_databse


def update(old, new):
    """Replace ``old`` with ``new`` in the ``items`` array of every matching document.

    A single update_many call covers all documents. The positional ``$``
    operator updates only the first array element that matched the filter,
    so one occurrence per document is replaced.
    """
    db.collection.update_many({'items': old}, {'$set': {'items.$': new}})
For pymongo less than 3.2 use update with multi flag
def update(old, new):
    """Replace ``old`` with ``new`` in ``items`` using the legacy ``update`` API.

    ``multi=True`` makes the legacy call apply to every matching document
    instead of only the first one.
    """
    db.collection.update({'items': old}, {'$set': {'items.$': new}}, multi=True)

Categories