generate unique id in nested document - Pymongo - python

generate unique id in nested document - Pymongo
my database looks like this...
{
"_id":"5ea661d6213894a6082af6d1",
"blog_id":"blog_one",
"comments": [
{
"user_id":"1",
"comment":"comment for blog one this is good"
},
{
"user_id":"2",
"comment":"other for blog one"
},
]
}
I want to add unique id in each and every comment,
I want it to output like this,
{
"_id":"5ea661d6213894a6082af6d1",
"blog_id":"blog_one",
"comments": [
{
"id" : "something" (auto generate unique),
"user_id":"1",
"comment":"comment for blog one this is good"
},
{
"id" : "something" (auto generate unique),
"user_id":"2",
"comment":"other for blog one"
},
]
}
I'm using PyMongo, is there a way to update this kind of document?
it's possible or not?

This update will add an unique id value to each of the comments array with nested documents. The id value is calculated based upon the present time as milliseconds. This value is incremented for each array element to get the new id value for the nested documents of the array.
The code runs with MongoDB version 4.2 and PyMongo 3.10.
pipeline = [
{
"$set": {
"comments": {
"$map": {
"input": { "$range": [ 0, { "$size": "$comments" } ] },
"in": {
"$mergeObjects": [
{ "id": { "$add": [ { "$toLong" : datetime.datetime.now() }, "$$this" ] } },
{ "$arrayElemAt": [ "$comments", "$$this" ] }
]
}
}
}
}
}
]
collection.update_one( { }, pipeline )
The updated document:
{
"_id" : "5ea661d6213894a6082af6d1",
"blog_id" : "blog_one",
"comments" : [
{
"id" : NumberLong("1588179349566"),
"user_id" : "1",
"comment" : "comment for blog one this is good"
},
{
"id" : NumberLong("1588179349567"),
"user_id" : "2",
"comment" : "other for blog one"
}
]
}
[ EDIT ADD ]
The following works from mongo shell. It adds unique id for the comments array's nested documents - unique across the documents.
db.collection.aggregate( [
{
"$unwind": "$comments" },
{
"$group": {
"_id": null,
"count": { "$sum": 1 },
"docs": { "$push": "$$ROOT" },
"now": { $first: "$$NOW" }
}
},
{
"$addFields": {
"docs": {
"$map": {
"input": { "$range": [ 0, "$count" ] },
"in": {
"$mergeObjects": [
{ "comments_id": { "$add": [ { "$toLong" : "$now" }, "$$this" ] } },
{ "$arrayElemAt": [ "$docs", "$$this" ] }
]
}
}
}
}
},
{
"$unwind": "$docs"
},
{
"$addFields": {
"docs.comments.comments_id": "$docs.comments_id"
}
},
{
"$replaceRoot": { "newRoot": "$docs" }
},
{
"$group": {
"_id": { "_id": "$_id", "blog_id": "$blog_id" },
"comments": { "$push": "$comments" }
}
},
{
$project: {
"_id": 0,
"_id": "$_id._id",
"blog_id": "$_id.blog_id",
"comments": 1
}
}
] ).forEach(doc => db.blogs.updateOne( { _id: doc._id }, { $set: { comments: doc.comments } } ) )

You can use ObjectId constructor to create the ids and place them in your nested documents.

Related

Select mongo documents whose subdoc array has duplicate field?

With a schema like this
{
"doc1": {
"items": [
{
"item_id": 1
},
{
"item_id": 2
},
{
"item_id": 3
},
]
},
"doc2": {
"items": [
{
"item_id": 1
},
{
"item_id": 2
},
{
"item_id": 1
},
]
}
}
I want to query for documents that contain a duplicate item in their items array field. A duplicate means items with the same item_id field.
So the result for the example above should return doc2 only, because it has two items with the same item_id
Something like this?
qry = {
"items": {
"$size": {
"$ne": {
"items.unique_count" # obviously this doesn't exist, not sure how to do it
}
}
}
}
result = MyDocument.find(qry)
One option similar to #rickhg12hs and your suggestions is:
db.collection.aggregate([
{$match: {
$expr: {
$ne: [
{$size: "$items"},
{$size: {
$reduce: {
input: "$items",
initialValue: [],
in: {$setUnion: ["$$value", ["$$this.item_id"]]}
}
}
}
]
}
}
}
])
See how it works on the playground example

Aggregation $match within a $sum

I was wondering if it was possible to somehow use the $match operator within the $sum function for aggregation.
{ "$unwind": "$info.avatarInfoList" },
{ "$unwind": "$info.avatarInfoList.equipList" },
{ "$unwind": "$info.avatarInfoList.equipList.flat.reliquarySubstats" },
{
"$project": {
"name" : "$name",
"character" : "$info.avatarInfoList.avatarId",
"artifact" : "$info.avatarInfoList.equipList.itemId",
"statValue" : {
"$sum": [
{"$match" : { "$info.avatarInfoList.equipList.flat.reliquarySubstats.appendPropId" : "FIGHT_PROP_CRITICAL_HURT" } },
{"$multiply": [2, {"$match" : { "$info.avatarInfoList.equipList.flat.reliquarySubstats.appendPropId" : "FIGHT_PROP_CRITICAL" } }]}
]
},
}
},
{ "$sort": { "statValue": -1 }},
{ '$limit' : 30 }
]).to_list(length=None)
print(data)
I want to be able to use the value of the $sum operator within the project fields somehow, I just don't really understand what the right approach would be for this.
Sample Input (may be too long):
https://www.toptal.com/developers/hastebin/ixamekaxoq.json
Sample Output:
( 2 * FIGHT_PROP_CRITICAL ) + FIGHT_PROP_CRITICAL_HURT sorted from highest to lowest for each item.
{name: hat, character: Slayer, artifact: 13, statValue : 25.6}
There are still a few ambiguities about how you want to aggregate your data, but using the full document from your link, here's one way to produce the output you want.
N.B.: Weapons in the "equipList" don't have "reliquarySubstats" so they show a "statValue" of null in the output.
db.collection.aggregate([
{"$unwind": "$info.avatarInfoList"},
{"$unwind": "$info.avatarInfoList.equipList"},
{
"$project": {
"_id": 0,
"name": 1,
"character": "$info.avatarInfoList.avatarId",
"artifact": "$info.avatarInfoList.equipList.itemId",
"statValue": {
"$reduce": {
"input": "$info.avatarInfoList.equipList.flat.reliquarySubstats",
"initialValue": 0,
"in": {
"$switch": {
"branches": [
{
"case": {"$eq": ["$$this.appendPropId", "FIGHT_PROP_CRITICAL"]},
"then": {
"$add": [
"$$value",
{"$multiply": [2, "$$this.statValue"]}
]
}
},
{
"case": {"$eq": ["$$this.appendPropId", "FIGHT_PROP_CRITICAL_HURT"]},
"then": {"$add": ["$$value", "$$this.statValue"]}
}
],
"default": "$$value"
}
}
}
}
}
},
{"$sort": {"statValue": -1}}
])
Try it on mongoplayground.net.
It's not quite clear what you want to achieve, but as mentioned you want to be using $cond here.
like so:
{
"$project": {
"statValue": {
"$sum": [
{
$cond: [
{ // if this condition is true (prop id = prop critical hurt )
$eq: [
"$info.avatarInfoList.equipList.flat.reliquarySubstats.appendPropId",
"FIGHT_PROP_CRITICAL_HURT"
]
},
{ // then use this value for the "$sum"
"$multiply": [
2,
"$info.avatarInfoList.equipList.flat.reliquarySubstats.statValue"
]
},
0 // otherwise use this value for the sum.
]
}
]
}
}
Mongo Playground

MongoDB elemMatch in lookup pipeline?

I have a document that references another document, and I'd like to join these documents and filter based on the contents of an array in the child document:
deployment_machine document:
{
"_id": 1,
"name": "Test Machine",
"machine_status": 10,
"active": true
}
machine_status document:
{
"_id": 10,
"breakdown": [
{
"status_name": "Rollout",
"state": "complete"
},
{
"status_name": "Deploying",
"state": "complete"
}
]
}
I'm using Mongo 3.6 and am having mixed success with the lookup and pipeline, heres the object I'm using in the python MongoEngine being passed to the aggregate function:
pipeline = [
{'$match': {'breakdown': {'$elemMatch': {'status_name': 'Rollout'}}}},
{'$lookup':
{
'from': 'deployment_machine',
'let': {'status_id': '$_id'},
'pipeline': [
{'$match':
{'$expr':
{'$and': [
{'$eq': ['$machine_status', '$$status_id']},
]},
}
}
],
'as': 'result',
},
},
{'$project': {
'breakdown': {'$filter': {
'input': '$breakdown',
'as': 'breakdown',
'cond': {'$eq': ['$$breakdown.status_name', 'Rollout']}
}}
}},
]
result = list(MachineStatus.objects.aggregate(*pipeline))
This works well, but how can I exclude results where the Deployment Machine isn't active? I feel it must go in the project but can't find a condition that works. Any help appreciated.
You can add more condition in $lookup pipeline
pipeline = [
{ $match: { breakdown: { $elemMatch: { status_name: "Rollout" } } } },
{
$lookup: {
from: "deployment_machine",
let: { status_id: "$_id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$machine_status", "$$status_id"] },
active: false
}
}
],
as: "result",
}
},
{
$project: {
breakdown: {
$filter: {
input: "$breakdown",
as: "breakdown",
cond: { $eq: ["$$breakdown.status_name", "Rollout"] },
}
}
}
}
];

Filter MongoDB query to find documents only if a field in a list of objects is not empty

I have a MongoDB document structure like following:
Structure
{
"stores": [
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": [],
"item_category": "101",
"item_id": "11"
}
]
},
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
},
{
"feedback": [],
"item_category": "101",
"item_id": "12"
},
{
"feedback": [],
"item_category": "102",
"item_id": "13"
},
{
"feedback": [],
"item_category": "102",
"item_id": "14"
}
],
"store_id": 500
}
]
}
This is a single document in a collection. Some field are deleted to produce minimal representation of the data.
What I want is to get items only if the feedback field in the items array is not empty. The expected result is:
Expected result
{
"stores": [
{
"items": [
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
}
],
"store_id": 500
}
]
}
This is what I tried based on examples in this, which I think pretty same situation, but it didn't work. What's wrong with my query, isn't it the same situation in zipcode search example in the link? It returns everything like in the first JSON code, Structure:
What I tried
query = {
'date': {'$gte': since, '$lte': until},
'stores.items': {"$elemMatch": {"feedback": {"$ne": []}}}
}
Thanks.
Please try this :
db.yourCollectionName.aggregate([
{ $match: { 'date': { '$gte': since, '$lte': until }, 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores' },
{ $match: { 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores.items' },
{ $match: { 'stores.items.feedback': { "$ne": [] } } },
{ $group: { _id: { _id: '$_id', store_id: '$stores.store_id' }, items: { $push: '$stores.items' } } },
{ $project: { _id: '$_id._id', store_id: '$_id.store_id', items: 1 } },
{ $group: { _id: '$_id', stores: { $push: '$$ROOT' } } },
{ $project: { 'stores._id': 0 } }
])
We've all these stages as you need to operate on an array of arrays, this query is written assuming you're dealing with a large set of data, Since you're filtering on dates just in case if your documents size is way less after first $match then you can avoid following $match stage which is in between two $unwind's.
Ref 's :
$match,
$unwind,
$project,
$group
This aggregate query gets the needed result (using the provided sample document and run from the mongo shell):
db.stores.aggregate( [
{ $unwind: "$stores" },
{ $unwind: "$stores.items" },
{ $addFields: { feedbackExists: { $gt: [ { $size: "$stores.items.feedback" }, 0 ] } } },
{ $match: { feedbackExists: true } },
{ $project: { _id: 0, feedbackExists: 0 } }
] )

Elastic Search nested object query

I have a elastic search index collection like below,
"_index":"test",
"_type":"abc",
"_source":{
"file_name":"xyz.ex"
"metadata":{
"format":".ex"
"profile":[
{"date_value" : "2018-05-30T00:00:00",
"key_id" : "1",
"type" : "date",
"value" : [ "30-05-2018" ]
},
{
"key_id" : "2",
"type" : "freetext",
"value" : [ "New york" ]
}
}
Now I need to search for document by matching key_id to its value. (key_id is some field whose value is stored in "value")
Ex. For key_id='1'field, if it's value = "30-05-2018" it should match the above document.
I tried mapping this as a nested object, But I am not able to write query to search with 2 or more key_id matching its respective value.
This is how I would do it. You need to AND together via bool/filter (or bool/must) two nested queries for each of the condition pair, since you want to match two different nested elements from the same parent document.
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "metadata.profile",
"query": {
"bool": {
"filter": [
{
"term": {
"metadata.profile.f1": "a"
}
},
{
"term": {
"metadata.profile.f2": true
}
}
]
}
}
}
},
{
"nested": {
"path": "metadata.profile",
"query": {
"bool": {
"filter": [
{
"term": {
"metadata.profile.f1": "b"
}
},
{
"term": {
"metadata.profile.f2": false
}
}
]
}
}
}
}
]
}
}
}

Categories