pymongo get id for collection - python

I have this code:
def get_attribute_colour(colour_code):
    attribute_colour_meta = db.attributes.aggregate([
        {'$match': {"name.en-UK": "Colour"}},
        {'$unwind': "$values"},
        {'$project': {"code": "$values.code", "valueId": "$values._id"}},
        {'$match': {"code": colour_code}}])
    return attribute_colour_meta['result']
that looks up a collection called attributes, which has the following structure:
> db.attributes.find({}).pretty();
{
"_id" : ObjectId("53b27bded901f26432996e00"),
"values" : [
{
"code" : "AQ",
"pmsCode" : "638c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "16-4529 TCX",
"hexCode" : "#00aed8",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53b27bded901f26432996d83")
},
{
"code" : "AQ",
"pmsCode" : "3115c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "",
"hexCode" : "#00c4db",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53b27bded901f26432996d84")
},
.....
}
],
"name" : {
"en-UK" : "Colour"
}
}
{
"_id" : ObjectId("53b27bded901f26432996e1b"),
"values" : [
{
"code" : 0,
"_id" : ObjectId("53b27bded901f26432996e01"),
"name" : {
"en-UK" : "0-3 MTHS"
}
},
.....
}
],
"name" : {
"en-UK" : "Size"
}
}
{
"_id" : ObjectId("53b27bded901f26432996e28"),
"values" : [
{
"Currency" : "GBP",
"_id" : ObjectId("53b27bded901f26432996e1c"),
"name" : {
"en-UK" : "Carton price list"
}
},
],
"name" : {
"en-UK" : "Price list"
}
}
>
Basically, there are three attributes (Colour, Size and Price list), each of which has sub-documents in an array called values.
In my get_attribute_colour function, how do I return the _id of the attribute within the results, so that I get something like:
{ attributeId: ObjectId("53b27bded901f26432996e00"),
valueId: ObjectId("53b27bded901f26432996d83") }
The result does return the _id:
[{u'code': u'AQ', u'_id': ObjectId('53b27bded901f26432996e00'), u'valueId': ObjectId('53b27bded901f26432996d83')}]
but I don't see where this is specified.
Any advice much appreciated.
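For what it's worth, the _id shows up because $project includes the parent document's _id by default unless you explicitly suppress it. A minimal sketch (assuming PyMongo 3+, where aggregate returns a cursor rather than a dict with a 'result' key) that renames it to attributeId could look like this:
def get_attribute_colour(colour_code):
    # Sketch only: suppress the default _id and re-expose it as attributeId.
    pipeline = [
        {'$match': {'name.en-UK': 'Colour'}},
        {'$unwind': '$values'},
        {'$project': {'_id': 0,
                      'attributeId': '$_id',
                      'valueId': '$values._id',
                      'code': '$values.code'}},
        {'$match': {'code': colour_code}},
    ]
    return list(db.attributes.aggregate(pipeline))  # cursor -> list of dicts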

Related

Group an elasticsearch query with similar field and fetch both documents but together

I am new to elasticsearch and trying to make queries.
I have an index where, among other fields, there are two fields: Sno and request_sno.
I want to make a query where a document/row with a certain Sno is followed by the document/row whose request_sno is exactly the same as that Sno.
example,
Sno:1, name:'a', address:'b',..., request_sno:''
Sno:2, name:'', address:'',...., request_sno:1
These two should come together, one row followed by the other.
At first I thought of group by, but I don't want aggregation.
Any help will be highly appreciated.
You can resolve this use case with the collapse functionality of Elasticsearch.
To implement collapse, your documents need one shared field to collapse on. So let's create a field called collapse_id which takes the value of request_sno and, if request_sno is empty or null, copies the value of the sno field.
Your final documents will then look like this:
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "kNNEbH4Bb7CAaZKC_vAY",
"_score" : 1.0,
"_source" : {
"sno" : 1,
"name" : "a",
"address" : "b",
"collapse_id" : 1
}
},
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "kdNFbH4Bb7CAaZKC-PBM",
"_score" : 1.0,
"_source" : {
"sno" : 2,
"name" : "a",
"address" : "b",
"request_sno" : 1,
"collapse_id" : 1
}
}
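One way to produce documents in that shape is to compute collapse_id on the client before indexing; a rough sketch with the Python Elasticsearch client (assumed client setup and index name) might be:
# Sketch: fall back to sno when request_sno is empty or null, then bulk index.
from elasticsearch import Elasticsearch, helpers

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

docs = [
    {"sno": 1, "name": "a", "address": "b", "request_sno": None},
    {"sno": 2, "name": "", "address": "", "request_sno": 1},
]

def add_collapse_id(doc):
    doc = dict(doc)
    doc["collapse_id"] = doc.get("request_sno") or doc["sno"]
    return doc

helpers.bulk(es, ({"_index": "collapse", "_source": add_collapse_id(d)} for d in docs))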
You can use the below query to get the collapsed result:
POST collapse/_search
{
"_source": false,
"query": {
"match_all": {}
},
"collapse": {
"field": "collapse_id",
"inner_hits": {
"name": "sno_reqsno_match",
"size": 10
}
}
}
Your result will look like this:
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "kNNEbH4Bb7CAaZKC_vAY",
"_score" : 1.0,
"fields" : {
"collapse_id" : [
1
]
},
"inner_hits" : {
"sno_reqsno_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "kNNEbH4Bb7CAaZKC_vAY",
"_score" : 1.0,
"_source" : {
"sno" : 1,
"name" : "a",
"address" : "b",
"collapse_id" : 1
}
},
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "kdNFbH4Bb7CAaZKC-PBM",
"_score" : 1.0,
"_source" : {
"sno" : 2,
"name" : "a",
"address" : "b",
"request_sno" : 1,
"collapse_id" : 1
}
}
]
}
}
}
},
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "ktNNbH4Bb7CAaZKC5PC8",
"_score" : 1.0,
"fields" : {
"collapse_id" : [
3
]
},
"inner_hits" : {
"sno_reqsno_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "ktNNbH4Bb7CAaZKC5PC8",
"_score" : 1.0,
"_source" : {
"sno" : 3,
"name" : "a",
"address" : "b",
"collapse_id" : 3
}
},
{
"_index" : "collapse",
"_type" : "_doc",
"_id" : "k9NObH4Bb7CAaZKCGfAc",
"_score" : 1.0,
"_source" : {
"sno" : 4,
"name" : "a",
"address" : "b",
"request_sno" : 3,
"collapse_id" : 3
}
}
]
}
}
}
}
]
}
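If you need the pairs from Python rather than Kibana, each collapsed hit carries its partner documents under inner_hits; a small sketch (elasticsearch-py 7.x-style call, assumed local cluster) that prints the sno values of each pair:
# Sketch: read the collapsed response shown above and print each sno pair.
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

body = {
    "_source": False,
    "query": {"match_all": {}},
    "collapse": {
        "field": "collapse_id",
        "inner_hits": {"name": "sno_reqsno_match", "size": 10},
    },
}

resp = es.search(index="collapse", body=body)
for hit in resp["hits"]["hits"]:
    pair = hit["inner_hits"]["sno_reqsno_match"]["hits"]["hits"]
    print([h["_source"]["sno"] for h in pair])  # e.g. [1, 2] then [3, 4]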

KeyError Pandas

I am trying to read nested JSON using the json_normalize method of Pandas, with one of the fields as the record_path. I have also included errors='ignore' to ignore any errors due to missing keys. Can you please help me with what I am doing wrong here?
Here is the JSON -
{
"_id" : "31aa9894-6a43-40f9-8911-116c14c42636",
"message" : {
"serviceOperationName" : "/logUserEvents/event",
"accountNumber" : "1234",
"userId" : null,
"market" : null,
"extract" : {
"request" : {
"USER_EVENT_LOGGING" : {
"payload" : [
{
"eventType" : "audibleSummaryUsage",
"ntid" : "abc",
"accountNumber" : "Not Found",
"workOrderNumber" : "",
"data" : [
{
"name" : "userAction",
"value" : "DISMISSED"
},
{
"name" : "employeeTenure",
"value" : "3.9"
},
{
"name" : "ffc",
"value" : "1234"
},
{
"name" : "ntid",
"value" : "abcd"
},
{
"name" : "isAccountView",
"value" : "true"
},
{
"name" : "userAction",
"value" : "DISMISSED"
},
{
"name" : "title",
"value" : "abcd"
},
{
"name" : "jobType",
"value" : ""
},
{
"name" : "jobClassCd",
"value" : ""
}
]
}
]
}
},
"response" : {}
},
"#timestamp" : "2021-02-18T05:38:48.00269Z",
"eventKeys" : [
"USER_EVENT_LOGGING"
],
"requestStartTimestampText" : "2021-02-18T05:38:48.268Z"
},
"createdOn" : ISODate("2021-02-18T05:38:48.269Z")
}
/* 2 */
{
"_id" : "4189da82-299d-4a9e-8f10-ddb5da9b97b5",
"message" : {
"serviceOperationName" : "/logUserEvents/event",
"accountNumber" : "7890",
"userId" : null,
"market" : null,
"extract" : {
"request" : {
"USER_EVENT_LOGGING" : {
"payload" : [
{
"eventType" : "audibleSummaryUsage",
"ntid" : "defg",
"accountNumber" : "Not Found",
"workOrderNumber" : "",
"data" : [
{
"name" : "userAction",
"value" : "DISMISSED"
},
{
"name" : "userAction",
"value" : "DISMISSED"
},
{
"name" : "employeeTenure",
"value" : "3.9"
},
{
"name" : "jobType",
"value" : ""
},
{
"name" : "jobClassCd",
"value" : ""
},
{
"name" : "ntid",
"value" : "dfer"
},
{
"name" : "ffc",
"value" : "3456"
},
{
"name" : "title",
"value" : "erty"
},
{
"name" : "isAccountView",
"value" : "true"
}
]
}
]
}
},
"response" : {}
},
"#timestamp" : "2021-02-18T05:39:11.00659Z",
"eventKeys" : [
"USER_EVENT_LOGGING"
],
"requestStartTimestampText" : "2021-02-18T05:39:11.658Z"
},
"createdOn" : ISODate("2021-02-18T05:39:11.659Z")
}
Here is the code -
db = mongo_client.conciselogs
col = db.logs
cursor = col.find({"message.extract.request.USER_EVENT_LOGGING.payload.eventType":"audibleSummaryUsage"})
mongo_docs = list(cursor)
df = pd.json_normalize(mongo_docs, ['message.extract.request.USER_EVENT_LOGGING.payload.data'], errors = 'ignore')
df.to_csv('sample_data0220_3.csv', index=False)
Your record_path argument is incorrect; it should be a list of keys:
df = pd.json_normalize(
mongo_docs,
['message', 'extract', 'request', 'USER_EVENT_LOGGING', 'payload', 'data'], # list, not 'key.key.key'
errors='ignore',
)
df.to_csv('sample_data0220_3.csv', index=False)
Output:
name,value
userAction,DISMISSED
employeeTenure,3.9
ffc,1234
ntid,abcd
isAccountView,true
userAction,DISMISSED
title,abcd
jobType,
jobClassCd,
userAction,DISMISSED
userAction,DISMISSED
employeeTenure,3.9
jobType,
jobClassCd,
ntid,dfer
ffc,3456
title,erty
isAccountView,true
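If you also need identifying fields from the parent document in each row, json_normalize accepts nested paths in its meta argument as well; a small sketch reusing the same mongo_docs (field names taken from the documents above):
# Sketch: keep _id and accountNumber alongside every name/value record.
df = pd.json_normalize(
    mongo_docs,
    record_path=['message', 'extract', 'request', 'USER_EVENT_LOGGING', 'payload', 'data'],
    meta=['_id', ['message', 'accountNumber']],
    errors='ignore',
)
print(df.head())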

MongoDB query with projection without nested field that has no fixed parent name

I'm trying to retrieve some documents from MongoDB through a query using projection.
The document looks something like this:
{
"_id": "01",
"country": "EUA",
"created": "2020-09-10T18:12:20.649Z",
"products": {
"0001": {
"id": "0001",
"price": "1.25",
"timestamp": "16004443546",
"class": "com.website.ecommerce.src.main.java.model.product"
},
"0123": {
"id": "0123",
"price": "1.50",
"timestamp": "16004443546",
"class": "com.website.ecommerce.src.main.java.model.product"
},
"0443": {
"id": "00443",
"price": "1.75",
"timestamp": "16004443546",
"class": "com.website.ecommerce.src.main.java.model.product"
}
}
}
I don't need the "class" field to be retrieved, so given a query with 10k+ results, this field represents a big part of the response size.
collection.find({'_id': some_id}, {'products.*._class': 0 })
My guess is that there's some kind of wildcard character that will do the job, but I'm unable to find it.
I tried: , $, $, $**, **
but no success.
// actual output from the mongo shell 4.2.6 on Windows
// prepare the document in a collection called eua, as given in the problem statement
> db.eua.find().pretty();
{
"_id" : "01",
"country" : "EUA",
"created" : "2020-09-10T18:12:20.649Z",
"products" : {
"0001" : {
"id" : "0001",
"price" : "1.25",
"timestamp" : "16004443546",
"class" : "com.website.ecommerce.src.main.java.model.product"
},
"0123" : {
"id" : "0123",
"price" : "1.50",
"timestamp" : "16004443546",
"class" : "com.website.ecommerce.src.main.java.model.product"
},
"0443" : {
"id" : "00443",
"price" : "1.75",
"timestamp" : "16004443546",
"class" : "com.website.ecommerce.src.main.java.model.product"
}
}
}
// use an aggregate $project stage to first convert the products object to an array
// use $project in the 2nd stage to hide the class field
// then convert the array back to an object, with the required fields marked as 1
> db.eua.aggregate([
...
... {
... $project: {
... _id: 1,
... country: 1,
... created: 1,
... prodToArray: {
... $objectToArray: "$products"
... }
... }
... },
... {
... $project: {
... "prodToArray.v.class": 0
... }
... },
... {
... $project:{
... _id: 1,
... country: 1,
... created: 1,
... products:{
... $arrayToObject:"$prodToArray"
... }
... }
... }
... ]).pretty()
{
"_id" : "01",
"country" : "EUA",
"created" : "2020-09-10T18:12:20.649Z",
"products" : {
"0001" : {
"id" : "0001",
"price" : "1.25",
"timestamp" : "16004443546"
},
"0123" : {
"id" : "0123",
"price" : "1.50",
"timestamp" : "16004443546"
},
"0443" : {
"id" : "00443",
"price" : "1.75",
"timestamp" : "16004443546"
}
}
}
>
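If you need this from Python rather than the shell, the same pipeline translates fairly directly to PyMongo (a sketch, assuming a db handle with the eua collection):
# Sketch: PyMongo version of the three $project stages shown above.
pipeline = [
    {"$project": {"_id": 1, "country": 1, "created": 1,
                  "prodToArray": {"$objectToArray": "$products"}}},
    {"$project": {"prodToArray.v.class": 0}},
    {"$project": {"_id": 1, "country": 1, "created": 1,
                  "products": {"$arrayToObject": "$prodToArray"}}},
]
docs = list(db.eua.aggregate(pipeline))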

Matching / Mapping lists with elasticsearch

There is a list in mongodb,
eg:
db_name = "Test"
collection_name = "Map"
db.Map.findOne()
{
"_id" : ObjectId(...),
"Id" : "576",
"FirstName" : "xyz",
"LastName" : "abc",
"skills" : [
"C++",
"Java",
"Python",
"MongoDB",
]
}
There is a list in elastcisearch index (I am using kibana to execute queries)
GET /user/_search
{
"took" : 31,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 7,
"max_score" : 1.0,
"hits" : [
{
"_index" : "customer",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"name" : "xyz abc"
"Age" : 21,
"skills" : [
"C++",
"Java",
"Python",
"MongoDB",
]
}
},
]
}
}
Can anyone help with an Elasticsearch query that will match both of the records based on skills?
I am using Python to write the code.
If a match is found, I am trying to get the first name and last name of that user:
First name : "xyz"
Last name : "abc"
Assuming you are indexing all the documents in Elasticsearch, and of those you want to match documents where skills contains both java and mongodb, the query will be as follows:
{
"query": {
"bool": {
"filter": [
{
"term": {
"skills": "mongodb"
}
},
{
"term": {
"skills": "java"
}
}
]
}
}
}
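From Python, the same query can be sent with the official client; a sketch (assumed index name customer from the output above, local cluster, elasticsearch-py 7.x-style call):
# Sketch: run the bool/term query above and print the matched names.
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

query = {
    "query": {
        "bool": {
            "filter": [
                {"term": {"skills": "mongodb"}},
                {"term": {"skills": "java"}},
            ]
        }
    }
}

resp = es.search(index="customer", body=query)
for hit in resp["hits"]["hits"]:
    print(hit["_source"].get("name"))  # e.g. "xyz abc"; split for first/last name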

pymongo extract objectID from find

Hello, I have the following MongoDB collection:
> db.attributes.find().pretty()
{
"_id" : ObjectId("53a4445fd901f278f8685b91"),
"values" : [
{
"code" : "AQ",
"pmsCode" : "638c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "16-4529 TCX",
"hexCode" : "#00aed8",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53a4445fd901f278f8685b17")
},
{
"code" : "AQ",
"pmsCode" : "3115c",
"name" : {
"en-UK" : "Aqua"
},
"tcxCode" : "",
"hexCode" : "#00c4db",
"images" : [
"AQ.jpg"
],
"_id" : ObjectId("53a4445fd901f278f8685b18")
}],
"name" : {
"en-UK" : "Colour"
}
}
{
"_id" : ObjectId("53a4445fd901f278f8685bac"),
"values" : [
{
"code" : 0,
"_id" : ObjectId("53a4445fd901f278f8685b92"),
"name" : {
"en-UK" : "0-3 MTHS"
}
}, {
"code" : 0,
"_id" : ObjectId("53a4445fd901f278f8685b93"),
"name" : {
"en-UK" : "ONE SIZE"
}
}
],
"name" : {
"en-UK" : "Size"
}
}
Basically, this is a collection that has two objects, Colour and Size, each of which has sub-objects called values.
What is the correct way to find the ObjectId for a specific Colour values code using pymongo?
I have this: attribute_id = attributes.find({"values.code": product_color_code}), but how do I extract the actual ObjectId from this?
Any advice much appreciated.
You can try the SQL-style approach of SELECT _id FROM table_name GROUP BY _id HAVING some_condition_on_colour.
In MongoDB using Python, you can do the following:
ideas.aggregate([
    {"$match": {"colour": "some_Color_of_ur_choice"}},
    {"$group": {"_id": "$_id", "count": {"$sum": 1}}}
])
This will even help you count the number of occurrences of colours per ObjectId.
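As a plainer alternative, find() in PyMongo returns a cursor of dicts, so the parent and nested ObjectIds can be picked out directly; a minimal sketch (not from the original answer, variable names as in the question):
# Sketch: doc["_id"] is the attribute's ObjectId; the matching value's _id is
# filtered out of the embedded values array in Python.
for doc in db.attributes.find({"values.code": product_color_code}):
    attribute_id = doc["_id"]
    value_ids = [v["_id"] for v in doc["values"] if v.get("code") == product_color_code]
    print(attribute_id, value_ids)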
