Update dictionary with nested dictionary - python

I have this json file used to list material by ref -> color and size:
{
"base": {
"ref": {
"3021000": {
"color": {
"bleu azur": {
"size": {
"01": "3021000-80300-01",
"13": "3021000-80300-13",
"12": "3021000-80300-12",
"36": "3021000-80300-36"
}
}
}
}
},
"customer_ref": {}
}
}
With a program I load the JSON as a dict and search it to find the full ref corresponding to a given size (for example, the full ref for the material 3021000 bleu azur 01 is 3021000-80300-01).
It's working like a charm, but now, if I have a material with: ref=3021000, color=blanc and size=01, it doesn't exist in the dict, so I would like to insert the missing key - value: {"blanc": {"size": {"01": "corresponding full ref"}}}
I tried this:
ref = "3021000"
color = "blanc"
size = "01"
full_ref = "corresponding full ref"
missing_data = {ref: {"color": {color: {"size": {size: full_ref}}}}}
data["base"]["ref"] = missing_data
but it overwrites the dictionary; what I would like is to update the dict, not overwrite it.

How about this?
import json

# Nested material catalogue: ref -> "color" -> color name -> "size" -> size
# code -> full reference string.
d = {
    "base": {
        "ref": {
            "3021000": {
                "color": {
                    "bleu azur": {
                        "size": {
                            "01": "3021000-80300-01",
                            "13": "3021000-80300-13",
                            "12": "3021000-80300-12",
                            "36": "3021000-80300-36"
                        }
                    }
                }
            }
        },
        "customer_ref": {}
    }
}
ref = "3021000"
color = "blanc"
size = "01"
full_ref = "corresponding full ref"

# Walk down to the size table, creating each missing level on the way.
# Unlike `.update({color: {...}})`, this neither raises KeyError when the
# ref is new nor discards the sizes already stored for an existing color:
# only the single `size` entry is written.
sizes = (
    d["base"]["ref"]
    .setdefault(ref, {})
    .setdefault("color", {})
    .setdefault(color, {})
    .setdefault("size", {})
)
sizes[size] = full_ref
print(json.dumps(d, indent=2))
Output:
{
"base": {
"ref": {
"3021000": {
"color": {
"bleu azur": {
"size": {
"01": "3021000-80300-01",
"13": "3021000-80300-13",
"12": "3021000-80300-12",
"36": "3021000-80300-36"
}
},
"blanc": {
"size": {
"01": "corresponding full ref"
}
}
}
}
},
"customer_ref": {}
}
}

Related

Sort an array by occurrences mongodb

Is it possible to sort an array by occurrences?
For Example, given
{
"_id": {
"$oid": "60d20d342c7951852a21s53a"
},
"site": "www.xyz.ie",
"A": ["mary", "jamie", "john", "mary", "mary", "john"],
}
return
{
"_id": {
"$oid": "60d20d342c7951852a21s53a"
},
"site": "www.xyz.ie",
"A": ["mary", "jamie", "john", "mary", "mary", "john"],
"sorted_A" : ["mary","john","jamie"]
}
I am able to get it most of the way there but I cannot figure out how to join them all back together in an array.
I have been using an aggregation pipeline
Starting with $match to find the site I want
Then $unwind on with path: "$A"
Next $sortByCount on "$A"
???? I can't figure out how to group it all back together.
Here is the pipeline:
[
{
'$match': {
'site': 'www.xyz.ie'
}
}, {
'$unwind': {
'path': '$A'
}
}, {
'$sortByCount': '$A'
}, {
????
}
]
$group by _id and A, get first site and count total elements
$sort by count in descending order
$group by only _id and get first site, and construct array of A
[
{ $match: { site: "www.xyz.ie" } },
{ $unwind: "$A" },
{
$group: {
_id: { _id: "$_id", A: "$A" },
site: { $first: "$site" },
count: { $sum: 1 }
}
},
{ $sort: { count: -1 } },
{
$group: {
_id: "$_id._id",
site: { $first: "$site" },
A: { $push: "$_id.A" }
}
}
]
Playground

MongoDB Aggregation Attribute Pattern Pipeline/Query

I have Attribute Patterned (https://www.mongodb.com/blog/post/building-with-patterns-the-attribute-pattern) field that looks like this:
"cmr_diag": [{
"name": "shd?",
"value": {
"$numberDouble": "1"
}
}, {
"name": "ischemic_hd",
"value": {
"$numberDouble": "1"
}
}, {
"name": "non-ischemic_dcmp",
"value": {
"$numberDouble": "1"
}
}, {
"name": "myocarditis",
"value": {
"$numberDouble": "0"
}
}, {
"name": "hcm",
"value": {
"$numberDouble": "0"
}
}, {
"name": "amyloidosis",
"value": {
"$numberDouble": "0"
}
}, {
"name": "toxic_cmp",
"value": {
"$numberDouble": "1"
}
.
.
.
I'd like to create an aggregation pipeline that finds all patients with ONLY ischemic_hd, while all other possible illnesses are 0. I am not sure how to create this query however?
You can use $elemMatch to identify patients with a specific attribute.
If you want to exclude everything else, use $reduce to sum up the value of all of the attributes, and match where count = 1.
db.collection.aggregate([
  // Keep only patients whose "ischemic_hd" attribute is set.
  // NOTE(review): assumes the sample document is an Extended JSON export, in
  // which `{ "$numberDouble": "1" }` is just the rendering of the double 1 and
  // not a stored sub-document -- confirm against the real collection.
  {$match: {
    cmr_diag: {
      $elemMatch: {
        name: "ischemic_hd",
        value: 1
      }
    }
  }},
  // Add up every attribute value; with 0/1 flags this is the number of
  // diagnoses that are set for the patient.
  {$addFields: {
    diagcount: {
      $reduce: {
        input: "$cmr_diag",
        initialValue: 0,
        // Field paths cannot contain a component starting with "$", so the
        // numeric value is read directly from `value`.
        in: {$sum: ["$$value", "$$this.value"]}
      }
    }
  }},
  // Exactly one diagnosis set, and the first stage guarantees it is ischemic_hd.
  {$match: { diagcount: 1}}
])

Filter MongoDB query to find documents only if a field in a list of objects is not empty

I have a MongoDB document structure like following:
Structure
{
"stores": [
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": [],
"item_category": "101",
"item_id": "11"
}
]
},
{
"items": [
{
"feedback": [],
"item_category": "101",
"item_id": "10"
},
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
},
{
"feedback": [],
"item_category": "101",
"item_id": "12"
},
{
"feedback": [],
"item_category": "102",
"item_id": "13"
},
{
"feedback": [],
"item_category": "102",
"item_id": "14"
}
],
"store_id": 500
}
]
}
This is a single document in a collection. Some fields were removed to produce a minimal representation of the data.
What I want is to get items only if the feedback field in the items array is not empty. The expected result is:
Expected result
{
"stores": [
{
"items": [
{
"feedback": ["A feedback"],
"item_category": "101",
"item_id": "11"
}
],
"store_id": 500
}
]
}
This is what I tried, based on the examples in this link, which I think describe pretty much the same situation, but it didn't work. What's wrong with my query? Isn't it the same situation as the zipcode search example in the link? It returns everything, as in the first JSON block (Structure):
What I tried
query = {
'date': {'$gte': since, '$lte': until},
'stores.items': {"$elemMatch": {"feedback": {"$ne": []}}}
}
Thanks.
Please try this :
db.yourCollectionName.aggregate([
{ $match: { 'date': { '$gte': since, '$lte': until }, 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores' },
{ $match: { 'stores.items': { "$elemMatch": { "feedback": { "$ne": [] } } } } },
{ $unwind: '$stores.items' },
{ $match: { 'stores.items.feedback': { "$ne": [] } } },
{ $group: { _id: { _id: '$_id', store_id: '$stores.store_id' }, items: { $push: '$stores.items' } } },
{ $project: { _id: '$_id._id', store_id: '$_id.store_id', items: 1 } },
{ $group: { _id: '$_id', stores: { $push: '$$ROOT' } } },
{ $project: { 'stores._id': 0 } }
])
All of these stages are needed because you are operating on an array of arrays. The query is written on the assumption that you are dealing with a large set of data. Since you are already filtering on dates, if the number of documents left after the first $match is small, you can drop the second $match stage, the one that sits between the two $unwind stages.
Ref 's :
$match,
$unwind,
$project,
$group
This aggregate query gets the needed result (using the provided sample document and run from the mongo shell):
db.stores.aggregate( [
{ $unwind: "$stores" },
{ $unwind: "$stores.items" },
{ $addFields: { feedbackExists: { $gt: [ { $size: "$stores.items.feedback" }, 0 ] } } },
{ $match: { feedbackExists: true } },
{ $project: { _id: 0, feedbackExists: 0 } }
] )

Create dynamic json object in python

I have a dictionary that contains multiple keys and values, and the values themselves contain key-value pairs. I can't figure out how to create dynamic JSON from this dictionary in Python. Here's the dictionary:
image_dict = {"IMAGE_1":{"img0":"IMAGE_2","img1":"IMAGE_3","img2":"IMAGE_4"},"IMAGE_2":{"img0":"IMAGE_1", "img1" : "IMAGE_3"},"IMAGE_3":{"img0":"IMAGE_1", "img1":"IMAGE_2"},"IMAGE_4":{"img0":"IMAGE_1"}}
My expected result like this :
{
"data": [
{
"image": {
"imageId": {
"id": "IMAGE_1"
},
"link": {
"target": {
"id": "IMAGE_2"
},
"target": {
"id": "IMAGE_3"
},
"target": {
"id": "IMAGE_4"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_2"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_3"
}
}
},
"updateData": "link"
},
{
"image": {
"imageId": {
"id": "IMAGE_3"
},
"link": {
"target": {
"id": "IMAGE_1"
},
"target": {
"id": "IMAGE_2"
}
}
},
"updateData": "link"
} ,
{
"image": {
"imageId": {
"id": "IMAGE_4"
},
"link": {
"target": {
"id": "IMAGE_1"
}
}
},
"updateData": "link"
}
]
}
I tried to solve it but I didn't get expected result.
result = {"data":[]}
for k,v in sorted(image_dict.items()):
for a in sorted(v.values()):
result["data"].append({"image":{"imageId":{"id": k},
"link":{"target":{"id": a}}},"updateData": "link"})
print(json.dumps(result, indent=4))
In Python dictionaries you can't have 2 values with the same key. So you can't have multiple targets all called "target". So you can index them. Also I don't know what this question has to do with dynamic objects but here's the code I got working:
import re

# Builds a dict with uniquely numbered keys ("image0", "target0", ...) and then
# strips the numbers from its string form, because a Python dict cannot hold
# several entries under the same key.
# NOTE: relies on `image_dict` as defined in the question.
dict_res = {}
for ind, (image, links) in enumerate(image_dict.items()):
    image_key = 'image' + str(ind)
    # 'imageId' must map to {'id': ...}; a bare `{image}` would be a set.
    sub_dict = {image_key: {'imageId': {'id': image}, 'link': {}}}
    for lin_ind, sub in enumerate(links.values()):
        sub_dict[image_key]['link']['target' + str(lin_ind)] = {'id': sub}
    dict_res.update(sub_dict)
# Raw strings keep `\d` a regex escape; `\d+` also strips indexes >= 10.
dict_res = re.sub(r'target\d+', 'target', re.sub(r'image\d+', 'image', str(dict_res)))
print(dict_res)

Convert float string to float in json

I have a JSON file (test.json) with the data below. I have around 10000 records. I need to convert each value from string to float and write the result to a new file (test1.json). How can I do this in Python?
{
"name":"test001",
"cat":"test",
"loc":"x loc",
"ings":[
{
"name":"rrrrrr",
"value":"13.0"
},
{
"name":"hhhh",
"value":"18.0"
}
],
"nums":[
{
"name":"kkkk",
"value":"82.05"
},
{
"name":"uuuuu",
"value":"53.55"
}
]
},
{
"name":"test002",
"cat":"test1",
"loc":"y loc",
"ings":[
{
"name":"trtrtr",
"value":"11.0"
},
{
"name":"wewew",
"value":"19.0"
}
],
"nums":[
{
"name":"iuyt",
"value":"122.05"
},
{
"name":"oiui",
"value":"15.5"
}
]
}
resulting json file(test1.json) should be like below...
{
"name":"test001",
"cat":"test",
"loc":"x loc",
"ings":[
{
"name":"rrrrrr",
"value":13.0
},
{
"name":"hhhh",
"value":18.0
}
],
"nums":[
{
"name":"kkkk",
"value":82.05
},
{
"name":"uuuuu",
"value":53.55
}
]
},
{
"name":"test002",
"cat":"test1",
"loc":"y loc",
"ings":[
{
"name":"trtrtr",
"value":11.0
},
{
"name":"wewew",
"value":19.0
}
],
"nums":[
{
"name":"iuyt",
"value":122.05
},
{
"name":"oiui",
"value":15.5
}
]
}
You can provide an object_hook to the json.loads method which will allow you to modify any object (dicts) found within the json:
import json
json_data = """
[{
"name":"test001",
"cat":"test",
"loc":"x loc",
"ings":[
{
"name":"rrrrrr",
"value":"13.0"
},
{
"name":"hhhh",
"value":"18.0"
}
],
"nums":[
{
"name":"kkkk",
"value":"82.05"
},
{
"name":"uuuuu",
"value":"53.55"
}
]
},
{
"name":"test002",
"cat":"test1",
"loc":"y loc",
"ings":[
{
"name":"trtrtr",
"value":"11.0"
},
{
"name":"wewew",
"value":"19.0"
}
],
"nums":[
{
"name":"iuyt",
"value":"122.05"
},
{
"name":"oiui",
"value":"15.5"
}
]
}]
"""
def as_float(obj):
    """Convert the "value" entry of a decoded JSON object to a float.

    Meant to be passed as ``object_hook`` to ``json.loads``, which calls it
    with each decoded JSON object.

    Args:
        obj (dict): The object to decode
    Returns:
        dict: The same dictionary, with "value" replaced by its float form
        when it can be parsed as one; otherwise left unchanged.
    """
    if "value" in obj:
        try:
            obj["value"] = float(obj["value"])
        except (TypeError, ValueError):
            # A non-numeric "value" (null, free text, a nested object) is kept
            # as-is so one odd record does not abort decoding of the whole file.
            pass
    return obj
if __name__ == '__main__':
l = json.loads(json_data, object_hook=as_float)
print (json.dumps(l, indent=4))
This results in what you want:
[
{
"loc": "x loc",
"ings": [
{
"name": "rrrrrr",
"value": 13.0
},
{
"name": "hhhh",
"value": 18.0
}
],
"name": "test001",
"nums": [
{
"name": "kkkk",
"value": 82.05
},
{
"name": "uuuuu",
"value": 53.55
}
],
"cat": "test"
},
{
"loc": "y loc",
"ings": [
{
"name": "trtrtr",
"value": 11.0
},
{
"name": "wewew",
"value": 19.0
}
],
"name": "test002",
"nums": [
{
"name": "iuyt",
"value": 122.05
},
{
"name": "oiui",
"value": 15.5
}
],
"cat": "test1"
}
]
To write to a file instead:
with open("out.json", "w+") as out:
json.dump(l, out, indent=4)
You would need to recursively traverse the data and convert anything that looks like a float to a float:
def fix_floats(data):
    """Recursively convert float-like strings inside *data* to floats, in place.

    Nested lists and dicts are traversed; every string value that ``float``
    can parse to a finite number is replaced by that number. Other values are
    left untouched.

    Args:
        data (list | dict): The container to fix up; it is mutated.
    Returns:
        None
    Raises:
        TypeError: If *data* is neither a list nor a dict.
    """
    import math  # local import keeps the snippet self-contained

    if isinstance(data, list):
        iterator = enumerate(data)
    elif isinstance(data, dict):
        iterator = data.items()
    else:
        raise TypeError("can only traverse list or dict")
    for i, value in iterator:
        if isinstance(value, (list, dict)):
            fix_floats(value)
        elif isinstance(value, str):
            try:
                number = float(value)
            except ValueError:
                continue
            # float() also accepts "nan", "inf" and overflowing literals such
            # as "1e999"; converting those would corrupt ordinary text fields
            # and make json.dump emit NaN/Infinity, which is not valid JSON.
            if math.isfinite(number):
                data[i] = number
It should do the trick:
my_data = [
{ "name" : "rrrrrr",
"value" : "13.0" },
{ "name" : "hhhh",
"value" : "18.0" },
]
fix_floats(my_data)
>>> my_data
[{'name': 'rrrrrr', 'value': 13.0}, {'name': 'hhhh', 'value': 18.0}]
If you have a single or specific key value object, you can reiterate the value containing alphabetical strings or numerical strings, then map and check against their type with string.isnumeric():
# Named `raw` rather than `dict` so the builtin type is not shadowed.
raw = { 'a':'100', 'b':'200', 'c':'300', 'd':'four_hundred', 'e':'500' }
# str.isdecimal() is deliberately stricter than str.isnumeric(): the latter is
# also true for characters such as "½" or "²", which int() cannot parse.
dict_parse = {k: int(v) if v.isdecimal() else v for k, v in raw.items()}
>>> dict_parse
{ 'a': 100, 'b': 200, 'c': 300, 'd':'four_hundred', 'e':500}
When dealing with float numbers, amend the condition to strip the decimal point before the check; you can apply the same principle to negative numbers:
# Named `raw` rather than `dict` so the builtin type is not shadowed.
raw = { 'a':'10.0', 'b':'20.12', 'c':'300.3', 'd':'four_hundred', 'e':'500' }
# Only ONE decimal point is stripped before the digit check, so a string like
# "1.2.3" is left as text instead of reaching float() and raising ValueError.
# isdecimal() (not isnumeric()) keeps characters such as "½" out as well.
dict_parse = {
    k: float(v) if v.replace(".", "", 1).isdecimal() else v
    for k, v in raw.items()
}
>>> dict_parse
{ 'a': 10.0, 'b': 20.12, 'c': 300.3, 'd':'four_hundred', 'e':500.0}

Categories