Create dictionary from difference of two dictionaries - python

Suppose I have a dictionary,
dictA = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3
}
}
and if dictA is updated (say to dictB)
dictB = {
'flower':
{
'jasmine': 10,
'roses':
{
'red': 1,
'white': 2
}
},
'fruit':
{
'apple':3,
'orange': 4
}
}
Now, how would I get a dictionary of only the newly added items (preserving the structure), something like:
difference(dictB, dictA) = {'fruit': {'orange': 4}}
by this way, I would avoid storing redundant items each time and instead have a smaller dictionary showing only newly added items
This kind of dictionary manipulation has a lot of practical uses, but unfortunately it is hard to do.
Any help would be much appreciated and Thanks in advance

Use DictDiffer:
from dictdiffer import diff, patch, swap, revert

# State before the update.
dictA = {
    'flower': {
        'jasmine': 10,
        'roses': {
            'red': 1,
            'white': 2,
        },
    },
    'fruit': {
        'apple': 3,
    },
}

# State after the update: identical except for the new 'orange' entry.
dictB = {
    'flower': {
        'jasmine': 10,
        'roses': {
            'red': 1,
            'white': 2,
        },
    },
    'fruit': {
        'apple': 3,
        'orange': 4,
    },
}

# diff() produces its entries lazily, so materialise them exactly once.
# Iterating the raw result for printing would exhaust it and leave patch()
# below with nothing to apply.
result = list(diff(dictA, dictB))
# [('add', 'fruit', [('orange', 4)])]
print(f'Diffrence :\n{result}')

# patch() returns a patched copy; dictA itself is left untouched.
patched = patch(result, dictA)
# {'flower': {'jasmine': 10, 'roses': {'red': 1, 'white': 2}}, 'fruit': {'apple': 3, 'orange': 4}}
print(f'Apply diffrence :\n{patched}')

Related

How to filter field in array in MongoDB

I have array:
{'_id': ObjectId('7262718c217dda3ad90ef151'), 'SomeText': "aaa", 'items': [{'status': 500, 'x': 1, 'y': None, 'price': 3}, { 'status': 500, 'x': 2, 'y': None, 'price': 4}]}
{'_id': ObjectId('7262718c217dda3ad90ef152'), 'SomeText': "bbb", 'items': [{'status': 300, 'x': 1, 'y': 2, 'price': 7}, { 'status': 500, 'x': 2, 'y': 2, 'price': 5}]}
{'_id': ObjectId('7262718c217dda3ad90ef153'), 'SomeText': "ccc", 'items': [{'status': 300, 'x': 1, 'y': 1, 'price': 8}]}
I need to return only the array items where x is not equal to y, and only the fields "SomeText", "x" and "y".
Query:
# Select documents whose list of item x values differs from its list of
# item y values, returning only SomeText plus each item's x and y.
query_filter = {"$expr": {"$ne": ["$items.x", "$items.y"]}}
projection = {"_id": 0, "SomeText": 1, "items.x": 1, "items.y": 1}

fx = mycol.find(query_filter, projection).limit(3)
for x in fx:
    print(x)
This returns each matching document with ALL of its array items, but I'd like to get only the array items where x is not equal to y:
{'SomeText': "aaa", 'items': [{'x': 1, 'y': None}, {'x': 2, 'y': None}]}
{'SomeText': "bbb", 'items': [{'x': 1, 'y': 2}, {'x': 2, 'y': 2}]}
Query:
# Stage 1: keep documents whose list of x values differs from its list of
# y values.
match_stage = {"$match": {"$expr": {"$ne": ["$items.x", "$items.y"]}}}

# Stage 2: within each document keep only the items whose own x and y
# differ, and expose SomeText without _id.
project_stage = {
    "$project": {
        "items": {
            "$filter": {
                "input": '$items',
                "as": 'item',
                "cond": {"$ne": ["$$item.x", "$$item.y"]},
            },
        },
        "_id": 0,
        "SomeText": 1,
    },
}

# Stage 3: cap the number of returned documents.
limit_stage = {"$limit": 5}

fx = mycol.aggregate([match_stage, project_stage, limit_stage])
for x in fx:
    print(x)
Returns document with "status" and "price"
{'SomeText': "aaa", 'items': [{'status': 500, 'x': 1, 'y': None, 'price': 3}, { 'status': 500, 'x': 2, 'y': None, 'price': 4}]}
{'SomeText': "bbb", 'items': [{'status': 300, 'x': 1, 'y': 2, 'price': 7}]}
Can I filter element in array to get result?
{'SomeText': "aaa", 'items': [{'x': 1, 'y': None,}, {'x': 2, 'y': None,}]}
{'SomeText': "bbb", 'items': [{'x': 1, 'y': 2}]}
Just add a $map step to your current query:
// Pipeline: select documents with mismatching x/y, keep only the mismatching
// array items, then strip every item down to its x and y fields.
db.collection.aggregate([
// Stage 1: keep documents where the array of items.x values is not equal to
// the array of items.y values.
{
$match: {$expr: {$ne: ["$items.x", "$items.y"]}}
},
// Stage 2: within each document keep only the items whose own x differs from
// their own y; output SomeText and the filtered items, without _id.
{
$project: {
items: {
$filter: {
input: "$items",
as: "item",
cond: {$ne: ["$$item.x", "$$item.y"]}
}
},
_id: 0,
SomeText: 1
}
},
// Stage 3: rebuild every remaining item with just x and y, which drops the
// other item fields (e.g. status, price).
{
$set: {
items: {
$map: {
input: "$items",
as: "item",
in: {x: "$$item.x", y: "$$item.y"}
}
}
}
},
// Stage 4: return at most 5 documents.
{
$limit: 5
}
])
See how it works on the playground example

Sort nested list of dicts by a dict value

I have the following structure:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'c': 'third',
'price': 3
},
{
'b': 'second',
'price': 2
},
{
'a': 'first',
'price': 1
}
]
},
2: {
'list': [
{
'f': 'sixth',
'price': 6
},
{
'e': 'fifth',
'price': 5
},
{
'd': 'fourth',
'price': 4
}
]
}
}
}
}
}
I need to order each list by price, ascending. The result should be:
d = {
'futures': {
'test': {
'nested': {
1: {
'list': [
{
'a': 'first',
'price': 1
},
{
'b': 'second',
'price': 2
},
{
'c': 'third',
'price': 3
},
]
},
2: {
'list': [
{
'd': 'fourth',
'price': 4
},
{
'e': 'fifth',
'price': 5
},
{
'f': 'sixth',
'price': 6
}
]
}
}
}
}
}
None of the questions I've found fits my needs because of this particular structure.
Is there a way to order it without having to access each previous keys? Because on my project I have cases with more nested keys before the list, so I need a dynamic solution for sorting it.
I mean, I don't know the exact path to the list, only the list's key.
Make a function to recursively traverse your dict looking for lists, and sort each one based on your criteria:
def find_and_sort_lists(d, sort_key='price'):
    """Recursively sort, in place, every list stored as a value in *d*.

    Nested dicts are searched to any depth. Dicts that sit inside a list are
    not descended into.

    Args:
        d: The (possibly nested) dict to walk.
        sort_key: Key read from each list element to order the list in
            ascending order. Defaults to ``'price'``.

    Returns:
        None. The lists are reordered in place.

    Raises:
        KeyError: If an element of a list has no *sort_key* entry.
    """
    for value in d.values():
        if isinstance(value, list):
            value.sort(key=lambda nested_d: nested_d[sort_key])
        elif isinstance(value, dict):
            find_and_sort_lists(value, sort_key)
If it's a requirement to sort only lists whose key is actually 'list', you can use the following:
def find_and_sort_lists(d, list_key='list', sort_key='price'):
    """Recursively sort, in place, the lists stored under *list_key* in *d*.

    Nested dicts are searched to any depth. Lists stored under any other key
    are left untouched, and dicts inside lists are not descended into.

    Args:
        d: The (possibly nested) dict to walk.
        list_key: Only lists stored under this key are sorted. Defaults to
            ``'list'``.
        sort_key: Key read from each list element to order the list in
            ascending order. Defaults to ``'price'``.

    Returns:
        None. The lists are reordered in place.

    Raises:
        KeyError: If an element of a sorted list has no *sort_key* entry.
    """
    for key, value in d.items():
        if key == list_key and isinstance(value, list):
            value.sort(key=lambda nested_d: nested_d[sort_key])
        elif isinstance(value, dict):
            find_and_sort_lists(value, list_key, sort_key)

Python sum list of dicts by key with nested dicts

I have a list of dicts and would like to design a function to output a new dict which contains the sum for each unique key across all the dicts in the list.
For the list:
[
{
'apples': 1,
'oranges': 1,
'grapes': 2
},
{
'apples': 3,
'oranges': 5,
'grapes': 8
},
{
'apples': 13,
'oranges': 21,
'grapes': 34
}
]
So far so good, this can be done with a counter:
def sumDicts(listToProcess):
    """Return a plain dict holding, for every key, the sum of its values.

    Each dict in *listToProcess* is folded into one running Counter, so the
    values must support addition with one another.
    """
    totals = Counter()
    for mapping in listToProcess:
        totals.update(mapping)
    summed = dict(totals)
    return summed
Which correctly returns:
{'apples': 17, 'grapes': 44, 'oranges': 27}
The trouble comes when the dicts in my list start to contain nested dicts:
[
{
'fruits': {
'apples': 1,
'oranges': 1,
'grapes': 2
},
'vegetables': {
'carrots': 6,
'beans': 3,
'peas': 2
},
'grains': 4,
'meats': 1
},
{
'fruits': {
'apples': 3,
'oranges': 5,
'grapes': 8
},
'vegetables': {
'carrots': 7,
'beans': 4,
'peas': 3
},
'grains': 3,
'meats': 2
},
{
'fruits': {
'apples': 13,
'oranges': 21,
'grapes': 34
},
'vegetables': {
'carrots': 8,
'beans': 5,
'peas': 4
},
'grains': 2,
'meats': 3
},
]
Now the same function will give a TypeError because the counter can't add two Dicts.
The desired result would be:
{
'fruits': {
'apples': 17,
'oranges': 27,
'grapes': 44
},
'vegetables': {
'carrots': 21,
'beans': 12,
'peas': 9
},
'grains': 9,
'meats': 6
}
Any ideas on how to do this in a reasonably efficient, Pythonic, generalizable way?
I would do this by performing a recursive merge on a recursively defined collections.defaultdict object.
from collections import defaultdict
def merge(d, new_d):
    """Recursively add the leaf values of *new_d* into *d*, in place.

    Args:
        d: Accumulator mapping. May be a plain dict or an auto-vivifying
            defaultdict; missing sub-dicts are created as needed.
        new_d: Mapping whose values are either numbers (added to the
            running total for that key) or nested dicts (merged
            recursively).

    Returns:
        None. *d* is updated in place.
    """
    for k, v in new_d.items():
        if isinstance(v, dict):
            # A nested defaultdict creates the child on lookup; a plain
            # dict raises KeyError, in which case start a fresh child.
            try:
                child = d[k]
            except KeyError:
                child = d[k] = {}
            merge(child, v)
        else:
            d[k] = d.get(k, 0) + v
# https://stackoverflow.com/a/19189356/4909087
def nested():
    """Return a defaultdict whose missing keys become further nested defaultdicts."""
    return defaultdict(nested)


def default_to_regular(d):
    """Recursively convert nested defaultdicts back into plain dicts.

    Values that are not defaultdicts are returned unchanged.
    """
    if isinstance(d, defaultdict):
        d = {k: default_to_regular(v) for k, v in d.items()}
    return d


# Fold every input dict into one auto-vivifying accumulator.
d = nested()
for subd in data:
    merge(d, subd)
Using default_to_regular to convert it back, we have:
default_to_regular(d)
# {
# "fruits": {
# "apples": 17,
# "oranges": 27,
# "grapes": 44
# },
# "vegetables": {
# "carrots": 21,
# "beans": 12,
# "peas": 9
# },
# "grains": 9,
# "meats": 6
# }
You can use recursion. This solution finds all the dictionary keys in the input passed to merge, and then sums the values for each key if the values are integers. If the values are dictionaries, however, merge is called again:
import json


def merge(c):
    """Sum a list of (possibly nested) dicts key by key.

    Args:
        c: List of dicts. A value is either a number or a nested dict;
            nested dicts are merged recursively.

    Returns:
        A new dict containing every key that appears in any input dict,
        in first-seen order. Keys missing from some inputs are summed over
        the dicts that do contain them.
    """
    # dict.fromkeys keeps first-seen order, so the result layout is
    # deterministic (iteration order of a set of strings is not).
    keys = dict.fromkeys(key for mapping in c for key in mapping)
    merged = {}
    for key in keys:
        # Not every input is guaranteed to carry every key.
        values = [mapping[key] for mapping in c if key in mapping]
        if isinstance(values[0], dict):
            merged[key] = merge(values)
        else:
            merged[key] = sum(values)
    return merged


d = [
    {
        'fruits': {'apples': 1, 'oranges': 1, 'grapes': 2},
        'vegetables': {'carrots': 6, 'beans': 3, 'peas': 2},
        'grains': 4,
        'meats': 1,
    },
    {
        'fruits': {'apples': 3, 'oranges': 5, 'grapes': 8},
        'vegetables': {'carrots': 7, 'beans': 4, 'peas': 3},
        'grains': 3,
        'meats': 2,
    },
    {
        'fruits': {'apples': 13, 'oranges': 21, 'grapes': 34},
        'vegetables': {'carrots': 8, 'beans': 5, 'peas': 4},
        'grains': 2,
        'meats': 3,
    },
]
print(json.dumps(merge(d), indent=4))
Output:
{
"meats": 6,
"grains": 9,
"fruits": {
"grapes": 44,
"oranges": 27,
"apples": 17
},
"vegetables": {
"beans": 12,
"peas": 9,
"carrots": 21
}
}

turn a dict that may contain a pandas dataframe to several dicts

I have a dict that may be 'infinitely' nested and contain several pandas DataFrames (all the DataFrames have the same number of rows).
I want to create a new dict for each row in the DataFrames, with the row being transformed to a dict (the keys are the column names) and the rest of the dictionary staying the same.
Note: I am not making a cartesian product between the rows of the different DataFrame's.
what would be the best and most pythonic way to do it?
Example:
the original dict:
d = {'a': 1,
'inner': {
'b': 'string',
'c': pd.DataFrame({'c_col1': range(1,3), 'c_col2': range(2,4)})
},
'd': pd.DataFrame({'d_col1': range(4,6), 'd_col2': range(7,9)})
}
the desired result:
lst_of_dicts = [
{'a': 1,
'inner': {
'b': 'string',
'c': {
'c_col1': 1, 'c_col2':2
}
},
'd': {
'd_col1': 4, 'd_col2': 7
}
},
{'a': 1,
'inner': {
'b': 'string',
'c': {
'c_col1': 2, 'c_col2': 3
}
},
'd': {
'd_col1': 5, 'd_col2': 8
}
}
]

Sum values grouped by key in list of dict

I have a list of dicts and now I am trying to find the total jobs for each remote identifier.
In this case I am expecting for the id 64 -> 11 jobs and 68 -> 0 jobs
[{
'jobs': {
'count': 4
},
'remote_identifier': {
'id': '64'
}
}, {
'jobs': {
'count': 0
},
'remote_identifier': {
'id': '68'
}
}, {
'jobs': {
'count': 7
},
'remote_identifier': {
'id': '64'
}
}]
I already tried something like this, but I don't know how to adapt it to my needs, since that only counts the number of occurrences.
from collections import Counter
# Counts how many entries carry each remote identifier id (occurrences,
# not the summed job counts). The call form of print works on Python 2 and 3.
print(Counter(item['remote_identifier']['id'] for item in items))
Pretty straight forward with a defaultdict. (data is your original list.)
>>> from collections import defaultdict
>>> d = defaultdict(int)
>>>
>>> for d_inner in data:
... id_ = d_inner['remote_identifier']['id']
... d[int(id_)] += d_inner['jobs']['count']
...
>>> d
defaultdict(<type 'int'>, {64: 11, 68: 0})
You can use a defaultdict to add up the counts:
from collections import defaultdict

# Sample input: one record per job batch, tagged with its remote identifier.
jobs = [
    {'jobs': {'count': 4}, 'remote_identifier': {'id': '64'}},
    {'jobs': {'count': 0}, 'remote_identifier': {'id': '68'}},
    {'jobs': {'count': 7}, 'remote_identifier': {'id': '64'}},
]

# Unknown ids start at 0, so each record can simply be added to its total.
counts = defaultdict(int)
for job in jobs:
    remote_id = job['remote_identifier']['id']
    job_count = job['jobs']['count']
    counts[remote_id] += job_count
print(counts)
Output:
defaultdict(<class 'int'>, {'64': 11, '68': 0})
The simplest way is by using the itertools module, which provides the function groupby.
import itertools as it
def get_id(entry):
    """Return the remote identifier id of a job entry."""
    return entry['remote_identifier']['id']


# groupby only merges *adjacent* entries with equal keys, so order by id
# first. sorted() builds a new list instead of reordering data in place.
for key, group in it.groupby(sorted(data, key=get_id), get_id):
    print(key, sum(entry['jobs']['count'] for entry in group))
Note that groupby assumes that the data is already sorted by the key you are using to group the elements in the data.
This should do the trick:
# Running total of job counts keyed by remote identifier id.
result = {}
for i in items:
    ri = i['remote_identifier']['id']
    j = i['jobs']['count']
    # An id seen for the first time starts from 0.
    result[ri] = result.get(ri, 0) + j
result
#{'68': 0, '64': 11}
Another solution is as follows:
# Sample records; named so as not to shadow the built-in input().
job_entries = [
    {'jobs': {'count': 4}, 'remote_identifier': {'id': '64'}},
    {'jobs': {'count': 0}, 'remote_identifier': {'id': '68'}},
    {'jobs': {'count': 7}, 'remote_identifier': {'id': '64'}},
]

# Total job count per remote identifier id.
res = dict()
for item in job_entries:
    remote_id = item['remote_identifier']['id']
    # An id seen for the first time starts from 0.
    res[remote_id] = res.get(remote_id, 0) + item['jobs']['count']
# Call form of print is valid on both Python 2 and Python 3.
print(res)
output:
{'68': 0, '64': 11}

Categories