Related
I have a list of dictionary. I want to convert this list into dictionary using parent and child relation. I have try many time. But its difficult for me.
Thanks in advance for solving the problem.
Input =
data = [
{
"_id": 1,
"label": "Property",
"index": 1
},
{
"_id": 2,
"label": "Find Property",
"index": 1,
"parent_id": 1
},
{
"_id": 3,
"label": "Add Property",
"index": 2,
"parent_id": 1
},
{
"_id": 4,
"label": "Offer",
"index": 2
},
{
"_id": 5,
"label": "My Offer",
"index": 1,
"parent_id": 4
},
{
"_id": 6,
"label": "Accept",
"index": 1,
"parent_id": 5
}
]
I have a list of dictionary. I want to convert this list into dictionary using parent and child relation. I have try many time. But its difficult for me.
Thanks in advance for solving the problem.
Expected Output:
[
{
"_id": 1,
"label": "Property",
"index": 1,
"children" : [
{
"_id": 2,
"label": "Find Property",
"index": 1
},
{
"_id": 3,
"label": "Add Property",
"index": 2
}
]
},
{
"_id": 4,
"label": "Offer",
"index": 2,
"children" : [
{
"_id": 5,
"label": "My Offer",
"index": 1,
"children" : [
{
"_id": 6,
"label": "Accept",
"index": 1
}
]
}
]
},
]
I would do it like this. Keep in mind that this solution also affects the original data list.
parents = list()
# First, create a new dict where the key is property id and the value
# is the property itself.
indexed = {d["_id"]:d for d in data}
for id_, item in indexed.items():
# If a property doesn't have "parent_id" key it means that
# this is the root property, appending it to the result list.
if "parent_id" not in item:
parents.append(item)
continue
# Saving parent id for convenience.
p_id = item["parent_id"]
# Adding a children list if a parent doesn't have it yet.
if "children" not in indexed[p_id]:
indexed[p_id]["children"] = list()
indexed[p_id]["children"].append(item)
And the result is:
import pprint
pprint.pprint(parents)
[{'_id': 1,
'children': [{'_id': 2, 'index': 1, 'label': 'Find Property', 'parent_id': 1},
{'_id': 3, 'index': 2, 'label': 'Add Property', 'parent_id': 1}],
'index': 1,
'label': 'Property'},
{'_id': 4,
'children': [{'_id': 5,
'children': [{'_id': 6,
'index': 1,
'label': 'Accept',
'parent_id': 5}],
'index': 1,
'label': 'My Offer',
'parent_id': 4}],
'index': 2,
'label': 'Offer'}]
I want to create a list per user so i got this jsonfile:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price":8,
},
]
I'm on python and I want to have a result like
for the user with 'id':1 [1,10,10]
and for the user with "id": "2": [3,8]
so two lists corresponding to the prices according to the ids
is it possible to do that in python ?
note, in fact user id are UUID type and randomly generated.
edit: quantity was a mistake all data are price and id, sorry
collections.defaultdict to the rescue.
Assuming you really do have mixed quantitys and prices and you don't care about mixing them into the same list,
from collections import defaultdict
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
},
]
by_id = defaultdict(list)
for item in data:
item = item.copy() # we need to mutate the item
id = item.pop("id")
# whatever is the other value in the dict, grab that:
other_value = item.popitem()[1]
by_id[id].append(other_value)
print(dict(by_id))
The output is
{'1': [1, 10, 10], '2': [3, 8]}
If you actually only do have prices, the loop is simpler:
by_id = defaultdict(list)
for item in data:
by_id[item["id"]].append(item.get("price"))
or
by_id = defaultdict(list)
for item in data:
by_id[item["id"]].append(item["price"])
to fail fast when the price is missing.
first :
you structur data : {[]}, is not supported in python.
assume your data is :
my_json = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price":8,
},
]
then you can achive with this:
results = {}
for data in my_json:
if data.get('id') not in results:
results[data.get('id')] = [data.get('price') or data.get('quantity')]
else:
results[data.get('id')].append(data.get('price') or data.get('quantity'))
print(results)
output:
{'1': [1, 10, 10], '2': [3, 8]}
Maybe like this:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"quantity": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
}
]
result = {}
for item in data:
try:
result[item['id']].append(item.get('price'))
except KeyError:
result[item['id']] = [item.get('price')]
print(result)
Where None is put in place of the missing price for that entry, quantity key ignored.
Result:
{'1': [1, 10, 10], '2': [None, 8]}
A simple loop that enumerates your list (it's not JSON) in conjunction with setdefault() is all you need:
data = [
{
"id": "1",
"price": 1,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 3,
},
{
"id": "1",
"price": 10,
},
{
"id": "2",
"price": 8,
}
]
dict_ = {}
for d in data:
dict_.setdefault(d['id'], []).append(d['price'])
print(dict_)
Output:
{'1': [1, 10, 10], '2': [3, 8]}
Note:
This will fail (KeyError) if either 'id' or 'price' is missing from the dictionaries in the list
hopefully he the title is not too confusing, I have a dictionary (sample below) whereby im trying to sort the dictionary by the number of list (dictionary items) across a number of key values beneath a parent. Hopefully the example makes more sense then my description?
{
"data": {
"London": {
"SHOP 1": [
{
"kittens": 10,
"type": "fluffy"
},
{
"puppies": 11,
"type": "squidgy"
}
],
"SHOP 2": [
{
"kittens": 15,
"type": "fluffy"
},
{
"puppies": 3,
"type": "squidgy"
},
{
"fishes": 132,
"type": "floaty"
}
]
},
"Manchester": {
"SHOP 1": [
{
"kittens": 10,
"type": "fluffy"
},
{
"puppies": 11,
"type": "squidgy"
}
],
"SHOP 2": [
{
"kittens": 15,
"type": "fluffy"
},
{
"puppies": 3,
"type": "squidgy"
},
{
"fishes": 132,
"type": "floaty"
}
],
"SHOP 3": [
{
"kittens": 15,
"type": "fluffy"
},
{
"puppies": 3,
"type": "squidgy"
},
]
},
"Edinburgh": {
"SHOP 1": [
{
"kittens": 10,
"type": "fluffy"
},
{
"puppies": 11,
"type": "squidgy"
}
],
"SHOP 2": [
{
"kittens": 15,
"type": "fluffy"
},
],
"SHOP 3": [
{
"puppies": 3,
"type": "squidgy"
},
]
}
}
}
Summary
# London 2 shops, 5 item dictionaries total
# Machester 3 shops, 7 item dictionaries total
# Edinburgh 3 shops, 4 item dictionaries total
Desired sorting would be by total items across the shops, so ordered Manchester, London, Edinburgh
id usually use somethign like the below to sort, but im not sure how to do this oen with it being counting the number of items across a number of keys?
{k: v for k, v in sorted(x.items(), key=lambda item: item[1])}
You need to reverse sort based on the total number of items for each location, which you can generate as:
sum(len(i) for i in s.values())
where s is the shop dictionary for each location.
Putting this into a sorted expression:
dict(sorted(d['data'].items(), key=lambda t:sum(len(i) for i in t[1].values()), reverse=True))
gives:
{
'Manchester': {
'SHOP 1': [{'kittens': 10, 'type': 'fluffy'}, {'puppies': 11, 'type': 'squidgy'}],
'SHOP 2': [{'kittens': 15, 'type': 'fluffy'}, {'puppies': 3, 'type': 'squidgy'}, {'fishes': 132, 'type': 'floaty'}],
'SHOP 3': [{'kittens': 15, 'type': 'fluffy'}, {'puppies': 3, 'type': 'squidgy'}]
},
'London': {
'SHOP 1': [{'kittens': 10, 'type': 'fluffy'}, {'puppies': 11, 'type': 'squidgy'}],
'SHOP 2': [{'kittens': 15, 'type': 'fluffy'}, {'puppies': 3, 'type': 'squidgy'}, {'fishes': 132, 'type': 'floaty'}]
},
'Edinburgh': {
'SHOP 1': [{'kittens': 10, 'type': 'fluffy'}, {'puppies': 11, 'type': 'squidgy'}],
'SHOP 2': [{'kittens': 15, 'type': 'fluffy'}], 'SHOP 3': [{'puppies': 3, 'type': 'squidgy'}]
}
}
No need to make things complex:
adict = adict['data']
result = []
for capital, value in adict.items():
shop_count = len(value)
items = sum([len(obj) for obj in value.values()])
result.append((capital, shop_count, items))
for capital, shop_count, items in sorted(result, key=lambda x: x[2], reverse=True):
print(f'{capital} {shop_count} shops, {items} item dictionaries total')
Output:
Manchester 3 shops, 7 item dictionaries total
London 2 shops, 5 item dictionaries total
Edinburgh 3 shops, 4 item dictionaries total
data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
This is my python object, How can I identify the Key's-Value is a list. example here, after traversing through 'print(data["cars"]["model1"]["company_details"]["some_list"])'I get the list, since it is small dictionary it was easy, but how can I identify the same if I encounter list as a value for some other key in future.
Example:
data = {
"persons": {"1": {"name": "siddu"}, "2": {"name": "manju"}},
"cars": {
"model1": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": "US",
"some_list": [1, 2, 1],
},
},
"model2": {
"make": 1990,
"company_details": {
"name": "Ford Corporation",
"country": ["US", "UK", "IND"],
"some_list": [1, 2, 1, 1, 1],
},
},
},
}
Can anyone please suggest/guide me to understand how to identify the key's value is a list.
The final goal is to remove the duplicates in the list if any exists?
Thank you very much:)
You can have a recursive function that goes to any depth and make the items of the list unique like below:
In [8]: def removeDuplicatesFromList(di):
...: for key, val in di.items():
...: if isinstance(val, dict):
...: removeDuplicatesFromList(val)
...: elif isinstance(val, list):
...: di[key] =list(set(val))
...: else:
...: continue
...:
...:
In [9]: removeDuplicatesFromList(data)
In [10]: data
Out[10]:
{'persons': {'1': {'name': 'siddu'}, '2': {'name': 'manju'}},
'cars': {'model1': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}},
'model2': {'make': 1990,
'company_details': {'name': 'Ford Corporation',
'country': 'US',
'some_list': [1, 2]}}}}
I have a dataframe as follows:
Name_ID | URL | Count | Rating
------------------------------------------------
ABC | www.example.com/ABC | 10 | 5
123 | www.example.com/123 | 9 | 4
XYZ | www.example.com/XYZ | 5 | 2
ABC111 | www.example.com/ABC111 | 5 | 2
ABC121 | www.example.com/ABC121 | 5 | 2
222 | www.example.com/222 | 5 | 3
abc222 | www.example.com/abc222 | 4 | 2
ABCaaa | www.example.com/ABCaaa | 4 | 2
I am trying to create a JSON as follows:
{
"name": "sampledata",
"children": [
{
"name": 9,
"children": [
{
"name": 4,
"children": [
{
"name": "123",
"size": 100
}
]
}
]
},
{
"name": 10,
"children": [
{
"name": 5,
"children": [
{
"name": "ABC",
"size": 100
}
]
}
]
},
{
"name": 4,
"children": [
{
"name": 2,
"children": [
{
"name": "abc222",
"size": 50
},
{
"name": "ABCaaa",
"size": 50
}
]
}
]
},
{
"name": 5,
"children": [
{
"name": 2,
"children": [
{
"name": "ABC",
"size": 16
},
{
"name": "ABC111",
"size": 16
},
{
"name": "ABC121",
"size": 16
}
]
},
{
"name": 3,
"children": [
{
"name": "222",
"size": 50
}
]
}
]
}
]
}
In order to do that:
I am trying to add labels such as "name" and "children" to the json while creating it.
I tried something like
results = [{"name": i, "children": j} for i,j in results.items()]
But it won't label it properly I believe.
Also, add another field with the label `"size"which I am planning to calculate based on the formula:
(Rating*Count*10000)/number_of_children_to_the_immediate_parent
Here is my dirty code:
import pandas as pd
from collections import defaultdict
import json
data =[('ABC', 'www.example.com/ABC', 10 , 5), ('123', 'www.example.com/123', 9, 4), ('XYZ', 'www.example.com/XYZ', 5, 2), ('ABC111', 'www.example.com/ABC111', 5, 2), ('ABC121', 'www.example.com/ABC121', 5, 2), ('222', 'www.example.com/222', 5, 3), ('abc222', 'www.example.com/abc222', 4, 2), ('ABCaaa', 'www.example.com/ABCaaa', 4, 2)]
df = pd.DataFrame(data, columns=['Name', 'URL', 'Count', 'Rating'])
gp = df.groupby(['Count'])
dict_json = {"name": "flare"}
children = []
for name, group in gp:
temp = {}
temp["name"] = name
temp["children"] = []
rgp = group.groupby(['Rating'])
for n, g in rgp:
temp2 = {}
temp2["name"] = n
temp2["children"] = g.reset_index().T.to_dict().values()
for t in temp2["children"]:
t["size"] = (t["Rating"] * t["Count"] * 10000) / len(temp2["children"])
t["name"] = t["Name"]
del t["Count"]
del t["Rating"]
del t["URL"]
del t["Name"]
del t["index"]
temp["children"].append(temp2)
children.append(temp)
dict_json["children"] = children
print json.dumps(dict_json, indent=4)
Though the above code does print what I need, I am looking for more efficient and cleaner way to do the same, mainly because the actual dataset might be even more nested and complicated. Any help/suggestion will be much appreciated.
Quite an interesting problem and a great question!
You can improve your approach by reorganizing the code inside the loops and using list comprehensions. No need to delete things and introduce temp variables inside loops:
dict_json = {"name": "flare"}
children = []
for name, group in gp:
temp = {"name": name, "children": []}
rgp = group.groupby(['Rating'])
for n, g in rgp:
temp["children"].append({
"name": n,
"children": [
{"name": row["Name"],
"size": row["Rating"] * row["Count"] * 10000 / len(g)}
for _, row in g.iterrows()
]
})
children.append(temp)
dict_json["children"] = children
Or, a "wrapped" version:
dict_json = {
"name": "flare",
"children": [
{
"name": name,
"children": [
{
"name": n,
"children": [
{
"name": row["Name"],
"size": row["Rating"] * row["Count"] * 10000 / len(g)
} for _, row in g.iterrows()
]
} for n, g in group.groupby(['Rating'])
]
} for name, group in gp
]
}
I'm getting the following dictionary printed for you sample input dataframe:
{
"name": "flare",
"children": [
{
"name": 4,
"children": [
{
"name": 2,
"children": [
{
"name": "abc222",
"size": 40000
},
{
"name": "ABCaaa",
"size": 40000
}
]
}
]
},
{
"name": 5,
"children": [
{
"name": 2,
"children": [
{
"name": "XYZ",
"size": 33333
},
{
"name": "ABC111",
"size": 33333
},
{
"name": "ABC121",
"size": 33333
}
]
},
{
"name": 3,
"children": [
{
"name": "222",
"size": 150000
}
]
}
]
},
{
"name": 9,
"children": [
{
"name": 4,
"children": [
{
"name": "123",
"size": 360000
}
]
}
]
},
{
"name": 10,
"children": [
{
"name": 5,
"children": [
{
"name": "ABC",
"size": 500000
}
]
}
]
}
]
}
If I understand correctly what you wan't to do is put a groupby into a nested json, if that is the case then you could use pandas groupby and cast it into a nested list of lists as so:
lol = pd.DataFrame(df.groupby(['Count','Rating'])\
.apply(lambda x: list(x['Name_ID']))).reset_index().values.tolist()
lol should look something like this:
[['10', '5', ['ABC']],
['4', '2', ['abc222', 'ABCaaa']],
['5', '2', ['XYZ ', 'ABC111', 'ABC121']],
['5', '3', ['222 ']],
['9', '4', ['123 ']]]
after that you could loop over lol to put it into a dict, but since you want to set nested items you'l have to use autovivification (check it out):
class autovividict(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
d = autovividict()
for l in lol:
d[l[0]][l[1]] = l[2]
now you can use the json pack for printing and exporting:
print json.dumps(d,indent=2)
In case you need more than one groupby, you could concat your groups with pandas, cast to lol, remove any nans, and then loop, let me know if a full example can help.
setup
from io import StringIO
import pandas as pd
txt = """Name_ID,URL,Count,Rating
ABC,www.example.com/ABC,10,5
123,www.example.com/123,9,4
XYZ,www.example.com/XYZ,5,2
ABC111,www.example.com/ABC111,5,2
ABC121,www.example.com/ABC121,5,2
222,www.example.com/222,5,3
abc222,www.example.com/abc222,4,2
ABCaaa,www.example.com/ABCaaa,4,2"""
df = pd.read_csv(StringIO(txt))
size
pre-calculate it
df['size'] = df.Count.mul(df.Rating) \
.mul(10000) \
.div(df.groupby(
['Count', 'Rating']).Name_ID.transform('count')
).astype(int)
solution
create recursive function
def h(d):
if isinstance(d, pd.Series): d = d.to_frame().T
rec_cond = d.index.nlevels > 1 or d.index.nunique() > 1
return {'name': str(d.index[0]), 'size': str(d['size'].iloc[0])} if not rec_cond else \
[dict(name=str(n), children=h(g.xs(n))) for n, g in d.groupby(level=0)]
demo
import json
my_dict = dict(name='flare', children=h(df.set_index(['Count', 'Rating', 'Name_ID'])))
json.dumps(my_dict)
'{"name": "flare", "children": [{"name": "4", "children": [{"name": "2", "children": [{"name": "ABCaaa", "children": {"name": "ABCaaa", "size": "40000"}}, {"name": "abc222", "children": {"name": "abc222", "size": "40000"}}]}]}, {"name": "5", "children": [{"name": "2", "children": [{"name": "ABC111", "children": {"name": "ABC111", "size": "33333"}}, {"name": "ABC121", "children": {"name": "ABC121", "size": "33333"}}, {"name": "XYZ", "children": {"name": "XYZ", "size": "33333"}}]}, {"name": "3", "children": {"name": "222", "size": "150000"}}]}, {"name": "9", "children": [{"name": "4", "children": {"name": "123", "size": "360000"}}]}, {"name": "10", "children": [{"name": "5", "children": {"name": "ABC", "size": "500000"}}]}]}'
my_dict
{'children': [{'children': [{'children': [{'children': {'name': 'ABCaaa',
'size': '40000'},
'name': 'ABCaaa'},
{'children': {'name': 'abc222', 'size': '40000'}, 'name': 'abc222'}],
'name': '2'}],
'name': '4'},
{'children': [{'children': [{'children': {'name': 'ABC111', 'size': '33333'},
'name': 'ABC111'},
{'children': {'name': 'ABC121', 'size': '33333'}, 'name': 'ABC121'},
{'children': {'name': 'XYZ', 'size': '33333'}, 'name': 'XYZ'}],
'name': '2'},
{'children': {'name': '222', 'size': '150000'}, 'name': '3'}],
'name': '5'},
{'children': [{'children': {'name': '123', 'size': '360000'}, 'name': '4'}],
'name': '9'},
{'children': [{'children': {'name': 'ABC', 'size': '500000'}, 'name': '5'}],
'name': '10'}],
'name': 'flare'}