I have a rather deep dict that I need to simplify, and I've run into some problems while doing that.
Here is a small sample of the dictionary that needs to be simplified:
data_dict = {
"DATA": {
"Page1": [{
"Section": [{
"Name": [{
"text": "John"
}],
"ID_Number": [{
"text": "123456"
}]
}]
}],
"Page2": [{
"Section": [{
"Name": [{
"text": "Rob"
}],
"ID_Number": [{
"text": "654321"
}]
}]
}]
}
}
What I've done already:
# First pass: for every page under the top-level key, store the value found
# inside the page's one-element list (in the sample, the "Section" list).
my_dict = {}
for value in data_dict.values():
for key, val in value.items():
# NOTE(review): the sample page keys are "Page1"/"Page2", so this "Tab"
# test does not match any of them.
if "Tab" in key:
my_dict[key] = val
if type(val) == list:
for i in val:
for key1, val1 in i.items():
my_dict[key] = val1
# Second pass: pull the 'text' value out of each field.
# result_dict and page_list are each created exactly once, before the loops.
result_dict = {}
page_list = []
for keys, values in my_dict.items():
for val in values:
if type(val) != str:
for key1, val1 in val.items():
for x in val1:
result_dict[key1] = x.get('text')
# The same result_dict object is appended on every pass, so all entries of
# page_list alias one dict that ends up holding the last values written.
page_list.append(result_dict)
# Every page key is bound to the same shared page_list object.
my_dict[keys] = page_list
print("my_dict = ", my_dict)
Current result:
my_dict = {'Page1': [{'Name': 'Rob', 'ID_Number': '654321'}, {'Name': 'Rob', 'ID_Number': '654321'}, {'Name': 'Rob', 'ID_Number': '65432
1'}, {'Name': 'Rob', 'ID_Number': '654321'}], 'Page2': [{'Name': 'Rob', 'ID_Number': '654321'}, {'Name': 'Rob', 'ID_Number': '
654321'}, {'Name': 'Rob', 'ID_Number': '654321'}, {'Name': 'Rob', 'ID_Number': '654321'}]}
The problem is that result_dict is being appended to page_list more than once which is unnecessary. Also, my approach is very messy. Is there a cleaner way to get the same result?
Desired result:
my_dict = {"Page1": [{"Name": "John", "ID_Number": "123456"}], "Page2": [{"Name": "Rob", "ID_Number": "654321"}]}
Solution 1 (less loops, but added if statements):
If you want to avoid too many nested for loops, I would take advantage of knowing the repeated keys beforehand and use that information to get to the inner keys or values easily.
Reference to dict for solution 1 & 2:
data_dict = {"DATA": {"Page1": [{"Section": [{"Name": [{"text": "John"}],"ID_Number": [{"text": "123456"}]}]}],"Page2": [{"Section": [{"Name": [{"text": "Rob"}],"ID_Number": [{"text": "654321"}]}]}]}}
Code:
# Flatten each page into a single record, relying on the known layout
# DATA -> page -> [0]["Section"][0] -> field -> [0]["text"].
old_dict = data_dict["DATA"]
new_dict = {}
for d1_key, page in old_dict.items():
    d2 = page[0]["Section"][0]
    record = {}
    # The field names are known up front, so look them up directly. This
    # does not depend on "Name" being stored before "ID_Number".
    for d2_key in ("Name", "ID_Number"):
        if d2_key in d2:
            record[d2_key] = d2[d2_key][0]["text"]
    new_dict[d1_key] = [record]
print(new_dict)
Output:
{'Page1': [{'Name': 'John', 'ID_Number': '123456'}], 'Page2': [{'Name': 'Rob', 'ID_Number': '654321'}]}
Solution 2: (more for loops, more readable)
(Recommended)
Here is a second solution that gives the same desired output that does not take advantage of information about the keys or values, but only looks at the structure of the data. I prefer this one as it is easy to read, modify or extend!
Code:
# Flatten each page into a single record by following the structure only:
# every level wraps its payload in a one-element list.
old_dict = data_dict["DATA"]
new_dict = {}
unlist = 0  # index of the single element inside each wrapping list
for k1, v1 in old_dict.items():
    # One fresh record per page, so any number of fields is supported and
    # nothing carries over from the previous page.
    record = {}
    for v2 in v1[unlist].values():  # using values because we don't use the Section key
        for k3, v3 in v2[unlist].items():
            for v4 in v3[unlist].values():  # the "text" key itself is not needed
                record[k3] = v4
    new_dict[k1] = [record]
print(new_dict)
Output:
{'Page1': [{'Name': 'John', 'ID_Number': '123456'}], 'Page2': [{'Name': 'Rob', 'ID_Number': '654321'}]}
Just to see another solution with a ridiculous amount of for loops:
# Walk every nesting level explicitly; each page collects one flat record.
new_dic = {}
for i in data_dict:  # top-level key
    for j in data_dict[i]:  # page name
        # Key the values by their real field name instead of collecting them
        # positionally and labelling them with hard-coded names afterwards.
        record = {}
        for k in data_dict[i][j]:  # dicts wrapped in the page list
            for m in k:  # section key
                for n in k[m]:  # dicts wrapped in the section list
                    for x in n:  # field name
                        for y in n[x]:  # dicts wrapped in the field list
                            for values in y.values():
                                record[x] = values
        new_dic[j] = [record]
print(new_dic)
output
{'Page1': [{'Name': 'John', 'ID_Number': '123456'}], 'Page2': [{'Name': 'Rob', 'ID_Number': '654321'}]}
Related
I have a dictionary with some values that are type list, i need to convert each list in another dictionary and insert this new dictionary at the place of the list.
Basically, I have this dictionary
Dic = {
'name': 'P1',
'srcintf': 'IntA',
'dstintf': 'IntB',
'srcaddr': 'IP1',
'dstaddr': ['IP2', 'IP3', 'IP4'],
'service': ['P_9100', 'SNMP'],
'schedule' : 'always',
}
I need to replace the values that are lists
Expected output:
Dic = {
'name': 'P1',
'srcintf': 'IntA',
'dstintf': 'IntB',
'srcaddr': 'IP1',
'dstaddr': [
{'name': 'IP2'},
{'name': 'IP3'},
{'name': 'IP4'}
],
'service': [
{'name': 'P_9100'},
{'name': 'SNMP'}
],
'schedule' : 'always',
}
So far I have come up with this code:
# Attempt to turn each list value of Dic into a list of {"name": element} dicts.
for k,v in Dic.items():
if not isinstance(v, list):
# NewDic is rebound to a fresh [key, value] pair each time, so earlier
# pairs are not kept.
NewDic = [k,v]
print(NewDic)
else:
values = v
keys = ["name"]*len(values)
for item in range(len(values)):
key = keys[item]
value = values[item]
SmallDic = {key : value}
# NOTE(review): liste is not assigned anywhere in this snippet before
# .append() is called on it.
liste.append(SmallDic)
NewDic = [k,liste]
which print this
['name', 'P1']
['srcintf', 'IntA']
['dstintf', 'IntB']
['srcaddr', 'IP1']
['schedule', 'always']
['schedule', 'always']
I think the problem is with the for loop, but so far I haven't been able to figure it out.
You need to re-create the dictionary. With some modifications to your existing code so that it generates a new dictionary & fixing the else clause:
# Build a new dictionary: list values become lists of {"name": element}
# dicts, every other value is carried over unchanged.
NewDic = {}
for key, value in Dic.items():
    if isinstance(value, list):
        # one small dict per element of the original list
        NewDic[key] = [{"name": element} for element in value]
    else:
        NewDic[key] = value
print(NewDic)
Result:
{'name': 'P1', 'srcintf': 'IntA', 'dstintf': 'IntB', 'srcaddr': 'IP1', 'dstaddr': [{'name': 'IP2'}, {'name': 'IP3'}, {'name': 'IP4'}], 'service': [{'name': 'P_9100'}, {'name': 'SNMP'}], 'schedule': 'always'}
a =[{
"id":"1",
"Name":'BK',
"Age":'56'
},
{
"id":"1",
"Sex":'Male'
},
{
"id":"2",
"Name":"AK",
"Age":"32"
}]
I have a list of dictionaries in which one person's information is split across multiple dictionaries. For example, the information for id 1 is contained in the first two dictionaries above. How can I get the output below?
{1: {'Name':'BK','Age':'56','Sex':'Male'}, 2: { 'Name': 'AK','Age':'32'}}
You can use a defaultdict to collect the results.
from collections import defaultdict

a = [
    {"id": "1", "Name": "BK", "Age": "56"},
    {"id": "1", "Sex": "Male"},
    {"id": "2", "Name": "AK", "Age": "32"},
]

# Group the partial records by "id"; an id seen for the first time starts
# from an empty dict supplied by the defaultdict.
results = defaultdict(dict)
for a_dict in a:
    # Copy every field except "id" instead of popping it, so the dicts in
    # `a` are left untouched and the loop can safely be run again.
    fields = {k: v for k, v in a_dict.items() if k != "id"}
    results[a_dict["id"]].update(fields)
This gives you:
>>> results
defaultdict(<class 'dict'>, {'1': {'Name': 'BK', 'Age': '56', 'Sex': 'Male'}, '2': {'Name': 'AK', 'Age': '32'}})
The defaultdict type behaves like a normal dict, except that when you look up a key that is not present, a default value is created, stored under that key, and returned. This means that as the dicts in a are iterated over, the values (except for id) are updated onto either an existing dict, or an automatically created new one.
How does collections.defaultdict work?
Using defaultdict
from collections import defaultdict

a = [
    {"id": "1", "Name": "BK", "Age": "56"},
    {"id": "1", "Sex": "Male"},
    {"id": "2", "Name": "AK", "Age": "32"},
]

# Merge all rows sharing an "id" into one dict stored under that id.
final_ = defaultdict(dict)
for row in a:
    # Build the id-less field dict separately rather than calling
    # row.pop('id'), which would strip the key from the input rows.
    row_id = row["id"]
    fields = {name: value for name, value in row.items() if name != "id"}
    final_[row_id].update(fields)
print(final_)
defaultdict(<class 'dict'>, {'1': {'Name': 'BK', 'Age': '56', 'Sex': 'Male'}, '2': {'Name': 'AK', 'Age': '32'}})
You can combine 2 dictionaries by using the .update() function
# Two partial records that share the same "id".
dict_a = { "id":"1", "Name":'BK', "Age":'56' }
dict_b = { "id":"1", "Sex":'Male' }
# update() merges dict_b into dict_a in place; for keys present in both,
# dict_b's value is kept.
dict_a.update(dict_b) # {'id': '1', 'Name': 'BK', 'Age': '56', 'Sex': 'Male'}
Since the output that you want is in dictionary form
# Merge the partial records in `a` into one dict per id.
combined_dict = {}
for item in a:
    # Read the id without popping it, so the dicts in `a` are not modified.
    record_id = item["id"]
    fields = {key: value for key, value in item.items() if key != "id"}
    # setdefault() creates the per-id dict the first time an id is seen;
    # update() then merges this item's fields into it.
    combined_dict.setdefault(record_id, {}).update(fields)
print(combined_dict) # {'1': {'Name': 'BK', 'Age': '56', 'Sex': 'Male'}, '2': {'Name': 'AK', 'Age': '32'}}
from collections import defaultdict

# Merge the partial records by "id"; unseen ids start from an empty dict.
result = defaultdict(dict)
a = [
    {"id": "1", "Name": "BK", "Age": "56"},
    {"id": "1", "Sex": "Male"},
    {"id": "2", "Name": "AK", "Age": "32"},
]
for b in a:
    # "id" is already the outer key, so leave it out of the merged fields.
    result[b["id"]].update({k: v for k, v in b.items() if k != "id"})
print(result)
# Merge the partial records in `a` into one dict per id.
d = {}
for p in a:
    id = p["id"]
    # "id" becomes the outer key, so drop it from the fields being stored.
    # This also makes `p` a fresh dict, so `d` never aliases the input.
    p = {k: v for k, v in p.items() if k != "id"}
    if id not in d:
        d[id] = p
    else:
        # Unpack the stored dict and the current one into a new merged dict.
        d[id] = {**d[id], **p}
d is the result dictionary you want.
In the for loop, if you encounter an id for the first time, you just store the incomplete value.
If the id is in the existing keys, update it.
The combination happens in {**d[id], **p}
where ** is unpacking the dict.
It unpacks the existing incomplete dict associated with the id and the current dict, then combines them into a new dict.
I am trying to remove all the '/api/1/employees/' and /api/1/seats/ from the python list of dictionaries. What is the easiest way to do so. My list of dictionaries are looking like this at the moment -dict1 = [{'to_seat_url': '/api/1/seats/19014', 'id': 5051, 'employee_url': '/api/1/employees/4027'}, {'to_seat_url': '/api/1/seats/19013', 'id': 5052, 'employee_url': '/api/1/employees/4048'}, {'to_seat_url': '/api/1/seats/19012', 'id': 5053, 'employee_url': '/api/1/employees/4117'}, {'to_seat_url': None, 'id': 5054, 'employee_url': '/api/1/employees/15027'}]
Expected result:
new_dict = [{'to_seat_url': '19014', 'id': 5051, 'employee_url': '4027'}, {'to_seat_url': '19013', 'id': 5052, 'employee_url': '4048'}, {'to_seat_url': '19012', 'id': 5053, 'employee_url': '4117'}, {'to_seat_url': '', 'id': 5054, 'employee_url': '15027'}]
If you want last part of the URLs:
# Keep only the last "/"-separated segment of every value except "id".
# NOTE: every non-"id" value must be a string here; a None value (such as the
# last 'to_seat_url' in dict1) has no .split() and raises AttributeError.
new_dict = [
{k: v.split("/")[-1] if k != "id" else v for k, v in d.items()}
for d in dict1
]
print(new_dict)
Prints:
[
{"to_seat_url": "19014", "id": 5051, "employee_url": "4027"},
{"to_seat_url": "19013", "id": 5052, "employee_url": "4048"},
{"to_seat_url": "19012", "id": 5053, "employee_url": "4117"},
{"to_seat_url": "9765", "id": 5054, "employee_url": "15027"},
]
EDIT: To skip list entries that are None:
# Skip list entries that are themselves None; for the rest, keep "id" as is
# and reduce every other value to its last "/"-separated segment.
new_dict = [
    {k: (v if k == "id" else v.split("/")[-1]) for k, v in d.items()}
    for d in dict1
    if d is not None
]
print(new_dict)
EDIT 2: If some values are None:
# Skip list entries that are None. Within each dict, "id" is kept as is, a
# None value becomes "", and any other value is reduced to its last
# "/"-separated segment.
new_dict = [
    {
        k: v if k == "id" else ("" if v is None else v.split("/")[-1])
        for k, v in d.items()
    }
    for d in dict1
    if d is not None
]
print(new_dict)
data = [{"id": "78ab45",
"name": "Jonh"},
{"id": "69cd234457",
"name": "Joe"}]
I want my function to return the largest value lengths for each key from all dictionaries:
expected_output = [
{ "size": 10, "name": "id" }, #because the length of the largest "id" value is 10
{ "size": 4, "name": "name" }, #because the length of the largest "name" value is 4
]
My code so far:
def my_func(data):
# Builds a list of {"size", "name"} entries from the key/value pairs of the
# dicts in data.
headers_and_sizes = []
for item in data:
for key, value in item.items():
# An entry is appended for every (item, key) pair, so a key that occurs in
# several items is listed several times. "size" is stored as a string here
# because it is produced by an f-string.
headers_and_sizes.append({"size": f'{len(value)}', "name": key})
# Only the entry at index 0 is compared and updated, whatever the current
# key is.
if int(headers_and_sizes[0]["size"]) < len(value):
headers_and_sizes[0]["size"] = len(value)
return headers_and_sizes
Gives me this:
[{'size': '6', 'name': 'id'}, {'size': '4', 'name': 'name'}, {'size': '10', 'name': 'id'}, {'size': '3', 'name': 'name'}]
How can I fix that so that it will return the values as in expected_output?
You'll want to be updating a dictionary that stores each key mapped to the maximum length seen for that key thus far.
data = [
    {
        "id": "78ab45",
        "name": "Jonh",
    },
    {
        "id": "69cd234457",
        "name": "Joe",
    },
]

# Map each key to the longest value length seen for it so far.
key_to_max_len = {}
for datum in data:
    for key, val in datum.items():
        # .get(key, 0) covers the first sighting of a key, since no length
        # can be below 0.
        key_to_max_len[key] = max(key_to_max_len.get(key, 0), len(val))

# Reshape into the requested list of {"size", "name"} records.
key_size_arr = [{"size": size, "name": key} for key, size in key_to_max_len.items()]
you can get the max value for id and name like below code, and structure the output accordingly
>>> data
[{'id': '78ab45', 'name': 'Jonh'}, {'id': '69cd234457', 'name': 'Joe'}]
id = max(map(lambda x:len(x['id']), data))
name = max(map(lambda x:len(x['name']), data))
>>> id
10
>>> name
4
You can use list comprehension to form a tuple with ids and names:
names_ids = [(eachdict['id'],eachdict['name']) for eachdict in data]
Format the output to have the desired shape (dictionaries), find the max length (using the max() function, passing it the lengths of names and ids, using another list comprehension, inside max()):
expected_output = \
[{"size":max([len(each[0]) for each in names_ids]),"name":"id"},
{"size":max([len(each[1]) for each in names_ids]),"name":"name"}]
Output will be:
[{'name': 'id', 'size': 10}, {'name': 'name', 'size': 4}]
Using the following:
# Longest value length per key across all dictionaries in data.
output = {}
for d in data:
    for k, v in d.items():
        # .get(k, -1) lets a key that first appears in a later dictionary be
        # counted too, instead of raising KeyError.
        output[k] = max(output.get(k, -1), len(v))
Will output:
{'id': 10, 'name': 4}
I think the easiest method here is pandas...
import pandas as pd
df = pd.DataFrame(data)
out = [{'size': df['id'].str.len().max(), 'name':'id'},
{'size': df['name'].str.len().max(), 'name':'name'}]
output:
[{'size': 10, 'name': 'id'}, {'size': 4, 'name': 'name'}]
or, for additional column names:
[{'size':df[col].str.len().max(), 'name':col} for col in df.columns]
Here is how you can use nested comprehensions to build the list of dictionaries:
data = [
    {"id": "78ab45", "name": "Jonh"},
    {"id": "69cd234457", "name": "Joe"},
]

# One {"size", "name"} entry per key of the first record; "size" is the
# greatest value length found under that key in any record.
expected_output = [
    {"size": max(len(record[k]) for record in data), "name": k}
    for k in data[0]
]
print(expected_output)
Output:
[{'size': 10, 'name': 'id'},
{'size': 4, 'name': 'name'}]
I have a list of dictionaries, themselves with nested lists of dictionaries. All of the nest levels have a similar structure, thankfully. I desire to sort these nested lists of dictionaries. I grasp the technique to sort a list of dictionaries by value. I'm struggling with the recursion that will sort the inner lists.
def reorder(l, sort_by):
# I have been trying to add a recursion here
# so that the function calls itself for each
# nested group of "children". So far, fail
return sorted(l, key=lambda k: k[sort_by])
l = [
{ 'name': 'steve',
'children': [
{ 'name': 'sam',
'children': [
{'name': 'sally'},
{'name': 'sabrina'}
]
},
{'name': 'sydney'},
{'name': 'sal'}
]
},
{ 'name': 'fred',
'children': [
{'name': 'fritz'},
{'name': 'frank'}
]
}
]
print(reorder(l, 'name'))
def reorder(l, sort_by):
    """Return *l* sorted by *sort_by*, with nested "children" sorted too.

    Args:
        l: list of dicts; a dict may hold a "children" list of the same shape.
        sort_by: key whose value orders the dicts at every level.

    Returns:
        A new list. Dicts that have "children" are shallow-copied with their
        children reordered, so the caller's list and dicts keep their
        original order. Dicts without "children" are reused as they are.

    Raises:
        KeyError: if a dict at any level lacks *sort_by*.
    """
    result = []
    for item in sorted(l, key=lambda entry: entry[sort_by]):
        if "children" in item:
            # Copy before replacing "children" so the input is not mutated.
            item = {**item, "children": reorder(item["children"], sort_by)}
        result.append(item)
    return result
Since you state "I grasp the technique to sort a list of dictionaries by value" I will post some code for recursively gathering data from another SO post I made, and leave it to you to implement your sorting technique. The code:
myjson = {
'transportation': 'car',
'address': {
'driveway': 'yes',
'home_address': {
'state': 'TX',
'city': 'Houston'}
},
'work_address': {
'state': 'TX',
'city': 'Sugarland',
'location': 'office-tower',
'salary': 30000}
}
def get_keys(some_dictionary, parent=None, collected=None):
    """Collect the dotted path of every key in a nested dictionary.

    Args:
        some_dictionary: mapping to walk; values that are dicts are
            descended into recursively.
        parent: path prefix for the keys at this level. With None, keys are
            recorded without a prefix (not as "None.<key>").
        collected: list that receives the paths. When omitted, the
            module-level ``my_list`` is used, which is how existing callers
            already work.

    Returns:
        The list the paths were appended to.
    """
    if collected is None:
        collected = my_list
    for key, value in some_dictionary.items():
        # Build the path once and reuse it for the check, the append and the
        # recursive call.
        path = '{}'.format(key) if parent is None else '{}.{}'.format(parent, key)
        if path not in collected:
            collected.append(path)
        if isinstance(value, dict):
            get_keys(value, parent=path, collected=collected)
    return collected
my_list = []
get_keys(myjson, parent='myjson')
print(my_list)
This is intended to retrieve all keys recursively from the nested dictionary. It outputs:
['myjson.address',
'myjson.address.home_address',
'myjson.address.home_address.state',
'myjson.address.home_address.city',
'myjson.address.driveway',
'myjson.transportation',
'myjson.work_address',
'myjson.work_address.state',
'myjson.work_address.salary',
'myjson.work_address.location',
'myjson.work_address.city']
The main thing to note is that if isinstance(value, dict): results in get_keys() being called again, hence the recursive capabilities of it (but only for nested dictionaries in this case).