How to flatten disorganised dictionaries into list? - python

I attempted to flatten a disorganized dictionary (that in turn was taken from a json file) to ease extracting info. Below is an example of how the dictionary is structured and my attempt at flattening it:
data = {'horse':{'speed':{"walk": 40, "run":50}}, 'dog':{'run':30}, 'human':{'gait':{'normal':{'run': 25, 'walk': 30}}}}
flat_dict = []
for items in list(data.items()):
flat_list = []
flat_list.append(items[0])
try:
for item in list(items[1].items())[0]:
if type(item) is not dict:
flat_list.append(item)
else:
flat_list.append(list(item.keys())[0])
flat_list.append(list(item.values())[0])
except:
flat_list.append(items[0])
flat_dict.append(flat_list)
print(flat_dict)
However the above code does not flatten the entire dictionary and some information is lost, here's the output of the above code:
[['horse', 'speed', 'walk', 40], ['dog', 'run', 30], ['human', 'gait', 'normal', {'run': 25, 'walk': 30}]]
What I wanted was:
[['horse', 'speed', 'walk', 40, 'run', 50], ['dog', 'run', 30], ['human', 'gait', 'normal', 'run', 25, 'walk', 30]]
What do I do?

you can use a recursive approach with a list comprehension:
def gen(d):
    """Yield the keys and leaf values of a nested dict, depth first.

    For a dict, every key is yielded immediately before the flattened
    content of its value.  Anything that is not a dict is treated as a leaf
    and yielded as-is.
    """
    if not isinstance(d, dict):
        # Leaf value: nothing to descend into.
        yield d
        return
    for key, value in d.items():
        yield key
        yield from gen(value)
[[k, *gen(v)] for k, v in data.items()]
output:
[['horse', 'speed', 'walk', 40, 'run', 50],
['dog', 'run', 30],
['human', 'gait', 'normal', 'run', 25, 'walk', 30]]

As you don't know the structure inside the dict, you cannot use simple loops to handle each case; you need recursion. I'd suggest a utility function that flattens any structure recursively, then use it to build lists of [key, *flatten(value)]. Note that for a dict this version emits all keys first and then the flattened values:
def flatten(values) -> list:
    """Recursively flatten nested lists and dicts into one flat list.

    A dict contributes all of its keys first, followed by the flattened
    content of its values.  A list contributes the flattened content of each
    element in order.  Any other object becomes a one-element list.
    """
    if isinstance(values, dict):
        # Keys come first, then the values (themselves flattened as a list).
        return list(values) + flatten(list(values.values()))
    if not isinstance(values, list):
        return [values]
    flat = []
    for value in values:
        flat.extend(flatten(value))
    return flat
def flatten_dict(values: dict) -> list:
    """Return one flat list per top-level key of *values*.

    Each list starts with the key and continues with the result of calling
    flatten() on the value stored under that key.
    """
    rows = []
    for key, value in values.items():
        rows.append([key] + flatten(value))
    return rows
if __name__ == '__main__':
# ['foo']
print(flatten('foo'))
# ['foo', 'bar', 'uio', 1, 2, 3, 'k1', 'k2', 'v1', 'kk1', '9', 5, 9, 8, 7]
print(flatten(['foo', ['bar', 'uio', [1, 2, 3]], {'k1': 'v1', 'k2': {'kk1': ['9', 5, 9, 8, 7, ]}}]))
data = {'horse': {'speed': {"walk": 40, "run": 50}}, 'dog': {'run': 30},
'human': {'gait': {'normal': {'run': 25, 'walk': 30}}}}
# [['horse', 'speed', 'walk', 'run', 40, 50], ['dog', 'run', 30], ['human', 'gait', 'normal', 'run', 'walk', 25, 30]]
print(flatten_dict(data))

Answered as asked:
data = {
'horse': {
'speed': {
"walk": 40, "run": 50}},
'dog': {
'run': 30},
'human': {
'gait': {
'normal': {
'run': 25, 'walk': 30}}}}
def my_flatten(ddict, mylist):
    """Append the keys and leaf values of nested dict *ddict* to *mylist*.

    Keys are appended in iteration order, each followed either by its
    non-dict value or by the flattened content of its dict value.  *mylist*
    is extended in place and also returned.
    """
    for key, value in ddict.items():
        # The key is recorded in both branches; only the value handling differs.
        mylist.append(key)
        if isinstance(value, dict):
            my_flatten(value, mylist)
        else:
            mylist.append(value)
    return mylist
flist = [my_flatten(v, [k]) for k, v in data.items()]
print(flist)

Related

Convert tuples to list of dictionary

I'm having trouble converting two tuples into a list of dictionaries. Here is the structure:
train_detail = (Counter({2: 50, 0: 62, 1: 38}),
{2: 0.3333333333333333, 0: 0.41333333333333333, 1: 0.25333333333333335})
test_detail = (Counter({2: 6, 0: 49, 1: 4}),
{2: 0.1016949152542373, 0: 0.8305084745762712, 1: 0.06779661016949153})
Now i want to turn these two into a structure like the following:
[
{
"label": "0",
"trainPercent": 0.41333333333333333,
"trainNumber": 62,
"testPercent": 0.8305084745762712,
"testNumber": 49,
},
{
"label": "1",
"trainPercent": 0.25333333333333335,
"trainNumber": 38,
"testPercent": 0.06779661016949153,
"testNumber": 4,
},
{
"label": "2",
"trainPercent": 0.3333333333333333,
"trainNumber": 50,
"testPercent": 0.1016949152542373,
"testNumber": 6,
},
]
What's an effective way of doing that with minimum looping? Thank you. Note that Counter is a subclass of dict, so it inherits every method of a regular dict.
from pprint import pprint
from collections import Counter
train_detail = (Counter({2: 50, 0: 62, 1: 38}),
{2: 0.3333333333333333, 0: 0.41333333333333333, 1: 0.25333333333333335})
test_detail = (Counter({2: 6, 0: 49, 1: 4}),
{2: 0.1016949152542373, 0: 0.8305084745762712, 1: 0.06779661016949153})
out = []
for t in train_detail[0]:
out.append({
'label': str(t),
'trainNumber': train_detail[0][t],
'trainPercent': train_detail[1][t],
'testPercent': test_detail[1][t],
'testNumber': test_detail[0][t]
})
# pretty print to screen:
pprint(out)
Prints:
[{'label': '2',
'testNumber': 6,
'testPercent': 0.1016949152542373,
'trainNumber': 50,
'trainPercent': 0.3333333333333333},
{'label': '0',
'testNumber': 49,
'testPercent': 0.8305084745762712,
'trainNumber': 62,
'trainPercent': 0.41333333333333333},
{'label': '1',
'testNumber': 4,
'testPercent': 0.06779661016949153,
'trainNumber': 38,
'trainPercent': 0.25333333333333335}]
Use lambda function.lambda is used to unpack tuples.
Thanks for #Andrej Kesely and #Björn Marschollek
To combine their answers:
# Build one record per label and keep the records ordered by numeric label.
# sorted() returns a new list rather than sorting in place, so its result has
# to be assigned; calling it as a bare statement would leave the list in the
# iteration order of train_detail[0].
labels_detail_list = sorted(
    (
        {
            'label': str(i),
            'trainNumber': train_detail[0][i],
            'trainPercent': train_detail[1][i],
            'testNumber': test_detail[0][i],
            'testPercent': test_detail[1][i],
        }
        for i in train_detail[0]
    ),
    # Labels are stored as strings; compare them numerically.
    key=lambda x: int(x['label']),
)
This should be a more concise version.

How to add values to a list inside a dic in python

I try to append strings into a list inside a dictionary, so that every participant would have a list of words. Here is my code:
words = [
{'word': 'we', 'start_time': 90, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': "haven't", 'start_time': 91, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'even', 'start_time': 91, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'spoken', 'start_time': 91, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'about', 'start_time': 92, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'your', 'start_time': 92, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'newest', 'start_time': 92, 'participant': 'str_MIC_Y6E6_con_VnGhveZbaS'},
{'word': 'some word here', 'start_time': 45, 'participant': 'other user'}
]
words.sort(key=lambda x: x['start_time'])
clean_transcript = []
wordChunk = {'participant': '', 'words': []}
for w in words:
if wordChunk['participant'] == w['participant']:
wordChunk['words'].append(w['word'])
else:
wordChunk['participant'] = w['participant']
print(wordChunk['participant'])
wordChunk['words'].append(w['word'])
clean_transcript.append(wordChunk)
This gives me this result:
[{'participant': 'str_MIC_Y6E6_con_VnGhveZbaS', 'words': ['some word here', 'we', "haven't", 'even', 'spoken', 'about', 'your', 'newest']}]
So 'some word here' ends up in the wrong list. How do I need to modify this so that the other user also gets their own word list?
You can restructure your data a bit and just store it in a dict that has participants as keys:
wordChunk = {}
for w in words:
wordChunk.setdefault(w["participant"],[]).append(w["word"])
wordChunk is now a dict with participants as keys:
>>> wordChunk
{'str_MIC_Y6E6_con_VnGhveZbaS': ['we', "haven't", 'even', 'spoken', 'about', 'your', 'newest'], 'other user': ['some word here']}
You can use itertools.groupby
from itertools import groupby
res = []
words = sorted(words, key = lambda x: x['start_time'])
for k, g in groupby(words, key = lambda x: x['participant']):
d = {'participant': k, 'words': [x['word'] for x in g]}
res.append(d)
print(res)
Output:
[{'participant': 'other user', 'words': ['some word here']}, {'participant': 'str_MIC_Y6E6_con_VnGhveZbaS', 'words': ['we', "haven't", 'even', 'spoken', 'about', 'your', 'newest']}]
Using list comprehension
res = [{'participant': k, 'words': [x['word'] for x in g]} for k, g in
groupby(sorted(words, key=lambda x: x['start_time']), key=lambda x: x['participant'])]

Modifying keys and values in nested dictionaries in python

I'm working with a nested dictionary and I'm trying to figure out how to modify certain nested and non-nested keys/values effectively. In short, I'm trying to do the following:
take a nested value and switch it with a non-nested key
rename a nested key.
Here's a basic example of a dictionary that I'm working with:
pet_dictionary = {'Buford':{'color':'white', 'weight': 95, 'age':'3',
'breed':'bulldog'},
'Henley':{'color':'blue', 'weight': 70, 'age':'2',
'breed':'bulldog'},
'Emi':{'color':'lilac', 'weight': 65, 'age':'1',
'breed':'bulldog'},
}
I want to take the non-nested key, which is name of each dog (i.e. Buford, Henley, Emi), switch it with nested value for the age (i.e. 3, 2, 1), and then change the nested key name from 'age' to 'name.' So the output should look like this:
pet_dictionary = {'3':{'color':'white', 'weight': 95, 'name':'Buford',
'breed':'bulldog'},
'2':{'color':'blue', 'weight': 70, 'name':'Henley',
'breed':'bulldog'},
'1':{'color':'lilac', 'weight': 65, 'name':'Emi',
'breed':'bulldog'},
}
I understand how to do this manually one-by-one, but I'm not sure what the best approach is for making all of these changes in a more elegant/optimal way.
While iterating your dictionary, you can cleanly build a new dictionary in three steps:
# Preserves original dict
d = {}
for k, v in pet_dictionary.items():
key = v["age"] # 1. grab the new key
d[key] = {"name": k} # 2. add new "name" item
d[key].update({k_:v_ for k_, v_ in v.items() if k_!="age"}) # 3. update the new dict
d
This might help
pet_dictionary = {
    'Buford': {'color': 'white', 'weight': 95, 'age': '3', 'breed': 'bulldog'},
    'Henley': {'color': 'blue', 'weight': 70, 'age': '2', 'breed': 'bulldog'},
    'Emi': {'color': 'lilac', 'weight': 65, 'age': '1', 'breed': 'bulldog'},
}

# Re-key every pet by its age and store the former key under 'name'.
d = {}
for k, v in pet_dictionary.items():
    # Work on a shallow copy: assigning v itself would alias the nested dict,
    # so adding 'name' and removing 'age' would also alter pet_dictionary.
    entry = dict(v)
    entry['name'] = k
    d[entry.pop('age')] = entry
# print is a function in Python 3; this call form also works in Python 2.
print(d)
Output:
{'1': {'color': 'lilac', 'breed': 'bulldog', 'name': 'Emi', 'weight': 65}, '3': {'color': 'white', 'breed': 'bulldog', 'name': 'Buford', 'weight': 95}, '2': {'color': 'blue', 'breed': 'bulldog', 'name': 'Henley', 'weight': 70}}
This is a situation where pythons iterables come to shine
p = {'Buford':{'color':'white', 'weight': 95, 'age':'3',
'breed':'bulldog'},
'Henley':{'color':'blue', 'weight': 70, 'age':'2',
'breed':'bulldog'},
'Emi':{'color':'lilac', 'weight': 65, 'age':'1',
'breed':'bulldog'},
}
new_dictionary = {p[i]['age']:{'color':p[i]['color'],'weight':p[i]['weight'],
'name':i,'breed':p[i]['breed']} for i in p}
Output:
{'3': {'color': 'white', 'weight': 95, 'name': 'Buford', 'breed': 'bulldog'},
'2': {'color': 'blue', 'weight': 70, 'name': 'Henley', 'breed': 'bulldog'},
'1': {'color': 'lilac', 'weight': 65, 'name': 'Emi', 'breed': 'bulldog'}}
With a couple of comprehensions, you can do that transformation like:
Code:
new_dict = {
info['age']: {k: v for k, v in list(info.items()) + [('name', name)]
if k != 'age'}
for name, info in pet_dictionary.items()
}
Test Code:
pet_dictionary = {
'Buford': {'color': 'white', 'weight': 95, 'age': '3', 'breed': 'bulldog'},
'Henley': {'color': 'blue', 'weight': 70, 'age': '2', 'breed': 'bulldog'},
'Emi': {'color': 'lilac', 'weight': 65, 'age': '1', 'breed': 'bulldog'},
}
new_dict = {
info['age']: {k: v for k, v in list(info.items()) + [('name', name)]
if k != 'age'}
for name, info in pet_dictionary.items()
}
for dog in new_dict.items():
print(dog)
Results:
('3', {'color': 'white', 'weight': 95, 'breed': 'bulldog', 'name': 'Buford'})
('2', {'color': 'blue', 'weight': 70, 'breed': 'bulldog', 'name': 'Henley'})
('1', {'color': 'lilac', 'weight': 65, 'breed': 'bulldog', 'name': 'Emi'})
With pandas,
import pandas as pd
pet_dictionary = {'Buford':{'color':'white', 'weight': 95, 'age':'3',
'breed':'bulldog'},
'Henley':{'color':'blue', 'weight': 70, 'age':'2',
'breed':'bulldog'},
'Emi':{'color':'lilac', 'weight': 65, 'age':'1',
'breed':'bulldog'},
}
pd.DataFrame.from_dict(pet_dictionary, orient='index') \
.reset_index() \
.rename(columns={'index': 'name'}) \
.set_index('age') \
.to_dict('index')
def flip(k, v):
    """Return ``(age, record)`` for one pet without modifying *v*.

    *record* is a shallow copy of *v* in which 'name' is set to *k* and the
    'age' entry has been removed; the removed age is the first tuple item.
    Raises KeyError if *v* has no 'age' entry.
    """
    record = dict(v)
    record['name'] = k
    age = record.pop('age')
    return age, record
pet_dictionary2 = dict([flip(k, v) for k, v in pet_dictionary.items()])
# import pprint as pp; pp.pprint(pet_dictionary2)
# {'1': {'breed': 'bulldog', 'color': 'lilac', 'name': 'Emi', 'weight': 65},
# '2': {'breed': 'bulldog', 'color': 'blue', 'name': 'Henley', 'weight': 70},
# '3': {'breed': 'bulldog', 'color': 'white', 'name': 'Buford', 'weight': 95}}
If it is ok to change the previous dictionary, then you can do:
def flip(k, v):
    """Return ``(age, v)`` after rewriting *v* in place.

    'name' is set to *k* on *v* itself and the 'age' entry is removed from
    it; the removed age is the first tuple item and the very same dict object
    is the second.  Raises KeyError if *v* has no 'age' entry.
    """
    v['name'] = k
    age = v.pop('age')
    return age, v
Doing this in few lines, without any additional libs, but mutating original dictionary:
pet_dictionary = {
nested.pop('age'): nested.update({'name': name}) or nested
for name, nested in pet_dictionary.items()
}
And with additional import, but without mutating pet_dictionary:
import copy
new_pet_dict = {
nested.pop('age'): nested.update({'name': name}) or nested
for name, nested in copy.deepcopy(pet_dictionary).items()
}
...which leaves original pet_dictionary untouched.
Info
Initially, I published a different answer, in which the key of the new dict was created with the .pop method and the nested dict with {**nested, 'name': name}, but it didn't work. It would have been a much cleaner solution, but before Python 3.8 the value expression of a dict comprehension is evaluated before the key expression, so 'age' was still present when the nested dict was copied and was only popped afterwards.
How does this work then? It looks little tricky, especially line:
nested.update({'name': name}) or nested
But let's have a closer look. We need nested to be updated with name key, but that returns None and mutates object. So left part of this or will be always None and we would like to have dict object in our dict comprehension. Here comes short circuit evaluation in Python, which always returns second operand if first is falsy.
The non-mutating example uses deepcopy and mutates an ad-hoc copy, not the original dictionary.
Update for Python 3.8:
If mutating the original dict is acceptable (credit to @pylang for pointing out that it is mutated), there is a neat syntax for playing with dictionaries:
new = {nested.pop('age'): {**nested, 'name': name} for name, nested in pet_dictionary.items()}
Here's a two liner:
## add new value, which was formerly the key
## (a plain loop: this step exists only for its side effect, so there is no
## point in building a throwaway dict of None values with a comprehension)
for k, v in pet_dictionary.items():
    v['name'] = k
## reset keys to value of 'age'
## (pop instead of get, so 'age' is removed from the nested dict and is not
## stored both as the key and inside the value; the None default keeps the
## lookup from raising when a record has no 'age')
new_pet = {v.pop('age', None): v for v in pet_dictionary.values()}

How to flatten a list of dicts with nested dicts

I want to flatten a list of dict but having issues,
let's say i have a list of dict as,
d = [{'val': 454,'c': {'name': 'ss'}, 'r': {'name1': 'ff'}},{'val': 'ss', 'c': {'name': 'ww'}, 'r': {'name1': 'ff'}}, {'val': 22,'c': {'name': 'dd'}, 'r': {'name1': 'aa'}}]
And the output I'm trying to get is,
d = [{'val': 454,'name': 'ss', 'name1': 'ff'},{'val': 'ss','name': 'ww', 'name1': 'ff'},{'val': 22, 'name': 'dd', 'name1': 'aa'}]
For which I'm using the following function,
def flatten(structure, key="", flattened=None):
    """Collect every leaf of a nested dict/list structure into one dict.

    Leaves found inside a dict are stored under their own key; leaves found
    directly inside a list are stored under their index rendered as a string.
    All leaves go into the same dict, so a later leaf with an already-used
    key overwrites the earlier one.  Only exact ``dict`` and ``list`` objects
    are descended into; everything else is stored as a leaf under *key*.

    *flattened* is the dict being filled; a new one is created when it is
    None.  The filled dict is returned.
    """
    result = {} if flattened is None else flattened
    kind = type(structure)
    if kind is list:
        for index, element in enumerate(structure):
            flatten(element, "%d" % index, result)
    elif kind is dict:
        for child_key, child in structure.items():
            flatten(child, child_key, result)
    else:
        result[key] = structure
    return result
Now, the issue I have is, it's only generating the first element in the dict
You are probably initializing something in the wrong place. Take a look at the code below:
d = [{'val': 454, 'c': {'name': 'ss'}, 'r': {'name1': 'ff'}}, {'val': 55, 'c': {'name': 'ww'}, 'r': {'name1': 'ff'}}, {'val': 22, 'c': {'name': 'dd'}, 'r': {'name1': 'aa'}}]
# ^ typo here
def flatten(my_dict):
    """Flatten one level of nesting in every dict of an iterable of dicts.

    For each dict in *my_dict* (despite the name, an iterable of dicts) a new
    dict is built: entries whose value is itself a dict are replaced by that
    inner dict's entries, all other entries are copied unchanged.  Only one
    level is unpacked; a dict nested deeper stays a dict value.  The inputs
    are not modified.

    Returns the list of new dicts, in input order.
    """
    res = []
    for sub in my_dict:
        flat = {}
        for k, v in sub.items():
            if isinstance(v, dict):
                # Hoist the inner entries; the outer key itself is dropped.
                flat.update(v)
            else:
                flat[k] = v
        res.append(flat)
    return res
result = flatten(d)
print(result) # [{'name': 'ss', 'name1': 'ff', 'val': 454}, {'name': 'ww', 'name1': 'ff', 'val': 55}, {'name': 'dd', 'name1': 'aa', 'val': 22}]
You should initialize flattened to the same type as structure if it's None, and pass None when recursing at the list case:
def _flatten_into(structure, key, flattened):
    """Store every leaf of *structure* in the dict *flattened* and return it.

    Leaves inside a dict keep their own key; leaves directly inside a list
    are keyed by their index rendered as a string.
    """
    if type(structure) not in (dict, list):
        flattened[key] = structure
    elif isinstance(structure, list):
        for i, item in enumerate(structure):
            _flatten_into(item, "%d" % i, flattened)
    else:
        for new_key, value in structure.items():
            _flatten_into(value, new_key, flattened)
    return flattened


def flatten_2(structure, key="", flattened=None):
    """Flatten *structure*, producing one flat dict per top-level list item.

    * A list yields a list with one flat dict per element.
    * A dict yields a single flat dict of all its leaves.
    * Any other value yields ``{key: value}``.

    The recursion is done by the private helper above, so the function no
    longer depends on a separately defined ``flatten`` being in scope.

    *flattened* is the container to fill; when it is None a list is created
    for a list input and a dict for everything else.  The container is
    returned.
    """
    if flattened is None:
        # Only an exact list gets a list container: a leaf is stored by key,
        # which a list container cannot do.
        flattened = [] if type(structure) is list else {}
    if type(structure) not in (dict, list):
        flattened[key] = structure
    elif isinstance(structure, list):
        for i, item in enumerate(structure):
            # A fresh dict per element keeps the elements separate instead of
            # letting later ones overwrite earlier ones.
            flattened.append(_flatten_into(item, "%d" % i, {}))
    else:
        for new_key, value in structure.items():
            _flatten_into(value, new_key, flattened)
    return flattened
In [13]: flatten_2(d)
Out[13]:
[{'name': 'ss', 'name1': 'ff', 'val': 454},
{'name': 'ww', 'name1': 'ff', 'val': 'ss'},
{'name': 'dd', 'name1': 'aa', 'val': 22}]
This of course only works for a limited type of data.

how to delete empty dict inside list of dictionary?

How can I remove empty dict from list of dict as,
{
"ages":[{"age":25,"job":"teacher"},
{},{},
{"age":35,"job":"clerk"}
]
}
I am beginner to python.
Thanks in advance.
Try this
In [50]: mydict = {
....: "ages":[{"age":25,"job":"teacher"},
....: {},{},
....: {"age":35,"job":"clerk"}
....: ]
....: }
In [51]: mydict = {"ages":[i for i in mydict["ages"] if i]}
In [52]: mydict
Out[52]: {'ages': [{'age': 25, 'job': 'teacher'}, {'age': 35, 'job': 'clerk'}]}
OR simply use filter
>>>mylist = [{'age': 25, 'job': 'teacher'}, {}, {}, {'age': 35, 'job': 'clerk'}]
>>>filter(None, mylist)
[{'age': 25, 'job': 'teacher'}, {'age': 35, 'job': 'clerk'}]
So in your dict, apply it as
{
"ages":filter(None, [{"age":25,"job":"teacher"},
{},{},
{"age":35,"job":"clerk"}
])
}
There's an even simpler and more intuitive way than filter, and it works in both Python 2 and Python 3:
You can do a "truth value testing" on a dict to test if it's empty or not:
>>> foo = {}
>>> if foo:
... print(foo)
...
>>>
>>> bar = {'key':'value'}
>>> if bar:
... print(bar)
...
{'key':'value'}
Therefore you can iterate over mylist and test for empty dicts with an if-statement:
>>> mylist = [{'age': 25, 'job': 'teacher'}, {}, {}, {'age': 35, 'job': 'clerk'}]
>>> [i for i in mylist if i]
[{'age': 25, 'job': 'teacher'}, {'age': 35, 'job': 'clerk'}]
If you are using Python 3, simply do:
list(filter(None, your_list_name))
This removes all empty dicts from your list.
This while loop will keep looping while there's a {} in the list and remove each one until there's none left.
while {} in dictList:
dictList.remove({})
You may try the following function:
def trimmer(data):
    """Return a copy of *data* with empty entries removed recursively.

    Dicts and lists are rebuilt without entries whose value is None, an
    empty string, or an empty container.  Children are trimmed before the
    emptiness test, so a container that only becomes empty through trimming
    (for example ``[{}]``) is removed as well.  Other falsy values such as
    ``0`` and ``False`` carry information and are kept.

    Anything that is not a dict or list is returned unchanged.  The input is
    not modified.
    """
    def is_empty(value):
        # Test emptiness explicitly; plain truthiness would also discard
        # 0 and False.
        sized = (str, bytes, list, tuple, dict, set, frozenset)
        return value is None or (isinstance(value, sized) and not value)

    if isinstance(data, dict):
        trimmed = {key: trimmer(value) for key, value in data.items()}
        return {key: value for key, value in trimmed.items()
                if not is_empty(value)}
    if isinstance(data, list):
        trimmed = [trimmer(item) for item in data]
        return [item for item in trimmed if not is_empty(item)]
    return data
This will trim
{
"ages":[{"age":25,"job":"teacher"},
{},{},
{"age":35,"job":"clerk"}
]
}
and even
{'ages': [
{'age': 25, 'job': 'teacher', 'empty_four': []},
[], {},
{'age': 35, 'job': 'clerk', 'empty_five': []}],
'empty_one': [], 'empty_two': '',
'empty_three': {}
}
to this:
{'ages': [{'age': 25, 'job': 'teacher'}, {'age': 35, 'job': 'clerk'}]}
You can also try this
mylist = [{'age': 25, 'job': 'teacher'}, {}, {}, {'age': 35, 'job': 'clerk'}]
mylist=[i for i in mylist if i]
print(mylist)
Output:
[{'age': 25, 'job': 'teacher'}, {'age': 35, 'job': 'clerk'}]

Categories