Convert a nested dictionary into a string - python

I am trying to achieve the following:
Input = {
'key1':{'key11':'val11', 'key12':'val12'},
'key2':{'key21':{'key211':'val211', 'key212':'val212'}},
'key3':'val3',
'key4':{'key41':'val41', 'key42':'val42'}
}
Output =
1) When value is a dictionary, then create an output string variable =
key1 { key11 val11 key12 val12 } key2 { key21 { key211 val211 key212 val212 }} key4 { key41 val41 key42 val42}
2) When value is not a dictionary, then print "just a string element"
Below is my code:
from collections import defaultdict
def stringBuilder(dictionary):
stringOption = ""
innerString = ""
# print dictionary
for key, value in dictionary.iteritems():
if isinstance(value, dict):
stringBuilder(value)
else:
innerString = innerString + " " + str(key) + " " + str(value)
print innerString
stringOption = "{" + innerString + " }"
print stringOption
return stringOption
d = {'key1':{'key11':'val11', 'key12':'val12'}, 'key2':{'key21':{'key211':'val211', 'key212':'val212'}}, 'key3':'val3', 'key4':{'key41':'val41', 'key42':'val42'}}
print d
stringOption = ""
for key, value in d.iteritems():
if isinstance(value, dict):
stringOption = stringOption + " " + str(key) + " " + stringBuilder(value)
print stringOption
else:
print "just a string element"
print stringOption
Here is the output that I get:
{'key2': {'key21': {'key211': 'val211', 'key212': 'val212'}}}
key211 val211
{ key211 val211 }
key2
key2

Your main problem is that you discard the returned string of your dict case:
if isinstance(value, dict):
stringBuilder(value)
Instead, try saving the value to pass back up the line:
if isinstance(value, dict):
stringOption = stringBuilder(value)
Resulting output:
key2 { key211 val211 key212 val212 } key1 { key12 val12 key11 val11 } key4 { key41 val41 key42 val42 }

Rather than writing your own implementation (unless that's what you want) why not use the json library? You can extend JSONEncoder and JSONDecoder to suit your use case?

A great recursion exercise!
def rec(x):
    """Render a nested dictionary as a brace-delimited string.

    A dictionary becomes ``{key value key value ...}``, with nested
    dictionaries rendered recursively.  Anything that is not a dictionary is
    treated as a leaf and converted with ``str``, so numbers, ``None`` and
    other scalar values work as well as strings.

    Args:
        x: a (possibly nested) dict, or a leaf value.

    Returns:
        The string representation described above.
    """
    if not isinstance(x, dict):
        # Leaf: strings come back unchanged, other values are stringified.
        return str(x)
    # str(k) keeps non-string keys (ints, tuples, ...) from breaking the join.
    return '{' + ' '.join(str(k) + ' ' + rec(v) for k, v in x.items()) + '}'
Input = {
'key1':{'key11':'val11', 'key12':'val12'},
'key2':{'key21':{'key211':'val211', 'key212':'val212'}},
'key3':'val3',
'key4':{'key41':'val41', 'key42':'val42'}
}
Output:
In [6]: rec(Input)
Out[6]: '{key1 {key11 val11 key12 val12} key2 {key21 {key211 val211 key212 val212}} key3 val3 key4 {key41 val41 key42 val42}}'

Related

convert string which contains sub string to dictionary

I am trying to convert strings that are in a particular format into a Python dictionary.
String format is like below,
st1 = 'key1 key2=value2 key3="key3.1, key3.2=value3.2 , key3.3 = value3.3, key3.4" key4'
I want to parse it and convert to dictionary as below,
dict1 {
key1: None,
key2: value2,
key3: {
key3.1: None,
key3.2: value3.2,
key3.3: value3.3,
key3.4: None
}
key4: None,
I tried to use the Python re package and the string split function, but was not able to achieve the result. I have thousands of strings in the same format and I am trying to automate this. Could someone help?
If all your strings are consistent, and only have 1 layer of sub dict, this code below should do the trick, you may need to make tweaks/changes to it.
import json

st1 = 'key1 key2=item2 key3="key3.1, key3.2=item3.2 , key3.3 = item3.3, key3.4" key4'
# Normalise spacing so "key = value" and "value ," each become a single token.
st1 = st1.replace(' = ', '=')
st1 = st1.replace(' ,', ',')
new_dict = {}
no_keys = False
while not no_keys:
    st1 = st1.lstrip()
    if not st1:
        # Only whitespace was left (or the input was empty): nothing to parse.
        break
    # The next token runs up to the first space (or to the end of the string).
    if " " in st1:
        item = st1.split(" ")[0]
    else:
        item = st1
    if '=' in item:
        if '="' in item:
            # key="sub1, sub2=value, ..." -> one level of nested dictionary.
            item = item.split('=')[0]
            new_dict[item] = {}
            # Consume the leading `key=` by position; str.replace would also
            # delete any later occurrence of the same text.
            st1 = st1[len(item) + 1:]
            sub_items = st1.split('"')[1]
            for sub_item in sub_items.split(','):
                if "=" in sub_item:
                    # maxsplit=1 so a value may itself contain '='.
                    sub_key, sub_value = sub_item.split('=', 1)
                    new_dict[item][sub_key.strip()] = sub_value.strip()
                else:
                    new_dict[item][sub_item.strip()] = None
            # Consume the quoted section, including both quote characters.
            st1 = st1[len(sub_items) + 2:]
        else:
            key, value = item.split('=', 1)
            new_dict[key] = value
            # Slicing also works when this is the last token, where the old
            # replace(f"{item} ", "") matched nothing and the loop never ended.
            st1 = st1[len(item):]
    else:
        new_dict[item] = None
        st1 = st1[len(item):]
    if st1 == "":
        no_keys = True
print(json.dumps(new_dict, indent=4))
Consider using a parsing tool like lark. A simple example for your case:
_grammar = r'''
?start: value
?value: object
| NON_SEPARATOR_STRING?
object : "\"" [pair (_SEPARATOR pair)*] "\""
pair : NON_SEPARATOR_STRING [_PAIRTOR] value
NON_SEPARATOR_STRING: /[a-zA-z0-9\.]+/
_SEPARATOR: /[, ]+/
| ","
_PAIRTOR: " = "
| "="
'''
parser = Lark(_grammar)
st1 = 'key1 key2=value2 key3="key3.1, key3.2=value3.2 , key3.3 = value3.3, key3.4" key4'
tree = parser.parse(f'"{st1}"')
print(tree.pretty())
"""
object
pair
key1
value
pair
key2
value2
pair
key3
object
pair
key3.1
value
pair
key3.2
value3.2
pair
key3.3
value3.3
pair
key3.4
value
pair
key4
value
"""
Then you can write your own Transformer to transform this tree into your desired data type.

PYTHON - Fastest Way of Flattening/Exploding multiple large JSON files with nested arrays, have more than 100000 json files

I have written JSON flattening logic that explodes and joins nested JSON arrays. It works quickly on a single JSON document, even one with hundreds of nested arrays and nested dicts, but the problem is that I now have 100000 JSON files to handle. Is there a way to either merge multiple JSON files into one big one and run this code on it, or do something else? Any help would be great.
I know there are some duplicate questions, but this one is mainly about efficiently handling a large number of large JSON files.
# let's say I have this json and flattening/exploding code:
from collections import defaultdict
from collections.abc import MutableMapping
from copy import deepcopy

import pandas as pd
sample = {
"rss": {
"overview": {
"id": {
"data": [
{
"stuff": [
{
"onetype": [
{"id": '1', "name": "John Doe"},
{"id": '2', "name": "Don Joeh"},
]
},
{"othertype": [{"id": '2', "company": "ACME"}]},
]
},
{"otherstuff": [{"thing": [['1', '42'], ['2', '2']]}]},
]
}
}
}
}
# Flattening with exploding Logic:
def cross_join(left, right):
    """Combine every row of *left* with every row of *right*.

    Each output row is a deep copy of a left row with the key/value pairs of
    one right row written on top.  When *right* is empty, *left* itself is
    returned unchanged (the same list object, not a copy).

    Args:
        left: list of dict rows.
        right: list of dict rows to merge into each left row.

    Returns:
        A list of merged rows, ordered left-major.
    """
    combined = left if not right else []
    for base_row in left:
        for extra_row in right:
            merged = deepcopy(base_row)
            for column, cell in extra_row.items():
                merged[column] = cell
            # Copy again so the stored row shares nothing with extra_row.
            combined.append(deepcopy(merged))
    return combined
def dict_maker(dic_list):
    """Collect the values of a list of dicts into per-key lists.

    Args:
        dic_list: iterable of dictionaries.

    Returns:
        A ``defaultdict(list)`` mapping each key seen to the list of its
        values, in the order the dictionaries were visited.
    """
    columns = defaultdict(list)
    for record in dic_list:
        for name in record:
            columns[name].append(record[name])
    return columns
def flatten_list(data):
    """Yield the non-list elements of an arbitrarily nested list, in order.

    Only ``list`` instances are descended into; tuples and other containers
    are yielded as single elements.
    """
    # Stack of iterators: the top one is the list currently being walked.
    pending = [iter(data)]
    while pending:
        for elem in pending[-1]:
            if isinstance(elem, list):
                # Descend; the parent iterator resumes after this one ends.
                pending.append(iter(elem))
                break
            yield elem
        else:
            # Current list exhausted: go back to its parent.
            pending.pop()
def flatten_struct(data, prev_heading=""):
    """Flatten nested dicts/lists into a list of flat row dictionaries.

    * A dict cross-joins the rows produced by each of its values, extending
      the column heading with ``"_" + key``.
    * A list concatenates the rows produced by each of its elements.
    * Any other value becomes a single one-column row whose column name is
      ``prev_heading`` without its leading separator character.

    Args:
        data: the structure to flatten.
        prev_heading: heading accumulated so far (starts with "_" once at
            least one key has been traversed).

    Returns:
        A list of row dictionaries.
    """
    if isinstance(data, dict):
        rows = [{}]
        for key, value in data.items():
            rows = cross_join(rows, flatten_struct(value, prev_heading + "_" + key))
    elif isinstance(data, list):
        rows = []
        for item in data:
            # extend() replaces the side-effect-only list comprehension.
            rows.extend(flatten_list(flatten_struct(item, prev_heading)))
    else:
        # [1:] strips the separator that was prepended with the first key.
        rows = [{prev_heading[1:]: data}]
    return rows
def flatten(d, parent_key="", sep="_"):
    """Flatten nested mappings into a single-level dict with joined keys.

    Nested mappings are expanded recursively; their keys are joined to the
    parent key with *sep*.  Non-mapping values (including lists) are kept
    as-is under the joined key.

    Args:
        d: the mapping to flatten.  Anything that is not a mapping yields an
            empty dict.
        parent_key: prefix for every key produced (used by the recursion).
        sep: separator placed between key segments.

    Returns:
        A new flat ``dict``.
    """
    # Use the same MutableMapping test here as for nested values, so a nested
    # mapping that is not a plain dict is expanded rather than silently lost.
    if not isinstance(d, MutableMapping):
        return {}
    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat
def get_section_df(section, section_grp, id=None):
    """Build a list of DataFrames from the elements of *section*.

    Each element is flattened with ``flatten``; list values are exploded
    with ``flatten_struct`` and regrouped per column with ``dict_maker``.
    The resulting columns are merged into one running mapping, and a
    DataFrame is built from that mapping after every element.

    Args:
        section: iterable of elements (each is passed to ``flatten``).
        section_grp: value whose ``str()`` plus "_" prefixes every column name.
        id: not used by the body (its only use is commented out below).

    Returns:
        A list of DataFrames, one per element processed while the running
        mapping was non-empty.
    """
    df_lst = []
    # NOTE: finalMap is created once and never reset, so columns gathered
    # from earlier elements are also present in the frames of later ones.
    finalMap = {}
    for elem in section:
        d = flatten(elem)
        # One small dict per flattened key: list values expand to several
        # "<key>_<column>" entries, everything else stays a single entry.
        flat = [
            {k + "_" + key: val for key, val in dict_maker(flatten_struct(v)).items()}
            if isinstance(v, list)
            else {k: v}
            for k, v in d.items()
        ]
        for new_d in flat:
            finalMap.update(new_d)
        # finalMap.update({k:v for k,v in id})
        if len(finalMap) > 0:
            # Each value becomes a Series, so columns of different lengths
            # are aligned on the index by pd.concat(axis=1).
            df = pd.concat(
                {
                    str(section_grp)
                    + "_"
                    + k.replace("#", "").replace("#", ""): pd.Series(v)
                    for k, v in finalMap.items()
                },
                axis=1,
            )
            df_lst.append(df)
    return df_lst
def process(json_sample):
    """Flatten *json_sample* and explode every list-valued path into rows.

    The sample is flattened with ``flatten``; for each flattened key whose
    value is a list, ``get_section_df`` builds the per-element frames, which
    are concatenated.  The third "_"-separated segment of the key is used as
    the column-name prefix.

    Args:
        json_sample: nested dict as loaded from JSON.

    Returns:
        A DataFrame holding the concatenation of all section frames.

    Raises:
        IndexError: if a list-valued key has fewer than three "_"-separated
            segments.
        ValueError: raised by ``pd.concat`` when no section produced a frame.
    """
    master_d = flatten(json_sample)
    master_id = ''
    df_list = []
    for path, value in master_d.items():
        if not isinstance(value, list):
            continue
        path_group = path.split("_")[2]
        # Build the section frames once; the original computed them twice
        # (once for the emptiness test and again for the concat).
        section_dfs = get_section_df(section=value, id=master_id, section_grp=path_group)
        if section_dfs:
            df_list.append(pd.concat(section_dfs))
    df = pd.concat(df_list)
    return df
print(process(json_sample=sample))
id_stuff_onetype_id id_stuff_onetype_name id_stuff_othertype_id id_stuff_othertype_company id_otherstuff_thing
0 1 John Doe 2 ACME NaN
1 2 Don Joeh NaN NaN NaN
0 1 John Doe 2 ACME 1
1 2 Don Joeh NaN NaN 42
2 NaN NaN NaN NaN 2
3 NaN NaN NaN NaN 2

Recursively parse/convert structured text to dictionary

Is there any good regex/function or packages that allows us to parse indented structured text/data into a dictionary? For example, I have data something like this (can have deeper levels than I mentioned below):
xyz1 : 14
xyz2 : 35
xyz3 : 14
xyz4
sub1_xyz4
sub1_sub1_xyz4 : 45
sub2_sub1_xyz4 : b1fawe
sub2 xyz4 : 455
xyz5 : 2424
And I want to convert it into a dictionary like:
{
'xyz1': '14',
'xyz2': '35',
'xyz3': '14',
'xyz4': {
'sub1_xyz4': {
'sub1_sub1_xyz4': '45',
'sub2_sub1_xyz4': 'b1fawe',
},
'sub2_xyz4': '455'
},
'xyz5': '2424'
}
I tried the following but not able to get it consistently. I feel like there is a very good recursive (so that it can handle unknown depths) function when trying to manage the indented/sub attributes. Any suggestions?
def parse_output(value, indent=0):
parsed_dict = dict()
if indent > 0:
for i in re.split('\n(?!\s{,%d})' % (indent - 1), value):
print("split value is: : ", i)
if '\n' not in i:
iter_val = iter(list(map(lambda x: x.strip(), re.split(' : ', i))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
parse_bearer_info(re.split('\n', i, 1)[1])
iter_val = iter(list(map(lambda x: x.strip(), re.split('\n', i, 1))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
for i in re.split('\n(?!\s+)', value):
#print("iteration value is: ", i)
if '\n' not in i:
iter_val = iter(list(map(lambda x: x.strip(), re.split(' : ', i))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
else:
#print(re.split('\n', i, 1))
#out = parse_bearer_info(re.split('\n', i, 1)[1], 4)
iter_val = iter(list(map(lambda x: x.strip(), re.split('\n', i, 1))))
parsed_dict = {**parsed_dict, **dict(zip(iter_val, iter_val))}
return parsed_dict
You can use itertools.groupby with recursion:
import itertools, re, json
_data = [re.split('\s+:\s+', i) for i in filter(None, content.split('\n'))]
def group_data(d):
    """Turn a list of ``[name, values]`` pairs into a nested dictionary.

    Each entry is ``[name, values]`` where ``values`` is a list that is empty
    for a header line and holds one string for a ``name : value`` line.
    Names of nested entries keep their leading indentation.

    Returns:
        A dict in which plain entries map name -> value and each header maps
        to the dict built recursively from the entries that follow it.
    """
    # Split the input into consecutive runs.  The key is truthy only for
    # entries that have a value AND are not indented; a header and its
    # indented children all produce a falsy key and so land in the same run.
    _d = [[a, list(b)] for a, b in itertools.groupby(d, key=lambda x:bool(x[-1]) and not x[0].startswith(' '))]
    _new_result = {}
    for a, b in _d:
        if a:
            # Run of top-level "name : value" entries: unpack the one-element
            # value list of each.
            _new_result.update(dict([[c, _d] for c, [_d] in b]))
        else:
            # First entry of the run is the header; the rest are its children.
            # c[2:] drops two leading characters from each child name before
            # recursing -- assumes two characters of indentation per level.
            # NOTE(review): two headers in a row would fall into one run, so
            # the second would be treated as a child of the first -- confirm.
            _new_result[b[0][0]] = group_data([[c[2:], _d] for c, _d in b[1:]])
    return _new_result
print(json.dumps(group_data([[a, b] for a, *b in _data]), indent=4))
Output:
{
"xyz1": "14",
"xyz2": "35",
"xyz3": "14",
"xyz4": {
"sub1_xyz4": {
"sub1_sub1_xyz4": "45",
"sub2_sub1_xyz4": "b1fawe"
},
"sub2 xyz4": "455"
},
"xyz5": "2424"
}
Where content is:
xyz1 : 14
xyz2 : 35
xyz3 : 14
xyz4
sub1_xyz4
sub1_sub1_xyz4 : 45
sub2_sub1_xyz4 : b1fawe
sub2 xyz4 : 455
xyz5 : 2424
You could probably do this recursively, but since you only need to track a single indent level, you could just keep a stack with the current object. Add keys to the last item in the stack. When the value is empty, add a new dictionary and push it to the stack. When the indent decreases, pop from the stack.
Something like:
res = {}
# stack[i] is the dictionary currently open at nesting depth i and indents[i]
# is the indentation of the header line that opened it (-1 for the root).
stack = [res]
indents = [-1]
for line in s.split('\n'):
    if not line.strip():
        continue  # blank lines carry no key
    indent = len(line) - len(line.lstrip())
    # Backing out: close every level opened at this indentation or deeper.
    # A single pop is not enough when the indentation drops several levels,
    # or when a header had no children at all.
    while indents[-1] >= indent:
        stack.pop()
        indents.pop()
    # Spaces are removed before splitting, so keys lose inner spaces too.
    # partition() splits on the first ':' only, so values may contain ':'.
    key, sep, value = line.replace(" ", "").partition(':')
    current_dict = stack[-1]
    if sep:
        current_dict[key] = value
    else:  # no value, must be a new level
        current_dict[key] = {}
        stack.append(current_dict[key])
        indents.append(indent)
Result:
{'xyz1': '14',
'xyz2': '35',
'xyz3': '14',
'xyz4': {'sub1_xyz4': {'sub1_sub1_xyz4': '45', 'sub2_sub1_xyz4': 'b1fawe'},
'sub2xyz4': '455'},
'xyz5': '2424'}

Python parsing an ugly configuration file

I have an application generating a weird config file
app_id1 {
key1 = val
key2 = val
...
}
app_id2 {
key1 = val
key2 = val
...
}
...
And I am struggling on how to parse this in python. The keys of each app may vary too.
I can't change the application to generate the configuration file in some easily parsable format :)
Any suggestions on how to do this pythonically ?
I am thinking along the lines of dict of dict
conf = {'app_id1': {'key1' : 'val', 'key2' : 'val'},
'app_id2' : {'key1' : 'val', 'key2' : 'val'}
}
Try something like this:
I assumed you read the content of the file to a string
config_file_string = '''app_id1 {
key1 = val
key2 = val
key3 = val
}
app_id2 {
key1 = val
key2 = val
}'''
config = {}
appid = ''
# Defined up front so a key line before any "name {" header cannot raise
# NameError.
placeholder_dict = {}
for line in config_file_string.splitlines():
    print(line)
    # Tolerate indentation and trailing whitespace around braces and keys.
    line = line.strip()
    if line.endswith('{'):
        # "app_id {" opens a new section.
        appid = line.split()[0].strip()
        placeholder_dict = {}
    elif line.startswith('}'):
        # "}" closes the section and stores what was collected.
        config[appid] = placeholder_dict
    elif '=' in line:
        # partition() splits on the first '=' only, so the value may itself
        # contain '='.
        key, _, value = line.partition('=')
        placeholder_dict[key.strip()] = value.strip()
    # Anything else (blank lines, "...") is not a setting and is skipped;
    # indexing split('=')[1] on such a line raised IndexError.
print(config)
This returns:
{'app_id1': {'key1': 'val', 'key2': 'val', 'key3': 'val'}, 'app_id2': {'key1': 'val', 'key2': 'val'}}
You could use regex: (\w+)\s*\{([^}]*) will find a name { values } construct, and ([^\s=]+)\s*=\s*([^\n]*) will find key = value pairs.
As a one-liner, assuming the contents of the file are in the variable s:
config= {key:dict(re.findall(r'([^\s=]+)\s*=\s*([^\n]*)', values)) for key,values in re.findall(r'(\w+)\s*\{([^}]*)', s)}
You can use pyparsing for less strict grammar:
from pyparsing import alphanums, restOfLine, OneOrMore, Word, Suppress
from copy import copy
lbrace,rbrace,eq = map(Suppress,"{}=")
configitem = {}
configall = {}
wd = Word(alphanums+'_')
kw = wd + eq + restOfLine
kw.setParseAction(lambda x: configitem.__setitem__(x[0],x[1].strip()))
group = wd + lbrace + OneOrMore(kw) + rbrace
group.addParseAction(lambda x: configall.__setitem__(x[0],copy(configitem)))
group.addParseAction(lambda x: configitem.clear())
config = OneOrMore(group)
config_file_string = '''app_id1
{
key1 = val
key2 = val
key3 = val
}
app_id2 {
key1 = val
key2 = val
}'''
config.parseString(config_file_string)
print(configall)

How to pretty print nested dictionaries?

How can I pretty print a dictionary with depth of ~4 in Python? I tried pretty printing with pprint(), but it did not work:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(mydict)
I simply want an indentation ("\t") for each nesting, so that I get something like this:
key1
value1
value2
key2
value1
value2
etc.
How can I do this?
My first thought was that the JSON serializer is probably pretty good at nested dictionaries, so I'd cheat and use that:
>>> import json
>>> print(json.dumps({'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}},
... sort_keys=True, indent=4))
{
"a": 2,
"b": {
"x": 3,
"y": {
"t1": 4,
"t2": 5
}
}
}
I'm not sure how exactly you want the formatting to look like, but you could start with a function like this:
def pretty(d, indent=0):
    """Print a nested dictionary, one tab of indentation per nesting level.

    Every key is printed on its own line; a non-dict value is printed on the
    following line, one level deeper than its key.

    Args:
        d: the dictionary to print.
        indent: number of tabs to put before the keys of *d*.
    """
    for name in d:
        entry = d[name]
        print('\t' * indent + str(name))
        if not isinstance(entry, dict):
            print('\t' * (indent + 1) + str(entry))
            continue
        pretty(entry, indent + 1)
You could try YAML via PyYAML. Its output can be fine-tuned. I'd suggest starting with the following:
print(yaml.dump(data, allow_unicode=True, default_flow_style=False))
The result is very readable; it can be also parsed back to Python if needed.
Edit:
Example:
>>> import yaml
>>> data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> print(yaml.dump(data, default_flow_style=False))
a: 2
b:
x: 3
y:
t1: 4
t2: 5
This way you can pretty print it; for example, if your dictionary is named yasin:
import json
print (json.dumps(yasin, indent=2))
or, safer:
print (json.dumps(yasin, indent=2, default=str))
One of the most Pythonic ways to do this is to use the built-in pprint module.
The argument that controls the print depth is, as you might expect, depth:
import pprint
pp = pprint.PrettyPrinter(depth=4)
pp.pprint(mydict)
That's it !
Among the answers given so far, I don't see any pretty printer that at least mimics the output of the Python interpreter with very simple formatting, so here's mine:
class Formatter(object):
    """Pretty printer that renders values as indented, repr-like text.

    Formatting is dispatched on the exact type of each value through the
    ``types`` registry; values whose type is not registered fall back to the
    formatter registered for ``object``.  Extra formatters can be added with
    ``set_formater``.
    """

    def __init__(self):
        self.types = {}
        self.htchar = '\t'   # one level of indentation
        self.lfchar = '\n'   # line separator
        self.indent = 0      # indentation level of the outermost value
        cls = self.__class__
        for kind, handler in (
            (object, cls.format_object),
            (dict, cls.format_dict),
            (list, cls.format_list),
            (tuple, cls.format_tuple),
        ):
            self.set_formater(kind, handler)

    def set_formater(self, obj, callback):
        """Register *callback(formatter, value, indent)* for type *obj*."""
        self.types[obj] = callback

    def __call__(self, value, **args):
        """Format *value*; keyword arguments are stored as attributes first."""
        for name, setting in args.items():
            setattr(self, name, setting)
        return self._render(value, self.indent)

    def _render(self, value, indent):
        # Exact-type lookup with `object` as the catch-all entry.
        kind = type(value)
        if kind not in self.types:
            kind = object
        return self.types[kind](self, value, indent)

    def _wrap(self, template, pieces, indent):
        # Put each piece on its own line one level deeper than the brackets.
        lead = self.lfchar + self.htchar * (indent + 1)
        body = ','.join(lead + piece for piece in pieces)
        return template % (body + self.lfchar + self.htchar * indent)

    def format_object(self, value, indent):
        """Fallback formatter: the plain ``repr`` of the value."""
        return repr(value)

    def format_dict(self, value, indent):
        """Format a dict as ``{`` key: value lines ``}``."""
        pieces = [
            repr(key) + ': ' + self._render(value[key], indent + 1)
            for key in value
        ]
        return self._wrap('{%s}', pieces, indent)

    def format_list(self, value, indent):
        """Format a list as ``[`` one item per line ``]``."""
        pieces = [self._render(item, indent + 1) for item in value]
        return self._wrap('[%s]', pieces, indent)

    def format_tuple(self, value, indent):
        """Format a tuple as ``(`` one item per line ``)``."""
        pieces = [self._render(item, indent + 1) for item in value]
        return self._wrap('(%s)', pieces, indent)
To initialize it :
pretty = Formatter()
It can support the addition of formatters for defined types, you simply need to make a function for that like this one and bind it to the type you want with set_formater :
from collections import OrderedDict
def format_ordereddict(self, value, indent):
    """Formatter callback rendering an OrderedDict as a list of pairs.

    Written to be registered on a formatter object: *self* must provide the
    ``types`` registry and the ``lfchar``/``htchar`` settings.

    Args:
        self: the formatter the callback is registered on.
        value: the OrderedDict to render.
        indent: indentation level of the opening ``OrderedDict([``.
    """
    lead = self.lfchar + self.htchar * (indent + 1)
    entries = []
    for key in value:
        item = value[key]
        # Exact-type lookup, falling back to the formatter for `object`.
        kind = type(item)
        if kind not in self.types:
            kind = object
        rendered = self.types[kind](self, item, indent + 1)
        entries.append(lead + "(" + repr(key) + ', ' + rendered + ")")
    closing = self.lfchar + self.htchar * indent
    return 'OrderedDict([%s])' % (','.join(entries) + closing)
pretty.set_formater(OrderedDict, format_ordereddict)
For historical reasons, I keep the previous pretty printer, which was a function instead of a class. Both can be used the same way; the class version simply permits much more:
def pretty(value, htchar='\t', lfchar='\n', indent=0):
    """Return an indented, repr-like string for nested dicts/lists/tuples.

    Only exact ``dict``, ``list`` and ``tuple`` instances are expanded, one
    item per line; every other value is rendered with ``repr``.

    Args:
        value: the value to format.
        htchar: string used for one level of indentation.
        lfchar: string used as the line separator.
        indent: indentation level of the enclosing brackets.
    """
    nlch = lfchar + htchar * (indent + 1)
    kind = type(value)
    if kind is dict:
        template = '{%s}'
        pieces = [
            repr(key) + ': ' + pretty(value[key], htchar, lfchar, indent + 1)
            for key in value
        ]
    elif kind is list:
        template = '[%s]'
        pieces = [pretty(item, htchar, lfchar, indent + 1) for item in value]
    elif kind is tuple:
        template = '(%s)'
        pieces = [pretty(item, htchar, lfchar, indent + 1) for item in value]
    else:
        return repr(value)
    body = ','.join(nlch + piece for piece in pieces)
    return template % (body + lfchar + htchar * indent)
To use it :
>>> a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':pretty,'unicode':u'\xa7',("tuple","key"):"valid"}
>>> a
{'function': <function pretty at 0x7fdf555809b0>, 'tuple': ('a', 'b', 1, 2), 'list': ['a', 'b', 1, 2], 'dict': {'a': 1, 2: 'b'}, 'unicode': u'\xa7', ('tuple', 'key'): 'valid'}
>>> print(pretty(a))
{
'function': <function pretty at 0x7fdf555809b0>,
'tuple': (
'a',
'b',
1,
2
),
'list': [
'a',
'b',
1,
2
],
'dict': {
'a': 1,
2: 'b'
},
'unicode': u'\xa7',
('tuple', 'key'): 'valid'
}
Compared to other versions :
This solution looks directly for object type, so you can pretty print almost everything, not only list or dict.
Doesn't have any dependency.
Everything is put inside a string, so you can do whatever you want with it.
The class and the function has been tested and works with Python 2.7 and 3.4.
You can have all types of objects inside; it is their representations, not their contents, that are put in the result (so strings have quotes, Unicode strings are fully represented ...).
With the class version, you can add formatting for every object type you want or change them for already defined ones.
key can be of any valid type.
Indent and Newline character can be changed for everything we'd like.
Dict, List and Tuples are pretty printed.
I had to pass the default parameter as well, like this:
print(json.dumps(my_dictionary, indent=4, default=str))
and if you want the keys sorted, then you can do:
print(json.dumps(my_dictionary, sort_keys=True, indent=4, default=str))
in order to fix this type error:
TypeError: Object of type 'datetime' is not JSON serializable
which caused by datetimes being some values in the dictionary.
The modern solution here is to use rich. Install with
pip install rich
and use as
from rich import print
d = {
"Alabama": "Montgomery",
"Alaska": "Juneau",
"Arizona": "Phoenix",
"Arkansas": "Little Rock",
"California": "Sacramento",
"Colorado": "Denver",
"Connecticut": "Hartford",
"Delaware": "Dover",
"Florida": "Tallahassee",
"Georgia": "Atlanta",
"Hawaii": "Honolulu",
"Idaho": "Boise",
}
print(d)
The output is nicely indented:
Another option with yapf:
from pprint import pformat
from yapf.yapflib.yapf_api import FormatCode
dict_example = {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5], '4': {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5]}}
dict_string = pformat(dict_example)
formatted_code, _ = FormatCode(dict_string)
print(formatted_code)
Output:
{
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5]
}
}
You can use print-dict
from print_dict import pd
dict1 = {
'key': 'value'
}
pd(dict1)
Output:
{
'key': 'value'
}
Output of this Python code:
{
'one': 'value-one',
'two': 'value-two',
'three': 'value-three',
'four': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'method': <function custom_method at 0x7ff6ecd03e18>,
'tuple': (1, 2),
'unicode': '✓',
'ten': 'value-ten',
'eleven': 'value-eleven',
'3': [1, 2, 3, 4]
}
},
'object1': <__main__.Object1 object at 0x7ff6ecc588d0>,
'object2': <Object2 info>,
'class': <class '__main__.Object1'>
}
Install:
$ pip install print-dict
Disclosure: I'm the author of print-dict
As others have posted, you can use recursion/dfs to print the nested dictionary data and call recursively if it is a dictionary; otherwise print the data.
def print_json(data):
    """Print every key and leaf value of a nested dictionary, depth first.

    Each key and each non-dict value is printed on its own line, without
    indentation.

    Args:
        data: a (possibly nested) dict, or a leaf value.
    """
    # isinstance also covers dict subclasses such as OrderedDict.
    if isinstance(data, dict):
        for k, v in data.items():
            print(k)
            print_json(v)
    else:
        print(data)
pout can pretty print anything you throw at it, for example (borrowing data from another answer):
data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
pout.vs(data)
would result in output printed to the screen like:
{
'a': 2,
'b':
{
'y':
{
't2': 5,
't1': 4
},
'x': 3
}
}
or you can return the formatted string output of your object:
v = pout.s(data)
Its primary use case is for debugging so it doesn't choke on object instances or anything and it handles unicode output as you would expect, works in python 2.7 and 3.
disclosure: I'm the author and maintainer of pout.
prettyformatter
Disclaimer: I'm the author of the package.
For a comparison with other formatters, see Other Formatters.
Formatting
Unlike pprint.pprint, prettyformatter spreads vertically more and attempts to align items more.
Unlike json.dumps, prettyformatter is usually more compact and attempts to align dictionary values wherever reasonable.
from prettyformatter import pprint
batters = [
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
]
toppings = [
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
]
data = {"id": "0001", "type": "donut", "name": "Cake", "ppu": 0.55, "batters": batters, "topping": toppings}
pprint(data)
Output:
{
"id" : "0001",
"type" : "donut",
"name" : "Cake",
"ppu" : 0.55,
"batters":
[
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
],
"topping":
[
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
],
}
Features
See here for the full documentation.
JSON
Unlike pprint.pprint, prettyformatter supports JSON conversion via the json=True argument. This includes changing None to null, True to true, False to false, and correct use of quotes.
Unlike json.dumps, prettyformatter supports JSON coercion with more data types. This includes changing any dataclass or mapping into a dict and any iterable into a list.
from dataclasses import dataclass
from prettyformatter import PrettyDataclass, pprint
# The decorator must start with "@"; with a leading "#" the line is only a
# comment and Point is not turned into a dataclass.
@dataclass(unsafe_hash=True)
class Point(PrettyDataclass):
    x: int
    y: int
pprint((Point(1, 2), Point(3, 4)), json=True)
Output:
[{"x": 1, "y": 2}, {"x": 3, "y": 4}]
Customization
Unlike pprint.pprint or json.dumps, prettyformatter supports easy customization with additional types.
Implementing the __pargs__ and/or __pkwargs__ methods for a prettyformatter.PrettyClass subclass allows one to easily customize classes in the form of "cls_name(*args, **kwargs)".
from prettyformatter import PrettyClass
class Dog(PrettyClass):
def __init__(self, name, **kwargs):
self.name = name
def __pkwargs__(self):
return {"name": self.name}
print(Dog("Fido"))
"""
Dog(name="Fido")
"""
print(Dog("Fido"), json=True)
"""
{"name": "Fido"}
"""
Implementing the __pformat__ method allows even more specific implementations of the pformat function.
Using the @prettyformatter.register decorator also allows customizing classes that already exist, in the same way implementing __pformat__ would.
import numpy as np
from prettyformatter import pprint, register
# The decorator must start with "@"; as a "#" comment the function was never
# registered for np.ndarray.
@register(np.ndarray)
def pformat_ndarray(obj, specifier, depth, indent, shorten, json):
    """Pretty-format a NumPy array for prettyformatter.

    With ``json`` set, the array is converted to nested lists and formatted
    by ``pformat``.  Otherwise the array's ``repr`` is used, with every
    element formatted through *specifier* and continuation lines shifted
    right by *depth* spaces.
    """
    if json:
        # pformat is not among the names imported alongside register, so
        # import it where it is used.
        from prettyformatter import pformat
        return pformat(obj.tolist(), specifier, depth, indent, shorten, json)
    with np.printoptions(formatter=dict(all=lambda x: format(x, specifier))):
        return repr(obj).replace("\n", "\n" + " " * depth)
pprint(dict.fromkeys("ABC", np.arange(9).reshape(3, 3)))
Output:
{
"A":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"B":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"C":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
}
I took sth's answer and modified it slightly to fit my needs of a nested dictionaries and lists:
def pretty(d, indent=0):
    """Print nested dictionaries and lists with tab indentation.

    Dictionary keys are printed at *indent* tabs.  Scalar values, and scalar
    items of a list, are printed at ``indent + 1`` tabs; nested dicts and
    lists are printed recursively one level deeper.  Anything that is neither
    a dict nor a list is ignored at the top level.

    Args:
        d: the dict or list to print.
        indent: number of tabs before the keys of *d*.
    """
    if isinstance(d, dict):
        # items() and print() work on Python 3 (and on Python 2 as well).
        for key, value in d.items():
            print('\t' * indent + str(key))
            if isinstance(value, (dict, list)):
                pretty(value, indent + 1)
            else:
                print('\t' * (indent + 1) + str(value))
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, (dict, list)):
                pretty(item, indent + 1)
            else:
                print('\t' * (indent + 1) + str(item))
Which then gives me output like:
>>>
xs:schema
#xmlns:xs
http://www.w3.org/2001/XMLSchema
xs:redefine
#schemaLocation
base.xsd
xs:complexType
#name
Extension
xs:complexContent
xs:restriction
#base
Extension
xs:sequence
xs:element
#name
Policy
#minOccurs
1
xs:complexType
xs:sequence
xs:element
...
I used what you guys taught me plus the power of decorators to overload the classic print function. Just change the indent to your needs. I added it as a gist in github in case you want to star(save) it.
def print_decorator(func):
    """
    Overload Print function to pretty print Dictionaries

    A call with exactly one positional argument that is a dict prints that
    dict as sorted, indented JSON (values JSON cannot encode go through
    ``str``).  Every other call is passed through to *func* unchanged.
    Keyword arguments (``file=``, ``end=``, ...) are forwarded in both cases.
    """
    import functools
    import json

    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        # isinstance(*args, dict) raised TypeError as soon as print was
        # called with zero or with several arguments; test the count first.
        if len(args) == 1 and isinstance(args[0], dict):
            text = json.dumps(args[0], sort_keys=True, indent=2, default=str)
            return func(text, **kwargs)
        return func(*args, **kwargs)
    return wrapped_func


print = print_decorator(print)
Now just use print as usual.
I wrote this simple code to print the general structure of a json object in Python.
def getstructure(data, tab = 0):
    """Print the general shape of a JSON-like object.

    Dicts are printed as ``{`` ... ``}`` with one ``key:`` line per key,
    followed by the structure of that key's value.  A non-empty list is
    printed as ``[`` ... ``]`` showing only the structure of its first item
    followed by ``...``.  Scalars, tuples and empty lists print nothing.

    Args:
        data: the object to describe.
        tab: number of spaces to indent this level by.
    """
    pad = ' ' * tab
    if type(data) is dict:
        print(pad + '{')
        for key in data:
            # str(key): keys may be ints or tuples, which cannot be
            # concatenated to a string directly.
            print(pad + ' ' + str(key) + ':')
            getstructure(data[key], tab+4)
        print(pad + '}')
    elif type(data) is list and len(data) > 0:
        print(pad + '[')
        getstructure(data[0], tab+4)
        print(pad + ' ...')
        print(pad + ']')
the result for the following data
a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':'p','unicode':u'\xa7',("tuple","key"):"valid"}
getstructure(a)
is very compact and looks like this:
{
function:
tuple:
list:
[
...
]
dict:
{
a:
2:
}
unicode:
('tuple', 'key'):
}
I'm just returning to this question after taking sth's answer and making a small but very useful modification. This function prints all keys in the JSON tree as well as the size of leaf nodes in that tree.
def print_JSON_tree(d, indent=0):
    """Print the keys of a nested dict, with type and size for each leaf.

    A key whose value is a dict is printed alone and its children follow,
    indented by one more space.  Any other key is printed as
    ``key : <type name> - <length of str(value)>``.

    Args:
        d: the dictionary to print.
        indent: number of spaces before the keys of *d*.
    """
    for key, value in d.items():
        label = ' ' * indent + str(key)
        if isinstance(value, dict):
            print(label)
            print_JSON_tree(value, indent+1)
        else:
            # The type name is the quoted part of e.g. "<class 'int'>".
            print(label, ":", str(type(value)).split("'")[1], "-", str(len(str(value))))
It's really nice when you have large JSON objects and want to figure out where the meat is. Example:
>>> print_JSON_tree(JSON_object)
key1
value1 : int - 5
value2 : str - 16
key2
value1 : str - 34
value2 : list - 5623456
This would tell you that most of the data you care about is probably inside JSON_object['key1']['key2']['value2'] because the length of that value formatted as a string is very large.
I tried the following and got my desired results
Method 1:
Step 1: Install print_dict by typing the following command in cmd
pip install print_dict
Step 2: Import print_dict as
from print_dict import pd
Step 3: Printing using pd
pd(your_dictionary_name)
Example Output:
{
'Name': 'Arham Rumi',
'Age': 21,
'Movies': ['adas', 'adfas', 'fgfg', 'gfgf', 'vbxbv'],
'Songs': ['sdfsd', 'dfdgfddf', 'dsdfd', 'sddfsd', 'sdfdsdf']
}
Method 2:
We can also use for loop to print the dictionary using items method
for key, Value in your_dictionary_name.items():
print(f"{key} : {Value}")
The easiest is to install IPython and use something like below
from IPython.lib.pretty import pretty
class MyClass:
    # `def` is required here; without it the class body is a syntax error.
    def __repr__(self):
        # `data` is a placeholder for whatever object should be rendered.
        return pretty(data) # replace data with what makes sense
In your case
print(pretty(mydict))
Sth, i sink that's pretty ;)
def pretty(d, indent=0):
for key, value in d.iteritems():
if isinstance(value, dict):
print '\t' * indent + (("%30s: {\n") % str(key).upper())
pretty(value, indent+1)
print '\t' * indent + ' ' * 32 + ('} # end of %s #\n' % str(key).upper())
elif isinstance(value, list):
for val in value:
print '\t' * indent + (("%30s: [\n") % str(key).upper())
pretty(val, indent+1)
print '\t' * indent + ' ' * 32 + ('] # end of %s #\n' % str(key).upper())
else:
print '\t' * indent + (("%30s: %s") % (str(key).upper(),str(value)))
This class prints out a complex nested dictionary with sub dictionaries and sub lists.
##
## Recursive class to parse and print complex nested dictionary
##
class NestedDictionary(object):
    """Wrap a value and print it, if it is a dict, as dash-prefixed lines."""

    def __init__(self, value):
        self.value = value

    def print(self, depth):
        """Print the wrapped dictionary recursively.

        Each line starts with *depth* dashes (at most 20).  A key whose value
        is a dict is printed alone and its contents follow three dashes
        deeper.  For a key whose value is a list, the key itself is not
        printed; each item is printed three dashes deeper if it is a dict.
        Every other key is printed together with its value.  Nothing is
        printed when the wrapped value is not a dict.
        """
        if type(self.value) is not dict:
            return
        prefix = "--------------------"[:depth]
        for kk, vv in self.value.items():
            if type(vv) is dict:
                print(prefix, kk)
                NestedDictionary(vv).print(depth + 3)
            elif type(vv) is list:
                for element in vv:
                    NestedDictionary(element).print(depth + 3)
            else:
                print(prefix, kk, vv)
##
## Instatiate and execute - this prints complex nested dictionaries
## with sub dictionaries and sub lists
## 'something' is a complex nested dictionary
MyNest=NestedDictionary(weather_com_result)
MyNest.print(0)
Late, but an answer that does not require any additional libraries. Similar to STH's answer, but a little more robust in formatting and returns a full string that can then be printed:
def pretty_print_dict(
    input_dictionary,
    indent=1,
    depth=0
):
    """Return a tab-indented, brace-delimited string for a nested dict.

    Args:
        input_dictionary: The dictionary to render.
        indent: Number of tabs placed between ``key: `` and a scalar value.
        depth: Number of tabs in front of the braces of this dictionary;
            its keys are indented one tab further.

    Returns:
        The formatted string. Entries are separated by ``,\\n``. A nested
        dict starts on the line after its key, two tab levels deeper.
    """
    key_prefix = '\t' * (depth + 1)
    brace_prefix = '\t' * depth
    entries = []
    for key, value in input_dictionary.items():
        if isinstance(value, dict):
            # Pass `indent` down so nested scalars use the caller's spacing
            # rather than falling back to the default.
            rendered = '\n' + pretty_print_dict(
                value, indent=indent, depth=depth + 2
            )
        else:
            rendered = '\t' * indent + str(value)
        entries.append(key_prefix + str(key) + ': ' + rendered)
    # Joining once puts commas between entries without a flag variable.
    return brace_prefix + '{\n' + ',\n'.join(entries) + '\n' + brace_prefix + '}'
Let's see how it handles a dict like test_dict={1: 2, 3: {4: {5: 6}, 7: 8}, 9: 10}.
The string looks like: '{\n\t1: \t2,\n\t3: \n\t\t{\n\t\t\t4: \n\t\t\t\t{\n\t\t\t\t\t5: \t6\n\t\t\t\t},\n\t\t\t7: \t8\n\t\t},\n\t9: \t10\n}'.
Printing that string yields:
{
1: 2,
3:
{
4:
{
5: 6
},
7: 8
},
9: 10
}
I'm a relative python newbie myself but I've been working with nested dictionaries for the past couple weeks and this is what I came up with.
You should try using a stack. Make the keys from the root dictionary into a list of a list:
# Copy the keys into a real list: on Python 3, dict.keys() is a view that
# cannot be indexed or have items deleted from it.
stack = [list(root.keys())]  # Result: [ [root keys] ]
Going in reverse order from last to first, lookup each key in the dictionary to see if its value is (also) a dictionary. If not, print the key then delete it. However if the value for the key is a dictionary, print the key then append the keys for that value to the end of the stack, and start processing that list in the same way, repeating recursively for each new list of keys.
If the value for the second key in each list were a dictionary you would have something like this after several rounds:
[['key 1','key 2'],['key 2.1','key 2.2'],['key 2.2.1','key 2.2.2'],[`etc.`]]
The upside to this approach is that the indent is just \t times the length of the stack:
indent = "\t" * len(stack)
The downside is that in order to check each key you need to hash through to the relevant sub-dictionary, though this can be handled easily with a list comprehension and a simple for loop:
# The last key of every list of keys in the stack.
# list(...) makes this work whether the stack holds lists or dict key views.
path = [list(keys)[-1] for keys in stack]
# Walk down from the root to the sub-dictionary the path points at.
sub = root
for p in path:
    sub = sub[p]
if isinstance(sub, dict):
    stack.append(list(sub.keys()))  # And so on
Be aware that this approach will require you to clean up trailing empty lists, and to delete the last key in any list followed by an empty list (which of course may create another empty list, and so on).
There are other ways to implement this approach but hopefully this gives you a basic idea of how to do it.
EDIT: If you don't want to go through all that, the pprint module prints nested dictionaries in a nice format.
Here's a function I wrote based on sth's comment. It works the same as json.dumps with indent, but I'm using tabs instead of spaces for indents. In Python 3.2+ you can specify indent to be a '\t' directly, but not in 2.7.
def pretty_dict(d):
    """Print a nested dict in a JSON-like layout, indented with tabs.

    Keys and scalar values are both wrapped in double quotes; nested
    dictionaries are printed recursively inside braces.

    Args:
        d: The (possibly nested) dictionary to print.

    Works on Python 2 and 3: only single-argument ``print(...)`` calls and
    ``dict.items()`` are used.
    """
    def pretty(d, indent):
        tabs = '\t' * indent
        last = len(d) - 1
        for i, (key, value) in enumerate(d.items()):
            # Every entry except the last is followed by a comma.
            comma = '' if i == last else ','
            if isinstance(value, dict):
                print('{0}"{1}": {{'.format(tabs, str(key)))
                pretty(value, indent + 1)
                print('{0}}}{1}'.format(tabs, comma))
            else:
                print('{0}"{1}": "{2}"{3}'.format(tabs, str(key), value, comma))

    print('{')
    pretty(d, indent=1)
    print('}')
Ex:
>>> dict_var = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> pretty_dict(dict_var)
{
"a": "2",
"b": {
"y": {
"t2": "5",
"t1": "4"
},
"x": "3"
}
}
Here's something that will print any sort of nested dictionary, while keeping track of the "parent" dictionaries along the way.
dicList = list()
def prettierPrint(dic, dicList):
    """Print every leaf of a nested dict together with its parent-key path.

    For each non-dict value this prints ``key: value``, then
    ``key was found in the following path: [...]``, then blank lines.

    Args:
        dic: The (possibly nested) dictionary to walk.
        dicList: List of parent keys leading to ``dic``. It is used as a
            stack during the walk and is left unchanged on return.

    Empty dictionaries (including an empty ``OrderedDict``) are reported
    as a leaf with value ``None``.
    """
    for key, value in dic.items():
        if isinstance(value, dict) and not value:
            value = None
        if isinstance(value, dict):
            # Push, recurse, pop: the path is always restored, so later
            # siblings never inherit a stale parent key.
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
        else:
            print(str(key) + ": " + str(value))
            print(str(key) + ' was found in the following path: ' + str(dicList))
            print('\n')
prettierPrint(dicExample, dicList)
This is a good starting point for printing according to different formats, like the one specified in OP. All you really need to do is operations around the Print blocks. Note that it looks to see if the value is 'OrderedDict()'. Depending on whether you're using something from Container datatypes Collections, you should make these sort of fail-safes so the elif block doesn't see it as an additional dictionary due to its name. As of now, an example dictionary like
example_dict = {'key1': 'value1',
                'key2': 'value2',
                'key3': {'key3a': 'value3a'},
                'key4': {'key4a': {'key4aa': 'value4aa',
                                   'key4ab': 'value4ab',
                                   'key4ac': 'value4ac'},
                         'key4b': 'value4b'}}
will print
key3a: value3a
key3a was found in the following path: ['key3']
key2: value2
key2 was found in the following path: []
key1: value1
key1 was found in the following path: []
key4ab: value4ab
key4ab was found in the following path: ['key4', 'key4a']
key4ac: value4ac
key4ac was found in the following path: ['key4', 'key4a']
key4aa: value4aa
key4aa was found in the following path: ['key4', 'key4a']
key4b: value4b
key4b was found in the following path: ['key4']
~altering code to fit the question's format~
# Parent-key path of the most recently visited leaf; shared across calls.
lastDict = list()
dicList = list()


def prettierPrint(dic, dicList):
    """Print nested leaf values grouped under their parent-key path.

    The path list is printed once, before the first value found under it.
    Each value is then printed indented by the width of that path.
    Top-level values (empty path) are not printed.

    Args:
        dic: The (possibly nested) dictionary to walk.
        dicList: List of parent keys leading to ``dic``. It is used as a
            stack during the walk and is left unchanged on return.

    Side effects:
        Updates the module-level ``lastDict`` with the path of the last
        leaf visited. Empty dictionaries are treated as a ``None`` leaf.
    """
    global lastDict
    for key, value in dic.items():
        if isinstance(value, dict) and not value:
            value = None
        if isinstance(value, dict):
            # Push, recurse, pop so the path is always restored.
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
            continue
        if dicList:
            if lastDict != dicList:
                print(str(dicList))
            # Recompute the padding every time: lastDict persists between
            # calls, so a cached value may not exist on the "same parents" path.
            print(' ' * len(str(dicList)) + ' ' + str(value))
        lastDict = list(dicList)
Using the same example code, it will print the following:
['key3']
value3a
['key4', 'key4a']
value4ab
value4ac
value4aa
['key4']
value4b
This isn't exactly what is requested in OP. The difference is that a parent^n is still printed, instead of being absent and replaced with white-space. To get to OP's format, you'll need to do something like the following: iteratively compare dicList with the lastDict. You can do this by making a new dictionary and copying dicList's content to it, checking if i in the copied dictionary is the same as i in lastDict, and -- if it is -- writing whitespace to that i position using the string multiplier function.
From this link:
def prnDict(aDict, br='\n', html=0,
            keyAlign='l', sortKey=0,
            keyPrefix='', keySuffix='',
            valuePrefix='', valueSuffix='',
            leftMargin=0, indent=1):
    """
    Return a string representation of aDict in the following format:
        {
         key1: value1,
         key2: value2,
         ...
        }
    Spaces will be added to the keys to make them have the same width.

    sortKey: set to 1 if you want the keys sorted. Keys of mixed,
        non-comparable types are grouped by type name and sorted
        within each group.
    keyAlign: either 'l' or 'r', for left, right align, respectively.
    keyPrefix, keySuffix, valuePrefix, valueSuffix: The prefix and
        suffix to wrap the keys or values. Good for formatting them
        for html document (for example, keyPrefix='<b>', keySuffix='</b>').
        Note: The keys will be padded with spaces to have them
        equally-wide. The pre- and suffix will be added OUTSIDE
        the entire width.
    html: if set to 1, all spaces will be replaced with '&nbsp;', and
        the entire output will be wrapped with '<code>' and '</code>'.
    br: determine the carriage return. If html, it is suggested to set
        br to '<br>'. If you want the html source code easy to read,
        set br to '<br>\\n'.
    leftMargin: number of spaces put in front of every output line.
    indent: number of spaces put in front of every key/value line.

    Returns '{}' for an empty (falsy) aDict.

    version: 04b52
    author : Runsun Pan
    Sorting no longer requires the external odict() helper.
    Dave Benjamin
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/161403
    """
    if not aDict:
        return '{}'

    keys = list(aDict.keys())
    if sortKey:
        try:
            keys.sort()
        except TypeError:
            # Mixed key types (e.g. str and tuple) cannot be compared on
            # Python 3; fall back to grouping by type name.
            keys.sort(key=lambda k: (type(k).__name__, k))

    def quoted(x):
        # Wrap strings in single quotes; leave other objects untouched.
        return "'%s'" % x if isinstance(x, str) else x

    ks = [quoted(k) for k in keys]
    vs = [quoted(aDict[k]) for k in keys]
    maxKeyLen = max(len(str(k)) for k in ks)

    lines = ['{']
    for k, v in zip(ks, vs):
        key_text = str(k)
        if keyAlign == 'r':
            key_text = key_text.rjust(maxKeyLen)
        else:
            key_text = key_text.ljust(maxKeyLen)
        lines.append(' ' * indent + '%s%s%s:%s%s%s,' % (
            keyPrefix, key_text, keySuffix,
            valuePrefix, v, valueSuffix))
    lines[-1] = lines[-1][:-1]  # remove the ',' in the last item
    lines.append('}')

    if leftMargin:
        lines = [' ' * leftMargin + x for x in lines]

    text = br.join(lines)
    if html:
        # Non-breaking spaces keep the column alignment when rendered.
        return '<code>%s</code>' % text.replace(' ', '&nbsp;')
    return text
'''
Example:
>>> a={'C': 2, 'B': 1, 'E': 4, (3, 5): 0}
>>> print prnDict(a)
{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, sortKey=1)
{
'B' :1,
'C' :2,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, keyPrefix="<b>", keySuffix="</b>")
{
<b>'C' </b>:2,
<b>'B' </b>:1,
<b>'E' </b>:4,
<b>(3, 5)</b>:0
}
>>> print prnDict(a, html=1)
<code>{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}</code>
>>> b={'car': [6, 6, 12], 'about': [15, 9, 6], 'bookKeeper': [9, 9, 15]}
>>> print prnDict(b, sortKey=1)
{
'about' :[15, 9, 6],
'bookKeeper':[9, 9, 15],
'car' :[6, 6, 12]
}
>>> print prnDict(b, keyAlign="r")
{
'car':[6, 6, 12],
'about':[15, 9, 6],
'bookKeeper':[9, 9, 15]
}
'''
Use this function:
def pretty_dict(d, n=1):
    """Print the keys of a nested dict as an indented tree.

    Args:
        d: The (possibly nested) dictionary whose keys are printed.
        n: Number of leading spaces for this level; each nested level
            adds four more.

    Only dictionary values are descended into. An explicit type check
    replaces the blanket ``except TypeError``, so string values are no
    longer walked character by character, and keys are converted with
    ``str`` so non-string keys do not raise.
    """
    for k, v in d.items():
        print(" " * n + str(k))
        if isinstance(v, dict):
            pretty_dict(v, n=n + 4)
Call it like this:
pretty_dict(mydict)
This is what I came up with while working on a class that needed to write a dictionary in a .txt file:
#staticmethod
def _pretty_write_dict(dictionary):
def _nested(obj, level=1):
indentation_values = "\t" * level
indentation_braces = "\t" * (level - 1)
if isinstance(obj, dict):
return "{\n%(body)s%(indent_braces)s}" % {
"body": "".join("%(indent_values)s\'%(key)s\': %(value)s,\n" % {
"key": str(key),
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for key, value in obj.items()),
"indent_braces": indentation_braces
}
if isinstance(obj, list):
return "[\n%(body)s\n%(indent_braces)s]" % {
"body": "".join("%(indent_values)s%(value)s,\n" % {
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for value in obj),
"indent_braces": indentation_braces
}
else:
return "\'%(value)s\'" % {"value": str(obj)}
dict_text = _nested(dictionary)
return dict_text
Now, if we have a dictionary like this:
some_dict = {'default': {'ENGINE': [1, 2, 3, {'some_key': {'some_other_key': 'some_value'}}], 'NAME': 'some_db_name', 'PORT': '', 'HOST': 'localhost', 'USER': 'some_user_name', 'PASSWORD': 'some_password', 'OPTIONS': {'init_command': 'SET foreign_key_checks = 0;'}}}
And we do:
print(_pretty_write_dict(some_dict))
We get:
{
'default': {
'ENGINE': [
'1',
'2',
'3',
{
'some_key': {
'some_other_key': 'some_value',
},
},
],
'NAME': 'some_db_name',
'OPTIONS': {
'init_command': 'SET foreign_key_checks = 0;',
},
'HOST': 'localhost',
'USER': 'some_user_name',
'PASSWORD': 'some_password',
'PORT': '',
},
}
There are so many nice implementations here, it made me want to add my own :). I used it for debugging in CircuitPython and MicroPython where json.dumps does not allow using the indent parameter, and pprint is not available as well.
It is implemented with self so it can be dropped into a class, and for each value it shows the data type, which I find very useful for debugging. It does not depend on any external module.
def pretty_print_dict(self, d, indent=0):
    """Print nested dicts/lists, tagging every scalar with its type.

    Scalars are wrapped in a short tag: ``<s>`` str, ``<b>`` bool,
    ``<i>`` int, ``<f>`` float, and ``<?>`` for anything else.

    Args:
        d: The object to print (dict, list or scalar).
        indent: Number of leading spaces for this level.
    """
    INDENT = 2
    pad = ' ' * indent
    if isinstance(d, dict):
        print(pad + '{')
        for key, value in d.items():
            print(f'{" " * (indent + INDENT)}{key}:')
            self.pretty_print_dict(value, indent + 2 * INDENT)
        print(pad + '}')
    elif isinstance(d, list):
        print(pad + '[')
        for item in d:
            self.pretty_print_dict(item, indent + INDENT)
        print(pad + ']')
    elif isinstance(d, str):
        print(pad + '<s>' + d + '</s>')
    elif isinstance(d, bool):
        # bool is a subclass of int, so it must be tested before int.
        print(pad + '<b>' + str(d) + '</b>')
    elif isinstance(d, int):
        print(pad + '<i>' + str(d) + '</i>')
    elif isinstance(d, float):
        print(pad + '<f>' + str(d) + '</f>')
    else:
        print(pad + '<?>' + str(d) + '</?>')
Usage: self.pretty_print_dict(my_dict)

Categories