Iterate over all values in complex Dictionary - python

Goal: append a ~tag to the end of any value in a complex (nested) dictionary, with a given percentage chance of occurrence.
The code works for "shallow" dictionaries (with no sub-dicts). I want it to work with any nested dictionary.
Note: the tag includes the ~, if and when it occurs.
Code:
import re
import random
RE_TAG = re.compile(r".+(~.+)")
DLM = '~'
tag_occurance = 25 # as %
thisdict = {
"Key1~tag": "foo",
"Key2": "bar",
"Key3~tag": {
"Key3.1": "x",
"Key3.2~tag": "y"
}
}
def tag(_str):
    """Return the ``~...`` suffix that RE_TAG finds in ``_str``, or '' if none.

    The suffix is rebuilt as DLM followed by the captured text without its
    leading delimiter character.
    """
    found = RE_TAG.match(_str)
    if not found:
        return ''
    suffix = found[1]  # captured group, e.g. '~tag'
    return DLM + suffix[1:]  # '~tag'
# Main Process
thisdict = {key: val + tag(key) if random.randint(0, 100) < tag_occurance else val for key, val in thisdict.items()} # 25% tag
print(thisdict) # view difference
Error:
val is itself a dictionary, hence the error.
Traceback (most recent call last):
File "./prog.py", line 25, in <module>
File "./prog.py", line 25, in <dictcomp>
TypeError: unsupported operand type(s) for +: 'dict' and 'str'
Desired Output:
{
"Key1~tag": "foo~tag", # tag added as postfix concatenated string
"Key2": "bar",
"Key3~tag": {
"Key3.1": "x",
"Key3.2~tag": "y~tag" # tag added as postfix concatenated string
}
}
Cause of Error
thisdict.values() returns the sub-dicts. I'm only interested in their actual sub-values.
print(thisdict.values())
>>> dict_values(['foo', 'bar', {'Key3.1~tag': 'x', 'Key3.2~tag': 'y'}])
Desired iteration:
['foo', 'bar', 'x', 'y']
Please let me know if there is anything else I can add to post.

One approach, as mentioned in the comments, is to write a recursive function:
def nested_tag(d):
    """Return a copy of ``d`` with key tags randomly appended to leaf values.

    Nested dictionaries are rebuilt recursively. Every other value gets
    ``tag(key)`` appended with a chance of ``tag_occurance`` percent, so such
    values must support ``+`` with a string.
    """
    res = {}
    for key, value in d.items():
        if isinstance(value, dict):
            res[key] = nested_tag(value)
        # randrange(100) yields 0..99, so the chance is exactly
        # tag_occurance percent (randint(0, 100) draws from 101 values).
        elif random.randrange(100) < tag_occurance:
            res[key] = value + tag(key)
        else:
            res[key] = value
    return res


final = nested_tag(thisdict)  # the sample dictionary is named ``thisdict``
print(final)
Output
{'Key1~tag': 'foo', 'Key2': 'bar', 'Key3~tag': {'Key3.1': 'x', 'Key3.2~tag': 'y~tag'}}
The above solution assumes the only complex values are dictionaries.

Related

Python dictionary print with parentheses

I am trying to edit this function so the values of the dictionary will not be printed in parentheses and will be iterable:
def traverse_appended(key):
    # Build a dict with one entry per item yielded by traverse_reg(key).
    reg_dict = {}
    #keypath = r"SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\"
    for item in traverse_reg(key):
        # NOTE(review): keypath is not assigned inside this function (the
        # assignment above is commented out) - presumably a module-level
        # global; confirm.
        keypath_str = str(keypath+item)
        # The comma makes the right-hand side a 2-tuple of strings, which is
        # why each value is displayed in parentheses.
        reg_dict[item] = str(get_reg("Displayversion", keypath_str)), str(get_reg("DisplayName", keypath_str))
        #reg_dict[item] = get_reg("DisplayName", keypath_str)
    return reg_dict
the expected output is :
{'DXM_Runtime': 'None', 'None'}
The function output:
{'DXM_Runtime': ('None', 'None')}
#Consider traverse_appended returns following dict.
#I think, converting func_dict values which are tuple into string, will help you to get expected output.
# Sample of what traverse_appended returns: every value is a tuple of strings.
func_dict = {
    "DXM_Runtime": ('None', 'None'),
    "TMP_KEY": ('A', 'B'),
}
derived_dict = {}
# dict.viewitems() and the print statement exist only in Python 2;
# items() and print() give the same result here and also run on Python 3.
for k, v in func_dict.items():
    # Collapse each tuple into a single comma-separated string.
    derived_dict[k] = ",".join(v)
print(derived_dict)
#Output
E:\tmp_python>python tmp.py
{'DXM_Runtime': 'None,None', 'TMP_KEY': 'A,B'}
#If this doesn't help you, then please post the code for get_reg and traverse_reg function also.

Dot notation to Json in python

I receive data from the Loggly service in dot notation, but to put data back in, it must be in JSON.
Hence, I need to convert:
{'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100}
Into:
{'message': {'code': {'response': 80}, 'status': {'time': 50}}, 'time': 100}
I have put together a function to do so, but I wonder if there is a more direct and simpler way to accomplish the same result.
def dot_to_json(a):
    """Convert a flat dict with dot-separated keys into a nested dict.

    A leading ``json`` component is dropped from dotted keys (keys coming
    from another type keep their root). Each remaining component but the
    last becomes one level of nesting; the last one holds the value.

    Args:
        a: mapping of dotted-path strings to values.

    Returns:
        A new nested dict; ``a`` itself is not modified.
    """
    # Create root for JSON tree structure
    resp = {}
    for k, v in a.items():
        path = k.split('.')
        # eliminate the "json." prefix; a bare "json" key is kept as-is
        if len(path) > 1 and path[0] == 'json':
            path = path[1:]
        # Walk down from the root, creating missing levels on the CURRENT
        # branch. Testing membership against the root would reset shared
        # parents, so "a.b.c" and "a.b.d" would overwrite each other.
        branch = resp
        for part in path[:-1]:
            branch = branch.setdefault(part, {})
        # Assign value to the last branch
        branch[path[-1]] = v
    return resp
You can turn the path into dictionary access with:
def dot_to_json(a):
    """Convert a flat dict with dot-separated keys into a nested dict.

    Each key is treated as a path; a leading ``json`` component is ignored.
    Intermediate dictionaries are created on demand with dict.setdefault().

    Args:
        a: mapping of dotted-path strings to values.

    Returns:
        A new nested dict.
    """
    # reduce() is a builtin only on Python 2; functools.reduce runs on both.
    from functools import reduce

    output = {}
    for key, value in a.items():
        path = key.split('.')
        # Keep a bare "json" key intact: stripping it would leave an empty
        # path and path[-1] below would raise IndexError.
        if len(path) > 1 and path[0] == 'json':
            path = path[1:]
        target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
        target[path[-1]] = value
    return output
This takes the key as a path, ignoring the first json part. With reduce() you can walk the elements of path (except for the last one) and fetch the nested dictionary with it.
Essentially you start at output and for each element in path fetch the value and use that value as the input for the next iteration. Here dict.setdefault() is used to default to a new empty dictionary each time a key doesn't yet exist. For a path ['foo', 'bar', 'baz'] this comes down to the call output.setdefault('foo', {}).setdefault('bar', {}).setdefault('baz', {}), only more compact and supporting arbitrary length paths.
The innermost dictionary is then used to set the value with the last element of the path as the key.
Demo:
>>> def dot_to_json(a):
... output = {}
... for key, value in a.iteritems():
... path = key.split('.')[1:] # ignore the json. prefix
... target = reduce(lambda d, k: d.setdefault(k, {}), path[:-1], output)
... target[path[-1]] = value
... return output
...
>>> dot_to_json({'json.message.status.time':50, 'json.message.code.response':80, 'json.time':100})
{'message': {'status': {'time': 50}, 'code': {'response': 80}}, 'time': 100}

python generating nested dictionary key error

I am trying to create a nested dictionary from a mysql query but I am getting a key error
result = {}
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
error
KeyError: 'data'
desired result
result = {
'data': {
0: {'firstName': ''...}
1: {'firstName': ''...}
2: {'firstName': ''...}
}
}
You wanted to create a nested dictionary
result = {} will create an assignment for a flat dictionary, whose items can have any values like "string", "int", "list" or "dict"
For this flat assignment
python knows what to do for result["first"]
If you want "first" to also be another dictionary, you need to tell Python so with an assignment:
result['first'] = {}.
otherwise, Python raises "KeyError"
I think you are looking for this :)
>>> from collections import defaultdict
>>> mydict = lambda: defaultdict(mydict)
>>> result = mydict()
>>> result['Python']['rules']['the world'] = "Yes I Agree"
>>> result['Python']['rules']['the world']
'Yes I Agree'
result = {}
result['data'] = {}
for i, q in enumerate(query):
    # Key the row by the loop index i, not the literal string 'i'; otherwise
    # result['data'][i] does not exist yet and the next line raises KeyError.
    result['data'][i] = {}
    result['data'][i]['firstName'] = q.first_name
    result['data'][i]['lastName'] = q.last_name
    result['data'][i]['email'] = q.email
Alternatively, you can use you own class which adds the extra dicts automatically
class AutoDict(dict):
    """dict that creates a nested AutoDict whenever a missing key is read."""

    def __missing__(self, k):
        # Store the new child first so later lookups return the same object.
        child = AutoDict()
        self[k] = child
        return child
result = AutoDict()
for i, q in enumerate(query):
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email
result['data'] does not exist yet, so you cannot add data to it.
Try this out at the start:
result = {'data': []};
You have to create the key data first:
result = {}
result['data'] = {}
for i, q in enumerate(query):
result['data'][i] = {}
result['data'][i]['firstName'] = q.first_name
result['data'][i]['lastName'] = q.last_name
result['data'][i]['email'] = q.email

Split dictionary key and list of values from dict

I want to split keys and values and display the dictionary result below mentioned format. I'm reading a file and splitting the data into list and later moving to dictionary.
Please help me to get the result.
INPUT FILE - commands.txt
login url=http://demo.url.net username=test#url.net password=mytester
create-folder foldername=demo
select-folder foldername=test123
logout
Expected result format
print result_dict
"0": {
"login": [
{
"url": "http://demo.url.net",
"username": "test#url.net",
"password": "mytester"
}
]
},
"1": {
"create-folder": {
"foldername": "demo"
}
},
"2": {
"select-folder": {
"foldername": "test-folder"
}
},
"3": {
"logout": {}
}
CODE
file=os.path.abspath('catalog/commands.txt')
list_output=[f.rstrip().split() for f in open(file).readlines()]
print list_output
counter=0
for data in list_output:
csvdata[counter]=data[0:]
counter=counter+1
print csvdata
for key,val in csvdata.iteritems():
for item in val:
if '=' in item:
key,value=item.split("=")
result[key]=value
print result
As a function:
from collections import defaultdict
from itertools import count
def read_file(file_path):
    """Parse a command file into ``{index: {command: {option: value}}}``.

    Each non-blank line has the form ``command key=value key=value ...``.
    Lines are numbered from 0 in the order they appear; blank lines are
    skipped and do not consume a number.

    Args:
        file_path: path of the text file to read.

    Returns:
        A plain dict mapping the line counter to a one-entry dict whose key
        is the command name and whose value is the dict of its options.

    Raises:
        ValueError: if an option token contains no '='.
    """
    result = defaultdict(dict)
    item = count()
    with open(file_path) as f:
        for line in f:
            parts = line.split()
            # Lines read from a file keep their trailing newline, so test the
            # split result rather than the raw line to detect blank lines.
            if not parts:
                continue
            # Split on the first '=' only so values may themselves contain '='.
            result[next(item)][parts[0]] = dict(p.split('=', 1) for p in parts[1:])
    return dict(result)
Better example and explanation:
s = """
login url=http://demo.url.net username=test#url.net password=mytester
create-folder foldername=demo
select-folder foldername=test123
logout
"""
from collections import defaultdict
from itertools import count
result_dict = defaultdict(dict)
item = count()
# pretend you opened the file and are reading it line by line
for line in s.splitlines():
if not line:
continue # skip empty lines
parts = line.split()
result_dict[next(item)][parts[0]] = dict(p.split('=') for p in parts[1:])
With pretty print:
>>> pprint(dict(result_dict))
{0: {'login': {'password': 'mytester',
'url': 'http://demo.url.net',
'username': 'test#url.net'}},
1: {'create-folder': {'foldername': 'demo'}},
2: {'select-folder': {'foldername': 'test123'}},
3: {'logout': {}}}
lines = ["login url=http://demo.url.net username=test#url.net password=mytester",
"create-folder foldername=demo",
"select-folder foldername=test123",
"logout"]
result = {}
for no, line in enumerate(lines):
values = line.split()
pairs = [v.split('=') for v in values[1:]]
result[str(no)] = {values[0]: [dict(pairs)] if len(pairs) > 1 else dict(pairs)}
import pprint
pprint.pprint(result)
Output:
{'0': {'login': [{'password': 'mytester',
'url': 'http://demo.url.net',
'username': 'test#url.net'}]},
'1': {'create-folder': {'foldername': 'demo'}},
'2': {'select-folder': {'foldername': 'test123'}},
'3': {'logout': {}}}
But are you sure you need the extra list inside the login value? If not, just change [dict(pairs)] if len(pairs) > 1 else dict(pairs) to dict(pairs).
# Map each line number (as a string) to {command: options}.
r = dict()
# The with-block closes the file even if a line fails to parse.
with open('commands.txt') as f:
    for i, line in enumerate(f):
        r[str(i)] = dict()
        actions = line.split()
        list_actions = {}
        for action in actions[1:]:
            if "=" in action:
                # Split on the first '=' only so values may contain '='.
                k, v = action.split('=', 1)
                list_actions[k] = v
        # Commands with more than one option get their dict wrapped in a list.
        if len(actions[1:]) > 1:
            r[str(i)][actions[0]] = [list_actions]
        else:
            r[str(i)][actions[0]] = list_actions
print(r)
This should work.

How to pretty print nested dictionaries?

How can I pretty print a dictionary with depth of ~4 in Python? I tried pretty printing with pprint(), but it did not work:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(mydict)
I simply want an indentation ("\t") for each nesting, so that I get something like this:
key1
value1
value2
key2
value1
value2
etc.
How can I do this?
My first thought was that the JSON serializer is probably pretty good at nested dictionaries, so I'd cheat and use that:
>>> import json
>>> print(json.dumps({'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}},
... sort_keys=True, indent=4))
{
"a": 2,
"b": {
"x": 3,
"y": {
"t1": 4,
"t2": 5
}
}
}
I'm not sure how exactly you want the formatting to look like, but you could start with a function like this:
def pretty(d, indent=0):
    """Print a nested dict, one key or value per line, tab-indented by depth.

    Keys are printed at ``indent`` tabs; a non-dict value is printed one
    level deeper, and a dict value is printed recursively one level deeper.
    """
    margin = '\t' * indent
    for name, child in d.items():
        print(margin + str(name))
        if not isinstance(child, dict):
            print('\t' * (indent + 1) + str(child))
            continue
        pretty(child, indent + 1)
You could try YAML via PyYAML. Its output can be fine-tuned. I'd suggest starting with the following:
print(yaml.dump(data, allow_unicode=True, default_flow_style=False))
The result is very readable; it can be also parsed back to Python if needed.
Edit:
Example:
>>> import yaml
>>> data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> print(yaml.dump(data, default_flow_style=False))
a: 2
b:
x: 3
y:
t1: 4
t2: 5
This way you can pretty-print it. For example, if your dictionary is named yasin:
import json
print (json.dumps(yasin, indent=2))
or, safer:
print (json.dumps(yasin, indent=2, default=str))
One of the most pythonic ways for that is to use the already build pprint module.
The argument that you need for define the print depth is as you may expect depth
import pprint
pp = pprint.PrettyPrinter(depth=4)
pp.pprint(mydict)
That's it !
As of what have been done, I don't see any pretty printer that at least mimics the output of the python interpreter with very simple formatting so here's mine :
class Formatter(object):
    """Pretty printer that renders values as an indented string.

    A formatter callback is registered per exact type in ``self.types``;
    values whose type has no entry use the ``object`` callback (``repr``).
    Callbacks are called as ``callback(formatter, value, indent)``.
    """

    def __init__(self):
        self.types = {}
        self.htchar = '\t'  # one level of indentation
        self.lfchar = '\n'  # line separator
        self.indent = 0     # starting indentation level
        cls = self.__class__
        defaults = (
            (object, cls.format_object),
            (dict, cls.format_dict),
            (list, cls.format_list),
            (tuple, cls.format_tuple),
        )
        for handled_type, callback in defaults:
            self.set_formater(handled_type, callback)

    def set_formater(self, obj, callback):
        """Register ``callback`` as the formatter for the type ``obj``."""
        self.types[obj] = callback

    def __call__(self, value, **args):
        """Format ``value``; keyword arguments are first set as attributes."""
        for name, setting in args.items():
            setattr(self, name, setting)
        return self._formatter_for(value)(self, value, self.indent)

    def _formatter_for(self, value):
        # Exact-type lookup; anything unregistered falls back to ``object``.
        kind = type(value)
        return self.types[kind if kind in self.types else object]

    def _enclose(self, opener, closer, rendered, indent):
        # Put each rendered item on its own line, one level deeper than the
        # brackets, separated by commas.
        lead = self.lfchar + self.htchar * (indent + 1)
        inner = ','.join(lead + piece for piece in rendered)
        return opener + inner + self.lfchar + self.htchar * indent + closer

    def format_object(self, value, indent):
        return repr(value)

    def format_dict(self, value, indent):
        rendered = [
            repr(key) + ': ' + self._formatter_for(value[key])(self, value[key], indent + 1)
            for key in value
        ]
        return self._enclose('{', '}', rendered, indent)

    def format_list(self, value, indent):
        rendered = [
            self._formatter_for(item)(self, item, indent + 1)
            for item in value
        ]
        return self._enclose('[', ']', rendered, indent)

    def format_tuple(self, value, indent):
        rendered = [
            self._formatter_for(item)(self, item, indent + 1)
            for item in value
        ]
        return self._enclose('(', ')', rendered, indent)
To initialize it :
pretty = Formatter()
It can support the addition of formatters for defined types, you simply need to make a function for that like this one and bind it to the type you want with set_formater :
from collections import OrderedDict
def format_ordereddict(self, value, indent):
    """Render ``value`` as ``OrderedDict([...])`` with one pair per line.

    Meant to be registered on a formatter object: ``self`` must provide
    ``types``, ``lfchar`` and ``htchar``.
    """
    lead = self.lfchar + self.htchar * (indent + 1)
    rows = []
    for key in value:
        entry = value[key]
        kind = type(entry)
        # Exact-type lookup with ``object`` as the fallback formatter.
        render = self.types[kind if kind in self.types else object]
        rows.append(lead + "(" + repr(key) + ', ' + render(self, entry, indent + 1) + ")")
    tail = self.lfchar + self.htchar * indent
    return 'OrderedDict([%s])' % (','.join(rows) + tail)
pretty.set_formater(OrderedDict, format_ordereddict)
For historical reasons, I keep the previous pretty printer which was a function instead of a class, but they both can be used the same way, the class version simply permit much more :
def pretty(value, htchar='\t', lfchar='\n', indent=0):
    """Return ``value`` rendered as an indented, repr-like string.

    Only exact ``dict``, ``list`` and ``tuple`` instances are expanded, one
    item per line; every other value is rendered with ``repr``.

    Args:
        value: object to render.
        htchar: string used for one level of indentation.
        lfchar: string used as the line separator.
        indent: indentation level of the enclosing brackets.
    """
    brackets = {dict: '{}', list: '[]', tuple: '()'}.get(type(value))
    if brackets is None:
        return repr(value)
    nlch = lfchar + htchar * (indent + 1)
    if type(value) is dict:
        pieces = [
            repr(key) + ': ' + pretty(value[key], htchar, lfchar, indent + 1)
            for key in value
        ]
    else:
        pieces = [pretty(item, htchar, lfchar, indent + 1) for item in value]
    opener, closer = brackets
    inner = ','.join(nlch + piece for piece in pieces)
    return opener + inner + lfchar + htchar * indent + closer
To use it :
>>> a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':pretty,'unicode':u'\xa7',("tuple","key"):"valid"}
>>> a
{'function': <function pretty at 0x7fdf555809b0>, 'tuple': ('a', 'b', 1, 2), 'list': ['a', 'b', 1, 2], 'dict': {'a': 1, 2: 'b'}, 'unicode': u'\xa7', ('tuple', 'key'): 'valid'}
>>> print(pretty(a))
{
'function': <function pretty at 0x7fdf555809b0>,
'tuple': (
'a',
'b',
1,
2
),
'list': [
'a',
'b',
1,
2
],
'dict': {
'a': 1,
2: 'b'
},
'unicode': u'\xa7',
('tuple', 'key'): 'valid'
}
Compared to other versions :
This solution looks directly for object type, so you can pretty print almost everything, not only list or dict.
Doesn't have any dependancy.
Everything is put inside a string, so you can do whatever you want with it.
The class and the function has been tested and works with Python 2.7 and 3.4.
You can have all type of objects inside, this is their representations and not theirs contents that being put in the result (so string have quotes, Unicode string are fully represented ...).
With the class version, you can add formatting for every object type you want or change them for already defined ones.
key can be of any valid type.
Indent and Newline character can be changed for everything we'd like.
Dict, List and Tuples are pretty printed.
I had to pass the default parameter as well, like this:
print(json.dumps(my_dictionary, indent=4, default=str))
and if you want the keys sorted, then you can do:
print(json.dumps(my_dictionary, sort_keys=True, indent=4, default=str))
in order to fix this type error:
TypeError: Object of type 'datetime' is not JSON serializable
which caused by datetimes being some values in the dictionary.
The modern solution here is to use rich. Install with
pip install rich
and use as
from rich import print
d = {
"Alabama": "Montgomery",
"Alaska": "Juneau",
"Arizona": "Phoenix",
"Arkansas": "Little Rock",
"California": "Sacramento",
"Colorado": "Denver",
"Connecticut": "Hartford",
"Delaware": "Dover",
"Florida": "Tallahassee",
"Georgia": "Atlanta",
"Hawaii": "Honolulu",
"Idaho": "Boise",
}
print(d)
The output is nicely indented:
Another option with yapf:
from pprint import pformat
from yapf.yapflib.yapf_api import FormatCode
dict_example = {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5], '4': {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5]}}
dict_string = pformat(dict_example)
formatted_code, _ = FormatCode(dict_string)
print(formatted_code)
Output:
{
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5]
}
}
You can use print-dict
from print_dict import pd
dict1 = {
'key': 'value'
}
pd(dict1)
Output:
{
'key': 'value'
}
Output of this Python code:
{
'one': 'value-one',
'two': 'value-two',
'three': 'value-three',
'four': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'method': <function custom_method at 0x7ff6ecd03e18>,
'tuple': (1, 2),
'unicode': '✓',
'ten': 'value-ten',
'eleven': 'value-eleven',
'3': [1, 2, 3, 4]
}
},
'object1': <__main__.Object1 object at 0x7ff6ecc588d0>,
'object2': <Object2 info>,
'class': <class '__main__.Object1'>
}
Install:
$ pip install print-dict
Disclosure: I'm the author of print-dict
As others have posted, you can use recursion/dfs to print the nested dictionary data and call recursively if it is a dictionary; otherwise print the data.
def print_json(data):
    """Print nested dictionary data depth-first, one key or leaf per line.

    For a dict, each key is printed followed by the recursive output of its
    value; any other object is printed as-is. isinstance() is used so that
    dict subclasses are traversed too.
    """
    if isinstance(data, dict):
        for k, v in data.items():
            # print() with one argument works on Python 2 and Python 3.
            print(k)
            print_json(v)
    else:
        print(data)
pout can pretty print anything you throw at it, for example (borrowing data from another answer):
data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
pout.vs(data)
would result in output printed to the screen like:
{
'a': 2,
'b':
{
'y':
{
't2': 5,
't1': 4
},
'x': 3
}
}
or you can return the formatted string output of your object:
v = pout.s(data)
Its primary use case is for debugging so it doesn't choke on object instances or anything and it handles unicode output as you would expect, works in python 2.7 and 3.
disclosure: I'm the author and maintainer of pout.
prettyformatter
Disclaimer: I'm the author of the package.
For a comparison with other formatters, see Other Formatters.
Formatting
Unlike pprint.pprint, prettyformatter spreads vertically more and attempts to align items more.
Unlike json.dumps, prettyformatter is usually more compact and attempts to align dictionary values wherever reasonable.
from prettyformatter import pprint
batters = [
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
]
toppings = [
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
]
data = {"id": "0001", "type": "donut", "name": "Cake", "ppu": 0.55, "batters": batters, "topping": toppings}
pprint(data)
Output:
{
"id" : "0001",
"type" : "donut",
"name" : "Cake",
"ppu" : 0.55,
"batters":
[
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
],
"topping":
[
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
],
}
Features
See here for the full documentation.
JSON
Unlike pprint.pprint, prettyformatter supports JSON conversion via the json=True argument. This includes changing None to null, True to true, False to false, and correct use of quotes.
Unlike json.dumps, prettyformatter supports JSON coercion with more data types. This includes changing any dataclass or mapping into a dict and any iterable into a list.
from dataclasses import dataclass
from prettyformatter import PrettyDataclass, pprint
# The decorator must start with '@'; with '#' the line is only a comment and
# Point is never turned into a dataclass.
@dataclass(unsafe_hash=True)
class Point(PrettyDataclass):
    # Two integer coordinates.
    x: int
    y: int
pprint((Point(1, 2), Point(3, 4)), json=True)
Output:
[{"x": 1, "y": 2}, {"x": 3, "y": 4}]
Customization
Unlike pprint.pprint or json.dumps, prettyformatter supports easy customization with additional types.
Implementing the __pargs__ and/or __pkwargs__ methods for a prettyformatter.PrettyClass subclass allows one to easily customize classes in the form of "cls_name(*args, **kwargs)".
from prettyformatter import PrettyClass
class Dog(PrettyClass):
def __init__(self, name, **kwargs):
self.name = name
def __pkwargs__(self):
return {"name": self.name}
print(Dog("Fido"))
"""
Dog(name="Fido")
"""
print(Dog("Fido"), json=True)
"""
{"name": "Fido"}
"""
Implementing the __pformat__ method allows even more specific implementations of the pformat function.
Using the @prettyformatter.register decorator also allows customizing classes that already exist, in the same way that implementing __pformat__ would.
import numpy as np
from prettyformatter import pprint, register
# The decorator must start with '@'; with '#' the line is only a comment and
# the function is never registered.
@register(np.ndarray)
def pformat_ndarray(obj, specifier, depth, indent, shorten, json):
    """Format a NumPy array for prettyformatter.

    When ``json`` is true the array is converted to nested lists and passed
    to ``pformat``; otherwise its ``repr`` is used, with each element
    formatted by ``specifier`` and continuation lines padded by ``depth``
    spaces.
    """
    if json:
        # pformat is not among the names imported at the top of this example.
        from prettyformatter import pformat
        return pformat(obj.tolist(), specifier, depth, indent, shorten, json)
    with np.printoptions(formatter=dict(all=lambda x: format(x, specifier))):
        return repr(obj).replace("\n", "\n" + " " * depth)
pprint(dict.fromkeys("ABC", np.arange(9).reshape(3, 3)))
Output:
{
"A":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"B":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"C":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
}
I took sth's answer and modified it slightly to fit my needs of a nested dictionaries and lists:
def pretty(d, indent=0):
    """Print nested dicts and lists, one key or value per line, tab-indented.

    Dict keys are printed at ``indent`` tabs and scalar values one level
    deeper; nested dicts and lists are printed recursively one level deeper.
    Anything that is neither a dict nor a list prints nothing at top level.
    """
    if isinstance(d, dict):
        # items() and print() run on both Python 2 and Python 3.
        for key, value in d.items():
            print('\t' * indent + str(key))
            if isinstance(value, (dict, list)):
                pretty(value, indent + 1)
            else:
                print('\t' * (indent + 1) + str(value))
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, (dict, list)):
                pretty(item, indent + 1)
            else:
                print('\t' * (indent + 1) + str(item))
Which then gives me output like:
>>>
xs:schema
#xmlns:xs
http://www.w3.org/2001/XMLSchema
xs:redefine
#schemaLocation
base.xsd
xs:complexType
#name
Extension
xs:complexContent
xs:restriction
#base
Extension
xs:sequence
xs:element
#name
Policy
#minOccurs
1
xs:complexType
xs:sequence
xs:element
...
I used what you guys taught me plus the power of decorators to overload the classic print function. Just change the indent to your needs. I added it as a gist in github in case you want to star(save) it.
def print_decorator(func):
    """
    Overload Print function to pretty print Dictionaries

    Returns a wrapper around ``func``. When it is called with exactly one
    positional argument that is a dict, the dict is serialized with
    json.dumps (sorted keys, indent of 2, ``default=str``) before being
    passed on; every other call is forwarded unchanged. Keyword arguments
    are forwarded in both cases.
    """
    import functools
    import json

    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        # isinstance(*args, dict) breaks for anything but a single argument
        # (e.g. print() or print(a, b, c)), so check the count explicitly.
        if len(args) == 1 and isinstance(args[0], dict):
            text = json.dumps(args[0], sort_keys=True, indent=2, default=str)
            return func(text, **kwargs)
        return func(*args, **kwargs)
    return wrapped_func


print = print_decorator(print)
Now just use print as usual.
I wrote this simple code to print the general structure of a json object in Python.
def getstructure(data, tab = 0):
    """Print the general structure of a JSON-like object.

    Dicts are shown with all their keys. Lists are represented by the
    structure of their first element followed by ``...``. Other values and
    empty lists print nothing.

    Args:
        data: object to outline.
        tab: number of spaces to indent this level; grows by 4 per level.
    """
    pad = ' ' * tab
    if type(data) is dict:
        print(pad + '{')
        for key in data:
            # str(key) allows non-string keys (ints, tuples); concatenating
            # them directly to a string raises TypeError.
            print(pad + ' ' + str(key) + ':')
            getstructure(data[key], tab+4)
        print(pad + '}')
    elif type(data) is list and len(data) > 0:
        print(pad + '[')
        getstructure(data[0], tab+4)
        print(pad + ' ...')
        print(pad + ']')
the result for the following data
a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':'p','unicode':u'\xa7',("tuple","key"):"valid"}
getstructure(a)
is very compact and looks like this:
{
function:
tuple:
list:
[
...
]
dict:
{
a:
2:
}
unicode:
('tuple', 'key'):
}
I'm just returning to this question after taking sth's answer and making a small but very useful modification. This function prints all keys in the JSON tree as well as the size of leaf nodes in that tree.
def print_JSON_tree(d, indent=0):
    """Print every key of a nested dict, with type and size for leaf values.

    A key whose value is a dict is printed on its own line and the dict is
    printed recursively, one space deeper. Any other value is printed as
    ``key : <type name> - <length of str(value)>``.

    Args:
        d: dictionary to outline.
        indent: number of leading spaces for this level.
    """
    for key, value in d.items():
        label = ' ' * indent + str(key)
        if isinstance(value, dict):
            print(label)
            print_JSON_tree(value, indent+1)
        else:
            # str(type(x)) looks like "<class 'int'>"; keep the quoted part.
            type_name = str(type(value)).split("'")[1]
            print(label, ":", type_name, "-", str(len(str(value))))
It's really nice when you have large JSON objects and want to figure out where the meat is. Example:
>>> print_JSON_tree(JSON_object)
key1
value1 : int - 5
value2 : str - 16
key2
value1 : str - 34
value2 : list - 5623456
This would tell you that most of the data you care about is probably inside JSON_object['key1']['key2']['value2'] because the length of that value formatted as a string is very large.
I tried the following and got my desired results
Method 1:
Step 1: Install print_dict by typing the following command in cmd
pip install print_dict
Step 2: Import print_dict as
from print_dict import pd
Step 3: Printing using pd
pd(your_dictionary_name)
Example Output:
{
'Name': 'Arham Rumi',
'Age': 21,
'Movies': ['adas', 'adfas', 'fgfg', 'gfgf', 'vbxbv'],
'Songs': ['sdfsd', 'dfdgfddf', 'dsdfd', 'sddfsd', 'sdfdsdf']
}
Method 2:
We can also use for loop to print the dictionary using items method
for key, Value in your_dictionary_name.items():
print(f"{key} : {Value}")
The easiest is to install IPython and use something like below
from IPython.lib.pretty import pretty
class MyClass:
    # The method needs the ``def`` keyword; without it the class body is a
    # syntax error.
    def __repr__(self):
        return pretty(data) # replace data with what makes sense
In your case
print(pretty(mydict))
Sth, i sink that's pretty ;)
def pretty(d, indent=0):
    """Print a nested dict with upper-cased, right-aligned keys.

    Dict values are wrapped in ``{`` ... ``} # end of KEY #`` markers. Each
    element of a list value is wrapped in ``[`` ... ``] # end of KEY #``
    markers: dict elements are printed recursively, any other element is
    printed on a line of its own. Scalar values are printed as
    ``KEY: value``.

    Args:
        d: dictionary to print.
        indent: number of leading tabs for this level.
    """
    margin = '\t' * indent
    # items() and print() run on both Python 2 and Python 3.
    for key, value in d.items():
        label = str(key).upper()
        if isinstance(value, dict):
            print(margin + (("%30s: {\n") % label))
            pretty(value, indent+1)
            print(margin + ' ' * 32 + ('} # end of %s #\n' % label))
        elif isinstance(value, list):
            for val in value:
                print(margin + (("%30s: [\n") % label))
                if isinstance(val, dict):
                    pretty(val, indent+1)
                else:
                    # Non-dict elements cannot be recursed into (they have
                    # no items()), so print them directly.
                    print('\t' * (indent+1) + str(val))
                print(margin + ' ' * 32 + ('] # end of %s #\n' % label))
        else:
            print(margin + (("%30s: %s") % (label, str(value))))
This class prints out a complex nested dictionary with sub dictionaries and sub lists.
##
## Recursive class to parse and print complex nested dictionary
##
class NestedDictionary(object):
    """Wrap a value and print it recursively if it is a plain dict.

    Keys that hold dicts are printed alone and their contents three dashes
    deeper. List values are not printed themselves; each element is handled
    as its own NestedDictionary. Other values are printed next to their key.
    """

    def __init__(self, value):
        self.value = value

    def print(self, depth):
        """Print the wrapped dict; ``depth`` is the dash-prefix length."""
        if type(self.value) is not dict:
            return
        spacer = "--------------------"
        prefix = spacer[:depth]
        deeper = depth + 3
        for kk, vv in self.value.items():
            if type(vv) is dict:
                print(prefix, kk)
                NestedDictionary(vv).print(deeper)
            elif type(vv) is list:
                for element in vv:
                    NestedDictionary(element).print(deeper)
            else:
                print(prefix, kk, vv)
##
## Instatiate and execute - this prints complex nested dictionaries
## with sub dictionaries and sub lists
## 'something' is a complex nested dictionary
MyNest=NestedDictionary(weather_com_result)
MyNest.print(0)
Late, but an answer that does not require any additional libraries. Similar to STH's answer, but a little more robust in formatting and returns a full string that can then be printed:
def pretty_print_dict(
    input_dictionary,
    indent=1,
    depth=0
):
    """Return a nested dict formatted as a tab-indented, brace-delimited string.

    Args:
        input_dictionary: dictionary to format.
        indent: number of tabs placed between a key and a non-dict value.
        depth: number of tabs in front of this dictionary's braces; keys
            sit one tab deeper and nested dicts two tabs deeper.

    Returns:
        The formatted string, without a trailing newline.
    """
    # String for any dict will start with a '{'
    pieces = ['\t' * depth + '{\n']
    # Iterate over keys and values, building the full string out.
    for position, (key, value) in enumerate(input_dictionary.items()):
        # If key follows a previous item, add comma.
        if position:
            pieces.append(',\n')
        pieces.append('\t' * (depth + 1) + str(key) + ': ')
        if isinstance(value, dict):
            # Forward indent as well, so a custom value also applies to
            # nested dictionaries rather than only to the top level.
            pieces.append('\n' + pretty_print_dict(value, indent=indent, depth=depth+2))
        else:
            pieces.append('\t' * indent + str(value))
    # Complete the dict with a '}'
    pieces.append('\n' + '\t' * depth + '}')
    return ''.join(pieces)
Let's see how it handles a dict like test_dict={1: 2, 3: {4: {5: 6}, 7: 8}, 9: 10}.
The string looks like: '{\n\t1: \t2,\n\t3: \n\t\t{\n\t\t\t4: \n\t\t\t\t{\n\t\t\t\t\t5: \t6\n\t\t\t\t},\n\t\t\t7: \t8\n\t\t},\n\t9: \t10\n}'.
Printing that string yields:
{
1: 2,
3:
{
4:
{
5: 6
},
7: 8
},
9: 10
}
I'm a relative python newbie myself but I've been working with nested dictionaries for the past couple weeks and this is what I had came up with.
You should try using a stack. Make the keys from the root dictionary into a list of a list:
stack = [ root.keys() ] # Result: [ [root keys] ]
Going in reverse order from last to first, lookup each key in the dictionary to see if its value is (also) a dictionary. If not, print the key then delete it. However if the value for the key is a dictionary, print the key then append the keys for that value to the end of the stack, and start processing that list in the same way, repeating recursively for each new list of keys.
If the value for the second key in each list were a dictionary you would have something like this after several rounds:
[['key 1','key 2'],['key 2.1','key 2.2'],['key 2.2.1','key 2.2.2'],[`etc.`]]
The upside to this approach is that the indent is just \t times the length of the stack:
indent = "\t" * len(stack)
The downside is that in order to check each key you need to hash through to the relevant sub-dictionary, though this can be handled easily with a list comprehension and a simple for loop:
path = [li[-1] for li in stack]
# The last key of every list of keys in the stack
sub = root
for p in path:
sub = sub[p]
if type(sub) == dict:
stack.append(sub.keys()) # And so on
Be aware that this approach will require you to cleanup trailing empty lists, and to delete the last key in any list followed by an empty list (which of course may create another empty list, and so on).
There are other ways to implement this approach but hopefully this gives you a basic idea of how to do it.
EDIT: If you don't want to go through all that, the pprint module prints nested dictionaries in a nice format.
Here's a function I wrote based on sth's comment. It works the same as json.dumps with indent, but it uses tabs instead of spaces for indentation. In Python 3.2+ you can pass indent='\t' to json.dumps directly, but not in 2.7.
def pretty_dict(d):
    """Print a nested dict in a JSON-like layout, indented with tabs.

    Keys and non-dict values are both wrapped in double quotes. Every entry
    except the last one of its dictionary is followed by a comma.
    """
    def pretty(d, indent):
        margin = '\t' * indent
        last = len(d) - 1
        # items() and print() run on both Python 2 and Python 3.
        for i, (key, value) in enumerate(d.items()):
            comma = '' if i == last else ','
            if isinstance(value, dict):
                print('{0}"{1}": {{'.format(margin, str(key)))
                pretty(value, indent+1)
                print('{0}}}{1}'.format(margin, comma))
            else:
                print('{0}"{1}": "{2}"{3}'.format(margin, str(key), value, comma))
    print('{')
    pretty(d, indent=1)
    print('}')
Ex:
>>> dict_var = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> pretty_dict(dict_var)
{
"a": "2",
"b": {
"y": {
"t2": "5",
"t1": "4"
},
"x": "3"
}
}
Here's something that will print any sort of nested dictionary, while keeping track of the "parent" dictionaries along the way.
dicList = list()
def prettierPrint(dic, dicList):
    """Print every leaf of a nested dictionary together with its parent path.

    For each non-dict value this prints ``key: value``, then the list of
    parent keys leading to it, then a blank separator.  Nested dictionaries
    are walked recursively.

    Args:
        dic: the (possibly nested) dictionary to walk.
        dicList: list of parent keys for ``dic``; pass an empty list for the
            root.  It is extended while descending and restored before
            returning.
    """
    for key, value in dic.items():
        # An empty OrderedDict is reported as a leaf with value None.
        if str(value) == 'OrderedDict()':
            value = None
        if isinstance(value, dict):
            # Pair each append with a pop so the path is restored even when
            # the nested dictionary is empty; otherwise a stale key would be
            # reported in the paths of the following siblings.
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
        else:
            print(str(key) + ": " + str(value))
            print(str(key) + ' was found in the following path: ' + str(dicList))
            print('\n')
prettierPrint(dicExample, dicList)
This is a good starting point for printing according to different formats, like the one specified in OP. All you really need to do is operations around the Print blocks. Note that it looks to see if the value is 'OrderedDict()'. Depending on whether you're using something from Container datatypes Collections, you should make these sort of fail-safes so the elif block doesn't see it as an additional dictionary due to its name. As of now, an example dictionary like
# Sample nested dictionary used to demonstrate the printing functions.
example_dict = {'key1': 'value1',
                'key2': 'value2',
                'key3': {'key3a': 'value3a'},
                'key4': {'key4a': {'key4aa': 'value4aa',
                                   'key4ab': 'value4ab',
                                   'key4ac': 'value4ac'},
                         'key4b': 'value4b'}}
will print
key3a: value3a
key3a was found in the following path: ['key3']
key2: value2
key2 was found in the following path: []
key1: value1
key1 was found in the following path: []
key4ab: value4ab
key4ab was found in the following path: ['key4', 'key4a']
key4ac: value4ac
key4ac was found in the following path: ['key4', 'key4a']
key4aa: value4aa
key4aa was found in the following path: ['key4', 'key4a']
key4b: value4b
key4b was found in the following path: ['key4']
~altering code to fit the question's format~
# Parent path of the most recently visited leaf; shared across recursive calls.
lastDict = list()
dicList = list()


def prettierPrint(dic, dicList):
    """Print nested leaf values grouped under their parent path.

    For every non-dict value that has at least one parent key, the parent
    path is printed once as a header line, followed by the value indented to
    just past the width of that header.  Consecutive values sharing the same
    parent path reuse the header.  Values at the root level (empty path) are
    not printed.

    Args:
        dic: the (possibly nested) dictionary to walk.
        dicList: list of parent keys for ``dic``; pass an empty list for the
            root.  It is extended while descending and restored before
            returning.
    """
    global lastDict
    if not dicList:
        # A root-level call starts a fresh traversal, so a path remembered
        # from a previous traversal must not suppress the first header.
        lastDict = []
    for key, value in dic.items():
        # An empty OrderedDict is treated as a leaf with value None.
        if str(value) == 'OrderedDict()':
            value = None
        if isinstance(value, dict):
            # Pair each append with a pop so an empty nested dictionary
            # cannot leave a stale key on the path.
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
            continue
        if dicList:
            if lastDict != dicList:
                print(dicList)
            # The indent depends only on the current path, so compute it
            # here instead of relying on an earlier iteration having set it.
            print(' ' * len(str(dicList)) + ' ' + str(value))
        lastDict = list(dicList)
Using the same example code, it will print the following:
['key3']
value3a
['key4', 'key4a']
value4ab
value4ac
value4aa
['key4']
value4b
This isn't exactly what is requested in OP. The difference is that a parent^n is still printed, instead of being absent and replaced with white-space. To get to OP's format, you'll need to do something like the following: iteratively compare dicList with the lastDict. You can do this by making a new dictionary and copying dicList's content to it, checking if i in the copied dictionary is the same as i in lastDict, and -- if it is -- writing whitespace to that i position using the string multiplier function.
From this link:
def prnDict(aDict, br='\n', html=0,
            keyAlign='l', sortKey=0,
            keyPrefix='', keySuffix='',
            valuePrefix='', valueSuffix='',
            leftMargin=0, indent=1):
    '''
    Return a string representation of aDict in the following format:
        {
         key1: value1,
         key2: value2,
         ...
        }
    Spaces are added to the keys so that they all have the same width.
    An empty (or None) aDict yields '{}'.

    sortKey: set to 1 to emit the keys in sorted order.  Keys of mixed,
        mutually unorderable types are grouped by type name and ordered
        by their string form within each group.
    keyAlign: either 'l' or 'r', for left or right alignment of the keys.
    keyPrefix, keySuffix, valuePrefix, valueSuffix: the prefix and
        suffix to wrap the keys or values. Good for formatting them
        for an html document (for example, keyPrefix='<b>', keySuffix='</b>').
        Note: the keys are padded with spaces to make them equally wide.
        The prefix and suffix are added OUTSIDE the entire width.
    html: if set to 1, all spaces are replaced with '&nbsp;', and
        the entire output is wrapped with '<code>' and '</code>'.
    br: the line separator. If html, it is suggested to set
        br to '<br>'. If you want the html source code easy to read,
        set br to '<br>\n'.
    leftMargin: number of spaces put in front of every output line.
    indent: number of spaces put in front of every key/value line.

    version: 04b52
    author : Runsun Pan
    Dave Benjamin
    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/161403
    '''
    if not aDict:
        return '{}'

    items = list(aDict.items())
    if sortKey:
        try:
            items.sort(key=lambda kv: kv[0])
        except TypeError:
            # Python 3 refuses to compare unrelated types (e.g. str with
            # tuple); fall back to a deterministic order by type, then text.
            items.sort(key=lambda kv: (type(kv[0]).__name__, str(kv[0])))

    def quote(x):
        # Wrap strings in single quotes; leave every other object as it is.
        return "'%s'" % x if isinstance(x, str) else x

    ks = [str(quote(k)) for k, _ in items]
    vs = [quote(v) for _, v in items]
    maxKeyLen = max(len(k) for k in ks)
    justify = str.rjust if keyAlign == 'r' else str.ljust

    tmp = ['{']
    for k, v in zip(ks, vs):
        tmp.append(' ' * indent + '%s%s%s:%s%s%s,' % (
            keyPrefix, justify(k, maxKeyLen), keySuffix,
            valuePrefix, v, valueSuffix))
    tmp[-1] = tmp[-1][:-1]  # remove the ',' after the last item
    tmp.append('}')

    if leftMargin:
        tmp = [' ' * leftMargin + x for x in tmp]

    text = br.join(tmp)
    if html:
        # Non-breaking spaces keep the column alignment when rendered.
        return '<code>%s</code>' % text.replace(' ', '&nbsp;')
    return text
'''
Example:
>>> a={'C': 2, 'B': 1, 'E': 4, (3, 5): 0}
>>> print prnDict(a)
{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, sortKey=1)
{
'B' :1,
'C' :2,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, keyPrefix="<b>", keySuffix="</b>")
{
<b>'C' </b>:2,
<b>'B' </b>:1,
<b>'E' </b>:4,
<b>(3, 5)</b>:0
}
>>> print prnDict(a, html=1)
<code>{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}</code>
>>> b={'car': [6, 6, 12], 'about': [15, 9, 6], 'bookKeeper': [9, 9, 15]}
>>> print prnDict(b, sortKey=1)
{
'about' :[15, 9, 6],
'bookKeeper':[9, 9, 15],
'car' :[6, 6, 12]
}
>>> print prnDict(b, keyAlign="r")
{
'car':[6, 6, 12],
'about':[15, 9, 6],
'bookKeeper':[9, 9, 15]
}
'''
Use this function:
def pretty_dict(d, n=1):
    """Print a nested dictionary as an indented outline.

    Each key is printed on its own line, indented by ``n`` spaces; the value
    belonging to it is printed four spaces deeper.  Nested dictionaries are
    expanded recursively; any other value is printed on a single line.

    Args:
        d: the dictionary (or leaf value) to print.
        n: number of spaces to indent the current level.
    """
    if not isinstance(d, dict):
        # Leaf value: print it whole rather than iterating over it, so
        # strings are not split into characters and numbers are not lost.
        print(" " * n + str(d))
        return
    for k, v in d.items():
        # str() keeps non-string keys (ints, tuples, ...) printable.
        print(" " * n + str(k))
        pretty_dict(v, n=n + 4)
Call it like this:
pretty_dict(mydict)
This is what I came up with while working on a class that needed to write a dictionary in a .txt file:
#staticmethod
def _pretty_write_dict(dictionary):
def _nested(obj, level=1):
indentation_values = "\t" * level
indentation_braces = "\t" * (level - 1)
if isinstance(obj, dict):
return "{\n%(body)s%(indent_braces)s}" % {
"body": "".join("%(indent_values)s\'%(key)s\': %(value)s,\n" % {
"key": str(key),
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for key, value in obj.items()),
"indent_braces": indentation_braces
}
if isinstance(obj, list):
return "[\n%(body)s\n%(indent_braces)s]" % {
"body": "".join("%(indent_values)s%(value)s,\n" % {
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for value in obj),
"indent_braces": indentation_braces
}
else:
return "\'%(value)s\'" % {"value": str(obj)}
dict_text = _nested(dictionary)
return dict_text
Now, if we have a dictionary like this:
some_dict = {'default': {'ENGINE': [1, 2, 3, {'some_key': {'some_other_key': 'some_value'}}], 'NAME': 'some_db_name', 'PORT': '', 'HOST': 'localhost', 'USER': 'some_user_name', 'PASSWORD': 'some_password', 'OPTIONS': {'init_command': 'SET foreign_key_checks = 0;'}}}
And we do:
print(_pretty_write_dict(some_dict))
We get:
{
'default': {
'ENGINE': [
'1',
'2',
'3',
{
'some_key': {
'some_other_key': 'some_value',
},
},
],
'NAME': 'some_db_name',
'OPTIONS': {
'init_command': 'SET foreign_key_checks = 0;',
},
'HOST': 'localhost',
'USER': 'some_user_name',
'PASSWORD': 'some_password',
'PORT': '',
},
}
There are so many nice implementations here, it made me want to add my own :). I used it for debugging in CircuitPython and MicroPython where json.dumps does not allow using the indent parameter, and pprint is not available as well.
It is implemented with self so it can be dropped into a class, and for each value it shows the data type, which I find very useful for debugging. It does not depend on any external module.
def pretty_print_dict(self, d, indent=0):
    """Print a nested structure with a type tag around every scalar value.

    Dictionaries are printed between ``{`` and ``}`` with each key on its own
    line followed by its value; lists are printed between ``[`` and ``]``.
    Scalars are wrapped in a tag naming their type: ``<s>`` for str, ``<b>``
    for bool, ``<i>`` for int, ``<f>`` for float and ``<?>`` for anything
    else.

    Args:
        d: the value to print (dict, list or scalar).
        indent: number of spaces to indent the current level.
    """
    INDENT = 2
    pad = ' ' * indent
    if isinstance(d, dict):
        print(pad + '{')
        for key, value in d.items():
            print(f'{" " * (indent + INDENT)}{key}:')
            self.pretty_print_dict(value, indent + 2 * INDENT)
        print(pad + '}')
    elif isinstance(d, list):
        print(pad + '[')
        for item in d:
            self.pretty_print_dict(item, indent + INDENT)
        print(pad + ']')
    elif isinstance(d, str):
        print(pad + '<s>' + d + '</s>')
    elif isinstance(d, bool):
        # bool is a subclass of int, so it must be tested before int;
        # otherwise True/False would be tagged as integers.
        print(pad + '<b>' + str(d) + '</b>')
    elif isinstance(d, int):
        print(pad + '<i>' + str(d) + '</i>')
    elif isinstance(d, float):
        print(pad + '<f>' + str(d) + '</f>')
    else:
        print(pad + '<?>' + str(d) + '</?>')
Usage: self.pretty_print_dict(my_dict)

Categories