recursively traverse multidimensional dictionary and export to csv - python

I have a complex multidimensional dictionary, and I want to export some of its key-value pairs to a CSV file that serves as a running log. I've tried the various helpers for exporting to CSV and hacked away at most of the code examples on Stack Overflow for traversing multidimensional dictionaries, but I have failed to arrive at a solution. This problem is also unusual in that I only want to export some of the key values.
Here is the dictionary:
cpu_stats = {'time_stamp': {'hour': 22, 'month': 5, 'second': 43, 'year': 2014, 'day': 29, 'minute': 31}, 'cpus': [[{'metric_type': 'CPU_INDEX', 'value': 1}, {'metric_type': 'CPU_TEMPERATURE', 'value': 39}, {'metric_type': 'CPU_FAN_SPEED', 'value': 12000}]]}
I need to format the values in time_stamp into a yyyy-mm-dd hh:mm:ss and store it as the first cell of the row. I then need the values in 'cpus' for CPU_INDEX, CPU_TEMPERATURE, and CPU_FAN_SPEED in the same row as the time stamp.
The csv file should look like this:
time_stamp, cpu_index, cpu_temperature, cpu_fan_speed
2014-05-29 22:31:43, 1, 39, 12000
One example I've been hacking away on is:
def walk_dict(seq, level=0):
"""Recursively traverse a multidimensional dictionary and print all
keys and values.
"""
items = seq.items()
items.sort()
for v in items:
if isinstance(v[1], dict):
# Print the key before make a recursive call
print "%s%s" % (" " * level, v[0])
nextlevel = level + 1
walk_dict(v[1], nextlevel)
else:
print "%s%s %s" % (" " * level, v[0], v[1])
I get the following output
walk_dict(cpu_stats)
cpus [[{'metric_type': 'CPU_INDEX', 'value': 1}, {'metric_type': 'CPU_TEMPERATURE', 'value': 38}, {'metric_type': 'CPU_FAN_SPEED', 'value': 12000}]]
time_stamp
day 29
hour 22
minute 17
month 5
second 19
year 2014
I have also been hacking away at this function, hoping I could store the date information in variables that can then be formatted into a single string. Unfortunately, it makes recursive calls, which lose the local variables on subsequent calls. Using global was futile.
def parseDictionary(obj, nested_level=0, output=sys.stdout):
spacing = ' '
if type(obj) == dict:
print >> output, '%s{' % ((nested_level) * spacing)
for k, v in obj.items():
if hasattr(v, '__iter__'):
# 1st level, prints time and cpus
print >> output, '%s:' % (k)
parseDictionary(v, nested_level + 1, output)
else:
# here is the work
if k == "hour":
hour = v
elif k == "month":
month = v
elif k == "second":
second = v
elif k == "year":
year = v
elif k == "day":
day = v
elif k == "minute":
minute = v
print >> output, '%s %s' % (k, v)
print >> output, '%s}' % (nested_level * spacing)
elif type(obj) == list:
print >> output, '%s[' % ((nested_level) * spacing)
for v in obj:
if hasattr(v, '__iter__'):
parseDictionary(v, nested_level + 1, output)
else:
print >> output, '%s%s' % ((nested_level + 1) * spacing, v)
print >> output, '%s]' % ((nested_level) * spacing)
else:
print >> output, '%s%s' % (nested_level * spacing, obj)
if __name__ == "__main__":
global year
global month
global day
global hour
global minute
global second
cpu_stats = {'time_stamp': {'hour': 22, 'month': 5, 'second': 43, 'year': 2014, 'day': 29, 'minute': 31}, 'cpus': [[{'metric_type': 'CPU_INDEX', 'value': 1}, {'metric_type': 'CPU_TEMPERATURE', 'value': 39}, {'metric_type': 'CPU_FAN_SPEED', 'value': 12000}]]}
parseDictionary(cpu_stats)
print '%s-%s-%s %s:%s:%s' % (year, month, day, hour, minute, second)
output:
{
time_stamp:
{
hour 22
month 5
second 27
year 2014
day 29
minute 57
cpus:
[
[
{
metric_type CPU_INDEX
value 1
{
metric_type CPU_TEMPERATURE
value 39
{
metric_type CPU_FAN_SPEED
value 12000
]
]
Traceback (most recent call last):
File "./cpu.py", line 135, in <module>
print '%s-%s-%s %s:%s:%s' % (year, month, day, hour, minute, second)
NameError: global name 'year' is not defined
Thanks, I appreciate any help in pointing me in the right direction as I'm currently at a loss.

I think you might be missing the point of dictionaries. Rather than iterating over the keys of a dictionary and checking whether each one is the key you want, you should just look up the key you want. It might be easier to approach the problem like this:
# Look the timestamp fields up directly instead of walking the dictionary.
t = cpu_stats['time_stamp']
# Zero-pad every field so the stamp is always yyyy-mm-dd hh:mm:ss.
date = '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
    t['year'], t['month'], t['day'], t['hour'], t['minute'], t['second'])
for cpu in cpu_stats['cpus']:
    # Collapse the list of {'metric_type': ..., 'value': ...} pairs into a
    # single mapping keyed by metric name.
    c = {d['metric_type']: d['value'] for d in cpu}
    # The metric names in the data are upper-case.
    row = [date, c['CPU_INDEX'], c['CPU_TEMPERATURE'], c['CPU_FAN_SPEED']]
Life would be easier if you had had your cpus value as a list of dictionaries, rather than a list of lists of dictionaries, and stored your timestamps as datetime objects:
cpu_stats = {'time_stamp': datetime.datetime(2014, 5, 29, 22, 31, 43), 'cpus': [{'CPU_INDEX': 1, 'CPU_TEMPERATURE': 39, 'CPU_FAN_SPEED': 12000}]}
The whole point of a dictionary is lost if you bury it in a structure like {'key_name': 'my_key', 'key_value': 'my_value'}. This just adds an extra layer that you don't need, instead you only need: {'my_key': 'my_value'}

I agree with @desired login; however, assuming you have no control over the incoming data and have to work with what you showed in your question, you could just traverse it like so:
cpu_stats = {
    'time_stamp': {'hour': 22, 'month': 5, 'second': 43, 'year': 2014, 'day': 29, 'minute': 31},
    'cpus': [[{'metric_type': 'CPU_INDEX', 'value': 1},
              {'metric_type': 'CPU_TEMPERATURE', 'value': 39},
              {'metric_type': 'CPU_FAN_SPEED', 'value': 12000}]],
}

# Metrics to export, in the column order of the CSV row.
METRIC_COLUMNS = ('CPU_INDEX', 'CPU_TEMPERATURE', 'CPU_FAN_SPEED')

# Build the timestamp before touching the cpus. Looking the key up directly
# means the result no longer depends on the order in which the dictionary
# happens to yield its keys. Fields are zero-padded to yyyy-mm-dd hh:mm:ss.
timestamp = '{year:04d}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:{second:02d}'.format(
    **cpu_stats['time_stamp'])

for cpu in cpu_stats['cpus']:
    # Map metric name -> value for this cpu.
    metrics = {metric['metric_type']: metric['value'] for metric in cpu}
    # A metric that is absent from the data is written as an empty cell.
    row = [timestamp] + [str(metrics.get(name, '')) for name in METRIC_COLUMNS]
    print(','.join(row))

Related

Convert dictionary timestamp key-value pairs into datetime

I have a dictionary which has a timestamp stored as key and value pairs:
timestamp_info = {
'day': 8, 'fold': 0, 'hour': 0,
'max': {'day': 31, 'fold': 0,'hour': 23},
'microsecond': 639000, 'minute': 17, 'month': 10, 'second': 35, 'year': 2021
}
I am trying to take timestamp from dictionary and convert to datetime.datetime timestamp format. I tried the following but it does not work as it asks for str (exact error is: "strptime() argument 1 must be str, not dict"):
result = datetime.datetime.strptime(timestamp_info, "%Y-%m-%d %H:%M:%S")
I want this result to be compared with datetime.datetime.now and take difference in seconds between current timestamp and result, that is the reason I need timestamp format.
Chirs Oram already shows how to build a string that you can then parse with strptime(). Alternatively, you can pass the values directly to datetime():
timestamp_info = {
'day':8, 'fold':0,'hour':0, 'max':{'day':31, 'fold':0,'hour':23},
'microsecond':639000,'minute':17,'month':10,'second':35, 'year':2021
}
year = timestamp_info['year']
month = timestamp_info['month']
day = timestamp_info['day']
hour = timestamp_info['hour']
minute = timestamp_info['minute']
second = timestamp_info['second']
result = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second)
Or you can do this more succinctly by removing the attributes not needed for datetime:
timestamp_info = {
'day':8, 'fold':0,'hour':0, 'max':{'day':31, 'fold':0,'hour':23},
'microsecond':639000,'minute':17,'month':10,'second':35, 'year':2021
}
del timestamp_info['fold']
del timestamp_info['max']
result = datetime.datetime(**timestamp_info)
You can extract each part of the timestamp in the specified format, and concatenate them into a string:
timestamp_info = {
'day':8, 'fold':0,'hour':0, 'max':{'day':31, 'fold':0,'hour':23},
'microsecond':639000,'minute':17,'month':10,'second':35, 'year':2021
}
year = str(timestamp_info['year'])
month = str(timestamp_info['month'])
day = str(timestamp_info['day'])
hour = str(timestamp_info['hour'])
minute = str(timestamp_info['minute'])
second = str(timestamp_info['second'])
timestamp = '-'.join([year, month, day]) + ' ' + ':'.join([hour, minute, second])
result = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
Which results in:
2021-10-08 00:17:35

Best simple and effective solution for data organization

I'm new with Python programming so I'm doing a bunch of practice exercise in order to improve my skills.
Therefore, I would like to show you guys my approach on this example and if you could let me know what you think I would be grateful!
Exercise:
Given a list of customer IDs, I have to segment them by the following logic:
If ID is multiple of 7 and multiple of 3 then segment 'A'
If ID is multiple of 3 then segment 'B'
If ID is multiple of 7, then segment 'C'
Else, segment 'D'
What I've done:
from collections import Counter
from datetime import datetime
import os.path
import json
date = datetime.today().strftime('%Y-%m-%d')
customer_indices = [list of IDs] ex: [123981,12398,123157,12371...]
def segment(customer):
    """Return the segment letter for a customer ID.

    Rules, as stated in the exercise:
    multiple of both 7 and 3 -> 'A'; multiple of 3 -> 'B';
    multiple of 7 -> 'C'; anything else -> 'D'.
    """
    multiple_of_3 = customer % 3 == 0
    multiple_of_7 = customer % 7 == 0
    # Use ``and`` here, not ``&``: ``&`` binds tighter than ``==``, so
    # ``x % 7 == 0 & x % 3 == 0`` is true for every multiple of 7.
    if multiple_of_3 and multiple_of_7:
        return 'A'
    if multiple_of_3:
        return 'B'
    if multiple_of_7:
        return 'C'
    return 'D'


def split_customers(customers):
    """Split customer IDs into four lists, one per segment.

    Returns a tuple ``(a, b, c, d)`` of lists holding the IDs of segments
    'A', 'B', 'C' and 'D', each in input order. The classification is
    delegated to ``segment`` so that the lists and the per-segment counts
    can never disagree.
    """
    buckets = {'A': [], 'B': [], 'C': [], 'D': []}
    for customer in customers:
        buckets[segment(customer)].append(customer)
    return buckets['A'], buckets['B'], buckets['C'], buckets['D']
segmentation = [segment(customer) for customer in customer_indices]
print('Segmentation list: ')
print(segmentation)
print('\n')
segmentation_counter = Counter(segmentation)
print('Count of clients per segment: ')
print(f"A: {segmentation_counter['A']}")
print(f"B: {segmentation_counter['B']}")
print(f"C: {segmentation_counter['C']}")
print(f"D: {segmentation_counter['D']}")
a, b, c, d = split_customers(customer_indices)
main_dict = {'Date': date,
'Segmentation': {
'A Clients': {
'Count': segmentation_counter['A'],
'Customers': a},
'B Clients': {
'Count': segmentation_counter['B'],
'Customers': b},
'C Clients': {
'Count': segmentation_counter['C'],
'Customers': c},
'D Clients': {
'Count': segmentation_counter['D'],
'Customers': d}}}
main_list = [main_dict]
# Make sure the output directory exists. exist_ok=True avoids the crash that
# occurred when 'Data' was already there but the JSON file was not.
os.makedirs('Data', exist_ok=True)
if os.path.isfile('Data/customer_segmentation.json'):
    # Append today's entry to the history that is already on disk.
    with open('Data/customer_segmentation.json') as file:
        data = json.load(file)
    data.append(main_dict)
else:
    # First run: the history consists of today's entry only.
    data = main_list
# The with-block closes the file, so no explicit close() is needed.
with open('Data/customer_segmentation.json', 'w') as file:
    json.dump(data, file, indent=2)
The original code begins with a `with open` block that reads the clients' ID list from a txt file in the same directory as this .py file.
The main idea of this solution is to execute it every day, so the JSON file is updated with a new entry for each day it runs; if I later want to analyse how the segmentation grows, it will be easy to do so.
What do you think?

Appending new items to a Python list with logic operators (Time values)

I'm starting out on Python on a job (I'm used to R) where I have to get daily data from an API that returns the datetime and value (which is a certain number of listeners on a podcast) and then send that data to a bigquery database.
After I split up the date and time, I need to add a new column that indicates which program was playing in that moment. In other words:
if time is >= 11:00 and <= 11:59, then add a 'program name' value to the row in the column 'program'.
I've run into several problems, namely the fact that the time has been split into strings (possibly because we use Google Data Studio, which has an extremely rigid datetime implementation).
How would you go about it?
if response.status_code == 200:
data = response.text
result = json.loads(data)
test = result
#Append Items
for k in test:
l = []
l.append(datetime.datetime.strptime(k["time"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y-%m-%d"))
l.append(datetime.datetime.strptime(k["time"], "%Y-%m-%dT%H:%M:%S.%fZ").astimezone(pytz.timezone("America/Toronto")).strftime("%H:%M"))
l.append(k["value"])
You need to have a 'DB' of the programs timetable. See below.
Your loop will call the function below with the time value and you will have the program name.
import datetime
from collections import namedtuple
Program = namedtuple('Program', 'name start end')
PROGRAMS_DB = [Program('prog1', datetime.time(3, 0, 0), datetime.time(3, 59, 0)),
Program('prog2', datetime.time(18, 0, 0), datetime.time(18, 59, 0)),
Program('prog3', datetime.time(4, 0, 0), datetime.time(4, 59, 0))]
def get_program_name(time_val):
for program in PROGRAMS_DB:
if program.start <= time_val <= program.end:
return program.name
data_from_the_web = [{"time": "2019-02-19T18:10:00.000Z", "value": 413, "details": None},
{"time": "2019-02-19T15:12:00.000Z", "value": 213, "details": None}]
for entry in data_from_the_web:
t = datetime.datetime.strptime(entry["time"], "%Y-%m-%dT%H:%M:%S.%fZ").time()
entry['prog'] = get_program_name(t)
for entry in data_from_the_web:
print(entry)
Output
{'prog': 'prog2', 'details': None, 'value': 413, 'time': '2019-02-19T18:10:00.000Z'}
{'prog': None, 'details': None, 'value': 213, 'time': '2019-02-19T15:12:00.000Z'}

Python - Better way for looping multidimensional dictionary

How can I change my code to use just one loop? I tried using .iteritems(), iterkeys(), etc.
for user in data:
for item in data[user]:
start = seconds_since_midnight(
data[user][item]['start']
)
end = seconds_since_midnight(
data[user][item]['end']
)
overtime = end - start
if overtime > eight_hours:
if user not in result:
if str(user) not in names.keys():
continue
result[user] = {
'name': names[str(user)]['name'],
'overtime': []
}
result[user]['overtime'].append(overtime - eight_hours)
try:
result[user]['overtime'] = sum(result[user]['overtime'])
except KeyError:
pass
return sorted(
result.items(),
key=lambda result: result[1]['overtime'],
reverse=True
)
It creates structure like this:
data = {
'user_id': {
datetime.date(2013, 10, 1): {
'start': datetime.time(9, 0, 0),
'end': datetime.time(17, 30, 0),
},
datetime.date(2013, 10, 2): {
'start': datetime.time(8, 30, 0),
'end': datetime.time(16, 45, 0),
},
}
}
First thing to remark is we can assume each user appears only once in data because it is a dictionary.
Solution 1
Now introduce these 2 functions:
def overtime(item):
    """Return ``end - start`` for one entry, via ``seconds_since_midnight``.

    ``item`` is a mapping with 'start' and 'end' keys.
    """
    start = seconds_since_midnight(item['start'])
    end = seconds_since_midnight(item['end'])
    return end - start


def comp_hours(name, items, threshold=8 * 60 * 60):
    """Return ``{'name': name, 'overtime': total}`` for one user.

    ``items`` is the per-user mapping of date -> entry, so the entries are
    its values, not its keys. ``threshold`` is the regular working time;
    only the part of a day beyond it counts as overtime.
    NOTE(review): the default assumes seconds_since_midnight returns
    seconds, as its name suggests - confirm.
    """
    # Compute each day's duration once instead of twice per entry.
    worked = [overtime(item) for item in items.values()]
    return {'name': name,
            'overtime': sum(w - threshold for w in worked if w > threshold)}
Now do this dictionary comprehension:
result = {u: comp_hours(names[str(u)]['name'], i) for u, i in data.items() if str(u) in names}
# Each value is a {'name': ..., 'overtime': ...} dict, so the filter has to
# look at its 'overtime' entry rather than compare the dict itself with 0.
result_filtered = {k: v for k, v in result.items() if v['overtime'] > 0}
You have to do the sorting yourself.
Solution 2
We modify our first solution.
Introduce functions
def total_overtime(items, threshold=8 * 60 * 60):
    """Return the summed overtime of one user.

    ``items`` is the per-user mapping of date -> entry, so the entries are
    its values, not its keys. ``threshold`` is the regular working time;
    only the part of a day beyond it counts as overtime.
    NOTE(review): the default assumes overtime() yields seconds - confirm.
    """
    # Compute each day's duration once instead of twice per entry.
    worked = [overtime(item) for item in items.values()]
    return sum(w - threshold for w in worked if w > threshold)


def comp_hours_new(user, items):
    """Return the result record (name and total overtime) for ``user``."""
    return {'name': names[str(user)]['name'], 'overtime': total_overtime(items)}


def condition(user, items):
    """Return True when ``user`` is known in ``names`` and has overtime."""
    return str(user) in names and total_overtime(items) > 0
Then do this
{u: comp_hours_new(u, i) for u, i in data.items() if condition(u, i)}
With functional programming you would not have to compute total_overtime and overtime 2X.
After all this one was a classic case of this pattern
{k: f(k, v) for k, v in your_dict.items() if condition(k, v)}
Solution 3
To avoid wasting CPU time, we modify our solution a bit and work with generators.
Introduce functions
def condition_new(user, total_overtime_):
return str(user) in names and total_overtime_ > 0
def comp_hours_new_new(user, total_overtime_):
return {'name': names[str(user)]['name'], 'overtime': total_overtime_}
Now do
I = ((u, total_overtime(i)) for u, i in data.items())
{u: comp_hours_new_new(u, ttl_over) for u, ttl_over in I if condition_new(u, ttl_over)}

How to pretty print nested dictionaries?

How can I pretty print a dictionary with depth of ~4 in Python? I tried pretty printing with pprint(), but it did not work:
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(mydict)
I simply want an indentation ("\t") for each nesting, so that I get something like this:
key1
value1
value2
key2
value1
value2
etc.
How can I do this?
My first thought was that the JSON serializer is probably pretty good at nested dictionaries, so I'd cheat and use that:
>>> import json
>>> print(json.dumps({'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}},
... sort_keys=True, indent=4))
{
"a": 2,
"b": {
"x": 3,
"y": {
"t1": 4,
"t2": 5
}
}
}
I'm not sure how exactly you want the formatting to look like, but you could start with a function like this:
def pretty(d, indent=0):
for key, value in d.items():
print('\t' * indent + str(key))
if isinstance(value, dict):
pretty(value, indent+1)
else:
print('\t' * (indent+1) + str(value))
You could try YAML via PyYAML. Its output can be fine-tuned. I'd suggest starting with the following:
print(yaml.dump(data, allow_unicode=True, default_flow_style=False))
The result is very readable; it can be also parsed back to Python if needed.
Edit:
Example:
>>> import yaml
>>> data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> print(yaml.dump(data, default_flow_style=False))
a: 2
b:
x: 3
y:
t1: 4
t2: 5
This way you can print it prettily; for example, if your dictionary is named yasin:
import json
print (json.dumps(yasin, indent=2))
or, safer:
print (json.dumps(yasin, indent=2, default=str))
One of the most Pythonic ways to do that is to use the built-in pprint module.
The argument you need to define the print depth is, as you might expect, depth:
import pprint
pp = pprint.PrettyPrinter(depth=4)
pp.pprint(mydict)
That's it !
Of the answers given so far, I don't see any pretty printer that at least mimics the output of the Python interpreter with very simple formatting, so here's mine:
class Formatter(object):
def __init__(self):
self.types = {}
self.htchar = '\t'
self.lfchar = '\n'
self.indent = 0
self.set_formater(object, self.__class__.format_object)
self.set_formater(dict, self.__class__.format_dict)
self.set_formater(list, self.__class__.format_list)
self.set_formater(tuple, self.__class__.format_tuple)
def set_formater(self, obj, callback):
self.types[obj] = callback
def __call__(self, value, **args):
for key in args:
setattr(self, key, args[key])
formater = self.types[type(value) if type(value) in self.types else object]
return formater(self, value, self.indent)
def format_object(self, value, indent):
return repr(value)
def format_dict(self, value, indent):
items = [
self.lfchar + self.htchar * (indent + 1) + repr(key) + ': ' +
(self.types[type(value[key]) if type(value[key]) in self.types else object])(self, value[key], indent + 1)
for key in value
]
return '{%s}' % (','.join(items) + self.lfchar + self.htchar * indent)
def format_list(self, value, indent):
items = [
self.lfchar + self.htchar * (indent + 1) + (self.types[type(item) if type(item) in self.types else object])(self, item, indent + 1)
for item in value
]
return '[%s]' % (','.join(items) + self.lfchar + self.htchar * indent)
def format_tuple(self, value, indent):
items = [
self.lfchar + self.htchar * (indent + 1) + (self.types[type(item) if type(item) in self.types else object])(self, item, indent + 1)
for item in value
]
return '(%s)' % (','.join(items) + self.lfchar + self.htchar * indent)
To initialize it :
pretty = Formatter()
It can support the addition of formatters for defined types, you simply need to make a function for that like this one and bind it to the type you want with set_formater :
from collections import OrderedDict
def format_ordereddict(self, value, indent):
items = [
self.lfchar + self.htchar * (indent + 1) +
"(" + repr(key) + ', ' + (self.types[
type(value[key]) if type(value[key]) in self.types else object
])(self, value[key], indent + 1) + ")"
for key in value
]
return 'OrderedDict([%s])' % (','.join(items) +
self.lfchar + self.htchar * indent)
pretty.set_formater(OrderedDict, format_ordereddict)
For historical reasons, I keep the previous pretty printer which was a function instead of a class, but they both can be used the same way, the class version simply permit much more :
def pretty(value, htchar='\t', lfchar='\n', indent=0):
nlch = lfchar + htchar * (indent + 1)
if type(value) is dict:
items = [
nlch + repr(key) + ': ' + pretty(value[key], htchar, lfchar, indent + 1)
for key in value
]
return '{%s}' % (','.join(items) + lfchar + htchar * indent)
elif type(value) is list:
items = [
nlch + pretty(item, htchar, lfchar, indent + 1)
for item in value
]
return '[%s]' % (','.join(items) + lfchar + htchar * indent)
elif type(value) is tuple:
items = [
nlch + pretty(item, htchar, lfchar, indent + 1)
for item in value
]
return '(%s)' % (','.join(items) + lfchar + htchar * indent)
else:
return repr(value)
To use it :
>>> a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':pretty,'unicode':u'\xa7',("tuple","key"):"valid"}
>>> a
{'function': <function pretty at 0x7fdf555809b0>, 'tuple': ('a', 'b', 1, 2), 'list': ['a', 'b', 1, 2], 'dict': {'a': 1, 2: 'b'}, 'unicode': u'\xa7', ('tuple', 'key'): 'valid'}
>>> print(pretty(a))
{
'function': <function pretty at 0x7fdf555809b0>,
'tuple': (
'a',
'b',
1,
2
),
'list': [
'a',
'b',
1,
2
],
'dict': {
'a': 1,
2: 'b'
},
'unicode': u'\xa7',
('tuple', 'key'): 'valid'
}
Compared to other versions :
This solution looks directly for object type, so you can pretty print almost everything, not only list or dict.
Doesn't have any dependencies.
Everything is put inside a string, so you can do whatever you want with it.
The class and the function has been tested and works with Python 2.7 and 3.4.
You can have all type of objects inside, this is their representations and not theirs contents that being put in the result (so string have quotes, Unicode string are fully represented ...).
With the class version, you can add formatting for every object type you want or change them for already defined ones.
key can be of any valid type.
Indent and Newline character can be changed for everything we'd like.
Dict, List and Tuples are pretty printed.
I had to pass the default parameter as well, like this:
print(json.dumps(my_dictionary, indent=4, default=str))
and if you want the keys sorted, then you can do:
print(json.dumps(my_dictionary, sort_keys=True, indent=4, default=str))
in order to fix this type error:
TypeError: Object of type 'datetime' is not JSON serializable
which is caused by some of the values in the dictionary being datetimes.
The modern solution here is to use rich. Install with
pip install rich
and use as
from rich import print
d = {
"Alabama": "Montgomery",
"Alaska": "Juneau",
"Arizona": "Phoenix",
"Arkansas": "Little Rock",
"California": "Sacramento",
"Colorado": "Denver",
"Connecticut": "Hartford",
"Delaware": "Dover",
"Florida": "Tallahassee",
"Georgia": "Atlanta",
"Hawaii": "Honolulu",
"Idaho": "Boise",
}
print(d)
The output is nicely indented:
Another option with yapf:
from pprint import pformat
from yapf.yapflib.yapf_api import FormatCode
dict_example = {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5], '4': {'1': '1', '2': '2', '3': [1, 2, 3, 4, 5]}}
dict_string = pformat(dict_example)
formatted_code, _ = FormatCode(dict_string)
print(formatted_code)
Output:
{
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5]
}
}
You can use print-dict
from print_dict import pd
dict1 = {
'key': 'value'
}
pd(dict1)
Output:
{
'key': 'value'
}
Output of this Python code:
{
'one': 'value-one',
'two': 'value-two',
'three': 'value-three',
'four': {
'1': '1',
'2': '2',
'3': [1, 2, 3, 4, 5],
'4': {
'method': <function custom_method at 0x7ff6ecd03e18>,
'tuple': (1, 2),
'unicode': '✓',
'ten': 'value-ten',
'eleven': 'value-eleven',
'3': [1, 2, 3, 4]
}
},
'object1': <__main__.Object1 object at 0x7ff6ecc588d0>,
'object2': <Object2 info>,
'class': <class '__main__.Object1'>
}
Install:
$ pip install print-dict
Disclosure: I'm the author of print-dict
As others have posted, you can use recursion/dfs to print the nested dictionary data and call recursively if it is a dictionary; otherwise print the data.
def print_json(data):
    """Recursively print nested dictionary data, one item per line.

    For a dict, each key is printed and its value is then handled
    recursively; any other value is printed as is.
    """
    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(data, dict):
        for k, v in data.items():
            # print() with a single argument works on Python 2 and 3 alike.
            print(k)
            print_json(v)
    else:
        print(data)
pout can pretty print anything you throw at it, for example (borrowing data from another answer):
data = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
pout.vs(data)
would result in output printed to the screen like:
{
'a': 2,
'b':
{
'y':
{
't2': 5,
't1': 4
},
'x': 3
}
}
or you can return the formatted string output of your object:
v = pout.s(data)
Its primary use case is for debugging so it doesn't choke on object instances or anything and it handles unicode output as you would expect, works in python 2.7 and 3.
disclosure: I'm the author and maintainer of pout.
prettyformatter
Disclaimer: I'm the author of the package.
For a comparison with other formatters, see Other Formatters.
Formatting
Unlike pprint.pprint, prettyformatter spreads vertically more and attempts to align items more.
Unlike json.dumps, prettyformatter is usually more compact and attempts to align dictionary values wherever reasonable.
from prettyformatter import pprint
batters = [
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
]
toppings = [
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
]
data = {"id": "0001", "type": "donut", "name": "Cake", "ppu": 0.55, "batters": batters, "topping": toppings}
pprint(data)
Output:
{
"id" : "0001",
"type" : "donut",
"name" : "Cake",
"ppu" : 0.55,
"batters":
[
{"id": "1001", "type": "Regular"},
{"id": "1002", "type": "Chocolate"},
{"id": "1003", "type": "Blueberry"},
{"id": "1004", "type": "Devil's Food"},
],
"topping":
[
{"id": "5001", "type": None},
{"id": "5002", "type": "Glazed"},
{"id": "5005", "type": "Sugar"},
{"id": "5007", "type": "Powdered Sugar"},
{"id": "5006", "type": "Chocolate with Sprinkles"},
{"id": "5003", "type": "Chocolate"},
{"id": "5004", "type": "Maple"},
],
}
Features
See here for the full documentation.
JSON
Unlike pprint.pprint, prettyformatter supports JSON conversion via the json=True argument. This includes changing None to null, True to true, False to false, and correct use of quotes.
Unlike json.dumps, prettyformatter supports JSON coercion with more data types. This includes changing any dataclass or mapping into a dict and any iterable into a list.
from dataclasses import dataclass
from prettyformatter import PrettyDataclass, pprint
# The decorator must start with "@"; written as "#dataclass(...)" it is only
# a comment and the class never becomes a dataclass.
@dataclass(unsafe_hash=True)
class Point(PrettyDataclass):
    # Two integer coordinates, used below to demonstrate JSON output.
    x: int
    y: int
pprint((Point(1, 2), Point(3, 4)), json=True)
Output:
[{"x": 1, "y": 2}, {"x": 3, "y": 4}]
Customization
Unlike pprint.pprint or json.dumps, prettyformatter supports easy customization with additional types.
Implementing the __pargs__ and/or __pkwargs__ methods for a prettyformatter.PrettyClass subclass allows one to easily customize classes in the form of "cls_name(*args, **kwargs)".
from prettyformatter import PrettyClass
class Dog(PrettyClass):
def __init__(self, name, **kwargs):
self.name = name
def __pkwargs__(self):
return {"name": self.name}
print(Dog("Fido"))
"""
Dog(name="Fido")
"""
print(Dog("Fido"), json=True)
"""
{"name": "Fido"}
"""
Implementing the __pformat__ method allows even more specific implementations of the pformat function.
Using the @prettyformatter.register decorator also allows customizing classes that already exist, in the same way implementing __pformat__ would.
import numpy as np
from prettyformatter import pprint, register
# The decorator must start with "@"; written as "#register(...)" it is only a
# comment and the formatter is never registered for np.ndarray.
@register(np.ndarray)
def pformat_ndarray(obj, specifier, depth, indent, shorten, json):
    """Format a NumPy array for prettyformatter.

    With ``json`` set, the array is converted to nested lists and handed
    back to ``pformat``; otherwise its repr is used, with every element
    formatted by ``specifier`` and continuation lines padded by ``depth``
    spaces.
    """
    if json:
        # pformat is used here but is not among the names imported next to
        # this snippet, so bring it into scope locally.
        from prettyformatter import pformat
        return pformat(obj.tolist(), specifier, depth, indent, shorten, json)
    with np.printoptions(formatter=dict(all=lambda x: format(x, specifier))):
        return repr(obj).replace("\n", "\n" + " " * depth)
pprint(dict.fromkeys("ABC", np.arange(9).reshape(3, 3)))
Output:
{
"A":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"B":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
"C":
array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
}
I took sth's answer and modified it slightly to fit my needs of a nested dictionaries and lists:
def pretty(d, indent=0):
if isinstance(d, dict):
for key, value in d.iteritems():
print '\t' * indent + str(key)
if isinstance(value, dict) or isinstance(value, list):
pretty(value, indent+1)
else:
print '\t' * (indent+1) + str(value)
elif isinstance(d, list):
for item in d:
if isinstance(item, dict) or isinstance(item, list):
pretty(item, indent+1)
else:
print '\t' * (indent+1) + str(item)
else:
pass
Which then gives me output like:
>>>
xs:schema
#xmlns:xs
http://www.w3.org/2001/XMLSchema
xs:redefine
#schemaLocation
base.xsd
xs:complexType
#name
Extension
xs:complexContent
xs:restriction
#base
Extension
xs:sequence
xs:element
#name
Policy
#minOccurs
1
xs:complexType
xs:sequence
xs:element
...
I used what you guys taught me plus the power of decorators to overload the classic print function. Just change the indent to your needs. I added it as a gist in github in case you want to star(save) it.
def print_decorator(func):
    """
    Overload Print function to pretty print Dictionaries

    Returns a wrapper around ``func``. When the wrapper is called with
    exactly one positional argument and that argument is a dict, the dict
    is rendered with ``json.dumps`` (sorted keys, indent of 2, ``str`` as
    fallback serializer) before being passed on. Every other call is
    forwarded unchanged. Keyword arguments are always forwarded.
    """
    import functools
    import json

    @functools.wraps(func)
    def wrapped_func(*args, **kwargs):
        # Check the argument count explicitly: isinstance(*args, dict)
        # raises TypeError for print() and print(a, b).
        if len(args) == 1 and isinstance(args[0], dict):
            rendered = json.dumps(args[0], sort_keys=True, indent=2, default=str)
            return func(rendered, **kwargs)
        return func(*args, **kwargs)
    return wrapped_func
print = print_decorator(print)
Now just use print as usual.
I wrote this simple code to print the general structure of a json object in Python.
def getstructure(data, tab = 0):
    """Print the general structure of a JSON-like object.

    For a dict, every key is printed and the structure of its value is
    printed recursively, indented by four more columns. For a non-empty
    list, only the structure of the first element is printed, followed by
    an ellipsis line. Any other value prints nothing.
    """
    pad = ' ' * tab
    if isinstance(data, dict):
        print(pad + '{')
        for key in data:
            # str() keeps non-string keys (ints, tuples) from raising
            # TypeError in the concatenation.
            print(pad + ' ' + str(key) + ':')
            getstructure(data[key], tab+4)
        print(pad + '}')
    elif isinstance(data, list) and len(data) > 0:
        print(pad + '[')
        getstructure(data[0], tab+4)
        print(pad + ' ...')
        print(pad + ']')
the result for the following data
a = {'list':['a','b',1,2],'dict':{'a':1,2:'b'},'tuple':('a','b',1,2),'function':'p','unicode':u'\xa7',("tuple","key"):"valid"}
getstructure(a)
is very compact and looks like this:
{
function:
tuple:
list:
[
...
]
dict:
{
a:
2:
}
unicode:
('tuple', 'key'):
}
I'm just returning to this question after taking sth's answer and making a small but very useful modification. This function prints all keys in the JSON tree as well as the size of leaf nodes in that tree.
def print_JSON_tree(d, indent=0):
for key, value in d.iteritems():
print ' ' * indent + unicode(key),
if isinstance(value, dict):
print; print_JSON_tree(value, indent+1)
else:
print ":", str(type(d[key])).split("'")[1], "-", str(len(unicode(d[key])))
It's really nice when you have large JSON objects and want to figure out where the meat is. Example:
>>> print_JSON_tree(JSON_object)
key1
value1 : int - 5
value2 : str - 16
key2
value1 : str - 34
value2 : list - 5623456
This would tell you that most of the data you care about is probably inside JSON_object['key1']['key2']['value2'] because the length of that value formatted as a string is very large.
I tried the following and got my desired results
Method 1:
Step 1: Install print_dict by typing the following command in cmd
pip install print_dict
Step 2: Import print_dict as
from print_dict import pd
Step 3: Printing using pd
pd(your_dictionary_name)
Example Output:
{
'Name': 'Arham Rumi',
'Age': 21,
'Movies': ['adas', 'adfas', 'fgfg', 'gfgf', 'vbxbv'],
'Songs': ['sdfsd', 'dfdgfddf', 'dsdfd', 'sddfsd', 'sdfdsdf']
}
Method 2:
We can also use for loop to print the dictionary using items method
for key, Value in your_dictionary_name.items():
print(f"{key} : {Value}")
The easiest is to install IPython and use something like below
from IPython.lib.pretty import pretty
class MyClass:
    """Example class whose ``repr`` is rendered by IPython's ``pretty``."""

    def __repr__(self):
        # `pretty` is the function imported from IPython.lib.pretty.
        return pretty(data)  # replace data with what makes sense
In your case
print(pretty(mydict))
Sth, i sink that's pretty ;)
def pretty(d, indent=0):
    """Print a nested dictionary with upper-cased, right-aligned keys.

    Dict values are wrapped in ``{`` ... ``} # end of KEY #`` and printed one
    tab deeper.  For a list value, every element gets its own
    ``[`` ... ``] # end of KEY #`` section: dict elements are printed
    recursively, any other element is printed as its string form one tab
    deeper (previously such elements raised ``AttributeError``).  All other
    values are printed as ``KEY: value``.

    Works on Python 2 and Python 3: every ``print`` call receives a single
    string and the dictionary is iterated with ``items()``.

    Args:
        d: The dictionary to print.
        indent: Current nesting depth, in tabs.
    """
    tabs = '\t' * indent
    closing_pad = tabs + ' ' * 32
    for key, value in d.items():
        name = str(key).upper()
        if isinstance(value, dict):
            print(tabs + ("%30s: {\n" % name))
            pretty(value, indent + 1)
            print(closing_pad + ('} # end of %s #\n' % name))
        elif isinstance(value, list):
            for val in value:
                print(tabs + ("%30s: [\n" % name))
                if isinstance(val, dict):
                    pretty(val, indent + 1)
                else:
                    # Scalars (or nested lists) have no items(); print as-is.
                    print('\t' * (indent + 1) + str(val))
                print(closing_pad + ('] # end of %s #\n' % name))
        else:
            print(tabs + ("%30s: %s" % (name, str(value))))
This class prints out a complex nested dictionary with sub dictionaries and sub lists.
##
## Recursive class to parse and print complex nested dictionary
##
class NestedDictionary(object):
    """Recursively print a nested structure of dicts, lists and scalars.

    Each nesting level is prefixed with three more dashes than its parent
    (up to the 20 dashes available in the spacer).
    """

    def __init__(self, value):
        # The wrapped object: a dict, a list, or any scalar value.
        self.value = value

    def print(self, depth):
        """Print the wrapped value, prefixing each line with ``depth`` dashes.

        * dict: every key is printed; dict and list values are printed
          beneath their key, three dashes deeper; scalar values are printed
          on the same line as the key.
        * list: every element is printed at the current depth.
        * anything else: the value itself is printed, so scalar list
          elements are no longer silently dropped.

        ``isinstance`` is used so dict/list subclasses (for example
        ``OrderedDict``) are traversed as well.
        """
        spacer = "--------------------"
        marker = spacer[:depth]
        if isinstance(self.value, dict):
            for kk, vv in self.value.items():
                if isinstance(vv, (dict, list)):
                    print(marker, kk)
                    NestedDictionary(vv).print(depth + 3)
                else:
                    print(marker, kk, vv)
        elif isinstance(self.value, list):
            for item in self.value:
                NestedDictionary(item).print(depth)
        else:
            print(marker, self.value)
##
## Instantiate and execute - this prints complex nested dictionaries
## with sub dictionaries and sub lists
## 'weather_com_result' is a complex nested dictionary
MyNest=NestedDictionary(weather_com_result)
MyNest.print(0)
Late, but an answer that does not require any additional libraries. Similar to STH's answer, but a little more robust in formatting and returns a full string that can then be printed:
def pretty_print_dict(
    input_dictionary,
    indent=1,
    depth=0
):
    """Return a tab-indented, brace-delimited string for a nested dict.

    Args:
        input_dictionary: The dictionary to render.
        indent: Number of tabs placed between ``key: `` and a scalar value.
            The setting is now forwarded to nested dictionaries as well
            (it used to fall back to the default of 1 below the top level).
        depth: Number of tabs in front of the opening and closing braces;
            keys are indented one tab further.

    Returns:
        The formatted string.  Entries are separated by ``,`` + newline and
        nested dictionaries start on their own line, two tabs deeper.
    """
    brace_pad = '\t' * depth
    key_pad = '\t' * (depth + 1)
    entries = []
    for key, value in input_dictionary.items():
        if isinstance(value, dict):
            # Nested dicts start on a fresh line, two levels deeper.
            rendered = '\n' + pretty_print_dict(
                value, indent=indent, depth=depth + 2)
        else:
            rendered = '\t' * indent + str(value)
        entries.append(key_pad + str(key) + ': ' + rendered)
    # Joining once avoids the quadratic cost of repeated concatenation and
    # puts commas between entries without a separate flag.
    return brace_pad + '{\n' + ',\n'.join(entries) + '\n' + brace_pad + '}'
Let's see how it handles a dict like test_dict={1: 2, 3: {4: {5: 6}, 7: 8}, 9: 10}.
The string looks like: '{\n\t1: \t2,\n\t3: \n\t\t{\n\t\t\t4: \n\t\t\t\t{\n\t\t\t\t\t5: \t6\n\t\t\t\t},\n\t\t\t7: \t8\n\t\t},\n\t9: \t10\n}'.
Printing that string yields:
{
1: 2,
3:
{
4:
{
5: 6
},
7: 8
},
9: 10
}
I'm a relative Python newbie myself, but I've been working with nested dictionaries for the past couple of weeks and this is what I came up with.
You should try using a stack. Make the keys from the root dictionary into a list of a list:
stack = [ root.keys() ] # Result: [ [root keys] ]
Going in reverse order from last to first, lookup each key in the dictionary to see if its value is (also) a dictionary. If not, print the key then delete it. However if the value for the key is a dictionary, print the key then append the keys for that value to the end of the stack, and start processing that list in the same way, repeating recursively for each new list of keys.
If the value for the second key in each list were a dictionary you would have something like this after several rounds:
[['key 1','key 2'],['key 2.1','key 2.2'],['key 2.2.1','key 2.2.2'],[`etc.`]]
The upside to this approach is that the indent is just \t times the length of the stack:
indent = "\t" * len(stack)
The downside is that in order to check each key you need to hash through to the relevant sub-dictionary, though this can be handled easily with a list comprehension and a simple for loop:
# The last key of every list of keys in the stack.
# list(...) makes this work even if an entry is a dict key view, which
# cannot be indexed on Python 3.
path = [list(keys)[-1] for keys in stack]
sub = root
for p in path:
    sub = sub[p]
if isinstance(sub, dict):
    # Store a real list so keys can later be indexed and deleted.
    stack.append(list(sub))  # And so on
Be aware that this approach will require you to clean up trailing empty lists, and to delete the last key in any list followed by an empty list (which of course may create another empty list, and so on).
There are other ways to implement this approach but hopefully this gives you a basic idea of how to do it.
EDIT: If you don't want to go through all that, the pprint module prints nested dictionaries in a nice format.
Here's a function I wrote based on sth's comment. It works the same as json.dumps with indent, but I'm using tabs instead of spaces for indentation. In Python 3.2+ you can specify indent to be '\t' directly, but not in 2.7.
def pretty_dict(d):
    """Print a nested dictionary in a JSON-like layout indented with tabs.

    Keys and scalar values are both wrapped in double quotes; nested
    dictionaries are wrapped in braces one tab deeper.  Every entry except
    the last one of its dictionary is followed by a comma.

    Only constructs shared by Python 2 and Python 3 are used
    (``print(single_string)`` and ``dict.items()``).

    Args:
        d: The dictionary to print.
    """
    def pretty(d, indent):
        pad = '\t' * indent
        last = len(d) - 1
        for i, (key, value) in enumerate(d.items()):
            # The final entry of a dictionary carries no trailing comma.
            comma = '' if i == last else ','
            if isinstance(value, dict):
                print('{0}"{1}": {{'.format(pad, str(key)))
                pretty(value, indent + 1)
                print('{0}}}{1}'.format(pad, comma))
            else:
                print('{0}"{1}": "{2}"{3}'.format(pad, str(key), value, comma))

    print('{')
    pretty(d, indent=1)
    print('}')
Ex:
>>> dict_var = {'a':2, 'b':{'x':3, 'y':{'t1': 4, 't2':5}}}
>>> pretty_dict(dict_var)
{
"a": "2",
"b": {
"y": {
"t2": "5",
"t1": "4"
},
"x": "3"
}
}
Here's something that will print any sort of nested dictionary, while keeping track of the "parent" dictionaries along the way.
dicList = list()


def prettierPrint(dic, dicList):
    """Print every leaf of a nested dict together with its parent-key path.

    For each non-dict value the function prints ``key: value``, then
    ``key was found in the following path: [...]`` and two blank lines.

    Args:
        dic: The (possibly nested) dictionary to walk.
        dicList: List used to track the keys of the enclosing dictionaries.
            Pass an empty list for a top-level call.  The list is mutated
            during the walk and restored before the function returns.

    Each recursive step pushes the key and pops it again right after the
    nested call, so an empty nested dictionary no longer leaves a stale key
    on the path.  Only constructs common to Python 2 and 3 are used.
    """
    for key, value in dic.items():
        # An empty OrderedDict is reported as a leaf with value None.
        if str(value) == 'OrderedDict()':
            value = None
        if isinstance(value, dict):
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
        else:
            print(str(key) + ": " + str(value))
            print(str(key) + ' was found in the following path: '
                  + str(dicList))
            print('\n')
prettierPrint(dicExample, dicList)
This is a good starting point for printing according to different formats, like the one specified in OP. All you really need to do is operations around the Print blocks. Note that it looks to see if the value is 'OrderedDict()'. Depending on whether you're using something from Container datatypes Collections, you should make these sort of fail-safes so the elif block doesn't see it as an additional dictionary due to its name. As of now, an example dictionary like
example_dict = {'key1': 'value1',
'key2': 'value2',
'key3': {'key3a': 'value3a'},
'key4': {'key4a': {'key4aa': 'value4aa',
'key4ab': 'value4ab',
'key4ac': 'value4ac'},
'key4b': 'value4b'}
will print
key3a: value3a
key3a was found in the following path: ['key3']
key2: value2
key2 was found in the following path: []
key1: value1
key1 was found in the following path: []
key4ab: value4ab
key4ab was found in the following path: ['key4', 'key4a']
key4ac: value4ac
key4ac was found in the following path: ['key4', 'key4a']
key4aa: value4aa
key4aa was found in the following path: ['key4', 'key4a']
key4b: value4b
key4b was found in the following path: ['key4']
~altering code to fit the question's format~
lastDict = list()
dicList = list()


def prettierPrint(dic, dicList):
    """Print nested-dict leaves grouped under their parent-key path.

    The path (``dicList``) is printed once, followed by each leaf value of
    that dictionary on its own line, indented to the width of the printed
    path.  Leaves of the top-level dictionary (empty path) are not printed.

    Args:
        dic: The (possibly nested) dictionary to walk.
        dicList: List tracking the keys of the enclosing dictionaries; pass
            an empty list for a top-level call.  It is mutated during the
            walk and restored before the function returns.

    The module-level ``lastDict`` remembers the path of the previously
    visited leaf so the header is printed only when the path changes.
    """
    global lastDict
    for key, value in dic.items():
        # An empty OrderedDict is treated as a leaf with value None.
        if str(value) == 'OrderedDict()':
            value = None
        if isinstance(value, dict):
            # Push/pop around the recursion keeps the path correct even for
            # empty nested dictionaries.
            dicList.append(key)
            prettierPrint(value, dicList)
            dicList.pop()
            continue
        if dicList:
            if lastDict != dicList:
                print(str(dicList))
            # Always recomputed: it used to be set only when the header was
            # printed, which could leave it unbound when lastDict already
            # matched (for example on a second top-level call).
            spacing = ' ' * len(str(dicList))
            print(spacing + ' ' + str(value))
        lastDict = list(dicList)
Using the same example code, it will print the following:
['key3']
value3a
['key4', 'key4a']
value4ab
value4ac
value4aa
['key4']
value4b
This isn't exactly what is requested in OP. The difference is that a parent^n is still printed, instead of being absent and replaced with white-space. To get to OP's format, you'll need to do something like the following: iteratively compare dicList with the lastDict. You can do this by making a new dictionary and copying dicList's content to it, checking if i in the copied dictionary is the same as i in lastDict, and -- if it is -- writing whitespace to that i position using the string multiplier function.
From this link:
def prnDict(aDict, br='\n', html=0,
            keyAlign='l', sortKey=0,
            keyPrefix='', keySuffix='',
            valuePrefix='', valueSuffix='',
            leftMargin=0, indent=1):
    '''
    Return a string representation of aDict in the following format:
        {
         key1: value1,
         key2: value2,
         ...
        }

    Spaces are added to the keys so that they all have the same width.

    sortKey: set to 1 if you want the keys sorted.  Keys of types that
        cannot be compared with each other (e.g. str and tuple on
        Python 3) are ordered by type name, then by repr.
    keyAlign: either 'l' or 'r', for left or right alignment.
    keyPrefix, keySuffix, valuePrefix, valueSuffix: the prefix and
        suffix to wrap the keys or values.  Good for formatting them
        for an html document (for example, keyPrefix='<b>',
        keySuffix='</b>').
        Note: the keys are padded with spaces to make them equally
        wide.  The prefix and suffix are added OUTSIDE the padded width.
    html: if set to 1, all spaces are replaced with '&nbsp;' and the
        entire output is wrapped in '<code>' and '</code>'.
    br: the line separator.  For html it is suggested to set br to
        '<br>'.  If you want the html source code easy to read, set br
        to '<br>\\n'.
    leftMargin: number of spaces put in front of every output line.
    indent: number of spaces put in front of every key line.

    Returns '{}' for an empty (falsy) aDict.

    version: 04b52
    author : Runsun Pan
    Based on the recipe by Dave Benjamin:
        http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/161403
    '''
    if not aDict:
        return '{}'

    # ------------------------------ sort key
    keys = list(aDict.keys())
    if sortKey:
        try:
            keys.sort()
        except TypeError:
            # Mixed key types are not orderable on Python 3; fall back to a
            # deterministic order that groups keys by type.
            keys.sort(key=lambda k: (type(k).__name__, repr(k)))

    def quoted(x):
        # Wrap strings in single quotes; leave every other object untouched.
        return "'%s'" % x if isinstance(x, str) else x

    ks = [quoted(k) for k in keys]
    vs = [quoted(aDict[k]) for k in keys]
    maxKeyLen = max(len(str(k)) for k in ks)
    align = str.rjust if keyAlign == 'r' else str.ljust

    tmp = ['{']
    for k, v in zip(ks, vs):
        tmp.append(' ' * indent + '%s%s%s:%s%s%s,' % (
            keyPrefix, align(str(k), maxKeyLen), keySuffix,
            valuePrefix, v, valueSuffix))
    tmp[-1] = tmp[-1][:-1]  # remove the ',' in the last item
    tmp.append('}')

    if leftMargin:
        tmp = [' ' * leftMargin + x for x in tmp]

    if html:
        # Non-breaking spaces keep the alignment when rendered by a browser.
        return '<code>%s</code>' % br.join(tmp).replace(' ', '&nbsp;')
    return br.join(tmp)
'''
Example:
>>> a={'C': 2, 'B': 1, 'E': 4, (3, 5): 0}
>>> print prnDict(a)
{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, sortKey=1)
{
'B' :1,
'C' :2,
'E' :4,
(3, 5):0
}
>>> print prnDict(a, keyPrefix="<b>", keySuffix="</b>")
{
<b>'C' </b>:2,
<b>'B' </b>:1,
<b>'E' </b>:4,
<b>(3, 5)</b>:0
}
>>> print prnDict(a, html=1)
<code>{
'C' :2,
'B' :1,
'E' :4,
(3, 5):0
}</code>
>>> b={'car': [6, 6, 12], 'about': [15, 9, 6], 'bookKeeper': [9, 9, 15]}
>>> print prnDict(b, sortKey=1)
{
'about' :[15, 9, 6],
'bookKeeper':[9, 9, 15],
'car' :[6, 6, 12]
}
>>> print prnDict(b, keyAlign="r")
{
'car':[6, 6, 12],
'about':[15, 9, 6],
'bookKeeper':[9, 9, 15]
}
'''
Use this function:
def pretty_dict(d, n=1):
    """Print the keys of a nested dictionary, indented by nesting level.

    Only keys are printed; values are inspected solely to decide whether to
    descend.  Recursion happens for dict values only, so a string value is
    no longer iterated character by character, and non-string keys are
    converted with ``str`` instead of raising ``TypeError``.

    Args:
        d: The dictionary whose keys are printed.
        n: Number of leading spaces for this level; each nested level adds
            four more.
    """
    for k in d:
        print(" " * n + str(k))
        if isinstance(d[k], dict):
            pretty_dict(d[k], n=n + 4)
Call it like this:
pretty_dict(mydict)
This is what I came up with while working on a class that needed to write a dictionary in a .txt file:
#staticmethod
def _pretty_write_dict(dictionary):
def _nested(obj, level=1):
indentation_values = "\t" * level
indentation_braces = "\t" * (level - 1)
if isinstance(obj, dict):
return "{\n%(body)s%(indent_braces)s}" % {
"body": "".join("%(indent_values)s\'%(key)s\': %(value)s,\n" % {
"key": str(key),
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for key, value in obj.items()),
"indent_braces": indentation_braces
}
if isinstance(obj, list):
return "[\n%(body)s\n%(indent_braces)s]" % {
"body": "".join("%(indent_values)s%(value)s,\n" % {
"value": _nested(value, level + 1),
"indent_values": indentation_values
} for value in obj),
"indent_braces": indentation_braces
}
else:
return "\'%(value)s\'" % {"value": str(obj)}
dict_text = _nested(dictionary)
return dict_text
Now, if we have a dictionary like this:
some_dict = {'default': {'ENGINE': [1, 2, 3, {'some_key': {'some_other_key': 'some_value'}}], 'NAME': 'some_db_name', 'PORT': '', 'HOST': 'localhost', 'USER': 'some_user_name', 'PASSWORD': 'some_password', 'OPTIONS': {'init_command': 'SET foreign_key_checks = 0;'}}}
And we do:
print(_pretty_write_dict(some_dict))
We get:
{
'default': {
'ENGINE': [
'1',
'2',
'3',
{
'some_key': {
'some_other_key': 'some_value',
},
},
],
'NAME': 'some_db_name',
'OPTIONS': {
'init_command': 'SET foreign_key_checks = 0;',
},
'HOST': 'localhost',
'USER': 'some_user_name',
'PASSWORD': 'some_password',
'PORT': '',
},
}
There are so many nice implementations here, it made me want to add my own :). I used it for debugging in CircuitPython and MicroPython where json.dumps does not allow using the indent parameter, and pprint is not available as well.
It is implemented with self so can be dropped in into a class, and for each data it is showing the data type, which I find very useful for debugging. Not dependent on any external module.
def pretty_print_dict(self, d, indent=0):
    """Print a nested structure with a type tag around every scalar.

    Dicts are printed as ``{`` ... ``}`` with each key on its own line and
    its value indented beneath it; lists are printed as ``[`` ... ``]``.
    Scalars are wrapped in a tag naming their type: ``<s>`` str, ``<b>``
    bool, ``<i>`` int, ``<f>`` float and ``<?>`` for anything else.

    Args:
        d: The object to print.
        indent: Number of leading spaces for this level.
    """
    INDENT = 2
    pad = ' ' * indent
    if isinstance(d, dict):
        print(pad + '{')
        for key, value in d.items():
            print(f'{" " * (indent + INDENT)}{key}:')
            self.pretty_print_dict(value, indent + 2 * INDENT)
        print(pad + '}')
    elif isinstance(d, list):
        print(pad + '[')
        for item in d:
            self.pretty_print_dict(item, indent + INDENT)
        print(pad + ']')
    elif isinstance(d, str):
        print(pad + '<s>' + d + '</s>')
    elif isinstance(d, bool):
        # bool is a subclass of int, so it must be tested before int;
        # otherwise True/False would be tagged <i>.
        print(pad + '<b>' + str(d) + '</b>')
    elif isinstance(d, int):
        print(pad + '<i>' + str(d) + '</i>')
    elif isinstance(d, float):
        print(pad + '<f>' + str(d) + '</f>')
    else:
        print(pad + '<?>' + str(d) + '</?>')
Usage: self.pretty_print_dict(my_dict)

Categories