Nested insertion/creation of dictionary - python

Given that you have an empty dictionary
data = {}
I have a path and a value
path = "root.sub.item"
value = 12
How could I recursively add objects that do not exist?
def add_value(path, value):
for part in path.split('.'):
if not part in data:
data[part] = {}
The expected output for this would be:
data = {
'root':{
'sub':{
'item': 12
}
}
}
Could somebody help out with this or point me in the right direction?
I'm using Python 3.6.

You can use some another kind of solution like recursive defaultdict, as in this answer.
A quick and stupid example about how it can used:
from collections import defaultdict
def func(rdict, path, value):
items = path.split('.')
d = rdict[items[0]]
for item in items[1:-1]:
d = d[item]
d[items[-1]] = value
nested_dict = lambda: defaultdict(nested_dict)
result = nested_dict()
func(result, 'root.sub.item', 12)
func(result, 'root.moon.value', 1)
assert result['root']['sub']['item'] == 12
assert result['root']['moon']['value'] == 1
assert result['root']['moon']['noop'] != 0

You're almost there, you just need to keep track of how far you are into the tree structure, and a way to know when you're on the last element of the path:
def add_value(path, value):
tmp = data
parts = list(path.split('.'))
for i in range(len(parts) - 1):
part = parts[i]
if not part in tmp:
tmp[part] = {}
tmp = tmp[part]
tmp[parts[-1]] = value

you can try Raymond Hettinger recipe :
source: https://twitter.com/raymondh/status/343823801278140417
from collections import defaultdict
infinity_dict=lambda:defaultdict(infinity_dict)
d=infinity_dict()
d['root']['sub']['item'] = 12

Related

xmltodict returns string instead of OrderedDict [duplicate]

The following Code produces an error, if there is only one "car" in "garage":
import xmltodict
mydict = xmltodict.parse(xmlstringResults)
for carsInGarage in mydict['garage']['car']:
# do something...
The Reason is that mydict['garage']['car'] is only a list if there is more than one element of "car". So I did something like this:
import xmltodict
mydict = xmltodict.parse(xmlstringResults)
if isinstance(mydict['garage']['car'], list):
for carsInGarage in mydict['garage']['car']:
# do something for each car...
else:
# do something for the car
to get the code to run. But for more advanced operations this is no solution.
Does someone know some kind of function to use, even if there is only one element?
This problem is discussed in this issue on Github. The xmltodict package now supports
d = xmltodict.parse(s, force_list={'car'})
Although this still doesn't create an empty list if the field is absent.
This is of course not an elegant way, but this is what i have done to get the code run (if someone hase the same probleme an found this via google):
import xmltodict
def guaranteed_list(x):
if not x:
return []
elif isinstance(x, list):
return x
else:
return [x]
mydict = xmltodict.parse(xmlstringResults)
for carsInGarage in guaranteed_list(mydict['garage']['car']):
# do something...
but i thing i will write my code again and "use XML directly" as one of the comments said.
I am using the combination of
1)
json_dict = xmltodict.parse(s, force_list={'item'})
And
2)
# Removes a level in python dict if it has only one specific key
#
# Examples:
# recursive_skip_dict_key_level({"c": {"a": "b"}}, "c") # -> {"a", "b"}
# recursive_skip_dict_key_level({"c": ["a", "b"]}, "c") # -> ["a", "b"]
#
def recursive_skip_dict_key_level(d, skipped_key):
if issubclass(type(d), dict):
if list(d.keys()) == [skipped_key]:
return recursive_skip_dict_key_level(d[skipped_key], skipped_key)
else:
for key in d.keys():
d[key] = recursive_skip_dict_key_level(d[key], skipped_key)
return d
elif issubclass(type(d), list):
new_list = []
for e in d:
new_list.append(recursive_skip_dict_key_level(e, skipped_key))
return new_list
else:
return d
# Removes None values from a dict
#
# Examples:
# recursive_remove_none({"a": None}) # -> {}
# recursive_remove_none([None]) # -> []
#
def recursive_remove_none(d):
if issubclass(type(d), dict):
new_dict = {}
for key in d.keys():
if not (d[key] is None):
new_dict[key] = recursive_remove_none(d[key])
return new_dict
elif issubclass(type(d), list):
new_list = []
for e in d:
if not (e is None):
new_list.append(recursive_remove_none(e))
return new_list
else:
return d
json_dict = recursive_skip_dict_key_level(json_dict, "item")
json_dict = recursive_remove_none(json_dict)
to interpret any "item" XML-elements as lists.
In addition to the existing answers, xmltodict now also supports the following to force everything to be a list:
xml = xmltodict.parse(s, force_list=True)

How to reduce number of python code?

How to reduce number of code when element does not already exist in dictionary? Otherwise assign it to the object.
Prove of python concept:
class MyClass:
pass
key = "test python"
item = MyClass()
d = {}
if d.get(key) is None:
d[key] = item
else:
item = d[key]
print(item)
Is it possible to remove if else statement?
You can read python documentation -> setdefault:
class MyClass:
pass
key = 'test python'
item = MyClass()
d = {}
item = d.setdefault(key, item)
It`s more pythonic!!!!
Read up on Documentation before you start asking questions...
You want to use this setdefault(key[, default])
You can use dict.setdefault for this:
key = "test python"
item = MyClass()
d = {}
print(d.setdefault(key, item))
Maybe see if using defaultdict (from collections) would help?
I'm not sure exactly what you're trying to do, but I think this is the same behavior?
from collections import defaultdict
class MyClass:
pass
key = "test python"
item = MyClass()
d = defaultdict()
d[key] = item
print(item)
Unrelated, with the above, I think
if not key in d:
or
if not d.get(key):
might be a little more pythonic?

A list of path to a dictionnary

I am trying to populate a python dict from a list of paths (the purpose is to create a ttk.treview) :
paths = ["\\hihi", "\\hihi\\hoho\\huhu", "\\hihi\\hoho\\haha", "\\haha", "\\huhu"]
and I want to create this dictionnary (json serialized here) :
{
"haha": {},
"hihi": {
"hoho": {
"huhu": 0
},
"huhu": {
"haha": 0
}
},
"huhu": {}
}
What is the best way to do this? I tried with a for loop (recursive loop?), with a dict comprehension and with dpath but I have no valid result.
The biginning of my code :
split = paths.split("\\")
del split[0]
dict = {}
?
Thank you very much in advance
You could use defaultdict for this:
def make_empty_default_dict():
return defaultdict(make_empty_default_dict)
Define how you add a path:
def add_path(pth, dct):
if pth:
subdict = dct[pth[0]]
return add_path(pth[1:], subdict)
else:
return dct
Then populate your default dict with keys:
d = make_empty_default_dict()
for path in paths:
d = add_path(path.split("\\"), d)
I have an alternative to the recursive solution. For each path:
put a cursor at the root of the target dict
search sequence: move the cursor forth until you find a 0 or a missing part of the path
build sequence: add an empty dict and move the cursor on that dict until you hit the last part.
the last part needs a special handling for the 0.
Here's the code:
def build_paths(paths, d={}):
for path in paths:
parts = path.split("\\")[1:] # remove the part before \
cursor = d
search = True
for part in parts[:-1]:
if search:
if part not in cursor or not cursor[part]: # not found or 0
cursor[part] = {} # set a new dict
search = False
else:
cursor[part] = {}
cursor = cursor[part] # advance one level deeper in the dict
cursor[parts[-1]] = 0 # close with a 0
return d
It's faster than the recursive vesion of #xtofl, but not that fast. With timeit:
iterative: 6.169872568580601
recursive: 17.209112331781498
You can use recursion with itertools.groupby:
import itertools
paths = ["\\hihi", "\\hihi\\hoho\\huhu", "\\hihi\\hoho\\haha", "\\haha", "\\huhu"]
new_paths = [list(filter(None, i.split('\\'))) for i in paths]
def update_output(f):
def wrapper(_d):
result = f(_d)
final = lambda x, level = 0:{a:{} if not level and not b else b if not b else final(b, level+1) for a, b in x.items()}
return final(result)
return wrapper
#update_output
def full_directory(data):
def files(d):
return {a:(lambda x:0 if len(x) == 1 else files([i[1:] for i in filter(lambda y:len(y) != 1 or y[0] != a, x)]))(list(b)) for a, b in itertools.groupby(sorted(d, key=lambda x:x[0]), key=lambda x:x[0])}
return files(data)
print(full_directory(new_paths))
Output:
{'haha': {}, 'hihi': {'hoho': {'haha': 0, 'huhu': 0}}, 'huhu': {}}
I found this : http://connor-johnson.com/2015/02/28/generate-a-tree-structure-in-python/
It works very well! So the code :
def add(t, path):
for node in path:
t = t[node]
Tree = lambda: defaultdict(Tree)
t = Tree()
paths = ["\\hihi", "\\hihi\\hoho\\huhu", "\\hihi\\hoho\\haha", "\\haha", "\\huhu"]
for path in paths:
split = path.split("\\")
del split[0]
for elt in split:
add(t, split)
dicts = lambda t: { k:dicts(t[k]) for k in t }
print(json.dumps(dicts(t), indent=4))

Dynamic dictionary recursion in Python3

I've been working on this for too long and need some help.
I'm trying to create a dictionary using faker. If it were only that simple.
Initially the dictionary is flat. A key and item. If the first letter of the key is 'B' or 'M' it will then turn that string, into a dictionary with 5 keys and keep doing that until it finds none starting with either of those two letters. I know, there's no recursion happening now. That's why I need help. I'm trying to figure out how to properly recurse rather than hard code the depth.
Starting Dictionary:
{
"Marcia": "https://www.skinner.biz/categories/tags/terms.htm",
"Nicholas": "https://scott-tran.com/",
"Christopher": "https://www.ellis.com/",
"Paul": "https://lopez.com/index/",
"Jennifer": "https://www.sosa.com/wp-content/main/login.php"
}
Marcia should expand to this...
Example:
"Marcia": {
"Alexander": "http://hicks.net/home.html",
"Barry": {
"Jared": "https://www.parker-robinson.com/faq.html",
"Eddie": "https://www.smith-thomas.com/",
"Ryan": "https://www.phillips.org/homepage/",
"Mary": {
"Alex": "http://www.perry.com/tags/explore/post.htm",
"Joseph": "https://www.hansen.com/main/list/list/index/",
"Alicia": "https://www.tran.biz/wp-content/explore/posts/",
"Anna": "http://lee-mclaughlin.biz/search/login/",
"Kevin": "https://blake.net/main/index/"
}
"Evan": "http://carroll.com/homepage.html"
}
"Sharon": "https://www.watson.org/categories/app/login/",
"Hayley": "https://www.parks.com/",
"William": "https://www.wyatt-ware.com/"
}
My code is more manual than dynamic in that I must explicitly know now many levels deep the dictionary goes rather than dynamically figuring it out.
Here's what I have that works to the depth of 2 levels but I want to to find any key starting with 'B' or 'M' and acting on it.
import json
from build_a_dictionary import add_dic
from faker import Faker
dic = add_dic(10)
dic1 = {}
dic2 = {}
def build_dic(dic_len):
dic1 = {}
fake = Faker()
if len(dic1) == 0:
dic1 = add_dic(dic_len)
print(json.dumps(dic1, indent=4))
for k, v in dic1.items():
dic2[k] = add_dic(dic_len)
for key in dic2[k].keys():
for f in key:
if f == 'B' or f == 'M':
dic2[k][key] = add_dic(dic_len)
return dic2
Here is the code from add_dic() I wrote:
import string, time
from faker import Faker #had to install with pip
fake = Faker()
dic = {}
dics = {}
key = ""
def add_dic(x):
dic={}
start = time.time()
if x > 690:
print("Please select a value under 690")
sys.exit()
for n in range(x):
while len(dic) < x:
key = fake.first_name()
if key in dic.keys():
break
val = fake.uri()
dic[key] = val
end = time.time()
runtime = end - start
return dic
You're just doing it wrong, if you want it to be recursive, write the function as a recursive function. It's essentially a custom (recursive) map function for a dictionary. As for your expected dictionary, I'm not sure how you'd ever get Faker to deterministically give you that same output every time. It's random...
Note: There is nothing "dynamic" about this, it's just a recursive map function.
from faker import Faker
import pprint
pp = pprint.PrettyPrinter(indent=4)
fake = Faker()
def map_val(key, val):
if key[0] == 'M' or key[0] == 'B':
names = [(fake.first_name(), fake.uri()) for i in range(5)]
return {k : map_val(k, v) for k,v in names}
else:
return val
#uncomment below to generate 5 initial names
#names = [(fake.first_name(), fake.uri()) for i in range(5)]
#initial_dict = {k : v for k,v in names}
initial_dict = {
"Marcia": "https://www.skinner.biz/categories/tags/terms.htm",
"Nicholas": "https://scott-tran.com/",
"Christopher": "https://www.ellis.com/",
"Paul": "https://lopez.com/index/",
"Jennifer": "https://www.sosa.com/wp-content/main/login.php"
}
dict_2 = {k : map_val(k,v) for k,v in initial_dict.items()}
pp.pprint(dict_2)
Output:
rpg711$ python nested_dicts.py
{ 'Christopher': 'https://www.ellis.com/',
'Jennifer': 'https://www.sosa.com/wp-content/main/login.php',
'Marcia': { 'Chelsea': 'http://francis.org/category.jsp',
'Heather': 'http://www.rodgers.com/privacy.jsp',
'Jaime': 'https://bates-molina.com/register/',
'John': 'http://www.doyle.com/author.htm',
'Kimberly': 'https://www.harris.org/homepage/'},
'Nicholas': 'https://scott-tran.com/',
'Paul': 'https://lopez.com/index/'
}
Thank you all for your help. I've managed to figure it out.
It now builds a dynamic dictionary or dynamic json for whatever need.
import sys, json
from faker import Faker
fake = Faker()
def build_dic(dic_len, dic):
if isinstance(dic, (list, tuple)):
dic = dict(dic)
if isinstance(dic, dict):
for counter in range(len(dic)):
for k,v in dic.items():
if k[0] == 'B' or k[0] == "M":
update = [(fake.first_name(), fake.uri()) for i in range(5)]
update = dict(update)
dic.update({k: update})
return dic
def walk(dic):
for key, item in dic.items():
#print(type(item))
if isinstance(item, dict):
build_dic(5, item)
walk(item)
return dic
a = build_dic(10, ([(fake.first_name(), fake.uri()) for i in range(10)]))
walk(a)
print(json.dumps(a, indent=4))
Recursion is when a function calls itself; when designing a recursive function, it's important to have an exit condition in mind (i.e. when will the recursion stop).
Let's consider a contrived example to increment a number until it reaches a certain value:
def increment_until_equal_to_or_greater_than_value(item, target):
print 'item is', item,
if item < target:
print 'incrementing'
item += 1
increment_until_equal_to_or_greater_than_value(item, target)
else:
print 'returning'
return item
increment_until_equal_to_or_greater_than_value(1, 10)
And the output
item is 1 incrementing
item is 2 incrementing
item is 3 incrementing
item is 4 incrementing
item is 5 incrementing
item is 6 incrementing
item is 7 incrementing
item is 8 incrementing
item is 9 incrementing
item is 10 returning
You can see we've defined our recursive part in the if statement and the exit condition in the else.
I've put together a snippet that shows a recursive function on a nested data structure.
It doesn't solve exactly your issue, this way you can learn by dissecting it and making it fit for your use case.
# our recursive method
def deep_do_something_if_string(source, something):
# if source is a dict, iterate through it's values
if isinstance(source, dict):
for v in source.itervalues():
# call this method on the value
deep_do_something_if_string(v, something)
# if source is a list, tuple or set, iterate through it's items
elif isinstance(source, (list, tuple, set)):
for v in source:
deep_do_something_if_string(v, something)
# otherwise do something with the value
else:
return something(source)
# a test something to do with the value
def print_it_out(value):
print value
# an example data structure
some_dict = {
'a': 'value a',
'b': [
{
'c': 'value c',
'd': 'value d',
},
],
'e': {
'f': 'value f',
'g': {
'h': {
'i': {
'j': 'value j'
}
}
}
}
}
deep_do_something_if_string(some_dict, print_it_out)
And the output
value a
value c
value d
value j
value f

Python: recursively create dictionary from paths

I have several hundred thousand endpoint URLs that I want to generate stats for. For example I have:
/a/b/c
/a/b/d
/a/c/d
/b/c/d
/b/d/e
/a/b/c
/b/c/d
I want to create a dictionary that looks like this
{
'a': {
'b': {
'c': {
'_count': 2
},
'd': {
'_count': 1
}
},
'c': {
'd': {
'_count': 1
}
}
},
'b': {
'c': {
'd': {
'_count': 2
}
},
'd': {
'e': {
'_count': 1
}
}
}
}
Any clever ways to do this?
EDIT
I should mention that the paths are not always 3 parts. There might be
/a/b/c/d/e/f/g/h... etc, etc.
If the paths all look like in your example, this would work:
counts = {}
for p in paths:
parts = p.split('/')
branch = counts
for part in parts[1:-1]:
branch = branch.setdefault(part, {})
branch[parts[-1]] = 1 + branch.get(parts[-1], 0)
This uses dictionary methods like setdefault() and get() to avoid having to write a lot of if-statements.
Note that this will not work if a path that has subdirectories can also appear on it's own. Then it's not clear if the corresponding part of counts should contain a number or another dictionary. In this case it would probably be best to store both a count and a dict for each node, using a tuple or a custom class.
The basic algorithm stays the same:
class Stats(object):
def __init__(self):
self.count = 0
self.subdirs = {}
counts = Stats()
for p in paths:
parts = p.split('/')
branch = counts
for part in parts[1:]:
branch = branch.subdirs.setdefault(part, Stats())
branch.count += 1
With some pretty printing you get:
def printstats(stats, indent=''):
print indent + str(stats.count) + ' times'
for (d, s) in stats.subdirs.items():
print indent + d + ':'
printstats(s, indent + ' ')
>>> printstats(counts)
0 times
a:
0 times
c:
0 times
d:
1 times
b:
0 times
c:
2 times
d:
1 times
...
EDIT:
I've amended my code to fit your last comment, above (no complex data structure now).
def dictizeString(string, dictionary):
while string.startswith('/'):
string = string[1:]
parts = string.split('/', 1)
if len(parts) > 1:
branch = dictionary.setdefault(parts[0], {})
dictizeString(parts[1], branch)
else:
if dictionary.has_key(parts[0]):
# If there's an addition error here, it's because invalid data was added
dictionary[parts[0]] += 1
else:
dictionary[parts[0]] = 1
It will store a list of [frequency, dictionary] for each item.
Test case
>>> d = {}
>>> dictizeString('/a/b/c/d', d)
>>> dictizeString('/a/b/c/d', d)
>>> dictizeString('/a/b/c/d', d)
>>> dictizeString('/a/b/c/d', d)
>>> dictizeString('/a/b/e', d)
>>> dictizeString('/c', d)
>>> d
{'a': {'b': {'c': {'d': 4}, 'e': 1}}, 'c': 1}
Old result, but still near the top in google, so I'll update: You could use dpath-python for this.
$ easy_install dpath
>>> result = {}
>>> for path in my_list_of_paths:
>>> ... dpath.util.set(result, path, SOME_VALUE)
... and that's it. I don't understand the math you're using to precompute those values on the terminus (1, 2, etc), but you could precalculate it and use a dictionary of path-to-value instead of a bare list
>>> x = {'path/name': 0, 'other/path/name': 1}
>>> for (path, value) in x.iteritems():
>>> ... dpath.util.set(result, path, value)
Something like that would work.
Here's my attempt:
class Result(object):
def __init__(self):
self.count = 0
self._sub_results = {}
def __getitem__(self, key):
if key not in self._sub_results:
self._sub_results[key] = Result()
return self._sub_results[key]
def __str__(self):
return "(%s, %s)" % (self.count, self._sub_results)
def __repr__(self):
return str(self)
def process_paths(paths):
path_result = Result()
for path in paths:
components = path[1:].split("/")
local_result = path_result
for component in components:
local_result = local_result[component]
local_result.count += 1
return path_result
I've wrapped up some of the logic into the Result class to try and make the algorithm itself a little clearer.
Based on the answers, I wrote a general function for setting a dictionary value along a path:
def dictPath(path, dictionary, val, sep="/"):
"set a value in a nested dictionary"
while path.startswith(sep):
path = path[1:]
parts = path.split(sep, 1)
if len(parts) > 1:
branch = dictionary.setdefault(parts[0], {})
dictPath(parts[1], branch, val, sep)
else:
dictionary[parts[0]] = val

Categories