How can I reduce the amount of code needed to store an element in a dictionary only when its key does not already exist, and otherwise use the object that is already stored there?
Proof of concept in Python:
class MyClass:
pass
key = "test python"
item = MyClass()
d = {}
if d.get(key) is None:
d[key] = item
else:
item = d[key]
print(item)
Is it possible to remove if else statement?
You can read python documentation -> setdefault:
class MyClass:
pass
key = 'test python'
item = MyClass()
d = {}
item = d.setdefault(key, item)
It's more Pythonic.
Read up on Documentation before you start asking questions...
You want to use this setdefault(key[, default])
You can use dict.setdefault for this:
key = "test python"
item = MyClass()
d = {}
print(d.setdefault(key, item))
Maybe see if using defaultdict (from collections) would help?
I'm not sure exactly what you're trying to do, but I think this is the same behavior?
from collections import defaultdict
class MyClass:
pass
key = "test python"
item = MyClass()
d = defaultdict()
d[key] = item
print(item)
Unrelated, with the above, I think
if not key in d:
or
if not d.get(key):
might be a little more pythonic?
Related
The following Code produces an error, if there is only one "car" in "garage":
import xmltodict
mydict = xmltodict.parse(xmlstringResults)
for carsInGarage in mydict['garage']['car']:
# do something...
The Reason is that mydict['garage']['car'] is only a list if there is more than one element of "car". So I did something like this:
import xmltodict
mydict = xmltodict.parse(xmlstringResults)
if isinstance(mydict['garage']['car'], list):
for carsInGarage in mydict['garage']['car']:
# do something for each car...
else:
# do something for the car
to get the code to run. But for more advanced operations this is no solution.
Does someone know some kind of function to use, even if there is only one element?
This problem is discussed in this issue on Github. The xmltodict package now supports
d = xmltodict.parse(s, force_list={'car'})
Although this still doesn't create an empty list if the field is absent.
This is of course not an elegant way, but this is what I have done to get the code to run (in case someone has the same problem and found this via Google):
import xmltodict
def guaranteed_list(x):
    """Return *x* as a list so callers can always iterate over it.

    A non-empty list is returned unchanged (same object). Any falsy value
    (``None``, ``""``, ``0``, an empty container, ...) becomes a new empty
    list. Every other value is wrapped in a one-element list.
    """
    if isinstance(x, list) and x:
        return x
    return [x] if x else []
mydict = xmltodict.parse(xmlstringResults)
for carsInGarage in guaranteed_list(mydict['garage']['car']):
# do something...
But I think I will rewrite my code and "use XML directly", as one of the comments suggested.
I am using the combination of
1)
json_dict = xmltodict.parse(s, force_list={'item'})
And
2)
# Removes a level in a python dict if it has only one specific key
#
# Examples:
# recursive_skip_dict_key_level({"c": {"a": "b"}}, "c") # -> {"a": "b"}
# recursive_skip_dict_key_level({"c": ["a", "b"]}, "c") # -> ["a", "b"]
#
def recursive_skip_dict_key_level(d, skipped_key):
    """Collapse every dict whose only key is *skipped_key* into its value.

    Args:
        d: Arbitrarily nested structure of dicts, lists and leaf values.
        skipped_key: The key whose single-key wrapper dicts are removed.

    Returns:
        The collapsed structure. Dicts that are kept are updated in place
        and returned as the same object; lists are rebuilt as new lists;
        any other value is returned unchanged.
    """
    if isinstance(d, dict):
        if list(d) == [skipped_key]:
            # The wrapper dict carries nothing else: replace it by its
            # (recursively collapsed) content.
            return recursive_skip_dict_key_level(d[skipped_key], skipped_key)
        # Only existing keys are reassigned, so the dict does not change
        # size while being iterated.
        for key in d:
            d[key] = recursive_skip_dict_key_level(d[key], skipped_key)
        return d
    if isinstance(d, list):
        return [recursive_skip_dict_key_level(e, skipped_key) for e in d]
    return d
# Removes None values from a dict
#
# Examples:
# recursive_remove_none({"a": None}) # -> {}
# recursive_remove_none([None]) # -> []
#
def recursive_remove_none(d):
    """Return a copy of *d* with every ``None`` entry dropped, recursively.

    Args:
        d: Arbitrarily nested structure of dicts, lists and leaf values.

    Returns:
        A new dict (for dict input) or a new list (for list input) without
        ``None`` values or elements, with the same filtering applied to
        everything nested inside. Any other value is returned unchanged.
        The input itself is not modified.
    """
    if isinstance(d, dict):
        return {
            key: recursive_remove_none(value)
            for key, value in d.items()
            if value is not None
        }
    if isinstance(d, list):
        return [recursive_remove_none(e) for e in d if e is not None]
    return d
json_dict = recursive_skip_dict_key_level(json_dict, "item")
json_dict = recursive_remove_none(json_dict)
to interpret any "item" XML-elements as lists.
In addition to the existing answers, xmltodict now also supports the following to force everything to be a list:
xml = xmltodict.parse(s, force_list=True)
I'm given a third-party function mk_config that gives me objects according to a (key, configs) pair. The typical usage is, when you need some of these objects, you say
args = mk_config('args', **configs)
validator = mk_config('validator', **configs)
postproc = mk_config('postproc', **configs)
and then work with said objects.
Since I don't like seeing big blocks of boilerplate, I think, okay, the following might be better:
def mk_configs_tuple(keys, **configs):
    """Build one object per key with ``mk_config`` and return them in order.

    Despite the name, the result is a list, in the same order as *keys*,
    so it can be unpacked positionally.
    """
    made = []
    for name in keys:
        made.append(mk_config(name, **configs))
    return made
args, validator, postproc = mk_configs_tuple(
['args', 'validator', 'postproc'],
**configs)
But there's still a repetition of the key names, and if dealing with 20, this could get out of hand. I could inject these in locals() to be D.R.Y., but most would say that's too dry, so I think I could do this:
def mk_configs_dict(keys, **configs):
    """Map each key to the object ``mk_config`` builds for it.

    Keys are inserted in the order they are given.
    """
    built = {}
    for name in keys:
        built[name] = mk_config(name, **configs)
    return built
m = mk_configs_dict(['args', 'validator', 'postproc'], **configs)
assert list(m) == ['args', 'validator', 'postproc']
Now, dictionaries are fast, but they don't always autosuggest well in IDEs, and it's more cumbersome to write [''] than . (as JSON would). So I think:
def mk_config_obj(keys, **configs):
    """Return a plain object with one attribute per key.

    Each attribute is named after its key and holds the object that
    ``mk_config`` builds for that key.
    """
    class ConfigObj:
        pass

    holder = ConfigObj()
    for name in keys:
        made = mk_config(name, **configs)
        setattr(holder, name, made)
    return holder
config_obj = mk_config_obj(['args', 'validator', 'postproc'], **configs)
assert {'args', 'validator', 'postproc'}.issubset(dir(config_obj))
But then I think that if I can't do this with what Python comes with naturally, I'm missing something.
Maybe a named tuple?
from collections import namedtuple
def mk_config_obj(keys, **configs):
    """Return a ``ConfigObj`` named tuple with one field per key.

    Field values are the objects ``mk_config`` builds for each key, in the
    order the keys are given.
    """
    # The tuple class is created first, then the values.
    config_cls = namedtuple('ConfigObj', keys)
    made = [mk_config(name, **configs) for name in keys]
    return config_cls(*made)
I like to use a list of dictionaries sometimes
list_of_dictionaries = []
a = {}
s = {}
d = {}
f = {}
list_of_dictionaries.append(a)
list_of_dictionaries.append(s)
list_of_dictionaries.append(d)
list_of_dictionaries.append(f)
The best answer I have so far is:
from collections import namedtuple
def mk_config_nt(keys, **configs):
    """Return a ``ConfigNT`` named tuple whose fields are the given keys.

    Each field holds the object ``mk_config`` builds for that key; values
    are passed to the tuple by field name.
    """
    nt_cls = namedtuple('ConfigNT', field_names=keys)
    by_name = {}
    for name in keys:
        by_name[name] = mk_config(name, **configs)
    return nt_cls(**by_name)
Why I like it?
Because I get my keys as attributes:
config_nt = mk_config_nt(['args', 'validator', 'postproc'], **configs)
assert {'args', 'validator', 'postproc'}.issubset(dir(config_nt))
which I can use autosuggest/complete on (though there's two extra annoying method names: count and index).
Because I still have the choice to do this:
args, validator, postproc = mk_config_nt(['args', 'validator', 'postproc'], **configs)
And... it's actually faster than a dict, somehow. Key/attribute access timing (on a three item group):
custom object: 36.9 ns
dict: 32.4 ns
namedtuple: 30.7 ns
Say hello to Enums.
from enum import Enum
class MKConfigKey(Enum):
    """Closed set of keys that can be passed to ``mk_config``.

    Each member's value is the exact key string handed to ``mk_config``.
    """

    args = 'args'
    validator = 'validator'
    # Was 'post-roc', which does not match the 'postproc' key used elsewhere.
    postproc = 'postproc'
# One configured object per enum member, keyed by the member itself.
# `configs` is the keyword-settings mapping used in the other examples
# (the original line referred to an undefined `config`).
config_map = {key: mk_config(key.value, **configs) for key in MKConfigKey}
I want to implement a dict-like data structure that has the following properties:
from collections import UserDict
class TestDict(UserDict):
pass
test_dict = TestDict()
# Create empty dictionaries at 'level_1' and 'level_2' and insert 'Hello' at the 'level_3' key.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
# However, this should not return an empty dictionary but raise a KeyError.
>>> test_dict['unknown_key']
KeyError: 'unknown_key'
The problem, to my knowledge, is that Python does not know whether __getitem__ is being called in the context of setting an item (the first example) or in the context of getting an item (the second example).
I have already seen Python `defaultdict`: Use default when setting, but not when getting, but I do not think that this question is a duplicate, or that it answers my question.
Please let me know if you have any ideas.
Thanks in advance.
EDIT:
It is possible to achieve something similar using:
def set_nested_item(dict_in: "Union[dict, TestDict]", value, keys):
    """Store *value* under the nested path *keys*, creating levels as needed.

    Args:
        dict_in: The mapping to update in place.
        value: The object to store at the end of the path.
        keys: Sequence of keys, outermost first. An empty sequence leaves
            *dict_in* untouched.

    Returns:
        *dict_in* itself, after the update.

    Intermediate keys that are missing, or that hold something other than
    a mutable mapping, are replaced by a fresh empty ``dict``.
    """
    # Local import keeps the snippet self-contained; MutableMapping covers
    # both plain dicts and UserDict subclasses.
    from collections.abc import MutableMapping

    if not keys:
        return dict_in

    node = dict_in
    for key in keys[:-1]:
        child = node[key] if key in node else None
        if not isinstance(child, MutableMapping):
            child = {}
            node[key] = child
        node = child
    node[keys[-1]] = value
    return dict_in
class TestDict(UserDict):
    """Dict-like container that accepts a list of keys as a nested path.

    ``test_dict[['a', 'b']] = value`` is forwarded to ``set_nested_item``,
    which creates the intermediate levels; any other key is stored as
    usual. No ``__init__`` override is needed, so ``UserDict``'s own
    constructor (optional initial data) stays available.
    """

    def __setitem__(self, key, value):
        if isinstance(key, list):
            # A list cannot be a real key (unhashable), so it is treated as
            # a path. set_nested_item updates `self` in place.
            set_nested_item(self, value, key)
        else:
            super().__setitem__(key, value)
test_dict[['level_1', 'level_2', 'level_3']] = 'Hello'
>>> test_dict
{
'level_1': {
'level_2': {
'level_3': 'Hello'
}
}
}
It's impossible.
test_dict['level_1']['level_2']['level_3'] = 'Hello'
is semantically equivalent to:
temp1 = test_dict['level_1'] # Should this line fail?
temp1['level_2']['level_3'] = 'Hello'
But... if determined to implement it anyway, you could inspect the Python stack to grab/parse the calling line of code, and then vary the behaviour depending on whether the calling line of code contains an assignment! Unfortunately, sometimes the calling code isn't available in the stack trace (e.g. when called interactively), in which case you need to work with Python bytecode.
import dis
import inspect
from collections import UserDict
def get_opcodes(code_object, lineno):
    """Utility function to extract Python VM opcodes for line of code.

    Args:
        code_object: The code object to disassemble.
        lineno: Source line number whose instructions are wanted.

    Returns:
        The opcodes (ints) of the first contiguous run of instructions
        belonging to *lineno*, or an empty list when no instruction starts
        that line.
    """
    # Map "offset of first instruction of a line" -> line number. This is
    # used instead of Instruction.starts_line, whose meaning changed from
    # a line number to a bool in Python 3.13.
    line_starts = {
        offset: line
        for offset, line in dis.findlinestarts(code_object)
        if line is not None
    }

    line_ops = []
    collecting = False
    for instruction in dis.get_instructions(code_object):
        if instruction.offset in line_starts:
            if collecting:
                # start of next line
                break
            collecting = line_starts[instruction.offset] == lineno
        if collecting:
            line_ops.append(instruction.opcode)
    return line_ops
class TestDict(UserDict):
    """Dict that auto-creates nested levels only when being assigned into.

    A missing key normally raises ``KeyError``. If the calling line of code
    contains a subscript assignment (``STORE_SUBSCR``), a new empty
    ``TestDict`` is stored under the key and returned instead.
    """

    def __getitem__(self, key):
        try:
            return super().__getitem__(key)
        except KeyError:
            # Look at the caller's frame directly; this avoids building
            # the whole stack (with source context) via inspect.stack().
            current = inspect.currentframe()
            caller = current.f_back if current is not None else None
            if caller is None:
                # No frame information available: behave like a plain dict.
                raise
            try:
                opcodes = get_opcodes(caller.f_code, caller.f_lineno)
            finally:
                # Drop frame references promptly to avoid reference cycles.
                del caller, current
            # STORE_SUBSCR is Python opcode for TOS1[TOS] = TOS2
            if dis.opmap['STORE_SUBSCR'] not in opcodes:
                raise
            # calling line of code contains a dict/array assignment
            default = TestDict()
            super().__setitem__(key, default)
            return default
test_dict = TestDict()
test_dict['level_1']['level_2']['level_3'] = 'Hello'
print(test_dict)
# {'level_1': {'level_2': {'level_3': 'Hello'}}}
test_dict['unknown_key']
# KeyError: 'unknown_key'
The above is just a partial solution. It can still be fooled if there are other dictionary/array assignments on the same line, e.g. other['key'] = test_dict['unknown_key']. A more complete solution would need to actually parse the line of code to figure out where the variable occurs in the assignment.
Given that you have an empty dictionary
data = {}
I have a path and a value
path = "root.sub.item"
value = 12
How could I recursively add objects that do not exist?
def add_value(path, value):
for part in path.split('.'):
if not part in data:
data[part] = {}
The expected output for this would be:
data = {
'root':{
'sub':{
'item': 12
}
}
}
Could somebody help out with this or point me in the right direction?
I'm using Python 3.6.
You can use another kind of solution, such as a recursive defaultdict, as in this answer.
A quick and simple example of how it can be used:
from collections import defaultdict
def func(rdict, path, value):
    """Store *value* in *rdict* at the dotted *path*.

    Args:
        rdict: Nested mapping that creates missing levels on access (for
            example a recursive ``defaultdict``); a plain dict raises
            ``KeyError`` for a missing intermediate level.
        path: Dot-separated key path, e.g. ``'root.sub.item'``.
        value: The object to store under the last path component.
    """
    *parents, leaf = path.split('.')
    # Start from the root so a single-component path is stored directly
    # in `rdict` rather than one level too deep.
    node = rdict
    for name in parents:
        node = node[name]
    node[leaf] = value
nested_dict = lambda: defaultdict(nested_dict)
result = nested_dict()
func(result, 'root.sub.item', 12)
func(result, 'root.moon.value', 1)
assert result['root']['sub']['item'] == 12
assert result['root']['moon']['value'] == 1
assert result['root']['moon']['noop'] != 0
You're almost there, you just need to keep track of how far you are into the tree structure, and a way to know when you're on the last element of the path:
def add_value(path, value, target=None):
    """Store *value* at the dotted *path*, creating missing levels.

    Args:
        path: Dot-separated key path, e.g. ``'root.sub.item'``.
        value: The object to store under the last path component.
        target: Dict to update. Defaults to the module-level ``data``
            dictionary, as in the original version.
    """
    node = data if target is None else target
    *parents, leaf = path.split('.')
    for part in parents:
        # Reuse the existing level or create an empty one, then descend.
        node = node.setdefault(part, {})
    node[leaf] = value
you can try Raymond Hettinger recipe :
source: https://twitter.com/raymondh/status/343823801278140417
from collections import defaultdict
infinity_dict=lambda:defaultdict(infinity_dict)
d=infinity_dict()
d['root']['sub']['item'] = 12
I have a text file abc.txt:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
I need to parse these statements and output should be in nested dictionary as below:
{'abc':{'pqr':{'lmn':{'xyz':{'pass':1},'bcd':{'pass':1}}}}}
where 1 is 'pass' count.
I'm able to do as much as this:
import re
d={}
p=re.compile('[a-zA-z]+')
for line in open('abc.txt'):
for key in p.findall(line):
d['key']={}
Check out the setdefault method on dictionaries.
d = {}
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('xyz', {})['pass'] = 1
d.setdefault('pqr', {}).setdefault('lmn', {}).setdefault('bcd', {})['pass'] = 1
d
gives
{'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 1}}}}
Here's an updated version of my answer in which leaves of the tree data-structure are now different from those in rest of it. Instead of the tree being strictly a dict-of-nested-dicts, the "leaves" on each branch are now instances of a different subclass of dict named collections.Counter which are useful for counting the number of times each of their keys occur. I did this because of your response to my question about what should happen if the last part of each line was something other than ":pass" (which was "we have to put new count for that key").
Nested dictionaries are often called Tree data-structures and can be defined recursively — the root is a dictionary as are the branches. The following uses a dict subclass instead of a plain dict because it makes constructing them easier since you don't need to special case the creation of the first branch of next level down (except I still do when adding the "leaves" because they are a different subclass, collections.Counter).
from collections import Counter
from functools import reduce
import re
# (Optional) trick to make Counter subclass print like a regular dict.
class Counter(Counter):
    """``collections.Counter`` whose repr is that of a plain dict."""

    def __repr__(self):
        as_plain_dict = dict(self)
        return repr(as_plain_dict)
# Borrowed from answer # https://stackoverflow.com/a/19829714/355230
class Tree(dict):
    """Dict that creates a nested branch of its own type for missing keys."""

    def __missing__(self, key):
        # Called by dict.__getitem__ when `key` is absent: store a fresh
        # branch under it and return that branch.
        branch = type(self)()
        self[key] = branch
        return branch
# Utility functions based on answer # https://stackoverflow.com/a/14692747/355230
def nested_dict_get(nested_dict, keys):
    """Follow *keys* one level at a time and return what is found there.

    With an empty *keys* the starting object itself is returned.
    """
    node = nested_dict
    for key in keys:
        node = node[key]
    return node
def nested_dict_set(nested_dict, keys, value):
    """Store *value* at the path *keys*, outermost key first.

    The parent level is located with ``nested_dict_get`` and the last key
    is then assigned on it.
    """
    parent = nested_dict_get(nested_dict, keys[:-1])
    leaf = keys[-1]
    parent[leaf] = value
def nested_dict_update_count(nested_dict, keys):
    """Count one occurrence of ``keys[-1]`` under the path ``keys[:-1]``.

    The leaf at ``keys[:-1]`` is a ``Counter``: an existing one is updated,
    otherwise a new one is created there. An empty *keys* (for example
    from a blank input line) is ignored instead of raising ``IndexError``.
    Non-empty input is expected to contain at least one path component
    followed by the counted key.
    """
    if not keys:
        return  # Nothing to count.

    counter = nested_dict_get(nested_dict, keys[:-1])
    if counter:  # Update existing Counter.
        counter.update([keys[-1]])
    else:  # Create a new Counter.
        nested_dict_set(nested_dict, keys[:-1], Counter([keys[-1]]))
d = Tree()
# Words are runs of ASCII letters. The original class `[a-zA-z]` also
# matched the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
pat = re.compile(r'[a-zA-Z]+')
with open('abc.txt') as file:
    for line in file:
        # findall already returns a list: path components, then the counted word.
        nested_dict_update_count(d, pat.findall(line.rstrip()))
print(d)  # Prints like a regular dict.
To test the leaf-counting capabilities of the revised code, I used the following test file which includes the same line twice, once ending again with :pass and another ending in :fail.
Expanded abc.txt test file:
abc/pqr/lmn/xyz:pass
abc/pqr/lmn/bcd:pass
abc/pqr/lmn/xyz:fail
abc/pqr/lmn/xyz:pass
Output:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'fail': 1, 'pass': 2}}}}}
If i understand your question:
sources = ["abc/pqr/lmn/xyz:pass", "abc/pqr/lmn/bcd:pass", "abc/pqr/lmn/xyz:pass"]
def prepare_source(source):
    """Split ``'a/b/c:value'`` into ``(['a', 'b', 'c'], 'value')``.

    The text must contain exactly one ``':'``; otherwise unpacking the
    split result raises ``ValueError``.
    """
    path, value = source.split(':')
    return path.split('/'), value
def add_key(elements, value):
    """Build a chain of nested dicts ending in ``{value: 1}``.

    ``add_key(['a', 'b'], 'pass')`` gives ``{'a': {'b': {'pass': 1}}}``.
    An empty *elements* raises ``IndexError``.
    """
    # Build from the innermost level outwards.
    nested = {elements[-1]: {value: 1}}
    for name in reversed(elements[:-1]):
        nested = {name: nested}
    return nested
# base merge function get from here:
# http://stackoverflow.com/questions/7204805/dictionaries-of-dictionaries-merge
def merge(a, b, path=None):
    """Merge *b* into *a* in place and return *a*.

    Keys only in *b* are copied over (by reference). For keys present in
    both: two dicts are merged recursively, two ints are added together,
    and any other combination raises ``Exception`` naming the dotted path
    of the conflict. *path* is the list of keys leading to the current
    level and is only used for that message.
    """
    trail = [] if path is None else path
    for key, incoming in b.items():
        if key not in a:
            a[key] = incoming
            continue
        current = a[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            merge(current, incoming, trail + [str(key)])
        elif isinstance(current, int) and isinstance(incoming, int):
            a[key] += incoming
        else:
            raise Exception('Conflict at %s' % '.'.join(trail + [str(key)]))
    return a
# Fold every source line into one nested dict of counts.
result = dict()
for source in sources:
    result = merge(result, add_key(*prepare_source(source)))
# print() call form: the original `print result` is Python 2 only syntax.
print(result)
Output will be:
{'abc': {'pqr': {'lmn': {'bcd': {'pass': 1}, 'xyz': {'pass': 2}}}}}