get dictionary key by path (string) - python

I have this path that can change from time to time:
'#/path/to/key'
The parts of the path aren't defined, so this value is also fine
'#/this/is/a/longer/path'
I'm splitting this key at '/' so I get
['#', 'path', 'to', 'key']
and I need to get to the key in this path, let's say my dict is exp, so I need to get to here:
exp['path']['to']['key']
how could I possibly know how to get to this key?

Use the recursion, Luke ...
def deref_multi(data, keys):
    """Return the value reached by indexing ``data`` with each key in turn.

    When ``keys`` is empty, ``data`` itself is returned; otherwise the
    function descends one level with the first key and recurses on the rest.
    """
    if not keys:
        return data
    head, rest = keys[0], keys[1:]
    return deref_multi(data[head], rest)
last = deref_multi(exp, ['path','to','key'])
UPDATE: It's been 5+ years, time for an update, this time without using recursion (which may use slightly more resources than if Python does the looping internally). Use whichever is more understandable (and so maintainable) to you:
from functools import reduce
def deref_multi(data, keys):
    """Fold ``keys`` over ``data``, indexing one level deeper per key.

    Returns ``data`` unchanged when ``keys`` is empty.
    """
    def descend(node, key):
        return node[key]

    return reduce(descend, keys, data)

I suggest using python-benedict, a Python dict subclass with full keypath support and many utility methods.
You just need to cast your existing dict:
exp = benedict(exp)
# now your keys can be dotted keypaths too
exp['path.to.key']
Here the library and the documentation:
https://github.com/fabiocaccamo/python-benedict
Note: I am the author of this project

def get_key_by_path(dict_obj, path_string):
    """Return the value in ``dict_obj`` addressed by a '/'-separated path.

    The first path segment (e.g. '#') is discarded; every remaining
    segment is used as a key, one nesting level at a time.
    """
    segments = iter(path_string.split('/'))
    next(segments)  # discard the leading segment; split() always yields one
    node = dict_obj
    for segment in segments:
        node = node[segment]
    return node

There have been some good answers here, but none of them account for paths that aren't correct or paths that at some point result in something that is not subscriptable. The code below will potentially allow you a little more leeway in handling such cases whereas other code so far will just throw an error or have unexpected behavior.
path = '#/path/to/key'
exp = {'path' : { 'to' : { 'key' : "Hello World"}}}
def getFromPath(dictionary, path):
    """Look up the value at a '/'-separated ``path`` inside nested dicts.

    The first path segment (e.g. '#') is ignored.

    Returns the value found. Returns None (after printing
    "Path does not exist!") when an intermediate segment is missing or is
    not a dict. A missing final key also yields None, without a message.
    """
    keys = path.split("/")[1:]  # Gets rid of '#' as it's unnecessary
    curr = dictionary
    for position, key in enumerate(keys, start=1):
        curr = curr.get(key)
        # isinstance rather than an exact type check, so dict subclasses
        # such as OrderedDict can be traversed as well.
        if position < len(keys) and not isinstance(curr, dict):
            print("Path does not exist!")
            return None
    return curr
print(getFromPath(exp, path)) #Your value

>>> exp = {'path': {'to': {'key': 42}}}
>>> my_key = exp
>>> for i in '#/path/to/key'.split('/')[1:]:
>>> my_key = my_key[i]
>>> print(my_key)
42
But I'm a bit curious about how you retrieved such dict

Assuming what you mean by this is that your array ['#', 'path', 'to', 'key'] has indexes leading into a nested starting from index 1, you could iterate over each item in the list starting from the second and just dig deeper through every iteration.
For example, in Python 3 you could do this.
def get_key_from_path(exp, path):
    """Returns the value at the key from <path> in <exp>.

    <path> is the already-split list of keys, e.g.
    ['#', 'path', 'to', 'key']; its first element is skipped.
    """
    cur = exp
    for key in path[1:]:
        # Descend from the current level; indexing `exp` here would look
        # every key up in the root dict instead of walking the nesting.
        cur = cur[key]
    return cur

Using functools in place of recursion:
# Define:
from functools import partial, reduce
# deref(keys, data) is reduce(lambda d, k: d[k], keys, data): starting from
# `data`, index with each key in turn and return the final value. Note the
# argument order: the key sequence comes first, the container second.
deref = partial(reduce, lambda d, k: d[k])
# Use:
exp = {'path': {'to': {'key': 42}}}
deref(('path', 'to', 'key'), exp)
3 year old question, I know... I just really like functools.

Related

Keeping the order of an OrderedDict

I have an OrderedDict that I'm passing to a function. Somewhere in the function it changes the ordering, though I'm not sure why and am trying to debug it. Here is the function and its output:
# Flattens nested dict/list data into an OrderedDict keyed by dotted paths.
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the trailing statements cannot be confirmed from here.
def unnest_data(data):
path_prefix = ''
UNNESTED = OrderedDict()
list_of_subdata = [(data, ''),] # (data, prefix)
while list_of_subdata:
for subdata, path_prefix in list_of_subdata:
for key, value in subdata.items():
# Join prefix and key with '.', dropping a leading '.' and the '.' that
# would otherwise precede a key starting with '['.
path = (path_prefix + '.' + key).lstrip('.').replace('.[', '[')
if not (isinstance(value, (list, dict))):
# Leaf value: record it under its full path.
UNNESTED[path] = value
elif isinstance(value, dict):
# Nested dict: queue it with the path so far as its prefix.
list_of_subdata.append((value, path))
elif isinstance(value, list):
# List: queue every element under the same path.
list_of_subdata.extend([(_, path) for _ in value])
# NOTE(review): this removes from (and the branches above append to) the
# list that the enclosing `for` is iterating over; presumably this is why
# entries are processed in a surprising order. Verify.
list_of_subdata.remove((subdata, path_prefix))
if not list_of_subdata: break
return UNNESTED
Then, if I call it:
from collections import OrderedDict
data = OrderedDict([('Item', OrderedDict([('[#ID]', '288917'), ('Main', OrderedDict([('Platform', 'iTunes'), ('PlatformID', '353736518')])), ('Genres', OrderedDict([('Genre', [OrderedDict([('[#FacebookID]', '6003161475030'), ('Value', 'Comedy')]), OrderedDict([('[#FacebookID]', '6003172932634'), ('Value', 'TV-Show')])])]))]))])
unnest_data(data)
I get an OrderedDict that doesn't match the ordering of my original one:
OrderedDict([('Item[#ID]', '288917'), ('Item.Genres.Genre[#FacebookID]', ['6003172932634', '6003161475030']), ('Item.Genres.Genre.Value', ['TV-Show', 'Comedy']), ('Item.Main.Platform', 'iTunes'), ('Item.Main.PlatformID', '353736518')])
Notice how it has "Genre" before "PlatformID", which is not the way it was sorted in the original dict. What seems to be my error here and how would I fix it?
It’s hard to say exactly what’s wrong without a complete working example. But based on the code you’ve shown, I suspect your problem isn’t with OrderedDict at all, but rather that you’re modifying list_of_subdata while iterating through it, which will result in items being unexpectedly skipped.
>>> a = [1, 2, 3, 4, 5, 6, 7]
>>> for x in a:
... print(x)
... a.remove(x)
...
1
3
5
7
Given your use, consider a deque instead of a list.

Find if value exists in multiple list and get names of it

I found similar question, but I'm not able to convert answer to match my needs.
(Find if value exists in multiple lists)
So, basically, I have multiple lists, and I want to list all of those that contain the current user's username.
import getpass
value = getpass.getuser()
rep_WOHTEL = ['user1','user2','user3']
rep_REPDAY = ['user4','user1','user3']
rep_ZARKGL = ['user3','user1','user2']
rep_WOHOPL = ['user3','user2','user5']
#No idea how code below works
w = next(n for n,v in filter(lambda t: isinstance(t[1],list) and t[0].startswith('rep_'), globals().items()) if value in v)
print(w)
If the current user is user1, I want it to print rep_WOHTEL, rep_REPDAY and rep_ZARKGL. The code above prints only one of them.
How should I change this part of script, to print all I want?
Like I commented in the linked question, iterating through all of globals() or locals() is a bad idea. Store your lists together in a single dictionary or list, and iterate through that instead.
# The user name to look for.
value = "user1"

# All lists live in one mapping, keyed by list name, so they can be
# searched without touching globals().
named_lists = dict([
    ("WOHTEL", ['user1', 'user2', 'user3']),
    ("REPDAY", ['user4', 'user1', 'user3']),
    ("ZARKGL", ['user3', 'user1', 'user2']),
    ("WOHOPL", ['user3', 'user2', 'user5']),
])

# Names of every list that contains the user.
names = [name for name in named_lists if value in named_lists[name]]
print(names)
Result:
['REPDAY', 'ZARKGL', 'WOHTEL']
Checking if value is in all global lists, and if true, print which list(s) contains the required value.
Code:
rep_WOHTEL = ['user1','user2','user3']
rep_REPDAY = ['user4','user1','user3']
rep_ZARKGL = ['user3','user1','user2']
rep_WOHOPL = ['user3','user2','user5']
value = 'user1'
# Snapshot the globals before looping. The loop binds new module-level names
# (n and v), and on Python 3 items() is a live view, so iterating it directly
# raises "RuntimeError: dictionary changed size during iteration".
x = list(globals().items())
for n, v in x:
    # Only module-level lists whose name starts with 'rep_' are candidates.
    if n.startswith('rep_') and isinstance(v, list) and value in v:
        print(n)
Output:
rep_REPDAY
rep_ZARKGL
rep_WOHTEL
More info about the used functions:
globals()
dict.items()
filter()
isinstance()
startswith()

Python convert string to array assignment

In my application I am receiving a string 'abc[0]=123'
I want to convert this string to an array of items. I have tried eval() it didnt work for me. I know the array name abc but the number of items will be different in each time.
I can split the string, get array index and do. But I would like to know if there is any direct way to convert this string as an array insert.
I would greatly appreciate any suggestions.
are you looking for something like
In [36]: s = "abc[0]=123"
In [37]: vars()[s[:3]] = []
In [38]: vars()[s[:3]].append(eval(s[s.find('=') + 1:]))
In [39]: abc
Out[39]: [123]
But this is not a good way to create a variable
Here's a function for parsing urls according to php rules (i.e. using square brackets to create arrays or nested structures):
import urlparse, re
def parse_qs_as_php(qs):
    """Parse a query string using PHP's square-bracket conventions.

    ``a[0]=x&a[1]=y`` becomes a list, ``a[]=x`` appends to one, and
    ``a[b][c]=x`` becomes nested dicts. Values are left as strings.

    Args:
        qs: the raw query string, e.g. ``'one=1&abc[0]=123'``.

    Returns:
        A dict mapping each top-level name to a string, list or dict.
        An empty query string yields ``{}``.
    """
    # Function-scope import so the block runs on both Python 2 and 3.
    try:
        from urllib.parse import parse_qsl
    except ImportError:  # Python 2
        from urlparse import parse_qsl

    def sint(x):
        """Return x as an int when it parses as one, else x unchanged."""
        try:
            return int(x)
        except ValueError:
            return x

    def nested(rest, base, val):
        """Store val in base along the bracketed keys in rest; return base."""
        curr, rest = base, re.findall(r'\[(.*?)\]', rest)
        while rest:
            # Empty brackets ("[]") mean "append": use the next free index.
            # The key is taken before testing `rest`, so the last key
            # receives `val` and every earlier key a nested dict.
            key = sint(rest.pop(0) or len(curr))
            curr = curr.setdefault(key, {} if rest else val)
        return base

    def dtol(d):
        """Recursively turn dicts keyed exactly 0..n-1 into lists."""
        if not hasattr(d, 'items'):
            return d
        # Membership test instead of `sorted(d) == range(len(d))`: that
        # comparison is never true on Python 3 (list vs range) and sorting
        # mixed int/str keys raises TypeError there.
        if d and all(i in d for i in range(len(d))):
            # Recurse into the elements too, so nested lists are converted.
            return [dtol(d[i]) for i in range(len(d))]
        return {k: dtol(v) for k, v in d.items()}

    r = {}
    for key, val in parse_qsl(qs):
        match = re.match(r'^(\w+)(.*)$', key)
        # A key that does not start with a word character is kept verbatim
        # instead of failing on `None.groups()`.
        name, rest = match.groups() if match else (key, '')
        r[name] = nested(rest, r.get(name, {}), val) if rest else val
    return dtol(r)
Example:
qs = 'one=1&abc[0]=123&abc[1]=345&foo[bar][baz]=555'
print parse_qs_as_php(qs)
# {'abc': ['123', '345'], 'foo': {'bar': {'baz': '555'}}, 'one': '1'}
Your other application is doing it wrong. It should not be specifying index values in the parameter keys. The correct way to specify multiple values for a single key in a GET is to simply repeat the key:
http://my_url?abc=123&abc=456
The Python server side should correctly resolve this into a dictionary-like object: you don't say what framework you're running, but for instance Django uses a QueryDict which you can then access using request.GET.getlist('abc') which will return ['123', '456']. Other frameworks will be similar.

How do you convert command line args in python to a dictionary?

I'm writing an application that takes arbitrary command line arguments, and then passes them onto a python function:
$ myscript.py --arg1=1 --arg2=foobar --arg1=4
and then inside myscript.py:
import sys
argsdict = some_function(sys.argv)
where argsdict looks like this:
{'arg1': ['1', '4'], 'arg2': 'foobar'}
I'm sure there is a library somewhere that does this, but I can't find anything.
EDIT: argparse/getopt/optparse is not what I'm looking for. These libraries are for defining an interface that is the same for each invocation. I need to be able to handle arbitrary arguments.
Unless, argparse/optparse/getopt has functionality that does this...
You can use something like this:
myscript.py
import sys
from collections import defaultdict
# Collect "--key=value" command-line arguments into key -> list of values.
d = defaultdict(list)
for a in sys.argv[1:]:
    # partition() never raises: an argument without '=' gets an empty-string
    # value, and only the first '=' separates key from value, so
    # "--expr=a=b" keeps "a=b" intact.
    k, _, v = a.partition('=')
    d[k.lstrip('-')].append(v)
print(dict(d))
Result:
C:\>python myscript.py --arg1=1 --arg2=foobar --arg1=4
{'arg1': ['1', '4'], 'arg2': ['foobar']}
Note: the value will always be a list, but I think this is more consistent. If you really want the final dictionary to be
{'arg1': ['1', '4'], 'arg2': 'foobar'}
then you could just run
for k in (k for k in d if len(d[k])==1):
d[k] = d[k][0]
afterwards.
Here's an example using argparse, although it's a stretch. I wouldn't call this complete solution, but rather a good start.
class StoreInDict(argparse.Action):
    """argparse action that folds "--key=value" strings into a dict of lists.

    Each item in ``values`` is split at its first '='; leading hyphens are
    stripped from the key and the value is appended to that key's list. The
    resulting dict is stored on the namespace under ``self.dest``.

    Raises:
        ValueError: an item contains no '='.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Work on a copy. The namespace initially holds the very object
        # passed as `default=`, so mutating it in place would leak results
        # into every later parse_args() call on the same parser.
        current = getattr(namespace, self.dest, None) or {}
        d = {k: list(v) for k, v in current.items()}
        for opt in values:
            k, v = opt.split("=", 1)
            d.setdefault(k.lstrip("-"), []).append(v)
        setattr(namespace, self.dest, d)
# Prevent argparse from trying to distinguish between positional arguments
# and optional arguments. Yes, it's a hack.
p = argparse.ArgumentParser( prefix_chars=' ' )
# Put all arguments in a single list, and process them with the custom action above,
# which convertes each "--key=value" argument to a "(key,value)" tuple and then
# merges it into the given dictionary.
p.add_argument("options", nargs="*", action=StoreInDict, default=dict())
args = p.parse_args("--arg1=1 --arg2=foo --arg1=4".split())
print args.options
Something like this?
import sys
# Map every "--name=value" argument to the list of values given for it.
argsdict = {}
for farg in sys.argv[1:]:  # argv[0] is the program name, not an argument
    if farg.startswith('--'):
        # partition() copes with "--flag" (no '=': the value is '') and
        # "--k=a=b" (extra '='), where a two-name unpack of split("=")
        # raises ValueError.
        arg, _, val = farg[2:].partition("=")
        argsdict.setdefault(arg, []).append(val)
Slightly different from specified, the value is always a list.
This is what I used today, it accounts for:
--key=val, --key, -key, -key val
def clean_arguments(args):
    """Group command-line style arguments into a mapping of key -> values.

    Recognised forms: ``--key=val``, ``--key``, ``-key`` and ``-key val``.
    Flags given without a value are recorded as ``True``. A bare word in the
    last position is ignored.

    Args:
        args: sequence of argument strings (e.g. ``sys.argv[1:]``).

    Returns:
        A ``defaultdict(list)`` mapping each key (surrounding hyphens and
        spaces stripped) to the list of its values, in order of appearance.

    Raises:
        ValueError: a bare word that is not the value of a preceding
            ``-key`` is followed by another argument.
    """
    ret_args = defaultdict(list)
    # Set when the current "-key" consumed the following item as its value.
    # Tracking this explicitly replaces comparing against the previous `val`,
    # which was unbound when the first argument was a bare word.
    skip_next = False
    for index, a in enumerate(args):
        if skip_next:
            skip_next = False
            continue
        b = args[index + 1] if index < len(args) - 1 else None
        new_key = None
        if a.startswith('--') and '=' in a:
            # double hyphen, equals; split once so the value may contain '='
            new_key, val = a.split('=', 1)
        elif a.startswith('--') or \
                (a.startswith('-') and (not b or b.startswith('-'))):
            # double hyphen, no equals
            # single hyphen, no arg
            val = True
        elif a.startswith('-'):
            # single hyphen, arg
            val = b
            skip_next = True
        else:
            if b is None:
                continue
            raise ValueError('Unexpected argument pair: %s, %s' % (a, b))
        # sanitize the key
        key = (new_key or a).strip(' -')
        ret_args[key].append(val)
    return ret_args
..may I ask why are you trying to rewrite (a bunch of) wheels, when you have:
http://docs.python.org/library/getopt.html
http://docs.python.org/library/optparse.html
http://docs.python.org/library/argparse.html
...etc, etc, etc...
?
EDIT:
In reply to your edit, optparse/argparse (the latter only available in >=2.7) are flexible enough to extend to suit your needs, while maintaining a consistent interface (e.g. a user expects to be able to use both --arg=value and --arg value, -a value and -avalue, etc.; by using a pre-existing library, you don't have to worry about supporting all those syntaxes yourself).
Or something like this) Sorry, if this is stupid, I am a newbie:)
$ python3 Test.py a 1 b 2 c 3
import sys
def somefunc():
    """Build a dict from alternating ``key value`` command-line arguments.

    Reads ``sys.argv[1:]``; a trailing key with no value is ignored.

    Returns:
        dict mapping each key argument to the argument that follows it.
    """
    input_d = sys.argv[1:]
    # Even positions are keys, odd positions are values. zip() stops at the
    # shorter sequence, so an unpaired trailing key is dropped.
    d_sys = dict(zip(input_d[::2], input_d[1::2]))
    # Return the result; it was previously built and then discarded.
    return d_sys
somefunc()
If you really want to write something of your own instead of a proper command-line parsing library, for your input this should work:
dict(map(lambda x: x.lstrip('-').split('='),sys.argv[1:]))
You'll want to add something to catch arguments without an '=' in them.

searching and adding a python list

I have a TList which is a list of lists. I would like to add new items to the list if they are not present before. For instance if item I is not present, then add to Tlist otherwise skip.Is there a more pythonic way of doing it ? Note : At first TList may be empty and elements are added in this code. After adding Z for example, TList = [ [A,B,C],[D,F,G],[H,I,J],[Z,aa,bb]]. The other elements are based on calculations on Z.
item = 'C' # for example this item will given by user
TList = [ [A,B,C],[D,F,G],[H,I,J]]
if not TList:
## do something
# check if files not previously present in our TList and then add to our TList
elif item not in zip(*TList)[0]:
## do something
Since it would appear that the first entry in each sublist is a key of some sort, and the remaining entries are somehow derived from that key, a dictionary might be a more suitable data structure:
vals = {'A': ['B','C'], 'D':['F','G'], 'H':['I','J']}
if 'Z' in vals:
print 'found Z'
else:
vals['Z'] = ['aa','bb']
#aix made a good suggestion to use a dict as your data structure; It seems to fit your use case well.
Consider wrapping up the value checking (i.e. 'Does it exist?') and the calculation of the derived values ('aa' and 'bb' in your example?).
class TList(object):
    """Store of derived values keyed by item, computed once per key.

    Values live in ``self.data`` (a dict). Iterating the object yields its
    keys, which is also what makes ``key in obj`` work.
    """

    def __init__(self):
        self.data = {}

    def __iter__(self):
        """Iterate over the stored keys."""
        return iter(self.data)

    def set(self, key):
        """Compute and store the values for ``key`` unless already present."""
        if key not in self:
            self.data[key] = self.do_something(key)

    def get(self, key):
        """Return the stored values for ``key``; raises KeyError if absent."""
        return self.data[key]

    def do_something(self, key):
        """Return the derived values for ``key`` (placeholder calculation)."""
        print('Calculating values')
        return ['aa', 'bb']

    def as_old_list(self):
        """Return the data as a list of ``[key, value0, value1]`` lists."""
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        return [[k, v[0], v[1]] for k, v in self.data.items()]
t = TList()
## Add some values. If new, `do_something()` will be called
t.set('aval')
t.set('bval')
t.set('aval') ## Note, do_something() is not called
## Get a value
t.get('aval')
## 'in ' tests work
'aval' in t
## Give you back your old data structure
t.as_old_list()
if you need to keep the same data structure, something like this should work:
# create a set of already seen items: the first entry of every sublist.
# (set(zip(*TList)[:1]) would build a set holding a single tuple, so the
# membership test below would always report the item as new; zip() is also
# not subscriptable on Python 3.)
seen = set(sub[0] for sub in TList if sub)
# now start adding new items
if item not in seen:
    seen.add(item)
    # add new sublist to TList
Here is a method using sets and set.union:
# set() takes a single iterable; set(1, 2, 3) raises TypeError.
a = set([1, 2, 3])
b = set([4, 5, 6])
c = set()
master = [a, b, c]
# set.union(*master) merges every set, so one test covers them all.
if 2 in set.union(*master):
    # Found it, do something
    pass
else:
    # Not in set, do something else
    pass
If the reason for testing for membership is simply to avoid adding an entry twice, the set structure uses a.add(12) to add something to a set, but only add it once, thus eliminating the need to test. Thus the following:
>>> a=set()
>>> a.add(1)
>>> a
set([1])
>>> a.add(1)
>>> a
set([1])
If you need the set elsewhere as a list you simply say "list(a)" to get "a" as a list, or "tuple(a)" to get it as a tuple.

Categories