How to make replacement in python's dict? - python

The goal I want to achieve is to exchange all items whose form is #item_name# to the from (item_value) in the dict. I use two dict named test1 and test2 to test my function. Here is the code:
test1={'integer_set': '{#integer_list#?}', 'integer_list': '#integer_range#(?,#integer_range#)*', 'integer_range': '#integer#(..#integer#)?', 'integer': '[+-]?\\d+'}
test2={'b': '#a#', 'f': '#e#', 'c': '#b#', 'e': '#d#', 'd': '#c#', 'g': '#f#', 'a': 'correct'}
def change(pat_dict:{str:str}):
print('Expanding: ',pat_dict)
num=0
while num<len(pat_dict):
inv_pat_dict = {v: k for k, v in pat_dict.items()}
for value in pat_dict.values():
for key in pat_dict.keys():
if key in value:
repl='#'+key+'#'
repl2='('+pat_dict[key]+')'
value0=value.replace(repl,repl2)
pat_dict[inv_pat_dict[value]]=value0
num+=1
print('Result: ',pat_dict)
change(test1)
change(test2)
sometimes I can get correct result like:
Expanding: {'integer': '[+-]?\\d+', 'integer_list': '#integer_range#(?,#integer_range#)*', 'integer_set': '{#integer_list#?}', 'integer_range': '#integer#(..#integer#)?'}
Result: {'integer': '[+-]?\\d+', 'integer_list': '(([+-]?\\d+)(..([+-]?\\d+))?)(?,(([+-]?\\d+)(..([+-]?\\d+))?))*', 'integer_set': '{((([+-]?\\d+)(..([+-]?\\d+))?)(?,(([+-]?\\d+)(..([+-]?\\d+))?))*)?}', 'integer_range': '([+-]?\\d+)(..([+-]?\\d+))?'}
Expanding: {'c': '#b#', 'f': '#e#', 'e': '#d#', 'b': '#a#', 'g': '#f#', 'd': '#c#', 'a': 'correct'}
Result: {'c': '((correct))', 'f': '(((((correct)))))', 'e': '((((correct))))', 'b': '(correct)', 'g': '((((((correct))))))', 'd': '(((correct)))', 'a': 'correct'}
But most of time I get wrong results like that:
Expanding: {'integer_range': '#integer#(..#integer#)?', 'integer': '[+-]?\\d+', 'integer_set': '{#integer_list#?}', 'integer_list': '#integer_range#(?,#integer_range#)*'}
Result: {'integer_range': '([+-]?\\d+)(..([+-]?\\d+))?', 'integer': '[+-]?\\d+', 'integer_set': '{(#integer_range#(?,#integer_range#)*)?}', 'integer_list': '#integer_range#(?,#integer_range#)*'}
Expanding: {'f': '#e#', 'a': 'correct', 'd': '#c#', 'g': '#f#', 'b': '#a#', 'c': '#b#', 'e': '#d#'}
Result: {'f': '(((((correct)))))', 'a': 'correct', 'd': '(((correct)))', 'g': '((((((correct))))))', 'b': '(correct)', 'c': '((correct))', 'e': '((((correct))))'}
How could I update my code to achieve my goal?

Your problem is caused by the fact that python dictionaries are unordered. Try using a OrderedDict instead of dict and you should be fine. The OrderedDict works just like a normal dict but with ordering retained, at a small performance cost.
Note that while you could create an OrderedDict from a dict literal (like I did here at first), that dict would be unordered, so the ordering might not be guaranteed. Using a list of (key, value) pairs preserves the ordering in all cases.
from collections import OrderedDict
test1=OrderedDict([('integer_set', '{#integer_list#?}'), ('integer_list', '#integer_range#(?,#integer_range#)*'), ('integer_range', '#integer#(..#integer#)?'), ('integer', '[+-]?\\d+')])
test2=OrderedDict([('b', '#a#'), ('f', '#e#'), ('c', '#b#'), ('e', '#d#'), ('d', '#c#'), ('g', '#f#'), ('a', 'correct')])
def change(pat_dict:{str:str}):
print('Expanding: ',pat_dict)
num=0
while num<len(pat_dict):
inv_pat_dict = {v: k for k, v in pat_dict.items()}
for value in pat_dict.values():
for key in pat_dict.keys():
if key in value:
repl='#'+key+'#'
repl2='('+pat_dict[key]+')'
value0=value.replace(repl,repl2)
pat_dict[inv_pat_dict[value]]=value0
num+=1
print('Result: ',pat_dict)
change(test1)
change(test2)

Try this one. Your problem is due to mutating starting dict. You need to change its copy.
test1={'integer_set': '{#integer_list#?}', 'integer_list': '#integer_range#(?,#integer_range#)*', 'integer_range': '#integer#(..#integer#)?', 'integer': '[+-]?\\d+'}
test2={'b': '#a#', 'f': '#e#', 'c': '#b#', 'e': '#d#', 'd': '#c#', 'g': '#f#', 'a': 'correct'}
def change(d):
new_d = d.copy()
for k in d.keys():
for nk, v in new_d.items():
if k in v:
new_d[nk] = v.replace('#{}#'.format(k), '({})'.format(new_d[k]))
return new_d
test1 = change(test1)
test2 = change(test2)

Related

Is there another way to extract info with complex/unstructured nested dict format in Python?

Suppose I have an unstructured nested dict as following:
{
'A_brand': {'score1': {'A': 13, 'K': 50}},
'B_brand': {'before_taste': {'score2': {'A': 43, 'D': 23}}, 'after_taste': {'score3': {'H': 36, 'J': 34}}},
'Score4': {'G': 2, 'W': 19}
}
How can I get/show the info like: Which letter get the highest score for each scores?
like:
{'key':'value',
'A_brand/score1':'K',
'B_brand/before_taste/score2':'A',
'B_brand/after_taste/score3':'H',
'Score4':'W'}
What I did was dummies way which I created a new dict and accessed into each path, sorted them by values and selected first one item, then added it into the new dict.
For example:
new_csv={'key':'value'}
first=data['A_brand']['before_lunch_break']['score1']
first_new=sorted(first.items(),key=lambda x: x[1],reverse=True)
new_csv['A_brand/score']=first_new[0][0]
second=data['B_brand']['before_taste']['score2']
second_new=sorted(second.items(),key=lambda x: x[1],reverse=True)
new_csv['B_brand/before_taste/score2']=second_new[0][0]
...
I'm wondering if there is any faster or automatic ways to do that?
You can use a generator with recursion:
data = {'A_brand': {'score1': {'A': 13, 'K': 50}}, 'B_brand': {'before_taste': {'score2': {'A': 43, 'D': 23}}, 'after_taste': {'score3': {'H': 36, 'J': 34}}}, 'Score4': {'G': 2, 'W': 19}}
def get_max(d, c = []):
for a, b in d.items():
if all(not isinstance(i, dict) for i in b.values()):
yield ('/'.join(c+[a]), max(b, key=lambda x:b[x]))
else:
yield from get_max(b, c+[a])
print(dict(get_max(data)))
Output:
{'A_brand/score1': 'K', 'B_brand/before_taste/score2': 'A', 'B_brand/after_taste/score3': 'H', 'Score4': 'W'}

How to return nested dictionary from function

Is it possible to make a function that will return a nested dict depending on the arguments?
def foo(key):
d = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}, }
return d[key]
foo(['c']['d'])
I waiting for:
3
I'm getting:
TypeError: list indices must be integers or slices, not str
I understanding that it possible to return a whole dict, or hard code it to return a particular part of dict, like
if 'c' and 'd' in kwargs:
return d['c']['d']
elif 'c' and 'e' in kwargs:
return d['c']['e']
but it will be very inflexible
When you give ['c']['d'], you slice the list ['c'] using the letter d, which isin't possible. So what you can do is, correct the slicing:
foo('c')['d']
Or you could alter your function to slice it:
def foo(*args):
d = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}, }
d_old = dict(d) # if case you have to store the dict for other operations in the fucntion
for i in args:
d = d[i]
return d
>>> foo('c','d')
3
d = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}, }
def funt(keys):
val = d
for key in keys:
if val:
val = val.get(key)
return val
funt(['c', 'd'])
Additionally to handle key not present state.
One possible solution would be to iterate over multiple keys -
def foo(keys, d=None):
if d is None:
d = {'a': 1, 'b': 2, 'c': {'d': 3, 'e': 4}, }
if len(keys) == 1:
return d[keys[0]]
return foo(keys[1:], d[keys[0]])
foo(['c', 'd'])

Adding multiple key,value pair using dictionary comprehension

for a list of dictionaries
sample_dict = [
{'a': 'woot', 'b': 'nope', 'c': 'duh', 'd': 'rough', 'e': '1'},
{'a': 'coot', 'b': 'nope', 'c': 'ruh', 'd': 'rough', 'e': '2'},
{'a': 'doot', 'b': 'nope', 'c': 'suh', 'd': 'rough', 'e': '3'},
{'a': 'soot', 'b': 'nope', 'c': 'fuh', 'd': 'rough', 'e': '4'},
{'a': 'toot', 'b': 'nope', 'c': 'cuh', 'd': 'rough', 'e': '1'}
]
How do I make a separate dictionary that contains all the key,value pair that match to a certain key. With list comprehension I created a list of all the key,value pairs like this:
container = [[key,val] for s in sample_dict for key,val in s.iteritems() if key == 'a']
Now the container gave me
[['a', 'woot'], ['a', 'coot'], ['a', 'doot'], ['a', 'soot'], ['a', 'toot']]
Which is all fine... but if I want to do the same with dictionaries, I get only a singe key,value pair. Why does this happen ?
container = {key : val for s in sample_dict for key,val in s.iteritems() if key == 'a'}
The container gives only a single element
{'a': 'toot'}
I want the something like
{'a': ['woot','coot','doot','soot','toot']}
How do I do this with minimal change to the code above ?
You are generating multiple key-value pairs with the same key, and a dictionary will only ever store unique keys.
If you wanted just one key, you'd use a dictionary with a list comprehension:
container = {'a': [s['a'] for s in sample_dict if 'a' in s]}
Note that there is no need to iterate over the nested dictionaries in sample_dict if all you wanted was a specific key; in the above I simply test if the key exists ('a' in s) and extract the value for that key with s['a']. This is much faster than looping over all the keys.
Another option:
filter = lambda arr, x: { x: [ e.get(x) for e in arr] }
So, from here, you can construct the dict based on the original array and the key
filter(sample_dict, 'a')
# {'a': ['woot', 'coot', 'doot', 'soot', 'toot']}

Rearranging levels of a nested dictionary in python

Is there a library that would help me achieve the task to rearrange the levels of a nested dictionary
Eg: From this:
{1:{"A":"i","B":"ii","C":"i"},2:{"B":"i","C":"ii"},3:{"A":"iii"}}
To this:
{"A":{1:"i",3:"iii"},"B":{1:"ii",2:"i"},"C":{1:"i",2:"ii"}}
ie first two levels on a 3 levelled dictionary swapped. So instead of 1 mapping to A and 3 mapping to A, we have A mapping to 1 and 3.
The solution should be practical for an arbitrary depth and move from one level to any other within.
>>> d = {1:{"A":"i","B":"ii","C":"i"},2:{"B":"i","C":"ii"},3:{"A":"iii"}}
>>> keys = ['A','B','C']
>>> e = {key:{k:d[k][key] for k in d if key in d[k]} for key in keys}
>>> e
{'C': {1: 'i', 2: 'ii'}, 'B': {1: 'ii', 2: 'i'}, 'A': {1: 'i', 3: 'iii'}}
thank god for dict comprehension
One way to think about this would be to consider your data as a (named) array and to take the transpose. An easy way to achieve this would be to use the data analysis package Pandas:
import pandas as pd
df = pd.DataFrame({1: {"A":"i","B":"ii","C":"i"},
2: {"B":"i","C":"ii"},
3: {"A":"iii"}})
df.transpose().to_dict()
{'A': {1: 'i', 2: nan, 3: 'iii'},
'B': {1: 'ii', 2: 'i', 3: nan},
'C': {1: 'i', 2: 'ii', 3: nan}}
I don't really care about performance for my application of this so I haven't bothered checking how efficient this is. Its based on bubblesort so my guess is ~O(N^2).
Maybe this is convoluted, but essentially below works by:
- providing dict_swap_index a nested dictionary and a list. the list should be of the format [i,j,k]. The length should be the depth of the dictionary. Each element corresponds to which position you'd like to move each element to. e.g. [2,0,1] would indicate move element 0 to position 2, element 1 to position 0 and element 2 to position 1.
- this function performs a bubble sort on the order list and dict_, calling deep_swap to swap the levels of the dictionary which are being swapped in the order list
- deep_swap recursively calls itself to find the level provided and returns a dictionary which has been re-ordered
- swap_two_level_dict is called to swap any two levels in a dictionary.
Essentially the idea is to perform a bubble sort on the dictionary, but instead of swapping elements in a list swap levels in a dictionary.
from collections import defaultdict
def dict_swap_index(dict_, order):
for pas_no in range(len(order)-1,0,-1):
for i in range(pas_no):
if order[i] > order[i+1]:
temp = order[i]
order[i] = order[i+1]
order[i+1] = temp
dict_ = deep_swap(dict_, i)
return dict_, order
def deep_swap(dict_, level):
dict_ = deepcopy(dict_)
if level==0:
dict_ = swap_two_level_dict(dict_)
else:
for key in dict_:
dict_[key] = deep_swap(dict_[key], level-1)
return dict_
def swap_two_level_dict(a):
b = defaultdict(dict)
for key1, value1 in a.items():
for key2, value2 in value1.items():
b[key2].update({key1: value2})
return b
e.g.
test_dict = {'a': {'c': {'e':0, 'f':1}, 'd': {'e':2,'f':3}}, 'b': {'c': {'g':4,'h':5}, 'd': {'j':6,'k':7}}}
result = dict_swap_index(test_dict, [2,0,1])
result
(defaultdict(dict,
{'c': defaultdict(dict,
{'e': {'a': 0},
'f': {'a': 1},
'g': {'b': 4},
'h': {'b': 5}}),
'd': defaultdict(dict,
{'e': {'a': 2},
'f': {'a': 3},
'j': {'b': 6},
'k': {'b': 7}})}),
[0, 1, 2])

Recurse through dictionary of dictionaries, always starting from the top

I have a dictionary of the following, of unknown hierarchical depth:
dict_of_dicts = {'a': {'b': {'c': {}, 'd': {}, 'e': {}}}, 'f': {'g': {}}}
I found the following useful for learning how to recurse this, but I have had trouble modifying the code to get what I want, which is a list of all the paths from the top hierarchal level to the dead ends.
The desired output is:
list = ['a,b,c', 'a,b,d', 'a,b,e', 'f,g']
To start approaching this, I used a DFS approach:
hierarchy = []
for parent in dict_of_dicts:
recurse_dicts(concepts, parent, hierarchy)
def recurse_dicts(concepts, parent, hierarchy):
hierarchy.append(parent)
for child in concepts[parents]:
if len(recurse[node][child].keys()) > 0:
recurse_dicts(recurse[node], child, hierarchy)
else:
return
This resulted in:
hierarchy = ['a', 'b', 'c', 'd', 'e']
which is something, but not quite what I wanted.
Assuming your values are always dictionaries, you could use:
def paths(d, path=(), res=None):
if res is None:
res = []
for key, value in d.iteritems():
if not value:
# end of the line, produce path
res.append(','.join(path + (key,)))
else:
# recurse down to find the end of this path
paths(value, path + (key,), res)
return res
This uses a shared list (produced on first call) to pass the resulting generated paths back to the caller, and builds a path for each recursion step, to add it to the result list whenever an empty value is encountered.
Demo:
>>> dict_of_dicts = {'a': {'b': {'c': {}, 'd': {}, 'e': {}}}, 'f': {'g': {}}}
>>> paths(dict_of_dicts)
['a,b,c', 'a,b,e', 'a,b,d', 'f,g']
The paths are not sorted, because dictionaries have no order; you can still sort on the keys if desired:
for key in sorted(d):
value = d[key]
instead of the for key, value in d.iteritems() loop.
Here's a recursive DFS procedure that keeps track of the path of each branch:
dict_of_dicts = {'a': {'b': {'c': {}, 'd': {}, 'e': {}}}, 'f': {'g': {}}}
def dfs(path, d):
if d == {}:
print path;
for item in d:
dfs(path+[item],d[item])
dfs([],dict_of_dicts)
Output:
['a', 'b', 'c']
['a', 'b', 'e']
['a', 'b', 'd']
['f', 'g']

Categories