How to convert nested dictionary 'values' to list? - python

I have tried this code:
dictt = {'a':{'d':[4,5,6]},'b':2,'c':3,'d':{'e':[7,8,9]}}
def dic(a):
# Collects the values of dict `a`, recursing into values that are dicts.
lst = []
for i in a.values():
if type(i) is dict:
# append() adds the recursive result as ONE element, so every nested dict
# shows up as a nested list in the output.
lst.append(dic(i))
#lst.append(i)
else:
# Non-dict values (including lists) are appended unchanged.
lst.append(i)
return lst
o/p:
[[[4, 5, 6]], 2, 3, [[7, 8, 9]]]
Expected o/p:
[4,5,6,2,3,7,8,9]

You need to use list.extend to add the individual values to the list rather than the nested list itself, and you also need to handle values that are lists:
def dic(a):
    """Flatten the values of a (possibly nested) dict into one flat list.

    Values that are dicts are flattened recursively, values that are lists
    are spliced in element by element, and every other value is appended
    unchanged. Order follows dict iteration order.
    """
    lst = []
    for i in a.values():
        if isinstance(i, dict):
            # extend(), not append(): splice the flattened sub-values in
            # instead of nesting them as a single element.
            lst.extend(dic(i))
        elif isinstance(i, list):
            lst.extend(i)
        else:
            lst.append(i)
    return lst
dictt = {'a': {'d': [4, 5, 6]}, 'b': 2, 'c': 3, 'd': {'e': [7, 8, 9]}}
x = dic(dictt)
print(x) # [4, 5, 6, 2, 3, 7, 8, 9]

I think this is the fastest way
from typing import Iterable
def flatten(items):
    """Yield the leaf items of an arbitrarily nested iterable, depth-first.

    Strings and bytes are iterable but are treated as leaves, otherwise
    they would be split into characters (and recurse forever on
    one-character strings).
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x
def f(v):
    """Recursively replace every dict in *v* by the list of its values.

    Anything that is not a dict is returned unchanged.
    """
    return [f(x) for x in v.values()] if isinstance(v, dict) else v
list(flatten(f(dictt)))
# [4, 5, 6, 2, 3, 7, 8, 9]
flatten function from: How to make a flat list out of a list of lists

Try this:
dictis = {
'a':{
'd':[4,5,6]
}
,'b':2
,'c':3
,'d':{
'e':[7,8,9]
}
}
def value(obj):
    """Yield every leaf value of a nested dict as one flat stream.

    Nested dicts are walked recursively at any depth, list values are
    yielded element by element, and any other value is yielded as-is.
    """
    # The loop variable is deliberately not called ``value`` so it does not
    # shadow this function, which is needed for the recursive call.
    for item in obj.values():
        if isinstance(item, dict):
            yield from value(item)
        elif isinstance(item, list):
            yield from item
        else:
            yield item
listis = [x for x in value(dictis)]
print(listis)

Related

How to consolidate the function of local_sum (list of mixed numbers and sublists) in one loop?

Given a list containing numbers and sublists, the goal of the function local_sum() is to compute all local sums (range sums) under these conditions: 1) if an item is a sublist, take its sum and keep it wrapped in a list; 2) if items are consecutive numbers, just add them up.
[Notes] a local variable just what's needed - enables keeping the running sum of sublists. Thanks.
Examples of in/outputs shown as following:
from itertools import groupby
A = [3, 4, 2, [10, 22, 32, 14], 9, 8, 6, [22, 11]]
expected = [9, 23, [111]]
ans = [9, [78], 23, [33]] <---- currently getting
# is there a way to get this expected result in one shot, instead of doing another processing?
# my current working code:
def local_sum(L):
    """Sum each run of plain numbers and each run of sublists separately.

    A run of consecutive numbers becomes a single number; a run of
    consecutive sublists becomes a one-element list holding the total of
    all their items. Runs appear in the result in their original order.
    """
    ans = []
    for k, g in groupby(L, key=lambda x: isinstance(x, list)):
        if k:  # the group is a run of sublists
            # sum(*g) only worked for a run of exactly one sublist;
            # summing each sublist first handles adjacent sublists too.
            ans.append([sum(map(sum, g))])
        else:  # the group is a run of single numbers
            ans.append(sum(g))
    return ans
With only one example, it seems you want to keep a running total of everything in a sublist:
from itertools import groupby
A = [3, 4, 2, [10, 22, 32, 14], 9, 8, 6, [22, 11]]
def local_sum(L):
    """Sum each run of numbers and fold all sublists into one trailing total.

    Every run of consecutive plain numbers contributes its sum, in order.
    The items of all sublists are accumulated into a single running total
    that is appended last as a one-element list.
    """
    ans = []
    sub = 0
    # Iterate the argument L, not the module-level example list A.
    for k, g in groupby(L, key=lambda x: isinstance(x, list)):
        if k:
            # Sum each sublist first so adjacent sublists do not break sum().
            sub += sum(map(sum, g))
        else:
            ans.append(sum(g))
    ans.append([sub])
    return ans
print(local_sum(A))
Output:
[9, 23, [111]]
Alternatively:
# Sum each run of consecutive ints in A, then append the grand total of all
# sublists as a one-element list.
int_ = 0
has_run = False  # True while a run of ints is being accumulated
int_list = []
for x in A:
    if isinstance(x, int):
        int_ += x
        has_run = True
    elif has_run:
        # A sublist ends the current run; only flush if there was one, so
        # adjacent sublists do not add spurious zeros.
        int_list.append(int_)
        int_, has_run = 0, False
if has_run:
    # Flush a run of ints that reaches the end of A.
    int_list.append(int_)
oneliner = [sum(sum(x) for x in A if isinstance(x, list))]
int_list.append(oneliner)
Gives:
[9, 23, [111]]

How to group list of duplicate continuous value in a list with a recursion function?

I want to group consecutive values that form a duplicated sequence, with each value belonging to only one group. See my example below:
Note: results is an index of the value in test_list
test_list = ["1","2","1","2","1","1","5325235","2","62623","1","1"]
--->results = [[[0, 1], [2, 3]],
[[4, 5], [9, 10]]]
test_list = ["1","2","1","1","2","1","5325235","2","62623","1","2","1","236","2388","626236437","1","2","1","236","2388"]
--->results = [[[9, 10, 11, 12, 13], [15, 16, 17, 18, 19]],
[[0, 1, 2], [3, 4, 5]]]
I build a recursive function:
def group_duplicate_continuous_value(list_label_group):
# Recursively looks for repeated runs of adjacent labels and returns a dict of
# index groups (or None when nothing is found at the base level).
# NOTE(review): indentation was lost in this listing; the block nesting must
# be restored before the function can run.
#
# Build one combined label per adjacent pair (i, j): the integer before the
# first "_" of j minus that of i, followed by "_<j>_<i>".
list_flag_grouping = [str(int(j.split("_")[0]) - int(i.split("_")[0])) +f"_{j}_{i}" for i,j in zip(list_label_group,list_label_group[1:])]
# Keep the combined labels that occur more than once.
counter_elements = Counter(list_flag_grouping)
list_have_duplicate = [k for k,v in counter_elements.items() if v > 1]
if len(list_have_duplicate) > 0:
list_final_index = group_duplicate_continuous_value(list_flag_grouping)
# Each combined label covers one more position than the level below, so
# extend every index group by (last index + 1).
for k, v in list_final_index.items():
temp_list = [v[i] + [v[i][-1] + 1] for i in range(0,len(v))]
list_final_index[k] = temp_list
check_first_cursive = list_label_group[0].split("_")
# When the labels at this level are themselves combined labels (they contain
# "_"), also collect the duplicates found at this level and keep only those
# that share no index with the groups already found.
if len(check_first_cursive) > 1:
list_temp_index = find_index_duplicate(list_label_group)
list_duplicate_index = list_final_index.values()
list_duplicate_index = [val for sublist in list_duplicate_index for val1 in sublist for val in val1]
for k,v in list_temp_index.items():
list_index_v = [val for sublist in v for val in sublist]
if any(x in list_index_v for x in list_duplicate_index) is False:
list_final_index[k] = v
return list_final_index
else:
# No repeated combined label: stop recursing.
if len(list_label_group) > 0:
check_first_cursive = list_label_group[0].split("_")
if len(check_first_cursive) > 1:
list_final_index = find_index_duplicate(list_label_group)
return list_final_index
# NOTE(review): this path returns None, while the recursive caller above calls
# .items() on the result -- confirm that case cannot be reached from there.
list_final_index = None
return list_final_index
Support function:
def find_index_duplicate(list_data):
    """Map every value that occurs more than once to its positions.

    Each position is wrapped in its own one-element list, so the result
    looks like ``{value: [[i], [j], ...]}``. Values that occur only once
    are left out.
    """
    # A plain dict with setdefault() avoids relying on a defaultdict import
    # that this snippet never shows; the returned object is a plain dict
    # either way.
    dups = {}
    for i, e in enumerate(list_data):
        dups.setdefault(e, []).append([i])
    return {key: val for key, val in dups.items() if len(val) > 1}
But when I run it with test_list = [5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1,2,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,1,2,5,5,5], it is very slow and runs out of memory (~6GB). I know one reason is a stack overflow in my recursive function group_duplicate_continuous_value, but I don't know how to fix it.
You can create a dict of lists, where every item from the original list is a key in the dict, and every key is mapped to the list of its indices in the original list. For instance, your list ["1","3","5","5","7","1","3","5"] would result in the dict {"1": [0, 5], "3": [1, 6], "5": [2, 3, 7], "7": [4]}.
Creating a dict of lists in this way is very idiomatic in python, and fast, too: it can be done by iterating just once on the list.
def build_dict(l):
    """Map each distinct item of *l* to the list of indices where it occurs.

    Indices are collected in a single pass and are therefore in ascending
    order; keys appear in order of first occurrence.
    """
    d = {}
    for i, x in enumerate(l):
        d.setdefault(x, []).append(i)
    return d
l = ["1","3","5","5","7","1","3","5"]
d = build_dict(l)
print(d)
# {'1': [0, 5], '3': [1, 6], '5': [2, 3, 7], '7': [4]}
Then you can iterate on the dict to build two lists of indices:
def build_index_results(l):
    """Return two parallel lists of indices for the values repeated in *l*.

    For every value that occurs at least twice, its first index goes into
    the first list and its second index into the second list. Third and
    later occurrences are ignored.
    """
    d = build_dict(l)
    idx1, idx2 = [], []
    for v in d.values():
        if len(v) > 1:
            idx1.append(v[0])
            idx2.append(v[1])
    return idx1, idx2
print(build_index_results(l))
# ([0, 1, 2], [5, 6, 3])
Or using zip:
from operator import itemgetter
def build_index_results(l):
    """Return ``[first_indices, second_indices]`` for values repeated in *l*.

    Each element of the result is a tuple. The result is an empty list
    when no value occurs more than once.
    """
    d = build_dict(l)
    # (first index, second index) for every value seen at least twice.
    pairs = map(itemgetter(0, 1), (v for v in d.values() if len(v) > 1))
    # zip(*pairs) transposes the pairs into two parallel tuples.
    return list(zip(*pairs))
print(build_index_results(l))
# [(0, 1, 2), (5, 6, 3)]
I can't resist showcasing more_itertools.map_reduce for this:
from more_itertools import map_reduce
from operator import itemgetter
def build_index_results(l):
    """Return ``[first_indices, second_indices]`` for values repeated in *l*.

    Same result as the zip-based version, with the grouping done by
    ``more_itertools.map_reduce``.
    """
    d = map_reduce(
        enumerate(l),
        keyfunc=itemgetter(1),    # group the (index, item) pairs by item
        valuefunc=itemgetter(0),  # and collect the indices
        # Keep the first two indices of repeated items; mark the rest None.
        reducefunc=lambda v: v[:2] if len(v) > 1 else None,
    )
    # filter(None, ...) drops the None markers of non-repeated items.
    return list(zip(*filter(None, d.values())))
print(build_index_results(l))
# [(0, 1, 2), (5, 6, 3)]

How can I merge consecutive string items in a sublist in Python?

I want to remove special character from all the elements of a sublist in a list. How can I do so?
list = [['abch', 'def',1,11],['cdjje', 'fef',2,22],['oefg', 'pqkjbr',3,33]]
'abch', 'def' should become one string, i.e. the ', ' between the two strings is supposed to be replaced with a space.
Output is expected to be:
list = [['abch def',1,11],['cdjje fef',2,22],['oefg pqkjbr',3,33]]
I am trying below code but it doesn't seem to work:
value_list=[]
for idx, item in enumerate(list):
# NOTE: value_list is still empty here, so assigning to value_list[idx] raises
# IndexError; the new string would have to be added with value_list.append().
# item[0:-2] takes every element except the last two, and str() turns that
# slice into its printed form before the substitution is applied.
value_list[idx] = re.sub(r"', '"," ", str(item[0:-2]))
print(value_list)
I hope this is what you are looking for. $ has a special meaning in a regex (it matches the end of the string), so escape it with a backslash: \$.
# Build brand-new inner lists: l_list.copy() is only a shallow copy, so
# assigning into its rows would also modify the rows of l_list itself.
output_list = [
    # Remove every literal "$" from string cells; leave other cells as-is.
    [re.sub(r"\$", "", i_item) if isinstance(i_item, str) else i_item
     for i_item in l_item]
    for l_item in l_list
]
print(output_list)
You can use
def merge_consecutive_strings(items, sep=" "):
    """Return a copy of *items* with each run of adjacent strings joined.

    Adjacent strings are joined with *sep*; non-string items are kept
    unchanged and break a run.
    """
    merged = []
    for item in items:
        # Compare with the last element of the OUTPUT list: after a merge the
        # output is shorter than the input, so the input index is no longer
        # valid for it.
        if isinstance(item, str) and merged and isinstance(merged[-1], str):
            merged[-1] += f"{sep}{item}"
        else:
            merged.append(item)
    return merged


lst = [['abc', 'def', 1, 11], ['cde', 'fef', 2, 22], ['efg', 'pqr', 3, 33]]
result = [merge_consecutive_strings(l) for l in lst]
print(result)
# => [['abc def', 1, 11], ['cde fef', 2, 22], ['efg pqr', 3, 33]]
See the Python demo. NOTE: Do not name your variables as built-ins, list is a Python built-in, and you can use other names for lists, like lst, l, etc.
This:
import re
thelist = [
    ['abc$', 1, 11],
    ['cde$', 2, 22],
    ['efg$', 3, 33],
]
regex = re.compile(r'\$')

# Strip every literal "$" from the string cells; other cells pass through.
newlist = []
for row in thelist:
    cleaned_row = []
    for cell in row:
        if isinstance(cell, str):
            cell = regex.sub('', cell)
        cleaned_row.append(cell)
    newlist.append(cleaned_row)
print(newlist)
Prints:
[['abc', 1, 11], ['cde', 2, 22], ['efg', 3, 33]]
For new question:
This:
thelists = [['abch', 'def', 1, 11], ['cdjje', 'fef', 2, 22], ['oefg', 'pqkjbr', 3, 33]]
newlist = []
for thelist in thelists:
    tmplist = []
    for item in thelist:
        # Merge a string into the previous element only when both are
        # strings, separated by the single space the expected output shows.
        if tmplist and isinstance(item, str) and isinstance(tmplist[-1], str):
            tmplist[-1] += " " + item
        else:
            tmplist.append(item)
    newlist.append(tmplist)
print(newlist)
# Prints:
# [['abch def', 1, 11], ['cdjje fef', 2, 22], ['oefg pqkjbr', 3, 33]]

Can anyone help me handle ties in a Python list when I replace its elements with their ranks?

I have a list that looks something like this:
lst_A = [32,12,32,55,12,90,32,75]
I want to replace the numbers with their rank. I am using this function to do this:
def obtain_rank(lstC):
# Returns a list with each element of lstC replaced by its 1-based rank,
# where the largest value gets rank 1.
# Pair every value with its original position.
sort_data = [(x,i) for i,x in enumerate(lstC)]
# reverse=True sorts the (value, index) tuples in descending order, which also
# puts equal values in descending index order -- so among ties the LAST
# occurrence receives the best rank.
sort_data = sorted(sort_data,reverse=True)
result = [0]*len(lstC)
# Write rank i (starting at 1) back at each value's original position.
for i,(_,idx) in enumerate(sort_data,1):
result[idx] = i
return result
I am getting the following output while I use this:
[6, 8, 5, 3, 7, 1, 4, 2]
But what I want from this is:
[4, 7, 5, 3, 8, 1, 6, 2]
How can I go about this?
Try this:
import pandas as pd
def obtain_rank(a):
    """Return the descending rank (1 = largest) of every element of *a*.

    Ties are broken by position: with ``method='first'`` the earlier of two
    equal values receives the better (smaller) rank.
    """
    s = pd.Series(a)
    # rank() returns floats; convert them back to plain ints.
    return [int(x) for x in s.rank(method='first', ascending=False)]
#[4, 7, 5, 3, 8, 1, 6, 2]
You could use 2 loops:
l = [32, 12, 32, 55, 12, 90, 32, 75]
# (rank, value) pairs, best rank first.
d = list(enumerate(sorted(l, reverse=True), start=1))
res = []
for value in l:
    for j, (rank, ranked_value) in enumerate(d):
        if ranked_value == value:
            res.append(rank)
            # Consume this rank so the next equal value gets the next one.
            # Deleting is safe here because the inner loop stops right away.
            del d[j]
            break
print(res)
#[4, 7, 5, 3, 8, 1, 6, 2]
Here you go. In case, you are not already aware, please read https://docs.python.org/3.7/library/collections.html to understand defaultdict and deque
from collections import defaultdict, deque
def obtain_rank(listC):
    """Return the descending rank (1 = largest) of every element of *listC*.

    Equal values receive consecutive ranks in order of appearance, the
    earliest occurrence getting the best rank.
    """
    sorted_list = sorted(listC, reverse=True)
    # value -> queue of the ranks available for that value, best rank first.
    # deque gives O(1) pops from the left end.
    d = defaultdict(deque)
    for i, ele in enumerate(sorted_list):
        d[ele].append(i + 1)
    result = []
    for ele in listC:
        # The best remaining rank sits at the left end of the queue.
        result.append(d[ele].popleft())
    return result
Update: Without using defaultdict and deque
def obtain_rank(listC):
    """Return the descending rank (1 = largest) of every element of *listC*.

    Uses only built-in types. Equal values receive consecutive ranks in
    order of appearance, the earliest occurrence getting the best rank.
    """
    sorted_list = sorted(listC, reverse=True)
    # value -> list of the ranks available for that value, best rank first.
    d = {}
    for i, ele in enumerate(sorted_list):
        # Append in place instead of rebuilding the list with "+" each time.
        d.setdefault(ele, []).append(i + 1)
    result = []
    for ele in listC:
        # Hand out the best remaining rank for this value.
        result.append(d[ele].pop(0))
    return result

how to get dict value by regex in python

dict1={'s1':[1,2,3],'s2':[4,5,6],'a':[7,8,9],'s3':[10,11]}
How can I get all the values whose key starts with 's'?
like dict1['s*']to get the result is dict1['s*']=[1,2,3,4,5,6,10,11]
>>> [x for d in dict1 for x in dict1[d] if d.startswith("s")]
[1, 2, 3, 4, 5, 6, 10, 11]
or, if it needs to be a regex
>>> regex = re.compile("^s")
>>> [x for d in dict1 for x in dict1[d] if regex.search(d)]
[1, 2, 3, 4, 5, 6, 10, 11]
What you're seeing here is a nested list comprehension. It's equivalent to
result = []
for d in dict1:
    for x in dict1[d]:
        # Mirrors the comprehension: the key is re-tested for every element
        # of dict1[d], even though the outcome only depends on d.
        if regex.search(d):
            result.append(x)
As such, it's a little inefficient because the regex is tested way too often (and the elements are appended one by one). So another solution would be
result = []
for d in dict1:
    # Test each key once, then add all of its values in a single call.
    if regex.search(d):
        result.extend(dict1[d])
>>> import re
>>> from itertools import chain
def natural_sort(l):
    """Return the strings of *l* sorted in natural (human) order.

    Runs of ASCII digits compare as integers and everything else compares
    case-insensitively, so ``'s2'`` sorts before ``'s10'``.
    """
    # http://stackoverflow.com/a/4836734/846892
    def alphanum_key(key):
        # With one capturing group, re.split() returns the pieces as
        # [text, digits, text, digits, ...]; the odd positions are exactly
        # the [0-9]+ matches. Converting by position (instead of testing
        # str.isdigit()) never feeds int() a character it rejects, such as
        # "²", and keeps every key in the same str/int layout.
        parts = re.split(r'([0-9]+)', key)
        return [int(c) if i % 2 else c.lower() for i, c in enumerate(parts)]

    return sorted(l, key=alphanum_key)
...
Using glob pattern, 's*':
>>> import fnmatch
def solve(patt):
    """Return the values of all dict1 keys matching the glob pattern *patt*.

    Matching keys are visited in natural sort order and their value lists
    are concatenated into one flat list.
    """
    keys = natural_sort(k for k in dict1 if fnmatch.fnmatch(k, patt))
    return list(chain.from_iterable(dict1[k] for k in keys))
...
>>> solve('s*')
[1, 2, 3, 4, 5, 6, 10, 11]
Using regex:
def solve(patt):
    """Return the values of all dict1 keys matched by the regex *patt*.

    A key matches when ``re.search`` finds *patt* anywhere in it. Matching
    keys are visited in natural sort order and their value lists are
    concatenated into one flat list.
    """
    keys = natural_sort(k for k in dict1 if re.search(patt, k))
    return list(chain.from_iterable(dict1[k] for k in keys))
...
>>> solve('^s')
[1, 2, 3, 4, 5, 6, 10, 11]
I tried the code below; I hope it helps. It walks over the keys of the dictionary and, if a key starts with your chosen character, extends the result list with that key's list of values.
n = 's'  # your start with character
result = []  # your output
for d in dict1:
    # startswith() also copes with an empty key, where d[0] would raise
    # IndexError.
    if d.startswith(n):
        result.extend(dict1[d])
print(result)

Categories