My ultimate goal is to sum a list of values by summing each pair in the list, producing a smaller list, summing each of those pairs, and so on. For now, I have a list with an even number of elements, but eventually I want to be able to handle an arbitrary sized list.
With some help from previous posts, I have this function:
def collapse_sum(lst):
sums = list()
rems = list()
acc = 0
n = 2
print(lst)
print("final sum: {}\n".format(sum(lst)))
pairs = [lst[i:i + n] for i in range(0, len(lst), n)]
while len(pairs) > 0:
if len(pairs) % 2 == 1:
rems.append(pairs[-1])
del pairs[-1]
for a,b in pairs:
sums.append(a + b)
pairs = [sums[i:i + 2] for i in range(0,len(sums),n)]
sums = list()
if len(pairs) == 1:
for a,b in pairs:
acc += a + b
rems = [item for sublist in rems for item in sublist]
pairs = [rems[i:i + n] for i in range(0, len(rems), n)]
if len(pairs) == 1:
for a,b in pairs:
acc += a + b
del pairs[-1]
return acc
rems = list()
print(acc)
return acc
My ultimate goal is to sum a list of values by summing each pair in
the list, producing a smaller list, summing each of those pairs, and
so on.
Here's a solution that does that for even or odd-sized lists, without using any library dependencies.
# Helper function to split iterable obj into blocks/chunks
def blocks(obj, n):
for i in range(0, len(obj), n):
yield obj[i:i + n]
def collapse_sum(obj):
while len(obj) > 1:
obj = blocks(obj, 2)
obj = [sum(i) for i in obj]
return obj[0]
Some examples:
a = [1, 2, 3, 4, 5]
b = [1, 2, 3, 4]
collapse_sum(a)
15
collapse_sum(b)
10
You can visualize this here.
Related
I am trying to code to retrieve all the list which is the "deepest" within a list. I managed to retrieve only one of the list which is the deepest and I am unable to print out all if they have the same "depth". I tried hardcoding it by it will not work if there is 2 or more values of the same depth. Can anyone share how can I modify my code to make it work?
Input: [1,2,[[3]],[[4]],[[5]], [6]]
Output: [3],[4],[5]
def get_deepest(L):
def get_dpst(L, maxdepth):
deepest_list = [L, maxdepth]
for e in L:
is_list = isinstance(e,list)
if is_list:
rList = get_dpst(e, maxdepth+1)
if rList[1] > deepest_list[1]:
deepest_list = rList
elif rList [1] == deepest_list[1]:
deepest_list[1] = rList[0]
return deepest_list
rList = get_dpst(L, 0)
return rList[0]
print (get_deepest(my_list))
How about a pair of functions like this?
unnest is a recursive function that walks a tree of lists, yielding tuples of any non-list value and its depth
get_deepest_values uses unnest and keeps track of the deepest depth it has returned, gathering only those values.
def unnest(lst, depth=0):
for atom in lst:
if isinstance(atom, list):
yield from unnest(atom, depth + 1)
else:
yield (atom, depth)
def get_deepest_values(inp):
deepest_depth = 0
deepest_depth_values = []
for value, depth in unnest(inp):
if depth > deepest_depth:
deepest_depth = depth
deepest_depth_values.clear()
if depth == deepest_depth:
deepest_depth_values.append(value)
return (deepest_depth, deepest_depth_values)
inp = [1, 2, [[3]], [[4]], [[5, "foo"]], [6]]
deepest_depth, deepest_values = get_deepest_values(inp)
print(f"{deepest_depth=} {deepest_values=}")
The output is
deepest_depth=2 deepest_values=[3, 4, 5, 'foo']
I am trying to create a list of batches from an input generator function, but it doesn't yield the list I am expecting.
def batch_generator(items, batch_size):
new = []
i = 0
for item in items:
new.append(item)
i += 1
print('new: ', new, i)
if i == batch_size:
print('i = batch')
i = 0
yield new
new = []
def _test_items_generator():
for i in range(10):
yield i
print(list(map(lambda x: len(x),
batch_generator(_test_items_generator(), 3))))
I am trying to get the output to be [[0, 1, 2], [3, 4 ,5], [6, 7, 8], [9]]
The yield appears to be sending the batch_size instead of the information in the new list. Trying to get my head to understand how these generators work!
I think the problem lies in your last line:
print(list(map(lambda x: len(x),
batch_generator(_test_items_generator(), 3))))
batch_generator yields new which contains a list. Your map(lambda x: len(x) then returns the len of each list. You then print the list of lengths returned by map().
Here is the code that produces the output you expect:
def batch_generator(items, batch_size):
new = []
i = 0
for item in items:
new.append(item)
i += 1
print('new: ', new, i)
if i == batch_size:
print('i = batch')
i = 0
yield new
new = []
yield new # yield the last list even if it is smaller than batch size
def _test_items_generator():
for i in range(10):
yield i
print(list( batch_generator(_test_items_generator(), 3)))
You generator is working correctly. But in your test, you map the result lists to their size lambda x: len(x)
Another approach to the batch_generator function:
def batch_generator(items, batch_size):
current_batch = []
for i, item in enumerate(items):
current_batch.append(item)
if len(current_batch) == batch_size:
yield current_batch
current_batch = []
if len(current_batch) < batch_size:
yield current_batch
I'm new in python,
I have a list like : A=['a','b','c']
and a list like B=[['a','c'],['a','c'],['b','b']]
i want to have a list like C=[2,1,2]
C stores occurrence of sublists that each element of A comes in B
that means 'a' is in 2 sublists
'b' is in 1 sublist
and 'c' is in 2 sublists,
how can I achieve this?
thanks
You can use sum:
a=['a','b','c']
b=[['a','c'],['a','c'],['b','b']]
final_list = [sum(i in c for c in b) for i in a]
Output:
[2, 1, 2]
You can loop over b and update a collections.Counter for each sublist, using set to remove duplicates:
from collections import Counter
a = ['a','b','c']
b = [['a','c'],['a','c'],['b','b']]
counter = Counter()
for sublist in b:
counter.update(set(sublist))
c = [counter[x] for x in a]
# result: [2, 1, 2]
You can loop and compare in both lists
a=['a','b','c']
b=[['a','c'],['a','c'],['b','b']]
result = []
for letter in a:
count = 0
for l in b:
if letter in l:
count += 1
result.append(count)
You can try dict approach :
A=['a','b','c']
B=[['a','c'],['a','c'],['b','b']]
d={}
for i in A:
for j in B:
if i in j:
if i not in d:
d[i]=1
else:
d[i]+=1
print(d)
output:
{'c': 2, 'b': 1, 'a': 2}
You can use a list comprehension with sum to construct C.
C = [sum(elem in sub for sub in B) for elem in A]
This has the same effect as using nested for loops:
C = []
for elem in A:
sum = 0
for sub in B:
sum += elem in sub
C.append(sum)
Here is a solution with collections.defaultdict.
from collections import defaultdict
a = ['a','b','c']
b = [['a','c'],['a','c'],['b','b']]
# initialise defaultdict
d = defaultdict(int)
# convert to sets for performance
a_set = set(a)
b_sets = list(map(set, b))
# loop through list of sets
for item in b_sets:
for i in item & a_set:
d[i] += 1
# retrieve counts in correct order
res = list(map(d.get, a))
print(res)
# [2, 1, 2]
Performance note
This may not matter, but the performance differential is interesting as it shows clearly the Counter overhead (4x slower).
from collections import defaultdict, Counter
a = ['a','b','c']
b = [['a','c'],['a','c'],['b','b']]
b = b*100000
def dd(a, b):
d = defaultdict(int)
a_set = set(a)
b_sets = list(map(set, b))
for item in b_sets:
for i in item & a_set:
d[i] += 1
return list(map(d.get, a))
def counter(a, b):
counter = Counter()
for sublist in b:
counter.update(set(sublist))
return [counter[x] for x in a]
assert dd(a, b) == counter(a, b)
%timeit dd(a, b) # 414 ms
%timeit counter(a, b) # 1.65 s
Here is my code, but I want a better solution, how do you think about the problem?
def get_all_substrings(string):
length = len(string)
alist = []
for i in xrange(length):
for j in xrange(i,length):
alist.append(string[i:j + 1])
return alist
print get_all_substring('abcde')
The only improvement I could think of is, to use list comprehension like this
def get_all_substrings(input_string):
length = len(input_string)
return [input_string[i:j+1] for i in xrange(length) for j in xrange(i,length)]
print get_all_substrings('abcde')
The timing comparison between, yours and mine
def get_all_substrings(string):
length = len(string)
alist = []
for i in xrange(length):
for j in xrange(i,length):
alist.append(string[i:j + 1])
return alist
def get_all_substrings_1(input_string):
length = len(input_string)
return [input_string[i:j + 1] for i in xrange(length) for j in xrange(i,length)]
from timeit import timeit
print timeit("get_all_substrings('abcde')", "from __main__ import get_all_substrings")
# 3.33308315277
print timeit("get_all_substrings_1('abcde')", "from __main__ import get_all_substrings_1")
# 2.67816185951
can be done concisely with itertools.combinations
from itertools import combinations
def get_all_substrings_2(string):
length = len(string) + 1
return [string[x:y] for x, y in combinations(range(length), r=2)]
You could write it as a generator to save storing all the strings in memory at once if you don't need to
def get_all_substrings(string):
length = len(string)
for i in xrange(length):
for j in xrange(i + 1, length + 1):
yield(string[i:j])
for i in get_all_substrings("abcde"):
print i
you can still make a list if you really need one
alist = list(get_all_substrings("abcde"))
The function can be reduced to return a generator expression
def get_all_substrings(s):
length = len(s)
return (s[i: j] for i in xrange(length) for j in xrange(i + 1, length + 1))
Or of course you can change two characters to return a list if you don't care about memory
def get_all_substrings(s):
length = len(s)
return [s[i: j] for i in xrange(length) for j in xrange(i + 1, length + 1)]
I've never been fond of range(len(seq)), how about using enumerate and just using the index value:
def indexes(seq, start=0):
return (i for i,_ in enumerate(seq, start=start))
def gen_all_substrings(s):
return (s[i:j] for i in indexes(s) for j in indexes(s[i:], i+1))
def get_all_substrings(string):
return list(gen_all_substrings(string))
print(get_all_substrings('abcde'))
Python 3
s='abc'
list(s[i:j+1] for i in range (len(s)) for j in range(i,len(s)))
['a', 'ab', 'abc', 'b', 'bc', 'c']
Use itertools.permutations to generate all pairs of possible start and end indexes,
and filter out only those where the start index is less than then end index. Then
use these pairs to return slices of the original string.
from itertools import permutations
def gen_all_substrings(s):
lt = lambda pair: pair[0] < pair[1]
index_pairs = filter(lt, permutations(range(len(s)+1), 2))
return (s[i:j] for i,j in index_pairs)
def get_all_substrings(s):
return list(gen_all_substrings(s))
print(get_all_substrings('abcde'))
Another solution:
def get_all_substrings(string):
length = len(string)+1
return [string[x:y] for x in range(length) for y in range(length) if string[x:y]]
print get_all_substring('abcde')
Another solution using 2-D matrix approach
p = "abc"
a = list(p)
b = list(p)
c = list(p)
count = 0
for i in range(0,len(a)):
dump = a[i]
for j in range(0, len(b)):
if i < j:
c.append(dump+b[j])
dump = dump + b[j]
If you want to get the substrings sorted by the length:
s = 'abcde'
def allSubstrings(s: str) -> List[str]:
length = len(s)
mylist = []
for i in range(1, length+1):
for j in range(length-i+1):
mylist.append(s[j:j+i])
return mylist
print(allSubstrings(s))
['a', 'b', 'c', 'd', 'e', 'ab', 'bc', 'cd', 'de', 'abc', 'bcd', 'cde', 'abcd', 'bcde', 'abcde']
I try to sum a list of nested elements
e.g, numbers=[1,3,5,6,[7,8]] should produce sum=30
I wrote the following code :
def nested_sum(L):
sum=0
for i in range(len(L)):
if (len(L[i])>1):
sum=sum+nested_sum(L[i])
else:
sum=sum+L[i]
return sum
The above code gives following error:
object of type 'int' has no len()
I also tried len([L[i]]), still not working.
Anyone can help? It is Python 3.3
You need to use isinstance to check whether an element is a list or not. Also, you might want to iterate over the actual list, to make things simpler.
def nested_sum(L):
total = 0 # don't use `sum` as a variable name
for i in L:
if isinstance(i, list): # checks if `i` is a list
total += nested_sum(i)
else:
total += i
return total
One alternative solution with list comprehension:
>>> sum( sum(x) if isinstance(x, list) else x for x in L )
30
Edit:
And for lists with more than two levels(thx #Volatility):
def nested_sum(L):
return sum( nested_sum(x) if isinstance(x, list) else x for x in L )
It is generally considered more pythonic to duck type, rather than explicit type checking. Something like this will take any iterable, not just lists:
def nested_sum(a) :
total = 0
for item in a :
try:
total += item
except TypeError:
total += nested_sum(item)
return total
I would sum the flattened list:
def flatten(L):
'''Flattens nested lists or tuples with non-string items'''
for item in L:
try:
for i in flatten(item):
yield i
except TypeError:
yield item
>>> sum(flatten([1,3,5,6,[7,8]]))
30
A quick recursion that uses a lambda to handle the nested lists:
rec = lambda x: sum(map(rec, x)) if isinstance(x, list) else x
rec, applied on a list, will return the sum (recursively), on a value, return the value.
result = rec(a)
This code also works.
def add_all(t):
total = 0
for i in t:
if type(i) == list: # check whether i is list or not
total = total + add_all(i)
else:
total += i
return total
An example using filter and map and recursion:
def islist(x):
return isinstance(x, list)
def notlist(x):
return not isinstance(x, list)
def nested_sum(seq):
return sum(filter(notlist, seq)) + map(nested_sum, filter(islist, seq))
And here is an example using reduce and recursion
from functools import reduce
def nested_sum(seq):
return reduce(lambda a,b: a+(nested_sum(b) if isinstance(b, list) else b), seq)
An example using plain old recursion:
def nested_sum(seq):
if isinstance(seq[0], list):
head = nested_sum(seq[0])
else:
head = seq[0]
return head + nested_sum(seq[1:])
An example using simulated recursion:
def nested_sum(seq):
stack = []
stack.append(seq)
result = 0
while stack:
item = stack.pop()
if isinstance(item, list):
for e in item:
stack.append(e)
else:
result += item
return result
Adjustment for handling self-referential lists is left as an exercise for the reader.
def sum_nest_lst(lst):
t=0
for l in lst:
if(type(l)==int):
t=t+l
if(type(l)==list):
t=t+sum(l)
print(t)
def nnl(nl): # non nested list function
nn = []
for x in nl:
if type(x) == type(5):
nn.append(x)
if type(x) == type([]):
n = nnl(x)
for y in n:
nn.append(y)
return sum(nn)
print(nnl([[9, 4, 5], [3, 8,[5]], 6])) # output:[9,4,5,3,8,5,6]
a = sum(nnl([[9, 4, 5], [3, 8,[5]], 6]))
print (a) # output: 40
A simple solution would be to use nested loops.
def nested_sum(t):
sum=0
for i in t:
if isinstance(i, list):
for j in i:
sum +=j
else:
sum += i
return sum
L = [1, 2, 3, [4, 5, 6], 5, [7, 8, 9]]
total = 0 # assign any var
for a in L: # assign index and start to iterate using if else
if (isinstance(a, list)): # since its a list you are basically repeating the prev step
for b in a:
total += b
else:
total += a
print(total)
def list_sum(L):
return sum(list_sum(x) if isinstance(x, list) else x for x in L)
def nested_sum(lists):
total = 0
for lst in lists:
s = sum(lst)
total += s
return total
#nested sum
l = [[1, 2], [3,5], [6,2], [4, 5, 6,9]]
def nested_sum(lst):
sum = 0
for i in lst:
for j in i:
sum = sum + j
print(sum)
nested_sum(l)