Going through 2 lists with array data - python

This one is giving me a headache, and I am having trouble finding a solution with a for-loop.
Basically, my data looks like this:
short_list = [ [1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12] ]
long_list = [ [1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [6, 7, 8, 9, 10], [9, 10, 11, 12, 13] ]
I would need to know how many times each number from each row in the short_list appears in each row of the long_list, and the comparison is NOT needed when both list indices are the same, because they come from the same data set.
Example: I need to know the occurrence of each number in [1, 2, 3] in the long_list rows [2, 3, 4, 5, 6], [6, 7, 8, 9, 10] and [9, 10, 11, 12, 13].
And then continue with the next data row in short_list, etc.

Here's one way to do it. It's straight off the top of my head, so there is probably a much better way to do it.
from collections import defaultdict

short_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
long_list = [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [6, 7, 8, 9, 10], [9, 10, 11, 12, 13]]

# Total count of every short_list number across the long_list rows,
# skipping the long_list row that sits at the same index as the short row.
occurrences = defaultdict(int)
for i, sl in enumerate(short_list):
    for j, ll in enumerate(long_list):
        if i == j:
            continue  # same index: both rows come from the same data set
        for n in sl:
            # Adding 0 still creates the key, so absent numbers show up as 0.
            occurrences[n] += ll.count(n)
>>> occurrences
defaultdict(<class 'int'>, {1: 0, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 0, 8: 0, 9: 1, 10: 1, 11: 0, 12: 0})
Note that enumerate() is used to provide indices while iterating. The indices are compared to ensure that sub-lists at the same relative position are not compared.
The result is a dictionary keyed by items from the short list with the values being the total count of that item in the long list sans the sublist with the same index.

This is a brute-force solution. I've amended the input data to make the results more interesting:
from collections import Counter
from itertools import chain

short_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
long_list = [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [6, 7, 8, 9, 10], [2, 3, 11, 12, 13]]

# For each short row, count how often its numbers occur in every long row
# except the one at the same index.
for idx, i in enumerate(short_list):
    wanted = set(i)  # built once per row instead of once per element
    others = long_list[:idx] + long_list[idx + 1:]
    # chain.from_iterable flattens the remaining rows (stdlib equivalent of toolz.concat).
    long_list_filtered = (x for x in chain.from_iterable(others) if x in wanted)
    counts = Counter(long_list_filtered)
    print(idx, counts)
# 0 Counter({2: 2, 3: 2})
# 1 Counter({4: 1, 5: 1, 6: 1})
# 2 Counter()
# 3 Counter({10: 1})

Possible Approach:
Loop over each list in short_list.
Flatten every list in long_list that is not the same index as the current list, and convert it to a set.
Create a collections.Counter() to store the counts for each element in short list that appears in the flattened list.
Demo:
from collections import Counter
from itertools import chain

short_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
long_list = [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [6, 7, 8, 9, 10], [9, 10, 11, 12, 13]]

for i, short_lst in enumerate(short_list):
    # Every number found in the long rows other than the one at index i.
    to_check = set(chain.from_iterable(long_list[:i] + long_list[i + 1:]))
    # Membership is tested against a set, so each number of short_lst that
    # appears anywhere in the other rows is recorded once.
    print(Counter(x for x in short_lst if x in to_check))
Output:
Counter({2: 1, 3: 1})
Counter({4: 1, 5: 1, 6: 1})
Counter({9: 1})
Counter({10: 1})

# Report, for each short row, how often each of its numbers occurs in every
# long row at a different index. Comparing indices (rather than testing
# whether the short row is a subset of the long row) skips exactly the
# same-index pair, whatever the rows contain.
for i, L1 in enumerate(short_list):
    for j, L2 in enumerate(long_list):
        if i == j:
            continue
        for x in L1:
            print("{} has {} occurrences in {}".format(x, L2.count(x), L2))

short_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
long_list = [[1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [6, 7, 8, 9, 10], [9, 10, 11, 12, 13]]

# occ[r][k] is the number of times short_list[r][k] occurs in the long rows
# whose index differs from r.
occ = []
for row_idx, si in enumerate(short_list):
    # Skip the long row at the same index: it comes from the same data set.
    others = [li for k, li in enumerate(long_list) if k != row_idx]
    occ.append([sum(li.count(e) for li in others) for e in si])
print(occ)
This should work,
Happy coding :)

Related

How can I make this python function generate such a list [[1], [1, 2]...[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]?

all = []


def generate(i, current):
    """Recursively append i, i+1, ..., 10 to *current*, recording it each time.

    NOTE: this is the code the question is about. The same list object is
    appended to ``all`` on every call, so all entries end up identical.
    """
    if i < 11:
        current.append(i)
        all.append(current)  # stores a reference to current, not a snapshot
        i += 1
        generate(i, current)


generate(1, [])
print(all)
I want this function to generate
[[1], [1, 2]...[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
instead of
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]],
but don't know how to fix it.
Do you know the solution?
Here's my go:
def listGen(start, stop):
    """Return the growing prefixes [start], [start, start+1], ..., up to stop.

    Returns an empty list when stop < start.
    """
    res = []
    for i in range(start, stop + 1):
        res.append(list(range(start, i + 1)))
    return res
You could also simplify this to:
def listGen(start, stop):
    """Return the growing prefixes [start], [start, start+1], ..., up to stop."""
    return [list(range(start, i + 1)) for i in range(start, stop + 1)]
Input: print(listGen(1, 10))
Output: [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
def generate_array():
    """Return [[1], [1, 2], ..., [1, 2, ..., 10]]."""
    result = []
    for i in range(1, 11):
        # A fresh list per outer iteration, so the sublists are independent.
        current_array = []
        for j in range(1, i + 1):
            current_array.append(j)
        result.append(current_array)
    return result


print(generate_array())
The code uses two nested for loops, where the outer loop iterates over range(1, 11) and the inner loop iterates over range(1, i + 1). The values of i and j are used to generate the sublists and append them to the result list, which is returned at the end of the function.
The core issue you have is that when you do:
all.append(current)
current is the exact same list object every time, so when you append to it on the prior line you effectively append to every entry already stored in all. The lightest fix to your code is to append a copy of it:
all = []


def generate(i, current):
    """Append i..10 to *current*, storing a snapshot of it in ``all`` each time."""
    if i < 11:
        current.append(i)
        all.append(current.copy())  ## <--- append a copy
        i += 1
        generate(i, current)


generate(1, [])
print(all)
alternatively you could pass a copy like:
all = []


def generate(i, current):
    """Append i to *current*, store it in ``all``, then recurse on a copy."""
    if i < 11:
        current.append(i)
        all.append(current)
        i += 1
        # The next call extends its own copy, so stored lists stay unchanged.
        generate(i, current.copy())  ## <--- pass a copy


generate(1, [])
print(all)
In either case, the important part is that we get a distinct current to work with.
Note that the use of all as a variable clobbers the function all() and you might not want to do that. As I'm sure lots of others will point out, there are many ways to skin this cat.

Splitting list of elements without numpy array function [duplicate]

This question already has answers here:
How do I split a list into equally-sized chunks?
(66 answers)
Closed 1 year ago.
Example: I have a list:
[8, 3, 4, 1, 5, 9, 6, 7, 2]
And I need to make it look like this but without using numpy.array_split():
[[8, 3, 4], [1, 5, 9], [6, 7, 2]]
How can I do it? Not only for this one case, but when I have 4 elements, I want to have 2 and 2, (9 - 3,3,3 and 16 - 4,4,4,4) etc.
You can get the square root of the list's length then split it using a list comprehension. This will work for lists with the length of 4, 9, 16, ...:
lst = [8, 3, 4, 1, 5, 9, 6, 7, 2]
lst2 = [8, 3, 4, 1]


def split_equal(lst):
    """Split *lst* into chunks whose length is the integer square root of len(lst).

    Intended for lists whose length is a perfect square (4, 9, 16, ...).
    """
    len_ = len(lst)
    # Return an empty list if the list has no items (a chunk size of 0
    # would otherwise be used as the range step).
    if len_ == 0:
        return []
    n = int(len_ ** 0.5)
    return [lst[i:i + n] for i in range(0, len_, n)]
output:
[[8, 3, 4], [1, 5, 9], [6, 7, 2]]
[[8, 3], [4, 1]]
You can use that:
def splitter(inlist):
    """Split *inlist* into m consecutive rows of m items, where m*m == len(inlist).

    Raises:
        ValueError: If the length of *inlist* is not a perfect square.
    """
    n = len(inlist)
    m = int(n ** 0.5)
    if m * m != n:
        raise ValueError("length of input list must be a perfect square")
    # Row i starts at offset i*m; j walks along that row.
    return [[inlist[i * m + j] for j in range(m)] for i in range(m)]


print(splitter([8, 3, 4, 1]))
print(splitter([8, 3, 4, 1, 5, 9, 6, 7, 2]))
print(splitter([8, 3, 4, 1, 5, 9, 6, 7, 2, 8, 3, 4, 1, 5, 9, 6]))
# Result:
# [[8, 3], [4, 1]]
# [[8, 3, 4], [1, 5, 9], [6, 7, 2]]
# [[8, 3, 4, 1], [5, 9, 6, 7], [2, 8, 3, 4], [1, 5, 9, 6]]
Careful: it raises an exception if the length of the input list is not a perfect square.
def equal_array_split(arr, split_arr_len):
    """Split *arr* into consecutive chunks of *split_arr_len* items.

    Returns the list of chunks, or the string "Invalid split array length!!"
    when len(arr) is not a multiple of *split_arr_len*.
    """
    array_length = len(arr)
    if array_length % split_arr_len != 0:
        return "Invalid split array length!!"
    return [arr[i:i + split_arr_len] for i in range(0, array_length, split_arr_len)]


print(equal_array_split([1, 2, 3, 4, 5, 6, 7, 8, 9], 3))
print(equal_array_split([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 4))
print(equal_array_split([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 8))
print(equal_array_split([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 2))
Output:
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]
[[1, 2, 3, 4, 5, 6, 7, 8], [9, 10, 11, 12, 13, 14, 15, 16]]
[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]]
You can slice the list with a list comprehension. Assuming the input is a square number length:
import numpy as np

arr = [8, 3, 4, 1, 5, 9, 6, 7, 2]
# Side length of the square: integer part of sqrt(len(arr)).
n = int(np.sqrt(len(arr)))
# Collect n consecutive rows of n items each.
result = []
for row in range(n):
    start = row * n
    result.append(arr[start:start + n])
the split value being a square
# Named `items` rather than `list` so the builtin list type is not shadowed.
items = [8, 3, 4, 1, 5, 9, 6, 7, 2]
split_value = 3  # chunk length; for a square split, the square root of len(items)
result = []
N = split_value  # split_value being the value required to split the list
for i in range(0, len(items), N):
    result.append(items[i:i + N])
print(result)
Without numpy:
a = [8, 3, 4, 1, 5, 9, 6, 7, 2]
splitedSize = 3
# Walk the list in steps of splitedSize and collect each slice.
a_splited = []
for offset in range(0, len(a), splitedSize):
    a_splited.append(a[offset:offset + splitedSize])
print(a_splited)

How to create a recursive separator?

I am trying to learn recursion and am separating odd and even values in two lists and merging them to another list as below:
Code:
def separateNumbers(L):
    """Attempt to split L into [evens, odds] recursively (code under question).

    NOTE: every call creates fresh evenList/oddList and ignores the value
    returned by the recursive call, so only L[0] ends up classified. This is
    the behaviour the question asks about and is kept as posted.
    """
    evenList = []
    oddList = []
    main = []
    if len(L) == 0:
        return L
    if L[0] % 2 == 0:
        evenList.append(L[0])
        separateNumbers(L[1:])  # result discarded
    if L[0] % 2 == 1:
        oddList.append(L[0])
        separateNumbers(L[1:])  # result discarded
    main.append(evenList)
    main.append(oddList)
    return main


inputList = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
L = separateNumbers(inputList)
print(L)
Input:
L = [1,2,3,4,5,6]
Output:
[[1,3,5], [2,4,6]]
The even and odd arrays reset everytime the recursive function is called, how can I fix this?
Tried with inner function:
def separateNumbers(L):
    """Second attempt from the question: shared lists plus a nested helper.

    NOTE: kept as posted. evenList/oddList are now shared across the
    recursion, but every non-empty level of inner() appends both lists to
    main, so the result holds the [evens, odds] pair once per element.
    """
    evenList = []
    oddList = []
    main = []

    def inner(L):
        if len(L) == 0:
            return L
        if L[0] % 2 == 0:
            evenList.append(L[0])
            inner(L[1:])
        if L[0] % 2 == 1:
            oddList.append(L[0])
            inner(L[1:])
        main.append(evenList)
        main.append(oddList)
        return main

    a = inner(L)
    return a
Output:
[[2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8,
10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9], [2, 4, 6, 8, 10], [1, 3, 5, 7, 9]]
You don't need a nested function. try:
def separate_numbers(lst):
    """Return (odds, evens) from *lst*, each in original order, recursively."""
    if not lst:  # empty list
        return [], []
    odd, even = separate_numbers(lst[1:])  # recursion call on the tail
    if lst[0] % 2:  # if the first item is odd
        return [lst[0], *odd], even
    else:  # if even
        return odd, [lst[0], *even]


lst = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
print(separate_numbers(lst))  # ([1, 3, 5, 7, 9], [2, 4, 6, 8, 10])
The function calls itself using the tail part of the input list, receiving two lists: odd for odd numbers and even for even numbers. Then it returns those lists, after attaching the head element lst[0] to one of the lists.

How to sum values from two iterator "lists"?

I have two iterators, which consists of a "list" that looks something like this:
[[1, 2, 3, 4, 5, 6],
[2, 4, 6, 8, 10, 12],
[3, 5, 8, 6, 1, 19],
[5, 9, 1, 9, 4, 6]]
Or, that is what it will look like if I just ran a for loop over them.
The reason for the iterator and not a list per se is due to memory. The true lists/arrays are way larger, this is just an example.
What I need to do is take one list and sum the columns of each index inside the list for all "outside" indices and then add them together for both lists like sum(list1) + sum(list2).
So basically:
list1: list2:
[[1, 2, 3, 4, 5, 6], [[5, 4, 3, 2, 1, 9],
[2, 4, 6, 8, 10, 12], [6, 3, 8, 1, 1, 6],
[3, 5, 8, 6, 1, 19], [1, 3, 2, 8, 2, 3],
[5, 9, 1, 9, 4, 6]] [5, 2, 9, 4, 2, 5]]
=> =>
[11, 20, 18, 27, 20, 43] [17, 12, 22, 15, 6, 23]
=>
[28, 32, 40, 42, 26, 66]
So I iterate over the two lists, and for each list I need to sum the columns, and then in the end add the column sums of the two lists together into one combined list.
I know how to do this if it were just regular lists, but since this is iterators/generators (don't know the correct term) I am really not sure how it is done.
You can use this to sum each one without loading everything into memory:
def sumIter(iter, width=6):
    """Return the column-wise sums of the rows produced by *iter*.

    Rows are consumed one at a time, so the whole input never has to be
    held in memory at once.

    Args:
        iter: Iterable of indexable rows. (The name is kept for
            compatibility; it shadows the builtin inside this function.)
        width: Number of leading columns to sum. Defaults to 6.

    Returns:
        A list of ``width`` sums; all zeros if *iter* yields no rows.

    Raises:
        IndexError: If a list row has fewer than ``width`` items.
    """
    result = [0] * width
    for row in iter:
        for col in range(width):
            result[col] += row[col]
    return result
And then:
# Column sums of each iterator, then their position-wise total.
sum1, sum2 = sumIter(iter1), sumIter(iter2)
result = [sum1[col] + sum2[col] for col in range(6)]
Using zip
Ex:
l1 = [
    [1, 2, 3, 4, 5, 6],
    [2, 4, 6, 8, 10, 12],
    [3, 5, 8, 6, 1, 19],
    [5, 9, 1, 9, 4, 6],
]
l2 = [
    [5, 4, 3, 2, 1, 9],
    [6, 3, 8, 1, 1, 6],
    [1, 3, 2, 8, 2, 3],
    [5, 2, 9, 4, 2, 5],
]
# zip(*rows) regroups the rows into columns; each name is rebound to a lazy
# iterator over its column totals.
l1 = map(sum, zip(*l1))
l2 = map(sum, zip(*l2))
# Pair up the two sets of column totals and add them.
print([sum(pair) for pair in zip(l1, l2)])
Output:
[28, 32, 40, 42, 26, 66]
Using reduce since row can be added in numpy array.
reduce is a built-in function in Python 2; in Python 3 it must be imported from functools.
import numpy as np
from functools import reduce  # only in python3


def sumup(one_row, another_row):
    """Return one_row + another_row (element-wise for NumPy rows)."""
    return one_row + another_row


test_list = np.array([[1, 2, 3, 4, 5, 6],
                      [2, 4, 6, 8, 10, 12],
                      [3, 5, 8, 6, 1, 19],
                      [5, 9, 1, 9, 4, 6]])
# Folding the rows together with + yields the column sums.
reduce(sumup, test_list)
Output
array([11, 20, 18, 27, 20, 43])
using numpy.sum
import numpy as np

# Column sums (axis 0) of each table.
l1 = np.array([[1, 2, 3, 4, 5, 6], [2, 4, 6, 8, 10, 12], [3, 5, 8, 6, 1, 19], [5, 9, 1, 9, 4, 6]]).sum(axis=0)
l2 = np.array([[5, 4, 3, 2, 1, 9], [6, 3, 8, 1, 1, 6], [1, 3, 2, 8, 2, 3], [5, 2, 9, 4, 2, 5]]).sum(axis=0)
# Element-wise total of the two column-sum vectors.
print(l1 + l2)
Output
[28 32 40 42 26 66]

python generate sublist with offset and condition

Hey I'm trying to generate sublists of a list. For example I've a list like this:
l = [1,2,3,4,5,6,7,8,9,10,11,12]
I want to split it into sublists of length 4, where the first element of each sublist is the same as the last element of the previous sublist, and, as I said, every sublist must have a length of 4. Like this:
l1 = [1,2,3,4]
l2 = [4,5,6,7]
l3 = [7,8,9,10]
l4 = [10, 11, 12] <-- should be ignored
Does anyone have an idea? I'm thinking about a generator, but I'm not quite sure.
A simple but flexible generator implementation:
def overlapping_sublists(l, n, overlap=1, start=0):
    """Yield successive length-*n* slices of *l* that share *overlap* items.

    Args:
        l: Sequence to slice.
        n: Length of every yielded sublist.
        overlap: Number of items each sublist shares with the previous one.
        start: Index at which the first sublist begins.

    Yields:
        Slices ``l[start:start + n]``; a trailing slice shorter than *n* is
        never produced.

    Raises:
        ValueError: If ``overlap >= n``; the window would never advance.
            Raised when iteration begins, since this is a generator.
    """
    step = n - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than n")
    while start <= len(l) - n:
        yield l[start:start + n]
        start += step
Example usage:
>>> l = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
>>> list(overlapping_sublists(l, 4))
[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]]
>>> list(overlapping_sublists(l, 4, 2, 3))
[[4, 5, 6, 7], [6, 7, 8, 9], [8, 9, 10, 11]]
a = []
l = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# Step by 3 so each 4-item window starts on the previous window's last item;
# stopping before len(l) - 3 means only full windows are taken.
for i in range(0, len(l) - 3, 3):
    a.append(l[i:i + 4])
will give a = [[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]]
or you can use as a list comprehension:
[l[i:i+4] for i in range(0, len(l)-3, 3)]
# Take a 4-item slice every 3 positions; the final slice may be shorter.
windows = []
for i in range(0, len(l), 3):
    windows.append(l[i:i + 4])
print(windows)
Output:
[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10], [10, 11, 12]]
Only sublists of length 4:
# Build every slice first, then keep only those with exactly 4 items.
candidates = [l[i:i + 4] for i in range(0, len(l), 3)]
print([m for m in candidates if len(m) == 4])
Output:
[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]]
Using generators:
# Lazily produce the 4-item windows (one every 3 positions) and print each;
# shorter trailing slices are filtered out.
for n in (m for m in (l[i:i + 4] for i in range(0, len(l), 3)) if len(m) == 4):
    print(n)
Output:
[1, 2, 3, 4]
[4, 5, 6, 7]
[7, 8, 9, 10]

Categories