Consider an array Y of 0s and 1s, for example Y = (0,1,1,0). I want to count the number of uninterrupted intervals (runs) of 0s and of 1s. In our example n0 = 2 and n1 = 1. I have a script which does what is needed, but it is not very elegant. Does someone know a smoother or more Pythonic version?
import pandas as pd
import numpy as np
# storage for the final run counts, filled in under the keys 'n_0' and 'n_1'
counter = {}
# number of random draws
n = 10
# single-column dataframe (column label 0) of random draws from {0, 1}
Y = pd.DataFrame(np.random.choice(2, n))
# row labels where the value is 0 and where it is 1
idx_0 = Y[Y[0] == 0].index
idx_1 = Y[Y[0] == 1].index
# count intervals of uninterrupted 0s:
# every 0 that is directly followed by a 1 closes one run of 0s
j = 0
for i in idx_0:
    if i+1 < n:
        if Y.loc[i+1, 0] == 1:
            j += 1
        else:
            continue
# a run of 0s that reaches the last row is never followed by a 1,
# so it is counted separately here
if Y.loc[n-1, 0] == 0:
    j += 1
counter['n_0'] = j
# count intervals of uninterrupted 1s (same scheme with the roles swapped)
j = 0
for i in idx_1:
    if i+1 < n:
        if Y.loc[i+1, 0] == 0:
            j += 1
        else:
            continue
# trailing run of 1s, if the data ends with a 1
if Y.loc[n-1, 0] == 1:
    j += 1
counter['n_1'] = j
A more succinct solution taking advantage of pandas methods:
counter = Y[0][Y[0].diff() != 0].value_counts()
Y[0].diff() computes the difference between consecutive elements (the first entry is NaN, which also satisfies != 0, so the first run is kept)
diff != 0 marks the indices where the value changes
Y[idx].value_counts() counts the frequency of each value
Example result for 10 random elements, [0, 1, 1, 0, 1, 1, 1, 1, 1, 1]:
1 2
0 2
Name: 0, dtype: int64
If you insist having the keys as 'n_0' and 'n_1' instead, you can rename them with
counter = counter.rename(index={i: f'n_{i}' for i in range(2)})
You can also convert that to a dict with dict(counter), even though the pandas object has the same functionality with counter[key] giving you the respective value.
numbers = [0, 1, 1, 0]
def runs(x, numbers):
    """Return how many uninterrupted runs of ``x`` occur in ``numbers``.

    The values are joined into one string of digits and split on the
    *other* digit ('1' when ``x`` is 0, otherwise '0'); every non-empty
    piece that remains is one run of ``x``.
    """
    separator = '1' if x == 0 else '0'
    digits = ''.join(map(str, numbers))
    return sum(1 for piece in digits.split(separator) if piece)
print(runs(0, numbers))
print(runs(1, numbers))
Update using dataframe:
import pandas as pd
import numpy as np
# storage
counter = {}
# number of random draws
n = 10
# dataframe of random draw between 0 and 1
Y = pd.DataFrame(np.random.choice(2, n))
print([v[0] for v in Y.values.tolist()])
def runs(x, numbers):
    """Count the maximal runs of ``x`` in the 0/1 sequence ``numbers``.

    Splitting the joined digit string on the opposite digit leaves one
    non-empty chunk per run; the number of such chunks is returned.
    """
    other_digit = '0' if x != 0 else '1'
    text = ''.join(str(value) for value in numbers)
    run_lengths = [len(chunk) for chunk in text.split(other_digit) if chunk]
    return len(run_lengths)
# Flatten the single-column dataframe into a plain list of its values.
values = [v[0] for v in Y.values.tolist()]
print(values)
print('Runs of 0: {}'.format(runs(0, values)))
# The closing parenthesis of print() was missing here (SyntaxError).
print('Runs of 1: {}'.format(runs(1, values)))
Related
I have a task to do:
a = [0,1,0,1,0,0,1,1,1,1,0]
I have the list - a - randomly generated each time the program runs.
Task 1: find the longest 1-row (here it is [1,1,1,1]) and output its starting index number.
Task 2: find 1,1 in a; how many times it occurs? 1,1,1 doesn't count, only exact matches are taken into account.
a = [1,0,0,1,1,0,1,1,1,1]
# Replace every 1 in `a` by a running number (1, 2, 3, ...); 0s stay 0.
counter = 1
for i in range(len(a)):
    if a[i] == 1:
        a[i] = counter
        counter += 1
print(a)
# Try to collect each block of consecutive non-zero entries into `b`.
b = []
one_rows = []
for i in a:
    if i > 0:
        one_rows.append(i)
    if i == 0:
        # NOTE(review): this stores a reference to `one_rows` itself (wrapped
        # in a list), and clear() below empties that same object, so the
        # entries already placed in `b` appear to end up empty - confirm.
        b.append([one_rows])
        one_rows.clear()
print(b)
If I've understood your question right, you can use itertools.groupby to group the list into runs of equal values and inspect the runs of 1s:
from itertools import groupby

a = [0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0]
max_len, double_ones, max_idx = float("-inf"), 0, 0
# Group consecutive equal values. enumerate() pairs every element with its
# index, and the key function groups on the value only, so each group still
# carries the positions of its members.
for v, g in groupby(enumerate(a), lambda k: k[1]):
    if v == 1:
        idxs = [i for i, _ in g]
        # A run of exactly two 1s is an exact "1,1" match (the bool adds 0 or 1).
        double_ones += len(idxs) == 2
        # Strict '>' keeps the first of several equally long runs.
        if len(idxs) > max_len:
            max_len = len(idxs)
            max_idx = idxs[0]
print("Longest 1-row:", max_len, "Index:", max_idx)
print("How many 1,1:", double_ones)
Prints:
Longest 1-row: 4 Index: 6
How many 1,1: 0
I need to find the biggest sequence of zeros next to each other (up down left right).
for example in this example the function should return 6
mat = [[1,**0**,**0**,3,0],
[**0**,**0**,2,3,0],
[2,**0**,**0**,2,0],
[0,1,2,3,3],]
the zeros that i marked as bold should be the answer (6)
the solution should be implemented without any loop (using recursion)
this is what i tried so far
def question_3_b(some_list,index_cord):
    # Attempt at walking the zeros connected to the start cell.
    # `index_cord` is read as (row, column). The function prints the list of
    # visited coordinates and returns nothing.
    y = index_cord[0]
    x = index_cord[1]
    list_of_nums = []  # (x, y) of every cell main() was called on
    def main(some_list,index_cord):
        # Record the current cell, then probe its four neighbours.
        y = index_cord[0]
        x = index_cord[1]
        def check_right(x,y):
            # NOTE(review): this guard tests `< 0`, which cannot detect running
            # off the right edge; some_list[y][x+1] can raise IndexError - confirm.
            if x + 1 < 0:
                return 0
            if some_list[y][x+1] == 0:
                main(some_list,(y,x+1))
            else:
                return 0
        def check_left(x,y):
            if x -1 < 0:
                return 0
            if some_list[y][x - 1] == 0:
                main(some_list,(y, x - 1))
        def check_down(x,y):
            # NOTE(review): same `< 0` guard as check_right; the bottom edge is
            # instead handled by the bare except below, which also hides any
            # other error raised inside the recursive call.
            if y + 1 < 0:
                return 0
            try:
                if some_list[y + 1][x] == 0:
                    main(some_list,(y + 1, x))
            except:
                print("out of range")
        def check_up(x,y):
            counter_up = 0  # incremented below but never read or returned
            if y - 1 < 0:
                return 0
            if some_list[y - 1][x] == 0:
                counter_up += 1
                main(some_list,(y - 1, x))
        list_of_nums.append((x,y))
        # NOTE(review): visited cells are not tracked, so two adjacent zeros
        # call back into each other; this looks like unbounded recursion.
        right = check_right(x,y)
        down = check_down(x,y)
        left = check_left(x,y)
        up = check_up(x, y)
    main(some_list,index_cord)
    print(list_of_nums)
question_3_b(mat,(0,1))
Solution #1: classic BFS
As I mention in a comment, you can tackle this problem using BFS (Breadth First Search), it will be something like this:
def valid_adj(i, j, N, M):
    """Yield the in-bounds 4-neighbours of ``(i, j)`` in an ``N`` x ``M`` grid.

    Neighbours are produced in the order down, up, right, left; positions
    outside ``0 <= row < N`` / ``0 <= col < M`` are skipped.
    """
    adjs = [[i + 1, j], [i - 1, j], [i, j + 1], [i, j - 1]]
    for a_i, a_j in adjs:
        if 0 <= a_i < N and 0 <= a_j < M:
            yield a_i, a_j


def biggest_zero_chunk(mat):
    """Return the size of the largest 4-connected region of zeros in ``mat``.

    ``mat`` is a rectangular list of rows and is not modified. An empty
    matrix (no rows, or rows without columns) yields 0.
    """
    from collections import deque

    if not mat or not mat[0]:
        return 0
    answer = 0
    N, M = len(mat), len(mat[0])
    # Mark all non-zero positions as visited (we are not interested in them).
    mask = [[mat[i][j] != 0 for j in range(M)] for i in range(N)]
    for i in range(N):
        for j in range(M):
            if mask[i][j]:  # Already counted, or not a zero
                continue
            # Breadth-first search over the zeros reachable from (i, j).
            # A cell is marked visited when it is ENQUEUED, not when it is
            # dequeued; otherwise a zero with two already-queued neighbours
            # (any 2x2 block of zeros) is queued and counted twice.
            mask[i][j] = True
            current_ans = 1
            queue = deque([(i, j)])
            while queue:
                pos_i, pos_j = queue.popleft()
                for a_i, a_j in valid_adj(pos_i, pos_j, N, M):
                    # mask is True for non-zeros too, so this test alone
                    # selects unvisited zeros.
                    if not mask[a_i][a_j]:
                        mask[a_i][a_j] = True
                        queue.append((a_i, a_j))
                        current_ans += 1
            answer = max(answer, current_ans)
    return answer
mat = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,2,0],
[0,1,2,3,3],]
mat2 = [[1,0,0,3,0],
[0,0,2,3,0],
[2,0,0,0,0], # A slight modification in this row to connect two chunks
[0,1,2,3,3],]
print(biggest_zero_chunk(mat))
print(biggest_zero_chunk(mat2))
Output:
6
10
Solution #2: using only recursion (no for statements)
def count_zeros(mat, i, j, N, M):
    """Count the zeros 4-connected to ``(i, j)`` and erase them from ``mat``.

    Returns 0 when ``(i, j)`` lies outside the ``N`` x ``M`` grid or does not
    hold a zero. Every zero that is counted is overwritten with 1 so that it
    cannot be counted again; ``mat`` is therefore modified in place.
    """
    # Base case
    # Don't search zero chunks if invalid position or non zero values
    if i < 0 or i >= N or j < 0 or j >= M or mat[i][j] != 0:
        return 0
    ans = 1  # To count the current zero we start at 1
    mat[i][j] = 1  # To erase the current zero and don't count it again
    ans += count_zeros(mat, i - 1, j, N, M)  # Up
    ans += count_zeros(mat, i + 1, j, N, M)  # Down
    ans += count_zeros(mat, i, j - 1, N, M)  # Left
    ans += count_zeros(mat, i, j + 1, N, M)  # Right
    return ans


def biggest_zero_chunk(mat, i = 0, j = 0, current_ans = 0):
    """Return the size of the largest 4-connected region of zeros in ``mat``.

    Scans the cells in row-major order using recursion only. ``i``, ``j`` and
    ``current_ans`` carry the scan position and the best size found so far;
    callers normally pass just ``mat``. ``mat`` is modified in place (its
    zeros are overwritten), so pass a copy of the rows to keep the original.
    The recursion is one frame per cell, so large grids can exceed Python's
    recursion limit.
    """
    if not mat or not mat[0]:
        return current_ans
    N, M = len(mat), len(mat[0])
    # Base case: the scan has moved past the last row. Stopping ON the last
    # cell instead would skip it and miss a zero region that is only
    # discovered there (e.g. [[0]] or a lone zero in the bottom-right corner).
    if i >= N:
        return current_ans
    next_j = (j + 1) % M  # Move to next column, 0 if j is the last one
    next_i = i + 1 if next_j == 0 else i  # Move to next row when the column wraps
    ans = count_zeros(mat, i, j, N, M)  # Count zeros from this position
    current_ans = max(ans, current_ans)  # Update the current answer
    return biggest_zero_chunk(mat, next_i, next_j, current_ans)  # Check the rest of mat
mat = [[1,0,0,3,0],
       [0,0,2,3,0],
       [2,0,0,2,0],
       [0,1,2,3,3],]
mat2 = [[1,0,0,3,0],
        [0,0,2,3,0],
        [2,0,0,0,0], # A slight modification in this row to connect two chunks
        [0,1,2,3,3],]
# biggest_zero_chunk overwrites the zeros it visits. list.copy() is shallow
# (the row lists would still be shared with the original), so every row is
# copied as well to leave mat and mat2 untouched.
print(biggest_zero_chunk([row[:] for row in mat]))
print(biggest_zero_chunk([row[:] for row in mat2]))
Output:
6
10
Notes:
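The idea behind this solution is still a flood fill, as in Solution #1, but the recursive count_zeros function explores the region depth-first (DFS) rather than breadth-first. Also, if you want to keep using the matrix values afterwards, call biggest_zero_chunk with a deep copy of the matrix, e.g. [row[:] for row in mat], because the algorithm overwrites the zeros it visits; mat.copy() alone only copies the outer list and the rows would still be modified.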
Is there a short way to detect the longest sublist with alternative signs within a list?
For instance:
my_list = [-1, -0.5, 1, -3, 4, 5, 5, -1]
returning 4 starting from -0.5 to 4?
This is what I have written so far but I feel there is room for something much shorter.
import numpy
my_list = [-1, -0.5, 1, -3, 4, 5, 5, -1]
# function that detects whether a list has alternate signs
# https://stackoverflow.com/questions/6451514/detect-alternating-signs
def is_alternating_signs(a):
    """Return a truthy value when consecutive elements of ``a`` flip sign.

    The sign of each element is -1, 0 or +1; a strict flip between two
    neighbours changes the sign by exactly 2 in absolute value. Sequences
    with fewer than two elements have no neighbours and therefore pass.
    """
    sign_steps = numpy.diff(numpy.sign(a))
    return numpy.all(numpy.abs(sign_steps) == 2)
# every contiguous, non-empty slice my_list[start:stop] of the main list
size = len(my_list)
sublists = [
    my_list[start:stop]
    for start in range(size + 1)
    for stop in range(start + 1, size + 1)
]
# length of the longest slice whose signs alternate (0 when there is none)
max_list = max(
    (len(candidate) for candidate in sublists if is_alternating_signs(candidate)),
    default=0,
)
print(max_list)
Use zip to compare the current element with the next one:
# The list to analyse (the example from the question); the loop below
# previously iterated over an undefined name `l`.
my_list = [-1, -0.5, 1, -3, 4, 5, 5, -1]
# A non-empty list always contains a streak of length 1; an empty one has none.
maxlen = curlen = 1 if my_list else 0
for i, j in zip(my_list, my_list[1:]):
    # the signs of two neighbours differ:
    # extend the current streak by 1
    if (i < 0 and j > 0) or (i > 0 and j < 0):
        curlen += 1
    # the alternation is broken:
    # keep the highest value between maxlen and curlen
    # and restart the streak at the current element
    else:
        maxlen = max(maxlen, curlen)
        curlen = 1
# the last streak is never closed inside the loop, so compare it here
maxlen = max(maxlen, curlen)
Output:
>>> maxlen
4
You can use zip to detect the positions of 'breaks' in the alternance. Then combine these breaks into ranges to find the longest streak of alternating values:
L = [-1, -0.5, 1, -3, 4, 5, 5, -1]
# Positions where the alternation breaks: index of the second element of each
# neighbouring pair whose members fall on the same side of "< 0".
breaks = [
    idx
    for idx, (left, right) in enumerate(zip(L, L[1:]), start=1)
    if (left < 0) == (right < 0)
]
# Consecutive break positions delimit the alternating streaks; None as the
# final stop makes the last slice run to the end of the list.
starts = [0] + breaks
stops = breaks + [None]
longest = max((L[lo:hi] for lo, hi in zip(starts, stops)), key=len)
print(longest)
[-0.5, 1, -3, 4]
If you're only looking for the length of the streak, you could convert the zip result to a string of 1s and 0s, then split on 0s and measure the longest substring:
max(map(len,"".join("01"[a*b<0] for a,b in zip(L,L[1:])).split('0')))+1
4
What about a single loop?
def max_alt_subseq_size(seq):
    """Return the length of the longest run of consecutive elements of
    ``seq`` whose signs strictly alternate.

    An empty sequence yields 0 (previously ``seq[0]`` raised IndexError);
    any non-empty sequence yields at least 1. A zero never continues a run,
    because its product with a neighbour is not negative.
    """
    # len() rather than truthiness so that NumPy arrays are accepted as well.
    if len(seq) == 0:
        return 0
    last_x = seq[0]
    size = max_size = 1
    for x in seq[1:]:
        # use the fact that x * y < 0 iff x > 0 and y < 0 or x < 0 and y > 0
        if last_x * x < 0:
            size += 1
        else:
            # the alternating run ended here: keep it if it is the largest so far
            max_size = max(max_size, size)
            size = 1
        last_x = x
    # the final run is never closed inside the loop, so compare it here
    return max(max_size, size)
my_list = [-1, -0.5, 1, -3, 4, 5, 5, -1]
max_alt_subseq_size(my_list)
# 4
One could have a (number of) fully vectorized approach.
The code below assumes a NumPy 1D array as input.
For example, if one computes the run-length encoding (RLE) in a vectorized fashion, it would be simple to use RLE information on some array that represents where the signs change to compute the desired value
import numpy as np
def rle(arr):
    """Run-length encode a 1-D NumPy array.

    Returns ``(values, lengths)``: the value of every run of equal
    consecutive elements and the number of elements in that run. Empty
    input gives two empty arrays (``values`` keeps the dtype of ``arr``).
    """
    n = len(arr)
    if n == 0:
        return np.empty(0, dtype=arr.dtype), np.empty(0, dtype=np.int_)
    # Index of the last element of every run except the final one ...
    change_idx = np.nonzero(arr[1:] != arr[:-1])[0]
    # ... framed by -1 (just before the first run) and n - 1 (end of the last).
    run_ends = np.concatenate([[-1], change_idx, [n - 1]])
    lengths = run_ends[1:] - run_ends[:-1]
    values = arr[run_ends[1:]]
    return values, lengths
def max_alt_rle(arr):
    """Return the length of the longest sign-alternating run in ``arr``.

    A boolean array marks the neighbouring pairs whose product is negative;
    its run-length encoding gives the lengths of the runs of ``True``. The
    longest such run plus one is the number of elements in the longest
    alternating stretch (0 for an empty array).
    """
    sign_flips = arr[1:] * arr[:-1] < 0
    values, lengths = rle(sign_flips)
    # ``values`` is boolean, so it selects the lengths of the True runs only.
    flip_runs = lengths[values]
    base = 1 if len(arr) > 0 else 0
    longest = np.max(flip_runs) if len(flip_runs) > 0 else 0
    return base + longest
Alternatively, one could make good use of the richer functionalities available to Strings/Bytes, notably str.split() to craft a very short, vectorized, but not very efficient solution:
def max_alt_np(arr):
    """Return the length of the longest sign-alternating run in ``arr``.

    The boolean "product of neighbours is negative" array is dumped to bytes
    (one byte per element) and split on the zero byte; the longest remaining
    piece is the longest streak of sign flips, and one more element than that
    takes part in the run (0 for an empty array).
    """
    flip_bytes = (arr[1:] * arr[:-1] < 0).tobytes()
    pieces = flip_bytes.split(b'\x00')
    # Every piece consists only of b'\x01' bytes, so the longest piece is
    # also the greatest one in the default ordering.
    longest_piece = max(pieces, key=len)
    base = 1 if len(arr) > 0 else 0
    return base + len(longest_piece)
If one is after raw speed, accelerating the single-loop solution with Numba would be the most efficient and fastest solution:
import numba as nb
#nb.jit
def max_alt_nb(arr):
    """Return the length of the longest sign-alternating run in ``arr``.

    Single pass over the elements: a run grows while the product of two
    neighbours is negative and restarts at length 1 otherwise. Empty input
    gives 0.
    """
    if not len(arr):
        return 0
    prev = arr[0]
    best = run = 1
    for cur in arr[1:]:
        if prev * cur < 0:
            run += 1
        else:
            # run ended: remember it if it is the longest so far
            if run > best:
                best = run
            run = 1
        prev = cur
    # the last run is still open when the loop finishes
    return run if run > best else best
Finally, here is reported an adaptation of the currently accepted answer, which is neither efficient nor fast, but it is relatively compact (but not as compact as max_alt_np and considerably slower) and can use lists without prior conversion to a NumPy array:
def max_alt_str(arr):
    """Return the length of the longest sign-alternating run in ``arr``.

    Each pair of neighbours becomes "1" when its product is negative and "0"
    otherwise; splitting that string on "0" leaves the streaks of sign
    flips. The longest streak plus one is the answer (0 for empty input).
    Works on plain lists as well as arrays.
    """
    flags = "".join(
        "1" if a * b < 0 else "0" for a, b in zip(arr[:-1], arr[1:])
    )
    # Pieces contain only "1"s, so the greatest piece is also the longest.
    longest_streak = max(flags.split("0"))
    return (1 if len(arr) > 0 else 0) + len(longest_streak)
Here some benchmarks on random integer arrays of varying size:
(Full analysis here).
You are given an array A. We need to count the sub-arrays whose XOR sum X satisfies the condition (X+1) = (X^1). Here is my solution:
def getTotalXorOfSubarrayXors(arr, N):
    """Count the sub-arrays of ``arr[0:N]`` whose XOR ``X`` has ``X + 1 == X ^ 1``.

    Brute force: every (start, end) pair is visited and the XOR of the
    elements in between is recomputed from scratch.
    """
    matches = 0
    for start in range(N):
        for end in range(start, N):
            acc = 0
            for pos in range(start, end + 1):
                acc ^= arr[pos]
            if acc + 1 == acc ^ 1:
                matches += 1
    return matches
arr = [3, 5, 2, 4, 6]
# The list is bound to `arr`; the original referred to an undefined name `A`.
N = len(arr)
print(getTotalXorOfSubarrayXors(arr, N))
But this solution has a time complexity of O(n^3), which exceeds my time limit for large arrays. Is there any way I can optimize this code to have a lower time complexity?
The condition (X+1) = (X^1) just means X must be even. So just count the even xors by using prefix-xor-counts. Takes O(n) time and O(1) space.
def getTotalXorOfSubarrayXors(A, _):
    """Count the sub-arrays of ``A`` whose XOR is even, in one pass.

    Only the lowest bit matters. A sub-array has an even XOR exactly when
    the prefix XORs on both of its sides have the same parity, so every new
    prefix pairs up with all earlier prefixes of equal parity. The second
    argument is accepted for call compatibility and ignored.
    """
    parity = 0
    seen = [1, 0]  # prefixes seen per parity; the empty prefix is even
    even_subarrays = 0
    for value in A:
        parity ^= value & 1
        even_subarrays += seen[parity]
        seen[parity] += 1
    return even_subarrays
Try it online! (with tests)
Operation X ^ 1 changes the last bit of a number. So ****1 becomes ****0 and vice versa.
So we can see that for odd values of X value of X ^ 1 is less than X, but for even X's value X ^ 1 is larger by one than X - just what we need.
Now we can count subarrays with even xor-sum. Note that we remember how many odd and even xorsums we already have for subarrays starting from zero index:
def Xors(arr, N):
    """Count the sub-arrays of ``arr`` whose XOR is even.

    Keeps a running prefix XOR and how many earlier prefixes were odd or
    even; each new prefix forms an even-XOR sub-array with every earlier
    prefix of the same parity. ``N`` is not used.
    """
    prefix = 0
    odd_seen = 0
    even_seen = 1  # the empty prefix (before index 0) is even
    total = 0
    for p in arr:
        prefix ^= p
        if prefix % 2:
            total += odd_seen
            odd_seen += 1
        else:
            total += even_seen
            even_seen += 1
    return total
I am practicing and trying to write O(n^2) program that tests whether there are two 1s lying on the same row or the same column in A. Where A = n x n matrix of 0s and 1s.
Given A as:
I should get answer return of 2 matches.
One is on the 1st row, and another on the 3rd column.
My 2nd Attempt:
def testLines():
    # Attempt at counting neighbouring 1s; reads the module-level A and
    # prints the count instead of returning it.
    count = 0
    # NOTE(review): A[x] uses a single index, so if A is a list of rows this
    # compares a whole row with 1 rather than one cell - confirm the intended
    # A[row][col] indexing.
    for x in range( 0, len(A)-1 ):
        if( (A[x] == 1) & (A[x+1] == 1) ):
            count+=1
    # NOTE(review): `&` binds tighter than `==`, so this condition parses as
    # (A[y] == (1 & A[y+1])) == 1, unlike the first loop - presumably a
    # misplaced parenthesis.
    for y in range( 0, len(A)-1):
        if( (A[y] == 1 & A[y+1]) == 1 ):
            count+=1
    print( count, '1s has been matched in the array A')
testLines()
You want to nest the two loops and change the indexes so that both x and y are parsed. Currently your code moves through (all x, y = 0) and (x = 0, all y).
A = [[0, 0, 1, 1],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 1, 0]]
def testLines():
    """Print how many pairs of directly adjacent 1s the square matrix ``A`` holds.

    For every cell that is 1, the cell below it and the cell to its right
    are checked (when they exist), so each vertical or horizontal pair of
    neighbouring 1s is counted exactly once.
    """
    size = len(A)
    matches = 0
    for row in range(size):
        for col in range(size):
            if A[row][col] != 1:
                continue
            if row + 1 < size and A[row + 1][col] == 1:
                matches += 1
            if col + 1 < size and A[row][col + 1] == 1:
                matches += 1
    print(matches, '1s has been matched in the array A')
testLines()
Alternatively, you can go the Schwarzenegger way and not check if (x+1, y) or (x, y+1) even exist. That will raise IndexErrors that you can choose to ignore.
def testLines():
count = 0
N = len(A)
for x in range(N):
for y in range(N):
try:
if A[x][y] == 1 and A[x+1][y] == 1 or A[x][y+1] == 1:
count += 1
except IndexError:
continue
print(count, '1s has been matched in the array A')
You can run one nested loop (n²) to compute the sum of each row. If a row's sum is 2 or more, that row contains at least two 1s.
Now interchange rows and columns(consider rows as columns & vice versa).
Again run nested loop (n²) to check summation of columns.
n²+n²= O(n²)