Related
I'm trying to automate a trading strategy which should enter/exit a long position when the current price is the minimum/maximum among the previous k prices.
The result should contain 1 if the current number is maximum among previous k numbers, -1 if it is the minimum and 0 if none of the conditions are true.
For example if k = 3 and the numpyp array = [1, 2, 3, 2, 1, 6], the result should be an array like:
[0, 0, 1, 0, -1, 1].
I tried the numpy's max function but don't know how to take into account the previous k numbers instead of fixed index and how to switch to default condition for the first k - 1 numbers which should be 0 since there are not k number available to compare them with.
I will use Pandas
import pandas as pd
array = [1, 2, 3, 2, 1, 6]
df = pd.DataFrame(array)
df['rolling_max'] = df[0].rolling(3).max()
df['rolling_min'] = df[0].rolling(3).min()
df['result'] = df.apply(lambda row: 1 if row[0] == row['rolling_max'] else (-1 if row[0] == row['rolling_min'] else 0), axis=1)
Here is a solution with numpy using numpy.lib.stride_tricks.sliding_window_view, which was introduced in version 1.20.0.
Note that this solution (like the one proposed by #Hanwei Tang) does not exactly yield the result you was looking for, because in the second window ([2, 3, 2]) 2 is the minimum value and thus a -1 is returned instead of zero (what you requested). But maybe you should rethink whether you really want a zero for the second window or a -1.
EDIT: If a windows only contains same numbers, i.e. the minimum and maximum are the same, this method returns a zero.
import numpy as np
def rolling_max(a, wsize):
windows = np.lib.stride_tricks.sliding_window_view(a, wsize)
return np.max(windows, axis=-1)
def rolling_min(a, wsize):
windows = np.lib.stride_tricks.sliding_window_view(a, wsize)
return np.min(windows, axis=-1)
def check_prize(a, wsize):
rmax = rolling_max(a, wsize)
rmin = rolling_min(a, wsize)
ismax = np.where(a[wsize-1:] == rmax, 1, 0)
ismin = np.where(a[wsize-1:] == rmin, -1, 0)
result = np.zeros_like(a)
result[wsize-1:] = ismax + ismin
return result
a = np.array([1, 2, 3, 2, 1, 6])
check_prize(a, wsize=3)
# Output:
# array([ 0, 0, 1, -1, -1, 1])
b = np.array([1, 2, 4, 3, 1, 6])
check_prize(b, wsize=3)
# Output:
# array([ 0, 0, 1, 0, -1, 1])
c = np.array([1, 2, 2, 2, 1, 6])
check_prize(c, wsize=3)
# Output:
# array([ 0, 0, 1, 0, -1, 1])
Another approach using sliding_window_view with pad:
from numpy.lib.stride_tricks import sliding_window_view as swv
k = 3
a = np.array([1, 2, 3, 2, 1, 6])
# create sliding window
v = swv(np.pad(a.astype(float), (k-1, 0), constant_values=np.nan), k)
# compare each element to min/max of sliding window
out = np.select([np.max(v, 1)==a, np.min(v, 1)==a], [1, -1], 0)
Output: array([ 0, 0, 1, -1, -1, 1])
Let's say I have a NumPy array:
x = np.array([0, 1, 2, 0, 4, 5, 6, 7, 0, 0])
At each index, I want to find the distance to nearest zero value. If the position is a zero itself then return zero as a distance. Afterward, we are only interested in distances to the nearest zero that is to the right of the current position. The super naive approach would be something like:
out = np.full(x.shape[0], x.shape[0]-1)
for i in range(x.shape[0]):
j = 0
while i + j < x.shape[0]:
if x[i+j] == 0:
break
j += 1
out[i] = j
And the output would be:
array([0, 2, 1, 0, 4, 3, 2, 1, 0, 0])
I'm noticing a countdown/decrement pattern in the output in between the zeros. So, I might be able to do use the locations of the zeros (i.e., zero_indices = np.argwhere(x == 0).flatten())
What is the fastest way to get the desired output in linear time?
Approach #1 : Searchsorted to the rescue for linear-time in a vectorized manner (before numba guys come in)!
mask_z = x==0
idx_z = np.flatnonzero(mask_z)
idx_nz = np.flatnonzero(~mask_z)
# Cover for the case when there's no 0 left to the right
# (for same results as with posted loop-based solution)
if x[-1]!=0:
idx_z = np.r_[idx_z,len(x)]
out = np.zeros(len(x), dtype=int)
idx = np.searchsorted(idx_z, idx_nz)
out[~mask_z] = idx_z[idx] - idx_nz
Approach #2 : Another with some cumsum -
mask_z = x==0
idx_z = np.flatnonzero(mask_z)
# Cover for the case when there's no 0 left to the right
if x[-1]!=0:
idx_z = np.r_[idx_z,len(x)]
out = idx_z[np.r_[False,mask_z[:-1]].cumsum()] - np.arange(len(x))
Alternatively, last step of cumsum could be replaced by repeat functionality -
r = np.r_[idx_z[0]+1,np.diff(idx_z)]
out = np.repeat(idx_z,r)[:len(x)] - np.arange(len(x))
Approach #3 : Another with mostly just cumsum -
mask_z = x==0
idx_z = np.flatnonzero(mask_z)
pp = np.full(len(x), -1)
pp[idx_z[:-1]] = np.diff(idx_z) - 1
if idx_z[0]==0:
pp[0] = idx_z[1]
else:
pp[0] = idx_z[0]
out = pp.cumsum()
# Handle boundary case and assigns 0s at original 0s places
out[idx_z[-1]:] = np.arange(len(x)-idx_z[-1],0,-1)
out[mask_z] = 0
You could work from the other side. Keep a counter on how many non zero digits have passed and assign it to the element in the array. If you see 0, reset the counter to 0
Edit: if there is no zero on the right, then you need another check
x = np.array([0, 1, 2, 0, 4, 5, 6, 7, 0, 0])
out = x
count = 0
hasZero = False
for i in range(x.shape[0]-1,-1,-1):
if out[i] != 0:
if not hasZero:
out[i] = x.shape[0]-1
else:
count += 1
out[i] = count
else:
hasZero = True
count = 0
print(out)
You can use the difference between the indices of each position and the cumulative max of zero positions to determine the distance to the preceding zero. This can be done forward and backward. The minimum between forward and backward distance to the preceding (or next) zero will be the nearest:
import numpy as np
indices = np.arange(x.size)
zeroes = x==0
forward = indices - np.maximum.accumulate(indices*zeroes) # forward distance
forward[np.cumsum(zeroes)==0] = x.size-1 # handle absence of zero from edge
forward = forward * (x!=0) # set zero positions to zero
zeroes = zeroes[::-1]
backward = indices - np.maximum.accumulate(indices*zeroes) # backward distance
backward[np.cumsum(zeroes)==0] = x.size-1 # handle absence of zero from edge
backward = backward[::-1] * (x!=0) # set zero positions to zero
distZero = np.minimum(forward,backward) # closest distance (minimum)
results:
distZero
# [0, 1, 1, 0, 1, 2, 2, 1, 0, 0]
forward
# [0, 1, 2, 0, 1, 2, 3, 4, 0, 0]
backward
# [0, 2, 1, 0, 4, 3, 2, 1, 0, 0]
Special case where no zeroes are present on outer edges:
x = np.array([3, 1, 2, 0, 4, 5, 6, 0,8,8])
forward: [9 9 9 0 1 2 3 0 1 2]
backward: [3 2 1 0 3 2 1 0 9 9]
distZero: [3 2 1 0 1 2 1 0 1 2]
also works with no zeroes at all
[EDIT] non-numpy solutions ...
if you're looking for an O(N) solution that doesn't require numpy, you can apply this strategy using the accumulate function from itertools:
x = [0, 1, 2, 0, 4, 5, 6, 7, 0, 0]
from itertools import accumulate
maxDist = len(x) - 1
zeroes = [maxDist*(v!=0) for v in x]
forward = [*accumulate(zeroes,lambda d,v:min(maxDist,(d+1)*(v!=0)))]
backward = accumulate(zeroes[::-1],lambda d,v:min(maxDist,(d+1)*(v!=0)))
backward = [*backward][::-1]
distZero = [min(f,b) for f,b in zip(forward,backward)]
print("x",x)
print("f",forward)
print("b",backward)
print("d",distZero)
output:
x [0, 1, 2, 0, 4, 5, 6, 7, 0, 0]
f [0, 1, 2, 0, 1, 2, 3, 4, 0, 0]
b [0, 2, 1, 0, 4, 3, 2, 1, 0, 0]
d [0, 1, 1, 0, 1, 2, 2, 1, 0, 0]
If you don't want to use any library, you can accumulate the distances manually in a loop:
x = [0, 1, 2, 0, 4, 5, 6, 7, 0, 0]
forward,backward = [],[]
fDist = bDist = maxDist = len(x)-1
for f,b in zip(x,reversed(x)):
fDist = min(maxDist,(fDist+1)*(f!=0))
forward.append(fDist)
bDist = min(maxDist,(bDist+1)*(b!=0))
backward.append(bDist)
backward = backward[::-1]
distZero = [min(f,b) for f,b in zip(forward,backward)]
print("x",x)
print("f",forward)
print("b",backward)
print("d",distZero)
output:
x [0, 1, 2, 0, 4, 5, 6, 7, 0, 0]
f [0, 1, 2, 0, 1, 2, 3, 4, 0, 0]
b [0, 2, 1, 0, 4, 3, 2, 1, 0, 0]
d [0, 1, 1, 0, 1, 2, 2, 1, 0, 0]
My first intuition would be to use slicing. If x can be a normal list instead of a numpy array, then you could use
out = [x[i:].index(0) for i,_ in enumerate(x)]
if numpy is necessary then you can use
out = [np.where(x[i:]==0)[0][0] for i,_ in enumerate(x)]
but this is less efficient because you are finding all zero locations to the right of the value and then pulling out just the first. Almost definitely a better way to do this in numpy.
Edit: I am sorry, I misunderstood. This will give you the distance to the nearest zeros - may it be at left or right. But you can use d_right as intermediate result. This does not cover the edge case of not having any zero to the right though.
import numpy as np
x = np.array([0, 1, 2, 0, 4, 5, 6, 7, 0, 0])
# Get the distance to the closest zero from the left:
zeros = x == 0
zero_locations = np.argwhere(x == 0).flatten()
zero_distances = np.diff(np.insert(zero_locations, 0, 0))
temp = x.copy()
temp[~zeros] = 1
temp[zeros] = -(zero_distances-1)
d_left = np.cumsum(temp) - 1
# Get the distance to the closest zero from the right:
zeros = x[::-1] == 0
zero_locations = np.argwhere(x[::-1] == 0).flatten()
zero_distances = np.diff(np.insert(zero_locations, 0, 0))
temp = x.copy()
temp[~zeros] = 1
temp[zeros] = -(zero_distances-1)
d_right = np.cumsum(temp) - 1
d_right = d_right[::-1]
# Get the smallest distance from both sides:
smallest_distances = np.min(np.stack([d_left, d_right]), axis=0)
# np.array([0, 1, 1, 0, 1, 2, 2, 1, 0, 0])
With
input = [0,0,5,9,0,4,10,3,0]
as list
I need an output, which is going to be two highest values in input while setting other list elements to zero.
output = [0,0,0,9,0,0,10,0,0]
The closest I got:
from itertools import compress
import numpy as np
import operator
input= [0,0,5,9,0,4,10,3,0]
top_2_idx = np.argsort(test)[-2:]
test[top_2_idx[0]]
test[top_2_idx[1]]
Can you please help?
You can sort, find the two largest values, and then use a list comprehension:
input = [0,0,5,9,0,4,10,3,0]
*_, c1, c2 = sorted(input)
result = [0 if i not in {c1, c2} else i for i in input]
Output:
[0, 0, 0, 9, 0, 0, 10, 0, 0]
Not as pretty as Ajax's solution but a O(n) solution and a little more dynamic:
from collections import deque
def zero_non_max(lst, keep_top_n):
"""
Returns a list with all numbers zeroed out
except the keep_top_n.
>>> zero_non_max([0, 0, 5, 9, 0, 4, 10, 3, 0], 3)
>>> [0, 0, 5, 9, 0, 0, 10, 0, 0]
"""
lst = lst.copy()
top_n = deque(maxlen=keep_top_n)
for index, x in enumerate(lst):
if len(top_n) < top_n.maxlen or x > top_n[-1][0]:
top_n.append((x, index))
lst[index] = 0
for val, index in top_n:
lst[index] = val
return lst
lst = [0, 0, 5, 9, 0, 4, 10, 3, 0]
print(zero_non_max(lst, 2))
Output:
[0, 0, 0, 9, 0, 0, 10, 0, 0]
Pure numpy approach:
import numpy as np
arr = np.array([0, 0, 5, 9, 0, 4, 10, 3, 0])
top_2_idx = np.argsort(arr)[-2:]
np.put(arr, np.argwhere(~np.isin(arr, arr[top_2_idx])), 0)
print(arr)
The output:
[ 0 0 0 9 0 0 10 0 0]
Numpy.put
It's possible to achieve this with a single list traversal, making the algorithm O(n):
First find the two highest values with a single traversal;
Then create a list of zeros and add in the found maxima.
Code
def two_max(lst):
# Find two highest values in a single traversal
max_i, max_j = 0, 1
for i in range(len(lst)):
_, max_i, max_j = sorted((max_i, max_j, i), key=lst.__getitem__)
# Make a new list with zeros and replace both maxima
new_lst = [0] * len(lst)
new_lst[max_i], new_lst[max_j] = lst[max_i], lst[max_j]
return new_lst
lst = [0, 0, 5, 9, 0, 4, 10, 3, 0]
print(two_max(lst)) # [0, 0, 0, 9, 0, 0, 10, 0, 0]
Note that if the maximum value in the list appears more than twice, only the two left-most values will appear.
As a sidenote, do not use names such as input in your code as this overshadows the built-in function of the same name.
Here is another numpy-based solution that avoids sorting the entire array, which takes O(nlogn) time.
import numpy as np
arr = np.array([0,0,5,9,0,4,10,3,0])
arr[np.argpartition(arr,-2)[:-2]] = 0
If you want to create a new array as output:
result = np.zeros_like(arr)
idx = np.argpartition(arr,-2)[-2:]
result[idx] = arr[idx]
A corresponding Python-native solution is to use heap.nlargest, which also avoids sorting the entire array.
import heapq
arr = [0,0,5,9,0,4,10,3,0]
l = len(arr)
idx1, idx2 = heapq.nlargest(2, range(l), key=arr.__getitem__)
result = [0] * l
result[idx1] = arr[idx1]
result[idx2] = arr[idx2]
I'm completely at a loss on trying to figure out how to get the individual numbers inside in a list
Here is my code:
infinity = 1000000
invalid_node = -1
class Node:
previous = invalid_node
distFromSource = infinity
visited = False
def populateNetwork(fileName):
network = []
networkFile = open(fileName, "r")
for line in networkFile:
network.append(map(int, line.strip().split(',')))
return network
def populateNodeTable(network, StartNode):
nodeTable = []
for node in network:
nodeTable.append(Node())
nodeTable[StartNode].distFromSource = 0
nodeTable[StartNode].visited = True
return nodeTable
network = populateNetwork('network.txt')
nodeTable = populateNodeTable(network, 1)
nodeTable2 = populateNodeTable(network, 2)
print "Visited Nodes"
for node in nodeTable:
print node.previous, node.distFromSource, node.visited
print
print "This is what is inside network"
for line in network:
print line
print
print "what is inside index 6"
print network[6]
Here is the output:
Visited Nodes
-1 1000000 False
-1 0 True
-1 1000000 False
-1 1000000 False
-1 1000000 False
-1 1000000 False
-1 1000000 False
This is what is inside network
[0, 2, 4, 1, 6, 0, 0]
[2, 0, 0, 0, 5, 0, 0]
[4, 0, 0, 0, 0, 5, 0]
[1, 0, 0, 0, 1, 1, 0]
[6, 5, 0, 1, 0, 5, 5]
[0, 0, 5, 1, 5, 0, 0]
[0, 0, 0, 0, 5, 0, 0]
what is inside index 6
[0, 0, 0, 0, 5, 0, 0]
My question is, how do I get the individual numbers inside an index to be used to calculate? So for example index[1] contains "0, 2, 4, 1, 6, 0, 0" and I am going to use these numbers to do addition so 0+2+4+1+6+0+0 = 13. I'm really confused.
print network[0] # 0, 2, 4, 1, 6, 0, 0
print network[0][0] # 0
print network[0][1] # 2
print network[0][2] # 4
for x in network[0]:
print x
# 0
# 2
# 4
# 1
# 6
# 0
# 0
print sum(network[0]) # 13
I have looked at the following prim's algorithm (in order to create a minimum spanning tree) and I am unsure as to what the input value s in the following code is, I think the G of course would be the graph sent (adjacency matrix or list graphs) and I think the value s is where the start should be? Also if it is the start then in what way would you send a starting value to the following algorithm?:
from heapq import heappop, heappush
def prim(self, G, s):
P, Q = {}, [(0, None, s)]
while Q:
_, p, u = heappop(Q)
if u in P: continue
P[u] = p
for v, w in G[u].items():
heappush(Q, (w, u, v))
return P
Any help will be much appreciated, thank you!
Here you are:
#A = adjacency matrix, u = vertex u, v = vertex v
def weight(A, u, v):
return A[u][v]
#A = adjacency matrix, u = vertex u
def adjacent(A, u):
L = []
for x in range(len(A)):
if A[u][x] > 0 and x <> u:
L.insert(0,x)
return L
#Q = min queue
def extractMin(Q):
q = Q[0]
Q.remove(Q[0])
return q
#Q = min queue, V = vertex list
def decreaseKey(Q, K):
for i in range(len(Q)):
for j in range(len(Q)):
if K[Q[i]] < K[Q[j]]:
s = Q[i]
Q[i] = Q[j]
Q[j] = s
#V = vertex list, A = adjacency list, r = root
def prim(V, A, r):
u = 0
v = 0
# initialize and set each value of the array P (pi) to none
# pi holds the parent of u, so P(v)=u means u is the parent of v
P=[None]*len(V)
# initialize and set each value of the array K (key) to some large number (simulate infinity)
K = [999999]*len(V)
# initialize the min queue and fill it with all vertices in V
Q=[0]*len(V)
for u in range(len(Q)):
Q[u] = V[u]
# set the key of the root to 0
K[r] = 0
decreaseKey(Q, K) # maintain the min queue
# loop while the min queue is not empty
while len(Q) > 0:
u = extractMin(Q) # pop the first vertex off the min queue
# loop through the vertices adjacent to u
Adj = adjacent(A, u)
for v in Adj:
w = weight(A, u, v) # get the weight of the edge uv
# proceed if v is in Q and the weight of uv is less than v's key
if Q.count(v)>0 and w < K[v]:
# set v's parent to u
P[v] = u
# v's key to the weight of uv
K[v] = w
decreaseKey(Q, K) # maintain the min queue
return P
A = [ [0, 4, 0, 0, 0, 0, 0, 8, 0],
[4, 0, 8, 0, 0, 0, 0, 11, 0],
[0, 8, 0, 7, 0, 4, 0, 0, 2],
[0, 0, 7, 0, 9, 14, 0, 0, 0],
[0, 0, 0, 9, 0, 10, 0, 0, 0],
[0, 0, 4, 14, 10, 0, 2, 0, 0],
[0, 0, 0, 0, 0, 2, 0, 1, 6],
[8, 11, 0, 0, 0, 0, 1, 0, 7],
[0, 0, 2, 0, 0, 0, 6, 7, 0]]
V = [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ]
P = prim(V, A, 0)
print P
[None, 0, 5, 2, 3, 6, 7, 0, 2]
G is the graph or the adjacency matrix and s is any random starting node which u can give , it does not matter which of the node you choose