Torch code not running on GPU - python
I am running an optimization problem in torch. My torch installation is GPU compatible but for some odd reason it does not use the GPU at all when running. Everything seems to be done by the CPU and my local RAM.
import numpy as np
import scipy.sparse.csgraph as csg
import torch
from torch.autograd import Variable
import torch.autograd as autograd
import matplotlib.pyplot as plt
%matplotlib inline
def cmdscale(D):
    """Classical multidimensional scaling (MDS).

    Parameters
    ----------
    D : array-like, shape (n, n)
        Matrix of pairwise distances. It is expected to be symmetric:
        ``np.linalg.eigh`` is used on the derived Gram matrix.

    Returns
    -------
    Y : ndarray, shape (n, p)
        Embedding coordinates. Only components whose eigenvalue is strictly
        positive are kept, ordered by decreasing eigenvalue.
    evals : ndarray, shape (n,)
        All eigenvalues of the double-centered Gram matrix, in descending
        order (including the non-positive ones that were dropped from Y).
    """
    # Work in floating point: squaring a small integer dtype (e.g. uint8)
    # would silently wrap around, and a nested list has no ``**`` operator.
    D = np.asarray(D, dtype=float)

    # Number of points
    n = len(D)

    # Centering matrix
    H = np.eye(n) - np.ones((n, n)) / n

    # Double-centered Gram matrix, i.e. Y Y^T
    B = -H.dot(D ** 2).dot(H) / 2

    # Diagonalize
    evals, evecs = np.linalg.eigh(B)

    # Sort by eigenvalue in descending order
    idx = np.argsort(evals)[::-1]
    evals = evals[idx]
    evecs = evecs[:, idx]

    # Compute the coordinates using positive-eigenvalued components only.
    # Scaling the columns directly is equivalent to multiplying by
    # diag(sqrt(evals)) without materializing the diagonal matrix.
    w, = np.where(evals > 0)
    Y = evecs[:, w] * np.sqrt(evals[w])

    return Y, evals
# 15x15 matrix of 0/1 entries; it is passed to csg.shortest_path below,
# i.e. it is used as the adjacency matrix of a graph.
Y = np.array([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.],
[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.],
[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.],
[0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
[0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0.]])
# Drop every row that is entirely zero ...
temp = Y[~np.all(Y == 0, axis=1)]
# ... and the columns with the same indices (the mask is still computed from
# the rows of the original Y), so the matrix stays square.
temp = temp[:,~np.all(Y == 0, axis=1)]
Y = np.asarray(temp, dtype='uint8')
# Number of remaining nodes
n = np.shape(Y)[0]
# Number of embedding dimensions kept from the MDS initialization
k = 2
# All-pairs shortest-path distances, treating Y as a directed graph.
# NOTE(review): shortest_path reports unreachable pairs as inf, which would
# propagate into cmdscale -- confirm the graph is strongly connected.
D = csg.shortest_path(Y, directed=True)
# Initial coordinates: the first k columns of the classical-MDS embedding
Z = cmdscale(D)[0][:,0:k]
# Center each coordinate column at zero
Z = Z - Z.mean(axis=0, keepdims=True)
def distMatrix(m):
    """Return the matrix of smoothed Euclidean distances between rows of ``m``.

    Parameters
    ----------
    m : torch.Tensor, shape (n, d)
        One point per row.

    Returns
    -------
    torch.Tensor, shape (n, n)
        Entry (i, j) is ``sqrt(||m[i] - m[j]||^2 + 1e-4)``.
    """
    # Broadcasting (n, 1, d) against (1, n, d) yields all pairwise differences
    # without explicit expand() calls.
    diff = m.unsqueeze(1) - m.unsqueeze(0)
    # The 1e-4 offset keeps the argument of sqrt strictly positive, so the
    # gradient stays finite where the distance is zero (e.g. the diagonal).
    return torch.sqrt(torch.pow(diff, 2).sum(2) + 1e-4)
def loss(tY):
    """Negative log-likelihood of the 0/1 matrix ``tY`` under a distance model.

    The logit for the pair (i, j) is ``B - distMatrix(tZ)[i, j]``; the
    per-entry term is the Bernoulli log-likelihood of ``tY[i, j]`` under
    ``sigmoid(logit)``. Diagonal entries are excluded from the sum.

    Reads the module-level tensors ``tZ`` (coordinates) and ``B`` (bias).

    Parameters
    ----------
    tY : torch.Tensor, shape (n, n)
        Target matrix with entries in {0, 1}.

    Returns
    -------
    torch.Tensor
        Scalar tensor holding the summed negative log-likelihood.
    """
    logits = B - distMatrix(tZ)

    # log(sigmoid(x)) and log(1 - sigmoid(x)) == log(sigmoid(-x)) are computed
    # with logsigmoid so that large |x| does not underflow to log(0) = -inf.
    log_p = torch.nn.functional.logsigmoid(logits)
    log_not_p = torch.nn.functional.logsigmoid(-logits)
    log_lik = tY * log_p + (1 - tY) * log_not_p

    # Remove the diagonal. The mask is boolean (byte-mask indexing is
    # deprecated) and is created on whatever device the data lives on instead
    # of being hard-wired to CUDA; selecting entries avoids mutating the
    # autograd-tracked tensor in place.
    off_diagonal = ~torch.eye(tY.size(0), device=log_lik.device).bool()
    return -log_lik[off_diagonal].sum()
# Use the GPU when CUDA is actually usable, otherwise fall back to the CPU
# instead of failing inside the tensor constructors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Trainable parameters: node coordinates (initialized from the MDS embedding)
# and a scalar bias. Plain tensors with requires_grad replace the deprecated
# autograd.Variable / torch.cuda.FloatTensor constructors.
tZ = torch.tensor(Z, dtype=torch.float32, device=device, requires_grad=True)
B = torch.zeros(1, device=device, requires_grad=True)
# Fixed target matrix
tY = torch.tensor(Y, dtype=torch.float32, device=device)

losses = []
biases = []
learning_rate = 1e-3
epochs = 10000
percentDone = 0
percent = 5
# Integer reporting interval (avoids a float modulo); at least 1 so that a
# small `epochs` cannot produce a modulo-by-zero.
report_every = max(1, epochs * percent // 100)

for i in range(epochs):
    if i % report_every == 0:
        percentDone += percent
        print(str(percentDone) + "%")

    l = loss(tY)
    # The graph is rebuilt by loss() on every iteration, so it does not need
    # to be retained after the backward pass.
    l.backward()

    # Store Python floats rather than device tensors so the histories can be
    # plotted directly and do not hold on to device memory.
    losses.append(float(l))
    biases.append(B.item())

    # Plain gradient-descent step; no_grad keeps the update itself out of the
    # autograd graph.
    with torch.no_grad():
        tZ -= learning_rate * tZ.grad
        B -= learning_rate * B.grad

    tZ.grad.zero_()
    B.grad.zero_()

plt.subplot(122)
plt.plot(losses)
plt.title('Loss')
plt.xlabel('Iteration')
plt.ylabel('loss')
plt.show()
There's an awful lot of code, but it is a working example. How do I make this code run on my GPU? Is it even possible? Any hints or nudges in the right direction would be greatly appreciated.
Maybe you're missing the cuda toolkit or it doesn't work properly with your PyTorch installation.
Can you first check if this function
torch.cuda.is_available()
returns True. If it does not, you should check that the CUDA toolkit works and that the PyTorch version you are using is the correct build for your CUDA installation.
Related
Jacobian function in autograd keeps returning empty array even when the numpy wrapper is used
I'm having trouble with using the Jacobian function in autograd.py. Here are some functions I've defined: import autograd.numpy as np from autograd import elementwise_grad as egrad from autograd import hessian,jacobian def unicycle_continuous_dynamics(x, u): # x = [x position, y position, heading, forward velocity] # u = [omega, forward acceleration] #m = sym if x.dtype == object else np # Check type for autodiff x_pos = x[3]*np.cos(x[2]) y_pos = x[3]*np.sin(x[2]) heading_rate = u[0] v_dot = u[1] x_d = np.array([ x_pos, y_pos, heading_rate, v_dot ]) return x_d def discrete_dynamics(x, u): dt = 0.05 #Euler integrator below and return the next state x = x + dt*unicycle_continuous_dynamics(x,u) x_next = x return x_next def discrete_dynamics_multiple(x,u,x_dim,u_dim): x_new = [] for i,j in zip(range(len(x_dim)),range(len(u_dim))): x_new.append(discrete_dynamics(x[i*n_states:(i+1)*n_states],u[j*n_inputs:(j+1)*n_inputs])) return np.asarray(x_new).flatten() def f(x,u,x_dim,u_dim): return discrete_dynamics_multiple(x,u,x_dim,u_dim) def f_x(x,u,x_dim,u_dim): return jacobian(f,0)(x,u,x_dim,u_dim) def f_u(x,u,x_dim,u_dim): return jacobian(f,1)(x,u,x_dim,u_dim) n_states=4 n_inputs=2 x_try = x0 = np.array([0.5, 1.5, 0, 0, #1st row for agent1, 2nd row for agent2, 3rd row for agent3 2.5, 1.5, np.pi ,0 , 1.5, 1.3, np.pi/2 , 0.1 ]) u_try = np.array([0.5,0.5,0.2,0.2,0.1,0.1]) now when I call f_x, the following is returned with a warning: f_x(x_try,u_try,(4,4,4),(2,2,2)) array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 
0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) D:\Softwares\Anaconda\lib\site-packages\autograd\tracer.py:14: UserWarning: Output seems independent of input. warnings.warn("Output seems independent of input.") The shape of f_x is correct, but it shouldn't be zeros everywhere. I can't figure out what's wrong with the jacobian function. Any suggestions?
numpy resize array width by zeros
I'm a typical user of R, but in python I'm stuck. I have a lot of images saved as NumPy array I need to resize the pad of array/images to 4k resolution from different widths which oscillated between 1620 to 2800, the height is constant: 2160. I need to resize the pad of each array/image to 3840*2160, ie. add a black border on right and left side, so that the array/image itself remains unchanged. For resizing I try this, but the code adds black edges to all sides. arr = np.array([[1,1,1],[1,1,1],[1,1,1],[1,1,1]]) FinalWidth = 20 def pad_with(vector, pad_width, iaxis, kwargs): pad_value = kwargs.get('padder', 0) vector[:pad_width[0]] = pad_value arr2 = np.pad(arr,FinalWidth/2,pad_with)
I think you just need hstack, assuming you want half the width to go on either side: def pad_with(vector, pad_width): temp = np.hstack((np.zeros((vector.shape[0], pad_width//2)), vector)) return np.hstack((temp, np.zeros((vector.shape[0], pad_width//2)))) arr2 = pad_with(arr,FinalWidth) arr2 >>> array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]) arr2.shape >>> (4, 23)
How to make a big tridiagonal matrix with matrices?
How can I make a matrix H from two smaller matrices H_0 and H_1 as shown in the attached image? The final dimension is finite.
Here is an example. a = np.array([[1,2,3],[4,5,6]]) b = np.ones(shape=(3,3)) a_r = a.reshape((-1,)) b_r = b.reshape((-1,)) b_r_ = np.diag(b_r,k=1) b_r_ = b_r_ + b_r_.transpose() for i in range(b_r_.shape[0]): if i < len(a_r): b_r_[i][i]=a_r[i] else: b_r_[i][i]=0 Output: array([[1., 1., 0., 0., 0., 0., 0., 0., 0., 0.], [1., 2., 1., 0., 0., 0., 0., 0., 0., 0.], [0., 1., 3., 1., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 4., 1., 0., 0., 0., 0., 0.], [0., 0., 0., 1., 5., 1., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 6., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 1., 0., 1., 0., 0.], [0., 0., 0., 0., 0., 0., 1., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 0., 1., 0., 1.], [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]]) Concern: I think this is not the most computationally efficient way but I think it works
H = np.kron(np.eye(r,dtype=int),H_0) + np.kron(np.diag(np.ones(r-1), 1),H_1) + np.kron(np.diag(np.ones(r-1), -1),transpose(conj(H_1))) #r = repetition
Fill Bounding Boxes in 2D array
I have a 2D numpy array which looks like array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]) ` I want to create bounding box like masks over the 1s shown above. 
For example it should look like this array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.]]) How can I do it it easily? Also how do I do it if other no.s like 2,3 etc exist but I want to ignore them and the groups are mostly 2.
We have skimage.measure to make life easy when it comes to component labeling. We can use skimage.measure.label to label the different components in the array, and skimage.measure.regionprops to obtain the corresponding slices, which we can use to set the values to 1 in this case: def fill_bounding_boxes(x): l = label(x) for s in regionprops(l): x[s.slice] = 1 return x If we try with the proposed example: from skimage.measure import label, regionprops a = np.array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]]) We get: fill_bounding_boxes(x) array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], 
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0.]])
While the previous responses are perfectly fine, here's how you could do it with scipy.ndimage: import numpy as np from scipy import ndimage def fill_bboxes(x): x_components, _ = ndimage.measurements.label(x, np.ones((3, 3))) bboxes = ndimage.measurements.find_objects(x_components) for bbox in bboxes: x[bbox] = 1 return x ndimage.measurements.label does a connected component labelling with the 3x3-"ones" matrix defining the neighbourhood. find_objects then determines the bounding box for each component, which you can then use to set everything within to 1.
There is one solution, but it's a little bit hacky and I will not program it for you. OpenCV, an image processing library, has an algorithm for finding rectangular contours, either straight or rotated. What you may want to do is transform your array into a 2D grayscale image, find the contours, and write your 1s inside the contours. Check this image from the OpenCV docs (section 7.a): https://docs.opencv.org/3.4/dd/d49/tutorial_py_contour_features.html You would be interested in everything that is inside the green lines. To be honest, this seems much easier to me than programming your own algorithm for bounding boxes. Note: of course you don't really need to do the image conversion; I think it is enough to use OpenCV's algorithm for the bounding boxes (contours).
This is an interesting problem. A 2D convolution is a natural approach. However, if the input matrix is sparse (as it appears in your example), this can be costly. For sparse matrix, another approach is to use a clustering algorithm. This extracts only the non-zero pixels from the input box a (the array in your example), and runs a hierarchical clustering. The clustering is based on a special distance matrix (a tuple). Merging happens if boxes are separated by a max of 1 pixel in either direction. You can also apply filter for any numbers you need in the initialization step (say only do for a[row, col]==1 and skip any other numbers, or whatever you wish. from collections import namedtuple Point = namedtuple("Point",["x","y"]) # a pixel on the matrix Box = namedtuple("Box",["tl","br"]) # a box defined by top-lef/bottom-right def initialize(a): """ create a separate bounding box at each non-zero pixel. """ boxes = [] rows, cols = a.shape for row in range(rows): for col in range(cols): if a[row, col] != 0: boxes.append(Box(Point(row, col),Point(row, col))) return boxes def dist(box1, box2): """ dist between boxes is from top-left to bottom-right, or reverse. """ x = min(abs(box1.br.x - box2.tl.x), abs(box1.tl.x - box2.br.x)) y = min(abs(box1.br.y - box2.tl.y), abs(box1.tl.y - box2.br.y)) return x, y def merge(boxes, i, j): """ pop the boxes at the indices, merge and put back at the end. """ if i == j: return if i >= len(boxes) or j >= len(boxes): return ii = min(i, j) jj = max(i, j) box_i = boxes[ii] box_j = boxes[jj] x, y = dist(box_i, box_j) if x < 2 or y < 2: tl = Point(min(box_i.tl.x, box_j.tl.x),min(box_i.tl.y, box_j.tl.y)) br = Point(max(box_i.br.x, box_j.br.x),max(box_i.br.y, box_j.br.y)) del boxes[ii] del boxes[jj-1] boxes.append(Box(tl, br)) def cluster(a, max_iter=100): """ initialize the cluster. then loop through the length and merge boxes. break if `max_iter` reached or no change in length. 
""" boxes = initialize(a) n = len(boxes) k = 0 while k < max_iter: for i in range(n): for j in range(n): merge(boxes, i, j) if n == len(boxes): break n = len(boxes) k = k+1 return boxes cluster(a) # output: [Box(tl=Point(x=2, y=2), br=Point(x=5, y=4)),Box(tl=Point(x=11, y=9), br=Point(x=14, y=11))] # performance 275 µs ± 887 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each) # compares to 637 µs ± 9.36 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) for #the method based on 2D convolution This returns a list of boxes defined by the corner points (top-left and bottom-right). Here x is the row number and y is the column numbers. The initialization loops through the entire matrix. But after that we only process a very small subset of points. By changing the dist function, you can customize the box definition (overlapping, non-overlapping etc). Performance can further be optimized (for e.g. breaking if i or j greater the length of boxes within the for loops, than simply returning from the merge function and continue).
python scatter threshold function not working
I want a scatterplot with values exceeding a particular threshold to have another color then the ones "inside" the threshold. Here is what I wrote so far: import numpy as np import numpy.random as rnd import matplotlib.pyplot as plt n = 100 x = rnd.uniform(low = -1, high = 1, size = n) y = rnd.uniform(low = -1, high = 1, size = n) a = x**2 + y**2 c = np.zeros(n) for i in range(n): if a[i] <= 1: c[i] = 0 else: c[i] = 1 plt.scatter(x,y, color = c) plt.show() the output is a completely black scatter plot. c = array([ 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1.]) I tried the following: for i in range(n): if a[i] <= 1: c[i] = "r" else: c[i] = "g" ValueError: could not convert string to float: r and several other variations of the theme. However I am stuck. Please help, thank you very much for your time. Best wishes
You have c defined as a numeric (float) array with this line: c = np.zeros(n) But then in your second code snippet you are trying to store a string in it: c[i] = "r" Choose a new name for your list of color strings: cs = [] for i in range(n): if a[i] <= 1: cs.append("r") else: cs.append("g") If scatter complains about the colors not being a NumPy array, you can create a NumPy character array with numpy.chararray.