I get this horrible massive error when trying to plot using matplotlib:
Traceback (most recent call last):
  File "24oct_specanal.py", line 90, in <module>
    main()
  File "24oct_specanal.py", line 83, in main
    plt.plot(Svar,Sav)
  File "/usr/lib64/python2.6/site-packages/matplotlib/pyplot.py", line 2458, in plot
    ret = ax.plot(*args, **kwargs)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 3849, in plot
    self.add_line(line)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1443, in add_line
    self._update_line_limits(line)
  File "/usr/lib64/python2.6/site-packages/matplotlib/axes.py", line 1451, in _update_line_limits
    p = line.get_path()
  File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 644, in get_path
    self.recache()
  File "/usr/lib64/python2.6/site-packages/matplotlib/lines.py", line 392, in recache
    x = np.asarray(xconv, np.float_)
  File "/usr/lib64/python2.6/site-packages/numpy/core/numeric.py", line 235, in asarray
    return array(a, dtype, copy=False, order=order)
ValueError: setting an array element with a sequence.
This is the code I am using:
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array
def makeAImatrix(n):
    A=np.zeros((n,n))
    I=np.ones((n))
    for i in range(0,n):
        for j in range(i+1,n):
            A[j,i]=random.random()
    for i in range(0,n):
        for j in range(i+1,n):
            A[i,j] = A[j,i]
    for i in range(n):
        A[i,i]=1
    return (A, I)
def main():
    n=5 #number of species
    t=1 # number of matrices to check
    Aflat = []
    Aflatlist = [] #list of matrices
    Aflatav = []
    Aflatvar = []
    Aflatskew = []
    remspec = []
    Afreeze = [] #this is a LIST OF VECTORS that stores the vector corresponding to each extinct species as
                 #it is taken out. it is NOT the same as the original A matrix as it is only
                 #coherent in one direction. it is also NOT SQUARE.
    Sex = [] # (Species extinct) this is a vector that corresponds to the Afreeze matrix. if a species is extinct then
             #the value stored here will be -1.
    Sav = [] # (Species average) The average value of the A coefficients for each species
    Svar = [] # (Species variance)
    for k in range (0,t):
        allpos = 0
        A, I = makeAImatrix(n)
        while allpos !=1: #while all solutions are not positive
            x = numpy.linalg.solve(A,I)
            if any(t<0 for t in x): #if any of the solutions in x are negative
                p=np.where(x==min(x)) # find the most negative solution, p is the position
                #now store the A coefficients of the extinct species in the Afreeze list
                Afreeze.append(A[p])
                Sex.append(-1) #given -1 value as species is extinct.
                x=np.delete(x, p, 0)
                A=np.delete(A, p, 0)
                A=np.delete(A, p, 1)
                I=np.delete(I, p, 0)
            else:
                allpos = 1 #set allpos to one so loop is broken
        l=len(x)
        #now fill Afreeze and Sex with the remaining species that have survived
        for m in range (0, l):
            Afreeze.append(A[m])
            Sex.append(1) # value of 1 as this species has survived
    #now time to analyse the coefficients for each species.
    for m in range (0, len(Sex)):
        X1 = sum(Afreeze[m])/len(Afreeze[m]) # this is the mean
        X2 = 0
        for p in range (len(Afreeze[m])):
            X2 = X2 + Afreeze[m][p]**2 # sum of squares, for the variance below
        X2 = X2/len(Afreeze[m])
        Sav.append(X1)
        Svar.append(X2 - X1*X1)
    spec = []
    for b in range(0,n):
        spec.append(b)
    plt.plot(Svar,Sav)
    plt.show()
    #plt.scatter(spec, Sav)
    #plt.show()
I cannot figure this out at all! I think it was working before but then just stopped working. Any ideas?
Your problem is in this section:
if any(t<0 for t in x): #if any of the solutions in x are negative
    p=np.where(x==min(x)) # find the most negative solution, p is the position
    #now store the A coefficients of the extinct species in the Afreeze list
    Afreeze.append(A[p])
You're indexing a 2D array, and the result is still a 2D array. So your Afreeze gets a 2D array appended instead of a 1D array. Later, when you sum the separate elements of Afreeze, summing a 2D array yields a 1D array, and that is what gets appended to Sav and Svar. By the time you feed these variables to plt.plot(), matplotlib receives an array as one of the elements instead of a single number, which it of course can't cope with; hence the "setting an array element with a sequence" error.
You probably want:
if any(t<0 for t in x):
    p=np.where(x==min(x))
    Afreeze.append(A[p][0])
but I haven't tried to follow the logic of the script very much; that's up to you.
It is probably good to check whether this is indeed what you want: print the value of A[p][0] on the line before it gets appended to Afreeze.
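A minimal demonstration of the shape issue (added for illustration, not part of the original answer):

import numpy as np

A = np.arange(9.0).reshape(3, 3)
p = np.where(A[:, 0] == A[:, 0].min())  # p is a tuple of index arrays
print(A[p].shape)     # (1, 3) -- indexing with the tuple keeps it 2D
print(A[p][0].shape)  # (3,)   -- the 1D row you actually want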
I noted that because of the random.random() in the matrix creation, the if statement isn't always true, so the problem doesn't always show up. Minor detail, but could confuse people.
Fix your indentation?
import numpy as np
import numpy.linalg
import random
import matplotlib.pyplot as plt
import pylab
from scipy.optimize import curve_fit
from array import array

def main():
    n=20 #number of species
    spec=np.zeros((n+1))
    for i in range(0,n):
        spec[i]=i
    t=100 #initial number of matrices to check
    B = np.zeros((n+1)) #matrix to store the results of how big the matrices have to be
    for k in range (0,t):
        A=np.zeros((n,n))
        I=np.ones((n))
        for i in range(0,n):
            for j in range(i+1,n):
                A[j,i]=random.random()
        for i in range(0,n):
            for j in range(i+1,n):
                A[i,j] = A[j,i]
        for i in range(n):
            A[i,i]=1
        allpos = 0
        while allpos !=1: #while all solutions are not positive
            x = numpy.linalg.solve(A,I)
            if any(t<0 for t in x): #if any of the solutions in x are negative
                p=np.where(x==min(x)) # find the most negative solution, p is the position
                x=np.delete(x, p, 0)
                A=np.delete(A, p, 0)
                A=np.delete(A, p, 1)
                I=np.delete(I, p, 0)
            else:
                allpos = 1 #set allpos to one so loop is broken
        l=len(x)
        B[l] = B[l]+1
    B = B/n
    pi=3.14
    resfile=open("results.txt","w")
    for i in range (0,len(spec)):
        resfile.write("%d " % spec[i])
        resfile.write("%0.6f \n" %B[i])
    resfile.close()
    plt.hist(B, bins=n)
    plt.title("Histogram")
    plt.show()
    plt.plot(spec,B)
    plt.xlabel("final number of species")
    plt.ylabel("fraction of total matrices")
    plt.title("plot")
    plt.show()

if __name__ == '__main__':
    main()
Got this: [output plots not reproduced here]
I am currently working on a project to make a multiprocessing fractal dimension calculation program.
I designed the code so that it first splits a big image into 9 pieces and calculates the fractal dimension of each part.
from multiprocessing import process
import numpy as np
import os
import math as mt
import cv2
import imageio
import warnings
from PIL import Image
warnings.simplefilter('ignore', Image.DecompressionBombWarning)
Image.MAX_IMAGE_PIXELS = None
def fractal_dimension(Z, threshold=0.8):
    """Returns box-counting dimension of a 2D array.

    Args:
        Z: 2D array to be analysed.
        threshold: Cutoff for converting values in Z to 1 and 0.

    Returns:
        The estimated box counting dimension.
    """
    # Only for 2d image
    assert(len(Z.shape) == 2)

    # From https://github.com/rougier/numpy-100 (#87)
    def boxcount(Z, k):
        S = np.add.reduceat(
            np.add.reduceat(Z, np.arange(0, Z.shape[0], k), axis=0),
            np.arange(0, Z.shape[1], k), axis=1)
        # We count non-empty (0) and non-full boxes (k*k)
        return len(np.where((S > 0) & (S < k*k))[0])

    # Transform Z into a binary array
    Z = (Z < threshold)
    # Minimal dimension of image
    p = min(Z.shape)
    # Greatest power of 2 less than or equal to p
    n = 2**np.floor(np.log(p)/np.log(2))
    # Extract the exponent
    n = int(np.log(n)/np.log(2))
    # Build successive box sizes (from 2**n down to 2**1)
    sizes = 2**np.arange(n, 1, -1)
    # Actual box counting with decreasing size
    counts = []
    for size in sizes:
        counts.append(boxcount(Z, size))
    # Fit the successive log(sizes) with log (counts)
    coeffs = np.polyfit(np.log(sizes), np.log(counts), 1)
    #fracd.append(coeffs)
    return -coeffs[0]

I = imageio.imread(r"C:\Users\qkrgn\PycharmProjects\pythonProject\Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg", as_gray="True")/255.0 # Import the image in greyscale
print("Minkowski–Bouligand dimension (computed): ", fractal_dimension(I))

subset_9t1 = I[::3, ::3]
subset_9t2 = I[1::3, ::3]
subset_9t3 = I[2::3, ::3]
subset_9t4 = I[::3, 1::3]
subset_9t5 = I[1::3, 1::3]
subset_9t6 = I[2::3, 1::3]
subset_9t7 = I[::3, 2::3]
subset_9t8 = I[1::3, 2::3]
subset_9t9 = I[2::3, 2::3]

if __name__ == '__main__':
    processes:list = []
    processes.append(target=fractal_dimension, args=subset_9t1)
    processes.append(target=fractal_dimension, args=subset_9t2)
    processes.append(target=fractal_dimension, args=subset_9t3)
    processes.append(target=fractal_dimension, args=subset_9t4)
    processes.append(target=fractal_dimension, args=subset_9t5)
    processes.append(target=fractal_dimension, args=subset_9t6)
    processes.append(target=fractal_dimension, args=subset_9t7)
    processes.append(target=fractal_dimension, args=subset_9t8)
    processes.append(target=fractal_dimension, args=subset_9t9)
    for process in processes:
        process.start()
    print(sum(fracd, 0.000)/len(fracd))
The error code reads:
Traceback (most recent call last):
  File "C:/Users/qkrgn/PycharmProjects/pythonProject/main.py", line 141, in <module>
    processes.append(target=fractal_dimension, args=subset_9t1)
TypeError: append() takes no keyword arguments
I want to know why this error comes up and how to fix it, and whether there would be any problem in splitting an image into pieces to calculate the fractal dimension and then combining the results to get the fractal dimension of the whole image.
You should do it this way instead of what you are actually doing.
from multiprocessing import Process
# make this change for all the times you tried to append it while using the earlier way.
processes.append(Process(target=fractal_dimension, args=[subset_9t1]))
You were passing keyword arguments, which the append function doesn't accept, and that is what you don't want to do.
Also, instead of manually typing out those appends, you can use a for loop:
if __name__ == '__main__':
    VARIABLE_NAME = "subset_9t"
    for i in range(1, 10):
        p = Process(target=fractal_dimension, args=[eval(VARIABLE_NAME + str(i))])
        processes.append(p)
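Note also that Process.start() alone won't collect the return values of fractal_dimension, so the final averaging in the posted code has nothing to average. A minimal sketch (my own, not from the original answer) that gathers the nine results with a multiprocessing.Pool:

from multiprocessing import Pool

if __name__ == '__main__':
    # Build the nine interleaved subsets in a list instead of nine named variables.
    subsets = [I[r::3, c::3] for r in range(3) for c in range(3)]
    with Pool() as pool:
        dims = pool.map(fractal_dimension, subsets)  # one dimension per subset
    print(sum(dims) / len(dims))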
I am reducing the number of features in a large sparse file by eliminating the features with the highest p-values. But I get this error. I have seen similar posts, but this code works with non-sparse input. Can you help, please? (I can upload the input file if needed.)
import statsmodels.formula.api as sm

def backwardElimination(x, Y, sl, columns):
    numVars = len(x[0])
    pvalue_removal_counter = 0
    for i in range(0, numVars):
        print(i, 'of', numVars)
        regressor_OLS = sm.OLS(Y, x).fit()
        maxVar = max(regressor_OLS.pvalues).astype(float)
        if maxVar > sl:
            for j in range(0, numVars - i):
                if (regressor_OLS.pvalues[j].astype(float) == maxVar):
                    x = np.delete(x, j, 1)
                    pvalue_removal_counter += 1
                    columns = np.delete(columns, j)
    regressor_OLS.summary()
    return x, columns
Output:
0 of 1970
1 of 1970
2 of 1970
Traceback (most recent call last):
  File "main.py", line 142, in <module>
    selected_columns)
  File "main.py", line 101, in backwardElimination
    if (regressor_OLS.pvalues[j].astype(float) == maxVar):
IndexError: index 1967 is out of bounds for axis 0 with size 1967
Here is a fixed version.
I made a number of changes:
Import the correct OLS from statsmodels.api
Generate columns in the function
Use np.argmax to find the location of the maximum value
Use a boolean index to select columns. In pseudo-code it is like x[:, [True, False, True]], which keeps columns 0 and 2 (see the short demonstration after this list).
Stop if there is nothing to drop.
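A tiny demonstration of the boolean column selection (added for illustration, not part of the original answer):

import numpy as np

x = np.arange(6).reshape(2, 3)
retain = np.array([True, False, True])
print(x[:, retain])  # keeps columns 0 and 2: [[0 2], [3 5]]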
import numpy as np
# Wrong import. Not using the formula interface, so using statsmodels.api
import statsmodels.api as sm

def backwardElimination(x, Y, sl):
    numVars = x.shape[1]  # variables in columns
    columns = np.arange(numVars)
    for i in range(0, numVars):
        print(i, 'of', numVars)
        regressor_OLS = sm.OLS(Y, x).fit()
        maxVar = regressor_OLS.pvalues.max()  # the largest remaining p-value
        if maxVar > sl:
            # Use boolean selection
            retain = np.ones(x.shape[1], bool)
            drop = np.argmax(regressor_OLS.pvalues)
            # Drop the highest pvalue(s)
            retain[drop] = False
            # Keep the x we wish to retain
            x = x[:, retain]
            # Also keep their column indices
            columns = columns[retain]
        else:
            # Exit early if no p-value is above sl
            break
    # Show the final summary
    print(regressor_OLS.summary())
    return x, columns
You can test it with
x = np.random.standard_normal((1000,100))
y = np.random.standard_normal(1000)
backwardElimination(x,y,0.1)
I am writing code to populate a Numpy array. It's a 2D array, so I run two for loops with a specified condition. The "coords" array holds the positions of my particles. However, every time I run it through the loops, the "coords" array starts being populated from the x=2 position (the second position in the array) rather than x=1, and then it gives me an error because of the lack of space in the array. Any ideas why this is happening?
Thank you for any help!
The error I am getting:
"IndexError: index 10 is out of bounds for axis 0 with size 10"
Here's my code:
import random
from math import exp, pow
import numpy as np

#Lattice and sampling parameters:
L = 10 #no of sites per one side of the lattice
nSites = L*L
nPart = 10 #no of particles
nSteps = 50000
AddRemFraction = 0.5 #acceptance ratio

# create an array of coordinates of particles:
coords = np.zeros((nPart,2), dtype=object)
# create an LxL array of particle occupancy => occ(x,y) = 1 if site is occupied
occ = np.zeros((L, L), dtype=object)
#no of particles that have been placed on the lattice so far
nPlace = 0

for x in range (1, L):
    for y in range (1, L):
        if (nPlace < nPart):
            nPlace = nPlace +1
            coords[nPlace,:] = [x,y]
            occ[x,y]=1
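For what it's worth, the off-by-one comes from incrementing nPlace before using it as an index: the first particle is written to coords[1], and the tenth write attempts coords[10] on an array of size 10. A minimal fix (my own sketch, not from the original thread) assigns at the current index first and increments afterwards:

for x in range (1, L):
    for y in range (1, L):
        if (nPlace < nPart):
            coords[nPlace,:] = [x,y]  # nPlace runs 0..nPart-1
            occ[x,y]=1
            nPlace = nPlace +1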
I want to read a grayscale image, say something with (248, 480, 3) shape, then use each element of it as the lam value for drawing a Poisson random value, doing this for every element to make a new data set with the same shape. I want to do this nscan times, add them all together into a new data set, and plot it again to get something similar to the image I started with. This code works, but it is extremely slow; I was wondering if there is any way to make it faster?
import numpy as np
import matplotlib.pyplot as plt

my_image = plt.imread('myimage.png')

def genP(data):
    new_data = np.zeros(data.shape)
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            for k in range(data.shape[2]):
                new_data[i, j, k] = np.random.poisson(lam = data[i, j, k])
    return new_data

def get_total(data, nscan = 1):
    total = genP(data)
    for i in range(nscan):
        total += genP(data)
    total = total/nscan
    plt.imshow(total)
    plt.show()

get_total(my_image, 100)
numpy.random.poisson can entirely replace your genP() function... This is basically guaranteed to be much faster.
From the numpy.random.poisson documentation: "If size is None (default), a single value is returned if lam is a scalar. Otherwise, np.array(lam).size samples are drawn."
def get_total(data, nscan = 1):
    total = np.random.poisson(lam=data)
    for i in range(nscan):
        total += np.random.poisson(lam=data)
    total = total/nscan
    plt.imshow(total)
    plt.show()
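A further shortcut, if averaging nscan independent scans is the goal: the sum of nscan independent Poisson(lam) draws is itself Poisson-distributed with rate nscan*lam, so the whole loop can collapse into one vectorized draw. A sketch under that assumption:

def get_total(data, nscan=1):
    # One draw of Poisson(nscan * lam) has the same distribution as the
    # sum of nscan independent Poisson(lam) draws.
    total = np.random.poisson(lam=data*nscan)/nscan
    plt.imshow(total)
    plt.show()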
Main Problem
What is a better/more pythonic way of retrieving elements of one array that are not found in a different array? This is what I have:
idata = [np.column_stack(data[k]) for k in range(len(data)) if data[k] not in final]
idata = np.vstack(idata)
My interest is in performance. My data is an (X, Y, Z) array of size (7000 x 3) and my gdata is an (X, Y) array of size (11000 x 2).
Preamble
I am working on an octant search to find the n points (e.g. 8) closest to my circular point (o) in each octant. This would mean that my points (+) are reduced to only 64 (8 per octant). Then for each gdata point I would save the elements that are not found in data.
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist
from collections import defaultdict

root = tk.Tk()
root.withdraw()
file_path = filedialog.askopenfilename()
data = pd.read_excel(file_path)
data = np.array(data, dtype=np.float)
nrow, cols = data.shape

file_path1 = filedialog.askopenfilename()
gdata = pd.read_excel(file_path1)
gdata = np.array(gdata, dtype=np.float)
gnrow, gcols = gdata.shape

npoint_per_octant = 8 # number of points to keep per octant
bins = np.linspace(-np.pi, np.pi, 9)
bins[-1] = np.inf # handle edge case
octantsort = []
for j in range(gnrow):
    delta = gdata[j, ::] - data[:, :2]
    angles = np.arctan2(delta[:, 1], delta[:, 0])
    octantsort = []
    for i in range(8):
        data_i = data[(bins[i] <= angles) & (angles < bins[i+1])]
        if data_i.size > 0:
            dist_order = np.argsort(cdist(data_i[:, :2], gdata[j, ::][np.newaxis]), axis=0)
            if dist_order.size < npoint_per_octant+1:
                [octantsort.append(data_i[dist_order[:npoint_per_octant][j]]) for j in range(dist_order.size)]
            else:
                [octantsort.append(data_i[dist_order[:npoint_per_octant][j]]) for j in range(npoint_per_octant)]
        final = np.vstack(octantsort)
    idata = [np.column_stack(data[k]) for k in range(len(data)) if data[k] not in final]
    idata = np.vstack(idata)
Is there an efficient and pythonic way of doing this to increase performance in the last two lines of the code?
If I understand your code correctly, then I see the following potential savings:
dedent the final = ... line
don't use arctan; it's expensive. Since you only want octants, compare the coordinates to zero and to each other instead
don't do a full argsort, use argpartition instead
make your octantsort an "octantargsort", i.e. store the indices into data, not the data points themselves; this would save you the search in the last but one line and allow you to use np.delete for removing
don't use append inside a list comprehension. This will produce a list of Nones that is immediately discarded. You can use list.extend outside the comprehension instead
besides, these list comprehensions look like a convoluted way of converting data_i[dist_order[:npoint_per_octant]] into a list; why not simply cast, or even keep it as an array, since you want to vstack in the end?
Here is some sample code illustrating these ideas:
import numpy as np

def discard_nearest_in_each_octant(eater, eaten, n_eaten_p_eater):
    # build octants
    # start with quadrants ...
    top, left = (eaten < eater).T
    quadrants = [np.where(v&h)[0] for v in (top, ~top) for h in (left, ~left)]
    dcoord2 = (eaten - eater)**2
    dc2quadrant = [dcoord2[q] for q in quadrants]
    # ... and split them
    oct4158 = [q[:, 0] < q[:, 1] for q in dc2quadrant]
    # main loop
    dc2octants = [[q[o], q[~o]] for q, o in zip(dc2quadrant, oct4158)]
    reloap = [[
        np.argpartition(o.sum(-1), n_eaten_p_eater)[:n_eaten_p_eater]
        if o.shape[0] > n_eaten_p_eater else None
        for o in opair] for opair in dc2octants]
    # translate indices
    octantargpartition = [q[so] if oap is None else q[np.where(so)[0][oap]]
                          for q, o, oaps in zip(quadrants, oct4158, reloap)
                          for so, oap in zip([o, ~o], oaps)]
    octantargpartition = np.concatenate(octantargpartition)
    return np.delete(eaten, octantargpartition, axis=0)
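A quick way to exercise it (my own example, with shapes loosely matching the question):

rng = np.random.default_rng(0)
eater = rng.random(2)           # the circular point (o)
eaten = rng.random((7000, 2))   # the candidate points (+)
survivors = discard_nearest_in_each_octant(eater, eaten, 8)
print(eaten.shape[0] - survivors.shape[0])  # up to 64 points discarded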