Number format python - python

I want the legend of the plot to show the values taken from a list. But what I get is the element index and not the value itself. I don't know how to fix it. I'm referring to the plt.plot line. Thanks for the help.
import matplotlib.pyplot as plt
import numpy as np
# Question script: evaluate a Lennard-Jones style potential over all pairwise
# distances of 1000 random points, one plot per (epsilon, sigma) combination.
x = np.random.random(1000)
y = np.random.random(1000)
n = len(x)

# Distance between every unordered pair of points (i < j).
d_ij = []
for i in range(n):
    for j in range(i+1,n):
        a = np.sqrt((x[i]-x[j])**2+(y[i]-y[j])**2)
        d_ij.append(a)

epsilon = np.linspace(0.01,1,num=10)
sigma = np.linspace(0.01,1,num=10)


def lj_pot(epsi,sig,d):
    """Return 4*epsi*((sig/r)**12 - (sig/r)**6) for every r in d, as a list."""
    result = []
    for i in range(len(d)):
        a = 4*epsi*((sig/d[i])**12-(sig/d[i])**6)
        result.append(a)
    return result


for i in range(len(epsilon)):
    for j in range(len(sigma)):
        a = epsilon[i]
        b = sigma[j]
        # Clear the axes so each saved figure holds a single curve.
        plt.cla()
        plt.ylim([-1.5, 1.5])
        plt.xlim([0, 2])
        # NOTE: '%d' formats a and b as integers, so their fractional values
        # are truncated in the legend -- this is the behaviour the question
        # asks about and is intentionally left as posted.
        plt.plot(sorted(d_ij),lj_pot(epsilon[i],sigma[j],sorted(d_ij)),label = 'epsilon = %d, sigma =%d' %(a,b))
        plt.legend()
        plt.savefig("epsilon_%d_sigma_%d.png" % (i,j))
# NOTE(review): the original indentation was lost; plt.show() is assumed to
# run once after both loops -- confirm against the original post.
plt.show()

Your code is a bit unpythonic, so I tried to clean it up to the best of my knowledge. numpy.random.random and numpy.random.uniform(0, 1) are basically the same, however, the latter also allows you to pass the shape of the return array that you would like to have, in this case an array with 1000 rows and two columns (1000, 2). I then use some magic to assign the two columns of the return array to x and y in the same line, respectively.
numpy.hypot does as the name suggests and calculates the hypotenuse of x and y. It can also do that for each entry of arrays with the same size, saving you the for loops, which you should try to avoid in Python since they are pretty slow.
You used plt for all your plotting, which is fine as long as you only have one figure, but I would recommend to be as explicit as possible, according to one of Python's key notions:
explicit is better than implicit.
I recommend you read through this guide, in particular the section called 'Stateful Versus Stateless Approaches'. I changed your commands accordingly.
It is also very unpythonic to loop over items of a list using the index of the item in the list like you did (for i in range(len(list)): item = list[i]). You can just reference the item directly (for item in list:).
Lastly I changed your formatted strings to the more convenient f-strings. Have a read here.
import matplotlib.pyplot as plt
import numpy as np
def pot(epsi, sig, d):
    """Evaluate ``4*epsi*((sig/d)**12 - (sig/d)**6)``.

    The expression is plain arithmetic, so ``d`` may be a scalar or a NumPy
    array (evaluated element-wise).
    """
    ratio = sig / d
    return 4*epsi*(ratio**12 - ratio**6)
# I am not sure why you would create the independent variable this way,
# maybe you are simulating something. In that case, the code below is
# simpler than your version and should achieve the same.
# x, y = zip(*np.random.uniform(0, 1, (1000, 2)))
# d = np.array(sorted(np.hypot(x, y)))
# If you only want to plot your pot function then creating the value range
# like this is just fine.
d = np.linspace(0.001, 1, 1000)
epsilons = sigmas = np.linspace(0.01, 1, num=10)

fig, ax = plt.subplots()
ax.set_xlim([0, 2])
ax.set_ylim([-1.5, 1.5])

# Create the line artist once and reuse it; only its data and label change.
line = ax.plot([], [])[0]
for epsilon in epsilons:
    for sigma in sigmas:
        line.set_data(d, pot(epsilon, sigma, d))
        # The label must be refreshed together with the data, otherwise the
        # legend keeps showing the first (epsilon, sigma) combination.
        line.set_label(f'epsilon = {epsilon}, sigma = {sigma}')
        # Rebuild the axes legend so it picks up the new label.
        ax.legend()
        # plt.savefig(f"epsilon_{epsilon}_sigma_{sigma}.png")
fig.show()

Related

SciPy: Why is the input of 2D interpolation sorted internally?

I have a problem with scipy.interpolate.interp2d.
For sorted input, the interpolation is OK.
When I ask to get the interpolation values for an unsorted array, I get output as if it is sorted internally by SciPy. Why is that?
The way around is to get interpolation values in a loop.
Here is my demonstration code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
SciPy interp2d test.
Why is the input of 2D interpolation sorted internally?
'''
import matplotlib.pyplot as plt
import scipy.interpolate as itp
import numpy as np
def fMain():
    """Interpolate z = x**2 + y**3 with interp2d and plot two checks.

    First plot: the grid as a wireframe with the interpolated values at the
    grid coordinates as red markers. Second plot: the interpolant at y = 0.5
    evaluated for ascending x, for reversed x in a single call, and for
    reversed x one point at a time.
    """
    nx=11
    ny=21
    # mx[i, j] = ax[i]: x coordinate repeated along every column.
    ax=np.linspace(0,1,nx)
    mx=np.empty((nx,ny))
    for i in range(ny):
        mx[:,i] = ax
        pass
    # my[i, j] = ay[j]: y coordinate repeated along every row.
    ay=np.linspace(0,1,ny)
    my=np.empty((nx,ny))
    for i in range(nx): # can I do this without loop?
        my[i,:] = ay
        pass
    mz=np.empty((nx,ny))
    mz=mx**2 + my**3
    f2Di = itp.interp2d( mx, my, mz, kind='linear')
    #this provides identical results, ok
    #f2Di = itp.interp2d( ax, ay, mz.transpose(), kind='linear')
    if True :
        # just to check the interpolation
        mzi = f2Di(ax,ay)
        fig = plt.figure()
        axis = fig.add_subplot(projection='3d')
        axis.plot_wireframe( mx, my, mz )
        axis.scatter(mx, my, mzi.transpose(), marker="o",color="red")
        axis.set_xlabel("x")
        axis.set_ylabel("y")
        axis.set_zlabel("z")
        plt.tight_layout()
        plt.show()
        plt.close()
        pass
    if True:
        y = 0.5
        # Ascending x in one call.
        az = f2Di( ax , y )
        # Reversed x in one call.
        axf=np.flip(ax)
        azf1 = f2Di( axf , y )
        # Reversed x, one point per call.
        azf2 = np.empty(nx)
        for i in range(nx):
            azf2[i] = f2Di( axf[i] , y )
            pass
        plt.plot(ax,az,label="Normal",linewidth=3,linestyle="dashed")
        plt.plot(axf,azf1,label="Reversed")
        plt.plot(axf,azf2,label="Reversed loop")
        plt.legend()
        plt.xlabel("x")
        plt.ylabel("z")
        plt.tight_layout()
        plt.show()
        plt.close()
        pass
    pass
if __name__ == "__main__":
    fMain()
    pass
To answer another question (from a comment in the code):
ax=np.linspace(0,1,nx)
mx=np.empty((nx,ny))
for i in range(ny):
mx[:,i] = ax
(and similar for ay).
Can I do this without a loop?
Yes (technically no, since there'll be a C loop under the hood, but practically, yes). Use numpy.tile:
ax = np.linspace(0, 1, nx)
mx = np.tile(ax, (ny, 1)).T
And the np.empty doesn't make sense below: you allocate memory, but immediately (re)assign the variable to another value:
#mz = np.empty((nx, ny)) # This line is redundant
mz = mx**2 + my**3
This is also why np.empty has disappeared from the for-replacement code.
There's a builtin function for making grids like your mx,my:
In [68]: I,J = np.meshgrid(ay,ax)
In [69]: I.shape
Out[69]: (11, 21)
In [71]: np.allclose(I,my)
Out[71]: True
In [72]: np.allclose(J,mx)
Out[72]: True
alternatively you could have assigned the values with broadcasting
In [76]: my = np.empty((nx,ny)); my[:]=ay
In [78]: mx = np.empty((nx,ny)); mx[:]=ax[:,None]
The interp2d docs say that input arrays are flattened, even if input as 2d. And that the x,y can be the coordinates as in ax,ay; they don't have to be constructed from the full grid. So the 2 ways of setting up the f2Di are equivalent.
Full documentation for the use of f2Di(x,y) is
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.interp2d.__call__.html#scipy.interpolate.interp2d.__call__
It explicitly states that the inputs x, y have to be sorted, or it will do it for you.
One interpolation:
In [86]: mzi = f2Di(ax,ay)
In [87]: mzi.shape
Out[87]: (21, 11)
Another with the inputs reversed:
In [89]: azf1 = f2Di(ax[::-1], ay[::-1] )
In [90]: azf1.shape
Out[90]: (21, 11)
In [91]: np.allclose(mzi, azf1)
Out[91]: True
As you note, and attempt to show with a lot of plotting code, the results are the same - inputs have been sorted before they are used to interpolate.
If I falsely tell it that the coordinates are sorted:
In [94]: azf1 = f2Di(ax[::-1], ay[::-1] , assume_sorted=True)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
...
File ~\anaconda3\lib\site-packages\scipy\interpolate\_fitpack_impl.py:1054, in bisplev(x, y, tck, dx, dy)
1052 z, ier = _fitpack._bispev(tx, ty, c, kx, ky, x, y, dx, dy)
1053 if ier == 10:
-> 1054 raise ValueError("Invalid input data")
1055 if ier:
1056 raise TypeError("An error occurred")
ValueError: Invalid input data
Note that the error was raised by a function in _fitpack. The name implies that this is using some sort of compiled interpolation code, a library that is probably written in C or Fortran. I'm not a developer, but I can imagine that it's easiest to write such code assuming that the inputs are sorted. Such shared libraries work best when they have clear, and relatively simple, expectations regarding the inputs.

How to exclude long peaks in histograms?

Sometimes I get histograms that look like below:
I see the peaks loud and clear, but nigh for much else; is there a way to drop the "bin outliers" from a histogram so that the rest of the distribution can be seen better?
This can be accomplished by simply setting ylim; however, this discards the peak information. To retain it, we can include it via annotations, as follows:
Fetching histogram heights, N, and positions, bins
Selecting a ymax; e.g. 2nd or 3rd max N
Packing (position, height) into a string, and annotating
All combined and an example below; I used your exact data for comparison, since you are me.
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)
data = np.random.randn(100, 100) ** 3
data[:50] = 0
hist_visible(data, peaks_to_clip=3, bins=500, annot_kw={})
data[:95] = 0
hist_visible(data, peaks_to_clip=3, bins=500, annot_kw={})
Function:
def hist_visible(data, peaks_to_clip=1, bins=200, annot_kw=None):
    """Plot a histogram whose y-axis is clipped below its tallest bins.

    The y-limit is set to the height of the tallest bin that is *not* among
    the ``peaks_to_clip`` tallest ones, so those peaks run off the top of the
    plot. Their (left bin edge, count) pairs can be written onto the figure.

    Parameters
    ----------
    data : array-like
        Values to histogram; flattened before binning.
    peaks_to_clip : int
        Number of tallest bins allowed to exceed the y-limit.
    bins
        Passed through to ``plt.hist``.
    annot_kw : dict or None
        ``None`` disables the annotation. Any dict (including ``{}``) enables
        it; keys it does not provide are filled from built-in defaults and
        the result is forwarded to ``plt.annotate``.
    """
    def _annotate(peaks_info, annot_kw):
        defaults = dict(weight='bold', fontsize=13, color='r',
                        xy=(.85, .85), xycoords='axes fraction')
        # Caller-supplied keys win over the defaults; a new dict is built so
        # the caller's dict is never modified.
        merged = {**defaults, **(annot_kw or {})}
        txt = "\n".join("({:.2f}, {})".format(pos, int(height))
                        for pos, height in peaks_info)
        plt.annotate(txt, **merged)

    N, bins, _ = plt.hist(np.asarray(data).ravel(), bins=bins)
    # Stable sort on -N lists bins tallest-first with ties kept in bin order,
    # so peaks of equal height map to distinct bins instead of the first
    # matching bin being reported repeatedly.
    order = np.argsort(-N, kind="stable")
    lower_max = N[order[peaks_to_clip]]
    peaks_info = [[bins[idx], N[idx]] for idx in order[:peaks_to_clip]]

    plt.ylim(0, lower_max)
    if annot_kw is not None:
        _annotate(peaks_info, annot_kw)
    plt.show()

Cannot sample values of a trigonometric function

I have a problem with my code.
So i try to represent the sampled values of a function 'sin(t^3)/2^tan(t)' for
t between 0 and 1.5 and frequency fs=50Hz.
I have created a function 'sampleFunction' which takes as parameters the string which represents the trigonometric function,beginning of the interval,end of interval and the frequency.
I create tVector(0,0.02,0.04,..,1.48)
Then I take the elements of tVector and use them to evaluate the string and put the result in another vector y
I return both y and tVector
But I encounter a problem when i run it saying 'y' is not defined
This is the code:
import numpy as np
import matplotlib.pyplot as plt
import math
def sampleFunction(functionString,t0,t1,fs):
    """Evaluate the expression ``functionString`` at each sample time.

    Sample times run from t0 (inclusive) to t1 (exclusive) in steps of 1/fs.
    """
    tVector=np.arange(start=t0, stop=t1, step=1/fs, dtype='float')
    t=t0
    for i in range(0,len(tVector)):
        t=tVector[i]
        # NOTE: y is never created before this indexed assignment, which
        # raises "name 'y' is not defined" -- the error the question reports.
        # Left as posted on purpose.
        y[i]=eval(functionString)
    return y,tVector
t0=0
t1 =1.5
fs=50
thold=.1
functionString='math.sin(t**3)/2**math.tan(t)'
y,t=sampleFunction(functionString,t0,t1,fs)
plt.plot(t,y)
plt.xlabel('time')
plt.ylabel('Amplitude')
You can change your code in the following way:
def sampleFunction(functionString,t0,t1,fs):
    """Sample the expression ``functionString`` (written in terms of ``t``).

    Sample times run from t0 (inclusive) to t1 (exclusive) in steps of 1/fs.

    Returns
    -------
    (y, tVector) : tuple of numpy arrays
        The evaluated samples and the sample times.
    """
    tVector = np.arange(start=t0, stop=t1, step=1/fs, dtype='float')
    # Allocate the output before filling it element by element.
    y = np.zeros(tVector.shape)
    for i, t in enumerate(tVector):
        # SECURITY: eval runs arbitrary code; only pass trusted expressions.
        # The expression reads the local name ``t`` bound by this loop.
        y[i] = eval(functionString)
    return y, tVector
However, this is not good python. There are a couple of issues:
You should use vectorized operations.
You should avoid eval like the plague. This has security implications.
For vectorized operations, simply do:
def sampleFunction(functionString,t0,t1,fs):
    """Evaluate ``functionString`` once on the whole sample-time array ``t``.

    ``t`` runs from t0 (inclusive) to t1 (exclusive) in steps of 1/fs, so the
    expression must use array-aware functions (e.g. ``np.sin``, not
    ``math.sin``). Returns ``(y, t)``.
    """
    t = np.arange(start=t0, stop=t1, step=1/fs, dtype='float')
    # SECURITY: eval runs arbitrary code; only pass trusted expressions.
    y = eval(functionString)
    return y, t
and call it as:
sampleFunction('np.sin(t**3)/2**np.tan(t)', 0, 10, 100)
This is much faster (especially for large arrays)
Finally, the vectorized form is only a single line long. You probably don't need the extra function.
You have a problem with the allocation of the 'y' variable as Harold is saying.
However, there are multiple ways of achieving what you are doing and the eval function is, unless you have a very good reason, the absolute worst. Maybe consider one of the possible examples below:
import numpy as np
import matplotlib.pyplot as plt
import math
def sampleFunction(functionString,t0,t1,fs):
    """Evaluate ``functionString`` (in terms of ``t``) at each sample time.

    Sample times run from t0 (inclusive) to t1 (exclusive) in steps of 1/fs.
    Returns ``(y, tVector)`` where ``y`` is a plain list of results.
    """
    tVector = np.arange(start=t0, stop=t1, step=1/fs, dtype='float')
    # Allocate 'y' up front; placeholders are numbers rather than the
    # ``float`` type object so an unfilled slot can never leak a type.
    y = [0.0] * len(tVector)
    for i, t in enumerate(tVector):
        # SECURITY: eval runs arbitrary code; only pass trusted expressions.
        y[i] = eval(functionString)
    return y, tVector
t0=0
t1 =1.5
fs=50
thold=.1
# Your code
functionString = 'math.sin(t**3)/2**math.tan(t)'
y, t = sampleFunction(functionString,t0,t1,fs)
plt.plot(t, y, color='cyan')
# Using the 'map' built-in function
t = np.arange(start=t0, stop=t1, step=1./fs, dtype='float')
# map() returns a lazy iterator on Python 3; materialize it so matplotlib
# receives a sequence with the same length as t.
y = list(map(lambda ti: 0.9*math.sin(ti**3)/2**math.tan(ti), t))
plt.plot(t, y, color='magenta')
# Using Numpy's 'sin' and 'tan'
t = np.arange(start=t0, stop=t1, step=1./fs, dtype='float')
y = 0.8*np.sin(t**3)/2**np.tan(t)
plt.plot(t, y, color='darkorange')
# Using 'list comprehensions'
t = np.arange(start=t0, stop=t1, step=1./fs, dtype='float')
y = [ 0.7*math.sin(ti**3)/2**math.tan(ti) for ti in t]
plt.plot(t, y, color='darkgreen')
plt.xlabel('time')
plt.ylabel('Amplitude')
plt.show()
The result is:
When running the above code, you should have gotten an error message saying, in the end, "name 'y' is not defined". If you look at your function definition, you will see that it really isn't. You cannot assign a value to y[i] without defining y first! The following line before the "for" loop fixes that particular problem:
y = [None] * len(tVector)
The code will run fine after that correction.
But: why do you want to pass a function string when you can pass a function? Functions, in Python, are first-class-objects!

How to optimize for loops for generating a new random Poisson array in python?

I want to read an grayscale image, say something with (248, 480, 3) shape, then use each element of it as the lam value for making a Poisson random value and do this for each element and make a new data set with the same shape. I want to do this as much as nscan, then I want to add them all together and put them in a new data set and plot it again to get something that is similar to the first image that I put in the beginning. This code is working but it is extremely slow, I was wondering if there is any way to make it faster?
import numpy as np
import matplotlib.pyplot as plt
my_image = plt.imread('myimage.png')
def genP(data):
    """Return an array shaped like ``data`` of Poisson draws.

    Each output element is one sample from a Poisson distribution whose mean
    is the corresponding element of the 3-D array ``data``.
    """
    new_data = np.zeros(data.shape)
    # Element-by-element sampling, kept as posted: its slowness is what the
    # question asks about.
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            for k in range(data.shape[2]):
                new_data[i, j, k] = np.random.poisson(lam = data[i, j, k])
    return new_data
def get_total(data, nscan = 1):
    """Average ``nscan`` Poisson realisations of ``data`` and show the image.

    Raises
    ------
    ValueError
        If ``nscan`` is smaller than 1.
    """
    if nscan < 1:
        raise ValueError("nscan must be at least 1")
    # Start from zero so exactly nscan samples are summed; seeding the total
    # with an extra genP() call would average nscan + 1 samples over nscan.
    total = np.zeros(data.shape)
    for _ in range(nscan):
        total += genP(data)
    total = total/nscan
    plt.imshow(total)
    plt.show()
get_total(my_image, 100)
numpy.random.poisson can entirely replace your genP() function... This is basically guaranteed to be much faster.
If size is None (default), a single value is returned if lam is a scalar. Otherwise, np.array(lam).size samples are drawn
def get_total(data, nscan = 1):
    """Average ``nscan`` Poisson realisations of ``data`` and show the image.

    Each realisation is drawn in a single vectorized call with ``data`` as
    the per-element mean.

    Raises
    ------
    ValueError
        If ``nscan`` is smaller than 1.
    """
    if nscan < 1:
        raise ValueError("nscan must be at least 1")
    # Start from zero so exactly nscan samples are summed; seeding the total
    # with an extra draw would average nscan + 1 samples over nscan.
    total = np.zeros(np.shape(data))
    for _ in range(nscan):
        total += np.random.poisson(lam=data)
    total = total/nscan
    plt.imshow(total)
    plt.show()

How do I plot a step function with Matplotlib in Python?

This should be easy but I have just started toying with matplotlib and python. I can do a line or a scatter plot but i am not sure how to do a simple step function. Any help is much appreciated.
x = 1,2,3,4
y = 0.002871972681775004, 0.00514787917410944, 0.00863476098280219, 0.012003316194034325
It seems like you want step.
E.g.
import matplotlib.pyplot as plt
x = [1,2,3,4]
y = [0.002871972681775004, 0.00514787917410944,
0.00863476098280219, 0.012003316194034325]
plt.step(x, y)
plt.show()
If you have non-uniformly spaced data points, you can use the drawstyle keyword argument for plot:
x = [1,2.5,3.5,4]
y = [0.002871972681775004, 0.00514787917410944,
0.00863476098280219, 0.012003316194034325]
plt.plot(x, y, drawstyle='steps-pre')
Also available are steps-mid and steps-post.
New in matplotlib 3.4.0
There is a new plt.stairs method to complement plt.step:
plt.stairs and the underlying StepPatch provide a cleaner interface for plotting stepwise constant functions for the common case that you know the step edges.
This supersedes many use cases of plt.step, for instance when plotting the output of np.histogram.
Check out the official matplotlib gallery for how to use plt.stairs and StepPatch.
When to use plt.step vs plt.stairs
Use the original plt.step if you have reference points. Here the steps are anchored at [1,2,3,4] and extended to the left:
plt.step(x=[1,2,3,4], y=[20,40,60,30])
Use the new plt.stairs if you have edges. The previous [1,2,3,4] step points correspond to [1,1,2,3,4] stair edges:
plt.stairs(values=[20,40,60,30], edges=[1,1,2,3,4])
Using plt.stairs with np.histogram
Since np.histogram returns edges, it works directly with plt.stairs:
data = np.random.normal(5, 3, 3000)
bins = np.linspace(0, 10, 20)
hist, edges = np.histogram(data, bins)
plt.stairs(hist, edges)
I think you want pylab.bar(x,y,width=1) or equally pyplot's bar method. if not checkout the gallery for the many styles of plots you can do. Each image comes with example code showing you how to make it using matplotlib.
Draw two lines, one at y=0, and one at y=1, cutting off at whatever x your step function is for.
e.g. if you want to step from 0 to 1 at x=2.3 and plot from x=0 to x=5:
import matplotlib.pyplot as plt
# _
# if you want the vertical line _|
plt.plot([0,2.3,2.3,5],[0,0,1,1])
#
# OR:
# _
# if you don't want the vertical line _
#plt.plot([0,2.3],[0,0],[2.3,5],[1,1])
# now change the y axis so we can actually see the line
plt.ylim(-0.1,1.1)
plt.show()
In case someone just wants to stepify some data rather than actually plot it:
def get_x_y_steps(x, y, where="post"):
    """Expand ``(x, y)`` samples into the vertices of a step curve.

    Parameters
    ----------
    x, y : iterable
        Sample positions and values, of equal length.
    where : {"post", "pre"}
        "post": each y value is held until the next x.
        "pre": each y value is held from the previous x up to its own x.

    Returns
    -------
    (x_step, y_step) : tuple of lists
        Vertex coordinates; 2*len(x) - 1 points for non-empty input, two
        empty lists for empty input.

    Raises
    ------
    ValueError
        If ``where`` is neither "post" nor "pre".
    """
    if where not in ("post", "pre"):
        raise ValueError(f"where must be 'post' or 'pre', got {where!r}")
    x = list(x)
    y = list(y)
    if not x:
        return [], []
    # zip() objects cannot be sliced on Python 3, so duplicate the elements
    # of the already-sliced lists instead.
    if where == "post":
        x_step = [x[0]] + [v for v in x[1:] for _ in range(2)]
        y_step = [v for v in y[:-1] for _ in range(2)] + [y[-1]]
    else:
        x_step = [v for v in x[:-1] for _ in range(2)] + [x[-1]]
        y_step = [y[0]] + [v for v in y[1:] for _ in range(2)]
    return x_step, y_step

Categories