Matplotlib plot iterating plot features - python
I am writing a Python script that will cycle through every ASCII file in a directory and output an individual plot of the data contained in each file. Moreover, it finds the best curve-fitting parameters for each file and writes them to one combined text file.
The text output works perfectly, but the plot output does not. The first iteration works fine, but on each later iteration the plot elements from all previous iterations are drawn again. For example, on the third iteration of for filename in os.listdir(directory), the plot draws three legend labels, three fit curves, and each data point three times. Here is a screenshot:
Here is the code I am using:
# Walk the directory and process every light-curve file (.lc / .lc1):
# read it, fit it, save its plot, and print the fit parameters.
for filename in os.listdir(directory):
    # Guard clause: ignore anything that is not a light-curve file.
    if not filename.endswith((".lc", ".lc1")):
        continue

    wf = "/".join((directory, filename))
    try:
        # Read the time/flux columns (nheader is passed through to getData).
        time, flux = getData(wf, nheader)

        # Fit both models to the data.
        popt_sine, err_sine = sineFit(time, flux)
        popt_exp, err_exp = expFit(time, flux)

        # Draw and save the figure for this file.
        plotData(filename, time, flux, popt_sine, popt_exp)

        # Format and print the fit results.
        output = outputData(popt_sine, err_sine, popt_exp, err_exp)
        print(output)
    except StopIteration:
        # NOTE(review): presumably raised while reading a truncated file
        # inside getData -- confirm against its implementation.
        raise IOError("End of File Error")
where I have defined the function plotData as:
def plotData(filename, time, flux, popt_sine, popt_exp):
    """Plot one data set with its sine fit and save the figure as a JPEG.

    A new figure is created on every call and closed once it has been
    saved. Drawing through the implicit pyplot figure instead would make
    successive calls pile their curves, points and legend entries onto
    the same axes, so each saved image would also contain every earlier
    data set.

    Parameters:
        filename: Name of the input file; must end in ".lc" or ".lc1".
            The image is saved under the same name with the extension
            replaced by ".jpg".
        time: Sequence of time values (x axis).
        flux: Sequence of flux values (y axis), same length as ``time``.
        popt_sine: Fitted sine parameters; the first four are passed to
            ``sine`` in order.
        popt_exp: Fitted exponential parameters. Accepted for interface
            compatibility; the exponential curve is currently not drawn.

    Raises:
        ValueError: If ``filename`` does not end in ".lc" or ".lc1".
    """
    # Resolve the output name first so an unsupported file is rejected
    # before any figure is created.
    if filename.endswith(".lc"):
        outname = filename[:-3] + ".jpg"
    elif filename.endswith(".lc1"):
        outname = filename[:-4] + ".jpg"
    else:
        raise ValueError("Incorrect input file type")

    # Dense grid spanning the observed time range for a smooth fit curve.
    t = np.linspace(time[0], time[-1], 10000)

    # Fresh figure/axes per call: nothing from a previous file leaks in.
    fig, ax = plt.subplots()
    try:
        # Sine fit
        ax.plot(t, sine(t, *popt_sine[:4]), "r", label="Sine Fit")
        # Raw data
        ax.plot(time, flux, "ko", label="Data")

        # Plot configuration
        ax.set_xlabel("Time (sec)")
        ax.set_ylabel("Flux")
        ax.set_title("Mean Normalized Flux vs. Time (" + filename + ")")
        ax.legend(loc='best')

        fig.savefig(outname)
    finally:
        # Always release the figure, even if plotting or saving fails, so
        # figures do not accumulate in memory across a long directory run.
        plt.close(fig)
One interesting thing I noticed: if I do plt.show() rather than trying to save the plot, wherein I view and exit each plot separately, each plot will be correct.
Related
how to change the color of subplot datafile in matplotlib
In Short: I want to change the color of blue marker in the graph. So that I can do comparison with other plots easily. You can download the data files and script from this link Problem Explanation I have two data files, full.dat and part.dat(Note: part.dat is also there in full.dat). I got the plotting scripts from the internet, and it is working very well. But as a noob in Python and Matplotlib, I am facing difficulties in changing the color of part.dat. Please see the graph first, then the following scripts. Script-1: Function and definitions: let's say: "func.py" # This was written by Levi Lentz for the Kolpak Group at MIT import numpy as np import matplotlib.mlab as mlab import matplotlib.pyplot as plt import matplotlib.gridspec as gs import sys #This function extracts the high symmetry points from the output of bandx.out def Symmetries(fstring): f = open(fstring,'r') x = np.zeros(0) for i in f: if "high-symmetry" in i: x = np.append(x,float(i.split()[-1])) f.close() return x # This function takes in the datafile, the fermi energy, the symmetry file, a subplot, and the label # It then extracts the band data, and plots the bands, the fermi energy in red, and the high symmetry points def bndplot(datafile_full,datafile,fermi,symmetryfile,subplot,**kwargs): if 'shift_fermi' in kwargs: bool_shift_efermi = kwargs['shift_fermi'] else: bool_shift_efermi = 0 if 'color' in kwargs: color_bnd=kwargs['color'] else: color_bnd='black' if 'linestyle' in kwargs: line_bnd=kwargs['linestyle'] else: line_bnd='solid' z = np.loadtxt(datafile_full) #This loads the full.dat file x = np.unique(z[:,0]) #This is all the unique x-points [a,b,w]=np.loadtxt(datafile,unpack=True) #Weight bands = [] bndl = len(z[z[:,0]==x[1]]) #This gives the number of bands in the calculation Fermi = float(fermi) if bool_shift_efermi: fermi_shift=Fermi else: fermi_shift=0 axis = [min(x),max(x)] for i in range(0,bndl): bands.append(np.zeros([len(x),2])) #This is where we storre the bands for i in 
range(0,len(x)): sel = z[z[:,0] == x[i]] #Here is the energies for a given x test = [] for j in range(0,bndl): #This separates it out into a single band bands[j][i][0] = x[i] #bands[j][i][1] = np.multiply(sel[j][1],13.605698066) bands[j][i][1] = sel[j][1] #Here we plots the bands for i in bands: subplot.plot(i[:,0],i[:,1]-fermi_shift,color=color_bnd,linestyle=line_bnd, linewidth=0.7,alpha=0.5) # plt.scatter(a,b-fermi_shift,c=w,cmap='viridis',alpha=0.5) # plt.colorbar() if 'legend' in kwargs: #empty plot to generate legend subplot.plot([None],[None],color=color_bnd,linestyle=line_bnd,label=kwargs['legend']) temp = Symmetries(symmetryfile) for j in temp: #This is the high symmetry lines x1 = [j,j] subplot.axvline(x=j,linestyle='dashed',color='black',alpha=0.75) subplot.plot([min(x),max(x)],[Fermi-fermi_shift,Fermi-fermi_shift],color='red',linestyle='dotted') subplot.set_xticks(temp) subplot.set_xticklabels([]) if 'name_k_points' in kwargs: if len(kwargs['name_k_points'])==len(temp): subplot.set_xticklabels(kwargs['name_k_points']) if 'range' in kwargs: range_plot=kwargs['range'] subplot.set_ylim([range_plot[0],range_plot[1]]) subplot.set_xlim([axis[0],axis[1]]) subplot.set_xlabel('k') subplot.set_ylabel('E-E$_f$') plt.scatter(a,b-fermi_shift,s=70*np.array(w)) if 'legend' in kwargs: plt.legend() script-2 Plotting script: let's say: "plot.py" #!/usr/bin/python3 from func import * El='el' orb='orb' plt.rcParams["figure.figsize"]=(4,15) datafile_full='bands.dat.gnu' #datafile=El+'_'+orb+'.dat.all' datafile=El+'_'+orb+'.dat.all' fermi = 10.2382 symmetryfile='band.out' bool_shift_efermi= True fig, ax = plt.subplots() #bndplot(datafile,fermi,symmetryfile,ax) bndplot(datafile_full,datafile,fermi,symmetryfile,ax,shift_fermi=1,color='black',linestyle='solid',name_k_points=['K','G','M','K','H','A','L','H'], legend=El+', '+orb+'-orbital') #ax.set_ylim(-5,5) ax.set_ylim(-10,12) fig.set_figheight(6) fig.set_figwidth(4) plt.rcParams.update({'font.size': 22}) 
fig.savefig("el-orb.eps") plt.show() In script-2, there is an option to change the color, however I want to change the color of blue marker/solid-circles(please see the graph) so that I can compare with other graphs. Whenever I change the color, it changes the line color only. Please help me out I am trying to understand Matplotlib uses and examples from past few hrs However as a noob I was not able to figure out how to do.
Only last graph is getting pasted in pdf file in python
I am reading the parameters from different CSV files and creating the graphs after comparing the parameters across the CSVs. The problem is only last graph is getting pasted in PDF for the last parameter. with PdfPages('example.pdf') as pdf: for arg in sys.argv[1:]: file_reader= open(arg, "rt", encoding='ascii') read = csv.reader(file_reader) for row in read: if operation_OnDut in row: column_Result = row[10] resultOfOperations_OnDut_List.append(column_Result) buildNumber = row[0] buildName_List.append(buildNumber) N = len(resultOfOperations_OnDut_List) ind = np.arange(N) #Draw graph for operations performed in that TEST CASE y = resultOfOperations_OnDut_List width = .1 fig, ax = plt.subplots() plt.bar(ind, y, width, label = column_Parameters, color="blue") plt.xticks(ind, buildName_List) plt.title("Performance and Scale") plt.ylabel('Result of Operations') plt.xlabel('Execution Builds') plt.legend() plt.tight_layout() pdf.savefig() plt.close() resultOfOperations_OnDut_List = [] buildName_List = []
You probably got the indentation wrong... Try with PdfPages('example.pdf') as pdf: for arg in sys.argv[1:]: file_reader= open(arg, "rt", encoding='ascii') read = csv.reader(file_reader) for row in read: if operation_OnDut in row: column_Result = row[10] .... # one level deeper N = len(resultOfOperations_OnDut_List) ind = np.arange(N) #Draw graph for operations performed in that TEST CASE ... Note that the section starting with N = len(resultOfOperations_OnDut_List) has been shifted four spaces to the left to be within the first for loop. If you want it to be within the second for loop add four more spaces.
Why first figure in the list is not plotted, but at the end there is an empty plot?
I have a problem with matplotlib. I need to prepare a plot consisted of all plots from list in specified directory. The code below generating that, but it omits first path... For example, if I need to prepare image consisted of 14 subplots, only 13 are copied, first is omitted and instead of first, there is an empty plot at the last position. I have checked, that function reads all paths, including first at list. If you will be able to help and to give me a hint, what I`m doing wrong, I will be grateful. Best regards def create_combo_plot(path_to_dir, list_of_png_abspath): name = path_to_dir.replace('_out', '') title = name if name.find('/') != -1: title = name.split('/')[-1] list_of_png_abspath how_many_figures = len(list_) combo_figure = plt.figure(2, figsize=(100,100)) a = 4 b = int(floor(how_many_figures/4.1)) + 1 for i, l in enumerate(list_of_png_abspath): print l #I`ve checked, path is reached j = i + 1 img=mpimg.imread(l) imgplot = plt.imshow(img, interpolation="nearest") plot = plt.subplot(b, a, j) combo_figure.suptitle(title, fontsize=100) combo_figure.savefig(path_to_dir +'/' + title + '.jpeg') plt.close(combo_figure)
Replace these two lines: imgplot = plt.imshow(img, interpolation="nearest") plot = plt.subplot(b, a, j) with these: sub = plt.subplot(b, a, j) sub.imshow(img, interpolation="nearest") The line: imgplot = plt.imshow(img, interpolation="nearest") adds a new plot to the last active subplot. In your case it was created in the previous loop here: plot = plt.subplot(b, a, j) Therefore, you start with the second image and the last subplot stays empty. But if you create the subplot first: sub = plt.subplot(b, a, j) and later explicitly plot into it: sub.imshow(img, interpolation="nearest") you should see 14 plots.
IndexError: too many indices for array for an array that is definitely as big
I'm trying to make a movie by taking png images of an updating plot and stitching them together. There are three variables: degrees, ksB, and mp. Only mp changes each frame; the other two are constant. The data for mp for all times is stored in X. This is the relevant part of the code: def plot(fname, haveMLPY=False): # Load data from .npz file. data = np.load(fname) X = data["X"] T = data["T"] N = X.shape[1] A = data["vipWeights"] degrees = A.sum(1) ksB = data["ksB"] # Initialize a figure. figure = plt.figure() # Generate a plottable axis as the first subplot in 1 rows and 1 columns. axis = figure.add_subplot(1,1,1) # MP is the first (0th) variable. Plot one trajectory for each cell over time. axis.plot(T, X[:,:,0], color="black") # Decorate the plot. axis.set_xlabel("time [hours]") axis.set_ylabel("MP [nM]") axis.set_title("PER mRNA concentration across all %d cells" % N) firstInd = int(T.size / 2) if haveMLPY: import circadian.analysis # Generate a and plot Signal object, which encapsulates wavelet analysis. 
signal = circadian.analysis.Signal(X[firstInd:, 0, 0], T[firstInd:]) signal.showSpectrum(show=False) files=[] # filename for the name of the resulting movie filename = 'animation' mp = X[10**4-1,:,0] from mpl_toolkits.mplot3d import Axes3D for i in range(10**4): print i mp = X[i,:,0] data2 = np.c_[degrees, ksB, mp] # Find best fit surface for data2 # regular grid covering the domain of the data mn = np.min(data2, axis=0) mx = np.max(data2, axis=0) X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20)) XX = X.flatten() YY = Y.flatten() order = 2 # 1: linear, 2: quadratic if order == 1: # best-fit linear plane A = np.c_[data2[:,0], data2[:,1], np.ones(data2.shape[0])] C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2]) # coefficients # evaluate it on grid Z = C[0]*X + C[1]*Y + C[2] # or expressed using matrix/vector product #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape) elif order == 2: # best-fit quadratic curve A = np.c_[np.ones(data2.shape[0]), data2[:,:2], np.prod(data2[:,:2], axis=1), data2[:,:2]**2] C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2]) # evaluate it on a grid Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape) fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2) ax.scatter(degrees, ksB, mp) ax.set_xlabel('degrees') ax.set_ylabel('ksB') ax.set_zlabel('mp') # form a filename fname2 = '_tmp%03d.png'%i # save the frame savefig(fname2) # append the filename to the list files.append(fname2) # call mencoder os.system("mencoder 'mf://_tmp*.png' -mf type=png:fps=10 -ovc lavc -lavcopts vcodec=wmv2 -oac copy -o " + filename + ".mpg") # cleanup for fname2 in files: os.remove(fname2) Basically, all the data is stored in X. The format X[i, i, i] means X[time, neuron, data type]. Each time through the loop, I want to update the time, but still plot mp (the 0th variable) for all the neurons. 
When I run this code, I get "IndexError: too many indices for array". I asked it to print i to see when the code was going wrong. I get an error when i = 1, meaning that the code loops through once but then has the error the second time. However, I have data for 10^4 time steps. You can see in the first line of the provided code, I access X[10**4-1, :, 0] successfully. That's why it's confusing to me why X[1,:,0] would be out of range. If anybody could explain why/help me get around this, that would be great. The traceback error is Traceback (most recent call last): File"/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 196, in module plot(fname) File"/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 142, in plot mp = X[i,:,0] IndexError: too many indices for array Thanks!
Your problem is that you overwrite your X inside your loop: X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20)) After the first iteration, X is therefore the 2-D meshgrid array, with a different shape and different data, so indexing it with three indices (X[i,:,0]) fails on the second iteration. I would suggest renaming this second X to X_grid, and then checking where you need this "other" X and where you need the original. For example: X_grid, Y_grid = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
Adding a single label to the legend for a series of different data points plotted inside a designated bin in Python using matplotlib.pyplot.plot()
I have a script for plotting astronomical data of redmapping clusters using a csv file. I could get the data points in it and want to plot them using different colors depending on their redshift values: I am binning the dataset into 3 bins (0.1-0.2, 0.2-0.25, 0.25,0.31) based on the redshift. The problem arises with my code after I distinguish to what bin the datapoint belongs: I want to have 3 labels in the legend corresponding to red, green and blue data points, but this is not happening and I don't know why. I am using plot() instead of scatter() as I also had to do the best fit from the data in the same figure. So everything needs to be in 1 figure. import numpy as np import matplotlib.pyplot as py import csv z = open("Sheet4CSV.csv","rU") data = csv.reader(z) x = [] y = [] ylow = [] yupp = [] xlow = [] xupp = [] redshift = [] for r in data: x.append(float(r[2])) y.append(float(r[5])) xlow.append(float(r[3])) xupp.append(float(r[4])) ylow.append(float(r[6])) yupp.append(float(r[7])) redshift.append(float(r[1])) from operator import sub xerr_l = map(sub,x,xlow) xerr_u = map(sub,xupp,x) yerr_l = map(sub,y,ylow) yerr_u = map(sub,yupp,y) py.xlabel("$Original\ Tx\ XCS\ pipeline\ Tx\ keV$") py.ylabel("$Iterative\ Tx\ pipeline\ keV$") py.xlim(0,12) py.ylim(0,12) py.title("Redmapper Clusters comparison of Tx pipelines") ax1 = py.subplot(111) ##Problem starts here after the previous line## for p in redshift: for i in xrange(84): p=redshift[i] if 0.1<=p<0.2: ax1.plot(x[i],y[i],color="b", marker='.', linestyle = " ")#, label = "$z < 0.2$") exit if 0.2<=p<0.25: ax1.plot(x[i],y[i],color="g", marker='.', linestyle = " ")#, label="$0.2 \leq z < 0.25$") exit if 0.25<=p<=0.3: ax1.plot(x[i],y[i],color="r", marker='.', linestyle = " ")#, label="$z \geq 0.25$") exit ##There seems nothing wrong after this point## py.errorbar(x,y,yerr=[yerr_l,yerr_u],xerr=[xerr_l,xerr_u], fmt= " ",ecolor='magenta', label="Error bars") cof = np.polyfit(x,y,1) p = np.poly1d(cof) l = 
np.linspace(0,12,100) py.plot(l,p(l),"black",label="Best fit") py.plot([0,15],[0,15],"black", linestyle="dotted", linewidth=2.0, label="line $y=x$") py.grid() box = ax1.get_position() ax1.set_position([box.x1,box.y1,box.width, box.height]) py.legend(loc='center left',bbox_to_anchor=(1,0.5)) py.show() In the 1st 'for' loop, I have indexed every value 'p' in the list 'redshift' so that bins can be created using 'if' statement. But if I add the labels that are hashed out against each py.plot() inside the 'if' statements, each data point 'i' that gets plotted in the figure as an intersection of (x[i],y[i]) takes the label and my entire legend attains in total 87 labels (including the 3 mentioned in the code at other places)!!!!!! I essentially need 1 label for each bin... Please tell me what needs to done after the bins are created and py.plot() commands used...Thanks in advance :-) Sorry I cannot post my image here due to low reputation! The data 'appended' for x, y and redshift lists from the csv file are as follows: x=[5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547] 
y=[5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677] redshift = [0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]
Working with numerical data like this, you should really consider using a numerical library, like numpy. The problem in your code arises from processing each record (a coordinate (x,y) and the corresponding value redshift) one at a time. You are calling plot for each point, thereby creating legends for each of those 84 datapoints. You should consider your "bins" as groups of data that belong to the same dataset and process them as such. You could use "logical masks" to distinguish between your "bins", as shown below. It's also not clear why you call exit after each plotting action. import numpy as np import matplotlib.pyplot as plt x = np.array([5.031,10.599,10.589,8.548,9.089,8.675,3.588,1.244,3.023,8.632,8.953,7.603,7.513,2.917,7.344,7.106,3.889,7.287,3.367,6.839,2.801,2.316,1.328,6.31,6.19,6.329,6.025,5.629,6.123,5.892,5.438,4.398,4.542,4.624,4.501,4.504,5.033,5.068,4.197,2.854,4.784,2.158,4.054,3.124,3.961,4.42,3.853,3.658,1.858,4.537,2.072,3.573,3.041,5.837,3.652,3.209,2.742,2.732,1.312,3.635,2.69,3.32,2.488,2.996,2.269,1.701,3.935,2.015,0.798,2.212,1.672,1.925,3.21,1.979,1.794,2.624,2.027,3.66,1.073,1.007,1.57,0.854,0.619,0.547]) y = np.array([5.255,10.897,11.045,9.125,9.387,17.719,4.025,1.389,4.152,8.703,9.051,8.02,7.774,3.139,7.543,7.224,4.155,7.416,3.905,6.868,2.909,2.658,1.651,6.454,6.252,6.541,6.152,5.647,6.285,6.079,5.489,4.541,4.634,8.851,4.554,4.555,5.559,5.144,5.311,5.839,5.364,3.18,4.352,3.379,4.059,4.575,3.914,5.736,2.304,4.68,3.187,3.756,3.419,9.118,4.595,3.346,3.603,6.313,1.816,4.34,2.732,4.978,2.719,3.761,2.623,2.1,4.956,2.316,4.231,2.831,1.954,2.248,6.573,2.276,2.627,3.85,3.545,25.405,3.996,1.347,1.679,1.435,0.759,0.677]) redshift = 
np.array([0.12,0.25,0.23,0.23,0.27,0.26,0.12,0.27,0.17,0.18,0.17,0.3,0.23,0.1,0.23,0.29,0.29,0.12,0.13,0.26,0.11,0.24,0.13,0.21,0.17,0.2,0.3,0.29,0.23,0.27,0.25,0.21,0.11,0.15,0.1,0.26,0.23,0.12,0.23,0.26,0.2,0.17,0.22,0.26,0.25,0.12,0.19,0.24,0.18,0.15,0.27,0.14,0.14,0.29,0.29,0.26,0.15,0.29,0.24,0.24,0.23,0.26,0.29,0.22,0.13,0.18,0.24,0.14,0.24,0.24,0.17,0.26,0.29,0.11,0.14,0.26,0.28,0.26,0.28,0.27,0.23,0.26,0.23,0.19]) bin3 = 0.25 <= redshift bin2 = np.logical_and(0.2 <= redshift, redshift < 0.25) bin1 = np.logical_and(0.1 <= redshift, redshift < 0.2) plt.ion() labels = ("$z < 0.2$", "$0.2 \leq z < 0.25$", "$z \geq 0.25$") colors = ('r', 'g', 'b') for bin, label, co in zip( (bin1, bin2, bin3), labels, colors): plt.plot(x[bin], y[bin], color=co, ls='none', marker='o', label=label) plt.legend() plt.show()