I have a csv file which has two sets of data. Basically:
# For each CSV row: compute a "long" and a "short" disorder profile for the
# region, ask the user for an output name, and plot both profiles together.
# NOTE(review): indentation was lost in this paste; presumably everything
# below runs inside the `for row in reader:` loop -- confirm against the
# original script.
for row in reader:
###I have some other code but here's the stuff that applies to the question###
# Run sequence_analysis twice on the upper-cased region, once per mode.
disorder_long = sequence_analysis(looped_region.upper(), mode = 'long')
disorder_short = sequence_analysis(looped_region.upper(), mode = 'short')
# Number of values in the short profile; reused below as the x-axis extent.
length = len(list(disorder_short))
#print length
xmin = 1
xmax_long = length
ymin = 0
ymax_long = max(disorder_long)
ymax_short = max(disorder_short)
# Largest value over both profiles. It is not used by the plotting calls
# visible here (the y-limit below is the constant 1).
y_limit = max([ymax_long, ymax_short])
#print y_limit
# Prompt for the output name; on ValueError print a message and ask again.
while True:
try:
newfig = str(raw_input('Name the graph to be created: '))
break #break out of loop
except ValueError:
print("error")
continue #return to start of loop
# NOTE(review): without parentheses this expression only references the
# function and never calls it, so no new figure is started at this point.
plt.figure
#data
# x values are 1..length, one per profile entry.
x_series = np.array(range(1,length+1))
# print "x series: "
# print x_series
# print len(x_series)
y_series1 = np.array(disorder_long)
y_series2 = np.array(disorder_short)
# print y_series1, y_series2
#plot data
# Both curves are labelled with uniprot_id (defined outside this excerpt).
plt.plot(x_series, y_series1, label=uniprot_id+' long')
plt.plot(x_series, y_series2, label=uniprot_id+' short')
#add limits to the x and y axis
plt.xlim(xmin, xmax_long)
plt.ylim(ymin, 1)
#create legend
plt.legend(loc="upper left")
#save figure to png
plt.savefig(newfig)
This gives me two graphs. The first one (from the first set of data) is perfectly fine, but the second has two extra lines plotted and I have no idea where they came from: the top two lines in the second graph are extraneous.
Just at a cursory glance, the second graph, top two lines, appear to be the same as the lines in the first. I'm not the best with matplotlib but I do see that the line
plt.figure
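doesn't actually create a new figure: written without parentheses it only references the function and never calls it, so every iteration keeps drawing on the same figure. You should call plt.figure(1) and plt.figure(2) (or plt.figure() once per graph) so that each graph gets its own figure.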
plt.clf()
works to create new plots without appending to the existing one.
Related
I have a for loop running through my data to find dips in my data set. This gives me five different graphs. I need to superimpose these five graphs onto each other with different colors. Yes, it will be very messy.
I've done this before, but not inside a for loop, so I'm not sure how to go about it. Here is my code:
# Load the light curve and collect the sample indices at which a dip starts.
# NOTE(review): indentation was lost in this paste, so the nesting of the
# loop body has to be inferred -- confirm against the original script.
# data
data = np.loadtxt('student_021.txt') # loading light curve data file
time_x = data[:,0] # taking the first column of data
lum_y = data[:,1] # second column
mean = lum_y.mean() # mean value of the light curve
std = lum_y.std() # standard deviation of the light curve
light_dip = [] # initialize empty list for the indices where the light curve dips
end = None # index used as the reference point for the 250-sample gap test below
# for loop to go through the data and find where the light dips are
for i, x in enumerate(lum_y): # enumerate to get each sample's position so the dips can be identified and separated
if x < mean - (std *4): # sample lies more than 4 standard deviations (an arbitrary choice) below the mean
if not light_dip: # no dip recorded yet?
light_dip.append(i) # record the index of the first dip
end = i
else:
if i > end + 250: # more than 250 samples past the reference index: treat as a new dip
# NOTE(review): list.append() returns None, so this assignment sets `end`
# to None; the next line then sets it to i.
end = light_dip.append(i)
end = i
print(light_dip)
# plotting the primary chart
# Full light curve on its own figure.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlabel('Time(s)')
ax.set_ylabel('Brightnes')
ax.plot(time_x,lum_y)
plt.title('Original Graph')
# NOTE(review): indentation was lost in this paste; presumably all remaining
# lines form the loop body. In that case plt.figure() runs once per dip, so
# every dip is drawn on a separate figure rather than on shared axes.
for dip in light_dip: # plot a window of samples around each detected dip
i = max(dip - 50, 0) # left limit: 50 samples before the dip, clamped at index 0
j = dip + 150 # right limit: 150 samples after the dip
factor_x = time_x[i:j]
factor_y = lum_y[i:j]
# plotting
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Brightnes')
#ax.plot(time_x,lum_y)
ax.plot(factor_x, factor_y)
So, from the graph below, I find the discrete points and their indexes. The final figure should show the exact time at which each discontinuity happened, with a vertical line drawn at the discontinuity. To do that, I had to add "plt.axvline" and "plt.text" together as a pair.
I wrote out the code like this here:
However, whenever a new index appears, it is very inconvenient to add two more lines of code manually; and if 100 discontinuities showed up, it would be nearly impossible to add them all by hand.
I am thinking of an object-oriented approach for this, such as a "class", but I have no idea where to start.
Any idea how to add these pairs automatically (for example in a loop), given the length of the index list?
# Hand-written variants for 1, 2 or 3 discontinuity indices in `idx`.
# Every branch opens a 20x10 figure, draws one vertical line plus one text
# label per index, then adds the title, the scatter plot, saves to "XXX.png"
# and closes the figure. Other lengths of `idx` are not handled here.
# NOTE(review): indentation was lost in this paste; each branch body is
# presumably the run of lines up to the next `elif`.
if len(idx) == 1 :
plt.figure(figsize = (20,10))
plt.axvline(x=idx[0], color = 'r')
# The last index of each branch is labelled with send_time[index - 1] only.
plt.text(idx[0],.13, '{}'.format(send_time[idx[0]-1]))
plt.title(var)
plt.scatter(x, result, alpha = alp)
plt.savefig("XXX.png")
plt.close()
elif len(idx) == 2:
plt.figure(figsize = (20,10))
plt.axvline(x=idx[0], color = 'r')
# Non-last indices get a two-line label: send_time[index] and time1[index].
plt.text(idx[0],.10, '{}\n{}'.format(send_time[idx[0]], time1[idx[0]]))
plt.axvline(x=idx[1], color = 'b')
plt.text(idx[1],.10, '{}'.format(send_time[idx[1]-1]))
plt.title(var)
plt.scatter(x, result, alpha = alp)
plt.savefig("XXX.png")
plt.close()
elif len(idx) == 3:
plt.figure(figsize = (20,10))
# Line colors go r, b, y and the label heights step down .13, .12, .11.
plt.axvline(x=idx[0], color = 'r')
plt.text(idx[0],.13, '{}\n{}'.format(send_time[idx[0]],time1[idx[0]]))
plt.axvline(x=idx[1], color = 'b')
plt.text(idx[1],.12, '{}\n{}'.format(send_time[idx[1]],time1[idx[1]]))
plt.axvline(x=idx[2], color = 'y')
plt.text(idx[2],.11, '{}'.format(send_time[idx[2]-1]))
plt.title(var)
plt.scatter(x, result, alpha = alp)
plt.savefig("XXX.png")
plt.close()
In general, it's better to loop over a collection directly (`for item in collection`) rather than using indices.
# One figure for any number of discontinuities: each index in `idx` gets a
# red vertical marker and a label taken from send_time[index - 1].
plt.figure(figsize=(20, 10))

for position in idx:
    plt.axvline(position, color='r')
    stamp = send_time[position - 1]
    plt.text(position, .13, '{}'.format(stamp))

# Title and data points are drawn once, after all markers are in place.
plt.title(var)
plt.scatter(x, result, alpha=alp)

# Write the image and release the figure.
plt.savefig("XXX.png")
plt.close()
So lets say I have a dictionary as follows:
dictionary = {'a': [1,2,3], 'b':[4,2,5], 'c':[5,9,1]}
So the way I would do a single plot of all 'a','b','c' lines would be (assuming figure has already been declared, etc.):
#half-setup for animation
# Creates one empty line object per dictionary key and keeps them in `lines`.
# `lines` is not used by the static plot further down.
lines = []
mass = list(dictionary.keys()) #I know this is redundant but my 'mass' variable serves another purpose in my actual program
for i in range(len(mass)): #create a list of line objects with zero entries
# NOTE(review): indentation was lost; presumably the next two lines are the loop body.
a, = ax.plot([], [])
lines.append(a)
#single plot
# NOTE(review): the loop iterates over the keys of `dictionary` but reads the
# data from `locations`, which is not defined in this excerpt -- presumably a
# mapping from the same keys to the point data; confirm.
for i in dictionary:
index = np.array(locations[i]) #convert to numpy
# Columns 0, 1 and 2 of the array are passed as three positional arguments
# (the empty lines above were created with only two).
ax.plot(index[:,0],index[:,1],index[:,2])
plt.show()
So how can I turn this into an animated 3D graph? I have already tried plt.ion() and plt.pause() but the animation is painfully slow.
Here is the following general implementation that I used and it works pretty well (involves dictionaries):
import matplotlib.animation as anim
#create regular 3D figure 'fig'
# One empty line object per trajectory (create however many lines you want);
# the animation callback fills them in frame by frame.
lines = [ax.plot([], [], [])[0] for _ in range(3)]

# Map each matplotlib line object to its dataset, where [data] is the
# x, y, z dataset that you have beforehand.
bodies = {line: [data] for line in lines}


def update(num):
    """Animation callback: show the first `num` rows of every dataset.

    Columns 0 and 1 of each dataset go to the line's 2-D data and
    column 2 to its 3-D property.
    """
    for line, dataset in bodies.items():
        coords = np.array(dataset)
        visible = coords[:num]
        line.set_data(visible[:, 0], visible[:, 1])
        line.set_3d_properties(visible[:, 2])


if __name__ == '__main__':
    totalSteps = 1000  # can change
    ani = anim.FuncAnimation(fig, update, totalSteps, interval=1)
I'm trying to plot data from two separate MultiIndex DataFrames that share the same index levels.
Currently, this generates two separate plots, and I'm unable to customise the legend by appending a string that distinguishes each line on the graph. Any help would be appreciated!
Here is the method so far:
# Plot MUTUAL_INFORMATION against SHIFT for the selected symbols of two frames.
#   df_ante, df_post -- frames whose index supports .loc[symbols] and has
#                       .levels[0] (i.e. a MultiIndex)
#   symbols          -- labels to select; with an empty list only a hint is printed
# There is no return statement, so the function returns None.
# NOTE(review): indentation was lost in this paste; presumably every line
# after `else:` belongs to that branch.
def plot_lead_trail_res(df_ante, df_post, symbols=[]):
if len(symbols) < 1:
print "Try again with a symbol list. (Time constraints)"
else:
df_ante = df_ante.loc[symbols]
df_post = df_post.loc[symbols]
# Legend entries: every value of index level 0 with a suffix naming its frame.
ante_leg = [str(x)+'_ex-ante' for x in df_ante.index.levels[0]]
post_leg = [str(x)+'_ex-post' for x in df_post.index.levels[0]]
print "ante_leg", ante_leg
# Neither plot() call is given an existing axes object, and the second call
# rebinds `ax`, so the labels below are set only on what the second call returned.
# NOTE(review): a list is passed as `legend=` -- confirm that plot() accepts this.
ax = df_ante.unstack(0).plot(x='SHIFT', y='MUTUAL_INFORMATION', legend=ante_leg)
ax = df_post.unstack(0).plot(x='SHIFT', y='MUTUAL_INFORMATION', legend=post_leg)
ax.set_xlabel('Time-shift of sentiment data (days) with financial data')
ax.set_ylabel('Mutual Information')
Using this function call:
sentisignal.plot_lead_trail_res(data_nasdaq_top_100_preprocessed_mi_res, data_nasdaq_top_100_preprocessed_mi_res_validate, ['AAL', 'AAPL'])
I obtain the following figure:
Current plots
Ideally, both sets of lines would be on the same graph with the same axes!
Update 2 [Concatenation Solution]
I've solved the issues of plotting from multiple frames using concatenation, however the legend does not match the line colors on the graph.
There are no explicit calls to legend(), and the label parameter of plot() has not been used.
Code:
# Select the requested symbols from both result frames, tag every symbol with
# the frame it came from, and plot everything from one concatenated frame.
df_ante = data_nasdaq_top_100_preprocessed_mi_res
df_post = data_nasdaq_top_100_preprocessed_mi_res_validate
symbols = ['AAL', 'AAPL']
df_ante = df_ante.loc[symbols]
df_post = df_post.loc[symbols]

# Rename only the outer (symbol) level. set_levels() returns a new index, so
# it is assigned back; the former inplace=True form was removed in pandas 2.0.
df_ante.index = df_ante.index.set_levels(
    [str(x) + '_ex-ante' for x in df_ante.index.levels[0]], level=0)
df_post.index = df_post.index.set_levels(
    [str(x) + '_ex-post' for x in df_post.index.levels[0]], level=0)

df_merge = pd.concat([df_ante, df_post])
# Plot against the magnitude of the shift.
df_merge['SHIFT'] = df_merge['SHIFT'].abs()
df_merge.unstack(0).plot(x='SHIFT', y='MUTUAL_INFORMATION')
Image:
MultiIndex Plot Image
I think, with
ax = df_ante.unstack(0).plot(x='SHIFT', y='MUTUAL_INFORMATION', legend=ante_leg)
you put the output of the plot() in ax, including the lines, which then get overwritten by the second function call. Am I right, that the lines which were plotted first are missing?
The official procedure would be rather something like
# a single 5 x 5 inch figure holding exactly one axes
fig, ax = plt.subplots(figsize=(5, 5))
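Now you have an axes object in ax and can pass it to each subsequent plot call (for example with the ax=ax keyword of the pandas plot() method), so that all lines are drawn on the same axes.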
I'm trying to make a program that will create a descending series of plots based on given files containing an n*2 matrix of numerical values (they more or less share an x-axis, and they're close enough on the y-axis that they need to be manipulated to avoid overlap).
Right now, the way it works is to read in the files one at a time with fileinput, add a constant to the values in column two (arbitrary so long as the constant splits each plot; I do it by multiplying the number of files by two, and decrementing by two each plot so they get split), then add the manipulated values to two master lists (for x and y), which are plotted at the end by matplotlib.
I have it doing very close to exactly what I want, but it has some odd lines connecting the end of one file to the beginning of the next, and I'd like to know how to remove them.
Here's the relevant part of the code:
# Stack several spectra in one plot: each file's flux column is offset by a
# constant that decreases by 2 from file to file.
# NOTE(review): `mpl` is presumably an alias for matplotlib.pyplot, and
# `wavelenAll` / `fluxAll` are lists created outside this excerpt -- confirm.
# NOTE(review): indentation was lost in this paste; the loop body presumably
# runs from the fileinput.filename() line through fileinput.nextfile().
mpl.suptitle(spectitle, fontsize=16)
mpl.xlabel('wavelength (A)', fontsize=14)
mpl.ylabel('flux (erg s^-1 cm^-2)', fontsize=14)
# `infile` is never read: the comprehension opens `filelist` a second time.
with open(filelist) as infile:
allfiles = [line.rstrip('\n') for line in open(filelist)]
# Initial offset is twice the number of files.
multiplier = len(allfiles)
multiplier *= 2
for line in fileinput.input(allfiles):
filename = fileinput.filename()
# Columns 0 and 1 of the current file are loaded as two arrays.
waveN, fluxN = np.loadtxt(filename, usecols=[0,1], unpack=True)
fluxCalc = np.array(fluxN)
fluxCalc += multiplier
multiplier -= 2 #decrease multiplier, causing next output spectrum to be placed below the one just calculated
wavelenAll.extend(waveN)
# The result of tolist() is discarded; fluxCalc itself is unchanged by this call.
fluxCalc.tolist()
fluxAll.extend(fluxCalc)
# Move on to the next file in the list.
fileinput.nextfile()
# All files' values were appended to the same two lists and are drawn by this
# single plot() call, i.e. as one continuous line running across file boundaries.
mpl.plot(wavelenAll, fluxAll)
mpl.savefig('allspec.png')
mpl.show()
I can add an image of the output in a few hours. Thanks for any help in advance.
Try something like:
import matplotlib.pyplot as plt
import numpy as np
# Placeholder: path of the text file that lists one spectrum file per line.
# (It must be a path; passing a list to open() raises TypeError.)
filelist = 'filelist.txt'
spectitle = 'spectrum'

# Read the names through the handle the with-statement manages, so the file
# is opened once and closed on exit; blank lines are skipped because they do
# not name a file.
with open(filelist) as infile:
    allfiles = [line.rstrip('\n') for line in infile if line.strip()]

all_flux, all_wavelen = [], []
# just get the data from the file and accumulate in a list
# which assumes you want these lists for something else
for fname in allfiles:
    waveN, fluxN = np.loadtxt(fname, usecols=[0, 1], unpack=True)
    all_flux.append(fluxN)
    all_wavelen.append(waveN)

fig, ax = plt.subplots()

fig.suptitle(spectitle, fontsize=16)
ax.set_xlabel('wavelength (A)', fontsize=14)
ax.set_ylabel('flux (erg s^-1 cm^-2)', fontsize=14)

# loop over the data and plot: one plot() call per file, so no line is drawn
# from the end of one spectrum to the start of the next. The vertical shifts
# count down from len(allfiles) to 1.
for wv, flux, shift in zip(all_wavelen, all_flux,
                           range(len(allfiles), 0, -1)):
    # do the shift as late as possible so you do not accidentally reuse
    # cosmetically shifted data for computing something
    ax.plot(wv, flux + shift, color='b')

fig.savefig('allspec.png')
plt.show()
The problem must be in the data or caused by incorrect post-processing of it. It is hard to say more unless we see the data. Try plotting it without the first or last elements, e.g. mpl.plot(wavelenAll[1:-1], fluxAll[1:-1])