Number of legend entries equals the size of data set - python

I'm plotting many sets of data in a for loop. The number of sets and size of sets don't have any problems plotting. When I try to add a legend, things get interesting. I get a legend, but I only get the first label to show up hundreds of times! I have one data set with 887 points, I get 887 legend entries.Here is the plot I get
You can access the .py and .xlsx files here:
https://drive.google.com/drive/folders/1QCVw2yqIHexNCvgz4QQfJQDGYql1hGW8?usp=sharing
Here is the code that is generating the plot.
# Temperature Data plotting
=================================================
#initialize figure
plt.figure(figsize=(11,8))
Color = 'C'
Marks = '*','o','+','x','s','d','.'
nm = len(Marks)
q = 0 # Marks counter
c = 0 # color counter
for k in range(0,nt):
style = 'C' + str(c) + Marks[q]
test = 'T' + str(k)
plt.plot([t+t_adjust[k]],[Temps[:,k]],style,label=test)
#, label = 'test'
c += 1
if(c==6):
c = 9
if(c==10):
c = 0
q += 1
if(k > nt-10):
q = nm - 1
# Formatting Figure
#names = '1','2','3','4','5'
#name1 = '1'
#pylab.legend([name1])
#from collections import OrderedDict
#import matplotlib.pyplot as plt
#handles, labels = plt.gca().get_legend_handles_labels()
#by_label = OrderedDict(zip(labels, handles))
#plt.legend(by_label.values(), by_label.keys())
plt.legend(loc = 'upper right')
plt.show()
# x axis limits, in seconds
plt.xlim(0,60)
plt.xlabel('t (s)')
plt.ylabel('T (deg C)')
FigTitle = (oper_name + '; ' + str(pres_val) + pres_unit + '; d=' +
str(diam_val) + diam_unit + '; H=' + str(dist_val) + dist_unit)
plt.title(FigTitle)
# End Temperature Data Plotting
==============================================
I have 14 sets of data, with 887 points each. There is clearly more than 14 legend entries. Not sure why its somehow referencing the length of data or something. I found this (code below) to find the handles and labels, but I need them to be assigned the style name for each data set instead of the first style name for the length of data.
#from collections import OrderedDict
#import matplotlib.pyplot as plt
#handles, labels = plt.gca().get_legend_handles_labels()
#by_label = OrderedDict(zip(labels, handles))
#plt.legend(by_label.values(), by_label.keys())

Hard to say without having a look at the data, but you can always control what goes into the legend manually like so:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0., 2*np.pi, 101, endpoint=True)
lns = []
for i in range(1, 10):
for j in range(10):
ln = plt.plot(x, j*np.sin(x/i), label="series i={:d}".format(i))
lns += ln # note plt.plot returns a list of entities
plt.legend(lns, [l.get_label() for l in lns], loc="best")
plt.show()

Related

Making parts of a line graph a different colour depending on their y value in Matplotlib

I'm making a program which takes a random list of data and will plot it.
I want the colour of the graph to change if it goes above a certain value.
https://matplotlib.org/gallery/lines_bars_and_markers/multicolored_line.html
Matplotlib has an entry on doing just this but it seems to require using a function as input for the graph not using lists.
Does anyone know how to either convert this to work for lists or another way of doing so?
Here's my code so far (without my horrific failed attempts to colour code them)
from matplotlib import pyplot as plt
import random
import sys
import numpy as np
#setting the max and min values where I want the colour to change
A_min = 2
B_max = 28
#makes lists for later
A_min_lin = []
B_max_lin = []
#simulating a corruption of the data where it returns all zeros
sim_crpt = random.randint(0,10)
print(sim_crpt)
randomy = []
if sim_crpt == 0:
randomy = []
#making the empty lists for corrupted data
for i in range(0,20):
randomy.append(0)
print(randomy)
else:
#making a random set of values for the y axis
for i in range(0,20):
n = random.randint(0,30)
randomy.append(n)
print(randomy)
#making an x axis for time
time = t = np.arange(0, 20, 1)
#Making a list to plot a straight line showing where the maximum and minimum values
for i in range(0, len(time)):
A_min_lin.append(A_min)
B_max_lin.append(B_max)
#Testing to see if more than 5 y values are zero to return if it's corrupted
tracker = 0
for i in (randomy):
if i == 0:
tracker += 1
if tracker > 5:
sys.exit("Error, no data")
#ploting and showing the different graphs
plt.plot(time,randomy)
plt.plot(time,A_min_lin)
plt.plot(time,B_max_lin)
plt.legend(['Data', 'Minimum for linear', "Maximum for linear"])
plt.show
You can use np.interp to generate the fine-grain data to plot:
# fine grain time
new_time = np.linspace(time.min(), time.max(), 1000)
# interpolate the y values
new_randomy = np.interp(new_time, time, randomy)
# this is copied from the link with few modification
points = np.array([new_time, new_randomy]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
fig, axs = plt.subplots()
norm = plt.Normalize(new_randomy.min(), new_randomy.max())
lc = LineCollection(segments, cmap='viridis', norm=norm)
# Set the values used for colormapping
lc.set_array(new_randomy[1:])
lc.set_linewidth(2)
line = axs.add_collection(lc)
fig.colorbar(line, ax=axs)
# set the limits
axs.set_xlim(new_time.min(), new_time.max())
axs.set_ylim(new_randomy.min(), new_randomy.max())
plt.show()
Output:

ternary plots as subplot

I want to draw multiple ternary graphs and thought to do this using matplotlib's subplot.
I'm just getting empty 'regular' plots though, not the ternary graphs I want in there. I found the usage of
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax)
so this seems to be possible, but can't really find out how to get this working. Any ideas?
Code I'm using:
I'm using a for loop as the data has columns named tria1-a, tria2-a, etc for the different triads
import ternary
import matplotlib.pyplot as plt
import pandas as pd
#configure file to import.
filename = 'somecsv.csv'
filelocation = 'location'
dfTriad = pd.read_csv(filelocation+filename)
# plot the data
scale = 33
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax, scale=scale)
figure.set_size_inches(10, 10)
tax.set_title("Scatter Plot", fontsize=20)
tax.boundary(linewidth=2.0)
tax.gridlines(multiple=1, color="blue")
tax.legend()
tax.ticks(axis='lbr', linewidth=1, multiple=5)
tax.clear_matplotlib_ticks()
#extract the xyz columns for the triads from the full dataset
for i in range(1,6) :
key_x = 'tria'+ str(i) + '-a'
key_y = 'tria' + str(i) + '-b'
key_z = 'tria' + str(i) + '-c'
#construct dataframe from the extracted xyz columns
dfTriad_data = pd.DataFrame(dfTriad[key_x], columns=['X'])
dfTriad_data['Y'] = dfTriad[key_y]
dfTriad_data['Z'] = dfTriad[key_z]
#create list of tuples from the constructed dataframe
triad_data = [tuple(x) for x in dfTriad_data.to_records(index=False)]
plt.subplot(2, 3, i)
tax.scatter(triad_data, marker='D', color='green', label="")
tax.show()
I had the same problem and could solve it by first "going" into the subplot, then creating the ternary figure in there by giving plt.gca() as keyword argument ax:
plt.subplot(2,2,4, frameon = False)
scale = 10
plt.gca().get_xaxis().set_visible(False)
plt.gca().get_yaxis().set_visible(False)
figure, tax = ternary.figure(ax = plt.gca(), scale = scale)
#now you can use ternary normally:
tax.line(scale * np.array((0.5,0.5,0.0)), scale*np.array((0.0, 0.5, 0.5)))
tax.boundary(linewidth=1.0)
#...

Linear Regression: Extending line past data and adding a legend

I have a code:
import math
import numpy as np
import pylab as plt1
from matplotlib import pyplot as plt
uH2 = 1.90866638
uHe = 3.60187307
eH2 = 213.38
eHe = 31.96
R = float(uH2*eH2)/(uHe*eHe)
C_Values = []
Delta = []
kHeST = []
J_f21 = []
data = np.genfromtxt("Lamda_HeHCL.txt", unpack=True);
J_i1=data[1];
J_f1=data[2];
kHe=data[7]
data = np.genfromtxt("Basecol_Basic_New_1.txt", unpack=True);
J_i2=data[0];
J_f2=data[1];
kH2=data[5]
print kHe
print kH2
kHe = map(float, kHe)
kH2 = map(float, kH2)
kHe = np.array(kHe)
kH2= np.array(kH2)
g = len(kH2)
for n in range(0,g):
if J_f2[n] == 1:
Jf21 = J_f2[n]
J_f21.append(Jf21)
ratio = kHe[n]/kH2[n]
C = (((math.log(float(kH2[n]),10)))-(math.log(float(kHe[n]),10)))/math.log(R,10)
C_Values.append(C)
St = abs(J_f1[n] - J_i1[n])
Delta.append(St)
print C_Values
print Delta
print J_f21
fig, ax = plt.subplots()
ax.scatter(Delta,C_Values)
for i, txt in enumerate(J_f21):
ax.annotate(txt, (Delta[i],C_Values[i]))
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
plt.plot(Delta, C_Values)
fit = np.polyfit(Delta,C_Values,1)
fit_fn = np.poly1d(fit)
# fit_fn is now a function which takes in x and returns an estimate for y
plt.scatter(Delta,C_Values, Delta, fit_fn(Delta))
plt.xlim(0, 12)
plt.ylim(-3, 3)
In this code, I am trying to plot a linear regression that extends past the data and touches the x-axis. I am also trying to add a legend to the plot that shows the slope of the plot. Using the code, I was able to plot this graph.
Here is some trash data I have been using to try and extend the line and add a legend to my code.
x =[5,7,9,15,20]
y =[10,9,8,7,6]
I would also like it to be a scatter except for the linear regression line.
Given that you don't provide the data you're loading from files I was unable to test this, but off the top of my head:
To extend the line past the plot, you could turn this line
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
Into something like
x = np.linspace(0, 12, 50) # both 0 and 12 are from visually inspecting the plot
plt.plot(x, np.poly1d(np.polyfit(Delta, C_Values, 1))(x))
But if you want the line extended to the x-axis,
polynomial = np.polyfit(Delta, C_Values, 1)
x = np.linspace(0, *np.roots(polynomial))
plt.plot(x, np.poly1d(polynomial)(x))
As for the scatter plot thing, it seems to me you could just remove this line:
plt.plot(Delta, C_Values)
Oh right, as for the legend, add a label to the plots you make, like this:
plt.plot(x, np.poly1d(polynomial)(x), label='Linear regression')
and add a call to plt.legend() just before plt.show().

matplotlib twinx xticks at specific locations

I have the code below, in the x-axis, i want to show only the parameter values for which i have the metric values which are 5,10,20 and 50.
I want the parameter values to span the x-axis.
How I can do it ?.
import matplotlib.pyplot as plt;
import numpy as np;
from matplotlib import rc;
fig, ax1 = plt.subplots();
rc('mathtext', default='regular');
x = np.array([5,10,20,50]);
cg1 = np.array([0.1,0.3,0.5,0.8]);
cg2 = np.array([0.2,0.2,0.4,0.7]);
cg3 = np.array([0.3,0.4,0.6,0.6]);
lns1 = ax1.plot(x,cg1,'b*:',label='1 CG');
lns2 = ax1.plot(x,cg2,'bo--',label='2 CG');
lns3 = ax1.plot(x,cg3,'bs-',label='3 CG');
ax1.set_ylabel('CG',color='b');
ax1.set_ylim([0,1]);
ax1.set_xlim([4,55]);
ax1.set_xticklabels([5,10,20,50]);
ax1.set_xlabel('K');
ax2 = ax1.twinx();
ld1 = np.array([0.8,0.5,0.2,0.2]);
ld2 = np.array([0.6,0.2,0.3,0.2]);
ld3 = np.array([0.2,0.4,0.6,0.2]);
lns4 = ax2.plot(x,ld1,'k*:',label='1 ld');
lns5 = ax2.plot(x,ld2,'ko--',label='2 ld');
lns6 = ax2.plot(x,ld3,'ks-',label='3 ld');
lns = lns1 + lns2 + lns3 + lns4 + lns5 + lns6;
labs = [l.get_label() for l in lns];
ax1.legend(lns, labs, loc='best', ncol=2);
ax2.set_ylabel('LD',color='k');
ax2.set_ylim([0,1]);
ax2.set_xlim([4,55]);
plt.show();
Try replacing line with ax1.set_xlim([4,55]) with this line:
ax1.set_xticks(x)
You may also want to remove ax2.set_xlim(...).
Does it give you what you expected?
UPDATE Following comments:
Please use these lines (NOTE: the order matters!):
ax1.set_xlim([4,55]);
ax1.set_xticks(x)
...
ax2.set_xlim([4,55]);
ax2.set_xticks(x)
And remove anything else that touches xticks, like any of these:
ax1.set_xticklabels([5,10,20,50]);
ax2.set_xticklabels([5,10,20,50]);
This should produce a chart like this:
Which has limits at [4, 55] and only selected tick values visible.

Changing the colour of a boxplot when its paired

I want to change the colour of the boxplots according to what they represent, this are grouped in pairs, so my question is:
How can i change the colour of the boxplots when they are paired?
Considering that the first boxplot of each pair should be blue and the second one red.
This is the code, sorry if it's messy:
def obtenerBoxplotsAnuales(self, directorioEntrada, directorioSalida):
meses = ["Enero","Febrero","Marzo","Abril","Mayo","Junio", "Julio", "Agosto","Septie.","Octubre","Noviem.","Diciem."]
ciudades = ["CO","CR"]
anios = ["2011", "2012", "2013"]
boxPlotMensual = []
fig = plt.figure()
fig.set_size_inches(14.3, 9)
ax = plt.axes()
plt.hold(True)
for anio in anios:
boxPlotAnual = []
i=0
ticks = []
for mes in range(len(meses)):
data1 = getSomeData()
data2 = getSomeData()
data = [ [int(float(data1[2])), int(float(data1[0])), int(float(data1[1]))],
[int(float(data2[2])), int(float(data2[0])), int(float(data2[1]))] ]
plt.boxplot(data, positions=[i,i+1], widths=0.5)
ticks.append(i+0.5)
i=i+2
hB, = plt.plot([1,1],'b-')
hR, = plt.plot([1,1],'r-')
plt.legend((hB, hR),('Caleta', 'Comodoro'))
hB.set_visible(False)
hR.set_visible(False)
ax.set_xticklabels(meses)
ax.set_xticks(ticks)
plt.savefig(directorioSalida+"/asdasd"+str(anio)+".ps", orientation='landscape', papertype='A4' )
This is what i get:
I've read that the solution is related with the fact that plt.boxplot(...) returns a kind of dict object that contains a list of the lines created so the way to modify the colour of each boxplot would be access to the indexes? How for this case?
You can set the colour of the return dict from boxplot as follows,
import matplotlib.pyplot as plt
import numpy as np
nboxes = 10
# fake up some data
spread= np.random.rand(50,nboxes) * 100
center = np.ones((25,nboxes)) * 50
flier_high = np.random.rand(10,nboxes) * 100 + 100
flier_low = np.random.rand(10,nboxes) * -100
data =np.concatenate((spread, center, flier_high, flier_low), 0)
# plot figure
plt.figure()
bp = plt.boxplot(data)
for i, box in enumerate(bp['boxes']):
#Colour alternate boxes blue and red
if i%2:
box.set_color('blue')
else:
box.set_color('red')
plt.show()
Where you loop through all boxes in bp['boxes'] and use the method set_color (you can also box.set_markerfacecolor and other standard matplotlib artist attributes). The bp dict also contains ['boxes', 'fliers', 'medians', 'means', 'whiskers', 'caps'] which can also be changed as required.

Categories