Legend in matplotlib - python

I'm wondering how I can do the following:
I have a DataFrame with points and classes. I'd like to draw all the points, using one color per class. How can I make the legend show which color corresponds to which class?
fig = plt.figure(figsize=(18,10), dpi=1600)
df = pd.DataFrame(dict(points1 = data_plot[:,0], points2 = data_plot[:,1], \
target = target[0:2000]))
colors = {1: 'green', 2:'red', 3:'blue', 4:'yellow', 5:'orange', 6:'pink', \
7:'brown', 8:'black', 9:'white'}
fig, ax = plt.subplots()
ax.scatter(df['points1'], df['points2'], c = df['target'].apply(lambda x: colors[x]))

The easiest way to get a legend with a separate entry for each color (and therefore for each target value) is to create a separate plot object for each target value.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Synthetic data: 100 random points, each tagged with an integer class.
# np.random.randint excludes its upper bound, so the classes here are 1..8.
x = np.random.rand(100)
y = np.random.rand(100)
target = np.random.randint(1, 9, size=100)
df = pd.DataFrame(dict(points1=x, points2=y, target=target))

# Fixed colour for every class value that may occur.
colors = {1: 'green', 2: 'red', 3: 'blue', 4: 'yellow', 5: 'orange',
          6: 'pink', 7: 'brown', 8: 'black', 9: 'white'}

fig, ax = plt.subplots()

# One scatter call per class yields one legend entry per class.
# Grouping by the data (rather than looping over every key of `colors`)
# keeps classes that never occur in `df` out of the legend.
for k, series in df.groupby('target'):
    ax.scatter(series['points1'], series['points2'], color=colors[k], label=k)

ax.legend()

Related

Custom colormap boundaries for segmented colormap

I have the following picture with data available for several vectors with some quantified feature:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as colors
fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot()
category = [0,0,0,0.1,0.4,0.9,1.5]
r = np.random.uniform(size=[len(category)*100]).reshape(len(category),100)
norm = matplotlib.colors.Normalize(vmin=min(category), vmax=max(category))
bounds = np.array([0, 0.3, 0.5, 1.5])
norm = colors.BoundaryNorm(boundaries=bounds, ncolors=3)
cmap = matplotlib.cm.ScalarMappable(norm=norm, cmap=colors.ListedColormap(['green', 'blue', 'red']))
cmap.set_array([])
for no, cat in enumerate(category):
ax.plot(r[no][r[no]>0.1],no*np.ones(100)[r[no]>0.1],'o',color=cmap.to_rgba(category[no]))
cbar = fig.colorbar(cmap, ax=ax, pad=0.01)
I am wondering whether there is a way to move the colorbar's boundaries so that they correspond to the boundaries between the groups of vectors in the picture (as denoted by the black arrows).
I thought that spacing='proportional' would help; however, it spaces the segments according to the feature values, whereas I want the spacing to depend on the number of vectors that fall within each feature range.
Additionally, is it possible to use these custom boundaries with a gradient (non-segmented) colormap?
I found a way to do what I asked for.
The idea is to create a new variable that depends on the number of vectors in each group and to use this variable with spacing='proportional'. Here is the MWE:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as colors
from matplotlib.cm import ScalarMappable

fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot()

# One feature value per vector (row of `r`), plus random sample data.
category = np.array([0, 0, 0, 0.1, 0.4, 0.9, 1.5])
r = np.random.uniform(size=[len(category) * 100]).reshape(len(category), 100)

# Feature thresholds separating the three colour groups.
lev1 = 0.3
lev2 = 0.5

# Group index per vector: 0 for x < lev1, 1 for lev1 <= x < lev2, 2 for x >= lev2.
# The same assignment drives both the group sizes and the colours, so the
# two can never disagree for a value that sits exactly on a threshold.
group = np.digitize(category, [lev1, lev2])
frac = np.bincount(group, minlength=3) / len(category)

# Colorbar boundaries on a 0..1 axis, proportional to the number of vectors
# in each group instead of to the feature values themselves.
bounds = np.array([0, frac[0], frac[0] + frac[1], 1])

# Colour each vector by the midpoint of its group's segment; a midpoint is
# strictly inside the segment, so it never hits the norm's upper limit.
midpoints = (bounds[:-1] + bounds[1:]) / 2
gr_color = midpoints[group]

norm = colors.BoundaryNorm(boundaries=bounds, ncolors=3)
mappable = ScalarMappable(norm=norm, cmap=colors.ListedColormap(['green', 'blue', 'red']))
mappable.set_array([])

for no, row in enumerate(r):
    kept = row[row > 0.1]  # only samples above 0.1 are drawn
    ax.plot(kept, np.full_like(kept, no), 'o', color=mappable.to_rgba(gr_color[no]))

# Pin the ticks to the segment boundaries, then relabel them with the
# feature values those boundaries stand for.
cbar = fig.colorbar(mappable, ax=ax, pad=0.01, spacing='proportional', ticks=bounds)
labels = [f"{value:g}" for value in (category.min(), lev1, lev2, category.max())]
cbar.set_ticklabels(labels)

boxplot show max and min fliers results in TypeError: 'AxesSubplot' object is not subscriptable

I am preparing box plots with a whisker interval of [2,98]. The issue is that I am working with air quality data and have a large range of data points, so the outliers take up the entire figure and overshadow the boxplots. I would like to plot the max and min outliers only and have tried the method from Matplotlib boxplot show only max and min fliers, however, I get an error message that says TypeError: 'AxesSubplot' object is not subscriptable.
Here is my code:
fig,ax = plt.subplots(1, figsize=(8,6))
g = sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98))
fliers = g['fliers']
for fly in fliers:
fdata=fly.get_data
fly.set_data([fdata[0][0],fdata[0][-1],fdata[1][0],fdata[1][-1]])
xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(3), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)',fontsize=15)
ax.set_ylim(0,105)
plt.show()
Here's some sample data:
mda8 = pd.DataFrame({
'T1':[35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
'T2':[28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
'T3':[34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})
I need help with plotting the max and min outliers only and am open to doing another method besides the one that I tried here.
EDIT here's the link to my csv file https://drive.google.com/file/d/1E3A0UAYCbSN53JXtfsbrA4i_Phci_JWf/view?usp=sharing
A possible approach could be:
hide the outliers plotted by seaborn.boxplot by passing showfliers = False parameter:
sns.boxplot(data=mda8, orient='v', width = 0.7, whis = (2,98), showfliers = False)
get the list of outliers for each column, find maximum and minimum and plot only them:
# Fliers per column, computed with the SAME whisker percentiles that are
# passed to sns.boxplot; boxplot_stats otherwise defaults to whis=1.5 and
# would report points that do not match the drawn whiskers.
# For a single column boxplot_stats returns a one-element list of stat dicts.
outliers = {col: boxplot_stats(mda8[col], whis=(2, 98))[0]['fliers'] for col in mda8.columns}

# Boxes are drawn at x = 0, 1, 2, ... in column order; mark only the
# smallest and largest flier of each column, if it has any.
for i, fliers in enumerate(outliers.values()):
    if len(fliers):
        ax.scatter([i, i], [np.min(fliers), np.max(fliers)], marker='d', facecolor='black')
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.cbook import boxplot_stats

mda8 = pd.DataFrame({'T1': [35.000000, 32.125000, 32.000000, 35.250000, 28.875000, 28.500000, 29.375000, 25.125000, 34.166667, 35.250000],
                     'T2': [28.375, 30.750, 33.250, 34.000, 32.875, 30.250, 29.875, 100.409, 29.625, 1.232],
                     'T3': [34.250, 102.232, 28.250, 33.000, 27.625, 21.500, 28.375, 30.250, 3.454, 33.750]})

# Whisker percentiles, shared by the plot and the flier computation so that
# "outlier" means the same thing in both places.
whis = (2, 98)

fig, ax = plt.subplots(1, figsize=(8, 6))

# Draw the boxes without seaborn's own fliers; selected ones are added below.
sns.boxplot(data=mda8, orient='v', width=0.7, whis=whis, showfliers=False, ax=ax)

# For a single column boxplot_stats returns a one-element list of stat dicts.
outliers = {col: boxplot_stats(mda8[col], whis=whis)[0]['fliers'] for col in mda8.columns}

# Boxes sit at x = 0, 1, 2, ... in column order; mark only the smallest and
# largest flier of each column, if it has any.
for i, fliers in enumerate(outliers.values()):
    if len(fliers):
        ax.scatter([i, i], [np.min(fliers), np.max(fliers)], marker='d', facecolor='black')

xvalues = ['Niland', 'El Centro', 'Calexico']
plt.xticks(np.arange(3), xvalues, fontsize=12)
ax.set_ylabel('Ozone MDA8 (ppb)', fontsize=15)
ax.set_ylim(0, 105)
plt.show()
EDIT
Working with the data you provided, if I plot them as they are:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
mda8 = pd.read_csv(r'data/MDA8_allregions.csv')
mda8 = mda8.drop(['date', 'date.1', 'date.2'], axis = 1)
fig, ax = plt.subplots(1, figsize = (8, 6))
sns.boxplot(data = mda8, orient = 'v', width = 0.7, whis = (2, 98), showfliers = True)
plt.show()
I get:
In the code above, I then change the parameter to showfliers = False in order to hide the outliers.
Then, as suggested by JohanC in the comment, a simpler way to plot outliers is to plot min and max for each column:
for i, col in enumerate(mda8.columns, 0):
ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker = 'd', facecolor = 'black')
Complete Code
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
mda8 = pd.read_csv(r'data/MDA8_allregions.csv')
mda8 = mda8.drop(['date', 'date.1', 'date.2'], axis = 1)
fig, ax = plt.subplots(1, figsize = (8, 6))
sns.boxplot(data = mda8, orient = 'v', width = 0.7, whis = (2, 98), showfliers = False)
for i, col in enumerate(mda8.columns, 0):
ax.scatter([i, i], [mda8[col].min(), mda8[col].max()], marker = 'd', facecolor = 'black')
plt.show()

Plot colormap to unique labels - Matplotlib

I'm hoping to map varying colours to a quiver plot determined by the associated label. Using below, unique items are defined by the col Label. I'm hoping to plot the same color for each unique item in Label.
Note: The number of unique items may vary across DataFrames, so I don't want to hardcode the colors. I'd like to take any number of unique labels and map them onto a colormap.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import seaborn as sns
df = pd.DataFrame(np.random.randint(0,20,size=(100, 4)), columns=list('XYUV'))
labels = df['X'].apply(lambda x: random.choice(['A', 'B', 'C', 'D']))
df['Label'] = labels
X = df['X']
Y = df['Y']
U = df['U']
V = df['V']
fig,ax = plt.subplots()
ax.set_xlim(-10, 30)
ax.set_ylim(-10, 30)
color_labels = df['Label'].unique()
col_values = sns.color_palette('Set2')
color_map = dict(zip(color_labels, col_values))
ax.quiver(X, Y, (U-X), (V-Y), angles = 'xy', scale_units = 'xy', scale = 1, color = color_map)
You can create a list of colors for each vector with
colors = [color_map[label] for label in df['Label'].values]
With these colors, the complete example becomes:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import seaborn as sns
import matplotlib.patches as mpatches

# Four clusters of 25 vectors each, in separate coordinate ranges.
data = np.vstack((np.random.randint(0, 10, size=(25, 4)),
                  np.random.randint(10, 20, size=(25, 4)),
                  np.random.randint(20, 30, size=(25, 4)),
                  np.random.randint(30, 40, size=(25, 4))))
df = pd.DataFrame(data, columns=list('XYUV'))
# 25 consecutive rows per label, in the same order as the clusters above.
df['Label'] = np.repeat(['A', 'B', 'C', 'D'], 25)

X = df['X']
Y = df['Y']
U = df['U']
V = df['V']

fig, ax = plt.subplots()
ax.set_xlim(-10, 40)
ax.set_ylim(-10, 40)

color_labels = df['Label'].unique()
# Request exactly one colour per label. Without n_colors the palette has a
# fixed length, zip() would silently drop any labels beyond it, and the
# lookup below would then raise KeyError for those labels.
col_values = sns.color_palette('Set2', n_colors=len(color_labels))
color_map = dict(zip(color_labels, col_values))

# quiver takes one colour per arrow, not a label -> colour mapping.
colors = [color_map[label] for label in df['Label'].values]
ax.quiver(X, Y, (U - X), (V - Y), angles='xy', scale_units='xy', scale=1, color=colors)

# quiver creates no per-label legend entries, so build them by hand.
ax.legend(handles=[mpatches.Patch(color=v, label=k) for k, v in color_map.items()])

Plotting superimposed charts (line and bar) with pandas and matplotlib

I am testing the capabilities of pandas to plot financial data (price and volume) on the same chart. If I try to render both data as lines, it works fine:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
a = pd.date_range('2019-01-01', '2019-06-01',freq = 'D')
b = np.random.normal(size = len(a), loc = 50)
v = np.random.normal(size = len(a), loc = 1000)
c = pd.DataFrame(index = a, data = zip(b,v), columns = ['price', 'volume'])
fig, ax = plt.subplots(figsize = (15,8))
bx = ax.twinx()
c.price.plot.line(ax = ax, color = 'r')
c.volume.plot.line(ax = bx, color = 'g', alpha = .2)
plt.show()
This gives:
However if I try to render one as a line and the other as a bar chart, by replacing the 3 last lines by:
c.price.plot.line(ax = ax, color = 'r')
c.volume.plot.bar(ax = bx, color = 'g', alpha = .2)
plt.show()
This gives the wrong result:
Would anybody know how to make the above code work with line + bar ??
Use the Matplotlib plotting library directly.
Matplotlib's pyplot module provides the functions bar and plot; you can use them to draw both series on the same chart.
Example

How to color parts of links in dendrograms using scipy in python?

I can color the labels in a Python dendrogram, but I don't know how to color the parts of the links that belong to each label. I want to make something like this:
Is it possible in Python?
Here I color only labels:
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sc
dists = np.array([[0,2,1,4],[2,0,3,5],[1,3,0,6],[4,5,6,0]])
l = ['a','b','c','b']
Z = sc.linkage(dists, method='complete')
d = sc.dendrogram(Z, labels=l)
label_colors = {'a': 'r', 'b': 'g', 'c': 'm'}
ax = plt.gca()
xlbls = ax.get_xmajorticklabels()
for i in range(len(xlbls)):
xlbls[i].set_color(label_colors[xlbls[i].get_text()])
plt.show()
I'm not sure whether it's possible to color only part of a U-shape; however, you can color complete U-shapes with
something like:
# First pass only to learn the left-to-right leaf order ('ivl');
# no_plot=True keeps it from drawing a throw-away dendrogram.
leaf_order = sc.dendrogram(Z, labels=l, no_plot=True)['ivl']

# Queue of link colours: the leaf colours from the second-to-last leaf back
# to the first. Materialised as a list first, because map() returns an
# iterator in Python 3 and cannot be sliced.
it = iter([label_colors[name] for name in leaf_order][-2::-1])


def f(x):
    """Return the next queued colour; the link id `x` itself is ignored."""
    # NOTE(review): this depends on the order in which dendrogram() asks for
    # link colours -- confirm it matches the intended links for your data.
    return next(it)


d = sc.dendrogram(Z, labels=l, link_color_func=f)

# Colour the tick labels to match their leaf labels.
ax = plt.gca()
xlbls = ax.get_xmajorticklabels()
for y in xlbls:
    y.set_color(label_colors[y.get_text()])
In a SciPy dendrogram you cannot colour half of a U-shape directly, but you can assign a colour to any node. This can be accomplished as shown below:
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sc

dists = np.array([
    [0, 2, 1, 4],
    [2, 0, 3, 5],
    [1, 3, 0, 6],
    [4, 5, 6, 0],
    [4, 7, 6, 2],
])
Z = sc.linkage(dists, method='complete')
num = len(dists)

# Colour table indexed by node id: everything blue by default, with
# nodes 5, 6 and 7 overridden.
color = ["b"] * (2 * num - 1)
color[5:8] = ["g", "r", "y"]

d = sc.dendrogram(Z, link_color_func=lambda x: color[x])

# Annotate each link with a running id: take the two x-coordinates and the
# height of its horizontal bar, then visit the links from lowest to highest.
icoord = np.array(d['icoord'])
dcoord = np.array(d['dcoord'])
coord = np.c_[icoord[:, 1:3], dcoord[:, 1]]
coord = coord[np.argsort(coord[:, 2])]

for left, right, height in coord:
    x = 0.5 * (left + right)  # centre of the horizontal bar
    y = height
    plt.plot(x, y, 'ro')
    plt.annotate(
        "%2i" % num,
        (x, y),
        xytext=(0, -8),
        textcoords='offset points',
        va='top',
        ha='center',
    )
    num += 1  # ids continue counting from the number of rows in dists

plt.show()
#~ plt.savefig("test.png")

Categories