Matplotlib scatter plot legend display problem - python

I'm struggling to make a scatterplot code which includes legend related to 'pitch' like below.
The variable 'u' stands for the unique pitch list. And the number is 19.
So the number of legend's label should be 19. But it turns out to be like below. There are only 10 labels on legend. And the color range assign looks weird. (label '15' should be the dark-blue but it's not though)
What seems to be the problem?
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.colors as colors
import matplotlib.cm as cm
import numpy as np
df = pd.DataFrame({
"X" : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],
"Y" : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37],
"pitch":[10,10,20,20,30,40,50,50,60,60,60,70,70,70,70,80,80,80,100,150,1,2,3,4,5,6,7,8,9,10,3,4,5,8,9,10,3],
})
color = cm.jet
u, div = np.unique(df.pitch.values, return_inverse=True)
colorlist = [colors.rgb2hex(color(i)) for i in np.linspace(1, 0, len(u))]
cmap = ListedColormap(colorlist)
fig,ax = plt.subplots()
scatter = plt.scatter(df['X'],df['Y'], c=div, cmap=cmap)
plt.legend(scatter.legend_elements()[0], u, loc=2)
plt.show()

fig,ax = plt.subplots(figsize=(12,8))
for i, pitch in enumerate(u):
df_p = df[df['pitch'] == pitch]
scatter = ax.scatter(df_p['X'],df_p['Y'], c=colorlist[i], cmap=cmap,
label=pitch)
ax.legend(loc=2)
plt.show()

You need to replace the plt.legend(scatteer..) line by this.... Documentation on legend_element is available here.
plt.legend(scatter.legend_elements(prop='colors', num=len(colorlist))[0], u, loc=2)
Output Plot

Related

How to add a color bar for vspans created with variable alpha

I would like to use varying degrees of red color to represent the different importance of each time element and fill in that region.
The example code is shown below.
import matplotlib.pyplot as plt
X_example = np.random.rand(400)
importance_values = np.random.rand(400)
plt.figure(figsize=(13,7))
plt.plot(X_example)
for j in range(len(X_example)):
plt.axvspan(xmin=j, xmax=j+1,facecolor="r",alpha=importance_values[j])
It generates a graph like:
Now I would like to add a colormap in this figure to show that, e.g. the light red means low importance and the dark red means high importance, just like this:
How could I achieve that in my case?
One solution would be to create a LinearSegmentedColormap which takes a list of colors and turns it into a matplotlib colorbar object. Then you can set the "alpha channel":
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap, ListedColormap
from matplotlib.colorbar import ColorbarBase
X_example = np.random.rand(400)
importance_values = np.random.rand(400)
fig, (ax, cax) = plt.subplots(ncols=2, figsize=(8,5), gridspec_kw={'width_ratios': [1, 0.05]})
ax.plot(X_example, color='b')
for j in range(len(X_example)):
ax.axvspan(xmin=j, xmax=j+1,facecolor="r",alpha=importance_values[j])
N = 20 # the number of colors/alpha-values in the colorbar
cmap = LinearSegmentedColormap.from_list(None, ['r' for i in range(N)], N=N)
alpha_cmap = cmap(np.arange(N))
alpha_cmap[:,-1] = np.linspace(0, 1, N)
alpha_cmap = ListedColormap(alpha_cmap, N=N)
cbar = ColorbarBase(cax, cmap=alpha_cmap, ticks=[0., 1],)
cbar.ax.set_yticklabels(["low importance", "high importance"])
This gives the following plot, where the two colors of the colorbar have custom labels:
You could create a colormap mixing the red color with a range of alpha values:
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, to_rgba
from matplotlib.cm import ScalarMappable
import numpy as np
X_example = np.random.rand(400)
importance_values = np.random.rand(400)
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(X_example)
for j in range(len(X_example)):
ax.axvspan(xmin=j, xmax=j + 1, facecolor="r", alpha=importance_values[j])
ax.margins(x=0)
cmap = LinearSegmentedColormap.from_list(None, [to_rgba('r', 0), 'r'])
cbar = plt.colorbar(ScalarMappable(cmap=cmap), ticks=[0, 1], pad=0.02)
cbar.ax.set_yticklabels(["low", "high"], fontsize=20)
cbar.ax.set_ylabel("importance", labelpad=-30, fontsize=20)
plt.tight_layout()
plt.show()
An example of a horizontal colorbar:
cbar = plt.colorbar(ScalarMappable(cmap=cmap), ticks=[0, 1], orientation='horizontal')
cbar.ax.set_xticklabels(["low", "high"], fontsize=20)
cbar.ax.set_xlabel("importance", labelpad=-15, fontsize=20)

Plotting seaborn histplot bar_label with condition

I want to plot a seaborn histogram with labels to show the values of each bar. I only want to show the non-zero values, but I'm not sure how to do it. My MWE is
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
xlist = 900+200*np.random.randn(50,1)
fig, ax = plt.subplots()
y = sns.histplot(data=xlist, element="bars", bins=20, stat='count', legend=False)
y.set(xlabel='total time (ms)')
y.bar_label(y.containers[0])
## y.bar_label(y.containers[0][y.containers[0]!=0])
plt.show()
The graph looks like
and I want to remove all the 0 labels.
Update
A best version suggested by #BigBen:
labels = [str(v) if v else '' for v in y.containers[0].datavalues]
y.bar_label(y.containers[0], labels=labels)
Try:
labels = []
for p in y.patches:
h = p.get_height()
labels.append(str(h) if h else '')
y.bar_label(y.containers[0], labels=labels)

How can I add jitter to my seaborn and matplot plots?

I am working on trying to add Jitter to my plots using seaborn and matplot plots. I am getting mixed information form what I am reading online. Some information is saying coding needs to be done and other information show it as being as simple as jitter = True. I there another library or something that I should be importing that I am not aware of? Below is the code that I am running and trying to add jitter to:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
filename = 'https://library.startlearninglabs.uw.edu/DATASCI410/Datasets/JitteredHeadCount.csv'
headcount_df = pd.read_csv(filename)
headcount_df.describe()
%matplotlib inline
ax = plt.figure(figsize=(12, 6)).gca() # define axis
headcount_df.plot.scatter(x = 'Hour', y = 'TablesOpen', ax = ax, alpha = 0.2)
# auto_price.plot(kind = 'scatter', x = 'city-mpg', y = 'price', ax = ax)
ax.set_title('Hour vs TablesOpen') # Give the plot a main title
ax.set_ylabel('TablesOpen')# Set text for y axis
ax.set_xlabel('Hour')
ax = sns.kdeplot(headcount_df.loc[:, ['TablesOpen', 'Hour']], shade = True, cmap = 'PuBu')
headcount_df.plot.scatter(x = 'Hour', y = 'TablesOpen', ax = ax, jitter = True)
ax.set_title('Hour vs TablesOpen') # Give the plot a main title
ax.set_ylabel('TablesOpen')# Set text for y axis
ax.set_xlabel('Hour')
I receive the error: AttributeError: 'PathCollection' object has no property 'jitter' when trying to add the jitter. Any help or more information on this would be much appreciated
To add jitter to a scatter plot, first get a handle to the collection that contains the scatter dots. When a scatter plot is just created on an ax, ax.collections[-1] will be the desired collection.
Calling get_offsets() on the collection gets all the xy coordinates of the dots. Add some small random number to each of them. As in this case all coordinates are integers, adding a random number between 0 and 1 spreads the dots out evenly.
In this case the number of dots is very huge. To better see where the dots are concentrated, they can be made very small (marker=',', linewidth=0, s=1,) and be very transparent (e.g.alpha=0.1).
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
filename = 'https://library.startlearninglabs.uw.edu/DATASCI410/Datasets/JitteredHeadCount.csv'
headcount_df = pd.read_csv(filename)
fig, ax = plt.subplots(figsize=(12, 6))
headcount_df.plot.scatter(x='Hour', y='TablesOpen', marker=',', linewidth=0, s=1, alpha=.1, color='crimson', ax=ax)
dots = ax.collections[-1]
offsets = dots.get_offsets()
jittered_offsets = offsets + np.random.uniform(0, 1, offsets.shape)
dots.set_offsets(jittered_offsets)
ax.set_title('Hour vs TablesOpen') # Give the plot a main title
ax.set_ylabel('TablesOpen') # Set text for y axis
ax.set_xlabel('Hour')
ax.set_xticks(range(25))
ax.autoscale(enable=True, tight=True)
plt.tight_layout()
plt.show()
As there are a huge number of points, drawing the 2D kde takes a long time. The time can be reduced by taking a random sample from the rows. Note that to draw a 2D kde, the latest versions of Seaborn want each column as a separate parameter.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
filename = 'https://library.startlearninglabs.uw.edu/DATASCI410/Datasets/JitteredHeadCount.csv'
headcount_df = pd.read_csv(filename)
fig, ax = plt.subplots(figsize=(12, 6))
N = 5000
rand_sel_df = headcount_df.iloc[np.random.choice(range(len(headcount_df)), N)]
ax = sns.kdeplot(rand_sel_df['Hour'], rand_sel_df['TablesOpen'], shade=True, cmap='PuBu', ax=ax)
ax.set_title('Hour vs TablesOpen')
ax.set_xticks(range(25))
plt.tight_layout()
plt.show()

Scatter plot with colorbar and datetime axis ticks

I am getting lost in different methods used in matplotlib.
I want to create a colour-coded scatter plot with a colorbar on the side and datetime on the x axis.
But depending on how I define my ax, I get different errors.
Below is the core of my code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.dates as mdates
#.....loading files etc.
norm = mcolors.Normalize(vmin=0,vmax=1000)
timerange = pd.date_range(start='2015-01-01', end='2016-01-01', freq='30D')
### PLOTTING
fig = plt.figure(figsize=(6.,5))
ax = fig.add_subplot(111)
for Af in Afiles:
for index, row in Af.iterrows():
time = pd.to_datetime(row['date'], format="%Y-%m-%d")
plt.scatter(time, row['A'], c=row['z'], norm=norm, cmap=colormap,edgecolor='k', lw=0.8, s=80)
plt.xticks(timerange, rotation=90)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%Y"))
plt.xlabel('Time', fontsize=11, color='k')
clb = fig.colorbar(ax)
clb.ax.set_title('Value', y=-0.125, fontsize=11)
clb.ax.invert_yaxis()
fig.tight_layout()
this produces AttributeError: 'AxesSubplot' object has no attribute 'autoscale_None'
but if I specify my ax as the scatter plot so that I can get my colour-coding working, I then have trouble with the axis formatter.
Writing instead ax = plt.scatter generates AttributeError: 'PathCollection' object has no attribute 'xaxis'.
How can I have both the colorbar AND formatted axis ticks?
Don't call the scatter ax. (This overwrites the existinge axes ax.)
The colorbar expects as first argument a ScalarMappable (as e.g. the scatter). Since the scatters are all normalized, you can use it from the loop,
norm = plt.Normalize(...)
for bla in blubb:
scatter = plt.scatter(..., norm=norm)
Then,
clb = fig.colorbar(scatter)
The rest should stay the same.
The basic idea is that you need to add an extra axis for the colorbar.
It's hard to know if this is an exact match, as you haven't provided a working example with data. But this may at least serve as a template.
First, some example data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1 import make_axes_locatable
vmin = 0
vmax = 1000
timerange = pd.date_range(start='2015-01-01', end='2016-01-01', freq='30D')
N = len(timerange)
data = np.random.randint(vmin, vmax, size=N)
# z contains the colorbar values for each point
cmap = plt.get_cmap('Reds')
z = [cmap((x-vmin)/(vmax-vmin))[:3] for x in data]
df = pd.DataFrame({"value":data, "datetime":timerange, "z":z})
Now plot:
fig = plt.figure(figsize=(6.,5))
ax = fig.add_subplot(111)
plt.scatter(x=df.datetime.values, y=df.value.values, c=df.z)
ax.set_xticklabels(timerange, rotation=90)
ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%Y"))
ax.set_xlabel('Time')
Now add colorbar:
norm = mcolors.Normalize(vmin=vmin,vmax=vmax)
m = cm.ScalarMappable(cmap='Reds', norm=norm)
m.set_array([(x-vmin)/(vmax-vmin) for x in df.value.values])
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
clb = plt.colorbar(m, cax=cax)

Setting markers fro scatter plot

I wanted to set markers for scatter plot as shown below
I am using matplotlib library for plotting until now I am able to plot only the points by the code
import matplotlib.pyplot as plt
import numpy as np
x = [39.5,38,42.5]
y = np.array([0,1,2])
my_xticks = ['a','b','c']
plt.yticks(y, my_xticks)
plt.scatter(x, y,marker='x',s=100)
plt.show()
I also want the line as shown in figure above as a marker in my plot
One way to do this would be to call plt.hlines, so that you can add a few horizontal lines to your markers.
Example:
import matplotlib.pyplot as plt
import numpy as np
x = np.array([39.5,38,42.5])
y = np.array([0,1,2])
my_xticks = ['a','b','c']
plt.yticks(y, my_xticks)
plt.scatter(x, y,marker='x',s=100)
width = .4
plt.hlines(y, xmin=x - width/2, xmax=x + width/2)
plt.show()
Change width to suit your desired "width" for each line. Colors can be changed as well, check the documentation for plt.hlines.
Note that this will not persist in a call to legend. legend will only use the actual line objects, although there are ways to change that too.

Categories