Scatter plot with different groups and marginal histograms for each group - python

I already have a scatter plot with different groups of elements and histograms on the margins, but they are linked to the whole data, not to the individual groups:
I'd like to have 2 histograms, one for each group of elements. How do I do that?
Here's my code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
from matplotlib.colors import LinearSegmentedColormap
data= pd.read_csv("data.csv")
x=data['Fe']
y=data['V']
z=data['Discovery']
# Fixing random state for reproducibility
np.random.seed(19680801)
# definitions for the axes
left, width = 0.1, 0.7
bottom, height = 0.1, 0.7
spacing = 0.05
rect_scatter = [left, bottom, width, height]
rect_histx = [left, bottom + height + spacing, width, 0.2]
rect_histy = [left + width + spacing, bottom, 0.2, height]
# start with a rectangular Figure
fig=plt.figure(figsize=(7, 6))
ax_scatter = plt.axes(rect_scatter)
ax_scatter.tick_params(direction='in', top=True, right=True)
ax_histx = plt.axes(rect_histx)
ax_histx.tick_params(direction='in', labelbottom=True)
ax_histy = plt.axes(rect_histy)
ax_histy.tick_params(direction='in', labelleft=False)
# the function that separates the dots in different classes:
classes = np.zeros( len(x) )
classes[(z == 'Transit')] = 1
classes[(z == 'Radial Velocity')] = 2
# create color map:
colors = ['purple', 'orange']
cm = LinearSegmentedColormap.from_list('custom', colors, N=len(colors))
# the scatter plot:
scatter = ax_scatter.scatter(x, y, c=classes, s=10, cmap=cm, alpha=0.6)
lines, labels = scatter.legend_elements()
# legend with custom labels
labels = [r'Transit', r'Radial Velocity']
legend = ax_scatter.legend(lines, labels,
loc="upper left", title="Planetary Discovery Method")
ax_scatter.add_artist(legend)
# now determine nice limits by hand:
binwidth = 0.1
ax_scatter.set_xlim((-1, 0.7))
ax_scatter.set_ylim((-0.9, 0.9))
#histogram
weights = np.ones_like(x)/(len(x))
weights2 = np.ones_like(y)/(len(y))
ax_histx.hist(x, bins=bins, weights=weights, color='chartreuse')
ax_histy.hist(y, bins=bins, weights=weights, orientation='horizontal', color='darkmagenta')
ax_histx.set_xlim(ax_scatter.get_xlim())
ax_histy.set_ylim(ax_scatter.get_ylim())
#labeling
ax_scatter.set_xlabel('[Fe/H]')
ax_scatter.set_ylabel('[V/H]')
ax_histy.set_xlabel('Relative Dist.')
ax_histx.set_ylabel('Relative Dist.')
plt.show()
I'll add an example of a plot I'm trying to reach:

Related

Matplotlib stacked histogram label

Here is my picture. I need to make label for those bars however every upper layer contains lower layer - so the label should containt grouped colors, i.e. blue - dataset 1, blue/orange - dataset 2, blue/orange/green - dataset 3 and finally blue/orange/green/purple - dataset 4. Is it plausible to make it? Thank you.
enter image description here
binwidth = 1
n, bins, patches = ax1.hist(C, bins=range(81, 105, binwidth),
density=False, histtype='barstacked' ,
edgecolor='gray',
color=barvy_histogram,linewidth=0.3)
hatches = ['//','x','..','oo']
for patch_set, hatch in zip(patches, hatches):
for patch in patch_set.patches:
patch.set_hatch(hatch)
patch.set_linewidth=0.1
patch.set_color='gray'
mpl.rcParams['hatch.linewidth'] = 0.5
The following approach uses the tuple legend handler (HandlerTuple) to combine the legend handles. It produces a horizontal layout, while maybe a vertical stacking would be more interesting.
The code starts with creating some test data, supposing C is an Nx4 array of integers. The bin edges are set at halves to make sure that floating point accuracy wouldn't place values in the wrong bin.
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import numpy as np
# first, create some test data
C = (np.random.normal(0.001, 1, (100, 20)).cumsum(axis=0) * 1.2 + 90).astype(int).reshape(-1, 4)
c_min = C.min()
c_max = C.max()
mpl.rcParams['hatch.linewidth'] = 0.5
fig, ax1 = plt.subplots(figsize=(12, 5))
binwidth = 1
colors = plt.cm.Set2.colors[:C.shape[1]]
_, _, patches = ax1.hist(C, bins=np.arange(c_min - 0.5, c_max + binwidth, binwidth),
density=False, histtype='barstacked',
edgecolor='gray', color=colors, linewidth=0.3,
label=[f'N={p}' for p in range(25, 101, 25)])
hatches = ['//', 'x', '..', 'oo']
for patch_set, hatch in zip(patches, hatches):
for patch in patch_set.patches:
patch.set_hatch(hatch)
patch.set_linewidth = 0.1
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles=[tuple(handles[:i + 1]) for i in range(C.shape[1])], labels=labels,
handlelength=6, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)})
plt.show()

Matplotlib Colorbar - Non-Linear

I have created a diverging colorbar with it's midpoint normalised at the median value of the data.
I would like to extend the midpoint color ('white') and apply it to the range (+- 15%) from the midpoint, and then have the diverging colorbar continue normally from that point.
My current colorbar is created using the following code:
#Initial ZValues contour plot
Colorbar_min = np.around(ZValues.min()*0.9,0)
Colorbar_max = np.around(ZValues.max()*1.1,0)
Colorbar_mid = np.median(ZValues)
#Colormap
cmap = plt.cm.seismic # define the colormap
cmaplist = [cmap(i) for i in range(cmap.N)] # extract all colors from the .seismic map
# create the new colourmap
cmap = mpl.colors.LinearSegmentedColormap.from_list('Custom cmap', cmaplist, cmap.N)
# define the bins and normalize
bounds = np.linspace(Colorbar_min, Colorbar_max, 30)
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
Chosen_CS = ax.tricontourf(Chosen_tri_refi, Chosen_Z_refi, cmap=cmap, levels=bounds,
norm=MidpointNormalize(midpoint=Colorbar_mid, vmin=Colorbar_min, vmax=Colorbar_max))
#Create a second axis for the colorbar
ax2 = fig.add_axes([0.87, 0.12, 0.04, 0.75]) #The numbers in the square brackets of add_axes refer to [left, bottom, width, height], where the coordinates are just fractions that go from 0 to 1 of the plotting area.
cb = mpl.colorbar.ColorbarBase(ax2, cmap=cmap, norm=MidpointNormalize(midpoint=Colorbar_mid, vmin=Colorbar_min, vmax=Colorbar_max),spacing='uniform', ticks=bounds, boundaries=bounds, format='%1i')
cb.set_label('ZValues', fontsize=7, weight="bold", rotation=270, labelpad=14)
You can create your custom colormap with a white midrange color at 50 ± 15 % by inserting a white section (i.e. 1. for all three colors) from .5 - .15 to .5 + .15 like so:
import matplotlib.pyplot as plt
import matplotlib as mpl
seismic_cdict = plt.cm.seismic._segmentdata
cdict = dict()
for c in seismic_cdict:
cdict[c] = [t for t in seismic_cdict[c] if t[0] < .35] + \
[(.35,1.,1.), (.65,1.,1.)] + \
[t for t in seismic_cdict[c] if t[0] > .65]
custom_cmap = mpl.colors.LinearSegmentedColormap('Custom cmap', cdict)
fig, ax = plt.subplots(figsize=(8, 1))
mpl.colorbar.ColorbarBase(ax, cmap=custom_cmap, orientation='horizontal')

Loop to create subplot /Python

i have a little problem to create a subplot loop.
The following code show my result for one plot.... So it starts with a dayloop than with a hour loop (8 timesteps).
If i run the code i get a nice QUiver plot with the colorbar.
for dd in range(1,15):
day=str(dd)
readfile=fns[files_indizes[dd]]
if dd < 10:
nc_u_comp = NetCDFFile(ROOT+u_comp1+'0'+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+'0'+day+comp)
else:
nc_u_comp = NetCDFFile(ROOT+u_comp1+day+comp)
nc_v_comp = NetCDFFile(ROOT+v_comp1+day+comp)
time = nc_u_comp.variables['time'][:]
index=readfile.find(comp)
index=index+len(comp)
date=readfile[index-14:index-6]
plt.clf()
for tt in range(0,len(time)):
if tt < 10:
h =str(0)+str(tt)
else:
h=str(tt)
varU=nc_u_comp.variables['u10'][tt,:,:]
varV=nc_v_comp.variables['v10'][tt,:,:]
lat = nc_u_comp.variables['latitude'][:]
lon = nc_u_comp.variables['longitude'][:]
plt.rcParams["figure.figsize"] = [10,10]
#plane projection of the world
#map with box size (defintion on the top)
box = sgeom.box(minx=llcrnrlon, maxx=urcrnrlon, miny=llcrnrlat, maxy=urcrnrlat)
x0, y0, x1, y1 = box.bounds
#Map plot. The middel of the map is central_longitude
#proj = ccrs.PlateCarree(central_longitude=0)
proj=ccrs.PlateCarree()
#Change middelpoint of the map
box_proj = ccrs.PlateCarree(central_longitude=0)
ax2 = plt.axes(projection=proj)
ax2.set_extent([x0, x1, y0, y1], box_proj)
ax2.add_feature(cartopy.feature.BORDERS, linestyle='-', alpha=.5)
ax2.coastlines(resolution='50m')
#Definition of the scale_bar
gl = ax2.gridlines(ccrs.PlateCarree(), \
linestyle='--', alpha=1, linewidth=0.5, draw_labels=True)
gl.xlabels_top = False
gl.ylabels_right = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
magnitude = (varU ** 2 + varV ** 2) ** 0.5
strm =plt.streamplot(lon , lat , varU, varV, linewidth=2, density=2, color=magnitude)
cbar= plt.colorbar()
cbar.set_label('$m/s$')
name='Wind in 10 m '+ date + h+' UTC'
ax2.set_aspect('auto')
plt.title(name, y=1)
Now i want to create an 2x4 Subplot array with a colorbar allocate to the complete Subplot array.
I find some infromation in the internet, but it doesn't run with my code. Maybe someone can help me?
This shows how to plot an array of simple Cartopy maps in 4 rows 2 columns. Also shows how to plot a colorbar to accompany the maps array. Hope it helps.
import numpy as np
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import matplotlib as mpl
# create figure with figsize big enough to accomodate all maps, labels, etc.
fig = plt.figure(figsize=(8, 10), tight_layout=False)
# define plot array's arrangement
columns = 2
rows = 4
# set projection to use
projex = ccrs.PlateCarree()
# set the colormap and norm for
# the colorbar to use
cmap1 = mpl.cm.magma
norm1 = mpl.colors.Normalize(vmin=0, vmax=100)
def plotmymap(axs):
# your plot specs of each map should replace this
img = np.random.randint(100, size=(15, 30)) # 2d array of random values (1-100)
# render image on current axis
plims = plt.imshow(img, extent=[-180,180,-90,90], alpha=0.5, cmap=cmap1, norm=norm1)
axs.set_global()
axs.coastlines()
# add title to the map
axs.set_title("Map_"+str(i))
return plims # for use by colorbar
for i in range(1, columns*rows +1):
# add a subplot into the array of plots
ax = fig.add_subplot(rows, columns, i, projection=projex)
plims = plotmymap(ax) # a simple maps is created on subplot
# add a subplot for vertical colorbar
bottom, top = 0.1, 0.9
left, right = 0.1, 0.8
fig.subplots_adjust(top=top, bottom=bottom, left=left, right=right, hspace=0.15, wspace=0.25)
cbar_ax = fig.add_axes([0.85, bottom, 0.05, top-bottom])
fig.colorbar(plims, cax=cbar_ax) # plot colorbar
plt.show() # this plot all the maps
The resulting plots:

Plotting Curves aligned to Dynamic Time Warping Matrix

I have problems to plot two arrays with the right scaling. I use the dtw package to compare the two arrays, x and y (https://pypi.python.org/pypi/dtw/1.0). The function dtw returns a matrix and a path.
With the following code, I can plot the matrix and the path:
import matplotlib.pyplot as plt
dist, cost, acc, path = dtw(x, y, dist=lambda x, y: norm(x - y, ord=1))
plt.imshow(acc.T, origin='lower', cmap=cm.gray, interpolation='nearest')
plt.colorbar()
plt.plot(path[0], path[1], 'w')
plt.ylim((-0.5, acc.shape[1]-0.5))
plt.xlim((-0.5, acc.shape[0]-0.5))
Resulting figure:
However, I would like to plot the two curves aligned to it, like shown in (http://www.psb.ugent.be/cbd/papers/gentxwarper/DTWalgorithm.htm). One curve is above the matrix, the other one is on the left side, so that you can compare which parts are equal.
Like suggested by kwinkunks (see comment) I used this example as template. Please note that I used "plt.pcolor()" instead of "plt.image()" to plot the matrix. This is my code and the resulting figure:
'''
Plotting
'''
nullfmt = NullFormatter()
# definitions for the axes
left, width = 0.12, 0.60
bottom, height = 0.08, 0.60
bottom_h = 0.16 + width
left_h = left + 0.27
rect_plot = [left_h, bottom, width, height]
rect_x = [left_h, bottom_h, width, 0.2]
rect_y = [left, bottom, 0.2, height]
# start with a rectangular Figure
plt.figure(2, figsize=(8, 8))
axplot = plt.axes(rect_plot)
axx = plt.axes(rect_x)
axy = plt.axes(rect_y)
# Plot the matrix
axplot.pcolor(acc.T,cmap=cm.gray)
axplot.plot(path[0], path[1], 'w')
axplot.set_xlim((0, len(x)))
axplot.set_ylim((0, len(linear)))
axplot.tick_params(axis='both', which='major', labelsize=18)
# Plot time serie horizontal
axx.plot(x,'.', color='k')
axx.tick_params(axis='both', which='major', labelsize=18)
xloc = plt.MaxNLocator(4)
x2Formatter = FormatStrFormatter('%d')
axx.yaxis.set_major_locator(xloc)
axx.yaxis.set_major_formatter(x2Formatter)
# Plot time serie vertical
axy.plot(y,linear,'.',color='k')
axy.invert_xaxis()
yloc = plt.MaxNLocator(4)
xFormatter = FormatStrFormatter('%d')
axy.xaxis.set_major_locator(yloc)
axy.xaxis.set_major_formatter(xFormatter)
axy.tick_params(axis='both', which='major', labelsize=18)
#Limits
axx.set_xlim(axplot.get_xlim())
axy.set_ylim(axplot.get_ylim())
plt.show()

matplotlib scatter_hist with stepfilled histtype in histogram

I modified scatter_hist.py example found here to have two data sets to be plotted.
I'd like to have histograms with "stepfilled" type, but somehow if I set the type "stepfilled" the Y-axis histogram (orientation = "horizontal") is not working.
Is there any other way to do the histogram to look like "stepfilled"-style or am I doing something wrong?
Here is my code with histtype = "bar" to show the idea what I try to do. Change it to
histtype="stepfilled"
to get strange histogram:
import numpy as np
import matplotlib.pyplot as plt
# the random data
x = np.random.randn(1000)
y = np.random.randn(1000)
x_vals = [x]
y_vals = [y]
x_vals.append( np.random.randn( 300 ) )
y_vals.append( np.random.randn( 300 ) )
fig = plt.figure(1, figsize=(5.5,5.5))
from mpl_toolkits.axes_grid1 import make_axes_locatable
colour_LUT = ['#0000FF',
'#00FF00']
# the scatter plot:
xymax = np.max(np.fabs(x))
colors = []
axScatter = plt.subplot(111)
for i in range( len(x_vals ) ):
colour = colour_LUT[i]
xymax = np.max( [np.max(np.fabs(x)), np.max(np.fabs(y)), xymax ] )
axScatter.scatter( x_vals[i], y_vals[i], color = colour )
colors.append(colour)
axScatter.set_aspect(1.)
# create new axes on the right and on the top of the current axes
# The first argument of the new_vertical(new_horizontal) method is
# the height (width) of the axes to be created in inches.
divider = make_axes_locatable(axScatter)
axHistx = divider.append_axes("top", 1.2, pad=0.1, sharex=axScatter)
axHisty = divider.append_axes("right", 1.2, pad=0.1, sharey=axScatter)
# make some labels invisible
plt.setp(axHistx.get_xticklabels() + axHisty.get_yticklabels(),
visible=False)
# now determine nice limits by hand:
binwidth = 0.25
lim = ( int(xymax/binwidth) + 1) * binwidth
bins = np.arange(-lim, lim + binwidth, binwidth)
histtype = "bar"
axHistx.hist(x_vals, bins=bins, histtype= histtype, color=colors)
axHisty.hist(y_vals, bins=bins, orientation='horizontal',histtype= histtype, color=colors)
# the xaxis of axHistx and yaxis of axHisty are shared with axScatter,
# thus there is no need to manually adjust the xlim and ylim of these
# axis.
#axHistx.axis["bottom"].major_ticklabels.set_visible(False)
for tl in axHistx.get_xticklabels():
tl.set_visible(False)
axHistx.set_yticks([0, 50, 100])
#axHisty.axis["left"].major_ticklabels.set_visible(False)
for tl in axHisty.get_yticklabels():
tl.set_visible(False)
axHisty.set_xticks([0, 50, 100])
plt.draw()
plt.show()
Thank You for help!
Edit:
Here is the images which I receive in windows environment with matplotlib 1.0.0.
With histtype="bar" I have this:
and with histtype="stepfilled" I have this:
The documentation only mentions special cases for multiple data when using 'bar' and 'barstacked', which I would assume means that this isn't properly implemented for the other two types. Changing your code to add multiple histograms instead of just one worked for me:
histtype = "stepfilled"
for i in xrange(len(x_vals)):
axHistx.hist(x_vals[i], bins=bins, histtype= histtype, color=colors[i])
axHisty.hist(y_vals[i], bins=bins, orientation='horizontal',histtype= histtype, color=colors[i])

Categories