Aligning two combined plots - Matplotlib - python

I'm currently working in a plot in which I show to datas combined.
I plot them with the following code:
plt.figure()
# Data 1
data = plt.cm.binary(data1)
data[..., 3] = 1.0 * (data1 > 0.0)
fig = plt.imshow(data, interpolation='nearest', cmap='binary', vmin=0, vmax=1, extent=(-4, 4, -4, 4))
# Plotting just the nonzero values of data2
x = numpy.linspace(-4, 4, 11)
y = numpy.linspace(-4, 4, 11)
data2_x = numpy.nonzero(data2)[0]
data2_y = numpy.nonzero(data2)[1]
pts = plt.scatter(x[data2_x], y[data2_y], marker='s', c=data2[data2_x, data2_y])
And this gives me this plot:
As can be seen in the image, my background and foreground squares are not aligned.
Both of then have the same dimension (20 x 20). I would like to have a way, if its possible, to align center with center, or corner with corner, but to have some kind of alignment.
In some grid cells it seems that I have right bottom corner alignment, in others left bottom corner alignment and in others no alignment at all, with degrades the visualization.
Any help would be appreciated.
Thank you.

As tcaswell says, your problem may be easiest to solve by defining the extent keyword for imshow.
If you give the extent keyword, the outermost pixel edges will be at the extents. For example:
import matplotlib.pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(np.random.random((8, 10)), extent=(2, 6, -1, 1), interpolation='nearest', aspect='auto')
Now it is easy to calculate the center of each pixel. In X direction:
interpixel distance is (6-2) / 10 = 0.4 pixels
center of the leftmost pixel is half a pixel away from the left edge, 2 + .4/2 = 2.2
Similarly, the Y centers are at -.875 + n * 0.25.
So, by tuning the extent you can get your pixel centers wherever you want them.
An example with 20x20 data:
import matplotlib.pyplot as plt
import numpy
# create the data to be shown with "scatter"
yvec, xvec = np.meshgrid(np.linspace(-4.75, 4.75, 20), np.linspace(-4.75, 4.75, 20))
sc_data = random.random((20,20))
# create the data to be shown with "imshow" (20 pixels)
im_data = random.random((20,20))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(im_data, extent=[-5,5,-5,5], interpolation='nearest', cmap=plt.cm.gray)
ax.scatter(xvec, yvec, 100*sc_data)
Notice that here the inter-pixel distance is the same for both scatter (if you have a look at xvec, all pixels are 0.5 units apart) and imshow (as the image is stretched from -5 to +5 and has 20 pixels, the pixels are .5 units apart).

here is a code where there is no alignment problem.
import matplotlib.pyplot as plt
import numpy
data1 = numpy.random.rand(10, 10)
data2 = numpy.random.rand(10, 10)
data2[data2 < 0.4] = 0.0
plt.figure()
# Plotting data1
fig = plt.imshow(data1, interpolation='nearest', cmap='binary', vmin=0.0, vmax=1.0)
# Plotting data2
data2_x = numpy.nonzero(data2)[0]
data2_y = numpy.nonzero(data2)[1]
pts = plt.scatter(data2_x, data2_y, marker='s', c=data2[data2_x, data2_y])
plt.show()
which gives a perfectly aligned combined plots:
Thus the use of additional options in your code might be the reason of the non-alignment of the combined plots.

Related

matplotlib.pyplot: How to plot single graph with different Colormaps and a Legend?

I am plotting separate figures for each attribute and label for each data sample. Here is the illustration:
As illustrated in the the last subplot (Label), my data contains seven classes (numerically) (0 to 6). I'd like to visualize these classes using a different fancy colors and a legend. Please note that I just want colors for last subplot. How should I do that?
Here is the code of above plot:
x, y = test_data["x"], test_data["y"]
# determine the total number of plots
n, off = x.shape[1] + 1, 0
plt.rcParams["figure.figsize"] = (40, 15)
# plot all the attributes
for i in range(6):
plt.subplot(n, 1, off + 1)
plt.plot(x[:, off])
plt.title('Attribute:' + str(i), y=0, loc='left')
off += 1
# plot Labels
plt.subplot(n, 1, n)
plt.plot(y)
plt.title('Label', y=0, loc='left')
plt.savefig(save_file_name, bbox_inches="tight")
plt.close()
First, just to set up a similar dataset:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random((100,6))
y = np.random.randint(0, 6, (100))
fig, axs = plt.subplots(6, figsize=(40,15))
We could use plt.scatter() to give individual points different marker styles:
for i in range(x.shape[-1]):
axs[i].scatter(range(x.shape[0]), x[:,i], c=y)
Or we could mask the arrays we're plotting:
for i in range(x.shape[-1]):
for j in np.unique(y):
axs[i].plot(np.ma.masked_where(y!=j, x[:,i]), 'o')
Either way we get the same results:
Edit: Ah you've edited your question! You can do exactly the same thing for your last plot only, just modify my code above to take it out of the loop of subplots :)
As suggested, we imitate the matplotlib step function by creating a LineCollection to color the different line segments:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
from matplotlib.patches import Patch
#random data generation
np.random.seed(12345)
number_of_categories=4
y = np.concatenate([np.repeat(np.random.randint(0, number_of_categories), np.random.randint(1, 30)) for _ in range(20)])
#check the results with less points
#y = y[:10]
x = y[None] * np.linspace(1, 5, 3)[:, None]
x += 2 * np.random.random(x.shape) - 1
#your initial plot
num_plots = x.shape[0] + 1
fig, axes = plt.subplots(num_plots, 1, sharex=True, figsize=(10, 8))
for i, ax in enumerate(axes.flat[:-1]):
ax.plot(x[i,:])
#first we create the matplotlib step function with x-values as their midpoint
axes.flat[-1].step(np.arange(y.size), y, where="mid", color="lightgrey", zorder=-1)
#then we plot colored segments with shifted index simulating the step function
shifted_x = np.arange(y.size+1)-0.5
#and identify the step indexes
idx_steps, = np.nonzero(np.diff(y, prepend=np.inf, append=np.inf))
#create collection of plateau segments
colored_segments = np.zeros((idx_steps.size-1, 2, 2))
colored_segments[:, :, 0] = np.vstack((shifted_x[idx_steps[:-1]], shifted_x[idx_steps[1:]])).T
colored_segments[:, :, 1] = np.repeat(y[idx_steps[:-1]], 2).reshape(-1, 2)
#generate discrete color list
n_levels, idx_levels = np.unique(y[idx_steps[:-1]], return_inverse=True)
colorarr = np.asarray(plt.cm.tab10.colors[:n_levels.size])
#and plot the colored segments
lc_cs = LineCollection(colored_segments, colors=colorarr[idx_levels, :], lw=10)
lines_cs = axes.flat[-1].add_collection(lc_cs)
#scaling and legend generation
axes.flat[-1].set_ylim(n_levels.min()-0.5, n_levels.max()+0.5)
axes.flat[-1].legend([Patch(color=colorarr[i, :]) for i, _ in enumerate(n_levels)],
[f"cat {i}" for i in n_levels],
loc="upper center", bbox_to_anchor=(0.5, -0.15),
ncol=n_levels.size)
plt.show()
Sample output:
Alternatively, you can use broken barh plots or color this axis or even all axes using axvspan.

Pyplot: Drawing figure in a custom scale (both x and y)

I've been plotting a dataframe using the following code within a Jupyter Notebook: For comparision with older data only available on paper in the scale 0.005mm=1cm, I need to export and print the graph in the same scale: 0.005mm in the figure (both x and y-axis) have to be 1cm in the figure.
Is there any way how I can define a custom scale? For information, the x-range and y-range are not fixed, they will vary depending on the data I am loading into the dataframe.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
df = pd.DataFrame(np.array([[1.7, 0], [1.75, -0.012], [1.8, 0]]),
columns=['pos', 'val'])
# Plot results
sns.set()
plt.figure(figsize=(20,30))
plt.plot(df['pos'], df['val'])
ax = plt.axes()
ax.set_aspect('equal')
plt.xlabel('Position [mm]')
plt.ylabel('Höhe [mm]')
ax.xaxis.set_major_locator(ticker.MultipleLocator(0.005))
ax.yaxis.set_major_locator(ticker.MultipleLocator(0.005))
plt.show()
In a
comment
I suggested to use matplotlib.transforms — well I was wrong, the way
to go is to shamelessly steal from Matplotlib's Demo Fixed Size
Axes…
(the figure was resized by StackOverflow to fit in the post, but you
can check that the proportions are correct)
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import Divider, Size
from mpl_toolkits.axes_grid1.mpl_axes import Axes
cm = lambda d: d/2.54
x, y = [1700.0, 1725.0, 1750.0], [0.0, -12.0, 0.0] # μm
dx, dy = 50.0, 12.0
# take margins into account
xmin, xmax = min(x)-dx*0.05, max(x)+dx*0.05
ymin, ymax = min(y)-dy*0.05, max(y)+dy*0.05
dx, dy = xmax-xmin, ymax-ymin
# 5 μm data == 1 cm plot
scale = 5/1
xlen, ylen = dx/scale, dy/scale
# Now we know the extents of our data and the axes dimension,
# so we can set the Figure dimensions, taking borders into account
left, right = 2, 1
bot, top = 1.5, 1.5
fig = plt.figure(
figsize=(cm(left+xlen+right), cm(bot+ylen+top)),
dpi=118)
# change bg color to show so that one can measure the figure
# and the axes when pasted into SO and do their math…
fig.set_facecolor('xkcd:grey teal')
########## Below is stolen from Matplotlib Fixed Size Axes
########## (please don't ask me…)
# Origin and size of the x axis and y axis
h = [Size.Fixed(cm(left)), Size.Fixed(cm(xlen))]
v = [Size.Fixed(cm(bot)), Size.Fixed(cm(ylen))]
divider = Divider(fig, (0.0, 0.0, 1., 1.), h, v, aspect=False)
# NB: Axes is from mpl_toolkits.axes_grid1.mpl_axes
ax = Axes(fig, divider.get_position())
ax.set_axes_locator(divider.new_locator(nx=1, ny=1))
fig.add_axes(ax)
######### Above is stolen from Matplotlib Fixed Size Axes Demo
plt.plot(x,y)
plt.grid()
ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax), yticks=range(-12,1,3),
xlabel='X/μm', ylabel='Y/μm',
title='X vs Y, 1 cm on plot equals 5 μm')
fig.suptitle('Figure dimensions: w = %.2f cm, h = %.2f cm.'%(
left+xlen+right, bot+ylen+top))
fig.savefig('Figure_1.png',
# https://stackoverflow.com/a/4805178/2749397, Joe Kington's
facecolor=fig.get_facecolor(), edgecolor='none')
1 inch = 2.54 cm, so 254/0.005 = 50800 dpi
plt.figure(figsize=(20,30), dpi=50800)

Is there anything in matplotlib that behaves like alpha but reversed?

A good way to show the concentration of the data points in a plot is using a scatter plot with non-unit transparency. As a result, the areas with more concentration would appear darker.
# this is synthetic example
N = 10000 # a very very large number
x = np.random.normal(0, 1, N)
y = np.random.normal(0, 1, N)
plt.scatter(x, y, marker='.', alpha=0.1) # an area full of dots, darker wherever the number of dots is more
which gives something like this:
Imagine the case we want to emphasize on the outliers. So the situation is almost reversed: A plot in which the less-concentrated areas are bolder. (There might be a trick to apply for my simple example, but imagine a general case where a distribution of points are not known prior, or it's difficult to define a rule for transparency/weight on color.)
I was thinking if there's anything handy same as alpha that is designed for this job specifically. Although other ideas for emphasizing on outliers are also welcomed.
UPDATE: This is what happens when more then one data point is scattered on the same area:
I'm looking for something like the picture below, the more data point, the less transparent the marker.
To answer the question: You can calculate the density of points, normalize it and encode it in the alpha channel of a colormap.
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
# this is synthetic example
N = 10000 # a very very large number
x = np.random.normal(0, 1, N)
y = np.random.normal(0, 1, N)
fig, (ax,ax2) = plt.subplots(ncols=2, figsize=(8,5))
ax.scatter(x, y, marker='.', alpha=0.1)
values = np.vstack([x,y])
kernel = stats.gaussian_kde(values)
weights = kernel(values)
weights = weights/weights.max()
cols = plt.cm.Blues([0.8, 0.5])
cols[:,3] = [1., 0.005]
cmap = LinearSegmentedColormap.from_list("", cols)
ax2.scatter(x, y, c=weights, s = 1, marker='.', cmap=cmap)
plt.show()
Left is the original image, right is the image where higher density points have a lower alpha.
Note, however, that this is undesireable, because high density transparent points are undistinguishable from low density. I.e. in the right image it really looks as though you have a hole in the middle of your distribution.
Clearly, a solution with a colormap which does not contain the color of the background is a lot less confusing to the reader.
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# this is synthetic example
N = 10000 # a very very large number
x = np.random.normal(0, 1, N)
y = np.random.normal(0, 1, N)
fig, ax = plt.subplots(figsize=(5,5))
values = np.vstack([x,y])
kernel = stats.gaussian_kde(values)
weights = kernel(values)
weights = weights/weights.max()
ax.scatter(x, y, c = weights, s=9, edgecolor="none", marker='.', cmap="magma")
plt.show()
Here, low density points are still emphazised by darker color, but at the same time it's clear to the viewer that the highest density lies in the middle.
As far as I know, there is no "direct" solution to this quite interesting problem. As a workaround, I propose this solution:
N = 10000 # a very very large number
x = np.random.normal(0, 1, N)
y = np.random.normal(0, 1, N)
fig = plt.figure() # create figure directly to be able to extract the bg color
ax = fig.gca()
ax.scatter(x, y, marker='.') # plot all markers without alpha
bgcolor = ax.get_facecolor() # extract current background color
# plot with alpha, "overwriting" dense points
ax.scatter(x, y, marker='.', color=bgcolor, alpha=0.2)
This will plot all points without transparency and then plot all points again with some transparency, "overwriting" those points with the highest density the most. Setting the alpha value to other higher values will put more emphasis to outliers and vice versa.
Of course the color of the second scatter plot needs to be adjusted to your background color. In my example this is done by extracting the background color and setting it as the new scatter plot's color.
This solution is independent of the kind of distribution. It only depends on the density of the points. However it produces twice the amount of points, thus may take slightly longer to render.
Reproducing the edit in the question, my solution is showing exactly the desired behavior. The leftmost point is a single point and is the darkest, the rightmost is consisting of three points and is the lightest color.
x = [0, 1, 1, 2, 2, 2]
y = [0, 0, 0, 0, 0, 0]
fig = plt.figure() # create figure directly to be able to extract the bg color
ax = fig.gca()
ax.scatter(x, y, marker='.', s=10000) # plot all markers without alpha
bgcolor = ax.get_facecolor() # extract current background color
# plot with alpha, "overwriting" dense points
ax.scatter(x, y, marker='.', color=bgcolor, alpha=0.2, s=10000)
Assuming that the distributions are centered around a specific point (e.g. (0,0) in this case), I would use this:
import numpy as np
import matplotlib.pyplot as plt
N = 500
# 0 mean, 0.2 std
x = np.random.normal(0,0.2,N)
y = np.random.normal(0,0.2,N)
# calculate the distance to (0, 0).
color = np.sqrt((x-0)**2 + (y-0)**2)
plt.scatter(x , y, c=color, cmap='plasma', alpha=0.7)
plt.show()
Results:
I don't know if it helps you, because it's not exactly you asked for, but you can simply color points, which values are bigger than some threshold. For example:
import matplotlib.pyplot as plt
num = 100
threshold = 80
x = np.linspace(0, 100, num=num)
y = np.random.normal(size=num)*45
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x[np.abs(y) < threshold], y[np.abs(y) < threshold], color="#00FFAA")
ax.scatter(x[np.abs(y) >= threshold], y[np.abs(y) >= threshold], color="#AA00FF")
plt.show()

Polar plot - Put one grid line in bold

I am trying to make use the polar plot projection to make a radar chart. I would like to know how to put only one grid line in bold (while the others should remain standard).
For my specific case, I would like to highlight the gridline associated to the ytick "0".
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
#Variables
sespi = pd.read_csv("country_progress.csv")
labels = sespi.country
progress = sespi.progress
angles=np.linspace(0, 2*np.pi, len(labels), endpoint=False)
#Concatenation to close the plots
progress=np.concatenate((progress,[progress[0]]))
angles=np.concatenate((angles,[angles[0]]))
#Polar plot
fig=plt.figure()
ax = fig.add_subplot(111, polar=True)
ax.plot(angles, progress, '.--', linewidth=1, c="g")
#ax.fill(angles, progress, alpha=0.25)
ax.set_thetagrids(angles * 180/np.pi, labels)
ax.set_yticklabels([-200,-150,-100,-50,0,50,100,150,200])
#ax.set_title()
ax.grid(True)
plt.show()
The gridlines of a plot are Line2D objects. Therefore you can't make it bold. What you can do (as shown, in part, in the other answer) is to increase the linewidth and change the colour but rather than plot a new line you can do this to the specified gridline.
You first need to find the index of the y tick labels which you want to change:
y_tick_labels = [-100,-10,0,10]
ind = y_tick_labels.index(0) # find index of value 0
You can then get a list of the gridlines using gridlines = ax.yaxis.get_gridlines(). Then use the index you found previously on this list to change the properties of the correct gridline.
Using the example from the gallery as a basis, a full example is shown below:
r = np.arange(0, 2, 0.01)
theta = 2 * np.pi * r
ax = plt.subplot(111, projection='polar')
ax.set_rmax(2)
ax.set_rticks([0.5, 1, 1.5, 2]) # less radial ticks
ax.set_rlabel_position(-22.5) # get radial labels away from plotted line
ax.grid(True)
y_tick_labels = [-100, -10, 0, 10]
ax.set_yticklabels(y_tick_labels)
ind = y_tick_labels.index(0) # find index of value 0
gridlines = ax.yaxis.get_gridlines()
gridlines[ind].set_color("k")
gridlines[ind].set_linewidth(2.5)
plt.show()
Which gives:
It is just a trick, but I guess you could just plot a circle and change its linewidth and color to whatever could be bold for you.
For example:
import matplotlib.pyplot as plt
import numpy as np
Yline = 0
Npoints = 300
angles = np.linspace(0,360,Npoints)*np.pi/180
line = 0*angles + Yline
ax = plt.subplot(111, projection='polar')
plt.plot(angles, line, color = 'k', linewidth = 3)
plt.ylim([-1,1])
plt.grid(True)
plt.show()
In this piece of code, I plot a line using plt.plot between any point of the two vectors angles and line. The former is actually all the angles between 0 and 2*np.pi. The latter is constant, and equal to the 'height' you want to plot that line Yline.
I suggest you try to decrease and increase Npoints while having a look to the documentaion of np.linspace() in order to understand your problem with the roundness of the circle.

fourfold display in matplotlib using polar axis

I am trying to create a fourfold display in matplotlib:
but can't get the logic of the polar axis. This is what I have tried so far:
import numpy as np
import matplotlib.pyplot as plt
# radius of each bar
radii = [10, 15, 20, 25]
# Value - width
width = np.pi/ 2
# angle of each bar
theta = [0,90,180,270]
ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width)
plt.show()
not sure what I am missing but I just want four "equal" areas which touch each others. What I can't get to work is
How to "control" the angles ? I mean to have all four "slides" being in [0,90], [90,180], [180, 270], [270, 360].
I do not understand what "width" corresponds to.
theta is expected to be in radians, not degrees.
If you just slightly tweak your code:
import numpy as np
import matplotlib.pyplot as plt
# radius of each bar
radii = [10, 15, 20, 25]
# Value - width
width = np.pi/ 2
# angle of each bar
theta = np.radians([0,90,180,270])
ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width, alpha=0.5)
plt.show()
You'll get what you'd expect:
On a side note, for the exact plot you're making it might make more sense to use 4 Wedges on a rectangular plot with centered spines.
In case somebody else is interested here is what I came up
To use the example of Berkeley admission in the paper one first need to standardized the values (to equate margins) using iterative proportional fitting
def ContTableIPFP(x1ContTable):
''' poor man IPFP
compute iterative proportional fitting for
a 2 X 2 contingency table
Input :
a 2x2 contingency table as numpy array
Output :
numpy array with values standarized to equate margins
'''
import numpy as np
#Margins
xSumRows = np.sum(x1ContTable, axis = 0).tolist()
xSumCols = np.sum(x1ContTable, axis = 1).tolist()
# Seed
xq0 = x1ContTable/x1ContTable
# Iteration 1 : we adjust by row sums (i.e. using the sums of the columns)
xq1 = np.array([
(xq0[0] * xSumCols[0]).astype(float) / np.sum(xq0, axis = 0).tolist()[0],
(xq0[1] * xSumCols[1]).astype(float) / np.sum(xq0, axis = 0).tolist()[1],
]
)
#Iteration 2 : adjust by columns (i.e. using sums of rows)
xq2 = np.array([
(xq1[:,0] * xSumRows[0]).astype(float) / np.sum(xq1, axis = 0).tolist()[0],
(xq1[:,1] * xSumRows[1]).astype(float) / np.sum(xq1, axis = 0).tolist()[1],
]
)
return xq2.T
and then plot
def FourfoldDisplay(radii):
''' radii = [10, 15, 20, 25]
'''
import numpy as np
import matplotlib.pyplot as plt
# Value - width
width = np.pi/ 2
# angle of each bar
theta = np.radians([0,90,180,270])
ax = plt.subplot(111, polar=True)
bars = ax.bar(theta, radii, width=width, alpha=0.5)
#labels
ax.set_xticklabels([])
ax.set_yticks([])
#plt.axis('off')
plt.show()
to use
import numpy as np
x1 = np.array([
[1198, 1493],
[557, 1278]
])
x2 = ContTableIPFP(x1).flatten()
FourfoldDisplay(x2)

Categories