Replicate Log10 Scaling with Matplotlib - python

I'm trying to recreate a plot that has the y-axis styled as so:
But can't seem to figure out how to get the axis breaks and labels lined up how I want them. I am currently doing this in my code:
# plot lines
for key, group in grouped:
plt.plot(group.x * 950, np.log10(group.y), label=key)
# plot points
exp_group = exp_data.groupby('Experiment')
for key, group in exp_group:
plt.plot(group.x, np.log10(group.y), label=key, marker='o')
plt.yticks(np.arange(-3, 3), label=10.0**np.arange(-3,3))
plt.show()

A solution is to use plt.yticks:
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 10, 1000)
y = np.exp(x) - x**3 + x**2 - x**(1/2)
plt.figure()
plt.semilogy(x, y)
locs, labels = plt.yticks()
print(locs)
print(labels)
lst_10 = [1, 10, 100, 1000, 10000]
plt.yticks(lst_10, lst_10)
plt.title('Different label on y-axis')
plt.figure()
plt.semilogy(x, y)
plt.title('Default label')
plt.show()
The function plt.yticks takes two arguments, the locations and the labels. I want the labels in the locations 1, 10, 100, 1000, 10000; I want the label (in location 1) to have a label 1, the label (in localtion 10) to have a label 10, and so on. I have also used plt.semilogy to get that semi-log axis.

Related

matplotlib.pyplot: How to plot single graph with different Colormaps and a Legend?

I am plotting separate figures for each attribute and label for each data sample. Here is the illustration:
As illustrated in the the last subplot (Label), my data contains seven classes (numerically) (0 to 6). I'd like to visualize these classes using a different fancy colors and a legend. Please note that I just want colors for last subplot. How should I do that?
Here is the code of above plot:
x, y = test_data["x"], test_data["y"]
# determine the total number of plots
n, off = x.shape[1] + 1, 0
plt.rcParams["figure.figsize"] = (40, 15)
# plot all the attributes
for i in range(6):
plt.subplot(n, 1, off + 1)
plt.plot(x[:, off])
plt.title('Attribute:' + str(i), y=0, loc='left')
off += 1
# plot Labels
plt.subplot(n, 1, n)
plt.plot(y)
plt.title('Label', y=0, loc='left')
plt.savefig(save_file_name, bbox_inches="tight")
plt.close()
First, just to set up a similar dataset:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random((100,6))
y = np.random.randint(0, 6, (100))
fig, axs = plt.subplots(6, figsize=(40,15))
We could use plt.scatter() to give individual points different marker styles:
for i in range(x.shape[-1]):
axs[i].scatter(range(x.shape[0]), x[:,i], c=y)
Or we could mask the arrays we're plotting:
for i in range(x.shape[-1]):
for j in np.unique(y):
axs[i].plot(np.ma.masked_where(y!=j, x[:,i]), 'o')
Either way we get the same results:
Edit: Ah you've edited your question! You can do exactly the same thing for your last plot only, just modify my code above to take it out of the loop of subplots :)
As suggested, we imitate the matplotlib step function by creating a LineCollection to color the different line segments:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
from matplotlib.patches import Patch
#random data generation
np.random.seed(12345)
number_of_categories=4
y = np.concatenate([np.repeat(np.random.randint(0, number_of_categories), np.random.randint(1, 30)) for _ in range(20)])
#check the results with less points
#y = y[:10]
x = y[None] * np.linspace(1, 5, 3)[:, None]
x += 2 * np.random.random(x.shape) - 1
#your initial plot
num_plots = x.shape[0] + 1
fig, axes = plt.subplots(num_plots, 1, sharex=True, figsize=(10, 8))
for i, ax in enumerate(axes.flat[:-1]):
ax.plot(x[i,:])
#first we create the matplotlib step function with x-values as their midpoint
axes.flat[-1].step(np.arange(y.size), y, where="mid", color="lightgrey", zorder=-1)
#then we plot colored segments with shifted index simulating the step function
shifted_x = np.arange(y.size+1)-0.5
#and identify the step indexes
idx_steps, = np.nonzero(np.diff(y, prepend=np.inf, append=np.inf))
#create collection of plateau segments
colored_segments = np.zeros((idx_steps.size-1, 2, 2))
colored_segments[:, :, 0] = np.vstack((shifted_x[idx_steps[:-1]], shifted_x[idx_steps[1:]])).T
colored_segments[:, :, 1] = np.repeat(y[idx_steps[:-1]], 2).reshape(-1, 2)
#generate discrete color list
n_levels, idx_levels = np.unique(y[idx_steps[:-1]], return_inverse=True)
colorarr = np.asarray(plt.cm.tab10.colors[:n_levels.size])
#and plot the colored segments
lc_cs = LineCollection(colored_segments, colors=colorarr[idx_levels, :], lw=10)
lines_cs = axes.flat[-1].add_collection(lc_cs)
#scaling and legend generation
axes.flat[-1].set_ylim(n_levels.min()-0.5, n_levels.max()+0.5)
axes.flat[-1].legend([Patch(color=colorarr[i, :]) for i, _ in enumerate(n_levels)],
[f"cat {i}" for i in n_levels],
loc="upper center", bbox_to_anchor=(0.5, -0.15),
ncol=n_levels.size)
plt.show()
Sample output:
Alternatively, you can use broken barh plots or color this axis or even all axes using axvspan.

How to get arithmetically growing minor ticks with matplotlib?

The following snippet creates a list myHLines of (y) values that is arithmetically growing.
I want to use them as minor y ticks in a matplotlib plot.
How can I do this?
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'a': [1, 3, 10, 30, 100]})
myMin = df.a.min()
myMax = df.a.max()
ratio = 3
myHLines = [myMin * ratio ** i for i in range(1000) if myMin * ratio ** i < myMax]
print("myHLines=", myHLines)
# myHLines= [1, 3, 9, 27, 81]
plt.plot(df, '-o', markersize=2, c='r')
plt.show()
Is the scale of the y-axis you want to achieve the y-axis shown in the graph below?
plt.plot(df, '-o', markersize=2, c='r')
locs, labels = plt.yticks()
new_y = sorted(myHLines + locs.tolist()[1:-1])
# print(new_y)
plt.yticks(new_y)
plt.show()

Changing axis ticks in Matplotlib with multiple connected Boxplots

I am plotting a convergence graph and to show deviations from the mean I am using connected boxplots:
For some reason Matplotlib forces ticks for each boxplot and I cannot seem to get them removed. My code for the current plot looks something like this:
label = ["" for i in range(160)]
no_labels = int(np.floor(len(label)/20))
for i in range(no_labels):
label[i*20] = str(i*no_samples/no_labels)
# Weird behaviour for the last label so adding it manually
label[-1] = no_samples
fig = plt.figure(figsize=(10,5))
ax = fig.add_axes([0,0,1,1])
ax.set_xlabel("Samples", labelpad=10)
ax.set_ylabel("Error (MSE)", labelpad=10)
ax.set_ylim(0, 0.11)
ax.boxplot(data, flierprops=flyprops, showcaps=False,
boxprops=colorprops, whiskerprops={'color' : 'tab:blue'},
labels=label, patch_artist=True)
I have tried multiple ways of manipulating axis ticks which are available in MPL.
1) Trying to let MPL do the work:
ax.xaxis.set_major_locator(MultipleLocator(20))
2) Trying to set ticks manually: ax.set_xticks([list_of_ticks])
3) Tried a workaround
ax.xaxis.set_minor_locator(MultipleLocator(20))
# Removing major ticks, setting minor ticks
ax.xaxis.set_tick_params(which='major', size=0, width=2, direction='in')
ax.yaxis.set_tick_params(which='major', size=5, width=2, direction='in')
None of these seemed to work and I am unsure why. I think it may have something to do with my label variable but if I don't include it in this way MPL with include an axis lable for every entry which is a mess.
How can I set axis ticks once every 1000 entries in a connected boxplots figure?`
Edit: The input data is a numpy array of shape (15, 160) s.t. there are 160 boxplots plotted of 15 samples each. Example data for 5 boxplots of 3 samples each would look like:
np.random.rand(3,5)
>>> array([[0.05942481, 0.03408175, 0.84021109, 0.27531937, 0.62428798],
[0.24658313, 0.77910387, 0.2161348 , 0.39101172, 0.14038211],
[0.40694432, 0.22979738, 0.87056873, 0.788295 , 0.29337562]])
The main issue seems to be that the ticks need to be updated after drawing the main plot, never before.
(Having ax = fig.add_axes([0, 0, 1, 1]) is also quite unusual to work with. The standard way is fig, ax = plt.subplots(figsize=(10, 5)) which lets matplotlib a bit of flexibility for the whitespace around the plot.)
The code of the question has some missing variables and data, but the following toy example should create something similar:
import numpy as np
import matplotlib.pyplot as plt
no_samples = 8000
x = np.linspace(0, no_samples, 160)
no_labels = int(np.floor(len(x) / 20))
label = [f'{i * no_samples / no_labels:.0f}' for i in range(no_labels+1)]
fig = plt.figure(figsize=(10, 5))
ax = fig.add_axes([0.1, 0.1, 0.85, 0.85])
N = 100
data = np.random.normal(np.tile(100 / (x+1000), N), 0.001).reshape(N, -1)
flyprops = {'markersize':0.01}
colorprops = None
ax.boxplot(data, flierprops=flyprops, showcaps=False,
boxprops=colorprops, whiskerprops={'color': 'tab:blue'},
patch_artist=True)
ax.set_xlabel("Samples", labelpad=10)
ax.set_ylabel("Error (MSE)", labelpad=10)
ax.set_ylim(0, 0.11)
ax.set_xticks(range(0, len(x)+1, 20))
ax.set_xticklabels(label)
plt.show()
Here is an example of setting the tick marks:
import matplotlib.pyplot as plt
import numpy as np
data=np.random.rand(3,50)
fig = plt.figure(figsize=(10,5))
ax = fig.add_axes([0,0,1,1])
ax.set_xlabel("Samples", labelpad=10)
ax.set_ylabel("Error (MSE)", labelpad=10)
ax.boxplot(data,
showcaps=False,
whiskerprops={'color' : 'tab:blue'},
patch_artist=True
)
plt.xticks([10, 20, 30, 40, 50],
["10", "20", "30", "40", "50"])
EDIT:
You can also avoid messing with strings and set the marks like this:
N=50
plt.xticks(np.linspace(0, N, num=6), np.linspace(0, N, num=6))
See here and this example.
Simple ticks can be acheived in a similar mannar as here (note data as transposed numpy array) using
import numpy as np
import matplotlib.pyplot as plt
data = np.array([ np.random.rand(100) for i in range(3) ]).T
plt.boxplot(data)
plt.xticks([1, 2, 3], ['mon', 'tue', 'wed'])

Scatterplot in matplotlib with legend and randomized point order

I'm trying to build a scatterplot of a large amount of data from multiple classes in python/matplotlib. Unfortunately, it appears that I have to choose between having my data randomised and having legend labels. Is there a way I can have both (preferably without manually coding the labels?)
Minimum reproducible example:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
Plotting with randomized points:
plot_idx = np.random.permutation(data.shape[0])
colors = pd.factorize(classes)
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
label=classes[plot_idx],
alpha=0.4)
plt.legend()
plt.show()
This gives me the wrong legend.
Plotting with the correct legend:
from matplotlib import cm
unique_classes = np.unique(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
for i, class in enumerate(unique_classes):
ax.scatter(data[classes == class, 0],
data[classes == class, 1],
c=colors[i],
label=class,
alpha=0.4)
plt.legend()
plt.show()
But now the points are not randomized and the resulting plot is not representative of the data.
I'm looking for something that would give me a result like I get as follows in R:
library(ggplot2)
X <- matrix(rnorm(10000, 0, 1), ncol=2)
Y <- matrix(rnorm(10000, 0.5, 1), ncol=2)
data <- as.data.frame(rbind(X, Y))
data$classes <- rep(c('X', 'Y'), times=nrow(X))
plot_idx <- sample(nrow(data))
ggplot(data[plot_idx,], aes(x=V1, y=V2, color=classes)) +
geom_point(alpha=0.4, size=3)
You need to create the legend manually. This is not a big problem though. You can loop over the labels and create a legend entry for each. Here one may use a Line2D with a marker similar to the scatter as handle.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
plot_idx = np.random.permutation(data.shape[0])
colors,labels = pd.factorize(classes)
fig, ax = plt.subplots()
sc = ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
alpha=0.4)
h = lambda c: plt.Line2D([],[],color=c, ls="",marker="o")
plt.legend(handles=[h(sc.cmap(sc.norm(i))) for i in range(len(labels))],
labels=list(labels))
plt.show()
Alternatively you can use a special scatter handler, as shown in the quesiton Why doesn't the color of the points in a scatter plot match the color of the points in the corresponding legend? but that seems a bit overkill here.
It's a bit of a hack, but you can save the axis limits, set the labels by drawing points well outside the limits of the plot, and then resetting the axis limits as follows:
plot_idx = np.random.permutation(data.shape[0])
color_idx, unique_classes = pd.factorize(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[color_idx[plot_idx]],
alpha=0.4)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
for i in range(len(unique_classes)):
ax.scatter(xlim[1]*10,
ylim[1]*10,
c=colors[i],
label=unique_classes[i])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.legend()
plt.show()

change position of top major x-ticks as function of bottom axis

I want to something similar to How to add a second x-axis in matplotlib, i.e. have a top x-axis that displays a wavelength and a bottom axis that displays the corresponding frequency.
Reproducing linked example gives me a plot that looks like this:
This plot was produced with:
#setting up the plot
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
fig = plt.figure()
fig.tight_layout()
ax = plt.subplot()
#Here it gets interesting!
def tick_function(X):
c = 299792458
V = c/X
V = V*1e6
V = np.round(V,0)
V[2] = 3000
V = V.astype(int)
return(V)
ax = plt.subplot()
ax_top = ax.twiny()
ax.set_xscale("log", nonposx='clip')
ax.set_yscale("log", nonposy='clip')
ax_top.set_xscale("log", nonposx='clip')
ax.set_xlim([8e10,5e14])
ax.set_ylim([5e33,2e36])
axTicks = ax.get_xticks()
ax_top_Ticks = axTicks
ax_top.set_xticks(ax_top_Ticks)
ax_top.set_xlim(ax.get_xlim())
ax_top.set_xbound(ax.get_xbound())
ax_top.set_xticklabels(tick_function(ax_top_Ticks))
Now, rather than plotting the top major x-ticks at the position of the bottom major x-axis, I'd like to have them shifted.
I.e., I would like to have the top major x-ticks at positions 1000, 100, 10, 1 and the minor ticks shifted accordingly.
This is what I'd like it too look like:
I found this plot, that's what I want!
http://inspirehep.net/record/877424/files/fig2.png
Note, since lambda=c/f and ax & ax_top are logarithmic the spacing of the minor ticks has to be inverted to!
The trick is to choose the wavelengths you want and convert them to frequencies. Then use those frequencies as positions for the upper ticks.
#setting up the plot
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
fig = plt.figure()
ax = plt.subplot()
def conversion_freq_lam(inp):
c = 299792458
outp = c/inp
outp = outp.astype(int)
return outp
#ax = plt.subplot(gs1[0])
ax = plt.subplot(111)
ax_top = ax.twiny()
ax.set_xscale("log", nonposx='clip')
ax.set_yscale("log", nonposy='clip')
ax_top.set_xscale("log", nonposx='clip')
ax.set_xlim([8e10,5e14])
ax.set_ylim([5e33,2e36])
goal_lambdas = np.array([100000, 10000, 1000, 100, 10, 1, 0.1, 0.01])
goal_freqs = conversion_freq_lam(goal_lambdas)
ax_top_Ticks = goal_freqs * 1e6 # magic factor 1e6 from your attempt. Units?
ax_top.set_xticks(ax_top_Ticks)
ax_top.set_xlim(ax.get_xlim())
ax_top.set_xbound(ax.get_xbound())
ax_top.set_xticklabels(goal_lambdas)
plt.savefig('test_2axes.png')
This produces the following plot:
The magic number 1e6 used as a scaling factor I took from your question. I assume it is caused by the units of the axis.
Edit:
To have correctly spaced minor ticks at the top axis (for example at 2, 3, 4, ..., 20, 30, 40, 50, ...) add the following code block:
def find_minor_vals(goals):
minors = []
factors = np.arange(2, 10, 1)
for val in goals:
minors.extend(list(val * factors))
print minors
return np.array(minors)
goal_lambdas_minor = find_minor_vals(goal_lambdas)
goal_freqs_minor = conversion_freq_lam(goal_lambdas_minor) * 1e6
minor_locator = FixedLocator(goal_freqs_minor)
ax_top.xaxis.set_minor_locator(minor_locator)
Which results in the following picture:

Categories