these are my codes, mostly taken from the example here https://matplotlib.org/2.0.2/examples/axes_grid/demo_parasite_axes2.html
the output graph is shown below
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()
import numpy as np
host = host_subplot(111, axes_class=AA.Axes)
plt.subplots_adjust(left=.15)
par1 = host.twinx()
par2 = host.twinx()
host.tick_params(width=0)
par1.tick_params(width=0)
par2.tick_params(width=0)
offset = -60
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["left"] = new_fixed_axis(loc="left", axes=par2,offset=(offset, 0))
par2.set_ylabel("V3")
par2.axis["left"].toggle(all=True)
xx = np.random.randint(1,10,100)
yy = np.random.randint(-100,10,100)
p1 = host.scatter(xx,yy)
ax = plt.gca()
ax.invert_yaxis()
rolling_period = 3
plt.xlabel('V2')
plt.ylabel('V1')
x1, y1 = [250, 0], [0, 0]
x2, y2 = [0, 0], [-30, 0]
plt.plot(x1,y1,x2,y2,color='black')
plt.xlim([-50, 250])
plt.ylim([0, -30])
plt.tick_params(axis='x',which='both',bottom=False,top=False)
p2, = par1.plot([0, 1, 2], [0, 3, 2], linestyle = 'None', label="V3")
plt.draw()
plt.show()
as you can see if I use seaborn I cant see the axis line for the parasite axis (the additional y axis on the left)
however if I remove the seaborn I do see that, but I do need to use seaborn for visualisation purpose so how do I fix the problem?
the graph without seaborn
Seaborn sets axis line colors to white. Just reset it to black by:
par2.axis['left'].line.set_ec((0, 0, 0, 1))
Related
I am trying to plot a cluster using scatter plot with two x-axes, in left and right side, not in top and bottom. I have checked out similar questions but it doesn't seem to be the problem here.
I was trying to make this two plots share y axis with 2 x-axes in left and right side, but I cannot make it.
This is my plot,
but I expect my plot to be displayed like this
.
This is the code I have tried.
import pylab as py
filename1="ex_1.csv"
df1=pd.read_csv(filename1)
filename2="ex_21.csv"
df2=pd.read_csv(filename2)
x1 = df1['Dom']
y1 = df1['Sal']
s1 = df1['Size']
x2 = df2['Type']
y2 = df2['Sal']
s2 = df2['Size']
d = 2.0
#plot 1
fig1 = plt.figure(figsize=(10,6))
fig, (gb1, gb2) = plt.subplots(nrows = 1, ncols = 2, figsize=(10,6),sharey = True)
gb1.scatter(x=x1, y=y1, s=s1*50, alpha=0.8, c="blue", label=x1)
gb2.scatter(x=x2, y=y2, s=s2*50, alpha=0.8, c="red")
If your categories are different, you can just use a single plot and draw a line in between.
import matplotlib.pyplot as plt
import numpy as np
ax = plt.subplot(111)
x1 = np.array(['A','B','C','A','B','C'])
y1 = np.array([1,2,3,4,5,6])
x2 = np.array(['X', 'Y', 'X', 'Y', 'Z', 'Z'])
y2 = np.array([1, 2, 3, 4, 5, 6])
ax.scatter(x1, y1, s=y1*50, c='b')
ax.scatter(x2, y2, s=y2*50, c='r')
ax.axline((2.5, 1), (2.5, 6), c='black')
ax.figure.show()
Edit: As pointed out in the comment, changed import pylab to import matplotlib.pyplot as plt as pylab can cause unexpected behavior and is strongly discouraged by matplotlib
I am plotting my pandas data using matplotlib, My plot looks like this:
There are four classes in the dataset. I want to color the backgroud area for each class, something like this
My matplotlib code looks like this:
import pandas as pd
df = pd.read_csv('normalized.csv')
fig = plt.figure(figsize=(8,8))
plt.scatter(df['p1'], df['p2'], c= list(df['cs']), alpha=0.9)
plt.show()
I also tried sns for this:
import pandas as pd
df = pd.read_csv('normalized.csv')
sn.FacetGrid(df, hue="cs", size = 8).map(plt.scatter, "p1", "p2").add_legend()
plt.show()
How I can fill the backgroud area for four classes in any of module?
A filled contour could serve as background:
import numpy as np
import matplotlib.pyplot as plt
N = 100
M = 4
points = np.random.normal(np.tile(np.random.uniform(1, 10, 2 * M), N)).reshape(-1, 2)
group = np.tile(np.arange(M), N)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14, 5), sharey=True, sharex=True)
cmap = plt.cm.get_cmap('tab10', 4)
ax1.scatter(points[:, 0], points[:, 1], c=group, cmap=cmap)
ax2.scatter(points[:, 0], points[:, 1], c=group, cmap=cmap)
ax2.tricontourf(points[:, 0], points[:, 1], group, levels=np.arange(-0.5, 4), zorder=0, cmap=cmap, alpha=0.3)
plt.show()
Note that the contour plot also creates some narrow zones of inbetween values, because it only looks at numeric values and supposes that between a zone 0 and a zone 2 there must exist some small zone 1.
A bit more involved approach uses a nearest neighbor fit:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors
N = 100
M = 4
points = np.random.normal(np.tile(np.random.uniform(1, 10, 2 * M), N)).reshape(-1, 2)
groups = np.tile(np.arange(M), N)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14, 5), sharey=True, sharex=True)
cmap = ListedColormap(['orange', 'cyan', 'cornflowerblue', 'crimson'])
ax1.scatter(points[:, 0], points[:, 1], c=groups, cmap=cmap)
ax2.scatter(points[:, 0], points[:, 1], c=groups, cmap=cmap)
clf = neighbors.KNeighborsClassifier(10)
clf.fit(points, groups)
x_min, x_max = points[:, 0].min() - 1, points[:, 0].max() + 1
y_min, y_max = points[:, 1].min() - 1, points[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 50),
np.linspace(y_min, y_max, 50))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
ax2.imshow(Z, extent=[x_min, x_max, y_min, y_max], cmap=cmap, alpha=0.3, aspect='auto', origin='lower')
plt.show()
If you don't need to fill the space and do not bother about areas overlap (your data points show some overlap) then you can try to fill out the convex hull defined by each subset.
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import ConvexHull
N = 100
points = [np.random.normal(np.tile(np.random.uniform(1, 5, 2), N)).reshape(-1, 2) for i in range(4)]
colors = ['r', 'g', 'b', 'k']
for k in range(4):
hull = ConvexHull(points[k])
plt.plot(points[k][:,0], points[k][:,1], '.', color = colors[k])
plt.fill(points[k][hull.vertices,0], points[k][hull.vertices,1], color = colors[k], alpha=0.3)
stack.imgur.com/2562R.png
I want to mark a line over two aligned subplots. Therefore, I use matplotlib.patches.ConnectionPatch as suggested in other answers. It worked already in other examples, but here for the second time, the line just is cut off at the second plot area.
How do I assure that the ConnectionPatch is plotted in the front?
I tried playing around with zorder, but did not find a solution yet.
from matplotlib.patches import ConnectionPatch
import matplotlib.pyplot as plt
xes=[-2, 0, 2]
field=[0, -10, 0]
potential=[-20, 0, 20]
fig, axs = plt.subplots(2, 1, sharex=True)
axs[0].plot(xes, field)
axs[1].plot(xes, potential)
# line over both plots
_, ytop = axs[0].get_ylim()
ybot, _ = axs[1].get_ylim()
n_p_border = ConnectionPatch(xyA=(0., ytop), xyB=(0., ybot),
coordsA='data', coordsB='data',
axesA=axs[0], axesB=axs[1], lw=3)
print(n_p_border)
axs[0].add_artist(n_p_border)
You would need to inverse the role of the two axes. This is also shown in Drawing lines between two plots in Matplotlib.
from matplotlib.patches import ConnectionPatch
import matplotlib.pyplot as plt
xes=[-2, 0, 2]
field=[0, -10, 0]
potential=[-20, 0, 20]
fig, axs = plt.subplots(2, 1, sharex=True)
axs[0].plot(xes, field)
axs[1].plot(xes, potential)
# line over both plots
_, ytop = axs[0].get_ylim()
ybot, _ = axs[1].get_ylim()
n_p_border = ConnectionPatch(xyA=(0., ybot), xyB=(0., ytop),
coordsA='data', coordsB='data',
axesA=axs[1], axesB=axs[0], lw=3)
axs[1].add_artist(n_p_border)
plt.show()
I'm trying to build a scatterplot of a large amount of data from multiple classes in python/matplotlib. Unfortunately, it appears that I have to choose between having my data randomised and having legend labels. Is there a way I can have both (preferably without manually coding the labels?)
Minimum reproducible example:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
Plotting with randomized points:
plot_idx = np.random.permutation(data.shape[0])
colors = pd.factorize(classes)
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
label=classes[plot_idx],
alpha=0.4)
plt.legend()
plt.show()
This gives me the wrong legend.
Plotting with the correct legend:
from matplotlib import cm
unique_classes = np.unique(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
for i, class in enumerate(unique_classes):
ax.scatter(data[classes == class, 0],
data[classes == class, 1],
c=colors[i],
label=class,
alpha=0.4)
plt.legend()
plt.show()
But now the points are not randomized and the resulting plot is not representative of the data.
I'm looking for something that would give me a result like I get as follows in R:
library(ggplot2)
X <- matrix(rnorm(10000, 0, 1), ncol=2)
Y <- matrix(rnorm(10000, 0.5, 1), ncol=2)
data <- as.data.frame(rbind(X, Y))
data$classes <- rep(c('X', 'Y'), times=nrow(X))
plot_idx <- sample(nrow(data))
ggplot(data[plot_idx,], aes(x=V1, y=V2, color=classes)) +
geom_point(alpha=0.4, size=3)
You need to create the legend manually. This is not a big problem though. You can loop over the labels and create a legend entry for each. Here one may use a Line2D with a marker similar to the scatter as handle.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
plot_idx = np.random.permutation(data.shape[0])
colors,labels = pd.factorize(classes)
fig, ax = plt.subplots()
sc = ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
alpha=0.4)
h = lambda c: plt.Line2D([],[],color=c, ls="",marker="o")
plt.legend(handles=[h(sc.cmap(sc.norm(i))) for i in range(len(labels))],
labels=list(labels))
plt.show()
Alternatively you can use a special scatter handler, as shown in the quesiton Why doesn't the color of the points in a scatter plot match the color of the points in the corresponding legend? but that seems a bit overkill here.
It's a bit of a hack, but you can save the axis limits, set the labels by drawing points well outside the limits of the plot, and then resetting the axis limits as follows:
plot_idx = np.random.permutation(data.shape[0])
color_idx, unique_classes = pd.factorize(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[color_idx[plot_idx]],
alpha=0.4)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
for i in range(len(unique_classes)):
ax.scatter(xlim[1]*10,
ylim[1]*10,
c=colors[i],
label=unique_classes[i])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.legend()
plt.show()
Dear all, I'm trying to perform a scatter plot with color with an associated color bar. I would like the colorbar to have string values rather than numerical values, as I'm comparing two different data sets each one with different colorvalues (but in any case between a maximum and minimum values). Here the code I'm using
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
from matplotlib import rc
import pylab
from pylab import *
from matplotlib import mpl
data = np.loadtxt('deltaBinned.txt')
data2 = np.loadtxt('deltaHalphaBinned.txt')
fig=plt.figure()
fig.subplots_adjust(bottom=0.1)
ax=fig.add_subplot(111)
plt.xlabel(r'$\partial \Delta/\partial\Phi[$mm$/^{\circ}]$',fontsize=16)
plt.ylabel(r'$\Delta$ [mm]',fontsize=16)
plt.scatter(data[:,0],data[:,1],marker='o',c=data[:,3],s=data[:,3]*1500,cmap=cm.Spectral,vmin=min(data[:,3]),vmax=max(data[:,3]))
plt.scatter(data2[:,0],data2[:,1],marker='^',c=data2[:,2],s=data2[:,2]*500,cmap=cm.Spectral,vmin=min(data2[:,2]),vmax=max(data2[:,2]))
cbar=plt.colorbar(ticks=[min(data2[:,2]),max(data2[:,2])])
cbar.set_ticks(['Low','High'])
cbar.set_label(r'PdF')
plt.show()
Unfortunately it does not work as cbar.set_ticks does not accept string values. I've read the ling
http://matplotlib.sourceforge.net/examples/pylab_examples/colorbar_tick_labelling_demo.html but Iwas not able to adapt it to my case. I apologize if the question is simple but I'm just at the beginning of python programming
Nicola.
cbar.ax.set_yticklabels(['Low','High'])
For example,
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
data = np.random.random((10, 4))
data2 = np.random.random((10, 4))
plt.subplots_adjust(bottom = 0.1)
plt.xlabel(r'$\partial \Delta/\partial\Phi[$mm$/^{\circ}]$', fontsize = 16)
plt.ylabel(r'$\Delta$ [mm]', fontsize = 16)
plt.scatter(
data[:, 0], data[:, 1], marker = 'o', c = data[:, 3], s = data[:, 3]*1500,
cmap = cm.Spectral, vmin = min(data[:, 3]), vmax = max(data[:, 3]))
plt.scatter(
data2[:, 0], data2[:, 1], marker = '^', c = data2[:, 2], s = data2[:, 2]*500,
cmap = cm.Spectral, vmin = min(data2[:, 2]), vmax = max(data2[:, 2]))
cbar = plt.colorbar(ticks = [min(data2[:, 2]), max(data2[:, 2])])
cbar.ax.set_yticklabels(['Low', 'High'])
cbar.set_label(r'PdF')
plt.show()
produces