Dear all, I'm trying to perform a scatter plot with color with an associated color bar. I would like the colorbar to have string values rather than numerical values, as I'm comparing two different data sets each one with different colorvalues (but in any case between a maximum and minimum values). Here the code I'm using
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
from matplotlib import rc
import pylab
from pylab import *
from matplotlib import mpl
data = np.loadtxt('deltaBinned.txt')
data2 = np.loadtxt('deltaHalphaBinned.txt')
fig=plt.figure()
fig.subplots_adjust(bottom=0.1)
ax=fig.add_subplot(111)
plt.xlabel(r'$\partial \Delta/\partial\Phi[$mm$/^{\circ}]$',fontsize=16)
plt.ylabel(r'$\Delta$ [mm]',fontsize=16)
plt.scatter(data[:,0],data[:,1],marker='o',c=data[:,3],s=data[:,3]*1500,cmap=cm.Spectral,vmin=min(data[:,3]),vmax=max(data[:,3]))
plt.scatter(data2[:,0],data2[:,1],marker='^',c=data2[:,2],s=data2[:,2]*500,cmap=cm.Spectral,vmin=min(data2[:,2]),vmax=max(data2[:,2]))
cbar=plt.colorbar(ticks=[min(data2[:,2]),max(data2[:,2])])
cbar.set_ticks(['Low','High'])
cbar.set_label(r'PdF')
plt.show()
Unfortunately it does not work as cbar.set_ticks does not accept string values. I've read the ling
http://matplotlib.sourceforge.net/examples/pylab_examples/colorbar_tick_labelling_demo.html but Iwas not able to adapt it to my case. I apologize if the question is simple but I'm just at the beginning of python programming
Nicola.
cbar.ax.set_yticklabels(['Low','High'])
For example,
import numpy as np
import matplotlib.cm as cm
import matplotlib.pyplot as plt
data = np.random.random((10, 4))
data2 = np.random.random((10, 4))
plt.subplots_adjust(bottom = 0.1)
plt.xlabel(r'$\partial \Delta/\partial\Phi[$mm$/^{\circ}]$', fontsize = 16)
plt.ylabel(r'$\Delta$ [mm]', fontsize = 16)
plt.scatter(
data[:, 0], data[:, 1], marker = 'o', c = data[:, 3], s = data[:, 3]*1500,
cmap = cm.Spectral, vmin = min(data[:, 3]), vmax = max(data[:, 3]))
plt.scatter(
data2[:, 0], data2[:, 1], marker = '^', c = data2[:, 2], s = data2[:, 2]*500,
cmap = cm.Spectral, vmin = min(data2[:, 2]), vmax = max(data2[:, 2]))
cbar = plt.colorbar(ticks = [min(data2[:, 2]), max(data2[:, 2])])
cbar.ax.set_yticklabels(['Low', 'High'])
cbar.set_label(r'PdF')
plt.show()
produces
Related
these are my codes, mostly taken from the example here https://matplotlib.org/2.0.2/examples/axes_grid/demo_parasite_axes2.html
the output graph is shown below
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()
import numpy as np
host = host_subplot(111, axes_class=AA.Axes)
plt.subplots_adjust(left=.15)
par1 = host.twinx()
par2 = host.twinx()
host.tick_params(width=0)
par1.tick_params(width=0)
par2.tick_params(width=0)
offset = -60
new_fixed_axis = par2.get_grid_helper().new_fixed_axis
par2.axis["left"] = new_fixed_axis(loc="left", axes=par2,offset=(offset, 0))
par2.set_ylabel("V3")
par2.axis["left"].toggle(all=True)
xx = np.random.randint(1,10,100)
yy = np.random.randint(-100,10,100)
p1 = host.scatter(xx,yy)
ax = plt.gca()
ax.invert_yaxis()
rolling_period = 3
plt.xlabel('V2')
plt.ylabel('V1')
x1, y1 = [250, 0], [0, 0]
x2, y2 = [0, 0], [-30, 0]
plt.plot(x1,y1,x2,y2,color='black')
plt.xlim([-50, 250])
plt.ylim([0, -30])
plt.tick_params(axis='x',which='both',bottom=False,top=False)
p2, = par1.plot([0, 1, 2], [0, 3, 2], linestyle = 'None', label="V3")
plt.draw()
plt.show()
as you can see if I use seaborn I cant see the axis line for the parasite axis (the additional y axis on the left)
however if I remove the seaborn I do see that, but I do need to use seaborn for visualisation purpose so how do I fix the problem?
the graph without seaborn
Seaborn sets axis line colors to white. Just reset it to black by:
par2.axis['left'].line.set_ec((0, 0, 0, 1))
I would like to generate a series of histogram shown below:
The above visualization was done in tensorflow but I'd like to reproduce the same visualization on matplotlib.
EDIT:
Using plt.fill_between suggested by #SpghttCd, I have the following code:
colors=cm.OrRd_r(np.linspace(.2, .6, 10))
plt.figure()
x = np.arange(100)
for i in range(10):
y = np.random.rand(100)
plt.fill_between(x, y + 10-i, 10-i,
facecolor=colors[i]
edgecolor='w')
plt.show()
This works great, but is it possible to use histogram instead of a continuous curve?
EDIT:
joypy based approach, like mentioned in the comment of october:
import pandas as pd
import joypy
import numpy as np
df = pd.DataFrame()
for i in range(0, 400, 20):
df[i] = np.random.normal(i/410*5, size=30)
joypy.joyplot(df, overlap=2, colormap=cm.OrRd_r, linecolor='w', linewidth=.5)
for finer control of colors, you can define a color gradient function which accepts a fractional index and start and stop color tuples:
def color_gradient(x=0.0, start=(0, 0, 0), stop=(1, 1, 1)):
r = np.interp(x, [0, 1], [start[0], stop[0]])
g = np.interp(x, [0, 1], [start[1], stop[1]])
b = np.interp(x, [0, 1], [start[2], stop[2]])
return (r, g, b)
Usage:
joypy.joyplot(df, overlap=2, colormap=lambda x: color_gradient(x, start=(.78, .25, .09), stop=(1.0, .64, .44)), linecolor='w', linewidth=.5)
Examples with different start and stop tuples:
original answer:
You could iterate over your dataarrays you'd like to plot with plt.fill_between, setting colors to some gradient and the line color to white:
creating some sample data:
import numpy as np
t = np.linspace(-1.6, 1.6, 11)
y = np.cos(t)**2
y2 = lambda : y + np.random.random(len(y))/5-.1
plot the series:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
colors = cm.OrRd_r(np.linspace(.2, .6, 10))
plt.figure()
for i in range(10):
plt.fill_between(t+i, y2()+10-i/10, 10-i/10, facecolor = colors[i], edgecolor='w')
If you want it to have more optimized towards your example you should perhaps consider providing some sample data.
EDIT:
As I commented below, I'm not quite sure if I understand what you want - or if you want the best for your task. Therefore here a code which plots besides your approach in your edit two smples of how to present a bunch of histograms in a way that they are better comparable:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
N = 10
np.random.seed(42)
colors=cm.OrRd_r(np.linspace(.2, .6, N))
fig1 = plt.figure()
x = np.arange(100)
for i in range(10):
y = np.random.rand(100)
plt.fill_between(x, y + 10-i, 10-i,
facecolor=colors[i],
edgecolor='w')
data = np.random.binomial(20, .3, (N, 100))
fig2, axs = plt.subplots(N, figsize=(10, 6))
for i, d in enumerate(data):
axs[i].hist(d, range(20), color=colors[i], label=str(i))
fig2.legend(loc='upper center', ncol=5)
fig3, ax = plt.subplots(figsize=(10, 6))
ax.hist(data.T, range(20), color=colors, label=[str(i) for i in range(N)])
fig3.legend(loc='upper center', ncol=5)
This leads to the following plots:
your plot from your edit:
N histograms in N subplots:
N histograms side by side in one plot:
I'm trying to build a scatterplot of a large amount of data from multiple classes in python/matplotlib. Unfortunately, it appears that I have to choose between having my data randomised and having legend labels. Is there a way I can have both (preferably without manually coding the labels?)
Minimum reproducible example:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
Plotting with randomized points:
plot_idx = np.random.permutation(data.shape[0])
colors = pd.factorize(classes)
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
label=classes[plot_idx],
alpha=0.4)
plt.legend()
plt.show()
This gives me the wrong legend.
Plotting with the correct legend:
from matplotlib import cm
unique_classes = np.unique(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
for i, class in enumerate(unique_classes):
ax.scatter(data[classes == class, 0],
data[classes == class, 1],
c=colors[i],
label=class,
alpha=0.4)
plt.legend()
plt.show()
But now the points are not randomized and the resulting plot is not representative of the data.
I'm looking for something that would give me a result like I get as follows in R:
library(ggplot2)
X <- matrix(rnorm(10000, 0, 1), ncol=2)
Y <- matrix(rnorm(10000, 0.5, 1), ncol=2)
data <- as.data.frame(rbind(X, Y))
data$classes <- rep(c('X', 'Y'), times=nrow(X))
plot_idx <- sample(nrow(data))
ggplot(data[plot_idx,], aes(x=V1, y=V2, color=classes)) +
geom_point(alpha=0.4, size=3)
You need to create the legend manually. This is not a big problem though. You can loop over the labels and create a legend entry for each. Here one may use a Line2D with a marker similar to the scatter as handle.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
X = np.random.normal(0, 1, [5000, 2])
Y = np.random.normal(0.5, 1, [5000, 2])
data = np.concatenate([X,Y])
classes = np.concatenate([np.repeat('X', X.shape[0]),
np.repeat('Y', Y.shape[0])])
plot_idx = np.random.permutation(data.shape[0])
colors,labels = pd.factorize(classes)
fig, ax = plt.subplots()
sc = ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[plot_idx],
alpha=0.4)
h = lambda c: plt.Line2D([],[],color=c, ls="",marker="o")
plt.legend(handles=[h(sc.cmap(sc.norm(i))) for i in range(len(labels))],
labels=list(labels))
plt.show()
Alternatively you can use a special scatter handler, as shown in the quesiton Why doesn't the color of the points in a scatter plot match the color of the points in the corresponding legend? but that seems a bit overkill here.
It's a bit of a hack, but you can save the axis limits, set the labels by drawing points well outside the limits of the plot, and then resetting the axis limits as follows:
plot_idx = np.random.permutation(data.shape[0])
color_idx, unique_classes = pd.factorize(classes)
colors = cm.Set1(np.linspace(0, 1, len(unique_classes)))
fig, ax = plt.subplots()
ax.scatter(data[plot_idx, 0],
data[plot_idx, 1],
c=colors[color_idx[plot_idx]],
alpha=0.4)
xlim = ax.get_xlim()
ylim = ax.get_ylim()
for i in range(len(unique_classes)):
ax.scatter(xlim[1]*10,
ylim[1]*10,
c=colors[i],
label=unique_classes[i])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.legend()
plt.show()
I am trying to make a histgram over a specific range but the matplotlib.pyplot.hist() function keeps cropping the range to the bins with entries in them. A toy example:
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100,100,1000)
nbins = 100
xmin = -500
xmax = 500
fig = plt.figure();
ax = fig.add_subplot(1, 1, 1)
ax.hist(x, bins=nbins,range=[xmin,xmax])
plt.show()
Gives a plot with a range [-100,100]. Why is the range not [-500,500] as specified?
(I am using the Enthought Canopy 1.4 and sorry but I do not have a high enough rep to post an image of the plot.)
Actually, it works if you specify with range an interval shorter than [-100, 100]. For example, this work :
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100, 100, 1000)
plt.hist(x, bins=30, range=(-50, 50))
plt.show()
If you want to plot the histogram on a range larger than [x.min(), x.max()] you can change xlim propertie of the plot.
import numpy as np
import matplotlib.pyplot as plt
x = np.random.uniform(-100, 100, 1000)
plt.hist(x, bins=30)
plt.xlim(-500, 500)
plt.show()
the following code is for making the same y axis limit on two subplots
f ,ax = plt.subplots(1,2,figsize = (30, 13),gridspec_kw={'width_ratios': [5, 1]})
df.plot(ax = ax[0], linewidth = 2.5)
ylim = [df['min_return'].min()*1.1,df['max_return'].max()*1.1]
ax[0].set_ylim(ylim)
ax[1].hist(data,normed =1, bins = num_bin, color = 'yellow' ,alpha = 1)
ax[1].set_ylim(ylim)
I have two vectors, one with values and one with class labels like 1,2,3 etc.
I would like to plot all the points that belong to class 1 in red, to class 2 in blue, to class 3 in green etc. How can I do that?
The accepted answer has it spot on, but if you might want to specify which class label should be assigned to a specific color or label you could do the following. I did a little label gymnastics with the colorbar, but making the plot itself reduces to a nice one-liner. This works great for plotting the results from classifications done with sklearn. Each label matches a (x,y) coordinate.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
x = [4,8,12,16,1,4,9,16]
y = [1,4,9,16,4,8,12,3]
label = [0,1,2,3,0,1,2,3]
colors = ['red','green','blue','purple']
fig = plt.figure(figsize=(8,8))
plt.scatter(x, y, c=label, cmap=matplotlib.colors.ListedColormap(colors))
cb = plt.colorbar()
loc = np.arange(0,max(label),max(label)/float(len(colors)))
cb.set_ticks(loc)
cb.set_ticklabels(colors)
Using a slightly modified version of this answer, one can generalise the above for N colors as follows:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
N = 23 # Number of labels
# setup the plot
fig, ax = plt.subplots(1,1, figsize=(6,6))
# define the data
x = np.random.rand(1000)
y = np.random.rand(1000)
tag = np.random.randint(0,N,1000) # Tag each point with a corresponding label
# define the colormap
cmap = plt.cm.jet
# extract all colors from the .jet map
cmaplist = [cmap(i) for i in range(cmap.N)]
# create the new map
cmap = cmap.from_list('Custom cmap', cmaplist, cmap.N)
# define the bins and normalize
bounds = np.linspace(0,N,N+1)
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
# make the scatter
scat = ax.scatter(x,y,c=tag,s=np.random.randint(100,500,N),cmap=cmap, norm=norm)
# create the colorbar
cb = plt.colorbar(scat, spacing='proportional',ticks=bounds)
cb.set_label('Custom cbar')
ax.set_title('Discrete color mappings')
plt.show()
Which gives:
Assuming that you have your data in a 2d array, this should work:
import numpy
import pylab
xy = numpy.zeros((2, 1000))
xy[0] = range(1000)
xy[1] = range(1000)
colors = [int(i % 23) for i in xy[0]]
pylab.scatter(xy[0], xy[1], c=colors)
pylab.show()
You can also set a cmap attribute to control which colors will appear through use of a colormap; i.e. replace the pylab.scatter line with:
pylab.scatter(xy[0], xy[1], c=colors, cmap=pylab.cm.cool)
A list of color maps can be found
here
A simple solution is to assign color for each class. This way, we can control how each color is for each class. For example:
arr1 = [1, 2, 3, 4, 5]
arr2 = [2, 3, 3, 4, 4]
labl = [0, 1, 1, 0, 0]
color= ['red' if l == 0 else 'green' for l in labl]
plt.scatter(arr1, arr2, color=color)