I'm trying to plot some trendlines for some data that I have. Problem is that it looks fuzzy and it seems to overlap on itself if I use dotted or dashed styles.
Not sure why, but the lines also look like there's no anti-aliasing done -- they look jagged. Trying to produce the exact same graph in Excel gives clean lines.
Using other line styles doesn't help; nor does increasing the spacing by adding dashes=(1,5) or similar. Even if you increase plot size or change linewidth - it still overlaps.
Here's the code:
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import numpy
from scipy import stats
radon = [49.6,61.7,58.7,64.1,59.4,64.6,65.4,65.3,65.5,66.0,50.5,64.8,71.9,
71.9,60.4,54.4,50.9,58.1,52.6,55.6,56.6,41.6,43.0,33.0,41.5,53.5,52.2,45.1,
46.8,63.0,73.8,61.3,44.4,39.3,38.2,45.4,39.9,36.3,41.0,38.5,35.4,40.2,11.3,
34.7,24.2,24.5,32.1,26.3,23.6,32.1,27.5,39.0,24.9,22.0,18.2,23.0,21.1,15.4,
13.9,10.2,26.4,18.2,16.6]
tout = [-2.57,-3.31,-0.63,-0.60,0.39,-1.64,-7.62,-1.90,-0.35,-4.88,-1.27,
-0.23,-6.99,-2.87,-12.27,-11.90,-9.42,-4.10,-3.15,0.81,3.87,-11.41,-9.47,
0.25,-6.81,-13.70,-16.41,-14.14,-9.70,-10.32,-21.83,-26.55,-16.88,-6.85,
4.03,-7.89,-6.53,-3.96,-6.09,-3.15,-0.51,-2.62,8.13,2.08,0.58,1.99,-6.64,
-12.13,-4.95,-2.99,-4.81,-0.88,0.28,-1.44,1.92,3.73,0.21,1.11,6.83,13.31,
7.04,1.46,1.78]
# start and end index for data
a = [0,21,42]
b = [20,41,62]
n = 0 # just a counter
# set font family
hfont = {'family':'Arial'}
plt.rcParams.update({'font.family': 'Arial', 'font.size':12})
# set axis minor tick marks
plt.axes().yaxis.set_minor_locator(mtick.MultipleLocator(4))
plt.axes().xaxis.set_minor_locator(mtick.MultipleLocator(2))
plt.axes().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f%%'))
# config axis labels
plt.xlabel("Outdoor Temperature", **hfont)
plt.ylabel("Radiator on %", **hfont)
# set line and marker types and colors
marker = ['o', 's', 'x']
marker_facecolor = ['None', 'k', 'None']
names = ['Manual Control', 'Enforced Schedule', 'Occupancy-based']
lines = [':','--','-']
line_spacing = [[1, 5], [5, 5], [0, 0]]
transparency = [1, 0.75, 1]
# Plot each group's scatter points and its fitted straight trendline.
# zip(a, b) yields one (start, end) index pair per group.
for i,j in zip(a,b):
# get x and y
# NOTE(review): the slice [i:j] excludes index j; if b holds inclusive end
# indices, the last sample of every group is left out - confirm.
x = numpy.array(tout[i:j])
y = numpy.array(radon[i:j])
# set axis ranges
plt.ylim(0, 100)
plt.xlim(-30, 20)
# plot data
plt.plot(x, y, marker[n],markeredgewidth=0.75
,markeredgecolor='k',markerfacecolor=marker_facecolor[n],
alpha=transparency[n],label=names[n])
# perform regressions
# degree-1 polyfit gives slope and intercept; poly1d wraps them as a callable
z = numpy.polyfit(x, y, 1)
p = numpy.poly1d(z)
# plot trendline
# x is passed in its original data order (it is not sorted here), so the
# line is drawn through the points in that order.
plt.plot(x,p(x),'k%s' % lines[n], linewidth=0.85)
# increment counter
n+=1
plt.legend(loc='upper right')
leg = plt.legend()
leg.get_frame().set_edgecolor('k')
plt.savefig('tout_vs_radon.png', dpi=300)
plt.show()
Here's the result of the above code:
https://i.imgur.com/K6jIHBM.png
(don't have enough reputation to post image, sorry)
You can see the trendline overlapping itself in the center for dotted or dashed styles. I'm using matplotlib v3.0.3 and Python v3.6.5 on Windows 10.
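The key is to plot the points sorted by x. Otherwise the line is drawn through the points in data order, so it jumps back and forth along the same straight line and overlaps itself, which fills in the gaps of dotted or dashed styles.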
order = np.argsort(x)
plt.plot(x[order],p(x[order]), ..)
Complete code:
from matplotlib import pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
radon = [49.6,61.7,58.7,64.1,59.4,64.6,65.4,65.3,65.5,66.0,50.5,64.8,71.9,
71.9,60.4,54.4,50.9,58.1,52.6,55.6,56.6,41.6,43.0,33.0,41.5,53.5,52.2,45.1,
46.8,63.0,73.8,61.3,44.4,39.3,38.2,45.4,39.9,36.3,41.0,38.5,35.4,40.2,11.3,
34.7,24.2,24.5,32.1,26.3,23.6,32.1,27.5,39.0,24.9,22.0,18.2,23.0,21.1,15.4,
13.9,10.2,26.4,18.2,16.6]
tout = [-2.57,-3.31,-0.63,-0.60,0.39,-1.64,-7.62,-1.90,-0.35,-4.88,-1.27,
-0.23,-6.99,-2.87,-12.27,-11.90,-9.42,-4.10,-3.15,0.81,3.87,-11.41,-9.47,
0.25,-6.81,-13.70,-16.41,-14.14,-9.70,-10.32,-21.83,-26.55,-16.88,-6.85,
4.03,-7.89,-6.53,-3.96,-6.09,-3.15,-0.51,-2.62,8.13,2.08,0.58,1.99,-6.64,
-12.13,-4.95,-2.99,-4.81,-0.88,0.28,-1.44,1.92,3.73,0.21,1.11,6.83,13.31,
7.04,1.46,1.78]
# start and end index for data
a = [0,21,42]
b = [20,41,62]
n = 0 # just a counter
# set font family
hfont = {'family':'Arial'}
plt.rcParams.update({'font.family': 'Arial', 'font.size':12})
# set axis minor tick marks
plt.gca().yaxis.set_minor_locator(mtick.MultipleLocator(4))
plt.gca().xaxis.set_minor_locator(mtick.MultipleLocator(2))
plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0f%%'))
# config axis labels
plt.xlabel("Outdoor Temperature", **hfont)
plt.ylabel("Radiator on %", **hfont)
# set line and marker types and colors
marker = ['o', 's', 'x']
marker_facecolor = ['None', 'k', 'None']
names = ['Manual Control', 'Enforced Schedule', 'Occupancy-based']
lines = [':','--','-']
line_spacing = [[1, 5], [5, 5], [0, 0]]
transparency = [1, 0.75, 1]
# set axis ranges (identical for every group, so set them once)
plt.ylim(0, 100)
plt.xlim(-30, 20)

# Plot each group's scatter points and its least-squares trendline.
# a and b hold the start and end index of each group, so the slice has to
# run to j + 1; a plain [i:j] drops the last sample of every group.
for n, (i, j) in enumerate(zip(a, b)):
    # get x and y
    x = np.array(tout[i:j + 1])
    y = np.array(radon[i:j + 1])
    # plot data
    plt.plot(x, y, marker[n], markeredgewidth=0.75,
             markeredgecolor='k', markerfacecolor=marker_facecolor[n],
             alpha=transparency[n], label=names[n])
    # perform regressions: degree-1 fit, wrapped as a callable polynomial
    z = np.polyfit(x, y, 1)
    p = np.poly1d(z)
    # plot trendline through x in ascending order so the line is drawn once,
    # left to right; in data order it would double back over itself and the
    # dashes/dots would overlap
    order = np.argsort(x)
    plt.plot(x[order], p(x[order]), 'k%s' % lines[n], linewidth=0.85)
plt.show()
Related
I am plotting separate figures for each attribute and label for each data sample. Here is the illustration:
As illustrated in the last subplot (Label), my data contains seven classes, numbered 0 to 6. I'd like to visualize these classes using distinct colors and a legend. Please note that I only want colors in the last subplot. How should I do that?
Here is the code of above plot:
x, y = test_data["x"], test_data["y"]
# determine the total number of plots
n, off = x.shape[1] + 1, 0
plt.rcParams["figure.figsize"] = (40, 15)
# plot all the attributes
for i in range(6):
plt.subplot(n, 1, off + 1)
plt.plot(x[:, off])
plt.title('Attribute:' + str(i), y=0, loc='left')
off += 1
# plot Labels
plt.subplot(n, 1, n)
plt.plot(y)
plt.title('Label', y=0, loc='left')
plt.savefig(save_file_name, bbox_inches="tight")
plt.close()
First, just to set up a similar dataset:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random((100,6))
y = np.random.randint(0, 6, (100))
fig, axs = plt.subplots(6, figsize=(40,15))
We could use scatter() with c=y to give individual points different colors according to their class:
for i in range(x.shape[-1]):
axs[i].scatter(range(x.shape[0]), x[:,i], c=y)
Or we could mask the arrays we're plotting:
for i in range(x.shape[-1]):
for j in np.unique(y):
axs[i].plot(np.ma.masked_where(y!=j, x[:,i]), 'o')
Either way we get the same results:
Edit: Ah you've edited your question! You can do exactly the same thing for your last plot only, just modify my code above to take it out of the loop of subplots :)
As suggested, we imitate the matplotlib step function by creating a LineCollection to color the different line segments:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
from matplotlib.patches import Patch
#random data generation
np.random.seed(12345)
number_of_categories=4
y = np.concatenate([np.repeat(np.random.randint(0, number_of_categories), np.random.randint(1, 30)) for _ in range(20)])
#check the results with less points
#y = y[:10]
x = y[None] * np.linspace(1, 5, 3)[:, None]
x += 2 * np.random.random(x.shape) - 1
#your initial plot
num_plots = x.shape[0] + 1
fig, axes = plt.subplots(num_plots, 1, sharex=True, figsize=(10, 8))
for i, ax in enumerate(axes.flat[:-1]):
ax.plot(x[i,:])
#first we create the matplotlib step function with x-values as their midpoint
axes.flat[-1].step(np.arange(y.size), y, where="mid", color="lightgrey", zorder=-1)
#then we plot colored segments with shifted index simulating the step function
shifted_x = np.arange(y.size+1)-0.5
#and identify the step indexes
idx_steps, = np.nonzero(np.diff(y, prepend=np.inf, append=np.inf))
#create collection of plateau segments
colored_segments = np.zeros((idx_steps.size-1, 2, 2))
colored_segments[:, :, 0] = np.vstack((shifted_x[idx_steps[:-1]], shifted_x[idx_steps[1:]])).T
colored_segments[:, :, 1] = np.repeat(y[idx_steps[:-1]], 2).reshape(-1, 2)
#generate discrete color list
n_levels, idx_levels = np.unique(y[idx_steps[:-1]], return_inverse=True)
colorarr = np.asarray(plt.cm.tab10.colors[:n_levels.size])
#and plot the colored segments
lc_cs = LineCollection(colored_segments, colors=colorarr[idx_levels, :], lw=10)
lines_cs = axes.flat[-1].add_collection(lc_cs)
#scaling and legend generation
axes.flat[-1].set_ylim(n_levels.min()-0.5, n_levels.max()+0.5)
axes.flat[-1].legend([Patch(color=colorarr[i, :]) for i, _ in enumerate(n_levels)],
[f"cat {i}" for i in n_levels],
loc="upper center", bbox_to_anchor=(0.5, -0.15),
ncol=n_levels.size)
plt.show()
Sample output:
Alternatively, you can use broken barh plots or color this axis or even all axes using axvspan.
I am scatter ploting data points with a very small marker (see screengrab below). When I use the very small marker ',' the legend is very hard to read (example code taken from here).
(Python 3, Jupyter lab)
How can I increase the size of the marker in the legend? The two approaches shown on the above-mentioned site do not work:
legend = ax.legend(frameon=True)
for legend_handle in legend.legendHandles:
legend_handle._legmarker.set_markersize(9)
and
ax.legend(markerscale=6)
The two solutions do however work when the marker is set to '.'.
How can I show bigger markers in the legend?
Sample Code from intoli.com:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for i in range(5):
mean = [np.random.random()*10, np.random.random()*10]
covariance = [ [1 + np.random.random(), np.random.random() - 1], [0, 1 + np.random.random()], ]
covariance[1][0] = covariance[0][1] # must be symmetric
x, y = np.random.multivariate_normal(mean, covariance, 3000).T
plt.plot(x, y, ',', label=f'Cluster {i + 1}')
ax.legend(markerscale=12)
fig.tight_layout()
plt.show()
You can get 1 pixel sized markers for a plot by setting the markersize to 1 pixel. This would look like
plt.plot(x, y, marker='s', markersize=72./fig.dpi, mec="None", ls="None")
What the above does is set the marker to a square, set the markersize to one pixel expressed in points (72 points per inch divided by the figure's dpi, i.e. dots per inch, gives the number of points per dot, and one dot is one pixel), and remove the lines and marker edges.
Then the solution you tried using markerscale in the legend works nicely.
Complete example:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(12)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for i in range(5):
mean = [np.random.random()*10, np.random.random()*10]
covariance = [ [1 + np.random.random(), np.random.random() - 1], [0, 1 + np.random.random()], ]
covariance[1][0] = covariance[0][1] # must be symmetric
x, y = np.random.multivariate_normal(mean, covariance, 3000).T
plt.plot(x, y, marker='s', markersize=72./fig.dpi, mec="None", ls="None",
label=f'Cluster {i + 1}')
ax.legend(markerscale=12)
fig.tight_layout()
plt.show()
According to this discussion, the markersize has no effect when using pixels (,) as marker. How about generating a custom legend instead? For example, by adapting the first example in this tutorial, one can get a pretty decent legend:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
np.random.seed(12)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for i in range(5):
mean = [np.random.random()*10, np.random.random()*10]
covariance = [ [1 + np.random.random(), np.random.random() - 1], [0, 1 + np.random.random()], ]
covariance[1][0] = covariance[0][1] # must be symmetric
x, y = np.random.multivariate_normal(mean, covariance, 3000).T
plt.plot(x, y, ',', label=f'Cluster {i + 1}')
# Build a custom legend: one solid color patch per plotted cluster, reusing
# the color and label of the corresponding line handle.
handles, labels = ax.get_legend_handles_labels()
patches = [
    mpatches.Patch(color=line.get_color(), label=text)
    for line, text in zip(handles, labels)
]
legend = ax.legend(handles=patches)
fig.tight_layout()
plt.show()
The output would look like this:
I use matplotlib to plot a scatter chart:
And label the bubble using a transparent box according to the tip at How to annotate point on a scatter automatically placed arrow
Here is the code:
if show_annote:
for i in range(len(x)):
annote_text = annotes[i][0][0] # STK_ID
ax.annotate(annote_text, xy=(x[i], y[i]), xytext=(-10,3),
textcoords='offset points', ha='center', va='bottom',
bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.2),
fontproperties=ANNOTE_FONT)
and the resulting plot:
But there is still room for improvement to reduce overlap (for instance the label box offset is fixed as (-10,3)). Are there algorithms that can:
dynamically change the offset of label box according to the crowdedness of its neighbourhood
dynamically place the label box remotely and add an arrow line between bubble and label box
somewhat change the label orientation
label_box overlapping bubble is better than label_box overlapping label_box?
I just want to make the chart easy for human eyes to comprehend, so some overlap is OK; it is not as rigid a constraint as http://en.wikipedia.org/wiki/Automatic_label_placement suggests. The number of bubbles in a chart is less than 150 most of the time.
I find the so called Force-based label placement http://bl.ocks.org/MoritzStefaner/1377729 is quite interesting. I don't know if there is any python code/package available to implement the algorithm.
I am not an academic and am not looking for an optimal solution; my Python code needs to label a great many charts, so speed and memory usage also matter.
I am looking for a quick and effective solution. Any help (code,algorithm,tips,thoughts) on this subject? Thanks.
The following builds on tcaswell's answer.
Networkx layout methods such as nx.spring_layout rescale the positions so that they all fit in a unit square (by default). Even the position of the fixed data_nodes are rescaled. So, to apply the pos to the original scatter_data, an unshifting and unscaling must be performed.
Note also that nx.spring_layout has a k parameter which controls the optimal distance between nodes. As k increases, so does the distance of the annotations from the data points.
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
np.random.seed(2016)
N = 20
scatter_data = np.random.rand(N, 3)*10
def repel_labels(ax, x, y, labels, k=0.01):
    """Annotate points with labels spread apart by a spring layout.

    Every point contributes two graph nodes, a data node and a label node,
    joined by an edge. The data nodes are passed to ``nx.spring_layout`` as
    fixed, the label nodes are free to move, and an arrow is then drawn from
    each label position to its data point.

    Parameters
    ----------
    ax : Axes to annotate. Its x and y limits are reset to enclose all
        data points and labels plus a 15% margin.
    x, y : coordinates of the data points.
    labels : one label per point. Labels are used as graph node keys, so
        they need to be hashable and distinct.
    k : forwarded to ``nx.spring_layout``; larger values place the
        annotations further from their data points.
    """
    G = nx.DiGraph()
    data_nodes = []
    init_pos = {}
    for xi, yi, label in zip(x, y, labels):
        data_str = 'data_{0}'.format(label)
        G.add_node(data_str)
        G.add_node(label)
        G.add_edge(label, data_str)
        data_nodes.append(data_str)
        # label and data node start at the same place; the layout separates them
        init_pos[data_str] = (xi, yi)
        init_pos[label] = (xi, yi)

    pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes, k=k)

    # undo spring_layout's rescaling: fit layout -> data coordinates as a
    # straight line per axis, keeping a separate scale for x and for y
    pos_after = np.vstack([pos[d] for d in data_nodes])
    pos_before = np.vstack([init_pos[d] for d in data_nodes])
    scale_x, shift_x = np.polyfit(pos_after[:, 0], pos_before[:, 0], 1)
    scale_y, shift_y = np.polyfit(pos_after[:, 1], pos_before[:, 1], 1)
    scale = np.array([scale_x, scale_y])
    shift = np.array([shift_x, shift_y])
    for key, val in pos.items():
        pos[key] = (val * scale) + shift

    for label, data_str in G.edges():
        ax.annotate(label,
                    xy=pos[data_str], xycoords='data',
                    xytext=pos[label], textcoords='data',
                    arrowprops=dict(arrowstyle="->",
                                    shrinkA=0, shrinkB=0,
                                    connectionstyle="arc3",
                                    color='red'), )

    # expand limits by 15% of each axis' own span
    # (dict views are not sequences, so build a list for np.vstack)
    all_pos = np.vstack(list(pos.values()))
    span = np.ptp(all_pos, axis=0)
    mins = all_pos.min(axis=0) - span * 0.15
    maxs = all_pos.max(axis=0) + span * 0.15
    ax.set_xlim([mins[0], maxs[0]])
    ax.set_ylim([mins[1], maxs[1]])
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
repel_labels(ax, scatter_data[:, 0], scatter_data[:, 1], labels, k=0.008)
plt.show()
with k=0.011 yields
and with k=0.008 yields
Another option using my library adjustText, written specially for this purpose (https://github.com/Phlya/adjustText).
from adjustText import adjust_text
np.random.seed(2016)
N = 50
scatter_data = np.random.rand(N, 3)
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
texts = []
for x, y, text in zip(scatter_data[:, 0], scatter_data[:, 1], labels):
texts.append(ax.text(x, y, text))
plt.show()
np.random.seed(2016)
N = 50
scatter_data = np.random.rand(N, 3)
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
texts = []
for x, y, text in zip(scatter_data[:, 0], scatter_data[:, 1], labels):
texts.append(ax.text(x, y, text))
adjust_text(texts, force_text=0.05, arrowprops=dict(arrowstyle="-|>",
color='r', alpha=0.5))
plt.show()
It doesn't repel from the bubbles, only from their centers and other texts.
It is a little rough around the edges (I can't quite figure out how to scale the relative strengths of the spring network vs the repulsive force, and the bounding box is a bit screwed up), but this is a decent start:
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

# rand, gca and draw were used unqualified and never imported in the original
# snippet; they are written out via numpy/pyplot so it runs as a script.
N = 15
scatter_data = np.random.rand(3, N)  # rows are used as x, y and color/size

# Each point gets a fixed data node and a movable annotation node, joined by
# an edge, so the layout pushes annotations apart while keeping them attached.
G = nx.Graph()
data_nodes = []
init_pos = {}
for j, b in enumerate(scatter_data.T):
    x, y, _ = b
    data_str = 'data_{0}'.format(j)
    ano_str = 'ano_{0}'.format(j)
    G.add_node(data_str)
    G.add_node(ano_str)
    G.add_edge(data_str, ano_str)
    data_nodes.append(data_str)
    init_pos[data_str] = (x, y)
    init_pos[ano_str] = (x, y)

pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes)

ax = plt.gca()
ax.scatter(scatter_data[0], scatter_data[1], c=scatter_data[2], s=scatter_data[2]*150)
for j in range(N):
    data_str = 'data_{0}'.format(j)
    ano_str = 'ano_{0}'.format(j)
    ax.annotate(ano_str,
                xy=pos[data_str], xycoords='data',
                xytext=pos[ano_str], textcoords='data',
                arrowprops=dict(arrowstyle="->",
                                connectionstyle="arc3"))

# fit the axes limits around data points and annotations alike
# (dict views are not sequences, so build a list for np.vstack)
all_pos = np.vstack(list(pos.values()))
mins = np.min(all_pos, 0)
maxs = np.max(all_pos, 0)
ax.set_xlim([mins[0], maxs[0]])
ax.set_ylim([mins[1], maxs[1]])
plt.show()
How well it works depends a bit on how your data is clustered.
We can use Plotly for this. It will not resolve overlapping labels automatically when there is a lot of data, but the figure is interactive, so you can zoom in and out to read them.
import plotly.express as px
# Gapminder rows for the Americas in 2007; each country becomes one labelled point.
# (The earlier px.data.tips() load was overwritten immediately and is dropped.)
df = px.data.gapminder().query("year==2007 and continent=='Americas'")
fig = px.scatter(df, x="gdpPercap", y="lifeExp", text="country", log_x=True, size_max=100, color="lifeExp",
                 title="Life Expectency")
# place each country name above its marker
fig.update_traces(textposition='top center')
fig.show()
Output:
Just created another quick solution that is also very fast: textalloc
In this case you could do something like this:
import textalloc as ta
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(2022)
N = 30
scatter_data = np.random.rand(N, 3)*10
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1], c=scatter_data[:, 2], s=scatter_data[:, 2] * 50, zorder=10,alpha=0.5)
# Build the label strings once; text_list is the name handed to allocate_text
# and refers to the same list as labels.
labels = ['ano-{}'.format(i) for i in range(N)]
text_list = labels
ta.allocate_text(fig,ax,scatter_data[:, 0],scatter_data[:, 1],
text_list,
x_scatter=scatter_data[:, 0], y_scatter=scatter_data[:, 1],
max_distance=0.2,
min_distance=0.04,
margin=0.039,
linewidth=0.5,
nbr_candidates=400)
plt.show()
Are there libraries or methods in python that are capable of creating plots that look like this? (preferably based around MatPlotLib for the sake of embedding the plots in HTML pages)
My goal is to create 3D renderings of data that is read from a Neo4J database and model them as the cylinders above.
The code below attempts to create a similar 3D plot (not cylindrical but rectangular) with legends from a dataframe. The plot is interactive. Resources: 1, 2, 3, 4 (Jupyter Notebook 5.0.0, Python 3.6.6)
Import libraries
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from mpl_toolkits.mplot3d import axes3d
import matplotlib.patches as mpatches # for legends
%matplotlib notebook
Create a sample dataframe
# Create two sets of identical xpos and ypos
# so that the z-values are plotted at the same location for stacking
xtemp = np.random.randint(1, 10, size=5)
ytemp = np.random.randint(1, 10, size=5)
df = pd.DataFrame({
# category
'season': ['S1']*5 + ['S2']*5 + ['S3']*5,
#'wins': np.random.randint(1, 10, size=15),
# define pos
'xpos' : list(xtemp)+list(xtemp)+list(xtemp),
'ypos' : list(ytemp)+list(ytemp)+list(ytemp),
'zpos' : np.zeros(15),
# define delta
'dx': 0.8*np.ones(15),
'dy': 0.8*np.ones(15),
'dz': np.random.randint(1, 5, size=15), #np.ones(15)
})
df.head(5)
Plot the figure
Note: The figure has two parts: (1) a 2D plot for the N-S and E-W lines and (2) a 3D bar plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# ..................
# Line-1 on x-y plane
x = [4, 4]
y = [-3, 12]
ax.plot(x, y, zs=0, zdir='z', color='orange', alpha=0.8)
# Line-2 on x-y plane
y = [4, 4]
x = [-3, 12]
ax.plot(x, y, zs=0, zdir='z', color='blue', alpha=0.5)
# Create multiple overlapping plots within a loop
color = ['#6495ED', '#6E8B3D', '#FFB90F']
slist = ['S1', 'S2', 'S3']
stack_zpos = pd.Series(np.zeros(5))
for i in range(0,3):
q = df[df['season']==slist[i]].reset_index(inplace=False)
ax.bar3d(q.xpos, q.ypos, stack_zpos, q.dx, q.dy, q.dz, color=color[i], alpha=1)
stack_zpos += q.dz # values added here for stacking
Annotate lines and remove z-axis panes and grid lines
# Hide the x/y panes, the grid lines and the z axis so that only the bars and
# the two lines on the x-y plane remain visible.
# The w_xaxis / w_yaxis / w_zaxis aliases are not available in recent
# Matplotlib releases; xaxis / yaxis / zaxis refer to the same axis objects.
alpha = 0
# fully transparent white panes
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, alpha))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, alpha))
# grid: transparent on z, zero line width on x and y
# NOTE(review): _axinfo is a private attribute and may change between versions
ax.zaxis._axinfo["grid"]['color'] = (1.0, 1.0, 1.0, alpha)
ax.yaxis._axinfo["grid"]['linewidth'] = 0
ax.xaxis._axinfo["grid"]['linewidth'] = 0
# z axis: no axis line, no ticks
ax.zaxis.line.set_lw(0.)
ax.set_zticks([])
#
ax.set_zlabel("") # remove z-axis label 'z'
# ..........
# Annotate the N, S, E, W lines on the x-y plane
zdirs = (None, 'x', 'y', 'z', (1, 1, 0), (1, 1, 1))
xs = (4, 4, -3, 12)
ys = (-3,12, 4, 4)
zs = (0, 0, 0, 0)
i=0 # Counter
nsew = ['N', 'S', 'E', 'W'] # list of labels
for zdir, x, y, z in zip(zdirs, xs, ys, zs):
label = '{0}'.format(nsew[i])
#label = 'N, S, E, W' #% (x, y, z, zdir)
ax.text(x, y, z, label, zdir)
i +=1
Create and add legends to the plot
# Add legend
patch1 = mpatches.Patch(color=color[0], label=slist[0])
patch2 = mpatches.Patch(color=color[1], label=slist[1])
patch3 = mpatches.Patch(color=color[2], label=slist[2])
plt.legend(handles=[patch1, patch2,patch3])
Visualize plot
plt.show()
I have two vectors, one with values and one with class labels like 1,2,3 etc.
I would like to plot all the points that belong to class 1 in red, to class 2 in blue, to class 3 in green etc. How can I do that?
The accepted answer has it spot on, but if you might want to specify which class label should be assigned to a specific color or label you could do the following. I did a little label gymnastics with the colorbar, but making the plot itself reduces to a nice one-liner. This works great for plotting the results from classifications done with sklearn. Each label matches a (x,y) coordinate.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
x = [4,8,12,16,1,4,9,16]
y = [1,4,9,16,4,8,12,3]
label = [0,1,2,3,0,1,2,3]
colors = ['red','green','blue','purple']
fig = plt.figure(figsize=(8,8))
plt.scatter(x, y, c=label, cmap=matplotlib.colors.ListedColormap(colors))
cb = plt.colorbar()
loc = np.arange(0,max(label),max(label)/float(len(colors)))
cb.set_ticks(loc)
cb.set_ticklabels(colors)
Using a slightly modified version of this answer, one can generalise the above for N colors as follows:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
N = 23 # Number of labels
n_points = 1000 # Number of scattered points
# setup the plot
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
# define the data
x = np.random.rand(n_points)
y = np.random.rand(n_points)
tag = np.random.randint(0, N, n_points) # Tag each point with a corresponding label
# one marker size per point: scatter requires s to be a scalar or to have the
# same size as x and y, so it must not be drawn with N entries
sizes = np.random.randint(100, 500, n_points)
# define the colormap
cmap = plt.cm.jet
# extract all colors from the .jet map
cmaplist = [cmap(i) for i in range(cmap.N)]
# create the new map
cmap = mpl.colors.LinearSegmentedColormap.from_list('Custom cmap', cmaplist, cmap.N)
# define the bins and normalize: N + 1 edges give one color bin per label
bounds = np.linspace(0, N, N + 1)
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
# make the scatter
scat = ax.scatter(x, y, c=tag, s=sizes, cmap=cmap, norm=norm)
# create the colorbar
cb = plt.colorbar(scat, spacing='proportional', ticks=bounds)
cb.set_label('Custom cbar')
ax.set_title('Discrete color mappings')
plt.show()
Which gives:
Assuming that you have your data in a 2d array, this should work:
import numpy
import pylab
xy = numpy.zeros((2, 1000))
xy[0] = range(1000)
xy[1] = range(1000)
colors = [int(i % 23) for i in xy[0]]
pylab.scatter(xy[0], xy[1], c=colors)
pylab.show()
You can also set a cmap attribute to control which colors will appear through use of a colormap; i.e. replace the pylab.scatter line with:
pylab.scatter(xy[0], xy[1], c=colors, cmap=pylab.cm.cool)
A list of color maps can be found
here
A simple solution is to assign color for each class. This way, we can control how each color is for each class. For example:
arr1 = [1, 2, 3, 4, 5]
arr2 = [2, 3, 3, 4, 4]
labl = [0, 1, 1, 0, 0]
color= ['red' if l == 0 else 'green' for l in labl]
plt.scatter(arr1, arr2, color=color)