Related
I wish to color all points in a scatter plot between 2 x values a different color to the rest of the plot. I understand I can do something such as the following:
import matplotlib.pyplot as plt
import numpy as np
# Reproducible sample data: 50 x positions and two random series.
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
# Exclusive index bounds of the points that should be drawn in red.
want_to_color = [20,40]
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# One colour entry per point, for each of the two series.
c_array1 = []
c_array2 = []
for i in range(50):
    if want_to_color[0] < i < want_to_color[1]:
        # Inside the highlighted range: both series are red.
        c_array1.append('r')
        c_array2.append('r')
    else:
        # Outside the range: each series keeps its own default cycle colour.
        c_array1.append(color_cycle[0])
        c_array2.append(color_cycle[1])
plt.scatter(x, y1, c=c_array1)
plt.scatter(x, y2, c=c_array2)
plt.show()
Which produces:
But as you can see, this is very memory inefficient, needing a list the size of the data set in order to color all points, when it could simply be 2 x values. I just wanted to know if there is a more efficient way of doing this.
EDIT:
I just thought of a method of doing this using generators, which would be a really nice clean solution. Unfortunately:
RuntimeError: matplotlib does not support generators as input
Just because I wrote it anyway, here's the code I tried to use:
import matplotlib.pyplot as plt
import numpy as np
col = lambda x, x1, x2, n : map(lambda v: 'r' if v > x1 and v < x2 else plt.rcParams['axes.prop_cycle'].by_key()['color'][n], x)
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
plt.scatter(x, y1, c=col(x, 20, 40, 0))
plt.scatter(x, y2, c=col(x, 20, 40, 1))
plt.show()
NOTE: The code above actually throws the error TypeError: object of type 'map' has no len(), but even converting this to the less clean iterable equivalent still doesn't work, as matplotlib simply won't accept generators.
Here is a more efficient way of creating labels for the scatter plot:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
want_to_color = [20,40]
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
c = np.zeros(50)
c[want_to_color[0]+1 : want_to_color[1]] = 1
colors1 = ListedColormap([color_cycle[0], 'r'])
colors2 = ListedColormap([color_cycle[1], 'r'])
plt.scatter(x, y1, c=c, cmap=colors1)
plt.scatter(x, y2, c=c, cmap=colors2)
plt.show()
This gives:
If you prefer not to create an array of labels at all then you can try this:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
want_to_color = [20,40]
sl = slice(want_to_color[0]+1, want_to_color[1])
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
plt.scatter(x, y1, c=color_cycle[0])
plt.scatter(x, y2, c=color_cycle[1])
plt.scatter(x[sl], y1[sl], c='r')
plt.scatter(x[sl], y2[sl], c='r')
The resulting image is:
I am trying to shade the area before the point of intersection of the two curves produced by this example code:
import numpy as np
import matplotlib.pyplot as plt
# Sample data: two curves over the same x grid that cross once.
x = np.arange(0,100,10)
y1 = [0,2,4,6,8,5,4,3,2,1]
y2 = [0,1,3,5,6,8,9,12,13,14]
fig = plt.figure()
ax = fig.add_subplot(111)
# Plot against x; the previously used name t_list is never defined and
# raised NameError.
ax.plot(x,y1,linestyle='-')
ax.plot(x,y2,linestyle='--')
plt.show()
Simply using:
ax.fill_between(x,y1,y2,where=y1>=y2,color='grey',alpha='0.5')
Does not work and gives the following error: "ValueError: Argument dimensions are incompatible"
I tried to convert the lists into arrays:
z1 = np.array(y1)
z2 = np.array(y2)
Then:
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey',alpha='0.5')
Not the entire area was shaded.
I know I have to find the point of intersection between the two curves by interpolating but have not seen a simple way to do it.
You are completely right, you need to interpolate. And that is ludicrously complicated, as you need to add the interpolate=True keyword argument to the call to fill_between.
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey', interpolate=True)
Complete code to reproduce:
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0,100,10)
y1 = [0,2,4,6,8,5,4,3,2,1]
y2 = [0,1,3,5,6,8,9,12,13,14]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y1,linestyle='-')
ax.plot(x,y2,linestyle='--')
z1 = np.array(y1)
z2 = np.array(y2)
ax.fill_between(x,y1,y2,where=z1>=z2,color='grey',alpha=0.5, interpolate=True)
plt.show()
I have 2 line plots on the same figure, plotted from pandas dataframes.
I want to fill between them with a gradient/colour map of sorts.
I understand I can do this with a cmap, only it will not work for me (see code below).
The general examples I found fill between the x axis and a line; I do not want that. I am also interested in the simplest solution possible, as I am a beginner at this, and more complicated code, though maybe better, will honestly just make it more confusing.
Code for which fill is plain blue:
import matplotlib.pyplot as plt
import pandas as pd
ax = plt.gca()
df0.plot(kind='line', x='something', y='other', color='orange', ax=ax, legend=False, figsize=(20,10))
df1.plot(kind='line', x='something', y='other2', color='c', ax=ax, legend=False, figsize=(20,10))
ax.fill_between(x=df0['daysInAYear'], y1=df0['other'], y2 = df1['other2'], alpha=0.2, cmap=plt.cm.get_cmap("winter"))
plt.show()
EDIT/UPDATE: DATA EXAMPLE
other is ALWAYS >= other2
other other2 something (same for both)
15.6 -16.0 1
13.9 -26.7 2
13.3 -26.7 3
10.6 -26.1 4
12.8 -15.0 5
Final graph example:
I would like the fill to go from orange on top to blue at the bottom
Edit
In response to the edited question, here is an alternative approach which does the gradient vertically but doesn't use imshow.
import matplotlib.pyplot as plt
from matplotlib import colors, patches
import numpy as np
import pandas as pd
n = 100   # number of samples along x
nc = 100  # number of colour bands stacked between the two curves
x = np.linspace(0, np.pi*5, n)
# Two random walks starting at -50 and +50 serve as the lower/upper curves.
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
    y1.append(y1[ii-1] + (np.random.random()-0.3)*3)
    y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
# z holds one colour value per band; it is normalised onto the colormap.
z = np.linspace(0, 10, nc)
normalize = colors.Normalize(vmin=z.min(), vmax=z.max())
# plt.get_cmap replaces matplotlib.cm.get_cmap, which newer Matplotlib removed.
cmap = plt.get_cmap('winter')
fig, ax = plt.subplots(1)
# Iterate over consecutive x pairs. The loop bound uses the data defined here
# rather than a DataFrame `df`, which does not exist in this snippet.
for ii in range(len(x) - 1):
    # Band edges at the left (y) and right (yn) side of this x interval.
    y = np.linspace(y1[ii], y2[ii], nc)
    yn = np.linspace(y1[ii+1], y2[ii+1], nc)
    for kk in range(nc - 1):
        # One quadrilateral per band, coloured by the band's z value.
        p = patches.Polygon([[x[ii], y[kk]],
                             [x[ii+1], yn[kk]],
                             [x[ii+1], yn[kk+1]],
                             [x[ii], y[kk+1]]], color=cmap(normalize(z[kk])))
        ax.add_patch(p)
plt.plot(x, y1, 'k-', lw=1)
plt.plot(x, y2, 'k-', lw=1)
plt.show()
The idea here being similar to that in my original answer, except the trapezoids are divided into nc pieces and each piece is colored separately. This has the advantage of scaling correctly for varying y1[ii], y2[ii] distances, as shown in this comparison,
It does, however, have the disadvantages of being much, much slower than imshow or the horizontal gradient method and of being unable to handle 'crossing' correctly.
The code to generate the second image in the above comparison:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import patches
from matplotlib.path import Path
# Number of samples along x. The snippet used n without defining it; 100
# matches the sample size of the polygon-based example (assumed).
n = 100
x = np.linspace(0, 10, n)
# Two random walks starting at -50 and +50 serve as the lower/upper curves.
y1 = [-50.0]
y2 = [50.0]
for ii in range(1, n):
    y1.append(y1[ii-1] + (np.random.random()-0.2)*3)
    y2.append(y2[ii-1] + (np.random.random()-0.5)*3)
y1 = np.array(y1)
y2 = np.array(y2)
# Closed outline: along y1 left-to-right, then back along y2 right-to-left.
verts = np.vstack([np.stack([x, y1], 1), np.stack([np.flip(x), np.flip(y2)], 1)])
path = Path(verts)
# Fully transparent patch; it is only used as the clip region for the image.
patch = patches.PathPatch(path, facecolor='k', lw=2, alpha=0.0)
plt.gca().add_patch(patch)
# A 10x1 ramp stretched over the extent gives the vertical gradient.
plt.imshow(np.arange(10).reshape(10,-1), cmap=plt.cm.winter, interpolation="bicubic",
           origin='upper', extent=[0,10,-60,60], aspect='auto', clip_path=patch,
           clip_on=True)
plt.show()
Original
This is a bit of a hack, partly based on the answers in this question. It does seem to work fairly well but works best with higher density along the x axis. The idea is to call fill_between separately for each trapezoid corresponding to x pairs, [x[ii], x[ii+1]]. Here is a complete example using some generated data
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import pandas as pd
n = 1000  # a dense x grid makes the per-trapezoid colouring look continuous
X = np.linspace(0, np.pi*5, n)
Y1 = np.sin(X)
Y2 = np.cos(X)
# Z is the value mapped to colour; here it simply increases along x.
Z = np.linspace(0, 10, n)
normalize = colors.Normalize(vmin=Z.min(), vmax=Z.max())
# plt.get_cmap replaces matplotlib.cm.get_cmap, which newer Matplotlib removed.
cmap = plt.get_cmap('winter')
df = pd.DataFrame({'x': X, 'y1': Y1, 'y2': Y2, 'z': Z})
x = df['x'].values
y1 = df['y1'].values
y2 = df['y2'].values
z = df['z'].values
# Fill one trapezoid per consecutive x pair, coloured by z at its left edge.
for ii in range(len(x) - 1):
    plt.fill_between([x[ii], x[ii+1]], [y1[ii], y1[ii+1]],
                     [y2[ii], y2[ii+1]], color=cmap(normalize(z[ii])))
plt.plot(x, y1, 'k-', x, y2, 'k-')
plt.show()
This can be generalized to a 2 dimensional color grid but would require non-trivial modification
I want to create a plot for two different datasets similar to the one presented in this answer:
In the above image, the author managed to fix the overlapping problem of the error bars by adding some small random scatter in x to the new dataset.
In my problem, I must plot a similar graphic, but having some categorical data in the x axis:
Any ideas on how to slightly move the error bars of the second dataset when using categorical variables on the x axis? I want to avoid the overlap between the bars to make the visualization easier to read.
You can translate each errorbar by adding the default data transform to a prior translation in data space. This is possible when knowing that categories are in general one data unit away from each other.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.transforms import Affine2D
x = list("ABCDEF")
y1, y2 = np.random.randn(2, len(x))
yerr1, yerr2 = np.random.rand(2, len(x))*4+0.3
fig, ax = plt.subplots()
trans1 = Affine2D().translate(-0.1, 0.0) + ax.transData
trans2 = Affine2D().translate(+0.1, 0.0) + ax.transData
er1 = ax.errorbar(x, y1, yerr=yerr1, marker="o", linestyle="none", transform=trans1)
er2 = ax.errorbar(x, y2, yerr=yerr2, marker="o", linestyle="none", transform=trans2)
plt.show()
Alternatively, you could translate the errorbars after applying the data transform and hence move them in units of points.
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
from matplotlib.transforms import ScaledTranslation
x = list("ABCDEF")
y1, y2 = np.random.randn(2, len(x))
yerr1, yerr2 = np.random.rand(2, len(x))*4+0.3
fig, ax = plt.subplots()
trans1 = ax.transData + ScaledTranslation(-5/72, 0, fig.dpi_scale_trans)
trans2 = ax.transData + ScaledTranslation(+5/72, 0, fig.dpi_scale_trans)
er1 = ax.errorbar(x, y1, yerr=yerr1, marker="o", linestyle="none", transform=trans1)
er2 = ax.errorbar(x, y2, yerr=yerr2, marker="o", linestyle="none", transform=trans2)
plt.show()
While results look similar in both cases, they are fundamentally different. You will observe this difference when interactively zooming the axes or changing the figure size.
Consider the following approach to highlight plots - combination of errorbar and fill_between with non-zero transparency:
import random
import matplotlib.pyplot as plt
# create sample data
N = 8
data_1 = {
    'x': list(range(N)),
    'y': [10. + random.random() for dummy in range(N)],
    'yerr': [.25 + random.random() for dummy in range(N)]}
data_2 = {
    'x': list(range(N)),
    'y': [10.25 + .5 * random.random() for dummy in range(N)],
    'yerr': [.5 * random.random() for dummy in range(N)]}
# plot
plt.figure()
# only errorbar
plt.subplot(211)
for data in [data_1, data_2]:
    plt.errorbar(**data, fmt='o')
# errorbar + fill_between
plt.subplot(212)
for data in [data_1, data_2]:
    plt.errorbar(**data, alpha=.75, fmt=':', capsize=3, capthick=1)
    # Shaded band spanning y - yerr .. y + yerr. It is kept in its own name so
    # the loop variable is not rebound while iterating.
    band = {
        'x': data['x'],
        'y1': [y - e for y, e in zip(data['y'], data['yerr'])],
        'y2': [y + e for y, e in zip(data['y'], data['yerr'])]}
    plt.fill_between(**band, alpha=.25)
Result:
There is an example on the library site: https://matplotlib.org/stable/gallery/lines_bars_and_markers/errorbar_subsample.html
enter image description here
You need the parameter errorevery=(m, n),
where n is how often to plot the error lines and m is the shift, in the range from 0 to n.
I'm making some scatterplots using Matplotlib (python 3.4.0, matplotlib 1.4.3, running on Linux Mint 17). It's easy enough to set alpha transparency for each point individually; is there any way to set them as a group, so that two overlapping points from the same group don't change the color?
Example code:
import matplotlib.pyplot as plt
import numpy as np
def points(n=100):
    """Return a pair ``(x, y)`` of arrays with ``n`` uniform random samples each.

    Both arrays are drawn with ``np.random.uniform(size=n)``, x first and
    then y.
    """
    x = np.random.uniform(size=n)
    y = np.random.uniform(size=n)
    return x, y
x1, y1 = points()
x2, y2 = points()
fig = plt.figure(figsize=(4,4))
ax = fig.add_subplot(111, title="Test scatter")
ax.scatter(x1, y1, s=100, color="blue", alpha=0.5)
ax.scatter(x2, y2, s=100, color="red", alpha=0.5)
fig.savefig("test_scatter.png")
Results in this output:
but I want something more like this one:
I can workaround by saving as SVG and manually grouping then in Inkscape, then setting transparency, but I'd really prefer something I can code. Any suggestions?
Yes, interesting question. You can get this scatterplot with Shapely. Here is the code :
import matplotlib.pyplot as plt
import matplotlib.patches as ptc
import numpy as np
from shapely.geometry import Point
from shapely.ops import unary_union

n = 100      # points per group
size = 0.02  # disc radius in data units
alpha = 0.5  # transparency applied once per merged shape


def points():
    """Return a pair ``(x, y)`` of arrays with ``n`` uniform random samples each."""
    x = np.random.uniform(size=n)
    y = np.random.uniform(size=n)
    return x, y


x1, y1 = points()
x2, y2 = points()
# One small disc per point ...
polygons1 = [Point(x1[i], y1[i]).buffer(size) for i in range(n)]
polygons2 = [Point(x2[i], y2[i]).buffer(size) for i in range(n)]
# ... merged per group so overlapping discs of the same group become one shape.
# unary_union replaces the deprecated cascaded_union.
polygons1 = unary_union(polygons1)
polygons2 = unary_union(polygons2)
fig = plt.figure(figsize=(4,4))
ax = fig.add_subplot(111, title="Test scatter")
for merged, facecolor in ((polygons1, "red"), (polygons2, "blue")):
    # A union is normally a MultiPolygon (iterate its .geoms), but it collapses
    # to a single Polygon when every disc of the group overlaps.
    for part in getattr(merged, "geoms", [merged]):
        # exterior.coords gives the outline vertices as (x, y) pairs.
        outline = ptc.Polygon(np.array(part.exterior.coords),
                              facecolor=facecolor, lw=0, alpha=alpha)
        ax.add_patch(outline)
ax.axis([-0.2, 1.2, -0.2, 1.2])
fig.savefig("test_scatter.png")
and the result is :
Interesting question, I think any use of transparency will result in the stacking effect you want to avoid. You could manually set a transparency type colour to get closer to the results you want,
import matplotlib.pyplot as plt
import numpy as np
def points(n=100):
    """Return a pair ``(x, y)`` of arrays with ``n`` uniform random samples each.

    Both arrays are drawn with ``np.random.uniform(size=n)``, x first and
    then y.
    """
    x = np.random.uniform(size=n)
    y = np.random.uniform(size=n)
    return x, y
x1, y1 = points()
x2, y2 = points()
fig = plt.figure(figsize=(4,4))
ax = fig.add_subplot(111, title="Test scatter")
alpha = 0.5
ax.scatter(x1, y1, s=100, lw = 0, color=[1., alpha, alpha])
ax.scatter(x2, y2, s=100, lw = 0, color=[alpha, alpha, 1.])
plt.show()
The overlap between the different colours is not included in this way, but you get:
This is a terrible, terrible hack, but it works.
You see while Matplotlib plots data points as separate objects that can overlap, it plots the line between them as a single object - even if that line is broken into several pieces by NaNs in the data.
With that in mind, you can do this:
import numpy as np
from matplotlib import pyplot as plt
plt.rcParams['lines.solid_capstyle'] = 'round'
def expand(x, y, gap=1e-4):
    """Turn every point into a tiny line segment followed by a NaN break.

    Each input point ``(x[i], y[i])`` becomes three output entries: the point
    itself, the point shifted by ``gap`` in both x and y, and NaN. Plotted as
    a line with round caps, each short segment shows up as a dot, and the NaN
    breaks keep the dots disconnected.

    Parameters
    ----------
    x, y : array-like of equal length
        Point coordinates.
    gap : float
        Length of the segment in each coordinate.

    Returns
    -------
    (x1, y1) : tuple of arrays, each three times as long as the input.
    """
    add = np.tile([0, gap, np.nan], len(x))
    x1 = np.repeat(x, 3) + add
    y1 = np.repeat(y, 3) + add
    return x1, y1
# points() is not defined in this snippet; it is the helper from the
# question's example code.
x1, y1 = points()
x2, y2 = points()
fig = plt.figure(figsize=(4,4))
ax = fig.add_subplot(111, title="Test scatter")
# Each group is drawn with a single plot() call on the expanded data, i.e. as
# one line made of many NaN-separated pieces, with one alpha for the group.
ax.plot(*expand(x1, y1), lw=20, color="blue", alpha=0.5)
ax.plot(*expand(x2, y2), lw=20, color="red", alpha=0.5)
fig.savefig("test_scatter.png")
plt.show()
And each color will overlap with the other color but not with itself.
One caveat is that you have to be careful with the spacing between the two points you use to make each circle. If they're too far apart then the separation will be visible on your plot, but if they're too close together, matplotlib doesn't plot the line at all. That means that the separation needs to be chosen based on the range of your data, and if you plan to make an interactive plot then there's a risk of all the data points suddenly vanishing if you zoom out too much, and stretching if you zoom in too much.
As you can see, I found 1e-4 (the default gap in the code above) to be a good separation for data with a range of [0,1].
Just pass an argument saying edgecolors='none' to plt.scatter()
Here's a hack if you have more than just a few points to plot. I had to plot >500000 points, and the shapely solution does not scale well. I also wanted to plot a different shape other than a circle. I opted to instead plot each layer separately with alpha=1 and then read in the resulting image with np.frombuffer (as described here), then add the alpha to the whole image and plot overlays using plt.imshow. Note this solution forfeits access to the original fig object and attributes, so any other modifications to figure should be made before it's drawn.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
def arr_from_fig(fig):
    """Render ``fig`` with the Agg backend and return it as an RGB array.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to rasterise. An Agg canvas is attached to it.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(height, width, 3)``, with the top image
        row first.
    """
    canvas = FigureCanvas(fig)
    canvas.draw()
    # buffer_rgba() replaces tostring_rgb(), which newer Matplotlib no longer
    # provides. It exposes an (height, width, 4) RGBA buffer.
    rgba = np.asarray(canvas.buffer_rgba())
    # Drop the alpha channel and copy, so the result does not alias the
    # canvas buffer once the figure is closed.
    img = rgba[..., :3].copy()
    return img
def points(n=100):
    """Return a pair ``(x, y)`` of arrays with ``n`` uniform random samples each.

    Both arrays are drawn with ``np.random.uniform(size=n)``, x first and
    then y.
    """
    x = np.random.uniform(size=n)
    y = np.random.uniform(size=n)
    return x, y
x1, y1 = points()
x2, y2 = points()
imgs = list()
figsize = (4, 4)
dpi = 200
# Render each group on its own figure, fully opaque, and keep the RGB raster.
for x, y, c in zip([x1, x2], [y1, y2], ['blue', 'red']):
    fig = plt.figure(figsize=figsize, dpi=dpi, tight_layout={'pad':0})
    ax = fig.add_subplot(111)
    ax.scatter(x, y, s=100, color=c, alpha=1)
    ax.axis([-0.2, 1.2, -0.2, 1.2])
    ax.axis('off')
    imgs.append(arr_from_fig(fig))
    plt.close()
fig = plt.figure(figsize=figsize)
alpha = 0.5
alpha_scaled = 255*alpha
for img in imgs:
    # Pure-white pixels are background and become fully transparent; every
    # other pixel gets the same group-wide alpha.
    img_alpha = np.where((img == 255).all(-1), 0, alpha_scaled).reshape([*img.shape[:2], 1])
    img_show = np.concatenate([img, img_alpha], axis=-1).astype(int)
    # The raster's first row is the TOP of the rendered figure, while
    # origin='lower' puts row 0 at the bottom. Reverse the rows so the data
    # is not mirrored vertically relative to the tick labels below.
    plt.imshow(img_show[::-1], origin='lower')
# Relabel pixel positions with the data range used when rendering.
ticklabels = ['{:03.1f}'.format(i) for i in np.linspace(-0.2, 1.2, 8, dtype=np.float16)]
plt.xticks(ticks=np.linspace(0, dpi*figsize[0], 8), labels=ticklabels)
plt.yticks(ticks=np.linspace(0, dpi*figsize[1], 8), labels=ticklabels)
plt.title('Test scatter')
I encountered the same issue recently. In my case there are too many points very close to each other: with, say, 100 points of alpha 0.3 on top of each other, the alpha of the color in the generated image is almost 1. So instead of setting the alpha value in the cmap or scatter, I save the plot to a Pillow image and set the alpha channel there. My code:
import io
import os
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from matplotlib import colors
from PIL import Image
from dhi_base import DHIBase
class HeatMapPlot(DHIBase):
    """Render flooding arrays as cropped PNG heat maps with one uniform alpha.

    Points are drawn fully opaque and the transparency is applied afterwards
    on the saved image, so stacked points do not accumulate opacity.

    NOTE(review): ``npy_dir``, ``img_dir``, ``grid2D`` and ``logger`` are read
    from ``self`` but not set here; presumably DHIBase provides them. Confirm.
    """

    def __init__(self) -> None:
        super().__init__()
        # these 4 values are precalculated
        top=75
        left=95
        width=1314
        height=924
        # (left, upper, right, lower) box passed to Image.crop
        self.crop_box = (left, top, left+width, top+height)
        # alpha 0.5, [0-255]
        self.alpha = 128

    def get_cmap(self):
        """Return the 512-level 'water_level' colormap built from the stops in ``v``."""
        v = [
            ...  # placeholder left in the original post; the colour stops go here
        ]
        return colors.LinearSegmentedColormap.from_list(
            'water_level', v, 512)

    def png3857(self):
        """Generate flooding images.

        For every file in ``self.npy_dir`` whose name starts with 'flooding',
        scatter the values above 0.001 at the coordinates stored in
        'myfilename.npy', crop the rendered PNG to ``self.crop_box``, set every
        non-transparent pixel to ``self.alpha`` and save it under
        ``self.img_dir``.
        """
        muids = np.load(os.path.join(self.npy_dir, 'myfilename.npy'))
        cmap = self.get_cmap()
        for npyf in os.listdir(self.npy_dir):
            if not npyf.startswith('flooding'):
                continue
            flooding_num = np.load(os.path.join(self.npy_dir, npyf))
            image_file = os.path.join(self.img_dir, npyf.replace('npy', 'png'))
            # if os.path.isfile(image_file):
            #     continue
            # Keep only water levels above 0.001. masked_where flags exactly
            # those entries, and indexing with the mask selects them along
            # with the matching coordinate rows.
            masked_arr = ma.masked_where(flooding_num > 0.001, flooding_num)
            flooding_masked = flooding_num[masked_arr.mask]
            muids_masked = muids[masked_arr.mask, :]
            plt.figure(figsize=(self.grid2D['numJ'] / 500, self.grid2D['numK'] / 500))
            plt.axis('off')
            plt.tight_layout()
            # Draw fully opaque; the transparency is applied on the image below.
            plt.scatter(muids_masked[:, 0], muids_masked[:, 1], s=0.1, c=flooding_masked,
                        alpha=1, edgecolors='none', linewidths=0,
                        cmap=cmap,
                        vmin=0, vmax=1.5)
            img_buf = io.BytesIO()
            plt.savefig(img_buf, transparent=True, dpi=200, format='png')#, pad_inches=0)
            plt.clf()
            plt.close()
            img_buf.seek(0)
            img = Image.open(img_buf)
            # Cropped image of above dimension
            # (It will not change original image)
            img = img.crop(self.crop_box)
            alpha_channel = img.getchannel('A')
            # Make all opaque pixels into semi-opaque
            alpha_channel = alpha_channel.point(lambda a: self.alpha if a>0 else 0)
            img.putalpha(alpha_channel)
            img.save(image_file)
            self.logger.info("PNG saved to {}".format(image_file))
# Script entry point: build the plotter and render all flooding images.
if __name__ == "__main__":
    hp = HeatMapPlot()
    hp.png3857()