Connect points with horizontal lines - python

The goal is to fill the space between two arrays y1 and y2, similar to matplotlib's fill_between. But I don't want to fill the space with a polygon (for example with hatch='|'), but rather I want to draw the vertical lines only between the data points of the two arrays.
import matplotlib.pyplot as plt
import numpy as np
# Demo data: two random series of length n, the second shifted up by 1.
n = 10
y1 = np.random.random(n)
y2 = np.random.random(n) + 1
x1 = np.arange(n)
# The snippet used `ax` without ever creating it; make the axes explicitly.
fig, ax = plt.subplots()
# Hatched polygon between the two series (the effect the question wants to
# replace with individual vertical lines).
ax.fill_between(x1, y1, y2, facecolor='w', hatch='|')

Using a LineCollection could be handy if there are lots of lines in the game. Similar to the other answer, but less expensive:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
def draw_lines_between(*, x1=None, x2=None, y1, y2, ax=None, **kwargs):
    """Connect paired data points with segments held in one LineCollection.

    Segment ``i`` runs from ``(x1[i], y1[i])`` to ``(x2[i], y2[i])``.

    Parameters
    ----------
    x1 : array-like, optional
        x positions of the segment starts; defaults to ``0 .. len(y1) - 1``.
    x2 : array-like, optional
        x positions of the segment ends; defaults to ``x1``, which makes
        every segment vertical.
    y1, y2 : array-like
        y positions of the segment starts and ends.
    ax : optional
        Axes to draw on; defaults to ``plt.gca()``.
    **kwargs
        Forwarded unchanged to ``LineCollection``.

    Returns
    -------
    The ``LineCollection`` that was added to ``ax``.
    """
    if ax is None:
        ax = plt.gca()
    if x1 is None:
        x1 = np.arange(len(y1))
    if x2 is None:
        x2 = x1
    # Shape (n, 2, 2): one [[x_start, y_start], [x_end, y_end]] pair per segment.
    segments = np.stack((np.c_[x1, x2], np.c_[y1, y2]), axis=2)
    lines = LineCollection(segments, **kwargs)
    ax.add_collection(lines)
    # Rescale the view so the segments are visible even when nothing else
    # has been plotted on these axes yet.
    ax.autoscale_view()
    return lines
# Demo data: two random series of length n, the second shifted up by 1.
n = 10
y1 = np.random.random(n)
y2 = np.random.random(n) + 1
x = np.arange(n)
# One colour string per segment, built from n evenly spaced values in
# [0, 0.8] rounded to two decimals.
color_list = [str(x) for x in np.round(np.linspace(0., 0.8, n), 2)]
fig, ax = plt.subplots()
ax.plot(x, y1, 'r')
ax.plot(x, y2, 'b')
# x2 is omitted, so each segment joins (x, y1) to (x, y2) at the same x.
draw_lines_between(ax=ax, x1=x, y1=y1, y2=y2, colors=color_list)
plt.show()

I wrote a little function which takes two arrays y1, y2 (x1, x2 are optional)
and connects their data points vertically.
def draw_lines_between(*, ax, x1=None, x2=None, y1, y2, color_list=None, **kwargs):
    """Draw one line per data point from ``(x1[i], y1[i])`` to ``(x2[i], y2[i])``.

    Parameters
    ----------
    ax
        Object whose ``plot`` method is called once per line.
    x1 : sequence, optional
        x positions of the line starts; defaults to ``0 .. len(y1) - 1``.
    x2 : sequence, optional
        x positions of the line ends; defaults to ``x1``.
    y1, y2 : sequence
        y positions of the line starts and ends; must have equal length.
    color_list : optional
        ``None`` passes ``color=None`` for every line. A string, or a
        sequence shorter than the number of lines, is treated as a single
        colour and reused for every line. Otherwise entry ``i`` colours
        line ``i``.
    **kwargs
        Forwarded unchanged to every ``ax.plot`` call.

    Returns
    -------
    numpy.ndarray
        Object array holding the first item returned by each ``ax.plot`` call.

    Raises
    ------
    ValueError
        If ``y1`` and ``y2`` differ in length, or ``x1``/``x2`` hold fewer
        values than ``y1``.
    """
    n = len(y1)
    if len(y2) != n:
        raise ValueError("y1 and y2 must have the same length")
    if x1 is None:
        x1 = np.arange(n)
    if x2 is None:
        x2 = x1
    if len(x1) < n or len(x2) < n:
        raise ValueError("x1 and x2 must provide one value per data point")

    # Resolve the colours only after the None default has been handled, so
    # calling without color_list works.
    if color_list is None:
        colors = [None] * n
    elif isinstance(color_list, str) or len(color_list) < n:
        colors = [color_list] * n
    else:
        colors = color_list

    h = np.empty(n, dtype=object)
    for i, (xa, xb, ya, yb, color) in enumerate(zip(x1, x2, y1, y2, colors)):
        h[i] = ax.plot((xa, xb), (ya, yb), color=color, **kwargs)[0]
    return h
import matplotlib.pyplot as plt
import numpy as np
# Demo data: two random series of length n, the second shifted up by 1.
n = 10
y1 = np.random.random(n)
y2 = np.random.random(n) + 1
x1 = np.arange(n)
# One colour string per line, built from n evenly spaced values in
# [0, 0.8] rounded to two decimals.
color_list = [str(x) for x in np.round(np.linspace(0., 0.8, n), 2)]
fig, ax = plt.subplots()
ax.plot(x1, y1, 'r')
ax.plot(x1, y2, 'b')
# x2 is omitted, so each line joins (x1, y1) to (x1, y2) at the same x.
draw_lines_between(ax=ax, x1=x1, y1=y1, y2=y2, color_list=color_list)

Related

Color all points between 2 x values in matplotlib efficiently

I wish to color all points in a scatter plot between 2 x values a different color to the rest of the plot. I understand I can do something such as the following:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(10)
x = np.linspace(1, 50, 50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
want_to_color = [20, 40]
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Build one colour entry per point. Points whose index lies strictly
# between the two bounds are red; the rest keep their series' cycle colour.
# Note the comparison is on the point index i, not on the value x[i].
c_array1 = []
c_array2 = []
for i in range(50):
    if want_to_color[0] < i < want_to_color[1]:
        c_array1.append('r')
        c_array2.append('r')
    else:
        c_array1.append(color_cycle[0])
        c_array2.append(color_cycle[1])
plt.scatter(x, y1, c=c_array1)
plt.scatter(x, y2, c=c_array2)
plt.show()
Which produces:
But as you can see, this is very memory inefficient, needing a list the size of the data set in order to color all points, when it could simply just be 2 x values. I just wanted to know if there is a more efficient way of doing this.
EDIT:
I just thought of a method of doing this using generators, which would be a really nice clean solution. Unfortunately:
RuntimeError: matplotlib does not support generators as input
Just because I wrote it anyway, here's the code I tried to use:
import matplotlib.pyplot as plt
import numpy as np
# Lazily map each value v of x to 'r' when x1 < v < x2, otherwise to entry n
# of the default colour cycle. Returns a map object, not a list.
col = lambda x, x1, x2, n : map(lambda v: 'r' if v > x1 and v < x2 else plt.rcParams['axes.prop_cycle'].by_key()['color'][n], x)
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
# NOTE: the surrounding text reports that these calls fail with
# "TypeError: object of type 'map' has no len()"; the snippet is kept
# as-is because it illustrates that failure.
plt.scatter(x, y1, c=col(x, 20, 40, 0))
plt.scatter(x, y2, c=col(x, 20, 40, 1))
plt.show()
NOTE: The code above actually throws the error TypeError: object of type 'map' has no len(), but even converting this to the less clean iterable equivalent still doesn't work, as matplotlib simply won't accept generators.
Here is a more efficient way of creating labels for the scatter plot:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
want_to_color = [20,40]
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Numeric label per point: 0 everywhere, 1 for the indices strictly between
# the two bounds.
c = np.zeros(50)
c[want_to_color[0]+1 : want_to_color[1]] = 1
# Two-entry colormaps pairing each series' cycle colour with red.
colors1 = ListedColormap([color_cycle[0], 'r'])
colors2 = ListedColormap([color_cycle[1], 'r'])
plt.scatter(x, y1, c=c, cmap=colors1)
plt.scatter(x, y2, c=c, cmap=colors2)
plt.show()
This gives:
If you prefer not to create an array of labels at all then you can try this:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(10)
x = np.linspace(1,50,50)
y1 = np.random.rand(50)
y2 = np.random.rand(50)
want_to_color = [20,40]
# Slice selecting the indices strictly between the two bounds.
sl = slice(want_to_color[0]+1, want_to_color[1])
color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Draw every point in its series colour first ...
plt.scatter(x, y1, c=color_cycle[0])
plt.scatter(x, y2, c=color_cycle[1])
# ... then draw the selected points again in red.
plt.scatter(x[sl], y1[sl], c='r')
plt.scatter(x[sl], y2[sl], c='r')
The resulting image is:

Generating a random float below and above a line created by numpy arrays python

I would like to generate a random float point above and below a line created by numpy arrays.
For example I have these line equations:
# 100 evenly spaced x positions on [-1, 1].
x_values = np.linspace(-1, 1, 100)
# The two lines: y = 2x - 5 and y = -3x + 2.
y1 = 2 * x_values -5
y2= -3 * x_values +2
plt.plot(x_values,y1, '-k')
plt.plot(x_values,y2, '-g')
I have tried this method from Generate random points above and below a line in Python and it works if np.arange is used like so:
# Overall y bounds used for the random integer draws.
lower, upper = -25, 25
num_points = 1
# Random integer x positions drawn with randrange(start=1, stop=9).
x1 = [random.randrange(start=1, stop=9) for i in range(num_points)]
x2 = [random.randrange(start=1, stop=9) for i in range(num_points)]
# y1 is drawn from [lower, 2x - 5) and y2 from [2x - 5, upper), i.e. on
# either side of the line y = 2x - 5.
y1 = [random.randrange(start=lower, stop=(2 * x -5) )for x in x1]
y2 = [random.randrange(start=(2 * x -5), stop=upper) for x in x2]
plt.plot(np.arange(10), 2 * np.arange(10) -5)
plt.scatter(x1, y1, c='blue')
plt.scatter(x2, y2, c='red')
However, I wanted to find a way to generate a random point if np.linspace(-1, 1, 100) was used to create the line graph. The difference is that float coordinates should be allowed to be picked too, but I am unsure how to do that.
Any ideas will be appreciated.
Here is an approach, using functions for the y-values. Random x positions are chosen uniformly over the x-range. For each random x, a value is randomly chosen between its y-ranges.
import numpy as np
import matplotlib.pyplot as plt
x_values = np.linspace(-1, 1, 100)
# The two bounding lines as functions, so they can be evaluated at
# arbitrary x positions below.
f1 = lambda x: 2 * x - 5
f2 = lambda x: -3 * x + 2
y1 = f1(x_values)
y2 = f2(x_values)
plt.plot(x_values, y1, '-k')
plt.plot(x_values, y2, '-g')
plt.fill_between (x_values, y1, y2, color='gold', alpha=0.2)
num_points = 20
# Random x uniformly over the plotted x-range.
xs = np.random.uniform(x_values[0], x_values[-1], num_points)
# For each x, a random y uniformly between the two lines at that x.
ys = np.random.uniform(f1(xs), f2(xs))
plt.scatter(xs, ys, color='crimson')
plt.show()
PS: Note that the simplicity of the approach chooses x uniform over its length. If you need an even distribution over the area of the trapezium, you need the x less probable at the right, and more at the left. You can visualize this with many more points and using transparency. With the simplistic approach, the right will look denser than the left.
The following code first generates x,y points in a parallelogram, and remaps the points on the wrong side back to its mirror position. The code looks like:
import numpy as np
import matplotlib.pyplot as plt
x0, x1 = -1, 1
x_values = np.linspace(x0, x1, 100)
f1 = lambda x: 2 * x - 5
f2 = lambda x: -3 * x + 2
y1 = f1(x_values)
y2 = f2(x_values)
plt.plot(x_values, y1, '-k')
plt.plot(x_values, y2, '-g')
plt.fill_between(x_values, y1, y2, color='gold', alpha=0.2)
num_points = 100_000
# Vertical gap between the two lines at the left (h0) and right (h1) end.
h0 = f2(x0) - f1(x0)
h1 = f2(x1) - f1(x1)
# Sample in a band of height h0 + h1 measured upward from f1.
xs1 = np.random.uniform(x0, x1, num_points)
ys1 = np.random.uniform(0, h0 + h1, num_points) + f1(xs1)
# Points at or below f2 are kept as they are; the others get their x
# mirrored about the middle of [x0, x1] ...
xs = np.where(ys1 <= f2(xs1), xs1, x0 + x1 - xs1)
# ... and their y recomputed relative to f1 at the mirrored x.
ys = np.where(ys1 <= f2(xs1), ys1, f1(xs) + h0 + h1 + f1(xs1) - ys1)
plt.scatter(xs, ys, color='crimson', alpha=0.2, ec='none', s=1)
plt.show()
Plot comparing the two approaches:
First of all, if you have 2 intersecting lines, there will most likely be a triangle in which you can pick random points. This is dangerously close to Bertrand's paradox, so make sure that your RNG suits its purpose.
If you don't really care about how "skewed" your randomness is, try this:
import numpy as np
left, right = -1, 1
# x_values = np.linspace(left, right, 100)
k1, k2 = 2, -3
b1, b2 = -5, 2


def y1(x):
    """First line, y = k1*x + b1 (the lower bound of the sampled region)."""
    return k1*x + b1


def y2(x):
    """Second line, y = k2*x + b2 (the upper bound of the sampled region)."""
    return k2*x + b2


def feasible_x_range(k1, b1, k2, b2, left, right):
    """Return the part of [left, right] where line 1 lies below line 2.

    Line 1 is ``y = k1*x + b1`` and line 2 is ``y = k2*x + b2``.

    Returns
    -------
    tuple or None
        ``(lo, hi)`` with ``left <= lo`` and ``hi <= right``, or ``None``
        when no such sub-range exists. Parallel lines with ``b1 >= b2``
        count as having no solution.
    """
    if k1 == k2:
        # Parallel lines: the whole interval qualifies or none of it does.
        return (left, right) if b1 < b2 else None
    inters = (b2 - b1) / (k1 - k2)
    if k1 > k2:
        # Line 1 rises faster, so it is below line 2 only left of the crossing.
        if inters <= left:
            return None
        # Clamp: the crossing may lie beyond the requested right bound.
        return left, min(right, inters)
    # Line 1 rises slower, so it is below line 2 only right of the crossing.
    if inters >= right:
        return None
    return max(left, inters), right


# If you need a point above the 1st equation, but below the second one.
# Check the limits where you can pick the points under this condition.
x_range = feasible_x_range(k1, b1, k2, b2, left, right)
nosol = x_range is None
if nosol:
    print('No solution')
else:
    left, right = x_range
    # Pick random X between "left" and "right"
    # Pick whatever distribution you like or need
    rand_x = np.random.uniform(left, right)
    rand_y = np.random.uniform(y1(rand_x), y2(rand_x))
    print(f'Random point is ({round(rand_x,2)}, {round(rand_y,2)})')
If your random X needs to belong to a specific number sequence, use some other np.random function: randint, choice...

Drawing lines between two points with twinaxes

I have followed this example (Drawing lines between two plots in Matplotlib) but am running into problems. I believe it has something to do with the fact that I essentially have two different y points, but am not sure how to amend the code to fix it. I would like the line to start at one point and end at the other point directly below it, as well as plotting for all lines.
fig=plt.figure(figsize=(22,10), dpi=150)
ax1 = fig.add_subplot(1, 1, 1)
# Second axes created from ax1 via twinx().
ax2 = ax1.twinx()
n = 10
y1 = np.random.random(n)
y2 = np.random.random(n) + 1
x1 = np.arange(n)
ax1.scatter(x1, y1)
ax2.scatter(x1, y2)
# Only the point with index 1 is connected.
i = 1
xy = (x1[i],y1[i])
# NOTE(review): both ends of the patch use the same (x1[i], y1[i]) pair;
# y2[i] is never used for the end point on ax2.
con = ConnectionPatch(xyA=xy, xyB=xy, coordsA="data", coordsB="data",
axesA=ax1, axesB=ax2, color="red")
ax2.add_artist(con)
# The same (x1[i], y1[i]) coordinates are marked on both axes.
ax1.plot(x1[i],y1[i],'g+',markersize=12)
ax2.plot(x1[i],y1[i],'g+',markersize=12)
Just iterate over zipped (x, y1, y2):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch
fig = plt.figure(figsize=(10, 5), dpi=100)
ax1 = fig.add_subplot(1, 1, 1)
ax2 = ax1.twinx()
n = 10
y1 = np.random.random(n)
y2 = np.random.random(n) + 1
x1 = np.arange(n)
# I add some colors blue for left y-axis, red for right y-axis
ax1.scatter(x1, y1, c='b')
ax2.scatter(x1, y2, c='r')
# Now iterate over paired x, and 2 y values:
for xi, y1i, y2i in zip(x1, y1, y2):
    # One patch per data point: it starts at (xi, y1i) in ax1's data
    # coordinates and ends at (xi, y2i) in ax2's data coordinates.
    con = ConnectionPatch(
        xyA=(xi, y1i),
        xyB=(xi, y2i),
        coordsA="data",
        coordsB="data",
        axesA=ax1,
        axesB=ax2,
        color='g',
    )
    ax1.add_artist(con)
plt.show()
Out:

Generate random points above and below a line in Python

I would like to generate random points on an x,y scatter plot that are either above or below a given line. For example, if the line is y=x I would like to generate a list of points in the top left of the plot (above the line) and a list of points in the bottom right of the plot (below the line). Here is an example where the points are above or below y=5:
import random
import matplotlib.pyplot as plt
num_points = 10
# Random integer x positions drawn with randrange(start=1, stop=9).
x1 = [random.randrange(start=1, stop=9) for i in range(num_points)]
x2 = [random.randrange(start=1, stop=9) for i in range(num_points)]
# y1 is drawn with randrange(1, 5) and y2 with randrange(6, 9), so the two
# groups fall on opposite sides of y = 5.
y1 = [random.randrange(start=1, stop=5) for i in range(num_points)]
y2 = [random.randrange(start=6, stop=9) for i in range(num_points)]
plt.scatter(x1, y1, c='blue')
plt.scatter(x2, y2, c='red')
plt.show()
However, I generated the x and y points independently, which limits me to equations where y = c (where c is a constant). How can I expand this to any y=mx+b?
You can change the stop and start limits for y1 and y2 to be the line you want. You will need to decide where the plane ends (set lower and upper).
Note this only works for integers. You can use truncated multivariate distributions if you want something more sophisticated.
# Slope and intercept of the separating line y = m*x + b.
m, b = 1, 0
# Overall y bounds used for the random integer draws.
lower, upper = -25, 25
x1 = [random.randrange(start=1, stop=9) for i in range(num_points)]
x2 = [random.randrange(start=1, stop=9) for i in range(num_points)]
# y1 is drawn from [lower, m*x + b) and y2 from [m*x + b, upper).
y1 = [random.randrange(start=lower, stop=m*x+b) for x in x1]
y2 = [random.randrange(start=m*x+b, stop=upper) for x in x2]
# NOTE(review): this snippet relies on num_points, random, np and plt being
# defined earlier — confirm np is imported before running it.
plt.plot(np.arange(10), m*np.arange(10)+b)
plt.scatter(x1, y1, c='blue')
plt.scatter(x2, y2, c='red')
You may as well have my answer too.
This way puts Gaussian noise above the line, and below. I have deliberately set the mean of the noise to 20 so that it would stand out from the line, which is y = 10*x + 5. You would probably make the mean zero.
>>> import random
>>> def y(x, m, b):
...     return m*x + b
...
>>> import numpy as np
>>> X = np.linspace(0, 10, 100)
>>> y_above = [y(x, 10, 5) + abs(random.gauss(20,5)) for x in X]
>>> y_below = [y(x, 10, 5) - abs(random.gauss(20,5)) for x in X]
>>> import matplotlib.pyplot as plt
>>> plt.scatter(X, y_below, c='g')
>>> plt.scatter(X, y_above, c='r')
>>> plt.show()
Here's the plot.
There are many approaches possible, but if your only requirement is that they are above and below the y = mx + b line, then you can simply plug the random x values into the equation and then add or subtract a random y value.
import random
import matplotlib.pyplot as plt
# Parameters of the reference line y = slope * x + intercept.
slope = 1
intercept = 0
def ymxb(slope, intercept, x):
    """Evaluate the line ``y = slope * x + intercept`` at ``x``."""
    return slope * x + intercept
num_points = 10
# Random integer x positions drawn with randrange(start=1, stop=9).
x1 = [random.randrange(start=1, stop=9) for i in range(num_points)]
x2 = [random.randrange(start=1, stop=9) for i in range(num_points)]
# y1 is the line value minus a random integer offset (below the line);
# y2 is the line value plus a random integer offset (above the line).
y1 = [ymxb(slope, intercept, x) - random.randrange(start=1, stop=9) for x in x1]
y2 = [ymxb(slope, intercept, x) + random.randrange(start=1, stop=9) for x in x2]
plt.scatter(x1, y1, c='blue')
plt.scatter(x2, y2, c='red')
plt.show()
That looks like this:
Side of (x, y) is defined by the sign of y - mx - b. You can read it here, for example.
import random
import matplotlib.pyplot as plt
num_points = 50
# Slope and intercept of the separating line y = m*x + b.
m = 5
b = -3
x = [random.randrange(start=1, stop=9) for _ in range(num_points)]
y = [random.randrange(start=1, stop=9) for _ in range(num_points)]
# A point is blue when y - m*x - b is positive (above the line), else red.
colors = ['blue' if yi - m * xi - b > 0 else 'red' for xi, yi in zip(x, y)]
plt.plot([0, 10], [b, 10 * m + b], c='green')
plt.xlim((0, 10))
plt.ylim((0, 10))
plt.scatter(x, y, c=colors)
plt.show()

Multiple plots on same figure with DataFrame.Plot

While I can get multiple lines on a chart and multiple bars on a chart - I cannot get a line and bar on the same chart using the same PeriodIndex.
Faux code follows ...
# play data
n = 100
# Monthly PeriodIndex with n entries starting at 2001-01-01.
x = pd.period_range('2001-01-01', periods=n, freq='M')
# Two random series; .diff() takes differences of consecutive values and
# the first one is additionally shifted up by 5.
y1 = (Series(np.random.randn(n)).diff() + 5).tolist()
y2 = (Series(np.random.randn(n)).diff()).tolist()
df = pd.DataFrame({'bar':y2, 'line':y1}, index=x)
# let's plot
plt.figure()
# Bar plot first, then the line plot is directed onto the same axes.
ax = df['bar'].plot(kind='bar', label='bar')
df['line'].plot(kind='line', ax=ax, label='line')
plt.savefig('fred.png', dpi=200)
plt.close()
Any help will be greatly appreciated ...
The problem is: bar plots don't use index values as x axis, but use range(0, n). You can use twiny() to create a second axes that share yaxis with the bar axes, and draw line curve in this second axes.
The most difficult thing is how to align x-axis ticks. Here we define the align function, which will align ax2.get_xlim()[0] with x1 in ax1 and ax2.get_xlim()[1] with x2 in ax1:
def align_xaxis(ax2, ax1, x1, x2):
    """Map the x-limits of ``ax2`` onto the positions ``x1`` and ``x2`` of ``ax1``.

    After the call, the previous lower x-limit of ``ax2`` sits where ``x1``
    is drawn in ``ax1`` and the previous upper x-limit sits where ``x2`` is
    drawn in ``ax1``.

    Parameters
    ----------
    ax2
        Axes whose x-limits are rewritten with ``set_xlim``.
    ax1
        Axes in whose data coordinates ``x1`` and ``x2`` are given.
    x1, x2
        The two anchor x positions in ``ax1``.

    Raises
    ------
    ValueError
        If both anchors land on the same x position in ``ax2``, in which
        case no linear mapping can be fitted.
    """
    # Express the two anchors in ax2 data coordinates by going through the
    # transform of ax1 and back through the inverse transform of ax2.
    display_pts = ax1.transData.transform([[x1, 0], [x2, 0]])
    (x1, _), (x2, _) = ax2.transData.inverted().transform(display_pts)
    if x1 == x2:
        raise ValueError("x1 and x2 must map to two distinct x positions")
    xs, xe = ax2.get_xlim()
    # Linear map sending the anchors to the current limits; applying it to
    # the limits themselves yields the new limits.
    k, b = np.polyfit([x1, x2], [xs, xe], 1)
    ax2.set_xlim(xs*k+b, xe*k+b)
Here is the full code:
from matplotlib import pyplot as plt
import pandas as pd
from pandas import Series
import numpy as np
n = 50
x = pd.period_range('2001-01-01', periods=n, freq='M')
y1 = (Series(np.random.randn(n)) + 5).tolist()
y2 = (Series(np.random.randn(n))).tolist()
df = pd.DataFrame({'bar':y2, 'line':y1}, index=x)
# let's plot
plt.figure(figsize=(20, 4))
ax1 = df['bar'].plot(kind='bar', label='bar')
# Second axes created from the bar axes via twiny(); the line is drawn there.
ax2 = ax1.twiny()
df['line'].plot(kind='line', label='line', ax=ax2)
ax2.grid(color="red", axis="x")


def align_xaxis(ax2, ax1, x1, x2):
    """Map the x-limits of ``ax2`` onto the positions ``x1`` and ``x2`` of ``ax1``."""
    # Express the two anchors in ax2 data coordinates by going through the
    # transform of ax1 and back through the inverse transform of ax2.
    display_pts = ax1.transData.transform([[x1, 0], [x2, 0]])
    (x1, _), (x2, _) = ax2.transData.inverted().transform(display_pts)
    xs, xe = ax2.get_xlim()
    # Linear map sending the anchors to the current limits; applying it to
    # the limits themselves yields the new limits.
    k, b = np.polyfit([x1, x2], [xs, xe], 1)
    ax2.set_xlim(xs*k+b, xe*k+b)


# Anchor the line's x-limits to bar positions 0 and n-1.
align_xaxis(ax2, ax1, 0, n-1)
and the output:

Categories