Drawing heat map in python - python

I'm having two lists x, y representing coordinates in 2D. For example x = [1,4,0.5,2,5,10,33,0.04] and y = [2,5,44,0.33,2,14,20,0.03]. x[i] and y[i] represent one point in 2D. Now I also have a list representing "heat" values for each (x,y) point, for example z = [0.77, 0.88, 0.65, 0.55, 0.89, 0.9, 0.8,0.95]. Of course x,y and z are much higher dimensional than the example.
Now I would like to plot a heat map in 2D where x and y represents the axis coordinates and z represents the color. How can this be done in python?

This code produces a heat map. With a few more data points, the plot starts looking pretty nice and I've found it to be very quick in general even for >100k points.
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
import math
x = [1,4,0.5,2,5,10,33,0.04]
y = [2,5,44,0.33,2,14,20,0.03]
z = [0.77, 0.88, 0.65, 0.55, 0.89, 0.9, 0.8, 0.95]
levels = [0.7, 0.75, 0.8, 0.85, 0.9]
plt.figure()
ax = plt.gca()
ax.set_aspect('equal')
CS = ax.tricontourf(x, y, z, levels, cmap=plt.get_cmap('jet'))
cbar = plt.colorbar(CS, ticks=np.sort(np.array(levels)),ax=ax, orientation='horizontal', shrink=.75, pad=.09, aspect=40,fraction=0.05)
cbar.ax.set_xticklabels(list(map(str,np.sort(np.array(levels))))) # horizontal colorbar
cbar.ax.tick_params(labelsize=8)
plt.title('Heat Map')
plt.xlabel('X Label')
plt.ylabel('Y Label')
plt.show()
Produces this image:
or if you're looking for a more gradual color change, change the tricontourf line to this:
CS = ax.tricontourf(x, y, z, np.linspace(min(levels),max(levels),256), cmap=cmap)
and then the plot will change to:

Based on this answer, you might want to do something like:
import numpy as np
from matplotlib.mlab import griddata
import matplotlib.pyplot as plt
xs0 = [1,4,0.5,2,5,10,33,0.04]
ys0 = [2,5,44,0.33,2,14,20,0.03]
zs0 = [0.77, 0.88, 0.65, 0.55, 0.89, 0.9, 0.8,0.95]
N = 30j
extent = (np.min(xs0),np.max(xs0),np.min(ys0),np.max(ys0))
xs,ys = np.mgrid[extent[0]:extent[1]:N, extent[2]:extent[3]:N]
resampled = griddata(xs0, ys0, zs0, xs, ys, interp='linear')
plt.imshow(np.fliplr(resampled).T, extent=extent,interpolation='none')
plt.colorbar()
The example here might also help: http://matplotlib.org/examples/pylab_examples/griddata_demo.html

Related

Plotting a Lorenz curve from a given NumPy file

I am trying to write a program that reads data from a NumPy file, and then uses that data to plot a Lorenz curve, but I'm not exactly sure how to make the Lorenz curve. I tried using the cumsum() function, but I was not able to plot the Lorenz curve. Here's what I have so far:
import numpy as np
import matplotlib.pyplot as plt
data = np.load('pop2010.npy')
print(data)
plt.plot(data[0]) # display all the points
plt.show()
plot_x = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
plot_y = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
plt.plot(plot_x, plot_y)
# set the labels for x, y, and title
plt.xlabel("Countries")
plt.ylabel("Wealth")
plt.title("Population-Lorenz Curve")
# save plot as png file
plt.savefig('population-lorenz.png', dpi = 200)
plt.show()
Any advice would be appreciated, thanks!
Adapting from https://zhiyzuo.github.io/Plot-Lorenz/, combining with your code.
Not quite clear from the context you provide, but I think data is what you want to plot as a Lorenz curve, and the plot_x, plot_y variables are your way of plotting the x-y line?
Note that I am using the object-oriented API instead of the pyplot API since this is what the docs now recommend--I think you'll find it's easier to work with in the long run. See https://matplotlib.org/stable/api/index.html#usage-patterns for detail.
import numpy as np
import matplotlib.pyplot as plt
data = np.load('pop2010.npy')
X_lorenz = data.cumsum() / data.sum()
X_lorenz = np.insert(X_lorenz, 0, 0)
fig, ax = plt.subplots(figsize=[6,6])
## scatter plot of Lorenz curve
ax.scatter(np.arange(X_lorenz.size)/(X_lorenz.size-1), X_lorenz,
marker='x', color='darkgreen', s=100)
## line plot of equality
ax.plot([0,1], [0,1], color='k')
# set the labels for x, y, and title
ax.set_xlabel("Countries")
ax.set_ylabel("Wealth")
ax.set_title("Population-Lorenz Curve")
plt.show()
# save plot as png file
plt.savefig('population-lorenz.png', dpi = 200)

How do I smooth out the edges of a closed line similar to d3's curveCardinal method implementation?

I have a few data points that I am connecting using a closed line plot, and I want the line to have smooth edges similar to how the curveCardinal methods in d3 do it. Link Here
Here's a minimal example of what I want to do:
import numpy as np
from matplotlib import pyplot as plt
x = np.array([0.5, 0.13, 0.4, 0.5, 0.6, 0.7, 0.5])
y = np.array([1.0, 0.7, 0.5, 0.2, 0.4, 0.6, 1.0])
fig, ax = plt.subplots()
ax.plot(x, y)
ax.scatter(x, y)
Now, I'd like to smooth out/interpolate the line similar to d3's curveCardinal methods. Here are a few things that I've tried.
from scipy import interpolate
tck, u = interpolate.splprep([x, y], s=0, per=True)
xi, yi = interpolate.splev(np.linspace(0, 1, 100), tck)
fig, ax = plt.subplots(1, 1)
ax.plot(xi, yi, '-b')
ax.plot(x, y, 'k')
ax.scatter(x[:2], y[:2], s=200)
ax.scatter(x, y)
The result of the above code is not bad, but I was hoping that the curve would stay closer to the line when the data points are far apart (I increased the size of two such data points above to highlight this). Essentially, have the curve stay close to the line.
Using interp1d (has the same problem as the code above):
from scipy.interpolate import interp1d
x = [0.5, 0.13, 0.4, 0.5, 0.6, 0.7, 0.5]
y = [1.0, 0.7, 0.5, 0.2, 0.4, 0.6, 1.0]
orig_len = len(x)
x = x[-3:-1] + x + x[1:3]
y = y[-3:-1] + y + y[1:3]
t = np.arange(len(x))
ti = np.linspace(2, orig_len + 1, 10 * orig_len)
kind='cubic'
xi = interp1d(t, x, kind=kind)(ti)
yi = interp1d(t, y, kind=kind)(ti)
fig, ax = plt.subplots()
ax.plot(xi, yi, 'g')
ax.plot(x, y, 'k')
ax.scatter(x, y)
I also looked at the Chaikins Corner Cutting algorithm, but I don't like the result.
def chaikins_corner_cutting(coords, refinements=5):
coords = np.array(coords)
for _ in range(refinements):
L = coords.repeat(2, axis=0)
R = np.empty_like(L)
R[0] = L[0]
R[2::2] = L[1:-1:2]
R[1:-1:2] = L[2::2]
R[-1] = L[-1]
coords = L * 0.75 + R * 0.25
return coords
fig, ax = plt.subplots()
ax.plot(x, y, 'k', linewidth=1)
ax.plot(chaikins_corner_cutting(x, 4), chaikins_corner_cutting(y, 4))
I also, superficially, looked at Bezier curves, matplotlibs PathPatch, and Fancy box implementations, but I couldn't get any satisfactory results.
Suggestions are greatly appreciated.
So, here's how I ended up doing it. I decided to introduce new points between every two existing data points. The following image shows how I am adding these new points. Red are data that I have. Using a convex hull I calculate the geometric center of the data points and draw lines to it from each point (shown with blue lines). Divide these lines twice in half and connect the resulting points (green line). The center of the green line is the new point added.
Here are the functions that accomplish this:
def midpoint(p1, p2, sf=1):
"""Calculate the midpoint, with an optional
scaling-factor (sf)"""
xm = ((p1[0]+p2[0])/2) * sf
ym = ((p1[1]+p2[1])/2) * sf
return (xm, ym)
def star_curv(old_x, old_y):
""" Interpolates every point by a star-shaped curve. It does so by adding
"fake" data points in-between every two data points, and pushes these "fake"
points towards the center of the graph (roughly 1/4 of the way).
"""
try:
points = np.array([old_x, old_y]).reshape(7, 2)
hull = ConvexHull(points)
x_mid = np.mean(hull.points[hull.vertices,0])
y_mid = np.mean(hull.points[hull.vertices,1])
except:
x_mid = 0.5
y_mid = 0.5
c=1
x, y = [], []
for i, j in zip(old_x, old_y):
x.append(i)
y.append(j)
try:
xm_i, ym_i = midpoint((i, j),
midpoint((i, j), (x_mid, y_mid)))
xm_j, ym_j = midpoint((old_x[c], old_y[c]),
midpoint((old_x[c], old_y[c]), (x_mid, y_mid)))
xm, ym = midpoint((xm_i, ym_i), (xm_j, ym_j))
x.append(xm)
y.append(ym)
c += 1
except IndexError:
break
orig_len = len(x)
x = x[-3:-1] + x + x[1:3]
y = y[-3:-1] + y + y[1:3]
t = np.arange(len(x))
ti = np.linspace(2, orig_len + 1, 10 * orig_len)
kind='quadratic'
xi = interp1d(t, x, kind=kind)(ti)
yi = interp1d(t, y, kind=kind)(ti)
return xi, yi
Here's how it looks:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from scipy.spatial import ConvexHull
x = [0.5, 0.13, 0.4, 0.5, 0.6, 0.7, 0.5]
y = [1.0, 0.7, 0.5, 0.2, 0.4, 0.6, 1.0]
xi, yi = star_curv(x, y)
fig, ax = plt.subplots()
ax.plot(xi, yi, 'g')
ax.plot(x, y, 'k', alpha=0.5)
ax.scatter(x, y, color='r')
The result is especially noticeable when the data points are more symmetric, for example the following x, y values give the results in the image below:
x = [0.5, 0.32, 0.34, 0.5, 0.66, 0.65, 0.5]
y = [0.71, 0.6, 0.41, 0.3, 0.41, 0.59, 0.71]
Comparison between the interpolation presented here, with the default interp1d interpolation.
I would create another array with the vertices extended in/out or up/down by about 5%. So if a point is lower than the average of the neighbouring points, make it a bit lower still.
Then do a linear interpolation between the new points, say 10 points/edge. Finally do a spline between the second last point per edge and the actual vertex. If you use Bezier curves, you can make the spline come in at the same angle on each side.
It's a bit of work, but of course you can use this anywhere.

Plotting Curved Lines in Python

I'd like to plot curved lines of a specific arch like shape, below is how far I've gotten using specific values (these values need to be used) but it plots straight lines.
I'm also having trouble formatting the y axis the way I want. It's a log scale and I'd like it to go up to 1 (like in the ideal plot above). Some help would be great, thanks! =)
The reason why your line is not stretching on a log scale plot is because there are no points between the points that are on the top and on the bottom. log plot does not curve the lines, only place the points on a different scale, the line between them are still straight.
To change this, we add more points between dots. and the result will become curved.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import ScalarFormatter
# Data for plotting
t = [0.0, 62.5, 125.0, 187.5, 250, 312.5, 375, 437.5, 500]
s = [0.1, 0.005, 0.1, 0.005, 0.1, 0.005, 0.1, 0.005, 0.1]
def extendlist(l):
master = []
for i in range(len(l)-1):
x = np.linspace(l[i], l[i+1], 50)
master.extend(x)
return master
t = extendlist(t)
s = extendlist(s)
fig, ax = plt.subplots()
ax.semilogy(t, s)
ax.set(xlabel='x axis', ylabel='y axis', title='Stuff')
plt.xlim((0,500))
plt.ylim((0.001, 1))
plt.show()
This will generate what you graphed on paper.
you can use interp1d
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
t = [0.0, 62.5, 125.0, 187.5, 250, 312.5, 375, 437.5, 500]
s = [0.1, 0.005, 0.1, 0.005, 0.1, 0.005, 0.1, 0.005, 0.1]
tnew = np.linspace(0, 500, num=1001, endpoint=True)
f = interp1d(t, s)
plt.semilogy(tnew, f(tnew))
plt.ylim((0.001, 1))
plt.show()

Get best linear function which approximate some dots in 3D

I have 4 dots which are represented with these coordinates:
X = [0.1, 0.5, 0.9, 0.18]
Y = [0.7, 0.5, 0.7, 0.3]
Z = [4.2, 3.3, 4.2, 2.5]
and I have to get the best linear function (plane) which approximate these 4 dots.
I'm aware of numpy.polyfit, but polyfitworks only with x and y (2D),
What can I do?
while not completely general, if the the data points can be reasonably represented as a surface relative to a coordinate plane, say z = ax + by + c then np.linalg.lstsq can be used
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
X = np.array([0.1, 0.5, 0.9, 0.18])
Y = np.array([0.7, 0.5, 0.7, 0.3])
Z = np.array([4.2, 3.3, 4.2, 2.5])
# least squares fit
A = np.vstack([X, Y, np.ones(len(X))]).T
a,b,c= np.linalg.lstsq(A, Z)[0]
# plots
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# plot data as big red crosses
ax.scatter(X, Y, Z, color='r', marker='+', linewidth=10)
# plot plane fit as grid of green dots
xs = np.linspace(min(X), max(X), 10)
ys = np.linspace(min(Y), max(Y), 10)
xv, yv = np.meshgrid(xs, ys)
zv = a*xv + b*yv + c
ax.scatter(xv, yv, zv, color = 'g')
# ax.plot_wireframe(xv, yv, zv, color = 'g') # alternative fit plane plot
plt.show()
plotting the data 1st, you could select a different coordinate pair for the "independent variable" plane to avoid ill conditioned result if necessary, if the data points appeared to lie in a plane containing the z axis, then use xz or yz
and of course you could have degenerate points on a line or the vertices of a regular tetrahedron
for a better "geometric fit" the 1st fitted plane could be used as the base for a 2nd least square fit of the data rotated into that coordinate system (if the data is "reasonably" plane like)

Label python data points on plot

I searched for ages (hours which is like ages) to find the answer to a really annoying (seemingly basic) problem, and because I cant find a question that quite fits the answer I am posting a question and answering it in the hope that it will save someone else the huge amount of time I just spent on my noobie plotting skills.
If you want to label your plot points using python matplotlib
from matplotlib import pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
A = anyarray
B = anyotherarray
plt.plot(A,B)
for i,j in zip(A,B):
ax.annotate('%s)' %j, xy=(i,j), xytext=(30,0), textcoords='offset points')
ax.annotate('(%s,' %i, xy=(i,j))
plt.grid()
plt.show()
I know that xytext=(30,0) goes along with the textcoords, you use those 30,0 values to position the data label point, so its on the 0 y axis and 30 over on the x axis on its own little area.
You need both the lines plotting i and j otherwise you only plot x or y data label.
You get something like this out (note the labels only):
Its not ideal, there is still some overlap - but its better than nothing which is what I had..
How about print (x, y) at once.
from matplotlib import pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
A = -0.75, -0.25, 0, 0.25, 0.5, 0.75, 1.0
B = 0.73, 0.97, 1.0, 0.97, 0.88, 0.73, 0.54
ax.plot(A,B)
for xy in zip(A, B): # <--
ax.annotate('(%s, %s)' % xy, xy=xy, textcoords='data') # <--
ax.grid()
plt.show()
I had a similar issue and ended up with this:
For me this has the advantage that data and annotation are not overlapping.
from matplotlib import pyplot as plt
import numpy as np
fig = plt.figure()
ax = fig.add_subplot(111)
A = -0.75, -0.25, 0, 0.25, 0.5, 0.75, 1.0
B = 0.73, 0.97, 1.0, 0.97, 0.88, 0.73, 0.54
plt.plot(A,B)
# annotations at the side (ordered by B values)
x0,x1=ax.get_xlim()
y0,y1=ax.get_ylim()
for ii, ind in enumerate(np.argsort(B)):
x = A[ind]
y = B[ind]
xPos = x1 + .02 * (x1 - x0)
yPos = y0 + ii * (y1 - y0)/(len(B) - 1)
ax.annotate('',#label,
xy=(x, y), xycoords='data',
xytext=(xPos, yPos), textcoords='data',
arrowprops=dict(
connectionstyle="arc3,rad=0.",
shrinkA=0, shrinkB=10,
arrowstyle= '-|>', ls= '-', linewidth=2
),
va='bottom', ha='left', zorder=19
)
ax.text(xPos + .01 * (x1 - x0), yPos,
'({:.2f}, {:.2f})'.format(x,y),
transform=ax.transData, va='center')
plt.grid()
plt.show()
Using the text argument in .annotate ended up with unfavorable text positions.
Drawing lines between a legend and the data points is a mess, as the location of the legend is hard to address.

Categories