import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
data = np.genfromtxt('file1.txt',delimiter=' ')
lats = data[:,0]
## lon => x
lons = data[:,1]
## values => z
values = data[:,2]
###
lat_uniq = list(set(lats.tolist()))
nlats = len(lat_uniq)
print(nlats)
print(lat_uniq)
lon_uniq = list(set(lons.tolist()))
print(lon_uniq)
nlons = len(lon_uniq)
print(nlons)
print (lats.shape, nlats, nlons)
yre = lats.reshape(nlats,nlons)
xre = lons.reshape(nlats,nlons)
zre = values.reshape(nlats,nlons)
#### later in the defined map
fig,ax=plt.subplots(1,1)
cp = ax.contourf(xre, yre, zre)
fig.colorbar(cp)
plt.savefig('f1.pdf')
file1.txt
1 2 3
4 5 6
7 8 9
10 11 12
..
The first column holds the x values, the second the y values, and the third the z values.
I'm using this code to make a contour plot in Python, but I get the following error:
Traceback (most recent call last):
File "./yut.py", line 21, in
yre = lats.reshape(nlats,nlons)
ValueError: cannot reshape array of size 4 into shape (4,4)
Could you please help to fix this error? Thanks in advance!
Matplotlib expects contour data on a complete 2D grid. Your file holds scattered (lat, lon, value) triplets rather than one row per lat/lon combination, so reshaping the raw columns to (nlats, nlons) fails; you have to transform your data like this:
import numpy as np
import matplotlib.pyplot as plt
#from matplotlib.colors import LogNorm
data = np.genfromtxt('test.txt', delimiter=' ')
#print(data)
lats = data[:,0]
## lon => x
lons = data[:,1]
## values => z
values = data[:,2]
###
#get unique lat lon values and their index positions
lat_uniq, lat_idx = np.unique(lats, return_inverse=True)
lon_uniq, lon_idx = np.unique(lons, return_inverse=True)
#create 2D array necessary for the contour plot
xre, yre = np.meshgrid(lon_uniq, lat_uniq)
zre = np.full(xre.shape, np.nan)
#or if you know the standard value of the array, fill it with that
#zre = np.full(xre.shape, 0)
zre[lat_idx, lon_idx] = values
print(zre)
#you can fill in missing data with interpolation
from scipy.interpolate import griddata
zre_interpolated = griddata((lons, lats), values, (xre, yre), method = "linear")
print(zre_interpolated)
#### later in the defined map
fig, (ax1, ax2) = plt.subplots(1,2, figsize = (10, 5))
cp1 = ax1.contourf(xre, yre, zre, levels=4)
plt.colorbar(cp1, ax=ax1)
ax1.set_title("data are not interpolated")
cp2 = ax2.contourf(xre, yre, zre_interpolated, levels=4)
plt.colorbar(cp2, ax=ax2)
ax2.set_title("interpolated data")
plt.show()
Example output:
The example output was generated using the following data in the txt file:
1 1 1
1 2 2
2 4 9
4 5 2
6 1 1
6 2 8
6 4 9
6 5 2
2 5 3
4 2 5
4 3 8
4 4 5
1 3 4
1 5 2
2 1 1
2 3 4
"Scatter Pie Plot" ( a scatter plot using pie charts instead of dots). I require this as I have to represent 3 dimensions of data.
1: x axis (0-6)
2: y axis (0-6)
3: Category, let's say (A, B, C ... H)
If two points have the same x and y values, I want a pie chart at that position representing those categories.
Similar to the graph seen in this link:
https://matplotlib.org/gallery/lines_bars_and_markers/scatter_piecharts.html#sphx-glr-gallery-lines-bars-and-markers-scatter-piecharts-py
or this image from Tableau.
As I am limited to using only Python, I have been struggling to adapt the code to my case.
Could anyone help me with this problem? I would be very grateful!
Example data:
XVAL YVAL GROUP
1.3 4.5 A
1.3 4.5 B
4 2 E
4 6 A
2 4 A
2 4 B
1 1 G
1 1 C
1 2 B
1 2 D
3.99 4.56 G
The final output should have 6 pie charts on the X & Y with 1 containing 3 groups and 2 containing 3 groups.
My attempt:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def draw_pie(dist, xpos, ypos, size, ax=None):
    if ax is None:
        fig, ax = plt.subplots(figsize=(10, 8))
    # for incremental pie slices
    cumsum = np.cumsum(dist)
    cumsum = cumsum / cumsum[-1]
    pie = [0] + cumsum.tolist()
    for r1, r2 in zip(pie[:-1], pie[1:]):
        angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2)
        x = [0] + np.cos(angles).tolist()
        y = [0] + np.sin(angles).tolist()
        xy = np.column_stack([x, y])
        ax.scatter([xpos], [ypos], marker=xy, s=size)
    return ax

fig, ax = plt.subplots(figsize=(40, 40))
draw_pie([Group], 'xval', 'yval', 10000, ax=ax)
draw_pie([Group], 'xval', 'yval', 20000, ax=ax)
draw_pie([Group], 'xval', 'yval', 30000, ax=ax)
plt.show()
I'm not sure how to get 6 pie charts: if we group on XVAL and YVAL, there are 7 unique pairs. You can do something along these lines (assuming the example table has been read into a DataFrame df; a sketch of building df follows the output below):
fig, ax = plt.subplots(figsize=(40, 40))
for (x, y), d in df.groupby(['XVAL', 'YVAL']):
    dist = d['GROUP'].value_counts()
    draw_pie(dist, x, y, 10000 * len(d), ax=ax)
plt.show()
Output:
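The loop above assumes the example table is already in a DataFrame df. A minimal sketch of building it (the values are copied from the question's example data; the inline construction is only an illustration, reading from a file works just as well):
import pandas as pd

df = pd.DataFrame({
    'XVAL':  [1.3, 1.3, 4, 4, 2, 2, 1, 1, 1, 1, 3.99],
    'YVAL':  [4.5, 4.5, 2, 6, 4, 4, 1, 1, 2, 2, 4.56],
    'GROUP': ['A', 'B', 'E', 'A', 'A', 'B', 'G', 'C', 'B', 'D', 'G'],
})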
I would like to convert two arrays (x and y) into a frequency n x n matrix (n = 5), where each cell indicates the number of points it contains. This amounts to resampling both variables into five intervals and counting the number of points per cell.
I have tried using pandas pivot_table but don't know how to reference each axis coordinate.
The X and Y arrays are two dependent variables that contain values between 0 and 100.
I would really appreciate someone's help.
Thank you very much in advance.
This is an example of the code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Arrays example. They are always float type and ranging 0-100. (n_size array = 15)
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
# Plot the example data:
df.plot(x = 'X',y = 'Y', style = 'o')
This is what I have:
This is the objective matrix, saved as a df:
If you do not explicitly need to use pandas (which you don't, if it's just about a frequency matrix), consider using numpy.histogram2d:
# Sample data
x = 100*np.random.random(15)
y = 100*np.random.random(15)
Construct your bins (since your x and y bins are the same, one set is enough)
bins = np.linspace(0, 100, 5+1)
# bins = array([ 0., 20., 40., 60., 80., 100.])
Now use the histogram function:
binned, binx, biny = np.histogram2d(x, y, bins = [bins, bins])
# To get the result you desire, transpose
objmat = binned.T
Note: x values are binned along the first dimension (axis 0), which visually corresponds to 'vertical'. Hence the transpose.
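A tiny sanity check of that convention (a sketch with a single made-up point at x = 10, y = 90):
import numpy as np

bins = np.linspace(0, 100, 5 + 1)
binned, _, _ = np.histogram2d([10], [90], bins=[bins, bins])
print(binned[0, 4])    # 1.0 -> x selects the row (axis 0), y selects the column
print(binned.T[4, 0])  # 1.0 -> after transposing, rows follow y and columns follow x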
Plotting:
fig, ax = plt.subplots()
ax.grid()
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
ax.scatter(x, y)
for i in range(objmat.shape[0]):
    for j in range(objmat.shape[1]):
        # objmat[j, i] holds the count for x-bin i and y-bin j
        c = int(objmat[j, i])
        ax.text((bins[i] + bins[i + 1]) / 2, (bins[j] + bins[j + 1]) / 2, str(c),
                fontdict={'fontsize': 16, 'ha': 'center', 'va': 'center'})
Result:
You could use GroupBy.size, mapping each group to the center of its grid cell, and then use Axes.text to draw the counts:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(20)
max_val = 100
n = 5
len_group = max_val // n
x = max_val * np.random.random(15)
y = max_val * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
x_groups = df['X'] // len_group * len_group + len_group / 2
y_groups = df['Y'] // len_group * len_group + len_group / 2
fig, ax= plt.subplots(figsize=(13, 6))
ax.set_ylim(0, max_val)
ax.set_xlim(0, max_val)
df.plot(x = 'X',y = 'Y', style = 'o', ax=ax)
for i, val in df.groupby([x_groups, y_groups]).size().items():
    ax.text(*i, val, fontdict={'fontsize': 20, 'ha': 'center', 'va': 'center'})
plt.grid()
You can create bins with pd.cut, then group by the bins and unstack along the X variable, and you end up with a matrix of frequency counts.
df['Xc'] = pd.cut(df['X'], range(0, 101, 20))
df['Yc'] = pd.cut(df['Y'], range(0, 101, 20))
mat = df.groupby(['Xc', 'Yc']).size().unstack('Xc')
mat
Xc         (0, 20]  (20, 40]  (40, 60]  (60, 80]  (80, 100]
Yc
(0, 20]          0         1         1         0          0
(20, 40]         4         0         1         2          0
(40, 60]         0         0         0         0          0
(60, 80]         3         0         1         0          0
(80, 100]        1         0         1         0          0
There is no elegant solution to the plotting part of the problem. But here's what you can do.
# Calculate the counts
counts = df.groupby([df.X.astype(int) // 20,
                     df.Y.astype(int) // 20]).size().astype(str)
# Restore the original scales
counts.index = pd.MultiIndex.from_tuples([(x * 20 + 10, y * 20 + 10)
                                          for x, y in counts.index.to_list()],
                                         names=counts.index.names)
fig = plt.figure()
ax = fig.add_subplot(111)
# Plot the text labels
[ax.text(*xy, txt) for (xy, txt) in counts.items()]
# Update the axes extents
ax.axis([0, counts.index.levels[0].max() + 10,
         0, counts.index.levels[1].max() + 10])
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
# Arrays example. They are always float type and ranging 0-100. (n_size array = 15)
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
# Df created for trying to pivot and counting values per cell
df = pd.DataFrame({'X':x,'Y':y})
ir = pd.interval_range(start=0, freq=20, end=100, closed='left')
df['xbin'] = pd.cut(df['X'], bins=ir)
df['ybin'] = pd.cut(df['Y'], bins=ir)
df['xbin'] = df['xbin'].apply(lambda x: x.mid)
df['ybin'] = df['ybin'].apply(lambda x: x.mid)
fig, ax= plt.subplots()
ax.set_ylim(0, 100)
ax.set_xlim(0, 100)
for i, val in df.groupby(['xbin', 'ybin']).size().items():
    if val != 0:
        ax.text(*i, val, fontdict={'fontsize': 20, 'ha': 'center', 'va': 'center'})
One option is to call np.add.at on the ravel of the frequency matrix:
x = 100 * np.random.random(15)
y = 100 * np.random.random(15)
n = 5
points = (np.array([x, y]) / 20).astype(int)
z = np.zeros((n, n), dtype=int)
np.add.at(z.ravel(),
          np.ravel_multi_index(points, z.shape),
          np.ones(points.shape[1]))
Sample run:
print(points)
print(z)
[[0 0 0 2 4 1 2 1 1 0 1 1 3 0 0]
[0 0 1 4 0 4 1 0 1 3 3 1 0 0 3]]
[[3 1 0 2 0]
[1 2 0 1 1]
[0 1 0 0 1]
[1 0 0 0 0]
[1 0 0 0 0]]
I am using pcolormesh to show the results from some algorithm I am running.
I create the usual mesh for some x_min, x_max, etc., and define my color map
h = (x_max - x_min) / 1000.
xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, h), numpy.arange(y_min, y_max, h))
colours = ("blue", "green", "red")
cmap = colors.ListedColormap(colours)
and then do plt.pcolormesh(xx, yy, Z, alpha=0.7, cmap=cmap), where Z is the result of my prediction (each entry is 0, 1, or 2; the exact values do not matter).
Say Z = numpy.zeros(xx.shape): I should see everything blue; with Z = numpy.ones(xx.shape), everything green; and with Z = 2*numpy.ones(xx.shape), everything red, or so I thought. Instead, I always see blue.
If I add these lines:
Z[0] = 0
Z[1] = 1
Z[2] = 2
everything works as expected. It looks as if the result does not have all the possible results (0,1,2) then it defaults to use only the first color, blue, even if the result is all 2s, and I want red.
How can I force it to have the colours I desire, i.e. blue for 0, green for 1 and red for 2, in every case?
You can use clim to fix the color limits:
plt.clim(0, 3)
That will force 0 to be blue, 1 to be green, and 2 to be red.
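A minimal sketch of that approach (the grid and the Z values here are made up; only the clim call mirrors the suggestion above):
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import colors

x = np.linspace(0, 10, 6)
y = np.linspace(0, 10, 6)
xx, yy = np.meshgrid(x, y)
Z = 2 * np.ones((len(y) - 1, len(x) - 1))   # all 2s: should render red

cmap = colors.ListedColormap(("blue", "green", "red"))
plt.pcolormesh(xx, yy, Z, alpha=0.7, cmap=cmap)
plt.clim(0, 3)   # pin the limits so 0 -> blue, 1 -> green, 2 -> red
plt.colorbar()
plt.show()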
You have to normalize the ListedColormap with a BoundaryNorm:
import matplotlib.pylab as plt
import numpy as np
from matplotlib.colors import ListedColormap, BoundaryNorm
x_max = 100.
x_min = 0.
y_max = 100.
y_min = 0.
h = (x_max - x_min) / 5.
xx, yy = np.meshgrid(np.arange(x_min, x_max+h, h), np.arange(y_min, y_max+h, h))
Z = np.random.randint(3, size=(5,5))
# define color map & norm it
colours = (["blue", "green", "red"])
cmap = ListedColormap(colours)
bounds=[0,1,2,np.max(Z)+1] # discrete values of Z
norm = BoundaryNorm(bounds, cmap.N)
# for colorbar
ticks = [.5,1.5,2.5]
labels = ['0','1','2']
# plot
pcm = plt.pcolormesh(xx, yy, Z, alpha=0.7, cmap=cmap, norm=norm)
cb = plt.colorbar(pcm, cmap=cmap, norm=norm, ticks=ticks)
cb.set_ticklabels(labels)
plt.show()
Z array:
[[0 0 1 0 0]
[0 0 0 1 1]
[1 0 0 0 1]
[1 1 2 2 0]
[1 1 1 2 2]]
I'm looking to find the Python equivalent of the following Matlab statement:
vq = interp1(x, y, xq, 'nearest', 'extrap')
It looks as if numpy.interp(xq, x, y) works perfectly for linear interpolation/extrapolation.
I also looked at
F = scipy.interpolate.interp1d(x, y, kind='nearest')
which works perfectly for the nearest method, but will not perform extrapolation.
Is there anything else I've overlooked? Thanks.
For linear interpolation inside the data range with nearest (constant-value) extrapolation outside it, use numpy.interp. It does this by default.
For example:
yi = np.interp(xi, x, y)
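A quick check of that end-point behaviour (made-up numbers):
import numpy as np

x = np.array([0.0, 1.0, 2.0])
y = np.array([5.0, 7.0, 9.0])

# Inside the range np.interp is linear; outside it clamps to the end values.
print(np.interp([-10.0, 0.5, 10.0], x, y))   # [5. 6. 9.]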
Otherwise, if you just want nearest interpolation everywhere, as you describe, you can do it in a short but inefficient way (you could make this a one-liner, if you want):
def nearest_interp(xi, x, y):
    idx = np.abs(x - xi[:, None])
    return y[idx.argmin(axis=1)]
Or in a more efficient way using searchsorted:
def fast_nearest_interp(xi, x, y):
    """Assumes that x is monotonically increasing!!."""
    # Shift x points to centers
    spacing = np.diff(x) / 2
    x = x + np.hstack([spacing, spacing[-1]])
    # Append the last point in y twice for ease of use
    y = np.hstack([y, y[-1]])
    return y[np.searchsorted(x, xi)]
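A quick consistency check between the two helpers (this reuses the nearest_interp and fast_nearest_interp functions defined above; the data are made up):
import numpy as np

x = np.array([0.1, 0.3, 1.9])   # monotonically increasing, as fast_nearest_interp requires
y = np.array([4, -9, 1])
xi = np.linspace(-1, 3, 50)

# Both implementations should pick the same nearest neighbours.
print(np.array_equal(nearest_interp(xi, x, y), fast_nearest_interp(xi, x, y)))   # True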
To illustrate the difference between numpy.interp and the nearest interpolation examples above:
import numpy as np
import matplotlib.pyplot as plt

def main():
    x = np.array([0.1, 0.3, 1.9])
    y = np.array([4, -9, 1])
    xi = np.linspace(-1, 3, 200)

    fig, axes = plt.subplots(nrows=2, sharex=True, sharey=True)
    for ax in axes:
        ax.margins(0.05)
        ax.plot(x, y, 'ro')

    axes[0].plot(xi, np.interp(xi, x, y), color='blue')
    axes[1].plot(xi, nearest_interp(xi, x, y), color='green')

    kwargs = dict(x=0.95, y=0.9, ha='right', va='top')
    axes[0].set_title("Numpy's $interp$ function", **kwargs)
    axes[1].set_title('Nearest Interpolation', **kwargs)

    plt.show()

def nearest_interp(xi, x, y):
    idx = np.abs(x - xi[:, None])
    return y[idx.argmin(axis=1)]

main()
In later versions of SciPy (at least v0.19.1+), scipy.interpolate.interp1d has the option fill_value="extrapolate".
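A minimal sketch of that option (assuming SciPy 0.19.1 or newer; the x, y, and xq arrays are made up):
import numpy as np
from scipy.interpolate import interp1d

x = np.array([0.0, 1.0, 2.0])
y = np.array([5.0, 7.0, 9.0])
xq = np.array([-1.0, 0.4, 2.5])

# kind='nearest' with fill_value='extrapolate' extends the end values outward.
f = interp1d(x, y, kind='nearest', fill_value='extrapolate')
print(f(xq))   # [5. 5. 9.]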
The same keyword can also be passed when interpolating through pandas (whose nearest interpolation is backed by SciPy). For example:
>>> import pandas as pd
>>> s = pd.Series([1, 2, 3])
>>> s
0    1
1    2
2    3
dtype: int64
>>> t = pd.concat([s, pd.Series(index=s.index + 0.1)]).sort_index()
>>> t
0.0    1.0
0.1    NaN
1.0    2.0
1.1    NaN
2.0    3.0
2.1    NaN
dtype: float64
>>> t.interpolate(method='nearest')
0.0    1.0
0.1    1.0
1.0    2.0
1.1    2.0
2.0    3.0
2.1    NaN
dtype: float64
>>> t.interpolate(method='nearest', fill_value='extrapolate')
0.0    1.0
0.1    1.0
1.0    2.0
1.1    2.0
2.0    3.0
2.1    3.0
dtype: float64