matplotlib contourf plot sparsity whitespace, need interpolation? - python

I have a set of data captured in a pandas data frame which I would like to plot on a contourf plot. When plotting, I can see much white space in certain areas of the contour which I'm not sure how to fix. My x-data is semilog. I'm not sure if some kind of interpolation would help, or if it is someway I am generating my mesh grid and contour itself. I will attach an image and 2 sets of data frames as examples.
contourplot
Data file can be found here: https://drive.google.com/drive/folders/13aO1_P0wzLCjZSTIgalXyaR4cdW1_Rh8?usp=sharing
import os,sys
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
#dev
import pprint
np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(suppress=True)
# start
Data = pd.read_csv('DF.csv',index_col=0)
plt.rcParams['figure.figsize'] = (16,10)
Freqs = Data.iloc[:,0] # Frequencies for data
angleFullset= ['{:03}'.format(x) for x in [*range(0,360,15)]] # test set, name of df cols in degrees
angleContour = [[int(x) for x in angleFullset],[int(x) if int(x) < 181 else int(x) - 360 for x in angleFullset]] # rename colum names to -180 to 180 deg
angleContour[0].append(angleContour[0][0]); angleContour[1].append(angleContour[1][0] - 1) # append 1 more column for last data set (which is same as first)
idx_180 = angleContour[1].index(180)
angleContour[0].insert(idx_180 + 1,-180); angleContour[1].insert(idx_180 + 1,-180) # insert another column after 180 to cover -180 case
[X,Y] = np.meshgrid(Freqs,angleContour[1])
fig,ax = plt.subplots(1,1)
ax.semilogx()
plt.hlines(0,20,20000,'k',linewidth=1.5) # zero axis
plt.vlines(100,-200,200,'k',linewidth=2) # 100Hz axis
plt.vlines(1000,-200,200,'k',linewidth=2) # 1kHz axis
plt.vlines(10000,-200,200,'k',linewidth=2) # 10kHz axis
plt.xlim([85,8000])
plt.ylim([-180,180])
plt.xticks([100,1000,8000],('100','1000','8000'))
plt.yticks(range(-180,181,30))
plt.xlabel('Frequency [Hz]')
plt.ylabel('Angle [deg]')
plt.grid(b=True,which='major'); plt.grid(b=True,which='minor')
plt.title('Contour')
newData = Data.copy()
newData.drop("Freq",axis=1,inplace=True)
newData['001'] = newData['000'] # for data from -345 to 0
newData.insert(newData.columns.get_loc('180')+1,'-180',newData['180']) # for data from -180 to -165
lev_min,lev_max,levels = -70,-19,range(-70,-19,1)
CM = ax.contourf(X,Y,newData.transpose(),cmap=matplotlib.cm.jet,levels=levels,vmin=lev_min,vmax=lev_max)
plt.colorbar(CM,label='Magnitude [dB]',fraction=0.1)
outputFileName = os.path.join(os.getcwd(),'Contour.png')
plt.savefig(outputFileName,orientation='landscape',format='png')
plt.clf()
plt.cla()

Related

How to plot heatmap onto mplsoccer pitch?

Wondering how I can plot a seaborn plot onto a different matplotlib plot. Currently I have two plots (one a heatmap, the other a soccer pitch), but when I plot the heatmap onto the pitch, I get the results below. (Plotting the pitch onto the heatmap isn't pretty either.) Any ideas how to fix it?
Note: Plots don't need a colorbar and the grid structure isn't required either. Just care about the heatmap covering the entire space of the pitch. Thanks!
import pandas as pd
import numpy as np
from mplsoccer import Pitch
import seaborn as sns
nmf_shot_W = pd.read_csv('https://raw.githubusercontent.com/lucas-nelson-uiuc/datasets/main/nmf_show_W.csv').iloc[:, 1:]
nmf_shot_ThierryHenry = pd.read_csv('https://raw.githubusercontent.com/lucas-nelson-uiuc/datasets/main/nmf_show_Hth.csv')['Thierry Henry']
pitch = Pitch(pitch_type='statsbomb', line_zorder=2,
pitch_color='#22312b', line_color='#efefef')
dfdfdf = np.array(np.matmul(nmf_shot_W, nmf_shot_ThierryHenry)).reshape((24,25))
g_ax = sns.heatmap(dfdfdf)
pitch.draw(ax=g_ax)
Current output:
Desired output:
Use the built-in pitch.heatmap:
pitch.heatmap expects a stats dictionary of binned data, bin mesh, and bin centers:
stats (dict) – The keys are statistic (the calculated statistic), x_grid and y_grid (the bin's edges), and cx and cy (the bin centers).
In the mplsoccer heatmap demos, they construct this stats object using pitch.bin_statistic because they have raw data. However, you already have binned data ("calculated statistic"), so reconstruct the stats object manually by building the mesh and centers:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import Pitch
nmf_shot_W = pd.read_csv('71878281/nmf_show_W.csv', index_col=0)
nmf_shot_ThierryHenry = pd.read_csv('71878281/nmf_show_Hth.csv')['Thierry Henry']
statistic = np.dot(nmf_shot_W, nmf_shot_ThierryHenry.to_numpy()).reshape((24, 25))
# construct stats object from binned data, bin mesh, and bin centers
y, x = statistic.shape
x_grid = np.linspace(0, 120, x + 1)
y_grid = np.linspace(0, 80, y + 1)
cx = x_grid[:-1] + 0.5 * (x_grid[1] - x_grid[0])
cy = y_grid[:-1] + 0.5 * (y_grid[1] - y_grid[0])
stats = dict(statistic=statistic, x_grid=x_grid, y_grid=y_grid, cx=cx, cy=cy)
# use pitch.draw and pitch.heatmap as per mplsoccer demo
pitch = Pitch(pitch_type='statsbomb', line_zorder=2, pitch_color='#22312b', line_color='#efefef')
fig, ax = pitch.draw(figsize=(6.6, 4.125))
pcm = pitch.heatmap(stats, ax=ax, cmap='plasma')
cbar = fig.colorbar(pcm, ax=ax, shrink=0.6)
cbar.outline.set_edgecolor('#efefef')
cbar.ax.yaxis.set_tick_params(color='#efefef')
plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#efefef')

Reading specific rows and plotting using Matplotlib

I have an Excel sheet that has a column of image frames. These frames numbers are not uniformly distributed, e.g. frame 1 may have entries from row 1 to 20 and frame 2 from 21 to 25 and so on. I want to read this data from an Excel sheet that has x and y coordinate for each frame and plot these x and y coordinate in a scattered plot using matplotlib. Here's my code, frame numbers are identified as image index.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
from matplotlib.pyplot import figure
df_xlsx = pd.read_excel('X10.xlsx')
temp = df_xlsx['Image index'][0]
i = 0; #number of the row
xList = []
yList = []
dt = df_xlsx.loc[df_xlsx['Image index'] == 19]
xList = np.array(dt['X position'])
yList = np.array(dt['Y position'])
rList = np.array(dt['Diameter'])
figure(figsize=(10.24,7.68), dpi=100)
fig, ax = plt.subplots()
plt.xlim([0,1024])
plt.ylim([0,768])
plt.scatter(xList, yList, color ='r')
plt.axis('off')
plt.gcf().set_size_inches((10.24,7.68))
for i in range(len(xList)):
circle1 = plt.Circle((xList[i], yList[i]), rList[i], color='r')
ax.add_artist(circle1)
plt.tight_layout(pad=0)
plt.savefig('f=19.png',dpi=100)
plt.show()
Excel sheet example
The problem is every time I need to enter the image index and then save the plot. Can this be done in a loop such that the plot is continuously generated as different plots for each frame number (index frames)? This will save me a lot of time, as I have lots of frames and excel sheets. I am new to Python.
You can use groupby to step over the image index:
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel('X10.xlsx')
for idx, group in df.groupby('Image index'):
fig, ax = plt.subplots(figsize=(10.24, 7.68), dpi=100)
diameter = 1 * group['Diameter']**2
ax.scatter(group['X position'], group['Y position'], s=diameter)
ax.set_xlim(0, 1024)
ax.set_ylim(0, 768)
ax.axis('off')
plt.tight_layout(pad=0)
plt.savefig(f'Image_index_{idx}.png', dpi=100)
Some notes on the other changes I made:
You don't need to cast the DataFrame columns to arrays or lists.
You can pass a size parameter s to plt.scatter() to make the circles; you'll just need to scale the numbers to fit your scale. E.g. you could multiply by some factor other than 1. Note that in matplotlib you are specifying the area of the marker, not the diameter.

How to use pandas with matplotlib to create 3D plots

I am struggling a bit with the pandas transformations needed to make data render in 3D on matplot lib. The data I have is usually in columns of numbers (usually time and some value). So lets create some test data to illustrate.
import pandas as pd
pattern = ("....1...."
"....1...."
"..11111.."
".1133311."
"111393111"
".1133311."
"..11111.."
"....1...."
"....1....")
# create the data and coords
Zdata = list(map(lambda d:0 if d == '.' else int(d), pattern))
Zinverse = list(map(lambda d:1 if d == '.' else -int(d), pattern))
Xdata = [x for y in range(1,10) for x in range(1,10)]
Ydata = [y for y in range(1,10) for x in range(1,10)]
# pivot the data into columns
data = [d for d in zip(Xdata,Ydata,Zdata,Zinverse)]
# create the data frame
df = pd.DataFrame(data, columns=['X','Y','Z',"Zi"], index=zip(Xdata,Ydata))
df.head(5)
Edit: This block of data is demo data that would normally come from a query on a
database that may need more cleaning and transforms before plotting. In this case data is already aligned and there are no problems aside having one more column we don't need (Zi).
So the numbers in pattern are transferred into height data in the Z column of df ('Zi' being the inverse image) and with that as the data frame I've struggled to come up with this pivot method which is 3 separate operations. I wonder if that can be better.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.cm as cm
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
Xs = df.pivot(index='X', columns='Y', values='X').values
Ys = df.pivot(index='X', columns='Y', values='Y').values
Zs = df.pivot(index='X', columns='Y', values='Z').values
ax.plot_surface(Xs,Ys,Zs, cmap=cm.RdYlGn)
plt.show()
Although I have something working I feel there must be a better way than what I'm doing. On a big data set I would imagine doing 3 pivots is an expensive way to plot something. Is there a more efficient way to transform this data ?
I guess you can avoid some steps during the preparation of the data by not using pandas (but only numpy arrays) and by using some convenience fonctions provided by numpy such as linespace and meshgrid.
I rewrote your code to do so, trying to keep the same logic and the same variable names :
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
pattern = ("....1...."
"....1...."
"..11111.."
".1133311."
"111393111"
".1133311."
"..11111.."
"....1...."
"....1....")
# Extract the value according to your logic
Zdata = list(map(lambda d:0 if d == '.' else int(d), pattern))
# Assuming the pattern is always a square
size = int(len(Zdata) ** 0.5)
# Create a mesh grid for plotting the surface
Xdata = np.linspace(1, size, size)
Ydata = np.linspace(1, size, size)
Xs, Ys = np.meshgrid(Xdata, Ydata)
# Convert the Zdata to a numpy array with the appropriate shape
Zs = np.array(Zdata).reshape((size, size))
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Plot the surface
ax.plot_surface(Xs, Ys, Zs, cmap=cm.RdYlGn)
plt.show()

Fill oceans in basemap [duplicate]

This question already has answers here:
Plot only on continent in matplotlib
(5 answers)
Closed 5 years ago.
I am trying to plot 1x1 degree data on a matplotlib.Basemap, and I want to fill the ocean with white. However, in order for the boundaries of the ocean to follow the coastlines drawn by matplotlib, the resolution of the white ocean mask should be much higher than the resolution of my data.
After searching around for a long time I tried the two possible solutions:
(1) maskoceans() and is_land() functions, but since my data is lower resolution than the map drawn by basemap it does not look good on the edges. I do not want to interpolate my data to higher resolution either.
(2) m.drawlsmask(), but since zorder cannot be assigned the pcolormesh plot always overlays the mask.
This code
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.basemap as bm
#Make data
lon = np.arange(0,360,1)
lat = np.arange(-90,91,1)
data = np.random.rand(len(lat),len(lon))
#Draw map
plt.figure()
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=72, lon_0=319)
m.drawcoastlines(linewidth=1, color='white')
data, lon = bm.addcyclic(data,lon)
x,y = m(*np.meshgrid(lon,lat))
plt.pcolormesh(x,y,data)
plt.savefig('1.png',dpi=300)
Produces this image:
Adding m.fillcontinents(color='white') produces the following image, which is what I need but to fill the ocean and not the land.
Edit:
m.drawmapboundary(fill_color='lightblue') also fills over land and can therefore not be used.
The desired outcome is that the oceans are white, while what I plotted with plt.pcolormesh(x,y,data) shows up over the lands.
I found a much nicer solution to the problem which uses the polygons defined by the coastlines in the map to produce a matplotlib.PathPatch that overlays the ocean areas. This solution has a much better resolution and is much faster:
from matplotlib import pyplot as plt
from mpl_toolkits import basemap as bm
from matplotlib import colors
import numpy as np
import numpy.ma as ma
from matplotlib.patches import Path, PathPatch
fig, ax = plt.subplots()
lon_0 = 319
lat_0 = 72
##some fake data
lons = np.linspace(lon_0-60,lon_0+60,10)
lats = np.linspace(lat_0-15,lat_0+15,5)
lon, lat = np.meshgrid(lons,lats)
TOPO = np.sin(np.pi*lon/180)*np.exp(lat/90)
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=lat_0, lon_0=lon_0, ax = ax)
m.drawcoastlines(linewidth=0.5)
x,y = m(lon,lat)
pcol = ax.pcolormesh(x,y,TOPO)
##getting the limits of the map:
x0,x1 = ax.get_xlim()
y0,y1 = ax.get_ylim()
map_edges = np.array([[x0,y0],[x1,y0],[x1,y1],[x0,y1]])
##getting all polygons used to draw the coastlines of the map
polys = [p.boundary for p in m.landpolygons]
##combining with map edges
polys = [map_edges]+polys[:]
##creating a PathPatch
codes = [
[Path.MOVETO] + [Path.LINETO for p in p[1:]]
for p in polys
]
polys_lin = [v for p in polys for v in p]
codes_lin = [c for cs in codes for c in cs]
path = Path(polys_lin, codes_lin)
patch = PathPatch(path,facecolor='white', lw=0)
##masking the data:
ax.add_patch(patch)
plt.show()
The output looks like this:
Original solution:
You can use an array with greater resolution in basemap.maskoceans, such that the resolution fits the continent outlines. Afterwards, you can just invert the mask and plot the masked array on top of your data.
Somehow I only got basemap.maskoceans to work when I used the full range of the map (e.g. longitudes from -180 to 180 and latitudes from -90 to 90). Given that one needs quite a high resolution to make it look nice, the computation takes a while:
from matplotlib import pyplot as plt
from mpl_toolkits import basemap as bm
from matplotlib import colors
import numpy as np
import numpy.ma as ma
fig, ax = plt.subplots()
lon_0 = 319
lat_0 = 72
##some fake data
lons = np.linspace(lon_0-60,lon_0+60,10)
lats = np.linspace(lat_0-15,lat_0+15,5)
lon, lat = np.meshgrid(lons,lats)
TOPO = np.sin(np.pi*lon/180)*np.exp(lat/90)
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=lat_0, lon_0=lon_0, ax = ax)
m.drawcoastlines(linewidth=0.5)
x,y = m(lon,lat)
pcol = ax.pcolormesh(x,y,TOPO)
##producing a mask -- seems to only work with full coordinate limits
lons2 = np.linspace(-180,180,10000)
lats2 = np.linspace(-90,90,5000)
lon2, lat2 = np.meshgrid(lons2,lats2)
x2,y2 = m(lon2,lat2)
pseudo_data = np.ones_like(lon2)
masked = bm.maskoceans(lon2,lat2,pseudo_data)
masked.mask = ~masked.mask
##plotting the mask
cmap = colors.ListedColormap(['w'])
pcol = ax.pcolormesh(x2,y2,masked, cmap=cmap)
plt.show()
The result looks like this:

numpy fft returns orginal frequency when plotting

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv('signal180_single.csv', sep=',', header=None)
x = df.values
length = len(x)
# frequency 0.02s
fs = 50.0
t = np.arange(0, length/fs,1.0/fs)
xF = np.fft.fft(x)
N = len(xF)
xF = xF[0:N/2]
# plot frequencys from 0 to fs, with num = N/2
fr = np.linspace(0,fs,N/2)
plt.figure()
plt.subplot(211)
plt.plot(t,x)
plt.subplot(212)
plt.plot(fr, abs(xF))
plt.show()
I am writing 180000 floating point values into an array from file.
The values are sampled at 50Hz, and contains a sinus of 2Hz.
Then I plot the frequency in the upper plot window.
I want to plot the frequency specter of the frequency in the lower plot window, but I get the same values as in the upper plot window.
Can anyone see where the error is?
When I plot the formula x = np.sin(10*t) + np.cos(3*t), I get the frequency’s. But not when I read the sinus from a file or array.
the line
x = df.values
returns a 180000 x 1 array. Where every value is stored separately. In this case replace the line with:
x = df.values.ravel()
And your script will work.

Categories