Wondering how I can plot a seaborn plot onto a different matplotlib plot. Currently I have two plots (one a heatmap, the other a soccer pitch), but when I plot the heatmap onto the pitch, I get the results below. (Plotting the pitch onto the heatmap isn't pretty either.) Any ideas how to fix it?
Note: Plots don't need a colorbar and the grid structure isn't required either. Just care about the heatmap covering the entire space of the pitch. Thanks!
import pandas as pd
import numpy as np
from mplsoccer import Pitch
import seaborn as sns
nmf_shot_W = pd.read_csv('https://raw.githubusercontent.com/lucas-nelson-uiuc/datasets/main/nmf_show_W.csv').iloc[:, 1:]
nmf_shot_ThierryHenry = pd.read_csv('https://raw.githubusercontent.com/lucas-nelson-uiuc/datasets/main/nmf_show_Hth.csv')['Thierry Henry']
pitch = Pitch(pitch_type='statsbomb', line_zorder=2,
pitch_color='#22312b', line_color='#efefef')
dfdfdf = np.array(np.matmul(nmf_shot_W, nmf_shot_ThierryHenry)).reshape((24,25))
g_ax = sns.heatmap(dfdfdf)
pitch.draw(ax=g_ax)
Current output:
Desired output:
Use the built-in pitch.heatmap:
pitch.heatmap expects a stats dictionary of binned data, bin mesh, and bin centers:
stats (dict) – The keys are statistic (the calculated statistic), x_grid and y_grid (the bin's edges), and cx and cy (the bin centers).
In the mplsoccer heatmap demos, they construct this stats object using pitch.bin_statistic because they have raw data. However, you already have binned data ("calculated statistic"), so reconstruct the stats object manually by building the mesh and centers:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import Pitch
nmf_shot_W = pd.read_csv('71878281/nmf_show_W.csv', index_col=0)
nmf_shot_ThierryHenry = pd.read_csv('71878281/nmf_show_Hth.csv')['Thierry Henry']
statistic = np.dot(nmf_shot_W, nmf_shot_ThierryHenry.to_numpy()).reshape((24, 25))
# construct stats object from binned data, bin mesh, and bin centers
y, x = statistic.shape
x_grid = np.linspace(0, 120, x + 1)
y_grid = np.linspace(0, 80, y + 1)
cx = x_grid[:-1] + 0.5 * (x_grid[1] - x_grid[0])
cy = y_grid[:-1] + 0.5 * (y_grid[1] - y_grid[0])
stats = dict(statistic=statistic, x_grid=x_grid, y_grid=y_grid, cx=cx, cy=cy)
# use pitch.draw and pitch.heatmap as per mplsoccer demo
pitch = Pitch(pitch_type='statsbomb', line_zorder=2, pitch_color='#22312b', line_color='#efefef')
fig, ax = pitch.draw(figsize=(6.6, 4.125))
pcm = pitch.heatmap(stats, ax=ax, cmap='plasma')
cbar = fig.colorbar(pcm, ax=ax, shrink=0.6)
cbar.outline.set_edgecolor('#efefef')
cbar.ax.yaxis.set_tick_params(color='#efefef')
plt.setp(plt.getp(cbar.ax.axes, 'yticklabels'), color='#efefef')
Related
I have a set of data captured in a pandas data frame which I would like to plot on a contourf plot. When plotting, I can see much white space in certain areas of the contour which I'm not sure how to fix. My x-data is semilog. I'm not sure if some kind of interpolation would help, or if it is someway I am generating my mesh grid and contour itself. I will attach an image and 2 sets of data frames as examples.
contourplot
Data file can be found here: https://drive.google.com/drive/folders/13aO1_P0wzLCjZSTIgalXyaR4cdW1_Rh8?usp=sharing
import os,sys
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
#dev
import pprint
np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(suppress=True)
# start
Data = pd.read_csv('DF.csv',index_col=0)
plt.rcParams['figure.figsize'] = (16,10)
Freqs = Data.iloc[:,0] # Frequencies for data
angleFullset= ['{:03}'.format(x) for x in [*range(0,360,15)]] # test set, name of df cols in degrees
angleContour = [[int(x) for x in angleFullset],[int(x) if int(x) < 181 else int(x) - 360 for x in angleFullset]] # rename colum names to -180 to 180 deg
angleContour[0].append(angleContour[0][0]); angleContour[1].append(angleContour[1][0] - 1) # append 1 more column for last data set (which is same as first)
idx_180 = angleContour[1].index(180)
angleContour[0].insert(idx_180 + 1,-180); angleContour[1].insert(idx_180 + 1,-180) # insert another column after 180 to cover -180 case
[X,Y] = np.meshgrid(Freqs,angleContour[1])
fig,ax = plt.subplots(1,1)
ax.semilogx()
plt.hlines(0,20,20000,'k',linewidth=1.5) # zero axis
plt.vlines(100,-200,200,'k',linewidth=2) # 100Hz axis
plt.vlines(1000,-200,200,'k',linewidth=2) # 1kHz axis
plt.vlines(10000,-200,200,'k',linewidth=2) # 10kHz axis
plt.xlim([85,8000])
plt.ylim([-180,180])
plt.xticks([100,1000,8000],('100','1000','8000'))
plt.yticks(range(-180,181,30))
plt.xlabel('Frequency [Hz]')
plt.ylabel('Angle [deg]')
plt.grid(b=True,which='major'); plt.grid(b=True,which='minor')
plt.title('Contour')
newData = Data.copy()
newData.drop("Freq",axis=1,inplace=True)
newData['001'] = newData['000'] # for data from -345 to 0
newData.insert(newData.columns.get_loc('180')+1,'-180',newData['180']) # for data from -180 to -165
lev_min,lev_max,levels = -70,-19,range(-70,-19,1)
CM = ax.contourf(X,Y,newData.transpose(),cmap=matplotlib.cm.jet,levels=levels,vmin=lev_min,vmax=lev_max)
plt.colorbar(CM,label='Magnitude [dB]',fraction=0.1)
outputFileName = os.path.join(os.getcwd(),'Contour.png')
plt.savefig(outputFileName,orientation='landscape',format='png')
plt.clf()
plt.cla()
I have created a scatter plot with bokeh. I want to generate a best fit polynomial curve on the data, and superimpose the curve on the cloud of points.
I have generated a 2nd degree polyline with polyfit:
import numpy as np
from bokeh.plotting import figure, output_file, show
model2 = np.poly1d(np.polyfit(df['Dist'],df['Speed'], 2)
polyline = np.linspace(1,16000,900)
graph = figure(title = "Speed function of flight distance")
graph.scatter(df['Dist'],df['Speed'])
show(graph)
What is the instruction for showing this polyline on top of the scatter plot? I see how to generate a line of best fit, my need is for a polyline.
As mentioned in the comments, graph.line() adds a line plot. Now, we just need an evenly spaced x-range over which we plot the fitted function:
import numpy as np
from bokeh.plotting import figure, output_file, show
#data generation
import pandas as pd
np.random.seed(123)
dist = np.sort(np.random.choice(range(100), 20, replace=False))
speed = 0.3 * dist ** 2 - 2.7 * dist - 1 + np.random.randint(-10, 10, dist.size)
df = pd.DataFrame({'Dist': dist, 'Speed': speed})
model2 = np.poly1d(np.polyfit(df['Dist'], df['Speed'], 2))
x_fit = np.linspace(df['Dist'].min(), df['Dist'].max(), 100)
graph = figure(title = "Speed function of flight distance")
graph.scatter(df['Dist'],df['Speed'])
graph.line(x_fit, model2(x_fit))
show(graph)
Sample output:
My problem is calculating the area under the peaks in my FT-IR analysis. I usually work with Origin but I would like to see if I get a better result working with Python. The data I'm using is linked here and the code is below. The problem I'm facing is, I don't know how to find the start and the end of the peak to calculate the area and how to set a Baseline.
I found this answered question about how to calculate the area under multiple peaks but I don't know how to implement it in my code: How to get value of area under multiple peaks
import numpy as np
from numpy import trapz
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv(r'CuCO3.csv', skiprows=5)
print(df)
Wavenumber = df.iloc[:,0]
Absorbance = df.iloc[:,1]
Wavenumber_Peak = Wavenumber.iloc[700:916] #Where the peaks start/end that i want to calculate the area
Absorbance_Peak = Absorbance.iloc[700:916] #Where the peaks start/end that i want to calculate the area
plt.figure()
plt.plot(Wavenumber_Peak, Absorbance_Peak)
plt.show()
Plot of the peaks to calculate the area:
Okay, I have quickly added the code from the other post to your beginning and checked that it works. Unfortunately, the file that you linked did not work with your code, so I had to change some stuff in the beginning to make it work (in a very unelegant way, because I do not really know how to work with dataframes). If your local file is different and processing the file in this way does not work, then just exchange my beginning by yours.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import peakutils
df = pd.read_csv(r'CuCO3.csv', skiprows=5)
data = np.asarray([[float(y) for y in x[0].split(",")] for x in df.to_numpy()])
Wavenumber = np.arange(700, 916)
Absorbance = data[700:916,1]
indices = peakutils.indexes(Absorbance, thres=0.35, min_dist=0.1)
peak_values = [Absorbance[i] for i in indices]
peak_Wavenumbers = [Wavenumber[i] for i in indices]
plt.figure()
plt.scatter(peak_Wavenumbers, peak_values)
plt.plot(Wavenumber, Absorbance)
plt.show()
ixpeak = Wavenumber.searchsorted(peak_Wavenumbers)
ixmin = np.array([np.argmin(i) for i in np.split(Absorbance, ixpeak)])
ixmin[1:] += ixpeak
mins = Wavenumber[ixmin]
# split up the x and y values based on those minima
xsplit = np.split(Wavenumber, ixmin[1:-1])
ysplit = np.split(Absorbance, ixmin[1:-1])
# find the areas under each peak
areas = [np.trapz(ys, xs) for xs, ys in zip(xsplit, ysplit)]
# plotting stuff
plt.figure(figsize=(5, 7))
plt.subplots_adjust(hspace=.33)
plt.subplot(211)
plt.plot(Wavenumber, Absorbance, label='trace 0')
plt.plot(peak_Wavenumbers, Absorbance[ixpeak], '+', c='red', ms=10, label='peaks')
plt.plot(mins, Absorbance[ixmin], 'x', c='green', ms=10, label='mins')
plt.xlabel('dep')
plt.ylabel('indep')
plt.title('Example data')
plt.ylim(-.1, 1.6)
plt.legend()
plt.subplot(212)
plt.bar(np.arange(len(areas)), areas)
plt.xlabel('Peak number')
plt.ylabel('Area under peak')
plt.title('Area under the peaks of trace 0')
plt.show()
I'm hoping to use matplotlib to plot inter-annual variation of monthly data (below). By passing c=ds['time.year'] in plt.scatter(), I achieve the desired outcome. However, I would like to be able to connect the points with an analogous plt.plot() call. Is this possible?
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
# create y data
y = []
for yr in range(10):
for mo in range(12):
y.append(yr+mo+(yr*mo)**2)
# create datetime vector
t = pd.date_range(start='1/1/2010', periods=120, freq='M')
# combine in DataArray
ds = xr.DataArray(y, coords={'time':t}, dims=['time'])
# scatter plot with color
im = plt.scatter(ds['time.month'], ds.values, c=ds['time.year'])
plt.colorbar(im)
Output:
I have tried the following, but it does not work:
plt.plot(ds['time.month'], ds.values, c=ds['time.year'])
You can create a norm mapping the range of years to the range of colors. The norm together with the used colormap, can server as input for a ScalarMapple to create an accompanying colorbar. With the default 'viridis' colormap the code could look like:
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import pandas as pd
import xarray as xr
y = []
for yr in range(10):
for mo in range(12):
y.append(yr + mo + (yr * mo) ** 2)
t = pd.date_range(start='1/1/2010', periods=120, freq='M')
ds = xr.DataArray(y, coords={'time': t}, dims=['time'])
norm = plt.Normalize(ds['time.year'].min(), ds['time.year'].max())
cmap = plt.cm.get_cmap('viridis')
for year in range(int(ds['time.year'].min()), int(ds['time.year'].max()) + 1):
plt.plot(ds['time.month'][ds['time.year'] == year],
ds.values[ds['time.year'] == year],
ls='-', marker='o', color=cmap(norm(year)))
plt.colorbar(ScalarMappable(cmap=cmap, norm=norm))
plt.xticks(range(1, 13))
plt.show()
This question already has answers here:
Plot only on continent in matplotlib
(5 answers)
Closed 5 years ago.
I am trying to plot 1x1 degree data on a matplotlib.Basemap, and I want to fill the ocean with white. However, in order for the boundaries of the ocean to follow the coastlines drawn by matplotlib, the resolution of the white ocean mask should be much higher than the resolution of my data.
After searching around for a long time I tried the two possible solutions:
(1) maskoceans() and is_land() functions, but since my data is lower resolution than the map drawn by basemap it does not look good on the edges. I do not want to interpolate my data to higher resolution either.
(2) m.drawlsmask(), but since zorder cannot be assigned the pcolormesh plot always overlays the mask.
This code
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.basemap as bm
#Make data
lon = np.arange(0,360,1)
lat = np.arange(-90,91,1)
data = np.random.rand(len(lat),len(lon))
#Draw map
plt.figure()
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=72, lon_0=319)
m.drawcoastlines(linewidth=1, color='white')
data, lon = bm.addcyclic(data,lon)
x,y = m(*np.meshgrid(lon,lat))
plt.pcolormesh(x,y,data)
plt.savefig('1.png',dpi=300)
Produces this image:
Adding m.fillcontinents(color='white') produces the following image, which is what I need but to fill the ocean and not the land.
Edit:
m.drawmapboundary(fill_color='lightblue') also fills over land and can therefore not be used.
The desired outcome is that the oceans are white, while what I plotted with plt.pcolormesh(x,y,data) shows up over the lands.
I found a much nicer solution to the problem which uses the polygons defined by the coastlines in the map to produce a matplotlib.PathPatch that overlays the ocean areas. This solution has a much better resolution and is much faster:
from matplotlib import pyplot as plt
from mpl_toolkits import basemap as bm
from matplotlib import colors
import numpy as np
import numpy.ma as ma
from matplotlib.patches import Path, PathPatch
fig, ax = plt.subplots()
lon_0 = 319
lat_0 = 72
##some fake data
lons = np.linspace(lon_0-60,lon_0+60,10)
lats = np.linspace(lat_0-15,lat_0+15,5)
lon, lat = np.meshgrid(lons,lats)
TOPO = np.sin(np.pi*lon/180)*np.exp(lat/90)
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=lat_0, lon_0=lon_0, ax = ax)
m.drawcoastlines(linewidth=0.5)
x,y = m(lon,lat)
pcol = ax.pcolormesh(x,y,TOPO)
##getting the limits of the map:
x0,x1 = ax.get_xlim()
y0,y1 = ax.get_ylim()
map_edges = np.array([[x0,y0],[x1,y0],[x1,y1],[x0,y1]])
##getting all polygons used to draw the coastlines of the map
polys = [p.boundary for p in m.landpolygons]
##combining with map edges
polys = [map_edges]+polys[:]
##creating a PathPatch
codes = [
[Path.MOVETO] + [Path.LINETO for p in p[1:]]
for p in polys
]
polys_lin = [v for p in polys for v in p]
codes_lin = [c for cs in codes for c in cs]
path = Path(polys_lin, codes_lin)
patch = PathPatch(path,facecolor='white', lw=0)
##masking the data:
ax.add_patch(patch)
plt.show()
The output looks like this:
Original solution:
You can use an array with greater resolution in basemap.maskoceans, such that the resolution fits the continent outlines. Afterwards, you can just invert the mask and plot the masked array on top of your data.
Somehow I only got basemap.maskoceans to work when I used the full range of the map (e.g. longitudes from -180 to 180 and latitudes from -90 to 90). Given that one needs quite a high resolution to make it look nice, the computation takes a while:
from matplotlib import pyplot as plt
from mpl_toolkits import basemap as bm
from matplotlib import colors
import numpy as np
import numpy.ma as ma
fig, ax = plt.subplots()
lon_0 = 319
lat_0 = 72
##some fake data
lons = np.linspace(lon_0-60,lon_0+60,10)
lats = np.linspace(lat_0-15,lat_0+15,5)
lon, lat = np.meshgrid(lons,lats)
TOPO = np.sin(np.pi*lon/180)*np.exp(lat/90)
m = bm.Basemap(resolution='i',projection='laea', width=1500000, height=2900000, lat_ts=60, lat_0=lat_0, lon_0=lon_0, ax = ax)
m.drawcoastlines(linewidth=0.5)
x,y = m(lon,lat)
pcol = ax.pcolormesh(x,y,TOPO)
##producing a mask -- seems to only work with full coordinate limits
lons2 = np.linspace(-180,180,10000)
lats2 = np.linspace(-90,90,5000)
lon2, lat2 = np.meshgrid(lons2,lats2)
x2,y2 = m(lon2,lat2)
pseudo_data = np.ones_like(lon2)
masked = bm.maskoceans(lon2,lat2,pseudo_data)
masked.mask = ~masked.mask
##plotting the mask
cmap = colors.ListedColormap(['w'])
pcol = ax.pcolormesh(x2,y2,masked, cmap=cmap)
plt.show()
The result looks like this: