Last week I asked a question about finding a way to interpolate a surface from multiple curves (data from multiple Excel files) and someone referred me to a question which explains how to use scipy.interpolate.RBFInterpolator (How can I perform two-dimensional interpolation using scipy?).
I tried this method but I am getting a bad surface fitting (see the pictures below). Does anyone understand what is wrong with my code? I tried to change the kernel parameter but "linear" seems to be the best. Am I doing an error when I am using np.meshgrid? Thanks for the help.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from scipy.interpolate import RBFInterpolator
fig = plt.figure(figsize=(15,10),dpi=400)
ax = fig.gca(projection='3d')
# List all the results files in the folder (here 'Sress_Strain') to plot them.
results_list = os.listdir(r"C:/Users/bdhugu/Desktop/Strain_Stress")
for i in range(len(results_list)):
if i == 0:
results = pd.read_excel(r"C:/Users/bdhugu/Desktop/Strain_Stress/"+results_list[i])
strain = results["Strain (mm/mm)"]
stress = results["Stress (MPa)"]
strain_rate = results["Strain rate (s^-1)"]
if i>0:
new_results = pd.read_excel(r"C:/Users/bdhugu/Desktop/Strain_Stress/"+results_list[i])
new_strain = new_results["Strain (mm/mm)"]
new_stress = new_results["Stress (MPa)"]
new_strain_rate = new_results["Strain rate (s^-1)"]
strain = strain.append(new_strain, ignore_index=False)
stress = stress.append(new_stress, ignore_index=False)
strain_rate = strain_rate.append(new_strain_rate,ignore_index=False)
# RBFINTERPOLATOR METHOD
# ----------------------------------------------------------------------------
x_scattered = strain
y_scattered = strain_rate
z_scattered = stress
scattered_points = np.stack([x_scattered.ravel(), y_scattered.ravel()],-1)
x_dense, y_dense = np.meshgrid(np.linspace(min(strain), max(strain), 20),np.linspace(min(strain_rate), max(strain_rate), 21))
dense_points = np.stack([x_dense.ravel(), y_dense.ravel()], -1)
interpolation = RBFInterpolator(scattered_points, z_scattered.ravel(), smoothing = 0, kernel='linear',epsilon=1, degree=0)
z_dense = interpolation(dense_points).reshape(x_dense.shape)
fig = plt.figure(figsize=(15,10),dpi=400)
ax = plt.axes(projection='3d')
ax.plot_surface(x_dense, y_dense, z_dense ,cmap='viridis', edgecolor='none')
ax.invert_xaxis()
ax.set_title('Surface plot')
plt.show()
Data to interpolate
Surface fitting with RBFInterpolator
Related
The HDF-EOS website presents some useful code examples to process satellite data. I tried to utilize the OMI-based level2 retrieved datasets (swap-like). However, when I want to visualize the data using Cartopy which I though was better than basemap in Python, issue arise.
Here is the example code. I have uploaded the case data here, anyone interested can download it.
## related libraries
from netCDF4 import Dataset
import cartopy
from cartopy.io.img_tiles import StamenTerrain
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from mpl_toolkits.basemap import Basemap
## read the data
dset = f[DATAFIELD_NAME]
data = dset[:]
lat = f[LAT_NAME][:]
lon = f[LON_NAME][:]
title = dset.attrs['Title'].decode()
units = dset.attrs['Units'].decode()
_FillValue = dset.attrs['_FillValue']
add_offset = dset.attrs['Offset']
scale_factor = dset.attrs['ScaleFactor']
data[data == _FillValue] = np.nan
data = data * scale_factor + add_offset
data = np.ma.masked_where(np.isnan(data), data)
# Subset data at nCandidate = 0
data = data[0,:,:]
lon = lon[0,:,:]
lat = lat[0,:,:]
fig = plt.figure(figsize=(12,5))
ax1 = plt.subplot(121)
m = Basemap(projection='cyl', resolution='l',
llcrnrlat=-90, urcrnrlat = 90,
llcrnrlon=-180, urcrnrlon = 180)
m.drawcoastlines(linewidth=0.5)
m.drawparallels(np.arange(-90., 120., 30.), labels=[1, 0, 0, 0])
m.drawmeridians(np.arange(-180, 180., 45.), labels=[0, 0, 0, 1])
m.scatter(lon, lat, c=data, s=0.1, cmap=plt.cm.jet,
edgecolors=None, linewidth=0)
cb = m.colorbar()
cb.set_label(units)
plt.title("BASEMAP")
ax2 = plt.subplot(122,projection=ccrs.PlateCarree())
ax2.scatter(lon,lat,c=data,s=0.1,zorder =4,\
transform=ccrs.PlateCarree(), cmap = plt.cm.jet)
ax2.set_global()
plt.title("CARTOPY")
I don't know how to fix this issue?
I'm not sure why your example is not working, I can get a figure using the following code that is basically the same as yours:
from matplotlib.colors import LogNorm
FILE_NAME = 'OMI-Aura_L2G-OMNO2G_2016m0526_v003-2016m0527t184011.he5'
path = '/HDFEOS/GRIDS/ColumnAmountNO2/Data Fields/'
DATAFIELD_NAME = path + 'ColumnAmountNO2'
f=h5py.File(FILE_NAME, mode='r')
dset = f[DATAFIELD_NAME]
data =dset[:].astype(np.float64)
# Retrieve any attributes that may be needed later.
# String attributes actually come in as the bytes type and should
# be decoded to UTF-8 (python3).
scale = f[DATAFIELD_NAME].attrs['ScaleFactor']
offset = f[DATAFIELD_NAME].attrs['Offset']
missing_value = f[DATAFIELD_NAME].attrs['MissingValue']
fill_value = f[DATAFIELD_NAME].attrs['_FillValue']
title = f[DATAFIELD_NAME].attrs['Title'].decode()
units = f[DATAFIELD_NAME].attrs['Units'].decode()
# Retrieve the geolocation data.
latitude = f[path + 'Latitude'][:]
longitude = f[path + 'Longitude'][:]
latitude=latitude[0,:,:]
longitude=longitude[0,:,:]
data=data[0,:,:]
data[data == missing_value] = np.nan
data[data == fill_value] = np.nan
data = scale * (data - offset)
datam = np.ma.masked_where(np.isnan(data), data)
#Figure
fig=plt.figure()
axs=plt.subplot(111,projection=ccrs.PlateCarree())
pcm=axs.scatter(longitude,latitude,c=datam,s=0.1,cmap='viridis',norm=LogNorm(vmin=1e14,vmax=1e17))
axs.add_feature(cfeature.COASTLINE)
#colorbar
fig.subplots_adjust(right=0.87)
cbar_ax = fig.add_axes([0.89, 0.3, 0.04, 0.4])
cbar=fig.colorbar(pcm,cax=cbar_ax, extend='both', orientation='vertical')
this is the result:
I used a log color scale and set some reasonable values for your colorscale to make the plot readable. In addition I used viridis color map because jet is not a perceptually uniform colormap, and it should not be used!
p.s.
It took a while for me to load your dataset because your example is not clear at all. For example you didn't specify which datafield were you plotting or how to read in the dataset. Try to edit your question to make it more clear!
I am using a github repository called ptitprince, which is derived from seaborn and matplotlib, to generate graphs.
For example, this is the code using the ptitprince repo:
# coding: utf8
import pandas as pd
import ptitprince as pt
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
df = pd.read_csv ("u118phag.csv", sep= ",")
df.head()
savefigs = True
figs_dir = 'figs'
if savefigs:
# Make the figures folder if it doesn't yet exist
if not os.path.isdir('figs'):
os.makedirs('figs')
#automation
f, ax = plt.subplots(figsize=(4, 5))
#f.subplots_adjust(hspace=0,wspace=0)
dx = "Treatment"; dy = "score"; ort = "v"; pal = "Set2"; sigma = .2
ax=pt.RainCloud(x = dx, y = dy, data = df, palette = pal, bw = sigma,
width_viol = .6, ax = ax, move=.2, offset=.1, orient = ort, pointplot = True)
f.show()
if savefigs:
f.savefig('figs/figure20.png', bbox_inches='tight', dpi=500)
which generates the following graph
The raw code not using ptitprince is as follows and produces the same graph as above:
# coding: utf8
import pandas as pd
import ptitprince as pt
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
df = pd.read_csv ("u118phag.csv", sep= ",")
df.head()
savefigs = True
figs_dir = 'figs'
if savefigs:
# Make the figures folder if it doesn't yet exist
if not os.path.isdir('figs'):
os.makedirs('figs')
f, ax = plt.subplots(figsize=(7, 5))
dy="Treatment"; dx="score"; ort="h"; pal = sns.color_palette(n_colors=1)
#adding color
pal = "Set2"
f, ax = plt.subplots(figsize=(7, 5))
ax=pt.half_violinplot( x = dx, y = dy, data = df, palette = pal, bw = .2, cut = 0.,
scale = "area", width = .6, inner = None, orient = ort)
ax=sns.stripplot( x = dx, y = dy, data = df, palette = pal, edgecolor = "white",
size = 3, jitter = 1, zorder = 0, orient = ort)
ax=sns.boxplot( x = dx, y = dy, data = df, color = "black", width = .15, zorder = 10,\
showcaps = True, boxprops = {'facecolor':'none', "zorder":10},\
showfliers=True, whiskerprops = {'linewidth':2, "zorder":10},\
saturation = 1, orient = ort)
if savefigs:
f.savefig('figs/figure21.png', bbox_inches='tight', dpi=500)
Now, what I'm trying to do is to figure out how to modify the graph so that I can (1) move the plots closer together, so there is not so much white space between them, and (2) shift the x-axis to the right, so that I can make the distribution (violin) plot wider without it getting cut in half by the y-axis.
I have tried to play around with subplots_adjust() as you can see in the first box of code, but I receive an error. I cannot figure out how to appropriately use this function, or even if that will actually bring the different graphs closer together.
I also know that I can increase the distribution size by increasing this value width = .6, but if I increase it too high, the distribution plot begins to being cut off by the y-axis. I can't figure out if I need to adjust the overall plot using the plt.subplots,or if I need to move each individual plot.
Any advice or recommendations on how to change the visuals of the graph? I've been staring at this for awhile, and I can't figure out how to make seaborn/matplotlib play nicely with ptitprince.
You may try to change the interval of X-axis being shown using ax.set_xbound (put a lower value than you currently have for the beginning).
I am trying to plot a heatmap and I found this code online although I am having some difficulty using it. I am trying to to do hierarchical clustering and test gene methylation of one another, I made a DataFrame using pandas where I have Betavalues and Genes as separate columns (df4). Later I converted it to a matrix like scipy prefers.I tried running the code with my matrix but it gave me a value error saying "ValueError: could not convert string to float: 'tAKR'", I already removed the N/A and anything that is not a gene or a valid Beta value.
I was wondering if you may have any suggestions?
Below I have attached a picture of what my dataframe looks like before changing into a matrix. [![enter image description here][1]][1]
import scipy
import pylab
import scipy.cluster.hierarchy as sch
df5 = df4.as_matrix()
# Generate random features and distance matrix.
x = scipy.rand(40)
D = scipy.zeros([40,40])
for i in range(40):
for j in range(40):
D[i,j] = abs(x[i] - x[j])
# Compute and plot first dendrogram.
fig = pylab.figure(figsize=(8,8))
ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
Y = sch.linkage(df5, method='centroid')
Z1 = sch.dendrogram(Y, orientation='right')
ax1.set_xticks([])
ax1.set_yticks([])
# Compute and plot second dendrogram.
ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
Y = sch.linkage(df5, method='single')
Z2 = sch.dendrogram(Y)
ax2.set_xticks([])
ax2.set_yticks([])
# Plot distance matrix.
axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
idx1 = Z1['leaves']
idx2 = Z2['leaves']
D = D[idx1,:]
D = D[:,idx2]
im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
axmatrix.set_xticks([])
axmatrix.set_yticks([])
# Plot colorbar.
axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
pylab.colorbar(im, cax=axcolor)
fig.show()
fig.savefig('dendrogram.png')
You may be interested in an out-of-the-box solution if you're not comfortable with this. Check out seaboarn's clustermap, which accepts pandas data frames as inputs.
>>> import seaborn as sns; sns.set()
>>> flights = sns.load_dataset("flights")
>>> flights = flights.pivot("month", "year", "passengers")
>>> g = sns.clustermap(flights)
I am not affiliated with Seaborn.
I'm trying to do a quite simple scatter plot with error bars and semilogy scale. What is a little bit different from tutorials I have found is that the color of the scatterplot should trace a different quantity. On one hand, I was able to do a scatterplot with the errorbars with my data, but just with one color. On the other hand, I realized a scatterplot with the right colors, but without the errorbars.
I'm not able to combine the two different things.
Here an example using fake data:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
n=100
Lx_gas = 1e40*np.random.random(n) + 1e37
Tx_gas = np.random.random(n) + 0.5
Lx_plus_error = Lx_gas
Tx_plus_error = Tx_gas/2.
Tx_minus_error = Tx_gas/4.
#actually positive numbers, this is the quantity that should be traced by the
#color, in this example I use random numbers
Lambda = np.random.random(n)
#this is actually different from zero, but I want to be sure that this simple
#code works with the log axis
Lx_minus_error = np.zeros_like(Lx_gas)
#normalize the color, to be between 0 and 1
colors = np.asarray(Lambda)
colors -= colors.min()
colors *= (1./colors.max())
#build the error arrays
Lx_error = [Lx_minus_error, Lx_plus_error]
Tx_error = [Tx_minus_error, Tx_plus_error]
##--------------
##important part of the script
##this works, but all the dots are of the same color
#plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error,fmt='o')
##this is what is should be in terms of colors, but it is without the error bars
#plt.scatter(Tx_gas, Lx_gas, marker='s', c=colors)
##what I tried (and failed)
plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error,\
color=colors, fmt='o')
ax = plt.gca()
ax.set_yscale('log')
plt.show()
I even tried to plot the scatterplot after the errorbar, but for some reason everything plotted on the same window is put in background with respect to the errorplot.
Any ideas?
Thanks!
You can set the color to the LineCollection object returned by the errorbar as described here.
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
n=100
Lx_gas = 1e40*np.random.random(n) + 1e37
Tx_gas = np.random.random(n) + 0.5
Lx_plus_error = Lx_gas
Tx_plus_error = Tx_gas/2.
Tx_minus_error = Tx_gas/4.
#actually positive numbers, this is the quantity that should be traced by the
#color, in this example I use random numbers
Lambda = np.random.random(n)
#this is actually different from zero, but I want to be sure that this simple
#code works with the log axis
Lx_minus_error = np.zeros_like(Lx_gas)
#normalize the color, to be between 0 and 1
colors = np.asarray(Lambda)
colors -= colors.min()
colors *= (1./colors.max())
#build the error arrays
Lx_error = [Lx_minus_error, Lx_plus_error]
Tx_error = [Tx_minus_error, Tx_plus_error]
sct = plt.scatter(Tx_gas, Lx_gas, marker='s', c=colors)
cb = plt.colorbar(sct)
_, __ , errorlinecollection = plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error, marker = '', ls = '', zorder = 0)
error_color = sct.to_rgba(colors)
errorlinecollection[0].set_color(error_color)
errorlinecollection[1].set_color(error_color)
ax = plt.gca()
ax.set_yscale('log')
plt.show()
I have plot a regression line. now I want to extrapolate it. I have tried with np.arange but it didn't work for me? I want to extend the line.
Another question is how i can make proper uncertainty intervals instead of adding a new formula.
import numpy as np
import datetime
import matplotlib.dates
import matplotlib.pyplot as plt
from scipy import polyfit, polyval
kwargs = dict(delimiter = '\t',\
skip_header = 0,\
missing_values = 'NaN',\
converters = {0:matplotlib.dates.strpdate2num('%d-%m-%Y %H:%M')},\
dtype = float,\
names = True,\
)
ratingcats = np.genfromtxt('C:\Users\ker\Documents\Discharge_and_stageheight_Catsop.txt',**kwargs)
dis_rat = ratingcats['discharge'] #change names of collumns
stage_rat = ratingcats['stage'] - 79.331
#mask NaN
dis_ratM = np.ma.masked_array(dis_rat,mask=np.isnan(dis_rat)).compressed()
stage_ratM = np.ma.masked_array(stage_rat,mask=np.isnan(dis_rat)).compressed()
#sort
sort_ind = np.argsort(stage_ratM)
stage_ratM = stage_ratM[sort_ind]
dis_ratM = dis_ratM[sort_ind]
#regression
a1,b1,c1 = polyfit(stage_ratM, dis_ratM, 2)
discharge_pred = polyval([a1,b1,c1],stage_ratM)
print 'regression coefficients'
print (a1,b1,c1)
#create upper and lower uncertainty
upper = discharge_pred*1.15
lower = discharge_pred*0.85
#create scatterplot
plt.scatter(stage_rat,dis_rat,color='b',label='Rating curve')
plt.plot(stage_ratM,discharge_pred,'r-',label='regression line')
plt.plot(stage_ratM,upper,'r--',label='15% error')
plt.plot(stage_ratM,lower,'r--')
plt.title('Rating curve Catsop')
plt.ylabel('discharge')
plt.ylim(0,1)
plt.xlabel('stageheight[m]')
plt.legend(loc='upper left', title='Legend')
plt.grid(True)
plt.show()
Instead of only using stage_ratM for your prediction, use np.arange:
prediction_extrapolation = polyval([a1,b1,c1], np.arange(60,100,1))
plt.plot(np.arange(60,100,1),prediction_extrapolation,'r-',label='regression line')
For your second question you might want to look into the command plt.errorbar Example.