Python Matplotlib: Difference between two NetCDF datasets

I am trying to map the difference between climate simulation data and observed data over a set geographical area.
To create the map of just the climate simulation, I am using this code:
import matplotlib.pyplot as plt
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
def main():
    # bring in all the models we need and give them a name
    CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
    # Load exactly one cube from the given file
    CCCma = iris.load_cube(CCCma)
    # we are only interested in the latitude and longitude relevant to Malawi
    Malawi = iris.Constraint(grid_longitude=lambda v: 31 <= v <= 36.5,
                             grid_latitude=lambda v: -18. <= v <= -8.)
    CCCma = CCCma.extract(Malawi)
    # time constraint to make all series the same
    iris.FUTURE.cell_datetime_objects = True
    t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
    CCCma = CCCma.extract(t_constraint)
    # Convert units to match: the CORDEX data is in Kelvin but the observed data is in
    # Celsius, and we would like to show all data in Celsius
    CCCma.convert_units('Celsius')
    # plot map with physical features
    cmap = plt.cm.afmhot_r
    ax = plt.axes(projection=cartopy.crs.PlateCarree())
    ax.add_feature(cartopy.feature.COASTLINE)
    ax.add_feature(cartopy.feature.BORDERS)
    ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
    ax.add_feature(cartopy.feature.RIVERS)
    # set map boundary
    ax.set_extent([31, 36.5, -8, -18])
    # set axis tick marks
    ax.set_xticks([32, 34, 36])
    ax.set_yticks([-9, -11, -13, -15, -17])
    lon_formatter = LongitudeFormatter(zero_direction_label=True)
    lat_formatter = LatitudeFormatter()
    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    data = CCCma
    # take mean of data over all time
    plot = iplt.contourf(data.collapsed('time', iris.analysis.MEAN),
                         cmap=cmap,
                         levels=[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
                         extend='both')
    # add colour bar index
    plt.colorbar(plot)
    # give map a title
    plt.title('RCP4.5 Mean Temperature 1989-2008', fontsize=10)
    plt.show()

if __name__ == '__main__':
    main()
How can I amend this to take the difference between the two datasets? I tried this code:
import matplotlib.pyplot as plt
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
    # PART 1: CORDEX MODELS
    # bring in all the models we need and give them a name
    CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
    # Load exactly one cube from the given file
    CCCma = iris.load_cube(CCCma)
    # we are only interested in the latitude and longitude relevant to Malawi
    Malawi = iris.Constraint(grid_longitude=lambda v: 31 <= v <= 36.5,
                             grid_latitude=lambda v: -18. <= v <= -8.)
    CCCma = CCCma.extract(Malawi)
    # time constraint to make all series the same
    iris.FUTURE.cell_datetime_objects = True
    t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
    CCCma = CCCma.extract(t_constraint)
    # PART 2: OBSERVED DATA
    # bring in all the files we need and give them a name
    CRU = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
    # Load exactly one cube from the given file
    CRU = iris.load_cube(CRU, 'near-surface temperature')
    # we are only interested in the latitude and longitude relevant to Malawi
    Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36.,
                             latitude=lambda v: -17. <= v <= -9.)
    CRU = CRU.extract(Malawi)
    # time constraint to make all series the same
    iris.FUTURE.cell_datetime_objects = True
    t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
    CRU = CRU.extract(t_constraint)
    # PART 3: FORMAT DATA
    # Convert units to match
    CCCma.convert_units('Celsius')
    CRU.convert_units('Celsius')
    # Take difference between two datasets
    Bias_CCCma = CCCma - CRU
    # PART 4: PLOT MAP
    # plot map with physical features
    cmap = plt.cm.afmhot_r
    ax = plt.axes(projection=cartopy.crs.PlateCarree())
    ax.add_feature(cartopy.feature.COASTLINE)
    ax.add_feature(cartopy.feature.BORDERS)
    ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
    ax.add_feature(cartopy.feature.RIVERS)
    # set map boundary
    ax.set_extent([31, 36.5, -8, -18])
    # set axis tick marks
    ax.set_xticks([32, 34, 36])
    ax.set_yticks([-9, -11, -13, -15, -17])
    lon_formatter = LongitudeFormatter(zero_direction_label=True)
    lat_formatter = LatitudeFormatter()
    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    data = Bias_CCCma
    # take mean of data over all time
    plot = iplt.contourf(data.collapsed('time', iris.analysis.MEAN),
                         cmap=cmap,
                         levels=[15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
                         extend='both')
    # add colour bar index
    plt.colorbar(plot)
    # give map a title
    plt.title('RCP4.5 Mean Temperature 1989-2008', fontsize=10)
    plt.show()

if __name__ == '__main__':
    main()
However this gives me the following error:
ValueError: This operation cannot be performed as there are differing coordinates (grid_latitude, grid_longitude, time) remaining which cannot be ignored.
I was pretty sure this wasn't going to be so simple, but I'm not sure how to fix it. Any ideas? TIA!

My guess is that CCCma and CRU are on different grids, so when you try to subtract them you get an error. You probably need to interpolate them to the same grid first (otherwise, how would iris know which grid you want the result to lie on?).

Iris is very strict about matching up the cube coordinates for binary operations, and there is an open issue discussing whether and how to make this more flexible for version 2. In the meantime, if your cubes are the same shape and you don't mind loading the data, you could just do
Bias_CCCma = CCCma - CRU.data
If your cubes are different shapes (i.e. the models are on different grids, as Jeremy suggested) or you don't want to load the data, there are a few things to look at:
If the grids are different then you will need to regrid one of the cubes to match the other.
For the subtraction operation, the grid coordinate names need to match up. If you are confident that grid_latitude and grid_longitude mean the same as latitude and longitude, you can rename the grid coordinates on one of your cubes. You will also need to ensure the other coordinate metadata matches (e.g. var_name is often an issue).
The time coordinate coming up in your error message is almost certainly due to the unit mismatch you identified in your previous question. I think this issue should go away if you reorder your code to do the time averaging first and then take the difference (the binary operations don't care so much about scalar coordinates).
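Putting those pieces together, a minimal sketch (my own illustration, not a drop-in fix; it assumes the rotated grid coordinates really do correspond to plain latitude/longitude, and that the cubes' coordinate systems allow a linear regrid) might look like:
# rename the rotated coords, put both cubes on one grid, average time first, then subtract
CCCma.coord('grid_latitude').rename('latitude')
CCCma.coord('grid_longitude').rename('longitude')
CRU = CRU.regrid(CCCma, iris.analysis.Linear())
cccma_mean = CCCma.collapsed('time', iris.analysis.MEAN)
cru_mean = CRU.collapsed('time', iris.analysis.MEAN)
Bias_CCCma = cccma_mean - cru_mean.data  # .data sidesteps the strict coordinate comparison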

Thank you all for your answers. In the end I needed to regrid the data first, as @RuthC suggested.
So the code changed to look like this:
import matplotlib.pyplot as plt
import matplotlib.cm as mpl_cm
import numpy as np
from cf_units import Unit
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
import iris.coord_categorisation as iriscc
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
    iris.FUTURE.netcdf_promote = True
    # PART 1: CORDEX MODELS
    # bring in all the models we need and give them a name
    CORDEX = '/exports/csce/datastore/geos/users/s0899345/Climate_Modelling/AFR_44_tasmax/ERAINT/1979-2012/tasmax_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
    # Load exactly one cube from the given file
    CORDEX = iris.load_cube(CORDEX)
    # replace the rotated-pole grid coordinates with plain 1D latitude and longitude
    # dimension coordinates, to make the cube consistent with the observed data;
    # also make sure all of the longitudes are monotonic
    lats = iris.coords.DimCoord(CORDEX.coord('latitude').points[:, 0],
                                standard_name='latitude', units='degrees')
    lons = CORDEX.coord('longitude').points[0]
    for i in range(len(lons)):
        if lons[i] > 100.:
            lons[i] = lons[i] - 360.
    lons = iris.coords.DimCoord(lons,
                                standard_name='longitude', units='degrees')
    CORDEX.remove_coord('latitude')
    CORDEX.remove_coord('longitude')
    CORDEX.remove_coord('grid_latitude')
    CORDEX.remove_coord('grid_longitude')
    CORDEX.add_dim_coord(lats, 1)
    CORDEX.add_dim_coord(lons, 2)
    # PART 2: OBSERVED DATA
    # bring in all the files we need and give them a name
    CRU = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
    # Load exactly one cube from the given file
    CRU = iris.load_cube(CRU, 'near-surface temperature')
    # PART 3: FORMAT DATA
    # Regrid observed data onto the model grid
    CRU = CRU.regrid(CORDEX, iris.analysis.Linear())
    # we are only interested in the latitude and longitude relevant to Malawi
    Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36.5,
                             latitude=lambda v: -17. <= v <= -9.)
    CORDEX = CORDEX.extract(Malawi)
    CRU = CRU.extract(Malawi)
    # time constraint to make all series the same
    iris.FUTURE.cell_datetime_objects = True
    t_constraint = iris.Constraint(time=lambda cell: 1990 <= cell.point.year <= 2008)
    CORDEX = CORDEX.extract(t_constraint)
    CRU = CRU.extract(t_constraint)
    # Convert units to match
    CORDEX.convert_units('Celsius')
    CRU.units = Unit('Celsius')  # fixes CRU, whose units are stored as 'Degrees Celsius', to read 'Celsius'
    # add years to data
    iriscc.add_year(CORDEX, 'time')
    iriscc.add_year(CRU, 'time')
    # We are interested in plotting the data for the average of the time period.
    CORDEX = CORDEX.collapsed('time', iris.analysis.MEAN)
    CRU = CRU.collapsed('time', iris.analysis.MEAN)
    # Take difference between two datasets
    Bias = CRU - CORDEX
    # PART 4: PLOT MAP
    # load colour palette
    colourA = mpl_cm.get_cmap('brewer_YlOrRd_09')
    # plot map with physical features
    ax = plt.axes(projection=cartopy.crs.PlateCarree())
    ax.add_feature(cartopy.feature.COASTLINE)
    ax.add_feature(cartopy.feature.BORDERS)
    ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
    ax.add_feature(cartopy.feature.RIVERS)
    # set map boundary
    ax.set_extent([32.5, 36., -9, -17])
    # set axis tick marks
    ax.set_xticks([33, 34, 35])
    ax.set_yticks([-10, -12, -14, -16])
    lon_formatter = LongitudeFormatter(zero_direction_label=True)
    lat_formatter = LatitudeFormatter()
    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    # plot data and set colour range (this plots the model itself; plot Bias here instead to map the difference)
    plot = iplt.contourf(CORDEX, cmap=colourA, levels=np.arange(13, 32, 1), extend='both')
    # add colour bar index and a label
    plt.colorbar(plot, label='Celsius')
    # give map a title
    plt.title('Tasmax 1990-2008 - CanRCM4_ERAINT', fontsize=10)
    # save the image of the graph and include full legend
    plt.savefig('ERAINT_CCCma_Tasmax_MAP_Annual', bbox_inches='tight')
    plt.show()

if __name__ == '__main__':
    main()

Related

How to implement kmeans clustering as a feature for classification techniques in SVM?

I've already created a clustering model and saved it, but I'm confused about what I should do with this model and how to use it as a feature for classification.
The clustering is based on the coordinates of crime locations. After the data has been clustered, I want to use the clustering output as features in an SVM.
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
import xlrd
import pickle
import tkinter as tk
from tkinter import *
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')
#kmeans section
# Create arrays from the X and Y coordinate columns and plot them
data=pd.read_excel("sanfrancisco.xlsx")
x1=data['X']
y1=data['Y']
X = np.array(list(zip(x1,y1)))
# Elbow method
from sklearn.cluster import KMeans
wcss = []  # empty list for the within-cluster sums of squares
# check inertia for 1 to 10 clusters
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++')  # will generate centroids
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)  # within-cluster sum of squared distances
plot1 = plt.figure(1)
plt.xlabel("Number of Clusters")
plt.ylabel("Euclidean Distance")
plt.plot(range(1, 11), wcss)
k = 3
# data visualisation section, e.g. how many crimes in different months, the day of the week
# with the most crimes, the address/city with the most crimes, the time between crimes, etc.
# X coordinates of random centroids
C_x = np.random.randint(0, np.max(X)-20, size=k)
# Y coordinates of random centroids
C_y = np.random.randint(0, np.max(X)-20, size=k)
C = np.array(list(zip(C_x,C_y)), dtype=np.float32)
print("Initial Centroids")
print(C)
# n_clusters takes the number of clusters; init chooses random data points for the initial centroids
# by default scikit-learn tries 10 initialisations and keeps the best one; to avoid that, n_init is set to 1
model = KMeans(n_clusters=k, init='random', n_init=1)
model.fit_transform(X)
centroids = model.cluster_centers_  # final centroids
rgb_colors = {0.: 'y',
              1.: 'c',
              2.: 'fuchsia'}
if k == 4:
    rgb_colors[3.] = 'lime'
if k == 6:
    rgb_colors[3.] = 'lime'
    rgb_colors[4.] = 'orange'
    rgb_colors[5.] = 'tomato'
new_labels = pd.Series(model.labels_.astype(float))  # labels predicted by k-means
plot2 = plt.figure(2)
plt.scatter(x1, y1, c=new_labels.map(rgb_colors), s=20)
plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', c='black', s=200 )
plt.xlabel('Final Cluster Centers\n Iteration Count=' + str(model.n_iter_) +
           '\n Objective Function Value: ' + str(model.inertia_))
plt.ylabel('y')
plt.title("k-Means")
plt.show()
# save the model to disk
filename = 'clusteredmatrix.sav'
pickle.dump(model, open(filename,'wb'))
Your problem is not very clear, but if you want to study the behaviour of the clusters, I recommend using a tool like Weka, so that you can cluster freely and draw meaningful inferences before going into the complex coding stuff!
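If you would rather stay in scikit-learn, one common pattern is to append the k-means output (the cluster label and/or the distances to each centroid) to the feature matrix and train the SVM on that. A minimal sketch with made-up stand-in data (not your crime dataset):
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
X = np.random.rand(500, 2)             # stand-in for the X/Y crime coordinates
y = np.random.randint(0, 2, size=500)  # stand-in labels the SVM should predict
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10).fit(X)
distances = kmeans.transform(X)            # distance from each point to each centroid
labels = kmeans.labels_.reshape(-1, 1)     # cluster id as an extra feature
X_aug = np.hstack([X, distances, labels])  # original features + cluster-derived features
X_tr, X_te, y_tr, y_te = train_test_split(X_aug, y, test_size=0.3, random_state=0)
clf = SVC(kernel='rbf').fit(X_tr, y_tr)
print("Test accuracy:", clf.score(X_te, y_te))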

Distribution plot is showing flat pdf

I tried to plot the probability density function (PDF) of my data after finding the best-fitting parameters, but the plot shows a flat line instead of a curve.
Is it a matter of scaling?
Is it a problem of continuous vs. discrete data? The data file is available here.
The purpose here is to find the best distribution fittings and then plot the PDF.
My data values are small, e.g. 0.21, 1.117, etc. The data statistics and PDF plots are shown below:
My script is given below:
from time import time
from datetime import datetime
start_time = datetime.now()
import pandas as pd
pd.options.display.float_format = '{:.4f}'.format
import numpy as np
import pickle
import scipy
import scipy.stats
import matplotlib.pyplot as plt
data= pd.read_csv("line_RXC_data.csv",usecols=['R'],parse_dates=True, squeeze=True)
df=data
y_std=df
# del yy
import warnings
warnings.filterwarnings("ignore")
y = df
# Create an index array (x) for the data
x = np.arange(len(y))
size = len(y)
#simple visualisation of the data
plt.hist(y)
plt.title("Histogram of resistance ")
plt.xlabel("Resistance data visualization ")
plt.ylabel("Frequency")
plt.show()
y_df = pd.DataFrame(y)
tt=y_df.describe()
print(tt)
dist_names = [
'foldcauchy',
'beta',
'expon',
'exponnorm',
'norm',
'lognorm',
'dweibull',
'pareto',
'gamma'
]
x = np.arange(len(df))
size = len(df)
y_std = df
y=df
chi_square = []
p_values = []
# Set up 50 bins for the chi-square test
# Observed data will be approximately evenly distributed across all bins
percentile_bins = np.linspace(0, 100, 51)
percentile_cutoffs = np.percentile(y_std, percentile_bins)
observed_frequency, bins = np.histogram(y_std, bins=percentile_cutoffs)
cum_observed_frequency = np.cumsum(observed_frequency)
# Loop through candidate distributions
for distribution in dist_names:
    s1 = time()
    # Set up distribution and get fitted distribution parameters
    dist = getattr(scipy.stats, distribution)
    param = dist.fit(y_std)
    # Obtain the KS test p-value, rounded to 5 decimal places
    p = scipy.stats.kstest(y_std, distribution, args=param)[1]
    p = np.around(p, 5)
    p_values.append(p)
    # Get expected counts in percentile bins
    # This is based on the cumulative distribution function (cdf)
    cdf_fitted = dist.cdf(percentile_cutoffs, *param[:-2], loc=param[-2],
                          scale=param[-1])
    expected_frequency = []
    for bin in range(len(percentile_bins) - 1):
        expected_cdf_area = cdf_fitted[bin + 1] - cdf_fitted[bin]
        expected_frequency.append(expected_cdf_area)
    # calculate chi-squared
    expected_frequency = np.array(expected_frequency) * size
    cum_expected_frequency = np.cumsum(expected_frequency)
    ss = sum(((cum_expected_frequency - cum_observed_frequency) ** 2) / cum_observed_frequency)
    chi_square.append(ss)
    print(f"chi_square {distribution} time: {time() - s1}")
# print("std of predicted probability : ", np.std(cum_observed_frequency))
# Collate results and sort by goodness of fit (best at top)
results = pd.DataFrame()
results['Distribution'] = dist_names
results['chi_square'] = chi_square
results['p_value'] = p_values
results.sort_values(['chi_square'], inplace=True)
# Report results
print ('\nDistributions sorted by goodness of fit:')
print ('----------------------------------------')
print (results)
#%%
# Divide the observed data into 100 bins for plotting (this can be changed)
number_of_bins = 100
bin_cutoffs = np.linspace(np.percentile(y,0), np.percentile(y,99),number_of_bins)
# Create the plot
plt.figure(figsize=(7, 4))
h = plt.hist(y, bins = bin_cutoffs, color='0.70')
# Get the top distributions from the previous phase
number_distributions_to_plot = 5
dist_names = results['Distribution'].iloc[0:number_distributions_to_plot]
#%%
# Create an empty list to store fitted distribution parameters
parameters = []
# Loop through the distributions to get line fit and parameters
for dist_name in dist_names:
    # Set up distribution and store distribution parameters
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    parameters.append(param)
    # Get line for each distribution (and scale to match observed data)
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1])
    scale_pdf = np.trapz(h[0], h[1][:-1]) / np.trapz(pdf_fitted, x)
    pdf_fitted *= scale_pdf
    # Add the line to the plot
    plt.plot(pdf_fitted, label=dist_name)
# Set the plot x axis to contain 99% of the data
# This can be removed, but sometimes outlier data makes the plot less clear
plt.xlim(0, np.percentile(y, 99))
# Add legend and display plot
plt.legend()
plt.title('Data distribution characteristics\n')
plt.xlabel('Resistance')
plt.ylabel('Frequency')
plt.show()
# Store distribution parameters in a dataframe (this could also be saved)
dist_parameters = pd.DataFrame()
dist_parameters['Distribution'] = (
    results['Distribution'].iloc[0:number_distributions_to_plot])
dist_parameters['Distribution parameters'] = parameters
# Print parameter results
print('\nDistribution parameters:')
print('------------------------')
for index, row in dist_parameters.iterrows():
    print('\nDistribution:', row[0])
    print('Parameters:', row[1])
If you look at the following categorical frequency analysis, you'll see that you have only 15 distinct values spread across the range, with large gaps in between rather than a continuum of values. Half the observations have the value 0.211, another ~36% occur at 1.117, ~8% at 0.194, and ~4% at 0.001. I think it's a mistake to treat this as continuous data.
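The check itself is short; something along these lines (assuming the same 'R' column from line_RXC_data.csv) reproduces the frequency table:
import pandas as pd
y = pd.read_csv("line_RXC_data.csv", usecols=['R'])['R']
freq = y.value_counts(normalize=True)  # share of each distinct value
print(len(freq), "distinct values")
print(freq.head())                     # e.g. ~50% at 0.211, ~36% at 1.117, ...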

How to interpolate numpy.polyval and numpy.polyfit python

I did a numpy.polyfit() for the latitude, longitude, and altitude data of a satellite orbit and interpolated (50 points) with numpy.polyval().
Now I want to take just a window (0-4.5 degrees longitude) and do a higher-resolution interpolation (6,000 points). I think I need to use the fit coefficients from the first low-res fit in order to interpolate over my longitude window, and I am not quite sure how to do this.
Inputs:
lat = [27.755611104020687, 22.50661883405905, 17.083576087905502, 11.53891099628959, 5.916633366002468, 0.2555772624429494, -5.407902834141322, -11.037514984810027, -16.594621304857206, -22.03556688048686, -27.308475759820045, -32.34927891621322, -37.07690156937186, -41.38803163295967, -45.15306971601912, -48.21703193866987, -50.41165326774015, -51.58419672864487, -51.63883932997542, -50.57025116952513, -48.46557920053242, -45.47329014246061, -41.76143266388077, -37.48707787049647, -32.782653540783, -27.754184631685046, -22.48503337048438, -17.041097574740743, -11.475689837873944, -5.833592289780744, -0.1543286595142316, 5.525119007560692, 11.167878192881306, 16.73476477885508, 22.18160021405449, 27.455997555900108, 32.493386953033685, 37.21222272985329, 41.508824407948275, 45.25350232626601, 48.291788915858554, 50.45698534747271, 51.59925055739275, 51.62660832560593, 50.53733379179681, 48.420673231121725, 45.42531420150485, 41.71819693220144, 37.45473807165676, 32.76569228387106]
lon = [-109.73105744378498, -104.28690174554579, -99.2435132929552, -94.48533149079628, -89.91054414962821, -85.42671400689177, -80.94616150449806, -76.38135021210172, -71.6402674905218, -66.62178379632216, -61.21120467960157, -55.27684029674759, -48.66970878028004, -41.23083703244677, -32.813881865289346, -23.332386757370532, -12.832819226213942, -1.5659455609661785, 10.008077792630402, 21.33116444634303, 31.92601575632583, 41.51883213364072, 50.04498630545507, 57.58103957109249, 64.26993028992476, 70.2708323505337, 75.73441871754586, 80.7944079829813, 85.56734813043659, 90.1558676264546, 94.65309120129724, 99.14730128118617, 103.72658922048785, 108.48349841714494, 113.51966824008079, 118.95024882101737, 124.9072309203375, 131.5395221402974, 139.00523971191907, 147.44847902856114, 156.95146022590976, 167.46163867248032, 178.72228750873975, -169.72898181991064, -158.44642409799974, -147.8993300787564, -138.35373014113995, -129.86955508919888, -122.36868103811106, -115.70852432245486]
alt = [374065.49207488785, 372510.1635949105, 371072.75959230476, 369836.3092635453, 368866.7921820211, 368209.0950216997, 367884.3703536549, 367888.97894243425, 368195.08833668986, 368752.88080031495, 369494.21701128664, 370337.49662954226, 371193.3839051864, 371971.0136622536, 372584.272228585, 372957.752022573, 373032.0104747458, 372767.8112563471, 372149.0940816824, 371184.49208500446, 369907.2992362557, 368373.8795969478, 366660.5935723809, 364859.4071422184, 363072.42955020745, 361405.69765685993, 359962.58417682414, 358837.24421522504, 358108.5277743581, 357834.7679493668, 358049.8054538341, 358760.531463618, 359946.1257064284, 361559.04646970675, 363527.70518032915, 365760.6377191965, 368151.8843206526, 370587.2165838985, 372950.8014553002, 375131.8814988529, 377031.06540952163, 378565.8596562773, 379675.13241518533, 380322.2707576381, 380496.8682141012, 380214.86538256245, 379517.14674525027, 378466.68079100474, 377144.36811517406, 375643.83731560566]
myOrbitJ2000Time =[ 20027712., 20027713., 20027714., 20027715., 20027716.,
20027717., 20027718., 20027719., 20027720., 20027721.,
20027722., 20027723., 20027724., 20027725., 20027726.,
20027727., 20027728., 20027729., 20027730., 20027731.,
20027732., 20027733., 20027734., 20027735., 20027736.,
20027737., 20027738., 20027739., 20027740., 20027741.,
20027742., 20027743., 20027744., 20027745., 20027746.,
20027747., 20027748., 20027749., 20027750., 20027751.,
20027752., 20027753., 20027754., 20027755., 20027756.,
20027757., 20027758., 20027759., 20027760., 20027761.]
Code:
deg = 30 #polynomial degree for fit
fittime = myOrbitJ2000Time - myOrbitJ2000Time[0]
'Latitude Interpolation'
fitLat = np.polyfit(fittime, lat, deg)
polyval_lat = np.polyval(fitLat,fittime)
'Longitude Interpolation'
fitLon = np.polyfit(fittime, lon, deg)
polyval_lon = np.polyval(fitLon,fittime)
'Altitude Interpolation'
fitAlt = np.polyfit(fittime, alt, deg)
polyval_alt = np.polyval(fitAlt,fittime)
'Get Lat, Lon, & Alt values for a window of 0-4.5 deg Longitude'
lonwindow = []
latwindow = []
altwindow = []
for i in range(len(polyval_lat)):
    if 0 < polyval_lon[i] < 4.5:           # get lon vals in window
        lonwindow.append(polyval_lon[i])   # append lon vals
        latwindow.append(polyval_lat[i])   # append corresponding lat vals
        altwindow.append(polyval_alt[i])   # append corresponding alt vals
lonwindow = np.array(lonwindow)
Just to be clear: the issue is that I only have one point in the window range, so I want to use the interpolation/equation/curve from the previous step, and then use that to interpolate again and generate 6,000 points in my window range.
Original answer posted here
First, generate the polynomial fit coefficients using the old time (x-axis) values, and interpolated longitude (y-axis) values.
import numpy as np
import matplotlib.pyplot as plt
poly_deg = 3  # degree of the polynomial fit
# original_times and interp_lon correspond to fittime and polyval_lon from the question
polynomial_fit_coeff = np.polyfit(original_times, interp_lon, poly_deg)
Next, use np.linspace() to generate arbitrary time values based on the desired number of points in the window.
start = 0
stop = 4
num_points = 6000
arbitrary_time = np.linspace(start, stop, num_points)
Finally, use the fit coefficients and the arbitrary time to get the actual interpolated longitude (y-axis) values and plot.
lon_intrp_2 = np.polyval(polynomial_fit_coeff, arbitrary_time)
plt.plot(arbitrary_time, lon_intrp_2, 'r')  # interpolated window as a red curve
plt.plot(myOrbitJ2000Time - myOrbitJ2000Time[0], lon, '.')  # original data on the same zero-based time axis
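If you need the window samples back on the absolute J2000 axis, shift the sample times accordingly, e.g. a hypothetical real_times = arbitrary_time + myOrbitJ2000Time[0].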

How to do a second interpolation in python

I did my first interpolation with numpy.polyfit() and numpy.polyval() for 50 longitude values for a full satellite orbit.
Now, I just want to look at a window of 0-4.5 degrees longitude and do a second interpolation so that I have 6,000 points for longitude in the window.
I need to use the equation/curve from the first interpolation to create the second one because there is only one point in the window range. I'm not sure how to do the second interpolation.
Inputs:
lon = [-109.73105744378498, -104.28690174554579, -99.2435132929552, -94.48533149079628, -89.91054414962821, -85.42671400689177, -80.94616150449806, -76.38135021210172, -71.6402674905218, -66.62178379632216, -61.21120467960157, -55.27684029674759, -48.66970878028004, -41.23083703244677, -32.813881865289346, -23.332386757370532, -12.832819226213942, -1.5659455609661785, 10.008077792630402, 21.33116444634303, 31.92601575632583, 41.51883213364072, 50.04498630545507, 57.58103957109249, 64.26993028992476, 70.2708323505337, 75.73441871754586, 80.7944079829813, 85.56734813043659, 90.1558676264546, 94.65309120129724, 99.14730128118617, 103.72658922048785, 108.48349841714494, 113.51966824008079, 118.95024882101737, 124.9072309203375, 131.5395221402974, 139.00523971191907, 147.44847902856114, 156.95146022590976, 167.46163867248032, 178.72228750873975, -169.72898181991064, -158.44642409799974, -147.8993300787564, -138.35373014113995, -129.86955508919888, -122.36868103811106, -115.70852432245486]
myOrbitJ2000Time = [ 20027712., 20027713., 20027714., 20027715., 20027716.,
20027717., 20027718., 20027719., 20027720., 20027721.,
20027722., 20027723., 20027724., 20027725., 20027726.,
20027727., 20027728., 20027729., 20027730., 20027731.,
20027732., 20027733., 20027734., 20027735., 20027736.,
20027737., 20027738., 20027739., 20027740., 20027741.,
20027742., 20027743., 20027744., 20027745., 20027746.,
20027747., 20027748., 20027749., 20027750., 20027751.,
20027752., 20027753., 20027754., 20027755., 20027756.,
20027757., 20027758., 20027759., 20027760., 20027761.]
Code:
deg = 30 #polynomial degree for fit
fittime = myOrbitJ2000Time - myOrbitJ2000Time[0]
'Longitude Interpolation'
fitLon = np.polyfit(fittime, lon, deg) #gets fit coefficients
polyval_lon = np.polyval(fitLon,fittime) #interp.s to get actual values
'Get Longitude values for a window of 0-4.5 deg Longitude'
lonwindow = []
for i in range(len(polyval_lon)):
    if 0 < polyval_lon[i] < 4.5:           # get lon vals in window
        lonwindow.append(polyval_lon[i])   # append lon vals
lonwindow = np.array(lonwindow)
First, generate the polynomial fit coefficients using the old time (x-axis) values, and interpolated longitude (y-axis) values.
import numpy as np
import matplotlib.pyplot as plt
poly_deg = 3  # degree of the polynomial fit
# original_times and interp_lon correspond to fittime and polyval_lon from the question
polynomial_fit_coeff = np.polyfit(original_times, interp_lon, poly_deg)
Next, use np.linspace() to generate arbitrary time values based on the desired number of points in the window.
start = 0
stop = 4
num_points = 6000
arbitrary_time = np.linspace(start, stop, num_points)
Finally, use the fit coefficients and the arbitrary time to get the actual interpolated longitude (y-axis) values and plot.
lon_intrp_2 = np.polyval(polynomial_fit_coeff, arbitrary_time)
plt.plot(arbitrary_time, lon_intrp_2, 'r')  # interpolated window as a red curve
plt.plot(myOrbitJ2000Time - myOrbitJ2000Time[0], lon, '.')  # original data on the same zero-based time axis

world map without rivers with matplotlib / Basemap?

Would there be a way to plot the borders of the continents with Basemap (or without Basemap, if there is some other way), without those annoying rivers coming along? Especially that piece of the Congo River, not even reaching the ocean, is disturbing.
EDIT: I intend to further plot data over the map, like in the Basemap gallery (and still have the borderlines of the continents drawn as black lines over the data, to give structure for the worldmap) so while the solution by Hooked below is nice, masterful even, it's not applicable for this purpose.
Image produced by:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(projection='robin',lon_0=0,resolution='c')
m.fillcontinents(color='gray',lake_color='white')
m.drawcoastlines()
plt.savefig('world.png',dpi=75)
For reasons like this I often avoid Basemap altogether and read the shapefile in with OGR and convert it to a Matplotlib artist myself. That is a lot more work but also gives a lot more flexibility.
Basemap has some very neat features, like converting the coordinates of input data to your 'working projection'.
If you want to stick with Basemap, get a shapefile which doesn't contain the rivers. Natural Earth, for example, has a nice 'Land' shapefile in the physical section (download the 'scale rank' data and uncompress it). See http://www.naturalearthdata.com/downloads/10m-physical-vectors/
You can read the shapefile in with the m.readshapefile() method from Basemap. This allows you to get the Matplotlib Path vertices and codes in projection coordinates, which you can then convert into a new Path. It's a bit of a detour, but it gives you all the styling options from Matplotlib, most of which are not directly available via Basemap. It's a bit hackish, but I don't know another way while sticking to Basemap.
So:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.collections import PathCollection
from matplotlib.path import Path
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
# MPL searches for ne_10m_land.shp in the directory 'D:\\ne_10m_land'
m = Basemap(projection='robin',lon_0=0,resolution='c')
shp_info = m.readshapefile('D:\\ne_10m_land', 'scalerank', drawbounds=True)
ax = plt.gca()
ax.cla()
paths = []
for line in shp_info[4]._paths:
    paths.append(Path(line.vertices, codes=line.codes))
coll = PathCollection(paths, linewidths=0, facecolors='grey', zorder=2)
m = Basemap(projection='robin',lon_0=0,resolution='c')
# drawing something seems necessary to 'initiate' the map properly
m.drawcoastlines(color='white', zorder=0)
ax = plt.gca()
ax.add_collection(coll)
plt.savefig('world.png',dpi=75)
Gives:
How to remove "annoying" rivers:
If you want to post-process the image (instead of working with Basemap directly) you can remove bodies of water that don't connect to the ocean:
import pylab as plt
A = plt.imread("world.png")
import numpy as np
import scipy.ndimage as nd
import collections
# Get a counter of the greyscale colors
a = A[:,:,0]
colors = collections.Counter(a.ravel())
outside_and_water_color, land_color = colors.most_common(2)
# Find the contigous landmass
land_idx = a == land_color[0]
# Index these land masses
L = np.zeros(a.shape,dtype=int)
L[land_idx] = 1
L,mass_count = nd.measurements.label(L)
# Loop over the land masses and fill the "holes"
# (rivers without outlets)
L2 = np.zeros(a.shape,dtype=int)
L2[land_idx] = 1
L2 = nd.morphology.binary_fill_holes(L2)
# Remap onto original image
new_land = L2==1
A2 = A.copy()
c = [land_color[0],]*3 + [1,]
A2[new_land] = land_color[0]
# Plot results
plt.subplot(221)
plt.imshow(A)
plt.axis('off')
plt.subplot(222)
plt.axis('off')
B = A.copy()
B[land_idx] = [1,0,0,1]
plt.imshow(B)
plt.subplot(223)
L = L.astype(float)
L[L==0] = None
plt.axis('off')
plt.imshow(L)
plt.subplot(224)
plt.axis('off')
plt.imshow(A2)
plt.tight_layout() # Only with newer matplotlib
plt.show()
The first image is the original, the second identifies the land mass. The third is not needed but fun as it ID's each separate contiguous landmass. The fourth picture is what you want, the image with the "rivers" removed.
Following user1868739's example, I am able to select only the paths (for some lakes) that I want:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='white',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84] # want Baikal, but not Tanganyika
# 80 = Superior+Michigan+Huron, 81 = Victoria, 82 = Aral, 83 = Tanganyika,
# 84 = Baikal, 85 = Great Bear, 86 = Great Slave, 87 = Nyasa, 88 = Erie
# 89 = Winnipeg, 90 = Ontario
for ipoly in ipolygons:
    r = coasts_paths[ipoly]
    # Convert into lon/lat vertices
    polygon_vertices = [(vertex[0], vertex[1]) for (vertex, code) in
                        r.iter_segments(simplify=False)]
    px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
    py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
    m.plot(px, py, linewidth=0.5, zorder=3, color='black')
plt.savefig('world2.png',dpi=100)
But this only works when using a white background for the continents. If I change the color to 'gray' in the following line, we see that other rivers and lakes are not filled with the same color as the continents. (Also, playing with area_thresh will not remove those rivers that are connected to the ocean.)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
The version with the white background is adequate for further colour-plotting all kinds of land information over the continents, but a more elaborate solution would be needed if one wants to retain a gray background for the continents.
I frequently modify Basemap's drawcoastlines() to avoid those 'broken' rivers. I also modify drawcountries() for the sake of data source consistency.
Here is what I use in order to support the different resolutions available in Natural Earth data:
from mpl_toolkits.basemap import Basemap
class Basemap(Basemap):
    """ Modify Basemap to use Natural Earth data instead of GSHHG data """
    def drawcoastlines(self):
        shapefile = 'data/naturalearth/coastline/ne_%sm_coastline' % \
            {'l': 110, 'm': 50, 'h': 10}[self.resolution]
        self.readshapefile(shapefile, 'coastline', linewidth=1.)
    def drawcountries(self):
        shapefile = 'data/naturalearth/countries/ne_%sm_admin_0_countries' % \
            {'l': 110, 'm': 50, 'h': 10}[self.resolution]
        self.readshapefile(shapefile, 'countries', linewidth=0.5)

m = Basemap(llcrnrlon=-90, llcrnrlat=-40, urcrnrlon=-30, urcrnrlat=+20,
            resolution='l')  # resolution = (l)ow | (m)edium | (h)igh
m.drawcoastlines()
m.drawcountries()
Here is the output:
Please note that by default Basemap uses resolution='c' (crude), which is not supported in the code shown.
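If you do need resolution='c' as well, one workaround (my assumption, simply falling back to the coarsest 110m Natural Earth file) is to extend the lookup dictionary, e.g. in drawcoastlines():
def drawcoastlines(self):
    # hypothetical fallback: reuse the 110m data for the 'c' (crude) resolution
    scale = {'c': 110, 'l': 110, 'm': 50, 'h': 10}[self.resolution]
    self.readshapefile('data/naturalearth/coastline/ne_%sm_coastline' % scale,
                       'coastline', linewidth=1.)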
If you're OK with plotting outlines rather than shapefiles, it's pretty easy to plot coastlines that you can get from wherever. I got my coastlines from the NOAA Coastline Extractor in MATLAB format:
http://www.ngdc.noaa.gov/mgg/shorelines/shorelines.html
To edit the coastlines, I converted to SVG, then edited them with Inkscape, then converted back to the lat/lon text file ("MATLAB" format).
All Python code is included below.
# ---------------------------------------------------------------
import re
import array
import numpy as np

def plot_lines(mymap, lons, lats, **kwargs):
    """Plots a custom coastline.  This plots simple lines, not
    ArcInfo-style SHAPE files.
    Args:
        lons: Longitude coordinates for line segments (degrees E)
        lats: Latitude coordinates for line segments (degrees N)
    Type Info:
        len(lons) == len(lats)
        A NaN in lons and lats signifies a new line segment.
    See:
        giss.noaa.drawcoastline_file()
    """
    # Project onto the map
    x, y = mymap(lons, lats)
    # BUG workaround: Basemap projects our NaN's to 1e30.
    x[x == 1e30] = np.nan
    y[y == 1e30] = np.nan
    # Plot projected line segments.
    mymap.plot(x, y, **kwargs)

# Read "Matlab" format files from NOAA Coastline Extractor.
# See: http://www.ngdc.noaa.gov/mgg/coast/
lineRE = re.compile(r'(.*?)\s+(.*)')

def read_coastline(fname, take_every=1):
    nlines = 0
    xdata = array.array('d')
    ydata = array.array('d')
    for line in file(fname):
        # if (nlines % 10000 == 0):
        #     print 'nlines = %d' % (nlines,)
        if (nlines % take_every == 0 or line[0:3] == 'nan'):
            match = lineRE.match(line)
            lon = float(match.group(1))
            lat = float(match.group(2))
            xdata.append(lon)
            ydata.append(lat)
        nlines = nlines + 1
    return (np.array(xdata), np.array(ydata))

def drawcoastline_file(mymap, fname, **kwargs):
    """Reads and plots a coastline file.
    See:
        giss.basemap.drawcoastline()
        giss.basemap.read_coastline()
    """
    lons, lats = read_coastline(fname, take_every=1)
    return plot_lines(mymap, lons, lats, **kwargs)  # calls the helper defined above
# =========================================================
# coastline2svg.py
#
import giss.io.noaa
import os
import numpy as np
import sys
svg_header = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.1"
width="360"
height="180"
id="svg2">
<defs
id="defs4" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1">
"""
path_tpl = """
<path
d="%PATH%"
id="%PATH_ID%"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
"""
svg_footer = "</g></svg>"
# Set up paths
data_root = os.path.join(os.environ['HOME'], 'data')
#modelerc = giss.modele.read_modelerc()
#cmrun = modelerc['CMRUNDIR']
#savedisk = modelerc['SAVEDISK']
ifname = sys.argv[1]
ofname = ifname.replace('.dat', '.svg')
lons, lats = giss.io.noaa.read_coastline(ifname, 1)
out = open(ofname, 'w')
out.write(svg_header)
path_id = 1
points = []
for lon, lat in zip(lons, lats):
    if np.isnan(lon) or np.isnan(lat):
        # Process what we have
        if len(points) > 2:
            out.write('\n<path d="')
            out.write('m %f,%f L' % (points[0][0], points[0][1]))
            for pt in points[1:]:
                out.write(' %f,%f' % pt)
            out.write('"\n id="path%d"\n' % (path_id))
            # out.write('style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"')
            out.write(' />\n')
            path_id += 1
        points = []
    else:
        lon += 180
        lat = 180 - (lat + 90)
        points.append((lon, lat))
out.write(svg_footer)
out.close()
# =============================================================
# svg2coastline.py
import os
import sys
import re
# Reads the output of Inkscape's "Plain SVG" format, outputs in NOAA MATLAB coastline format
mainRE = re.compile(r'\s*d=".*"')
lineRE = re.compile(r'\s*d="(m|M)\s*(.*?)"')
fname = sys.argv[1]
lons = []
lats = []
for line in open(fname, 'r'):
    # Weed out extraneous lines in the SVG file
    match = mainRE.match(line)
    if match is None:
        continue
    match = lineRE.match(line)
    # Stop if something is wrong
    if match is None:
        sys.stderr.write(line)
        sys.exit(-1)
    type = match.group(1)[0]
    spairs = match.group(2).split(' ')
    x = 0
    y = 0
    for spair in spairs:
        if spair == 'L':
            type = 'M'
            continue
        (sdelx, sdely) = spair.split(',')
        delx = float(sdelx)
        dely = float(sdely)
        if type == 'm':
            x += delx
            y += dely
        else:
            x = delx
            y = dely
        lon = x - 180
        lat = 90 - y
        print '%f\t%f' % (lon, lat)
    print 'nan\tnan'
Okay I think I have a partial solution.
The basic idea is that the paths used by drawcoastlines() are ordered by size/area, which means that for most applications the first N paths are the main land masses and lakes, and the later paths are the smaller islands and rivers.
The issue is that the first N paths that you want will depend on the projection (e.g. global, polar, regional), whether area_thresh has been applied, and whether you want lakes or small islands, etc. In other words, you will have to tweak this per application.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mp = 'cyl'
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Extract the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time, noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
    print ipoly
    r = coasts_paths[ipoly]
    # Convert into lon/lat vertices
    polygon_vertices = [(vertex[0], vertex[1]) for (vertex, code) in
                        r.iter_segments(simplify=False)]
    px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
    py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
    m.plot(px, py, 'k-', linewidth=1)
plt.show()
Once you know the relevant ipoly to stop drawing (poly_stop) then you can do something like this...
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mproj = ['nplaea','cyl']
mp = mproj[0]
if mp == 'nplaea':
    m = Basemap(resolution='c', projection=mp, lon_0=0, boundinglat=30, area_thresh=200000, round=1)
    poly_stop = 10
else:
    m = Basemap(resolution='c', projection=mp, lon_0=0, area_thresh=200000)
    poly_stop = 18
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Extract the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time, noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
    if ipoly > poly_stop:
        continue
    r = coasts_paths[ipoly]
    # Convert into lon/lat vertices
    polygon_vertices = [(vertex[0], vertex[1]) for (vertex, code) in
                        r.iter_segments(simplify=False)]
    px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
    py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
    m.plot(px, py, 'k-', linewidth=1)
plt.show()
As per my comment to @sampo-smolander:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84]
for ipoly in xrange(len(coasts_paths)):
    r = coasts_paths[ipoly]
    # Convert into lon/lat vertices
    polygon_vertices = [(vertex[0], vertex[1]) for (vertex, code) in
                        r.iter_segments(simplify=False)]
    px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
    py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
    if ipoly in ipolygons:
        m.plot(px, py, linewidth=0.5, zorder=3, color='black')
    else:
        m.plot(px, py, linewidth=0.5, zorder=4, color='grey')
plt.savefig('world2.png', dpi=100)
