Related
My question is similar to this one (R + ggplot) but for Python.
Question
How to generate a (2D) Voronoi diagram where cells have a maximum extend / growth limit ?
it would result in some cells with curved boundary (circle arc), and some space regions not filled by any cell.
An image processing interpretation of the desired output would be to perform some "AND" mask between the voronoi cells and circles centered on each cell with the required radius.
(For my particular problem, I am working with geolocated data, and for now I am willing to accept discrepancies from most voronoi libs that will use lat/lon as cartesian coordinates. That is another matter...)
This question is NOT about clipping or handling of "inifinite" voronoi cells (see scypi.spatial.Voronoi), so the following links are NOT related :
clipping a voronoi diagram python
How to limit Voronoi cells even when infinite with python?
Vaguely related topics:
Is there a way to vary the rate of Voronoi cell growth?
https://gis.stackexchange.com/questions/366471/weighted-voronoi-polygons-in-r
https://gis.stackexchange.com/questions/17282/create-weighted-thiessen-polygons/17284#17284
https://github.com/HichamZouarhi/Weighted-Voronoi-PyQGIS
Example from the R + ggplot answer:
sample code for tests:
from typing import Tuple, List, Union
import geovoronoi
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
from geographiclib.geodesic import Geodesic
from shapely.geometry import Polygon, Point
from simplekml import LineStyle, Color, PolyStyle, Container
WGS84_Tool = Geodesic.WGS84
T_Color_List = List[Union[clr.Colormap, clr.LinearSegmentedColormap, clr.ListedColormap]]
def generate_n_colors(n, cmap_name='tab20') -> T_Color_List:
"""
https://github.com/WZBSocialScienceCenter/geovoronoi/blob/master/geovoronoi/plotting.py
Get a list of `n` numbers from matplotlib color map `cmap_name`. If `n` is larger than the number of colors in the
color map, the colors will be recycled, i.e. they are not unique in this case.
:param n: number of colors to generate
:param cmap_name: matplotlib color map name
:return: list of `n` colors
"""
pt_region_colormap = plt.get_cmap(cmap_name)
max_i = len(pt_region_colormap.colors)
return [pt_region_colormap(i % max_i) for i in range(n)]
def _plot_cell_and_seed(kml: Container,
name: str,
region_polygon: Polygon,
seed_coords: Tuple[float, float],
_kml_color: str):
_p = kml.newpolygon(name=f"{name} zone",
outerboundaryis=[(lon, lat)
for lat, lon
in region_polygon.exterior.coords], )
_p.style.linestyle = LineStyle(color=Color.darkgrey, width=1.)
_p.style.polystyle = PolyStyle(color=_kml_color)
p = kml.newpoint(coords=[seed_coords],
name=name)
p.style.iconstyle.icon.href = "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
p.style.iconstyle.scale = 0.5
p.style.labelstyle.scale = 0.5
def plot_regions(region_polys,
region_pts,
seeds_coords_list: np.ndarray,
seeds_names: List[str],
kml: Container,
colors: T_Color_List,
):
assert (len(seeds_names) == len(seeds_coords_list))
index = 0
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = seeds_coords_list[_cell_point_indexes][0]
name = seeds_names[_cell_point_indexes[0]]
_kml_airport_coords = (_cell_seed_coords[-1], _cell_seed_coords[0])
_mpl_color = colors[index]
_mpl_hexa_color = clr.to_hex(_mpl_color, keep_alpha=True)
_hexa_color_no_sharp = _mpl_hexa_color.split("#")[-1]
_kml_color = Color.hexa(_hexa_color_no_sharp)
_kml_color = Color.changealphaint(alpha=7 * 255 // 10, gehex=_kml_color)
_plot_cell_and_seed(kml=kml,
name=name,
region_polygon=region_polygon,
seed_coords=_kml_airport_coords,
_kml_color=_kml_color)
index += 1
# bounding box for geovoronoi
geo_boundaries = {"min": {"lat": +30, "lon": -12},
"max": {"lat": +75, "lon": +35}, }
# a list of [[lat,lon],[lat,lon],[lat,lon],[lat,lon],]
n = 150
seeds_coords_list = np.dstack(
[np.random.uniform(low=geo_boundaries["min"]["lat"], high=geo_boundaries["max"]["lat"], size=n),
np.random.uniform(low=geo_boundaries["min"]["lon"], high=geo_boundaries["max"]["lon"], size=n), ]).reshape((n, 2))
seeds_names = [f"{lat:+_.2f};{lon:+_.2f}" for lat, lon in seeds_coords_list]
boundary_points = geovoronoi.coords_to_points([[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]],
[geo_boundaries["min"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["min"]["lon"]],
# last necessary ?
[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]]])
boundary_polygon = Polygon(boundary_points)
# beware that geodesics and geovoronoi may not be accurate since it uses planar cartesian formulas...
region_polys, region_pts = geovoronoi.voronoi_regions_from_coords(seeds_coords_list, boundary_polygon)
# DO SOMETHING HERE
#...
#...
#...
kdoc = Kml()
p_kml = Path.cwd() / "voronoi_so.kml"
colors: T_Color_List = generate_n_colors(len(region_polys))
plot_regions(region_polys=region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="raw regions"),
colors=colors)
print("save KML")
kdoc.save(p_kml.as_posix())
print(p_kml.as_uri())
After some thinking, here is an approach using Shapely and using the intersection between computed circles (360 vertices) and each voronoi cell.
Performance is not convincing since we create 360 points and a polygon + 1 intersection computation for each cell... (at least it is a bounded ...).
However, the underlying voronoi computation data such as cells adjacency is lost, and we can't know if some cells are isolated after this transformation. Maybe some ideas here: Determining and storing Voronoi Cell Adjacency
I do believe better solutions may exist.
So here is a solution, with random geographical data, and corresponding kml rendering.
EDIT: using weighted voronoi diagrams, a possible weight function could be f(d) = d <= radius ? I could not explore this for now...
Raw regions:
Modified raw regions:
from pathlib import Path
from typing import Tuple, List, Union
import geovoronoi
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
from geographiclib.geodesic import Geodesic
from shapely.geometry import Polygon, Point
from simplekml import LineStyle, Kml, Color, PolyStyle, Container
WGS84_Tool = Geodesic.WGS84
T_Color_List = List[Union[clr.Colormap, clr.LinearSegmentedColormap, clr.ListedColormap]]
def generate_n_colors(n, cmap_name='tab20') -> T_Color_List:
"""
https://github.com/WZBSocialScienceCenter/geovoronoi/blob/master/geovoronoi/plotting.py
Get a list of `n` numbers from matplotlib color map `cmap_name`. If `n` is larger than the number of colors in the
color map, the colors will be recycled, i.e. they are not unique in this case.
:param n: number of colors to generate
:param cmap_name: matplotlib color map name
:return: list of `n` colors
"""
pt_region_colormap = plt.get_cmap(cmap_name)
max_i = len(pt_region_colormap.colors)
return [pt_region_colormap(i % max_i) for i in range(n)]
def _create_bounded_regions(region_polys,
region_pts,
distance_criteria_m: float,
cell_seed_coords_list: np.ndarray,
nb_vertices:int=36):
new_polygons = {}
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = cell_seed_coords_list[_cell_point_indexes][0]
_arpt_lat = _cell_seed_coords[0]
_arpt_lon = _cell_seed_coords[-1]
cycle_vertices = []
for a in np.linspace(0,359,nb_vertices):
p = WGS84_Tool.Direct(lat1=_arpt_lat, lon1=_arpt_lon, azi1=a, s12=distance_criteria_m)
_point = Point(p["lat2"], p["lon2"])
cycle_vertices.append(_point)
circle = Polygon(cycle_vertices)
new_polygons[region_id] = region_polygon.intersection(circle)
return new_polygons
def _plot_cell_and_seed(kml: Container,
name: str,
region_polygon: Polygon,
seed_coords: Tuple[float, float],
_kml_color: str):
_p = kml.newpolygon(name=f"{name} zone",
outerboundaryis=[(lon, lat)
for lat, lon
in region_polygon.exterior.coords], )
_p.style.linestyle = LineStyle(color=Color.darkgrey, width=1.)
_p.style.polystyle = PolyStyle(color=_kml_color)
p = kml.newpoint(coords=[seed_coords],
name=name)
p.style.iconstyle.icon.href = "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
p.style.iconstyle.scale = 0.5
p.style.labelstyle.scale = 0.5
def plot_regions(region_polys,
region_pts,
seeds_coords_list: np.ndarray,
seeds_names: List[str],
kml: Container,
colors: T_Color_List,
):
assert (len(seeds_names) == len(seeds_coords_list))
index = 0
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = seeds_coords_list[_cell_point_indexes][0]
name = seeds_names[_cell_point_indexes[0]]
_kml_airport_coords = (_cell_seed_coords[-1], _cell_seed_coords[0])
_mpl_color = colors[index]
_mpl_hexa_color = clr.to_hex(_mpl_color, keep_alpha=True)
_hexa_color_no_sharp = _mpl_hexa_color.split("#")[-1]
_kml_color = Color.hexa(_hexa_color_no_sharp)
_kml_color = Color.changealphaint(alpha=7 * 255 // 10, gehex=_kml_color)
_plot_cell_and_seed(kml=kml,
name=name,
region_polygon=region_polygon,
seed_coords=_kml_airport_coords,
_kml_color=_kml_color)
index += 1
# bounding box for geovoronoi
geo_boundaries = {"min": {"lat": +30, "lon": -12},
"max": {"lat": +75, "lon": +35}, }
# a list of [[lat,lon],[lat,lon],[lat,lon],[lat,lon],]
n = 150
seeds_coords_list = np.dstack(
[np.random.uniform(low=geo_boundaries["min"]["lat"], high=geo_boundaries["max"]["lat"], size=n),
np.random.uniform(low=geo_boundaries["min"]["lon"], high=geo_boundaries["max"]["lon"], size=n), ]).reshape((n, 2))
seeds_names = [f"{lat:+_.2f};{lon:+_.2f}" for lat, lon in seeds_coords_list]
boundary_points = geovoronoi.coords_to_points([[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]],
[geo_boundaries["min"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["min"]["lon"]],
# last necessary ?
[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]]])
boundary_polygon = Polygon(boundary_points)
# beware that geodesics and geovoronoi may not be accurate since it uses planar cartesian formulas...
region_polys, region_pts = geovoronoi.voronoi_regions_from_coords(seeds_coords_list, boundary_polygon)
new_region_polys = _create_bounded_regions(region_polys=region_polys,
region_pts=region_pts,
distance_criteria_m=300_000.,
cell_seed_coords_list=seeds_coords_list, )
kdoc = Kml()
p_kml = Path.cwd() / "voronoi_so.kml"
colors: T_Color_List = generate_n_colors(len(region_polys))
plot_regions(region_polys=region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="raw regions"),
colors=colors)
plot_regions(region_polys=new_region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="new_regions (range)"),
colors=colors)
print("save KML")
kdoc.save(p_kml.as_posix())
print(p_kml.as_uri())
Hi I am trying to extract data from a netCDF file, but the data is upside down. How can I reverse the database:
The data I want to extract is the height data from the (netcdf) at the points I have in the CSV file. my Data:
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import pandas as pd
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Path, PathPatch
csv_data = np.loadtxt('CSV with target coordinates',skiprows=1,delimiter=',')
num_el = csv_data[:,0]
lat = csv_data[:,1]
lon = csv_data[:,2]
value = csv_data[:,3]
data = Dataset("elevation Data",'r')
lon_range = data.variables['x_range'][:]
lat_range = data.variables['y_range'][:]
topo_range = data.variables['z_range'][:]
spacing = data.variables['spacing'][:]
dimension = data.variables['dimension'][:]
z = data.variables['z'][:]
lon_num = dimension[0]
lat_num = dimension[1]
etopo_lon = np.linspace(lon_range[0],lon_range[1],dimension[0])
etopo_lat = np.linspace(lat_range[0],lat_range[1],dimension[1])
topo = np.reshape(z, (lat_num, lon_num))
height = np.empty_like(num_el)
desired_lat_idx = np.empty_like(num_el)
desired_lon_idx = np.empty_like(num_el)
for i in range(len(num_el)):
tmp_lat = np.abs(etopo_lat - lat[i]).argmin()
tmp_lon = np.abs(etopo_lon - lon[i]).argmin()
desired_lat_idx[i] = tmp_lat
desired_lon_idx[i] = tmp_lon
height[i] = topo[tmp_lat,tmp_lon]
height[height<-10]=0
print(len(desired_lat_idx))
print(len(desired_lon_idx))
print(len(height))
dfl= pd.DataFrame({
'Latitude' : lat.reshape(-1),
'Longitude': lon.reshape(-1),
'Altitude': height.reshape(-1)
});
print(dfl)
# but the Lat should not be changed here (the dfl must be correct)
df =dfl
lat=np.array(df['Latitude'])
lon=np.array(df['Longitude'])
val=np.array(df['Altitude'])
m = basemap.Basemap(projection='robin', lon_0=0, lat_0=0, resolution='l',area_thresh=1000)
m.drawcoastlines(color = 'black')
x,y = m(lon,lat)
colormesh= m.contourf(x,y,val,100, tri=True, cmap = 'terrain')
plt.colorbar(location='bottom',pad=0.04,fraction=0.06)
plt.show()
I have already tried:
lat = csv_data[:,1]
lat= lat*(-1)
But this didnĀ“t work
It's a plotting artifact().
Just do:
colormesh= m.contourf(x,y[::-1],val,100, tri=True, cmap = 'terrain')
y[::-1] will reverse the order of the y latitude elements (as opposed to the land-mass outlines; and while keeping the x longitude coordinates the same) and hence flip them.
I've often had this problem with plotting numpy image data in the past.
Your raw CSV data are unlikely to be flipped themselves (why would they be?). You should try sanity-checking them [I am not a domain expert I'm afraid]! Overlaying an actual coordinate grid can help with this.
Another way to do it is given here: Reverse Y-Axis in PyPlot
You could also therefore just do
ax = plt.gca()
ax.invert_yaxis()
I am trying to adapt this code I found on stackoverflow to create a voronoi cell with finite boundaries.
However my problem is that I don't know how to get the region associated to a given point. This was done in normal Voronoi with the point_region method, but this doesn't work here because regions have changed.
The data points I am using are:
points = array([[289255.176 , 921667.461 ],
[289296.31699, 921687.13826],
[289463.30305, 921770.12504],
[289725.08002, 921905.75745],
[289960.48198, 922099.46056],
[290106.98928, 922361.79529],
[289255.184 , 921646.244 ],
[289307.48677, 921627.05485],
[289500.80493, 921555.50067],
[289825.14532, 921435.65147],
[290141.79326, 921322.77935],
[290454.91721, 921211.09355],
[289255.187 , 921635.627 ],
[289327.07776, 921558.85263],
[289565.21795, 921298.17707],
[289875.40176, 920978.013 ],
[290192.86361, 920656.82017],
[289255.185 , 921630.386 ],
[289318.54181, 921453.18492],
[289421.06861, 921167.57934],
[289565.42462, 920770.1386 ],
[289701.83141, 920376.28627],
[289833.6501 , 919990.66467]])
vor = Voronoi(points)
min_x = vor.min_bound[0] - 100
max_x = vor.max_bound[0] + 100
min_y = vor.min_bound[1] - 100
max_y = vor.max_bound[1] + 100
regions, vertices, pts = voronoi_finite_polygons_2d(vor)
This code gives you the point associated with a given finite Voronoi region using a point in polygon analysis. Let us assume that we wish to find which point is associated with a Voronoi region that is second on the list of vor_regions.
vor = Voronoi(points)
vor_regions = vor.regions
vor_vertices = vor.vertices
from shapely.geometry import MultiPoint
from shapely.geometry import Point
region = vor_regions[1]
coords = tuple(map(tuple, vor_vertices[region]))
poly = MultiPoint(coords).convex_hull
for i in range(0,len(points)):
pt = Point(tuple(map(tuple, points[i:i+1])))
if poly.contains(pt) == True:
print('The input region is ')
print(region)
print('Index of the associated point in the vor_vertices list is '+str(i))
print('The coordinates of the associated point is ')
print(pt)
Output:
The input region is
[6, 2, 1, 0, 5]
Index of the associated point in the vor_vertices list is 4
The coordinates of the associated point is
POINT (289960.48198 922099.46056)
I am trying to map the difference between climate simulation data and observed data over a set geographical area.
To create the map of just the climate simulation I am using this code
import matplotlib.pyplot as plt
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
def main():
#bring in all the models we need and give them a name
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(grid_longitude=lambda v: 31 <= v <= 36.5, \
grid_latitude=lambda v: -18. <= v <= -8.)
CCCma = CCCma.extract(Malawi)
#time constraint to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#Convert units to match, CORDEX data is in Kelvin but Observed data in Celsius, we would like to show all data in Celsius
CCCma.convert_units('Celsius')
#plot map with physical features
cmap = plt.cm.afmhot_r
ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.add_feature(cartopy.feature.COASTLINE)
ax.add_feature(cartopy.feature.BORDERS)
ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
ax.add_feature(cartopy.feature.RIVERS)
#set map boundary
ax.set_extent([31, 36.5, -8,-18])
#set axis tick marks
ax.set_xticks([32, 34, 36])
ax.set_yticks([-9, -11, -13, -15, -17])
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax.xaxis.set_major_formatter(lon_formatter)
ax.yaxis.set_major_formatter(lat_formatter)
data = CCCma
#take mean of data over all time
plot = iplt.contourf(data.collapsed('time', iris.analysis.MEAN), \
cmap=cmap, levels=[15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],\
extend='both')
#add colour bar index
plt.colorbar(plot)
#give map a title
plt.title('RCP4.5 Mean Temperature 1989-2008', fontsize=10)
plt.show()
if __name__ == '__main__':
main()
How can I amend this to take the difference between the two datasets? I tried this code:
import matplotlib.pyplot as plt
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
#PART 1: CORDEX MODELS
#bring in all the models we need and give them a name
CCCma = '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/AFR_44_tas/ERAINT/1979-2012/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(grid_longitude=lambda v: 31 <= v <= 36.5, \
grid_latitude=lambda v: -18. <= v <= -8.)
CCCma = CCCma.extract(Malawi)
#time constraint to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CCCma = CCCma.extract(t_constraint)
#PART 2: OBSERVED DATA
#bring in all the files we need and give them a name
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36., \
latitude=lambda v: -17. <= v <= -9.)
CRU = CRU.extract(Malawi)
#time constraint to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1989 <= cell.point.year <= 2008)
CRU = CRU.extract(t_constraint)
#PART 3: FORMAT DATA
#Convert units to match
CCCma.convert_units('Celsius')
CRU.convert_units('Celsius')
#Take difference between two datasets
Bias_CCCma = CCCma-CRU
#PART 4: PLOT MAP
#plot map with physical features
cmap = plt.cm.afmhot_r
ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.add_feature(cartopy.feature.COASTLINE)
ax.add_feature(cartopy.feature.BORDERS)
ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
ax.add_feature(cartopy.feature.RIVERS)
#set map boundary
ax.set_extent([31, 36.5, -8,-18])
#set axis tick marks
ax.set_xticks([32, 34, 36])
ax.set_yticks([-9, -11, -13, -15, -17])
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax.xaxis.set_major_formatter(lon_formatter)
ax.yaxis.set_major_formatter(lat_formatter)
data = Bias_CCCma
#take mean of data over all time
plot = iplt.contourf(data.collapsed('time', iris.analysis.MEAN), \
cmap=cmap, levels=[15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],\
extend='both')
#add colour bar index
plt.colorbar(plot)
#give map a title
plt.title('RCP4.5 Mean Temperature 1989-2008', fontsize=10)
plt.show()
if __name__ == '__main__':
main()
However this gives me the following error:
ValueError: This operation cannot be performed as there are differing coordinates (grid_latitude, grid_longitude, time) remaining which cannot be ignored.
I was pretty sure this wasn't going to be so simple, but I'm not sure how to fix it. Any ideas? TIA!
My guess is that CCCma and CRU are on different grids, so when you try to subtract them you get an error. You probably need to interpolate them to the same grid first (otherwise, how would iris know which grid you want the result to lie on?).
Iris is very strict about matching up the cube coordinates for binary operations and there is an open issue discussing whether and how to make it more flexible ready for version 2. In the meantime, if your cubes are the same shape and you don't mind loading the data, you could just do
Bias_CCCma = CCCma - CRU.data
If your cubes are different shapes (i.e. the models are on different grids, as Jeremy suggested) or you don't want to load the data, there are a few things to look at:
If the grids are different then you will need to regrid one of the cubes to match the other.
For the subtraction operation, the grid coordinate names need to match up. If you are confident that grid_latitude and grid_longitude mean the same as latitude and longitude, you can rename the grid coordinates on one of your cubes. You will also need to ensure the other coordinate metadata matches (e.g. var_name is often an issue).
The time coordinate coming up in your error message is almost certainly due to the unit mismatch you identified in your previous question. I think this issue should go away if you reorder your code to do the time averaging first and then take the difference (the binary operations don't care so much about scalar coordinates).
Thank you all for your answers. In the end I needed to regrid the data first, as #RuthC suggested.
So the code changed to look like this:
import matplotlib.pyplot as plt
import matplotlib.cm as mpl_cm
import numpy as np
from cf_units import Unit
import iris
import iris.plot as iplt
import cartopy
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import iris.analysis.cartography
import iris.coord_categorisation as iriscc
#this file is split into parts as follows:
#PART 1: load and format CORDEX models
#PART 2: load and format observed data
#PART 3: format data
#PART 4: plot data
def main():
iris.FUTURE.netcdf_promote=True
#PART 1: CORDEX MODELS
#bring in all the models we need and give them a name
CCCma = '/exports/csce/datastore/geos/users/s0899345/Climate_Modelling/AFR_44_tasmax/ERAINT/1979-2012/tasmax_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_CCCma-CanRCM4_r2_mon_198901-200912.nc'
#Load exactly one cube from given file
CCCma = iris.load_cube(CCCma)
#remove flat latitude and longitude and only use grid latitude and grid longitude to make consistent with the observed data, also make sure all of the longitudes are monotonic
lats = iris.coords.DimCoord(CORDEX.coord('latitude').points[:,0], \
standard_name='latitude', units='degrees')
lons = CORDEX.coord('longitude').points[0]
for i in range(len(lons)):
if lons[i]>100.:
lons[i] = lons[i]-360.
lons = iris.coords.DimCoord(lons, \
standard_name='longitude', units='degrees')
CORDEX.remove_coord('latitude')
CORDEX.remove_coord('longitude')
CORDEX.remove_coord('grid_latitude')
CORDEX.remove_coord('grid_longitude')
CORDEX.add_dim_coord(lats, 1)
CORDEX.add_dim_coord(lons, 2)
#PART 2: OBSERVED DATA
#bring in all the files we need and give them a name
CRU= '/exports/csce/datastore/geos/users/s0XXXX/Climate_Modelling/Actual_Data/cru_ts4.00.1901.2015.tmp.dat.nc'
#Load exactly one cube from given file
CRU = iris.load_cube(CRU, 'near-surface temperature')
#PART 3: FORMAT DATA
#Regrid observed data onto rotated pole grid
CRU = CRU.regrid(CORDEX, iris.analysis.Linear())
#we are only interested in the latitude and longitude relevant to Malawi
Malawi = iris.Constraint(longitude=lambda v: 32.5 <= v <= 36.5, \
latitude=lambda v: -17. <= v <= -9.)
CORDEX = CORDEX.extract(Malawi)
CRU = CRU.extract(Malawi)
#time constraint to make all series the same
iris.FUTURE.cell_datetime_objects = True
t_constraint = iris.Constraint(time=lambda cell: 1990<= cell.point.year <= 2008)
CORDEX = CORDEX.extract(t_constraint)
CRU = CRU.extract(t_constraint)
#Convert units to match
CORDEX.convert_units('Celsius')
CRU.unit = Unit('Celsius') # This fixes CRU which is in 'Degrees Celsius' to read 'Celsius'
#add years to data
iriscc.add_year(CORDEX, 'time')
iriscc.add_year(CRU, 'time')
#We are interested in plotting the data for the average of the time period.
CORDEX = CORDEX.collapsed('time', iris.analysis.MEAN)
CRU = CRU.collapsed(['time'], iris.analysis.MEAN)
#Take difference between two datasets
Bias = CRU-CORDEX
#PART 4: PLOT MAP
#load color palette
colourA = mpl_cm.get_cmap('brewer_YlOrRd_09')
#plot map with physical features
ax = plt.axes(projection=cartopy.crs.PlateCarree())
ax.add_feature(cartopy.feature.COASTLINE)
ax.add_feature(cartopy.feature.BORDERS)
ax.add_feature(cartopy.feature.LAKES, alpha=0.5)
ax.add_feature(cartopy.feature.RIVERS)
#set map boundary
ax.set_extent([32.5, 36., -9, -17])
#set axis tick marks
ax.set_xticks([33, 34, 35])
ax.set_yticks([-10, -12, -14, -16])
lon_formatter = LongitudeFormatter(zero_direction_label=True)
lat_formatter = LatitudeFormatter()
ax.xaxis.set_major_formatter(lon_formatter)
ax.yaxis.set_major_formatter(lat_formatter)
#plot data and set colour range
plot = iplt.contourf(CORDEX, cmap=colourA, levels=np.arange(13,32,1), extend='both')
#add colour bar index and a label
plt.colorbar(plot, label='Celsius')
#give map a title
plt.title('Tasmax 1990-2008 - CanRCM4_ERAINT ', fontsize=10)
#save the image of the graph and include full legend
plt.savefig('ERAINT_CCCma_Tasmax_MAP_Annual', bbox_inches='tight')
plt.show()
if __name__ == '__main__':
main()
Would there be a way to plot the borders of the continents with Basemap (or without Basemap, if there is some other way), without those annoying rivers coming along? Especially that piece of Kongo River, not even reaching the ocean, is disturbing.
EDIT: I intend to further plot data over the map, like in the Basemap gallery (and still have the borderlines of the continents drawn as black lines over the data, to give structure for the worldmap) so while the solution by Hooked below is nice, masterful even, it's not applicable for this purpose.
Image produced by:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(projection='robin',lon_0=0,resolution='c')
m.fillcontinents(color='gray',lake_color='white')
m.drawcoastlines()
plt.savefig('world.png',dpi=75)
For reasons like this i often avoid Basemap alltogether and read the shapefile in with OGR and convert them to a Matplotlib artist myself. Which is alot more work but also gives alot more flexibility.
Basemap has some very neat features like converting the coordinates of input data to your 'working projection'.
If you want to stick with Basemap, get a shapefile which doesnt contain the rivers. Natural Earth for example has a nice 'Land' shapefile in the physical section (download 'scale rank' data and uncompress). See http://www.naturalearthdata.com/downloads/10m-physical-vectors/
You can read the shapefile in with the m.readshapefile() method from Basemap. This allows you to get the Matplotlib Path vertices and codes in the projection coordinates which you can then convert into a new Path. Its a bit of a detour but it gives you all styling options from Matplotlib, most of which are not directly available via Basemap. Its a bit hackish, but i dont now another way while sticking to Basemap.
So:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.collections import PathCollection
from matplotlib.path import Path
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
# MPL searches for ne_10m_land.shp in the directory 'D:\\ne_10m_land'
m = Basemap(projection='robin',lon_0=0,resolution='c')
shp_info = m.readshapefile('D:\\ne_10m_land', 'scalerank', drawbounds=True)
ax = plt.gca()
ax.cla()
paths = []
for line in shp_info[4]._paths:
paths.append(Path(line.vertices, codes=line.codes))
coll = PathCollection(paths, linewidths=0, facecolors='grey', zorder=2)
m = Basemap(projection='robin',lon_0=0,resolution='c')
# drawing something seems necessary to 'initiate' the map properly
m.drawcoastlines(color='white', zorder=0)
ax = plt.gca()
ax.add_collection(coll)
plt.savefig('world.png',dpi=75)
Gives:
How to remove "annoying" rivers:
If you want to post-process the image (instead of working with Basemap directly) you can remove bodies of water that don't connect to the ocean:
import pylab as plt
A = plt.imread("world.png")
import numpy as np
import scipy.ndimage as nd
import collections
# Get a counter of the greyscale colors
a = A[:,:,0]
colors = collections.Counter(a.ravel())
outside_and_water_color, land_color = colors.most_common(2)
# Find the contigous landmass
land_idx = a == land_color[0]
# Index these land masses
L = np.zeros(a.shape,dtype=int)
L[land_idx] = 1
L,mass_count = nd.measurements.label(L)
# Loop over the land masses and fill the "holes"
# (rivers without outlays)
L2 = np.zeros(a.shape,dtype=int)
L2[land_idx] = 1
L2 = nd.morphology.binary_fill_holes(L2)
# Remap onto original image
new_land = L2==1
A2 = A.copy()
c = [land_color[0],]*3 + [1,]
A2[new_land] = land_color[0]
# Plot results
plt.subplot(221)
plt.imshow(A)
plt.axis('off')
plt.subplot(222)
plt.axis('off')
B = A.copy()
B[land_idx] = [1,0,0,1]
plt.imshow(B)
plt.subplot(223)
L = L.astype(float)
L[L==0] = None
plt.axis('off')
plt.imshow(L)
plt.subplot(224)
plt.axis('off')
plt.imshow(A2)
plt.tight_layout() # Only with newer matplotlib
plt.show()
The first image is the original, the second identifies the land mass. The third is not needed but fun as it ID's each separate contiguous landmass. The fourth picture is what you want, the image with the "rivers" removed.
Following user1868739's example, I am able to select only the paths (for some lakes) that I want:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='white',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84] # want Baikal, but not Tanganyika
# 80 = Superior+Michigan+Huron, 81 = Victoria, 82 = Aral, 83 = Tanganyika,
# 84 = Baikal, 85 = Great Bear, 86 = Great Slave, 87 = Nyasa, 88 = Erie
# 89 = Winnipeg, 90 = Ontario
for ipoly in ipolygons:
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][2] for i in xrange(len(polygon_vertices))]
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
plt.savefig('world2.png',dpi=100)
But this only works when using white background for the continents. If I change color to 'gray' in the following line, we see that other rivers and lakes are not filled with the same color as the continents are. (Also playing with area_thresh will not remove those rivers that are connected to ocean.)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
The version with white background is adequate for further color-plotting all kind of land information over the continents, but a more elaborate solution would be needed, if one wants to retain the gray background for continents.
I frequently modify Basemap's drawcoastlines() to avoid those 'broken' rivers. I also modify drawcountries() for the sake of data source consistency.
Here is what I use in order to support the different resolutions available in Natural Earth data:
from mpl_toolkits.basemap import Basemap
class Basemap(Basemap):
""" Modify Basemap to use Natural Earth data instead of GSHHG data """
def drawcoastlines(self):
shapefile = 'data/naturalearth/coastline/ne_%sm_coastline' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'coastline', linewidth=1.)
def drawcountries(self):
shapefile = 'data/naturalearth/countries/ne_%sm_admin_0_countries' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'countries', linewidth=0.5)
m = Basemap(llcrnrlon=-90, llcrnrlat=-40, urcrnrlon=-30, urcrnrlat=+20,
resolution='l') # resolution = (l)ow | (m)edium | (h)igh
m.drawcoastlines()
m.drawcountries()
Here is the output:
Please note that by default Basemap uses resolution='c' (crude), which is not supported in the code shown.
If you're OK with plotting outlines rather than shapefiles, it's pretty easy to plot coastlines that you can get from wherever. I got my coastlines from the NOAA Coastline Extractor in MATLAB format:
http://www.ngdc.noaa.gov/mgg/shorelines/shorelines.html
To edit the coastlines, I converted to SVG, then edited them with Inkscape, then converted back to the lat/lon text file ("MATLAB" format).
All Python code is included below.
# ---------------------------------------------------------------
def plot_lines(mymap, lons, lats, **kwargs) :
"""Plots a custom coastline. This plots simple lines, not
ArcInfo-style SHAPE files.
Args:
lons: Longitude coordinates for line segments (degrees E)
lats: Latitude coordinates for line segments (degrees N)
Type Info:
len(lons) == len(lats)
A NaN in lons and lats signifies a new line segment.
See:
giss.noaa.drawcoastline_file()
"""
# Project onto the map
x, y = mymap(lons, lats)
# BUG workaround: Basemap projects our NaN's to 1e30.
x[x==1e30] = np.nan
y[y==1e30] = np.nan
# Plot projected line segments.
mymap.plot(x, y, **kwargs)
# Read "Matlab" format files from NOAA Coastline Extractor.
# See: http://www.ngdc.noaa.gov/mgg/coast/
lineRE=re.compile('(.*?)\s+(.*)')
def read_coastline(fname, take_every=1) :
nlines = 0
xdata = array.array('d')
ydata = array.array('d')
for line in file(fname) :
# if (nlines % 10000 == 0) :
# print 'nlines = %d' % (nlines,)
if (nlines % take_every == 0 or line[0:3] == 'nan') :
match = lineRE.match(line)
lon = float(match.group(1))
lat = float(match.group(2))
xdata.append(lon)
ydata.append(lat)
nlines = nlines + 1
return (np.array(xdata),np.array(ydata))
def drawcoastline_file(mymap, fname, **kwargs) :
"""Reads and plots a coastline file.
See:
giss.basemap.drawcoastline()
giss.basemap.read_coastline()
"""
lons, lats = read_coastline(fname, take_every=1)
return drawcoastline(mymap, lons, lats, **kwargs)
# =========================================================
# coastline2svg.py
#
import giss.io.noaa
import os
import numpy as np
import sys
svg_header = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.1"
width="360"
height="180"
id="svg2">
<defs
id="defs4" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1">
"""
path_tpl = """
<path
d="%PATH%"
id="%PATH_ID%"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
"""
svg_footer = "</g></svg>"
# Set up paths
data_root = os.path.join(os.environ['HOME'], 'data')
#modelerc = giss.modele.read_modelerc()
#cmrun = modelerc['CMRUNDIR']
#savedisk = modelerc['SAVEDISK']
ifname = sys.argv[1]
ofname = ifname.replace('.dat', '.svg')
lons, lats = giss.io.noaa.read_coastline(ifname, 1)
out = open(ofname, 'w')
out.write(svg_header)
path_id = 1
points = []
for lon, lat in zip(lons, lats) :
if np.isnan(lon) or np.isnan(lat) :
# Process what we have
if len(points) > 2 :
out.write('\n<path d="')
out.write('m %f,%f L' % (points[0][0], points[0][1]))
for pt in points[1:] :
out.write(' %f,%f' % pt)
out.write('"\n id="path%d"\n' % (path_id))
# out.write('style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"')
out.write(' />\n')
path_id += 1
points = []
else :
lon += 180
lat = 180 - (lat + 90)
points.append((lon, lat))
out.write(svg_footer)
out.close()
# =============================================================
# svg2coastline.py
import os
import sys
import re
# Reads the output of Inkscape's "Plain SVG" format, outputs in NOAA MATLAB coastline format
mainRE = re.compile(r'\s*d=".*"')
lineRE = re.compile(r'\s*d="(m|M)\s*(.*?)"')
fname = sys.argv[1]
lons = []
lats = []
for line in open(fname, 'r') :
# Weed out extraneous lines in the SVG file
match = mainRE.match(line)
if match is None :
continue
match = lineRE.match(line)
# Stop if something is wrong
if match is None :
sys.stderr.write(line)
sys.exit(-1)
type = match.group(1)[0]
spairs = match.group(2).split(' ')
x = 0
y = 0
for spair in spairs :
if spair == 'L' :
type = 'M'
continue
(sdelx, sdely) = spair.split(',')
delx = float(sdelx)
dely = float(sdely)
if type == 'm' :
x += delx
y += dely
else :
x = delx
y = dely
lon = x - 180
lat = 90 - y
print '%f\t%f' % (lon, lat)
print 'nan\tnan'
Okay I think I have a partial solution.
The basic idea is that the paths used by drawcoastlines() are ordered by the size/area. Which means the first N paths are (for most applications) the main land masses and lakes and the later paths the smaller islands and rivers.
The issue is that the first N paths that you want will depend on the projection (e.g., global, polar, regional), if area_thresh has been applied and whether you want lakes or small islands etc. In other words, you will have to tweak this per application.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mp = 'cyl'
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
print ipoly
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
Once you know the relevant ipoly to stop drawing (poly_stop) then you can do something like this...
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mproj = ['nplaea','cyl']
mp = mproj[0]
if mp == 'nplaea':
m = Basemap(resolution='c',projection=mp,lon_0=0,boundinglat=30,area_thresh=200000,round=1)
poly_stop = 10
else:
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
poly_stop = 18
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
if ipoly > poly_stop: continue
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
As per my comment to #sampo-smolander
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84]
for ipoly in xrange(len(coasts_paths)):
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
if ipoly in ipolygons:
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
else:
m.plot(px,py,linewidth=0.5,zorder=4,color='grey')
plt.savefig('world2.png',dpi=100)