Matplotlib plots distorted shapes instead of a polygon and a wedge - python

I am trying to add a function to the pycircos module written by ponnhide on https://github.com/ponnhide/pyCircos/blob/master/pycircos/pycircos.py. I tried to add a circular arrow in the circular plot, which is a patch collection from a ring and an arrow. However, when I call the function I put into the pycircos module in my python notebook, I get a very distorted shape, which has nothing to do with the ring (wedge) or the arrow (polygon).
I have integrated the following function from https://github.com/NaysanSaran/markov-chain/blob/master/notebooks/Drawing.ipynb in the following pycircos module:
def draw_self_loop(self, center=(0, 0), radius=500, facecolor='#2693de', edgecolor='#000000', theta1=-30, theta2=240):
# Add the ring
rwidth = 0.02
ring = mpatches.Wedge(center, radius, theta1, theta2, width=rwidth)
# Triangle edges
offset = 0.02
xcent = center[0] - radius + (rwidth/2)
left = [xcent - offset, center[1]]
right = [xcent + offset, center[1]]
bottom = [(left[0]+right[0])/2., center[1]-0.05]
arrow = plt.Polygon([left, right, bottom, left])
p = PatchCollection(
[ring, arrow],
edgecolor = edgecolor,
facecolor = facecolor
)
self.ax.add_collection(p)
In my python notebook, I have called the function from the class object "circle" at the second last line:
%matplotlib inline
import pycircos
import matplotlib.pyplot as plt
import collections
from Bio import SeqIO
Garc = pycircos.Garc
Gcircle = pycircos.Gcircle
arcList_size = []
arcList_name = []
circle = Gcircle(figsize=(8,8))
with open("BIM_data_general.txt") as f:
f.readline()
for line in f:
line = line.rstrip().split(",")
name = line[0]
length = int(line[-1])
arc = Garc(arc_id=name, size=length, interspace=2, raxis_range=(990,1000),
labelposition=50,facecolor="#FFFFFF",edgecolor="#FFFFFF",labelsize=15, label_visible=False)
arcList_size.append(arc.size)
arcList_name.append(arc.arc_id)
circle.add_garc(arc)
circle.set_garcs(0,360)
circle.draw_self_loop()
circle.figure
instead of this example , I get this distorted shape

Related

Define a Voronoi cell max radius (python)

My question is similar to this one (R + ggplot) but for Python.
Question
How to generate a (2D) Voronoi diagram where cells have a maximum extend / growth limit ?
it would result in some cells with curved boundary (circle arc), and some space regions not filled by any cell.
An image processing interpretation of the desired output would be to perform some "AND" mask between the voronoi cells and circles centered on each cell with the required radius.
(For my particular problem, I am working with geolocated data, and for now I am willing to accept discrepancies from most voronoi libs that will use lat/lon as cartesian coordinates. That is another matter...)
This question is NOT about clipping or handling of "inifinite" voronoi cells (see scypi.spatial.Voronoi), so the following links are NOT related :
clipping a voronoi diagram python
How to limit Voronoi cells even when infinite with python?
Vaguely related topics:
Is there a way to vary the rate of Voronoi cell growth?
https://gis.stackexchange.com/questions/366471/weighted-voronoi-polygons-in-r
https://gis.stackexchange.com/questions/17282/create-weighted-thiessen-polygons/17284#17284
https://github.com/HichamZouarhi/Weighted-Voronoi-PyQGIS
Example from the R + ggplot answer:
sample code for tests:
from typing import Tuple, List, Union
import geovoronoi
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
from geographiclib.geodesic import Geodesic
from shapely.geometry import Polygon, Point
from simplekml import LineStyle, Color, PolyStyle, Container
WGS84_Tool = Geodesic.WGS84
T_Color_List = List[Union[clr.Colormap, clr.LinearSegmentedColormap, clr.ListedColormap]]
def generate_n_colors(n, cmap_name='tab20') -> T_Color_List:
"""
https://github.com/WZBSocialScienceCenter/geovoronoi/blob/master/geovoronoi/plotting.py
Get a list of `n` numbers from matplotlib color map `cmap_name`. If `n` is larger than the number of colors in the
color map, the colors will be recycled, i.e. they are not unique in this case.
:param n: number of colors to generate
:param cmap_name: matplotlib color map name
:return: list of `n` colors
"""
pt_region_colormap = plt.get_cmap(cmap_name)
max_i = len(pt_region_colormap.colors)
return [pt_region_colormap(i % max_i) for i in range(n)]
def _plot_cell_and_seed(kml: Container,
name: str,
region_polygon: Polygon,
seed_coords: Tuple[float, float],
_kml_color: str):
_p = kml.newpolygon(name=f"{name} zone",
outerboundaryis=[(lon, lat)
for lat, lon
in region_polygon.exterior.coords], )
_p.style.linestyle = LineStyle(color=Color.darkgrey, width=1.)
_p.style.polystyle = PolyStyle(color=_kml_color)
p = kml.newpoint(coords=[seed_coords],
name=name)
p.style.iconstyle.icon.href = "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
p.style.iconstyle.scale = 0.5
p.style.labelstyle.scale = 0.5
def plot_regions(region_polys,
region_pts,
seeds_coords_list: np.ndarray,
seeds_names: List[str],
kml: Container,
colors: T_Color_List,
):
assert (len(seeds_names) == len(seeds_coords_list))
index = 0
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = seeds_coords_list[_cell_point_indexes][0]
name = seeds_names[_cell_point_indexes[0]]
_kml_airport_coords = (_cell_seed_coords[-1], _cell_seed_coords[0])
_mpl_color = colors[index]
_mpl_hexa_color = clr.to_hex(_mpl_color, keep_alpha=True)
_hexa_color_no_sharp = _mpl_hexa_color.split("#")[-1]
_kml_color = Color.hexa(_hexa_color_no_sharp)
_kml_color = Color.changealphaint(alpha=7 * 255 // 10, gehex=_kml_color)
_plot_cell_and_seed(kml=kml,
name=name,
region_polygon=region_polygon,
seed_coords=_kml_airport_coords,
_kml_color=_kml_color)
index += 1
# bounding box for geovoronoi
geo_boundaries = {"min": {"lat": +30, "lon": -12},
"max": {"lat": +75, "lon": +35}, }
# a list of [[lat,lon],[lat,lon],[lat,lon],[lat,lon],]
n = 150
seeds_coords_list = np.dstack(
[np.random.uniform(low=geo_boundaries["min"]["lat"], high=geo_boundaries["max"]["lat"], size=n),
np.random.uniform(low=geo_boundaries["min"]["lon"], high=geo_boundaries["max"]["lon"], size=n), ]).reshape((n, 2))
seeds_names = [f"{lat:+_.2f};{lon:+_.2f}" for lat, lon in seeds_coords_list]
boundary_points = geovoronoi.coords_to_points([[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]],
[geo_boundaries["min"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["min"]["lon"]],
# last necessary ?
[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]]])
boundary_polygon = Polygon(boundary_points)
# beware that geodesics and geovoronoi may not be accurate since it uses planar cartesian formulas...
region_polys, region_pts = geovoronoi.voronoi_regions_from_coords(seeds_coords_list, boundary_polygon)
# DO SOMETHING HERE
#...
#...
#...
kdoc = Kml()
p_kml = Path.cwd() / "voronoi_so.kml"
colors: T_Color_List = generate_n_colors(len(region_polys))
plot_regions(region_polys=region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="raw regions"),
colors=colors)
print("save KML")
kdoc.save(p_kml.as_posix())
print(p_kml.as_uri())
After some thinking, here is an approach using Shapely and using the intersection between computed circles (360 vertices) and each voronoi cell.
Performance is not convincing since we create 360 points and a polygon + 1 intersection computation for each cell... (at least it is a bounded ...).
However, the underlying voronoi computation data such as cells adjacency is lost, and we can't know if some cells are isolated after this transformation. Maybe some ideas here: Determining and storing Voronoi Cell Adjacency
I do believe better solutions may exist.
So here is a solution, with random geographical data, and corresponding kml rendering.
EDIT: using weighted voronoi diagrams, a possible weight function could be f(d) = d <= radius ? I could not explore this for now...
Raw regions:
Modified raw regions:
from pathlib import Path
from typing import Tuple, List, Union
import geovoronoi
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
from geographiclib.geodesic import Geodesic
from shapely.geometry import Polygon, Point
from simplekml import LineStyle, Kml, Color, PolyStyle, Container
WGS84_Tool = Geodesic.WGS84
T_Color_List = List[Union[clr.Colormap, clr.LinearSegmentedColormap, clr.ListedColormap]]
def generate_n_colors(n, cmap_name='tab20') -> T_Color_List:
"""
https://github.com/WZBSocialScienceCenter/geovoronoi/blob/master/geovoronoi/plotting.py
Get a list of `n` numbers from matplotlib color map `cmap_name`. If `n` is larger than the number of colors in the
color map, the colors will be recycled, i.e. they are not unique in this case.
:param n: number of colors to generate
:param cmap_name: matplotlib color map name
:return: list of `n` colors
"""
pt_region_colormap = plt.get_cmap(cmap_name)
max_i = len(pt_region_colormap.colors)
return [pt_region_colormap(i % max_i) for i in range(n)]
def _create_bounded_regions(region_polys,
region_pts,
distance_criteria_m: float,
cell_seed_coords_list: np.ndarray,
nb_vertices:int=36):
new_polygons = {}
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = cell_seed_coords_list[_cell_point_indexes][0]
_arpt_lat = _cell_seed_coords[0]
_arpt_lon = _cell_seed_coords[-1]
cycle_vertices = []
for a in np.linspace(0,359,nb_vertices):
p = WGS84_Tool.Direct(lat1=_arpt_lat, lon1=_arpt_lon, azi1=a, s12=distance_criteria_m)
_point = Point(p["lat2"], p["lon2"])
cycle_vertices.append(_point)
circle = Polygon(cycle_vertices)
new_polygons[region_id] = region_polygon.intersection(circle)
return new_polygons
def _plot_cell_and_seed(kml: Container,
name: str,
region_polygon: Polygon,
seed_coords: Tuple[float, float],
_kml_color: str):
_p = kml.newpolygon(name=f"{name} zone",
outerboundaryis=[(lon, lat)
for lat, lon
in region_polygon.exterior.coords], )
_p.style.linestyle = LineStyle(color=Color.darkgrey, width=1.)
_p.style.polystyle = PolyStyle(color=_kml_color)
p = kml.newpoint(coords=[seed_coords],
name=name)
p.style.iconstyle.icon.href = "http://maps.google.com/mapfiles/kml/shapes/placemark_circle.png"
p.style.iconstyle.scale = 0.5
p.style.labelstyle.scale = 0.5
def plot_regions(region_polys,
region_pts,
seeds_coords_list: np.ndarray,
seeds_names: List[str],
kml: Container,
colors: T_Color_List,
):
assert (len(seeds_names) == len(seeds_coords_list))
index = 0
for region_id, region_polygon in region_polys.items():
_cell_point_indexes = region_pts[region_id]
_cell_seed_coords = seeds_coords_list[_cell_point_indexes][0]
name = seeds_names[_cell_point_indexes[0]]
_kml_airport_coords = (_cell_seed_coords[-1], _cell_seed_coords[0])
_mpl_color = colors[index]
_mpl_hexa_color = clr.to_hex(_mpl_color, keep_alpha=True)
_hexa_color_no_sharp = _mpl_hexa_color.split("#")[-1]
_kml_color = Color.hexa(_hexa_color_no_sharp)
_kml_color = Color.changealphaint(alpha=7 * 255 // 10, gehex=_kml_color)
_plot_cell_and_seed(kml=kml,
name=name,
region_polygon=region_polygon,
seed_coords=_kml_airport_coords,
_kml_color=_kml_color)
index += 1
# bounding box for geovoronoi
geo_boundaries = {"min": {"lat": +30, "lon": -12},
"max": {"lat": +75, "lon": +35}, }
# a list of [[lat,lon],[lat,lon],[lat,lon],[lat,lon],]
n = 150
seeds_coords_list = np.dstack(
[np.random.uniform(low=geo_boundaries["min"]["lat"], high=geo_boundaries["max"]["lat"], size=n),
np.random.uniform(low=geo_boundaries["min"]["lon"], high=geo_boundaries["max"]["lon"], size=n), ]).reshape((n, 2))
seeds_names = [f"{lat:+_.2f};{lon:+_.2f}" for lat, lon in seeds_coords_list]
boundary_points = geovoronoi.coords_to_points([[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]],
[geo_boundaries["min"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["max"]["lon"]],
[geo_boundaries["max"]["lat"], geo_boundaries["min"]["lon"]],
# last necessary ?
[geo_boundaries["min"]["lat"], geo_boundaries["min"]["lon"]]])
boundary_polygon = Polygon(boundary_points)
# beware that geodesics and geovoronoi may not be accurate since it uses planar cartesian formulas...
region_polys, region_pts = geovoronoi.voronoi_regions_from_coords(seeds_coords_list, boundary_polygon)
new_region_polys = _create_bounded_regions(region_polys=region_polys,
region_pts=region_pts,
distance_criteria_m=300_000.,
cell_seed_coords_list=seeds_coords_list, )
kdoc = Kml()
p_kml = Path.cwd() / "voronoi_so.kml"
colors: T_Color_List = generate_n_colors(len(region_polys))
plot_regions(region_polys=region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="raw regions"),
colors=colors)
plot_regions(region_polys=new_region_polys,
region_pts=region_pts,
seeds_coords_list=seeds_coords_list,
seeds_names=seeds_names,
kml=kdoc.newfolder(name="new_regions (range)"),
colors=colors)
print("save KML")
kdoc.save(p_kml.as_posix())
print(p_kml.as_uri())

how to change the color of subplot datafile in matplotlib

In Short:
I want to change the color of blue marker in the graph. So that I can do comparison with other plots easily.
You can download the data files and script from this link
Problem Explanation
I have two data files, full.dat and part.dat(Note: part.dat is also there in full.dat).
I got the plotting scripts from the internet, and it is working very well. But as a noob in Python and Matplotlib, I am facing difficulties in changing the color of part.dat.
Please see the graph first, then the following scripts.
Script-1: Function and definitions: let's say: "func.py"
# This was written by Levi Lentz for the Kolpak Group at MIT
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
import sys
#This function extracts the high symmetry points from the output of bandx.out
def Symmetries(fstring):
f = open(fstring,'r')
x = np.zeros(0)
for i in f:
if "high-symmetry" in i:
x = np.append(x,float(i.split()[-1]))
f.close()
return x
# This function takes in the datafile, the fermi energy, the symmetry file, a subplot, and the label
# It then extracts the band data, and plots the bands, the fermi energy in red, and the high symmetry points
def bndplot(datafile_full,datafile,fermi,symmetryfile,subplot,**kwargs):
if 'shift_fermi' in kwargs:
bool_shift_efermi = kwargs['shift_fermi']
else:
bool_shift_efermi = 0
if 'color' in kwargs:
color_bnd=kwargs['color']
else:
color_bnd='black'
if 'linestyle' in kwargs:
line_bnd=kwargs['linestyle']
else:
line_bnd='solid'
z = np.loadtxt(datafile_full) #This loads the full.dat file
x = np.unique(z[:,0]) #This is all the unique x-points
[a,b,w]=np.loadtxt(datafile,unpack=True) #Weight
bands = []
bndl = len(z[z[:,0]==x[1]]) #This gives the number of bands in the calculation
Fermi = float(fermi)
if bool_shift_efermi:
fermi_shift=Fermi
else:
fermi_shift=0
axis = [min(x),max(x)]
for i in range(0,bndl):
bands.append(np.zeros([len(x),2])) #This is where we storre the bands
for i in range(0,len(x)):
sel = z[z[:,0] == x[i]] #Here is the energies for a given x
test = []
for j in range(0,bndl): #This separates it out into a single band
bands[j][i][0] = x[i]
#bands[j][i][1] = np.multiply(sel[j][1],13.605698066)
bands[j][i][1] = sel[j][1]
#Here we plots the bands
for i in bands:
subplot.plot(i[:,0],i[:,1]-fermi_shift,color=color_bnd,linestyle=line_bnd, linewidth=0.7,alpha=0.5)
# plt.scatter(a,b-fermi_shift,c=w,cmap='viridis',alpha=0.5)
# plt.colorbar()
if 'legend' in kwargs:
#empty plot to generate legend
subplot.plot([None],[None],color=color_bnd,linestyle=line_bnd,label=kwargs['legend'])
temp = Symmetries(symmetryfile)
for j in temp: #This is the high symmetry lines
x1 = [j,j]
subplot.axvline(x=j,linestyle='dashed',color='black',alpha=0.75)
subplot.plot([min(x),max(x)],[Fermi-fermi_shift,Fermi-fermi_shift],color='red',linestyle='dotted')
subplot.set_xticks(temp)
subplot.set_xticklabels([])
if 'name_k_points' in kwargs:
if len(kwargs['name_k_points'])==len(temp):
subplot.set_xticklabels(kwargs['name_k_points'])
if 'range' in kwargs:
range_plot=kwargs['range']
subplot.set_ylim([range_plot[0],range_plot[1]])
subplot.set_xlim([axis[0],axis[1]])
subplot.set_xlabel('k')
subplot.set_ylabel('E-E$_f$')
plt.scatter(a,b-fermi_shift,s=70*np.array(w))
if 'legend' in kwargs:
plt.legend()
script-2 Plotting script: let's say: "plot.py"
#!/usr/bin/python3
from func import *
El='el'
orb='orb'
plt.rcParams["figure.figsize"]=(4,15)
datafile_full='bands.dat.gnu'
#datafile=El+'_'+orb+'.dat.all'
datafile=El+'_'+orb+'.dat.all'
fermi = 10.2382
symmetryfile='band.out'
bool_shift_efermi= True
fig, ax = plt.subplots()
#bndplot(datafile,fermi,symmetryfile,ax)
bndplot(datafile_full,datafile,fermi,symmetryfile,ax,shift_fermi=1,color='black',linestyle='solid',name_k_points=['K','G','M','K','H','A','L','H'], legend=El+', '+orb+'-orbital')
#ax.set_ylim(-5,5)
ax.set_ylim(-10,12)
fig.set_figheight(6)
fig.set_figwidth(4)
plt.rcParams.update({'font.size': 22})
fig.savefig("el-orb.eps")
plt.show()
In script-2, there is an option to change the color, however I want to change the color of blue marker/solid-circles(please see the graph) so that I can compare with other graphs.
Whenever I change the color, it changes the line color only.
Please help me out I am trying to understand Matplotlib uses and examples from past few hrs However as a noob I was not able to figure out how to do.

'Jagged' output from dot file

I have been trying to insert a .dot graph from an sklearn decision tree into a pyplot subplot, and have been struggling to do so. The library pygraphviz is refusing to work on my Windows system, so I've used the following to insert the images:
from subprocess import check_call
from PIL import Image
import matplotlib.pyplot as plt
gridSpec = fig.add_gridspec(3, 2)
treeSubPlot = plt.subplot(gridSpec.new_subplotspec((2, 1)))
treeSubPlot.tick_params(axis = "both", which = "both", bottom = False, top = False, labelbottom = False, right = False, left = False, labelleft = False)
treeSubPlot.set_xlabel("Final model")
# Obtain the size of a single plot in inches to inform the dot picture creator.
height = treeSubPlot.get_window_extent().y1 - treeSubPlot.get_window_extent().y0
width = treeSubPlot.get_window_extent().x1 - treeSubPlot.get_window_extent().x0
check_call(['dot','-Tpng', "-Gsize=" + str(width/100) + "," + str(height/100) + "!", "-Gdpi=100", "-Gratio=fill", 'random_tree.dot','-o','random.png'])
randImg = Image.open("random.png")
treeSubplot.imshow(randImg, aspect = "auto")
With the .dot file:
digraph Tree {
node [shape=box] ;
0 [label="B field <= 332.72\nsamples = 19\nvalue = 0.41"] ;
1 [label="samples = 11\nvalue = 0.67"] ;
0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
2 [label="B field <= 576.73\nsamples = 8\nvalue = 0.04"] ;
0 -> 2 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
3 [label="samples = 4\nvalue = 0.05"] ;
2 -> 3 ;
4 [label="samples = 4\nvalue = 0.03"] ;
2 -> 4 ;
}
What results is the plot, but it looks extremely 'jagged', for lack of a better term:
Are there any settings I can use to stop this? I'm trying to make the image to fit the subplot perfectly but it's still jagged.
There are several issues.
The image produced by dot is not actually the exact size you specify in -Gsize={},{}! parameter.
There are rounding errors, because the matplotlib axes is positioned in relative figure coordinates and those later do not precisely match the integer positions of pixels.
Finally, the axes to show the image in is extremely small, so the above effects become even more pronounced.
A solution one can come up with is to find the approximate size of the axes, call the dot program and retrieve the image from it. Then set the actual size of the axes according to the actual image size (this might shift the axes by a few pixels compared to their original position).
In the code below I took a bigger axes. Also note that a higher dpi or figure size will increase the readability.
import numpy as np
from subprocess import check_call
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.transforms as mtrans
dpi = 100
fig = plt.figure(dpi=dpi)
gridSpec = fig.add_gridspec(3, 2)
ax = plt.subplot(gridSpec.new_subplotspec((2, 1)))
#ax = fig.add_subplot(111)
ax.tick_params(axis = "both", which = "both", bottom = False, top = False,
labelbottom = False, right = False, left = False, labelleft = False)
ax.set_xlabel("Final model")
# calculate nominal size supplied to `dot`
ext = ax.get_position().transformed(fig.transFigure).extents
bbox = mtrans.Bbox.from_extents(np.array(ext).astype(int))
height = bbox.height
width = bbox.width
# get image from dot
check_call(['dot','-Tpng', "-Gsize={},{}!".format(width/100, height/100),
"-Gdpi={}".format(100), "-Gratio=fill", 'random_tree.dot','-o','random.png'])
randImg = np.array(Image.open("random.png"))
# get ACTUAL size from produced image
aheight, awidth, _ = randImg.shape
# create new bounding box from actual size
abbox = mtrans.Bbox.from_bounds(bbox.x0, bbox.y0, awidth, aheight)
# transform bbox to figure coordintes
abboxf = bbox.transformed(fig.transFigure.inverted())
# Use the thus created bbox to modify the position of the axes
ax.set_position(abboxf)
# finally plot the data
ax.imshow(randImg, aspect = "auto")
# save image (this should now be correct)
plt.savefig("randomdot_mpl.png")
# show image (this might still be slightly wrong,
# if the GUI changes the figure size on the fly.)
plt.show()
As you can see the result is still not perfect (e.g the l in "values" is slightly thicker). In general, a much better solution might be to not place the image inside the axes at all, but to use a figimage, which is a non-resampled image placed inside the figure. If you still want to show the axes around it, you need to make the axes transparent to "see-through".
This would be accomplished by the following code
import numpy as np
from subprocess import check_call
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.transforms as mtrans
dpi = 100
fig = plt.figure(dpi=dpi)
#gridSpec = fig.add_gridspec(3, 2)
#ax = plt.subplot(gridSpec.new_subplotspec((2, 1)))
ax = fig.add_subplot(111)
ax.tick_params(axis = "both", which = "both", bottom = False, top = False,
labelbottom = False, right = False, left = False, labelleft = False)
ax.patch.set_visible(False)
ax.set_xlabel("Final model")
# calculate nominal size supplied to `dot`
ext = ax.get_position().transformed(fig.transFigure).extents
bbox = mtrans.Bbox.from_extents(np.array(ext).astype(int))
height = bbox.height
width = bbox.width
# get image from dot
check_call(['dot','-Tpng', "-Gsize={},{}!".format(width/100, height/100),
"-Gdpi={}".format(100), "-Gratio=fill", 'random_tree.dot','-o','random.png'])
randImg = np.array(Image.open("random.png"))
# produce a figimage, i.e. a non-resampled image
fig.figimage(randImg, xo=bbox.x0, yo=bbox.y0)
# save image (this should now be perfect)
plt.savefig("randomdot_mpl.png")
# show image (this should now be perfect)
plt.show()

world map without rivers with matplotlib / Basemap?

Would there be a way to plot the borders of the continents with Basemap (or without Basemap, if there is some other way), without those annoying rivers coming along? Especially that piece of Kongo River, not even reaching the ocean, is disturbing.
EDIT: I intend to further plot data over the map, like in the Basemap gallery (and still have the borderlines of the continents drawn as black lines over the data, to give structure for the worldmap) so while the solution by Hooked below is nice, masterful even, it's not applicable for this purpose.
Image produced by:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(projection='robin',lon_0=0,resolution='c')
m.fillcontinents(color='gray',lake_color='white')
m.drawcoastlines()
plt.savefig('world.png',dpi=75)
For reasons like this i often avoid Basemap alltogether and read the shapefile in with OGR and convert them to a Matplotlib artist myself. Which is alot more work but also gives alot more flexibility.
Basemap has some very neat features like converting the coordinates of input data to your 'working projection'.
If you want to stick with Basemap, get a shapefile which doesnt contain the rivers. Natural Earth for example has a nice 'Land' shapefile in the physical section (download 'scale rank' data and uncompress). See http://www.naturalearthdata.com/downloads/10m-physical-vectors/
You can read the shapefile in with the m.readshapefile() method from Basemap. This allows you to get the Matplotlib Path vertices and codes in the projection coordinates which you can then convert into a new Path. Its a bit of a detour but it gives you all styling options from Matplotlib, most of which are not directly available via Basemap. Its a bit hackish, but i dont now another way while sticking to Basemap.
So:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
from matplotlib.collections import PathCollection
from matplotlib.path import Path
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
# MPL searches for ne_10m_land.shp in the directory 'D:\\ne_10m_land'
m = Basemap(projection='robin',lon_0=0,resolution='c')
shp_info = m.readshapefile('D:\\ne_10m_land', 'scalerank', drawbounds=True)
ax = plt.gca()
ax.cla()
paths = []
for line in shp_info[4]._paths:
paths.append(Path(line.vertices, codes=line.codes))
coll = PathCollection(paths, linewidths=0, facecolors='grey', zorder=2)
m = Basemap(projection='robin',lon_0=0,resolution='c')
# drawing something seems necessary to 'initiate' the map properly
m.drawcoastlines(color='white', zorder=0)
ax = plt.gca()
ax.add_collection(coll)
plt.savefig('world.png',dpi=75)
Gives:
How to remove "annoying" rivers:
If you want to post-process the image (instead of working with Basemap directly) you can remove bodies of water that don't connect to the ocean:
import pylab as plt
A = plt.imread("world.png")
import numpy as np
import scipy.ndimage as nd
import collections
# Get a counter of the greyscale colors
a = A[:,:,0]
colors = collections.Counter(a.ravel())
outside_and_water_color, land_color = colors.most_common(2)
# Find the contigous landmass
land_idx = a == land_color[0]
# Index these land masses
L = np.zeros(a.shape,dtype=int)
L[land_idx] = 1
L,mass_count = nd.measurements.label(L)
# Loop over the land masses and fill the "holes"
# (rivers without outlays)
L2 = np.zeros(a.shape,dtype=int)
L2[land_idx] = 1
L2 = nd.morphology.binary_fill_holes(L2)
# Remap onto original image
new_land = L2==1
A2 = A.copy()
c = [land_color[0],]*3 + [1,]
A2[new_land] = land_color[0]
# Plot results
plt.subplot(221)
plt.imshow(A)
plt.axis('off')
plt.subplot(222)
plt.axis('off')
B = A.copy()
B[land_idx] = [1,0,0,1]
plt.imshow(B)
plt.subplot(223)
L = L.astype(float)
L[L==0] = None
plt.axis('off')
plt.imshow(L)
plt.subplot(224)
plt.axis('off')
plt.imshow(A2)
plt.tight_layout() # Only with newer matplotlib
plt.show()
The first image is the original, the second identifies the land mass. The third is not needed but fun as it ID's each separate contiguous landmass. The fourth picture is what you want, the image with the "rivers" removed.
Following user1868739's example, I am able to select only the paths (for some lakes) that I want:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='white',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84] # want Baikal, but not Tanganyika
# 80 = Superior+Michigan+Huron, 81 = Victoria, 82 = Aral, 83 = Tanganyika,
# 84 = Baikal, 85 = Great Bear, 86 = Great Slave, 87 = Nyasa, 88 = Erie
# 89 = Winnipeg, 90 = Ontario
for ipoly in ipolygons:
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][2] for i in xrange(len(polygon_vertices))]
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
plt.savefig('world2.png',dpi=100)
But this only works when using white background for the continents. If I change color to 'gray' in the following line, we see that other rivers and lakes are not filled with the same color as the continents are. (Also playing with area_thresh will not remove those rivers that are connected to ocean.)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
The version with white background is adequate for further color-plotting all kind of land information over the continents, but a more elaborate solution would be needed, if one wants to retain the gray background for continents.
I frequently modify Basemap's drawcoastlines() to avoid those 'broken' rivers. I also modify drawcountries() for the sake of data source consistency.
Here is what I use in order to support the different resolutions available in Natural Earth data:
from mpl_toolkits.basemap import Basemap
class Basemap(Basemap):
""" Modify Basemap to use Natural Earth data instead of GSHHG data """
def drawcoastlines(self):
shapefile = 'data/naturalearth/coastline/ne_%sm_coastline' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'coastline', linewidth=1.)
def drawcountries(self):
shapefile = 'data/naturalearth/countries/ne_%sm_admin_0_countries' % \
{'l':110, 'm':50, 'h':10}[self.resolution]
self.readshapefile(shapefile, 'countries', linewidth=0.5)
m = Basemap(llcrnrlon=-90, llcrnrlat=-40, urcrnrlon=-30, urcrnrlat=+20,
resolution='l') # resolution = (l)ow | (m)edium | (h)igh
m.drawcoastlines()
m.drawcountries()
Here is the output:
Please note that by default Basemap uses resolution='c' (crude), which is not supported in the code shown.
If you're OK with plotting outlines rather than shapefiles, it's pretty easy to plot coastlines that you can get from wherever. I got my coastlines from the NOAA Coastline Extractor in MATLAB format:
http://www.ngdc.noaa.gov/mgg/shorelines/shorelines.html
To edit the coastlines, I converted to SVG, then edited them with Inkscape, then converted back to the lat/lon text file ("MATLAB" format).
All Python code is included below.
# ---------------------------------------------------------------
def plot_lines(mymap, lons, lats, **kwargs) :
"""Plots a custom coastline. This plots simple lines, not
ArcInfo-style SHAPE files.
Args:
lons: Longitude coordinates for line segments (degrees E)
lats: Latitude coordinates for line segments (degrees N)
Type Info:
len(lons) == len(lats)
A NaN in lons and lats signifies a new line segment.
See:
giss.noaa.drawcoastline_file()
"""
# Project onto the map
x, y = mymap(lons, lats)
# BUG workaround: Basemap projects our NaN's to 1e30.
x[x==1e30] = np.nan
y[y==1e30] = np.nan
# Plot projected line segments.
mymap.plot(x, y, **kwargs)
# Read "Matlab" format files from NOAA Coastline Extractor.
# See: http://www.ngdc.noaa.gov/mgg/coast/
lineRE=re.compile('(.*?)\s+(.*)')
def read_coastline(fname, take_every=1) :
nlines = 0
xdata = array.array('d')
ydata = array.array('d')
for line in file(fname) :
# if (nlines % 10000 == 0) :
# print 'nlines = %d' % (nlines,)
if (nlines % take_every == 0 or line[0:3] == 'nan') :
match = lineRE.match(line)
lon = float(match.group(1))
lat = float(match.group(2))
xdata.append(lon)
ydata.append(lat)
nlines = nlines + 1
return (np.array(xdata),np.array(ydata))
def drawcoastline_file(mymap, fname, **kwargs) :
"""Reads and plots a coastline file.
See:
giss.basemap.drawcoastline()
giss.basemap.read_coastline()
"""
lons, lats = read_coastline(fname, take_every=1)
return drawcoastline(mymap, lons, lats, **kwargs)
# =========================================================
# coastline2svg.py
#
import giss.io.noaa
import os
import numpy as np
import sys
svg_header = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
version="1.1"
width="360"
height="180"
id="svg2">
<defs
id="defs4" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1">
"""
path_tpl = """
<path
d="%PATH%"
id="%PATH_ID%"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
"""
svg_footer = "</g></svg>"
# Set up paths
data_root = os.path.join(os.environ['HOME'], 'data')
#modelerc = giss.modele.read_modelerc()
#cmrun = modelerc['CMRUNDIR']
#savedisk = modelerc['SAVEDISK']
ifname = sys.argv[1]
ofname = ifname.replace('.dat', '.svg')
lons, lats = giss.io.noaa.read_coastline(ifname, 1)
out = open(ofname, 'w')
out.write(svg_header)
path_id = 1
points = []
for lon, lat in zip(lons, lats) :
if np.isnan(lon) or np.isnan(lat) :
# Process what we have
if len(points) > 2 :
out.write('\n<path d="')
out.write('m %f,%f L' % (points[0][0], points[0][1]))
for pt in points[1:] :
out.write(' %f,%f' % pt)
out.write('"\n id="path%d"\n' % (path_id))
# out.write('style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"')
out.write(' />\n')
path_id += 1
points = []
else :
lon += 180
lat = 180 - (lat + 90)
points.append((lon, lat))
out.write(svg_footer)
out.close()
# =============================================================
# svg2coastline.py
import os
import sys
import re
# Reads the output of Inkscape's "Plain SVG" format, outputs in NOAA MATLAB coastline format
mainRE = re.compile(r'\s*d=".*"')
lineRE = re.compile(r'\s*d="(m|M)\s*(.*?)"')
fname = sys.argv[1]
lons = []
lats = []
for line in open(fname, 'r') :
# Weed out extraneous lines in the SVG file
match = mainRE.match(line)
if match is None :
continue
match = lineRE.match(line)
# Stop if something is wrong
if match is None :
sys.stderr.write(line)
sys.exit(-1)
type = match.group(1)[0]
spairs = match.group(2).split(' ')
x = 0
y = 0
for spair in spairs :
if spair == 'L' :
type = 'M'
continue
(sdelx, sdely) = spair.split(',')
delx = float(sdelx)
dely = float(sdely)
if type == 'm' :
x += delx
y += dely
else :
x = delx
y = dely
lon = x - 180
lat = 90 - y
print '%f\t%f' % (lon, lat)
print 'nan\tnan'
Okay I think I have a partial solution.
The basic idea is that the paths used by drawcoastlines() are ordered by the size/area. Which means the first N paths are (for most applications) the main land masses and lakes and the later paths the smaller islands and rivers.
The issue is that the first N paths that you want will depend on the projection (e.g., global, polar, regional), if area_thresh has been applied and whether you want lakes or small islands etc. In other words, you will have to tweak this per application.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mp = 'cyl'
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
print ipoly
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
Once you know the relevant ipoly to stop drawing (poly_stop) then you can do something like this...
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
mproj = ['nplaea','cyl']
mp = mproj[0]
if mp == 'nplaea':
m = Basemap(resolution='c',projection=mp,lon_0=0,boundinglat=30,area_thresh=200000,round=1)
poly_stop = 10
else:
m = Basemap(resolution='c',projection=mp,lon_0=0,area_thresh=200000)
poly_stop = 18
fill_color = '0.9'
# If you don't want lakes set lake_color to fill_color
m.fillcontinents(color=fill_color,lake_color='white')
# Draw the coastlines, with a thin line and same color as the continent fill.
coasts = m.drawcoastlines(zorder=100,color=fill_color,linewidth=0.5)
# Exact the paths from coasts
coasts_paths = coasts.get_paths()
# In order to see which paths you want to retain or discard you'll need to plot them one
# at a time noting those that you want etc.
for ipoly in xrange(len(coasts_paths)):
if ipoly > poly_stop: continue
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
m.plot(px,py,'k-',linewidth=1)
plt.show()
As per my comment to #sampo-smolander
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8, 4.5))
plt.subplots_adjust(left=0.02, right=0.98, top=0.98, bottom=0.00)
m = Basemap(resolution='c',projection='robin',lon_0=0)
m.fillcontinents(color='gray',lake_color='white',zorder=2)
coasts = m.drawcoastlines(zorder=1,color='white',linewidth=0)
coasts_paths = coasts.get_paths()
ipolygons = range(83) + [84]
for ipoly in xrange(len(coasts_paths)):
r = coasts_paths[ipoly]
# Convert into lon/lat vertices
polygon_vertices = [(vertex[0],vertex[1]) for (vertex,code) in
r.iter_segments(simplify=False)]
px = [polygon_vertices[i][0] for i in xrange(len(polygon_vertices))]
py = [polygon_vertices[i][1] for i in xrange(len(polygon_vertices))]
if ipoly in ipolygons:
m.plot(px,py,linewidth=0.5,zorder=3,color='black')
else:
m.plot(px,py,linewidth=0.5,zorder=4,color='grey')
plt.savefig('world2.png',dpi=100)

Shape recognition with numpy/scipy (perhaps watershed)

My goal is to trace drawings that have a lot of separate shapes in them and to split these shapes into individual images. It is black on white. I'm quite new to numpy,opencv&co - but here is my current thought:
scan for black pixels
black pixel found -> watershed
find watershed boundary (as polygon path)
continue searching, but ignore points within the already found boundaries
I'm not very good at these kind of things, is there a better way?
First I tried to find the rectangular bounding box of the watershed results (this is more or less a collage of examples):
from numpy import *
import numpy as np
from scipy import ndimage
np.set_printoptions(threshold=np.nan)
a = np.zeros((512, 512)).astype(np.uint8) #unsigned integer type needed by watershed
y, x = np.ogrid[0:512, 0:512]
m1 = ((y-200)**2 + (x-100)**2 < 30**2)
m2 = ((y-350)**2 + (x-400)**2 < 20**2)
m3 = ((y-260)**2 + (x-200)**2 < 20**2)
a[m1+m2+m3]=1
markers = np.zeros_like(a).astype(int16)
markers[0, 0] = 1
markers[200, 100] = 2
markers[350, 400] = 3
markers[260, 200] = 4
res = ndimage.watershed_ift(a.astype(uint8), markers)
unique(res)
B = argwhere(res.astype(uint8))
(ystart, xstart), (ystop, xstop) = B.min(0), B.max(0) + 1
tr = a[ystart:ystop, xstart:xstop]
print tr
Somehow, when I use the original array (a) then argwhere seems to work, but after the watershed (res) it just outputs the complete array again.
The next step could be to find the polygon path around the shape, but the bounding box would be great for now!
Please help!
#Hooked has already answered most of your question, but I was in the middle of writing this up when he answered, so I'll post it in the hopes that it's still useful...
You're trying to jump through a few too many hoops. You don't need watershed_ift.
You use scipy.ndimage.label to differentiate separate objects in a boolean array and scipy.ndimage.find_objects to find the bounding box of each object.
Let's break things down a bit.
import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt
def draw_circle(grid, x0, y0, radius):
ny, nx = grid.shape
y, x = np.ogrid[:ny, :nx]
dist = np.hypot(x - x0, y - y0)
grid[dist < radius] = True
return grid
# Generate 3 circles...
a = np.zeros((512, 512), dtype=np.bool)
draw_circle(a, 100, 200, 30)
draw_circle(a, 400, 350, 20)
draw_circle(a, 200, 260, 20)
# Label the objects in the array.
labels, numobjects = ndimage.label(a)
# Now find their bounding boxes (This will be a tuple of slice objects)
# You can use each one to directly index your data.
# E.g. a[slices[0]] gives you the original data within the bounding box of the
# first object.
slices = ndimage.find_objects(labels)
#-- Plotting... -------------------------------------
fig, ax = plt.subplots()
ax.imshow(a)
ax.set_title('Original Data')
fig, ax = plt.subplots()
ax.imshow(labels)
ax.set_title('Labeled objects')
fig, axes = plt.subplots(ncols=numobjects)
for ax, sli in zip(axes.flat, slices):
ax.imshow(labels[sli], vmin=0, vmax=numobjects)
tpl = 'BBox:\nymin:{0.start}, ymax:{0.stop}\nxmin:{1.start}, xmax:{1.stop}'
ax.set_title(tpl.format(*sli))
fig.suptitle('Individual Objects')
plt.show()
Hopefully that makes it a bit clearer how to find the bounding boxes of the objects.
Use the ndimage library from scipy. The function label places a unique tag on each block of pixels that are within a threshold. This identifies the unique clusters (shapes). Starting with your definition of a:
from scipy import ndimage
image_threshold = .5
label_array, n_features = ndimage.label(a>image_threshold)
# Plot the resulting shapes
import pylab as plt
plt.subplot(121)
plt.imshow(a)
plt.subplot(122)
plt.imshow(label_array)
plt.show()

Categories