Shapely Buffering, not working as expected - python

Why does buffering one of my geometries have an unexpected hole in it?
from shapely import LineString
from geopandas import GeoDataFrame
l = LineString([
assert l.is_valid
assert l.is_simple
GeoDataFrame({'geometry': [
By removing a pair of coordinates, it doesn't have a hole.
When using Sedona's ST_Buffer this happened in more cases.

from shapely import LineString
from geopandas import GeoDataFrame
l = LineString([
l = l.simplify(tolerance=1e-6)
buffered_geometry = l.buffer(80, cap_style=3, join_style=2)
GeoDataFrame({'geometry': [l, buffered_geometry]}).plot(column='geometry')
which is basically what you did.


Convert geometry from bounds to Polygon

I have a geopandas DataFrame with bounds geometry.
import pandas as pd
import geopandas as gpd
gdf = gpd.GeoDataFrame({
'id': [0, 1],
'b': [((40.6494140625, -86.7919921875), (40.69335937...)),
((39.55078125, -93.8232421875), (39.5947265625...))]
Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))
<class 'geolib.geohash.Bounds'>
How do I turn Bounds into Polygon geometry type? Like,
Polygon((40.6494140625, -86.7919921875), (40.69335937...))
Suppose you have:-
# bound1 = The geohash.Bounds object.
you can proceed with:-
from shapely.geometry import box
bounds_pgon = box(bounds1.sw.lon,,,
# Check the result
the output will be similar to this:-
'POLYGON ((-27.9986 70.2987, -27.9986 70.3001, -28.0000 70.3001, -28.0000 70.2987, -27.9986 70.2987))'
This is really the same answer that @swatchi has provided.
Shape of the geometry is defined by the precision of the hash. See reference: geohash
import geolib.geohash
import shapely.geometry
import geopandas as gpd
import pandas as pd
# Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))
# regenerate the referenced geohash bounds
b = geolib.geohash.bounds(
geolib.geohash.encode(lat=32.8271484375, lon=-96.8115234375, precision=5)
gdf = gpd.GeoDataFrame(
pd.DataFrame({"id": [0], "b": [b]}).assign(
geometry=lambda d: d["b"].apply(
lambda b:,,,
gdf.explore(height=300, width=300)
Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))

Edit polygon coords using Python, Shapely and Fiona

I need to edit the geometry of intersecting polygons and I don't know how I can save modified geometry to a shapefile. Is it even possible?
from shapely.geometry import Polygon, shape
import matplotlib.pyplot as plt
import fiona
c ='polygon23.shp', 'r')
d ='polygon23.shp', 'r')
for poly in c.values():
for poly2 in d.values():
p_poly = shape(poly['geometry'])
p_poly2 = shape(poly2['geometry'])
intersect_polygons = p_poly.intersection(p_poly2)
if type(intersect_polygons) == Polygon:
intersect_polygons = p_poly.intersection(p_poly2).exterior.coords
if p_poly.exterior.xy != p_poly2.exterior.xy:
y_difference = abs(intersect_polygons[0][1]) - abs(intersect_polygons[2][1])
coords_polygonB = p_poly2.exterior.coords[:]
coords_polygonB[0] = (coords_polygonB[0][0], coords_polygonB[0][1] + (y_difference))
coords_polygonB[1] = (coords_polygonB[1][0], coords_polygonB[1][1] + (y_difference))
coords_polygonB[2] = (coords_polygonB[2][0], coords_polygonB[2][1] + (y_difference))
coords_polygonB[3] = (coords_polygonB[3][0], coords_polygonB[3][1] + (y_difference))
coords_polygonB[4] = (coords_polygonB[4][0], coords_polygonB[4][1] + (y_difference))
p_poly2 = Polygon(coords_polygonB)
x,y = p_poly.exterior.xy
x,y = p_poly2.exterior.xy
The removal of intersections between many polygons is most likely a complex problem. Moreover, I used your method as the solver in my solution.
The answer to your question, is yes. You can rectify the intersections between the polygons in your shp file; however, you need to create new Polygon objects, you can't just change the exterior coordinates of an existing Polygon.
Store metadata and disc from original shp file
The solution below writes/creates the resulting polygon set to a new shp file. This requires us to store the metadata from the original shp file, and pass it to the new one. We also need to store the properties of each polygon, I store these in a separate list, set_of_properties.
No need for two for loops
You don't need two for loops; just use combinations from the itertools standard library to loop through all possible polygon pairs. I use index combinations so that intersecting polygons can be replaced with new ones.
Outer do...while-loop
In very rare fringe cases, a rectification using your method may actually introduce new intersections. We can catch these and rectify them by looping through your solver until there are no intersections left. This requires a do...while loop, but there is no do...while loop in Python. However, it can be emulated with a while loop (see Solution for implementation).
from itertools import combinations
from shapely.geometry import Polygon, Point, shape, mapping
import matplotlib.pyplot as plt
import fiona
polygons, set_of_properties = [], []
with"polygon23.shp", "r") as source:
for line in source:
meta = source.meta
poly_index_combinations = combinations(tuple(range(len(polygons))), 2)
while True:
intersection_record = []
for i_poly_a, i_poly_b in poly_index_combinations:
poly_a, poly_b = polygons[i_poly_a], polygons[i_poly_b]
if poly_a.exterior.xy == poly_b.exterior.xy:
# print(f"The polygons have identical exterior coordinates:\n{poly_a} and {poly_b}\n")
intersecting = poly_a.intersection(poly_b)
if type(intersecting) != Polygon:
intersecting_polygons = intersecting.exterior.coords
if not intersecting_polygons:
# print(f"No intersections between\n{poly_a} and {poly_b}\n")
print("Rectifying intersection")
y_diff = abs(intersecting_polygons[0][1]) - abs(intersecting_polygons[2][1])
new_poly_b = Polygon((
Point(float(poly_b.exterior.coords[0][0]), float(poly_b.exterior.coords[0][1] + y_diff)),
Point(float(poly_b.exterior.coords[1][0]), float(poly_b.exterior.coords[1][1] + y_diff)),
Point(float(poly_b.exterior.coords[2][0]), float(poly_b.exterior.coords[2][1] + y_diff)),
Point(float(poly_b.exterior.coords[3][0]), float(poly_b.exterior.coords[3][1] + y_diff)),
Point(float(poly_b.exterior.coords[4][0]), float(poly_b.exterior.coords[4][1] + y_diff))
x, y = poly_a.exterior.xy
plt.plot(x, y)
x, y = new_poly_b.exterior.xy
plt.plot(x, y)
polygons[i_poly_b] = new_poly_b
if not intersection_record:
with"new.shp", "w", **meta) as sink:
for poly, properties in zip(polygons, set_of_properties):
"geometry": mapping(poly),
"properties": properties

How to optimize Shapely and Sklearn code?

I am working with a dataset of 4.2 million points and my code already takes a while to process; however, the code below takes several hours (it was provided in another public question and, for each point, it finds the nearest linestring, finds the nearest point on that linestring and calculates the distance).
The code actually does an awesome job, but it takes too long for its purpose. How can I optimize it, or do the same thing in less time?
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
from sklearn.neighbors import DistanceMetric
panama = gpd.read_file("/Users/Danilo/Documents/Python/panama_coastline/panama_coastline.shp")
for c in range(b):
#p = Point(-77.65325423107359,9.222038196656131)
def closest_line(point, linestrings):
    """Return the position of the linestring closest to ``point``.

    Args:
        point: Geometry exposing ``distance(other)``.
        linestrings: Iterable of geometries to measure against.

    Returns:
        The index reported by ``np.argmin`` over the computed distances.
    """
    # Use the arguments instead of the module-level ``p`` / ``panama`` so the
    # helper measures whatever point and collection the caller hands in.
    return np.argmin([point.distance(linestring) for linestring in linestrings])
closest_linestring = panama.geometry[ closest_line(p, panama.geometry) ]
closest_point = nearest_points(p, closest_linestring)
dist = DistanceMetric.get_metric('haversine')
points_as_floats = [ np.array([p.x, p.y]) for p in closest_point ]
haversine_distances = dist.pairwise(np.radians(points_as_floats), np.radians(points_as_floats) )
haversine_distances *= EARTH_RADIUS_IN_MILES
Edit: Simplify to single calculation with BallTree
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
Read Panama
panama = gpd.read_file("panama_coastline/panama_coastline.shp")
Get all points, long,lat format:
def get_points_as_numpy(geom):
    """Stack the coordinates of every geometry in ``geom`` into one array.

    Args:
        geom: Iterable of geometries, each exposing a ``coords`` sequence.

    Returns:
        A single NumPy array with the coordinate rows of all geometries,
        in iteration order.
    """
    coordinate_arrays = [np.array(g.coords) for g in geom]
    return np.concatenate(coordinate_arrays)
all_coastline_points = get_points_as_numpy(panama.geometry)
Create Balltree
from sklearn.neighbors import BallTree
import numpy as np
panama_radians = np.radians(np.flip(all_coastline_points,axis=1))
tree = BallTree(panama_radians, leaf_size=12, metric='haversine')
Create 1M random points:
mean = [8.5,-80]
cov = [[1,0],[0,5]] # diagonal covariance, points lie on x or y-axis
random_gps = np.random.multivariate_normal(mean,cov,(10**6))
random_points = pd.DataFrame( {'lat' : random_gps[:,0], 'long' : random_gps[:,1]})
Calculate closest coast point (<30 Seconds on my machine)
distances, index = tree.query( np.radians(random_gps), k=1)
Put results in DataFrame
random_points['distance_to_coast'] = distances * EARTH_RADIUS_IN_MILES
random_points['closest_lat'] = all_coastline_points[index][:,0,1]
random_points['closest_long'] = all_coastline_points[index][:,0,0]

How to find the 2nd nearest point of a LineString in Shapely

Given a certain LineString and point p:
from shapely.ops import nearest_points
from shapely.geometry import Point
p = Point(51.21745162000732, 4.41871738126533)
linestring = LineString([(51.2176008, 4.4177154), (51.21758, 4.4178548), (51.2175729, 4.4179023), (51.21745162000732, 4.41871738126533)])
The nearest point to p is calculated by:
n_p = nearest_points(linestring, p)[0]
Conclusion it's the exact same point, which is normal since the exact same value is also in the linestring, but I need to know the nearest point, apart from the point itself.
So how can I find the second nearest point?
In the general case, the simplest solution would be to construct a new geometric object from your LineString but without the nearest point, and then get the nearest point with this new geometry.:
from shapely.geometry import LineString, MultiPoint, Point
from shapely.ops import nearest_points
point = Point(51.21745162000732, 4.41871738126533)
line = LineString([(51.2176008, 4.4177154), (51.21758, 4.4178548),
                   (51.2175729, 4.4179023), (51.21745162000732, 4.41871738126533)])
nearest_point = nearest_points(line, point)[0]
# Build a vertex set from ``line`` (the geometry defined above) without the
# nearest vertex; the loop variable is named ``coord`` so it does not shadow
# ``point``.
nearest_xy = (nearest_point.x, nearest_point.y)
line_points_except_nearest = MultiPoint([coord for coord in line.coords
                                         if coord != nearest_xy])
second_nearest = nearest_points(line_points_except_nearest, point)[0]
Alternatively, if you don't want to construct a new object because of, for example, memory constraints, you could run over all the points in the LineString with heapq.nsmallest:
import heapq
line_points = map(Point, line.coords)
nearest, second_nearest = heapq.nsmallest(2, line_points, key=point.distance)
In your specific case, when all the points are collinear, you can also calculate distances with the neighboring points of the nearest point:
# Position of the query point among the line's vertices
# (``list.index`` raises ValueError when the point is not a vertex).
index = list(line.coords).index((point.x, point.y))
if index == 0:
    # First vertex: the only neighbour is the next one.
    second_nearest = Point(line.coords[1])
elif index == len(line.coords) - 1:
    # Last vertex: the only neighbour is the previous one.
    second_nearest = Point(line.coords[-2])
else:
    # Interior vertex: pick whichever adjacent vertex is closer to ``point``.
    second_nearest = min(Point(line.coords[index - 1]),
                         Point(line.coords[index + 1]),
                         key=point.distance)
Solve as follows.
from shapely.ops import nearest_points
from shapely.geometry import Point
from shapely.geometry import LineString
def second_nearest(p, linestring):
    """Find the nearest point to ``p`` on ``linestring``.

    If ``p`` is itself a vertex of ``linestring``, that vertex is dropped
    first, so the result is the second-nearest point instead of ``p``.

    Args:
        p: The query point.
        linestring: The line to search.

    Returns:
        The pair produced by ``nearest_points(p, geometry)``: first ``p``,
        then the closest point on the (possibly reduced) geometry.

    Raises:
        ValueError: If ``linestring`` has no vertex other than ``p``.
    """
    # coordinates of p and linestring
    p_coords = list(p.coords)[0]
    linestring_coords = list(linestring.coords)

    if p_coords in linestring_coords:
        # Drop every occurrence of p; without this the nearest point found
        # below would simply be p again.
        remaining = [coord for coord in linestring_coords if coord != p_coords]
        if not remaining:
            raise ValueError("linestring has no vertex other than p")
        if len(remaining) == 1:
            # A single coordinate cannot form a LineString; use a Point.
            linestring = Point(remaining[0])
        else:
            # form a new linestring without p
            linestring = LineString(remaining)

    return nearest_points(p, linestring)
p = Point(51.21745162000732, 4.41871738126533)
linestring = LineString([(51.2176008,4.4177154), (51.21758,4.4178548), (51.2175729,4.4179023), (51.21745162000732,4.41871738126533)])
n_p = second_nearest(p, linestring)
print(list(map(str, n_p)))
The first point is p; the second point is the closest point to p in the linestring that is not equal to p (so the second-closest point).
['POINT (51.21745162000732 4.41871738126533)',
'POINT (51.2175729 4.4179023)']

Python : shapely, cascaded intersections within one polygon

I'd like to split a polygon into a list of polygons corresponding to all intersections with other polygons (and intersections between themselves).
from shapely.geometry import Point
circleA = Point((0, 0)).buffer(1)
circleB = Point((1, 0)).buffer(1)
circleC = Point((1, 1)).buffer(1)
def cascaded_intersections(poly1, lst_poly):
# ???
return result
result = cascaded_intersections(circleA, (circleB, circleC))
The result should be a list of 4 Polygons, corresponding to the 4 complementary parts of A (above: [AC!B, ABC, AB!C, rest of A]).
The problem is the same as splitting a polygon into its smallest parts from a list of covering LineStrings.
How to write cascaded_intersections ?
A colleague of mine, Pascal L., found a solution :
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from shapely.geometry import MultiPolygon, Polygon, Point, GeometryCollection
from shapely.ops import cascaded_union
EMPTY = GeometryCollection()
def partition(poly_a, poly_b):
    """Split polygons A and B into their differences and intersection.

    Args:
        poly_a: First geometry.
        poly_b: Second geometry.

    Returns:
        A tuple ``(only_a, only_b, inter)``. When the inputs do not
        intersect, both are returned unchanged and ``inter`` is ``EMPTY``.
    """
    if not poly_a.intersects(poly_b):
        return poly_a, poly_b, EMPTY
    only_a = poly_a.difference(poly_b)
    only_b = poly_b.difference(poly_a)
    inter = poly_a.intersection(poly_b)
    return only_a, only_b, inter
def eliminate_small_areas(poly, small_area):
    """Eliminate tiny parts of a MultiPolygon (or Polygon).

    Args:
        poly: A ``Polygon`` or ``MultiPolygon``.
        small_area: Area threshold; parts whose area does not exceed it
            are dropped.

    Returns:
        ``EMPTY`` when nothing survives, the single surviving ``Polygon``,
        or a ``MultiPolygon`` made of the surviving parts.
    """
    if poly.area < small_area:
        return EMPTY
    if isinstance(poly, Polygon):
        return poly
    assert isinstance(poly, MultiPolygon)
    # Go through ``.geoms``: multi-part geometries are not directly iterable
    # in Shapely 2.0, while ``.geoms`` works on older releases as well.
    kept = [part for part in poly.geoms if part.area > small_area]
    if not kept:
        return EMPTY
    if len(kept) == 1:
        return kept[0]
    return MultiPolygon(kept)
def cascaded_intersections(poly1, lst_poly):
    """Split Polygon poly1 into intersections of/with a list of other polygons.

    Args:
        poly1: The polygon to split.
        lst_poly: Sequence of polygons whose overlaps define the pieces.

    Returns:
        A list of geometries: the parts of ``poly1`` covered by each distinct
        combination of polygons from ``lst_poly``, followed by the part of
        ``poly1`` covered by none of them (when it is not negligible).
    """
    if not lst_poly:
        # Nothing to split against: poly1 is its own single piece.
        return [poly1]

    # Step 1: partition the polygons of lst_poly among themselves. Each entry
    # pairs a geometry with the indexes of the polygons that cover it.
    result = [(lst_poly[0], (0,))]

    for i, poly in enumerate(lst_poly[1:], start=1):
        current = []

        while result:
            result_geometry, result_indexes = result.pop(0)
            only_result, _, inter = partition(result_geometry, poly)
            for geometry, indexes in ((only_result, result_indexes),
                                      (inter, result_indexes + (i,))):
                if not geometry.is_empty:
                    current.append((geometry, indexes))

        # Whatever part of ``poly`` is not covered by earlier pieces is new.
        current_union = cascaded_union([elt[0] for elt in current])
        only_poly = poly.difference(current_union)
        if not only_poly.is_empty:
            current.append((only_poly, (i,)))
        result = current

    # Step 2: clip every piece to poly1. Walk backwards so that deleting an
    # entry does not shift the indexes still to be visited.
    for r in range(len(result) - 1, -1, -1):
        geometry, indexes = result[r]
        if poly1.intersects(geometry):
            inter = poly1.intersection(geometry)
            result[r] = (inter, indexes)
        else:
            # Only pieces that miss poly1 entirely are discarded.
            del result[r]

    # Step 3: add the remainder of poly1, ignoring numerical slivers.
    only_poly1 = poly1.difference(cascaded_union([elt[0] for elt in result]))
    only_poly1 = eliminate_small_areas(only_poly1, 1e-16 * poly1.area)
    if not only_poly1.is_empty:
        result.append((only_poly1, None))

    return [r[0] for r in result]
result = cascaded_intersections(a, (b1,b2,b3))
Hi hi again, here's a better solution than my own, using part of gene's answer # it uses shapely.ops functions cascaded_union, unary_union and polygonize.
import matplotlib.pyplot as plt
import numpy as np
import shapely.geometry as sg
from shapely.ops import cascaded_union, unary_union, polygonize
import shapely.affinity
import descartes
from itertools import combinations
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
circles = [circleA,circleB,circleC]
listpoly = [a.intersection(b) for a, b in combinations(circles, 2)] #list of intersections
rings = [sg.LineString(list(pol.exterior.coords)) for pol in listpoly] #list of rings
union = unary_union(rings)
result = [geom for geom in polygonize(union)] #list all intersection geometries
multi = cascaded_union(result) #Create a single geometry out of all intersections
fin = [c.difference(multi) for c in circles] #Cut multi from circles and leave only outside geometries.
result = result + fin #add the outside geometries to the intersections geometries
#Plot settings:
ax = plt.gca()
name = 1
for e in result:
bbox=dict(facecolor='orange', alpha=0.5),
name += 1
Como va. Hi Eric, I tried using the split function from shapely.ops. Here is the result. This is not the most time efficient or elegant solution but it works:
import matplotlib.pyplot as plt
import numpy as np #use np.random to give random RGB color to each polygon
import shapely.geometry as sg
from shapely.ops import split
import descartes
from itertools import combinations
def cascade_split(to_split, splitters):  # Helper function for successive splits
    """Return a list of all intersections between multiple polygons.

    to_split: list, polygons or sub-polygons to split
    splitters: list, polygons used as splitters (left unmodified)

    Returns a list of all the polygons formed by the multiple intersections.
    With no splitters, ``to_split`` is returned as is.
    """
    pieces = to_split
    # Apply the splitters one after another, in order. Looping over them,
    # rather than removing each one from the list, leaves the caller's
    # ``splitters`` untouched.
    for splitter in splitters:
        boundary = splitter.boundary  # split along the outline of the splitter
        next_pieces = []
        for piece in pieces:
            # Read the parts via ``.geoms``; collections are not directly
            # iterable in Shapely 2.0.
            next_pieces.extend(split(piece, boundary).geoms)
        pieces = next_pieces
    return pieces
#Create polygons, in this case circles.
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
#Put all circles in list
circles = [circleA,circleB,circleC]
#The combinations tool takes the last polygon
#from list to split with the remaining polygons in list,
#creating a backwards copy of the circles list will help keep track of shapes.
back_circles = circles[::-1] #backwards copy of circles list
index_count = 0 #Keep track of which circle will get splitted
polys = [] #Final list of splitted polygons
for i in combinations(circles,len(circles)-1):
c_split = cascade_split([back_circles[index_count]],list(i)) #Use helper function here
for p in c_split:
#There will be duplicate polygon splits, the following condition will filter those:
if not any(poly.equals(p) for poly in polys):
index_count += 1
#plotting settings
ax = plt.gca()
for e in range(len(polys)):
fc=np.random.rand(3), #give random color to each split
'%s' %(e+1),fontsize=9,
bbox=dict(facecolor='orange', alpha=0.5),
polys #Output the polys list to see all the splits
