Given a certain LineString and point p:
from shapely.geometry import LineString, Point
from shapely.ops import nearest_points

# Query point; it coincides exactly with the last vertex of the linestring below.
p = Point(51.21745162000732, 4.41871738126533)
linestring = LineString([(51.2176008, 4.4177154), (51.21758, 4.4178548), (51.2175729, 4.4179023), (51.21745162000732, 4.41871738126533)])
The nearest point to p is calculated by:
# nearest_points returns one point per input geometry, in input order; [0] is the point on linestring
n_p = nearest_points(linestring, p)[0]
The result is the exact same point, which is expected since that exact coordinate is also a vertex of the linestring. However, I need the nearest point other than the point itself.
So how can I find the second nearest point?
In the general case, the simplest solution would be to construct a new geometric object from your LineString but without the nearest point, and then get the nearest point with this new geometry:
from shapely.geometry import LineString, MultiPoint, Point
from shapely.ops import nearest_points

point = Point(51.21745162000732, 4.41871738126533)
line = LineString([(51.2176008, 4.4177154), (51.21758, 4.4178548),
                   (51.2175729, 4.4179023), (51.21745162000732, 4.41871738126533)])
# Nearest location on the line to `point` (index 0 = the point lying on `line`).
nearest_point = nearest_points(line, point)[0]
# Keep every vertex of `line` except the one equal to the nearest point.
# The loop variable is deliberately not called `point`, to avoid confusion
# with the query point defined above.
line_points_except_nearest = MultiPoint([coords for coords in line.coords
                                         if coords != (nearest_point.x, nearest_point.y)])
# Nearest of the remaining vertices, i.e. the second nearest vertex overall.
second_nearest = nearest_points(line_points_except_nearest, point)[0]
Alternatively, if you don't want to construct a new object because of, for example, memory constraints, you could run over all the points in the LineString with heapq.nsmallest:
import heapq
# Lazily wrap each vertex of the line in a Point; nothing is materialised up front.
line_points = (Point(vertex) for vertex in line.coords)
# The two vertices with the smallest distance to `point`, closest first.
nearest, second_nearest = heapq.nsmallest(2, line_points, key=point.distance)
In your specific case, when all the points are collinear, you can also calculate distances with the neighboring points of the nearest point:
# Position of the query point among the vertices (raises ValueError if absent).
vertices = list(line.coords)
index = vertices.index((point.x, point.y))
last_index = len(vertices) - 1
if index == 0:
    # First vertex: its only neighbour is the one after it.
    second_nearest = Point(vertices[1])
elif index == last_index:
    # Last vertex: its only neighbour is the one before it.
    second_nearest = Point(vertices[-2])
else:
    # Interior vertex: pick the closer of the two neighbours.
    previous_vertex = Point(vertices[index - 1])
    next_vertex = Point(vertices[index + 1])
    second_nearest = min(previous_vertex, next_vertex, key=point.distance)
Solve as follows.
from shapely.ops import nearest_points
from shapely.geometry import Point
from shapely.geometry import LineString
def second_nearest(p, linestring):
    """Return the nearest-point pair between ``p`` and ``linestring``.

    If ``p`` coincides with one or more vertices of ``linestring``, those
    vertices are removed first, so the result is the closest location on the
    remaining geometry (the "second nearest" point) rather than ``p`` itself.

    :param p: query Point
    :param linestring: LineString to search
    :return: tuple ``(p, closest)`` as produced by ``nearest_points``
    :raises ValueError: if every vertex of ``linestring`` equals ``p``
    """
    # coordinates of p and linestring
    p_coords = p.coords[0]
    linestring_coords = list(linestring.coords)
    # Drop *every* occurrence of p (a closed ring repeats its first vertex).
    remaining = [coords for coords in linestring_coords if coords != p_coords]
    if len(remaining) == len(linestring_coords):
        # p is not a vertex: the linestring can be used unchanged
        return nearest_points(p, linestring)
    if not remaining:
        raise ValueError("linestring has no vertex other than p")
    if len(remaining) == 1:
        # a LineString needs at least two coordinates; fall back to a Point
        return nearest_points(p, Point(remaining[0]))
    return nearest_points(p, LineString(remaining))
# Query point that is also the last vertex of the linestring.
p = Point(51.21745162000732, 4.41871738126533)
linestring = LineString([
    (51.2176008, 4.4177154),
    (51.21758, 4.4178548),
    (51.2175729, 4.4179023),
    (51.21745162000732, 4.41871738126533),
])
n_p = second_nearest(p, linestring)
# Show both returned points as WKT strings.
print([str(geometry) for geometry in n_p])
Output
The first point is p; the second point is the closest point to p in the linestring that is not equal to p (i.e. the second-closest point).
['POINT (51.21745162000732 4.41871738126533)',
'POINT (51.2175729 4.4179023)']
Related
Why does buffering one of my geometries have an unexpected hole in it?
from shapely import LineString
from geopandas import GeoDataFrame
# Closed outline: the first and last vertices are identical.
outline = [
    (250, 447),
    (319, 446),
    (325, 387),
    (290, 374),
    (259, 378),
    (254, 385),
    (240, 409),
    (244, 440),
    (250, 447),
]
l = LineString(outline)
assert l.is_valid
assert l.is_simple
# Plot the line together with its 80-unit buffer.
frame = GeoDataFrame({'geometry': [l, l.buffer(80)]})
frame.plot(column='geometry')
By removing a pair of coordinates, it doesn't have a hole.
When using Sedona's ST_Buffer this happened in more cases.
from shapely import LineString
from geopandas import GeoDataFrame
# Same closed outline as in the question.
outline = [
    (250, 447),
    (319, 446),
    (325, 387),
    (290, 374),
    (259, 378),
    (254, 385),
    (240, 409),
    (244, 440),
    (250, 447),
]
# Simplify with a tiny tolerance before buffering.
l = LineString(outline).simplify(tolerance=1e-6)
# Numeric style codes in Shapely: cap_style=3 is square, join_style=2 is mitre.
buffered_geometry = l.buffer(80, cap_style=3, join_style=2)
layers = GeoDataFrame({'geometry': [l, buffered_geometry]})
layers.plot(column='geometry')
which is basically what you did.
I am working with a dataset of 4.2 million points and my code already takes a while to process it; the code below, however, takes several hours (it was provided in another public question and basically finds the linestring nearest to a point, finds the nearest point on that linestring, and calculates the distance).
The code does an awesome job, but it takes too long for my purposes. How can I optimize it, or do the same thing in less time?
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
from sklearn.neighbors import DistanceMetric
EARTH_RADIUS_IN_MILES = 3440.1  # NAUTICAL MILES
panama = gpd.read_file("/Users/Danilo/Documents/Python/panama_coastline/panama_coastline.shp")


def closest_line(point, linestrings):
    """Return the position of the linestring in ``linestrings`` nearest to ``point``."""
    return np.argmin([point.distance(linestring) for linestring in linestrings])


# Build the metric once instead of on every iteration.
dist = DistanceMetric.get_metric('haversine')

# `b`, `data` and `dtc` are expected to be defined earlier in the script.
for c in range(b):
    p = Point(data['longitude'][c], data['latitude'][c])
    closest_linestring = panama.geometry[closest_line(p, panama.geometry)]
    # (p, nearest point on the closest coastline segment)
    closest_point = nearest_points(p, closest_linestring)
    # The haversine metric expects (latitude, longitude) in radians, while
    # shapely stores (x, y) = (longitude, latitude): swap the order here.
    points_as_floats = [np.array([pt.y, pt.x]) for pt in closest_point]
    haversine_distances = dist.pairwise(np.radians(points_as_floats), np.radians(points_as_floats))
    haversine_distances *= EARTH_RADIUS_IN_MILES
    # Off-diagonal entry = distance between p and its nearest coastline point.
    dtc1 = haversine_distances[0][1]
    dtc.append(dtc1)
Edit: Simplify to single calculation with BallTree
Imports
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
Read Panama
# Load the coastline geometries (the path is relative to the working directory)
panama = gpd.read_file("panama_coastline/panama_coastline.shp")
Get all points, long,lat format:
def get_points_as_numpy(geom):
    """Stack the coordinates of every geometry in ``geom`` into one array.

    :param geom: iterable of geometries exposing ``.coords`` (single-part)
        or ``.geoms`` (multi-part, flattened recursively)
    :return: array with one row per coordinate, in input order; an empty
        ``(0, 2)`` array when there are no coordinates at all
    """
    work_list = []
    for g in geom:
        parts = getattr(g, "geoms", None)
        if parts is not None:
            # multi-part geometry: it has no .coords of its own
            coords = get_points_as_numpy(parts)
        else:
            coords = np.array(g.coords)
        # skip empty geometries, whose shape would break the concatenation
        if coords.size:
            work_list.append(coords)
    if not work_list:
        # np.concatenate raises on an empty list
        return np.empty((0, 2))
    return np.concatenate(work_list)
# One row per coastline vertex; columns are (x, y), i.e. (long, lat) here
all_coastline_points = get_points_as_numpy(panama.geometry)
Create Balltree
from sklearn.neighbors import BallTree
import numpy as np
# The haversine metric works on (lat, long) pairs in radians, while the
# coastline array holds (long, lat): reverse the column order first.
coastline_lat_long = all_coastline_points[:, ::-1]
panama_radians = np.radians(coastline_lat_long)
tree = BallTree(panama_radians, metric='haversine', leaf_size=12)
Create 1M random points:
# Centre of the sample as (lat, long).
mean = [8.5, -80]
# Diagonal covariance: variance 1 in latitude and 5 in longitude, no correlation.
cov = [[1, 0], [0, 5]]
random_gps = np.random.multivariate_normal(mean, cov, (10**6))
# Column 0 holds latitudes, column 1 longitudes.
random_points = pd.DataFrame(random_gps, columns=['lat', 'long'])
random_points.head()
Calculate closest coast point (<30 Seconds on my machine)
# random_gps columns are (lat, long), the same order the tree was built with; k=1 gives (n, 1) arrays
distances, index = tree.query( np.radians(random_gps), k=1)
Put results in DataFrame
# Earth radius used to turn haversine angles (radians) into distances.
EARTH_RADIUS_IN_MILES = 3440.1
# `index` has one column (k=1); look up the matching coastline vertex once.
nearest_coast = all_coastline_points[index[:, 0]]
random_points['distance_to_coast'] = distances * EARTH_RADIUS_IN_MILES
# Coastline vertices are stored as (long, lat).
random_points['closest_lat'] = nearest_coast[:, 1]
random_points['closest_long'] = nearest_coast[:, 0]
I have a geopandas dataframe containing several line strings created from lat, lon point data. For all line intersections, I need to find the nearest point within each line string to that intersection.
Thus, if two lines in the dataframe intersect, I need to nearest point to that intersection in each linestring. I have used itertools to find all possible intersection points similar to the accepted answer in this post: https://gis.stackexchange.com/questions/137909/intersecting-lines-to-get-crossings-using-python-with-qgis
Is there a simpler way to find the closest point in each linestring to the intersection point, for all line intersections in a geopandas dataframe?
My dataframe looks like so, with each file name containing its own linestring:
geometry
file
2015_may14_10 LINESTRING (-140.43855 59.80302, -140.44101 59...
2015_may14_11 LINESTRING (-140.84909 59.83433, -140.84758 59...
2015_may14_12 LINESTRING (-140.66859 59.79890, -140.66600 59...
2015_may14_15 LINESTRING (-140.19642 59.86655, -140.19795 59...
2015_may14_16 LINESTRING (-141.08783 59.94741, -141.08610 59...
Let's create n random lines :
import geopandas as gpd
from shapely.geometry import LineString, Point, Polygon
from shapely import wkt
import numpy as np
# Bounding box of the sample area and number of segments to generate.
xmin, xmax, ymin, ymax = 0, 10000, 0, 10000
n = 100
# Uniformly distributed start (xa, ya) and end (xb, yb) coordinates.
xa = (xmax - xmin) * np.random.random(n) + xmin
ya = (ymax - ymin) * np.random.random(n) + ymin
xb = (xmax - xmin) * np.random.random(n) + xmin
yb = (ymax - ymin) * np.random.random(n) + ymin
# One two-vertex LineString per (start, end) pair.
segments = [
    LineString([(x_start, y_start), (x_end, y_end)])
    for x_start, y_start, x_end, y_end in zip(xa, ya, xb, yb)
]
lines = gpd.GeoDataFrame({'index': range(n), 'geometry': segments})
This gives :
>>> lines
index geometry
0 0 LINESTRING (4444.630 3081.439, 6132.674 5849.463)
1 1 LINESTRING (7015.940 6378.245, 4568.386 757.205)
2 2 LINESTRING (8766.417 6070.131, 690.359 7511.385)
3 3 LINESTRING (4245.544 4009.196, 8496.307 1557.175)
4 4 LINESTRING (1489.436 9364.784, 2109.740 5923.480)
... ... ...
95 95 LINESTRING (4783.454 7840.857, 1935.396 2435.260)
96 96 LINESTRING (1884.455 4982.662, 6257.958 3580.912)
97 97 LINESTRING (7072.811 7843.319, 4811.589 2486.040)
98 98 LINESTRING (6933.272 6427.046, 7528.579 2064.067)
99 99 LINESTRING (3876.400 5183.790, 5360.753 1901.207)
Let's get our intersections :
res = []
geometries = list(lines.loc[:, 'geometry'])
# Every ordered pair (i, j) is visited, so each crossing is recorded twice:
# once as (i, j) and once as (j, i).
for pos_i, i in enumerate(geometries):
    for pos_j, j in enumerate(geometries):
        if pos_i == pos_j:
            # a line intersected with itself is just the line again
            continue
        inter = i.intersection(j)
        # Non-crossing pairs give an empty geometry whose reported type
        # depends on the Shapely/GEOS version, so test emptiness explicitly;
        # a LineString result means the two segments overlap.
        if inter.is_empty or inter.geom_type == 'LineString':
            continue
        res.append(inter)
One thing confused me at first: sometimes inter = i.intersection(j) returns a LineString object. This happens when a line is intersected with itself (the result is the line itself) and, in this run, also when the two lines do not cross at all (the result is an empty LineString). Both cases are filtered out.
And now, we can create our df with the resulting points :
# One row per recorded intersection geometry
points = gpd.GeoDataFrame({'geometry':res})
>>>points
geometry
0 POINT (4811.366 3682.806)
1 POINT (5149.727 4237.644)
2 POINT (4607.312 3348.202)
3 POINT (6026.639 5675.588)
4 POINT (4514.359 3195.779)
... ...
2215 POINT (4788.793 3166.070)
2216 POINT (4704.895 3351.608)
2217 POINT (4581.390 3624.734)
2218 POINT (4320.392 4201.921)
2219 POINT (4949.041 2811.691)
2220 rows × 1 columns
We can see that we are working with segments rather than infinite lines, since the number of intersections (i.e. points) is only 2220 out of 100 × 100 = 10000 pairs. I do not believe that we were lucky enough to get 7780 pairs of parallel lines.
Then, we import our best friend for the operation :
from shapely.ops import nearest_points
And we compute the desired output :
intersection = []
line = []
my_point = []
# Pull the geometries out once instead of doing a .loc lookup per cell.
line_geometries = list(lines.loc[:, 'geometry'])
for crossing in points.loc[:, 'geometry']:
    for line_geometry in line_geometries:
        intersection.append(crossing)
        line.append(line_geometry)
        # nearest_points returns (point on crossing, point on line); keep the
        # latter as a geometry rather than round-tripping it through WKT.
        my_point.append(nearest_points(crossing, line_geometry)[1])
# The nearest point on each line becomes the active geometry column.
result = gpd.GeoDataFrame(
    {'intersection': intersection, 'line': line, 'geometry': my_point},
    geometry='geometry',
)
>>>result
intersection line geometry
0 POINT (4811.365980053641 3682.805619834874) LINESTRING (4444.630325108094 3081.43918610815... POINT (4811.366 3682.806)
1 POINT (4811.365980053641 3682.805619834874) LINESTRING (7015.939846319573 6378.24453843603... POINT (5677.967 3305.464)
2 POINT (4811.365980053641 3682.805619834874) LINESTRING (8766.416847858662 6070.13073873083... POINT (5346.331 6680.480)
3 POINT (4811.365980053641 3682.805619834874) LINESTRING (4245.544341245415 4009.19558793877... POINT (4811.366 3682.806)
4 POINT (4811.365980053641 3682.805619834874) LINESTRING (1489.4355376526 9364.784164867619,... POINT (2109.740 5923.480)
... ... ... ...
221995 POINT (4949.040525093341 2811.690701237854) LINESTRING (4783.453909575222 7840.85687296287... POINT (2745.435 3972.709)
221996 POINT (4949.040525093341 2811.690701237854) LINESTRING (1884.454611847149 4982.66168904636... POINT (5294.551 3889.693)
221997 POINT (4949.040525093341 2811.690701237854) LINESTRING (7072.811488307434 7843.31900543939... POINT (4949.041 2811.691)
221998 POINT (4949.040525093341 2811.690701237854) LINESTRING (6933.272054846982 6427.04550331467... POINT (7381.288 3143.559)
221999 POINT (4949.040525093341 2811.690701237854) LINESTRING (3876.399925481877 5183.78974899146... POINT (4949.041 2811.691)
222000 rows × 3 columns
Hope, this answers your question, let me know if you got a better answer.
I'd like to split a polygon into a list of polygons corresponding to all intersections with other polygons (and intersections between themselves).
from shapely.geometry import Point
# Three overlapping discs of radius 1 (polygons produced by buffering a point).
circleA = Point((0, 0)).buffer(1)
circleB = Point((1, 0)).buffer(1)
circleC = Point((1, 1)).buffer(1)
def cascaded_intersections(poly1, lst_poly):
    # Placeholder from the question: the body is still to be written, and
    # `result` is not defined inside this function yet.
    # ???
    return result
# Expected: the parts of circleA induced by its overlaps with circleB and circleC.
result = cascaded_intersections(circleA, (circleB, circleC))
The result should be a list of 4 Polygons, corresponding to the 4 complementary parts of A (above: [AC!B, ABC, AB!C, rest of A]).
The problem is the same as splitting a polygon into its smallest parts from a list of covering LineStrings.
How to write cascaded_intersections ?
A colleague of mine, Pascal L., found a solution :
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from shapely.geometry import MultiPolygon, Polygon, Point, GeometryCollection
from shapely.ops import cascaded_union
# Shared empty geometry used as the "nothing here" result.
EMPTY = GeometryCollection()


def partition(poly_a, poly_b):
    """
    Decompose two polygons into (A without B, B without A, A and B).

    When the polygons do not intersect, both are returned untouched and the
    third element is EMPTY.
    """
    if poly_a.intersects(poly_b):
        return (
            poly_a.difference(poly_b),
            poly_b.difference(poly_a),
            poly_a.intersection(poly_b),
        )
    return poly_a, poly_b, EMPTY
def eliminate_small_areas(poly, small_area):
    """
    Eliminates tiny parts of a MultiPolygon (or Polygon).

    :param poly: Polygon or MultiPolygon to clean
    :param small_area: area threshold; the whole geometry is dropped when its
        area is below it, and individual parts are kept only when strictly above
    :return: EMPTY, a single Polygon, or a MultiPolygon of the surviving parts
    """
    if poly.area < small_area:
        return EMPTY
    if isinstance(poly, Polygon):
        return poly
    assert isinstance(poly, MultiPolygon)
    # Multi-part geometries are not directly iterable in Shapely 2.0;
    # `.geoms` works on both 1.x and 2.x.
    kept = [part for part in poly.geoms if part.area > small_area]
    if not kept:
        return EMPTY
    if len(kept) == 1:
        return kept[0]
    return MultiPolygon(kept)
def cascaded_intersections(poly1, lst_poly):
    """
    Splits Polygon poly1 into intersections of/with list of other polygons.

    First the polygons of lst_poly are cut against each other into pieces,
    each tagged with the tuple of lst_poly positions that cover it; then every
    piece is clipped to poly1, and finally the part of poly1 covered by no
    piece is appended. Returns the list of resulting geometries (the position
    tags are discarded).

    NOTE(review): lst_poly[0] is read unconditionally, so an empty lst_poly
    raises IndexError.
    """
    # Each entry is (geometry, indexes of the lst_poly members covering it).
    result = [(lst_poly[0], (0,))]
    for i, poly in enumerate(lst_poly[1:], start=1):
        current = []
        while result:
            result_geometry, result_indexes = result.pop(0)
            # only_poly returned here is not used; it is recomputed below
            # against the union of all pieces.
            only_result, only_poly, inter = partition(result_geometry, poly)
            # Keep the part outside poly (same tag) and the part inside poly
            # (tag extended with i), skipping empty results.
            for geometry, indexes in ((only_result, result_indexes), (inter, result_indexes + (i,))):
                if not geometry.is_empty:
                    current.append((geometry, indexes))
        # Whatever of poly is not covered by any piece so far is a new piece.
        current_union = cascaded_union([elt[0] for elt in current])
        only_poly = poly.difference(current_union)
        if not only_poly.is_empty:
            current.append((only_poly, (i,)))
        result = current
    # Clip every piece to poly1; walk backwards so deleting by index is safe.
    for r in range(len(result)-1, -1, -1):
        geometry, indexes = result[r]
        if poly1.intersects(geometry):
            inter = poly1.intersection(geometry)
            result[r] = (inter, indexes)
        else:
            del result[r]
    # Remainder of poly1 not covered by any piece, with slivers smaller than
    # 1e-16 times the area of poly1 removed.
    only_poly1 = poly1.difference(cascaded_union([elt[0] for elt in result]))
    only_poly1 = eliminate_small_areas(only_poly1, 1e-16*poly1.area)
    if not only_poly1.is_empty:
        result.append((only_poly1, None))
    return [r[0] for r in result]
# Disc to split, followed by the three discs it is split against.
a = Point(0, 0).buffer(1)
b1, b2, b3 = (
    Point(0, 1).buffer(1),
    Point(1, 0).buffer(1),
    Point(1, 1).buffer(1),
)
others = (b1, b2, b3)
result = cascaded_intersections(a, others)
Hi hi again, here's a better solution than my own, using part of gene's answer # stackexchange.com it uses shapely.ops functions cascaded_union, unary_union and polygonize.
import matplotlib.pyplot as plt
import numpy as np
import shapely.geometry as sg
from shapely.ops import cascaded_union, unary_union, polygonize
import shapely.affinity
import descartes
from itertools import combinations
# Three overlapping discs of radius 1.
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
circles = [circleA, circleB, circleC]

# Pairwise intersections of the discs.
listpoly = []
for a, b in combinations(circles, 2):
    listpoly.append(a.intersection(b))

# Outline of every pairwise intersection, as plain linework.
rings = []
for pol in listpoly:
    rings.append(sg.LineString(list(pol.exterior.coords)))

# Merge the outlines, then rebuild every face they enclose.
union = unary_union(rings)
result = list(polygonize(union))

# Everything covered by at least one intersection, as a single geometry.
multi = cascaded_union(result)

# Part of each disc lying outside all intersections.
fin = []
for c in circles:
    fin.append(c.difference(multi))

# Final list: intersection faces first, then the outside parts.
result = result + fin
# Plot settings:
plt.figure(figsize=(5, 5))
ax = plt.gca()
# Number the pieces from 1 and label each at its centroid.
for name, e in enumerate(result, start=1):
    ax.add_patch(descartes.PolygonPatch(e,
                                        fc=np.random.rand(3),  # random RGB colour per piece
                                        ec=None,
                                        alpha=0.5))
    ax.text(e.centroid.x, e.centroid.y,
            '%s' % name, fontsize=9,
            bbox=dict(facecolor='orange', alpha=0.5),
            color='blue',
            horizontalalignment='center')
plt.xlim(-1.5, 2.5)
plt.ylim(-1.5, 2.5)
plt.show()
Como va. Hi Eric, I tried using the split function from shapely.ops. Here is the result. This is not the most time efficient or elegant solution but it works:
import matplotlib.pyplot as plt
import numpy as np #use np.random to give random RGB color to each polygon
import shapely.geometry as sg
from shapely.ops import split
import descartes
from itertools import combinations
def cascade_split(to_split, splitters):
    '''
    Return a list of all intersections between multiple polygons.
    to_split: list, polygons or sub-polygons to split
    splitters: list, polygons used as splitters (left unmodified)
    Returns a list of all the polygons formed by the multiple intersections.
    '''
    pieces = list(to_split)
    # Apply the splitters one after another; each pass cuts every current
    # piece along the boundary of one splitter.
    for splitter in splitters:
        boundary = splitter.boundary
        next_pieces = []
        for piece in pieces:
            # split() returns a GeometryCollection; `.geoms` iterates it on
            # both Shapely 1.x and 2.x.
            next_pieces.extend(split(piece, boundary).geoms)
        pieces = next_pieces
    return pieces
# Create polygons, in this case circles.
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
# Put all circles in a list
circles = [circleA, circleB, circleC]
# combinations(circles, len(circles) - 1) yields the "all but one" groups in
# an order where the k-th group omits the k-th circle counted from the end,
# so pairing the groups with the reversed list matches each circle with the
# group of all the *other* circles.
back_circles = circles[::-1]  # backwards copy of circles list
polys = []  # Final list of split polygons
for target, others in zip(back_circles, combinations(circles, len(circles) - 1)):
    # Pass a fresh list so the helper is free to consume it.
    c_split = cascade_split([target], list(others))
    for p in c_split:
        # There will be duplicate polygon splits; keep only unseen ones.
        if not any(poly.equals(p) for poly in polys):
            polys.append(p)
# plotting settings
plt.figure(figsize=(5, 5))
ax = plt.gca()
# Number the pieces from 1 and label each at its centroid.
for number, piece in enumerate(polys, start=1):
    ax.add_patch(descartes.PolygonPatch(piece,
                                        fc=np.random.rand(3),  # give random color to each split
                                        ec=None,
                                        alpha=0.5))
    ax.text(piece.centroid.x, piece.centroid.y,
            '%s' % number, fontsize=9,
            bbox=dict(facecolor='orange', alpha=0.5),
            color='blue',
            horizontalalignment='center')
plt.xlim(-1.5, 2.5)
plt.ylim(-1.5, 2.5)
plt.show()
polys  # Output the polys list to see all the splits
I am using Python 3.5 64 bit in Windows 7 64 bit, shapely version 1.5.13.
I have the following code that returned me a self-intersecting polygon:
import numpy as np
from shapely.geometry import Polygon, MultiPolygon
import matplotlib.pyplot as plt
# Vertex coordinates of the outline, in drawing order.
x = np.array([
    0.38517325, 0.40859912, 0.43296919, 0.4583215, 0.4583215,
    0.43296919, 0.40859912, 0.38517325, 0.36265506, 0.34100929,
])
y = np.array([
    62.5, 56.17977528, 39.39698492, 0.,
    0., 17.34605377, 39.13341671, 60.4180932,
    76.02574417, 85.47008547,
])
# Pair the coordinates up as an (n, 2) array of (x, y) rows.
polygon = Polygon(np.column_stack((x, y)))
outline_x, outline_y = polygon.exterior.xy
plt.plot(outline_x, outline_y)
This is correct. Then I tried to obtain the two individual polygons by using buffer(0):
# buffer(0) attempt at cleaning the self-intersecting polygon.
split_polygon = polygon.buffer(0)
original_x, original_y = polygon.exterior.xy
plt.plot(original_x, original_y)
print(type(split_polygon))
# Fill the buffered result on top of the original outline.
cleaned_x, cleaned_y = split_polygon.exterior.xy
plt.fill(cleaned_x, cleaned_y)
Unfortunately, it only returned one of the two polygons:
Could anyone please help? Thanks!
The first step is to close the LineString to make a LinearRing, which is what Polygons are made of.
from shapely.geometry import LineString, MultiPolygon
from shapely.ops import polygonize, unary_union
# original data
# original data
ls = LineString(np.column_stack((x, y)))
# closed, non-simple: repeat the first vertex at the end
ring_coords = list(ls.coords)
ring_coords.append(ring_coords[0])
lr = LineString(ring_coords)
lr.is_simple  # False
However, note that it is non-simple, since the lines cross to make a bow-tie. (The widely used buffer(0) trick usually does not work for fixing bow-ties in my experience). This is unsuitable for a LinearRing, so it needs further work. Make it simple and MultiLineString with unary_union:
# unary_union turns the self-crossing line into simple pieces
mls = unary_union(lr)
mls.geom_type  # 'MultiLineString'
Then use polygonize to find the Polygons from the linework:
# polygonize yields the polygons formed by the linework, one at a time
for polygon in polygonize(mls):
    print(polygon)
Or if you want one MultiPolygon geometry:
# Collect every polygon produced by polygonize into a single MultiPolygon
mp = MultiPolygon(list(polygonize(mls)))
I struggled with this for a while still in 2020, and finally just wrote a method that cleans up self intersections.
This requires Shapely v 1.2.1 explain_validity() method to work.
def clean_bowtie_geom(base_linearring):
    """
    Split a bow-tie ring (one self-intersection) into two polygons.

    The self-intersection point is taken from the text reported by
    explain_validity(); the ring is then walked segment by segment and its
    vertices are distributed over two new rings that both pass through that
    point. Only a single self-intersection is handled.

    :param base_linearring: LinearRing with self-intersecting geometry
    :return: tuple (polygon1, polygon2), or None when explain_validity()
        does not report a self-intersection
    """
    # Local imports: this snippet has no import section of its own.
    import re

    from shapely.geometry import LinearRing, Polygon
    from shapely.validation import explain_validity

    base_polygon = Polygon(base_linearring)
    invalidity = explain_validity(base_polygon)
    # Expected message shape: "Self-intersection[<x> <y>]"
    invalid_regex = re.compile(r'^(Self-intersection)[[](.+)\s(.+)[]]$')
    match = invalid_regex.match(invalidity)
    if not match:
        return None
    groups = match.groups()
    intersect_point = (float(groups[1]), float(groups[2]))
    coords = list(base_linearring.coords)
    new_linring_coords1 = []
    new_linring_coords2 = []
    # True while the walk is inside the loop that belongs to the second ring.
    pop_new_linring = False
    for i, start_point in enumerate(coords):
        # The last vertex wraps around to the first one.
        end_point = coords[(i + 1) % len(coords)]
        crosses_here = is_point_on_line_and_between(
            start=start_point, end=end_point, pt=intersect_point)
        if not pop_new_linring:
            # The start vertex always belongs to ring 1, including on the
            # segment where the crossing is found (otherwise it is lost).
            new_linring_coords1.append(start_point)
            if crosses_here:
                new_linring_coords1.append(intersect_point)
                new_linring_coords2.append(intersect_point)
                pop_new_linring = True
        else:
            new_linring_coords2.append(start_point)
            if crosses_here:
                # Second pass through the crossing closes ring 2.
                new_linring_coords2.append(intersect_point)
                pop_new_linring = False
    corrected_linear_ring1 = LinearRing(coordinates=new_linring_coords1)
    corrected_linear_ring2 = LinearRing(coordinates=new_linring_coords2)
    polygon1 = Polygon(corrected_linear_ring1)
    polygon2 = Polygon(corrected_linear_ring2)
    return polygon1, polygon2
def is_point_on_line_and_between(start, end, pt, tol=0.0005):
"""
Checks to see if pt is directly in line and between start and end coords
:param start: list or tuple of x, y coordinates of start point of line
:param end: list or tuple of x, y coordinates of end point of line
:param pt: list or tuple of x, y coordinates of point to check if it is on the line
:param tol: Tolerance for checking if point on line
:return: True if on the line, False if not on the line
"""
v1 = (end[0] - start[0], end[1] - start[1])
v2 = (pt[0] - start[0], pt[1] - start[1])
cross = cross_product(v1, v2)
if cross <= tol:
# The point lays on the line, but need to check if in between
if ((start[0] <= pt[0] <= end[0]) or (start[0] >= pt[0] >= end[0])) and ((start[1] <= pt[1] <= end[1]) or (start[1] >= pt[1] >= end[1])):
return True
return False
This is not the cleanest code, but it gets the job done for me.
Input is a LinearRing with self intersecting geometry (is_simple=False) and output can be either 2 LinearRings, or Two Polygons, whichever you prefer (or have condition to pick one or the other, the world is your oyster, really).
EDIT
In Shapely 1.8.0, new function added.
shapely.validation.make_valid() will take a self intersecting Polygon and return a MultiPolygon with each polygon created by splitting at the self intersection point(s).