Convert geometry from bounds to Polygon - python

I have a geopandas DataFrame with bounds geometry.
import pandas as pd
import geopandas as gpd
gdf = gpd.GeoDataFrame({
'id': [0, 1],
'b': [((40.6494140625, -86.7919921875), (40.69335937...)),
((39.55078125, -93.8232421875), (39.5947265625...))]
})
gdf['b'][0]
Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))
print(type(gdf['b'][0]))
<class 'geolib.geohash.Bounds'>
How do I turn Bounds into Polygon geometry type? Like,
Polygon((40.6494140625, -86.7919921875), (40.69335937...))

Suppose you have:-
# bounds1 = the geohash.Bounds object.
you can proceed with:-
from shapely.geometry import box
# box() takes its corners as (minx, miny, maxx, maxy), so each corner is
# passed longitude first (x) and latitude second (y).
south_west = bounds1.sw
north_east = bounds1.ne
bounds_pgon = box(
    south_west.lon,
    south_west.lat,
    north_east.lon,
    north_east.lat,
)
# Inspect the resulting polygon as WKT text
bounds_pgon.wkt
the output will be similar to this:-
'POLYGON ((-27.9986 70.2987, -27.9986 70.3001, -28.0000 70.3001, -28.0000 70.2987, -27.9986 70.2987))'

This is really the same answer that @swatchi has provided.
Shape of the geometry is defined by the precision of the hash. See reference: geohash
import geolib.geohash
import shapely.geometry
import geopandas as gpd
import pandas as pd
# Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))
# regenerate the referenced geohash bounds
# Encode the point as a precision-5 geohash, then ask geolib for the bounds
# of that geohash cell.
geohash_code = geolib.geohash.encode(
    lat=32.8271484375, lon=-96.8115234375, precision=5
)
b = geolib.geohash.bounds(geohash_code)
print(b)


def _bounds_to_box(bounds):
    """Return a rectangular shapely polygon covering a geolib ``Bounds``."""
    # x is longitude and y is latitude, so lon goes before lat
    return shapely.geometry.box(
        bounds.sw.lon, bounds.sw.lat, bounds.ne.lon, bounds.ne.lat
    )


# One row per bounds object, plus a derived `geometry` column of polygons.
frame = pd.DataFrame({"id": [0], "b": [b]})
frame = frame.assign(geometry=frame["b"].apply(_bounds_to_box))
gdf = gpd.GeoDataFrame(frame, crs="epsg:4326")
gdf.explore(height=300, width=300)
output
Bounds(sw=SouthWest(lat=32.8271484375, lon=-96.8115234375), ne=NorthEast(lat=32.87109375, lon=-96.767578125))

Related

Shapely Buffering, not working as expected

Why does buffering one of my geometries have an unexpected hole in it?
from shapely import LineString
from geopandas import GeoDataFrame
# Closed ring: the first and the last vertex are the same point.
ring_coords = [
    (250, 447),
    (319, 446),
    (325, 387),
    (290, 374),
    (259, 378),
    (254, 385),
    (240, 409),
    (244, 440),
    (250, 447),
]
l = LineString(ring_coords)
assert l.is_valid
assert l.is_simple
# Plot the line together with its 80-unit buffer
geometries = [l, l.buffer(80)]
GeoDataFrame({'geometry': geometries}).plot(column='geometry')
By removing a pair of coordinates, it doesn't have a hole.
When using Sedona's ST_Buffer this happened in more cases.
from shapely import LineString
from geopandas import GeoDataFrame
# Same closed ring as in the question (first vertex == last vertex).
vertices = [
    (250, 447),
    (319, 446),
    (325, 387),
    (290, 374),
    (259, 378),
    (254, 385),
    (240, 409),
    (244, 440),
    (250, 447),
]
# Simplify with a tiny tolerance before buffering.
l = LineString(vertices).simplify(tolerance=1e-6)
# In shapely's numbering cap_style=3 is "square" and join_style=2 is "mitre".
buffered_geometry = l.buffer(80, cap_style=3, join_style=2)
GeoDataFrame({'geometry': [l, buffered_geometry]}).plot(column='geometry')
which is basically what you did.

ValueError: Found unknown categories ['Off', 'On'] in column 0 during fit

This is my code I do not know where the error is.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder
# Levels of the two experimental factors
Light = ['On','Off']
Watering = ['Low','High']
# create combinations for all parameters
experiments = [(x,y) for x in Light for y in Watering]
# Column 'A' receives the Light level and column 'B' the Watering level
exp_df = pd.DataFrame(experiments,columns=['A','B'])
print(exp_df)
# NOTE(review): OHE_model is created but not used in the lines shown here
OHE_model = OneHotEncoder(handle_unknown = 'ignore')
# NOTE(review): the first category list holds the column names 'A'/'B', while
# column 'A' actually contains 'On'/'Off' -- this is what the ValueError reports
enc = OrdinalEncoder(categories=[['A','B'],['Low','High']])
encoded_df = pd.DataFrame(enc.fit_transform(exp_df[['A','B']]),columns=['A','B'])
#define the experiments order which must be random
# (draws 4 values out of 0..3 without replacement, i.e. a random ordering)
encoded_df['exp_order'] = np.random.choice(np.arange(4),4,replace=False)
encoded_df['outcome'] = [25,37,55,65]
and this is the error
ValueError: Found unknown categories ['Off', 'On'] in column 0 during fit
The error indicates that On and Off are expected in place of A and B.
Use this code for your enc variable instead
# One category list per encoded column, holding the values found in that
# column: 'On'/'Off' for column 'A' and 'Low'/'High' for column 'B'.
enc = OrdinalEncoder(categories=[['On','Off'],['Low','High']])

Folium popup does not appear python

I have a geodataframe with MultiLineString geometry. I wanted to plot an interactive map on which, when I click on a line, its name and id would appear. I can see that the mouse cursor changes over a line, but nothing appears when I click. I also tried to define the names as labels, but that did not work either!
# Interactive base map at a fixed location and zoom level
Ex_centreLine_map= folium.Map(location = [43.7180, -79.3762], zoom_start=12)
# Draw only the centreline geometries whose length exceeds 0.00001.
# NOTE(review): per the question text, the `popup` passed here never shows up
# when a line is clicked.
folium.Choropleth(
centreline_gdf[centreline_gdf.geometry.length> 0.00001],
line_weight=3,
line_color='blue',
popup=centreline_gdf[['lf_name', 'lfn_id']]).add_to(Ex_centreLine_map)
# Trailing expression so that a notebook displays the map
Ex_centreLine_map
you have not provided any sample data, so I have used some road data to make a MWE
you can do native folium however it's simpler to use geopandas explore()
for native folium you can use folium.GeoJson()
import geopandas as gpd
import pandas as pd
import shapely.wkt
import io
df = pd.read_csv(io.StringIO("""ref;lanes;highway;maxspeed;length;name;geometry
A3015;2;primary;40 mph;40.68;Rydon Lane;MULTILINESTRING ((-3.4851169 50.70864409999999, -3.4849879 50.7090007), (-3.4857269 50.70693379999999, -3.4853034 50.7081574), (-3.488620899999999 50.70365289999999, -3.4857269 50.70693379999999), (-3.4853034 50.7081574, -3.4851434 50.70856839999999), (-3.4851434 50.70856839999999, -3.4851169 50.70864409999999))
A379;3;primary;50 mph;177.963;Rydon Lane;MULTILINESTRING ((-3.4763853 50.70886769999999, -3.4786112 50.70811229999999), (-3.4746017 50.70944449999999, -3.4763853 50.70886769999999), (-3.470350900000001 50.71041779999999, -3.471219399999999 50.71028909999998), (-3.465049699999999 50.712158, -3.470350900000001 50.71041779999999), (-3.481215600000001 50.70762499999999, -3.4813909 50.70760109999999), (-3.4934747 50.70059599999998, -3.4930204 50.7007898), (-3.4930204 50.7007898, -3.4930048 50.7008015), (-3.4930048 50.7008015, -3.4919513 50.70168349999999), (-3.4919513 50.70168349999999, -3.49137 50.70213669999998), (-3.49137 50.70213669999998, -3.4911565 50.7023015), (-3.4911565 50.7023015, -3.4909108 50.70246919999999), (-3.4909108 50.70246919999999, -3.4902349 50.70291189999999), (-3.4902349 50.70291189999999, -3.4897693 50.70314579999999), (-3.4805021 50.7077218, -3.4806265 50.70770150000001), (-3.488620899999999 50.70365289999999, -3.4888806 50.70353719999999), (-3.4897693 50.70314579999999, -3.489176800000001 50.70340539999999), (-3.489176800000001 50.70340539999999, -3.4888806 50.70353719999999), (-3.4865751 50.70487679999999, -3.4882604 50.70375799999999), (-3.479841700000001 50.70784459999999, -3.4805021 50.7077218), (-3.4882604 50.70375799999999, -3.488620899999999 50.70365289999999), (-3.4806265 50.70770150000001, -3.481215600000001 50.70762499999999), (-3.4717096 50.71021009999998, -3.4746017 50.70944449999999), (-3.4786112 50.70811229999999, -3.479841700000001 50.70784459999999), (-3.471219399999999 50.71028909999998, -3.4717096 50.71021009999998))"""),
sep=";")
# Parse the WKT text column into shapely geometries and build a GeoDataFrame
# from them, tagged with the EPSG:4326 coordinate reference system.
gdf = gpd.GeoDataFrame(
    df,
    geometry=df["geometry"].map(shapely.wkt.loads),
    crs="epsg:4326",
)
# Interactive map; the lines are drawn with a stroke weight of 10
gdf.explore(style_kwds={"weight": 10})
native folium
import folium
# total_bounds is (minx, miny, maxx, maxy). The map is centred on the middle
# of the y-range followed by the middle of the x-range.
m = folium.Map(
    location=[
        gdf.total_bounds[[1, 3]].mean(),
        gdf.total_bounds[[0, 2]].mean(),
    ],
    zoom_start=12,
)
def style_fn(x):
    """Return the line style for a feature.

    The argument ``x`` is ignored: every feature gets the same blue line
    with a weight of 3.
    """
    line_style = dict(color="blue", weight=3)
    return line_style
# Every non-geometry column is listed in the click popup; only "ref" is
# listed in the hover tooltip.
popup_fields = [name for name in gdf.columns if name != "geometry"]
layer = folium.GeoJson(
    gdf,
    style_function=style_fn,
    popup=folium.GeoJsonPopup(popup_fields),
    tooltip=folium.GeoJsonTooltip(["ref"]),
)
layer.add_to(m)
# Trailing expression so that a notebook displays the map
m

Convert DataFrame to a multi polygon DataFrame, multiple data point - python

I have a DataFrame as below, I want to convert data to a multi polygon DataFrame, because I want to plot each multi polygon on a map.
I know how to convert it if I have two data points, but with 6 data points I don't know how to convert it. Can anyone help me, please?
# One shapely Point per row, built from the first coordinate pair only,
# passed in the order (lan0, long0)
geometry = [Point(xy) for xy in zip(neightrip_counts_.lan0, neightrip_counts_.long0)]
geometry
# The remaining coordinate pairs are not used yet:
#neightrip_counts_.lan1, neightrip_counts_.long1,neightrip_counts_.lan2, neightrip_counts_.long2
lan0 long0 lan1 long1 lan2 long2
0 59.915667 10.777567 59.916738 10.779916 59.914943 10.773977
1 59.929853 10.711515 59.929435 10.713682 59.927596 10.710033
2 59.939230 10.759170 59.937205 10.760581 59.943750 10.760306
3 59.912520 10.762240 59.911594 10.761774 59.912347 10.763815
4 59.929634 10.732839 59.927140 10.730981 59.931081 10.736003
Let me rename the dataframe neightrip_counts_ as df for brevity. Here is the relevant code that will create a polygon for each row of dataframe.
# One triangle per row; each vertex is given as (long, lan), i.e. x before y.
coordinate_columns = zip(df.long0, df.lan0, df.long1, df.lan1, df.long2, df.lan2)
df['geometry'] = [
    Polygon([(x0, y0), (x1, y1), (x2, y2)])
    for x0, y0, x1, y1, x2, y2 in coordinate_columns
]
# Promote the new column to the active geometry and draw the polygons
gpdf = df.set_geometry("geometry", drop=True)
gpdf.plot()
By the way, you must be careful about the sequence of (long, lat).
start_coords = [ gdf.centroid[0].x, gdf.centroid[0].y] # is wrong
Use this instead.
start_coords = [ gdf.centroid[0].y, gdf.centroid[0].x]
Edit
For the benefits of the readers, here is the complete runnable code:
import pandas as pd
import geopandas as gpd
from io import StringIO
from shapely.geometry import Polygon, Point, LineString
import numpy as np
import folium
data1 = """index lan0 long0 lan1 long1 lan2 long2
0 59.915667 10.777567 59.916738 10.779916 59.914943 10.773977
1 59.929853 10.711515 59.929435 10.713682 59.927596 10.710033
2 59.939230 10.759170 59.937205 10.760581 59.943750 10.760306
3 59.912520 10.762240 59.911594 10.761774 59.912347 10.763815
4 59.929634 10.732839 59.927140 10.730981 59.931081 10.736003"""
# read/parse the whitespace-separated text into a dataframe
# (raw string, so that `\s` reaches the regex engine instead of being an
# invalid escape sequence inside a normal string literal)
df0 = pd.read_csv(StringIO(data1), sep=r'\s+', index_col='index')
# create `geometry` column: one triangle per row, each vertex given as
# (long, lan), i.e. x before y
df0['geometry'] = [
    Polygon([(x0, y0), (x1, y1), (x2, y2)])
    for x0, y0, x1, y1, x2, y2 in zip(
        df0.long0, df0.lan0, df0.long1, df0.lan1, df0.long2, df0.lan2
    )
]
# set geometry
gpdf = df0.set_geometry("geometry", drop=True)
# do check plot. (uncomment next line)
#gpdf.plot()
# centre the map on the centroid of the first polygon, passed as (y, x);
# the centroids are computed once and reused for both coordinates
first_centroid = gpdf.centroid[0]
center_pt = first_centroid.y, first_centroid.x
# make geojson
gdf_json = gpdf.to_json()
# plot the geojson on the folium webmap
webmap = folium.Map(location = center_pt, zoom_start = 13, min_zoom = 3)
folium.GeoJson(gdf_json, name='data_layer_1').add_to(webmap)
# this opens the webmap
webmap
Output screen capture (of interactive webmap):
Try this, assuming the 'lan' is latitude.
import geopandas as gpd
from shapely.geometry import Polygon
import numpy as np
import pandas as pd
import folium
# ....
def addpolygeom(row):
    """Build a shapely Polygon from one row of alternating coordinate values.

    The row is cut into consecutive pairs and each pair is reversed before
    being used as a vertex, so a row laid out as (lan0, long0, lan1, long1,
    ...) yields the vertices (long0, lan0), (long1, lan1), ...
    """
    values = np.array(row)
    # cut after every second value: [v0, v1], [v2, v3], ...
    cut_points = range(2, values.shape[0], 2)
    vertices = []
    for pair in np.split(values, cut_points):
        vertices.append(tuple(pair)[::-1])
    return Polygon(vertices)
# Build one polygon per row from that row's coordinate columns
neightrip_counts_['geometry'] = neightrip_counts_.apply(addpolygeom, axis=1)
# Wrap the frame as a GeoDataFrame that uses the new column as its geometry
gdf = gpd.GeoDataFrame(neightrip_counts_, geometry='geometry')
# Start the map at the centroid of the first polygon, given as [y, x]
first_centroid = gdf.centroid[0]
start_coords = [first_centroid.y, first_centroid.x]
gdf_json = gdf.to_json()
# NOTE: `map` shadows the builtin of the same name; the name is kept because
# it is the snippet's result variable.
map = folium.Map(location=start_coords, zoom_start=4)
polygon_layer = folium.GeoJson(gdf_json, name='mypolygons')
polygon_layer.add_to(map)

How to optimize Shapely and Sklearn code?

I am working with a dataset of 4.2 million points and my code already takes a while to process it; however, the code below takes several hours to run (it was provided in another public question and, for each point, it basically takes the nearest linestring, finds the nearest point on that linestring and calculates the distance).
The code actually does an awesome job, but it takes too long for my purposes. How can I optimize it, or do the same thing in a shorter time?
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
from sklearn.neighbors import DistanceMetric
# NOTE: despite the name, this value is the Earth's radius in NAUTICAL miles.
EARTH_RADIUS_IN_MILES = 3440.1
panama = gpd.read_file("/Users/Danilo/Documents/Python/panama_coastline/panama_coastline.shp")


def closest_line(point, linestrings):
    """Return the position of the linestring in `linestrings` nearest to `point`."""
    return np.argmin([point.distance(linestring) for linestring in linestrings])


# The metric object does not depend on the point, so it is built once
# instead of once per iteration.
dist = DistanceMetric.get_metric('haversine')

for c in range(b):
    # example point: Point(-77.65325423107359, 9.222038196656131)
    p = Point(data['longitude'][c], data['latitude'][c])
    closest_linestring = panama.geometry[closest_line(p, panama.geometry)]
    # nearest_points returns the pair (p itself, nearest point on the line)
    closest_point = nearest_points(p, closest_linestring)
    # The haversine metric expects [latitude, longitude] in radians, while a
    # shapely point built as Point(longitude, latitude) has x=lon and y=lat,
    # so y must come first.
    points_as_floats = [np.array([pt.y, pt.x]) for pt in closest_point]
    haversine_distances = dist.pairwise(np.radians(points_as_floats), np.radians(points_as_floats))
    # angular distance (radians) -> distance on the sphere
    haversine_distances *= EARTH_RADIUS_IN_MILES
    # entry [0][1] is the distance between the point and its nearest coast point
    dtc1 = haversine_distances[0][1]
    dtc.append(dtc1)
Edit: Simplify to single calculation with BallTree
Imports
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point, LineString
from shapely.ops import nearest_points
Read Panama
# Load the coastline shapefile (the path is relative to the working directory)
panama = gpd.read_file("panama_coastline/panama_coastline.shp")
Get all points, long,lat format:
def get_points_as_numpy(geom):
    """Stack the coordinates of every geometry in `geom` into one array.

    Each item of `geom` must expose a `.coords` sequence; the per-geometry
    coordinate arrays are concatenated along the first axis, in input order.
    """
    coordinate_blocks = [np.array(shape.coords) for shape in geom]
    return np.concatenate(coordinate_blocks)
# Every vertex of every coastline geometry, stacked into a single array
all_coastline_points = get_points_as_numpy(panama.geometry)
Create Balltree
from sklearn.neighbors import BallTree
import numpy as np
# The coastline points are stored as (long, lat); reverse the column order to
# (lat, long) and convert degrees to radians before building the tree.
panama_radians = np.radians(all_coastline_points[:, ::-1])
tree = BallTree(panama_radians, metric='haversine', leaf_size=12)
Create 1M random points:
# Centre of the synthetic sample, in (lat, long) order
mean = [8.5, -80]
# Diagonal covariance: variance 1 on the first axis, 5 on the second,
# no correlation between them
cov = [[1, 0], [0, 5]]
random_gps = np.random.multivariate_normal(mean, cov, size=10**6)
# First sampled column is the latitude, second column the longitude
random_points = pd.DataFrame(dict(lat=random_gps[:, 0], long=random_gps[:, 1]))
random_points.head()
Calculate closest coast point (<30 Seconds on my machine)
# Nearest coastline point (k=1) for every random point; the queries are
# converted to radians, matching the radians the tree was built from
distances, index = tree.query( np.radians(random_gps), k=1)
Put results in DataFrame
# NOTE(review): 3440.1 is the Earth's radius in nautical miles, despite the name.
EARTH_RADIUS_IN_MILES = 3440.1
# `index` has one column (k=1); pick, for every query point, the matching
# coastline row, which is stored as (long, lat).
nearest_coast = all_coastline_points[index[:, 0]]
random_points['distance_to_coast'] = distances * EARTH_RADIUS_IN_MILES
random_points['closest_lat'] = nearest_coast[:, 1]
random_points['closest_long'] = nearest_coast[:, 0]

Categories