So, I have a dataframe like this,
import numpy as np
import pandas as pd
import descartes
from shapely.geometry import Point, Polygon
import geopandas as gpd
import matplotlib.pyplot as plt
df = pd.DataFrame({'Address':['280 Broadway','1 Liberty Island','141 John Street'],
'Latitude':[ 40.71,40.69,40.71],
'Longitude':[-74.01,-74.05,-74.00]
})
%matplotlib inline
geometry = [Point(xy) for xy in zip( df["Longitude"],df["Latitude"])]
crs = {'init':'epsg:4326'}
df = gpd.GeoDataFrame(df,
crs=crs,
geometry=geometry)
df.head()
I converted the lat and lon to geometry points and I am trying to find all possible closest points for each address using the geometrical points. For example, all possible closest points adjacent to 280 Broadway which lies next to each other for one block.There could be more than one point if the points are adjacent to each other containing in a polygon shape.
This was my approach but didn't really get what I wanted,
df.insert(4, 'nearest_geometry', None)
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points
for index, row in df.iterrows():
point = row.geometry
multipoint = df.drop(index, axis=0).geometry.unary_union
queried_geom, nearest_geom = nearest_points(point, multipoint)
df.loc[index, 'nearest_geometry'] = nearest_geom
Any help is appreciated, Thanks.
Related
I am trying to recreate in Python a map I made in Tableau. I'm pretty sure it's called a kernel density map (just "density" map in Tableau). Each point is just a single point and doesn't correspond with any kind of value.
I have plotted my points on a map, but am unable to figure out how to give them the contour appearance in the Tableau map. I see some examples that include np.meshgrid or matplotlib's contourf function, but I'm unable to apply it to my data because I don't have a Z coordinate (from what I can tell). Below is what I have currently:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import timedelta, date
import matplotlib
from pandas import Series, DataFrame
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, mapping
# import data
df = pd.read_csv('./Data/complete.csv', on_bad_lines='skip') # csv had some bad data, had to skip
# drop unneccessary columns
df = df[['datetime', 'latitude', 'longitude']]
# convert 'datetime' to YYYY-MM-DD
df['datetime'] = pd.to_datetime(df['datetime'], dayfirst=True)
# clean the data
# remove values that contain '/' and 'q'
df = df.drop(df[df.latitude.str.contains(r'[/q]')].index)
# convert lat/long to float
df['latitude'] = df.latitude.astype('float')
df['longitude'] = df.longitude.astype('float')
# create GDF from lat/long
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude)).set_crs('EPSG:4326')
# import shapefile
us_map = gpd.read_file(r'./Data/USA_States_(Generalized)/USA_States_Generalized.shp')
#remove AK and HI
us_map = us_map[~us_map['STATE_NAME'].isin(['Alaska', 'Hawaii'])]
# plotting only 2012 sightings
sightings2012 = df[(df['datetime'] > '12/31/2011') & (df['datetime'] < '01/01/2013')]
# create GDF for 2012 sightings
gdf2012 = gpd.GeoDataFrame(sightings2012,
geometry=gpd.points_from_xy(sightings2012.longitude,
sightings2012.latitude)).set_crs('EPSG:4326')
# clip the sightings data
us_sightings_2012 = gpd.clip(gdf2012, us_map)
cmap = matplotlib.cm.get_cmap('plasma')
# plot
fig, ax = plt.subplots(1, 1)
us_map.plot(ax=ax)
us_sightings_2012.plot(ax=ax, cmap=cmap)
plt.show()
And here is my output:
I have multiple shapefiles that I am trying to map in geopandas, but I can't get aerial/satellite imagery as a background image. These are very zoomed in shapefiles, and probably cover less than half a mile square. These are in Iowa in the United States.
Here is my code.
import geopandas as gpd
import fiona, os
import matplotlib.pyplot as plt
from geopandas import GeoDataFrame
from shapely.geometry import Polygon
import pandas as pd
import contextily as ctx
boundary = gpd.read_file(boundaryFile)
sample_data = gpd.read_file(sampleFile)
yieldData = gpd.read_file(yieldFile)
filesList = [boundary, sample_data, yieldData]
for i in filesList:
i.set_crs(epsg=3857, inplace=True)
fig = plt.figure()
ax = yieldData.plot()
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
I am getting a ValueError: The inferred zoom level of 34 is not valid for the current tile provider. This can indicate that the extent of your figure is wrong (e.g. too small extent, or in the wrong coordinate reference system)
Thanks for your help
as an exercise I am trying to plot out the UK's general election results from 2017. I have used Pandas to manipulate my dataframe and geopandas to visualise the results where every region is coloured by the winning party, conservative: blue, labour: red etc...
I have managed to plot it out but no matter what I do - the colours are not coming out correctly! Below I have attached my code, my output and what the output should look like, and any help would be much appreciated.
My Code:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import multipolygon, polygon, Polygon, MultiPolygon
%matplotlib inline
uk_map = gpd.read_file('Westminster_Parliamentary_Constituencies__December_2017__UK_BGC_V2.shp')
#shape file of the uk
df = pd.read_csv('uk_election_data_v2.csv')
#election results
uk_map.rename(columns={'PCON17NM':'Constituency'}, inplace=True)
uk_map.sort_values('Constituency', inplace=True)
df.sort_values('Constituency', inplace=True)
party_colours={'Conservative':'#0087dc',
'Liberal Democrat':'#FDBB30',
'Labour':'#d50000',
'SNP':'#FFF95D',
'Green':'#00FF00',
'Independent':'#800080',
'Sinn Fein':'#228B22',
'Democratic Unionist':'#808080',
'Plaid Cymru':'#FF5733'
}
#dictionary to assign the colours to each winning party
df['winner_fill']=df['2017_winner'].apply(lambda s: party_colours.get(s,"#aaaaaa"))
#new column that applies the colour for the winning party
election_results = uk_map.merge(df, on='Constituency')
#merge the shape and df together
election_results.plot('winner_fill', figsize=(12,12))
[Expected output]
[My Output]
I think the problem is apply in
df['winner_fill']=df['2017_winner'].apply(lambda s: party_colours.get(s,"#aaaaaa"))
Try map instead
df['winner_fill'] = df['2017_winner'].map(lambda s: party_colours.get(s,"#aaaaaa"))
I don't have very much experience with GeoPandas at all, so I am a little lost. I am trying to plot this data
jupyterNotebook dataframe image
I have followed many references on the GeoPandas website, read through blog posts, and this stack overflow post. All of them tell me to do the same thing, but it seems to still now be working.
Ploting data in geopandas
When I try to plot this data, it comes out this like:
enter image description here
All I am trying to do is plot points from this csv file that has latitude and longitude data onto a map (eventually a map that I have loaded from an .shp file).
Anyways, here is the code I have written so far:
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon
#Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
#combine the latitude and longitude to make coordinates
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
#convert df to a geodf
df = gpd.GeoDataFrame(df, geometry='coordinates')
df
#plot the geodf
df.plot(figsize=(20,10));
Any ideas what is wrong? I check all 100 coordinates and they all seem to be fine. Any suggestions would be great! Thanks!
It's likely to be a problem of projection system. A good thing to do is defining immediately the crs when creating a Geopandas object. If you try,
df = gpd.GeoDataFrame(df, geometry='coordinates', crs = 4326)
maybe you will be able to see your points. I put "4326" because your x-y coordinates look like GPS coordinates which are WSG84 standards (crs code: 4326). Change to the relevent crs code if it's not the good one.
Those responses above are helpful. This also turned out to be another solution as lingo suggested to set the crs. I was getting an error, but this worked out when I ignored the error. Here is my code that ended up working.
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon
#Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
#combine the latitude and longitude to make coordinates
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df.head()
#fixing wrong negative value for Latitude
df.loc[df["Latitude"] == df["Latitude"].min()]
df.at[80, 'Latitude'] = 40.467715
#count the numner of racks at each station
rackTot = 0
for index, row in df.iterrows():
rackTot += row['NumRacks']
crs = {'init' :'epsg:4326'}
geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]
geobikes = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
geobikes.head()
#plot the geodf
#not working for some reason, fix later
geobikes.plot()
When I run your code with the first four rows of coords, I get what you'd expect. From the extent of your plot, it looks like you might have some negative latitude values. Can you do df['Latitude'].min() to check?
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
df = pd.DataFrame({'Latitude' :[40.441326, 40.440877, 40.439030, 40.437200],
'Longitude' :[-80.004679, -80.003080, -80.001860, -80.000375]})
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
#convert df to a geodf
df = gpd.GeoDataFrame(df, geometry='coordinates')
df
#plot the geodf
df.plot(figsize=(20,10));
You can also use plt.subplots() and then set xlim and ylim for your data.
df = pd.DataFrame({'Latitude' :[40.441326, 41.440877, 42.439030, 43.437200],
'Longitude' :[-78.004679, -79.003080, -80.001860, -81.000375]})
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
#convert df to a geodf
df = gpd.GeoDataFrame(df, geometry='coordinates')
print(type(df))
#plot the geodf
fig, ax = plt.subplots(figsize=(14,6))
df.plot(ax = ax)
xlim = ([df.total_bounds[0] - 1, df.total_bounds[2] + 1])
ylim = ([df.total_bounds[1] - 1, df.total_bounds[3] + 1])
# you can also pass in the xlim or ylim vars defined above
ax.set_xlim([-82, -77])
ax.set_ylim([40, 42])
plt.show()
I'm trying to plot data onto a map. I would like to generate data for specific points on the map (e.g. transit times to one or more prespecified location) for a specific city.
I found data for New York City here: https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
It looks like they have a shapefile available. I'm wondering if there is a way to sample a latitude-longitude grid within the bounds of the shapefile for each borough (perhaps using Shapely package, etc).
Sorry if this is naive, I'm not very familiar with working with these files--I'm doing this as a fun project to learn about them
I figured out how to do this. Essentially, I just created a full grid of points and then removed those that did not fall within the shape files corresponding to the boroughs. Here is the code:
import geopandas
from geopandas import GeoDataFrame, GeoSeries
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.cm as cm
%matplotlib inline
import seaborn as sns
from shapely.geometry import Point, Polygon
import numpy as np
import googlemaps
from datetime import datetime
plt.rcParams["figure.figsize"] = [8,6]
# Get the shape-file for NYC
boros = GeoDataFrame.from_file('./Borough Boundaries/geo_export_b641af01-6163-4293-8b3b-e17ca659ed08.shp')
boros = boros.set_index('boro_code')
boros = boros.sort_index()
# Plot and color by borough
boros.plot(column = 'boro_name')
# Get rid of are that you aren't interested in (too far away)
plt.gca().set_xlim([-74.05, -73.85])
plt.gca().set_ylim([40.65, 40.9])
# make a grid of latitude-longitude values
xmin, xmax, ymin, ymax = -74.05, -73.85, 40.65, 40.9
xx, yy = np.meshgrid(np.linspace(xmin,xmax,100), np.linspace(ymin,ymax,100))
xc = xx.flatten()
yc = yy.flatten()
# Now convert these points to geo-data
pts = GeoSeries([Point(x, y) for x, y in zip(xc, yc)])
in_map = np.array([pts.within(geom) for geom in boros.geometry]).sum(axis=0)
pts = GeoSeries([val for pos,val in enumerate(pts) if in_map[pos]])
# Plot to make sure it makes sense:
pts.plot(markersize=1)
# Now get the lat-long coordinates in a dataframe
coords = []
for n, point in enumerate(pts):
coords += [','.join(__ for __ in _.strip().split(' ')[::-1]) for _ in str(point).split('(')[1].split(')')[0].split(',')]
which results in the following plots:
I also got a matrix of lat-long coordinates I used to make a transport-time map for every point in the city to Columbia Medical Campus. Here is that map:
and a zoomed-up version so you can see how the map is made up of the individual points: