How to use data in a csv file with geopandas? - python

I have a CSV file with numerical variables, including longitude and latitude, and a few categorical variables. I want to use this CSV with geopandas to plot a map, but I am confused about shapefiles and how to use them. Can anyone tell me how to get started?

As per the comments, this is fully covered in the documentation. Here is another example:
import pandas as pd
import io
import requests
import geopandas as gpd

# Download the CSV text and parse it into a plain DataFrame
CSV_URL = "https://assets.nhs.uk/data/foi/Hospital.csv"
raw_text = requests.get(CSV_URL).text
df = pd.read_csv(io.StringIO(raw_text), sep="Č", engine="python")

# Longitude / latitude columns become the point geometry; every other column
# is carried along as an attribute of the GeoDataFrame
points = gpd.points_from_xy(df["Longitude"], df["Latitude"], crs="EPSG:4326")
gdf = gpd.GeoDataFrame(data=df, geometry=points)

# Keep only rows whose geometry is not empty and show them with explore(),
# coloured by the "Sector" column
has_geometry = ~gdf.geometry.is_empty
gdf.loc[has_geometry, :].explore(
    "Sector", cmap=["blue", "green"], height=300, width=300
)
output

Related

Folium HeatMapWithTime html file generated is blank

I created a self-contained code to create a HeatMapWithTime map but it shows up as a blank file. This code is run on Jupyter and the output is a 14KB file and I've tried to open it in Chrome, Safari, Firefox but it is still blank.
# Question snippet: builds 50 random points with timestamps, passes them to
# folium's HeatMapWithTime and saves the resulting map to an HTML file.
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime
# Generate dummy data
latitudes = np.random.uniform(low=45.523, high=45.524, size=50)
# NOTE(review): low (-122.675) is greater than high (-122.676) on the next line
longitudes = np.random.uniform(low=-122.675, high=-122.676, size=50)
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))
data = {'latitude': latitudes, 'longitude': longitudes, 'time': times}
# Create a pandas dataframe from the dummy data
df = pd.DataFrame(data)
# Interpret the numeric time column as seconds and convert it to datetimes
df['time'] = pd.to_datetime(df['time'], unit='s')
# Create a base map
# NOTE(review): the variable name "map" shadows the Python builtin of the same name
map = folium.Map(location=[45.523, -122.675], zoom_start=13)
# Create a heat map with timeline
# data is passed as one flat list of [latitude, longitude, time] rows and
# index as a pandas Series of formatted timestamp strings
HeatMapWithTime(data=df[['latitude', 'longitude', 'time']].values.tolist(),
index=df['time'].dt.strftime("%Y-%m-%d %H:%M:%S"),
auto_play=True,
max_opacity=0.8).add_to(map)
# Save the map to an html file
map.save("heatmap_with_timeline.html")
Folium version: 0.14.0
Python version: 3.9.12
To begin with, the target data for the heatmap is time-series data in date format. The sample data itself was raw data, but it was converted to date format. Also, I think the index of the time animation of the heatmap needs to be in list format. Finally, the sample data contains a single latitude/longitude and heatmap value per time step. Since this folium heatmap is a density heatmap, multiple groups of data may be necessary. To create the data for the heatmap from your sample data, I loop over the 50 time indexes and, for each one, add an array of 50 latitude/longitude pairs with heatmap values.
For data structures and examples, please refer to the following references. HeatMapWithTime Plugin
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime

# Generate dummy data: 50 sorted Unix timestamps, one per animation frame
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))

# Build one batch of 50 [lat, lon, weight] points for every timestamp, so that
# data is a list of per-time-step point lists
data = []
for _ in times:
    latitudes = np.random.uniform(low=45.423, high=45.524, size=50)
    # low must not exceed high: numpy documents the result as undefined otherwise
    longitudes = np.random.uniform(low=-122.676, high=-122.575, size=50)
    value = np.random.uniform(0.0, 20.0, 50)
    data.append([[lat, lon, v] for lat, lon, v in zip(latitudes, longitudes, value)])

# Labels for the time slider, passed as a plain list of formatted strings
index_time = pd.to_datetime(times, unit='s')
index_time = index_time.strftime("%Y-%m-%d %H:%M:%S").tolist()

# Create a base map
m = folium.Map(location=[45.523, -122.675], tiles="stamentoner", zoom_start=11)

# Create a heat map with timeline
hm = HeatMapWithTime(
    data,
    index=index_time,
    auto_play=True,
    max_opacity=0.8,
)
hm.add_to(m)

# Save the map to an html file (left disabled, as in the original answer)
# m.save("heatmap_with_timeline.html")

# Display the map inline when run in a notebook
m
This should help.
import pandas as pd
import folium
from folium.plugins import HeatMap

# Load the business listing; the latitude, longitude and stars columns are used below
df = pd.read_csv('C:\\your_path\\business.csv')
df.head(3)

# Largest value in the stars column, passed to the heat map as max_val
max_amount = float(df['stars'].max())

# One (latitude, longitude, stars) triple per row
weighted_points = list(
    zip(df['latitude'].values, df['longitude'].values, df['stars'].values)
)

hmap = folium.Map(location=[42.5, -75.5], zoom_start=7)
hm_wide = HeatMap(
    weighted_points,
    min_opacity=0.2,
    max_val=max_amount,
    radius=17,
    blur=15,
    max_zoom=1,
)
hmap.add_child(hm_wide)
import pandas as pd
import gmplot
import matplotlib.pyplot as plt
import folium
from folium import plugins
import seaborn as sns

df = pd.read_csv('C:\\your_path\\lat_lon.csv')

# Base map centred on the given coordinates
m = folium.Map([40.7379601, -73.9666422], zoom_start=11)
m

X = df[['longitude', 'latitude', 'LOT']].copy()

# mark each station as a point
for _, row in X.iterrows():
    # the marker call must sit inside the loop body so one marker is added per row
    folium.CircleMarker(
        [row['latitude'], row['longitude']],
        radius=15,
        popup=row['LOT'],
        fill_color="#3db7e4",  # divvy color
    ).add_to(m)

# convert to (n, 2) nd-array format for heatmap
stationArr = df[['latitude', 'longitude']].to_numpy()

# plot heatmap
m.add_child(plugins.HeatMap(stationArr, radius=15))
m
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20Coordinates%20using%20Folium%20CircleMarker.ipynb
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20to%20Visualize%20Spatial%20Data%20for%20NYC%20Taxis.ipynb

Choropleth map showing white page instead of the map

I am working on a choropleth map and it is showing a white page instead of the map as shown here
https://i.stack.imgur.com/boYKY.png
I have both the geojson and the excel file downloaded in the same folder.
geojson https://drive.google.com/file/d/1N-rp9yHqE1Rzn2VxoAAweJ8-5XIjk61j/view?usp=sharing
excel https://docs.google.com/spreadsheets/d/1NKeUg20XxJe0jccMgjj9pMxrTIIWeuQk/edit?usp=sharing&ouid=100050178655652050254&rtpof=true&sd=true
Here is my code
# Question snippet: load the region table and the GeoJSON, then draw a
# choropleth coloured by the OB1506 column.
import json
import numpy as np
import pandas as pd
import plotly.express as px
df = pd.read_excel('kraje.xlsx', sheet_name='List1')
# NOTE(review): the file object returned by open() is never closed explicitly
regions_json = json.load(open("KRAJE.geojson", "r"))
# locations names the data-frame column used to match GeoJSON features;
# no featureidkey is passed in this call
fig = px.choropleth(df,
locations="K_KRAJ",
geojson=regions_json,
color='OB1506')
fig.show()
The console of my browser in which I am viewing the map shows
this
I am using a jupyter notebook in the brave browser.
Can anyone please help me solve this? Thanks
EDIT:
I found the correct geojson file but now I have a different issue. Only one region is colored and not even in the correct color and the rest of the map even outside of my regions is colored in the same color. When I hover over my regions I can see that they are in the correct place but with a wrong color. And I also have no idea why the code colored the whole map and not only the regions from the geojson file. here is an image of the output
new (should be correct) geojson https://drive.google.com/file/d/1S03NX5Q0pqgAsbJnjqt8O5w8gUHH1rt_/view?usp=sharing
# Question snippet (after the edit): copies each feature's K_KRAJ property into
# its "id", draws the choropleth and fits the view to the plotted locations.
import json
import numpy as np
import pandas as pd
import plotly.express as px
df = pd.read_excel('kraje.xlsx', sheet_name='List1')
# NOTE(review): the file object returned by open() is never closed explicitly
regions_json = json.load(open("KRAJE.geojson", "r"))
# NOTE(review): the loop body on the line after "for" appears to have lost its
# indentation when the page was extracted
for feature in regions_json['features']:
feature["id"] = feature["properties"]["K_KRAJ"]
fig = px.choropleth(df,
locations="K_KRAJ",
geojson=regions_json,
color='OB1506')
# visible=False is passed to update_geos together with fitbounds="locations"
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
SOLUTION
Thanks to Rob Raymond it finally works. There was an issue with the geojson file. I also had a ton of problems installing geopandas and the only tutorial that actually worked was installing each package separately (https://stackoverflow.com/a/69210111/17646343)
there are multiple issues with your geojson
need to define the CRS, it's clearly not epsg:4326. Appears to be UTM CRS for Czech Republic
even with this there are invalid polygons
with valid geojson, a few points you have missed
locations needs to be common across your data frame and geojson
featureidkey needs to be used to define you are joining on name
import json
import numpy as np
import pandas as pd
import plotly.express as px
import geopandas as gpd
from pathlib import Path  # required by Path.home() below

# Locate the downloaded files by glob pattern and index them by suffix,
# e.g. files[".xlsx"] and files[".geojson"]
files = {
    f.suffix: f
    for p in ["KRAJE*.*", "KRAJE*.*".lower()]
    for f in Path.home().joinpath("Downloads").glob(p)
}

# df = pd.read_excel('kraje.xlsx', sheet_name='List1')
df = pd.read_excel(files[".xlsx"], sheet_name="List1")

# regions_json = json.load(open("KRAJE.geojson", "r"))
# Read the GeoJSON with geopandas instead of json.load: drop rows with missing
# values, declare the source CRS as EPSG:32633 (overriding whatever the file
# states), reproject to EPSG:4326 and hand plotly the GeoJSON-like mapping.
regions_json = (
    gpd.read_file(files[".geojson"])
    .dropna()
    .set_crs("EPSG:32633", allow_override=True)
    .to_crs("epsg:4326")
    .__geo_interface__
)

# locations is the data-frame column; featureidkey is the GeoJSON property it
# is matched against
fig = px.choropleth(
    df,
    locations="N_KRAJ",
    featureidkey="properties.name",
    geojson=regions_json,
    color="OB1506",
)
fig.update_geos(fitbounds="locations", visible=True)
fig
updated
there are still issues with your geojson. Have fixed it using geopandas and buffer(0) (see Fix invalid polygon in Shapely)
with this and change to plotly parameters I can now generate a figure
import json
import numpy as np
import pandas as pd
import plotly.express as px
import geopandas as gpd
from pathlib import Path

# Locate the downloaded files by glob pattern and index them by suffix,
# e.g. files[".xlsx"] and files[".json"]
files = {
    f.suffix: f
    for p in ["KRAJ_*.*", "KRAJE*.*".lower()]
    for f in Path.home().joinpath("Downloads").glob(p)
}

# df = pd.read_excel('kraje.xlsx', sheet_name='List1')
df = pd.read_excel(files[".xlsx"], sheet_name="List1")

# regions_json = json.load(open("KRAJE.geojson", "r"))
# geometry is still invalid!!! force it to valid by buffer(0)
# (read once through geopandas; a separate json.load of the same file would
# only be overwritten and leave an unclosed file handle behind)
regions_json = (
    gpd.read_file(files[".json"])
    .assign(geometry=lambda d: d["geometry"].buffer(0))
    .__geo_interface__
)

# Join the data frame to the GeoJSON on K_KRAJ on both sides
fig = px.choropleth(
    df,
    locations="K_KRAJ",
    featureidkey="properties.K_KRAJ",
    geojson=regions_json,
    color="OB1506",
)
fig.update_geos(fitbounds="locations", visible=True)
fig

Plotly px.choropleth not drawing data from json file

I have a CSV file with the following structure
cardodb_id,CONCELHO,LAT,LONG,DATA,INC
225,Abrantes,39.466667,-8.2,2020-03-25,1000
And a Json file with the following structure:
{"type":"FeatureCollection", "features": [ {"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[-8.163874,39.626553],[-8.164286,39.626686],[-8.165384,39.626633],*(more coordinates' pairs)*,[-8.163874,39.626553]]]},"properties":{"cartodb_id":225,"id_2":225,"id_1":16,"id_0":182,"varname_2":null,"nl_name_2":null,"engtype_2":"Municipality","type_2":"Concelho","name_2":"Abrantes","name_1":"Santarém","name_0":"Portugal","iso":"PRT","split_id":"1"}} ]}
Both the CSV and the json file here are part of a larger set but this will do as an example
My code is as follows
# Question snippet: load the GeoJSON and the CSV, then draw a choropleth
# coloured by the INC column.
import json
# NOTE(review): the body of the with-statement (next line but one) appears to
# have lost its indentation when the page was extracted
with open('abrantes.json') as json_file:
abr = json.load(json_file)
import pandas as pd
df = pd.read_csv("abrantes.csv")
import plotly.express as px
# locations points at the CSV column 'cardodb_id'; no featureidkey is passed
fig = px.choropleth(df, geojson=abr, locations='cardodb_id', color='INC',
color_continuous_scale="Viridis",
range_color=(0, 5000),
labels={'INC':'Incidência'}
)
fig.show()
The end result is an empty map with the scale from 0 to 5000 on the right side, when I was expecting the polygon to be filled with the color correspondent to "INC", i.e., "1000".
What am I doing wrong? Thank you in advance for all the help you can provide.
To draw a map, px.choropleth() must match IDs of your dataframe with IDs of your GeoJSON.
With the parameter locations you specify the column with the IDs in your dataframe.
What you are missing is the parameter featureidkey to specify the same IDs in the GeoJSON. Alternatively, you can omit featureidkey but then the features in your GeoJSON need a parameter id.
Then you have to pay attention to spelling. Your csv file has a column cardodb_id, your GeoJSON a parameter cartodb_id.
And since the polygon you provided is quite small, it is not visible on a world map. Thus, I recommend to add fig.update_geos(fitbounds="locations") to zoom the map to the area of interest.
import json
import pandas as pd
import plotly.express as px

# The GeoJSON contains non-ASCII names, so state the encoding explicitly
# instead of relying on the platform default
with open('abrantes.json', encoding='utf-8') as json_file:
    abr = json.load(json_file)

df = pd.read_csv("abrantes.csv")

# locations is the data-frame column and featureidkey the GeoJSON property it
# is matched against; note the different spellings: cardodb_id (CSV) versus
# cartodb_id (GeoJSON)
fig = px.choropleth(
    df,
    geojson=abr,
    locations='cardodb_id',
    color='INC',
    color_continuous_scale="Viridis",
    featureidkey="properties.cartodb_id",
    range_color=(0, 5000),
    labels={'INC': 'Incidência'},
)
# Zoom to the plotted feature; a single small polygon is not visible on a world map
fig.update_geos(fitbounds="locations")
fig.show()

Geopandas data not plotting correctly

I don't have very much experience with GeoPandas at all, so I am a little lost. I am trying to plot this data
jupyterNotebook dataframe image
I have followed many references on the GeoPandas website, read through blog posts, and this Stack Overflow post. All of them tell me to do the same thing, but it still does not seem to be working.
Ploting data in geopandas
When I try to plot this data, it comes out like this:
enter image description here
All I am trying to do is plot points from this csv file that has latitude and longitude data onto a map (eventually a map that I have loaded from an .shp file).
Anyways, here is the code I have written so far:
# Question snippet: read the station CSV, turn the Longitude/Latitude columns
# into shapely Points, wrap the frame in a GeoDataFrame and plot it.
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon
#Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
#combine the latitude and longitude to make coordinates
# (each entry is a [Longitude, Latitude] list, i.e. x first, then y)
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
#convert df to a geodf
# NOTE(review): no crs argument is passed here
df = gpd.GeoDataFrame(df, geometry='coordinates')
df
#plot the geodf
df.plot(figsize=(20,10));
Any ideas what is wrong? I checked all 100 coordinates and they all seem to be fine. Any suggestions would be great! Thanks!
It's likely to be a problem of projection system. A good thing to do is defining immediately the crs when creating a Geopandas object. If you try,
# Build the GeoDataFrame from the 'coordinates' column and declare its CRS as EPSG code 4326
df = gpd.GeoDataFrame(df, geometry='coordinates', crs = 4326)
maybe you will be able to see your points. I put "4326" because your x-y coordinates look like GPS coordinates, which use the WGS84 standard (CRS code: 4326). Change it to the relevant CRS code if that is not the right one.
Those responses above are helpful. This also turned out to be another solution as lingo suggested to set the crs. I was getting an error, but this worked out when I ignored the error. Here is my code that ended up working.
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import descartes
from shapely.geometry import Point, Polygon

#Load in the CSV Bike Station Location Data
df = pd.read_csv('HRSQ12020.csv')
df.head()

#fixing wrong negative value for Latitude
df.loc[df["Latitude"] == df["Latitude"].min()]  # notebook display of the offending row
df.at[80, 'Latitude'] = 40.467715

#total number of racks across all stations
rackTot = df['NumRacks'].sum()

# Build the point geometry only after the latitude fix, so every Point uses the
# corrected value (x = Longitude, y = Latitude)
geometry = [Point(xy) for xy in zip(df.Longitude, df.Latitude)]

# Pass the CRS as an authority string; the {'init': 'epsg:4326'} dict form is
# deprecated and makes pyproj/geopandas emit a warning
geobikes = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)
geobikes.head()

#plot the geodf
geobikes.plot()
When I run your code with the first four rows of coords, I get what you'd expect. From the extent of your plot, it looks like you might have some negative latitude values. Can you do df['Latitude'].min() to check?
import csv
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon

# Four sample coordinates, enough to reproduce the plot
sample = {
    'Latitude': [40.441326, 40.440877, 40.439030, 40.437200],
    'Longitude': [-80.004679, -80.003080, -80.001860, -80.000375],
}
df = pd.DataFrame(sample)

# Pair every longitude with its latitude, then turn each pair into a Point
lon_lat_pairs = df[['Longitude', 'Latitude']].values.tolist()
df['coordinates'] = lon_lat_pairs
df['coordinates'] = df['coordinates'].apply(Point)
df

# Wrap the frame in a GeoDataFrame that uses the Point column as geometry
df = gpd.GeoDataFrame(df, geometry='coordinates')
df

# Draw the points
df.plot(figsize=(20, 10));
You can also use plt.subplots() and then set xlim and ylim for your data.
df = pd.DataFrame({'Latitude' :[40.441326, 41.440877, 42.439030, 43.437200],
                   'Longitude' :[-78.004679, -79.003080, -80.001860, -81.000375]})
df['coordinates'] = df[['Longitude', 'Latitude']].values.tolist()
# Change the coordinates to a geoPoint
df['coordinates'] = df['coordinates'].apply(Point)
df
#convert df to a geodf
df = gpd.GeoDataFrame(df, geometry='coordinates')
print(type(df))

#plot the geodf
fig, ax = plt.subplots(figsize=(14,6))
df.plot(ax = ax)

# total_bounds is (minx, miny, maxx, maxy); pad every side by one unit so all
# points stay inside the axes (a fixed ylim of [40, 42] would cut off the two
# northernmost sample points)
minx, miny, maxx, maxy = df.total_bounds
xlim = [minx - 1, maxx + 1]
ylim = [miny - 1, maxy + 1]
ax.set_xlim(xlim)
ax.set_ylim(ylim)
plt.show()

How do I pull data from both of these files?

I am trying to create a map that tracks COVID-19 confirmed cases by county using FIPS codes. How am I able to make this code gather the data from both of those data files?
If you run the code as is (NY times data) then the map does not fill in counties with zero cases as zero cases. This is because the NY times data does not list the data for the places with zero cases. The other data does list places with zero cases. So, whatever doesn't get filled in with the NY times data I would like to fill in with the other data set. How do I do this? Or how do I fix my problem? Also, when hovering over the map how do I make it state the county name instead of the FIPS number?
Furthermore, how do I make this a live map that auto-updates when there is new data?
# Question snippet: download the county GeoJSON and a case table, then draw a
# US county choropleth coloured by the 'cases' column.
from urllib.request import urlopen
import json
# NOTE(review): the body of the with-statement (next line but one) appears to
# have lost its indentation when the page was extracted
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
counties = json.load(response)
import pandas as pd
# First data set, read with the fips column kept as a string
df = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
dtype={"fips": str})
# NOTE(review): this second read rebinds df, so the frame loaded above is
# discarded as written
df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-28-2020.csv",
dtype={"fips": str})
import plotly.express as px
# NOTE(review): the ")" on the next line has no matching "(" - likely a paste
# artifact; as written it is a syntax error
)
fig = px.choropleth(df, geojson=counties, locations='fips', color='cases',
color_continuous_scale="dense",
range_color=(0, 100),
scope="usa",
labels={'cases':'Confirmed COVID:19 Cases'},
)
# Remove the figure margins on all four sides
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

Categories