I need to plot a Choropleth graph on a Plotly map using a custom SHP file.
The SHP file provides the boundary information. I convert it to GeoJSON and feed it to Plotly, but all I get is an empty base map with no error messages.
Here is what i tried:
import json
import random
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go
# Load the postal-area boundaries from the shapefile.
geodf = gpd.read_file('data/postal-areas-2021/PKS_postinumeroalueet_2021_shp.shp')
# Save as GeoJSON.
# NOTE(review): no reprojection to lon/lat happens before the data reaches Plotly.
geodf.to_file("data.geojson", encoding='utf-8', driver="GeoJSON")

# Read the GeoJSON back as a plain dict for Plotly.
with open("data.geojson", encoding='utf-8') as geofile:
    counties = json.load(geofile)

# One row per feature: Posno (postal code) is the ID that joins the data to
# the GeoJSON, and `rand` is a random integer in 1-100 used as the colour value.
features = counties['features']
postal_code = [feature['properties']['Posno'] for feature in features]
rand = [random.randint(1, 100) for _ in features]
df = pd.DataFrame({'Posno': postal_code, 'rand': rand})

# Create the figure and assign the values.
fig = go.Figure(go.Choroplethmapbox(geojson=counties, locations=df['Posno'], z=df['rand'],
                                    colorscale="Viridis", zmin=0, zmax=12, marker_line_width=5))
fig.update_layout(mapbox_style="open-street-map",
                  height=1000,
                  autosize=True,
                  margin={"r": 0, "t": 0, "l": 0, "b": 0},
                  paper_bgcolor='#303030',
                  plot_bgcolor='#303030',
                  mapbox=dict(center=dict(lat=60.1699, lon=24.9384), zoom=11),
                  )
fig.show()
Question:
How to plot a choropleth from an SHP file with Plotly go.Choroplethmapbox()?
You have missed one very important step: consideration of the CRS projection; see https://geopandas.org/docs/user_guide/projections.html
This is resolved with geodf = geodf.to_crs("WGS84").
Additionally, it's far simpler to use the approach described at https://plotly.com/python/mapbox-county-choropleth/#using-geopandas-data-frames to generate this mapbox figure.
plotly graph objects
import requests
from pathlib import Path
from zipfile import ZipFile
import geopandas as gpd
import numpy as np
import plotly.graph_objects as go
import json
# Download the zipped shapefile once and cache it in the working directory.
url = "https://avoidatastr.blob.core.windows.net/avoindata/AvoinData/9_Kartat/PKS%20postinumeroalueet/Shp/PKS_postinumeroalueet_2021_shp.zip"
fn = Path.cwd().joinpath(url.split("/")[-1])
if not fn.exists():
    r = requests.get(url, stream=True)
    # Fail before writing anything, so an HTTP error body is never cached as the zip.
    r.raise_for_status()
    with open(fn, "wb") as f:
        for chunk in r.raw.stream(1024, decode_content=False):
            if chunk:
                f.write(chunk)

# Unpack the archive into the working directory.
with ZipFile(fn) as zfile:
    zfile.extractall()

# Open the extracted shapefile and attach a random value per area to colour by.
geodf = gpd.read_file(list(Path.cwd().glob("PKS*.shp"))[0])
geodf["rand"] = np.random.randint(1, 100, len(geodf))
# The shapefile uses a different CRS; convert to lon/lat (WGS84) coordinates.
# Indexing by Posno makes the postal code the feature id in the GeoJSON.
geodf = geodf.to_crs("WGS84").set_index("Posno")

fig = go.Figure(go.Choroplethmapbox(geojson=json.loads(geodf.to_json()),
                                    locations=geodf.index, z=geodf['rand'],
                                    colorscale="Viridis", marker_line_width=.5))
fig.update_layout(mapbox_style="open-street-map",
                  height=1000,
                  autosize=True,
                  margin={"r": 0, "t": 0, "l": 0, "b": 0},
                  paper_bgcolor='#303030',
                  plot_bgcolor='#303030',
                  mapbox=dict(center=dict(lat=60.1699, lon=24.9384), zoom=9),
                  )
plotly express
import requests
from pathlib import Path
from zipfile import ZipFile
import geopandas as gpd
import numpy as np
import plotly.express as px
# Download the zipped shapefile once and cache it in the working directory.
url = "https://avoidatastr.blob.core.windows.net/avoindata/AvoinData/9_Kartat/PKS%20postinumeroalueet/Shp/PKS_postinumeroalueet_2021_shp.zip"
fn = Path.cwd().joinpath(url.split("/")[-1])
if not fn.exists():
    r = requests.get(url, stream=True)
    # Fail before writing anything, so an HTTP error body is never cached as the zip.
    r.raise_for_status()
    with open(fn, "wb") as f:
        for chunk in r.raw.stream(1024, decode_content=False):
            if chunk:
                f.write(chunk)

# Unpack the archive into the working directory.
with ZipFile(fn) as zfile:
    zfile.extractall()

# Open the extracted shapefile and attach a random value per area to colour by.
geodf = gpd.read_file(list(Path.cwd().glob("PKS*.shp"))[0])
geodf["rand"] = np.random.randint(1, 100, len(geodf))
# The shapefile uses a different CRS; convert to lon/lat (WGS84) coordinates.
# Index by Posno once so the data frame, the GeoJSON feature ids and the
# locations all use the same key.
geodf = geodf.to_crs("WGS84").set_index("Posno")

fig = px.choropleth_mapbox(
    geodf,
    geojson=geodf.geometry,
    locations=geodf.index,
    color="rand",
    center=dict(lat=60.1699, lon=24.9384),
    mapbox_style="open-street-map",
    zoom=9,
)
fig.update_layout(
    height=1000,
    autosize=True,
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    paper_bgcolor="#303030",
    plot_bgcolor="#303030",
)
fig
Related
I created a self-contained code to create a HeatMapWithTime map but it shows up as a blank file. This code is run on Jupyter and the output is a 14KB file and I've tried to open it in Chrome, Safari, Firefox but it is still blank.
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime
# Dummy data: 50 random points near Portland plus 50 sorted epoch timestamps.
latitudes = np.random.uniform(low=45.523, high=45.524, size=50)
longitudes = np.random.uniform(low=-122.675, high=-122.676, size=50)
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))
data = {'latitude': latitudes, 'longitude': longitudes, 'time': times}

# Build the frame and turn the epoch seconds into datetimes.
df = pd.DataFrame(data)
df['time'] = pd.to_datetime(df['time'], unit='s')

# Base map centred on the sample area.
map = folium.Map(location=[45.523, -122.675], zoom_start=13)

# Time-animated heat map layer: one [lat, lon, time] row per sample,
# labelled with the formatted timestamps.
rows = df[['latitude', 'longitude', 'time']].values.tolist()
labels = df['time'].dt.strftime("%Y-%m-%d %H:%M:%S")
layer = HeatMapWithTime(data=rows,
                        index=labels,
                        auto_play=True,
                        max_opacity=0.8)
layer.add_to(map)

# Write the map out as a standalone HTML page.
map.save("heatmap_with_timeline.html")
Folium version: 0.14.0
Python version: 3.9.12
To begin with, the target data for the heatmap is time-series data in date format. The sample data itself was raw data, but it was converted to date format. Also, I think the index of the time animation of the heatmap needs to be in list format. Finally, the sample data holds only a single latitude/longitude and heatmap value per time step. Since this folium heatmap is a density heatmap, multiple groups of data may be necessary. To create the data for the heatmap from your sample data, I loop over the 50 time indexes and, for each one, add an array of 50 latitude/longitude and heatmap values.
For data structures and examples, please refer to the following references. HeatMapWithTime Plugin
import folium
import pandas as pd
import numpy as np
from folium.plugins import HeatMapWithTime
# Generate dummy data: 50 sorted epoch timestamps, and for each timestamp
# a list of 50 [lat, lon, value] points.
times = np.sort(np.random.uniform(low=1580000000, high=1600000000, size=50))
data = []
for _ in times:
    latitudes = np.random.uniform(low=45.423, high=45.524, size=50)
    longitudes = np.random.uniform(low=-122.575, high=-122.676, size=50)
    value = np.random.uniform(0.0, 20.0, 50)
    row = [[lat, lon, v] for lat, lon, v in zip(latitudes, longitudes, value)]
    data.append(row)

# The animation index is a plain list of formatted timestamp strings.
index_time = pd.to_datetime(times, unit='s')
index_time = index_time.strftime("%Y-%m-%d %H:%M:%S").tolist()

# Create a base map
m = folium.Map(location=[45.523, -122.675], tiles="stamentoner", zoom_start=11)

# Create a heat map with timeline
hm = HeatMapWithTime(data,
                     index=index_time,
                     auto_play=True,
                     max_opacity=0.8)
hm.add_to(m)

# To write the map to an html file instead of displaying it inline, call
# m.save("heatmap_with_timeline.html").
m
This should help.
import pandas as pd
import folium
from folium.plugins import HeatMap
# Load the business data; latitude, longitude and stars columns are used below.
df = pd.read_csv('C:\\your_path\\business.csv')
df.head(3)

# Highest star rating, passed to the heat map as its maximum value.
max_amount = float(df['stars'].max())

# Base map for the heat layer.
hmap = folium.Map(location=[42.5, -75.5], zoom_start=7, )

# One (lat, lon, weight) tuple per business.
weighted_points = list(zip(df.latitude.values, df.longitude.values, df.stars.values))
hm_wide = HeatMap(
    weighted_points,
    min_opacity=0.2,
    max_val=max_amount,
    radius=17,
    blur=15,
    max_zoom=1,
)
hmap.add_child(hm_wide)
import pandas as pd
import gmplot
import matplotlib.pyplot as plt
import folium
from folium import plugins
import seaborn as sns
# Load the points; longitude, latitude and LOT columns are used below.
df = pd.read_csv('C:\\your_path\\lat_lon.csv')
m = folium.Map([40.7379601, -73.9666422], zoom_start=11)
m

X = df[['longitude', 'latitude', 'LOT']].copy()
# Mark each station as a point; the popup shows the row's LOT value.
for _, row in X.iterrows():
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=15,
                        popup=row['LOT'],
                        fill_color="#3db7e4",  # divvy color
                        ).add_to(m)

# Convert to (n, 2) nd-array format for the heatmap.
stationArr = df[['latitude', 'longitude']].to_numpy()
# Plot the heatmap on top of the markers.
m.add_child(plugins.HeatMap(stationArr, radius=15))
m
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20Coordinates%20using%20Folium%20CircleMarker.ipynb
https://github.com/ASH-WICUS/Notebooks/blob/master/Plotting%20Longitude%20and%20Latitude%20to%20Visualize%20Spatial%20Data%20for%20NYC%20Taxis.ipynb
I am working on a choropleth map and it is showing a white page instead of the map as shown here
https://i.stack.imgur.com/boYKY.png
I have both the geojson and the excel file downloaded in the same folder.
geojson https://drive.google.com/file/d/1N-rp9yHqE1Rzn2VxoAAweJ8-5XIjk61j/view?usp=sharing
excel https://docs.google.com/spreadsheets/d/1NKeUg20XxJe0jccMgjj9pMxrTIIWeuQk/edit?usp=sharing&ouid=100050178655652050254&rtpof=true&sd=true
Here is my code
import json
import numpy as np
import pandas as pd
import plotly.express as px
# Region statistics; K_KRAJ is the region key and OB1506 the value to colour by.
df = pd.read_excel('kraje.xlsx', sheet_name='List1')
# Load the region boundaries, closing the file once it has been parsed.
with open("KRAJE.geojson", "r") as geofile:
    regions_json = json.load(geofile)

fig = px.choropleth(df,
                    locations="K_KRAJ",
                    geojson=regions_json,
                    color='OB1506')
fig.show()
The console of my browser in which I am viewing the map shows
this
I am using a jupyter notebook in the brave browser.
Can anyone please help me solve this? Thanks
EDIT:
I found the correct geojson file but now I have a different issue. Only one region is colored and not even in the correct color and the rest of the map even outside of my regions is colored in the same color. When I hover over my regions I can see that they are in the correct place but with a wrong color. And I also have no idea why the code colored the whole map and not only the regions from the geojson file. here is an image of the output
new (should be correct) geojson https://drive.google.com/file/d/1S03NX5Q0pqgAsbJnjqt8O5w8gUHH1rt_/view?usp=sharing
import json
import numpy as np
import pandas as pd
import plotly.express as px
# Region statistics; K_KRAJ is the region key and OB1506 the value to colour by.
df = pd.read_excel('kraje.xlsx', sheet_name='List1')
# Load the region boundaries, closing the file once it has been parsed.
with open("KRAJE.geojson", "r") as geofile:
    regions_json = json.load(geofile)

# Promote the K_KRAJ property to the feature id so `locations` can match on it.
for feature in regions_json['features']:
    feature["id"] = feature["properties"]["K_KRAJ"]

fig = px.choropleth(df,
                    locations="K_KRAJ",
                    geojson=regions_json,
                    color='OB1506')
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
SOLUTION
Thanks to Rob Raymond it finally works. There was an issue with the geojson file. I also had a ton of problems installing geopandas and the only tutorial that actually worked was installing each package separately (https://stackoverflow.com/a/69210111/17646343)
There are multiple issues with your GeoJSON:
You need to define the CRS; it's clearly not EPSG:4326. It appears to be a UTM CRS for the Czech Republic.
Even with this, there are invalid polygons.
With valid GeoJSON, there are a few points you have missed:
locations needs to be common across your data frame and GeoJSON.
featureidkey needs to be used to define that you are joining on name.
import json
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px

# Locate the downloaded files by extension (".xlsx", ".geojson", ...) in
# ~/Downloads, trying both upper- and lower-case file name patterns.
files = {
    f.suffix: f
    for p in ["KRAJE*.*", "KRAJE*.*".lower()]
    for f in Path.home().joinpath("Downloads").glob(p)
}

# Replaces: pd.read_excel('kraje.xlsx', sheet_name='List1')
df = pd.read_excel(files[".xlsx"], sheet_name="List1")

# Replaces: json.load(open("KRAJE.geojson", "r"))
# Read through geopandas instead of the json module so the CRS can be
# declared (EPSG:32633) and the coordinates reprojected to lon/lat
# (EPSG:4326) before handing the features to Plotly.
regions_json = (
    gpd.read_file(files[".geojson"])
    .dropna()
    .set_crs("EPSG:32633", allow_override=True)
    .to_crs("epsg:4326")
    .__geo_interface__
)

fig = px.choropleth(
    df,
    locations="N_KRAJ",
    featureidkey="properties.name",
    geojson=regions_json,
    color="OB1506",
)
fig.update_geos(fitbounds="locations", visible=True)
fig
updated
There are still issues with your GeoJSON. I have fixed it using geopandas and buffer(0) (see "Fix invalid polygon in Shapely").
With this, and a change to the Plotly parameters, I can now generate a figure.
import json
import numpy as np
import pandas as pd
import plotly.express as px
import geopandas as gpd
from pathlib import Path
# Locate the downloaded files by extension (".xlsx", ".json", ...) in
# ~/Downloads using the two file name patterns below.
files = {
    f.suffix: f
    for p in ["KRAJ_*.*", "KRAJE*.*".lower()]
    for f in Path.home().joinpath("Downloads").glob(p)
}

# Replaces: pd.read_excel('kraje.xlsx', sheet_name='List1')
df = pd.read_excel(files[".xlsx"], sheet_name="List1")

# Replaces: json.load(open("KRAJE.geojson", "r"))
# The geometry is still invalid, so read it with geopandas and force it to
# valid with buffer(0) before handing the features to Plotly.
regions_json = (
    gpd.read_file(files[".json"])
    .assign(geometry=lambda d: d["geometry"].buffer(0))
    .__geo_interface__
)

fig = px.choropleth(
    df,
    locations="K_KRAJ",
    featureidkey="properties.K_KRAJ",
    geojson=regions_json,
    color="OB1506",
)
fig.update_geos(fitbounds="locations", visible=True)
fig
I have a graph in plotly which I want to replace the x labels.
I pasted this graph as an example. At the bottom you will see ARI, ATL, BAL, etc. I was wondering if its possible to replace these with images? Icons?
same approach that #r-begginers provided in referenced answer
have sourced all logos from kaggle. Used PIL for encoding
have synthesized as an axis by creating a second trace with a -ve percentage and used that plot area to place logos
have set xaxis to invisible so hover provided the team abbreviation
import sys
import urllib
import urllib.parse
from pathlib import Path
from zipfile import ZipFile

import kaggle.cli
import numpy as np
import pandas as pd
import plotly.express as px
import requests
from PIL import Image

# fmt: off
# Download the logo data set by driving the kaggle CLI entry point with a
# synthesized argv ("datasets download <owner>/<dataset>").
url = "https://www.kaggle.com/anzhemeng/nfl-team-logos"
sys.argv = [sys.argv[0]] + f"datasets download {urllib.parse.urlparse(url).path[1:]}".split(" ")
kaggle.cli.main()
# fmt: on

# The CLI leaves "<dataset>.zip" in the working directory; unpack the logos.
with ZipFile(f'{urllib.parse.urlparse(url).path.split("/")[-1]}.zip') as zfile:
    zfile.extractall("nfl-logos")

# One row per logo file; the team abbreviation is the file name stem and
# passResult is random sample data to plot.
df = pd.DataFrame(Path.cwd().joinpath("nfl-logos").glob("*.png"), columns=["filename"])
df["team"] = df["filename"].apply(lambda d: d.stem)
df["passResult"] = np.random.uniform(0, 1, len(df))
df = df.sort_values("team")

# The second, fully transparent trace at y=-0.05 reserves plot area below
# the data for the logos and supplies the team name on hover.
fig = px.scatter(df, x="team", y="passResult").add_traces(
    px.scatter(df, "team", np.full(len(df), -0.05))
    .update_traces(marker_color="rgba(0,0,0,0)", hovertemplate="%{x}")
    .data
)

# Place each team's logo, centred on its x position, in the reserved area.
for x in fig.data[0].x:
    fig.add_layout_image(
        source=Image.open(df.loc[df["team"].eq(x), "filename"].values[0]),
        x=x,
        y=-0.01,
        xref="x",
        yref="y",
        xanchor="center",
        sizex=1,
        sizey=1,
    )

# Hide the text axis; the logos now act as the tick labels.
fig.update_layout(xaxis={"visible": False})
I am new to python and wanted to try using a choropleth map. I have the following code for the graph.
import numpy as np
import pandas as pd
import plotly.express as px

# Confirmed-case time series: one row per country/region, one column per date.
df = pd.read_csv(r'C:\Users\lukee\Desktop\COVID Visualisation\time_series_covid_19_confirmed.csv')

# The country/region column is what gets passed as `geojson` below.
geojson = df['Country/Region']

# Colour scale for the number of cases.
colourscale = px.colors.sequential.Plasma

# World map showing the intensity of cases in each country on 5/16/21.
choropleth_options = dict(
    geojson=geojson,
    locationmode='country names',
    color=df['5/16/21'],
    color_continuous_scale=colourscale,
    scope='world',
    hover_name=df["Country/Region"],
    labels={'COVID Cases'},
)
fig = px.choropleth(df, **choropleth_options)
fig.update(layout_coloraxis_showscale=False)
fig.show()
The solution sources its data from Our World in Data (OWID), not Kaggle.
In the plotting code, there were some inconsistencies in how you requested columns from the data frame. I added the featureidkey parameter so that the data frame and the GeoJSON join correctly.
data sourcing
import requests
import pandas as pd
from pathlib import Path
from zipfile import ZipFile
import json, io
# Source GeoJSON for country boundaries: look up the zipped resource in the
# datahub package descriptor and cache the download in the working directory.
geosrc = pd.json_normalize(requests.get("https://pkgstore.datahub.io/core/geo-countries/7/datapackage.json").json()["resources"])
geo_zip_url = geosrc.loc[geosrc["name"].eq("geo-countries_zip"), "path"].values[0]
fn = Path(geo_zip_url).name
if not Path.cwd().joinpath(fn).exists():
    r = requests.get(geo_zip_url, stream=True)
    # Fail before writing anything, so an HTTP error body is never cached as the zip.
    r.raise_for_status()
    with open(fn, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)

# The first archive member is parsed as the GeoJSON document.
with ZipFile(fn) as zfile:
    with zfile.open(zfile.infolist()[0]) as f:
        geojson = json.load(f)

# Source COVID data and keep the latest row for every iso_code.
dfall = pd.read_csv(io.StringIO(requests.get("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv").text))
dfall["date"] = pd.to_datetime(dfall["date"])
dflatest = (dfall.sort_values(["iso_code", "date"]).groupby("iso_code", as_index=False).last())
colorcol = "new_cases_smoothed_per_million"

# Filter out rows that are not for a country (iso_code is not 3 characters),
# have no positive value for the latest date, or are a big outlier (at or
# above the 97th percentile).
dflatest = dflatest.loc[
    dflatest[colorcol].gt(0).fillna(False)
    & dflatest["iso_code"].str.len().eq(3)
    & dflatest[colorcol].lt(dflatest[colorcol].quantile(0.97))
]
plotting
import plotly.express as px
# Colour scale for the number of cases.
colourscale = px.colors.sequential.Plasma

# World map showing the intensity of cases in each country. featureidkey
# tells Plotly which GeoJSON property the iso_code column joins on.
fig = px.choropleth(dflatest,
                    geojson=geojson,
                    locations='iso_code',
                    featureidkey="properties.ISO_A3",
                    color=colorcol,
                    color_continuous_scale=colourscale,
                    scope='world',
                    hover_name="location",
                    labels={colorcol: 'COVID Cases'}
                    )
# Zero all four margins; the original dict repeated "r" and never set "b".
fig.update_layout(coloraxis_showscale=False, margin={"l": 0, "r": 0, "t": 0, "b": 0})
fig
output
I have some sample code to plot a map of Ontario using Bokeh. The code reads in the shapefile and converts it to a geojson file as suggested from examples available in the internet.
The shapefile source data is the Ontario census subdivision geographic boundary from the StatsCan website downloaded as a shapefile.
Image screenshot: https://imgur.com/xn1Zzdh
The result so far is an empty chart and I can't figure out what's wrong.
The shapefile is loaded first as a geopandas dataframe and converted to geojson.
Apologies for my lack of stackoverflow etiquette. I'm a new user.
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import geopandas
import os
from bokeh.plotting import figure, output_file, show, save,output_notebook
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
pd.options.display.max_rows = 10
workspace = r'C:\Users\user\Documents\lcsd000b16a_e'
CSD_LAYER = geopandas.read_file(os.path.join(workspace,r"lcsd000b16a_e.shp"))
ONT_CSD = CSD_LAYER[CSD_LAYER['PRUID']=='35']
ONT_CSD['geometry'].head()
1372 POLYGON ((7202895.13143 1077367.822855, 720382...
1373 POLYGON ((7205717.394285 1098087.974285, 72058...
1374 POLYGON ((7169056.905715 1216085.682855, 71693...
1614 POLYGON ((7162217.717145 948748.982855, 716229...
1809 POLYGON ((7506330.95143 1116872.145715, 750632...
# # Get the CRS of our grid
CRS = ONT_CSD.crs
print('FROM:' + str(CRS))
ONT_CSD = ONT_CSD.to_crs(epsg=3857) #transform to webmercator
print('TO: '+ str(ONT_CSD.crs))
FROM:{'init': 'epsg:3347'}
TO: {'init': 'epsg:3857', 'no_defs': True}
import json
#read data to json file
ONT_CSD_json = json.loads(ONT_CSD.to_json())
#convert to string like object
ONT_CSD_JSON_DATA = json.dumps(ONT_CSD_json)
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = ONT_CSD_JSON_DATA)
#Create figure object.
p = figure(title = 'test', plot_height = 600 , plot_width = 950)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure.
p.patch('xs','ys', source = geosource,
line_color = 'black', line_width = 1, fill_alpha = 0.75)