I'm having an odd issue with Plotly, the image below will give some context:
This is the map made with Bokeh
This is the map made with Plotly
The same transformation steps are applied to both versions, however for some reason Plotly will exclude some of the shapes.
These are the transformation steps I am using:
import pandas as pd
import plotly.io as pio
import plotly.graph_objs as go
import json
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely import wkt
from bokeh.plotting import save, figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.io import show, output_file
from bokeh.palettes import brewer
df_test = pd.read_csv(f'{filepath}')
df_blocks = pd.read_csv(f'{filepath}')
group_2 = df_test[['geo_name', 'edited_characteristics', 'total', 'male', 'female']]
group_2 = group_2.pivot(index='geo_name', columns='edited_characteristics', values=['total', 'male', 'female'])
cat = 'Total - Low-income status in 2015 for the population in private households to whom low-income concepts are applicable - 100% data'
group_2['LIM 0-17 percent'] = (
group_2[( 'total', f'{cat}//0 to 17 years')] /
group_2[( 'total', cat)]
)
group_2.reset_index(inplace=True)
g2 = group_2[['geo_name', 'LIM 0-17 percent']]
g2.rename(columns={'geo_name': 'DAUID'}, inplace=True)
df_g2 = pd.merge(g2, df_blocks, on='DAUID')
df_g2['geometry'] = df_g2['geometry'].apply(wkt.loads)
geo_df_g2 = gpd.GeoDataFrame(df_g2, geometry='geometry')
geo_df_g2.crs = {'init': 'epsg:3347'}
geo_df_g2 = geo_df_g2.to_crs({'init': 'epsg:4326'})
geo_df_g2 = geo_df_g2[geo_df_g2[('LIM 0-17 percent', '')] < 1]
mean = geo_df_g2[('LIM 0-17 percent', '')].mean()
std = geo_df_g2[('LIM 0-17 percent', '')].std()
geo_df_g2 = geo_df_g2[(geo_df_g2[('LIM 0-17 percent', '')] < (mean - 1
* std)) | (geo_df_g2[('LIM 0-17 percent', '')] > (mean + 1 * std))]
geo_df_g2.columns = [x[0] if type(x) is tuple else x for x in
geo_df_g2.columns]
geo_df_g2 = geo_df_g2.loc[:, ~geo_df_g2.columns.duplicated()]
geo_df_g2_j = geo_df_g2.copy()
geo_df_g2_j['DAUID'] = geo_df_g2_j['DAUID'].astype(str)
geo_df_g2_j.set_index('DAUID', inplace=True)
geo_df_g2_json = json.loads(geo_df_g2_j.to_json())
USING PLOTLY
geo_df_g2 = geo_df_g2[['DAUID', 'LIM 0-17 percent']]
geo_df_g2['DAUID'] = geo_df_g2['DAUID'].astype(str)
fig = go.Figure(go.Choroplethmapbox(geojson=geo_df_g2_json,
locations=geo_df_g2['DAUID'],
z=geo_df_g2['LIM 0-17 percent'],
colorscale='Viridis',
zauto=True,
marker_opacity=0.5,
marker_line_width=0.5)
)
fig.update_layout(mapbox_style='white-bg',
#mapbox_accesstoken=mapbox_token,
mapbox_zoom=12,
mapbox_center={'lat': 45.41117, 'lon': -75.69812})
fig.update_layout(margin={'r':0, 't':0, 'l':0, 'b':0})
pio.renderers.default = 'browser'
fig.show()
USING BOKEH
json_data = json.dumps(geo_df_g2_json)
geosource = GeoJSONDataSource(geojson=json_data)
palette = brewer['YlGnBu'][8]
palette = palette[::-1]
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 40)
tick_labels = {'0': '0%', '5': '5%', '10':'10%', '15':'15%',
'20':'20%', '25':'25%', '30':'30%','35':'35%', '40': '>40%'}
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width
= 500, height = 20,
border_line_color=None,location = (0,0), orientation =
'horizontal', major_label_overrides = tick_labels)
p = figure(title='LIM', plot_height=600, plot_width=950,
toolbar_location=None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.patches('xs', 'ys', source=geosource, fill_color={'field': 'LIM 0-17 percent', 'transform': color_mapper}, line_color='black', line_width=0.25, fill_alpha=1)
output_file('test_bokeh.html')
show(p)
As you could see, they both use the same projections, same dataframe transformation, and the same categories. Is there a way to fix this?
TIA
EDIT: The shapes are in the correct position, there are just a lot of them missing from the plot.
UPDATE: In hopes of seeing if other Plotly modules could solve the problem, I kind of narrowed down the issue. Using the tutorial on Plotly for creating a Scattermapbox, the way they called the mapbox features worked better at revealing the inherit problems than the tutorial did on the Choroplethmapbox. Apparently what is happening is that Plotly (or Mapbox) is not recognizing several groups of nearby points as coordinates for a polygon, and hence excluding them until you specify that you want them present. This is done by setting the mapbox dictionary values for 'type' to either 'fill', 'line', or 'circle'. This of course leads to another issue, whereby those new shapes are not colored or labelled the same way as the original polygons since they were not there by default.
Here is the code sample that helps show the problem with the polygon points not forming a complete shape:
fig = go.Figure(go.Choroplethmapbox(geojson=geo_df_g2_json,
locations=geo_df_g2['DAUID'],
z=geo_df_g2['LIM 0-17 percent'],
below='traces',
colorscale='Viridis',
zauto=True,
marker_opacity=0.5,
marker_line_width=0.5)
)
fig.update_layout(
mapbox = {
'style': 'carto-positron',
'center': {'lat': 45.41117, 'lon': -75.69812},
'zoom': 12, 'layers': [{
'source': {
'type': "FeatureCollection",
'features': geo_df_g2_json['features']
},
'type': 'fill', 'below': 'traces', 'color': 'lightblue'}]},
margin = {'l':0, 'r':0, 'b':0, 't':0})
fig.show()
To clarify my intent, there are two questions I'm trying to answer:
Why does Plotly transform some polygon coordinates to a shape and others to just the individual points?
Is there a workaround to fill the shapes after using the above function, based on the 'z' value?
I found out what was causing the polygons to disappear. Since Plotly uses geojson files vs. interacting with geopandas dataframes (I believe that's the reason), it has more stringent requirements on data formatting. Other libraries like Bokeh, contextily, or geopandas aggregate multiple rows of polygons that share a common parent before plotting them, whereas Plotly looks at them individually. In my case, since each 'id' had mutliple sub-ids, each with their own polygon coordinates, Plotly would just pick one when plotting them. It would store the rest as points, and it would only display them if I used the 'fill' option. Here is a rough example of what my dataframe looked like:
DAUID DBUID Total geometry
001 00101 5 Polygon(x1, y1)
001 00102 5 Polygon(x2, y2)
001 00103 5 Polygon(x3, y3)
So while the primary id and the total values stayed constant, the geometries did not. I found this out by accident when trying to write a color mapper and noticed I had duplicate entries for the DAUID. At the end, it was my fault for not using the correct database.
It looks like Plotly will be introducing geopandas support soon, so I would be curious to see if it resolves edge cases like this.
I had a similar issue. That is a slice of my geopandas dataframe looked like -
province_id geometry
0 1 POLYGON (x1, y1)
1 1 POLYGON (x2, y2)
2 1 POLYGON (x3, y3)
I used province_id_data.dissolve(by='province_id', aggfunc='first') to combine them into a multipolygon and then plot using plotly.
Related
I use the scatter_geo function from plotly.express to plot geographical data on a map.
import geopandas as gpd
import plotly.express as px
fig = px.scatter_geo(gdf,
lat = 'latitude',
lon = 'longitude',
geojson='geometry',
scope='europe',
animation_frame = 'year')
fig.show()
How can I archive that the map is centered to only one country, in my case Germany? The parameter scope accepted only continents. There are two more parameters, center and fitbounds, that sounds useful, but I don't understand to fill in the right value.
center (dict) – Dict keys are 'lat' and 'lon' Sets the center point of
the map.
fitbounds (str (default False).) – One of False, locations or geojson.
Dummie data:
geometry latitude longitude value year
0 POINT (13.72740 51.05570) 51.0557 13.7274 35.55 1838
1 POINT (13.72740 51.05570) 51.0557 13.7274 35.15 1842
There are two ways to specify a specific latitude and longitude: by writing it directly or by adding it in a layout update. Adding it via layout update is more flexible and adjustable. For the scope, select Europe to draw the area by country. To zoom in on the map, use projection_scalse instead of zoom. I was not sure about the center of Germany so I used the data you presented, please change it.
fig = px.scatter_geo(gdf,
lat = 'latitude',
lon = 'longitude',
geojson='geometry',
scope='europe',
center=dict(lat=51.0057, lon=13.7274),
animation_frame = 'year')
Update layout
import plotly.express as px
df = px.data.gapminder().query("year == 2007")
fig = px.scatter_geo(df,
locations="iso_alpha",
size="pop",
projection="natural earth"
)
fig.update_layout(
autosize=True,
height=600,
geo=dict(
center=dict(
lat=51.0057,
lon=13.7274
),
scope='europe',
projection_scale=6
)
)
fig.show()
So, I am doing a map that shows the flow of people among some cities in Brazil by drawing lines on the map, representing the path, and setting its opacity according to the count of occurrences. To do so, I am following this code (third map, the one about flights on US).
My question is, can I draw the borders between countries? And, if possible, also between Brazilian states?
In the documentation, there is an argument of the function called "geojson", but I'am not sure on how to use it, or if it is even useful for me.
Note that I have GeoJSON data for both countries and states.
Here's the code to generate the my map:
import pandas as pd
import plotly.graph_objects as go
fig = go.Figure()
for i in range(len(my_df)):
fig.add_trace(
go.Scattergeo(
lon = [my_df['res_longitude'][i], my_df['nasc_longitude'][i]],
lat = [my_df['res_latitude'][i], my_df['nasc_latitude'][i]],
mode = 'lines',
line = dict(width = 1,color = 'red'),
opacity = min(1, float(my_df['flow'][i]) / float(my_df['flow'].quantile(.95))),
)
)
fig.update_layout(
showlegend = False,
margin ={'l':0,'t':0,'b':0,'r':0},
mapbox = {
'center': {'lon': -50.3206, 'lat': -16.4984},
'style': "stamen-terrain",
'zoom': 3}
)
and here's the result:
Since I don't have the geojson data and the latitude and longitude information to draw the line, I'll use the official reference you referenced to answer your question.
Using the choropleth map, add a sum column with 0 to the data used in this sample.
Specify the geojson you obtained to geojson=usa_geo.
We associate the geojson state name with the state in the data.
I set the map fill to a light gray.
Note: The center setting of the map is automatically calculated since we are using fitbounds for the location.
from urllib import request
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
# usa geojson data
# https://eric.clst.org/tech/usgeojson/
usa_json = open('./data/gz_2010_us_040_00_500k.json', 'r')
usa_geo = json.load(usa_json)
# Choropleth Maps with go.Choropleth
# https://plotly.com/python/choropleth-maps/
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
# https://plotly.com/python/lines-on-maps/
df_flight_paths = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_february_aa_flight_paths.csv')
# dummy data column
dfs = pd.concat([df, pd.Series([0]*len(df),name='count')], axis=1)
fig = go.Figure()
fig.add_trace(go.Choropleth(
geojson=usa_geo,
locations=df['state'],
z = dfs['count'].astype(float),
featureidkey='properties.NAME',
colorscale = [[0,'rgb(200, 200, 200)']],
showlegend=False,
coloraxis=None,
colorbar=None
))
fig.update_traces(showscale=False)
flight_paths = []
for i in range(len(df_flight_paths)):
fig.add_trace(
go.Scattergeo(
#locationmode = 'USA-states',
lon = [df_flight_paths['start_lon'][i], df_flight_paths['end_lon'][i]],
lat = [df_flight_paths['start_lat'][i], df_flight_paths['end_lat'][i]],
mode = 'lines',
line = dict(width = 1,color = 'red'),
opacity = float(df_flight_paths['cnt'][i]) / float(df_flight_paths['cnt'].max()),
showlegend=False
)
)
fig.update_layout(
autosize=False,
width=1000,
height=600,
margin={"r":0,"t":0,"l":0,"b":0},
geo=dict(
scope='north america', # you chenge 'south america'
fitbounds="locations", # It is associated width 'px.Choropleth'
visible=True,
showland=True,
#center=dict(lon=34.05795, lat=-179.25450),
# The center designation of the map has no effect as this is automatically calculated
)
)
fig.show()
I am very new to using Python and especially new to using the Bokeh library. I am trying to plot a Choropleth map of the United States with the fill color of each state corresponding to their bee population of a year.
It shows the value when you hover over it, but only the states with a value of zero have color.
Link to an image of the output plot is here.
I know there is a big difference in the range (minimum:0, maximum: 310,000) which I believe is causing the problem. How can I change the range of the color map to not fill all of the higher values with grey?
Code for reference below:
from bokeh.models import LogColorMapper
from bokeh.palettes import YlGnBu9 as YlGnBu
from bokeh.sampledata.us_states import data as us_states
import pandas as pd
import numpy as np
bee_pop = pd.read_csv('./BeePopulation.csv')
us_states_df = pd.DataFrame(us_states).T
us_states_df = us_states_df[~us_states_df["name"].isin(['Alaska', "Hawaii", "District of
Columbia"])]
us_states_df["lons"] = us_states_df.lons.values.tolist()
us_states_df["lats"] = us_states_df.lats.values.tolist()
us_states_df = us_states_df.reset_index()
bee_2016 = bee_pop[bee_pop['Year']==2016]
us_states_df = us_states_df.merge(bee_2016[["State", "Pop"]], how="left", left_on="index",
right_on="State")
us_states_df.head()
us_states_datasource = {}
us_states_datasource["lons"] = us_states_df.lons.values.tolist()
us_states_datasource["lats"] = us_states_df.lats.values.tolist()
us_states_datasource["name"] = us_states_df.name.values.tolist()
us_states_datasource["BeePop"] = us_states_df.Pop.values.tolist()
fig = figure(plot_width=900, plot_height=600,
title="United Bee Population Per State Choropleth Map",
x_axis_location=None, y_axis_location=None,
tooltips=[
("Name", "#name"), ("Bee Population", "#BeePop")
])
fig.grid.grid_line_color = None
fig.patches("lons", "lats", source=us_states_datasource,
fill_color={'field': 'BeePop', 'transform': LogColorMapper(palette=YlGnBu[::-1])},
fill_alpha=0.7, line_color="white", line_width=0.5)
show(fig)
Thank you in advance!
The LogColorMapper has configurable high and low properties. Another option, of course, is to use a different color mapper, e.g. LinearColorMapper or CategorgicalColorMapper in conjunction with some categorical binning.
I can't find anything in the documentation about controlling where to place the colorbar, just whether or not it should be shown and with what color scale, etc. Can this be done?
If it helps, I am implementing my choropleth map with Dash.
The answer:
fig.data[0].colorbar.x=-0.1
or:
fig.update_layout(coloraxis_colorbar_x=-0.1)
Some details:
You haven't provided any code so I'll refer to an example from the docs where the position of the colorbar along the x-axis defaults to 1.2. You can change this directly through, for example:
fig.data[0].colorbar.x=-0.1
And get:
Complete code:
import plotly.graph_objects as go
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')
fig = go.Figure(data=go.Choropleth(
locations=df['code'], # Spatial coordinates
z = df['total exports'].astype(float), # Data to be color-coded
locationmode = 'USA-states', # set of locations match entries in `locations`
colorscale = 'Reds',
colorbar_title = "Millions USD",
))
fig.update_layout(
title_text = '2011 US Agriculture Exports by State',
geo_scope='usa', # limite map scope to USA
)
fig.data[0].colorbar.x=-0.1
fig.show()
Using matplotlib, we can "trivially" fill the area between two vertical lines using fill_between() as in the example:
https://matplotlib.org/3.2.1/gallery/lines_bars_and_markers/fill_between_demo.html#selectively-marking-horizontal-regions-across-the-whole-axes
Using matplotlib, I can make what I need:
We have two signals, and I''m computing the rolling/moving Pearson's and Spearman's correlation. When the correlations go either below -0.5 or above 0.5, I want to shade the period (blue for Pearson's and orange for Spearman's). I also darken the weekends in gray in all plots.
However, I'm finding a hard time to accomplish the same using Plotly. And it will also be helpful to know how to do it between two horizontal lines.
Note that I'm using Plotly and Dash to speed up the visualization of several plots. Users asked for a more "dynamic type of thing." However, I'm not a GUI guy and cannot spend time on this, although I need to feed them with initial results.
BTW, I tried Bokeh in the past, and I gave up for some reason I cannot remember. Plotly looks good since I can use either from Python or R, which are my main development tools.
Thanks,
Carlos
I don't think there is any built-in Plotly method that that is equivalent to matplotlib's fill_between() method. However you can draw shapes so a possible workaround is to draw a grey rectangle and set the the parameter layer="below" so that the signal is still visible. You can also set the coordinates of the rectangle outside of your axis range to ensure the rectangle extends to the edges of the plot.
You can fill the area in between horizontal lines by drawing a rectangle and setting the axes ranges in a similar manner.
import numpy as np
import plotly.graph_objects as go
x = np.arange(0, 4 * np.pi, 0.01)
y = np.sin(x)
fig = go.Figure()
fig.add_trace(go.Scatter(
x=x,
y=y
))
# hard-code the axes
fig.update_xaxes(range=[0, 4 * np.pi])
fig.update_yaxes(range=[-1.2, 1.2])
# specify the corners of the rectangles
fig.update_layout(
shapes=[
dict(
type="rect",
xref="x",
yref="y",
x0="4",
y0="-1.3",
x1="5",
y1="1.3",
fillcolor="lightgray",
opacity=0.4,
line_width=0,
layer="below"
),
dict(
type="rect",
xref="x",
yref="y",
x0="9",
y0="-1.3",
x1="10",
y1="1.3",
fillcolor="lightgray",
opacity=0.4,
line_width=0,
layer="below"
),
]
)
fig.show()
You haven't provided a data sample so I'm going to use a synthetical time-series to show you how you can add a number of shapes with defined start and stop dates for several different categories using a custom function bgLevel
Two vertical lines with a fill between them very quickly turns into a rectangle. And rectangles can easily be added as shapes using fig.add_shape. The example below will show you how to find start and stop dates for periods given by a certain critera. In your case these criteria are whether or not the value of a variable is higher or lower than a certain level.
Using shapes instead of traces with fig.add_trace() will let you define the position with regards to plot layers using layer='below'. And the shapes outlines can easily be hidden using line=dict(color="rgba(0,0,0,0)).
Plot 1: Time series figure with random data:
Plot 2: Background is set to an opaque grey when A > 100 :
Plot 2: Background is also set to an opaque red when D < 60
Complete code:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import datetime
pd.set_option('display.max_rows', None)
# data sample
nperiods = 200
np.random.seed(123)
df = pd.DataFrame(np.random.randint(-10, 12, size=(nperiods, 4)),
columns=list('ABCD'))
datelist = pd.date_range(datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),periods=nperiods).tolist()
df['dates'] = datelist
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
df.iloc[0] = 0
df = df.cumsum().reset_index()
# function to set background color for a
# specified variable and a specified level
# plotly setup
fig = px.line(df, x='dates', y=df.columns[1:])
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,255,0.1)')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,255,0.1)')
def bgLevels(fig, variable, level, mode, fillcolor, layer):
"""
Set a specified color as background for given
levels of a specified variable using a shape.
Keyword arguments:
==================
fig -- plotly figure
variable -- column name in a pandas dataframe
level -- int or float
mode -- set threshold above or below
fillcolor -- any color type that plotly can handle
layer -- position of shape in plotly fiugre, like "below"
"""
if mode == 'above':
m = df[variable].gt(level)
if mode == 'below':
m = df[variable].lt(level)
df1 = df[m].groupby((~m).cumsum())['dates'].agg(['first','last'])
for index, row in df1.iterrows():
#print(row['first'], row['last'])
fig.add_shape(type="rect",
xref="x",
yref="paper",
x0=row['first'],
y0=0,
x1=row['last'],
y1=1,
line=dict(color="rgba(0,0,0,0)",width=3,),
fillcolor=fillcolor,
layer=layer)
return(fig)
fig = bgLevels(fig = fig, variable = 'A', level = 100, mode = 'above',
fillcolor = 'rgba(100,100,100,0.2)', layer = 'below')
fig = bgLevels(fig = fig, variable = 'D', level = -60, mode = 'below',
fillcolor = 'rgba(255,0,0,0.2)', layer = 'below')
fig.show()
I think that fig.add_hrect() and fig.add_vrect() are the simplest approaches to reproducing the MatPlotLib fill_between functionality in this case:
https://plotly.com/python/horizontal-vertical-shapes/
For your example, add_vrect() should do the trick.