Sorting Box Plots by Median using Plotly Graph Objects

Sorting Box Plots by Median using Plotly Graph Objects - python

I'm pretty much a beginner in plotly/pandas/data but I'm trying to make this graph and no matter what I search up, I can't find any attributes that are compatible with dictionaries. The data I'm using is the Time series download speed for 9 different software. I am trying to display the box plot descending by their median values.
Here is my code:
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import plot
import numpy as np
olddf = pd.read_csv("justice.csv")
df = olddf.interpolate()
col = df.loc[:,'Bfy':'Sfy']
df["1"] = col.mean(axis=1)
col2 = df.loc[:,'Bakamai':'Sakamai']
df["2"] = col2.mean(axis=1)
col4 = df.loc[:,'Bazure':'Sazure']
df["4"] = col4.mean(axis=1)
col5 = df.loc[:,'Bcloudflare':'Scloudflare']
df["5"] = col5.mean(axis=1)
col6 = df.loc[:,'Bfastly':'Sfastly']
df["6"] = col6.mean(axis=1)
col7 = df.loc[:,'BAWS':'SAWS']
df["7"] = col7.mean(axis=1)
col8 = df.loc[:,'Bali':'Sali']
df["8"] = col8.mean(axis=1)
col9 = df.loc[:,'Bgoog':'Sgoog']
df["9"] = col9.mean(axis=1)
trace_one = go.Box(
y=df['1'],
name="Fy",
line = dict(color='#8235EA'),
opacity = 0.8)
trace_two = go.Box(
y=df['2'],
name="Akamai",
line = dict(color='#EA8933'),
opacity = 0.8)
trace_four = go.Box(
y=df['4'],
name="Azure",
line = dict(color='#62F92C'),
opacity = 0.8)
trace_five = go.Box(
y=df['5'],
name="Cloudflare",
line = dict(color='#3548EA'),
opacity = 0.8)
trace_six = go.Box(
y=df['6'],
name="Fastly",
line = dict(color='#D735EA'),
opacity = 0.8)
trace_seven = go.Box(
y=df['7'],
name="AWS Cloudfront",
line = dict(color='#29E5B7'),
opacity = 0.8)
trace_eight = go.Box(
y=df['8'],
name="Alibaba Cloud",
line = dict(color='#3597EA'),
opacity = 0.8)
trace_nine = go.Box(
y=df['9'],
name="Google Cloud",
line = dict(color='#EA4833'),
opacity = 0.8,
)
data=[trace_one, trace_four, trace_seven, trace_eight, trace_nine, trace_five, trace_two]
layout = dict(
title = "CHINA - Software vs Mb loaded per second")
fig = dict(data=data, layout=layout)
plot(fig)
csv layout example:
datetime,Bfy,Sfy,Gfy,Bakamai,Sakamai,Gakamai,Bazuaka,Sazuaka,Gazuaka,Bazure,Sazure,Gazure,Bcloudflare,Scloudflare,Gcloudflare,Bfastly,Sfastly,Gfastly,BAWS,SAWS,GAWS,Bali,Sali,Gali,Bgoog,Sgoog,Ggoog
23/07/21 10:02PM,,,215200,1489,1571,,1897,12400,173600,6551,,,1556,769,,,,749,6124,9347,2179,4160,,4473,4635,906,3426
23/07/21 10:12PM,22653,21520,,,1670,,17360,,,,10850,,,18261,1522,,3414,2010,5148,10447,2030,2667,4160,4119,5837,1592,3216
23/07/21 10:22PM,23911,,,1535,1615,815,3156,13354,177,6313,,,,825,586,873,,885,4280,6458,2114,4039,4119,6303,5629,1072,3283

taken a different approach to data preparation
pair columns, calculate means
create new dataframe from these paired column means
order columns of this data preparation based on their medians
create box plots in same order as ordered columns
found two providers that your code did not plot...
import plotly.graph_objects as go
import pandas as pd
import io
df = pd.read_csv(io.StringIO("""datetime,Bfy,Sfy,Gfy,Bakamai,Sakamai,Gakamai,Bazuaka,Sazuaka,Gazuaka,Bazure,Sazure,Gazure,Bcloudflare,Scloudflare,Gcloudflare,Bfastly,Sfastly,Gfastly,BAWS,SAWS,GAWS,Bali,Sali,Gali,Bgoog,Sgoog,Ggoog
23/07/21 10:02PM,,,215200,1489,1571,,1897,12400,173600,6551,,,1556,769,,,,749,6124,9347,2179,4160,,4473,4635,906,3426
23/07/21 10:12PM,22653,21520,,,1670,,17360,,,,10850,,,18261,1522,,3414,2010,5148,10447,2030,2667,4160,4119,5837,1592,3216
23/07/21 10:22PM,23911,,,1535,1615,815,3156,13354,177,6313,,,,825,586,873,,885,4280,6458,2114,4039,4119,6303,5629,1072,3283"""))
# different approach to getting means per provider to plot
df2 = pd.DataFrame({c[1:]:df.loc[:,[c, "S"+c[1:]]].mean(axis=1).values for c in df.columns if c[0]=="B"})
# re-order columns on ascending median
df2 = df2.reindex(df2.median().sort_values().index, axis=1)
meta = {'fy': {'color': '#8235EA', 'name': 'Fy'},
'azure': {'color': '#62F92C', 'name': 'Azure'},
'AWS': {'color': '#29E5B7', 'name': 'AWS Cloudfront'},
'ali': {'color': '#3597EA', 'name': 'Alibaba Cloud'},
'goog': {'color': '#EA4833', 'name': 'Google Cloud'},
'cloudflare': {'color': '#3548EA', 'name': 'Cloudflare'},
'akamai': {'color': '#EA8933', 'name': 'Akamai'},
# next two were missing
'fastly': {'color': 'pink', 'name': 'Fastly'},
'azuaka': {'color': 'purple', 'name': 'azuaka'},
}
go.Figure([go.Box(y=df2[c], name=meta[c]["name"], line={"color":meta[c]["color"]}) for c in df2.columns])

Related

Plotly scatter_mapbox python: How to update the displayed scatter_mapbox

i want to plot open street map points in a scatter_mapbox. To plot the data it is no problem.
But when I try to update the data the new data will not shown in the plot.
First, when I call again the .show function the new result will be displayed in a new firefox tab. How can I solve it that I get an update in the same windows.
I want to create dropdown menus und input boxes for the costumer and when he change the data, it should be shown in the same window.
Also nice to have improvements for the rest of the code.
`
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from osm.osm_reader import OSMHandler, calcDistanceBetweenTwoGPS
osmhandler = OSMHandler()
osmhandler.apply_file("data/osm/hamburg.osm")
data_colnames_ways = ['type', 'id', 'nodeId', 'version', 'visible', 'name', 'ts', 'uid',
'user', 'chgset']
data_colnames_nodes = ['type', 'id', 'version', 'visible', 'ts', 'uid',
'user', 'chgset', 'lat', 'lon']
df_osm_ways = pd.DataFrame(osmhandler.osm_ways, columns=data_colnames_ways)
df_osm_ways = df_osm_ways.sort_values(by=['type', 'id', 'ts'])
df_osm_nodes = pd.DataFrame(osmhandler.osm_nodes, columns=data_colnames_nodes)
df_osm_nodes = df_osm_nodes.sort_values(by=['type', 'id', 'ts'])
df_traffic_nodes = pd.DataFrame(osmhandler.traffic_nodes, columns=data_colnames_nodes)
df_traffic_nodes = df_traffic_nodes.sort_values(by=['type', 'id', 'ts'])
from accident_atlas.csv_reader import CsvReader
csv_reader = CsvReader()
csv_reader.set_data_path("data/aatlas/Unfallorte2021_LinRef.csv")
accident_data = csv_reader.get_data(lat_limits=(min(df_osm_nodes["lat"]), max(df_osm_nodes["lat"])),
lon_limits=(min(df_osm_nodes["lon"]), max(df_osm_nodes["lon"])))
accident_data["CloseToNode"] = False
max_distance = 50
df_traffic_nodes["CloseAccidents"] = 0
for idx, row_x in accident_data.iterrows():
for idy, row_y in df_traffic_nodes.iterrows():
if max_distance > calcDistanceBetweenTwoGPS(row_x["YGCSWGS84"], row_y["lat"], row_x["XGCSWGS84"], row_y["lon"]):
df_traffic_nodes.loc[idy, "CloseAccidents"] += 1
if not accident_data["CloseToNode"][idx]:
accident_data.loc[idx, "CloseToNode"] = True
df_acdata_filtered = accident_data[accident_data["CloseToNode"] == True]
df_traffic_nodes_filtered = df_traffic_nodes[df_traffic_nodes["CloseAccidents"] >= 0]
fig = px.scatter_mapbox(data_frame=df_traffic_nodes_filtered, lat=df_traffic_nodes_filtered["lat"], lon=df_traffic_nodes_filtered["lon"], color="CloseAccidents",
zoom=12, height=800, size_max = 50, hover_name="CloseAccidents", color_continuous_scale="bluered")
fig2 = px.scatter_mapbox(data_frame=None, lat=df_acdata_filtered["YGCSWGS84"], lon=df_acdata_filtered["XGCSWGS84"], color_discrete_sequence=["Black"],
zoom=12, height=800, size_max = 50)
fig.add_trace(fig2.data[0])
fig.update_layout(title = "Traffic lights on roads with number of accidents.")
fig.update_layout(mapbox_style="open-street-map")
f = go.FigureWidget(fig)
f.show()
f.data[0]["legendgroup"] = "test"
f.data[1]["legendgroup"] = "test2"
fig.update_traces(lat=fig.data[1].lat[-1], lon= fig.data[1].lon[-1], selector=dict(legendgroup= 'test'))
print("Test")
`
I tried to plot data to a scatter_mapbox and change the data after the show.

Plotting data from two different dataframes to a single Dash graph

I'm trying to generate a Dash app which displays historical and forecasted housing prices. I've got the forecasted data stored in a different dataframe from the historical prices, and I'd like to plot them both on the same graph in Dash, and have the graph get updated via callback when the user picks a different city from a dropdown menu. I would like both traces of the graph to update when a value is selected in the dropdown. I've tried various things but can only get one trace from one dataframe plotted for the graph in my callback:
# --- import libraries ---
import dash
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash.dependencies import Output, Input
# --- load data ---
df_h = pd.read_csv('df_h.csv')
df_arima = pd.read_csv('df_arima.csv')
options = [] #each column in the df_h dataframe is an option for the dropdown menu
for column in df_h.columns:
options.append({'label': '{}'.format(column, column), 'value': column})
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# --- initialize the app ---
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# --- layout the dashboard ---
app.layout = html.Div(
children = [
html.Div([
html.Label('Select a feature from drop-down to plot'),
dcc.Dropdown(
id = 'city-dropdown',
options = options,
value = 'Denver, CO',
multi = False,
clearable = True,
searchable = True,
placeholder = 'Choose a City...'),
html.Div(id = 'forecast-container',
style = {'padding': '50px'}),
]),
],
)
# --- dropdown callback ---
#app.callback(
Output('forecast-container', 'children'),
Input('city-dropdown', 'value'))
def forecast_graph(value):
dff = df_h[['Date', value]] #'value' is identical between the two dataframes. references
dfa = df_arima[df_arima['City'] == value] # a col in dff and row values in dfa
return [
dcc.Graph(
id = 'forecast-graph',
figure = px.line(
data_frame = dff,
x = 'Date',
y = value).update_layout(
showlegend = False,
template = 'xgridoff',
yaxis = {'title': 'Median Home Price ($USD)'},
xaxis = {'title': 'Year'},
title = {'text': 'Median Home Price vs. Year for {}'.format(value),
'font': {'size': 24}, 'x': 0.5, 'xanchor': 'center'}
),
)
]
I was able to accomplish this in Plotly but can't figure out how to do it in Dash. This is what I want in Dash:
Plotly graph I am trying to reproduce in callback, that is linked to a dropdown menu
This is all I can get to work in Dash:
Only one dataframe plots in Dash
This is the code that works in plotly graph objects:
from statsmodels.tsa.arima_model import ARIMA
df_ml = pd.read_csv('df_ml.csv')
# --- data cleaning ---
df_pred = df_ml[df_ml['RegionName'] == city]
df_pred = df_pred.transpose().reset_index().drop([0])
df_pred.columns = ['Date', 'MedianHomePrice_USD']
df_pred['MedianHomePrice_USD'] = df_pred['MedianHomePrice_USD'].astype('int')
df_pred['Date'] = pd.to_datetime(df_pred['Date'])
df_pred['Date'] = df_pred['Date'].dt.strftime('%Y-%m')
df_model = df_pred.set_index('Date')
model_data = df_model['MedianHomePrice_USD']
def house_price_forecast(model_data, forecast_steps, city):
#--- ARIMA Model (autoregressive integrated moving average) ---
model = ARIMA(model_data, order = (2,1,2), freq = 'MS')
res = model.fit()
forecast = res.forecast(forecast_steps)
forecast_mean = forecast[0]
forecast_se = forecast[1]
forecast_ci = forecast[2]
df_forecast = pd.DataFrame()
df_forecast['Mean'] = forecast_mean.T
df_forecast['Lower_ci'], df_forecast['Upper_ci'] = forecast_ci.T
df_forecast['Date'] = pd.date_range(start = '2021-02', periods = forecast_steps, freq = 'MS')
df_forecast['Date'] = df_forecast['Date'].dt.strftime('%Y-%m')
df_forecast.index = df_forecast['Date']
fig = go.Figure()
fig.add_trace(go.Scatter(x = df_pred['Date'], y = df_pred['MedianHomePrice_USD'],
mode = 'lines', name = 'Median Home Price ($USD)',
line_color = 'rgba(49, 131, 189, 0.75)', line_width = 2))
fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Mean'],
mode = 'lines', line_color = '#e6550d',
name = 'Forecast mean'))
fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Upper_ci'],
mode = 'lines', line_color = '#e0e0e0', fill = 'tonexty',
fillcolor = 'rgba(225,225,225, 0.3)',
name = 'Upper 95% confidence interval'))
fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Lower_ci'],
mode = 'lines', line_color = '#e0e0e0', fill = 'tonexty',
fillcolor = 'rgba(225,225,225, 0.3)',
name = 'Lower 95% confidence interval'))
fig.update_layout(title = 'Median Home Price in {}, {} - {} (Predicted)'.format(
city, model_data.idxmin()[:-3], df_forecast_mean.idxmax()[:-3]),
xaxis_title = 'Year', yaxis_title = 'Median Home Price ($USD)',
template = 'xgridoff')
fig.show()
house_price_forecast(model_data, 24, 'Denver, CO') #24 month prediction
Perhaps a more succinct way of asking this question: How do I add a trace to an existing Dash graph, with data from a different dataframe, and both traces get updated when the user selects a value from a single dropdown?

Figured it out...
Don't use the syntax I used above in your callback. Put the px.line call inside a variable(fig, in this case), and then use fig.add_scatter to add data from a different dataframe to the graph. Both parts of the graph will update from the callback.
Also, fig.add_scatter doesn't have a dataframe argument, so use df.column or df[column] (ex. 'dfa.Date' below)
# --- import libraries ---
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from dash.dependencies import Output, Input
# --- load data ---
df_h = pd.read_csv('df_h.csv')
df_h['Date'] = pd.to_datetime(df_h['Date'])
df_arima = pd.read_csv('df_arima.csv')
df_arima['Date'] = pd.to_datetime(df_arima['Date'])
df_arima['Date'] = df_arima['Date'].dt.strftime('%Y-%m')
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# --- initialize the app ---
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
dcc.Graph(id = 'forecast-container')
]
)
# --- dropdown callback ---
#app.callback(
Output('forecast-container', 'figure'),
Input('city-dropdown', 'value'))
def update_figure(selected_city):
dff = df_h[['Date', selected_city]]
# dff[selected_city] = dff[selected_city].round(0)
dfa = df_arima[df_arima['City'] == selected_city]
fig = px.line(dff, x = 'Date', y = selected_city,
hover_data = {selected_city: ':$,f'})
fig.add_scatter(x = dfa.Date, y = dfa.Mean,
line_color = 'orange', name = 'Forecast Mean')
fig.add_scatter(x = dfa.Date, y = dfa.Lower_ci,
fill = 'tonexty', fillcolor = 'rgba(225,225,225, 0.3)',
marker = {'color': 'rgba(225,225,225, 0.9)'},
name = 'Lower 95% Confidence Interval')
fig.add_scatter(x = dfa.Date, y = dfa.Upper_ci,
fill = 'tonexty', fillcolor = 'rgba(225,225,225, 0.3)',
marker = {'color': 'rgba(225,225,225, 0.9)'},
name = 'Upper 95% Confidence Interval')
fig.update_layout(template = 'xgridoff',
yaxis = {'title': 'Median Home Price ($USD)'},
xaxis = {'title': 'Year'},
title = {'text': 'Median Home Price vs. Year for {}'.format(selected_city),
'font': {'size': 24}, 'x': 0.5, 'xanchor': 'center'})
return fig
if __name__ == '__main__':
app.run_server(debug = True)

How to build an Altair layered chart w/ dual axis?

Description
This code shows three Altair charts:
scatter
rate
line_plot
Goal
The goal is to combine all charts into a layered chart w/ these specifications:
show the y-axis for both scatter and rate (ie. dual axis chart)
facet by Series
show the line_plot.
Code
import altair as alt
from vega_datasets import data
import pandas as pd
source = data.anscombe().copy()
source['line-label'] = 'x=y'
source = pd.concat([source,source.groupby('Series').agg(x_diff=('X','diff'), y_diff=('Y','diff'))],axis=1)
source['rate'] = source.y_diff/source.x_diff
source['rate-label'] = 'rate of change'
source['line-label'] = 'line y=x'
source_linear = source.groupby(by=['Series']).agg(x_linear=('X','max'), y_linear=('X', 'max')).reset_index().sort_values(by=['Series'])
source_origin = source_linear.copy()
source_origin['y_linear'] = 0
source_origin['x_linear'] = 0
source_linear = pd.concat([source_origin,source_linear]).sort_values(by=['Series'])
source = source.merge(source_linear,on='Series').drop_duplicates()
scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
x=alt.X('X', title='X'),
y=alt.Y('Y', title='Y'),
color='Series:N',
tooltip=['X','Y','rate']
)
line_plot = alt.Chart(source).mark_line(color= 'black', strokeDash=[3,8]).encode(
x=alt.X('x_linear', title = ''),
y=alt.Y('y_linear', title = ''),
shape = alt.Shape('line-label', title = 'Break Even'),
color = alt.value('black')
)
rate = alt.Chart(source).mark_line(strokeDash=[5,3]).encode(
x=alt.X('X', title = 'X'),
y=alt.Y('rate:Q'),
color = alt.Color('rate-label',),
tooltip=['rate','X','Y']
)
Current solution
The issue with the current solution is that the rate chart's y-axis is not displaying as a dual axis. Any suggestions?
alt.layer(rate,scatter,line_plot).facet(
'Series:N'
, columns=2
).resolve_scale(
x='independent',
y='independent'
).display()

Well, I got it, but this probably isn't the best solution. I've followed the method described in the following link where we manually facet the charts:
Thread on Facets
To get the dual axis, I just added .resolve_scale(y='independent') to the manual step. Below is the solution:
import altair as alt
from vega_datasets import data
import pandas as pd
source = data.anscombe().copy()
source\['line-label'\] = 'x=y'
source = pd.concat(\[source,source.groupby('Series').agg(x_diff=('X','diff'), y_diff=('Y','diff'))\],axis=1)
source\['rate'\] = source.y_diff/source.x_diff
source\['rate-label'\] = 'rate of change'
source\['line-label'\] = 'line y=x'
source_linear = source.groupby(by=\['Series'\]).agg(x_linear=('X','max'), y_linear=('X', 'max')).reset_index().sort_values(by=\['Series'\])
source_origin = source_linear.copy()
source_origin\['y_linear'\] = 0
source_origin\['x_linear'\] = 0
source_linear = pd.concat(\[source_origin,source_linear\]).sort_values(by=\['Series'\])
source = source.merge(source_linear,on='Series').drop_duplicates()
scatter = alt.Chart().mark_circle(size=60, opacity=0.60).encode(
x=alt.X('X', title='X'),
y=alt.Y('Y', title='Y'),
color='Series:N',
tooltip=\['X','Y','rate'\]
)
line_plot = alt.Chart().mark_line(color= 'black', strokeDash=\[3,8\]).encode(
x=alt.X('x_linear', title = '', axis=None),
y=alt.Y('y_linear', title = '', axis=None),
shape = alt.Shape('line-label', title = 'Break Even'),
color = alt.value('black')
)
rate = alt.Chart().mark_line(strokeDash=\[5,3\]).encode(
x=alt.X('X', title = 'X'),
y=alt.Y('rate:Q'),
color = alt.Color('rate-label',),
tooltip=\['rate','X','Y'\]
)
scatter_rate = alt.layer(scatter, rate, data=source)
chart_generator = (alt.layer(scatter, rate, line_plot, data = source, title=f"{val}: Duplicated Points w/ Line at Y=X").transform_filter(alt.datum.Series == val).resolve_scale(y='independent') \
for val in source.Series.unique())
chart = alt.concat(*(
chart_generator
), columns=2).display()

bokeh hover multiline with datetime axis

I want to create a multiline Bokeh plot with datetime axis and a hover tool that shows the datetime of the data point. This should be supported and I have tried to obtain the intended behaviour in two ways:
Use hover.formatters to format the x-value. This has no effect on the plot.
Add a description variable with the correctly formatted date/time values. This results in a hover tool where all date/time values are displayed in a list for each point.
I have included a smaller example of my code that illustrates my approach and the result. It is used in conjunction with a checkboxgroup that updates the data. This is why a new ColumnDataSource is made from the dataframe.
import pandas as pd
import numpy as np
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Spectral4
from bokeh.layouts import column
#output_file("demo.html")
available_quant = ["LACTIC_ACID", "GLUCOSE", "XYLOSE", "FORMIC_ACID"]
quant_legend = ["Lactic acid", "Glucose", "Xylose", "Formic acid"]
Create a dataframe with 4 quantities and the time
datelist = pd.date_range(end = pd.datetime.today(), periods=100).tolist()
desc = datelist
for i, date in enumerate(datelist):
desc[i] = str(date)
RT_x = np.linspace(-5, 5, num=100)
lactic = RT_x**2
data = {'time': datelist, 'desc': desc, 'LACTIC_ACID': RT_x**2 + 2, 'GLUCOSE': RT_x**2, 'XYLOSE': RT_x**2 - 2, 'FORMIC_ACID': RT_x**2 - 4}
df = pd.DataFrame.from_dict(data)
df['time'] = pd.to_datetime(df['time'], format = "%Y-%m-%d %H:%M:%S")
Copy the relevant data to a columndatasource
substance_colors = Spectral4
quant_to_plot = available_quant
xs = []
ys = []
xsprint = []
colors = []
labels = []
for i, substance in enumerate(quant_to_plot):
xs.append(list(df['time']))
ys.append(list(df[substance]))
xsprint.append(list(df['desc']))
index = available_quant.index(substance)
colors.append(substance_colors[index])
labels.append(quant_legend[index])
new_src = ColumnDataSource(data={'x': xs, 'y': ys, 'desc': xsprint, 'color': colors, 'label': labels})
Make the first plot using hover.formatters
p = figure(plot_width=800, plot_height=400, x_axis_type="datetime", title = 'Demo', x_axis_label = 'Time', y_axis_label = 'c [g/mL]')
p.multi_line('x','y', color = 'color', legend = 'label', source = new_src)
hover = HoverTool(tooltips=[('Type','#label'),
('Time','$x'),
('Conc','$y')],
formatters={'Time': 'datetime'},
mode = 'mouse',
line_policy='next')
p.add_tools(hover)
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
Make second plot using description variable
p2 = figure(plot_width=800, plot_height=400, x_axis_type="datetime", title = 'Demo', x_axis_label = 'Time', y_axis_label = 'c [g/mL]')
p2.multi_line('x','y', color = 'color', legend = 'label', source = new_src)
hover = HoverTool(tooltips=[('Type','#label'),
('Time','#desc'),
('Conc','$y')],
mode = 'mouse',
line_policy='nearest')
p2.add_tools(hover)
mylayout = column(p, p2)
show(mylayout)
Am I missing something trivial? I am running Bokeh 0.13.0 and python 3.6.4.

The first approach works with the following modification of the hovertool:
hover = HoverTool(tooltips=[('Type','#label'),
('Time','$x{%F}'),
('Conc','$y')],
formatters={'$x': 'datetime'},
mode = 'mouse',
line_policy='nearest')

Choropleth world map not showing all countries

I wanted to make a choropleth world map, which shows the hits(number of searches) of a word, on a World map.
Following is the code:
import plotly
import plotly.offline
import pandas as pd
df = pd.read_excel('F:\\Intern\\csir\\1yr\\news\\region_2016_2017.xlsx')
df = df.query('keyword==["addiction"]')
scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
[0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]
data = [dict(
type='choropleth',
colorscale=scl,
locations = df['location'],
z = df['hits'].astype(int),
locationmode = "country names",
autocolorscale = False,
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5)),
colorbar = dict(
autotick = False,
title = 'Hits'),)]
layout = dict(
title = 'Addiction keyword 1yr analysis',
geo = dict(
showframe = False,
showcoastlines = False,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict(data = data,layout = layout)
plotly.offline.plot(fig,validate=False,filename = 'd3-world-map.html')
And the plotted map is:
As one can see clearly, many countries are missing. This may be due to the fact that many countries didn't have entries which explicitly stated that they have zero hits.
I don't want to explicitly do that with my data. Is there any other way out of this? So that we can see all of the countries.
Data set can be found here.
Note that the dataset that I've linked is an .csv file whereas the file used in the program is an .xlsx version of the file.

You need to turn on country outlines under layout...
"geo":{
"countriescolor": "#444444",
"showcountries": true
},

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Sorting Box Plots by Median using Plotly Graph Objects - python

Related

Plotly scatter_mapbox python: How to update the displayed scatter_mapbox

Plotting data from two different dataframes to a single Dash graph

How to build an Altair layered chart w/ dual axis?

bokeh hover multiline with datetime axis

Choropleth world map not showing all countries

Categories

Resources