How to build an Altair layered chart w/ dual axis? - python

Description
This code shows three Altair charts:
scatter
rate
line_plot
Goal
The goal is to combine all charts into a layered chart w/ these specifications:
show the y-axis for both scatter and rate (ie. dual axis chart)
facet by Series
show the line_plot.
Code
import altair as alt
from vega_datasets import data
import pandas as pd
source = data.anscombe().copy()
source['line-label'] = 'x=y'
source = pd.concat([source,source.groupby('Series').agg(x_diff=('X','diff'), y_diff=('Y','diff'))],axis=1)
source['rate'] = source.y_diff/source.x_diff
source['rate-label'] = 'rate of change'
source['line-label'] = 'line y=x'
source_linear = source.groupby(by=['Series']).agg(x_linear=('X','max'), y_linear=('X', 'max')).reset_index().sort_values(by=['Series'])
source_origin = source_linear.copy()
source_origin['y_linear'] = 0
source_origin['x_linear'] = 0
source_linear = pd.concat([source_origin,source_linear]).sort_values(by=['Series'])
source = source.merge(source_linear,on='Series').drop_duplicates()
scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
x=alt.X('X', title='X'),
y=alt.Y('Y', title='Y'),
color='Series:N',
tooltip=['X','Y','rate']
)
line_plot = alt.Chart(source).mark_line(color= 'black', strokeDash=[3,8]).encode(
x=alt.X('x_linear', title = ''),
y=alt.Y('y_linear', title = ''),
shape = alt.Shape('line-label', title = 'Break Even'),
color = alt.value('black')
)
rate = alt.Chart(source).mark_line(strokeDash=[5,3]).encode(
x=alt.X('X', title = 'X'),
y=alt.Y('rate:Q'),
color = alt.Color('rate-label',),
tooltip=['rate','X','Y']
)
Current solution
The issue with the current solution is that the rate chart's y-axis is not displaying as a dual axis. Any suggestions?
alt.layer(rate,scatter,line_plot).facet(
'Series:N'
, columns=2
).resolve_scale(
x='independent',
y='independent'
).display()

Well, I got it, but this probably isn't the best solution. I've followed the method described in the following link where we manually facet the charts:
Thread on Facets
To get the dual axis, I just added .resolve_scale(y='independent') to the manual step. Below is the solution:
import altair as alt
from vega_datasets import data
import pandas as pd
source = data.anscombe().copy()
source\['line-label'\] = 'x=y'
source = pd.concat(\[source,source.groupby('Series').agg(x_diff=('X','diff'), y_diff=('Y','diff'))\],axis=1)
source\['rate'\] = source.y_diff/source.x_diff
source\['rate-label'\] = 'rate of change'
source\['line-label'\] = 'line y=x'
source_linear = source.groupby(by=\['Series'\]).agg(x_linear=('X','max'), y_linear=('X', 'max')).reset_index().sort_values(by=\['Series'\])
source_origin = source_linear.copy()
source_origin\['y_linear'\] = 0
source_origin\['x_linear'\] = 0
source_linear = pd.concat(\[source_origin,source_linear\]).sort_values(by=\['Series'\])
source = source.merge(source_linear,on='Series').drop_duplicates()
scatter = alt.Chart().mark_circle(size=60, opacity=0.60).encode(
x=alt.X('X', title='X'),
y=alt.Y('Y', title='Y'),
color='Series:N',
tooltip=\['X','Y','rate'\]
)
line_plot = alt.Chart().mark_line(color= 'black', strokeDash=\[3,8\]).encode(
x=alt.X('x_linear', title = '', axis=None),
y=alt.Y('y_linear', title = '', axis=None),
shape = alt.Shape('line-label', title = 'Break Even'),
color = alt.value('black')
)
rate = alt.Chart().mark_line(strokeDash=\[5,3\]).encode(
x=alt.X('X', title = 'X'),
y=alt.Y('rate:Q'),
color = alt.Color('rate-label',),
tooltip=\['rate','X','Y'\]
)
scatter_rate = alt.layer(scatter, rate, data=source)
chart_generator = (alt.layer(scatter, rate, line_plot, data = source, title=f"{val}: Duplicated Points w/ Line at Y=X").transform_filter(alt.datum.Series == val).resolve_scale(y='independent') \
for val in source.Series.unique())
chart = alt.concat(*(
chart_generator
), columns=2).display()

Related

How to display Resampling of candles / time series data in plotly?

How can I merge the two functions given below to achieve something like the histogram example. Any button or drop down would do fine.
If you run the function, you get a nice Candlesticks chart with the functionality of removing non trading day gaps.
def plot_candlesticks(df, names = ('DATE','OPEN','CLOSE','LOW','HIGH'), mv:list = [200], slider:bool = False, fig_size:bool = (1400,700), plot:bool = True):
'''
Plot a candlestick on a given dataframe
args:
df: DataFrame
names: Tuple of column names showing ('DATE','OPEN','CLOSE','LOW','HIGH')
mv: Moving Averages
slider: Whether to have below zoom slider or not
fig_size: Size of Figure as (Width, Height)
plotting: Whether to plot the figure or just return the figure for firther modifications
'''
freq = 5 # 5 min candle
candle_text = f"{str(freq)} Min"
stocks = df.copy()
stocks.sort_index(ascending=False, inplace = True) # Without reverse, recent rolling mean will be either NaN or equal to the exact value
Date, Open, Close, Low, High = names
mv = [] if not mv else mv # just in case you don't want to have any moving averages
colors = sample(['black','magenta','teal','brown','violet'],len(mv))
# To remove, non-trading days, grab first and last observations from df.date and make a continuous date range from that
start = stocks['DATE'].iloc[0] - timedelta(days=1)
end = stocks['DATE'].iloc[-1] + timedelta(days=1)
dt_all = pd.date_range(start=start,end=end, freq = f'{str(freq)}min')
# check which dates from your source that also accur in the continuous date range
dt_obs = [d.strftime("%Y-%m-%d %H:%M:%S") for d in stocks['DATE']]
# isolate missing timestamps
dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d %H:%M:%S").tolist() if not d in dt_obs]
rangebreaks=[dict(dvalue = freq*60*1000, values=dt_breaks)]
range_selector = dict(buttons = list([dict(step = 'all', label = 'All')]))
candle = go.Figure(data = [go.Candlestick(opacity = 0.9, x = stocks[Date], name = 'X',
open = stocks[Open], high = stocks[High], low = stocks[Low], close = stocks[Close]),])
for i in range(len(mv)):
stocks[f'{str(mv[i])}-SMA'] = stocks[Close].rolling(mv[i], min_periods = 1).mean()
candle.add_trace(go.Scatter(name=f'{str(mv[i])} MA',x=stocks[Date], y=stocks[f'{str(mv[i])}-SMA'],
line=dict(color=colors[i], width=1.7)))
candle.update_xaxes(title_text = 'Date', rangeslider_visible = slider, rangeselector = range_selector, rangebreaks=rangebreaks)
candle.update_layout(autosize = False, width = fig_size[0], height = fig_size[1],
title = {'text': f"{stocks['SYMBOL'][0]} : {str(candle_text)} Candles",'y':0.97,'x':0.5,
'xanchor': 'center','yanchor': 'top'},
margin=dict(l=30,r=30,b=30,t=30,pad=2),
paper_bgcolor="lightsteelblue")
candle.update_yaxes(title_text = 'Price in Rupees', tickprefix = u"\u20B9" ) # Rupee symbol
if plot:
candle.show()
return candle
and running the below code resamples your data.
def resample_data(self,to:str = '15min', names:tuple = ('OPEN','CLOSE','LOW','HIGH','DATE')):
'''
Resample the data from 5 Minutes to 15 or 75 Minutes
args:
data: Dataframe of Daily data
to: One of [15M, 75M]
'''
Open, Close, Low, High, Date = names
data = data.resample(to,on=Date).agg({Open:'first', High:'max', Low: 'min', Close:'last'})
return data.sort_index(ascending = False).reset_index()
Is there a functionality when I click 15M / 75M button in my chart, it shows me exactly the same data but resampled? Just like there is functionality in online trading softwares.
no sample data so I have used https://plotly.com/python/candlestick-charts/ sample
at core use https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.resample.html and change trace contents with resampled data
plus using https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html for events from widgets
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
df = pd.read_csv(
"https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv",
parse_dates=["Date"],
)
fig = go.FigureWidget(
data=[
go.Candlestick(
x=df["Date"],
open=df["AAPL.Open"],
high=df["AAPL.High"],
low=df["AAPL.Low"],
close=df["AAPL.Close"],
)
]
).update_layout(margin={"t": 30, "b": 0, "l": 0, "r": 0})
out = widgets.Output(layout={"border": "1px solid black"})
out.append_stdout("Output appended with append_stdout\n")
reset = widgets.Button(description="Reset")
slider = widgets.IntSlider(
value=1,
min=1,
max=10,
step=1,
description='Days:',
disabled=False,
continuous_update=False,
orientation='horizontal',
readout=True,
readout_format='d'
)
#out.capture()
def on_slider_change(v):
print(f"slider: {v['new']}")
dfr = df.resample(f"{v['new']}B", on="Date").mean().reset_index()
t = fig.data[0]
t.update(
x=dfr["Date"],
open=dfr["AAPL.Open"],
high=dfr["AAPL.High"],
low=dfr["AAPL.Low"],
close=dfr["AAPL.Close"],
)
#out.capture()
def on_reset_clicked(b):
print("reset")
t = fig.data[0]
t.update(
x=df["Date"],
open=df["AAPL.Open"],
high=df["AAPL.High"],
low=df["AAPL.Low"],
close=df["AAPL.Close"],
)
out.clear_output()
reset.on_click(on_reset_clicked)
slider.observe(on_slider_change, names='value')
widgets.VBox([widgets.HBox([reset, slider]), widgets.VBox([fig, out])])

How to add a legend in altair layered plots

So I have the following code:
workload_df = pd.DataFrame({
'index': pd.to_datetime(['01.02.2010', '01.03.2010', '01.04.2010']),
'measure': [100, 90, 120],
'measure_max': [80, 100, 150],
})
measure_max_plot = alt.Chart(workload_df).mark_bar(color = 'lightgreen', text = 'measure_max').encode(
alt.X('index', title = '', axis = alt.Axis(labelAngle = -45, labelOverlap = False)),
alt.Y('measure_max', title = '')
)
measure_plot = alt.Chart(workload_df).mark_bar(text = 'measure').encode(
x = alt.X('index', title = 'X', axis = alt.Axis(labelAngle = -45, labelOverlap = False)),
y = alt.Y('measure', title = 'Y'),
color=alt.condition(
alt.datum.measure > alt.datum.measure_max,
alt.value('red'),
alt.value('steelblue')
)
)
altair_plot = alt.layer(measure_max_plot, measure_plot)
st.altair_chart(altair_plot, use_container_width=True)
I already tried adding a legend, by using this solution:
Add legend to line & bars to Altair chart without using size/color
But got a weird error all the time or got a legend without any plotted data.
Can anyone help me with that?
In order to add a legend to your chart, you will need an encoding that the legend will represent. For example, here's how you might add a color encoding that will generate a color legend:
measure_max_plot = alt.Chart(workload_df).transform_calculate(
color='"measure_max"'
).mark_bar(text = 'measure_max').encode(
alt.X('index', title = '', axis = alt.Axis(labelAngle = -45, labelOverlap = False)),
alt.Y('measure_max', title = ''),
alt.Color('color:N')
)
measure_plot = alt.Chart(workload_df).transform_calculate(
color="datum.measure > datum.measure_max ? 'bigger' : 'smaller'"
).mark_bar(text = 'measure').encode(
x = alt.X('index', title = 'X', axis = alt.Axis(labelAngle = -45, labelOverlap = False)),
y = alt.Y('measure', title = 'Y'),
color = alt.Color('color:N', scale=alt.Scale(range=['red', 'lightgreen', 'steelblue']))
)
altair_plot = alt.layer(measure_max_plot, measure_plot)
Notice that by default, the color scale is shared between the two layered charts. This behavior can be fine-tuned using the Scale & guide resolution API.

Plotting categorial groupings in bokeh with geopandas not working? (Problem with CategoricalColorMapper)

I'm having problems plotting groupings of countries on a world map using Bokeh in combination with the geopandas package. What I want to do is colour each country in a group with a certain colour on the map. The groupings are saved in the dataframe "dataset", which is merged with the geopandas geographical info, converted to json and fed to the mapping functions. (Code below.)
The interesting thing is that no error is generated, Bokeh even reports that "BokehJS 1.4.0 successfully loaded", but no chart is shown.
I am quite convinced that the problem is with my implementation of the CategoricalColorMapper. This is evident since if I change the color mapper to to linear color mapper, the code works perfectly.
This code does not work:
from bokeh.palettes import viridis
from bokeh.models import FactorRange
dataset = gdf.merge(dataset, left_on = 'country', right_on = 'location', how = 'left')
#gdf is geopandas geo info dataframe
#Read data to json
dataset_json = json.loads(dataset.to_json())
#Convert to str like object
dataset_json_data = json.dumps(dataset_json)
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = dataset_json_data)
catValues=list(dataset["val"].dropna().unique().astype("str"))
palette=viridis(len(catValues))
print("Palette len:", len(palette))
print("Factors:", len(catValues))
print(dataset)
color_mapper = CategoricalColorMapper(palette = palette , factors=catValues)
#Create figure object.
p = figure(title = title_string, plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure.
p.patches('xs','ys', source = geosource, fill_color = {'field' :'val', 'transform' : color_mapper})
#Display figure inline in Jupyter Notebook.
output_notebook()
#Display figure.
show(p)
Calling the function prints the following, but non map is shown. The number of colors and categories seems fine to me?
Palette len: 118
Factors: 118
BokehJS 1.4.0 successfully loaded.
Replacing only the color mapper works perfectly. This code works:
def plot_map(dataset, title_string = ""):
dataset = gdf.merge(dataset, left_on = 'country', right_on = 'location', how = 'left')
#Read data to json
dataset_json = json.loads(dataset.to_json())
#Convert to str like object
dataset_json_data = json.dumps(dataset_json)
#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = dataset_json_data)
#Define a sequential multi-hue color palette.
palette = brewer['OrRd'][7]
#Reverse color order so that dark blue is highest obesity.
palette = palette[::-1]
#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
color_mapper = LinearColorMapper(palette = palette, low = dataset.val.min(), high = dataset.val.max())
#Define custom tick labels for color bar.
#tick_labels = {'0': '0', '1': '1', '2':'2', '3':'3', '4':'4', '5':'5', '6':'6','7':'7'}
#Create color bar.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=7,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)
#Create figure object.
p = figure(title = title_string, plot_height = 600 , plot_width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
#Add patch renderer to figure.
p.patches('xs','ys', source = geosource, fill_color = {'field' :'val', 'transform' : color_mapper},
line_color = 'black', line_width = 0.25, fill_alpha = 1)
#Specify figure layout.
p.add_layout(color_bar, 'below')
#Display figure inline in Jupyter Notebook.
output_notebook()
#Display figure.
show(p)

bokeh hover multiline with datetime axis

I want to create a multiline Bokeh plot with datetime axis and a hover tool that shows the datetime of the data point. This should be supported and I have tried to obtain the intended behaviour in two ways:
Use hover.formatters to format the x-value. This has no effect on the plot.
Add a description variable with the correctly formatted date/time values. This results in a hover tool where all date/time values are displayed in a list for each point.
I have included a smaller example of my code that illustrates my approach and the result. It is used in conjunction with a checkboxgroup that updates the data. This is why a new ColumnDataSource is made from the dataframe.
import pandas as pd
import numpy as np
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.palettes import Spectral4
from bokeh.layouts import column
#output_file("demo.html")
available_quant = ["LACTIC_ACID", "GLUCOSE", "XYLOSE", "FORMIC_ACID"]
quant_legend = ["Lactic acid", "Glucose", "Xylose", "Formic acid"]
Create a dataframe with 4 quantities and the time
datelist = pd.date_range(end = pd.datetime.today(), periods=100).tolist()
desc = datelist
for i, date in enumerate(datelist):
desc[i] = str(date)
RT_x = np.linspace(-5, 5, num=100)
lactic = RT_x**2
data = {'time': datelist, 'desc': desc, 'LACTIC_ACID': RT_x**2 + 2, 'GLUCOSE': RT_x**2, 'XYLOSE': RT_x**2 - 2, 'FORMIC_ACID': RT_x**2 - 4}
df = pd.DataFrame.from_dict(data)
df['time'] = pd.to_datetime(df['time'], format = "%Y-%m-%d %H:%M:%S")
Copy the relevant data to a columndatasource
substance_colors = Spectral4
quant_to_plot = available_quant
xs = []
ys = []
xsprint = []
colors = []
labels = []
for i, substance in enumerate(quant_to_plot):
xs.append(list(df['time']))
ys.append(list(df[substance]))
xsprint.append(list(df['desc']))
index = available_quant.index(substance)
colors.append(substance_colors[index])
labels.append(quant_legend[index])
new_src = ColumnDataSource(data={'x': xs, 'y': ys, 'desc': xsprint, 'color': colors, 'label': labels})
Make the first plot using hover.formatters
p = figure(plot_width=800, plot_height=400, x_axis_type="datetime", title = 'Demo', x_axis_label = 'Time', y_axis_label = 'c [g/mL]')
p.multi_line('x','y', color = 'color', legend = 'label', source = new_src)
hover = HoverTool(tooltips=[('Type','#label'),
('Time','$x'),
('Conc','$y')],
formatters={'Time': 'datetime'},
mode = 'mouse',
line_policy='next')
p.add_tools(hover)
p.legend.location = "top_left"
p.legend.orientation = "horizontal"
Make second plot using description variable
p2 = figure(plot_width=800, plot_height=400, x_axis_type="datetime", title = 'Demo', x_axis_label = 'Time', y_axis_label = 'c [g/mL]')
p2.multi_line('x','y', color = 'color', legend = 'label', source = new_src)
hover = HoverTool(tooltips=[('Type','#label'),
('Time','#desc'),
('Conc','$y')],
mode = 'mouse',
line_policy='nearest')
p2.add_tools(hover)
mylayout = column(p, p2)
show(mylayout)
Am I missing something trivial? I am running Bokeh 0.13.0 and python 3.6.4.
The first approach works with the following modification of the hovertool:
hover = HoverTool(tooltips=[('Type','#label'),
('Time','$x{%F}'),
('Conc','$y')],
formatters={'$x': 'datetime'},
mode = 'mouse',
line_policy='nearest')

Choropleth world map not showing all countries

I wanted to make a choropleth world map, which shows the hits(number of searches) of a word, on a World map.
Following is the code:
import plotly
import plotly.offline
import pandas as pd
df = pd.read_excel('F:\\Intern\\csir\\1yr\\news\\region_2016_2017.xlsx')
df = df.query('keyword==["addiction"]')
scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
[0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]
data = [dict(
type='choropleth',
colorscale=scl,
locations = df['location'],
z = df['hits'].astype(int),
locationmode = "country names",
autocolorscale = False,
reversescale = False,
marker = dict(
line = dict (
color = 'rgb(180,180,180)',
width = 0.5)),
colorbar = dict(
autotick = False,
title = 'Hits'),)]
layout = dict(
title = 'Addiction keyword 1yr analysis',
geo = dict(
showframe = False,
showcoastlines = False,
projection = dict(
type = 'Mercator'
)
)
)
fig = dict(data = data,layout = layout)
plotly.offline.plot(fig,validate=False,filename = 'd3-world-map.html')
And the plotted map is:
As one can see clearly, many countries are missing. This may be due to the fact that many countries didn't have entries which explicitly stated that they have zero hits.
I don't want to explicitly do that with my data. Is there any other way out of this? So that we can see all of the countries.
Data set can be found here.
Note that the dataset that I've linked is an .csv file whereas the file used in the program is an .xlsx version of the file.
You need to turn on country outlines under layout...
"geo":{
"countriescolor": "#444444",
"showcountries": true
},

Categories