Plotting a Heat Table Based on bokeh - python

I am trying to make a heat map like this one from bokeh:
Where all the code is here: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I got pretty close, but for some reason it is only printing the values in a diagonal order.
I tried to format my data the same way and just substitute it, but it got a little more complicated than that. Here is my data:
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import ColumnDataSource, figure, show, output_file
from bokeh.models import HoverTool
import pandas.util.testing as tm; tm.N = 3
# Load the call log. Only CSV columns 1 and 16 are read; the code below
# refers to them as 'recvd_dttm' and 'CompanyName'.
df = pd.read_csv('MYDATA.csv', usecols=[1, 16])
df = df.set_index('recvd_dttm')
df.index = pd.to_datetime(df.index, format='%m/%d/%Y %H:%M')

# Count rows per (month, company), then pivot into a Month x Company table,
# filling missing combinations with 0.
result = (
    df.groupby([lambda idx: idx.month, 'CompanyName'])
    .agg(len)
    .reset_index()
)
result.columns = ['Month', 'CompanyName', 'NumberCalls']
pivot_table = result.pivot(
    index='Month', columns='CompanyName', values='NumberCalls'
).fillna(0)

# Keep the 46 companies with the largest totals, then put companies on rows.
# NOTE(review): Series.sort(...) and .ix come from old pandas releases.
s = pivot_table.sum().sort(ascending=False, inplace=False)
pivot_table = pivot_table.ix[:, s.index[:46]]
pivot_table = pivot_table.transpose()
pivot_table.to_csv('pivot_table.csv')

# Axis labels: company names as strings, months as the strings "1".."12".
pivot_table = pivot_table.reset_index()
pivot_table['CompanyName'] = [str(x) for x in pivot_table['CompanyName']]
Companies = list(pivot_table['CompanyName'])
months = [str(number) for number in range(1, 13)]
pivot_table = pivot_table.set_index('CompanyName')

# this is the colormap from the original plot
colors = [
    "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
    "#ddb7b1", "#cc7878", "#933b41", "#550b1d",
]

# Build one entry per (company, month) cell: its month, its company, the
# call count, and the colour the count maps to.
month = []
company = []
color = []
rate = []
for row_label in pivot_table.index:
    for col_label in pivot_table.columns:
        month.append(col_label)
        company.append(row_label)
        num_calls = pivot_table.loc[row_label, col_label]
        rate.append(num_calls)
        # NOTE(review): for counts of 0 or 1 the index is negative, so it
        # wraps around to the end of `colors`.
        color.append(colors[min(int(num_calls) - 2, 8)])

# NOTE(review): this passes the axis-label lists `months` / `Companies`,
# not the per-cell `month` / `company` lists filled in the loop above.
source = ColumnDataSource(
    data=dict(months=months, Companies=Companies, color=color, rate=rate)
)

output_file('heatmap.html')

TOOLS = "resize,hover,save,pan,box_zoom,wheel_zoom"
p = figure(
    title="Customer Calls This Year",
    x_range=Companies,
    y_range=list(reversed(months)),
    x_axis_location="above",
    plot_width=1400,
    plot_height=900,
    toolbar_location="left",
    tools=TOOLS,
)

# One unit-sized rectangle per row of `source`, coloured by its "color" column.
p.rect("Companies", "months", 1, 1, source=source,
       color="color", line_color=None)

# Strip grid lines, axis lines and ticks so that only the cells remain.
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3

# NOTE(review): Bokeh tooltips reference data columns as '@name'; the
# '#'-prefixed strings below look like an extraction artifact -- confirm.
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
    ('Company Name', '#Companies'),
    ('Number of Calls', '#rate'),
])

show(p)  # show the plot

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Simulated stand-in for the real call log: 10,000 timestamps drawn from an
# hourly range covering the first half of 2015, each paired with one of
# five company names.
np.random.seed(0)
hourly_range = pd.date_range('2015-01-01 00:00:00', '2015-06-30 00:00:00', freq='1h')
dates = np.random.choice(hourly_range, 10000)
company = np.random.choice(['company' + x for x in '1 2 3 4 5'.split()], 10000)

df = pd.DataFrame(dict(recvd_dttm=dates, CompanyName=company))
df = df.set_index('recvd_dttm').sort_index()

# Helper column of ones; summing it per group yields the number of calls.
df['C'] = 1
df.columns = ['CompanyName', '']

# Calls per (month, company), reshaped to a Company x Month table.
grouped = df.groupby([lambda idx: idx.month, 'CompanyName'])
result = grouped.agg({df.columns[1]: sum}).reset_index()
result.columns = ['Month', 'CompanyName', 'counts']
pivot_table = result.pivot(index='CompanyName', columns='Month', values='counts')

x_labels = ['Month'+str(x) for x in pivot_table.columns.values]
y_labels = pivot_table.index.values

# Draw the table as an image and attach a colour bar to it.
fig, ax = plt.subplots()
x = ax.imshow(pivot_table, cmap=plt.cm.winter)
plt.colorbar(mappable=x, ax=ax)

# One tick per column / row, labelled with the month and company names.
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
ax.set_xlabel('Month')
ax.set_ylabel('Company')
ax.set_title('Customer Calls This Year')

The answer was in this line:
# As posted in the question: the data dict is built from `months` and
# `Companies`.
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
It should have been:
# Use the per-cell lists filled in the nested loop (`month`, `company`),
# which have one entry per rectangle, instead of the axis-label lists
# (`months`, `Companies`). The keys stay "months" / "Companies" because
# p.rect("Companies", "months", ...) looks the columns up under those names.
# Months are converted to strings so they match the string categories
# used for y_range.
source = ColumnDataSource(
    data=dict(
        months=[str(m) for m in month],
        Companies=company,
        color=color,
        rate=rate,
    )
)

Related

Candlesticks in matplotlib

I'm trying to make a crypto scanner, but I'm struggling a bit. The code can currently loop through the different coins in symbols.csv and plot each of them. The plots include the close price, the SMA and Bollinger Bands. I would really like the close price to be drawn as candlesticks instead of a line. I've found that there are other plotting libraries, such as mplfinance (mpf), that can draw candlesticks. The problem is that I don't know how to make the Bollinger Bands work with mpf plots, and I don't know how to draw candlesticks with plain matplotlib. Can someone help me either draw candlesticks in matplotlib or add the Bollinger Bands to the mpf plots?
Thanks in advance!
The graph looks like this right now
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
# Read one ticker symbol per line.
with open('symbols.csv') as f:
    symbols = f.read().splitlines()

# NOTE(review): the posted code had lost its indentation; the nesting used
# here (all per-symbol work inside this loop) is the most plausible
# reading -- confirm against the original post.
for symbol in symbols:
    df = yf.download(symbol, start='2020-01-01')
    # df = yf.download(symbol, period = '22h', interval = '15m')
    print(df)
    # df = yf.download('ADA-USD', start='2021-01-01')

    # 20-period simple moving average with bands two standard deviations
    # above and below it.
    rolling_close = df.Close.rolling(window=20)
    df['SMA'] = rolling_close.mean()
    df['stddev'] = rolling_close.std()
    df['Upper'] = df.SMA + 2 * df.stddev
    df['Lower'] = df.SMA - 2 * df.stddev
    df['Buy_Signal'] = np.where(df.Lower > df.Close, True, False)
    df['Sell_Signal'] = np.where(df.Upper < df.Close, True, False)

    # Walk the rows in order, alternating entries and exits: buy on a close
    # below the lower band while flat, sell on a close above the upper band
    # while holding. Row positions are recorded, not index labels.
    buys = []
    sells = []
    open_pos = False
    for i in range(len(df)):
        if df.Lower[i] > df.Close[i]:
            if not open_pos:
                buys.append(i)
                open_pos = True
        elif df.Upper[i] < df.Close[i]:
            if open_pos:
                sells.append(i)
                open_pos = False

    bought = df.iloc[buys]
    sold = df.iloc[sells]

    plt.figure(figsize=(12, 6))
    plt.scatter(bought.index, bought.Close, marker = '^', color ='g')
    plt.scatter(sold.index, sold.Close, marker = '^', color ='r')
    plt.plot(df[['Close', 'SMA', 'Upper', 'Lower']])
    plt.fill_between(df.index, df.Upper, df.Lower, color='grey', alpha=0.3)
    plt.legend(['Close', 'SMA', 'Upper', 'Lower'])
    plt.show()

    # Buy and sell prices side by side, aligned on date; shifting by -1 moves
    # each sell up one row before it is compared with the buy.
    merged = pd.concat([bought.Close, sold.Close], axis=1)
    merged.columns = ['Buys', 'Sells']
    print(merged)
    totalprofit = merged.shift(-1).Sells - merged.Buys
    print(totalprofit)
    relprofits = (merged.shift(-1).Sells - merged.Buys) / merged.Buys
    print(relprofits.mean())
The links in the comments provide a wealth of examples. Since you want to plot candlesticks, Bollinger Bands and the SMA with mplfinance (mpf), I have adapted one of the additional-plot examples to suit your needs. The chart was created from stock data rather than cryptocurrency data.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
df = yf.download("AAPL", start="2020-01-01")

# 20-period simple moving average with bands two standard deviations above
# and below it.
df['SMA'] = df.Close.rolling(window=20).mean()
df['stddev'] = df.Close.rolling(window=20).std()
df['Upper'] = df.SMA + 2 * df.stddev
df['Lower'] = df.SMA - 2 * df.stddev

# The comparisons already yield boolean columns, so np.where(cond, True,
# False) is unnecessary. Dropping it also removes the only use of numpy,
# which this snippet never imports.
df['Buy_Signal'] = df.Lower > df.Close
df['Sell_Signal'] = df.Upper < df.Close

# Overlay the bands and the SMA on the candlestick chart as additional plots.
tcdf = df[['Lower', 'Upper', 'SMA']]
apd = mpf.make_addplot(tcdf)
mpf.plot(df, figratio=(8, 4), type='candle', addplot=apd, volume=False, style='yahoo')

How to set bar labels in stack barmode grouped by date?

This code produces the figure I've attached. Notice the sums are the totals over the df, but I need the columns to only show the totals for that particular month. What do you have to set in the
text = ...
assignment for this to occur?
# Keep only the columns needed for the chart.
df = data[['Month', 'A', 'B']]

# One bar trace per distinct value of column 'A'.
for X in df['A'].unique():
    rows_for_x = df[df['A'] == X]
    trace = go.Bar(
        x=rows_for_x['Month'],
        y=rows_for_x['B'],
        # A single string: the sum of 'B' over all rows of this group.
        text=str(rows_for_x['B'].sum()),
    )
    traces.append(trace)
# Sum the numeric columns per (Month, TA) pair so each pair has one row.
df = data.groupby(['Month', 'TA']).sum().reset_index()

# One named bar trace per 'TA' value; each bar is labelled with its own
# 'Studies' value.
for TA in df['TA'].unique():
    rows_for_ta = df[df['TA'] == TA]
    trace = go.Bar(
        x=rows_for_ta['Month'],
        y=rows_for_ta['Studies'],
        text=rows_for_ta['Studies'],
        name=TA,
    )
    traces.append(trace)
As long as all values are already showing in your figure, the following will work regardless of how you've built your figure or grouped your data:
# Collect every trace's text labels as floats: one list per trace.
numbers = []
fig.for_each_trace(lambda t: numbers.append([float(nr) for nr in t.text]))

# Sum position-wise across traces, i.e. one total per bar position.
sums = [sum(i) for i in zip(*numbers)]

# Put the totals on the last trace and blank the labels of all the others.
last_index = len(fig.data) - 1
for i, d in enumerate(fig.data):
    d.text = sums if i == last_index else ''
fig.show()
Result:
Example of original figure:
Complete code:
# imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# data: the date column plus the first two value columns, last 25 rows
df = px.data.stocks()
df = df[df.columns[:3]]
df = df.tail(25)
df['date'] = pd.to_datetime(df['date'])

# group by month
dfm = df.groupby(pd.Grouper(key = 'date', freq='M')).agg('sum').reset_index()

# figure setup: one stacked bar trace per value column, each bar labelled
# with the first three characters of its value
fig = go.Figure()
for col in dfm.columns[1:]:
    labels = [str(v)[:3] for v in dfm[col]]
    fig.add_trace(go.Bar(x=dfm.date, y=dfm[col], text=labels, textposition='auto'))
fig.update_layout(barmode = 'stack')

# grab and sum the label values for all bars
numbers = []
fig.for_each_trace(lambda t: numbers.append([float(nr) for nr in t.text]))
sums = [sum(i) for i in zip(*numbers)]

# show the totals on the last trace only
last_index = len(fig.data) - 1
for i, d in enumerate(fig.data):
    d.text = sums if i == last_index else ''
fig.show()

How to add labels to subplots in plotly?

I am trying to plot a candlestick chart with volume using Plotly. However, I cannot get the proper x- and y-axis labels. Please help. I need y labels for both plots but an x label for only the bottom one, and a single title for both. Below is the code.
** One more question: how can I change the line color in the volume plot? Thank you.
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
# NOTE(review): `web` is not imported in this snippet as posted.
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')


def chart_can_vol(df):
    """Return a figure with a candlestick chart above a volume line chart.

    Reads the 'Open', 'Close', 'Low', 'High' and 'Volume' columns of `df`
    and uses its index for the x values.
    """
    # Three grid rows: the candlestick spans rows 1-2, the volume is in row 3.
    fig = tools.make_subplots(
        rows=3, cols=1,
        specs=[[{"rowspan": 2}],
               [None],
               [{}]],
        shared_xaxes=True,
        vertical_spacing=0.1,
    )

    candles = go.Candlestick(
        x=df.index,
        open=df['Open'],
        close=df['Close'],
        low=df['Low'],
        high=df['High'],
    )
    fig.add_trace(candles, row=1, col=1)
    fig.update_layout(xaxis_rangeslider_visible = False)
    fig.update_layout(
        yaxis_title = 'Apple Stock Price USD ($)'
    )

    volume_line = go.Scatter(x=df.index, y=df['Volume'])
    fig.add_trace(volume_line, row=3, col=1)
    # NOTE(review): this sets the same `yaxis_title` layout key as the call
    # above, so it replaces the price-axis title.
    fig.update_layout(
        yaxis_title = 'Volume',
        xaxis_title = 'Date'
    )

    fig.update_layout(title_text="Apple Stock")
    fig.update_layout(width=900, height=900)
    return fig


chart_can_vol(df)
When you make your subplots, you can add the subplot_titles attribute. In the code below, I used the titles "test1" and "test2". When you change your axis labels, you can use update_xaxes and update_yaxes, just make sure that the row and column values are the same for the update_axes method and the subplot.
To change the color of the line, you can add the line attribute within the scatterplot method and set it equal to a dictionary with a hex value of the color you want.
P.S. You should update plotly, because the tools.make_subplots was deprecated. Once you update, you can simply use make_subplots. Also, you are using pandas, when you should use pandas-datareader. See import statements.
Code:
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')


def chart_can_vol(df):
    """Return a figure with a candlestick chart above a volume line chart.

    Reads the 'Open', 'Close', 'Low', 'High' and 'Volume' columns of `df`
    and uses its index for the x values. Each subplot gets its own y-axis
    title; only the bottom subplot gets an x-axis title.
    """
    # Three grid rows: the candlestick spans rows 1-2, the volume is in row 3.
    fig = make_subplots(
        rows=3, cols=1,
        specs=[[{"rowspan": 2}],
               [None],
               [{}]],
        shared_xaxes=True,
        subplot_titles=("test1", "test2"),
        vertical_spacing=0.1,
    )

    fig.add_trace(
        go.Candlestick(
            x=df.index,
            open=df['Open'],
            close=df['Close'],
            low=df['Low'],
            high=df['High'],
        ),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df['Volume'],
            line=dict(color="#ffe476"),
        ),
        row=3, col=1,
    )

    # Address each axis through the row/col of the subplot it belongs to, so
    # the two y-axis titles cannot overwrite one another.
    fig.update_yaxes(title_text='Apple Stock Price USD ($)', row=1, col=1)
    fig.update_xaxes(title_text="Date", row=3, col=1)
    fig.update_yaxes(title_text="Volume", row=3, col=1)

    fig.update_layout(
        title_text="Apple Stock",
        xaxis_rangeslider_visible=False,
        width=900,
        height=900,
    )
    return fig


chart_can_vol(df).show()

How to combine two heatmaps in Seaborn in Python so both are shown in the same heatmap?

This is link to the data I'm using:
https://github.com/fivethirtyeight/data/tree/master/drug-use-by-age
I'm using Jupyter Lab, and here's the code:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sb
url = 'https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv'
df = pd.read_csv(url, index_col=0)
df.dtypes

# Replace '-' with NaN so every column can be cast to float, then drop the
# 'n' column.
df.replace('-', np.nan, inplace=True)
df = df.iloc[:, :].astype(float)
df = df.loc[:, df.columns != 'n']
#df.columns = df.columns.str.rstrip('-use')
df

# Two side-by-side axes, one per heat map.
fig, axes = plt.subplots(1, 2, figsize=(20, 8))
fig.subplots_adjust(wspace=0.1)
# NOTE(review): `ax` is not defined anywhere in this snippet as posted.
fig.colorbar(ax.collections[0], ax=ax, location="right", use_gridspec=False, pad=0.2)
#plt.figure(figsize=(16, 16))

# Even column positions go to the left plot, odd positions to the right one.
df_percentage = df.iloc[:, range(0, 26, 2)]
plot_precentage = sb.heatmap(
    df_percentage,
    cmap='Reds',
    ax=axes[0],
    cbar_kws={'format': '%.0f%%', 'label': '% used in past 12 months'},
)
df_frequency = df.iloc[:, range(1, 27, 2)]
plot_frequency = sb.heatmap(
    df_frequency,
    cmap='Blues',
    ax=axes[1],
    cbar_kws=dict(label = 'median frequency a user used'),
)
I can just show two of them in a subplot in separate diagrams.
I want to make it look like this (this is made in paint):
Also show the data side by side. Is there a simple way to achieve that?
A pretty simple solution with mask option:
# Column-parity mask with the same shape as df: 0 at even column positions,
# 1 at odd ones. heatmap() leaves cells blank where the mask is truthy, so
# the two calls below draw complementary sets of columns on the same axes.
mask = np.tile(np.arange(df.shape[1]) % 2, (df.shape[0], 1))

fig, axes = plt.subplots()

# seaborn is imported as `sb` in the question's code, so use that alias;
# the `sns` name used originally is never defined in this thread.
plot_precentage = sb.heatmap(
    df,
    mask=mask,
    cmap='Reds',
    ax=axes,
    cbar_kws={'format': '%.0f%%',
              'label': '% used in past 12 months'},
)
plot_frequency = sb.heatmap(
    df,
    mask=1 - mask,
    cmap='Blues',
    ax=axes,
    cbar_kws=dict(label='median frequency a user used'),
)
Output:

How to plot candlestick hourly with bokeh?

I have tried to plot a candlestick chart.
In the figure above you can see that the x-axis shows the sequence number of each data point.
I just want to replace it with the dates from df['date'].
I tried using df['date'] instead of df['seq'], but it doesn't work because of the time format.
How should I solve this?
Here is my code:
import pandas
import datetime
import random
import numpy
from bokeh.models import ColumnDataSource
from bokeh.models.tools import *
from bokeh.plotting import figure
from bokeh.io import gridplot, show
lenght = 20
df = pandas.DataFrame()

# Days 1-4 of January 2018, five hourly timestamps each (10:00 to 14:00),
# stored in the frame as formatted strings.
date = [
    datetime.datetime(2018, 1, i, j, 0)
    for i in range(1, 5)
    for j in range(10, 15)
]
df['date'] = pandas.to_datetime(date).strftime('%Y-%m-%d %H:%M')

# Random integer prices in 40..50 and a random RSI in 0..100.
df['open'] = [random.randint(40,50) for p in range(lenght)]
df['high'] = [random.randint(40,50) for p in range(lenght)]
df['low'] = [random.randint(40,50) for p in range(lenght)]
df['close'] = [random.randint(40,50) for p in range(lenght)]
df['rsi'] = [random.randint(0,100) for p in range(lenght)]

seqs = numpy.arange(df.shape[0])
df["seq"] = pandas.Series(seqs)

# Candle body: centred between open and close; 0.001 high when they are equal.
df['mid'] = df.apply(lambda x: (x['open'] + x['close']) / 2, axis=1)
df['height'] = df.apply(
    lambda x: abs(x['close'] - x['open'] if x['close'] != x['open'] else 0.001),
    axis=1,
)

# The numeric sequence assigned above is replaced by the date strings.
df["seq"] = df['date']

inc = df.close > df.open
dec = df.open > df.close
w = 0.3

#use ColumnDataSource to pass in data for tooltips
sourceInc = ColumnDataSource(ColumnDataSource.from_df(df.loc[inc]))
sourceDec = ColumnDataSource(ColumnDataSource.from_df(df.loc[dec]))

#the values for the tooltip come from ColumnDataSource
# NOTE(review): Bokeh tooltips reference data columns as '@name'; the
# '#'-prefixed strings below look like an extraction artifact -- confirm.
hover = HoverTool(
    tooltips=[
        ("date", "#date"),
        ("close", "#close"),
    ]
)
TOOLS = [CrosshairTool(), hover, BoxZoomTool(), WheelZoomTool()]

# Price chart: high-low segments plus open-close bodies, red where close is
# above open and green where it is below.
ohlc = figure(plot_width=1000, plot_height=500, tools=TOOLS, x_axis_type='datetime')
ohlc.grid.grid_line_alpha = 0.3
ohlc.segment(df.seq[inc], df.high[inc], df.seq[inc], df.low[inc], color="red")
ohlc.segment(df.seq[dec], df.high[dec], df.seq[dec], df.low[dec], color="green")
ohlc.rect(x='date', y='mid', width=w, height='height',
          fill_color="red", line_color="red", source=sourceInc)
ohlc.rect(x='date', y='mid', width=w, height='height',
          fill_color="green", line_color="green", source=sourceDec)
ohlc.xaxis.major_label_orientation = 3.14/4

# RSI strip underneath, with constant reference lines at 30 and 70.
rsi = figure(plot_width=1000, plot_height=100, y_range=(0,100))
rsi.xaxis.visible = False
rsi.multi_line(
    xs=[df.seq]*3,
    ys=[df.rsi, [30]*df.shape[0], [70]*df.shape[0]],
    line_color=['brown','grey','grey'],
    line_width=1,
)

chart = gridplot([[ohlc, None],[rsi, None]], toolbar_location='left')
print(df.date)
show(chart)
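Try replacing the constant w = 0.3 with w = 0.5 * 60 * 60 * 1000 (half an hour in milliseconds).
w is the candlestick width; on a datetime axis Bokeh measures widths in milliseconds, so a width of 0.3 is far too narrow to be visible.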

Categories