mplfinance stacked plots with common, time-aligned shared axis - python

I am plotting stacked graphs of 5, 15, 30 and 60 minute candles on top of one another.
I would like to please:
Have all charts with price right aligned (y_on_right=True seems not to be used)
For the times/grid on the 5 minute graph to be every 60 mins on the hour
For all the other graphs to use the same as the above, every 60 mins, all aligned
Optionally if possible, to remove the space on the left and the right (so the first bar is up against the left edge, and last bar up against the right edge)
This is my output so far:
And code is below:
# Question code: download 5/15/30/60-minute TVIX aggregates from Polygon and
# draw them as four stacked candlestick charts on one mplfinance figure.
# NOTE(review): indentation was lost in this listing; the body of main() and of
# the `with` block must be re-indented before the script can run.
import mplfinance as mpf
import pandas as pd
from polygon import RESTClient
def main():
key = "key"
with RESTClient(key) as client:
start = "2019-02-01"
end = "2019-02-02"
ticker = "TVIX"
# One request per bar size (5, 15, 30 and 60 minutes) over the same date range.
resp5 = client.stocks_equities_aggregates(ticker, 5, "minute", start, end, unadjusted=False)
resp15 = client.stocks_equities_aggregates(ticker, 15, "minute", start, end, unadjusted=False)
resp30 = client.stocks_equities_aggregates(ticker, 30, "minute", start, end, unadjusted=False)
resp60 = client.stocks_equities_aggregates(ticker, 60, "minute", start, end, unadjusted=False)
print(f'5 min data is {len(resp5.results)} long')
print(f'15 min data is {len(resp15.results)} long')
print(f'30 min data is {len(resp30.results)} long')
print(f'60 min data is {len(resp60.results)} long')
# Build one DataFrame per bar size. The 't' column is divided by 1000 and
# parsed as seconds (presumably epoch milliseconds - TODO confirm) to form the index.
df5 = pd.DataFrame(resp5.results)
df5.index = pd.DatetimeIndex( pd.to_datetime(df5['t']/1000, unit='s') )
df15 = pd.DataFrame(resp15.results)
df15.index = pd.DatetimeIndex( pd.to_datetime(df15['t']/1000, unit='s') )
df30 = pd.DataFrame(resp30.results)
df30.index = pd.DatetimeIndex( pd.to_datetime(df30['t']/1000, unit='s') )
df60 = pd.DataFrame(resp60.results)
df60.index = pd.DatetimeIndex( pd.to_datetime(df60['t']/1000, unit='s') )
df60.index.name = df30.index.name = df15.index.name = df5.index.name = 'Timestamp'
# mpf expects a dataframe containing Open, High, Low, and Close data with a Pandas DatetimeIndex
# NOTE(review): the labels are assigned positionally, so this assumes the API
# returns its eight columns in exactly this order - verify against the response.
df60.columns = df30.columns = df15.columns = df5.columns = ['Volume', 'Volume Weighted', 'Open', 'Close', 'High', 'Low', 'Time', 'Num Items']
# Four vertically stacked axes, one per bar size.
fig = mpf.figure(figsize=(32, 32))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)
# The 15/30/60-minute frames are passed as addplots, each aimed at its own axes.
ap = [
mpf.make_addplot(df15, type='candle', ax=ax2, y_on_right=True),
mpf.make_addplot(df30, type='candle', ax=ax3, y_on_right=True),
mpf.make_addplot(df60, type='candle', ax=ax4, y_on_right=True)
]
s = mpf.make_mpf_style(base_mpf_style='default',y_on_right=True)
# The 5-minute frame is the primary plot on ax1; the addplots ride along.
mpf.plot(df5, style=s, ax=ax1, addplot=ap, xrotation=0, datetime_format='%H:%M', type='candlestick')
if __name__ == '__main__':
main()

I don't have the corresponding API key, so I used Yahoo Finance stock prices instead. To place the price on the right side, you can change the style. Also, according to the linked information, it seems that y_on_right is only valid for the first graph. To remove the first and last margins, use tight_layout=True. To align the x-axis to the hour, you need to check how far the matplotlib time series formatter can go.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
import numpy as np
import datetime
import matplotlib.dates as mdates
# Download AAPL bars at four intraday intervals and draw them as four stacked
# candlestick charts that share one figure.
start = '2021-12-22'
end = '2021-12-23'
intervals = [5, 15, 30, 60]

# One download per interval, bound to df5 / df15 / df30 / df60 in that order.
downloads = []
for i in intervals:
    downloads.append(
        yf.download("AAPL", start=start, end=end, period='1d', interval=f'{i}m')
    )
df5, df15, df30, df60 = downloads

# Drop the timezone so every index holds naive timestamps.
for df in downloads:
    df.index = pd.to_datetime(df.index).tz_localize(None)

# Keep a single calendar day in every frame.
# NOTE(review): this day is hard-coded and differs from `start` - confirm it
# matches the rows the download actually returns.
session_day = datetime.date(2021, 12, 21)
df5 = df5[df5.index.date == session_day]
df15 = df15[df15.index.date == session_day]
df30 = df30[df30.index.date == session_day]
df60 = df60[df60.index.date == session_day]

fig = mpf.figure(style='yahoo', figsize=(12, 9))
ax1, ax2, ax3, ax4 = (fig.add_subplot(4, 1, row) for row in range(1, 5))

# Identical options for every panel; only the data and target axes differ.
plot_options = dict(type='candle', xrotation=0, datetime_format='%H:%M', tight_layout=True)
mpf.plot(df5, ax=ax1, **plot_options)
mpf.plot(df15, ax=ax2, **plot_options)
mpf.plot(df30, ax=ax3, **plot_options)
mpf.plot(df60, ax=ax4, **plot_options)

# Print the tick positions chosen for the 30-minute panel.
ax3_ticks = ax3.get_xticks()
print(ax3_ticks)

Related

animation.FuncAnimation mplfinance (candlestick and line together)? (python)

I created a reproducible example of random data for candlestick chart ohlc that is working correctly.
Now I need, in the same plot, to plot a random line (in the real application it will be a function of the ohlc data (not a moving average)), so I created a random variable y0 that I will "concat" on every loop through the ani = animation.FuncAnimation() function.
The objects that mpf "will use" to plot are df and y0_arr and they have the "same format" (they are pd.DataFrame, have DateIndex as index, same dates, same dtypes=float).
if you comment(exclude) the part of the ##random line the code will work with no problem, only for the candlestick chart, but it won't if you include the random line. I've also tried to plot just/only the random line but it won't work also when it's only the random line.
this below is the code:
import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
from datetime import date, datetime
import mplfinance as mpf
import matplotlib.animation as animation
import time
# Question code: animate a growing random OHLC candlestick series together
# with a random line using mplfinance and matplotlib's FuncAnimation.
# NOTE(review): indentation was lost in this listing, so the extent of each
# `def` / `if` body has to be restored before the script can run.
i=0
sign=1
def genData():
# Appends one new row to the global `df` (OHLC) and one new row to the
# global `y0_arr` (random line) on every call, then increments `i`.
global df0, df, i, close, sign, y0_arr
# Flip the sign on every call; it is applied to the close offset below.
sign = sign*-1
## random ohlc generation
if i==0: #variable initialization
y0_arr = pd.DataFrame()
df = pd.DataFrame()
i=0
close = 0
open_ = np.round(np.random.normal(10, 4), decimals=2) ##initial open
print("i:",i)
# Each row is dated `i` days after today.
dt = datetime.now() + timedelta(days=i)
dt = dt.date()
# After the first row, each open equals the previous close.
if i > 0:
open_ = close
high = np.round(open_ + np.random.normal(0.5, 2), decimals=2)
low = np.round(open_ - np.random.normal(0.5, 2), decimals=2)
close = np.round(open_ + sign*np.random.normal(0.2, 0.4), decimals=2)
# Build a one-row frame indexed by date with lowercase open/high/low/close columns.
dfi = np.column_stack((dt, open_, high, low, close))
dfi = pd.DataFrame(dfi)
dfi.columns = ['date', 'open', 'high', 'low', 'close']
dfi['date'] = pd.to_datetime(dfi['date'], format="%Y/%m/%d")
dfi.set_index('date', inplace=True)
dfi = dfi.convert_dtypes(float)
df = pd.concat([df, dfi])
## random ohlc generation
##random line (exclude from here...)
# One-row frame indexed by date whose only data column is named 'open'.
y0 = np.round(np.random.normal(1,2), decimals=2)
y0i = np.column_stack((dt,y0))
y0i = pd.DataFrame(y0i)
y0i.columns = ['date','open']
y0i['date'] = pd.to_datetime(y0i['date'],format="%Y/%m/%d")
y0i.set_index('date', inplace=True)
y0i = y0i.convert_dtypes(float)
y0_arr = pd.concat([y0_arr,y0i])
##random line (...to here)
time.sleep(1)
i=i+1
#### plotting
fig = mpf.figure(style="charles",figsize=(7,8))
ax1 = fig.add_subplot(1,1,1)
def animate(ival):
# Animation callback: generate one more row, then redraw on ax1.
global df, y0_arr
print("animate()")
genData() ##create new data
# NOTE(review): `ax1.clear` is referenced without parentheses, so the method
# is not called and the axes are not cleared here.
ax1.clear
mpf.plot(df, ax=ax1, type='candle', ylabel='Price US$') ##ohlc
# NOTE(review): y0_arr is passed as the primary data argument even though it
# carries only the single 'open' column built in genData().
mpf.plot(y0_arr, ax=ax1, type='line',ylabel='Price US$') ##random line (...exclude this line)
ani = animation.FuncAnimation(fig, animate, interval=250)
mpf.show()
This below are the error messages that I'm getting:
How can I solve this problem? Where to look?
Thank You
Notice that the error message is KeyError: 'Open'. This is because mpf.plot() expects the first argument to be a DataFrame with columns 'Open', 'High', 'Low', and 'Close' (or with OHLC column names that you specify using kwarg columns=).
Apparently your y0_arr is not such a dataframe.
The correct way to add a line to a candlestick plot is with the mpf.make_addplot() call; see the mplfinance documentation for addplot.
See also https://github.com/matplotlib/mplfinance/blob/master/examples/mpf_animation_macd.py for an example of how to add lines to an animated candlestick plot.

Skipping certain values in Python with Matplotlib

I am currently working on an intra-day stock chart using the Alpha Vantage API. The data frame contains values from 4:00 to 20:00. In my matplotlib.pyplot chart however, the x-Axis also includes values from 20:00 to 4:00 over night. I dont want this as it messes up the aesthetics and also the Volume subplot.
Q: Is there any way to skip x-Axis values which dont exist in the actual Data Frame (the values from 20:00 to 04:00)?
As you can see, the Data Frame clearly jumps from 20:00 to 04:00
However in the Matplotlib chart, the x-Axis contains the values from 20:00 to 4:00, messing with the chart
Code so far. I believe so far everything is right:
import pandas as pd
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
import time
import datetime as dt
from datetime import timedelta as td
from dateutil.relativedelta import relativedelta
# Question code: fetch 15-minute TSLA bars from Alpha Vantage and plot the close,
# a rolling mean and the volume on two stacked matplotlib axes.
#Accessing and Preparing API
# NOTE(review): `api_key` is not defined anywhere in this listing.
ts = TimeSeries(key=api_key, output_format='pandas')
ticker_input = "TSLA"
interval_input = "15min"
df, meta_data = ts.get_intraday(symbol = ticker_input, interval = interval_input, outputsize = 'full')
# Keep the first 16*4*5 = 320 rows (presumably 16 hours x 4 bars per hour x
# 5 days - TODO confirm), then reverse the row order.
slice_date = 16*4*5
df = df[0:slice_date]
df = df.iloc[::-1]
# NOTE(review): the column is named "100ma" but the rolling window is 50 rows.
df["100ma"] = df["4. close"].rolling(window = 50, min_periods = 0).mean()
df["Close"] = df["4. close"]
df["Date"] = df.index
#Plotting all as 2 different subplots
# The x values are the index timestamps copied into the "Date" column.
# NOTE(review): ax1 is placed on a (7,1) grid while ax2 below uses a (6,1)
# grid with colspan = 2; the two grid shapes do not match.
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df["Date"], df['Close'])
ax1.plot(df["Date"], df["100ma"], linewidth = 0.5)
plt.xticks(rotation=45)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df["Date"], df["5. volume"])
ax2.axes.xaxis.set_visible(False)
plt.tight_layout()
plt.show()
It would be great if anybody could help. Im still a complete beginner and only started Python 2 weeks ago.
We got the data from the same place, although the data acquisition method is different. After extracting it at 15-minute intervals, I created a graph by excluding the data after 8pm and before 4am. I wrote the code on the understanding that you want the overnight gap to be skipped. The points you want skipped are skipped once they are set to NaN.
import datetime
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import mplfinance as mpf
# import matplotlib.pyplot as plt
# Fetch intraday TSLA bars from Alpha Vantage, put them on a regular 15-minute
# grid restricted to hours 4 through 20, and plot the close, a rolling mean
# and the volume on two stacked axes.
with open('./alpha_vantage_api_key.txt') as f:
    # strip() removes the trailing newline a text file usually ends with, which
    # would otherwise become part of the key sent to the API.
    api_key = f.read().strip()
now_ = datetime.datetime.today()
start = datetime.datetime(2019, 1, 1)
# Midnight of the previous calendar day. Subtracting a timedelta instead of
# using `now_.day - 1` stays valid on the first day of a month, where the
# subtraction would produce day 0 and raise ValueError.
previous_day = now_.date() - datetime.timedelta(days=1)
end = datetime.datetime(previous_day.year, previous_day.month, previous_day.day)
symbol = 'TSLA'
df = web.DataReader(symbol, 'av-intraday', start, end, api_key=api_key)
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
df.index = pd.to_datetime(df.index)
# NOTE(review): the column is named "100ma" but the rolling window is 50 rows.
df["100ma"] = df["Close"].rolling(window = 50, min_periods = 0).mean()
df["Date"] = df.index
# asfreq re-indexes onto a regular 15-minute grid; grid points without data
# become NaN rows.
df_15 = df.asfreq('15min')
# Keep only rows whose hour is between 4 and 20 inclusive.
df_15 = df_15[(df_15.index.hour >= 4)&(df_15.index.hour <= 20) ]
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8,4.5),dpi=144)
#Plotting all as 2 different subplots
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df_15["Date"], df_15['Close'])
ax1.plot(df_15["Date"], df_15["100ma"], linewidth = 0.5)
plt.xticks(rotation=20)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df_15["Date"], df_15["Volume"])
ax2.axes.xaxis.set_visible(False)
# plt.tight_layout()
plt.show()
I fixed it using matplotlib.ticker.Formatter.
I first created a class:
class MyFormatter(Formatter):
    """Tick formatter that labels integer x positions with timestamps.

    ``dates`` is an indexable sequence whose items provide ``strftime``;
    ``fmt`` is the strftime pattern used for every label.
    """

    def __init__(self, dates, fmt='%Y-%m-%d %H:%M'):
        self.fmt = fmt
        self.dates = dates

    def __call__(self, x, pos=0):
        """Return the label for time x at position pos."""
        # Round to the nearest integer position; positions outside the
        # sequence get an empty label.
        ind = int(np.round(x))
        if 0 <= ind < len(self.dates):
            return self.dates[ind].strftime(self.fmt)
        return ''
# Plot against integer row positions so time spans with no rows take up no
# room on the x-axis; MyFormatter turns each position back into a timestamp.
formatter = MyFormatter(df.index)
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.xaxis.set_major_formatter(formatter)
ax1.plot(np.arange(len(df)), df["Close"])
ax1.plot(np.arange(len(df)), df["100ma"], linewidth = 0.5)
# Axes objects have no xticks() method (that is a pyplot-level function), so
# the tick labels are rotated through tick_params instead.
ax1.tick_params(axis='x', labelrotation=45)
# NOTE(review): xmin, xmax, ymin and ymax are not defined in this listing;
# they must be set before this line runs.
ax1.axis([xmin,xmax,ymin,ymax])
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(np.arange(len(df)), df["5. volume"])
plt.show()
This gave me a smoother graph than the one before and also that recommended by r-beginner.
The only issue that I have is that if I zoom in, the x-axis doesn't really change: it always shows the year, month, date, hour, and minute. Obviously I only want the hour and minute when I'm zoomed in further. I have yet to figure out how to do that.

Weird White Areas in Matplotlib Stacked Bar Graph

I'm trying to create a stacked bar graph using Matplotlib. The graph seems to stack just fine, but there are these weird white spaces that appear when I display the result. I'm thinking it's because I'm setting the "bottom=" plt.bar attribute to the current dataset, while from example code it looks like you're supposed to set "bottom=" to the sum of the previous datasets. My endeavors to do this have yielded nothing but blank graphs, so I figured I might as well upload the code that shows the graphs with the unwanted whitespace.
Any ideas on a workaround for this problem?
from pandas.api.types import CategoricalDtype
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
# Question code: sum per-sensor time per weekday from a tab-separated log and
# draw it with plt.bar.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loops and `if` bodies has to be restored before the script can run.
# NOTE(review): `pd` is used below but no pandas import appears in this listing.
df = pd.read_csv('D:/GIT/GSURFizDataProcessing/fizdata/fizmay2019data.txt', sep="\t", header=None)
#df.to_csv('D:/GIT/GSURFizDataProcessing/fizdata/fizproblemdata.csv')
# NOTE(review): inplace=False and the result is not assigned, so this call
# leaves `df` unchanged.
df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
# Keep the first three columns and name them.
df = df.loc[:,[0,1,2]]
df.columns = ['date', 'sensor', 'value']
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S.%f')
df['Day of Week'] = df['date'].dt.weekday_name
# Ordered categorical so weekdays sort Sunday..Saturday.
weekdays = [ 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
cat_type = CategoricalDtype(categories=weekdays, ordered=True)
df['Day of Week'] = df['Day of Week'].astype(cat_type)
motionSensors = ["D8MotionA", "D14MotionA",
"D14MotionB",
"D15AMotionA", "D15BMotionA", "D15MotionA","D15MotionB",
"D15MotionC","D18MotionA","D21AMotionA","D21MotionA",
"D4MotionA","D6MotionA","D22MotionA"]
df = df[df['sensor'].isin(motionSensors)]
df['TimeDelta'] = 0
# One random RGB color per sensor.
listOfColors = {}
for i in motionSensors:
listOfColors[i] = np.random.rand(3, )
usedLabels = []
for day in weekdays:
datasets = []
for sensor in motionSensors:
print("NEW SENSOR ", sensor)
parsedDf = df[(df['Day of Week']==day) & (df['sensor']==sensor)]
# Drop a leading "OFF" row.
firstValue = parsedDf["value"].iloc[0]
if firstValue == "OFF":
parsedDf.drop(parsedDf.head(1).index, inplace=True)
# Get seconds
# Time from each row to the next row.
parsedDf['TimeDelta'] = parsedDf['date'].shift(-1) - parsedDf['date']
# NOTE(review): the result of this slice is not assigned, so it has no effect.
parsedDf.iloc[::2]
parsedDf = parsedDf[parsedDf['TimeDelta'] < timedelta(days=2)]
parsedDf.drop(parsedDf.tail(1).index, inplace=True)
# NOTE(review): the result of this division is not assigned, so it has no effect.
parsedDf["TimeDelta"] / np.timedelta64(1, 's')
# Total duration for this day and sensor, converted to hours.
summed = parsedDf["TimeDelta"].sum()
summed = summed.total_seconds()
summed = summed / 3600
#labelText = sensor + ": " + ("%.2f" % sum) + " hrs"
#print(labelText)
# NOTE(review): both calls pass bottom=summed, so each bar starts at its own
# height rather than at the running total of the bars already drawn for the day.
# A label is attached only the first time a sensor is drawn.
if sensor in usedLabels:
plt.bar(day, summed, 0.4, color=listOfColors[sensor], bottom=summed)
else:
plt.bar(day, summed, 0.4, color=listOfColors[sensor], label=sensor, bottom=summed)
usedLabels.append(sensor)
# NOTE(review): `datasets` is appended to but not read anywhere in this listing.
datasets.append(summed)
plt.title("Fiz May 2019 Usage Data")
plt.ylabel("Time Spent (hr)")
# Narrow the axes to 60% width and place the legend beside them.
ax = plt.subplot(111)
chartBox = ax.get_position()
ax.set_position([chartBox.x0, chartBox.y0, chartBox.width*0.6, chartBox.height])
ax.legend(loc='upper center', bbox_to_anchor=(1.45, 0.8), shadow=True, ncol=1)
plt.legend(loc='upper center', bbox_to_anchor=(1.45, 0.8))
plt.show()
Turns out the bars were overlapping: They weren't even stacking to begin with.

Create gantt chart with hlines?

I've tried for several hours to make this work. I tried using 'python-gantt' package, without luck. I also tried plotly (which was beautiful, but I can't host my sensitive data on their site, so that won't work).
My starting point is code from here:
How to plot stacked event duration (Gantt Charts) using Python Pandas?
Three Requirements:
Include the 'Name' on the y axis rather than the numbers.
If someone has multiple events, put all the event periods on one line (this will make pattern identification easier), e.g. Lisa will only have one line on the visual.
Include the 'Event' listed on top of the corresponding line (if possible), e.g. Lisa's first line would say "Hire".
The code will need to be dynamic to accommodate many more people and more possible event types...
I'm open to suggestions to visualize: I want to show the duration for various staffing events throughout the year, as to help identify patterns.
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dt
# Question code: build a small table of staffing events and draw each event as
# a horizontal line spanning its start and end date.
df = pd.DataFrame({'Name': ['Joe','Joe','Lisa','Lisa','Lisa','Alice'],
'Event': ['Hire','Term','Hire','Transfer','Term','Term'],
'Start_Date': ["2014-01-01","2014-02-01","2015-01-01","2015-02-01","2015-03-01","2016-01-01"],
'End_Date': ["2014-01-31","2014-03-15","2015-01-31","2015-02-28","2015-05-01","2016-09-01"]
})
# Fix the column order, then parse both date columns.
df = df[['Name','Event','Start_Date','End_Date']]
df.Start_Date = pd.to_datetime(df.Start_Date).astype(datetime)
df.End_Date = pd.to_datetime(df.End_Date).astype(datetime)
fig = plt.figure()
ax = fig.add_subplot(111)
# NOTE(review): `ax` is rebound to the return value of xaxis_date() and then to
# the return value of plt.hlines(), so it no longer refers to the Axes.
ax = ax.xaxis_date()
# One line per row: y is the row's integer index, x runs from start to end date.
ax = plt.hlines(df.index, dt.date2num(df.Start_Date), dt.date2num(df.End_Date))
I encountered the same problem in the past. You seem to appreciate the aesthetics of Plotly. Here is a little piece of code which uses matplotlib.pyplot.broken_barh instead of matplotlib.pyplot.hlines.
from collections import defaultdict
from datetime import datetime
from datetime import date
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
# Draw a Gantt-style chart: one row per person, one colored bar per event,
# using Axes.broken_barh so all of a person's events share a single line.
df = pd.DataFrame({
    'Name': ['Joe', 'Joe', 'Lisa', 'Lisa', 'Lisa', 'Alice'],
    'Event': ['Hire', 'Term', 'Hire', 'Transfer', 'Term', 'Term'],
    'Start_Date': ['2014-01-01', '2014-02-01', '2015-01-01', '2015-02-01', '2015-03-01', '2016-01-01'],
    'End_Date': ['2014-01-31', '2014-03-15', '2015-01-31', '2015-02-28', '2015-05-01', '2016-09-01']
})
df = df[['Name', 'Event', 'Start_Date', 'End_Date']]
df.Start_Date = pd.to_datetime(df.Start_Date).astype(datetime)
df.End_Date = pd.to_datetime(df.End_Date).astype(datetime)
# One chart row per distinct name, in order of first appearance.
names = df.Name.unique()
nb_names = len(names)
fig = plt.figure()
ax = fig.add_subplot(111)
bar_width = 0.8
default_color = 'blue'
# Event type -> bar color; unknown event types fall back to default_color.
colors_dict = defaultdict(lambda: default_color, Hire='green', Term='red', Transfer='orange')
# Plot the events
for index, name in enumerate(names):
    mask = df.Name == name
    start_dates = mdates.date2num(df.loc[mask].Start_Date)
    end_dates = mdates.date2num(df.loc[mask].End_Date)
    durations = end_dates - start_dates
    # broken_barh takes (start, width) pairs and calls len() on the sequence,
    # so the zip iterator is materialized into a list first.
    xranges = list(zip(start_dates, durations))
    # Center the bar vertically on the row's integer position.
    ymin = index - bar_width / 2.0
    ywidth = bar_width
    yrange = (ymin, ywidth)
    facecolors = [colors_dict[event] for event in df.loc[mask].Event]
    ax.broken_barh(xranges, yrange, facecolors=facecolors, alpha=1.0)
    # you can set alpha to 0.6 to check if there are some overlaps
# Shrink the x-axis
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
# Add the legend
patches = [mpatches.Patch(color=color, label=key) for (key, color) in colors_dict.items()]
patches = patches + [mpatches.Patch(color=default_color, label='Other')]
plt.legend(handles=patches, bbox_to_anchor=(1, 0.5), loc='center left')
# Format the x-ticks
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(mdates.MonthLocator())
# Format the y-ticks
ax.set_yticks(range(nb_names))
ax.set_yticklabels(names)
# Set the limits
# From 1 January of the earliest start year to 1 January after the latest end year.
date_min = date(df.Start_Date.min().year, 1, 1)
date_max = date(df.End_Date.max().year + 1, 1, 1)
ax.set_xlim(date_min, date_max)
# Format the coords message box
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
# Set the title
ax.set_title('Gantt Chart')
plt.show()
I hope this will help you.

Plotting a Heat Table Based on bokeh

I am trying to make a heat map like this one from bokeh:
Where all the code is here: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I got pretty close, but for some reason it is only printing the values in a diagonal order.
I tried to format my data the same way and just substitute it, but it got a little more complicated than that. Here is my data:
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import ColumnDataSource, figure, show, output_file
from bokeh.models import HoverTool
import pandas.util.testing as tm; tm.N = 3
# Question code: count calls per month and company from a CSV, then draw the
# counts as a bokeh heat map of colored rectangles.
# NOTE(review): indentation was lost in this listing; the nested loop bodies
# below have to be re-indented before the script can run.
df = pd.read_csv('MYDATA.csv', usecols=[1, 16])
df = df.set_index('recvd_dttm')
df.index = pd.to_datetime(df.index, format='%m/%d/%Y %H:%M')
# Rows per (month of index, CompanyName).
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg(len).reset_index()
result.columns = ['Month', 'CompanyName', 'NumberCalls']
pivot_table = result.pivot(index='Month', columns='CompanyName', values='NumberCalls').fillna(0)
# Keep the 46 companies with the largest totals.
# NOTE(review): Series.sort(...) and .ix look like legacy pandas APIs - confirm
# they exist in the pandas version in use.
s = pivot_table.sum().sort(ascending=False,inplace=False)
pivot_table = pivot_table.ix[:,s.index[:46]]
# After the transpose, rows are companies and columns are months.
pivot_table = pivot_table.transpose()
pivot_table.to_csv('pivot_table.csv')
pivot_table = pivot_table.reset_index()
pivot_table['CompanyName'] = [str(x) for x in pivot_table['CompanyName']]
Companies = list(pivot_table['CompanyName'])
months = ["1","2","3","4","5","6","7","8","9","10","11","12"]
pivot_table = pivot_table.set_index('CompanyName')
# this is the colormap from the original plot
colors = [
"#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
"#ddb7b1", "#cc7878", "#933b41", "#550b1d"
]
# Set up the data for plotting. We will need to have values for every
# pair of year/month names. Map the rate to a color.
month = []
company = []
color = []
rate = []
# One entry per (company, month) cell is appended to each of the four lists.
for y in pivot_table.index:
for m in pivot_table.columns:
month.append(m)
company.append(y)
num_calls = pivot_table.loc[y,m]
rate.append(num_calls)
# NOTE(review): a count below 2 gives a negative index here, which Python
# resolves from the end of `colors`.
color.append(colors[min(int(num_calls)-2, 8)])
# NOTE(review): the per-cell `month` and `company` lists built above are not
# used here; the 12-entry `months` and the `Companies` list are passed instead,
# while `color` and `rate` hold one entry per cell.
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
output_file('heatmap.html')
TOOLS = "resize,hover,save,pan,box_zoom,wheel_zoom"
p = figure(title="Customer Calls This Year",
x_range=Companies, y_range=list(reversed(months)),
x_axis_location="above", plot_width=1400, plot_height=900,
toolbar_location="left", tools=TOOLS)
# Unit-size rectangles positioned by the "Companies" and "months" columns of `source`.
p.rect("Companies", "months", 1, 1, source=source,
color="color", line_color=None)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
('Company Name', '#Companies'),
('Number of Calls', '#rate'),
])
show(p) # show the plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Simulate call records (timestamp + company), count calls per company and
# month, and render the counts as a matplotlib heat map.
np.random.seed(0)
hourly_range = pd.date_range('2015-01-01 00:00:00', '2015-06-30 00:00:00', freq='1h')
dates = np.random.choice(hourly_range, 10000)
company_names = [f'company{n}' for n in range(1, 6)]
company = np.random.choice(company_names, 10000)

# One row per simulated call, indexed and sorted by timestamp.
df = pd.DataFrame(dict(recvd_dttm=dates, CompanyName=company))
df = df.set_index('recvd_dttm').sort_index()
# Constant column of ones, renamed to '' and summed below to get the counts.
df['C'] = 1
df.columns = ['CompanyName', '']

grouped = df.groupby([lambda idx: idx.month, 'CompanyName'])
result = grouped.agg({df.columns[1]: sum}).reset_index()
result.columns = ['Month', 'CompanyName', 'counts']
# Rows are companies, columns are months.
pivot_table = result.pivot(index='CompanyName', columns='Month', values='counts')

x_labels = ['Month' + str(month_no) for month_no in pivot_table.columns.values]
y_labels = pivot_table.index.values

fig, ax = plt.subplots()
x = ax.imshow(pivot_table, cmap=plt.cm.winter)
plt.colorbar(mappable=x, ax=ax)
# One tick per column / row, labelled with the month and company names.
ax.set_xticks(np.arange(len(x_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticks(np.arange(len(y_labels)))
ax.set_yticklabels(y_labels)
ax.set_xlabel('Month')
ax.set_ylabel('Company')
ax.set_title('Customer Calls This Year')
The answer was in this line:
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
It should have been:
# The column names must stay "months" and "Companies" because p.rect() looks
# the rectangles up under those names; the values must be the per-cell lists
# `month` and `company` filled in the nested loop, so that every column has
# one entry per rectangle, like `color` and `rate`.
source = ColumnDataSource(
    data=dict(months=month, Companies=company, color=color, rate=rate)
)

Categories