I'm trying to create a stacked bar graph using Matplotlib. The graph seems to stack just fine, but there are these weird white spaces that appear when I display the result. I'm thinking it's because I'm setting the "bottom=" plt.bar attribute to the current dataset, while from example code it looks like you're supposed to set "bottom=" to the sum of the previous datasets. My endeavors to do this have yielded nothing but blank graphs, so I figured I might as well upload the code that shows the graphs with the unwanted whitespace.
Any ideas on a workaround for this problem?
from pandas.api.types import CategoricalDtype
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_csv('D:/GIT/GSURFizDataProcessing/fizdata/fizmay2019data.txt', sep="\t", header=None)
#df.to_csv('D:/GIT/GSURFizDataProcessing/fizdata/fizproblemdata.csv')
df.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
df = df.loc[:,[0,1,2]]
df.columns = ['date', 'sensor', 'value']
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S.%f')
df['Day of Week'] = df['date'].dt.weekday_name
weekdays = [ 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
cat_type = CategoricalDtype(categories=weekdays, ordered=True)
df['Day of Week'] = df['Day of Week'].astype(cat_type)
motionSensors = ["D8MotionA", "D14MotionA",
"D14MotionB",
"D15AMotionA", "D15BMotionA", "D15MotionA","D15MotionB",
"D15MotionC","D18MotionA","D21AMotionA","D21MotionA",
"D4MotionA","D6MotionA","D22MotionA"]
df = df[df['sensor'].isin(motionSensors)]
df['TimeDelta'] = 0
listOfColors = {}
for i in motionSensors:
listOfColors[i] = np.random.rand(3, )
usedLabels = []
for day in weekdays:
datasets = []
for sensor in motionSensors:
print("NEW SENSOR ", sensor)
parsedDf = df[(df['Day of Week']==day) & (df['sensor']==sensor)]
firstValue = parsedDf["value"].iloc[0]
if firstValue == "OFF":
parsedDf.drop(parsedDf.head(1).index, inplace=True)
# Get seconds
parsedDf['TimeDelta'] = parsedDf['date'].shift(-1) - parsedDf['date']
parsedDf.iloc[::2]
parsedDf = parsedDf[parsedDf['TimeDelta'] < timedelta(days=2)]
parsedDf.drop(parsedDf.tail(1).index, inplace=True)
parsedDf["TimeDelta"] / np.timedelta64(1, 's')
summed = parsedDf["TimeDelta"].sum()
summed = summed.total_seconds()
summed = summed / 3600
#labelText = sensor + ": " + ("%.2f" % sum) + " hrs"
#print(labelText)
if sensor in usedLabels:
plt.bar(day, summed, 0.4, color=listOfColors[sensor], bottom=summed)
else:
plt.bar(day, summed, 0.4, color=listOfColors[sensor], label=sensor, bottom=summed)
usedLabels.append(sensor)
datasets.append(summed)
plt.title("Fiz May 2019 Usage Data")
plt.ylabel("Time Spent (hr)")
ax = plt.subplot(111)
chartBox = ax.get_position()
ax.set_position([chartBox.x0, chartBox.y0, chartBox.width*0.6, chartBox.height])
ax.legend(loc='upper center', bbox_to_anchor=(1.45, 0.8), shadow=True, ncol=1)
plt.legend(loc='upper center', bbox_to_anchor=(1.45, 0.8))
plt.show()
Turns out the bars were overlapping: They weren't even stacking to begin with.
Related
I need to plot data that starts and end at a certain time, next to this I need to exclude a period in the weekend in that time period.
How can I create a time_mask of my data that has two rules?
I already created a code for the "Start" and "End" period, but I am not able to add the rule for excluding the "Weekend period".
#create a time_mask
start_date = '2022-06-30 15:26:00'
end_date = '2022-07-11 15:30:00'
weekend_end = '2022-07-08 14:30:00'
weekend_start = '2022-07-11 09:50:00'
time_mask = (df['Time'] > start_date) & (df['Time'] <= end_date)
# use only this part of the dataframe as training data
df1_train = df1.loc[time_mask]
I tried to exclude the "Weekend period" with the code below, but this is not working...
time_mask = ((df['Time'] > start_date) & (df['Time'] <= end_date) & ((df['Time'] < weekend_start) or (df['Time'] > weekend_end)))
I already solved the problem for one part. But now in my plot the period is not excluded:
Plot
Plot in operating hours
UPDATE 22-08-22
#%% Plot data
fig, ax = plt.subplots()
ax.plot(df['Time'], df1[Temp])
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y-%m-%d %H:%M:%S'))
fig.autofmt_xdate()
plt.show()
#%% Plot the data without empty values
N = len(df['Time'])
ind = np.arange(N)
def format_date(x, pos=None):
thisind = np.clip(int(x + 0.5), 0, N - 1)
return df['Time'][thisind].strftime('%Y-%m-%d %H:%M:%S')
fig, ax = plt.subplots()
ax.plot(ind, df[Temp])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
ax.set_title("Without empty values")
fig.autofmt_xdate()
plt.show()
Update 22-08-22
use '|' instead of or.
And in my opinion, you confused weekend_end with weekend_start, since the start is a later date, and the end, on the contrary, is early.
After filtering by condition:
(df['Time'] > start_date) & (df['Time'] <= end_date)
the data is again filtered by time greater than weekend_start:
(df['Time'] > weekend_start)
or time less than weekend_end:
(df['Time'] < weekend_end)
that is, the period from 2022-07-08 14:30:00 to 2022-07-11 09:50:00 is excluded.
Now about drawing. The fact is that the axis with dates and times is continuous. Even if there is no data in a certain period. On the left is a picture that does not remove this gap, on the right, the 'format_date' function is used to exclude this gap.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates
import matplotlib.ticker as ticker
df = pd.read_csv('Data.csv', sep=',', header=0)
start_date = '2022-06-30 15:26:00'
end_date = '2022-07-11 15:30:00'
weekend_end = '2022-07-08 14:30:00'
weekend_start = '2022-07-11 09:50:00'
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
time_mask = ((df['Timestamp'] > start_date) & (df['Timestamp'] <= end_date) & (
(df['Timestamp'] > weekend_start) | (df['Timestamp'] < weekend_end)))
df1 = df[time_mask].copy()
df1 = df1.set_index('Timestamp')
fig, axes = plt.subplots(ncols=2)
ax = axes[0]
ax.plot(df1.index, df1['Data'])
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y-%m-%d %H:%M:%S'))
ax.set_title("Default")
fig.autofmt_xdate()
N = len(df1['Data'])
ind = np.arange(N)
def format_date(x, pos=None):
thisind = np.clip(int(x + 0.5), 0, N - 1)
return df1.index[thisind].strftime('%Y-%m-%d %H:%M:%S')
ax = axes[1]
ax.plot(ind, df1['Data'])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
ax.set_title("Without empty values")
fig.autofmt_xdate()
plt.show()
Note that the 'Timestamp' column is converted to an index.
df1 = df1.set_index('Timestamp')
Below is the drawing code with a simple moving average. It's hard for me to calculate ema. You can use a library like TA-Lib.
df1['sma'] = df1['Data'].rolling(window=33).mean()
N = len(df1.index)
ind = np.arange(N)
def format_date(x, pos=None):
thisind = np.clip(int(x + 0.5), 0, N - 1)
return df1.index[thisind].strftime('%Y-%m-%d %H:%M:%S')
fig, ax = plt.subplots()
ax.plot(ind, df1['Data'])
ax.plot(ind, df1['sma'])
ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_date))
fig.autofmt_xdate()
plt.show()
also it seems correct to me, to convert strings to date time format, make them like in file:
also it seems correct to me, to convert strings to date time format, make them like in file:
start_date = pd.to_datetime('2022-06-30T15:26:00+02:00', errors='coerce')
end_date = pd.to_datetime('2022-07-11T15:30:00+02:00', errors='coerce')
weekend_end = pd.to_datetime('2022-07-08T14:30:00+02:00', errors='coerce')
weekend_start = pd.to_datetime('2022-07-11T09:50:00+02:00', errors='coerce')
Update 12/09/2022.
made it more convenient to draw without gaps. Created a column from an index by converting the data to strings. In the previous version, the same principle, but here everything is done at once without a function. Also applied MaxNLocator is how many divisions to display.
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
df = pd.read_csv('Data.csv', sep=',', header=0)
start_date = '2022-06-30 15:26:00'
end_date = '2022-07-11 15:30:00'
weekend_end = '2022-07-08 14:30:00'
weekend_start = '2022-07-11 09:50:00'
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
time_mask = ((df['Timestamp'] > start_date) & (df['Timestamp'] <= end_date) & (
(df['Timestamp'] > weekend_start) | (df['Timestamp'] < weekend_end)))
df1 = df[time_mask].copy()
df1 = df1.set_index('Timestamp')
df1['string'] = df1.index.astype(str)
df1['sma'] = df1['Data'].rolling(window=33).mean()
fig, ax = plt.subplots()
ax.plot(df1['string'], df1['Data'])
ax.plot(df1['string'], df1['sma'])
ax.xaxis.set_major_locator(MaxNLocator(nbins=5))
fig.autofmt_xdate()
plt.show()
I'm playing around with kaggle dataframe to practice using matplotlib.
I was creating bar graph one by one, but it keeps adding up.
When I called plt.show() there were like 10 windows of figure suddenly shows up.
Is it possible to combine 4 of those figures into 1 window?
These part are in the same segments "Time Analysis" So I want to combine these 4 figures in 1 window.
import matplotlib.pyplot as plt
import seaborn as sns
dataset = ('accidents_data.csv')
df = pd.read_csv(dataset)
"""Time Analysis :
Analyze the time that accidents happen for various patterns and trends"""
df.Start_Time = pd.to_datetime(df.Start_Time) #convert the start time column to date time format
df['Hour_of_Accident'] = df.Start_Time.dt.hour #extract the hour from the time data
hour_accident = df['Hour_of_Accident'].value_counts()
hour_accident_df = hour_accident.to_frame() #convert the series data to dataframe in order to sort the index columns
hour_accident_df.index.names = ['Hours'] #naming the index column
hour_accident_df.sort_index(ascending=True, inplace=True)
print(hour_accident_df)
# Plotting the hour of accidents data in a bargraph
hour_accident_df.plot(kind='bar',figsize=(8,4),color='blue',title='Hour of Accident')
#plt.show() #Show the bar graph
"""Analyzing the accident frequency per day of the week"""
df['Day_of_the_week'] = df.Start_Time.dt.day_of_week
day_of_accident = df['Day_of_the_week'].value_counts()
day_of_accident_df = day_of_accident.to_frame() #convert the series data to dataframe so that we can sort the index columns
day_of_accident_df.index.names = ['Day'] # Renaming the index column
day_of_accident_df.sort_index(ascending=True, inplace=True)
print(day_of_accident_df)
f, ax = plt.subplots(figsize = (8, 5))
x = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Sartuday', 'Sunday']
l = day_of_accident_df.index.values
y = day_of_accident_df.Day_of_the_week
plt.bar(l, y, color='green')
plt.title('Day of the week vs total number of accidents')
plt.ylabel("No. of accidents recorded")
ax.set_xticks(l)
ax.set_xticklabels(x)
#plt.show()
"""Analysis for the months"""
df['Month'] = df.Start_Time.dt.month
accident_month = df['Month'].value_counts()
accident_month_df = accident_month.to_frame() #convert the series data to dataframe so that we can sort the index columns
accident_month_df.index.names = ['Month'] # Renaming the index column
accident_month_df.sort_index(ascending=True, inplace=True)
print(accident_month_df)
#Plotting the Bar Graph
accident_month_df.plot(kind='bar',figsize=(8,5),color='purple',title='Month of Accident')
"""Yearly Analysis"""
df['Year_of_accident'] = df.Start_Time.dt.year
#Check the yearly trend
yearly_count = df['Year_of_accident'].value_counts()
yearly_count_df = pd.DataFrame({'Year':yearly_count.index, 'Accidents':yearly_count.values})
yearly_count_df.sort_values(by='Year', ascending=True, inplace=True)
print(yearly_count_df)
#Creating line plot
yearly_count_df.plot.line(x='Year',color='red',title='Yearly Accident Trend ')
plt.show()
I am plotting stacked graphs of 5, 15, 30 and 60 minute candles on top of one another.
I would like to please:
Have all charts with price right aligned (y_on_right=True seems not be used)
For the times/grid on the 5 minute graph to be every 60 mins on the hour
For all the other graphs to use the same as the above, every 60 mins, all aligned
Optionally if possible, to remove the space on the left and the right (so the first bar is up against the left edge, and last bar up against the right edge)
This is my output so far:
And code is below:
import mplfinance as mpf
import pandas as pd
from polygon import RESTClient
def main():
key = "key"
with RESTClient(key) as client:
start = "2019-02-01"
end = "2019-02-02"
ticker = "TVIX"
resp5 = client.stocks_equities_aggregates(ticker, 5, "minute", start, end, unadjusted=False)
resp15 = client.stocks_equities_aggregates(ticker, 15, "minute", start, end, unadjusted=False)
resp30 = client.stocks_equities_aggregates(ticker, 30, "minute", start, end, unadjusted=False)
resp60 = client.stocks_equities_aggregates(ticker, 60, "minute", start, end, unadjusted=False)
print(f'5 min data is {len(resp5.results)} long')
print(f'15 min data is {len(resp15.results)} long')
print(f'30 min data is {len(resp30.results)} long')
print(f'60 min data is {len(resp60.results)} long')
df5 = pd.DataFrame(resp5.results)
df5.index = pd.DatetimeIndex( pd.to_datetime(df5['t']/1000, unit='s') )
df15 = pd.DataFrame(resp15.results)
df15.index = pd.DatetimeIndex( pd.to_datetime(df15['t']/1000, unit='s') )
df30 = pd.DataFrame(resp30.results)
df30.index = pd.DatetimeIndex( pd.to_datetime(df30['t']/1000, unit='s') )
df60 = pd.DataFrame(resp60.results)
df60.index = pd.DatetimeIndex( pd.to_datetime(df60['t']/1000, unit='s') )
df60.index.name = df30.index.name = df15.index.name = df5.index.name = 'Timestamp'
# mpf expects a dataframe containing Open, High, Low, and Close data with a Pandas TimetimeIndex
df60.columns = df30.columns = df15.columns = df5.columns = ['Volume', 'Volume Weighted', 'Open', 'Close', 'High', 'Low', 'Time', 'Num Items']
fig = mpf.figure(figsize=(32, 32))
ax1 = fig.add_subplot(4, 1, 1)
ax2 = fig.add_subplot(4, 1, 2)
ax3 = fig.add_subplot(4, 1, 3)
ax4 = fig.add_subplot(4, 1, 4)
ap = [
mpf.make_addplot(df15, type='candle', ax=ax2, y_on_right=True),
mpf.make_addplot(df30, type='candle', ax=ax3, y_on_right=True),
mpf.make_addplot(df60, type='candle', ax=ax4, y_on_right=True)
]
s = mpf.make_mpf_style(base_mpf_style='default',y_on_right=True)
mpf.plot(df5, style=s, ax=ax1, addplot=ap, xrotation=0, datetime_format='%H:%M', type='candlestick')
if __name__ == '__main__':
main()
I don't have the corresponding API key, so I used Yahoo Finance's stock price instead. As for the issue of placing the price on the right side, you can change the style to achieve this. Also, it seems that y_on_right is only valid for the first graph. From this information. To remove the first and last margins, use tight_layout=True, and to align the x-axis to the hour, you need to check how far the mpl time series formatter can go.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
import numpy as np
import datetime
import matplotlib.dates as mdates
start = '2021-12-22'
end = '2021-12-23'
intervals = [5,15,30,60]
for i in intervals:
vars()[f'df{i}'] = yf.download("AAPL", start=start, end=end, period='1d', interval=str(i)+'m')
for df in [df5,df15,df30,df60]:
df.index = pd.to_datetime(df.index)
df.index = df.index.tz_localize(None)
df5 = df5[df5.index.date == datetime.date(2021,12,21)]
df15 = df15[df15.index.date == datetime.date(2021,12,21)]
df30 = df30[df30.index.date == datetime.date(2021,12,21)]
df60 = df60[df60.index.date == datetime.date(2021,12,21)]
fig = mpf.figure(style='yahoo', figsize=(12,9))
ax1 = fig.add_subplot(4,1,1)
ax2 = fig.add_subplot(4,1,2)
ax3 = fig.add_subplot(4,1,3)
ax4 = fig.add_subplot(4,1,4)
mpf.plot(df5, type='candle', ax=ax1, xrotation=0, datetime_format='%H:%M', tight_layout=True)
mpf.plot(df15, type='candle', ax=ax2, xrotation=0, datetime_format='%H:%M', tight_layout=True)
mpf.plot(df30, type='candle', ax=ax3, xrotation=0, datetime_format='%H:%M', tight_layout=True)
mpf.plot(df60, type='candle', ax=ax4, xrotation=0, datetime_format='%H:%M', tight_layout=True)
ax3_ticks = ax3.get_xticks()
print(ax3_ticks)
I have a lot of quarter-hourly data (consumption versus time). I have to make averages on these data and I would have liked to display the averages according to the days of the week + time.
So I am looking to put on a graphic the day and the time at the same time. The expected result is possible in Excel but I'm looking to do it in python with matplotlib (and using dataframes).
If you have any idea, thanks a lot!
Guillaume
Here is a code that displays a decent result but I would like better.
I'm sorry but I can't put an image attached directly because I'm new on the forum.
import pandas as pd
import datetime
import matplotlib.pyplot as plts
columns = ["Date/Time","Value"]
new_df = pd.DataFrame(columns = columns)
Jour1 = pd.to_datetime('02/01/2021')
value = np.random.randint(100, 150, size=(672,))
for x in range(672):
TimeStamp = Jour1
Jour1 = Jour1 + datetime.timedelta(minutes=15)
new_df = new_df.append(pd.Series([TimeStamp,value[x]], index = columns) ,ignore_index=True)
new_df['Day of week Name'] = new_df['Date/Time'].dt.dayofweek.astype(str) + ' - '+ new_df['Date/Time'].dt.day_name()
new_df["Time"] = new_df['Date/Time'].dt.time
new_df = new_df.groupby(['Day of week Name','Time'])['Value'].sum().reset_index()
new_df['TimeShow'] = new_df['Day of week Name'] +' '+ new_df['Time'].astype(str)
fig = plt.figure(figsize=(18,10))
ax=fig.add_subplot(111)
ax.plot(new_df['TimeShow'], new_df['Value'], label="Test", linewidth = 2)
plt.xticks(['0 - Monday 00:00:00','1 - Tuesday 00:00:00','2 - Wednesday 00:00:00','3 - Thursday 00:00:00','4 - Friday 00:00:00','5 - Saturday 00:00:00','6 - Sunday 00:00:00'])
plt.show()
Image in python
Image in excel - day not in order
EDIT :
Thanks to your help, I finally found something that works for me. I don't know if the code is optimized but it works. here is the code if needed :
fig = plt.figure(figsize=(18,10))
ax=fig.add_subplot(111)
date_rng = pd.date_range('2021-01-01 00:00:00','2021-01-08 00:00:00', freq='6h')
xlabels = pd.DataFrame(index=date_rng)
xlabels = xlabels.index.strftime('%H:%M').tolist()
liste_saisons = df['Saison'].unique().tolist()
for saisons in liste_saisons :
df_show = df.loc[(df['Saison'] == saisons)]
df_show = df_show.groupby(['Jour Semaine Nom','Time'],as_index=False)['SUM(CORR_VALUE)'].mean()
df_show['TimeShow'] = df_show['Jour Semaine Nom'] +' '+ df_show['Time'].astype(str)
ax.plot(df_show.index, df_show['SUM(CORR_VALUE)'], label=saisons, linewidth = 3)
fig.suptitle('Evolution de la charge BT quart-horaire moyenne semaine', fontsize=20)
plt.xlabel('Jour de la semaine + Heure', fontsize=20)
plt.ylabel('Charge BT quart-horaire moyenne [MW]', fontsize = 20)
plt.rc('legend', fontsize=16)
ax.legend(loc='upper left')
plt.grid(color='k', linestyle='-.', linewidth=1)
ax.set_xticklabels(xlabels)
plt.xticks(np.arange(0, 96*7, 4*6))
plt.ylim(50,350)
xdays = df_show["Jour Semaine Nom"].tolist()
graph_pos = plt.gca().get_position()
points = np.arange(48, len(xdays), 96)
day_points = np.arange(0, len(xdays), 96)
offset = -65.0
trans = ax.get_xaxis_transform()
for i,d in enumerate(xdays):
if i in points:
ax.text(i, graph_pos.y0 - offset, d, ha='center',bbox=dict(facecolor='cyan', edgecolor='black', boxstyle='round'), fontsize=12)
plt.show()
Result
There are many possible approaches to this kind of task, but I used the text and plot functions to deal with it. to add the first date, I took the size of the graph and subtracted the offset value from the y0 value to determine the position. To add the first date, I took the size of the graph and subtracted an offset value from the y0 value, and for each date, I manually set the y1 value to position the vertical line.
PS: For a faster answer, I will present it even with unfinished code. Attach an image instead of a link. Attach the toy data in text. This is necessary.
import pandas as pd
import numpy as np
date_rng = pd.date_range('2021-01-01','2021-03-01', freq='1h')
value = np.random.randint(100, 150, size=(1417,))
df = pd.DataFrame({'date':pd.to_datetime(date_rng),'value':value})
import matplotlib.pyplot as plt
w = 0.7
fig,ax = plt.subplots(figsize=(20,4))
ax.bar(df.date[:100].apply(lambda x:x.strftime('%Y-%m-%d %H:%M:%S')), df.value[:100], color='C0', width=w, align="center")
xlabels = df.date[:100].apply(lambda x:x.strftime('%H:%M:%S')).tolist()
xdays = df.date[:100].apply(lambda x:x.strftime('%d-%b')).tolist()
ax.set_xticklabels(xlabels, rotation=90)
graph_pos = plt.gca().get_position()
points = np.arange(12, len(xlabels), 24)
day_points = np.arange(0, len(xlabels), 24)
offset = 50.0
trans = ax.get_xaxis_transform()
for i,d in enumerate(xdays):
if i in points:
ax.text(i, graph_pos.y0 - offset, d, ha='center')
if i in day_points:
ax.plot([i, i], [0, -0.3], color='gray', transform=trans, clip_on=False)
ax.set_xlim(-1, len(xlabels))
plt.show()
I am trying to make a heat map like this one from bokeh:
Where all the code is here: http://docs.bokeh.org/en/latest/docs/gallery/unemployment.html
I got pretty close, but for some reason it is only printing the values in a diagonal order.
I tried to format my data the same way and just substitute it, but it got a little more complicated than that. Here is my data:
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import ColumnDataSource, figure, show, output_file
from bokeh.models import HoverTool
import pandas.util.testing as tm; tm.N = 3
df = pd.read_csv('MYDATA.csv', usecols=[1, 16])
df = df.set_index('recvd_dttm')
df.index = pd.to_datetime(df.index, format='%m/%d/%Y %H:%M')
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg(len).reset_index()
result.columns = ['Month', 'CompanyName', 'NumberCalls']
pivot_table = result.pivot(index='Month', columns='CompanyName', values='NumberCalls').fillna(0)
s = pivot_table.sum().sort(ascending=False,inplace=False)
pivot_table = pivot_table.ix[:,s.index[:46]]
pivot_table = pivot_table.transpose()
pivot_table.to_csv('pivot_table.csv')
pivot_table = pivot_table.reset_index()
pivot_table['CompanyName'] = [str(x) for x in pivot_table['CompanyName']]
Companies = list(pivot_table['CompanyName'])
months = ["1","2","3","4","5","6","7","8","9","10","11","12"]
pivot_table = pivot_table.set_index('CompanyName')
# this is the colormap from the original plot
colors = [
"#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
"#ddb7b1", "#cc7878", "#933b41", "#550b1d"
]
# Set up the data for plotting. We will need to have values for every
# pair of year/month names. Map the rate to a color.
month = []
company = []
color = []
rate = []
for y in pivot_table.index:
for m in pivot_table.columns:
month.append(m)
company.append(y)
num_calls = pivot_table.loc[y,m]
rate.append(num_calls)
color.append(colors[min(int(num_calls)-2, 8)])
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
output_file('heatmap.html')
TOOLS = "resize,hover,save,pan,box_zoom,wheel_zoom"
p = figure(title="Customer Calls This Year",
x_range=Companies, y_range=list(reversed(months)),
x_axis_location="above", plot_width=1400, plot_height=900,
toolbar_location="left", tools=TOOLS)
p.rect("Companies", "months", 1, 1, source=source,
color="color", line_color=None)
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "10pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3
hover = p.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
('Company Name', '#Companies'),
('Number of Calls', '#rate'),
])
show(p) # show the plot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# just following your previous post to simulate your data
np.random.seed(0)
dates = np.random.choice(pd.date_range('2015-01-01 00:00:00', '2015-06-30 00:00:00', freq='1h'), 10000)
company = np.random.choice(['company' + x for x in '1 2 3 4 5'.split()], 10000)
df = pd.DataFrame(dict(recvd_dttm=dates, CompanyName=company)).set_index('recvd_dttm').sort_index()
df['C'] = 1
df.columns = ['CompanyName', '']
result = df.groupby([lambda idx: idx.month, 'CompanyName']).agg({df.columns[1]: sum}).reset_index()
result.columns = ['Month', 'CompanyName', 'counts']
pivot_table = result.pivot(index='CompanyName', columns='Month', values='counts')
x_labels = ['Month'+str(x) for x in pivot_table.columns.values]
y_labels = pivot_table.index.values
fig, ax = plt.subplots()
x = ax.imshow(pivot_table, cmap=plt.cm.winter)
plt.colorbar(mappable=x, ax=ax)
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels)
ax.set_yticklabels(y_labels)
ax.set_xlabel('Month')
ax.set_ylabel('Company')
ax.set_title('Customer Calls This Year')
The answer was in this line:
source = ColumnDataSource(
data=dict(months=months, Companies=Companies, color=color, rate=rate)
)
It should have been:
source = ColumnDataSource(
data=dict(month=months, company=company, color=color, rate=rate)
)