Create gantt chart with hlines? - python

I've tried for several hours to make this work. I tried using 'python-gantt' package, without luck. I also tried plotly (which was beautiful, but I can't host my sensitive data on their site, so that won't work).
My starting point is code from here:
How to plot stacked event duration (Gantt Charts) using Python Pandas?
Three Requirements:
Include the 'Name' on the y axis rather than the numbers.
If someone has multiple events, put all the event periods on one line (this will make pattern identification easier), e.g. Lisa will only have one line on the visual.
Include the 'Event' listed on top of the corresponding line (if possible), e.g. Lisa's first line would say "Hire".
The code will need to be dynamic to accommodate many more people and more possible event types...
I'm open to suggestions to visualize: I want to show the duration for various staffing events throughout the year, as to help identify patterns.
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dt

# Sample staffing events: one row per (person, event) with its date range.
df = pd.DataFrame({'Name': ['Joe','Joe','Lisa','Lisa','Lisa','Alice'],
                   'Event': ['Hire','Term','Hire','Transfer','Term','Term'],
                   'Start_Date': ["2014-01-01","2014-02-01","2015-01-01","2015-02-01","2015-03-01","2016-01-01"],
                   'End_Date': ["2014-01-31","2014-03-15","2015-01-31","2015-02-28","2015-05-01","2016-09-01"]
                   })
df = df[['Name','Event','Start_Date','End_Date']]
# pd.to_datetime already yields a datetime64 column that date2num accepts;
# the extra .astype(datetime) cast is not a valid dtype in current pandas.
df.Start_Date = pd.to_datetime(df.Start_Date)
df.End_Date = pd.to_datetime(df.End_Date)

fig = plt.figure()
ax = fig.add_subplot(111)
# xaxis_date() configures the axis and returns None, so its result must not
# be assigned back to `ax` (that would throw away the Axes handle).
ax.xaxis_date()
# One horizontal line per row, drawn at y = row index from start to end date.
ax.hlines(df.index, dt.date2num(df.Start_Date), dt.date2num(df.End_Date))

I encountered the same problem in the past. You seem to appreciate the aesthetics of Plotly. Here is a little piece of code which uses matplotlib.pyplot.broken_barh instead of matplotlib.pyplot.hlines.
from collections import defaultdict
from datetime import datetime
from datetime import date
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

# Sample staffing events: one row per (person, event) with its date range.
df = pd.DataFrame({
    'Name': ['Joe', 'Joe', 'Lisa', 'Lisa', 'Lisa', 'Alice'],
    'Event': ['Hire', 'Term', 'Hire', 'Transfer', 'Term', 'Term'],
    'Start_Date': ['2014-01-01', '2014-02-01', '2015-01-01', '2015-02-01', '2015-03-01', '2016-01-01'],
    'End_Date': ['2014-01-31', '2014-03-15', '2015-01-31', '2015-02-28', '2015-05-01', '2016-09-01']
})
df = df[['Name', 'Event', 'Start_Date', 'End_Date']]
# pd.to_datetime already yields datetime64 columns that date2num accepts;
# the extra .astype(datetime) cast is not a valid dtype in current pandas.
df.Start_Date = pd.to_datetime(df.Start_Date)
df.End_Date = pd.to_datetime(df.End_Date)

# One chart row per distinct person, in order of first appearance.
names = df.Name.unique()
nb_names = len(names)

fig = plt.figure()
ax = fig.add_subplot(111)

bar_width = 0.8
default_color = 'blue'
# Event types without an explicit colour fall back to default_color.
colors_dict = defaultdict(lambda: default_color, Hire='green', Term='red', Transfer='orange')

# Plot the events
for index, name in enumerate(names):
    person_events = df.loc[df.Name == name]
    start_dates = mdates.date2num(person_events.Start_Date)
    end_dates = mdates.date2num(person_events.End_Date)
    durations = end_dates - start_dates
    # Materialise the (start, width) pairs: in Python 3 a bare zip() is a
    # one-shot iterator without len(), which broken_barh cannot consume.
    xranges = list(zip(start_dates, durations))
    # (ymin, height): the bar is centred on the integer row position.
    yrange = (index - bar_width / 2.0, bar_width)
    facecolors = [colors_dict[event] for event in person_events.Event]
    # you can set alpha to 0.6 to check if there are some overlaps
    ax.broken_barh(xranges, yrange, facecolors=facecolors, alpha=1.0)

# Shrink the x-axis so the legend fits to the right of the plot
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Add the legend
patches = [mpatches.Patch(color=color, label=key) for (key, color) in colors_dict.items()]
patches = patches + [mpatches.Patch(color=default_color, label='Other')]
plt.legend(handles=patches, bbox_to_anchor=(1, 0.5), loc='center left')

# Format the x-ticks
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(mdates.MonthLocator())

# Format the y-ticks: show the person's name instead of the row number
ax.set_yticks(range(nb_names))
ax.set_yticklabels(names)

# Set the limits to whole calendar years around the data
date_min = date(df.Start_Date.min().year, 1, 1)
date_max = date(df.End_Date.max().year + 1, 1, 1)
ax.set_xlim(date_min, date_max)

# Format the coords message box
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')

# Set the title
ax.set_title('Gantt Chart')
plt.show()
I hope this will help you.

Related

Matplotlib Plot X-Axis by Month

I am looking to automate some work I have been doing in PowerPoint/Excel using Python and MatPlotLib; however, I am having trouble recreating what I have been doing in PowerPoint/Excel.
I have three data series that are grouped by month on the x-axis; however, the months are not date/time and have no real x-values. I want to be able to assign x-values based on the number of rows (so they are not stacked), then group them by month, and add a vertical line once the month "value" changes.
It is also important to note that the number of rows per month can vary, so I'm having trouble grouping the months and automatically adding the vertical line once the month data changes to the next month.
Here is a sample image of what I created in PowerPoint/Excel and what I am hoping to accomplish:
Here is what I have so far:
For above: I added a new column to my csv file named "Count" and added that as my x-values; however, that is only a workaround to get my desired "look" and does not separate the points by month.
My code so far:
manipulate.csv
Count,Month,Type,Time
1,June,Purple,13
2,June,Orange,3
3,June,Purple,13
4,June,Orange,12
5,June,Blue,55
6,June,Blue,42
7,June,Blue,90
8,June,Orange,3
9,June,Orange,171
10,June,Blue,132
11,June,Blue,96
12,July,Orange,13
13,July,Orange,13
14,July,Orange,22
15,July,Orange,6
16,July,Purple,4
17,July,Orange,3
18,July,Orange,18
19,July,Blue,99
20,August,Blue,190
21,August,Blue,170
22,August,Orange,33
23,August,Orange,29
24,August,Purple,3
25,August,Purple,9
26,August,Purple,6
testchart.py
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('manipulate.csv')
df = df.reindex(columns=["Month", "Type", "Time", "Count"])
# One column per Type, holding that row's Time (NaN on rows of another type).
for type_name in ('Orange', 'Blue', 'Purple'):
    df[type_name] = df.loc[df['Type'] == type_name, 'Time']
print(df)

# "Count" is the running row number and serves as the x position.
w = df['Count']
x, y, z = df['Orange'], df['Blue'], df['Purple']
for series, colour in ((x, 'Orange'), (y, 'Blue'), (z, 'Purple')):
    plt.plot(w, series, linestyle='none', marker='o', c=colour)
plt.ylabel("Time")
plt.xlabel("Month")
plt.show()
Can I suggest using Seaborn's swarmplot instead? It might be easier:
import seaborn as sns
import matplotlib.pyplot as plt

# Parse the month names as dates, then convert back to the month's name
df.Month = pd.to_datetime(df.Month, format='%B').dt.month_name()
sns.swarmplot(data=df, x='Month', y='Time', hue='Type', palette=['purple', 'orange', 'blue'])
plt.legend().remove()
# Draw a dashed separator halfway between each pair of neighbouring months.
month_count = len(df.Month.unique())
for left_index in range(month_count - 1):
    plt.axvline(0.5 + left_index, linestyle='--', color='black', alpha=0.5)
Output Graph:
Or Seaborn's stripplot with some jitter value:
import seaborn as sns
import matplotlib.pyplot as plt

# Parse the month names as dates, then convert back to the month's name
df.Month = pd.to_datetime(df.Month, format='%B').dt.month_name()
sns.stripplot(data=df, x='Month', y='Time', hue='Type', palette=['purple', 'orange', 'blue'], jitter=0.4)
plt.legend().remove()
# Draw a dashed separator halfway between each pair of neighbouring months.
month_count = len(df.Month.unique())
for left_index in range(month_count - 1):
    plt.axvline(0.5 + left_index, linestyle='--', color='black', alpha=0.5)
If not, this answer will use matplotlib.dates's mdates to format the labels of the xaxis to just the month names. It will also use datetime's timedelta to add some days to each month to split them up (so that they are not overlapped):
from datetime import timedelta
import matplotlib.dates as mdates
import matplotlib.pyplot as plt

df.Month = pd.to_datetime(df.Month, format='%B')
separators = df.Month.unique()  # Get each unique month, to be used for the vertical lines

# Spread the rows of each month evenly over days 2..25 of that month so the
# points do not overlap. The offset is derived per row (position within its
# month / rows in that month), so every row gets exactly one offset no matter
# how many rows a month has or in which order the months appear.
rank_in_month = df.groupby('Month').cumcount()
rows_in_month = df.groupby('Month')['Month'].transform('size')
day_offsets = 2 + 23 * rank_in_month / rows_in_month
df.Month = df.Month + pd.to_timedelta(day_offsets, unit='D')

df = df.reindex(columns=["Month", "Type", "Time", "Count"])
# One column per Type, holding that row's Time (NaN on rows of another type).
df['Orange'] = df.loc[df['Type'] == 'Orange', 'Time']
df['Blue'] = df.loc[df['Type'] == 'Blue', 'Time']
df['Purple'] = df.loc[df['Type'] == 'Purple', 'Time']
w = df['Count']
x = df['Orange']
y = df['Blue']
z = df['Purple']

fig, ax = plt.subplots()
plt.plot(df.Month, x, linestyle='none', marker='o', c='Orange')
plt.plot(df.Month, y, linestyle='none', marker='o', c='Blue')
plt.plot(df.Month, z, linestyle='none', marker='o', c='Purple')
plt.ylabel("Time")
plt.xlabel("Month")
ax.xaxis.set_major_locator(mdates.MonthLocator(bymonthday=15))  # Set the locator at the 15th of each month
ax.xaxis.set_major_formatter(mdates.DateFormatter('%B'))  # Set the format to just be the month name
# Add a separator at every month starting at the second month
for sep in separators[1:]:
    plt.axvline(sep, linestyle='--', color='black', alpha=0.5)
plt.show()
Output:
This is how I put your data in a df, in case anyone else wants to grab it to help answer the question:
from io import StringIO
import pandas as pd

# Inline copy of manipulate.csv so the frame can be rebuilt without the file.
_CSV_TEXT = '''Count,Month,Type,Time
1,June,Purple,13
2,June,Orange,3
3,June,Purple,13
4,June,Orange,12
5,June,Blue,55
6,June,Blue,42
7,June,Blue,90
8,June,Orange,3
9,June,Orange,171
10,June,Blue,132
11,June,Blue,96
12,July,Orange,13
13,July,Orange,13
14,July,Orange,22
15,July,Orange,6
16,July,Purple,4
17,July,Orange,3
18,July,Orange,18
19,July,Blue,99
20,August,Blue,190
21,August,Blue,170
22,August,Orange,33
23,August,Orange,29
24,August,Purple,3
25,August,Purple,9
26,August,Purple,6'''

# Wrap the text in a file-like object so read_csv can parse it.
TESTDATA = StringIO(_CSV_TEXT)
df = pd.read_csv(TESTDATA, sep=',')
Maybe add custom x-axis labels and separating lines between months:
# Flag the last row of every month: its Month differs from the next row's.
is_month_end = df.Month.ne(df.Month.shift(-1))
month_ends = df[is_month_end]

# Dashed line just after the last point of every month except the final one.
for last_count in month_ends.Count.values[:-1]:
    plt.axvline(last_count + 0.5, linestyle="--", color="gray")

# Put each month's label midway between the previous month's end and its own end.
tick_positions = (month_ends.Count + month_ends.Count.shift(fill_value=0)) / 2
plt.xticks(tick_positions, month_ends.Month)

for color in ["Orange", "Blue", "Purple"]:
    plt.plot(df["Count"], df[color], linestyle="none", marker="o",
             color=color.lower(), label=color)
I would also advise that you rename the color columns into something more descriptive and if possible add more time information to your data sample (days, year).

Change "Q" quarterly data to custom "kv" in Matplotlib timeline chart on x axis Python

I have made the following timeline chart in Python, where the data is in quarterly format with a DatetimeIndex. However, I need to translate the graph into my local language and therefore replace "Q1", "Q2", "Q3", "Q4" with "kv1", "kv2", "kv3", "kv4". Is this possible? I need the x-axis to show kv3, kv4, kv1 2022, kv2 instead of Q3, Q4, Q1 2022, Q2, and so forth.
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import pandas

plt.style.use('seaborn-whitegrid')
matplotlib.rcParams['font.sans-serif'] = "Arial"
matplotlib.rcParams['font.family'] = "Arial"

categories = ['Car','Train','Boat', 'Plane', 'Walk' ]
# Two-way mapping between category names and the y positions 1..5.
cat_dict = dict(zip(categories, range(1, len(categories)+1)))
val_dict = dict(zip(range(1, len(categories)+1), categories))

# Quarter-end dates; date_range replaces the DatetimeIndex(start=, end=, freq=)
# constructor form, which pandas no longer provides.
dates = pandas.date_range(start='2021-09-30', end='2023-12-31', freq='Q')
values = [random.choice(categories) for _ in range(len(dates))]
df = pandas.DataFrame(data=values, index=dates, columns=['category'])
df['plotval'] = [float('NaN'),1,1,3,1,float('NaN'),5,2,1,float('NaN')]
# Single .loc assignment instead of chained indexing, which may write to a copy.
df.loc[df.index[0], 'plotval'] = np.nan

plt.rcParams["figure.figsize"] = 4,3.5
plt.figure(dpi=1000)
fig, ax = plt.subplots()
df['plotval'].plot(ax=ax, style='^',color='darkblue', label = "Renteheving", markersize=12)
ax.margins(0.2)
ax.spines['top'].set_visible(False)
# Show the category name for each numeric y position.
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, pos: val_dict.get(x)))
plt.yticks( weight = 'bold')
I tried to add
plt.xlabel(["kv1", "kv2", "kv3", "kv4"])
Which gave me
Help is as always highly appreciated.
Try to add this to your code:
# Tick labels only have text after the figure has been drawn
plt.draw()

# Rewrite the major labels first, then the minor labels
for use_minor in (False, True):
    relabelled = []
    for tick_label in ax.get_xticklabels(minor=use_minor):
        tick_label.set_text(tick_label.get_text().replace('Q', 'kv'))
        relabelled.append(tick_label)
    ax.set_xticklabels(relabelled, minor=use_minor)
It throws a warning which I don't understand, but I think it does what you want.
I could not test it because I can't reproduce your graph, but this should work:
# Dictionary mapping each label to its replacement
D = {'Q1': 'kv1', 'Q2': 'kv2', 'Q3': 'kv3', 'Q4': 'kv4'}
# Read the current label texts and swap those that have a replacement;
# any other label is kept unchanged.
labels = [D.get(text, text) for text in (tick.get_text() for tick in ax.get_xticklabels())]
ax.set_xticklabels(labels)  # apply the new labels

Wrap xlabels in Seaborn Plot

I have been trying to modify my plot so that the x-axis labels are wrapped.
Have looked at few suggestions from similar questions.
But am unable to use them on this.
The ax.set_xticklabels code does not wrap the labels.
The plt.xticks code throws an error -
AttributeError: 'Text' object has no attribute 'expandtabs'
# Count plot of PaymentMethod, split by Churn.
plt.figure(figsize = (7,5))
ax = sns.countplot(data = df3, x = df3.PaymentMethod, hue = df3.Churn)
#ax.set_xticklabels(ax.get_xticklabels(), ha="right", horizontalalignment = 'center', wrap = True)
# NOTE: ax.get_xticklabels() yields Text objects, and they are passed straight
# to textwrap.fill here; that matches the reported
# "'Text' object has no attribute 'expandtabs'" error.
plt.xticks([textwrap.fill(label, 10) for label in ax.get_xticklabels()],
rotation = 10, fontsize=8, horizontalalignment="center")
Image of plot with overlapping xlabels
textwrap works as expected with the code suggested in the comments:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import seaborn as sns # v 0.11.0
import textwrap

# Build a reproducible sample: a category column with names of increasing
# length and a random 'True'/'False' column used as hue.
rng = np.random.default_rng(seed=1)
cat_names = [
    'Short name',
    'Slightly longer name',
    'Rather much longer name',
    'Longest name of them all by far',
]
counts = rng.integers(10, 100, len(cat_names))
var_cat = np.repeat(cat_names, counts)
var_bool = rng.choice(['True', 'False'], size=len(var_cat))
df = pd.DataFrame({'vcat': var_cat, 'vbool': var_bool})

# Draw the count plot, then wrap each tick label's text at 12 characters.
ax = sns.countplot(data=df, x='vcat', hue='vbool')
labels = []
for tick_label in ax.get_xticklabels():
    labels.append(textwrap.fill(tick_label.get_text(), 12))
ax.set_xticklabels(labels);

How to combine two heatmaps in Seaborn in Python so both are shown in the same heatmap?

This is link to the data I'm using:
https://github.com/fivethirtyeight/data/tree/master/drug-use-by-age
I'm using Jupyter Lab, and here's the code:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sb

url = 'https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv'
df = pd.read_csv(url, index_col = 0)
df.dtypes
# Treat '-' entries as missing so that every column can be cast to float.
df.replace('-', np.nan, inplace=True)
df = df.astype(float)
# Drop the 'n' column; only the use/frequency columns are plotted.
df = df.loc[:, df.columns != 'n']
#df.columns = df.columns.str.rstrip('-use')
df

fig, axes = plt.subplots(1,2, figsize=(20, 8))
fig.subplots_adjust(wspace=0.1)
# No explicit fig.colorbar(...) call here: the former one referenced an
# undefined `ax` before any heatmap existed, and each sb.heatmap call below
# already draws its own colorbar configured through cbar_kws.
#plt.figure(figsize=(16, 16))
# Columns alternate: even positions are taken as percentages, odd positions as frequencies.
df_percentage = df.iloc[:,range(0,26,2)]
plot_precentage = sb.heatmap(df_percentage, cmap='Reds', ax=axes[0], cbar_kws={'format': '%.0f%%', 'label': '% used in past 12 months'})
df_frequency = df.iloc[:,range(1,27,2)]
plot_frequency = sb.heatmap(df_frequency, cmap='Blues', ax=axes[1], cbar_kws= dict(label = 'median frequency a user used'))
I can just show two of them in a subplot in separate diagrams.
I want to make it look like this (this is made in paint):
Also show the data side by side. Is there a simple way to achieve that?
A pretty simple solution with mask option:
# Checkerboard-by-column mask: 1 on odd column positions, 0 on even ones.
# seaborn hides the cells where the mask is truthy, so the first call draws
# the even columns in red and the second (inverted mask) draws the odd
# columns in blue on the same Axes.
column_parity = np.arange(df.shape[1]) % 2
mask = np.tile(column_parity, (df.shape[0], 1))
fig, axes = plt.subplots()
# `sb` is the alias under which the question's code imports seaborn.
plot_precentage = sb.heatmap(df, mask=mask, cmap='Reds', ax=axes,
                             cbar_kws={'format': '%.0f%%',
                                       'label': '% used in past 12 months'}
                             )
plot_frequency = sb.heatmap(df, mask=1 - mask, cmap='Blues', ax=axes,
                            cbar_kws=dict(label='median frequency a user used')
                            )
Output:

Skipping certain values in Python with Matplotlib

I am currently working on an intra-day stock chart using the Alpha Vantage API. The data frame contains values from 4:00 to 20:00. In my matplotlib.pyplot chart, however, the x-axis also includes values from 20:00 to 4:00 overnight. I don't want this, as it messes up the aesthetics and also the Volume subplot.
Q: Is there any way to skip x-axis values which don't exist in the actual DataFrame (the values from 20:00 to 04:00)?
As you can see, the Data Frame clearly jumps from 20:00 to 04:00
However in the Matplotlib chart, the x-Axis contains the values from 20:00 to 4:00, messing with the chart
Code so far. I believe so far everything is right:
import pandas as pd
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
import time
import datetime as dt
from datetime import timedelta as td
from dateutil.relativedelta import relativedelta
#Accessing and Preparing API
# NOTE(review): api_key is not defined in this snippet; it must be set beforehand.
ts = TimeSeries(key=api_key, output_format='pandas')
ticker_input = "TSLA"
interval_input = "15min"
df, meta_data = ts.get_intraday(symbol = ticker_input, interval = interval_input, outputsize = 'full')
# Keep the first 16*4*5 = 320 rows -- presumably 16 hours x 4 bars per hour x 5 days; TODO confirm.
slice_date = 16*4*5
df = df[0:slice_date]
# Reverse the row order.
df = df.iloc[::-1]
# Rolling mean of the close; note the window is 50 rows although the column is named "100ma".
df["100ma"] = df["4. close"].rolling(window = 50, min_periods = 0).mean()
df["Close"] = df["4. close"]
df["Date"] = df.index
#Plotting all as 2 different subplots
# Price panel on top (rows 0-4 of a 7-row grid).
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df["Date"], df['Close'])
ax1.plot(df["Date"], df["100ma"], linewidth = 0.5)
plt.xticks(rotation=45)
# Volume panel below, sharing the x axis with the price panel.
# NOTE(review): this call uses a 6-row grid while ax1 uses 7 rows -- confirm which is intended.
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df["Date"], df["5. volume"])
ax2.axes.xaxis.set_visible(False)
plt.tight_layout()
plt.show()
It would be great if anybody could help. I'm still a complete beginner and only started Python 2 weeks ago.
We got the data from the same place, although the data acquisition method is different. After resampling it to 15-minute intervals, I created a graph by excluding the data after 8 pm and before 4 am. I wrote the code on the understanding that by "skip" you mean leaving a gap for the overnight pause. The values you want skipped are skipped once they are set to NaN.
import datetime
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import mplfinance as mpf
# import matplotlib.pyplot as plt

# Read the Alpha Vantage key from a local file; strip the trailing newline
# that text files usually end with so it is not sent as part of the key.
with open('./alpha_vantage_api_key.txt') as f:
    api_key = f.read().strip()

now_ = datetime.datetime.today()
start = datetime.datetime(2019, 1, 1)
# Midnight of yesterday. Subtracting a timedelta (rather than using
# `now_.day - 1`) also works on the first day of a month.
yesterday = now_.date() - datetime.timedelta(days=1)
end = datetime.datetime(yesterday.year, yesterday.month, yesterday.day)

symbol = 'TSLA'
df = web.DataReader(symbol, 'av-intraday', start, end, api_key=api_key)
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
df.index = pd.to_datetime(df.index)
# Rolling mean of the close; the window is 50 rows although the column is named "100ma".
df["100ma"] = df["Close"].rolling(window = 50, min_periods = 0).mean()
df["Date"] = df.index

# Put the data on a regular 15-minute grid (missing slots become NaN rows),
# then keep only the rows whose hour is between 4 and 20 inclusive.
df_15 = df.asfreq('15min')
df_15 = df_15[(df_15.index.hour >= 4)&(df_15.index.hour <= 20) ]

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8,4.5),dpi=144)
#Plotting all as 2 different subplots
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df_15["Date"], df_15['Close'])
ax1.plot(df_15["Date"], df_15["100ma"], linewidth = 0.5)
plt.xticks(rotation=20)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df_15["Date"], df_15["Volume"])
ax2.axes.xaxis.set_visible(False)
# plt.tight_layout()
plt.show()
I fixed it using matplotlib.ticker.Formatter.
I first created a formatter class and then used it as follows:
class MyFormatter(Formatter):
    """Tick formatter that labels integer x positions with entries of a date sequence."""

    def __init__(self, dates, fmt='%Y-%m-%d %H:%M'):
        # dates: indexable sequence whose items provide strftime()
        # fmt: strftime pattern used for every label
        self.dates = dates
        self.fmt = fmt

    def __call__(self, x, pos=0):
        """Return the label for time x at position pos"""
        # Ticks may fall between data points, so snap to the nearest index.
        index = int(np.round(x))
        if 0 <= index < len(self.dates):
            return self.dates[index].strftime(self.fmt)
        # Positions outside the data get no label.
        return ''
# Plot against the row position 0..len(df)-1 instead of the timestamps, so
# there are no overnight gaps; the formatter turns positions back into dates.
formatter = MyFormatter(df.index)
positions = np.arange(len(df))

ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.xaxis.set_major_formatter(formatter)
ax1.plot(positions, df["Close"])
ax1.plot(positions, df["100ma"], linewidth = 0.5)
# Axes objects have no xticks() method; tick_params rotates the x tick labels.
ax1.tick_params(axis='x', labelrotation=45)
# NOTE(review): xmin, xmax, ymin and ymax are not defined in this snippet;
# set them to the desired view limits before this line.
ax1.axis([xmin,xmax,ymin,ymax])

ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(positions, df["5. volume"])
plt.show()
This gave me a smoother graph than the one before and also that recommended by r-beginner.
The only issue that I have is that if I zoom in, the x-axis label format doesn't change: it always shows the year, month, day, hour, and minute. Obviously I only want the hour and minute when I'm zoomed in further. I have yet to figure out how to do that.

Categories