Flatten broken horizontal bar chart to line graph or heatmap - python

I have data for all the time I've spent coding. This data is represented as a dictionary where the key is the date and the value is a list of tuples containing the time I started a coding session and how long the coding session lasted.
I have successfully plotted this on a broken_barh using the below code, where the y-axis is the date, the x-axis is the time in that day and each broken bar is an individual session.
for i,subSessions in enumerate(sessions.values()):
plt.broken_barh(subSessions, (i,1))
months = {}
start = getStartMonth()
for month in period_range(start=start,end=datetime.today(),freq="M"):
month = str(month)
months[month] = (datetime.strptime(month,'%Y-%m')-start).days
plt.yticks(list(months.values()),months.keys())
plt.xticks(range(0,24*3600,3600),[str(i)+":00" for i in range(24)],rotation=45)
plt.gca().invert_yaxis()
plt.show()
I want to use this data to discover what times of the day I spend the most time coding, but it isn't very clear from the above chart so I'd like to display it as a line graph or heatmap where the y-axis is the number of days I spent coding at the time on the x-axis (or, in other words, how many sessions are present in that column of the above chart). How do I accomplish this?

You can find some great examples of how to create a heatmap from matplotlib website.
Here is a basic code with some random data:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
index_labels = np.arange(0,24)
column_labels = pd.date_range(start='1/1/2022', end='1/31/2022').strftime('%m/%d')
#random data
np.random.seed(12345)
data = np.random.randint(0,60, size=(len(index_labels), len(column_labels)))
df = pd.DataFrame(data=data, columns=column_labels, index=index_labels)
#heatmap function
def heatmap(df, ax, cbarlabel="", cmap="Greens", label_num_dec_place=0):
df = df.copy()
# Ploting a blank heatmap
im = ax.imshow(df.values, cmap)
# create a customized colorbar
cbar = ax.figure.colorbar(im, ax=ax, fraction=0.05, extend='both', extendfrac=0.05)
cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom", fontsize=14)
# Setting ticks
ax.set_xticks(np.arange(df.shape[1]), labels=df.columns, fontsize=12)
ax.set_yticks(np.arange(df.shape[0]), labels=list(df.index), fontsize=12)
# proper placement of ticks
ax.tick_params(axis='x', top=True, bottom=False,
labeltop=True, labelbottom=False)
ax.spines[:].set_visible(False)
ax.grid(which="both", visible="False", color="white", linestyle='solid', linewidth=2)
ax.grid(False)
# Rotation of tick labels
plt.setp(ax.get_xticklabels(), rotation=-60,
ha="right", rotation_mode=None)
plt.setp(ax.get_yticklabels(), rotation=30)
#plotting and saving
fig, ax = plt.subplots(facecolor=(1,1,1), figsize=(20,8), dpi=200)
heatmap(df=df, ax=ax, cbarlabel="time (min)", cmap="Greens", label_num_dec_place=0)
plt.savefig('time_heatmap.png',
bbox_inches='tight',
facecolor=fig.get_facecolor(),
transparent=True,
)
Output:

One way to do it is to use sampling. Choose how many samples you want to take in a given interval (the precision, for example 288 samples per day) and split each interval by that number of samples and count how many sessions are within this sample. The downside to this is that it can't be 100% precise and increasing the precision increases the time it takes to generate (for me, it takes several minutes to generate a second-precise image, though this level of precision makes little to no difference to the result).
Here is some code which can produce both a heatmap and a line graph
# Configuration options
precisionPerDay = 288
timeTicksPerDay = 24
timeTickRotation = 60
timeTickFontSize = 6
heatmap = True
# Constants
hoursInDay = 24
secondsInHour = 3600
secondsInDay = hoursInDay*secondsInHour
xInterval = secondsInDay/precisionPerDay
timeTickSecondInterval = precisionPerDay/timeTicksPerDay
timeTickHourInterval = hoursInDay/timeTicksPerDay
# Calculating x-axis (time) ticks
xAxis = range(precisionPerDay)
timeTickLabels = []
timeTickLocations = []
for timeTick in range(timeTicksPerDay):
timeTickLocations.append(int(timeTick*timeTickSecondInterval))
hours = timeTick/timeTicksPerDay*hoursInDay
hour = int(hours)
minute = int((hours-hour)*60)
timeTickLabels.append(f"{hour:02d}:{minute:02d}")
# Calculating y-axis (height)
heights = []
for dayX in xAxis:
rangeStart = dayX*xInterval
rangeEnd = rangeStart+xInterval
y = 0
for date,sessions in sessions.items():
for session in sessions:
if session[0] < rangeEnd and session[0]+session[1] > rangeStart:
y += 1
heights.append(y)
# Plotting data
if heatmap:
plt.yticks([])
plt.imshow([heights], aspect="auto")
else:
plt.plot(xAxis,heights)
plt.ylim(ymin=0)
plt.xlim(xmin=0,xmax=len(heights))
plt.xlabel("Time of day")
plt.ylabel("How often I've coded at that time")
plt.xticks(timeTickLocations,timeTickLabels,
fontsize=timeTickFontSize,rotation=timeTickRotation)
plt.show()
And here are some sample results
Graph produced by same configuration options shown in above code
Same data but as a line graph with a lower precision (24 per day) and more time ticks (48)

Related

How to set xlim in seaborn barplot?

I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
f, axes = plt.subplots(4, 4, figsize = (12,12))
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, it means even if we have data from day 41 onwards, seaborn is going to refer the starting category with x = 0, making for us difficult to tweak the lower limit of x-axis post function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data
rng = np.random.default_rng(101)
day = np.arange(41,200)
born = rng.integers(low=0, high=10e4, size=200-41)
df = pd.DataFrame({"day":day, "born":born})
# plot
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(data=df, x='day', y='born', ax=ax, color='b')
ax.set_xlim(0,365)
ax.set_xticks(ticks=np.arange(0, 365, 30), labels=np.arange(0, 365, 30))
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(x=df.day, height=df.born) # instead of sns.barplot
ax.get_xaxis().set_label_text('')
ax.set_xlim(0,365)
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
dates = []
values = []
fig, ax = plt.subplots()
hours = mdates.HourLocator(interval=2)
d_fmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)
ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages')
ax.tick_params(axis='y', colors=Commands.chartColor)
ax.tick_params(axis='x', colors=Commands.chartColor)
ax.tick_params(which='minor', colors=Commands.chartColor)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
ax.spines["bottom"].set_color(Commands.chartColor)
ax.spines["left"].set_color(Commands.chartColor)
ax.spines["top"].set_color(Commands.chartColor)
ax.spines["right"].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.
Since I don't have any sample data, I created a simple data and made a graph. The 0:00 time on the timeline is a challenge, so I need to be creative. I have replaced the last 0:00 with 24:00. Then I set the time interval value to 48 as the interval on the X axis. In your code, it will be every 2 hours. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
dates = date_rng.strftime('%H:%M').tolist()
values = np.random.randint(0,25, size=25)
dates[-1] = '24:00'
fig, ax = plt.subplots(figsize=(12,9))
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
# ax.fill(dates, values)
ax.plot(dates, values, color=lineColor)
ax.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages', color=chartColor)
ax.tick_params(axis='y', colors=chartColor)
ax.tick_params(axis='x', colors=chartColor)
# ax.tick_params(which='major', colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
ax.spines["bottom"].set_color(chartColor)
ax.spines["left"].set_color(chartColor)
ax.spines["top"].set_color(chartColor)
ax.spines["right"].set_color(chartColor)
fig.set_facecolor(backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()

Change matplotlib subplots to seperate plots

I have gathered a code to make plots from data from multiple days. I have a data file containing over 40 days and 19k timestamps, and I need a plot, one for each day. I want python to generate them as different plots.
Mr. T helped me a lot with providing the code, but I cannot manage the code to get it to plot individual plots instead of all in one subplot. Can somebody help me with this?
Picture shows the current output:
My code:
import matplotlib.pyplot as plt
import numpy as np
#read your data and create datetime index
df= pd.read_csv('test-februari.csv', sep=";")
df.index = pd.to_datetime(df["Date"]+df["Time"].str[:-5], format="%Y:%m:%d %H:%M:%S")
#group by date and hour, count entries
dfcounts = df.groupby([df.index.date, df.index.hour]).size().reset_index()
dfcounts.columns = ["Date", "Hour", "Count"]
maxcount = dfcounts.Count.max()
#group by date for plotting
dfplot = dfcounts.groupby(dfcounts.Date)
#plot each day into its own subplot
fig, axs = plt.subplots(dfplot.ngroups, figsize=(6,8))
for i, groupdate in enumerate(dfplot.groups):
ax=axs[i]
#the marker is not really necessary but has been added in case there is just one entry per day
ax.plot(dfplot.get_group(groupdate).Hour, dfplot.get_group(groupdate).Count, color="blue", marker="o")
ax.set_title(str(groupdate))
ax.set_xlim(0, 24)
ax.set_ylim(0, maxcount * 1.1)
ax.xaxis.set_ticks(np.arange(0, 25, 2))
plt.tight_layout()
plt.show()
Welcome to the Stackoverflow.
Instead of creating multiple subplots, you can create a figure on the fly and plot onto it in every loop separately. And at the end show all of them at the same time.
for groupdate in dfplot.groups:
fig = plt.figure()
plt.plot(groupdate.Hour, groupdate.Count, color="blue", marker="o")
plt.title(str(groupdate))
plt.xlim(0, 24)
plt.ylim(0, maxcount * 1.1)
plt.xticks(np.arange(0, 25, 2))
plt.tight_layout()
plt.show()

How to add monthly labels to x-axis using matplotlib?

For an assignment I need to plot record (min and max) temperatures over the period 2004-2014 using matplotlib. The figure is almost complete (see below) except for the x axis labelling. When plotting, I did not specify the x-axis value so it generated integers from 0-365, thus the number of days in a year. Now I want the months to appear as x-axis labels instead of integers (Jan, Feb, etc.). Can someone help me out?
Record low and high temperatures:
I generated source data as follows:
np.random.seed(13)
dates = pd.date_range(start='2014-01-01', end='2014-12-31')
temp = pd.DataFrame({'tMin': np.random.normal(0, 0.5, dates.size).cumsum() - 10,
'tMax': np.random.normal(0, 0.5, dates.size).cumsum() + 10}, index=dates)
To get the picture with month labels, try the following code:
# Imports
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Drawing
fig, ax = plt.subplots(figsize=(10, 4))
plt.xlabel('Month')
plt.ylabel('Temp')
plt.title('Temperatures 2014')
ax.xaxis.set_major_locator(mdates.MonthLocator())
fmt = mdates.DateFormatter('%b %Y')
ax.xaxis.set_major_formatter(fmt)
ax.plot(temp.tMin)
ax.plot(temp.tMax)
ax.fill_between(temp.index, temp.tMin, temp.tMax, color='#A0E0A0', alpha=0.2)
plt.setp(ax.get_xticklabels(), rotation=30);
For the above source data I got the following picture:

Is it possible to generate a chart with this very specific background?

I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(50, 200), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
plt.ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2) # second plot
plt.ylim(0, 100)
actg.xaxis.set_minor_locator(dates.MinuteLocator())
actg.xaxis.set_major_formatter(hfmt)
atoco.xaxis.set_minor_locator(dates.MinuteLocator())
atoco.xaxis.set_major_formatter(hfmt)
plt.xticks(rotation=45)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in fig.axes:
ax.grid(True)
plt.tight_layout()
plt.show()
OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn of the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often to show xticklabels and repeat yticklabels:
xtickinterval = 10
# Make random data
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(120, 160), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2,sharex=actg) # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)
# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(dates.MinuteLocator(byminute=np.arange(0,60,xtickinterval)))
# Set the minor ticks to every 30 seconds
minloc = dates.SecondLocator(bysecond=[0,30])
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)
# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
print times[-1]-times[0]
# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in [actg,atoco]:
# Turn off the yticklabels on the right hand side
ax.set_yticklabels([])
# Set the grids
ax.xaxis.grid(True,which='both',color='r')
ax.yaxis.grid(True,which='major',color='r')
# Create new yticklabels every major tick on the xaxis
for tick in ax.get_xticks():
tx = ax.twinx()
tx.set_ylim(ax.get_ylim())
tx.spines['right'].set_position(('data',tick))
tx.spines['right'].set_color('None')
for tic in tx.yaxis.get_major_ticks():
tic.tick1On = tic.tick2On = False
plt.tight_layout()
plt.show()

Categories