Is it possible to generate a chart with this very specific background? - python

I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(50, 200), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
plt.ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2) # second plot
plt.ylim(0, 100)
actg.xaxis.set_minor_locator(dates.MinuteLocator())
actg.xaxis.set_major_formatter(hfmt)
atoco.xaxis.set_minor_locator(dates.MinuteLocator())
atoco.xaxis.set_major_formatter(hfmt)
plt.xticks(rotation=45)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in fig.axes:
ax.grid(True)
plt.tight_layout()
plt.show()

OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn of the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often to show xticklabels and repeat yticklabels:
xtickinterval = 10
# Make random data
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(120, 160), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2,sharex=actg) # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)
# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(dates.MinuteLocator(byminute=np.arange(0,60,xtickinterval)))
# Set the minor ticks to every 30 seconds
minloc = dates.SecondLocator(bysecond=[0,30])
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)
# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
print times[-1]-times[0]
# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in [actg,atoco]:
# Turn off the yticklabels on the right hand side
ax.set_yticklabels([])
# Set the grids
ax.xaxis.grid(True,which='both',color='r')
ax.yaxis.grid(True,which='major',color='r')
# Create new yticklabels every major tick on the xaxis
for tick in ax.get_xticks():
tx = ax.twinx()
tx.set_ylim(ax.get_ylim())
tx.spines['right'].set_position(('data',tick))
tx.spines['right'].set_color('None')
for tic in tx.yaxis.get_major_ticks():
tic.tick1On = tic.tick2On = False
plt.tight_layout()
plt.show()

Related

How to set xlim in seaborn barplot?

I have created a barplot for given days of the year and the number of people born on this given day (figure a). I want to set the x-axes in my seaborn barplot to xlim = (0,365) to show the whole year.
But, once I use ax.set_xlim(0,365) the bar plot is simply moved to the left (figure b).
This is the code:
#data
df = pd.DataFrame()
df['day'] = np.arange(41,200)
df['born'] = np.random.randn(159)*100
#plot
f, axes = plt.subplots(4, 4, figsize = (12,12))
ax = sns.barplot(df.day, df.born, data = df, hue = df.time, ax = axes[0,0], color = 'skyblue')
ax.get_xaxis().set_label_text('')
ax.set_xticklabels('')
ax.set_yscale('log')
ax.set_ylim(0,10e3)
ax.set_xlim(0,366)
ax.set_title('SE Africa')
How can I set the x-axes limits to day 0 and 365 without the bars being shifted to the left?
IIUC, the expected output given the nature of data is difficult to obtain straightforwardly, because, as per the documentation of seaborn.barplot:
This function always treats one of the variables as categorical and draws data at ordinal positions (0, 1, … n) on the relevant axis, even when the data has a numeric or date type.
This means the function seaborn.barplot creates categories based on the data in x (here, df.day) and they are linked to integers, starting from 0.
Therefore, it means even if we have data from day 41 onwards, seaborn is going to refer the starting category with x = 0, making for us difficult to tweak the lower limit of x-axis post function call.
The following code and corresponding plot clarifies what I explained above:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# data
rng = np.random.default_rng(101)
day = np.arange(41,200)
born = rng.integers(low=0, high=10e4, size=200-41)
df = pd.DataFrame({"day":day, "born":born})
# plot
f, ax = plt.subplots(figsize=(4, 4))
sns.barplot(data=df, x='day', y='born', ax=ax, color='b')
ax.set_xlim(0,365)
ax.set_xticks(ticks=np.arange(0, 365, 30), labels=np.arange(0, 365, 30))
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()
I suggest using matplotlib.axes.Axes.bar to overcome this issue, although handling colors of the bars would be not straightforward compared to sns.barplot(..., hue=..., ...) :
# plot
f, ax = plt.subplots(figsize=(4, 4))
ax.bar(x=df.day, height=df.born) # instead of sns.barplot
ax.get_xaxis().set_label_text('')
ax.set_xlim(0,365)
ax.set_yscale('log')
ax.set_title('SE Africa')
plt.tight_layout()
plt.show()

Flatten broken horizontal bar chart to line graph or heatmap

I have data for all the time I've spent coding. This data is represented as a dictionary where the key is the date and the value is a list of tuples containing the time I started a coding session and how long the coding session lasted.
I have successfully plotted this on a broken_barh using the below code, where the y-axis is the date, the x-axis is the time in that day and each broken bar is an individual session.
for i,subSessions in enumerate(sessions.values()):
plt.broken_barh(subSessions, (i,1))
months = {}
start = getStartMonth()
for month in period_range(start=start,end=datetime.today(),freq="M"):
month = str(month)
months[month] = (datetime.strptime(month,'%Y-%m')-start).days
plt.yticks(list(months.values()),months.keys())
plt.xticks(range(0,24*3600,3600),[str(i)+":00" for i in range(24)],rotation=45)
plt.gca().invert_yaxis()
plt.show()
I want to use this data to discover what times of the day I spend the most time coding, but it isn't very clear from the above chart so I'd like to display it as a line graph or heatmap where the y-axis is the number of days I spent coding at the time on the x-axis (or, in other words, how many sessions are present in that column of the above chart). How do I accomplish this?
You can find some great examples of how to create a heatmap from matplotlib website.
Here is a basic code with some random data:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
index_labels = np.arange(0,24)
column_labels = pd.date_range(start='1/1/2022', end='1/31/2022').strftime('%m/%d')
#random data
np.random.seed(12345)
data = np.random.randint(0,60, size=(len(index_labels), len(column_labels)))
df = pd.DataFrame(data=data, columns=column_labels, index=index_labels)
#heatmap function
def heatmap(df, ax, cbarlabel="", cmap="Greens", label_num_dec_place=0):
df = df.copy()
# Ploting a blank heatmap
im = ax.imshow(df.values, cmap)
# create a customized colorbar
cbar = ax.figure.colorbar(im, ax=ax, fraction=0.05, extend='both', extendfrac=0.05)
cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom", fontsize=14)
# Setting ticks
ax.set_xticks(np.arange(df.shape[1]), labels=df.columns, fontsize=12)
ax.set_yticks(np.arange(df.shape[0]), labels=list(df.index), fontsize=12)
# proper placement of ticks
ax.tick_params(axis='x', top=True, bottom=False,
labeltop=True, labelbottom=False)
ax.spines[:].set_visible(False)
ax.grid(which="both", visible="False", color="white", linestyle='solid', linewidth=2)
ax.grid(False)
# Rotation of tick labels
plt.setp(ax.get_xticklabels(), rotation=-60,
ha="right", rotation_mode=None)
plt.setp(ax.get_yticklabels(), rotation=30)
#plotting and saving
fig, ax = plt.subplots(facecolor=(1,1,1), figsize=(20,8), dpi=200)
heatmap(df=df, ax=ax, cbarlabel="time (min)", cmap="Greens", label_num_dec_place=0)
plt.savefig('time_heatmap.png',
bbox_inches='tight',
facecolor=fig.get_facecolor(),
transparent=True,
)
Output:
One way to do it is to use sampling. Choose how many samples you want to take in a given interval (the precision, for example 288 samples per day) and split each interval by that number of samples and count how many sessions are within this sample. The downside to this is that it can't be 100% precise and increasing the precision increases the time it takes to generate (for me, it takes several minutes to generate a second-precise image, though this level of precision makes little to no difference to the result).
Here is some code which can produce both a heatmap and a line graph
# Configuration options
precisionPerDay = 288
timeTicksPerDay = 24
timeTickRotation = 60
timeTickFontSize = 6
heatmap = True
# Constants
hoursInDay = 24
secondsInHour = 3600
secondsInDay = hoursInDay*secondsInHour
xInterval = secondsInDay/precisionPerDay
timeTickSecondInterval = precisionPerDay/timeTicksPerDay
timeTickHourInterval = hoursInDay/timeTicksPerDay
# Calculating x-axis (time) ticks
xAxis = range(precisionPerDay)
timeTickLabels = []
timeTickLocations = []
for timeTick in range(timeTicksPerDay):
timeTickLocations.append(int(timeTick*timeTickSecondInterval))
hours = timeTick/timeTicksPerDay*hoursInDay
hour = int(hours)
minute = int((hours-hour)*60)
timeTickLabels.append(f"{hour:02d}:{minute:02d}")
# Calculating y-axis (height)
heights = []
for dayX in xAxis:
rangeStart = dayX*xInterval
rangeEnd = rangeStart+xInterval
y = 0
for date,sessions in sessions.items():
for session in sessions:
if session[0] < rangeEnd and session[0]+session[1] > rangeStart:
y += 1
heights.append(y)
# Plotting data
if heatmap:
plt.yticks([])
plt.imshow([heights], aspect="auto")
else:
plt.plot(xAxis,heights)
plt.ylim(ymin=0)
plt.xlim(xmin=0,xmax=len(heights))
plt.xlabel("Time of day")
plt.ylabel("How often I've coded at that time")
plt.xticks(timeTickLocations,timeTickLabels,
fontsize=timeTickFontSize,rotation=timeTickRotation)
plt.show()
And here are some sample results
Graph produced by same configuration options shown in above code
Same data but as a line graph with a lower precision (24 per day) and more time ticks (48)

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
dates = []
values = []
fig, ax = plt.subplots()
hours = mdates.HourLocator(interval=2)
d_fmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)
ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages')
ax.tick_params(axis='y', colors=Commands.chartColor)
ax.tick_params(axis='x', colors=Commands.chartColor)
ax.tick_params(which='minor', colors=Commands.chartColor)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
ax.spines["bottom"].set_color(Commands.chartColor)
ax.spines["left"].set_color(Commands.chartColor)
ax.spines["top"].set_color(Commands.chartColor)
ax.spines["right"].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.
Since I don't have any sample data, I created a simple data and made a graph. The 0:00 time on the timeline is a challenge, so I need to be creative. I have replaced the last 0:00 with 24:00. Then I set the time interval value to 48 as the interval on the X axis. In your code, it will be every 2 hours. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
dates = date_rng.strftime('%H:%M').tolist()
values = np.random.randint(0,25, size=25)
dates[-1] = '24:00'
fig, ax = plt.subplots(figsize=(12,9))
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
# ax.fill(dates, values)
ax.plot(dates, values, color=lineColor)
ax.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages', color=chartColor)
ax.tick_params(axis='y', colors=chartColor)
ax.tick_params(axis='x', colors=chartColor)
# ax.tick_params(which='major', colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
ax.spines["bottom"].set_color(chartColor)
ax.spines["left"].set_color(chartColor)
ax.spines["top"].set_color(chartColor)
ax.spines["right"].set_color(chartColor)
fig.set_facecolor(backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()

Python matplotlib stepped axis label

I am using matplotlib to generate an image in a tkinter application. The data provides a value stored against a time (real app data is open sessions by second to show load). I am trying to display this with time across the x axis, and sessions up the y axis.
I am having some difficulty however with formatting the labels on the x axis. If you see image Test 1, if i assign integers to the x axis, then although data is 0-19, matplotlib automatically displays 0,5,10,15, to keep the axis tidy.
If I just assign labels to this, then the first 4 labels are shown rather than every 5th label. If I reset the tciks and labels, I get every tick and every label (which just about fits here but in my real app theres too much data).
My solution has been to manually calculate every Nth tick and Nth label and assign those values which has worked but seems like there should be some built in functionality that handles this sort of data, the same way as it does for integers.
My code is:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import (FigureCanvasTkAgg)
import datetime
import tkinter
def main():
x_labels = []
x = []
y = []
dt_now = datetime.datetime.now()
entries = 20
for i in range(0, entries):
newtime=(dt_now + datetime.timedelta(seconds=i)).strftime("%H:%M:%S")
x.append(i)
y.append(i/2)
x_labels.append(newtime)
root = tkinter.Tk()
fig = plt.figure(figsize=(8,8))
canvas = FigureCanvasTkAgg(fig, master=root)
canvas.draw()
ax1 = fig.add_subplot(221)
ax1.plot(x,y)
ax1.set_title('Test 1 - Original Values')
ax1.set_xlabel('Entry ID')
ax1.set_ylabel('Sessions')
ax2 = fig.add_subplot(222)
ax2.plot(x,y)
ax2.set_title('Test 2 - Labels 0,1,2,3\nExpecting labels 0,5,10,15')
ax2.set_xlabel('Entry Time')
ax2.set_ylabel('Sessions')
ax2.set_xticklabels(x_labels, rotation=90, ha='center')
ax3 = fig.add_subplot(223)
ax3.plot(x_labels,y)
ax3.set_title('Test 3 - Every label')
ax3.set_xlabel('Entry Time')
ax3.set_ylabel('Sessions')
ax3.set_xticklabels(x_labels, rotation = 90)
major_ticks = []
major_tick_labels = []
for i in range(0,entries,int(entries/5)):
major_ticks.append(x[i])
major_tick_labels.append(x_labels[i])
ax4 = fig.add_subplot(224)
ax4.plot(x,y)
ax4.set_title('Test 4 - What I''m expecting\nbut hard coded')
ax4.set_xlabel('Entry Time')
ax4.set_ylabel('Sessions')
ax4.set_xticks(major_ticks)
ax4.set_xticklabels(major_tick_labels, rotation=90, ha='center')
plt.subplots_adjust(hspace = 0.75, bottom = 0.2)
canvas.get_tk_widget().pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)
tkinter.mainloop()
if __name__ == '__main__':
main()
Which generates the following:
Is this the only way to achieve the requirement or is there something available that I am missing. I have read the documentation, but couldn't see anything relevant in there (past making sure I set ticks and labels together). I'd like the process to be as automated as possible, as the time frame will be user driven, so may be best with 4 ticks, 5 ticks 10 ticks, etc. Matplotlib seems to handle this requirement well the a defined range of integers, I just want to associate the same labels.
You could use matplotlib.dates. Note that you need to keep the x_labels formatted as datetimes for this to work (I removed your call to .strftime)
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
x_labels = []
x = []
y = []
dt_now = datetime.datetime.now()
entries = 20
for i in range(0, entries):
newtime=(dt_now + datetime.timedelta(seconds=i))
x.append(i)
y.append(i/2)
x_labels.append(newtime)
fig = plt.figure(figsize=(8,8))
ax4 = fig.add_subplot(224)
ax4.plot(x_labels,y)
ax4.set_title('One possible solution')
ax4.set_xlabel('Entry Time')
ax4.set_ylabel('Sessions')
ax4.xaxis.set_major_locator(mdates.SecondLocator(interval=4))
ax4.tick_params(axis="x", rotation=90)
This will give you

Adjusting x-axis in matplotlib

I have a range of values for every hour of year. Which means there are 24 x 365 = 8760 values. I want to plot this information neatly with matplotlib, with x-axis showing January, February......
Here is my current code:
from matplotlib import pyplot as plt
plt.plot(x_data,y_data,label=str("Plot"))
plt.xticks(rotation=45)
plt.xlabel("Time")
plt.ylabel("Y axis values")
plt.title("Y axis values vs Time")
plt.legend(loc='upper right')
axes = plt.gca()
axes.set_ylim([0,some_value * 3])
plt.show()
x_data is a list containing dates in datetime format. y_data contains values corresponding to the values in x_data. How can I get the plot neatly done with months on the X axis? An example:
You could create a scatter plot with horizontal lines as markers. The month is extracted by using the datetime module. In case the dates are not ordered, the plot sorts both lists first according to the date:
#creating a toy dataset for one year, random data points within month-specific limits
from datetime import date, timedelta
import random
x_data = [date(2017, 1, 1) + timedelta(days = i) for i in range(365)]
random.shuffle(x_data)
y_data = [random.randint(50 * (i.month - 1), 50 * i.month) for i in x_data]
#the actual plot starts here
from matplotlib import pyplot as plt
#get a scatter plot with horizontal markers for each data point
#in case the dates are not ordered, sort first the dates and the y values accordingly
plt.scatter([day.strftime("%b") for day in sorted(x_data)], [y for _xsorted, y in sorted(zip(x_data, y_data))], marker = "_", s = 900)
plt.show()
Output
The disadvantage is obviously that the lines have a fixed length. Also, if a month doesn't have a data point, it will not appear in the graph.
Edit 1:
You could also use Axes.hlines, as seen here.
This has the advantage, that the line length changes with the window size. And you don't have to pre-sort the lists, because each start and end point is calculated separately.
The toy dataset is created as above.
from matplotlib import pyplot as plt
#prepare the axis with categories Jan to Dec
x_ax = [date(2017, 1, 1) + timedelta(days = 31 * i) for i in range(12)]
#create invisible bar chart to retrieve start and end points from automatically generated bars
Bars = plt.bar([month.strftime("%b") for month in x_ax], [month.month for month in x_ax], align = "center", alpha = 0)
start_1_12 = [plt.getp(item, "x") for item in Bars]
end_1_12 = [plt.getp(item, "x") + plt.getp(item, "width") for item in Bars]
#retrieve start and end point for each data point line according to its month
x_start = [start_1_12[day.month - 1] for day in x_data]
x_end = [end_1_12[day.month - 1] for day in x_data]
#plot hlines for all data points
plt.hlines(y_data, x_start, x_end, colors = "blue")
plt.show()
Output
Edit 2:
Now your description of the problem is totally different from what you show in your question. You want a simple line plot with specific axis formatting. This can be found easily in the matplotlib documentation and all over SO. An example, how to achieve this with the above created toy dataset would be:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, MonthLocator
ax = plt.subplot(111)
ax.plot([day for day in sorted(x_data)], [y for _xsorted, y in sorted(zip(x_data, y_data))], "r.-")
ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
ax.xaxis.set_minor_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter("%B"))
plt.show()
Output

Categories