divide x and y labels in Matplotlib - python

I have a graph with X as a date and Y as some readings. the X axis has a date interval with an increment of one day. what i want is to show the hours on the x axis between two days(just to set the hours in the yellow area in the graph).
The idea of the code is:
Date=[];Readings=[] # will be filled from another function
dateconv=np.vectorize(datetime.fromtimestamp)
Date_F=dateconv(Date)
ax1 = plt.subplot2grid((1,1), (0,0))
ax1.plot_date(Date_F,Readings,'-')
for label in ax1.xaxis.get_ticklabels():
label.set_rotation(45)
ax1.grid(True)
plt.xlabel('Date')
plt.ylabel('Readings')
ax1.set_yticks(range(0,800,50))
plt.legend()
plt.show()

You can use MultipleLocator from matplotlib.ticker with set_major_locator and set_minor_locator. See example.
Example
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import datetime
# Generate some data
d = datetime.timedelta(hours=1/5)
now = datetime.datetime.now()
times = [now + d * j for j in range(250)]
ax = plt.gca() # get the current axes
ax.plot(times, range(len(times)))
for label in ax.xaxis.get_ticklabels():
label.set_rotation(30)
# Set the positions of the major and minor ticks
dayLocator = MultipleLocator(1)
hourLocator = MultipleLocator(1/24)
ax.xaxis.set_major_locator(dayLocator)
ax.xaxis.set_minor_locator(hourLocator)
# Convert the labels to the Y-m-d format
xax = ax.get_xaxis() # get the x-axis
adf = xax.get_major_formatter() # the the auto-formatter
adf.scaled[1/24] = '%Y-%m-%d' # set the < 1d scale to Y-m-d
adf.scaled[1.0] = '%Y-%m-%d' # set the > 1d < 1m scale to Y-m-d
plt.show()
Result

Related

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
dates = []
values = []
fig, ax = plt.subplots()
hours = mdates.HourLocator(interval=2)
d_fmt = mdates.DateFormatter('%H:%M')
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)
ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages')
ax.tick_params(axis='y', colors=Commands.chartColor)
ax.tick_params(axis='x', colors=Commands.chartColor)
ax.tick_params(which='minor', colors=Commands.chartColor)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
ax.spines["bottom"].set_color(Commands.chartColor)
ax.spines["left"].set_color(Commands.chartColor)
ax.spines["top"].set_color(Commands.chartColor)
ax.spines["right"].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.
Since I don't have any sample data, I created a simple data and made a graph. The 0:00 time on the timeline is a challenge, so I need to be creative. I have replaced the last 0:00 with 24:00. Then I set the time interval value to 48 as the interval on the X axis. In your code, it will be every 2 hours. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
dates = date_rng.strftime('%H:%M').tolist()
values = np.random.randint(0,25, size=25)
dates[-1] = '24:00'
fig, ax = plt.subplots(figsize=(12,9))
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
# ax.fill(dates, values)
ax.plot(dates, values, color=lineColor)
ax.fill_between(dates, values,)
# region ChartDesign
ax.set_title('Amount of Messages', color=chartColor)
ax.tick_params(axis='y', colors=chartColor)
ax.tick_params(axis='x', colors=chartColor)
# ax.tick_params(which='major', colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
ax.spines["bottom"].set_color(chartColor)
ax.spines["left"].set_color(chartColor)
ax.spines["top"].set_color(chartColor)
ax.spines["right"].set_color(chartColor)
fig.set_facecolor(backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()

Cannot prepare proper labels in Matplotlib

I have very simple code:
from matplotlib import dates
import matplotlib.ticker as ticker
my_plot=df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90)
I've got:
but I would like to have fewer labels on X axis. To do this I've add:
my_plot.xaxis.set_major_locator(ticker.MaxNLocator(12))
It generates fewer labels but values of labels have wrong values (=first of few labels from whole list)
What am I doing wrong?
I have add additional information:
I've forgoten to show what is inside DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' (YYYY-MM)
Box plot group date by 'Day' and I would like to show values 'Day" as a label but not all values
, for example every third one.
You were almost there. Just set ticker.MultipleLocator.
The pandas.DataFrame.boxplot also returns axes, which is an object of class matplotlib.axes.Axes. So you can use this code snippet to customize your labels:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
center = np.random.randint(50,size=(10, 20))
spread = np.random.rand(10, 20) * 30
flier_high = np.random.rand(10, 20) * 30 + 30
flier_low = np.random.rand(10, 20) * -30
y = np.concatenate((spread, center, flier_high, flier_low))
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(y)
x = ['Label '+str(i) for i in range(20)]
ax.set_xticklabels(x)
ax.set_xlabel('Day')
# Set a tick on each integer multiple of a base within the view interval.
ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
plt.xticks(rotation=90)
I think there is a compatibility issue with Pandas plots and Matplotlib formatters.
With the following code:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
for l in labels:
if i % 3 == 0:
label = labels[i]
i += 1
new_labels.append(label)
else:
label = ''
i += 1
new_labels.append(label)
ax.set_xticklabels(new_labels)
plt.show()
You get this chart:
But I notice that this is grouped by month instead of by day. It may not be what you wanted.
Adding the day component to the string 'Day' messes up the chart as there seems to be too many boxes.
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
for l in labels:
if i % 15 == 0:
label = labels[i]
i += 1
new_labels.append(label)
else:
label = ''
i += 1
new_labels.append(label)
ax.set_xticklabels(new_labels)
plt.show()
The for loop creates the tick labels every as many periods as desired. In the first chart they were set every 3 months. In the second one, every 15 days.
If you would like to see less grid lines:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
i = 0
new_labels = list()
new_locs = list()
for l in labels:
if i % 3 == 0:
label = labels[i]
loc = locs[i]
i += 1
new_labels.append(label)
new_locs.append(loc)
else:
i += 1
ax.set_xticks(new_locs)
ax.set_xticklabels(new_labels)
ax.grid(axis='y')
plt.show()
I've read about x_compat in Pandas plot in order to apply Matplotlib formatters, but I get an error when trying to apply it. I'll give it another shot later.
Old unsuccesful answer
The tick labels seem to be dates. If they are set as datetime in your dataframe, you can:
months = mdates.MonthLocator(1,4,7,10) #Choose the months you like the most
ax.xaxis.set_major_locator(months)
Otherwise, you can let Matplotlib know they are dates by:
ax.xaxis_date()
Your comment:
I have add additional information:
I've forgoten to show what is inside DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' *(YYYY-MM) *
Box plot group date by 'Day' and I would like to show values 'Day" as a label but not all values
, for example every third one.
Based on your comment in italic above, I would use reg_Date as the input and the following lines:
days = mdates.DayLocator(interval=3)
daysFmt = mdates.DateFormatter('%Y-%m') #to format display
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(daysFmt)
I forgot to mention that you will need to:
import matplotlib.dates as mdates
Does this work?

Adjusting x-axis in matplotlib

I have a range of values for every hour of year. Which means there are 24 x 365 = 8760 values. I want to plot this information neatly with matplotlib, with x-axis showing January, February......
Here is my current code:
from matplotlib import pyplot as plt
plt.plot(x_data,y_data,label=str("Plot"))
plt.xticks(rotation=45)
plt.xlabel("Time")
plt.ylabel("Y axis values")
plt.title("Y axis values vs Time")
plt.legend(loc='upper right')
axes = plt.gca()
axes.set_ylim([0,some_value * 3])
plt.show()
x_data is a list containing dates in datetime format. y_data contains values corresponding to the values in x_data. How can I get the plot neatly done with months on the X axis? An example:
You could create a scatter plot with horizontal lines as markers. The month is extracted by using the datetime module. In case the dates are not ordered, the plot sorts both lists first according to the date:
#creating a toy dataset for one year, random data points within month-specific limits
from datetime import date, timedelta
import random
x_data = [date(2017, 1, 1) + timedelta(days = i) for i in range(365)]
random.shuffle(x_data)
y_data = [random.randint(50 * (i.month - 1), 50 * i.month) for i in x_data]
#the actual plot starts here
from matplotlib import pyplot as plt
#get a scatter plot with horizontal markers for each data point
#in case the dates are not ordered, sort first the dates and the y values accordingly
plt.scatter([day.strftime("%b") for day in sorted(x_data)], [y for _xsorted, y in sorted(zip(x_data, y_data))], marker = "_", s = 900)
plt.show()
Output
The disadvantage is obviously that the lines have a fixed length. Also, if a month doesn't have a data point, it will not appear in the graph.
Edit 1:
You could also use Axes.hlines, as seen here.
This has the advantage, that the line length changes with the window size. And you don't have to pre-sort the lists, because each start and end point is calculated separately.
The toy dataset is created as above.
from matplotlib import pyplot as plt
#prepare the axis with categories Jan to Dec
x_ax = [date(2017, 1, 1) + timedelta(days = 31 * i) for i in range(12)]
#create invisible bar chart to retrieve start and end points from automatically generated bars
Bars = plt.bar([month.strftime("%b") for month in x_ax], [month.month for month in x_ax], align = "center", alpha = 0)
start_1_12 = [plt.getp(item, "x") for item in Bars]
end_1_12 = [plt.getp(item, "x") + plt.getp(item, "width") for item in Bars]
#retrieve start and end point for each data point line according to its month
x_start = [start_1_12[day.month - 1] for day in x_data]
x_end = [end_1_12[day.month - 1] for day in x_data]
#plot hlines for all data points
plt.hlines(y_data, x_start, x_end, colors = "blue")
plt.show()
Output
Edit 2:
Now your description of the problem is totally different from what you show in your question. You want a simple line plot with specific axis formatting. This can be found easily in the matplotlib documentation and all over SO. An example, how to achieve this with the above created toy dataset would be:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, MonthLocator
ax = plt.subplot(111)
ax.plot([day for day in sorted(x_data)], [y for _xsorted, y in sorted(zip(x_data, y_data))], "r.-")
ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
ax.xaxis.set_minor_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter("%B"))
plt.show()
Output

matplot lib y-axis interval with dollar sign

I have the following code to generate the a chart showing year in the x-axis, and the dollar amount in the y-axis. How can I format my y-axis to show intervals like $4,000,000, $8,000,000, $12,000,000...
Right now, the y-axis are showing 0.2, 0.4, and 1e7 on the left-top.
from matplotlib import pyplot as plt
...
plt.figure(figsize=(8,4))
plt.plot(x_values, y_values)
plt.ylabel('Amount')
...
plt.savefig(img_path)
Matlab Example
% Years on x axis
x = 2010:2016;
% Dollar amount on y axis
y = linspace(4000000,8000000,length(x));
% Plot and save the tick values that Matlab generates
plot(x,y);
yTicks = get(gca,'YTick');
% Turn ticks into non-exponential values
a = textscan(num2str(yTicks),'%f');
% Format into strings representing dollar amount with delimiter etc.
newYTickLabels = Sep1000Str(a{1});
% Set your new ticklabels
set(gca,'YTickLabel',newYTickLabels)
Where Sep1000Str() is:
function output = Sep1000Str(a)
n = length(a);
for k = 1:n
S = sprintf('$%.2f', a(k));
S(2, length(S) - 6:-3:2) = ',';
S = {transpose(S(S ~= char(0)))};
output(k) = S;
end
You could use the matplotlib FormatStrFormatter
x_values = [2011,2012,2013,2014,2015,2016,2017] #list of years
y_values = np.linspace(4e6,10e6,len(x_values)) #y_values with same size as x
fig, ax = plt.subplots(figsize=(8,4))
ax.plot(x_values, y_values)
formatter = ticker.FormatStrFormatter('$%0.1f') #declaring the formatter with the $ sign and y_values with 1 decimalplace
ax.yaxis.set_major_formatter(formatter)
for tick in ax.yaxis.get_major_ticks():
tick.label1.set_visible(True) #make your yvalues visible on the plot
plt.show()

Is it possible to generate a chart with this very specific background?

I need to create a chart, that has a grid like in the following picture.
The key factors being:
The x-axis is time with each tick marking 30 seconds
y-axes labels in the chart repeat at a variable interval
Chart must grow with the amount of data (i.e. for 30 minutes of data, it should be 60 boxes wide)
I have been looking into matplotlib for a bit, and it seems promising. I also managed to fill the chart with data. See my result for 40 Minutes of data.
But before I invest more time into research, I must know if this goal is even possible. If not I'll have to look into other charts. Thanks for your help!
Here is the source for the above image (my_data is actually read from a csv, but filled with random junk here):
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(50, 200), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
plt.ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2) # second plot
plt.ylim(0, 100)
actg.xaxis.set_minor_locator(dates.MinuteLocator())
actg.xaxis.set_major_formatter(hfmt)
atoco.xaxis.set_minor_locator(dates.MinuteLocator())
atoco.xaxis.set_major_formatter(hfmt)
plt.xticks(rotation=45)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in fig.axes:
ax.grid(True)
plt.tight_layout()
plt.show()
OK, here's something close to what you are after, I think.
I've used dates.SecondLocator(bysecond=[0,30]) to set the grid every 30 seconds (also need to make sure the grid is set on the minor ticks, with ax.xaxis.grid(True,which='both')
To repeat the yticklabels, I create a twinx of the axes for every major tick on the xaxis, and move the spine to that tick's location. I then set the spine color to none, so it doesn't show up, and turn of the actual ticks, but not the tick labels.
from matplotlib import dates
import matplotlib.pyplot as plt
import numpy as np
import time
from datetime import datetime
# how often to show xticklabels and repeat yticklabels:
xtickinterval = 10
# Make random data
my_data = list()
for i in range(3000):
my_data.append((datetime.fromtimestamp(i + time.time()), np.random.randint(120, 160), np.random.randint(10, 100)))
hfmt = dates.DateFormatter('%H:%M:%S')
fig = plt.figure()
actg = fig.add_subplot(2, 1, 1) # two rows, one column, first plot
actg.set_ylim(50, 210)
atoco = fig.add_subplot(2, 1, 2,sharex=actg) # second plot, share the xaxis with actg
atoco.set_ylim(-5, 105)
# Set the major ticks to the intervals specified above.
actg.xaxis.set_major_locator(dates.MinuteLocator(byminute=np.arange(0,60,xtickinterval)))
# Set the minor ticks to every 30 seconds
minloc = dates.SecondLocator(bysecond=[0,30])
minloc.MAXTICKS = 3000
actg.xaxis.set_minor_locator(minloc)
# Use the formatter specified above
actg.xaxis.set_major_formatter(hfmt)
times = []
fhr1 = []
toco = []
for key in my_data:
times.append(key[0])
fhr1.append(key[1])
toco.append(key[2])
print times[-1]-times[0]
# Make your plot
actg.plot_date(times, fhr1, '-')
atoco.plot_date(times, toco, '-')
for ax in [actg,atoco]:
# Turn off the yticklabels on the right hand side
ax.set_yticklabels([])
# Set the grids
ax.xaxis.grid(True,which='both',color='r')
ax.yaxis.grid(True,which='major',color='r')
# Create new yticklabels every major tick on the xaxis
for tick in ax.get_xticks():
tx = ax.twinx()
tx.set_ylim(ax.get_ylim())
tx.spines['right'].set_position(('data',tick))
tx.spines['right'].set_color('None')
for tic in tx.yaxis.get_major_ticks():
tic.tick1On = tic.tick2On = False
plt.tight_layout()
plt.show()

Categories