Changing axis on scatterplot to fixed intervals involving time - python

I have the following code. My problem is that I want to set the range of the y axis from 0:00 to 12:00 and have it equally spaced in increments of one hour, e.g. 0:00, 1:00, 2:00, etc. Any suggestions on how I would go about doing this?
I also want to get rid of the extra :00 at the end of each label. Right now the labels read 00:00:00, 01:00:00 and so on, when I only want them to read 0:00, 1:00. Any ideas how I can go about doing this? Here is the code I have so far.
import pandas as pd
import matplotlib.pyplot as plt
import datetime
# Load the headerless CSV; columns are addressed by integer position.
data = pd.read_csv('data.csv', sep=',', header=None)
print(data)

# Split the rows on the tag stored in column 1.
ints = data[data[1] == 'INT']
exts = data[data[1] == 'EXT']

# Column 3 supplies the x values (the axis is labelled 'Distance' below).
int_dist = list(ints[3])
ext_dist = list(exts[3])

# Column 4 holds 'HH:MM' strings; each one becomes a datetime.time object.
int_times = [datetime.datetime.strptime(stamp, '%H:%M').time() for stamp in ints[4]]
ext_times = [datetime.datetime.strptime(stamp, '%H:%M').time() for stamp in exts[4]]

fig, ax = plt.subplots()
ax.scatter(int_dist, int_times, c='red', s=80)
ax.scatter(ext_dist, ext_times, c='blue', s=80)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
plt.ylim(0, 45000)
plt.show()

Well, it's possible to generate a list of times containing only the minutes and seconds. You need to change the format to '%M:%S'.
Next, you need to change the labels using plt.xticks(). I changed them for the x axis.
Here is a sample
from datetime import date, datetime, time, timedelta

import matplotlib.pyplot as plt

# Build nine sample timestamps, 7 seconds apart, starting 7 seconds after
# midnight today.
start = datetime.combine(date.today(), time(0, 0))
axis_times = []
y_values = []
for i in range(9):
    start += timedelta(seconds=7)
    # Keep only minutes and seconds for the tick label, e.g. "00:07".
    axis_times.append(start.strftime("%M:%S"))
    y_values.append(i)

fig, ax = plt.subplots()
ax.scatter(range(len(axis_times)), y_values, c='red', s=80)
ax.scatter(range(len(axis_times)), y_values, c='blue', s=20)
plt.legend(['INT', 'EXT'], loc=4)
plt.xlabel('Distance')
# One tick per point, labelled with the matching formatted time string.
plt.xticks(range(len(axis_times)), axis_times, size='small')
plt.show()

You can manually specify ticks to whatever you need. I can't run your example without the csv data but you can do,
import numpy as np
import pylab as plt
import datetime
# Some arbitrary data: a sine wave shifted up by 6, sampled on 0..12
x = np.linspace(0., 12., 100)
fig, ax = plt.subplots(1, 1)
ax.plot(x, np.sin(x) + 6.)
# One tick per whole number from 0 through 12 (13 ticks in total)
ax.set_yticks(range(13))
# Read the tick positions back and relabel each one as "<hour>:00"
tick_positions, _ = plt.yticks()
hour_labels = ["{}:00".format(hour) for hour in range(13)]
plt.yticks(tick_positions, hour_labels)
plt.show()
You can replace the new labels with datetime values or formatted strings obtained from your data (e.g. convert to a string and remove the last 0)...

Related

How to plot large dataset of date vs time using matplot lib

I want to plot a date vs. time graph using Matplotlib. The issue I am facing is that, due to the excess of data, many labels are shown on the x axis, and I can't find a way to plot my times on the x axis cleanly with a one-hour gap. Say I have data in my list as strings, e.g. ['6:01','6:30','7:20','7:25']. I want to divide my x axis from 6:00 to 7:00, and the time points in between should be positioned according to their time.
Note: the time list is just an example; I want to do this for the whole 24 hours.
I tried to use ticks and many other options to complete my task, but unfortunately I am stuck on this problem. My data is in a CSV file.
Below is my code:
def arrivalGraph():
    """Plot the times from column 1 of ``Timetable2021.csv`` against the dates in column 0.

    The first line of the file is discarded. Column 0 is parsed as
    ``dd/mm/YYYY`` and column 1 as ``HH:MM``; the result is shown in a
    Matplotlib window.
    """
    from datetime import datetime, timedelta
    from matplotlib import pyplot as plt
    from matplotlib import dates as mpl_dates

    with open("Timetable2021.csv", "r") as f:
        fileData = f.readlines()
    del fileData[0]  # drop the first line (presumably a header row)

    date = []
    train1 = []
    for data in fileData:
        ind = data.split(",")
        date.append(datetime.strptime(ind[0], "%d/%m/%Y").date())
        train1Time = datetime.strptime(ind[1], "%H:%M").time()
        # The time is stored back as an "HH:MM" string, not as a time object.
        train1.append(train1Time.strftime("%H:%M"))

    plt.style.use("seaborn")
    plt.figure(figsize = (10,10))
    plt.plot_date(train1,date)
    plt.gcf().autofmt_xdate()#gcf is get current figure - autofmt is auto format
    dateformater = mpl_dates.DateFormatter("%b ,%d %Y")
    plt.gca().xaxis.set_major_formatter(dateformater) # to format the xaxis
    plt.xlabel("Date")
    plt.ylabel("Time")
    plt.title("Train Time vs Date Schedule")
    plt.tight_layout()
    plt.show()
When I run the code I get the following output:
output of above code
Assuming that every single minute is present in train1 (i.e. train1 = ["00:00", "00:01", "00:02", "00:03", ... , "23:59"]), you can use plt.xticks() by generating an array of tick labels in which every minute that is not on the hour gets an empty string.
# Label only the entries that end in '00' (full hours); all others get ''.
unique_times = sorted(set(train1))
xticks = [stamp if stamp.endswith('00') else '' for stamp in unique_times]

plt.style.use("seaborn")
plt.figure(figsize=(10, 10))
plt.plot_date(train1, date)
plt.gcf().autofmt_xdate()  # auto-format the x tick labels of the current figure

# The dates are on the y axis here, so the date formatter is attached to the y axis.
date_formatter = mpl_dates.DateFormatter("%b ,%d %Y")
plt.gca().yaxis.set_major_formatter(date_formatter)

plt.title("Train Time vs Date Schedule")
plt.xlabel("Time")
plt.ylabel("Date")
plt.xticks(range(len(xticks)), xticks)
plt.tight_layout()
plt.show()
If every single minute is not in the train1 array, you have to keep train1 data as an object and generate arrays representing xticks location and values to be used as plt.xticks() parameters.
date = []
train1 = []
for data in fileData:
    ind = data.split(",")
    date.append(datetime.strptime(ind[0],"%d/%m/%Y").date())
    # Keep the parsed datetime object itself (not an "HH:MM" string).
    train1Time = datetime.strptime(ind[1],"%H:%M")
    train1.append(train1Time)
plt.style.use("seaborn")
plt.figure(figsize = (10,10))
plt.plot_date(train1,date)
plt.gcf().autofmt_xdate()#gcf is get current figure - autofmt is auto format
dateformater = mpl_dates.DateFormatter("%b ,%d %Y")
# I think you wanted to format the y axis instead of xaxis
plt.gca().yaxis.set_major_formatter(dateformater) # to format the yaxis
plt.ylabel("Date")
plt.xlabel("Time")
plt.title("Train Time vs Date Schedule")
ax = plt.gca()
xticks_val = []
xticks_loc = []
# Step between hourly ticks: the span from the first to the last automatic
# x tick, divided into 24 equal parts.
distance = (ax.get_xticks()[-1] - ax.get_xticks()[0]) / 24
def to_hour_str(x):
    """Return ``x`` as a zero-padded hour label of the form ``HH:00``.

    ``x`` is converted with ``str`` and left-padded with ``'0'`` to at least
    two characters, so ``5`` becomes ``'05:00'`` and ``12`` stays ``'12:00'``.
    """
    return str(x).zfill(2) + ':00'
# One tick per hour label from 00:00 through 24:00, spaced `distance` apart
# starting at the first automatic x tick.
first_tick = ax.get_xticks()[0]
for h in range(25):
    xticks_val.append(to_hour_str(h))
    xticks_loc.append(first_tick + h * distance)
plt.xticks(xticks_loc, xticks_val, rotation=90, ha='left')
plt.tight_layout()
plt.show()
Here's the code output using dummy data I generated myself.

Matplotlib - 24h Timeline graph

I want to make a timeline that shows the average number of messages sent over a 24h period. So far, I have managed to format both of the axes. The Y-axis already has the correct data in it.
These are the lists of data:
dates[] #a list of datetimes reduced to hours and minutes
values[] #a list of int
Now, for some time, I have tried to insert data into the graph. I have managed to insert the data now, but I assume that the X-axis is causing some problems because of formatting.
# Colour palette for the chart.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"

dates = []
values = []

fig, ax = plt.subplots()

# Major ticks every 2 hours, minor ticks every hour, labels shown as HH:MM.
d_fmt = mdates.DateFormatter('%H:%M')
hours = mdates.HourLocator(interval=2)
ax.xaxis.set_minor_locator(mdates.HourLocator(interval=1))
ax.xaxis.set_major_locator(hours)
ax.xaxis.set_major_formatter(d_fmt)

ax.fill(dates, values)
ax.plot(dates, values, color=Commands.lineColor)
ax.set_xlim(["00:00", "23:59"])
plt.fill_between(dates, values)

# region ChartDesign
ax.set_title('Amount of Messages')
for tick_selector in ({'axis': 'y'}, {'axis': 'x'}, {'which': 'minor'}):
    ax.tick_params(colors=Commands.chartColor, **tick_selector)
ax.set_ylabel('Messages', color=Commands.chartColor)
plt.grid(True, color=Commands.girdColor)
ax.set_facecolor(Commands.backgroundColor)
for side in ("bottom", "left", "top", "right"):
    ax.spines[side].set_color(Commands.chartColor)
fig.patch.set_facecolor(Commands.backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
# endregion
There are similar questions, but they aren't much use for me.
Since I don't have any sample data, I created some simple data and made a graph. The 0:00 time at the end of the timeline is a challenge, so I needed to be creative: I replaced the last 0:00 with 24:00. Then I set the locator interval to 48, which on this X axis gives a tick every 2 hours, as in your code. I have removed the code that I deemed unnecessary.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
# Colour palette for the chart.
lineColor = "#f0f8ff"
chartColor = "#f0f8ff"
backgroundColor = "#36393f"
girdColor = "#8a8a8a"

# 25 hourly labels from 00:00 to 00:00 of the next day; the final one is
# renamed '24:00' so it does not collide with the first.
date_rng = pd.date_range('2020-12-01', '2020-12-02', freq='1H')
values = np.random.randint(0, 25, size=25)
dates = date_rng.strftime('%H:%M').tolist()
dates[-1] = '24:00'

fig, ax = plt.subplots(figsize=(12, 9))
hours = mdates.HourLocator(interval=48)
ax.xaxis.set_major_locator(hours)
ax.plot(dates, values, color=lineColor)
ax.fill_between(dates, values)

# region ChartDesign
ax.set_title('Amount of Messages', color=chartColor)
for axis_name in ('y', 'x'):
    ax.tick_params(axis=axis_name, colors=chartColor)
ax.set_ylabel('Messages', color=chartColor)
ax.grid(True, color=girdColor)
ax.set_facecolor(backgroundColor)
for side in ("bottom", "left", "top", "right"):
    ax.spines[side].set_color(chartColor)
fig.set_facecolor(backgroundColor)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()

Cannot prepare proper labels in Matplotlib

I have very simple code:
from matplotlib import dates
import matplotlib.ticker as ticker
my_plot=df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90)
I've got:
but I would like to have fewer labels on the X axis. To do this I've added:
my_plot.xaxis.set_major_locator(ticker.MaxNLocator(12))
It generates fewer labels, but the labels have the wrong values (they are simply the first few labels of the whole list).
What am I doing wrong?
I have added some additional information:
I forgot to show what is inside the DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' (YYYY-MM)
The box plot groups the data by 'Day', and I would like to show the 'Day' values as labels, but not all of them
— for example, every third one.
You were almost there. Just set ticker.MultipleLocator.
The pandas.DataFrame.boxplot also returns axes, which is an object of class matplotlib.axes.Axes. So you can use this code snippet to customize your labels:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
center = np.random.randint(50,size=(10, 20))
spread = np.random.rand(10, 20) * 30
flier_high = np.random.rand(10, 20) * 30 + 30
flier_low = np.random.rand(10, 20) * -30
y = np.concatenate((spread, center, flier_high, flier_low))
fig, ax = plt.subplots(figsize=(10, 5))
ax.boxplot(y)
x = ['Label '+str(i) for i in range(20)]
ax.set_xlabel('Day')


def label_for_position(value, _tick_index):
    """Return the label of the box drawn at x position ``value`` ('' if none).

    ``ax.boxplot`` places the boxes at positions 1, 2, ..., so position ``p``
    belongs to ``x[p - 1]``.
    """
    idx = int(round(value)) - 1
    return x[idx] if 0 <= idx < len(x) else ''


# Set a tick on each integer multiple of a base within the view interval.
ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
# Look the label up from the tick position. A fixed label list
# (ax.set_xticklabels) would be assigned to the new ticks in order, so the
# first few labels would end up under the wrong boxes.
ax.xaxis.set_major_formatter(ticker.FuncFormatter(label_for_position))
ax.tick_params(axis='x', labelrotation=90)
I think there is a compatibility issue with Pandas plots and Matplotlib formatters.
With the following code:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
# Group key for the box plot: year and month of each hourly sample.
df_h['Day']=df_h.index.strftime('%Y-%m')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
# Keep every third tick label and blank out the others; the ticks themselves stay.
new_labels = [label if i % 3 == 0 else '' for i, label in enumerate(labels)]
ax.set_xticklabels(new_labels)
plt.show()
You get this chart:
But I notice that this is grouped by month instead of by day. It may not be what you wanted.
Adding the day component to the string 'Day' messes up the chart as there seems to be too many boxes.
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
# Group key for the box plot: the calendar day of each hourly sample.
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
# Keep every 15th tick label and blank out the others; the ticks themselves stay.
new_labels = [label if i % 15 == 0 else '' for i, label in enumerate(labels)]
ax.set_xticklabels(new_labels)
plt.show()
The for loop keeps one tick label every N periods, for whatever N you choose. In the first chart the labels were set every 3 months; in the second one, every 15 days.
If you would like to see fewer grid lines:
df = pd.read_csv('lt_stream-1001-full.csv', header=0, encoding='utf8')
df['reg_date'] = pd.to_datetime(df['reg_date'] , format='%Y-%m-%d %H:%M:%S')
df.set_index('reg_date', inplace=True)
df_h = df.resample(rule='H').mean()
# Group key for the box plot: the calendar day of each hourly sample.
df_h['Day']=df_h.index.strftime('%Y-%m-%d')
print(df_h)
f, ax = plt.subplots()
my_plot = df_h.boxplot(by='Day',figsize=(12,5), showfliers=False, rot=90, ax=ax)
locs, labels = plt.xticks()
# Keep only every third tick, position and label together, so the dropped
# ticks disappear instead of merely losing their text.
new_locs = list(locs[::3])
new_labels = labels[::3]
ax.set_xticks(new_locs)
ax.set_xticklabels(new_labels)
ax.grid(axis='y')
plt.show()
I've read about x_compat in Pandas plot in order to apply Matplotlib formatters, but I get an error when trying to apply it. I'll give it another shot later.
Old unsuccessful answer
The tick labels seem to be dates. If they are set as datetime in your dataframe, you can:
# Choose the months you like the most. They must be passed as `bymonth`:
# positional arguments would be read as (bymonth, bymonthday, interval, tz).
months = mdates.MonthLocator(bymonth=(1, 4, 7, 10))
ax.xaxis.set_major_locator(months)
Otherwise, you can let Matplotlib know they are dates by:
ax.xaxis_date()
Your comment:
I have add additional information:
I've forgoten to show what is inside DataFrame.
I have three columns:
reg_Date - datetime64 (index)
temperature - float64
Day - date converted from reg_Date to string, it looks like '2017-10' *(YYYY-MM) *
Box plot group date by 'Day' and I would like to show values 'Day" as a label but not all values
, for example every third one.
Based on your comment in italic above, I would use reg_Date as the input and the following lines:
# Major tick every third day, labelled with year and month only.
days = mdates.DayLocator(interval=3)
daysFmt = mdates.DateFormatter('%Y-%m') #to format display
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(daysFmt)
I forgot to mention that you will need to:
import matplotlib.dates as mdates
Does this work?

plot_date function set xticks for hourly data

I currently have a function that creates a time series graph from time/date data that is in MM-DD-YYYY HH-MM format. I am unsure as to how to change the x axis ticks such that it displays hours as well as the date as it currently only shows dates.
The set_major_locator line I included only returns ticks that have the year even though I have specified the hour_locator and the data is hourly.
def graph(region):
    """Plot 'Settlement Point Price' against 'DateTime' for one settlement point.

    Rows of ``df_da_abv_09`` whose 'Settlement Point' equals ``region`` are
    drawn as a red line; rows of ``df_rt_abv_09`` whose 'Settlement Point Name'
    equals ``region`` are drawn as a translucent green line on the same axes.

    Args:
        region: settlement point value used to filter both data frames.
    """
    fig = plt.figure(num=None, figsize=(60, 20), dpi=100, facecolor='w', edgecolor='k')
    df_da_region = df_da_abv_09[df_da_abv_09['Settlement Point'] == region]
    df_rt_region = df_rt_abv_09[df_rt_abv_09['Settlement Point Name'] == region]
    # NOTE: `fig` is rebound on each of the next three lines to whatever the
    # call returns; the Figure created above is not referenced again.
    fig = plt.plot_date(x=list(df_da_region['DateTime']), y=list(df_da_region['Settlement Point Price']), xdate = True, fmt="r-", linewidth=0.7)
    fig = plt.plot_date(x=list(df_rt_region['DateTime']), y=list(df_rt_region['Settlement Point Price']), xdate = True, fmt="g-", alpha=0.5, linewidth=0.7)
    fig = plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=5))
    plt.show()
Use matplotlib.dates.DateFormatter. First import it at the top
import matplotlib.dates as mdates
then replace this line
fig = plt.gca().xaxis.set_major_locator(mdates.HourLocator(interval=5))
by something like this
# '%y-%m-%d %H' prints two-digit year, month, day and then the hour (00-23).
myFmt = mdates.DateFormatter('%y-%m-%d %H') # here you can format your datetick labels as desired
plt.gca().xaxis.set_major_formatter(myFmt)
In an example with random numbers (since you haven't provided sample data), it looks like this
Here, the formatter is chosen as you wanted: dates + hours. For further info about how to format the date on the axis, check here

divide x and y labels in Matplotlib

I have a graph with X as a date and Y as some readings. The X axis has a date interval with an increment of one day. What I want is to show the hours on the x axis between two days (i.e. to mark the hours in the yellow area of the graph).
The idea of the code is:
Date = []      # will be filled from another function
Readings = []  # will be filled from another function
# Convert every entry of Date with datetime.fromtimestamp, element by element.
dateconv = np.vectorize(datetime.fromtimestamp)
Date_F = dateconv(Date)
ax1 = plt.subplot2grid((1,1), (0,0))
ax1.plot_date(Date_F, Readings, '-')
for label in ax1.xaxis.get_ticklabels():
    label.set_rotation(45)
ax1.grid(True)
plt.xlabel('Date')
plt.ylabel('Readings')
ax1.set_yticks(range(0,800,50))
plt.legend()
plt.show()
You can use MultipleLocator from matplotlib.ticker with set_major_locator and set_minor_locator. See example.
Example
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import datetime
# Generate some data: 250 timestamps, 12 minutes apart, starting now
d = datetime.timedelta(hours=1/5)
now = datetime.datetime.now()
times = [now + d * j for j in range(250)]
ax = plt.gca() # get the current axes
ax.plot(times, range(len(times)))
for label in ax.xaxis.get_ticklabels():
    label.set_rotation(30)
# Set the positions of the major and minor ticks.
# Matplotlib date values are measured in days, so a step of 1 is one day
# and a step of 1/24 is one hour.
dayLocator = MultipleLocator(1)
hourLocator = MultipleLocator(1/24)
ax.xaxis.set_major_locator(dayLocator)
ax.xaxis.set_minor_locator(hourLocator)
# Convert the labels to the Y-m-d format
xax = ax.get_xaxis() # get the x-axis
adf = xax.get_major_formatter() # get the auto-formatter
adf.scaled[1/24] = '%Y-%m-%d' # set the < 1d scale to Y-m-d
adf.scaled[1.0] = '%Y-%m-%d' # set the > 1d < 1m scale to Y-m-d
plt.show()
Result

Categories