I have the following sample codes:
import pandas as pd
import matplotlib.pyplot as plt
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007
00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
plt.plot(x,y)
I would like to have the xtick to be just 'Thu' for 01/02/2007 and 'Fri' for 02/02/2007. What is the best possible way to do that?
One possible solution is to change the X-axis format:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
fig, ax = plt.subplots()
ax.plot(x,y)
yearsFmt = mdates.DateFormatter('%a')
ax.xaxis.set_major_formatter(yearsFmt)
plt.show()
The key idea is to get the dayofweek from the DateTime object, like: x.dayofweek. This returns the numeric dayofweek. We can easily get the corresponding name np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
x_d = np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
y = [0.32,0.33,0.32,0.34]
ser = pd.Series(y, index=x_d)
ser.plot()
Related
I'm using python to analyze 911 Call for Service dataset. I'm showing data monthwise. Data is not sorted Date Wise.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('911_calls_for_service.csv')
r, c = df.shape
df['callDateTime'] = pd.to_datetime(df['callDateTime'])
df['MonthYear'] = df['callDateTime'].apply(lambda time: str(time.year) + '-' + str(time.month))
df['MonthYear'].value_counts().plot()
print(df['MonthYear'].value_counts())
plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('911_calls_for_service.csv')
df['callDateTime'] = pd.to_datetime(df['callDateTime'])
ax = df['callDateTime'].groupby([df["callDateTime"].dt.year, df["callDateTime"].dt.month]).count().plot()
ax.set_xlabel("Date")
ax.set_ylabel("Frequency")
plt.tight_layout()
plt.show()
I have this dataframe :
import pandas as pd
import datetime
from sklearn.utils import check_random_state
import math
start = datetime.datetime.strptime("21-06-2014", "%d-%m-%Y")
end = datetime.datetime.strptime("17-03-2017", "%d-%m-%Y")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end-start).days)]
X = [d.strftime('%d-%m-%Y') for d in date_generated] # I need this format for my real dataframe
Y = [math.cos(i) for i in range(1000)]
df = pd.DataFrame(dict(date=X,value=Y))
df.head(3)
date value
0 21-06-2014 1.000000
1 22-06-2014 0.540302
2 23-06-2014 -0.416147
df.tail(3)
date value
997 14-03-2017 -0.440062
998 15-03-2017 0.517847
999 16-03-2017 0.999650
When I plot the two columns of my dataframe through the following way, x-axis is unreadable :
from matplotlib import pyplot as plt
plt.figure(figsize=(20, 5))
plt.plot(df["date"].values,df["value"].values)
plt.show()
How please could I display only the years, one time each, instead of each 1st January ?
In that case, I would like therefore to have only 2015, 2016 and 2017 displayed in x-axis
You can use matplotlib.dates locator and formatter to format directly the datetime objects that you want to put on the xaxis:
import pandas as pd
import datetime
from sklearn.utils import check_random_state
import math
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
start = datetime.datetime.strptime("21-06-2014", "%d-%m-%Y")
end = datetime.datetime.strptime("17-03-2017", "%d-%m-%Y")
date_generated = [start + datetime.timedelta(days=x) for x in range(0, (end-start).days)]
Y = [math.cos(i) for i in range(1000)]
formatter = mdates.DateFormatter("%Y") ### formatter of the date
locator = mdates.YearLocator() ### where to put the labels
fig = plt.figure(figsize=(20, 5))
ax = plt.gca()
ax.xaxis.set_major_formatter(formatter) ## calling the formatter for the x-axis
ax.xaxis.set_major_locator(locator) ## calling the locator for the x-axis
plt.plot(date_generated, Y)
# fig.autofmt_xdate() # optional if you want to tilt the date labels - just try it
plt.tight_layout()
plt.show()
I am trying to plot information against dates. I have a list of dates in the format "01/02/1991".
I converted them by doing the following:
x = parser.parse(date).strftime('%Y%m%d'))
which gives 19910102
Then I tried to use num2date
import matplotlib.dates as dates
new_x = dates.num2date(x)
Plotting:
plt.plot_date(new_x, other_data, fmt="bo", tz=None, xdate=True)
But I get an error. It says "ValueError: year is out of range". Any solutions?
You can do this more simply using plot() instead of plot_date().
First, convert your strings to instances of Python datetime.date:
import datetime as dt
dates = ['01/02/1991','01/03/1991','01/04/1991']
x = [dt.datetime.strptime(d,'%m/%d/%Y').date() for d in dates]
y = range(len(x)) # many thanks to Kyss Tao for setting me straight here
Then plot:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator())
plt.plot(x,y)
plt.gcf().autofmt_xdate()
Result:
I have too low reputation to add comment to #bernie response, with response to #user1506145. I have run in to same issue.
The answer to it is an interval parameter which fixes things up
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=5))
plt.plot(days,y)
plt.gcf().autofmt_xdate()
plt.show()
As #KyssTao has been saying, help(dates.num2date) says that the x has to be a float giving the number of days since 0001-01-01 plus one. Hence, 19910102 is not 2/Jan/1991, because if you counted 19910101 days from 0001-01-01 you'd get something in the year 54513 or similar (divide by 365.25, number of days in a year).
Use datestr2num instead (see help(dates.datestr2num)):
new_x = dates.datestr2num(date) # where date is '01/02/1991'
Adapting #Jacek Szałęga's answer for the use of a figure fig and corresponding axes object ax:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(days,y)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.tick_params(axis='x', labelrotation=45)
plt.show()
I am plotting two pandas series. The index is a date (1-1 to 12-31)
s1.plot()
s2.plot()
pd.plot() interprets the dates and assigns them to axis values as such:
I would like to modify the major ticks to be the 1st of every month and minor ticks to be the days in between
This works:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m-%d')
s2014max = df2014.groupby(['Date'], sort=True)['Data_Value'].max()/10
s2014min = df2014.groupby(['Date'], sort=True)['Data_Value'].min()/10
#remove the leap day and convert to datetime for plotting
s2014min = s2014min[s2014min.index != '02-29']
s2014max = s2014max[s2014max.index != '02-29']
dateslist = s2014min.index.tolist()
dates = [pd.datetime.strptime(date, '%m-%d').date() for date in dateslist]
plt.figure()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_formatter(dayFmt)
ax.tick_params(direction='out', pad=15)
s2014min.plot()
s2014max.plot()
This results in no ticks:
A possible way is to use matplotlib for plotting the dates instead of pandas.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
dates = pd.date_range("2016-01-01", "2016-12-31" )
y = np.cumsum(np.random.normal(size=len(dates)))
df = pd.DataFrame({"Dates" : dates, "y": y})
fig, ax = plt.subplots()
ax.plot_date(df["Dates"], df.y, '-')
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(monthFmt)
plt.show()
You were so close! All you needed to do was add the formatters similar to how the other answer did it. Here is a working sample similar to your code (note I did mine in ipython notebook hence the %matplotlib inline).
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from random import random
y = [random() for i in range(25)]
x = [(datetime.now() - timedelta(days=i)) for i in range(25)]
x.reverse()
s = pd.Series(y, index=x) # NOTE: S, not df, since you said you were using series
# format the ticks
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt) # This is what you needed
ax.xaxis.set_minor_formatter(dayFmt) # This is what you needed
ax.tick_params(direction='out', pad=15)
# format the coords message box
s.plot(figsize=(10,3))
which will look like this:
I am plotting date time on the xaxis (which is actual dates) and then timedelta on the yaxis, which is actually time spans, or amount of time. Originally I was using date time for the yaxis, but I came across the usecase where the time values went over 24 hours, and then it broke the code. So instead I had to use timedelta in order to accommodate these values. But when I try to plot it using plot_date, the yaxis with the timedelta values comes out funny.
I have my information stored in a dataframe originally, and then change the values to a timedelta. This is the code I have to output this graph
import datetime as dt
import matplotlib.dates as mdates
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
plt.close('all')
#put data into dataframe
location='D:\CAT'
csvpath=location+('\metrics_summaryTEST.csv')
print csvpath
df=pd.read_csv(csvpath)
#setup plot/figure
media = set(df.mediaNumber.values)
num_plots = len(media)
ax = plt.gca()
pdfpath=location+('\metrics_graphs.pdf')
pp = PdfPages(pdfpath)
#declaring some variables
publishTimevals=np.zeros(len(df.publishTime.values),dtype="S20")
xdates=np.zeros(len(df.publishTime.values),dtype="S20")
ytimes=np.zeros(len(df.totalProcessTime.values),dtype="S8")
for f in sorted(media):
name = f
plt.figure(f)
plt.clf()
color = next(ax._get_lines.color_cycle)
#PROCESS PUBLISHTIME
publishTimevals= df.loc[df['mediaNumber']==f,['publishTime']]
xdates = map(lambda x: mpl.dates.date2num(dt.datetime.strptime(x, '%Y-%m-%d %H:%M')),publishTimevals.publishTime)
#PROCESS TOTALPROCESSTIME
totalProcessTimevals= df.loc[df['mediaNumber']==f,['totalProcessTime']]
ytimes = pd.to_timedelta(totalProcessTimevals.totalProcessTime)
plt.plot_date(xdates,ytimes,'o-',label='totalProcessTime',color=color)
print ytimes
plt.show()
#format the plot
plt.gcf().autofmt_xdate()
plt.xlabel('publishTime')
plt.ylabel('ProcessTime HH:MM:SS')
plt.legend(loc=8, bbox_to_anchor=(0.5,-0.3),ncol=3,prop={'size':9})
ax.grid('on')
plt.title('%s Processing Time' % (f))
plt.margins(0.05)
#plt.grid('on')
plt.minorticks_on()
plt.grid(which = 'minor', alpha = 0.3)
plt.grid(which = 'major', alpha = 0.7)
plt.show()
Could anyone point out what's going on here?