Load & Plot %Y-%m-%d %H:%M:%S from a file - python

I'm trying to make a simple XY plot using "plot_date" by loading a file with two dates and a value, but haven't yet had success. The idea is to have columns 0 and 1 to represent "time1", and columns 2 and 3 as "time2", and column 4 as "val".
The input data looks like this:
2017-04-08 16:54:37 2017-04-08 16:55:08 1
2017-04-08 16:58:28 2017-04-08 16:58:33 1
2017-04-08 17:02:18 2017-04-08 17:02:24 1
Code
import matplotlib.pyplot as plt
from matplotlib.dates import strpdate2num
import numpy as np
import matplotlib.colors
import matplotlib.cm
from matplotlib.dates import date2num, DateFormatter
import datetime as dt
time1,time2,val = np.loadtxt(inputfile, usecols=(0,1,2,3,4), unpack=True, converters = {0,1: strpdate2num("%Y-%m-%d %H:%M:%S")})
Here I get an error:
>>> time1,time2,val = np.loadtxt(inputfile, usecols=(0,1,2,3,4), unpack=True, converters = {0,1: strpdate2num("%Y-%m-%d %H:%M:%S")})
File "<stdin>", line 1
time1,time2,val = np.loadtxt(inputfile, usecols=(0,1,2,3,4), unpack=True, converters = {0,1: strpdate2num("%Y-%m-%d %H:%M:%S")})
^
SyntaxError: invalid syntax
Any ideas on what I may be doing wrong? Suggestions on how to correct it?
Among other things, I'm hoping to subtract time2 from time1 and get the difference in seconds. Is this possible?
Finally, I'd like to plot the variables, something along the lines of:
fig, ax = plt.subplots()
ax.plot_date(time1, val, 'b-', color='b')
ax.plot_date(time2, val, 'b-', color='g')

You have to convert every column separately and then combine it:
# Load two whitespace-separated timestamps plus a value per row, report the
# time difference in seconds, and plot the value against both timestamps.
# (strpdate2num is no longer imported: current matplotlib releases do not
# provide it.)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
import matplotlib.cm
from matplotlib.dates import date2num, DateFormatter
import datetime as dt

# Layout of one timestamp once its date and clock columns are rejoined.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Whitespace splits every timestamp into a date column and a clock column, so
# read all five columns as text first. ndmin=2 keeps the five-way unpacking
# valid even when the file holds a single row.
date1, clock1, date2, clock2, val = np.loadtxt(
    inputfile, dtype=str, usecols=(0, 1, 2, 3, 4), unpack=True, ndmin=2)

# Parse date and clock together. Parsing "%H:%M:%S" on its own yields a
# timestamp on 1900-01-01, so adding that result to the parsed date would
# shift it by a whole date number instead of by the time of day.
time1 = [dt.datetime.strptime(day + " " + clock, TIMESTAMP_FORMAT)
         for day, clock in zip(date1, clock1)]
time2 = [dt.datetime.strptime(day + " " + clock, TIMESTAMP_FORMAT)
         for day, clock in zip(date2, clock2)]
val = val.astype(float)

# time2 - time1 for every row, in seconds.
elapsed_seconds = [(end - start).total_seconds()
                   for start, end in zip(time1, time2)]
print(elapsed_seconds)

fig, ax = plt.subplots()
# The colour comes from the format string only, so the second line is
# unambiguously green rather than mixing 'b-' with color='g'.
ax.plot(time1, val, 'b-')
ax.plot(time2, val, 'g-')
fig.autofmt_xdate()
plt.show()

Related

questions about matplotlib.dates.DateFormatter() and xticks() [duplicate]

I am trying to plot information against dates. I have a list of dates in the format "01/02/1991".
I converted them by doing the following:
x = parser.parse(date).strftime('%Y%m%d'))
which gives 19910102
Then I tried to use num2date
import matplotlib.dates as dates
new_x = dates.num2date(x)
Plotting:
plt.plot_date(new_x, other_data, fmt="bo", tz=None, xdate=True)
But I get an error. It says "ValueError: year is out of range". Any solutions?
You can do this more simply using plot() instead of plot_date().
First, convert your strings to instances of Python datetime.date:
import datetime as dt
dates = ['01/02/1991','01/03/1991','01/04/1991']
x = [dt.datetime.strptime(d,'%m/%d/%Y').date() for d in dates]
y = range(len(x)) # many thanks to Kyss Tao for setting me straight here
Then plot:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator())
plt.plot(x,y)
plt.gcf().autofmt_xdate()
Result:
I do not have enough reputation to comment on @bernie's answer in reply to @user1506145, but I have run into the same issue.
The fix is the locator's interval parameter, which spaces out the ticks:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=5))
plt.plot(days,y)
plt.gcf().autofmt_xdate()
plt.show()
As #KyssTao has been saying, help(dates.num2date) says that the x has to be a float giving the number of days since 0001-01-01 plus one. Hence, 19910102 is not 2/Jan/1991, because if you counted 19910101 days from 0001-01-01 you'd get something in the year 54513 or similar (divide by 365.25, number of days in a year).
Use datestr2num instead (see help(dates.datestr2num)):
new_x = dates.datestr2num(date) # where date is '01/02/1991'
Adapting #Jacek Szałęga's answer for the use of a figure fig and corresponding axes object ax:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(days,y)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.tick_params(axis='x', labelrotation=45)
plt.show()

Display datetime as day for xtick

I have the following sample codes:
import pandas as pd
import matplotlib.pyplot as plt
# Sample timestamps in day/month/year order. Each literal must stay on a
# single line; a quoted string broken across lines is a syntax error.
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
plt.plot(x,y)
I would like to have the xtick to be just 'Thu' for 01/02/2007 and 'Fri' for 02/02/2007. What is the best possible way to do that?
One possible solution is to change the X-axis format:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Four readings, two on each of two consecutive days (day/month/year order).
timestamps = pd.to_datetime(
    ['01/02/2007 00:02:00', '01/02/2007 00:04:00',
     '02/02/2007 00:02:00', '02/02/2007 00:04:00'],
    format='%d/%m/%Y %H:%M:%S',
)
readings = [0.32, 0.33, 0.32, 0.34]

fig, ax = plt.subplots()
ax.plot(timestamps, readings)
# '%a' makes every major tick label the abbreviated weekday name.
weekday_formatter = mdates.DateFormatter('%a')
ax.xaxis.set_major_formatter(weekday_formatter)
plt.show()
The key idea is to get the dayofweek from the DateTime object, like: x.dayofweek. This returns the numeric dayofweek. We can easily get the corresponding name np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
x_d = np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
y = [0.32,0.33,0.32,0.34]
ser = pd.Series(y, index=x_d)
ser.plot()

Modify major and minor xticks for dates

I am plotting two pandas series. The index is a date (1-1 to 12-31)
s1.plot()
s2.plot()
pd.plot() interprets the dates and assigns them to axis values as such:
I would like to modify the major ticks to be the 1st of every month and minor ticks to be the days in between
This works:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m-%d')
s2014max = df2014.groupby(['Date'], sort=True)['Data_Value'].max()/10
s2014min = df2014.groupby(['Date'], sort=True)['Data_Value'].min()/10
#remove the leap day and convert to datetime for plotting
s2014min = s2014min[s2014min.index != '02-29']
s2014max = s2014max[s2014max.index != '02-29']
dateslist = s2014min.index.tolist()
dates = [pd.datetime.strptime(date, '%m-%d').date() for date in dateslist]
plt.figure()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_formatter(dayFmt)
ax.tick_params(direction='out', pad=15)
s2014min.plot()
s2014max.plot()
This results in no ticks:
A possible way is to use matplotlib for plotting the dates instead of pandas.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
dates = pd.date_range("2016-01-01", "2016-12-31" )
y = np.cumsum(np.random.normal(size=len(dates)))
df = pd.DataFrame({"Dates" : dates, "y": y})
fig, ax = plt.subplots()
ax.plot_date(df["Dates"], df.y, '-')
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(monthFmt)
plt.show()
You were so close! All you needed to do was add the formatters similar to how the other answer did it. Here is a working sample similar to your code (note I did mine in ipython notebook hence the %matplotlib inline).
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from random import random
y = [random() for i in range(25)]
x = [(datetime.now() - timedelta(days=i)) for i in range(25)]
x.reverse()
s = pd.Series(y, index=x) # NOTE: S, not df, since you said you were using series
# format the ticks
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt) # This is what you needed
ax.xaxis.set_minor_formatter(dayFmt) # This is what you needed
ax.tick_params(direction='out', pad=15)
# format the coords message box
s.plot(figsize=(10,3))
which will look like this:

Plotting timestamp data from CSV using matplotlib

I am trying to plot data from a csv file using matplotlib. There is 1 column against a timestamp:
26-08-2016 00:01 0.062964691
26-08-2016 00:11 0.047209214
26-08-2016 00:21 0.047237823
I have only been able to create a simple plot using only integers using the code below, which doesn't work when the y data is a timestamp. What do I need to add?
This may seem simple, but I am pressed for time :/
thanks!
from matplotlib import pyplot as plt
from matplotlib import style
import numpy as np
import datetime as dt
x,y = np.loadtxt('I112-1.csv',
unpack=True,
delimiter = ',')
plt.plot(x,y)
plt.title('Title')
plt.ylabel('Y axis')
plt.xlabel('X axis')
plt.show()
Here's my example for this problem:
# Plot a value column against day-first timestamps read from CSV text.
# In a Jupyter notebook, run the "%matplotlib inline" magic in a cell before
# this code; it is IPython syntax, not Python, so it is kept out of the script.
import pandas as pd
from io import StringIO
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt

# In-memory stand-in for the CSV file: a header row plus three samples.
data_file = StringIO("""
time,value
26-08-2016 00:01,0.062964691
26-08-2016 00:11,0.047209214
26-08-2016 00:21,0.047237823""")
df = pd.read_csv(data_file)
# Convert the whole column in one call; the explicit format states that the
# day comes first, so "26-08-2016" is never read month-first.
df['datetime'] = pd.to_datetime(df['time'], format='%d-%m-%Y %H:%M')
# The timestamps become the index (the x axis), so only 'value' is selected
# as the plotted series; one y tick is placed at every observed value.
ax = df.set_index('datetime')['value'].plot(title="Title", yticks=df['value'])
plt.show()

python 3.5 matplotlib reading date from csv but can't represent as date on X axis

I cannot figure this out at all: I can read a date from a CSV file, but I CANNOT represent the date as a label on the x axis. I have tried all the approaches that people have suggested, but I cannot get it to work. So could someone look at the stripped-down version of my code and tell me what I am missing, please?
a sample of the data being read from csv file
2015-08-04 02:14:05.249392,AA,0.0193103612,0.0193515212,0.0249713335,30.6542480634,30.7195875454,39.640763021,0.2131498442,29.0406746589,13524.5347810182,89,57,99
2015-08-05 02:14:05.325113,AAPL,0.0170506271,0.0137941891,0.0105915637,27.0670313481,21.8975963326,16.8135861893,-19.0986405157,-23.2172064279,21.5647072302,33,26,75
2015-08-06 02:14:05.415193,AIG,0.0080808151,0.0073296055,0.0076213535,12.8278962785,11.635388035,12.0985236788,-9.2962105215,3.980405659,-142.8175077335,71,42,33
2015-08-07 02:14:05.486185,AMZN,0.0235649449,0.0305828226,0.0092703502,37.4081902773,48.5487257749,14.7162247572,29.7810062852,-69.6877219282,-334.0005615016,2,92,10
stripped down code
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.font_manager as fm
ax = plt.subplots(1, 1, figsize=(16, 20), dpi=50) #width height inches
data=np.genfromtxt('/home/dave/Desktop/development/hvanal2015s.csv',
dtype='M8[us],S5,float,float,float',delimiter=',',usecols= [0,1,11,12,13])
my_dates = np.array([d[0] for d in data]).astype('datetime64[D]')
dates = np.unique(my_dates)
print(dates)
x_list = []
y_list = [10,11,12,13]
x_list = dates
plt.plot(x_list,y_list)
plt.title('hv 20 to 10 ranks',fontsize=20)
plt.xlabel('dates')
plt.ylabel('symbol ranks',fontsize=30)
plt.show()
and the output as a png file
matplotlib does not support NumPy datetime64 objects; you need to convert them to Python datetime objects and then select a formatter, as in the code below:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.font_manager as fm
from datetime import datetime
import matplotlib.dates as mdates

# One figure holding a single axes, at the default size.
fig, ax = plt.subplots(1, 1)

# Read five of the CSV columns: the timestamp (0), a short text field (1)
# and three float columns (11-13).
records = np.genfromtxt(
    'data',
    dtype='M8[us],S5,float,float,float',
    delimiter=',',
    usecols=[0, 1, 11, 12, 13],
)

# Truncate every timestamp to whole days, then keep each day once.
day_stamps = np.array([row[0] for row in records]).astype('datetime64[D]')
dates = np.unique(day_stamps)
print(dates)

# Convert the datetime64 values to Python date objects before plotting.
x_list = list(dates.astype(datetime))
y_list = [10, 11, 12, 13]

ax.plot(x_list, y_list)
ax.set_title('hv 20 to 10 ranks', fontsize=20)
ax.set_xlabel('dates', fontsize=16)
# Show tick labels as ISO-style calendar dates.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.set_ylabel('symbol ranks', fontsize=30)
plt.show()

Categories