how to get dates from yahoo finance - python

I have a problem with getting the dates from yfinance into my matplotlib graph can somebody help/show me how to get the dates from yfinance into my matplotlib graph
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
# function getting data for the last x years with x weeks space from checking data and specific
observation.
def stock_data(ticker, period, interval, observation):
ticker = yf.Ticker(ticker)
ticker_history = ticker.history(period, interval)
print(ticker_history[observation]) #is here to show you the data that we get
date = pd.date_range(ticker_history[period])
x = date
y = np.array(ticker_history[observation])
plt.style.use('dark_background')
plt.plot(x,y)
plt.ylabel('Price($)')
plt.xlabel('Date', rotation=0)
plt.show()
stock_data('GOOGL', '6mo', '1wk', 'Open')

Tested ✅
🔰You could extract the date from ticker_history[observation]
🔰 It is a Pandas Series object, so here's how I'd do it:
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
# function getting data for the last x years with x weeks space
# from checking data and specific observation.
def stock_data(ticker, period, interval, observation):
ticker = yf.Ticker(ticker)
ticker_history = ticker.history(period, interval)
print((ticker_history[observation]))
sf = ticker_history[observation]
df = pd.DataFrame({'Date':sf.index, 'Values':sf.values})
x = df['Date'].tolist()
y = df['Values'].tolist()
plt.style.use('dark_background')
plt.plot(x,y)
plt.ylabel('Price($)')
plt.xlabel('Date', rotation=0)
plt.show()
if __name__ == '__main__':
stock_data('GOOGL', '6mo', '1wk', 'Open')

Related

What could I do to change the time intervals on the x-axis

import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import pandas_datareader as web
#This makes a chart w/3 hour intervals and I would need something like 30 minutes
style.use("ggplot")
start = dt.datetime(2019,4,24)
end = dt.datetime(2019,5,25)
df = web.get_data_yahoo("TSLA", start, end)
df["Adj Close"].plot()
plt.title('Tesla Price v. First Quarter Earnings 2019')
plt.ylabel('USD')
plt.show()
The following example shows how to set the primary scale at 7-day intervals and hide the secondary scale as an example. Other variations can be found on
formatter page and Locator page.
import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib import style
import pandas as pd
import pandas_datareader as web
fig, ax = plt.subplots()
#This makes a chart w/3 hour intervals and I would need something like 30 minutes
style.use("ggplot")
start = dt.datetime(2019,4,24)
end = dt.datetime(2019,5,25)
df = web.get_data_yahoo("TSLA", start, end)
ax = df["Adj Close"].plot()
days = mdates.DayLocator(interval=7)
days_fmt = mdates.DateFormatter('%m/%d')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(days_fmt)
ax.minorticks_off()
plt.title('Tesla Price v. First Quarter Earnings 2019')
plt.ylabel('USD')
plt.show()

questions about matplotlib.dates.DateFormatter() and xticks() [duplicate]

I am trying to plot information against dates. I have a list of dates in the format "01/02/1991".
I converted them by doing the following:
x = parser.parse(date).strftime('%Y%m%d'))
which gives 19910102
Then I tried to use num2date
import matplotlib.dates as dates
new_x = dates.num2date(x)
Plotting:
plt.plot_date(new_x, other_data, fmt="bo", tz=None, xdate=True)
But I get an error. It says "ValueError: year is out of range". Any solutions?
You can do this more simply using plot() instead of plot_date().
First, convert your strings to instances of Python datetime.date:
import datetime as dt
dates = ['01/02/1991','01/03/1991','01/04/1991']
x = [dt.datetime.strptime(d,'%m/%d/%Y').date() for d in dates]
y = range(len(x)) # many thanks to Kyss Tao for setting me straight here
Then plot:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator())
plt.plot(x,y)
plt.gcf().autofmt_xdate()
Result:
I have too low reputation to add comment to #bernie response, with response to #user1506145. I have run in to same issue.
The answer to it is an interval parameter which fixes things up
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=5))
plt.plot(days,y)
plt.gcf().autofmt_xdate()
plt.show()
As #KyssTao has been saying, help(dates.num2date) says that the x has to be a float giving the number of days since 0001-01-01 plus one. Hence, 19910102 is not 2/Jan/1991, because if you counted 19910101 days from 0001-01-01 you'd get something in the year 54513 or similar (divide by 365.25, number of days in a year).
Use datestr2num instead (see help(dates.datestr2num)):
new_x = dates.datestr2num(date) # where date is '01/02/1991'
Adapting #Jacek Szałęga's answer for the use of a figure fig and corresponding axes object ax:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import datetime as dt
np.random.seed(1)
N = 100
y = np.random.rand(N)
now = dt.datetime.now()
then = now + dt.timedelta(days=100)
days = mdates.drange(now,then,dt.timedelta(days=1))
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(days,y)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.tick_params(axis='x', labelrotation=45)
plt.show()

Display datetime as day for xtick

I have the following sample codes:
import pandas as pd
import matplotlib.pyplot as plt
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007
00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
plt.plot(x,y)
I would like to have the xtick to be just 'Thu' for 01/02/2007 and 'Fri' for 02/02/2007. What is the best possible way to do that?
One possible solution is to change the X-axis format:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
fig, ax = plt.subplots()
ax.plot(x,y)
yearsFmt = mdates.DateFormatter('%a')
ax.xaxis.set_major_formatter(yearsFmt)
plt.show()
The key idea is to get the dayofweek from the DateTime object, like: x.dayofweek. This returns the numeric dayofweek. We can easily get the corresponding name np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
x_d = np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
y = [0.32,0.33,0.32,0.34]
ser = pd.Series(y, index=x_d)
ser.plot()

Modify major and minor xticks for dates

I am plotting two pandas series. The index is a date (1-1 to 12-31)
s1.plot()
s2.plot()
pd.plot() interprets the dates and assigns them to axis values as such:
I would like to modify the major ticks to be the 1st of every month and minor ticks to be the days in between
This works:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m-%d')
s2014max = df2014.groupby(['Date'], sort=True)['Data_Value'].max()/10
s2014min = df2014.groupby(['Date'], sort=True)['Data_Value'].min()/10
#remove the leap day and convert to datetime for plotting
s2014min = s2014min[s2014min.index != '02-29']
s2014max = s2014max[s2014max.index != '02-29']
dateslist = s2014min.index.tolist()
dates = [pd.datetime.strptime(date, '%m-%d').date() for date in dateslist]
plt.figure()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_formatter(dayFmt)
ax.tick_params(direction='out', pad=15)
s2014min.plot()
s2014max.plot()
This results in no ticks:
A possible way is to use matplotlib for plotting the dates instead of pandas.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
dates = pd.date_range("2016-01-01", "2016-12-31" )
y = np.cumsum(np.random.normal(size=len(dates)))
df = pd.DataFrame({"Dates" : dates, "y": y})
fig, ax = plt.subplots()
ax.plot_date(df["Dates"], df.y, '-')
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(monthFmt)
plt.show()
You were so close! All you needed to do was add the formatters similar to how the other answer did it. Here is a working sample similar to your code (note I did mine in ipython notebook hence the %matplotlib inline).
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from random import random
y = [random() for i in range(25)]
x = [(datetime.now() - timedelta(days=i)) for i in range(25)]
x.reverse()
s = pd.Series(y, index=x) # NOTE: S, not df, since you said you were using series
# format the ticks
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt) # This is what you needed
ax.xaxis.set_minor_formatter(dayFmt) # This is what you needed
ax.tick_params(direction='out', pad=15)
# format the coords message box
s.plot(figsize=(10,3))
which will look like this:

Trying to plot timedelta, but the yaxis is in 1e14, want format HH:MM:SS

I am plotting date time on the xaxis (which is actual dates) and then timedelta on the yaxis, which is actually time spans, or amount of time. Originally I was using date time for the yaxis, but I came across the usecase where the time values went over 24 hours, and then it broke the code. So instead I had to use timedelta in order to accommodate these values. But when I try to plot it using plot_date, the yaxis with the timedelta values comes out funny.
I have my information stored in a dataframe originally, and then change the values to a timedelta. This is the code I have to output this graph
import datetime as dt
import matplotlib.dates as mdates
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib as mpl
from matplotlib.backends.backend_pdf import PdfPages
plt.close('all')
#put data into dataframe
location='D:\CAT'
csvpath=location+('\metrics_summaryTEST.csv')
print csvpath
df=pd.read_csv(csvpath)
#setup plot/figure
media = set(df.mediaNumber.values)
num_plots = len(media)
ax = plt.gca()
pdfpath=location+('\metrics_graphs.pdf')
pp = PdfPages(pdfpath)
#declaring some variables
publishTimevals=np.zeros(len(df.publishTime.values),dtype="S20")
xdates=np.zeros(len(df.publishTime.values),dtype="S20")
ytimes=np.zeros(len(df.totalProcessTime.values),dtype="S8")
for f in sorted(media):
name = f
plt.figure(f)
plt.clf()
color = next(ax._get_lines.color_cycle)
#PROCESS PUBLISHTIME
publishTimevals= df.loc[df['mediaNumber']==f,['publishTime']]
xdates = map(lambda x: mpl.dates.date2num(dt.datetime.strptime(x, '%Y-%m-%d %H:%M')),publishTimevals.publishTime)
#PROCESS TOTALPROCESSTIME
totalProcessTimevals= df.loc[df['mediaNumber']==f,['totalProcessTime']]
ytimes = pd.to_timedelta(totalProcessTimevals.totalProcessTime)
plt.plot_date(xdates,ytimes,'o-',label='totalProcessTime',color=color)
print ytimes
plt.show()
#format the plot
plt.gcf().autofmt_xdate()
plt.xlabel('publishTime')
plt.ylabel('ProcessTime HH:MM:SS')
plt.legend(loc=8, bbox_to_anchor=(0.5,-0.3),ncol=3,prop={'size':9})
ax.grid('on')
plt.title('%s Processing Time' % (f))
plt.margins(0.05)
#plt.grid('on')
plt.minorticks_on()
plt.grid(which = 'minor', alpha = 0.3)
plt.grid(which = 'major', alpha = 0.7)
plt.show()
Could anyone point out what's going on here?

Categories