Interpreting Multiindex datetime - python

I have the following code:
import pandas as pd
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings("ignore")
start = datetime.date(2020,1,1)
end = datetime.date.today()
stock = 'fb'
data = web.DataReader(stock, 'yahoo', start, end)
data.index = pd.to_datetime(data.index, format ='%Y-%m-%d')
data = data[~data.index.duplicated(keep='first')]
data['year'] = data.index.year
data['month'] = data.index.month
data['week'] = data.index.week
data['day'] = data.index.day
data.set_index('year', append=True, inplace =True)
data.set_index('month',append=True,inplace=True)
data.set_index('week',append=True,inplace=True)
data.set_index('day',append=True,inplace=True)
fig, ax = plt.subplots(dpi=300, figsize =(30,4))
data.plot(y='Close', ax=ax, xlabel= 'Date')
plt.show()
What can I do to interpret the multiindex dates as the x axis in more readable year and month format? Such as in a format like strftime('%y -%m'). A similar question was asked here: Renaming months from number to name in pandas
But I am unable to see how I can use this to rename the x axis. Any help would be appreciated.

You can use the dates from matplotlib. See the following link for more details:
https://matplotlib.org/stable/api/dates_api.html#matplotlib.dates.ConciseDateFormatter
Here is the modified code:
import pandas as pd
from pandas import DataFrame as df
import matplotlib
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings("ignore")
from matplotlib import dates as mdates
start = datetime.date(2020,1,1)
end = datetime.date.today()
stock = 'fb'
data = web.DataReader(stock, 'yahoo', start, end)
data.index = pd.to_datetime(data.index, format ='%Y-%m-%d')
data = data[~data.index.duplicated(keep='first')]
data['year'] = data.index.year
data['month'] = data.index.month
data['week'] = data.index.week
data['day'] = data.index.day
data.set_index('year', append=True, inplace =True)
data.set_index('month',append=True,inplace=True)
data.set_index('week',append=True,inplace=True)
data.set_index('day',append=True,inplace=True)
fig, ax = plt.subplots(dpi=300, figsize =(15,4))
plt.plot(data.index.get_level_values('Date'), data['Close'])
#--------------------------------------
#Feel free to try different options
#--------------------------------------
#locator = mdates.AutoDateLocator()
locator = mdates.MonthLocator()
formatter = mdates.ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
plt.show()
Here is the
output.

Related

Trying to plot Earnings and Stock price in the same graph

I'm trying to plot the stock price and the earnings on the graph but for some reason I'm getting this:
Graph1
Please see my code below:
import matplotlib.pyplot as plt
import yfinance as yf
import pandas
import pandas_datareader
import matplotlib
t = yf.Ticker("T")
df1 = t.earnings
df1['Earnings'].plot(label = 'earnings', figsize = (15,7), color='green')
print(df1)
df2 = t.history(start = '2018-01-01', end = '2021-01-01', actions = False, rounding = True)
df2['Close'].plot(label = 'price', figsize = (15,7),color = 'blue')
plt.show()
Could someone help me?
Thanks in advance.
Plotting in pandas is easy to create graphs, but if you try to overlay them with time series data, as in this example, you will encounter problems. There are many approaches, but the method that I find easiest is to convert the data level to the gregorian calendar managed by matplotlib and create the graph. Finally, you can either convert it to your preferred formatting, etc., or use the automatic formatter and locator.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import yfinance as yf
import pandas as pd
t = yf.Ticker("T")
df1 = t.earnings
df1.index = pd.to_datetime(df1.index, format='%Y')
df1.index = mdates.date2num(df1.index)
ax = df1['Earnings'].plot(label='earnings', figsize=(15, 7), color='green')
df2 = t.history(start='2018-01-01', end='2021-01-01', actions=False, rounding=True)
df2.index = mdates.date2num(df2.index)
df2['Close'].plot(label='price', ax=ax,color='blue', secondary_y=True)
#ax.set_xticklabels([x.strftime('%Y-%m') for x in mdates.num2date(df2.index)][::125])
locator = mdates.AutoDateLocator()
formatter = mdates.ConciseDateFormatter(locator)
ax.xaxis.set_major_locator(locator)
ax.xaxis.set_major_formatter(formatter)
plt.show()

Is there any way to call and visualize more than the column Adj Close at the same time on python?

I have this part of the code done, but I would like to be able to add more columns like the Volume, Open, High.
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
assets = ["LAC", "NIO"]
weights = np.array([0.5, 0.5])
stockStartDate = "2016-01-01"
today = datetime.today().strftime("%Y-%m-%d")
today
df = pd.DataFrame()
for stock in assets:
df[stock] = web.DataReader(stock, data_source ="yahoo", start = stockStartDate, end = today)["Adj Close"]
Not very sure you want to do, but in essence if you want to be flexible for visualizing more variables, it's easier to keep the data in a long format, with a column (i used stock below) indicating the data source :
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import datetime
assets = ["LAC", "NIO"]
stockStartDate = "2016-01-01"
today = datetime.date.today().strftime("%Y-%m-%d")
df = []
for stock in assets:
x = web.DataReader(stock, data_source ="yahoo",
start = stockStartDate, end = today)
x['stock'] = stock
df.append(x)
df = pd.concat(df)
Then pivot it on the fly to plot:
df.pivot(values='High',columns='stock').plot.line()
Or use seaborn:
import seaborn as sns
sns.lineplot(x = df.index,y = "High",hue = 'stock',data=df)

matplotlib is not automatically reading dataframe as date

Since pandas last update, the x axis is not reading the index as a date. Any clues on what changed? As an example, the following code (Source) creates a random df. The matplotlib part is exactly what I'm have been doing with my real dataset (dates in my data where made using time.strftime("%Y-%m-%d")):
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(7), freq='D')
np.random.seed(seed=1111)
data = np.random.randint(1, high=100, size=len(days))
df = pd.DataFrame({'test': days, 'col2': data})
df = df.set_index('test')
# creates graph:
import matplotlib.pyplot as plt
fig = plt.plot(df.index, df["col2"])
fig = plt.xticks(rotation=30), plt.legend(loc='best'), plt.xlabel("Weeks")
fig = plt.style.use(['bmh', 'seaborn-paper'])
fig = plt.title("Index", fontsize=14, fontweight='bold')
plt.show()
The resulting graph has the x axis in number format. Before updating, my graphs automatically had dates in the index (because the index is in date format).
Pandas used to import the units handlers for datetime64, but as of 0.21 stopped (though it may be back for 0.22). The way to get the old behaviour without explicit conversion is
from pandas.tseries import converter as pdtc
pdtc.register()
Solution 1
Use pandas .plot on the dataframe:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(7), freq='D')
np.random.seed(seed=1111)
data = np.random.randint(1, high=100, size=len(days))
df = pd.DataFrame({'test': days, 'col2': data})
df = df.set_index('test')
# creates graph:
import matplotlib.pyplot as plt
sub = df.plot()
fig = plt.xticks(rotation=30), plt.legend(loc='best'), plt.xlabel("Weeks")
fig = plt.style.use(['bmh', 'seaborn-paper'])
fig = plt.title("Index", fontsize=14, fontweight='bold')
Solution 2
Convert them Python datetime objects:
fig = plt.plot(df.index.to_pydatetime(), df["col2"])
Result of both approaches

Display datetime as day for xtick

I have the following sample codes:
import pandas as pd
import matplotlib.pyplot as plt
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007
00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
plt.plot(x,y)
I would like to have the xtick to be just 'Thu' for 01/02/2007 and 'Fri' for 02/02/2007. What is the best possible way to do that?
One possible solution is to change the X-axis format:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
y = [0.32,0.33,0.32,0.34]
fig, ax = plt.subplots()
ax.plot(x,y)
yearsFmt = mdates.DateFormatter('%a')
ax.xaxis.set_major_formatter(yearsFmt)
plt.show()
The key idea is to get the dayofweek from the DateTime object, like: x.dayofweek. This returns the numeric dayofweek. We can easily get the corresponding name np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
dates = ['01/02/2007 00:02:00','01/02/2007 00:04:00','02/02/2007 00:02:00','02/02/2007 00:04:00']
x = pd.to_datetime(dates, format='%d/%m/%Y %H:%M:%S')
x_d = np.array(['Mon','Tue','Wed','Thu','Fri','Sat', 'Sun'])[x.dayofweek]
y = [0.32,0.33,0.32,0.34]
ser = pd.Series(y, index=x_d)
ser.plot()

Modify major and minor xticks for dates

I am plotting two pandas series. The index is a date (1-1 to 12-31)
s1.plot()
s2.plot()
pd.plot() interprets the dates and assigns them to axis values as such:
I would like to modify the major ticks to be the 1st of every month and minor ticks to be the days in between
This works:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv('data.csv')
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m-%d')
s2014max = df2014.groupby(['Date'], sort=True)['Data_Value'].max()/10
s2014min = df2014.groupby(['Date'], sort=True)['Data_Value'].min()/10
#remove the leap day and convert to datetime for plotting
s2014min = s2014min[s2014min.index != '02-29']
s2014max = s2014max[s2014max.index != '02-29']
dateslist = s2014min.index.tolist()
dates = [pd.datetime.strptime(date, '%m-%d').date() for date in dateslist]
plt.figure()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_formatter(dayFmt)
ax.tick_params(direction='out', pad=15)
s2014min.plot()
s2014max.plot()
This results in no ticks:
A possible way is to use matplotlib for plotting the dates instead of pandas.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
dates = pd.date_range("2016-01-01", "2016-12-31" )
y = np.cumsum(np.random.normal(size=len(dates)))
df = pd.DataFrame({"Dates" : dates, "y": y})
fig, ax = plt.subplots()
ax.plot_date(df["Dates"], df.y, '-')
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(monthFmt)
plt.show()
You were so close! All you needed to do was add the formatters similar to how the other answer did it. Here is a working sample similar to your code (note I did mine in ipython notebook hence the %matplotlib inline).
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from random import random
y = [random() for i in range(25)]
x = [(datetime.now() - timedelta(days=i)) for i in range(25)]
x.reverse()
s = pd.Series(y, index=x) # NOTE: S, not df, since you said you were using series
# format the ticks
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt) # This is what you needed
ax.xaxis.set_minor_formatter(dayFmt) # This is what you needed
ax.tick_params(direction='out', pad=15)
# format the coords message box
s.plot(figsize=(10,3))
which will look like this:

Categories