Matplotlib axvspan() function fills wrong date - python

When I want to use axvspan to fill the dates, It fills wrong area. I think it reversed axvspans. The reason for the problem may be the dataframe that I imported from World Bank. How can I fix it? Thank you.
import matplotlib.pyplot as plt
import matplotlib
from pandas_datareader import wb
import seaborn as sns
import pandas as pd
import datetime
start = datetime.datetime (2000,1,1)
end = datetime.datetime (2021,5,1)
ind = ['FM.LBL.BMNY.ZG',
'FR.INR.DPST'
]
df = wb.download(indicator=ind, country='CHN', start=start, end=end).dropna();df.reset_index(inplace=True)
df.columns = ['Country',
'Year',
'Broad money growth (annual %) - China',
'Deposit interest rate (%)'
]
df=df.sort_values(by='Year', ascending=True)
axes= df.plot(x='Year',subplots=True, figsize=(20,12), layout=(1,2), colormap='summer', legend=True)
for ax, col in zip(axes.flatten(), df.columns):
ax.axvspan('2007-1-12', '2009-6-1', color='teal', alpha=0.5,
label='2008 Crisis')
ax.axvspan('2019-12-1', '2020-2-1', color='orange', alpha=0.5,
label='Pandemic')
ax.set_title(col)
axes[0,0].set_title('Broad money growth (annual %) - China')
axes[0,0].invert_xaxis()
axes[0,0].legend(loc='upper left')
axes[0,0].set_ylabel('Percent(Annual)')
axes[0,0].invert_xaxis()
axes[0,1].set_title('Deposit interest rate (%)')
axes[0,1].invert_xaxis()
axes[0,1].legend(loc='upper left')
axes[0,1].set_ylabel('Percent(Annual)')
axes[0,1].invert_xaxis()
plt.suptitle("Financial Sector in China",fontweight="bold")
plt.show()

Correcting xaxis dtype in pandas.plot to correct vspan locations
The year column in df is an object not an int, so the xlim is (-1, 21) and the labels are formatted on top of those index locations. Therefore, when you go to place your vspan, the x location entered does not match those of the x axis. To fix this, simply make your year column an int and make your vspan in terms of years as ints.
import matplotlib.pyplot as plt
import matplotlib
from pandas_datareader import wb
import seaborn as sns
import pandas as pd
import datetime
import matplotlib.ticker as ticker
start = datetime.datetime (2000,1,1)
end = datetime.datetime (2021,5,1)
ind = ['FM.LBL.BMNY.ZG',
'FR.INR.DPST'
]
df = wb.download(indicator=ind, country='CHN', start=start, end=end).dropna();df.reset_index(inplace=True)
df.columns = ['Country',
'Year',
'Broad money growth (annual %) - China',
'Deposit interest rate (%)'
]
df=df.sort_values(by='Year', ascending=True)
df['Year'] = df['Year'].astype(int)
axes= df.plot(x='Year',subplots=True, figsize=(15,5), layout=(1,2), colormap='summer', legend=True)
for ax, col in zip(axes.flatten(), df.columns):
ax.axvspan(2007, 2009, color='teal', alpha=0.5, label='2008 Crisis')
ax.axvspan(2019, 2020, color='orange', alpha=0.5, label='Pandemic')
ax.set_title(col)
ax.set_xlim(2000, 2021)
ax.xaxis.set_major_locator(ticker.MultipleLocator(2))
axes[0,0].set_title('Broad money growth (annual %) - China')
axes[0,0].legend(loc='upper left')
axes[0,0].set_ylabel('Percent(Annual)')
axes[0,1].set_title('Deposit interest rate (%)')
axes[0,1].legend(loc='upper left')
axes[0,1].set_ylabel('Percent(Annual)')
plt.suptitle("Financial Sector in China",fontweight="bold")

Related

How to plot each index value on the Y axis

I'm trying to figure out how to plot an X-axis with hourly precision (the index column has hourly values) as is in my dataframe. Currently, it just labels each month. I want one label for each Y point “close values column”.
My code now:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')
plt.figure(figsize=(80, 8))
plt.plot_date(data.index,data['close'], linestyle='solid',xdate=True, marker=None)
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
Thanks to #r-begginers's comment I am able to achieve what I want. My finished code to print financial data with hourly labels for the X axis is as follows:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')
plt.figure(figsize=(100, 8))
plt.plot_date(data.index,data['close'], linestyle='solid',xdate=True, marker=None)
days = mpl_dates.DayLocator(interval=1)
days_fmt = mpl_dates.DateFormatter('%Y-%m-%d %H:%M')
plt.gca().xaxis.set_major_locator(days)
plt.gca().xaxis.set_major_formatter(days_fmt)
plt.grid()
plt.xticks(rotation=90, fontsize=6)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
I've simply replaced the three lines that were related to the formatting of the x-axis.
In:
hours = mpl_dates.HourLocator(interval=1)
hours_fmt = mpl_dates.DateFormatter('%H')
plt.gca().xaxis.set_major_locator(hours)
plt.gca().xaxis.set_major_formatter(hours_fmt)
Out:
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
I've also made some other changes, but they are not relevant to the question.
Thanks again to #r-begginers for pointing me in this direction.

Unable to get the dates right in xtick labels in matplotlib

Hi I am trying to make candlestick using rectangle patch in matplotlib, but the dates on xaxis are not coming right. I had to use a number as x value to create the rectangle shape, and now I want to replace these values with corresponding dates in the dataframe index.
Heres the code:`
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
import yfinance as yf
import numpy as np
import datetime as dt
df = yf.download('adaniports.ns', '2015-01-01', '2016-01-01')
df.rename(columns={'Adj Close': 'Adj_close'}, inplace= True)
def candlestick(df, colorup= '#2bff1c', colordown= '#ff5e00'):
df.reset_index(inplace= True)
df['Days_num'] = df.Date.apply(lambda x: x - df.Date[0])
df.set_index('Date', inplace= True)
df.Days_num = df.Days_num.apply(lambda x: x/np.timedelta64(1, 'D')).astype(int)
fig, ax = plt.subplots(figsize= (12, 8))
ax.plot([df.Days_num, df.Days_num], [df.High, df.Low], color= 'k', solid_capstyle='round', linewidth= 0.8)
for x in range(len(df.index)):
if df.Close[x] > df.Open[x]:
color = colorup
else:
color = colordown
rec = mpl.patches.Rectangle((df.Days_num[x] - 0.25, df.Open[x]), 0.5, (df.Close[x] - df.Open[x]), facecolor= color,\
edgecolor= 'k', linewidth= 0.5, capstyle= 'round', zorder= 3)
ax.add_patch(rec)
ax.set_xticklabels([x.strftime('%d-%m-%y') for x in df.index.tolist()])
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.grid(True)
fig.tight_layout()
fig.autofmt_xdate()
plt.show()
candlestick(df)
Here is the output:
As you can see, the range is one year, but the x-ticks only plot for 1 month.
I know that there are libraries like mplfinance for this, but I would rather do it this way as I need to make customisations on it, which is difficult in those libraries.

Create space on right side of plot

It is hard to see the last datapoint on the chart when it is right next to the y-axis. I would like to create some space between my last data point and the right y-axis. Any idea how to create this space?
import pandas as pd
import numpy as np
import yfinance as yf
import pandas_datareader as pdr
import datetime as dt
import matplotlib.pyplot as plt
##Get stock price data
ticker = '^GSPC, AAPL'
#get data from YFinance
df = yf.download(ticker, period = "max" , interval = "1d")['Adj Close']
#Convert the 'Date' Index to 'Date' Column
df.reset_index(inplace=True)
df['GSPCpctchange'] = (df['^GSPC'] / df['^GSPC'].shift(1))-1
df['AAPLpctchange'] = (df['AAPL'] / df['AAPL'].shift(1))-1
df['10_percent_R'] = df['GSPCpctchange'].rolling(10).corr(df['AAPLpctchange'])
df['10_price_R'] = df['^GSPC'].rolling(10).corr(df['AAPL'])
df['Date'] = pd.to_datetime(df['Date'], format = '%Y/%m/%d')
# Assign this as index
df.set_index(['Date'], inplace=True)
#Chart S&P500 and AAPL 10D R on one chart
plt.style.use('classic')
fig, ax1 = plt.subplots(figsize=(13,9))
ax2 = ax1.twinx()
fig.suptitle('S&P500 10D Correlation with AAPL', fontsize=16)
ax1.set_xlabel('Date')
ax1.set_ylabel('S&P500', color="blue")
ax1.tick_params(axis='y', labelcolor="blue")
ax1.plot(df.loc['2019-01-01':'2021-02-27','^GSPC'], linewidth=3, color="blue")
ax2.set_ylabel('10D AAPL Correlation', color="navy") # we already handled the x-label with ax1
ax2.tick_params(axis='y', labelcolor="navy")
ax2.plot(df.loc['2019-01-01':'2021-02-27','10_percent_R'], color="orange")
ax2.plot(df.loc['2019-01-01':'2021-02-27','10_price_R'], color="navy")
ax1.grid()
plt.legend(['Percent R','Price R'], loc="upper left")
You can try like this:
ax1.set_xlim(['2019-01-01', '2021-03-01'])

Changing the tick frequency on the x-axis

I am trying to plot a bar chart with the date vs the price of a crypto currency from a dataframe and have 731 daily samples. When i plot the graph i get the image as seen below. Due to the amount of dates the x axis is unreadable and i would like to make it so it only labels the 1st of every month on the x-axis.
This is the graph i currently have: https://imgur.com/a/QVNn4Zp
I have tried using other methods i have found online both in stackoverflow and other sources such as youtube but had no success.
This is the Code i have so far to plot the bar chart.
df.plot(kind='bar',x='Date',y='Price in USD (at 00:00:00 UTC)',color='red')
plt.show()
One option is to plot a numeric barplot with matplotlib.
Matplotlib < 3.0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
start = pd.to_datetime("5-1-2012")
idx = pd.date_range(start, periods= 365)
df = pd.DataFrame({'Date': idx, 'A':np.random.random(365)})
fig, ax = plt.subplots()
dates = mdates.date2num(df["Date"].values)
ax.bar(dates, df["A"], width=1)
loc = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(loc)
ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(loc))
plt.show()
Matplotlib >= 3.0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.plotting.register_matplotlib_converters()
start = pd.to_datetime("5-1-2012")
idx = pd.date_range(start, periods= 365)
df = pd.DataFrame({'Date': idx, 'A':np.random.random(365)})
fig, ax = plt.subplots()
ax.bar(df["Date"], df["A"], width=1)
plt.show()
Further options:
For other options see Pandas bar plot changes date format

how to highlight weekends for time series line plot in python

I am trying to do analysis on a bike share dataset. Part of the analysis includes showing the weekends' demand in date wise plot.
My dataframe in pandas with last 5 row looks like this.
Here is my code for date vs total ride plot.
import seaborn as sns
sns.set_style("darkgrid")
plt.plot(d17_day_count)
plt.show()
.
I want to highlight weekends in the plot. So that it could look something similar to this plot.
I am using Python with matplotlib and seaborn library.
You can easily highlight areas by using axvspan, to get the areas to be highlighted you can run through the index of your dataframe and search for the weekend days. I've also added an example for highlighting 'occupied hours' during a working week (hopefully that doesn't confuse things).
I've created dummy data for a dataframe based on days and another one for hours.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# dummy data (Days)
dates_d = pd.date_range('2017-01-01', '2017-02-01', freq='D')
df = pd.DataFrame(np.random.randint(1, 20, (dates_d.shape[0], 1)))
df.index = dates_d
# dummy data (Hours)
dates_h = pd.date_range('2017-01-01', '2017-02-01', freq='H')
df_h = pd.DataFrame(np.random.randint(1, 20, (dates_h.shape[0], 1)))
df_h.index = dates_h
#two graphs
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
#plot lines
dfs = [df, df_h]
for i, df in enumerate(dfs):
for v in df.columns.tolist():
axes[i].plot(df[v], label=v, color='black', alpha=.5)
def find_weekend_indices(datetime_array):
indices = []
for i in range(len(datetime_array)):
if datetime_array[i].weekday() >= 5:
indices.append(i)
return indices
def find_occupied_hours(datetime_array):
indices = []
for i in range(len(datetime_array)):
if datetime_array[i].weekday() < 5:
if datetime_array[i].hour >= 7 and datetime_array[i].hour <= 19:
indices.append(i)
return indices
def highlight_datetimes(indices, ax):
i = 0
while i < len(indices)-1:
ax.axvspan(df.index[indices[i]], df.index[indices[i] + 1], facecolor='green', edgecolor='none', alpha=.5)
i += 1
#find to be highlighted areas, see functions
weekend_indices = find_weekend_indices(df.index)
occupied_indices = find_occupied_hours(df_h.index)
#highlight areas
highlight_datetimes(weekend_indices, axes[0])
highlight_datetimes(occupied_indices, axes[1])
#formatting..
axes[0].xaxis.grid(b=True, which='major', color='black', linestyle='--', alpha=1) #add xaxis gridlines
axes[1].xaxis.grid(b=True, which='major', color='black', linestyle='--', alpha=1) #add xaxis gridlines
axes[0].set_xlim(min(dates_d), max(dates_d))
axes[0].set_title('Weekend days', fontsize=10)
axes[1].set_title('Occupied hours', fontsize=10)
plt.show()
I tried using the code in the accepted answer but the way the indices are used, the last weekend in the time series does not get highlighted entirely, despite what the image currently shown suggests (this is noticeable mainly with a frequency of 6 hours or more). Also, it does not work if the frequency of the data is higher than daily. This is why I share here a solution that uses the x-axis units so that weekends (or any other recurring time period) can be highlighted without any problem related to the index.
This solution takes only 6 lines of code and it works with any frequency. In the example below, it highlights full weekend days which makes it more efficient than the accepted answer where small frequencies (e.g. 30 minutes) will produce many polygons to cover the whole weekend.
The x-axis limits are used to compute the range of time covered by the plot in terms of days, which is the unit used for matplotlib dates. Then a weekends mask is computed and passed to the where argument of the fill_between plotting function. The masks are processed as right-exclusive so in this case, they must contain Mondays for the highlights to be drawn up to Mondays 00:00. Because plotting these highlights can alter the x-axis limits when weekends occur near the limits, the x-axis limits are set back to the original values after plotting.
Note that contrary to axvspan, the fill_between function needs the y1 and y2 arguments. For some reason, using the default y-axis limits leaves a small gap between the plot frame and the tops and bottoms of the weekend highlights. This issue is solved by running ax.set_ylim(*ax.get_ylim()) just after creating the plot.
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.pyplot as plt # v 3.3.2
import matplotlib.dates as mdates
# Create sample dataset
rng = np.random.default_rng(seed=1234) # random number generator
dti = pd.date_range('2017-01-01', '2017-05-15', freq='D')
counts = 5000 + np.cumsum(rng.integers(-1000, 1000, size=dti.size))
df = pd.DataFrame(dict(Counts=counts), index=dti)
# Draw pandas plot: x_compat=True converts the pandas x-axis units to matplotlib
# date units (not strictly necessary when using a daily frequency like here)
ax = df.plot(x_compat=True, figsize=(10, 5), legend=None, ylabel='Counts')
ax.set_ylim(*ax.get_ylim()) # reset y limits to display highlights without gaps
# Highlight weekends based on the x-axis units
xmin, xmax = ax.get_xlim()
days = np.arange(np.floor(xmin), np.ceil(xmax)+2)
weekends = [(dt.weekday()>=5)|(dt.weekday()==0) for dt in mdates.num2date(days)]
ax.fill_between(days, *ax.get_ylim(), where=weekends, facecolor='k', alpha=.1)
ax.set_xlim(xmin, xmax) # set limits back to default values
# Create appropriate ticks using matplotlib date tick locators and formatters
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=np.arange(5, 31, step=7)))
ax.xaxis.set_major_formatter(mdates.DateFormatter('\n%b'))
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%d'))
# Additional formatting
ax.figure.autofmt_xdate(rotation=0, ha='center')
title = 'Daily count of trips with weekends highlighted from SAT 00:00 to MON 00:00'
ax.set_title(title, pad=20, fontsize=14);
As you can see, the weekends are always highlighted to the full extent, regardless of where the data starts and ends.
You can find more examples of this solution in the answers I have posted here and here.
I have another suggestion to make in this regard, which takes inspirations from previous posts by other contributors. The code is as follows:
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
rng = np.random.default_rng(seed=42) # random number generator
dti = pd.date_range('2021-08-01', '2021-08-31', freq='D')
counts = 5000 + np.cumsum(rng.integers(-1000, 1000, size=dti.size))
df = pd.DataFrame(dict(Counts=counts), index=dti)
weekends = [d for d in df.index if d.isoweekday() in [6,7]]
weekend_list = []
for weekendday in weekends:
d1 = weekendday
d2 = weekendday + datetime.timedelta(days=1)
weekend_list.append((d1, d2))
weekend_df = pd.DataFrame(weekend_list)
sns.set()
plt.figure(figsize=(15, 10), dpi=100)
df.plot()
plt.legend(bbox_to_anchor=(1.02, 0), loc="lower left", borderaxespad=0)
plt.ylabel("Counts")
plt.xlabel("Date of visit")
plt.xticks(rotation = 0)
plt.title("Daily counts of shop visits with weekends highlighted in green")
ax = plt.gca()
for d in weekend_df.index:
print(weekend_df[0][d], weekend_df[1][d])
ax.axvspan(weekend_df[0][d], weekend_df[1][d], facecolor="g", edgecolor="none", alpha=0.5)
ax.relim()
ax.autoscale_view()
plt.savefig("junk.png", dpi=100, bbox_inches='tight', pad_inches=0.2)
The result would be something like the following diagram:

Categories