How do I clean up the x axis? - python

I have tried plt.gcf().autofmt_xdate() but that doesn't fix the overlapping dates on the x axis. How do I clean the x axis to every week instead of everyday?
# Convert string column into date
df['date'] = pd.to_datetime(df['date'], format = "%Y-%m-%d")
plt.figure(figsize=(14,10))
plt.title("Daily Tests")
plt.ylabel("Number of confirmed cases")
plt.xlabel("Date")
sns.barplot(x=df['date'], y=df['confirmed'])
plt.show()
The graph:
any suggestions would be appreciated.

Set the interval of mdates.DayLocator(interval=14) in order to control the time series data of x-axis. In this case, it is set to two weeks.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111)
sns.barplot(x=df['date'], y=df['confirmed'], ax=ax)
ax.set_title("Daily Tests")
ax.set_ylabel("Number of confirmed cases")
ax.set_xlabel("Date")
days = mdates.DayLocator(interval=14)
days_fmt = mdates.DateFormatter('%m-%d')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(days_fmt)
plt.show()

Related

How to plot each index value on the Y axis

I'm trying to figure out how to plot an X-axis with hourly precision (the index column has hourly values) as is in my dataframe. Currently, it just labels each month. I want one label for each Y point “close values column”.
My code now:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')
plt.figure(figsize=(80, 8))
plt.plot_date(data.index,data['close'], linestyle='solid',xdate=True, marker=None)
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
Thanks to #r-begginers's comment I am able to achieve what I want. My finished code to print financial data with hourly labels for the X axis is as follows:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')
plt.figure(figsize=(100, 8))
plt.plot_date(data.index,data['close'], linestyle='solid',xdate=True, marker=None)
days = mpl_dates.DayLocator(interval=1)
days_fmt = mpl_dates.DateFormatter('%Y-%m-%d %H:%M')
plt.gca().xaxis.set_major_locator(days)
plt.gca().xaxis.set_major_formatter(days_fmt)
plt.grid()
plt.xticks(rotation=90, fontsize=6)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
I've simply replaced the three lines that were related to the formatting of the x-axis.
In:
hours = mpl_dates.HourLocator(interval=1)
hours_fmt = mpl_dates.DateFormatter('%H')
plt.gca().xaxis.set_major_locator(hours)
plt.gca().xaxis.set_major_formatter(hours_fmt)
Out:
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
I've also made some other changes, but they are not relevant to the question.
Thanks again to #r-begginers for pointing me in this direction.

Date does not get displayed in the desired format in line chart

I have a plot_graph() function that plots pandas dataframe as a line chart.
def plot_graph(df):
ax = plt.gca()
#df["Date"].dt.strftime("%m/%d/%y")
#df["date"] = df["date"].astype('datetime64[ns]')
print(df['date'])
df.plot(kind='line', x='date', y='Actual', ax=ax)
df.plot(kind='line', x='date', y='Expected', color='red', ax=ax)
ax.xaxis.set_major_locator(plt.MaxNLocator(3))
plt.savefig("fig1.png")
I pass pandas dataframe in this format
date actual expected
2019-11 20 65
2019-12 35 65
When I plot the line chart, x axis labels does not get displayed correctly as in (yyyy-mm) format. I believe it is with the date format. So I tried converting it to date. I tried with all the options(commented in the code), nothing seems to work. Any suggestions would be appreicated.
Try this:
import pandas as pd
import matplotlib.dates as mdates
def plot_graph(df):
ax = plt.gca()
df['date'] = pd.to_datetime(df['date']).dt.date
df.plot(kind='line', x='date', y='actual', ax=ax)
df.plot(kind='line', x='date', y='expected', color='red', ax=ax)
ax.xaxis.set_major_locator(mdates.MonthLocator())
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) #to explicitly set format
plot_graph(df)
I think using matplotlib.dates is the best thing here, but it seems like df.plot() needs dates to be date and not datetime (or string). If you instead plot directly through matplotlib you don't need to do this. More here.
Reference Matplotlib: Date tick labels & Formatting date ticks using ConciseDateFormatter
matplotlib.dates.MonthLocator
matplotlib.dates.DateFormatter
matplotlib.axis.Axis.set_major_locator
matplotlib.axis.XAxis.set_major_formatter
Note the index column is in a datetime format. To transform your column to datetime, use df.date = pd.to_datetime(df.date)
df.plot() has tick locs like array([13136, 13152, 13174, 13175], dtype=int64). I don't actually know how those numbers are derived, but they cause an issue with some of the matplotlib axis and date formatting methods, which is why I changed the plots away from df.plot.
sns.lineplot and plt.plot have tick locs that are the ordinal representation of the datetime, array([737553., 737560., 737567., 737577., 737584., 737591., 737598., 737607.].
import pandas as pd
import numpy as np # for test data
from datetime import datetime # for test data
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
# synthetic data with date as a datetime
np.random.seed(365)
length = 700
df = pd.DataFrame(np.random.rand(length, 2) * 10, columns=['Actual', 'Expected'], index=pd.bdate_range(datetime.today(), freq='d', periods=length).tolist()).reset_index()
# display(df.head())
index Actual Expected
0 2020-07-16 9.414557 6.416027
1 2020-07-17 6.846105 5.885621
2 2020-07-18 5.438872 3.680709
3 2020-07-19 7.666258 3.050124
4 2020-07-20 4.420860 1.104433
# function
def plot_graph(df):
# df.date = pd.to_datetime(df.date) # if needed and date is the column name
fig, ax = plt.subplots()
months = mdates.MonthLocator() # every month
months_fmt = mdates.DateFormatter('%Y-%m') # format
ax.plot('index', 'Actual', data=df)
ax.plot('index', 'Expected', data=df, color='red')
# format the ticks
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(months_fmt)
plt.xticks(rotation=90)
plt.legend()
plt.show()
plot_graph(df)

Python showing timespan

I created a chart where you can see the visualized data and the trend of the data.
Is it possible to cut the chart on a timespan?
This is my code for the chart
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
fig, ax = plt.subplots()
ax.grid(True)
year = mdates.YearLocator(month=1)
month = mdates.MonthLocator(interval=3)
year_format = mdates.DateFormatter('%Y')
month_format = mdates.DateFormatter('%m')
ax.xaxis.set_minor_locator(month)
ax.xaxis.grid(True, which = 'minor')
ax.xaxis.set_major_locator(year)
ax.xaxis.set_major_formatter(year_format)
plt.plot(df.index, df['JAN'], c='blue')
plt.plot(decomposition.trend.index, decomposition.trend, c='red')
I had this code to shorten the chart but I couldn´t figure out how to use it in the code above.
start_date = datetime(2004,1,1)
end_date = datetime(2008,1,1)
df[(start_date<=df.index) & (df.index<=end_date)].plot(grid='on')
You can use plt.xlim to adjust the date range,
plt.xlim([datetime(2004, 1, 1), datetime(2008, 1, 1)])
Which will give you an x-axis that looks like

How to set x-ticks to months with `set_major_locator`?

I am trying to use the following code to set the x-ticks to [Jan., Feb., ...]
import matplotlib.pyplot as plt
from matplotlib.dates import MonthLocator, DateFormatter
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(np.arange(1000))
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
I get the following figure, without x-ticks
I'm wondering why all x-ticks disappeared? I wrote the above code with reference to this implementation
Many thanks.
It is not very clear the type of data you currently have. But below are my suggestions for plotting the month on the x-axis:
Transform your date using pd.to_datetime
Set it to your dataframe index.
Call explicitly the plt.set_xticks() method
Below one example with re-created data:
from datetime import datetime as dt
from datetime import timedelta
### create sample data
your_df = pd.DataFrame()
your_df['vals'] = np.arange(1000)
## make sure your datetime is considered as such by pandas
your_df['date'] = pd.to_datetime([dt.today()+timedelta(days=x) for x in range(1000)])
your_df= your_df.set_index('date') ## set it as index
### plot it
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(your_df['vals'])
plt.xticks(rotation='vertical')
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
Note that if you do not want every month plotted, you can let matplotlib handle that for you, by removing the major locator.
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(your_df['vals'])
plt.xticks(rotation='vertical')
# ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
Added Went into the link provided, and you do have a DATE field in the dataset used (boulder-precip.csv). You can actually follow the same procedure and have it plotted on a monthly-basis:
df = pd.read_csv('boulder-precip.csv')
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.set_index('DATE')
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(df['PRECIP'])
plt.xticks(rotation='vertical')
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))

Plotting candlestick with matplotlib for time series w/o weekend gaps

trying to plot a candlestick serie after importing datas from yahoo-finance. I'm using python 2.7
I have already a serie plotted and I want to add the same one as candlestick but I don't see how I can do that :
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick2_ohlc
#Reset the index to remove Date column from index
df_ohlc = data.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Normal plot
ax1 = plt.subplot()
ax1.plot(df_ohlc["Date"], df_ohlc["Close"], label = "Price", color="blue", linewidth=2.0)
#Candle plot
candlestick2_ohlc(ax1,df_ohlc['Open'],df_ohlc['High'],df_ohlc['Low'],df_ohlc['Close'],width=0.6)
If I plot candlestick alone, it looks fine but the x axis is a list of integers.
If I plot candlestick alone after converting df_ohlc["Date"] to float then reconverting to datetime, it plots the serie with the correct x axis but there are gaps on the weekend even if the serie isn't defined for these dates.
Is there a way to plot both series at the same time ? I'm planning to add more series like moving average, OLS, Bollinger etc...
You can remove weekend gaps and make human-readable dates xticklabels in this way. Note that, this script is written in python 3 and there may be some differences from python 2.
import quandl
import numpy as np
from mpl_finance import candlestick_ohlc
import matplotlib.pyplot as plt
# getting data and modifying it to remove gaps at weekends
r = quandl.get('WIKI/AAPL', start_date='2016-01-01', end_date='2017-11-10')
date_list = np.array(r.index.to_pydatetime())
plot_array = np.zeros([len(r), 5])
plot_array[:, 0] = np.arange(plot_array.shape[0])
plot_array[:, 1:] = r.iloc[:, :4]
# plotting candlestick chart
fig, ax = plt.subplots()
num_of_bars = 100 # the number of candlesticks to be plotted
candlestick_ohlc(ax, plot_array[-num_of_bars:], colorup='g', colordown='r')
ax.margins(x=0.0, y=0.1)
ax.yaxis.tick_right()
x_tick_labels = []
ax.set_xlim(right=plot_array[-1, 0]+10)
ax.grid(True, color='k', ls='--', alpha=0.2)
# setting xticklabels actual dates instead of numbers
indices = np.linspace(plot_array[-num_of_bars, 0], plot_array[-1, 0], 8, dtype=int)
for i in indices:
date_dt = date_list[i]
date_str = date_dt.strftime('%b-%d')
x_tick_labels.append(date_str)
ax.set(xticks=indices, xticklabels=x_tick_labels)
plt.show()
I really need more information about your code and your dataframe, but you can use this example to do a candlestick
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt
#Reset the index to remove Date column from index
df_ohlc = df.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Converting dates column to float values
df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)
#Making plot
fig = plt.figure()
fig.autofmt_xdate()
ax1 = plt.subplot2grid((6,1), (0,0), rowspan=6, colspan=1)
#Converts raw mdate numbers to dates
ax1.xaxis_date()
plt.xlabel("Date")
print(df_ohlc)
#Making candlestick plot
candlestick_ohlc(ax1,df_ohlc.values,width=1, colorup='g', colordown='k',alpha=0.75)
plt.ylabel("Price")
plt.legend()
plt.show()

Categories