Plot pandas dataframe index formatted as Month-Year on x axis - python

I have a dataframe that I want the x axis to show as APR-2018 for example. The ax.format_xdata line does not do the trick.
import datetime as dt
import pandas as pd
import time
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Question code that reproduces the problem. Each key of `data` is a
# (ticker, integer timestamp) tuple, so from_dict(orient='index') produces a
# two-level index. The integers are presumably epoch milliseconds -- confirm.
data = {("IVOG",1493510400000):{"Adj_Close":119.2136,"MA(3)":119.2136,"EWMA(3)":119.2136},
("IVOG",1496188800000):{"Adj_Close":120.8236,"MA(3)":120.0186,"EWMA(3)":120.0454},
("IVOG",1498780800000):{"Adj_Close":120.2736,"MA(3)":120.1036,"EWMA(3)":120.1266},
("IVOG",1501459200000):{"Adj_Close":121.7836,"MA(3)":120.5236,"EWMA(3)":120.5832},
("IVOG",1504137600000):{"Adj_Close":120.3536,"MA(3)":120.4896,"EWMA(3)":120.5309},
("IVOG",1506729600000):{"Adj_Close":124.3336,"MA(3)":121.1303,"EWMA(3)":121.2749}}
df=pd.DataFrame.from_dict(data, orient = 'index')
print(df)
ax = plt.gca() # get current axis
# Draw the three columns as separate lines on the same axes.
df.plot(kind='line',y='Adj_Close', ax=ax)
df.plot(kind='line',y='MA(3)',ax=ax)
df.plot(kind='line',y='EWMA(3)', color='green', ax=ax)
# Second element of the first index tuple, i.e. the raw timestamp value.
print(df.index[0][1])
# NOTE: as stated in the question, assigning format_xdata does not produce
# the desired Month-Year tick labels.
ax.format_xdata = mdates.DateFormatter('%b-%Y') # Trying to get APR-2018
plt.xlabel(df.index[0][0]) # Trying to Get the Ticker
_=plt.grid()
_=plt.xticks(rotation=90)
plt.show()
The second index should be just the date and not time, but it incorrectly plots like this:Incorrect Plot

This should do the trick. Of course there are 'prettier' ways, but I have tried to make it so that you can keep your data and original data frame as close as to the original one in your question.
Edited after comments: so how about this, just create a new column with the date that you format in whatever shape you want. Then use set_xticklabels() passing that column to set the ticks as you want. Also you might want to remove the default plt.xlabel (otherwise you would have below your xticks the name of the indexes).
import datetime as dt
import pandas as pd
import time
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# The first part of your code is the same.
data = {
    ("IVOG", 1493510400000): {"Adj_Close": 119.2136, "MA(3)": 119.2136, "EWMA(3)": 119.2136},
    ("IVOG", 1496188800000): {"Adj_Close": 120.8236, "MA(3)": 120.0186, "EWMA(3)": 120.0454},
    ("IVOG", 1498780800000): {"Adj_Close": 120.2736, "MA(3)": 120.1036, "EWMA(3)": 120.1266},
    ("IVOG", 1501459200000): {"Adj_Close": 121.7836, "MA(3)": 120.5236, "EWMA(3)": 120.5832},
    ("IVOG", 1504137600000): {"Adj_Close": 120.3536, "MA(3)": 120.4896, "EWMA(3)": 120.5309},
    ("IVOG", 1506729600000): {"Adj_Close": 124.3336, "MA(3)": 121.1303, "EWMA(3)": 121.2749},
}
df = pd.DataFrame.from_dict(data, orient='index')

# Name the two index levels so they can be referred to explicitly.
df.index.names = ['ivog', 'timestamp']

# Build one datetime per row from the second index level. The raw values are
# epoch *milliseconds* (hence unit='ms'). get_level_values() returns one value
# per row in row order; index.levels[1] only holds the sorted unique values and
# lines up with the rows only by coincidence.
df['date'] = pd.to_datetime(df.index.get_level_values('timestamp'), unit='ms')

# Render the dates in the Month-Year form asked for in the question
# (same format string the question used).
df['date'] = df['date'].dt.strftime('%b-%Y')
print(df)

# Plot the data just like you were doing.
ax = plt.gca()  # get current axis
df.plot(kind='line', y='Adj_Close', ax=ax)
df.plot(kind='line', y='MA(3)', ax=ax)
df.plot(kind='line', y='EWMA(3)', color='green', ax=ax)

# The rows are drawn at integer positions 0..n-1. Pin one tick to each row
# before assigning labels; otherwise the labels are attached to whatever tick
# positions the automatic locator happened to choose.
ax.set_xticks(range(len(df)))
ax.set_xticklabels(df['date'])
plt.xlabel('Your x axis label')
plt.ylabel('Your y axis label')
plt.title('My Awseome Plot')
plt.xticks(rotation=45)
plt.show()

Related

Plotting more than 10K data point using Seaborn for x-axis as timestamp

I am trying to plot more than 10k data points, where I want to plot a data properties versus Timestamp. But on the x-axis the timestamps are overlapping and not visible.
How can I reduce the amount of labels on the x-axis, so that they are legible?
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Question code: draws every column except TIMESTAMP against TIMESTAMP.
# (The loop/if body indentation was lost in this copy.)
sns.set_style("whitegrid")
data = pd.read_csv('0912Testday4.csv',header=2)
# NOTE: per the update further down, TIMESTAMP is still a string column at
# this point, which is what makes the x tick labels overlap.
for i in data.columns:
if i!='TIMESTAMP':
sns.lineplot(x="TIMESTAMP",y=i,data = data)
plt.title(f"{i} vs TIMESTAMP")
plt.show()
Example plot demonstrating the problem:
Update: TIMESTAMP was in string format; converting it to datetime format resolves the problem.
data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'])
Update:TIMESTAMP was in string format by converting into datetime format it resolves the problem.
data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'])
Please make sure that TIMESTAMP is a datetime object. This should not happen when the x axis is a datetime. (You can use pd.to_datetime to convert int, float, str, and ... to datetime.)
If TIMESTAMP is a datetime, you can use the autofmt_xdate() method:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")
data = pd.read_csv('0912Testday4.csv', header=2)

# Make sure TIMESTAMP is a real datetime column; string timestamps are what
# cause the unreadable, overlapping tick labels. This is a no-op when the
# column is already datetime. If parsing is slow or ambiguous, pass an
# explicit format, e.g. format="%Y-%m-%d %H:%M:%S+00:00".
data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'])

# One figure per data column, so each title names the series actually shown.
for column in data.columns:
    if column == 'TIMESTAMP':
        continue
    fig, ax = plt.subplots()  # create a figure and a single set of axes
    sns.lineplot(x="TIMESTAMP", y=column, data=data, ax=ax)
    fig.autofmt_xdate()  # rotate and right-align the date tick labels
    ax.set_title(f"{column} vs TIMESTAMP")
    plt.show()
I didn't encounter such problem with sns.lineplot
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")

# Example data: one random integer in [0, 1000) per hourly timestamp.
# 'h' is the current spelling of the hourly alias ('H' is deprecated in
# recent pandas releases).
time_stamps = pd.date_range('2019-01-01', '2020-01-01', freq='h')
data_df = pd.DataFrame({
    'time': time_stamps,
    # One vectorised draw instead of a Python-level loop over the timestamps.
    'value': np.random.randint(0, 1000, size=len(time_stamps)),
})
print(data_df.shape)

# Plotting: pass the axes explicitly so the plot lands on the figure created
# here rather than on whatever the "current" axes happens to be.
fig, ax = plt.subplots()
sns.lineplot(x='time', y='value', data=data_df, ax=ax)
plt.show()
sns automatically selects the x ticks and x labels.
Alternatively, you can use ax.set_xticks and ax.set_xticklabels to set the x ticks and x tick labels manually.
Also you may use fig.autofmt_xdate() to rotate the x labels

Date does not get displayed in the desired format in line chart

I have a plot_graph() function that plots pandas dataframe as a line chart.
# Question code: plots two columns of df against its 'date' column on the
# current axes and saves the figure to fig1.png.
# (The function-body indentation was lost in this copy.)
# NOTE(review): the columns are referenced as 'Actual'/'Expected' here, while
# the sample table below shows lower-case 'actual'/'expected' -- confirm.
def plot_graph(df):
ax = plt.gca()
#df["Date"].dt.strftime("%m/%d/%y")
#df["date"] = df["date"].astype('datetime64[ns]')
print(df['date'])
df.plot(kind='line', x='date', y='Actual', ax=ax)
df.plot(kind='line', x='date', y='Expected', color='red', ax=ax)
# Limit the x axis to a small number of major ticks.
ax.xaxis.set_major_locator(plt.MaxNLocator(3))
plt.savefig("fig1.png")
I pass pandas dataframe in this format
date actual expected
2019-11 20 65
2019-12 35 65
When I plot the line chart, the x-axis labels are not displayed correctly in (yyyy-mm) format. I believe the problem is with the date format, so I tried converting the column to a date. I tried all the options (commented out in the code), but nothing seems to work. Any suggestions would be appreciated.
Try this:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


def plot_graph(df):
    """Plot the 'actual' and 'expected' columns of *df* against 'date'.

    Both lines are drawn on the current axes and a major tick is placed at
    every month.

    Args:
        df: DataFrame with a 'date' column that ``pd.to_datetime`` can parse
            (e.g. '2019-11') and numeric 'actual' and 'expected' columns.
    """
    # Work on a copy so the caller's frame keeps its original 'date' column.
    plot_df = df.copy()
    # df.plot() is given plain date objects here rather than strings.
    plot_df['date'] = pd.to_datetime(plot_df['date']).dt.date

    ax = plt.gca()
    plot_df.plot(kind='line', x='date', y='actual', ax=ax)
    plot_df.plot(kind='line', x='date', y='expected', color='red', ax=ax)
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    # ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) #to explicitly set format
plot_graph(df)
I think using matplotlib.dates is the best thing here, but it seems like df.plot() needs dates to be date and not datetime (or string). If you instead plot directly through matplotlib you don't need to do this. More here.
Reference Matplotlib: Date tick labels & Formatting date ticks using ConciseDateFormatter
matplotlib.dates.MonthLocator
matplotlib.dates.DateFormatter
matplotlib.axis.Axis.set_major_locator
matplotlib.axis.XAxis.set_major_formatter
Note the index column is in a datetime format. To transform your column to datetime, use df.date = pd.to_datetime(df.date)
df.plot() has tick locs like array([13136, 13152, 13174, 13175], dtype=int64). I don't actually know how those numbers are derived, but they cause an issue with some of the matplotlib axis and date formatting methods, which is why I changed the plots away from df.plot.
sns.lineplot and plt.plot have tick locs that are the ordinal representation of the datetime, e.g. array([737553., 737560., 737567., 737577., 737584., 737591., 737598., 737607.]).
import pandas as pd
import numpy as np # for test data
from datetime import datetime # for test data
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
# synthetic data with date as a datetime
np.random.seed(365)
length = 700
df = pd.DataFrame(np.random.rand(length, 2) * 10, columns=['Actual', 'Expected'], index=pd.bdate_range(datetime.today(), freq='d', periods=length).tolist()).reset_index()
# display(df.head())
index Actual Expected
0 2020-07-16 9.414557 6.416027
1 2020-07-17 6.846105 5.885621
2 2020-07-18 5.438872 3.680709
3 2020-07-19 7.666258 3.050124
4 2020-07-20 4.420860 1.104433
# function
def plot_graph(df):
    """Draw the 'Actual' and 'Expected' columns of *df* against its 'index' column.

    A new figure is created, a major tick is placed at every month and
    labelled as YYYY-MM, the tick labels are rotated by 90 degrees, and the
    figure is shown.
    """
    # df.date = pd.to_datetime(df.date) # if needed and date is the column name
    fig, ax = plt.subplots()

    # Same two lines, same order; only 'Expected' overrides the colour.
    for column, line_kwargs in (('Actual', {}), ('Expected', {'color': 'red'})):
        ax.plot('index', column, data=df, **line_kwargs)

    # Format the ticks: one per month, labelled year-month.
    x_axis = ax.xaxis
    x_axis.set_major_locator(mdates.MonthLocator())
    x_axis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

    plt.xticks(rotation=90)
    plt.legend()
    plt.show()
plot_graph(df)

How to set x-ticks to months with `set_major_locator`?

I am trying to use the following code to set the x-ticks to [Jan., Feb., ...]
import matplotlib.pyplot as plt
from matplotlib.dates import MonthLocator, DateFormatter
# Question code: the x-ticks disappear on this plot.
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
# Only y values are passed to plot(); no dates are supplied for the x axis.
# NOTE(review): np is used here but never imported in this snippet.
ax.plot(np.arange(1000))
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
I get the following figure, without x-ticks
I'm wondering why all x-ticks disappeared? I wrote the above code with reference to this implementation
Many thanks.
It is not very clear the type of data you currently have. But below are my suggestions for plotting the month on the x-axis:
Transform your date using pd.to_datetime
Set it to your dataframe index.
Explicitly configure the ticks, e.g. with plt.xticks() or ax.set_xticks() (note that pyplot has no plt.set_xticks() function)
Below one example with re-created data:
from datetime import datetime as dt
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import DateFormatter, MonthLocator

# Create sample data: the integers 0..999, one per day starting today.
your_df = pd.DataFrame({'vals': np.arange(1000)})

# Make sure the dates are real datetimes as far as pandas is concerned.
# The start is read once so every row shares the same time of day.
start = dt.today()
your_df['date'] = pd.to_datetime([start + timedelta(days=x) for x in range(1000)])
your_df = your_df.set_index('date')  # use the dates as the index

# Plot it: the datetime index supplies the x values, so the month locator
# and formatter have real dates to work with.
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(your_df['vals'])
plt.xticks(rotation='vertical')
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
Note that if you do not want every month plotted, you can let matplotlib handle that for you, by removing the major locator.
# Variant of the previous plot: the month locator is left disabled so that
# matplotlib chooses the tick positions itself; only the '%b' label format
# is set. Relies on your_df and the imports from the preceding snippet.
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(your_df['vals'])
plt.xticks(rotation='vertical')
# ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
Added: I went into the link provided, and you do have a DATE field in the dataset used (boulder-precip.csv). You can follow the same procedure and have it plotted on a monthly basis:
# Same procedure applied to the CSV from the linked example: parse DATE,
# make it the index, then plot PRECIP with every month ticked and labelled
# with its abbreviated name ('%b').
df = pd.read_csv('boulder-precip.csv')
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.set_index('DATE')
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
ax.plot(df['PRECIP'])
plt.xticks(rotation='vertical')
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))

datetime x-axis matplotlib labels causing uncontrolled overlap

I'm trying to plot a pandas series with a 'pandas.tseries.index.DatetimeIndex'. The x-axis label stubbornly overlap, and I cannot make them presentable, even with several suggested solutions.
I tried stackoverflow solution suggesting to use autofmt_xdate but it doesn't help.
I also tried the suggestion to plt.tight_layout(), which fails to make an effect.
# Question code: bar-plots the 'error' column for the rows whose index year
# is 2017, then tries autofmt_xdate() to tidy the overlapping labels.
ax = test_df[(test_df.index.year ==2017) ]['error'].plot(kind="bar")
ax.figure.autofmt_xdate()
#plt.tight_layout()
# Show the type of the index that is being plotted.
print(type(test_df[(test_df.index.year ==2017) ]['error'].index))
UPDATE: That I'm using a bar chart is an issue. A regular time-series plot shows nicely-managed labels.
A pandas bar plot is a categorical plot. It shows one bar for each index at integer positions on the scale. Hence the first bar is at position 0, the next at 1 etc. The labels correspond to the dataframes' index. If you have 100 bars, you'll end up with 100 labels. This makes sense because pandas cannot know if those should be treated as categories or ordinal/numeric data.
If instead you use a normal matplotlib bar plot, it will treat the dataframe index numerically. This means the bars have their position according to the actual dates and labels are placed according to the automatic ticker.
import pandas as pd
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
# 42 consecutive daily timestamps starting 2017-01-01. date_range() accepts
# the date string directly; the pd.datetime alias used previously has been
# removed from pandas (deprecated in 1.0).
datelist = pd.date_range('2017-01-01', periods=42)
df = pd.DataFrame(np.cumsum(np.random.randn(42)),
                  columns=['error'], index=datelist)

# A plain matplotlib bar plot positions the bars by their actual dates, so
# the automatic date ticker decides how many labels to draw.
plt.bar(df.index, df["error"].values)
plt.gcf().autofmt_xdate()
plt.show()
The advantage is then in addition that matplotlib.dates locators and formatters can be used. E.g. to label each first and fifteenth of a month with a custom format,
import pandas as pd
import numpy as np; np.random.seed(42)
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# 93 consecutive daily timestamps starting 2017-01-01. date_range() accepts
# the date string directly; the pd.datetime alias used previously has been
# removed from pandas (deprecated in 1.0).
datelist = pd.date_range('2017-01-01', periods=93)
df = pd.DataFrame(np.cumsum(np.random.randn(93)),
                  columns=['error'], index=datelist)

plt.bar(df.index, df["error"].values)
# Label only the 1st and 15th of each month, using a custom date format.
x_axis = plt.gca().xaxis
x_axis.set_major_locator(mdates.DayLocator((1, 15)))
x_axis.set_major_formatter(mdates.DateFormatter("%d %b %Y"))
plt.gcf().autofmt_xdate()
plt.show()
In your situation, the easiest would be to manually create labels and spacing, and apply that using ax.xaxis.set_major_formatter.
Here's a possible solution:
Since no sample data was provided, I tried to mimic the structure of your dataset in a dataframe with some random numbers.
The setup:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
# A dataframe with random numbers to run tests on: 100 daily rows starting
# 2017-01-01, indexed by date. date_range() takes the date string directly;
# the pd.datetime alias used previously has been removed from pandas.
np.random.seed(123456)
rows = 100
df = pd.DataFrame(
    np.random.randint(-10, 10, size=(rows, 1)),
    columns=['error'],
    index=pd.date_range('2017-01-01', periods=rows, name='dates'),
)
test_df = df.copy(deep=True)

# Plot of data that mimics the structure of your dataset.
# The sized figure is created *before* plotting and its axes handed to pandas;
# calling plt.figure(figsize=...) afterwards would only open a second, empty
# figure and leave this plot at the default size.
fig, ax = plt.subplots(figsize=(15, 8))
test_df[test_df.index.year == 2017]['error'].plot(kind="bar", ax=ax)
fig.autofmt_xdate()
A possible solution:
test_df = df.copy(deep=True)
# The series that is actually drawn; the labels below are built from this
# same selection so that label i always belongs to bar i.
plotted = test_df[test_df.index.year == 2017]['error']

# Create the sized figure first and plot onto its axes. Calling
# plt.figure(figsize=...) after plotting opens a new, empty figure, and later
# plt.gcf() calls would then act on that empty figure instead of the plot.
fig, ax = plt.subplots(figsize=(15, 8))
plotted.plot(kind="bar", ax=ax)

# Make a list of empty labels, one slot per bar
myLabels = [''] * len(plotted.index)
# Set labels on every 20th element in myLabels
myLabels[::20] = [item.strftime('%Y - %m') for item in plotted.index[::20]]
ax.xaxis.set_major_formatter(ticker.FixedFormatter(myLabels))
fig.autofmt_xdate()
# Tilt the labels
plt.setp(ax.get_xticklabels(), rotation=30, fontsize=10)
plt.show()
You can easily change the formatting of labels by checking strftime.org

Plotting candlestick with matplotlib for time series w/o weekend gaps

I'm trying to plot a candlestick series after importing data from yahoo-finance. I'm using Python 2.7.
I have already plotted a series and I want to add the same one as a candlestick chart, but I don't see how to do that:
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick2_ohlc
# Question code: draws the Close series as a line and then a candlestick
# series on the same axes. `data` is defined outside this snippet.
#Reset the index to remove Date column from index
df_ohlc = data.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Normal plot (x values are the Date column)
ax1 = plt.subplot()
ax1.plot(df_ohlc["Date"], df_ohlc["Close"], label = "Price", color="blue", linewidth=2.0)
#Candle plot
# NOTE: no dates are passed to this call; per the question text, the
# candlesticks plotted alone end up on an integer x axis.
candlestick2_ohlc(ax1,df_ohlc['Open'],df_ohlc['High'],df_ohlc['Low'],df_ohlc['Close'],width=0.6)
If I plot candlestick alone, it looks fine but the x axis is a list of integers.
If I plot the candlesticks alone after converting df_ohlc["Date"] to float and then converting back to datetime, the series is plotted with the correct x axis, but there are gaps at the weekends even though the series isn't defined for those dates.
Is there a way to plot both series at the same time ? I'm planning to add more series like moving average, OLS, Bollinger etc...
You can remove weekend gaps and make human-readable dates xticklabels in this way. Note that, this script is written in python 3 and there may be some differences from python 2.
import quandl
import numpy as np
from mpl_finance import candlestick_ohlc
import matplotlib.pyplot as plt
# Get the data and re-index the bars as 0..n-1. Plotting against this running
# integer instead of the calendar date is what removes the weekend gaps.
r = quandl.get('WIKI/AAPL', start_date='2016-01-01', end_date='2017-11-10')
date_list = np.array(r.index.to_pydatetime())
plot_array = np.zeros([len(r), 5])
plot_array[:, 0] = np.arange(plot_array.shape[0])
# assumes the first four columns of r are Open, High, Low, Close -- TODO confirm
plot_array[:, 1:] = r.iloc[:, :4]

# Plot the candlestick chart.
fig, ax = plt.subplots()
# Number of candlesticks to plot, clamped to the rows actually available so
# the negative indexing below cannot run past the start of the array.
num_of_bars = min(100, len(plot_array))
candlestick_ohlc(ax, plot_array[-num_of_bars:], colorup='g', colordown='r')
ax.margins(x=0.0, y=0.1)
ax.yaxis.tick_right()
ax.set_xlim(right=plot_array[-1, 0] + 10)
ax.grid(True, color='k', ls='--', alpha=0.2)

# Label eight evenly spaced bars with their real dates instead of the
# integer bar numbers.
indices = np.linspace(plot_array[-num_of_bars, 0], plot_array[-1, 0], 8, dtype=int)
x_tick_labels = [date_list[i].strftime('%b-%d') for i in indices]
ax.set(xticks=indices, xticklabels=x_tick_labels)
plt.show()
I really need more information about your code and your dataframe, but you can use this example to do a candlestick
import datetime as dt

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd

try:
    # matplotlib.finance was removed from matplotlib; the same function is
    # provided by the separate mpl_finance package.
    from mpl_finance import candlestick_ohlc
except ImportError:
    # Fall back for old matplotlib installs that still bundle the module.
    from matplotlib.finance import candlestick_ohlc

# Reset the index to remove Date column from index
df_ohlc = df.reset_index()
# Naming columns
df_ohlc.columns = ["Date", "Open", "High", 'Low', "Close", "Adj Close", "Volume"]
# Converting dates column to float values
df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

# Making plot
fig = plt.figure()
ax1 = plt.subplot2grid((6, 1), (0, 0), rowspan=6, colspan=1)
# Converts raw mdate numbers to dates
ax1.xaxis_date()
plt.xlabel("Date")
print(df_ohlc)

# Making candlestick plot
candlestick_ohlc(ax1, df_ohlc.values, width=1, colorup='g', colordown='k', alpha=0.75)
plt.ylabel("Price")
# Rotate the date labels only now that the axes exist; calling
# autofmt_xdate() on a figure without axes has nothing to act on.
fig.autofmt_xdate()
# No plt.legend() call: none of the plotted artists carries a label, so it
# would only produce an empty legend and a warning.
plt.show()

Categories