I'm trying to figure out how to plot an X-axis with hourly precision, matching the hourly values in the index column of my dataframe. Currently, it just labels each month. I want one label for each Y point (each value of the "close" column).
My code now:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
# Load the price data; column 0 becomes the index and is parsed as dates.
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
# Re-parse the index with an explicit timestamp format.
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')
# Very wide figure so that many tick labels can fit along the x-axis.
plt.figure(figsize=(80, 8))
# Draw the 'close' column against the datetime index as a solid line.
plt.plot_date(data.index,data['close'], linestyle='solid',xdate=True, marker=None)
# Rotate and align the date tick labels of the current figure.
plt.gcf().autofmt_xdate()
# Only the label *format* is set below; no major locator is set in this
# script, so where the ticks are placed is left to matplotlib's default.
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
Thanks to #r-begginers's comment I am able to achieve what I want. My finished code to print financial data with hourly labels for the X axis is as follows:
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
import pandas as pd
# Load the price data; column 0 becomes the index and is parsed as dates.
data = pd.read_csv('C:/Users/renat/.spyder-py3/1H data new.csv', index_col=0, parse_dates=True)
# Re-parse the index with an explicit timestamp format.
data.index = pd.to_datetime(data.index, format='%Y-%m-%d %H:%M:%S')

# Very wide figure so that the dense tick labels have room.
plt.figure(figsize=(100, 8))
# plt.plot handles datetime x-values directly; plt.plot_date is deprecated
# since Matplotlib 3.9, so it is no longer used here.
plt.plot(data.index, data['close'], linestyle='solid')

# The locator decides WHERE ticks go, the formatter decides HOW they read.
# DayLocator(interval=1) gives one tick per day; swap in
# mpl_dates.HourLocator(interval=1) for one tick per hour.
days = mpl_dates.DayLocator(interval=1)
days_fmt = mpl_dates.DateFormatter('%Y-%m-%d %H:%M')
x_axis = plt.gca().xaxis
x_axis.set_major_locator(days)
x_axis.set_major_formatter(days_fmt)

plt.grid()
# Small, vertical labels so that neighbouring ticks do not overlap.
plt.xticks(rotation=90, fontsize=6)
plt.title('Price Chart for TEST')
plt.xlabel('Date')
plt.ylabel('Price ($)')
plt.show()
I've simply replaced the three lines that were related to the formatting of the x-axis.
In:
# Place one major tick per hour and label each tick with the hour only ('%H').
hours = mpl_dates.HourLocator(interval=1)
hours_fmt = mpl_dates.DateFormatter('%H')
plt.gca().xaxis.set_major_locator(hours)
plt.gca().xaxis.set_major_formatter(hours_fmt)
Out:
# These lines only rotate the labels and set their text format; they do not
# set a locator, so they do not control how many ticks are drawn.
plt.gcf().autofmt_xdate()
date_format = mpl_dates.DateFormatter('%Y-%m-%d %H')
plt.gca().xaxis.set_major_formatter(date_format)
I've also made some other changes, but they are not relevant to the question.
Thanks again to #r-begginers for pointing me in this direction.
Related
I have tried plt.gcf().autofmt_xdate() but that doesn't fix the overlapping dates on the x axis. How do I clean the x axis to every week instead of everyday?
# Convert string column into date
df['date'] = pd.to_datetime(df['date'], format = "%Y-%m-%d")
plt.figure(figsize=(14,10))
plt.title("Daily Tests")
plt.ylabel("Number of confirmed cases")
plt.xlabel("Date")
# One bar per row of df; no tick locator is configured anywhere in this snippet.
# NOTE(review): `sns` is not imported here — presumably `import seaborn as sns`; confirm.
sns.barplot(x=df['date'], y=df['confirmed'])
plt.show()
The graph:
any suggestions would be appreciated.
Use mdates.DayLocator(interval=14) as the major locator to control the spacing of the date ticks on the x-axis. In this case, the interval is set to two weeks.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111)
# Draw the bars with matplotlib so the x-axis is a real date axis.
# seaborn's barplot treats x as categorical (bars sit at positions 0, 1, 2, ...),
# so a date locator/formatter would interpret those positions as days since
# the epoch and print the wrong dates.
# Assumes df['date'] already holds datetimes (e.g. via pd.to_datetime).
ax.bar(df['date'], df['confirmed'])
ax.set_title("Daily Tests")
ax.set_ylabel("Number of confirmed cases")
ax.set_xlabel("Date")
# A major tick roughly every two weeks, labelled as month-day.
days = mdates.DayLocator(interval=14)
days_fmt = mdates.DateFormatter('%m-%d')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_major_formatter(days_fmt)
plt.show()
I have a plot_graph() function that plots pandas dataframe as a line chart.
def plot_graph(df):
    """Plot the 'Actual' and 'Expected' columns of ``df`` against 'date'.

    Both lines are drawn on the current axes and the figure is written to
    ``fig1.png``. The 'date' column is also printed to stdout.

    NOTE(review): the sample data shown with this function uses lowercase
    column names ('actual', 'expected'); confirm which spelling the real
    frame uses.
    """
    ax = plt.gca()
    #df["Date"].dt.strftime("%m/%d/%y")
    #df["date"] = df["date"].astype('datetime64[ns]')
    print(df['date'])
    df.plot(kind='line', x='date', y='Actual', ax=ax)
    df.plot(kind='line', x='date', y='Expected', color='red', ax=ax)
    # Limit the x-axis to at most three major tick intervals.
    ax.xaxis.set_major_locator(plt.MaxNLocator(3))
    plt.savefig("fig1.png")
I pass pandas dataframe in this format
date actual expected
2019-11 20 65
2019-12 35 65
When I plot the line chart, the x-axis labels are not displayed correctly in (yyyy-mm) format. I believe the problem is with the date format, so I tried converting the column to a date. I tried all the options (commented out in the code), but nothing seems to work. Any suggestions would be appreciated.
Try this:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd


def plot_graph(df):
    """Plot the 'actual' and 'expected' columns of ``df`` against 'date'.

    The 'date' column is converted to ``datetime.date`` values on a copy of
    the frame, so the caller's DataFrame is left unchanged. Both lines are
    drawn on the current axes with one major tick per month.
    """
    ax = plt.gca()
    # Convert on a copy rather than overwriting the caller's column.
    plot_df = df.copy()
    plot_df['date'] = pd.to_datetime(plot_df['date']).dt.date
    plot_df.plot(kind='line', x='date', y='actual', ax=ax)
    plot_df.plot(kind='line', x='date', y='expected', color='red', ax=ax)
    ax.xaxis.set_major_locator(mdates.MonthLocator())
    # ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m')) #to explicitly set format


plot_graph(df)
I think using matplotlib.dates is the best thing here, but it seems like df.plot() needs dates to be date and not datetime (or string). If you instead plot directly through matplotlib you don't need to do this. More here.
Reference Matplotlib: Date tick labels & Formatting date ticks using ConciseDateFormatter
matplotlib.dates.MonthLocator
matplotlib.dates.DateFormatter
matplotlib.axis.Axis.set_major_locator
matplotlib.axis.XAxis.set_major_formatter
Note the index column is in a datetime format. To transform your column to datetime, use df.date = pd.to_datetime(df.date)
df.plot() has tick locs like array([13136, 13152, 13174, 13175], dtype=int64). I don't actually know how those numbers are derived, but they cause an issue with some of the matplotlib axis and date formatting methods, which is why I changed the plots away from df.plot.
sns.lineplot and plt.plot have tick locs that are the ordinal representation of the datetime, array([737553., 737560., 737567., 737577., 737584., 737591., 737598., 737607.]).
import pandas as pd
import numpy as np # for test data
from datetime import datetime # for test data
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
# synthetic data with date as a datetime
np.random.seed(365)  # fixed seed so the random columns are reproducible
length = 700
# 'D' is the calendar-day frequency alias; the lowercase spelling 'd' is
# deprecated in newer pandas releases.
df = pd.DataFrame(
    np.random.rand(length, 2) * 10,
    columns=['Actual', 'Expected'],
    index=pd.bdate_range(datetime.today(), freq='D', periods=length).tolist(),
).reset_index()  # the unnamed date index becomes a column called 'index'
# display(df.head())
index Actual Expected
0 2020-07-16 9.414557 6.416027
1 2020-07-17 6.846105 5.885621
2 2020-07-18 5.438872 3.680709
3 2020-07-19 7.666258 3.050124
4 2020-07-20 4.420860 1.104433
# function
def plot_graph(df):
    """Plot 'Actual' and 'Expected' against the 'index' column of ``df``.

    A new figure is created, major ticks are placed by a MonthLocator and
    labelled as 'YYYY-MM', the labels are rotated 90 degrees, and the figure
    is shown.
    """
    # df.date = pd.to_datetime(df.date) # if needed and date is the column name
    fig, ax = plt.subplots()

    months = mdates.MonthLocator()  # every month
    months_fmt = mdates.DateFormatter('%Y-%m')  # format

    # Column names are resolved against ``df`` through the ``data`` argument.
    ax.plot('index', 'Actual', data=df)
    ax.plot('index', 'Expected', data=df, color='red')

    # format the ticks
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(months_fmt)

    plt.xticks(rotation=90)
    plt.legend()
    plt.show()


plot_graph(df)
I am trying to use the following code to set the x-ticks to [Jan., Feb., ...]
import matplotlib.pyplot as plt
from matplotlib.dates import MonthLocator, DateFormatter
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(111)
# Only y-values are passed here, so the x-values are plain integers, not dates.
# NOTE(review): `np` is not imported in this snippet — presumably numpy; confirm.
ax.plot(np.arange(1000))
# A month-based tick locator and a '%b' (abbreviated month name) label format
# are installed on that x-axis.
ax.xaxis.set_major_locator(MonthLocator())
ax.xaxis.set_major_formatter(DateFormatter('%b'))
I get the following figure, without x-ticks
I'm wondering why all x-ticks disappeared? I wrote the above code with reference to this implementation
Many thanks.
It is not very clear what type of data you currently have, but below are my suggestions for plotting the month on the x-axis:
Transform your date using pd.to_datetime
Set it to your dataframe index.
Call plt.xticks() explicitly (the Axes-level equivalent is ax.set_xticks())
Below one example with re-created data:
from datetime import datetime as dt
from datetime import timedelta
### create sample data
# Take "now" once, so consecutive rows are exactly one day apart instead of
# drifting by the time it takes to evaluate dt.today() for every row.
start = dt.today()
your_df = pd.DataFrame({'vals': np.arange(1000)})
## make sure your datetime is considered as such by pandas
your_df['date'] = pd.to_datetime([start + timedelta(days=x) for x in range(1000)])
your_df = your_df.set_index('date')  ## set it as index
### plot it
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(1, 1, 1)
# Draw 'vals' against the datetime index of the frame.
ax.plot(your_df.index, your_df['vals'])
plt.xticks(rotation='vertical')
# Major ticks come from a MonthLocator and are labelled with the abbreviated month name.
month_axis = ax.xaxis
month_axis.set_major_locator(MonthLocator())
month_axis.set_major_formatter(DateFormatter('%b'))
Note that if you do not want every month plotted, you can let matplotlib handle that for you, by removing the major locator.
fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(1, 1, 1)
# Draw 'vals' against the datetime index of the frame.
ax.plot(your_df.index, your_df['vals'])
plt.xticks(rotation='vertical')
# No major locator is installed here, so matplotlib chooses the tick positions;
# only the label format (abbreviated month name) is set.
# ax.xaxis.set_major_locator(MonthLocator())
month_axis = ax.xaxis
month_axis.set_major_formatter(DateFormatter('%b'))
Added: I went into the link provided, and you do have a DATE field in the dataset used (boulder-precip.csv). You can follow the same procedure and have it plotted on a monthly basis:
# Load the CSV, parse the DATE column and use it as the index.
df = pd.read_csv('boulder-precip.csv')
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.set_index('DATE')

fig = plt.figure(figsize=[10, 5])
ax = fig.add_subplot(1, 1, 1)
# Draw PRECIP against the datetime index.
ax.plot(df.index, df['PRECIP'])
plt.xticks(rotation='vertical')
# Major ticks come from a MonthLocator and are labelled with the abbreviated month name.
month_axis = ax.xaxis
month_axis.set_major_locator(MonthLocator())
month_axis.set_major_formatter(DateFormatter('%b'))
I have a dataframe that I want the x axis to show as APR-2018 for example. The ax.format_xdata line does not do the trick.
import datetime as dt
import pandas as pd
import time
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Keys are (ticker, timestamp) tuples; each value holds the three series for that row.
# NOTE(review): the integer timestamps look like epoch milliseconds — confirm.
data = {("IVOG",1493510400000):{"Adj_Close":119.2136,"MA(3)":119.2136,"EWMA(3)":119.2136},
("IVOG",1496188800000):{"Adj_Close":120.8236,"MA(3)":120.0186,"EWMA(3)":120.0454},
("IVOG",1498780800000):{"Adj_Close":120.2736,"MA(3)":120.1036,"EWMA(3)":120.1266},
("IVOG",1501459200000):{"Adj_Close":121.7836,"MA(3)":120.5236,"EWMA(3)":120.5832},
("IVOG",1504137600000):{"Adj_Close":120.3536,"MA(3)":120.4896,"EWMA(3)":120.5309},
("IVOG",1506729600000):{"Adj_Close":124.3336,"MA(3)":121.1303,"EWMA(3)":121.2749}}
# orient='index' turns each dictionary key into a row label.
df=pd.DataFrame.from_dict(data, orient = 'index')
print(df)
ax = plt.gca() # get current axis
# Three lines on the same axes; no x column is given, so the row labels are used for x.
df.plot(kind='line',y='Adj_Close', ax=ax)
df.plot(kind='line',y='MA(3)',ax=ax)
df.plot(kind='line',y='EWMA(3)', color='green', ax=ax)
# Second element of the first row label, i.e. the raw integer timestamp.
print(df.index[0][1])
# This only assigns an attribute on the axes; no tick formatter is installed
# (there is no set_major_formatter call), so the tick labels are not changed by it.
ax.format_xdata = mdates.DateFormatter('%b-%Y') # Trying to get APR-2018
plt.xlabel(df.index[0][0]) # Trying to Get the Ticker
_=plt.grid()
_=plt.xticks(rotation=90)
plt.show()
The second index should be just the date and not time, but it incorrectly plots like this:Incorrect Plot
This should do the trick. Of course there are 'prettier' ways, but I have tried to make it so that you can keep your data and original data frame as close as to the original one in your question.
Edited after comments: so how about this, just create a new column with the date that you format in whatever shape you want. Then use set_xticklabels() passing that column to set the ticks as you want. Also you might want to remove the default plt.xlabel (otherwise you would have below your xticks the name of the indexes).
import datetime as dt
import pandas as pd
import time
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# the first part of your code is the same
data = {
    ("IVOG", 1493510400000): {"Adj_Close": 119.2136, "MA(3)": 119.2136, "EWMA(3)": 119.2136},
    ("IVOG", 1496188800000): {"Adj_Close": 120.8236, "MA(3)": 120.0186, "EWMA(3)": 120.0454},
    ("IVOG", 1498780800000): {"Adj_Close": 120.2736, "MA(3)": 120.1036, "EWMA(3)": 120.1266},
    ("IVOG", 1501459200000): {"Adj_Close": 121.7836, "MA(3)": 120.5236, "EWMA(3)": 120.5832},
    ("IVOG", 1504137600000): {"Adj_Close": 120.3536, "MA(3)": 120.4896, "EWMA(3)": 120.5309},
    ("IVOG", 1506729600000): {"Adj_Close": 124.3336, "MA(3)": 121.1303, "EWMA(3)": 121.2749},
}
df = pd.DataFrame.from_dict(data, orient='index')
# first let's give a name to the indexes
df.index.names = ['ivog', 'timestamp']
# then read the timestamp of every row. get_level_values() returns one value
# per row in row order; index.levels only holds the unique, sorted values and
# would not line up with the rows if timestamps repeat or are unsorted.
timestamps = df.index.get_level_values('timestamp')
# the timestamps are epoch milliseconds (unit='ms'); convert them and render
# each date in the format you want
df['date'] = pd.to_datetime(timestamps, unit='ms').strftime("%Y %B")
print(df)
# plot the data just like you were doing
ax = plt.gca()  # get current axis
df.plot(kind='line', y='Adj_Close', ax=ax)
df.plot(kind='line', y='MA(3)', ax=ax)
df.plot(kind='line', y='EWMA(3)', color='green', ax=ax)
# Now the x-axis labels should be what you wished for.
# The rows are drawn at positions 0..len(df)-1, so put exactly one tick on each
# row and label it in the same call. Setting labels without fixing the tick
# positions can shift the labels or fail when the counts differ.
plt.xticks(range(len(df)), df['date'].tolist(), rotation=45)
plt.xlabel('Your x axis label')
plt.ylabel('Your y axis label')
plt.title('My Awseome Plot')
I'm trying to plot a candlestick series after importing data from yahoo-finance. I'm using Python 2.7.
I already have a series plotted and I want to add the same one as a candlestick chart, but I don't see how I can do that:
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick2_ohlc
#Reset the index to remove Date column from index
# NOTE(review): `data` is not defined in this snippet — presumably the downloaded price DataFrame; confirm.
df_ohlc = data.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Normal plot
ax1 = plt.subplot()
# The line uses the Date column for its x-values.
ax1.plot(df_ohlc["Date"], df_ohlc["Close"], label = "Price", color="blue", linewidth=2.0)
#Candle plot
# Only the four price columns are passed here; no date values are given to the candlestick call.
candlestick2_ohlc(ax1,df_ohlc['Open'],df_ohlc['High'],df_ohlc['Low'],df_ohlc['Close'],width=0.6)
If I plot candlestick alone, it looks fine but the x axis is a list of integers.
If I plot the candlestick alone after converting df_ohlc["Date"] to float and then back to datetime, it plots the series with the correct x axis, but there are gaps at the weekends even though the series isn't defined for those dates.
Is there a way to plot both series at the same time ? I'm planning to add more series like moving average, OLS, Bollinger etc...
You can remove the weekend gaps and use human-readable dates as xticklabels in this way. Note that this script is written in Python 3 and there may be some differences from Python 2.
import quandl
import numpy as np
from mpl_finance import candlestick_ohlc
import matplotlib.pyplot as plt
# getting data and modifying it to remove gaps at weekends
r = quandl.get('WIKI/AAPL', start_date='2016-01-01', end_date='2017-11-10')
date_list = np.array(r.index.to_pydatetime())
# Column 0 is a consecutive bar number instead of a date, so days without
# data take up no horizontal space. Columns 1-4 are the first four columns of r.
# NOTE(review): assumes those four columns are Open, High, Low, Close — confirm.
plot_array = np.zeros([len(r), 5])
plot_array[:, 0] = np.arange(plot_array.shape[0])
plot_array[:, 1:] = r.iloc[:, :4]

# plotting candlestick chart
fig, ax = plt.subplots()
# the number of candlesticks to be plotted, clamped to the available rows so
# the negative indexing below stays in range for short histories
num_of_bars = min(100, len(plot_array))
candlestick_ohlc(ax, plot_array[-num_of_bars:], colorup='g', colordown='r')
ax.margins(x=0.0, y=0.1)
ax.yaxis.tick_right()
# leave some empty room to the right of the last bar
ax.set_xlim(right=plot_array[-1, 0]+10)
ax.grid(True, color='k', ls='--', alpha=0.2)

# setting xticklabels actual dates instead of numbers
# eight evenly spaced bar numbers across the plotted range
indices = np.linspace(plot_array[-num_of_bars, 0], plot_array[-1, 0], 8, dtype=int)
# look each bar number up in date_list to get its real date
x_tick_labels = [date_list[i].strftime('%b-%d') for i in indices]
ax.set(xticks=indices, xticklabels=x_tick_labels)

plt.show()
I really need more information about your code and your dataframe, but you can use this example to do a candlestick
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt
#Reset the index to remove Date column from index
df_ohlc = df.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Converting dates column to float values
df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

#Making plot
fig = plt.figure()
ax1 = plt.subplot2grid((6,1), (0,0), rowspan=6, colspan=1)
#Converts raw mdate numbers to dates
ax1.xaxis_date()
plt.xlabel("Date")
print(df_ohlc)

#Making candlestick plot
candlestick_ohlc(ax1,df_ohlc.values,width=1, colorup='g', colordown='k',alpha=0.75)
plt.ylabel("Price")

# Rotate/align the date labels only now: autofmt_xdate works on the axes that
# already exist in the figure, so calling it before the axes are created
# leaves the labels untouched.
fig.autofmt_xdate()
plt.show()