candlestick plot from pandas dataframe, replace index by dates - python

This code plots candlesticks with moving averages, but the x-axis shows the row index; I need the x-axis to show dates.
What changes are required?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
# Question snippet: candlesticks plus two exponential moving averages.
# No date values are handed to any plotting call below, which is why the
# x-axis shows the row index rather than dates.
#date format in data-> dd-mm-yyyy
nif = pd.read_csv('data.csv')
# Date parsing is commented out in the original post, so 'Date' stays as read.
#nif['Date'] = pd.to_datetime(nif['Date'], format='%d-%m-%Y', utc=True)
# Exponentially weighted means of the close price (spans of 50 and 13 rows).
mavg = nif['Close'].ewm(span=50).mean()
mavg1 = nif['Close'].ewm(span=13).mean()
fg, ax1 = plt.subplots()
# Only the open/high/low/close columns are passed; there is no date argument.
cl = candlestick2_ohlc(ax=ax1,opens=nif['Open'],highs=nif['High'],lows=nif['Low'],closes=nif['Close'],width=0.4, colorup='#77d879', colordown='#db3f3f')
# Both averages are drawn on the same Axes as the candlesticks.
mavg.plot(ax=ax1,label='50_ema')
mavg1.plot(color='k',ax=ax1, label='13_ema')
plt.legend(loc=4)
plt.subplots_adjust(left=0.09, bottom=0.20, right=0.94, top=0.90, wspace=0.2, hspace=0)
plt.show()
Output:

I also had a lot of "fun" with this in the past... Here is one way of doing it using mdates:
import pandas as pd
import pandas_datareader.data as web
import datetime as dt
import matplotlib.pyplot as plt
# matplotlib.finance is no longer shipped with Matplotlib; mpl_finance
# provides the same candlestick_ohlc function.
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates

ticker = 'MCD'
start = dt.date(2014, 1, 1)

# Gathering the data
data = web.DataReader(ticker, 'yahoo', start)

# Calc moving averages
data['MA10'] = data['Adj Close'].rolling(window=10).mean()
data['MA60'] = data['Adj Close'].rolling(window=60).mean()

# candlestick_ohlc expects x values as Matplotlib date numbers (floats),
# so move the date index into a column and convert it.
data.reset_index(inplace=True)
data['Date'] = data['Date'].map(mdates.date2num)

# Plot candlestick chart.
# A single Axes carries the candles and both averages; calling
# add_subplot(111) repeatedly is not guaranteed to return the same Axes.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.xaxis_date()
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
ax1.plot(data['Date'], data['MA10'], label='MA_10')
ax1.plot(data['Date'], data['MA60'], label='MA_60')
plt.ylabel("Price")
plt.title(ticker)
ax1.grid(True)
plt.legend(loc='best')
plt.xticks(rotation=45)

# Select the columns explicitly: candlestick_ohlc reads each row as
# (time, open, high, low, close), and the column order returned by the
# data provider is not guaranteed to match that.
quotes = data[['Date', 'Open', 'High', 'Low', 'Close']].values
candlestick_ohlc(ax1, quotes, width=0.6, colorup='g', colordown='r')
plt.show()
Output:
Hope this helps.

Simple df:
Using plotly:
import plotly.graph_objects as go
# figure_factory.create_candlestick is deprecated; the graph_objects
# Candlestick trace is its replacement and takes the dates as x.
fig = go.Figure(data=[go.Candlestick(x=df.ts, open=df.open, high=df.high, low=df.low, close=df.close)])
fig.show()
will automatically parse the ts column to be displayed correctly on x.

Clunky workaround here, derived from another post (if I can find it again, I will reference it). Using a pandas DataFrame, plot by index and then set the x-axis tick labels to the date strings for display. I am new to Python / matplotlib, and this solution is not very flexible, but it basically works. Plotting against the integer index also removes the blank 'weekend' gaps in daily market price data.
Matplotlib xaxis index as dates
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_finance import candlestick2_ohlc
from mpl_finance import candlestick_ohlc
%matplotlib notebook # for Jupyter
# Format m/d/Y,Open,High,Low,Close,Adj Close,Volume
# csv data does not include NaN, or 'weekend' lines,
# only dates from which prices are recorded
DJIA = pd.read_csv('yourFILE.csv') #Format m/d/Y,Open,High,
Low,Close,Adj Close,Volume
print(DJIA.head())
fg, ax1 = plt.subplots()
cl =candlestick2_ohlc(ax=ax1,opens=DJIA['Open'],
highs=DJIA['High'],lows=DJIA['Low'],
closes=DJIA['Close'],width=0.4, colorup='#77d879',
colordown='#db3f3f')
ax1.set_xticks(np.arange(len(DJIA)))
ax1.set_xticklabels(DJIA['Date'], fontsize=6, rotation=-90)
plt.show()

Related

Plotting more than 10K data point using Seaborn for x-axis as timestamp

I am trying to plot more than 10k data points, where I want to plot a data properties versus Timestamp. But on the x-axis the timestamps are overlapping and not visible.
How can I reduce the amount of labels on the x-axis, so that they are legible?
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
sns.set_style("whitegrid")
data = pd.read_csv('0912Testday4.csv',header=2)
# Question snippet: one line plot per non-timestamp column against TIMESTAMP.
# (Loop indentation restored; the overlapping-label problem is unchanged.)
for i in data.columns:
    if i!='TIMESTAMP':
        sns.lineplot(x="TIMESTAMP",y=i,data = data)
        plt.title(f"{i} vs TIMESTAMP")
        plt.show()
Example plot demonstrating the problem:
Update: TIMESTAMP was in string format; converting it to datetime format resolves the problem.
data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'])
Update: TIMESTAMP was in string format; converting it to datetime format resolves the problem.
data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'])
Please make sure that TIMESTAMP is a datetime object. This should not happen when the x axis is a datetime. (You can use pd.to_datetime to convert int, float, str, and ... to datetime.)
If TIMESTAMP is a datetime, you can use the autofmt_xdate() method:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Set the style before any figure is created so it applies to every plot.
sns.set_style("whitegrid")
data = pd.read_csv('0912Testday4.csv',header=2)
# Use the following line if the TIMESTAMP is not a datetime.
# (You may need to change the format from "%Y-%m-%d %H:%M:%S+00:00".)
# data['TIMESTAMP'] = pd.to_datetime(data.TIMESTAMP, format="%Y-%m-%d %H:%M:%S+00:00")
for i in data.columns:
    if i != 'TIMESTAMP':
        # A fresh figure per column: plt.show() is called once per column,
        # so each plot needs its own figure and Axes rather than a shared one.
        fig, ax = plt.subplots()
        sns.lineplot(x="TIMESTAMP", y=i, data=data, ax=ax)
        fig.autofmt_xdate() # rotate and right align date ticklabels
        ax.set_title(f"{i} vs TIMESTAMP")
        plt.show()
I didn't encounter such problem with sns.lineplot
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Synthetic example: one random integer per hourly timestamp across a year.
time_stamps = pd.date_range('2019-01-01', '2020-01-01', freq='H')
vals = [np.random.randint(0, 1000) for _ in time_stamps]
data_df = pd.DataFrame({'time': time_stamps, 'value': vals})
print(data_df.shape)

# Draw the series; tick placement is left to the plotting libraries.
fig, ax = plt.subplots()
sns.lineplot(x='time', y='value', data=data_df)
plt.show()
sns automatically selects the x ticks and x labels.
Alternatively, you can use ax.set_xticks and ax.set_xticklabels to set the x ticks and x tick labels manually.
Also you may use fig.autofmt_xdate() to rotate the x labels

Changing the tick frequency on the x-axis

I am trying to plot a bar chart of the date vs. the price of a cryptocurrency from a dataframe with 731 daily samples. When I plot the graph I get the image shown below. Because of the number of dates, the x-axis is unreadable, and I would like it to label only the 1st of every month.
This is the graph i currently have: https://imgur.com/a/QVNn4Zp
I have tried using other methods i have found online both in stackoverflow and other sources such as youtube but had no success.
This is the Code i have so far to plot the bar chart.
# Question snippet: bar chart of price against the 'Date' column; with 731
# daily rows the x tick labels become unreadable.
df.plot(kind='bar',x='Date',y='Price in USD (at 00:00:00 UTC)',color='red')
plt.show()
One option is to plot a numeric barplot with matplotlib.
Matplotlib < 3.0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Example data: 365 consecutive days with one random value per day.
start = pd.to_datetime("5-1-2012")
idx = pd.date_range(start, periods=365)
df = pd.DataFrame({'Date': idx, 'A': np.random.random(365)})

fig, ax = plt.subplots()

# Convert the dates to Matplotlib date numbers so the bars sit on a
# numeric axis.
dates = mdates.date2num(df["Date"].values)
ax.bar(dates, df["A"], width=1)

# Let Matplotlib choose the date tick positions and their label format.
loc = mdates.AutoDateLocator()
fmt = mdates.AutoDateFormatter(loc)
ax.xaxis.set_major_locator(loc)
ax.xaxis.set_major_formatter(fmt)
plt.show()
Matplotlib >= 3.0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Register pandas' date converters with Matplotlib before plotting.
pd.plotting.register_matplotlib_converters()

# Example data: 365 consecutive days with one random value per day.
start = pd.to_datetime("5-1-2012")
idx = pd.date_range(start, periods=365)
df = pd.DataFrame({'Date': idx, 'A': np.random.random(365)})

# The datetime column is passed straight to bar() as the x values.
fig, ax = plt.subplots()
ax.bar(df["Date"], df["A"], width=1)
plt.show()
Further options:
For other options see Pandas bar plot changes date format

Plotting candlestick with matplotlib for time series w/o weekend gaps

I am trying to plot a candlestick series after importing data from Yahoo Finance. I'm using Python 2.7.
I already have a series plotted and I want to add the same one as candlesticks, but I don't see how to do that:
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick2_ohlc
# Question snippet (Python 2.7): a close-price line and candlesticks drawn on
# one Axes. `data` is defined outside this snippet.
#Reset the index to remove Date column from index
df_ohlc = data.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Normal plot
ax1 = plt.subplot()
# The line plot is given the Date column as its x values ...
ax1.plot(df_ohlc["Date"], df_ohlc["Close"], label = "Price", color="blue", linewidth=2.0)
#Candle plot
# ... whereas this call receives only open/high/low/close and no dates.
candlestick2_ohlc(ax1,df_ohlc['Open'],df_ohlc['High'],df_ohlc['Low'],df_ohlc['Close'],width=0.6)
If I plot candlestick alone, it looks fine but the x axis is a list of integers.
If I plot the candlesticks alone after converting df_ohlc["Date"] to float and then back to datetime, the series is plotted with the correct x-axis, but there are gaps at the weekends even though the series has no data for those dates.
Is there a way to plot both series at the same time ? I'm planning to add more series like moving average, OLS, Bollinger etc...
You can remove weekend gaps and make human-readable dates xticklabels in this way. Note that, this script is written in python 3 and there may be some differences from python 2.
import quandl
import numpy as np
from mpl_finance import candlestick_ohlc
import matplotlib.pyplot as plt

# getting data and modifying it to remove gaps at weekends
r = quandl.get('WIKI/AAPL', start_date='2016-01-01', end_date='2017-11-10')
date_list = np.array(r.index.to_pydatetime())
# Column 0 holds consecutive integers instead of dates, so the candles are
# evenly spaced; columns 1-4 take the first four data columns of r.
plot_array = np.zeros([len(r), 5])
plot_array[:, 0] = np.arange(plot_array.shape[0])
plot_array[:, 1:] = r.iloc[:, :4]

# plotting candlestick chart
fig, ax = plt.subplots()
# the number of candlesticks to be plotted, capped at the rows available so
# the negative indexing below cannot run off the start of the array
num_of_bars = min(100, len(plot_array))
candlestick_ohlc(ax, plot_array[-num_of_bars:], colorup='g', colordown='r')
ax.margins(x=0.0, y=0.1)
ax.yaxis.tick_right()
ax.set_xlim(right=plot_array[-1, 0]+10)
ax.grid(True, color='k', ls='--', alpha=0.2)

# setting xticklabels actual dates instead of numbers:
# pick 8 evenly spaced positions and look up the date stored at each one
indices = np.linspace(plot_array[-num_of_bars, 0], plot_array[-1, 0], 8, dtype=int)
x_tick_labels = [date_list[i].strftime('%b-%d') for i in indices]
ax.set(xticks=indices, xticklabels=x_tick_labels)
plt.show()
I really need more information about your code and your dataframe, but you can use this example to do a candlestick
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
# matplotlib.finance is no longer shipped with Matplotlib; mpl_finance
# provides the same candlestick_ohlc function.
from mpl_finance import candlestick_ohlc
import matplotlib.dates as mdates
import datetime as dt

# `df` is the caller's price DataFrame (not defined in this snippet).
#Reset the index to remove Date column from index
df_ohlc = df.reset_index()
#Naming columns
df_ohlc.columns = ["Date","Open","High",'Low',"Close", "Adj Close", "Volume"]
#Converting dates column to float values
df_ohlc['Date'] = df_ohlc['Date'].map(mdates.date2num)

#Making plot
fig = plt.figure()
ax1 = plt.subplot2grid((6,1), (0,0), rowspan=6, colspan=1)
#Converts raw mdate numbers to dates
ax1.xaxis_date()
plt.xlabel("Date")
print(df_ohlc)

#Making candlestick plot
candlestick_ohlc(ax1,df_ohlc.values,width=1, colorup='g', colordown='k',alpha=0.75)
plt.ylabel("Price")
# Rotate the date labels only now: the call needs an existing Axes to act on.
fig.autofmt_xdate()
plt.show()

Modify major and minor xticks for dates

I am plotting two pandas series. The index is a date (1-1 to 12-31)
# Question snippet: s1 and s2 are the asker's two date-indexed Series.
s1.plot()
s2.plot()
Series.plot() interprets the dates and assigns them to axis values as follows:
I would like to modify the major ticks to be the 1st of every month and minor ticks to be the days in between
This works:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
# Question snippet: daily min/max series with month (major) and day (minor)
# tick locators configured before plotting.
df = pd.read_csv('data.csv')
# 'Date' becomes 'MM-DD' strings from here on.
df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%m-%d')
# NOTE(review): df2014 is not defined in this snippet -- presumably a 2014
# subset of df; confirm against the original post.
s2014max = df2014.groupby(['Date'], sort=True)['Data_Value'].max()/10
s2014min = df2014.groupby(['Date'], sort=True)['Data_Value'].min()/10
#remove the leap day and convert to datetime for plotting
s2014min = s2014min[s2014min.index != '02-29']
s2014max = s2014max[s2014max.index != '02-29']
dateslist = s2014min.index.tolist()
# `dates` is built here but is not used by any later line of this snippet.
dates = [pd.datetime.strptime(date, '%m-%d').date() for date in dateslist]
plt.figure()
ax = plt.gca()
# Date-based locators and formatters are installed on the current Axes.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_formatter(dayFmt)
ax.tick_params(direction='out', pad=15)
# The two Series are plotted with their 'MM-DD' string index.
s2014min.plot()
s2014max.plot()
This results in no ticks:
A possible way is to use matplotlib for plotting the dates instead of pandas.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

# Example data: a random walk with one value per day of 2016.
dates = pd.date_range("2016-01-01", "2016-12-31" )
y = np.cumsum(np.random.normal(size=len(dates)))
df = pd.DataFrame({"Dates" : dates, "y": y})

fig, ax = plt.subplots()
# Axes.plot accepts datetime x values directly; plot_date is deprecated.
ax.plot(df["Dates"], df.y, '-')
# Major ticks at each month, labelled with the abbreviated month name;
# minor ticks at every day.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
ax.xaxis.set_major_formatter(monthFmt)
plt.show()
You were so close! All you needed to do was add the formatters similar to how the other answer did it. Here is a working sample similar to your code (note I did mine in ipython notebook hence the %matplotlib inline).
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timedelta
from random import random
y = [random() for i in range(25)]
x = [(datetime.now() - timedelta(days=i)) for i in range(25)]
x.reverse()
s = pd.Series(y, index=x) # NOTE: S, not df, since you said you were using series
# format the ticks
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_minor_locator(mdates.DayLocator())
monthFmt = mdates.DateFormatter('%b')
dayFmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(monthFmt) # This is what you needed
ax.xaxis.set_minor_formatter(dayFmt) # This is what you needed
ax.tick_params(direction='out', pad=15)
# format the coords message box
s.plot(figsize=(10,3))
which will look like this:

ploting subplot in matplotlib with pandas issue

I am trying to plot subplots in matplotlib with pandas, but I am facing an issue: the subplots do not show the dates of the stock data. Here is my program:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import pandas.io.data
# Question snippet: close price and volume in two stacked subplots.
df = pd.io.data.get_data_yahoo('goog', start=datetime.datetime(2008,1,1),end=datetime.datetime(2014,10,23))
fig = plt.figure()
# Colour the figure background.
r = fig.patch
r.set_facecolor('#0070BB')
ax1 = fig.add_subplot(2,1,1, axisbg='#0070BB')
ax1.grid(True)
# Only the y values are passed to plot(); no x values are supplied.
ax1.plot(df['Close'])
ax2 = fig.add_subplot(2,1,2, axisbg='#0070BB')
ax2.plot(df['Volume'])
plt.show()
Please run this program yourself and help me solve the date issue.
When you're calling matplotlib's plot(), you are only giving it one array (e.g. df['Close'] in the first case). When there's only one array, matplotlib doesn't know what to use for the x axis data, so it just uses the index of the array. This is why your x axis shows the numbers 0 to 160: there are presumably 160 items in your array.
Use ax1.plot(df.index, df['Close']) instead, since df.index should hold the date values in your pandas dataframe.
import pandas as pd
import datetime
import matplotlib.pyplot as plt
# pandas.io.data was removed from pandas; pandas_datareader is its successor.
import pandas_datareader.data as web

df = web.get_data_yahoo('goog', start=datetime.datetime(2008,1,1),end=datetime.datetime(2014,10,23))
fig = plt.figure()
# Colour the figure background.
r = fig.patch
r.set_facecolor('#0070BB')
# `facecolor` replaces the `axisbg` keyword, which Matplotlib has removed.
ax1 = fig.add_subplot(2,1,1, facecolor='#0070BB')
ax1.grid(True)
# Pass the index explicitly as the x values so the axis is based on
# df.index instead of item positions.
ax1.plot(df.index, df['Close'])
ax2 = fig.add_subplot(2,1,2, facecolor='#0070BB')
ax2.plot(df.index, df['Volume'])
plt.show()

Categories