I'm trying to make a cryptoscanner but I'm struggling a bit. The code right now can loop through the different coins in symbols.csv and print plots for all of them. The plots include close price, SMA and bollinger bands. Now I really want the close price to be candlesticks and not a line. I've found that there are other plots like mpf to make candlesticks. The problem is that I don't know how to make the bollinger bands work with the mpf plots and I don't know how to make the candlesticks work with matplotlib. Can someone help me making candlesticks in matplotlib orrr make the bollingerbands in the mpf plots.
Thanks in advance!
The graph looks like this right now
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
with open('symbols.csv') as f:
symbols = f.read().splitlines()
for symbol in symbols:
df = yf.download(symbol, start='2020-01-01')
# df = yf.download(symbol, period = '22h', interval = '15m')
# df = yf.download('ADA-USD', start='2021-01-01')
df['SMA'] = df.Close.rolling(window=20).mean()
df['stddev'] = df.Close.rolling(window=20).std()
df['Upper'] = df.SMA + 2* df.stddev
df['Lower'] = df.SMA - 2* df.stddev
df['Buy_Signal'] = np.where(df.Lower > df.Close, True, False)
df['Sell_Signal'] = np.where(df.Upper < df.Close, True, False)
buys = []
sells = []
open_pos = False
for i in range(len(df)):
if df.Lower[i] > df.Close[i]:
if open_pos == False:
open_pos = True
elif df.Upper[i] < df.Close[i]:
if open_pos:
open_pos = False
plt.figure(figsize=(12, 6))
plt.scatter(df.iloc[buys].index, df.iloc[buys].Close, marker = '^', color ='g')
plt.scatter(df.iloc[sells].index, df.iloc[sells].Close, marker = '^', color ='r')
plt.plot(df[['Close', 'SMA', 'Upper', 'Lower']])
plt.fill_between(df.index, df.Upper, df.Lower, color='grey', alpha=0.3)
plt.legend(['Close', 'SMA', 'Upper', 'Lower'])
merged = pd.concat([df.iloc[buys].Close, df.iloc[sells].Close], axis=1)
merged.columns = ['Buys', 'Sells']
totalprofit = merged.shift(-1).Sells - merged.Buys
relprofits = (merged.shift(-1).Sells - merged.Buys) / merged.Buys
The links in the comments provide a wealth of examples. Since you want to graph candlesticks, Bollinger Bands, and SMAs in mpf, I have modified the additional plot examples from the previous examples to suit your needs. The graphs were created from data obtained in stocks instead of currencies.
import yfinance as yf
import pandas as pd
import mplfinance as mpf
df = yf.download("AAPL", start="2020-01-01")
df['SMA'] = df.Close.rolling(window=20).mean()
df['stddev'] = df.Close.rolling(window=20).std()
df['Upper'] = df.SMA + 2* df.stddev
df['Lower'] = df.SMA - 2* df.stddev
df['Buy_Signal'] = np.where(df.Lower > df.Close, True, False)
df['Sell_Signal'] = np.where(df.Upper < df.Close, True, False)
tcdf = df[['Lower','Upper','SMA']]
apd = mpf.make_addplot(tcdf)
mpf.plot(df, figratio=(8,4), type='candle', addplot=apd, volume=False, style='yahoo')
I created a reproducible example of random data for candlestick chart ohlc that is working correctly.
Now I need, in the same plot, to plot a random line (in the real application it will be a function of the ohlc data (not moving average)), so I created a random varialbe y0 that I will "concat" every loop through ani = animation.FuncAnimation() function.
The objects that mpf "will use" to plot are df and y0_arr and they have the "same format" (they are pd.DataFrame, have DateIndex as index, same dates, same dtypes=float).
if you comment(exclude) the part of the ##random line the code will work with no problem, only for the candlestick chart, but it won't if you include the random line. I've also tried to plot just/only the random line but it won't work also when it's only the random line.
this below is the code:
import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
from datetime import date, datetime
import mplfinance as mpf
import matplotlib.animation as animation
import time
def genData():
global df0, df, i, close, sign, y0_arr
sign = sign*-1
## random ohlc generation
if i==0: #variable initialization
y0_arr = pd.DataFrame()
df = pd.DataFrame()
close = 0
open_ = np.round(np.random.normal(10, 4), decimals=2) ##initial open
dt = datetime.now() + timedelta(days=i)
dt = dt.date()
if i > 0:
open_ = close
high = np.round(open_ + np.random.normal(0.5, 2), decimals=2)
low = np.round(open_ - np.random.normal(0.5, 2), decimals=2)
close = np.round(open_ + sign*np.random.normal(0.2, 0.4), decimals=2)
dfi = np.column_stack((dt, open_, high, low, close))
dfi = pd.DataFrame(dfi)
dfi.columns = ['date', 'open', 'high', 'low', 'close']
dfi['date'] = pd.to_datetime(dfi['date'], format="%Y/%m/%d")
dfi.set_index('date', inplace=True)
dfi = dfi.convert_dtypes(float)
df = pd.concat([df, dfi])
## random ohlc generation
##random line (exclude from here...)
y0 = np.round(np.random.normal(1,2), decimals=2)
y0i = np.column_stack((dt,y0))
y0i = pd.DataFrame(y0i)
y0i.columns = ['date','open']
y0i['date'] = pd.to_datetime(y0i['date'],format="%Y/%m/%d")
y0i.set_index('date', inplace=True)
y0i = y0i.convert_dtypes(float)
y0_arr = pd.concat([y0_arr,y0i])
##random line (...to here)
#### plotting
fig = mpf.figure(style="charles",figsize=(7,8))
ax1 = fig.add_subplot(1,1,1)
def animate(ival):
global df, y0_arr
genData() ##create new data
mpf.plot(df, ax=ax1, type='candle', ylabel='Price US$') ##ohlc
mpf.plot(y0_arr, ax=ax1, type='line',ylabel='Price US$') ##random line (...exclude this line)
ani = animation.FuncAnimation(fig, animate, interval=250)
This below are the error messages that I'm getting:
How can I solve this problem? Where to look?
Thank You
Notice that the error message is KeyError: 'Open'. This is because mpf.plot() expects the first argument to be a DataFrame with columns 'Open', 'High', 'Low', and 'Close' (or with OHLC column names that you specify using kwarg columns=).
Apparently your y0_arr is not such a dataframe.
The correct way to add a line to a candlestick plot is with the mpf.make_addplot() call. <-Click here to see the documentation for addplot.
See also https://github.com/matplotlib/mplfinance/blob/master/examples/mpf_animation_macd.py for an example of how to add lines to an animated candlestick plot.
I am trying to plot a candlestick with volume, using the plotly. However I can not get the proper x and yaxis label.please help.I need y labels for both plot but xlabel for just the bottom one, also one title for both. Bellow is the code.
** one more question, how can I change the line color in the volume plot.Thank you
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')
def chart_can_vol(df):
fig = tools.make_subplots(
rows=3, cols=1,
specs=[[{"rowspan": 2}],
fig.add_trace(go.Candlestick(x = df.index,
open = df['Open'],
close = df['Close'],
low = df['Low'],
high = df['High']),
row = 1, col = 1)
fig.update_layout(xaxis_rangeslider_visible = False)
yaxis_title = 'Apple Stock Price USD ($)'
fig.add_trace(go.Scatter(x = df.index,
y = df['Volume']),
row = 3, col = 1)
yaxis_title = 'Volume',
xaxis_title = 'Date'
fig.update_layout(title_text="Apple Stock")
fig.update_layout(width=900, height=900)
return fig
When you make your subplots, you can add the subplot_titles attribute. In the code below, I used the titles "test1" and "test2". When you change your axis labels, you can use update_xaxes and update_yaxes, just make sure that the row and column values are the same for the update_axes method and the subplot.
To change the color of the line, you can add the line attribute within the scatterplot method and set it equal to a dictionary with a hex value of the color you want.
P.S. You should update plotly, because the tools.make_subplots was deprecated. Once you update, you can simply use make_subplots. Also, you are using pandas, when you should use pandas-datareader. See import statements.
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import tools
stock = 'AAPL'
df = web.DataReader(stock, data_source='yahoo', start='01-01-2019')
def chart_can_vol(df):
subplot_titles=["test1", "test2"]
rows = 2
cols = 2
height = 300 * rows
fig = make_subplots(
rows=3, cols=1,
specs=[[{"rowspan": 2}],
subplot_titles=("test1", "test2"),
fig.add_trace(go.Candlestick(x = df.index,
open = df['Open'],
close = df['Close'],
low = df['Low'],
high = df['High']),
row = 1, col = 1)
fig.update_layout(xaxis_rangeslider_visible = False)
yaxis_title = 'Apple Stock Price USD ($)'
fig.add_trace(go.Scatter(x = df.index,
y = df['Volume'],
line= dict(color="#ffe476")),
row = 3, col = 1)
fig.update_xaxes(title_text="Date", row = 3, col = 1)
fig.update_yaxes(title_text="Volume", row = 3, col = 1)
fig.update_layout(title_text="Apple Stock")
fig.update_layout(width=900, height=900)
return fig
I am dealing with 20 excels. For each excel, I do the same subsetting and plotting. I can get 20 separate figures now using for loop. But how can I put these figures in one subplot (5*4)? Check some posts but cannot get the answer.
for files in allfiles:
#if re.search(r".*.xlsx", files):
df = pd.read_excel("D:\Brown research\Task2 site selection\All_excel\{0}".format(files))
newdf = df[(df.slope != 0) & (df.AI >= 0.8) & (df.reach_len > 5000)]
Upper_Whisker = Q3+3*IQR
newdf = newdf[newdf['slope']<Upper_Whisker]
x = newdf['slope']
y = newdf['AI']
nbins = 20
plt.hist2d(x,y,nbins,cmap=plt.cm.coolwarm, cmin=1)
Create your fig and axes ahead of time and use matplotlib's object-oriented interface:
from pathlib import Path
from matplotlib import pyplot
import pandas
basedir = Path(r"D:\Brown research\Task2 site selection\All_excel")
fig, axes = pyplot.subplots(5, 4, figsize=(10, 10))
for xlfile, ax in zip(basedir.glob("*.xlsx"), axes.flat):
df = pandas.read_excel(xlfile)
Upper_Whisker = Q3+3*IQR
newdf = df.loc[lambda df:
(df["slope"] != 0) &
(df["slope"] < Upper_Whisker) &
(df["AI"] >= 0.8) &
(df["reach_len"] > 5000)
x = newdf['slope']
y = newdf['AI']
nbins = 20
hist = ax.hist2d(x,y,nbins,cmap=plt.cm.coolwarm, cmin=1)
ax.set_title(xlfile.stem.split('_')[0], x=0.5, y=0.9)
This assumes that you have exactly 20 files in your directory.
Better yet, I would concat everything into a single dataframe would use seaborn to build up the plot dynamically:
from pathlib import Path
from matplotlib import pyplot
import pandas
import seaborn
def get_upper_whisker(df, column):
Q1 = df[column].quantile(0.25)
Q3 = df[column].quantile(0.75)
IQR = Q3 - Q1
return Q3 + (3 * IQR)
basedir = Path(r"D:\Brown research\Task2 site selection\All_excel")
data = pandas.concat([
for xlfile in basefir.glob("*.xlsx")
fig = (
.apply(lambda g:
g.loc[g["slope"] < get_upper_whisker(g, "slope")]
.loc[lambda df:
(df["slope"] != 0) &
(df["AI"] >= 0.8) &
(df["reach_len"] > 5000)
.pipe(seaborn.FacetGrid, col="file", col_wrap=4)
fig.map(pyplot.hist2d, "slope", "AI", bins=20, cmap=pyplot.cm.coolwarm, cmin=1)
Thank you for the two solutions from #Paul H. For solution 1, just need to revise a little bit, e.g., "fig.colorbar(hist[3],ax=ax)". hist[3] is the "image".
I am currently working on an intra-day stock chart using the Alpha Vantage API. The data frame contains values from 4:00 to 20:00. In my matplotlib.pyplot chart however, the x-Axis also includes values from 20:00 to 4:00 over night. I dont want this as it messes up the aesthetics and also the Volume subplot.
Q: Is there any way to skip x-Axis values which dont exist in the actual Data Frame (the values from 20:00 to 04:00)?
As you can see, the Data Frame clearly jumps from 20:00 to 04:00
However in the Matplotlib chart, the x-Axis contains the values from 20:00 to 4:00, messing with the chart
Code so far. I believe so far everything is right:
import pandas as pd
import matplotlib.pyplot as plt
from alpha_vantage.timeseries import TimeSeries
import time
import datetime as dt
from datetime import timedelta as td
from dateutil.relativedelta import relativedelta
#Accessing and Preparing API
ts = TimeSeries(key=api_key, output_format='pandas')
ticker_input = "TSLA"
interval_input = "15min"
df, meta_data = ts.get_intraday(symbol = ticker_input, interval = interval_input, outputsize = 'full')
slice_date = 16*4*5
df = df[0:slice_date]
df = df.iloc[::-1]
df["100ma"] = df["4. close"].rolling(window = 50, min_periods = 0).mean()
df["Close"] = df["4. close"]
df["Date"] = df.index
#Plotting all as 2 different subplots
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df["Date"], df['Close'])
ax1.plot(df["Date"], df["100ma"], linewidth = 0.5)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df["Date"], df["5. volume"])
It would be great if anybody could help. Im still a complete beginner and only started Python 2 weeks ago.
We got the data from the same place, although the data acquisition method is different. After extracting it in 15 units, I created a graph by excluding the data after 8pm and before 4pm. I created the code with the understanding that your skip would open up the pause. What you want it to skip is skipped once the NaN is set.
import datetime
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import mplfinance as mpf
# import matplotlib.pyplot as plt
with open('./alpha_vantage_api_key.txt') as f:
api_key = f.read()
now_ = datetime.datetime.today()
start = datetime.datetime(2019, 1, 1)
end = datetime.datetime(now_.year, now_.month, now_.day - 1)
symbol = 'TSLA'
df = web.DataReader(symbol, 'av-intraday', start, end, api_key=api_key)
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
df.index = pd.to_datetime(df.index)
df["100ma"] = df["Close"].rolling(window = 50, min_periods = 0).mean()
df["Date"] = df.index
df_15 = df.asfreq('15min')
df_15 = df_15[(df_15.index.hour >= 4)&(df_15.index.hour <= 20) ]
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(8,4.5),dpi=144)
#Plotting all as 2 different subplots
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(df_15["Date"], df_15['Close'])
ax1.plot(df_15["Date"], df_15["100ma"], linewidth = 0.5)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(df_15["Date"], df_15["Volume"])
# plt.tight_layout()
I fixed it using matplotlib.ticker.formatter.
I first created a class and using:
class MyFormatter(Formatter):
def __init__(self, dates, fmt='%Y-%m-%d %H:%M'):
self.dates = dates
self.fmt = fmt
def __call__(self, x, pos=0):
'Return the label for time x at position pos'
ind = int(np.round(x))
if ind >= len(self.dates) or ind < 0:
return ''
return self.dates[ind].strftime(self.fmt)
formatter = MyFormatter(df.index)
ax1 = plt.subplot2grid((7,1), (0,0), rowspan = 5, colspan = 1)
ax1.plot(np.arange(len(df)), df["Close"])
ax1.plot(np.arange(len(df)), df["100ma"], linewidth = 0.5)
ax2 = plt.subplot2grid((6,1), (5,0), rowspan = 2, colspan = 2, sharex = ax1)
ax2.bar(np.arange(len(df)), df["5. volume"])
This gave me a smoother graph than the one before and also that recommended by r-beginner.
The only issue that I have is that if I zoom in the x-axis doesnt really change. it always has teh year, month, date, hour, and minute. Obviously I only want hour and minute when Im zoomed in further. I am yet to figure out how to do that
Im using the following code:
import matplotlib.pyplot as pyplot
import pandas as pandas
from datetime import datetime
dataset = pandas.read_csv("HugLog_17.01.11.csv", sep=",", header=0)
print('filter data for SrcAddr')
dataset_filtered = dataset[dataset['SrcAddr']=='0x1FD3']
print('get Values')
varY = dataset_filtered.Battery_Millivolt.values
varX = dataset_filtered.Timestamp.values
print('Convert the date-strings in date-objects.')
dates_list = [datetime.strptime(date, '%y-%m-%d %H:%M:%S') for date in varX]
fig = pyplot.figure()
ax1 = fig.add_subplot(1,1,1)
ax1.bar(dates_list, varY)
The problem i have is, its a large datacollection with 180k datapoints.
And pyplot displays all points an the graph which makes it slow and the bars overlap. Is there a way to set a maximum-limit on how much datapoints a displayed at a "view".
What i mean by that is, that as soon as the graph is render ther are only 50 datapoints and when i zoomm in i only get a maximum of 50 datapoints again.
Resampling can be done with the resample function from pandas.
Note that the resample syntax has changed between version 0.17 and 0.19 of pandas. The example below uses the old style. See e.g. this tutorial for the new style.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# generate some data for every second over a whole day
times = pd.date_range(start='2017-01-11',periods=86400, freq='1S')
df = pd.DataFrame(index = times)
df['data'] = np.sort(np.random.randint(low=1300, high=1600, size=len(df.index)) )[::-1] + \
# resample the data, taking the mean over 1 hours ("H")
t = "H" # for hours, try "T" for minutes as well
width=1./24 #matplotlib default uses a width of 1 day per bar
# try width=1./(24*60) for minutes
df_resampled = pd.DataFrame()
df_resampled['data'] = df.data.resample(t, how="mean")
fig, ax = plt.subplots()
#ax.bar(df.index, df['data'], width=1./(24*60*60)) # original data, takes too long to plot
ax.bar(df_resampled.index, df_resampled['data'], width=width)
Automatic adaption of the resampling when zooming would indeed require some manual work. There is a resampling example on the matplotlib event handling page, which does not work out of the box but could be adapted accordingly.
This is how it would look like:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates
class Sampler():
def __init__(self,df):
self.df = df
def resample(self, limits):
print limits
dt = limits[1] - limits[0]
if (type(dt) != pd.tslib.Timedelta) and (type(dt) != datetime.timedelta):
dt = datetime.timedelta(days=dt)
print dt
#see #http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
if dt > datetime.timedelta(hours=5):
t = "H"; width=1./24
elif dt > datetime.timedelta(minutes=60):
t = "15T"; width=15./(24.*60)
elif dt > datetime.timedelta(minutes=5):
t = "T"; width=1./(24.*60)
elif dt > datetime.timedelta(seconds=60):
t = "15S"; width=15./(24.*60*60)
#dt < datetime.timedelta(seconds=60):
t = "S"; width=1./(24.*60*60)
self.resampled = pd.DataFrame()
self.resampled['data'] = self.df.data.resample(t, how="mean")
print t, len(self.resampled['data'])
print "indextype", type(self.resampled.index[0])
print "limitstype", type(limits[1])
if type(limits[1]) == float or type(limits[1]) == np.float64 :
dlowlimit = matplotlib.dates.num2date(limits[0])
duplimit = matplotlib.dates.num2date(limits[1])
print type(duplimit), duplimit
self.resampled = self.resampled.loc[self.resampled.index <= duplimit]
self.resampled = self.resampled.loc[self.resampled.index >= dlowlimit]
self.resampled = self.resampled.loc[self.resampled.index <= limits[1]]
self.resampled = self.resampled.loc[self.resampled.index >= limits[0]]
return self.resampled.index,self.resampled['data'],width
def update(self, ax):
print "update"
lims = ax.viewLim
start, stop = lims.intervalx
x,y,width = self.resample([start, stop])
ax.bar(x,y, width=width)
ax.set_xlim([start, stop])
ax.callbacks.connect('xlim_changed', self.update)
times = pd.date_range(start='2017-01-11',periods=86400, freq='1S')
df = pd.DataFrame(index = times)
df['data'] = np.sort(np.random.randint(low=1300, high=1600, size=len(df.index)) )[::-1] + \
sampler = Sampler(df)
x,y,width = sampler.resample( [df.index[0],df.index[-1] ] )
fig, ax = plt.subplots()
ax.bar(x,y, width=width)
# connect to limits changes
ax.callbacks.connect('xlim_changed', sampler.update)
One thing you can do is plot a random subset of the data by using the sample method on your pandas DataFrame. Use the frac argument to determine the fraction of points you want to use. It ranges from 0 to 1.
After you get your dataset_filtered DataFrame, take a sample of it like this
dataset_filtered_sample = dataset_filtered.sample(frac=.001)