I'm making a candlestick chart with two data sets: [open, high, low, close] and volume. I'm trying to overlay the volumes at the bottom of the chart like this:
I'm calling volume_overlay3 but instead of bars it fills the whole plot area. What am I doing wrong?
My other option is to use .bar(), which doesn't have the up and down colors but would work if I could get the scale right:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
candlestick(ax, candlesticks)
ax2 = ax.twinx()
volume_overlay3(ax2, quotes)
ax2.xaxis_date()
ax2.set_xlim(candlesticks[0][0], candlesticks[-1][0])
ax.yaxis.set_label_position("right")
ax.yaxis.tick_right()
ax2.yaxis.set_label_position("left")
ax2.yaxis.tick_left()
The volume_overlay3 did not work for me. So I tried your idea to add a bar plot to the candlestick plot.
After creating a twin axis for the volume re-position this axis (make it short) and modify the range of the candlestick y-data to avoid collisions.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# from matplotlib.finance import candlestick
# from matplotlib.finance import volume_overlay3
# finance module is no longer part of matplotlib
# see: https://github.com/matplotlib/mpl_finance
from mpl_finance import candlestick_ochl as candlestick
from mpl_finance import volume_overlay3
from matplotlib.dates import num2date
from matplotlib.dates import date2num
import matplotlib.mlab as mlab
import datetime
datafile = 'data.csv'
r = mlab.csv2rec(datafile, delimiter=';')
# the dates in my example file-set are very sparse (and annoying) change the dates to be sequential
for i in range(len(r)-1):
r['date'][i+1] = r['date'][i] + datetime.timedelta(days=1)
candlesticks = zip(date2num(r['date']),r['open'],r['close'],r['max'],r['min'],r['volume'])
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.set_ylabel('Quote ($)', size=20)
candlestick(ax, candlesticks,width=1,colorup='g', colordown='r')
# shift y-limits of the candlestick plot so that there is space at the bottom for the volume bar chart
pad = 0.25
yl = ax.get_ylim()
ax.set_ylim(yl[0]-(yl[1]-yl[0])*pad,yl[1])
# create the second axis for the volume bar-plot
ax2 = ax.twinx()
# set the position of ax2 so that it is short (y2=0.32) but otherwise the same size as ax
ax2.set_position(matplotlib.transforms.Bbox([[0.125,0.1],[0.9,0.32]]))
# get data from candlesticks for a bar plot
dates = [x[0] for x in candlesticks]
dates = np.asarray(dates)
volume = [x[5] for x in candlesticks]
volume = np.asarray(volume)
# make bar plots and color differently depending on up/down for the day
pos = r['open']-r['close']<0
neg = r['open']-r['close']>0
ax2.bar(dates[pos],volume[pos],color='green',width=1,align='center')
ax2.bar(dates[neg],volume[neg],color='red',width=1,align='center')
#scale the x-axis tight
ax2.set_xlim(min(dates),max(dates))
# the y-ticks for the bar were too dense, keep only every third one
yticks = ax2.get_yticks()
ax2.set_yticks(yticks[::3])
ax2.yaxis.set_label_position("right")
ax2.set_ylabel('Volume', size=20)
# format the x-ticks with a human-readable date.
xt = ax.get_xticks()
new_xticks = [datetime.date.isoformat(num2date(d)) for d in xt]
ax.set_xticklabels(new_xticks,rotation=45, horizontalalignment='right')
plt.ion()
plt.show()
data.csv is up here:
http://pastebin.com/5dwzUM6e
See the answer here. Apparently a bug and it's going to be fixed.
For now you need to assign the returned collection from the volume_overlay3 call to a variable then add that to the chart.
vc = volume_overlay3(ax2, quotes)
ax2.add_collection(vc)
If you want to stack up graphs on top of one another (i.e. plot them on the same axis) use:
plt.hold(True)
Related
I took data from excel and plotted it. The first column is date, while the next two columns are prices of different indexes.
I managed to plot them, but they are on separate graphs. I need them plotted against each other with one y-axis (date) and two x-axis.
Also, I can't figure out how to make the line dotted for one and a diamond marker for the other.
import matplotlib.pyplot as plt
import pandas as pd
excel_data = pd.read_excel('Python_assignment_InputData.xlsx', '^GSPTSE')
excel_data.plot(kind='line', x = 'Date', y = 'Bitcoin CAD (BTC-CAD)', color = 'green')
excel_data.plot(kind='line', x = 'Date', y = 'S&P/TSX Composite index (^GSPTSE)', color = 'blue')
plt.show()
I expect Bitcoin and S%P prices to be on one y axis, with dates being on the x axis.
I am providing a sample answer using the iris DataFrame from seaborn. You can modify it to your needs. What you need is a single x axis and two y-axes.
import seaborn as sns
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
iris = sns.load_dataset("iris")
iris.plot(x='sepal_length', y='sepal_width', linestyle=':', ax=ax)
iris.plot(x='petal_length', y='petal_width', marker='d',
linestyle='None', secondary_y=True, ax=ax)
This is my code:
from matplotlib.ticker import FuncFormatter
import pandas as pd
import numpy as np
from datetime import datetime
from matplotlib import pyplot as plt
dates = pd.date_range('01/01/2016', datetime.today(), freq = 'M')
X = pd.DataFrame(index = dates)
X['values'] = np.random.rand(len(X)) * 300
fig, ax = plt.subplots()
fig.set_size_inches(8 * phi, 8 )
X['values'].plot(ax = ax)
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, pos: '$ {:,.0f}'.format(x)))
plt.show()
I've been trying for half an hour now and I really need some help with this.
What is the cleanest, simplest way to show the other months on the minor tick labels for the xaxis? Instead of what it wants to do for some reason, show only months that start with a J....
Notes: I do have seaborne installed.
First, in order to be able to use matplotlib tickers on pandas date plots you needs to set the compatibility option x_compat=True.
X.plot(ax = ax, x_compat=True)
Next, in order to format the x axis, you needs to use xaxis. In order to set the minor ticklabels, you need to use set_minor_formatter.
In order to assign some ticks to certain positions you need a Locator not a Formatter.
Now it seems you want to have full control over the output plot, hence you need to set the major and minor locators and formatters.
Note that labeling each month will surely let the labels overlap. So a larger figure or smaller fontsize would be needed.
fig, ax = plt.subplots(figsize=(12,3))
X.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_minor_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter("\n%Y"))
ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b"))
plt.setp(ax.get_xticklabels(), rotation=0, ha="center")
plt.show()
I am plotting aggregated data in Python, using Pandas and Matlplotlib.
My axis customization commands are failing as a function of which of two similar functions I'm calling to make bar plots. The working case is e.g.:
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
def format_x_date_month_day(ax):
days = mdates.DayLocator()
months = mdates.MonthLocator() # every month
dayFmt = mdates.DateFormatter('%D')
monthFmt = mdates.DateFormatter('%Y-%m')
ax.figure.autofmt_xdate()
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(monthFmt)
ax.xaxis.set_minor_locator(days)
span_days = 90
start = pd.to_datetime("1-1-2012")
idx = pd.date_range(start, periods=span_days).tolist()
df=pd.DataFrame(index=idx, data={'A':np.random.random(span_days), 'B':np.random.random(span_days)})
plt.close('all')
fig, ax = plt.subplots(1)
ax.bar(df.index, df.A) # loop over columns here to do stacked plot
format_x_date_month_day(ax)
plt.show()
(See matplotlib.org for example of looping to create a stacked bar plot.) This gives us
Another approach that should work and be much easier is to use df.plot.bar(ax=ax, stacked=True), however it does not admit date axis formatting with mdates:
plt.close('all')
fig, ax = plt.subplots(1)
df.plot.bar(ax=ax, stacked=True)
format_x_date_month_day(ax)
plt.show()
How can mdates and ax.figure.autofmt_xdate() be made to play nice with df.plot.bar?
Bar plots in pandas are designed to compare categories rather than to display time-series or other types of continuous variables, as stated in the docstring:
A bar plot shows comparisons among discrete categories. One axis of the plot shows the specific categories being compared, and the other axis represents a measured value.
This is why the scale of the x-axis of pandas bar plots is made of integers starting from zero, regardless of the data type of the x variable. When the same bar plot is created with matplotlib, the scale of the x-axis is made of matplotlib date numbers, so the tick locators and formatters of the matplotlib.dates module (mdates) can be used as expected.
To be able to use a pandas bar plot with mdates, you need to move the bars along the x-axis to locations that match the matplotlib date numbers. This can be done thanks to the mdates.date2num function. This is illustrated in the following example based on the code you provided with a few modifications: the sample dataset contains 3 variables, the time series is limited to 45 days, and the tick formatting is adjusted to my preferences (and is not wrapped as a function).
This example works for any number of variables (with or without NaNs) and for any bar width that is passed to the pandas plot function:
import numpy as np # v 1.19.2
import pandas as pd # v 1.1.3
import matplotlib.dates as mdates # v 3.3.2
# Create random dataset
rng = np.random.default_rng(seed=1) # random number generator
nperiods = 45
nvar = 3
idx = pd.date_range('2012-01-01', periods=nperiods, freq='D')
df = pd.DataFrame(rng.integers(11, size=(idx.size, nvar)),
index=idx, columns=list('ABC'))
# Draw pandas stacked bar chart
ax = df.plot(kind='bar', stacked=True, figsize=(10,5))
# Compute width of bars in matplotlib date units
pandas_width = ax.patches[0].get_width() # the default bar width is 0.5
mdates_x0 = mdates.date2num(df.index[0])
mdates_x1 = mdates.date2num(df.index[1])
mdates_width_default = (mdates_x1-mdates_x0)/2
mdates_width = pandas_width*mdates_width_default/0.5 # rule of three conversion
# Compute new x values for bars in matplotlib date units, adjusting the
# positions according to the bar width
mdates_x = mdates.date2num(df.index) - mdates_width/2
nvar = len(ax.get_legend_handles_labels()[1])
mdates_x_patches = np.ravel(nvar*[mdates_x])
# Set bars to new x positions: this loop works fine with NaN values as
# well because in bar plot NaNs are drawn with a rectangle of 0 height
# located at the foot of the bar, you can verify this with patch.get_bbox()
for patch, new_x in zip(ax.patches, mdates_x_patches):
patch.set_x(new_x)
patch.set_width(mdates_width)
# Set major and minor date tick locators
months = mdates.MonthLocator()
days = mdates.DayLocator(bymonthday=np.arange(31, step=3))
ax.xaxis.set_major_locator(months)
ax.xaxis.set_minor_locator(days)
# Set major date tick formatter
month_fmt = mdates.DateFormatter('\n%b\n%Y')
day_fmt = mdates.DateFormatter('%d')
ax.xaxis.set_major_formatter(month_fmt)
ax.xaxis.set_minor_formatter(day_fmt)
# Shift the plot frame to where the bars are now located
xmin = min(mdates_x) - mdates_width
xmax = max(mdates_x) + 2*mdates_width
ax.set_xlim(xmin, xmax)
# Adjust tick label format last, else it may produce unexpected results
ax.figure.autofmt_xdate(rotation=0, ha='center')
Up to you to decide if this is more convenient than plotting stacked bars from scratch with matplotlib.
This solution can be slightly modified to display appropriate tick labels for time series based on any frequency of time. Here is an example using a frequency of minutes, a custom bar width, and an automatic date tick locator and formatter. Only the new/modified code lines are shown:
import matplotlib.ticker as mtick
#...
idx = pd.date_range('2012-01-01 12', periods=nperiods, freq='T')
#...
ax = df.plot(kind='bar', stacked=True, figsize=(10,5), width=0.3)
#...
# Set adaptive tick locators and major tick formatter
maj_loc = mdates.AutoDateLocator()
ax.xaxis.set_major_locator(maj_loc)
min_loc = mtick.FixedLocator(mdates_x + mdates_width/2)
ax.xaxis.set_minor_locator(min_loc) # draw minor tick under each bar
fmt = mdates.ConciseDateFormatter(maj_loc)
ax.xaxis.set_major_formatter(fmt)
#...
You may notice that the ticks are often not well aligned with the bars. There appears to be some issue with matplotlib when the figure elements are put together. I find this is usually only noticeable when plotting thinner-than-useful bars. You can check that the bars and ticks are indeed placed correctly by running ax.get_xticks() and comparing that to the values given by patch.get_bbox() when looping through ax.patches.
I came across this different behaviour in the third example plot below. Why am I able to correctly edit the x-axis' ticks with pandas line() and area() plots, but not with bar()? What's the best way to fix the (general) third example?
import numpy as np
import pandas as pd
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
x = np.arange(73,145,1)
y = np.cos(x)
df = pd.Series(y,x)
ax1 = df.plot.line()
ax1.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax1.xaxis.set_minor_locator(ticker.MultipleLocator(2.5))
plt.show()
ax2 = df.plot.area(stacked=False)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(2.5))
plt.show()
ax3 = df.plot.bar()
ax3.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax3.xaxis.set_minor_locator(ticker.MultipleLocator(2.5))
plt.show()
Problem:
The bar plot is meant to be used with categorical data. Therefore the bars are not actually at the positions of x but at positions 0,1,2,...N-1. The bar labels are then adjusted to the values of x.
If you then put a tick only on every tenth bar, the second label will be placed at the tenth bar etc. The result is
You can see that the bars are actually positionned at integer values starting at 0 by using a normal ScalarFormatter on the axes:
ax3 = df.plot.bar()
ax3.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax3.xaxis.set_minor_locator(ticker.MultipleLocator(2.5))
ax3.xaxis.set_major_formatter(ticker.ScalarFormatter())
Now you can of course define your own fixed formatter like this
n = 10
ax3 = df.plot.bar()
ax3.xaxis.set_major_locator(ticker.MultipleLocator(n))
ax3.xaxis.set_minor_locator(ticker.MultipleLocator(n/4.))
seq = ax3.xaxis.get_major_formatter().seq
ax3.xaxis.set_major_formatter(ticker.FixedFormatter([""]+seq[::n]))
which has the drawback that it starts at some arbitrary value.
Solution:
I would guess the best general solution is not to use the pandas plotting function at all (which is anyways only a wrapper), but the matplotlib bar function directly:
fig, ax3 = plt.subplots()
ax3.bar(df.index, df.values, width=0.72)
ax3.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax3.xaxis.set_minor_locator(ticker.MultipleLocator(2.5))
I have a website that produces (depending on available data stations running) an arbitrary number of plots (as an image), that are vertically stacked over one another. An example is the following:
The problem is that depending on the number of vertical plots, the suptitle (top title) goes to a different position. Check the following examples of 5 and 10 plots:
5 plots:
And here's 10 plots:
So for every number of plots, I get a different result. Using fig.tight_layout() didn't help.
What I need is to have the bottom of my text at a certain distance from the top of the plots. Is there a general answer to this problem?
I created some minimal working code that has the number of plots parametrized. Please check it out if you would like to reproduce this problem.
import datetime
import random
import matplotlib
matplotlib.use('Agg') # Force matplotlib not to use any Xwindows backend.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.image as mpimg
import matplotlib.gridspec as gridspec
import numpy as np
random.seed(datetime.datetime.now())
#initial parameters
numOfPlots = 2
dataLen = 100
randomRange = 10*dataLen
dpiVal = 180
#create data
xData = list(range(dataLen) for x in range(numOfPlots))
yData = list(random.sample(range(randomRange), dataLen) for x in range(numOfPlots))
#matplotlib initialize plot
gs = gridspec.GridSpec(numOfPlots,1)
plt.cla()
plt.clf()
fig = plt.figure()
ax = None
for i in list(range(numOfPlots)):
if i == 0:
ax = fig.add_subplot(gs[i])
else:
ax = fig.add_subplot(gs[i],sharex=ax)
ax.plot(xData[i], yData[i])
labelSize = 10
ax.set_ylabel("Hi there",size=8)
ax.get_yaxis().set_label_coords(-0.07,0.5)
plt.yticks(size=8)
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0),useOffset=True)
plt.subplots_adjust(hspace = 0.3)
if i == numOfPlots-1:
plt.xticks(rotation=0,size=7)
max_xticks = 10
xloc = plt.MaxNLocator(max_xticks)
ax.xaxis.set_major_locator(xloc)
ax=plt.gca()
else:
plt.tick_params(
axis='x', # changes apply to the x-axis
labelbottom='off') # labels along the bottom edge are off
ax_right = ax.twinx()
ax_right.yaxis.set_ticks_position('right')
ax_right.set_ylabel("Nice to see you!",size=labelSize)
ax_right.get_yaxis().set_ticks([])
#the following sets the size and the aspect ratio of the plot
fig.set_size_inches(10, 1.8*numOfPlots)
fig.suptitle("Hi there, this is the first line\nAnd this is the second!!!")
fig.savefig("img_"+str(numOfPlots)+".png",bbox_inches='tight',dpi=dpiVal)
I suggest trying something manual: adding text annotation with position in units of the figure relative coordinates.
Consider these two dummy examples:
hf,ax = plt.subplots(nrows=3)
hf.text(0.5,0.92,
"Hi there, this is the first line\nAnd this is the second!!!",
horizontalalignment='center')
hf,ax = plt.subplots(nrows=7)
hf.text(0.5,0.92,
"Hi there, this is the first line\nAnd this is the second!!!",
horizontalalignment='center')
The result has the "suptitle" located in the exact same position: