Average line for bar chart in matplotlib - python

How do we draw an average line (horizontal) for a histogram in using matplotlib?
Right now, I'm able to draw the histogram without any issues.
Here is the code I'm using:
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.2 # the width of the bars
plt.tick_params(axis='both', which='major', labelsize=30)
plt.tick_params(axis='both', which='minor', labelsize=30)
ax2 = ax.twinx()
## the bars
rects1 = ax.bar(ind, PAAE1, width,
color='0.2',
error_kw=dict(elinewidth=2,ecolor='red'),
label='PAAE1')
rects2 = ax.bar(ind+width, PAAE2, width,
color='0.3',
error_kw=dict(elinewidth=2,ecolor='black'),
label='PAAE2')
rects3 = ax2.bar(ind+width+width, AAE1, width,
color='0.4',
error_kw=dict(elinewidth=2,ecolor='red'),
label='AAE1')
rects4 = ax2.bar(ind+3*width, AAE2, width,
color='0.5',
error_kw=dict(elinewidth=2,ecolor='black'),
label='AAE3')
maxi = max(dataset[2])
maxi1 = max(dataset[4])
f_max = max(maxi, maxi1)
lns = [rects1,rects2,rects3,rects4]
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc='upper center', ncol=4)
# axes and labels
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0, 100)
ax.set_ylabel('PAAE', fontsize=25)
ax2.set_ylim(0, f_max+500)
ax2.set_ylabel('AAE (mW)', fontsize=25)
xTickMarks = dataset[0]
ax.set_xticks(ind+width)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=90, fontsize=25)
I want to plot the average line for PAAE 1, 2 and AAE 1, 2.
What should I be using to plot the average line?

If you'd like a vertical line to denote the mean use axvline(x_value). This will place a vertical line that always spans the full (or specified fraction of) y-axis. There's also axhline for horizontal lines.
In other works, you might have something like this:
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
As a more complete, but unnecessarily complex example (most of this is nicely annotating the means with curved arrows):
import numpy as np
import matplotlib.pyplot as plt
data1 = np.random.normal(0, 1, 1000)
data2 = np.random.normal(-2, 1.5, 1000)
fig, ax = plt.subplots()
bins = np.linspace(-10, 5, 50)
ax.hist(data1, bins=bins, color='blue', label='Dataset 1',
alpha=0.5, histtype='stepfilled')
ax.hist(data2, bins=bins, color='green', label='Dataset 2',
alpha=0.5, histtype='stepfilled')
ax.axvline(data1.mean(), color='blue', linewidth=2)
ax.axvline(data2.mean(), color='green', linewidth=2)
# Add arrows annotating the means:
for dat, xoff in zip([data1, data2], [15, -15]):
x0 = dat.mean()
align = 'left' if xoff > 0 else 'right'
ax.annotate('Mean: {:0.2f}'.format(x0), xy=(x0, 1), xytext=(xoff, 15),
xycoords=('data', 'axes fraction'), textcoords='offset points',
horizontalalignment=align, verticalalignment='center',
arrowprops=dict(arrowstyle='-|>', fc='black', shrinkA=0, shrinkB=0,
connectionstyle='angle,angleA=0,angleB=90,rad=10'),
)
ax.legend(loc='upper left')
ax.margins(0.05)
plt.show()

Related

How to remove a particular grid line?

please see the result graph image below.
I wish to remove only one major grid line at y-axis value of 10 (Blue horizontal line), and keep all other grid lines.
Is there a way to do that?
plt.rcParams['font.family'] = 'Arial'
fig, ax = plt.subplots(figsize=(14.78, 9.84))
plt.xlim(0, 105)
plt.ylim(0, 10)
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
ax.yaxis.set_minor_locator(AutoMinorLocator(2))
ax.spines['bottom'].set_linewidth(1.5)
ax.spines['left'].set_linewidth(1.5)
ax.spines['top'].set_linewidth(0)
ax.spines['right'].set_linewidth(0)
# Grid setting
plt.grid(True, color='#0100FF', which="major", ls="-")
plt.grid(True, color='#0BC904', which="minor", ls="-")
plt.xlabel("Indicator Amplitude, %FSH", fontsize=28, labelpad=15)
plt.ylabel("Function Generator Output, V", fontsize=28, labelpad=15)
# Axis setting
plt.tick_params(which="major", labelsize=22, length=10, pad=10, width=1.5)
plt.tick_params(which="minor", length=8, width=1.5)
# Plot scatter & line
plt.plot(FSH_axis, x_value[2:], color='black', marker='^', linewidth=1.5, markersize=8, label="40 dB")
plt.plot(FSH_axis, y_value[2:], color='red', marker='o', linewidth=1.5, markersize=8, label="60 dB")
plt.plot(FSH_axis, z_value[2:], color='blue', marker='v', linewidth=1.5, markersize=8, label="80 dB")
plt.legend(loc=(1 / 16, 58 / 90), ncol=1, fontsize=20, frameon=True, framealpha=1, edgecolor="black")
plt.show()
We can catch all gridlines with get_ygridlines(), then access individual gridlines as Line2D objects to modify them:
from matplotlib import pyplot as plt
from matplotlib.ticker import AutoMinorLocator
plt.rcParams['font.family'] = 'Arial'
fig, ax = plt.subplots(figsize=(14.78, 9.84))
plt.xlim(0, 105)
plt.ylim(0, 10)
ax.xaxis.set_minor_locator(AutoMinorLocator(2))
ax.yaxis.set_minor_locator(AutoMinorLocator(2))
ax.spines['bottom'].set_linewidth(1.5)
ax.spines['left'].set_linewidth(1.5)
ax.spines['top'].set_linewidth(0)
ax.spines['right'].set_linewidth(0)
# Grid setting
plt.grid(True, color='#0100FF', which="major", ls="-")
plt.grid(True, color='#0BC904', which="minor", ls="-")
#this part is added
#set the last horizontal gridline invisible
ygridlines = ax.get_ygridlines()
gridline_of_interest = ygridlines[-1]
gridline_of_interest.set_visible(False)
plt.xlabel("Indicator Amplitude, %FSH", fontsize=28, labelpad=15)
plt.ylabel("Function Generator Output, V", fontsize=28, labelpad=15)
# Axis setting
plt.tick_params(which="major", labelsize=22, length=10, pad=10, width=1.5)
plt.tick_params(which="minor", length=8, width=1.5)
# Plot scatter & line
FSH_axis = [10, 40, 100]
plt.plot(FSH_axis, [1, 3, 2], color='black', marker='^', linewidth=1.5, markersize=8, label="40 dB")
plt.plot(FSH_axis, [2, 2, 3], color='red', marker='o', linewidth=1.5, markersize=8, label="60 dB")
plt.plot(FSH_axis, [2, 1, 1], color='blue', marker='v', linewidth=1.5, markersize=8, label="80 dB")
plt.legend(loc=(1 / 16, 58 / 90), ncol=1, fontsize=20, frameon=True, framealpha=1, edgecolor="black")
plt.show()
Sample output:
Of course, the corresponding get_xgridlines() also exists.

Increase space between secondary y axis and x axis?

I have plotted a graph with two y-axes with python. However, I would like to have more space between the two lines, with the secondary y-axes on the top of the graph.
here's my code:
x = data['Data'].tolist()
y = data['Excess Return'].tolist()
z=data['EPU shock'].tolist()
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
curve1 = ax1.plot(x, y, label='Excess Return', color='r')
curve2 = ax2.plot(x, z, label='EPU shock', color='b')
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
lines = lines_1 + lines_2
labels = labels_1 + labels_2
ax1.legend(lines, labels, loc="lower center", borderaxespad=-5, ncol=2)
plt.title("European Union")
plt.show()
Output:
but I would like to have something like this:
Would a two-subplots setup work for you?
import matplotlib.pyplot as plt
import numpy as np
# Dummy data.
x = np.arange(2000, 2020, 1)
y1 = np.sin(x)
y2 = np.cos(x/2)
# We create a two-subplots figure and hide the boundary between the two Axes.
fig, (ax1, ax_temporary) = plt.subplots(2, 1)
ax2 = ax_temporary.twinx()
for spine in (ax1.spines["bottom"], ax_temporary.spines["top"], ax2.spines["top"]):
spine.set_visible(False)
ax1.xaxis.set_visible(False)
ax_temporary.yaxis.set_visible(False)
fig.subplots_adjust(hspace=0) # No space left!
# Create curves and legend.
curve1, = ax1.plot(x, y1, label='Excess Return', color='r')
curve2, = ax2.plot(x, y2, label='EPU shock', color='b')
lines_1, labels_1 = ax1.get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
lines = lines_1 + lines_2
labels = labels_1 + labels_2
ax2.legend(lines, labels, loc="lower center", borderaxespad=-5, ncol=2) # Legend on ax2 instead of ax1.
ax1.set_title("European Union")
fig.show()
Does it suit you to to adjust the limits?
fig, ax1 = plt.subplots()
ax2 = ax1.twinx() # open second y-axis
line1, = ax1.plot([0, 1, 2], [0, 1, 2], "b-", label="Line 1")
line2, = ax2.plot([0, 1, 2,], [10, 13, 12], "r-", label="Line 2")
# set limits
ax2.set_ylim( (-10,14) )
plt.show()

matplotlib annotation overlapping y_tick labels on plot

I have tried a number of different things to fix my chart, from zorder on the plots to plt.rcParams.
I feel that this is such a simple problem but I just dont know where I have gone wrong. As you can see the bottom annotation in cyan blue is unreadable and mashed with the y label.
Ideally, the annotation sits over the y label to a point where text inside annotation is readable.
If possible just for the annotation to sit on top and still overlay the y label..something like this
Any help on this would be greatly appreciated.
ax = df.plot(x=df.columns[0], y=df.columns[1], legend=False, zorder=0, linewidth=1)
y1 =df.loc[:, df.columns[2]].tail(1)
y2= df.loc[:, df.columns[1]].tail(1)
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
print(colors)
for var in (y1, y2):
plt.annotate('%0.2f' % var.max(), xy=(1, var.max()), zorder=1, xytext=(8, 0),
xycoords=('axes fraction', 'data'),
textcoords='offset points',
bbox=dict(boxstyle="round", fc=colors[0], ec=colors[0],))
ax2 = ax.twinx()
df.plot(x=df.columns[0], y=df.columns[2], ax=ax2, legend=False, color='#fa8174', zorder=0,linewidth=1)
ax.figure.legend(prop=subtitle_font)
ax.grid(True, color="white",alpha=0.2)
pack = [df.columns[1], df.columns[2], freq[0]]
plt.text(0.01, 0.95,'{0} v {1} - ({2})'.format(df.columns[1], df.columns[2], freq[0]),
horizontalalignment='left',
verticalalignment='center',
transform = ax.transAxes,
zorder=10,
fontproperties=subtitle_font)
ax.text(0.01,0.02,"Sources: FRED, Quandl, #Paul92s",
color="white",fontsize=10,
horizontalalignment='left',
transform = ax.transAxes,
verticalalignment='center',
zorder=20,
fontproperties=subtitle_font)
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator((4,7,10)))
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
ax.xaxis.set_minor_formatter(ticker.NullFormatter()) # matplotlib.dates.DateFormatter("%m")
plt.setp(ax.get_xticklabels(), rotation=0, ha="center", zorder=-1)
plt.setp(ax2.get_yticklabels(), rotation=0, zorder=-1)
plt.setp(ax.get_yticklabels(), rotation=0, zorder=-1)
plt.gcf().set_size_inches(14,7)
ax.set_xlabel('Data as of; {0}'.format(df['Date'].max().strftime("%B %d, %Y")), fontproperties=subtitle_font)
y1 =df.loc[:, df.columns[2]].tail(1)
y2= df.loc[:, df.columns[1]].tail(1)
for var in (y1, y2):
plt.annotate('%0.2f' % var.max(), xy=(1, var.max()), zorder=1,xytext=(8, 0),
xycoords=('axes fraction', 'data'),
textcoords='offset points',
bbox=dict(boxstyle="round", fc="#fa8174", ec="#fa8174"))
plt.title('{0}'.format("FRED Velocity of M2 Money Stock v Trade Weighted U.S. Dollar Index: Broad"),fontproperties=heading_font)
ax.texts.append(ax.texts.pop())
ax.set_facecolor('#181818')
ax.figure.set_facecolor('#181818')
plt.rcParams['axes.axisbelow'] = True
I don't figure out why zorder doesn't work, but you can directly set the label style of tick labels:
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import rand
import matplotlib.patches as mpatches
fig, ax = plt.subplots(1, 1)
ax.plot(rand(100), '^', color='r')
for label in ax.get_xticklabels():
label.set_bbox(dict(facecolor='orange'))
ax1 = ax.twinx()
ax1.plot(rand(100), 'o', color='b')
index_to_add_bbox = [2, 4]
ax1_labels = ax1.get_yticklabels()
for i in index_to_add_bbox:
ax1_labels[i].set_bbox(dict(boxstyle='Circle', facecolor='orange'))
plt.show()

Create separate distplot from countplot

How can I create distplot from countplot
plt.rcdefaults()
%config InlineBackend.figure_format='retina'
sns.set_style('darkgrid')
ax = sns.countplot(x='Age',hue='Gender',data=df,edgecolor="None")
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width()/2.
y = rect.get_height()
try:
ax.annotate("{}".format(int(y)), (x,y), ha='center', va='bottom', clip_on=True)
except:
pass
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato',weight='bold')
plt.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.savefig('files\\Countplot_for_Age(Gender).jpg')
I want distplot for 2 Genders either in same plot or separately
Any suggestions or help will be highly appreciable
The x-axis of a countplot is categorical: it puts one bar for each encountered age, skipping bars when there are no rows for a certain age (21 and 23 in the example). Internally the bars are numbered as 0, 1, 2, ...
The y-axis is the count, which is proportional to the number of rows.
For a distplot, the x-axis are the ages themselves, and the y-axis is a probability distribution, which usually are quite small numbers (the area under the curve is normalized to be 1).
So, as both the x-axis and the y-axis are different, it is better to use separate subplots.
A distplot can be generated directly from the given data. Passing the same ax results in two distplots in the same subplot. A distplot is a combination of a histogram and a kdeplot. If the histogram isn't needed, hist=False leaves
it out, or the kdeplot can be called directly. The shade=True option adds shading to the plot.
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
NF = 50
NM = 10
df = pd.DataFrame({'Age': np.concatenate([np.random.randint(13, 20, NF) + np.random.randint(2, 7, NF),
np.random.randint(15, 23, NM)]),
'Gender': np.repeat(['female', 'male'], (NF, NM))})
df['Age'] = df['Age'].where((df['Age'] != 21) & (df['Age'] != 23), 20)
sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2, figsize=(12, 4))
ax = sns.countplot(x='Age', hue='Gender', data=df, edgecolor="None", ax=axs[0])
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
for rect in ax.patches:
x = rect.get_x() + rect.get_width() / 2.
y = rect.get_height()
ax.annotate(f"{y:.0f}", (x, y), ha='center', va='bottom', clip_on=True)
ax.set_xlabel('Age', color='green')
ax.set_ylabel('Count', color='green')
ax.set_title('Countplot for Age(Gender)', color='tomato', weight='bold')
ax.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
for gender in ('female', 'male'):
# ax2 = sns.kdeplot(df[df['Gender'] == gender]['Age'], shade=True, ax=axs[1], label=gender)
ax2 = sns.distplot(df[df['Gender'] == gender]['Age'], hist=False, kde_kws={'shade': True}, ax=axs[1], label=gender)
ax2.set_axisbelow(True)
ax2.set_xlabel('Age', color='green')
ax2.set_ylabel('probability distribution', color='green')
ax2.set_title('Distplot for Age(Gender)', color='tomato', weight='bold')
ax2.legend(title='Gender', fontsize='large', loc='upper right').get_frame().set_facecolor('white')
plt.tight_layout()
plt.show()

How do I animate this graph so that the dot moves and the green line plots over a different range for each element in loop?

I am trying to make this animated so that the dot and the green line move due to the for loop. This code displays 3 different graphs one below the other. The middle graph has no animation section.
x =lag_range
count = 0
plt.ion()
fig, ax = plt.subplots()
for b in x:
plt.subplot(311)
plt.plot(x,pear_corr, color='b', linewidth=1.5, label ='Pearson')
plt.plot(x,spear_corr, color ='r', linewidth=1.5, label='Spearman')
plt.plot(x[count],pear_corr[count],'yo')
plt.legend()
axes = plt.gca()
plt.ylabel('Correlation coefficients')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
axes.set_ylim(-1,1)
plt.subplot(312)
plt.plot(x,pear_p_values, color='b', linewidth=1.5)
plt.plot(x,spear_p_values, color ='r', linewidth=1.5)
axes = plt.gca()
plt.ylabel('P values')
plt.xlabel('Lag times /days')
axes.set_xlim([min(lag_list),last])
plt.subplot(313)
ax1 = plt.subplot(313)
x_for_p = range(len(x_prices))
ax1.plot(x_for_p, x_prices, color ='grey', linewidth=1.5)
ax1.set_ylabel('Share price', color ='grey')
ax1.tick_params('y', colors='grey')
ax1.set_xlabel('Days')
axes = plt.gca()
axes.set_xlim([min(lag_list),(2*last)])
ax2 = ax1.twinx()
x_for_den = range(b,(b+len(x_prices)))
ax2.plot(x_for_den, y_planes, color='g', linewidth=1.5)
ax2.set_ylabel('Plane density', color='g')
ax2.tick_params('y', colors='g')
count += 1
plt.pause(2)
plt.draw()
cross_corr2_vis(prices, density_p3)
If you could share a working code or just definitions of variables pear_corr, spear_corr, etc., the following code might have not resulted in this simple animation:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
N_points = 1000
x = np.linspace(0,2.*np.pi,N_points)
y = np.sin(x)
fig, ax = plt.subplots()
ax.set_xlim([0,2.*np.pi])
ax.set_ylim([-1,1])
line, = ax.plot( [],[], lw=2, color='g')
sctr = ax.scatter([],[], s=100, color='r')
def animate(i):
line.set_ydata(y[:i+1]) # update
line.set_xdata(x[:i+1])
sctr.set_offsets((x[i],y[i]))
return line,sctr
ani = animation.FuncAnimation(fig, animate, N_points, interval=5, blit=True)
plt.show()

Categories