How to insert Normal distribution in multiple subplots? - python

I have several plots created with subplots, and I would like to overlay a normal distribution curve on each one. I haven't had much luck finding information on how to do this for each individual subplot.
This is what my subplots look like so far:
But I want it to look like this:
This is the code I'm working with so far:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
def grade_distribution(csv_path='assets/class_grades.csv'):
    """Draw one bar chart of grade counts per course in a 3x2 grid.

    Letter grades in each course column are converted to grade points,
    the occurrences of each grade point are counted, and the counts are
    drawn as bars on that course's subplot.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file holding one ``<COURSE>_grade`` column per
        course. Defaults to the path used originally.
    """
    # Letter grades and their grade-point values, paired by position.
    VALID_GRADES = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'E']
    NUMERIC_GRADES = [4.3, 4.0, 3.7, 3.3, 3.0, 2.7, 2.3, 2.0, 1.7, 1.3, 1.0, 0.7, 0.0]
    grade_point_dict = dict(zip(VALID_GRADES, NUMERIC_GRADES))

    # Column to plot -> title of its subplot (insertion order = plot order).
    titles = {
        'ECON101_grade': "ECON 101 Grade",
        'EECS545_grade': "EECS 545 Grade",
        'ENGLISH125_grade': "ENGLISH 125 Grade",
        'MATH217_grade': "MATH 217 Grade",
        'DATASCI306_grade': "DATASCI 306 Grade",
        'STATS250_grade': "STATS 250 Grade",
    }
    cols_to_plot = list(titles)

    df = pd.read_csv(csv_path)
    # Convert all grade columns in one pass instead of running a
    # whole-frame replace once per column.
    df[cols_to_plot] = df[cols_to_plot].replace(grade_point_dict)

    fig, axs = plt.subplots(nrows=3, ncols=2)
    fig.set_size_inches(30, 20)
    for col, ax in zip(cols_to_plot, axs.flatten()):
        counts = df[col].value_counts()
        ax.bar(counts.index, list(counts))
        # Set the final title directly; it used to be set to the raw
        # column name here and overwritten after the loop.
        ax.set_title(titles[col])
        ax.tick_params(axis='x', labelrotation=30)
    plt.show()


grade_distribution()

Related

How to create a 3D graph with filled-below curves and position the y-ticks?

I am making my first 3D graph in Python on an Anaconda Jupyter Notebook. The idea is to obtain a graph with a format similar to the following:
The code I made is as follows:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as tkr
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
fig = plt.figure()
# Create the 3D axes through the figure: in recent Matplotlib releases
# Axes3D(fig) no longer adds itself to the figure, which leaves the plot empty.
ax = fig.add_subplot(projection='3d')
def func(x, pos):
    """Format a tick value with a decimal comma instead of a decimal point.

    Parameters
    ----------
    x : number
        Tick value to format.
    pos : int or None
        Tick position; unused, but part of the two-argument signature
        this function is called with as a tick formatter.

    Returns
    -------
    str
        ``str(x)`` with its first ``.`` replaced by ``,``. A value whose
        string form contains no dot (e.g. an integer) is returned
        unchanged instead of raising ``ValueError``.
    """
    return str(x).replace('.', ',', 1)
# Show tick values with a decimal comma instead of a decimal point.
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax.yaxis.set_major_formatter(x_format)

# Read the first 16 sheet columns as A..P; the column triples
# (A, B, C), (D, E, F), (G, H, I) and (J, K, L) are plotted below.
df = pd.read_excel('EDS 7.xlsx', header=None, usecols=list(range(16)),
                   names=list('ABCDEFGHIJKLMNOP'))

# rcParams only applies to figures created afterwards, so the existing
# figure is resized explicitly as well.
plt.rcParams["figure.figsize"] = [14.5, 10]
fig.set_size_inches(14.5, 10)

# One named y tick per curve.
nomes = ['Triângulo de MoSe$_2$', 'Losango branco', 'Losango cinzento', 'Fundo']
ax.set_yticks(range(0, 4))
ax.set_yticklabels(nomes)
ax.tick_params(axis='z', pad=10)
ax.tick_params(axis='y', pad=20)

# Draw each (x, y, z) column triple as a line in its own colour.
curves = [
    ('A', 'B', 'C', 'blue'),
    ('D', 'E', 'F', 'red'),
    ('G', 'H', 'I', 'green'),
    ('J', 'K', 'L', 'orange'),
]
for x_col, y_col, z_col, color in curves:
    ax.plot(xs=df[x_col], ys=df[y_col], zs=df[z_col], zdir='z',
            label='ys=0, zdir=z', color=color, linewidth=3)

y = df['M']  # not used further in this snippet

# Set the limits on `ax` itself so they do not depend on which axes is current.
ax.set_xlim(0.0, 4.0)
ax.set_zlim(0, 1400)
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)

# White panes with dashed silver grid lines on all three axes.
# The w_xaxis/w_yaxis/w_zaxis aliases were removed from Matplotlib;
# ax.xaxis/yaxis/zaxis are the same axis objects.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    axis._axinfo["grid"]['linestyle'] = '--'
    axis._axinfo["grid"]['color'] = 'silver'

ax.set_xlabel('Energia (keV)', fontsize=20, labelpad=18)
ax.set_zlabel('Contagens', fontsize=20, labelpad=18)
plt.savefig('output.png', dpi=500, bbox_inches='tight')
Excel file:
The graphic I got is this:
I am having two problems that I am unable to solve:
The underside of the lines is not filled with color and I would like them to be opaque.
In the yy axis, the strings are too far to the left and for example the string "Triângulo de MoSe2" of the yy axis is to the left of the number 4.0 of the xx axis. I would like the y-axis strings to be more centered.
How can I adjust the code for the graph to have these two characteristics that I lack?
Here is an example to create something similar to the desired plot. Some toy data are used to create 4 curves.
To fill the area below the curves, the approach from this tutorial is used. For the y tick labels, it seems ax.set_yticklabels(..., ha='left') together with ax.tick_params(axis='y', pad=0) get quite close to the desired result.
To make the polygons fully opaque, set the opaqueness alpha in PolyCollection(...) to a value closer to 1. Usually a small bit of transparency gives a better feeling of being a 3D plot. You can leave out the call to ax.plot(...) if the thicker "border" isn't needed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
def polygon_under_graph(xlist, ylist):
    """Return the vertices of the polygon between a curve and the zero line.

    Parameters
    ----------
    xlist, ylist : sequence
        x and y coordinates of the curve, paired by position.

    Returns
    -------
    list of tuple
        ``(xlist[0], 0.)``, then every ``(x, y)`` point of the curve, then
        ``(xlist[-1], 0.)``. An empty ``xlist`` gives an empty list.
    """
    # len() rather than truthiness: callers pass NumPy arrays, whose
    # truth value is ambiguous.
    if len(xlist) == 0:
        return []
    return [(xlist[0], 0.), *zip(xlist, ylist), (xlist[-1], 0.)]
# Toy data: four x columns (A, D, G, J) spanning 0..4 and four random
# z columns (C, F, I, L) with values between 1 and 1400.
x_cols = {col: np.linspace(0, 4, 50) for col in [*'ADGJ']}
z_cols = {col: 1400 ** np.random.rand(50) for col in [*'CFIL']}
df = pd.DataFrame({**x_cols, **z_cols})

# Set the default size before creating the figure; rcParams does not
# resize a figure that already exists.
plt.rcParams["figure.figsize"] = [14.5, 10]
fig = plt.figure()
# Axes3D(fig) no longer attaches itself to the figure in recent
# Matplotlib releases, so the 3D axes is created through the figure.
ax = fig.add_subplot(projection='3d')

nomes = ['Triângulo de MoSe$_2$', 'Losango branco', 'Losango cinzento', 'Fundo']
ax.set_yticks(range(0, 4))
# Left-aligned labels with zero padding sit closer to the y axis.
ax.set_yticklabels(nomes, ha='left')
ax.tick_params(axis='z', pad=10)
ax.tick_params(axis='y', pad=0)

color_list = ['cornflowerblue', 'crimson', 'limegreen', 'orange']
verts = []
ys = [3, 2, 1, 0]  # y position of each curve, in plotting order
for x_col, z_col, y, color in zip(['A', 'D', 'G', 'J'], ['C', 'F', 'I', 'L'], ys, color_list):
    xs = df[x_col].to_numpy()
    zs = df[z_col].to_numpy()
    # Thick outline of the curve at constant y.
    ax.plot(xs=xs, ys=np.repeat(y, len(xs)), zs=zs, zdir='z', color=color, linewidth=3)
    # Matching polygon that fills the area below the curve.
    verts.append(polygon_under_graph(xs, zs))

# Place all filled polygons at once, one per y position.
poly = PolyCollection(verts, facecolors=color_list, alpha=.8)
ax.add_collection3d(poly, zs=ys, zdir='y')
plt.show()
About having the outlines of the 3 panes in black, some experimenting with 3D figures from Matplotlib visibility of pane edge leads to the following. It is unclear to me why that hack works (and other approaches don't).
def lims(mplotlims, scale=1.021):
    """Return axis limits stretched about their centre.

    Parameters
    ----------
    mplotlims : sequence of two numbers
        The ``(low, high)`` limits of an axis.
    scale : float
        Factor applied to the span ``high - low``; the default is the
        value that was previously hard-coded.

    Returns
    -------
    tuple
        ``(high - span * scale, low + span * scale)``; with ``scale=1``
        this is the input limits unchanged.
    """
    offset = (mplotlims[1] - mplotlims[0]) * scale
    return mplotlims[1] - offset, mplotlims[0] + offset
# `art3d` is used below, but the import lines shown with this answer
# bring in only Axes3D, so it is imported here.
from mpl_toolkits.mplot3d import art3d

# Slightly stretched limits of the three axes.
xlims, ylims, zlims = lims(ax.get_xlim()), lims(ax.get_ylim()), lims(ax.get_zlim())

# A black two-point collection running along z at the lowest x and y.
i = np.array([xlims[0], ylims[0], zlims[0]])
f = np.array([xlims[0], ylims[0], zlims[1]])
p = art3d.Poly3DCollection(np.array([[i, f]]))
p.set_color('black')
ax.add_collection3d(p)

# Opaque black edges on all three panes.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.pane.set_edgecolor('#000000FF')

How to assign color to error bar caps? [Matplotlib]

I am new at Matplotlib and would like to assign colors to error bar caps...in my data (attached) the mean values are 'numbers' and the SD ('error') is in the column 'sd'. I grouped data by 'strain' (4 categories; mc, mut1, etc.). Colors are 'strains' (lines). The code below works BUT When I use "capsize" to add caps it throws an error...
I want the caps to have the same color as lines (from color vector "c"), any way? Thanks!
The file is https://anonfiles.com/d8A7m4F5o0/mutdp_csv
# Plot the mean of 'numbers' per strain over time as dashed lines, then add
# error bars from the 'sd' column.
# NOTE(review): the snippet shows no imports; it assumes pandas is available
# as `pd` and matplotlib.pyplot as `plt` - confirm.
muts = pd.read_csv('mutdp.csv')
# Subset of the rows.
# NOTE(review): the slice 1:28 skips row 0 and yields 27 rows, while the
# colour list built for errorbar below has 4 * 7 = 28 entries - confirm the
# intended row range.
gr=muts[1:28]
fig, ax = plt.subplots(figsize=(12,9.9))
# One colour per strain line.
c=['b','y','r','g']
# Mean of 'numbers' per (time, strain), reshaped to one column per strain.
grstrain=gr.groupby(['time','strain']).mean()['numbers'].unstack()
grstrain.plot.line(ax=ax, style=['-o','-o','-o','-o'],color=c, ls = '--', linewidth=2.7)
# Error bars: `color` receives a list that repeats each colour 7 times; the
# question reports that this call fails once `capsize` is added.
ax.errorbar(gr.time, gr.numbers, yerr=gr.sd, ls='', color=[i for i in c for _i in range(7)], capsize=3, capthick=3)
# Logarithmic y axis limited to 0.04..3.
plt.yscale('log')
plt.ylim(0.04, 3)
# Display the figure.
plt.show()
As ax.errorbar only accepts one fixed color, it could be called in a loop, once for each color. The following code creates some random data to show how the loop could be written:
from matplotlib import pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
# Synthetic stand-in data: 4 strains x 7 time points, with a slowly
# drifting mean ('numbers') and a random standard deviation ('sd').
gr = pd.DataFrame({
    'time': np.tile(range(0, 14, 2), 4),
    'strain': np.repeat(['mc', 'mut1', 'mut2', 'mut3'], 7),
    'numbers': 0.1 + np.random.uniform(-0.01, 0.06, 28).cumsum(),
    'sd': np.random.uniform(0.01, 0.05, 28),
})

fig, ax = plt.subplots(figsize=(12, 9.9))
colors = ['b', 'y', 'r', 'g']

# Mean per (time, strain), reshaped to one column per strain.
grstrain = gr.groupby(['time', 'strain'])['numbers'].mean().unstack()
grstrain.plot.line(ax=ax, style=['-o', '-o', '-o', '-o'], color=colors, ls='--', linewidth=2.7)

# errorbar takes a single colour per call, so it is called once per strain.
# Iterating the plotted columns keeps each strain's bars and caps in the
# same colour as its line.
for strain, color in zip(grstrain.columns, colors):
    grs = gr[gr.strain == strain]
    ax.errorbar(grs.time, grs.numbers, yerr=grs.sd, ls='', color=color, capsize=3, capthick=3)

plt.yscale('log')
plt.ylim(0.04, 3)
plt.show()

Put forward a chart with matplotlib

The script below creates a chart with a table in the upper-left corner.
How can we bring the table to the front so that the Y axis is not visible through it?
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
fig = plt.figure(figsize=(13, 4))
ax = fig.gca()


def f(x):
    """Return 1.2 raised to the power ``x``."""
    return np.power(1.2, x)


# Exponential curve sampled at the integers 0..99.
x0 = np.arange(0, 100)
y0 = f(x0)
ax.plot(x0, y0)
ax.legend(['f(x)'])
ax.set_title('Exp')

# Five labelled rows holding one text cell each.
index = ['index1', 'index2', 'index3', 'index4', 'index5']
values = [['1'], ['2'], ['3'], ['4'], ['5']]
table = ax.table(
    cellText=values,
    colWidths=[0.15, 0.15],
    rowLabels=index,
    loc='upper left',
)
plt.show()
Use zorder to control its position.
# Same table call as in the question, with zorder=10 added to control the
# table's position in the drawing order.
table = ax.table(cellText=values, colWidths=[0.15, 0.15], rowLabels=index, loc='upper left', zorder=10)

plotting multiple time series with different scales

I'm trying to plot 4 different dataframes over time to highlight possible relations between them.
I've met several difficulties:
different scales
same values overlap each other (IORR and IOER curves)
curves have big "points" making them unreadable
can't shift bar plots x values with df.index + 0.1 as I get an error
regarding the point 2, trying to shift the bars between the df IORR and IOER this way:
# Attempt to shift the IOER bars by 0.1 along x; adding a float to the
# DatetimeIndex is the operation named in the reported TypeError.
p1 = ax1.bar(df_ioer.index + 0.1, df_ioer.Value, ls='dashed', label='IOER', color='g')
ax1.xaxis_date()
I get this error:
TypeError: unsupported operand type(s) for +: 'DatetimeIndex' and 'float'
overall it's getting a little too much.
Could someone give a few pointers around this issue to get an intuitive representation of the data?
here is the code:
import quandl
from cycler import cycler
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
# Placeholder API key; it must be replaced by a real key before running.
quandl.ApiConfig.api_key = "Get Free Key From Quandl.com"
# Download the four series by their dataset codes.
df_dff = quandl.get("FRED/DFF")
df_iorr = quandl.get("FRED/IORR")
df_ioer = quandl.get("FRED/IOER")
df_gdp = quandl.get("FRED/GDP")
# Keep only rows dated on or after the first IORR observation, so all four
# frames start at the same point.
df_dff = df_dff[df_dff.index >= df_iorr.index.min()]
# Comparing df_iorr with its own minimum keeps every row, so this line leaves
# the frame unchanged.
df_iorr = df_iorr[df_iorr.index >= df_iorr.index.min()]
df_ioer = df_ioer[df_ioer.index >= df_iorr.index.min()]
df_gdp = df_gdp[df_gdp.index >= df_iorr.index.min()]
# Reference for the multiple-y-axis layout:
# https://matplotlib.org/gallery/ticks_and_spines/multiple_yaxis_with_spines.html
# Set the default colour cycle used by axes.
plt.rc('axes', prop_cycle=(cycler('color', ['r', 'c', 'm', 'y', 'k', 'b', 'g', 'r', 'c', 'm'])))
def make_patch_spines_invisible(ax):
    """Keep the frame of ``ax`` on but hide its background patch and spines.

    Parameters
    ----------
    ax : matplotlib axes
        The axes to modify in place. Callers can afterwards re-enable the
        single spine they want to show.
    """
    ax.set_frame_on(True)
    ax.patch.set_visible(False)
    for sp in ax.spines.values():
        sp.set_visible(False)
fig, ax0 = plt.subplots()

# IORR as bars on the base axes, with x interpreted as dates.
p0 = ax0.bar(df_iorr.index, df_iorr.Value, ls='dashed', label='IORR', color='r')
ax0.xaxis_date(tz=None)

# IOER as bars on a twin axes sharing x with ax0.
ax1 = ax0.twinx()
p1 = ax1.bar(df_ioer.index, df_ioer.Value, ls='dashed', label='IOER', color='g')
ax1.xaxis_date(tz=None)

# DFF on its own twin axes. It was previously drawn on ax0, which left
# ax2 (created and styled just below for this series) empty and forced
# DFF onto the IORR scale.
ax2 = ax0.twinx()
# NOTE: plot_date draws 'o' markers by default, in addition to the solid line.
p2, = ax2.plot_date(df_dff.index, df_dff.Value, ls='solid', tz=None, xdate=True, ydate=False, label='DFF', color='b')
# Move ax2's right spine outwards so it does not overlap the other right-hand axes.
ax2.spines["right"].set_position(("axes", 1.2))
make_patch_spines_invisible(ax2)
ax2.spines["right"].set_visible(True)

# GDP on a further twin axes.
ax3 = ax0.twinx()
p3, = ax3.plot_date(df_gdp.index, df_gdp.Value, ls='solid', tz=None, xdate=True, ydate=False, label='GDP', color='y')

# One legend for all four series, using the labels given above.
lines = [p0, p1, p2, p3]
ax0.legend(lines, [artist.get_label() for artist in lines])
plt.show()
The result looks like this and is nowhere near good enough.
Any help greatly appreciated!

Separate two groups of bars in matplotlib

Do you know if it is possible to separate the bars into two groups of different sizes, but maintaining both in the same plot? I have this code:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
# Bar labels, shown under the bars.
objects = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
# Evenly spaced x positions 0..6, one per label.
y_pos = np.arange(len(objects))
# Bar heights, in the same order as the labels.
performance = [15.3, 25.8, 37.1, 50.0, 15.0, 18.5, 28.9]
plt.bar(y_pos, performance, align='center', alpha=0.5)
# Put each label at its bar's x position.
plt.xticks(y_pos, objects)
plt.ylabel('Reduction Error')
plt.title("")
plt.show()
And I would like to have A and B close together, then some space, and then all the other bars.
I found this issue Function to create grouped bar plot, but I would like to keep each name under the bar and not group them as in the example.
Thank you for your help!
If I understood you correctly, you can do in this way:
objects = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
# Explicit x positions: A and B sit at 1 and 1.8, and the remaining bars
# start at 5, which leaves a gap between the two groups.
x = [1,1.8,5,6,7,8,9]
performance = [15.3, 25.8, 37.1, 50.0, 15.0, 18.5, 28.9]
plt.bar(x, performance, align='center', alpha=0.5)
# Each label stays under its own bar because the ticks use the same positions.
plt.xticks(x, objects)
plt.ylabel('Reduction Error')
plt.title("")
plt.show()
Or use 2 instead of 1.8 in x to have some space between A and B

Categories