Related
I am using Matplotlib for a back to back plot:
import numpy as np
import matplotlib.pyplot as plt
# Sample data: two series with one value per category row.
A = np.array([3, 6, 9, 4, 2, 5])
B = np.array([2, 8, 1, 9, 7, 3])
X = np.arange(6)

# Draw both series on the same rows. B is negated so its bars grow to the
# left of zero while A grows to the right, giving the back-to-back layout.
for values, opacity, colour in ((A, 0.9, 'y'), (-B, 0.6, 'c')):
    plt.barh(X, values, align='center', alpha=opacity, color=colour)

# Label each row with its category letter and hide the x-axis ticks.
plt.yticks([0, 1, 2, 3, 4, 5], ['A', 'B', 'C', 'D', 'E', 'F'])
plt.xticks([], [])
plt.show()
I am wondering how to generate d3 code using python?
Is Altair the right option?
Here is an example of generating a similar chart with Altair:
import pandas as pd
import altair as alt
# Build the frame from plain lists so this example only needs pandas and
# altair; the original called np.array although numpy is not imported here.
df = pd.DataFrame({
    "A": [3, 6, 9, 4, 2, 5],
    "B": [2, 8, 1, 9, 7, 3],
    "X": ['A', 'B', 'C', 'D', 'E', 'F'],
})

# Negate A so its bars extend to the left of zero, then fold A and B into
# long form (one row per bar) so the series name can drive the colour.
alt.Chart(df).transform_calculate(
    A=-alt.datum.A
).transform_fold(
    ["A", "B"], as_=["key", "value"]
).mark_bar().encode(
    x=alt.X("value:Q", axis=None),
    y='X:N',
    color="key:N"
).properties(
    width=300,
    height=200
)
So I have multiple plots, using subplot and I would like to add normal distribution on it. I haven't had much luck finding information on how to do this for each individual subplot.
This is what my subplots look like so far:
But I want it to look like this:
This is the code I'm working with so far:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
def grade_distribution(csv_path='assets/class_grades.csv'):
    """Show a 3x2 grid of bar charts with the grade counts of six courses.

    Letter grades in each course column are converted to grade points and
    the number of occurrences of every grade point is drawn as a bar.

    Args:
        csv_path: Path of the CSV file to read. It must contain the six
            ``*_grade`` columns listed in ``titles`` below.
    """
    # Letter grade -> grade point value.
    grade_points = {
        'A+': 4.3, 'A': 4.0, 'A-': 3.7,
        'B+': 3.3, 'B': 3.0, 'B-': 2.7,
        'C+': 2.3, 'C': 2.0, 'C-': 1.7,
        'D+': 1.3, 'D': 1.0, 'D-': 0.7,
        'E': 0.0,
    }
    # Column to plot -> subplot title, in row-major subplot order.
    titles = {
        'ECON101_grade': "ECON 101 Grade",
        'EECS545_grade': "EECS 545 Grade",
        'ENGLISH125_grade': "ENGLISH 125 Grade",
        'MATH217_grade': "MATH 217 Grade",
        'DATASCI306_grade': "DATASCI 306 Grade",
        'STATS250_grade': "STATS 250 Grade",
    }

    df = pd.read_csv(csv_path)

    fig, axs = plt.subplots(nrows=3, ncols=2)
    fig.set_size_inches(30, 20)

    for (col, title), ax in zip(titles.items(), axs.flatten()):
        # Replace only within this column; values that are not a known
        # letter grade are left untouched, as with the original per-column
        # DataFrame.replace calls.
        counts = df[col].replace(grade_points).value_counts()
        ax.bar(counts.index, counts.to_numpy())
        ax.set_title(title)
        ax.tick_params(axis='x', labelrotation=30)

    plt.show()


grade_distribution()
I am making my first 3D graph in Python on an Anaconda Jupyter Notebook. The idea is to obtain a graph with a format similar to the following:
The code I made is as follows:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
fig = plt.figure()
# Ask the figure for a 3D axes instead of instantiating Axes3D(fig) directly:
# Axes3D adding itself to the figure was deprecated in Matplotlib 3.4 and
# later dropped, which leaves the saved figure empty.
ax = fig.add_subplot(projection='3d')
def func(x, pos):
    """Format a tick value with a decimal comma instead of a decimal point.

    Matches the ``(value, position)`` signature expected by
    ``matplotlib.ticker.FuncFormatter``.

    Args:
        x: Tick value; it is converted with ``str``.
        pos: Tick position (unused).

    Returns:
        ``str(x)`` with its first ``'.'`` replaced by ``','``. Text without
        a dot (e.g. ``'3'`` or ``'1e-07'``) is returned unchanged rather
        than raising ``ValueError``.
    """
    return str(x).replace('.', ',', 1)
# Imported here because the import block of this snippet provides neither
# ``tkr`` nor ``pd``, both of which are used below.
import matplotlib.ticker as tkr
import pandas as pd

# Show x and y tick values with a decimal comma.
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax.yaxis.set_major_formatter(x_format)

# Read the first 16 spreadsheet columns and name them 'A'..'P'.
df = pd.read_excel('EDS 7.xlsx', header=None, usecols=list(range(16)),
                   names=list('ABCDEFGHIJKLMNOP'))

# rcParams only affects figures created afterwards, so the already existing
# figure is resized explicitly as well.
plt.rcParams["figure.figsize"] = [14.5, 10]
fig.set_size_inches(14.5, 10)

# Replace the numeric y ticks 0..3 with the sample names.
nomes = ['Triângulo de MoSe$_2$', 'Losango branco', 'Losango cinzento', 'Fundo']
ax.set_yticks(range(0, 4))
ax.set_yticklabels(nomes)
ax.tick_params(axis='z', pad=10)
ax.tick_params(axis='y', pad=20)

# One curve per (x, y, z) column triple, each in its own colour.
curves = [
    ('A', 'B', 'C', 'blue'),
    ('D', 'E', 'F', 'red'),
    ('G', 'H', 'I', 'green'),
    ('J', 'K', 'L', 'orange'),
]
for x_col, y_col, z_col, colour in curves:
    ax.plot(xs=df[x_col], ys=df[y_col], zs=df[z_col], zdir='z',
            label='ys=0, zdir=z', color=colour, linewidth=3)

# Set the limits on the 3D axes itself rather than through pyplot's
# "current axes".
ax.set_xlim(0.0, 4.0)
ax.set_zlim(0, 1400)
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)

# White panes and dashed silver grid lines on all three axes. The old
# ``ax.w_xaxis`` style aliases are gone from current Matplotlib, so the axis
# objects are used directly.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.set_pane_color((1.0, 1.0, 1.0, 1.0))
    # NOTE: _axinfo is private Matplotlib state and may change between
    # releases.
    axis._axinfo["grid"]['linestyle'] = '--'
    axis._axinfo["grid"]['color'] = 'silver'

ax.set_xlabel('Energia (keV)', fontsize=20, labelpad=18)
ax.set_zlabel('Contagens', fontsize=20, labelpad=18)
fig.savefig('output.png', dpi=500, bbox_inches='tight')
Excel file:
The graphic I got is this:
I am having two problems that I am unable to solve:
The underside of the lines is not filled with color and I would like them to be opaque.
In the yy axis, the strings are too far to the left and for example the string "Triângulo de MoSe2" of the yy axis is to the left of the number 4.0 of the xx axis. I would like the y-axis strings to be more centered.
How can I adjust the code for the graph to have these two characteristics that I lack?
Here is an example to create something similar to the desired plot. Some toy data are used to create 4 curves.
To fill the area below the curves, the approach from this tutorial is used. For the y tick labels, it seems ax.set_yticklabels(..., ha='left') together with ax.tick_params(axis='y', pad=0) get quite close to the desired result.
To make the polygons fully opaque, set the opaqueness alpha in PolyCollection(...) to a value closer to 1. Usually a small bit of transparency gives a better feeling of being a 3D plot. You can leave out the call to ax.plot(...) if the thicker "border" isn't needed.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
def polygon_under_graph(xlist, ylist):
    """Return the vertices of the polygon between a curve and the zero line.

    The outline starts on the baseline below the first x value, follows the
    ``(x, y)`` points of the curve and ends on the baseline below the last
    x value.

    Args:
        xlist: x coordinates of the curve (indexable, non-empty).
        ylist: y coordinates of the curve.

    Returns:
        A list of ``(x, y)`` tuples describing the polygon.
    """
    left_foot = (xlist[0], 0.)
    right_foot = (xlist[-1], 0.)
    outline = [left_foot]
    outline.extend(zip(xlist, ylist))
    outline.append(right_foot)
    return outline
# Toy data: four curves on a shared x grid (columns A, D, G, J) with random
# z values (columns C, F, I, L).
x_cols = {col: np.linspace(0, 4, 50) for col in 'ADGJ'}
z_cols = {col: 1400 ** np.random.rand(50) for col in 'CFIL'}
df = pd.DataFrame({**x_cols, **z_cols})

# Pass the size when the figure is created (setting rcParams afterwards does
# not resize it) and let the figure create the 3D axes: Axes3D(fig) no longer
# attaches itself to the figure on current Matplotlib.
fig = plt.figure(figsize=(14.5, 10))
ax = fig.add_subplot(projection='3d')

# Left-aligned sample names in place of the numeric y ticks 0..3.
nomes = ['Triângulo de MoSe$_2$', 'Losango branco', 'Losango cinzento', 'Fundo']
ax.set_yticks(range(0, 4))
ax.set_yticklabels(nomes, ha='left')
ax.tick_params(axis='z', pad=10)
ax.tick_params(axis='y', pad=0)

color_list = ['cornflowerblue', 'crimson', 'limegreen', 'orange']
verts = []
ys = [3, 2, 1, 0]  # y position of each curve
for x_col, z_col, y, color in zip('ADGJ', 'CFIL', ys, color_list):
    xs = df[x_col].to_numpy()
    zs = df[z_col].to_numpy()
    # Thick outline of the curve at constant y ...
    ax.plot(xs=xs, ys=np.repeat(y, len(xs)), zs=zs, zdir='z', color=color, linewidth=3)
    # ... and the polygon that fills the area underneath it.
    verts.append(polygon_under_graph(xs, zs))

# The polygons are defined in (x, z); zdir='y' with zs=ys places each one at
# the y position of its curve.
poly = PolyCollection(verts, facecolors=color_list, alpha=.8)
ax.add_collection3d(poly, zs=ys, zdir='y')
plt.show()
About having the outlines of the 3 panes in black, some experimenting with 3D figures from Matplotlib visibility of pane edge leads to the following. It is unclear to me why that hack works (and other approaches don't).
def lims(mplotlims):
    """Return the given axis limits widened slightly on both sides.

    Args:
        mplotlims: Pair ``(low, high)`` of axis limits.

    Returns:
        Tuple ``(high - offset, low + offset)`` where ``offset`` is 1.021
        times the span ``high - low``, i.e. each end is pushed outwards by
        2.1 % of the span.
    """
    low = mplotlims[0]
    high = mplotlims[1]
    offset = (high - low) * 1.021
    return high - offset, low + offset
# art3d is used below but was never imported in this snippet.
from mpl_toolkits.mplot3d import art3d

# Slightly widened limits of the current 3D axes.
xlims, ylims, zlims = lims(ax.get_xlim()), lims(ax.get_ylim()), lims(ax.get_zlim())

# Add a black two-point "polygon" running along z at the lower x/y corner.
# This is the hack that makes the pane edges set below show up.
i = np.array([xlims[0], ylims[0], zlims[0]])
f = np.array([xlims[0], ylims[0], zlims[1]])
p = art3d.Poly3DCollection(np.array([[i, f]]))
p.set_color('black')
ax.add_collection3d(p)

# Opaque black outline on all three panes.
for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
    axis.pane.set_edgecolor('#000000FF')
I have this list of "coordinates":
# Eight pairs of values; each pair forms one group of the bar plot.
myList = [
    [0.7366771159874608, 0.6270718232044199],
    [0.7382352941176471, 0.6710182767624021],
    [0.7967479674796748, 0.656441717791411],
    [0.7296511627906976, 0.5727109515260324],
    [0.7992700729927007, 0.5833333333333334],
    [0.750788643533123, 0.5288888888888889],
    [0.851063829787234, 0.7423312883435583],
    [0.767515923566879, 0.5525114155251142],
]
I want to create a grouped bar plot in which the two values of each pair are drawn next to each other. The groups are simply numbered from I to VIII. I searched the internet, but it doesn't seem that other people have had this problem.
My code:
import matplotlib.pyplot as plt
# Split the pairs into their first and second values. The list is named
# ``myList``; the original ``mylist`` spelling raised a NameError.
x, y = zip(*myList)
group_labels = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII']
plt.bar(x, y)
plt.title("Trial")
plt.show()
How should I change my dataset in order to achieve my goal?
Adapted from the docs.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
# Eight pairs of values; each pair becomes one group of two bars.
ar = [
    [0.7366771159874608, 0.6270718232044199],
    [0.7382352941176471, 0.6710182767624021],
    [0.7967479674796748, 0.656441717791411],
    [0.7296511627906976, 0.5727109515260324],
    [0.7992700729927007, 0.5833333333333334],
    [0.750788643533123, 0.5288888888888889],
    [0.851063829787234, 0.7423312883435583],
    [0.767515923566879, 0.5525114155251142],
]
xx, yy = zip(*ar)  # first and second value of every pair
group_labels = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII']

x = np.arange(len(group_labels))  # centre of each group on the x axis
width = 0.35  # width of a single bar

fig, ax = plt.subplots()
# Shift the two bars of a group half a bar width to either side of its centre.
rects1 = ax.bar(x - width/2, xx, width)
rects2 = ax.bar(x + width/2, yy, width)

# Pin the ticks to the group centres before labelling them; without
# set_xticks the labels are attached to the automatically chosen ticks and
# end up under the wrong groups.
ax.set_xticks(x)
ax.set_xticklabels(group_labels)
plt.show()
As far as I can tell, you are trying to plot more than one variable in the same bar plot. The grouping that you mention can actually be handled as plotting 2 different variables, e.g. man and woman.
Unfortunately, this is not natively implemented in matplotlib, but you can use pandas to achieve the result you want. The code is:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Eight pairs of values; each pair becomes one group of two bars.
myList = [
    [0.7366771159874608, 0.6270718232044199],
    [0.7382352941176471, 0.6710182767624021],
    [0.7967479674796748, 0.656441717791411],
    [0.7296511627906976, 0.5727109515260324],
    [0.7992700729927007, 0.5833333333333334],
    [0.750788643533123, 0.5288888888888889],
    [0.851063829787234, 0.7423312883435583],
    [0.767515923566879, 0.5525114155251142],
]
group_labels = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII']

# Two columns (first and second value of each pair), one row per group.
x, y = zip(*myList)
paired_columns = np.c_[x, y]
df = pd.DataFrame(paired_columns, index=group_labels)

# pandas draws one bar per column for every index entry, i.e. grouped bars.
df.plot.bar()
I have a plot where different colors are used for different parameters, and where different line styles are used for different algorithms. The goal is to compare the results of the different algorithms performed with similar parameters. It means in total I use 4 different colors, and 3 different line styles, for a total of 12 plots on the same graph.
I currently build the legend based on colors, associating each color with the corresponding parameter. Now I'd like to display a second legend on the same graph, explaining the meaning of each line style. Is it possible to achieve that? How?
Here is what my code looks like actually:
# Imports needed by this fragment; ``parameters`` and ``algo1``..``algo3``
# are expected to be defined by the surrounding code.
from itertools import cycle
from matplotlib import pyplot

colors = ['b', 'r', 'g', 'c']
# Cycle over the colour list (the original passed the undefined name ``c``).
cc = cycle(colors)
for p in parameters:
    d1 = algo1(p)
    d2 = algo2(p)
    d3 = algo3(p)
    # No pyplot.hold(True): it was removed in Matplotlib 3.0 and successive
    # plot calls draw on the same axes by default.
    c = next(cc)
    # One colour per parameter, one line style per algorithm. The original
    # plotted d1 twice and never plotted d3.
    pyplot.plot(d1, '-', color=c, label="d1")
    pyplot.plot(d2, '--', color=c)
    pyplot.plot(d3, '.-', color=c)
pyplot.legend()
There's a section in the matplotlib documentation on that exact subject.
Here's code for your specific example:
import itertools
from matplotlib import pyplot
colors = ['b', 'r', 'g', 'c']
cc = itertools.cycle(colors)
plot_lines = []  # one [algo1, algo2, algo3] list of line handles per parameter
for p in parameters:
    d1 = algo1(p)
    d2 = algo2(p)
    d3 = algo3(p)
    # No pyplot.hold(True): it was removed in Matplotlib 3.0 and successive
    # plot calls draw on the same axes by default.
    c = next(cc)
    l1, = pyplot.plot(d1, '-', color=c)
    l2, = pyplot.plot(d2, '--', color=c)
    l3, = pyplot.plot(d3, '.-', color=c)
    plot_lines.append([l1, l2, l3])

# First legend: line style -> algorithm, using the lines of the first parameter.
legend1 = pyplot.legend(plot_lines[0], ["algo1", "algo2", "algo3"], loc=1)
# Second legend: colour -> parameter, using the first line of every parameter.
pyplot.legend([l[0] for l in plot_lines], parameters, loc=4)
# The second legend call replaced the first one on the axes, so the first
# legend is added back as a plain artist.
pyplot.gca().add_artist(legend1)
Here's an example of its output:
Here is also a more "hands-on" way to do it (i.e. interacting explicitly with the figure's axes):
import itertools
from matplotlib import pyplot
# One figure with a single axes. pyplot.subplots (plural) returns the
# (figure, axes) pair; the original called plt.subplot, which is neither
# imported under that name nor returns a pair.
fig, axes = pyplot.subplots(1, 1)
colors = ['b', 'r', 'g', 'c']
cc = itertools.cycle(colors)
for p in parameters:
    d1 = algo1(p)
    d2 = algo2(p)
    d3 = algo3(p)
    c = next(cc)
    axes.plot(d1, '-', color=c)
    axes.plot(d2, '--', color=c)
    axes.plot(d3, '.-', color=c)

# The axes stores its lines in plotting order: three consecutive lines (one
# per algorithm) for every parameter.
lines = axes.get_lines()
# Line style -> algorithm, from the three lines of the first parameter.
legend1 = axes.legend(lines[:3], ["algo1", "algo2", "algo3"], loc=1)
# Colour -> parameter, from the first line of every parameter.
legend2 = axes.legend(lines[::3], parameters, loc=4)
# Creating legend2 replaced legend1 as the legend of the axes, so legend1 is
# added back by hand. legend2 is already attached and must not be added again.
axes.add_artist(legend1)
I like this way of writing it since it potentially allows working with different axes in a less obscure way. You can first create your set of legends, and then add them to the axes you want with the method "add_artist". Also, I am just starting with matplotlib, and for me at least it is easier to understand scripts when objects are made explicit.
NB: Be careful, your legends may be cut off when displaying/saving. To solve this issue, use the method axes.set_position([left, bottom, width, height]) to shrink the subplot relative to the figure size and make the legends appear.
What about using a twin ghost axis?
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots()
colors = ['b', 'r', 'g', ]
styles = ['-', '--', '-.']

# The real data: one line per (colour, style) combination.
for cc, col in enumerate(colors):
    for ss, sty in enumerate(styles):
        ax.plot([0, 1], [cc, ss], c=col, ls=sty)

# Invisible proxy lines that only provide the colour legend entries.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
for col in colors:
    ax.plot(np.nan, np.nan, c=col, label=col)

# A twin axes holds the proxy lines for the line-style legend, so each axes
# can carry its own legend.
ax2 = ax.twinx()
for ss, sty in enumerate(styles):
    ax2.plot(np.nan, np.nan, ls=sty,
             label='style ' + str(ss), c='black')
ax2.get_yaxis().set_visible(False)

ax.legend(loc=1)
ax2.legend(loc=3)
plt.show()
You can also use line.get_label()
import matplotlib.pyplot as plt
plt.figure()
colors = ['b', 'r', 'g', 'c']
parameters = [1, 2, 3, 4]
# Pair every parameter with its colour by position. Looking the colour up
# with parameters.index(p) is quadratic and picks the wrong colour when a
# parameter value occurs more than once.
for p, color in zip(parameters, colors):
    plt.plot([1, 10], [1, p], '-', c=color, label='auto label ' + str(p))

lines = plt.gca().get_lines()
include = [0, 1]
# First legend: reuse the labels the selected lines were plotted with.
legend1 = plt.legend([lines[i] for i in include], [lines[i].get_label() for i in include], loc=1)
# Second legend: explicit labels for the remaining lines.
legend2 = plt.legend([lines[i] for i in [2, 3]], ['manual label 3', 'manual label 4'], loc=4)
# The second legend call replaced the first one on the axes, so the first
# legend is added back as a plain artist.
plt.gca().add_artist(legend1)
plt.show()
import matplotlib.pyplot as plt
plt.figure()
colors = ['b', 'r', 'g', 'c']
parameters = [1, 2, 3, 4]
# Pair every parameter with its colour by position. Looking the colour up
# with parameters.index(p) is quadratic and picks the wrong colour when a
# parameter value occurs more than once.
for p, color in zip(parameters, colors):
    plt.plot([1, 10], [1, p], '-', c=color, label='auto label ' + str(p))

lines = plt.gca().get_lines()
include = [0, 1]
# First legend: reuse the labels the selected lines were plotted with.
legend1 = plt.legend([lines[i] for i in include], [lines[i].get_label() for i in include], loc=1)
# Second legend: explicit labels for the remaining lines.
legend2 = plt.legend([lines[i] for i in [2, 3]], ['manual label 3', 'manual label 4'], loc=4)
# The second legend call replaced the first one on the axes, so the first
# legend is added back as a plain artist.
plt.gca().add_artist(legend1)
plt.show()