Repeated Categorical X-Axis Labels in Matplotlib - python

I have a simple question: why are my x-axis labels repeated?
Here's an MWE: X-Axis Labels MWE
a = { # DATA -- 'CATEGORY': (VALUE, ERROR)
'Cats': (1, 0.105),
'Dogs': (2, 0.023),
'Pigs': (2.6, 0.134)
}
compositions = list(a.keys()) # MAKE INTO LIST
a_vals = [i[0] for i in a.values()] # EXTRACT VALUES
a_errors = [i[1] for i in a.values()] # EXTRACT ERRORS
fig = plt.figure(figsize=(8, 6)) # DICTATE FIGURE SIZE
bax = brokenaxes(ylims=((0,1.5), (1.7, 3)), hspace = 0.05) # BREAK AXES
bax.plot(compositions, a_vals, marker = 'o') # PLOT DATA
for i in range(0, len(a_errors)): # PLOT ALL ERROR BARS
bax.errorbar(i, a_vals[i], yerr = a_errors[i], capsize = 5, fmt = 'red') # FORMAT ERROR BAR
Here's stuff I tried:
Manually setting x-axis tick marks using xticks
Converting strings to floats using np.asarray(x, float)
Reducing # ticks using pyplot.locator_params(nbins=3)

You can use bax.locator_params(axis='x', nbins=len(compositions)) to reduce the number of x-ticks so that it matches the length of compositions.
More on locator_params() method, which controls the behavior of major tick locators:
https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.locator_params.html
import matplotlib.pyplot as plt
from brokenaxes import brokenaxes
a = { # DATA -- 'CATEGORY': (VALUE, ERROR)
'Cats': (1, 0.105),
'Dogs': (2, 0.023),
'Pigs': (2.6, 0.134)
}
compositions = list(a.keys()) # MAKE INTO LIST
a_vals = [i[0] for i in a.values()] # EXTRACT VALUES
a_errors = [i[1] for i in a.values()] # EXTRACT ERRORS
fig = plt.figure(figsize=(8, 6)) # DICTATE FIGURE SIZE
bax = brokenaxes(ylims=((0, 1.5), (1.7, 3)), hspace=0.05) # BREAK AXES
bax.plot(compositions, a_vals, marker='o') # PLOT DATA
for i in range(0, len(a_errors)): # PLOT ALL ERROR BARS
bax.errorbar(i, a_vals[i], yerr=a_errors[i], capsize=5, fmt='red') # FORMAT ERROR BAR
bax.locator_params(axis='x', nbins=len(compositions))
plt.show()
Result:

Related

how to plot pairs in different subplots with difference on the side

I want to make a plot in seaborn but I am having some difficulties. The data has 2 variable: time (2 levels) and state (2 levels). I want to plot time on the x axis and state as different subplots, showing individual data lines. Finally, to the right of these I want to show a difference plot of the difference between time 2 and time 1, for each of the levels of state. I cannot do it very well, because I cannot get the second plot to show onto the right. Here has been my try:
import numpy as np
import pandas as pd
import seaborn as sns
# Just making some fake data
ids = [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5]
times = [1,1,2,2,1,1,2,2,1,1,2,2,1,1,2,2,1,1,2,2]
states = ['A', 'B', 'A', 'B'] * 5
np.random.seed(121)
resps = [(i*t) + np.random.normal() for i, t in zip(ids, times)]
DATA = {
'identity': ids,
'time': times,
'state': states,
'resps': resps
}
df = pd.DataFrame(DATA)
# Done with data
g = sns.relplot(
data=df, kind='line',
col='state', x='time', y='resps', units='identity',
estimator=None, alpha=.5, height=5, aspect=.7)
# # Draw a line onto each Axes
g.map(sns.lineplot,"time", "resps", lw=5, ci=None)
# Make a wide data to make the difference
wide = df.set_index(['identity', 'state', 'time']).unstack().reset_index()
A = wide['state']=='A'
B = wide['state']=='B'
wide['diffA'] = wide[A][('resps', 2)] - wide[A][('resps', 1)]
wide['diffB'] = wide[B][('resps', 2)] - wide[B][('resps', 1)]
wide['difference'] = wide[['diffA', 'diffB']].sum(axis=1)
wide = wide.drop(columns=[('diffA', ''), ('diffB', '')])
sns.pointplot(x='state', y='difference', data=wide, join=False)
Output from the first
And output from the second:
Is there no way to put them together? Even though they are different data? I did try to use matplotlib. And then achieved slightly better results but this still had a problem because I wanted the two left plots to have a shared y axis but not the difference. This created lots of work as well, because I want to be flexible for different numbers of the state variable, but only kept to 2 for simplicity. Here is a paint version of what I want to do (sorry for the poor quality), hopefully with some more control over appearance but this is secondary:
Is there a reliable way to do this in a simpler way? Thanks!
The problem is that sns.relplot operates at a figure level. This means it creates its own figure object and we cannot control the axes it uses. If you want to leverage seaborn for the creation of the lines without using "pure" matplotlib, you can copy the lines on matplotlib axes:
import numpy as np
import pandas as pd
import seaborn as sns
# Just making some fake data
ids = [1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5]
times = [1,1,2,2,1,1,2,2,1,1,2,2,1,1,2,2,1,1,2,2]
states = ['A', 'B', 'A', 'B'] * 5
np.random.seed(121)
resps = [(i*t) + np.random.normal() for i, t in zip(ids, times)]
DATA = {
'identity': ids,
'time': times,
'state': states,
'resps': resps
}
df = pd.DataFrame(DATA)
# Done with data
g = sns.relplot(
data=df, kind='line',
col='state', x='time', y='resps', units='identity',
estimator=None, alpha=.5, height=5, aspect=.7)
# # Draw a line onto each Axes
g.map(sns.lineplot,"time", "resps", lw=5, ci=None)
# Make a wide data to make the difference
wide = df.set_index(['identity', 'state', 'time']).unstack().reset_index()
A = wide['state']=='A'
B = wide['state']=='B'
wide['diffA'] = wide[A][('resps', 2)] - wide[A][('resps', 1)]
wide['diffB'] = wide[B][('resps', 2)] - wide[B][('resps', 1)]
wide['difference'] = wide[['diffA', 'diffB']].sum(axis=1)
wide = wide.drop(columns=[('diffA', ''), ('diffB', '')])
# New code ----------------------------------------
import matplotlib.pyplot as plt
plt.close(g.figure)
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2, sharey=ax1)
ax3 = fig.add_subplot(1, 3, 3)
l = list(g.axes[0][0].get_lines())
l2 = list(g.axes[0][1].get_lines())
for ax, g_ax in zip([ax1, ax2], g.axes[0]):
l = list(g_ax.get_lines())
for line in l:
ax.plot(line.get_data()[0], line.get_data()[1], color=line.get_color(), lw=line.get_linewidth())
ax.set_title(g_ax.get_title())
sns.pointplot(ax=ax3, x='state', y='difference', data=wide, join=False)
# End of new code ----------------------------------
plt.show()
Result:

Pandas hist subplots - adding colour bar for the colours of each histogram

I have the columns of a dataframe plotted as separate histogram subplots. For each subplot, I want the bars coloured according to the value in a separate list. I have managed this by making a cmap of it and manually cycling those colours, however, is there a way to add a colorbar to the side to show what values these colours belong to? This is what I have right now:
import pandas as pd
import matplotlib as mpl
from matplotlib.colors import rgb2hex
#reading in the data
df = pd.read_csv( "shortlist_temp.dat", sep='\t',header=(0), usecols=(range(1,13)))
#separate list of values
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0, 4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]
#Colormapping the values. I did not like the result from the original values so I reduced by 4000.
orig_star_teff_norm = [i - 4000 for i in orig_star_teff]
orig_star_teff_norm = [float(i)/max(orig_star_teff_norm) for i in orig_star_teff_norm]
cmap = mpl.cm.plasma
color_list = cmap(orig_star_teff_norm)
color_list2 = [ rgb2hex(color_list[i,:]) for i in range(color_list.shape[0]) ]
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color = color_list2)
ax = df.plot.hist(subplots=True, bins = 12, legend=False, layout=(3, 4), figsize = (15,10), sharey = True)
ax[0,0].set_title('ABOO')
ax[0,1].set_title('EpsVIR')
ax[0,2].set_title('HIP 96014')
ax[0,3].set_title('2M16113361')
ax[1,0].set_title('KIC 3955590')
ax[1,1].set_title('KIC 5113061')
ax[1,2].set_title('KIC 5859492')
ax[1,3].set_title('KIC 6547007')
ax[2,0].set_title('KIC 11444313')
ax[2,1].set_title('KIC 11657684')
ax[2,2].set_title('HD102328-K3III')
ax[2,3].set_title('HD142091-K0III')
Resulting plot
Instead of doing all the normalization steps manually, it probably is easier to create a norm. In this case a norm that maps the values from 4000 till max to the range 0,1 needed for the colormap. Note that converting to hex isn't necessary.
With the norm and the colormap a ScalarMapple can be created with all the necessary information for a colorbar:
import pandas as pd
import matplotlib as mpl
from matplotlib.cm import ScalarMappable
# reading in the data
# df = pd.read_csv("shortlist_temp.dat", sep='\t', header=(0), usecols=(range(1, 13)))
# generating some dummy data
df = pd.DataFrame(np.random.randn(100, 12))
# separate list of values
orig_star_teff = [4308.0, 5112.0, 4240.0, 4042.0, 4411.0, 4100.0, 4511.0, 4738.0, 4630.0, 4870.0, 4442.0, 4845.0]
norm = plt.Normalize(4000, max(orig_star_teff))
cmap = mpl.cm.plasma
color_list = cmap(norm(orig_star_teff))
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=color_list)
axs = df.plot.hist(subplots=True, bins=12, legend=False, layout=(3, 4), figsize=(15, 10), sharey=True)
titles = ['ABOO', 'EpsVIR', 'HIP 96014', '2M16113361',
'KIC 3955590', 'KIC 5113061', 'KIC 5859492', 'KIC 6547007',
'KIC 11444313', 'KIC 11657684', 'HD102328-K3III', 'HD142091-K0III']
for ax, title in zip(axs.flat, titles):
ax.set_title(title)
plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=axs[:, -1])
plt.show()

How to do waffle charts in python? (square piechart)

Something like this:
There is a very good package to do it in R. In python, the best that I could figure out is this, using the squarify package (inspired by a post on how to do treemaps):
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns # just to have better line color and width
import squarify
# for those using jupyter notebooks
%matplotlib inline
df = pd.DataFrame({
'v1': np.ones(100),
'v2': np.random.randint(1, 4, 100)})
df.sort_values(by='v2', inplace=True)
# color scale
cmap = mpl.cm.Accent
mini, maxi = df['v2'].min(), df['v2'].max()
norm = mpl.colors.Normalize(vmin=mini, vmax=maxi)
colors = [cmap(norm(value)) for value in df['v2']]
# figure
fig = plt.figure()
ax = fig.add_subplot(111, aspect="equal")
ax = squarify.plot(df['v1'], color=colors, ax=ax)
ax.set_xticks([])
ax.set_yticks([]);
But when I create not 100 but 200 elements (or other non-square numbers), the squares become misaligned.
Another problem is that if I change v2 to some categorical variable (e.g., a hundred As, Bs, Cs and Ds), I get this error:
could not convert string to float: 'a'
So, could anyone help me with these two questions:
how can I solve the alignment problem with non-square numbers of observations?
how can use categorical variables in v2?
Beyond this, I am really open if there are any other python packages that can create waffle plots more efficiently.
I spent a few days to build a more general solution, PyWaffle.
You can install it through
pip install pywaffle
The source code: https://github.com/gyli/PyWaffle
PyWaffle does not use matshow() method, but builds those squares one by one. That makes it easier for customization. Besides, what it provides is a custom Figure class, which returns a figure object. By updating attributes of the figure, you can basically control everything in the chart.
Some examples:
Colored or transparent background:
import matplotlib.pyplot as plt
from pywaffle import Waffle
data = {'Democratic': 48, 'Republican': 46, 'Libertarian': 3}
fig = plt.figure(
FigureClass=Waffle,
rows=5,
values=data,
colors=("#983D3D", "#232066", "#DCB732"),
title={'label': 'Vote Percentage in 2016 US Presidential Election', 'loc': 'left'},
labels=["{0} ({1}%)".format(k, v) for k, v in data.items()],
legend={'loc': 'lower left', 'bbox_to_anchor': (0, -0.4), 'ncol': len(data), 'framealpha': 0}
)
fig.gca().set_facecolor('#EEEEEE')
fig.set_facecolor('#EEEEEE')
plt.show()
Use icons replacing squares:
data = {'Democratic': 48, 'Republican': 46, 'Libertarian': 3}
fig = plt.figure(
FigureClass=Waffle,
rows=5,
values=data,
colors=("#232066", "#983D3D", "#DCB732"),
legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
icons='child', icon_size=18,
icon_legend=True
)
Multiple subplots in one chart:
import pandas as pd
data = pd.DataFrame(
{
'labels': ['Hillary Clinton', 'Donald Trump', 'Others'],
'Virginia': [1981473, 1769443, 233715],
'Maryland': [1677928, 943169, 160349],
'West Virginia': [188794, 489371, 36258],
},
).set_index('labels')
fig = plt.figure(
FigureClass=Waffle,
plots={
'311': {
'values': data['Virginia'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['Virginia'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.05, 1), 'fontsize': 8},
'title': {'label': '2016 Virginia Presidential Election Results', 'loc': 'left'}
},
'312': {
'values': data['Maryland'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['Maryland'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.2, 1), 'fontsize': 8},
'title': {'label': '2016 Maryland Presidential Election Results', 'loc': 'left'}
},
'313': {
'values': data['West Virginia'] / 30000,
'labels': ["{0} ({1})".format(n, v) for n, v in data['West Virginia'].items()],
'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.3, 1), 'fontsize': 8},
'title': {'label': '2016 West Virginia Presidential Election Results', 'loc': 'left'}
},
},
rows=5,
colors=("#2196f3", "#ff5252", "#999999"), # Default argument values for subplots
figsize=(9, 5) # figsize is a parameter of plt.figure
)
I've put together a working example, below, which I think meets your needs. Some work is needed to fully generalize the approach, but I think you'll find that it's a good start. The trick was to use matshow() to solve your non-square problem, and to build a custom legend to easily account for categorical values.
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Let's make a default data frame with catagories and values.
df = pd.DataFrame({ 'catagories': ['cat1', 'cat2', 'cat3', 'cat4'],
'values': [84911, 14414, 10062, 8565] })
# Now, we define a desired height and width.
waffle_plot_width = 20
waffle_plot_height = 7
classes = df['catagories']
values = df['values']
def waffle_plot(classes, values, height, width, colormap):
# Compute the portion of the total assigned to each class.
class_portion = [float(v)/sum(values) for v in values]
# Compute the number of tiles for each catagories.
total_tiles = width * height
tiles_per_class = [round(p*total_tiles) for p in class_portion]
# Make a dummy matrix for use in plotting.
plot_matrix = np.zeros((height, width))
# Popoulate the dummy matrix with integer values.
class_index = 0
tile_index = 0
# Iterate over each tile.
for col in range(waffle_plot_width):
for row in range(height):
tile_index += 1
# If the number of tiles populated is sufficient for this class...
if tile_index > sum(tiles_per_class[0:class_index]):
# ...increment to the next class.
class_index += 1
# Set the class value to an integer, which increases with class.
plot_matrix[row, col] = class_index
# Create a new figure.
fig = plt.figure()
# Using matshow solves your "non-square" problem.
plt.matshow(plot_matrix, cmap=colormap)
plt.colorbar()
# Get the axis.
ax = plt.gca()
# Minor ticks
ax.set_xticks(np.arange(-.5, (width), 1), minor=True);
ax.set_yticks(np.arange(-.5, (height), 1), minor=True);
# Gridlines based on minor ticks
ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
# Manually constructing a legend solves your "catagorical" problem.
legend_handles = []
for i, c in enumerate(classes):
lable_str = c + " (" + str(values[i]) + ")"
color_val = colormap(float(i+1)/len(classes))
legend_handles.append(mpatches.Patch(color=color_val, label=lable_str))
# Add the legend. Still a bit of work to do here, to perfect centering.
plt.legend(handles=legend_handles, loc=1, ncol=len(classes),
bbox_to_anchor=(0., -0.1, 0.95, .10))
plt.xticks([])
plt.yticks([])
# Call the plotting function.
waffle_plot(classes, values, waffle_plot_height, waffle_plot_width,
plt.cm.coolwarm)
Below is an example of the output this script produced. As you can see, it works fairly well for me, and meets all of your stated needs. Just let me know if it gives you any trouble. Enjoy!
You can use this function for automatic creation of a waffle with simple parameters:
def create_waffle_chart(categories, values, height, width, colormap, value_sign=''):
# compute the proportion of each category with respect to the total
total_values = sum(values)
category_proportions = [(float(value) / total_values) for value in values]
# compute the total number of tiles
total_num_tiles = width * height # total number of tiles
print ('Total number of tiles is', total_num_tiles)
# compute the number of tiles for each catagory
tiles_per_category = [round(proportion * total_num_tiles) for proportion in category_proportions]
# print out number of tiles per category
for i, tiles in enumerate(tiles_per_category):
print (df_dsn.index.values[i] + ': ' + str(tiles))
# initialize the waffle chart as an empty matrix
waffle_chart = np.zeros((height, width))
# define indices to loop through waffle chart
category_index = 0
tile_index = 0
# populate the waffle chart
for col in range(width):
for row in range(height):
tile_index += 1
# if the number of tiles populated for the current category
# is equal to its corresponding allocated tiles...
if tile_index > sum(tiles_per_category[0:category_index]):
# ...proceed to the next category
category_index += 1
# set the class value to an integer, which increases with class
waffle_chart[row, col] = category_index
# instantiate a new figure object
fig = plt.figure()
# use matshow to display the waffle chart
colormap = plt.cm.coolwarm
plt.matshow(waffle_chart, cmap=colormap)
plt.colorbar()
# get the axis
ax = plt.gca()
# set minor ticks
ax.set_xticks(np.arange(-.5, (width), 1), minor=True)
ax.set_yticks(np.arange(-.5, (height), 1), minor=True)
# add dridlines based on minor ticks
ax.grid(which='minor', color='w', linestyle='-', linewidth=2)
plt.xticks([])
plt.yticks([])
# compute cumulative sum of individual categories to match color schemes between chart and legend
values_cumsum = np.cumsum(values)
total_values = values_cumsum[len(values_cumsum) - 1]
# create legend
legend_handles = []
for i, category in enumerate(categories):
if value_sign == '%':
label_str = category + ' (' + str(values[i]) + value_sign + ')'
else:
label_str = category + ' (' + value_sign + str(values[i]) + ')'
color_val = colormap(float(values_cumsum[i])/total_values)
legend_handles.append(mpatches.Patch(color=color_val, label=label_str))
# add legend to chart
plt.legend(
handles=legend_handles,
loc='lower center',
ncol=len(categories),
bbox_to_anchor=(0., -0.2, 0.95, .1)
)

Plot multiple Y axes

I know pandas supports a secondary Y axis, but I'm curious if anyone knows a way to put a tertiary Y axis on plots. Currently I am achieving this with numpy+pyplot, but it is slow with large data sets.
This is to plot different measurements with distinct units on the same graph for easy comparison (eg: Relative Humidity/Temperature/ and Electrical Conductivity).
So really just curious if anyone knows if this is possible in pandas without too much work.
[Edit] I doubt that there is a way to do this(without too much overhead) however I hope to be proven wrong, as this may be a limitation of matplotlib.
I think this might work:
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame
df = DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C'])
fig, ax = plt.subplots()
ax3 = ax.twinx()
rspine = ax3.spines['right']
rspine.set_position(('axes', 1.15))
ax3.set_frame_on(True)
ax3.patch.set_visible(False)
fig.subplots_adjust(right=0.7)
df.A.plot(ax=ax, style='b-')
# same ax as above since it's automatically added on the right
df.B.plot(ax=ax, style='r-', secondary_y=True)
df.C.plot(ax=ax3, style='g-')
# add legend --> take advantage of pandas providing us access
# to the line associated with the right part of the axis
ax3.legend([ax.get_lines()[0], ax.right_ax.get_lines()[0], ax3.get_lines()[0]],\
['A','B','C'], bbox_to_anchor=(1.5, 0.5))
Output:
A simpler solution without plt:
ax1 = df1.plot()
ax2 = ax1.twinx()
ax2.spines['right'].set_position(('axes', 1.0))
df2.plot(ax=ax2)
ax3 = ax1.twinx()
ax3.spines['right'].set_position(('axes', 1.1))
df3.plot(ax=ax3)
....
Using function to achieve this:
def plot_multi(data, cols=None, spacing=.1, **kwargs):
from pandas.plotting._matplotlib.style import get_standard_colors
# Get default color style from pandas - can be changed to any other color list
if cols is None: cols = data.columns
if len(cols) == 0: return
colors = get_standard_colors(num_colors=len(cols))
# First axis
ax = data.loc[:, cols[0]].plot(label=cols[0], color=colors[0], **kwargs)
ax.set_ylabel(ylabel=cols[0])
lines, labels = ax.get_legend_handles_labels()
for n in range(1, len(cols)):
# Multiple y-axes
ax_new = ax.twinx()
ax_new.spines['right'].set_position(('axes', 1 + spacing * (n - 1)))
data.loc[:, cols[n]].plot(ax=ax_new, label=cols[n], color=colors[n % len(colors)], **kwargs)
ax_new.set_ylabel(ylabel=cols[n])
# Proper legend position
line, label = ax_new.get_legend_handles_labels()
lines += line
labels += label
ax.legend(lines, labels, loc=0)
return ax
Example:
from random import randrange
data = pd.DataFrame(dict(
s1=[randrange(-1000, 1000) for _ in range(100)],
s2=[randrange(-100, 100) for _ in range(100)],
s3=[randrange(-10, 10) for _ in range(100)],
))
plot_multi(data.cumsum(), figsize=(10, 5))
Output:
I modified the above answer a bit to make it accept custom x column, well-documented, and more flexible.
You can copy this snippet and use it as a function:
from typing import List, Union
import matplotlib.axes
import pandas as pd
def plot_multi(
data: pd.DataFrame,
x: Union[str, None] = None,
y: Union[List[str], None] = None,
spacing: float = 0.1,
**kwargs
) -> matplotlib.axes.Axes:
"""Plot multiple Y axes on the same chart with same x axis.
Args:
data: dataframe which contains x and y columns
x: column to use as x axis. If None, use index.
y: list of columns to use as Y axes. If None, all columns are used
except x column.
spacing: spacing between the plots
**kwargs: keyword arguments to pass to data.plot()
Returns:
a matplotlib.axes.Axes object returned from data.plot()
Example:
>>> plot_multi(df, figsize=(22, 10))
>>> plot_multi(df, x='time', figsize=(22, 10))
>>> plot_multi(df, y='price qty value'.split(), figsize=(22, 10))
>>> plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))
>>> plot_multi(df[['time price qty'.split()]], x='time', figsize=(22, 10))
See Also:
This code is mentioned in https://stackoverflow.com/q/11640243/2593810
"""
from pandas.plotting._matplotlib.style import get_standard_colors
# Get default color style from pandas - can be changed to any other color list
if y is None:
y = data.columns
# remove x_col from y_cols
if x:
y = [col for col in y if col != x]
if len(y) == 0:
return
colors = get_standard_colors(num_colors=len(y))
if "legend" not in kwargs:
kwargs["legend"] = False # prevent multiple legends
# First axis
ax = data.plot(x=x, y=y[0], color=colors[0], **kwargs)
ax.set_ylabel(ylabel=y[0])
lines, labels = ax.get_legend_handles_labels()
for i in range(1, len(y)):
# Multiple y-axes
ax_new = ax.twinx()
ax_new.spines["right"].set_position(("axes", 1 + spacing * (i - 1)))
data.plot(
ax=ax_new, x=x, y=y[i], color=colors[i % len(colors)], **kwargs
)
ax_new.set_ylabel(ylabel=y[i])
# Proper legend position
line, label = ax_new.get_legend_handles_labels()
lines += line
labels += label
ax.legend(lines, labels, loc=0)
return ax
Here's one way to use it:
plot_multi(df, x='time', y='price qty value'.split(), figsize=(22, 10))

How to plot a grouped bar chart from multiple datasets

I am going through Think Stats and I would like to compare multiple data sets visually. I can see from the book examples that it is possible to generate an interleaved bar graph with a different color for each data set by using a module provided by the book author, how to obtain the same result in pyplot?
Call the bar function multiple times, one for each series. You can control the left position of the bars using the left parameter, and you can use this to prevent overlap.
Entirely untested code:
pyplot.bar( numpy.arange(10) * 2, data1, color = 'red' )
pyplot.bar( numpy.arange(10) * 2 + 1, data2, color = 'red' )
Data2 will be drawn shifted over the right compared to where data one will be drawn.
Matplotlib's example code for interleaved bar charts works nicely for arbitrary real-valued x coordinates (as mentioned by #db42).
However, if your x coordinates are categorical values (like in the case of dictionaries in the linked question), the conversion from categorical x coordinates to real x coordinates is cumbersome and unnecessary.
You can plot two dictionaries side-by-side directly using matplotlib's api. The trick for plotting two bar charts with an offset to each other is to set align=edge and a positive width (+width) for plotting one bar chart, whereas a negative width (-width) for plotting the other one.
The example code modified for plotting two dictionaries looks like the following then:
"""
========
Barchart
========
A bar plot with errorbars and height labels on individual bars
"""
import matplotlib.pyplot as plt
# Uncomment the following line if you use ipython notebook
# %matplotlib inline
width = 0.35 # the width of the bars
men_means = {'G1': 20, 'G2': 35, 'G3': 30, 'G4': 35, 'G5': 27}
men_std = {'G1': 2, 'G2': 3, 'G3': 4, 'G4': 1, 'G5': 2}
rects1 = plt.bar(men_means.keys(), men_means.values(), -width, align='edge',
yerr=men_std.values(), color='r', label='Men')
women_means = {'G1': 25, 'G2': 32, 'G3': 34, 'G4': 20, 'G5': 25}
women_std = {'G1': 3, 'G2': 5, 'G3': 2, 'G4': 3, 'G5': 3}
rects2 = plt.bar(women_means.keys(), women_means.values(), +width, align='edge',
yerr=women_std.values(), color='y', label='Women')
# add some text for labels, title and axes ticks
plt.xlabel('Groups')
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.legend()
def autolabel(rects):
"""
Attach a text label above each bar displaying its height
"""
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x() + rect.get_width()/2., 1.05*height,
'%d' % int(height),
ha='center', va='bottom')
autolabel(rects1)
autolabel(rects2)
plt.show()
The result:
I came across this problem a while ago and created a wrapper function that takes a 2D array and automatically creates a multi-barchart from it:
The code:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import operator as o
import numpy as np
dpoints = np.array([['rosetta', '1mfq', 9.97],
['rosetta', '1gid', 27.31],
['rosetta', '1y26', 5.77],
['rnacomposer', '1mfq', 5.55],
['rnacomposer', '1gid', 37.74],
['rnacomposer', '1y26', 5.77],
['random', '1mfq', 10.32],
['random', '1gid', 31.46],
['random', '1y26', 18.16]])
fig = plt.figure()
ax = fig.add_subplot(111)
def barplot(ax, dpoints):
'''
Create a barchart for data across different categories with
multiple conditions for each category.
#param ax: The plotting axes from matplotlib.
#param dpoints: The data set as an (n, 3) numpy array
'''
# Aggregate the conditions and the categories according to their
# mean values
conditions = [(c, np.mean(dpoints[dpoints[:,0] == c][:,2].astype(float)))
for c in np.unique(dpoints[:,0])]
categories = [(c, np.mean(dpoints[dpoints[:,1] == c][:,2].astype(float)))
for c in np.unique(dpoints[:,1])]
# sort the conditions, categories and data so that the bars in
# the plot will be ordered by category and condition
conditions = [c[0] for c in sorted(conditions, key=o.itemgetter(1))]
categories = [c[0] for c in sorted(categories, key=o.itemgetter(1))]
dpoints = np.array(sorted(dpoints, key=lambda x: categories.index(x[1])))
# the space between each set of bars
space = 0.3
n = len(conditions)
width = (1 - space) / (len(conditions))
# Create a set of bars at each position
for i,cond in enumerate(conditions):
indeces = range(1, len(categories)+1)
vals = dpoints[dpoints[:,0] == cond][:,2].astype(np.float)
pos = [j - (1 - space) / 2. + i * width for j in indeces]
ax.bar(pos, vals, width=width, label=cond,
color=cm.Accent(float(i) / n))
# Set the x-axis tick labels to be equal to the categories
ax.set_xticks(indeces)
ax.set_xticklabels(categories)
plt.setp(plt.xticks()[1], rotation=90)
# Add the axis labels
ax.set_ylabel("RMSD")
ax.set_xlabel("Structure")
# Add a legend
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='upper left')
barplot(ax, dpoints)
plt.show()
If you're interested in what this function does and the logic behind it, here's a (shamelessly self-promoting) link to the blog post describing it.

Categories