Related
I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
import numpy as np

# Wedge sizes and their category labels, in matching order.
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']

# One colour per wedge, sampled evenly across the colormap. Colours are tied
# to the wedge's position in the list, not to its label, so reordering the
# data changes which colour a category gets.
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))

fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)

# make_autopct (shown in the EDIT further down) must be defined before this
# line runs.
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))

# The first element of the value returned by ax.pie is the sequence of wedges.
for pie_wedge in pie_wedge_collection[0]:
    pie_wedge.set_edgecolor('white')

titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
    """Build an ``autopct`` callback that shows the percentage and the value.

    Parameters
    ----------
    values : iterable of numbers
        The wedge sizes handed to ``pie``. They are summed once, when the
        callback is created.

    Returns
    -------
    callable
        A function that takes a wedge's percentage and returns a label such
        as ``'25.00% (5)'``.
    """
    # Sum up front: the total is the same for every wedge, and summing once
    # also lets a one-shot iterator be passed as ``values``.
    total = sum(values)

    def my_autopct(pct):
        # Recover the wedge's absolute value from its share of the total.
        val = int(round(pct * total / 100.0))
        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)

    return my_autopct
Here is a simpler alternative to @tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
# Sample categories and their sizes.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]

fig, ax = plt.subplots(1, 2)
# Left pie: all four categories. Right pie: the same data without the first
# category. No colours are passed, so each pie picks its own.
for axis, first in zip(ax, (0, 1)):
    axis.pie(sizes[first:], labels=labels[first:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
# Sample categories and their sizes.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]

# Fixed label -> colour lookup shared by every chart.
colours = dict(zip(labels, ('C0', 'C1', 'C2', 'C3')))

fig, ax = plt.subplots(1, 2)
# Left pie: all four categories. Right pie: the same data without the first
# category. Colours are looked up by label, so a label keeps its colour
# wherever it appears.
for axis, first in zip(ax, (0, 1)):
    shown = labels[first:]
    axis.pie(sizes[first:],
             labels=shown,
             colors=[colours[name] for name in shown])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
# Pair each label with a palette colour by position. zip() stops at the
# shorter input, so labels beyond the sliced palette get no entry.
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
# Same pairing with the larger tab20 palette. zip() still stops at the
# shorter input, so labels beyond the sliced palette get no entry.
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))
Here's an idea you could try. Make a dictionary from your labels and colors, so that each label is mapped to a color. Then, after making the pie chart, go in and assign the facecolor of each wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices, labels, colors):
    """Draw a pie chart whose wedge colours are looked up by label.

    Parameters
    ----------
    slices : sequence of numbers
        The wedge sizes.
    labels : sequence of str
        One label per wedge, in the same order as *slices*.
    colors : sequence of colours
        Paired with *labels* by position to build the label -> colour
        lookup. If a label occurs more than once, the colour paired with
        its last occurrence is used for every wedge carrying that label.

    Returns
    -------
    fig, ax, pie_wedge_collection
        The figure, its axes, and the value returned by ``ax.pie``.
    """
    # Build the label -> colour lookup, echoing each pair as it is added.
    colordict = {}
    for l, c in zip(labels, colors):
        print(l, c)
        colordict[l] = c

    fig = plt.figure(figsize=[10, 10])
    ax = fig.add_subplot(111)

    pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))

    # The first element of the value returned by ax.pie is the sequence of
    # wedges. Recolour each one from the lookup, using the wedge's own label.
    for pie_wedge in pie_wedge_collection[0]:
        pie_wedge.set_edgecolor('white')
        pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])

    titlestring = 'Issues'
    ax.set_title(titlestring)

    return fig, ax, pie_wedge_collection
# Wedge sizes, one per entry in `labels` below.
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
# One colour per wedge, sampled evenly across the colormap.
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
# Note that 'Dep' appears twice in this list.
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()
I couldn't find a way to word my issue properly in the title, so I'll explain it here. I'm making a swarm plot in seaborn. The Y axis is Sentiment and the X axis is a symbol. Each symbol is mentioned a certain number of times, so its points get pushed outwards along the x axis to show the spread of mentions. I'm trying to overlay another column of data, 'Avg. Sentiment'. I only need that point plotted once per symbol, but because the average is stored on every mention row, it is drawn once per mention and ends up as a line of duplicate points across the graph where the average would be.
as you can see I only need the value once, I can't just end up using some sort of function to plot an average from pandas or seaborn because I plan on using a custom weighted average point that's already been made
here is the code to output and test the graph
# Seed NumPy's RNG so the random sentiment values are reproducible.
np.random.seed(5)
# One row per symbol: its mention count and its precomputed average sentiment.
df = pd.DataFrame({
'Symbol': ['AMC', 'GME', 'BB', 'SPY', 'SPCE'],
'Mentions': [100, 75, 50, 25, 20],
'Avg.Sentiment':[.8,.7,.6,.5,.4]
})
# For each symbol, draw `Mentions` random sentiment values in [-1, 1) ...
df['Sentiment'] = df['Mentions'].apply(lambda x: (np.random.random(x) * 2) - 1)
# ... and expand them to one row per value. Every expanded row keeps its
# symbol's Avg.Sentiment, so that value is now repeated once per mention.
df = df.explode('Sentiment')
# Two-colour gradient, registered under the name "newmap" so it can be used
# as a palette below.
pos = [0.0, 1.0]
colors = ['#FF5000', '#00C805']
cmap = LinearSegmentedColormap.from_list("",list(zip(pos,colors)))
matplotlib.cm.register_cmap("newmap", cmap)
sns.set_style("darkgrid")
sns.set(rc={'figure.figsize':(32,14)})
sns.set(font_scale=2.0)
# Average overlay: Avg.Sentiment is repeated on every expanded row, so this
# draws one marker per row rather than one per symbol.
dplot = sns.swarmplot(x="Symbol", y="Avg.Sentiment", color='black', data=df, marker='X', size=10)
# Individual sentiment values, coloured by value through the "newmap" palette.
dplot= sns.swarmplot(x="Symbol", y="Sentiment", hue='Sentiment',palette="newmap", data=df)
dplot.get_legend().remove()
plt.show()
I've found the solution, just using plt.scatter you can enter in single points from the same data frame, so in my case
# Plot the average straight from the data frame's columns. Repeated rows for
# a symbol share the same x and y, so they land on the same spot.
plt.scatter(x="Symbol", y="Avg.Sentiment", data=df, color='black', marker='X')
I'm trying to create a matplotlib bar chart with categories on the X-axis, but I can't get the categories right. Here's a minimal example of what I'm trying to do.
# Five cars as [car_id, price] pairs.
data = [[46, 11000], [97, 15000], [27, 24000], [36, 9000], [9, 17000]]
df = pd.DataFrame(data, columns=['car_id', 'price'])
fig1, ax1 = plt.subplots(figsize=(10,5))
ax1.set_title('Car prices')
# The bars are positioned at the numeric car_id values themselves.
ax1.bar(df['car_id'], df['price'])
# The ticks, however, are placed at 0..len(df)-1 and only labelled with the
# car_id values, so they do not line up with the bars.
plt.xticks(np.arange(len(df)), list(df['car_id']))
# NOTE(review): no artist above was given a label, so the legend presumably
# has nothing to show.
plt.legend()
plt.show()
I need the five categories (car_id) on the X-axis. What Am I doing wrong? :-/
You can turn car_id into category:
# Convert car_id to a categorical column, then let pandas draw the bar chart
# with car_id on the x-axis.
df['car_id'] = df['car_id'].astype('category')
df.plot.bar(x='car_id')
Output:
You can also plot just the price column and relabel:
# Plot only the price column, then replace the tick labels with the car_id
# values.
ax = df.plot.bar(y='price')
ax.set_xticklabels(df['car_id'])
You mixed up the tick positions and the tick labels in xticks. Here you specify the positions np.arange(len(df)) and the labels list(df['car_id']). So matplotlib puts the labels at the specified positions np.arange(len(df)), i.e. array([0, 1, 2, 3, 4]), while the bars are drawn at the car_id values.
If the position and the labels are here the same, just replace plt.xticks(np.arange(len(df)), list(df['car_id'])) by plt.xticks(df['car_id']).
If you want them to be evenly spaced, your approach is right, but you also need to change ax1.bar(df['car_id'], df['price']) to ax1.bar(np.arange(len(df)), df['price']), so that the bar x-positions are evenly spaced too.
Full code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Five cars as [car_id, price] pairs.
data = [[46, 11000], [97, 15000], [27, 24000], [36, 9000], [9, 17000]]
df = pd.DataFrame(data, columns=['car_id', 'price'])

# Evenly spaced slots 0..n-1, shared by the bars and their ticks.
positions = np.arange(len(df))

fig1, ax1 = plt.subplots(figsize=(10, 5))
ax1.set_title('Car prices')
ax1.bar(positions, df['price'])
# Put a tick under every bar and label it with the car_id.
ax1.set_xticks(positions)
ax1.set_xticklabels(df['car_id'])
plt.show()
I'm trying to create pie charts with matplotlib in which the colour of each category is fixed.
I've got a function which creates a pie chart from sets of value and category data. Here's one example:
Category Value
TI 65
Con 43
FR 40
TraI 40
Bug 38
Data 22
Int 15
KB 12
Other 8
Dep 7
PW 6
Uns 5
Perf 4
Dep 3
The problem is that the data differs from one instance to another, and that in turn changes the order of the categories. Thus, each category gets labelled a different colour each time I generate a chart. I could sort the data alphabetically every time, but that causes two problems: some categories are missing from some datasets, and I'd prefer it sorted by size anyway so that the smallest wedges are oriented horizontally.
How can I set matplotlib to assign colours depending on, say, the index of a pandas.Series?
Here's the code that I'm using to generate a pie chart:
import matplotlib.pyplot as plt
import numpy as np

# Wedge sizes and their category labels, in matching order.
slices = [62, 39, 39, 38, 37, 21, 15, 9, 6, 7, 6, 5, 4, 3]
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']

# One colour per wedge, sampled evenly across the colormap. Colours are tied
# to the wedge's position in the list, not to its label, so reordering the
# data changes which colour a category gets.
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))

fig = plt.figure(figsize=[10, 10])
ax = fig.add_subplot(111)

# make_autopct (shown in the EDIT further down) must be defined before this
# line runs.
pie_wedge_collection = ax.pie(slices, colors=colors, labels=labels, labeldistance=1.05, autopct=make_autopct(slices))

# The first element of the value returned by ax.pie is the sequence of wedges.
for pie_wedge in pie_wedge_collection[0]:
    pie_wedge.set_edgecolor('white')

titlestring = 'Issues'
ax.set_title(titlestring)
EDIT: I forgot to explain the autopct function, it's for adding value and percentage labels:
def make_autopct(values):
    """Build an ``autopct`` callback that shows the percentage and the value.

    Parameters
    ----------
    values : iterable of numbers
        The wedge sizes handed to ``pie``. They are summed once, when the
        callback is created.

    Returns
    -------
    callable
        A function that takes a wedge's percentage and returns a label such
        as ``'25.00% (5)'``.
    """
    # Sum up front: the total is the same for every wedge, and summing once
    # also lets a one-shot iterator be passed as ``values``.
    total = sum(values)

    def my_autopct(pct):
        # Recover the wedge's absolute value from its share of the total.
        val = int(round(pct * total / 100.0))
        return '{p:.2f}% ({v:d})'.format(p=pct, v=val)

    return my_autopct
Here is a simpler alternative to @tmdavison's answer.
Let's first see the problem with an MWE:
import matplotlib.pyplot as plt
# Sample categories and their sizes.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]

fig, ax = plt.subplots(1, 2)
# Left pie: all four categories. Right pie: the same data without the first
# category. No colours are passed, so each pie picks its own.
for axis, first in zip(ax, (0, 1)):
    axis.pie(sizes[first:], labels=labels[first:])
This produces the problem plots:
The problem is that in the left-hand plot, Hogs is coloured in orange, but in the right-hand plot Hogs is coloured in blue (with a similar mix-up for Logs and Dogs).
We would like the colours for the labels to be the same across both plots. We can do this by specifying a dictionary of colours to use:
# Sample categories and their sizes.
labels = ['Frogs', 'Hogs', 'Dogs', 'Logs']
sizes = [15, 30, 45, 10]

# Fixed label -> colour lookup shared by every chart.
colours = dict(zip(labels, ('C0', 'C1', 'C2', 'C3')))

fig, ax = plt.subplots(1, 2)
# Left pie: all four categories. Right pie: the same data without the first
# category. Colours are looked up by label, so a label keeps its colour
# wherever it appears.
for axis, first in zip(ax, (0, 1)):
    shown = labels[first:]
    axis.pie(sizes[first:],
             labels=shown,
             colors=[colours[name] for name in shown])
This works to create the plot:
Here we see that the labels are represented by the same colours across both plots, as desired.
If you have lots of categories it can be cumbersome to manually set a colour for each category. In this case you could construct the colours dictionary as:
# Pair each label with a palette colour by position. zip() stops at the
# shorter input, so labels beyond the sliced palette get no entry.
colours = dict(zip(labels, plt.cm.tab10.colors[:len(labels)]))
If you have more than 10 categories you would instead use:
# Same pairing with the larger tab20 palette. zip() still stops at the
# shorter input, so labels beyond the sliced palette get no entry.
colours = dict(zip(labels, plt.cm.tab20.colors[:len(labels)]))
Here's an idea you could try. Make a dictionary from your labels and colors, so that each label is mapped to a color. Then, after making the pie chart, go in and assign the facecolor of each wedge using this dictionary.
Here's an untested bit of code which might do what you are looking for:
import numpy as np
import matplotlib.pyplot as plt
def mypie(slices, labels, colors):
    """Draw a pie chart whose wedge colours are looked up by label.

    Parameters
    ----------
    slices : sequence of numbers
        The wedge sizes.
    labels : sequence of str
        One label per wedge, in the same order as *slices*.
    colors : sequence of colours
        Paired with *labels* by position to build the label -> colour
        lookup. If a label occurs more than once, the colour paired with
        its last occurrence is used for every wedge carrying that label.

    Returns
    -------
    fig, ax, pie_wedge_collection
        The figure, its axes, and the value returned by ``ax.pie``.
    """
    # Build the label -> colour lookup, echoing each pair as it is added.
    colordict = {}
    for l, c in zip(labels, colors):
        print(l, c)
        colordict[l] = c

    fig = plt.figure(figsize=[10, 10])
    ax = fig.add_subplot(111)

    pie_wedge_collection = ax.pie(slices, labels=labels, labeldistance=1.05)#, autopct=make_autopct(slices))

    # The first element of the value returned by ax.pie is the sequence of
    # wedges. Recolour each one from the lookup, using the wedge's own label.
    for pie_wedge in pie_wedge_collection[0]:
        pie_wedge.set_edgecolor('white')
        pie_wedge.set_facecolor(colordict[pie_wedge.get_label()])

    titlestring = 'Issues'
    ax.set_title(titlestring)

    return fig, ax, pie_wedge_collection
# Wedge sizes, one per entry in `labels` below.
slices = [37, 39, 39, 38, 62, 21, 15, 9, 6, 7, 6, 5, 4, 3]
# One colour per wedge, sampled evenly across the colormap.
cmap = plt.cm.prism
colors = cmap(np.linspace(0., 1., len(slices)))
# Note that 'Dep' appears twice in this list.
labels = [u'TI', u'Con', u'FR', u'TraI', u'Bug', u'Data', u'Int', u'KB', u'Other', u'Dep', u'PW', u'Uns', u'Perf', u'Dep']
fig,ax,pie_wedge_collection = mypie(slices,labels,colors)
plt.show()
I have lists of data indicating responses to likert questions with a one (very unhappy) to five (very happy) scale. I would like to create a page of plots showing these lists as skewed stacked horizontal bar charts. The lists of responses can be of different sizes (e.g. when someone has opted out of answering a particular question). Here is a minimal example of the data:
likert1 = [1.0, 2.0, 1.0, 2.0, 1.0, 3.0, 3.0, 4.0, 4.0, 1.0, 1.0]
likert2 = [5.0, 4.0, 5.0, 4.0, 5.0, 3.0]
I would like to be able to plot this with something like:
plot_many_likerts(likert1, likert2)
At the moment I've written a function to iterate over the lists, and plot each one as its own subplot on a shared figure in matplotlib:
def plot_many_likerts(*lsts):
    """Plot each list of Likert responses as a stacked bar in its own row.

    All rows share one x-axis, so bar widths are comparable between lists.

    Parameters
    ----------
    *lsts : list of float
        One list of responses per question, using the values 1.0 to 5.0.
        The lists may differ in length.
    """
    # squeeze=False keeps the axes in a 2-D array even when only one list is
    # given; without it a single subplot comes back as a bare Axes object
    # that cannot be indexed.
    fig, axes = plt.subplots(len(lsts), sharex=True, squeeze=False)
    for lst, ax in zip(lsts, axes[:, 0]):
        likert_horizontal_bar_list(lst, ax, xaxis=[1.0, 2.0, 3.0, 4.0, 5.0])
        ax.axis('off')
    fig.show()
def likert_horizontal_bar_list(lst, ax, xaxis):
    """Draw one stacked horizontal bar showing how often each level occurs.

    Parameters
    ----------
    lst : list
        The responses to count.
    ax : matplotlib axes
        The axes to draw the bar on.
    xaxis : list
        The response levels to show, one segment per level. Segments are
        stacked left to right in sorted order; a level that never occurs in
        *lst* gets a zero-width segment.
    """
    cnt = Counter(lst)
    n_levels = len(xaxis)
    previous_right = 0
    for i, key in enumerate(sorted(xaxis)):
        # Step through the colormap in equal increments, one per level.
        colour_float = float(i) / float(n_levels)
        # The bar's vertical position is passed positionally so the call does
        # not depend on what that first parameter is named.
        ax.barh(0, cnt[key], height=0.4, left=previous_right,
                color=plt.cm.jet(colour_float), label=str(key))
        # The next segment starts where this one ends.
        previous_right = previous_right + cnt[key]
This works reasonably well and creates stacked bar charts that all use the same scale (i.e. the widths share a common axis). Here is a screenshot:
What is currently Produced http://s7.postimg.org/vh0j816gn/figure_1.jpg
What I would like is to have these two plots centered on midpoints of the mode of the datasets (the datasets will have the same range). For instance:
What I would like to see http://s29.postimg.org/z0qwv4ryr/figure_2.jpg
Suggestions on how I might do this?
I needed to make a divergent bar chart for some likert data. I was using pandas, but the approach would probably be similar without it. The key mechanism is to add in an invisible buffer at the start.
# The leading 'white' entry colours the invisible buffer column that is
# inserted in front of the real categories below.
likert_colors = ['white', 'firebrick','lightcoral','gainsboro','cornflowerblue', 'darkblue']
dummy = pd.DataFrame([[1,2,3,4, 5], [5,6,7,8, 5], [10, 4, 2, 10, 5]],
                     columns=["SD", "D", "N", "A", "SA"],
                     index=["Key 1", "Key B", "Key III"])

# Distance from each bar's left edge to the middle of its neutral segment.
middles = dummy[["SD", "D"]].sum(axis=1)+dummy["N"]*.5
longest = middles.max()

# Pad every row on the left so that all the neutral midpoints line up at
# x == longest.
dummy.insert(0, '', (middles - longest).abs())

# Measure the widest row only after the buffer has been added; measuring
# before would set the x-limit too low and clip the longest bar.
complete_longest = dummy.sum(axis=1).max()

dummy.plot.barh(stacked=True, color=likert_colors, edgecolor='none', legend=False)

# Reference line through the shared midpoint, drawn behind the bars.
z = plt.axvline(longest, linestyle='--', color='black', alpha=.5)
z.set_zorder(-1)

plt.xlim(0, complete_longest)
# Tick every 10 units, labelled relative to the midpoint line. range() needs
# an integer bound, and the padded width may be fractional.
xvalues = range(0, int(complete_longest) + 1, 10)
xlabels = [str(x-longest) for x in xvalues]
plt.xticks(xvalues, xlabels)
plt.show()
There are many limitations to this approach. First, bars no longer get a black outline, and the legend will have an extra blank element. I just hid the legend (I figure there's probably a way to hide just the individual element). I'm not sure of a convenient way to make the bars have an outline without also adding the outline to the buffer element.
First, we establish some colors and dummy data. Then we calculate the width of the left two columns and half of the middle-most column (which i know to be "SD", "D", and "N", respectively). I find the longest column, and use its width to calculate the difference needed for the other columns. Next, I insert this new buffer column into the first column position with a blank title (which felt gross, lemme tell you). For good measure, I also added a vertical line (axvline) behind the middle of the middle bar based on the advice of [2]. Finally, I adjust the x-axis to have the proper scale by offsetting its labels.
You might want more horizontal space on the left - you can easily do so by adding to "longest".
[2] Heiberger, Richard M., and Naomi B. Robbins. "Design of diverging stacked bar charts for Likert scales and other applications." Journal of Statistical Software 57.5 (2014): 1-32.
I too recently needed to make a divergent bar chart for some Likert data. I took a slightly different approach than @austin-cory-bart.
I modified an example from the gallery instead and created this:
import numpy as np
import matplotlib.pyplot as plt
# Category labels, ordered from most negative to most positive.
category_names = ['Strongly disagree', 'Disagree',
'Neither agree nor disagree', 'Agree', 'Strongly agree']
# One list of counts per question, in the same order as category_names.
# Each row in this sample sums to 100.
results = {
'Question 1': [10, 15, 17, 32, 26],
'Question 2': [26, 22, 29, 10, 13],
'Question 3': [35, 37, 7, 2, 19],
'Question 4': [32, 11, 9, 15, 33],
'Question 5': [21, 29, 5, 5, 40],
'Question 6': [8, 19, 5, 30, 38]
}
def survey(results, category_names):
    """Draw a diverging stacked horizontal bar chart of survey results.

    Each question becomes one horizontal bar with one segment per category.
    Every bar is shifted so that the midpoint of its middle category sits at
    x = 0.

    Parameters
    ----------
    results : dict
        A mapping from question labels to a list of answers per category.
        It is assumed all lists contain the same number of entries and that
        it matches the length of *category_names*. The order is assumed
        to be from 'Strongly disagree' to 'Strongly agree'.
    category_names : list of str
        The category labels.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The figure that was created.
    ax : matplotlib.axes.Axes
        The axes holding the bars.
    """
    labels = list(results.keys())
    data = np.array(list(results.values()))
    data_cum = data.cumsum(axis=1)

    # Distance from each bar's left edge to the midpoint of its middle
    # category: everything left of the middle plus half of the middle itself.
    # NOTE(review): with an even number of categories there is no true middle
    # category; the bar is then centred on category number n // 2 - confirm
    # that this is what you want.
    middle_index = data.shape[1]//2
    offsets = data[:, :middle_index].sum(axis=1) + data[:, middle_index]/2

    # Color Mapping
    category_colors = plt.get_cmap('coolwarm_r')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(10, 5))

    # Plot Bars
    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        # Left edge of this segment, measured from the bar's midpoint.
        starts = data_cum[:, i] - widths - offsets
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)

    # Add Zero Reference Line
    ax.axvline(0, linestyle='--', color='black', alpha=.25)

    # X Axis
    # The +/-90 range is hard-coded; it is wide enough for the sample data,
    # where every row sums to 100.
    ax.set_xlim(-90, 90)
    ax.set_xticks(np.arange(-90, 91, 10))
    # Show distances from the centre line, so drop the sign on the left side.
    ax.xaxis.set_major_formatter(lambda x, pos: str(abs(int(x))))

    # Y Axis
    # Invert so the first question is drawn at the top.
    ax.invert_yaxis()

    # Remove spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_visible(False)

    # Legend
    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')

    # Set Background Color
    fig.set_facecolor('#FFFFFF')

    return fig, ax
fig, ax = survey(results, category_names)
plt.show()