I am attempting to create a scatter pie plot that groups by 2 columns, Column1 & Column 2 where the colour in the pie (if numbers are the same) is decided by Column 3.
See my example below of where I am:
This graph shows Column 1 (x-axis) and Column 2 (y-axis). The colour is dictated by Column 3.
But with the code I use, the colours do not stay consistent across graphs, and if the same Column 3 value appears with a different Column 1 or Column 2 value it is assigned a different colour.
I have attempted using cmaps and manually assigning colours but I cannot keep it consistent across each column 2.
See my current code below:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticks
from matplotlib.font_manager import FontProperties
import numpy as np
def draw_pie(dist,
             xpos,
             ypos,
             size,
             color,
             ax=None):
    """Draw a pie at (xpos, ypos) as a stack of wedge-shaped scatter markers.

    Parameters
    ----------
    dist : sequence of numbers
        Slice weights; they are normalised by their total.
    xpos, ypos :
        Position of the pie centre, passed to ``ax.scatter``.
    size :
        Marker size, passed to ``ax.scatter`` as ``s``.
    color :
        Passed unchanged as ``c`` to *every* slice, so all slices of one
        pie receive the same colour specification.
    ax : optional
        Axes to draw on; a new figure and axes are created when omitted.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(70, 60))

    # Cumulative fractions give the start/end of each slice in [0, 1].
    cumsum = np.cumsum(dist)
    cumsum = cumsum / cumsum[-1]
    pie = [0] + cumsum.tolist()

    for r1, r2 in zip(pie[:-1], pie[1:]):
        # A wedge is the origin followed by points on the unit arc.
        angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2)
        x = [0] + np.cos(angles).tolist()
        y = [0] + np.sin(angles).tolist()
        xy = np.column_stack([x, y])
        ax.scatter([xpos], [ypos], marker=xy, s=size, c=color)
    return ax
# Intended fixed colour for each group name found in 'Column 3'.
colors = {'Group A':'red', 'Group B':'green', 'Group C':'blue', 'Group D':'yellow', 'Group E':'yellow', 'Group F':'yellow', 'Group G':'yellow', 'Group H':'yellow'}
fig, ax = plt.subplots(figsize=(94,70))
# One pie per (Column 1, Column 2) pair; slices are the counts of each
# 'Column 3' value inside that pair.
for (x,y), d in dataset.groupby(['Column 1','Column 2']):
    dist = d['Column 3'].value_counts()
    # NOTE: the colour argument is built from the *whole* dataset column,
    # not from the groups present in `dist`, and draw_pie hands the same
    # value to every slice. This is why slice colours do not follow the
    # group names.
    draw_pie(dist, x, y, 50000, ax=ax,color=dataset['Column 3'].map(colors))
params = {'legend.fontsize': 100}
plt.rcParams.update(params)
#plt.legend(dataset["Column 3"],markerscale=.4,frameon=True,framealpha=1,ncol=3,loc=(0.00, -0.3), bbox_to_anchor=(0.0, 0., 0.5, 1.25),handletextpad=1,markerfirst=True,facecolor='lightgrey',mode='expand',borderaxespad=-16)
ax.yaxis.set_major_locator(mticks.MultipleLocator(1))
# Translucent horizontal bands behind the pies.
full = plt.Rectangle((-0.05, 4.25), 2.10, 2, color='g', alpha=0.15)
partial = plt.Rectangle((-0.05, 2.25), 2.10, 2, color='orange', alpha=0.15)
low = plt.Rectangle((-0.05, 0.25), 2.10, 2, color='r', alpha=0.15)
ax.add_patch(full)
ax.add_patch(partial)
ax.add_patch(low)
plt.xticks(fontsize=120)
plt.yticks(fontsize=100)
plt.ylim([0, 6.75])
plt.tight_layout()
plt.show()
Ideally the output graph based on the data (I will copy in below) should be like the below graph (I have placed a number in each pie to define what colour should be there)
Here is the full data used for the graph:
Column 1 | Column 3 | Column 2 | Colour Group Desired
First Line Group A 6 1
First Line Group A 6 1
First Line Group A 6 1
First Line Group C 6 3
First Line Group B 6 2
First Line Group B 6 2
First Line Group B 6 2
First Line Group A 6 1
First Line Group A 6 1
First Line Group C 6 3
First Line Group A 6 1
Second Line Group A 6 1
Second Line Group A 6 1
Second Line Group A 6 1
Second Line Group C 6 3
Second Line Group B 6 2
Second Line Group B 6 2
Second Line Group B 6 2
Second Line Group A 4.5 1
Second Line Group A 6 1
Second Line Group C 6 3
Second Line Group A 6 1
Third Line Group A 1 1
Third Line Group A 6 1
Third Line Group A 1 1
Third Line Group C 6 3
Third Line Group B 3.5 2
Third Line Group B 3.5 2
Third Line Group B 3.5 2
Third Line Group A 1 1
Third Line Group A 1 1
Third Line Group C 4 3
Third Line Group A 1 1
Additionally I would like to add a label in each section of the pie with the count of distinct(Column 3).
Currently I came up with the following solution to fix the issue with the colours:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticks
from matplotlib.font_manager import FontProperties
import numpy as np
def draw_pie(dist,
             xpos,
             ypos,
             size,
             ax=None,
             colors=None):
    """Draw a pie at (xpos, ypos) with one fixed colour per slice position.

    Parameters
    ----------
    dist : sequence of numbers
        Count for each category, in a fixed category order. Zero counts
        are allowed and produce no slice.
    xpos, ypos :
        Position of the pie centre, passed to ``ax.scatter``.
    size :
        Marker size, passed to ``ax.scatter`` as ``s``.
    ax : optional
        Axes to draw on; a new figure and axes are created when omitted.
    colors : sequence, optional
        Colour for slice position ``i``. Defaults to tab:blue, tab:orange,
        tab:green for the values 1, 2 and 3.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(7, 6))
    if colors is None:
        # The colors, corresponding to the values 1, 2 and 3:
        #     1 is tab:blue
        #     2 is tab:orange
        #     3 is tab:green
        # Of course, you can change this
        colors = ['tab:blue', 'tab:orange', 'tab:green']

    # for incremental pie slices
    cumsum = np.cumsum(dist)
    if len(cumsum) == 0 or cumsum[-1] == 0:
        # Nothing to draw; also avoids dividing by a zero total.
        return ax
    cumsum = cumsum / cumsum[-1]
    pie = [0] + cumsum.tolist()

    for r1, r2, i in zip(pie[:-1], pie[1:], range(len(dist))):
        # If no counts present, skip this one (otherwise a very thin
        # wedge would be drawn).
        if dist[i] == 0:
            continue
        angles = np.linspace(2 * np.pi * r1, 2 * np.pi * r2)
        x = [0] + np.cos(angles).tolist()
        y = [0] + np.sin(angles).tolist()
        xy = np.column_stack([x, y])
        ax.scatter([xpos], [ypos], marker=xy, s=size, color=colors[i])
    return ax
#colors = {'Group A':'red', 'Group B':'green', 'Group C':'blue', 'Group D':'yellow', 'Group E':'yellow', 'Group F':'yellow', 'Group G':'yellow', 'Group H':'yellow'}
# Read dataset
dataset = pd.read_csv('dataset.csv')
fig, ax = plt.subplots(figsize=(9,5))
for (x,y), d in dataset.groupby(['Column 1','Column 2']):
    # Only interested in the 'Colour Group Desired' column, as this one
    # contains the values 1-2-3
    d = d['Colour Group Desired']
    # Count how often each value (1-2-3) occurs and store
    # this in a list (count for value i located at list index
    # i-1). Values that do not occur are kept as explicit zeros so
    # that every list position always means the same colour.
    dist = list()
    for i in [1,2,3]:
        dist.append(d[d==i].count())
    # Call your draw_pie function
    draw_pie(dist, x, y, 500, ax=ax)
ax.yaxis.set_major_locator(mticks.MultipleLocator(1))
# Translucent horizontal bands behind the pies.
full = plt.Rectangle((-0.05, 4.25), 2.10, 2, color='g', alpha=0.15)
partial = plt.Rectangle((-0.05, 2.25), 2.10, 2, color='orange', alpha=0.15)
low = plt.Rectangle((-0.05, 0), 2.10, 2.25, color='r', alpha=0.15)
ax.add_patch(full)
ax.add_patch(partial)
ax.add_patch(low)
plt.xticks(fontsize=10)
plt.yticks(fontsize=8)
plt.ylim([0, 6.75])
plt.tight_layout()
plt.show()
Small note to start with, I changed all sizes by a factor of 10 such that the plot fits on my screen (e.g. the figsize). You probably want to use your original values again, but this doesn't matter for the question anyway.
The first change I made was to the loop body of the for (x,y), d in dataset.groupby(['Column 1','Column 2']) loop. Instead of using dist = d['Column 3'].value_counts(), I create an empty array. Subsequently, I loop over the values 1, 2 and 3. In each iteration, I check how many rows match the specific value and append the outcome to the list. In this way, I end up with a list of size 3, in which the first element corresponds to the amount of rows that equal 1, the second element corresponds to the amount of rows that equal 2 and the third element corresponds to the amount of rows that equal 3. The advantage is that I can also keep track of values that occur 0 times.
Secondly, I changed the draw_pie function a bit. However, since I do not fully understand the meaning of a group in terms of the colour, I commented out the colors dictionary. It looks as if 1 always corresponds to Group A, 2 always corresponds to Group B and 3 always corresponds to Group C. I made use of this observation and defined another colors variable (in the draw_pie function). Instead of a dictionary, colors is now a list (where the first element corresponds to the value 1, the second element corresponds to the value 2 and the third element corresponds to the value 3). I changed your for loop from for r1, r2 in zip(pie[:-1], pie[1:]) to for r1, r2, i in zip(pie[:-1], pie[1:], range(0, len(dist))). The advantage is that I can now use the iteration variable i to get the proper color from the colors list. In addition, I added a small if statement which checks if there are exactly 0 occurrences. If that is the case, I skip the rest of that iteration and draw nothing (if you don't skip these cases, it will draw a very thin line instead; try this for yourself by removing the check).
If I run the code, I get the following result:
Unfortunately, I was not successful in adding the labels. I tried using the Axes.text method, but I couldn't get the labels to be placed at the proper locations.
Edit
I decided to change the body of the draw_pie function. In this new version, we draw an Axes instance at the desired (xpos, ypos) location. This involves some transformations: first a transformation from data coordinates to display coordinates and subsequently a transformation from display coordinates to figure coordinates. See this tutorial for an explanation. The advantage is that we can now plot a pie chart inside the created axes using the Axes.pie method. This method has some nice options, such as adding labels!
However, there is a catch. Before we start drawing the pie charts, we need to already fix the xlim and the ylim values of the main Axes. If we don't do this (and do it after plotting the pie charts), the pie charts will no longer be located at the proper positions. Therefore, I have moved the code which sets the xlim and ylim values before the first time we call the draw_pie function. I also removed the call to plt.tight_layout(), as this will (unfortunately) also cause the pie charts to no longer be located at the proper locations.
As a small side note, I changed the manner in which the background colors are drawn. Instead of using patches, I now use the Axes.axhspan method. With this method, you can still control the y-locations, but the width will extend infinitely (meaning that the colors remain if you scroll left/right). If you don't want this, you can remove it again :).
See the new version of the code below (I note once more, I changed all sizes such that it fits on my computer screen. You probably want to substitute your original sizes again):
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticks
from matplotlib.font_manager import FontProperties
import numpy as np
def draw_pie(dist,
             xpos,
             ypos,
             size,
             ax=None,
             fig=None):
    """Draw a labelled pie chart on a small inset axes centred at (xpos, ypos).

    Parameters
    ----------
    dist : sequence of numbers
        Count per category; zero counts get an empty label.
    xpos, ypos :
        Pie centre in the data coordinates of *ax*.
    size :
        Pie box width in display units (pixels). It is converted to a
        figure fraction along x and that same fraction is used for both
        the width and the height of the inset axes.
    ax : optional
        Main axes providing the data coordinate system; a new figure and
        axes are created when omitted.
    fig : optional
        Figure that receives the inset axes. Defaults to ``ax.figure``
        when *ax* is given without *fig*.

    Returns
    -------
    The inset axes holding the pie chart.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(7,6))
    elif fig is None:
        # Previously this case failed on ``fig.transFigure``.
        fig = ax.figure

    # Transform xpos and ypos to figure coordinates by the
    # following steps:
    #   1. First transform xpos and ypos from data coordinates to
    #      display coordinates
    #   2. Transform the display coordinates to figure coordinates
    xfig, yfig = ax.transData.transform((xpos, ypos))
    trans_to_fig = fig.transFigure.inverted()
    xfig, yfig = trans_to_fig.transform((xfig, yfig))

    # Calculate figure coordinates from the desired pie chart size
    # given in pixels
    size = trans_to_fig.transform((size, 0))[0]

    # Add axes at data coordinates xpos and ypos. On these axes,
    # the pie chart will be plotted
    ax_pie = fig.add_axes([xfig-0.5*size, yfig-0.5*size, size, size])

    # Plot the pie chart (with some special options)
    textprops = {'color'      : 'w',
                 'fontweight' : 'bold',
                 'fontsize'   : 10,
                 'va'         : 'center',
                 'ha'         : 'center'
                }
    labels = [str(i) if not i == 0 else "" for i in dist]
    labeldistance = 0.5
    if sum(not x==0 for x in dist) == 1:  # Ensures we plot the label in the center
        labeldistance = 0.0               # if we have only one entry
    ax_pie.pie(dist, labels=labels, labeldistance=labeldistance, textprops=textprops)
    return ax_pie
# Read dataset
dataset = pd.read_csv('dataset.txt')
fig, ax = plt.subplots(figsize=(9,5))
# Important, limits must be set before calling draw_pie function!
# (Otherwise, the data coordinates will change, which will break
# the transform sequence inside the draw_pie function!)
ax.set_xlim([-0.2, 2.2]) # Tweak these values for the desired output
ax.set_ylim([0, 6.75])
# Make sure the string from 'Column 1' is displayed again
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['First Line', 'Second Line', 'Third Line'])
# Remainder of formatting
ax.yaxis.set_major_locator(mticks.MultipleLocator(1))
plt.xticks(fontsize=10)
plt.yticks(fontsize=8)
# Define float values for 'Column 1' (easier for transformation,
# we have already put the text back there using ax.set_xticklabels)
column1_to_float = {'First Line':0, 'Second Line':1, 'Third Line':2}
for (x,y), d in dataset.groupby(['Column 1','Column 2']):
    # Only interested in the 'Colour Group Desired' column, as this one
    # contains the values 1-2-3
    d = d['Colour Group Desired']
    # Count how often each value (1-2-3) occurs and store
    # this in a list (count for value i located at list index
    # i-1)
    dist = list()
    for i in [1,2,3]:
        dist.append(d[d==i].count())
    # Call your draw_pie function
    draw_pie(dist, column1_to_float[x], y, 100, ax=ax, fig=fig)
# Plot the colours (note: using axhspan, they extend the full
# horizontal direction, even while scrolling)
ax.axhspan(0   , 2.25, fc='r'     , ec=None, alpha=0.15)
ax.axhspan(2.25, 4.25, fc='orange', ec=None, alpha=0.15)
ax.axhspan(4.25, 6.75, fc='g'     , ec=None, alpha=0.15)
# Unfortunately, tight_layout can no longer be used. If we do use this,
# the pie charts will no longer be at the proper positions...
# plt.tight_layout()
plt.show()
If I run this code, I get the following output:
Just to mention it, you can decrease the size of the pie charts by adjusting the size argument to the draw_pie function (I just liked the output above :) ). But keep in mind that in this case, you also want to decrease the fontsize specified in the textprops dictionary in the body of the draw_pie function. As an example (size = 63, fontsize=7):
Related
I have the following dataframe where it contains the best equipment in operation ranked by 1 to 300 (1 is the best, 300 is the worst) over a few days (df columns)
Equipment 21-03-27 21-03-28 21-03-29 21-03-30 21-03-31 21-04-01 21-04-02
P01-INV-1-1 1 1 1 1 1 2 2
P01-INV-1-2 2 2 4 4 5 1 1
P01-INV-1-3 4 4 3 5 6 10 10
I would like to customize a line plot (example found here) but I'm having some troubles trying to modify the example code provided:
import matplotlib.pyplot as plt
import numpy as np
def energy_rank(data, marker_width=0.1, color='blue'):
    """Build the line artists for one rank series.

    Each value in *data* becomes a short horizontal solid marker centred
    on x = 1, 2, ..., len(data); one dashed line connects all markers.

    Parameters
    ----------
    data : 1-D sequence of numbers
        The y value (rank) at each step.
    marker_width : float
        Horizontal width of every solid marker, in x data units.
    color :
        Colour used for all returned lines.

    Returns
    -------
    list
        The dashed connector first, followed by one solid ``Line2D`` per
        value in *data*.
    """
    y_data = np.repeat(data, 2)
    # The x buffer must be float: with integer ranks, ``np.empty_like``
    # yields an integer array and the +/- marker_width/2 offsets are
    # truncated when assigned.
    x_data = np.empty(y_data.shape, dtype=float)
    centres = np.arange(1, len(data) + 1)
    x_data[0::2] = centres - (marker_width / 2)
    x_data[1::2] = centres + (marker_width / 2)

    lines = []
    lines.append(plt.Line2D(x_data, y_data, lw=1, linestyle='dashed', color=color))
    for x in range(0, len(data) * 2, 2):
        lines.append(plt.Line2D(x_data[x:x+2], y_data[x:x+2], lw=2, linestyle='solid', color=color))
    return lines
data = ranks.head(4).to_numpy() #ranks is the above dataframe
artists = []
# One colour per plotted row; zip stops at the shorter of the two.
for row, color in zip(data, ('red','blue','green','magenta')):
    artists.extend(energy_rank(row, color=color))
fig, ax = plt.subplots()
# NOTE: the tick labels are assigned without fixing the tick positions
# first, so the labels go onto whatever ticks the axes currently has.
ax.set_xticklabels(ranks.columns) # set X axis to be dataframe columns
ax.set_xticklabels(ax.get_xticklabels(), rotation=35, fontsize = 10)
for artist in artists:
    ax.add_artist(artist)
ax.set_ybound([15,0])
ax.set_xbound([.5,8.5])
When using ax.set_xticklabels(ranks.columns), for some reason, it only plots 5 of the 7 days from ranks columns, removing specifically the first and last values. I tried to duplicate those values but this did not work as well. I end up having this below:
In summary, I would like to know if it's possible to do 3 customizations:
input all dates from ranks columns on X axis
reverse the Y axis. ax.set_ybound([15,0]) is not working. It would make more sense to see the graph starting with 0 at the top, since 1 is the most important rank to look at
add labels to the end of each line at the last day (last value on X axis). I could add the little window label, but it often gets really messy when you plot more data, so adding just the text at the end of each line would really make it look cleaner
Please let me know if those customizations are impossible to do and any help is really appreciated! Thank you in advance!
To show all the dates, use plt.xticks() and set_xbound to start at 0. To reverse the y axis, use ax.set_ylim(ax.get_ylim()[::-1]). To set the legends the way you described, you can use annotation and set the coordinates of the annotation at your last datapoint for each series.
fig, ax = plt.subplots()
# energy_rank draws the k-th value at x = k (starting at 1), so the date
# labels must sit on ticks 1..n to line up with the markers.
plt.xticks(np.arange(1, len(ranks.columns) + 1), list(ranks.columns), rotation = 35, fontsize = 10)
plt.xlabel('Date')
plt.ylabel('Rank')
for artist in artists:
    ax.add_artist(artist)
ax.set_ybound([0,15])
# Reverse the y axis so that rank 1 is at the top.
ax.set_ylim(ax.get_ylim()[::-1])
ax.set_xbound([0,8.5])
# Text labels just right of the last marker (x = 7) at each series'
# final value.
ax.annotate('Series 1', xy =(7.1, 2), color = 'red')
ax.annotate('Series 2', xy =(7.1, 1), color = 'blue')
ax.annotate('Series 3', xy =(7.1, 10), color = 'green')
plt.show()
Here is the plot for the three rows of data in your sample dataframe:
Now let's assume I have a data file example.csv:
first,second,third,fourth,fifth,sixth
-42,11,3,L,D
4,21,40,L,Q
2,31,15,R,D
-42,122,50,S,L
print(df.head()) of the above is:
first second third fourth fifth sixth
0 -42 11 3 L D NaN
1 4 21 40 L Q NaN
2 2 31 15 R D NaN
3 -42 122 50 S L NaN
I want to draw the bar plot as a group, where the first and second columns will work as an index. Their different numbers will have different colors.
What I'm expecting is below which I have started working on.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
filename = 'example.csv'
df = pd.read_csv(filename)
print(df.head())
first = df['first']
second = df['second']
third = df['third']
labels = df['third']
x = np.arange(len(labels))
width = 0.35
# NOTE: the sorted frame returned here is discarded, and `third` /
# `labels` were extracted above, so the plot is drawn in file order.
df.sort_values(by=['third'], axis=0, ascending=False)
fig, ax = plt.subplots()
# Both bars of each pair use the same heights (column 'third').
rects1 = ax.bar(x - width/2, third, width, label='Parent 1')
rects2 = ax.bar(x + width/2, third, width, label='Parent 2')
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        ax.annotate('{}'.format(height),
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')


autolabel(rects1)
autolabel(rects2)
fig.tight_layout()
From the bar plot, it is clear that the Y value is from the column called "third", which is exactly what we are getting. But in the grouping, we need to have some modifications to the labeling in the grouping. I have drawn on the figure so you can see what I'm expecting.
Each top number on each bar plot will have a different color. For example, in the first pair of bars, we have numbers (-42,11). So we need to assign two different colors. But if these numbers reappear on other bars, these same numbers will have the same color. That means each number will have a unique bar color. The complete list of bar colors can be shown as legends in the top left instead of what we have right now.
Another identification will be the bottom of the bars. For example, we have (L, D) in the first pair which are representing the fourth and fifth columns of the data file.
I wanted to draw the bars in descending order of the third column. I applied the command to sort the column in descending order, but it seems it did not do that in the plot.
df.sort_values(by=['third'], axis=0, ascending=False)
There are too many customizations, so I think it's easier to loop through the rows and plot each bar separately. Also, sort_values returns a copy by default; passing inplace=True makes it operate in place:
# sort dataframe, notice `inplace`
df.sort_values(by=['third'], axis=0, ascending=False, inplace=True)
from matplotlib import cm
# we use this to change the colors with `cmap`
values = np.unique(df[['first','second']])
# Range of the values, computed once for the scaling below.
values_min = values.min()
values_span = np.ptp(values)


# scaled the values to 0-1 for cmap
def scaled_value(val):
    """Map *val* linearly from [min(values), max(values)] onto [0, 1]."""
    return (val - values_min) / values_span


# `cm.get_cmap` has been removed from recent matplotlib releases;
# `plt.get_cmap` returns the same colormap.
cmap = plt.get_cmap('viridis')
width = 0.35
fig, ax = plt.subplots()
for i, idx in enumerate(df.index):
    row = df.loc[idx]
    # draw the first
    ax.bar(i-width/2,row['third'],
           color=cmap(scaled_value(row['first'])), # specify color here
           width=width, edgecolor='w',
           label='Parent 1' if i==0 else None)  # label first bar
    # draw the second
    ax.bar(i+width/2, row['third'],
           color=cmap(scaled_value(row['second'])),
           width=width, edgecolor='w', hatch='//',
           label='Parent 2' if i==0 else None)
# set the ticks manually: one tick under each individual bar, labelled
# with the 'fourth' / 'fifth' value of its row
ax.set_xticks([i + o for i in range(df.shape[0]) for o in [-width/2, width/2]])
ax.set_xticklabels(df[['fourth','fifth']].values.ravel())
ax.legend()
Output:
I believe you first need to work on the right data structure in the dataframe. I believe you want the following:
# Build a combined "fourth:fifth" key per row and use it as the x axis.
pair_label = df['fourth'] + ":" + df['fifth']
df['xaxis'] = pair_label
# Sum 'third' and 'first' within each key and draw them as grouped bars.
totals = df.groupby('xaxis').agg({'third':'sum','first':'sum'})
totals.plot(kind='bar')
it output
third first
xaxis
L:D 3 -42
L:Q 40 4
R:D 15 2
S:L 50 -42
plots as :
My dataset is in the form of :
Data[0] = [headValue,x0,x1,..xN]
Data[1] = [headValue_ya,ya0,ya1,..yaN]
Data[2] = [headValue_yb,yb0,yb1,..ybN]
...
Data[n] = [headvalue_yz,yz0,yz1,..yzN]
I want to plot f(y*) = x, so I can visualize all Lineplots in the same figure with different colors, each color determined by the headervalue_y*.
I also want to add a colorbar whose color matching the lines and therefore the header values, so we can link visually which header value leads to which behaviour.
Here is what I am aiming for :(Plot from Lacroix B, Letort G, Pitayu L, et al. Microtubule Dynamics Scale with Cell Size to Set Spindle Length and Assembly Timing. Dev Cell. 2018;45(4):496–511.e6. doi:10.1016/j.devcel.2018.04.022)
I have trouble adding the colorbar, I have tried to extract N colors from a colormap (N is my number of different headValues, or column -1) and then adding for each line plot the color corresponding here is my code to clarify:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
Data = [['Time',0,0.33,..200],[0.269,4,4.005,...11],[0.362,4,3.999,...16.21],...[0.347,4,3.84,...15.8]]
headValues = [0.269,0.362,0.335,0.323,0.161,0.338,0.341,0.428,0.245,0.305,0.305,0.314,0.299,0.395,0.32,0.437,0.203,0.41,0.392,0.347]
# the differents headValues_y* of each column here in a list but also in Data
# with headValue[0] = Data[1][0], headValue[1] = Data[2][0] ...
cmap = mpl.cm.get_cmap('rainbow') # I choose my colormap
rgba = [] # the color container
for value in headValues:
rgba.append(cmap(value)) # so rgba will contain a different color for each headValue
fig, (ax,ax1) = plt.subplots(2,1) # creating my figure and two axes to put the Lines and the colorbar
c = 0 # index for my colors
for i in range(1, len(Data)):
ax.plot( Data[0][1:], Data[i][1:] , color = rgba[c])
# Data[0][1:] is x, Data[i][1:] is y, and the color associated with Data[i][0]
c += 1
fig.colorbar(mpl.cm.ScalarMappable(cmap= mpl.colors.ListedColormap(rgba)), cax=ax1, orientation='horizontal')
# here I create my scalarMappable for my lineplot and with the previously selected colors 'rgba'
plt.show()
The current result:
How to add the colorbar on the side or the bottom of the first axis ?
How to properly add a scale to this colorbar corresponding to the different headValues ?
How to make the colorbar scale and colors match to the different lines on the plot with the link One color = One headValue ?
I have tried to work with scatterplot which are more convenient to use with scalarMappable but no solution allows me to do all these things at once.
Here is a possible approach. As the 'headValues' aren't sorted, nor equally spaced and one is even used twice, it is not fully clear what the most-desired result would be.
Some remarks:
The standard way of creating a colorbar in matplotlib doesn't need a separate subplot. Matplotlib will reduce the existing plot a bit and put the colorbar next to it (or below for a horizontal bar).
Converting the 'headValues' to a numpy array allows for compact code, e.g. writing rgba = cmap(headValues) directly calculates the complete array.
Calling cmap on unchanged values will map 0 to the lowest color and 1 to the highest color, so for values only between 0.16 and 0.44 they all will be mapped to quite similar colors. One approach is to create a norm to map 0.16 to the lowest color and 0.44 to the highest. In code: norm = plt.Normalize(headValues.min(), headValues.max()) and then calculate rgba = cmap(norm(headValues)).
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
headValues = np.array([0.269, 0.362, 0.335, 0.323, 0.161, 0.338, 0.341, 0.428, 0.245, 0.305, 0.305, 0.314, 0.299, 0.395, 0.32, 0.437, 0.203, 0.41, 0.392, 0.347])
x = np.linspace(0, 200, 500)
# create Data similar to the data in the question
Data = [['Time'] + list(x)] + [[val] + list(np.sqrt(4 * x) * val + 4) for val in headValues]
headValues = np.array([d[0] for d in Data[1:]])
# order[k] is the index of the k-th smallest head value;
# inverse_order[i] is the rank of head value i in sorted order.
order = np.argsort(headValues)
inverse_order = np.argsort(order)
# `mpl.cm.get_cmap` has been removed from recent matplotlib releases.
cmap = plt.get_cmap('rainbow')
rgba = cmap(np.linspace(0, 1, len(headValues)))  # evenly spaced colors
fig, ax = plt.subplots(1, 1)
for i in range(1, len(Data)):
    ax.plot(Data[0][1:], Data[i][1:], color=rgba[inverse_order[i-1]])
    # Data[0][1:] is x, Data[i][1:] is y, and the color associated with Data[i][0]
# The mappable is not attached to any Axes, so the Axes to take space
# from has to be named explicitly via `ax=`.
cbar = fig.colorbar(mpl.cm.ScalarMappable(cmap=mpl.colors.ListedColormap(rgba)), ax=ax, orientation='vertical',
                    ticks=np.linspace(0, 1, len(rgba) * 2 + 1)[1::2])
# One tick in the middle of each colour segment, labelled in sorted order.
cbar.set_ticklabels(headValues[order])
plt.show()
Alternatively, the colors can be assigned using their position in the colormap, but without creating a separate ListedColormap: each line's color is looked up directly as cmap(norm(value)).
# `mpl.cm.get_cmap` has been removed from recent matplotlib releases.
cmap = plt.get_cmap('rainbow')
# Stretch the colormap over the actual range of the head values.
norm = plt.Normalize(headValues.min(), headValues.max())
fig, ax = plt.subplots(1, 1)
for i in range(1, len(Data)):
    # Data[i][0] is the head value of line i; it picks the line colour.
    ax.plot(Data[0][1:], Data[i][1:], color=cmap(norm(Data[i][0])))
# The mappable is not attached to any Axes, so pass `ax=` explicitly.
cbar = fig.colorbar(mpl.cm.ScalarMappable(cmap=cmap, norm=norm), ax=ax)
To get ticks for each of the 'headValues', these ticks can be set explicitly. As putting a label for each tick will result in overlapping text, labels that are too close to other labels can be replaced by an empty string:
headValues.sort()
# The mappable is not attached to any Axes, so pass `ax=` explicitly.
cbar2 = fig.colorbar(mpl.cm.ScalarMappable(cmap=cmap, norm=norm), ax=ax, ticks=headValues)
# Blank out a label when the following value is closer than 0.007, to
# avoid overlapping text; the last value is always labelled.
cbar2.set_ticklabels([val if val < following - 0.007 else '' for val, following in zip(headValues[:-1], headValues[1:])]
                     + [headValues[-1]])
At the left the result of the first approach (colors in segments), at the right the alternative colorbars (color depending on value):
* Please help, it's very important: why is it not possible to get subplots of the columns of a pandas DataFrame by using a heatmap inside a for-loop?
I am trying to create subplots of the columns of a pandas DataFrame inside a for-loop. In each iteration (one cycle, i.e. 480 values) I want to plot the three heatmaps belonging to A, B and C side by side in one window. I've found only one answer here, which I'm afraid does not fit my case: #euri10 answered by using flat.
My scripts are following:
# Import and call the needed libraries
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
'''
Take a list and create the formatted matrix
'''
def mkdf(ListOf480Numbers):
    """Split a flat sequence into 8 sections of 6 zig-zag ordered lines.

    The input is cut into 8 equal sections, each section into 6 lines
    (10 cells per line for 480 values). Within every section the lines at
    index 0, 2 and 4 are reversed and the lines at index 1, 3 and 5 are
    kept as they are.

    Returns
    -------
    list
        8 sections, each a list of 6 arrays.
    """
    normalMatrix = np.array_split(ListOf480Numbers, 8)  # 8 sections
    fixMatrix = []
    for i in range(8):
        lines = np.array_split(normalMatrix[i], 6)  # 6 lines per section
        newMatrix = [0, 0, 0, 0, 0, 0]  # holds the reordered lines
        for j in (1, 3, 5):
            newMatrix[j] = lines[j]  # index 1, 3, 5: unchanged
        for j in (0, 2, 4):
            newMatrix[j] = lines[j][::-1]  # index 0, 2, 4: reversed
        fixMatrix.append(newMatrix)
    return fixMatrix
'''
Print the matrix with the required format
'''
def print_df(fixMatrix):
    """Arrange the 8 sections from ``mkdf`` into one DataFrame.

    Sections are placed side by side in pairs, and the pairs are stacked
    top to bottom in this order: (4, 7), (5, 6), (1, 2), (0, 3). Each
    pair contributes 6 rows, one per line.

    Returns
    -------
    pandas.DataFrame
        24 rows; each row is a line of the left section followed by the
        same line of the right section.
    """
    values = []
    for i in range(6):
        values.append([*fixMatrix[4][i], *fixMatrix[7][i]])  # sections 4 and 7 side by side
    for i in range(6):
        values.append([*fixMatrix[5][i], *fixMatrix[6][i]])  # sections 5 and 6 side by side
    for i in range(6):
        values.append([*fixMatrix[1][i], *fixMatrix[2][i]])  # sections 1 and 2 side by side
    for i in range(6):
        values.append([*fixMatrix[0][i], *fixMatrix[3][i]])  # sections 0 and 3 side by side
    df = pd.DataFrame(values)
    return df
'''
Normalizing Formula
'''
def normalize(value, min_value, max_value, min_norm, max_norm):
    """Linearly rescale *value* from [min_value, max_value] to [min_norm, max_norm].

    *value* may be a scalar or anything supporting element-wise
    arithmetic. ``max_value == min_value`` leads to a division by zero.
    """
    new_value = ((max_norm - min_norm)*((value - min_value)/(max_value - min_value))) + min_norm
    return new_value
'''
Split data in three different lists A, B and C
'''
# Raw string: '\m' is not a valid escape sequence in a normal literal.
dft = pd.read_csv(r'D:\me4.TXT', header=None)
# The file is read in groups of four rows: row 0 of each group is taken
# as the id, rows 1-3 as the A, B and C values.
id_set = dft[dft.index % 4 == 0].astype('int').values
A = dft[dft.index % 4 == 1].values
B = dft[dft.index % 4 == 2].values
C = dft[dft.index % 4 == 3].values
data = {'A': A[:,0], 'B': B[:,0], 'C': C[:,0]}
#df contains all the data
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])
'''
Data generation phase
'''
#next iteration create all plots, change the number of cycles
cycles = int(len(df)/480)
print(cycles)
for i in df:
    # One output directory per column; an existing directory is fine.
    os.makedirs(i, exist_ok=True)
    min_val = df[i].min()
    min_nor = -1
    max_val = df[i].max()
    max_nor = 1
    for cycle in range(1):             #iterate thriugh all cycles range(1) by ====> range(int(len(df)/480))
        count = '{:04}'.format(cycle)
        j = cycle * 480
        ordered_data = mkdf(df.iloc[j:j+480][i])
        csv = print_df(ordered_data)
        #Print .csv files contains matrix of each parameters by name of cycles respectively
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)
        # NOTE: df3 is only assigned in the 'C' branch and df1/df2 only in
        # the other branch, yet all three are plotted further down on
        # every pass through this loop.
        if 'C' in i:
            min_nor = -40
            max_nor = 150
            #Applying normalization for C between [-40,+150]
            new_value3 = normalize(df['C'].iloc[j:j+480][i].values, min_val, max_val, -40, 150)
            n_cbar_kws = {"ticks":[-40,150,-20,0,25,50,75,100,125]}
            df3 = print_df(mkdf(new_value3))
        else:
            #Applying normalizayion for A,B between [-1,+1]
            new_value1 = normalize(df['A'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            new_value2 = normalize(df['B'].iloc[j:j+480][i].values, min_val, max_val, -1, 1)
            n_cbar_kws = {"ticks":[-1.0,-0.75,-0.50,-0.25,0.00,0.25,0.50,0.75,1.0]}
            df1 = print_df(mkdf(new_value1))
            df2 = print_df(mkdf(new_value2))
        #Plotting parameters by using HeatMap
        plt.figure()
        sns.heatmap(df, vmin=min_nor, vmax=max_nor, cmap ='coolwarm', cbar_kws=n_cbar_kws)
        plt.title(i, fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        #Print .PNG images contains HeatMap plots of each parameters by name of cycles respectively
        plt.savefig(f'{i}/{i}{count}.png')
        #plotting all columns ['A','B','C'] in-one-window side by side
        fig, axes = plt.subplots(nrows=1, ncols=3 , figsize=(20,10))
        plt.subplot(131)
        sns.heatmap(df1, vmin=-1, vmax=1, cmap ="coolwarm", linewidths=.75 , linecolor='black', cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        fig.axes[-1].set_ylabel('[MPa]', size=20) #cbar_kws={'label': 'Celsius'}
        plt.title('A', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        plt.subplot(132)
        sns.heatmap(df2, vmin=-1, vmax=1, cmap ="coolwarm", cbar=True , cbar_kws={"ticks":[-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]})
        fig.axes[-1].set_ylabel('[Mpa]', size=20) #cbar_kws={'label': 'Celsius'}
        #sns.despine(left=True)
        plt.title('B', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        plt.subplot(133)
        sns.heatmap(df3, vmin=-40, vmax=150, cmap ="coolwarm" , cbar=True , cbar_kws={"ticks":[-40,150,-20,0,25,50,75,100,125]})
        fig.axes[-1].set_ylabel('[°C]', size=20) #cbar_kws={'label': 'Celsius'}
        #sns.despine(left=True)
        plt.title('C', fontsize=12, color='black', loc='left', style='italic')
        plt.axis('off')
        plt.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
        plt.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)
        #plt.subplot_tool()
        plt.savefig(f'{i}/{i}{i}{count}.png')
        plt.show()
So far I couldn't get proper output due to in each cycle it prints plot each of them 3 times in different intervals eg. it prints 'A' left then again it prints 'A' under the name of 'B' and 'C' in middle and right in-one-window. Again it prints 'B' 3-times instead once and put it middle and in the end it prints 'C' 3-times instead of once and put in right side it put in middle and left!
Target is to catch subplots of all 3 columns A,B & C in one-window for each cycle (every 480-values by 480-values) in main for-loop!
1st cycle : 0000 -----> subplots of A,B,C ----> Store it as 0000.png
2nd cycle : 0001 -----> subplots of A,B,C ----> Store it as 0001.png
...
Problem is usage of df inside of for-loop and it passes values of A or B or C 3 times while it should pass it values belong to each column once respectively I provide a picture of unsuccessful output here so that you could see exactly where the problem is clearly
my desired output is below:
I also provide sample text file of dataset for 3 cycles: dataset
So after looking at your code and and your requirements I think I know what the problem is.
Your for loops are in the wrong order. You want a new figure for each cycle, containing each 'A', 'B' and 'C' as subplots.
This means your outer loop should go over the cycles and then your inner loop over i, whereas your indentation and order of the loops makes you trying to plot all 'A','B','C'subplots already on your first loop through i (i='A', cycle=1) and not after your first loop through the first cycle, with all i (i='A','B','C', cycle=1).
This is also why you get the problem (as mentioned in your comment on this answer) of df3 not being defined. The definition of df3 is in an if block checking if 'C' in i; on your first loop through, this condition is not met and therefore df3 is not defined, but you are still trying to plot it!
Also you got the same problem as in your other question with the NaN/inf values again.
Rearranging the for loops and the indentation and cleaning up the NaN/inf values gets you the following code:
#...
#df contains all the data
# df contains all the data, one column per parameter ('A', 'B', 'C').
df = pd.DataFrame(data, columns=['A','B','C'], index = id_set[:,0])
# Replace both +inf and -inf (np.inf alone leaves negative infinity in place),
# then zero-fill every missing value.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.fillna(0)

# Data generation phase

# One cycle is one consecutive block of 480 rows; a trailing partial block is ignored.
cycles = len(df) // 480
print(cycles)
for cycle in range(cycles):  # outer loop: one figure per cycle
    count = '{:04}'.format(cycle)
    j = cycle * 480
    # Normalised frame of every column for THIS cycle, keyed by column name.
    normalized = {}
    for i in df:  # inner loop: prepare each column of the current cycle
        # exist_ok tolerates an existing folder without swallowing other errors
        os.makedirs(i, exist_ok=True)
        # Extrema of the whole column (all cycles) drive the normalisation.
        min_val = df[i].min()
        max_val = df[i].max()
        window = df[i].iloc[j:j+480]
        # NOTE(review): mkdf / print_df are defined elsewhere in the file;
        # presumably they reshape the 480 values into the matrix to plot.
        csv = print_df(mkdf(window))
        # Write the .csv matrix of this parameter, named after the cycle
        csv.to_csv(f'{i}/{i}{count}.csv', header=None, index=None)
        if 'C' in i:
            # C is normalised to [-40, +150]
            low, high = -40, 150
        else:
            # A and B are normalised to [-1, +1]
            low, high = -1, 1
        # Each column is normalised with its OWN min/max (the extrema of one
        # column must not be reused for another column).
        normalized[i] = print_df(mkdf(normalize(window, min_val, max_val, low, high)))
    df1, df2, df3 = normalized['A'], normalized['B'], normalized['C']

    # Plot all columns ['A','B','C'] in one window, side by side
    fig, axes = plt.subplots(nrows=1, ncols=3 , figsize=(20,10))
    ab_ticks = [-1.0,-0.75,-0.5,-0.25,0.00,0.25,0.5,0.75,1.0]

    sns.heatmap(df1, vmin=-1, vmax=1, cmap ="coolwarm", linewidths=.75 , linecolor='black', cbar=True , cbar_kws={"ticks": ab_ticks}, ax=axes[0])
    # the colour bar is the axes most recently added to the figure
    fig.axes[-1].set_ylabel('[MPa]', size=20)
    axes[0].set_title('A', fontsize=12, color='black', loc='left', style='italic')
    axes[0].axis('off')

    sns.heatmap(df2, vmin=-1, vmax=1, cmap ="coolwarm", cbar=True , cbar_kws={"ticks": ab_ticks}, ax=axes[1])
    fig.axes[-1].set_ylabel('[MPa]', size=20)
    axes[1].set_title('B', fontsize=12, color='black', loc='left', style='italic')
    axes[1].axis('off')

    sns.heatmap(df3, vmin=-40, vmax=150, cmap ="coolwarm" , cbar=True , cbar_kws={"ticks":[-40,150,-20,0,25,50,75,100,125]}, ax=axes[2])
    fig.axes[-1].set_ylabel('[°C]', size=20)
    axes[2].set_title('C', fontsize=12, color='black', loc='left', style='italic')
    axes[2].axis('off')

    fig.suptitle(f'Analysis of data in cycle Nr.: {count}', color='yellow', backgroundcolor='black', fontsize=48, fontweight='bold')
    fig.subplots_adjust(top=0.7, bottom=0.3, left=0.05, right=0.95, hspace=0.2, wspace=0.2)
    # NOTE: `i` still holds the last column visited by the inner loop, so the
    # combined figure is stored inside that column's folder.
    fig.savefig(f'{i}/{i}{i}{count}.png')
    plt.show()
    # Release the figure so memory does not grow with the number of cycles
    plt.close(fig)
This gets you the following three images as three separate figures with the data you provided:
Figure 1, Figure 2, Figure 3
Generally speaking, your code is quite messy. I get it, if you're new to programming and just want to analyse your data, you do whatever works, doesn't matter if it is pretty.
However, I think that the messy code means you can't properly look at the underlying logic of your script, which is how you got this problem.
I would recommend if you get a problem like that again to write out some 'pseudo code' with all of the loops and try to think about what you are trying to accomplish in each loop.
I am trying to replicate the following image in matplotlib and it seems barh is my only option. Though it appears that you can't stack barh graphs so I don't know what to do
If you know of a better python library to draw this kind of thing, please let me know.
This is all I could come up with as a start:
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
# Row labels and one y-position per label
people = tuple('ABCDEFGH')
y_pos = np.arange(len(people))
# Two random series, drawn in this order: first the "bottom" one, then the "top" one
bottomdata = 10 * np.random.rand(len(people)) + 3
topdata = 10 * np.random.rand(len(people)) + 3
fig, ax = plt.subplots(figsize=(10,8))
# Both series are drawn from the same origin at the same y-positions
for series, shade in ((bottomdata, 'r'), (topdata, 'g')):
    ax.barh(y_pos, series, color=shade, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
I would then have to add labels individually using ax.text which would be tedious. Ideally I would like to just specify the width of the part to be inserted then it updates the center of that section with a string of my choosing. The labels on the outside (e.g. 3800) I can add myself later, it is mainly the labeling over the bar section itself and creating this stacked method in a nice way I'm having problems with. Can you even specify a 'distance' i.e. span of color in any way?
Edit 2: for more heterogeneous data. (I've left the above method since I find it more usual to work with the same number of records per series)
Answering the two parts of the question:
a) barh returns a container of handles to all the patches that it drew. You can use the coordinates of the patches to aid the text positions.
b) Following these two answers to the question that I noted before (see Horizontal stacked bar chart in Matplotlib), you can stack bar graphs horizontally by setting the 'left' input.
and additionally c) handling data that is less uniform in shape.
One way to handle data that is less uniform in shape is simply to process each segment independently, as shown below.
import numpy as np
import matplotlib.pyplot as plt
# some labels for each row
people = ('A','B','C','D','E','F','G','H')
r = len(people)
# how many data points overall (average of 3 per person)
n = r * 3
# which person does each segment belong to?
rows = np.random.randint(0, r, (n,))
# how wide is the segment?
widths = np.random.randint(3,12, n,)
# what label to put on the segment (xrange in py2.7, range for py3)
labels = range(n)
colors ='rgbwmc'
patch_handles = []
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
# running left edge of the next segment, per row
left = np.zeros(r,)
# number of segments already drawn per row (used to cycle the colours)
row_counts = np.zeros(r, dtype=int)
# `row` (not `r`) is the loop variable so that the row count `r` stays intact
for (row, w, l) in zip(rows, widths, labels):
    print(row, w, l)
    bars = ax.barh(row, w, align='center', left=left[row],
                   color=colors[row_counts[row] % len(colors)])
    patch_handles.append(bars)
    left[row] += w
    row_counts[row] += 1
    # each call draws exactly one bar, so the container holds a single patch
    patch = bars[0]
    bl = patch.get_xy()
    # centre of the rectangle: lower-left corner plus half its extent
    x = 0.5*patch.get_width() + bl[0]
    y = 0.5*patch.get_height() + bl[1]
    ax.text(x, y, "%d%%" % (l), ha='center',va='center')
# one tick per row, derived from the labels instead of a hard-coded 8
y_pos = np.arange(r)
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
This produces a graph like this, with a different number of segments present in each series.
Note that this is not particularly efficient, since each segment uses an individual call to ax.barh. There may be more efficient methods (e.g. padding a matrix with zero-width segments or nan values), but this is likely to be problem-specific and is a distinct question.
Edit: updated to answer both parts of the question.
import numpy as np
import matplotlib.pyplot as plt
people = ('A','B','C','D','E','F','G','H')
segments = 4
# generate some multi-dimensional data & arbitrary labels
# data has one row per segment and one column per person
data = 3 + 10* np.random.rand(segments, len(people))
# percentages has one row per person and one column per segment
percentages = (np.random.randint(5,20, (len(people), segments)))
y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)
colors ='rgbwmc'
patch_handles = []
left = np.zeros(len(people)) # left alignment of data starts at zero
for i, d in enumerate(data):
    patch_handles.append(ax.barh(y_pos, d,
                                 color=colors[i%len(colors)], align='center',
                                 left=left))
    # accumulate the left-hand offsets
    left += d
# go through all of the bar segments and annotate
# j indexes the segment (one container per barh call), i indexes the person
for j, container in enumerate(patch_handles):
    for i, patch in enumerate(container.get_children()):
        bl = patch.get_xy()
        # centre of the rectangle: lower-left corner plus half its extent
        x = 0.5*patch.get_width() + bl[0]
        y = 0.5*patch.get_height() + bl[1]
        # va='center' so the text is centred vertically on (x, y) as well
        ax.text(x,y, "%d%%" % (percentages[i,j]), ha='center', va='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.set_xlabel('Distance')
plt.show()
You can achieve a result along these lines (note: the percentages I used have nothing to do with the bar widths, as the relationship in the example seems unclear):
See Horizontal stacked bar chart in Matplotlib for some ideas on stacking horizontal bar plots.
Imports and Test DataFrame
Tested in python 3.10, pandas 1.4.2, matplotlib 3.5.1, seaborn 0.11.2
For vertical stacked bars see Stacked Bar Chart with Centered Labels
import pandas as pd
import numpy as np
# reproducible sample data with the same two series as in the OP
np.random.seed(365)
people = tuple('ABCDEFGH')
n_people = len(people)
# draw order matters for reproducibility: bottom series first, then top series
bottomdata = 10 * np.random.rand(n_people) + 3
topdata = 10 * np.random.rand(n_people) + 3
# wide-format frame: one row per person, one column per series
df = pd.DataFrame(index=people, data={'Female': bottomdata, 'Male': topdata})
# display(df)
Female Male
A 12.41 7.42
B 9.42 4.10
C 9.85 7.38
D 8.89 10.53
E 8.44 5.92
F 6.68 11.86
G 10.67 12.97
H 6.05 7.87
Updated with matplotlib v3.4.2
Use matplotlib.pyplot.bar_label
See How to add value labels on a bar chart for additional details and examples with .bar_label.
labels = [f'{v.get_width():.2f}%' if v.get_width() > 0 else '' for v in c ] for python < 3.8, without the assignment expression (:=).
Plotted using pandas.DataFrame.plot with kind='barh'
# stacked horizontal bars drawn straight from the wide dataframe
ax = df.plot(kind='barh', stacked=True, figsize=(8, 6))
for container in ax.containers:
    # build one label per bar; a bar without positive width gets an empty label
    labels = []
    for bar in container:
        w = bar.get_width()
        labels.append(f'{w:.2f}%' if w > 0 else '')
    # write the labels in the middle of each bar section
    ax.bar_label(container, labels=labels, label_type='center')
    # if there are no nan or 0 length sections, the label list above is not
    # needed; the following single call with fmt adds the % instead:
    # ax.bar_label(container, fmt='%.2f%%', label_type='center')
# place the legend outside the plotting area
ax.legend(bbox_to_anchor=(1.025, 1), loc='upper left', borderaxespad=0.)
# axis titles
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Using seaborn
sns.barplot does not have an option for stacked bar plots, however, sns.histplot and sns.displot can be used to create horizontal stacked bars.
seaborn typically requires the dataframe to be in a long, instead of wide, format, so use pandas.DataFrame.melt to reshape the dataframe.
Reshape dataframe
# reshape wide -> long: the index becomes a 'People' column, then every
# remaining column is melted into ('Gender', 'Percent') pairs
df = df.reset_index().rename(columns={'index': 'People'})
dfm = pd.melt(df, id_vars='People', var_name='Gender', value_name='Percent')
# display(dfm)
People Gender Percent
0 A Female 12.414557
1 B Female 9.416027
2 C Female 9.846105
3 D Female 8.885621
4 E Female 8.438872
5 F Female 6.680709
6 G Female 10.666258
7 H Female 6.050124
8 A Male 7.420860
9 B Male 4.104433
10 C Male 7.383738
11 D Male 10.526158
12 E Male 5.916262
13 F Male 11.857227
14 G Male 12.966913
15 H Male 7.865684
sns.histplot: axes-level plot
fig, axe = plt.subplots(figsize=(8, 6))
# weighted, stacked histogram on the y-axis acts as a horizontal stacked bar
stack_options = dict(y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack')
sns.histplot(data=dfm, ax=axe, **stack_options)
# annotate every bar of every container at its centre
for container in axe.containers:
    axe.bar_label(container, fmt='%.2f%%', label_type='center')
axe.set_xlabel('Percent')
plt.show()
sns.displot: figure-level plot
# same stacked configuration as an axes-level plot, but created at figure level
stack_options = dict(y='People', hue='Gender', discrete=True, weights='Percent', multiple='stack')
g = sns.displot(data=dfm, height=6, **stack_options)
# visit every axes exposed by g.axes.flat
for axe in g.axes.flat:
    # annotate every bar of every container at its centre
    for container in axe.containers:
        axe.bar_label(container, fmt='%.2f%%', label_type='center')
    axe.set_xlabel('Percent')
plt.show()
Original Answer - before matplotlib v3.4.2
The easiest way to plot a horizontal or vertical stacked bar, is to load the data into a pandas.DataFrame
This will plot, and annotate correctly, even when all categories ('People'), don't have all segments (e.g. some value is 0 or NaN)
Once the data is in the dataframe:
It's easier to manipulate and analyze
It can be plotted with the matplotlib engine, using:
pandas.DataFrame.plot.barh
label_text = f'{width}' for annotations
pandas.DataFrame.plot.bar
label_text = f'{height}' for annotations
SO: Vertical Stacked Bar Chart with Centered Labels
These methods return a matplotlib.axes.Axes or a numpy.ndarray of them.
Using the .patches method unpacks a list of matplotlib.patches.Rectangle objects, one for each of the sections of the stacked bar.
Each .Rectangle has methods for extracting the various values that define the rectangle.
Each .Rectangle is in order from left to right, and bottom to top, so all the .Rectangle objects, for each level, appear in order, when iterating through .patches.
The labels are made using an f-string, label_text = f'{width:.2f}%', so any additional text can be added as needed.
Plot and Annotate
Plotting the bar, is 1 line, the remainder is annotating the rectangles
# a single call draws the stacked horizontal bars
ax = df.plot.barh(stacked=True, figsize=(8, 6))
# ax.patches is iterated to reach each rectangle that was drawn
for rect in ax.patches:
    width = rect.get_width()
    # nothing to annotate unless the section has a positive width
    if not (width > 0):
        continue
    # lower-left corner and vertical extent of the rectangle
    x, y = rect.get_xy()
    height = rect.get_height()
    # the width of the section is used as the label text
    label_text = f'{width:.2f}%' # f'{width:.2f}' to format decimal values
    # place the text at the centre of the rectangle
    label_x = x + width / 2
    label_y = y + height / 2
    ax.text(label_x, label_y, label_text, ha='center', va='center', fontsize=8)
# place the legend outside the plotting area
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
# axis titles
ax.set_ylabel("People", fontsize=18)
ax.set_xlabel("Percent", fontsize=18)
plt.show()
Example with Missing Segment
# set one of the dataframe values to 0
# (purely positional indexing: row position 4, column position 1)
df.iloc[4, 1] = 0
Note the annotations are all in the correct location from df.
For this case, the above answers work perfectly. The issue I had, and didn't find a plug-and-play solution online, was that I often have to plot stacked bars in multi-subplot figures, with many values, which tend to have very non-homogenous amplitudes.
(Note: I usually work with pandas dataframes and matplotlib. I couldn't get matplotlib's bar_label() method to work every time.)
So, I just give a kind of ad-hoc, but easily generalizable solution. In this example, I was working with single-row dataframes (for power-exchange monitoring purposes per hour), so, my dataframe (df) had just one row.
(I provide an example figure to show how this can be useful in very densely-packed plots)
[enter image description here][1]
[1]: https://i.stack.imgur.com/9akd8.png
def magic_stacked_bar(df, waterfall=False, cyclic_offset_x=None, cyclic_offset_y=None, ax=None):
    '''
    Produce a stacked, horizontal bar plot with one annotated segment per column.

    df --> pandas dataframe. Columns are used as the iterator, and only the
           first value of each column is used.
    waterfall --> bool: if True, each segment is drawn at its own y position
           (the column name), so a perpendicular offset is added on top of the
           stack direction; if False, all segments share y=0 and a legend
           names the columns.
    cyclic_offset_x --> sequence (of any length) or None: x-offsets, in pixels,
           for the annotation text, cycled through segment by segment.
           None means no offset.
    cyclic_offset_y --> sequence (of any length) or None: same for the y-offsets.
    ax --> matplotlib Axes, or None: if None, creates a new axis and figure.

    Returns the given ax. When ax is None the new figure is titled, shown with
    plt.show() and nothing is returned.
    '''
    if cyclic_offset_x is None:
        cyclic_offset_x = [0, 0]
    if cyclic_offset_y is None:
        cyclic_offset_y = [0, 0]
    # remember whether the caller supplied the axes: it decides the return path
    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots()
        fig.set_size_inches(19, 10)
    prev = 0  # summation variable to make it stacked
    for cycler, c in enumerate(df.columns):
        # only the first row of the column is plotted
        value = df[c].values[0]
        if waterfall:
            y, label = c, ""   # bidirectional stack
        else:
            y, label = 0, c    # unidirectional stack
        ax.barh(y=y, width=value, height=1, left=prev, label=label)
        prev += value  # add to sum-stack
        # cycle through the offsets, wrapping around at the end of each sequence
        offset_x = cyclic_offset_x[cycler % len(cyclic_offset_x)]
        offset_y = cyclic_offset_y[cycler % len(cyclic_offset_y)]
        # anchor the annotation at the middle of the segment just drawn; the
        # scalar `value` is used (not the whole column array) so that the
        # anchor is a single point even for multi-row dataframes
        ax.annotate(text="{}".format(int(value)), xy=(prev - value / 2, y),
                    xytext=(offset_x, offset_y), textcoords='offset pixels',
                    ha='center', va='top', fontsize=8,
                    arrowprops=dict(facecolor='black', shrink=0.01, width=0.3, headwidth=0.3),
                    bbox=dict(boxstyle='round', facecolor='grey', alpha=0.5))
    if not waterfall:
        # if waterfall, the y tick labels name the columns; otherwise the
        # legend does
        ax.legend()
    if owns_figure:
        ax.set_title("Voi la")
        ax.set_xlabel("UltraWatts")
        plt.show()
    else:
        return ax
''' (Sometimes, it is more tedious and requires some custom functions to make the labels look alright.
'''
A, B = 80,80
n_units = df.shape[1]
cyclic_offset_x = -A*np.cos(2*np.pi / (2*n_units) *np.arange(n_units))
cyclic_offset_y = B*np.sin(2*np.pi / (2*n_units) * np.arange(n_units)) + B/2