I am trying to plot a repeating color horizontal stacked bar plot. But I couldn't make it!
This is my starting code:
import numpy as np
from matplotlib import pyplot as plt
# Each record maps a label to its value; ``data_orders`` lists, per row, the
# left-to-right order in which that row's labels are stacked.
dataset = [{'A': 19, 'B': 55},
           {'A': 10, 'B': 68}]
data_orders = [['B', 'A'],
               ['A', 'B']]
colors = ["green", "blue"]
names = sorted(dataset[0].keys())
# values[row, k] is the width of the k-th segment drawn in that row.
values = np.array([[data[name] for name in order]
                   for data, order in zip(dataset, data_orders)])
# A segment starts where the previous ones end: prepend 0 to the running
# total of each row and drop the final (overall) sum.
lefts = np.insert(np.cumsum(values, axis=1), 0, 0, axis=1)[:, :-1]
orders = np.array(data_orders)
bottoms = np.arange(len(data_orders))
for name, color in zip(names, colors):
    # (row, column) positions at which this label occurs.
    idx = np.where(orders == name)
    value = values[idx]
    left = lefts[idx]
    # ``plt.bar(left=..., orientation="horizontal")`` relied on a keyword that
    # current matplotlib no longer accepts; ``barh`` is the public API for
    # horizontal bars.  Indexing ``bottoms`` by the matched rows keeps the
    # arrays aligned even when a label is absent from some row.
    plt.barh(y=bottoms[idx[0]], width=value, height=0.8, left=left,
             color=color, label=name)
plt.legend(loc="best", bbox_to_anchor=(1.0, 1.00))
plt.subplots_adjust(right=0.85)
plt.show()
Output:
But I want something like:
And want to provide a dataset with labels-values pairs and get the second output figure.
Maybe my code is a bad start!
Any help will be very, very helpful! Thanks...
Related
I'm using this nice boxplot graph, answer from #Parfait.
I got an out of bound error on j and had to use range(i*5,i*5+5). Why?
I'd like to set the median to a particular color, let's say red. medianprops=dict(color="red") won't work. How to do it?
How to set the y-axis tick labels to the same color as the boxes?
Disclaimer: I don't know what I'm doing.
Here's the code using random data :
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import string
import matplotlib.colors as mc
import colorsys
# data
# Random demo data: 100 samples for each of 16 columns named A..P.
df = pd.DataFrame(
    np.random.normal(np.random.randint(5, 15), np.random.randint(1, 5), size=(100, 16)),
    columns=list(string.ascii_uppercase)[:16],
)
# Boxplot
fig, ax = plt.subplots(figsize=(9, 10))
medianprops = dict(color="red")
# The median style only takes effect when it is handed to the boxplot call;
# previously the dict was built but never used.  Note that any later code
# that recolours every entry of ``ax.lines`` will overwrite it again.
ax = sns.boxplot(data=df, orient="h", showfliers=False, palette="husl",
                 medianprops=medianprops)
ax = sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5,
                   palette="husl")  # show data points
ax.set_title("Title")
plt.xlabel("X label")
def lighten_color(color, amount=0.5):
    """Return *color* with its HLS lightness rescaled by *amount*.

    The lightness ``l`` becomes ``1 - amount * (1 - l)``, so an ``amount``
    below 1 lightens the colour and an ``amount`` above 1 darkens it.  The
    new lightness is clamped to ``[0, 1]`` so that a large ``amount`` cannot
    produce out-of-range RGB components.

    Args:
        color: A matplotlib colour name, or any other value that
            ``mc.to_rgb`` accepts.
        amount: Factor applied to the distance from full lightness.

    Returns:
        The adjusted colour as an ``(r, g, b)`` tuple.
    """
    # --------------------- SOURCE: #IanHincks ---------------------
    try:
        c = mc.cnames[color]
    except (KeyError, TypeError):
        # Not a named colour (KeyError) or an unhashable value such as a
        # list or array (TypeError): let to_rgb interpret it directly.
        c = color
    hue, lightness, saturation = colorsys.rgb_to_hls(*mc.to_rgb(c))
    lightness = min(1.0, max(0.0, 1 - amount * (1 - lightness)))
    return colorsys.hls_to_rgb(hue, lightness, saturation)
# Recolour each box outline and the Line2D objects that belong to it.
n_boxes = len(ax.artists)
# The number of lines drawn per box depends on the plotting options (this plot
# uses showfliers=False), so derive it from the axes instead of hard-coding it.
# NOTE(review): assumes ax.lines holds only the boxplot's lines at this point.
lines_per_box = len(ax.lines) // n_boxes if n_boxes else 0
for i, artist in enumerate(ax.artists):
    # Outline the box in a shade derived from its own face colour.
    col = lighten_color(artist.get_facecolor(), 1.2)
    artist.set_edgecolor(col)
    # Apply the same colour to this box's lines, including their marker
    # face and edge colours.
    for line in ax.lines[i * lines_per_box:(i + 1) * lines_per_box]:
        line.set_color(col)
        line.set_mfc(col)
        line.set_mec(col)
To change the color of the median, you can use the medianprops in sns.boxplot(..., medianprops=...). If you also set a unique label, that label can be tested again when iterating through the lines.
To know how many lines belong to each boxplot, you can divide the number of lines by the number of artists (just after the boxplot has been created, before other elements have been added to the plot). Note that a line potentially has 3 colors: the line color, the marker face color and the marker edge color. Matplotlib creates the fliers as an invisible line with markers. The code below thus also changes these colors to make it more robust to different options and possible future changes.
Looping simultaneously through the boxes and the y tick labels allows copying the color. Making them a bit larger and darker helps for readability.
import matplotlib.pyplot as plt
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb, to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
def enlighten(color, factor=0.5):
    """Return *color* with its HSV value moved towards 1.

    The value channel ``v`` is replaced by ``1 - factor * (1 - v)``; hue and
    saturation are kept.  Any alpha channel is dropped by ``to_rgb``.
    """
    hue, saturation, value = rgb_to_hsv(to_rgb(color))
    brighter_value = 1 - factor * (1 - value)
    return hsv_to_rgb((hue, saturation, brighter_value))
def endarken(color, factor=0.5):
    """Return *color* with its HSV value scaled by *factor*.

    Hue and saturation are kept; any alpha channel is dropped by ``to_rgb``.
    """
    hue, saturation, value = rgb_to_hsv(to_rgb(color))
    darker_value = factor * value
    return hsv_to_rgb((hue, saturation, darker_value))
# Demo data: 16 cumulative random walks of 100 steps, one column per element.
df = pd.DataFrame(
    np.random.normal(1, 5, size=(100, 16)).cumsum(axis=0),
    columns=['Hydrogen', 'Helium', 'Lithium', 'Beryllium', 'Boron', 'Carbon', 'Nitrogen', 'Oxygen',
             'Fluorine', 'Neon', 'Sodium', 'Magnesium', 'Aluminum', 'Silicon', 'Phosphorus', 'Sulfur'],
)
sns.set_style('white')
fig, ax = plt.subplots(figsize=(9, 10))
colors = sns.color_palette("husl", len(df.columns))
# The median lines get their own colour and a label so they can be recognised
# (and skipped) when the other lines are recoloured below.
sns.boxplot(
    data=df, orient="h", showfliers=False, palette='husl',
    medianprops={"color": "yellow", "label": 'median'}, ax=ax,
)
# Measured right after the boxplot, before anything else is added to the axes.
lines_per_boxplot = len(ax.lines) // len(ax.artists)
for index, (patch, tick_label) in enumerate(zip(ax.artists, ax.get_yticklabels())):
    face = patch.get_facecolor()
    # Tick label: darker shade of the box colour, enlarged.
    tick_label.set_color(endarken(face))
    tick_label.set_fontsize(20)
    # Box and its lines: lighter shade of the box colour.
    color = enlighten(face)
    patch.set_color(color)
    first = index * lines_per_boxplot
    for box_line in ax.lines[first:first + lines_per_boxplot]:
        if box_line.get_label() == 'median':
            continue
        box_line.set_color(color)
        box_line.set_markerfacecolor(color)
        box_line.set_markeredgecolor(color)
sns.stripplot(data=df, orient="h", jitter=True, size=7, alpha=0.5, palette='husl', ax=ax)
sns.despine(ax=ax)
ax.set_title("Title")
ax.set_xlabel("X label")
plt.tight_layout()
plt.show()
I am only answering point 2 of my question.
After tinkering, I found this to work :
# Each box has 5 associated Line2D objects; the author found by experiment
# that the fifth one (offset 4) is the median.
# Colour the other four with the box colour and leave the median untouched.
n = 5  # this was for tinkering
median_index = i * n + 4
for j in range(i * n, i * n + n):
    if j == median_index:
        # Skip explicitly instead of re-colouring the previously fetched line.
        continue
    line = ax.lines[j]
    line.set_color(col)
Again, I don't know what I'm doing. So someone more knowledgeable may provide a more valuable answer.
I removed the stripplot for better clarity.
How can I get the colors of the elements in a pandas bar plot?
Example: I have a bar plot with historic data for a couple of columns. Now I want to plot a horizontal line with the mean value of each column in the same color as the bars.
This question discusses access to colors of matplotlib line plots: How to get color of most recent plotted line in Python's plt
It allowed me to get the colors of line plots, but not of bar plots. I suppose there could be an equivalent to get_lines(), but I cannot find it.
"""Access bar plot colors."""
import pandas as pd
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
ax = df.plot()
# Line plot: pair each column with its plotted line, in column order.
for col, line in zip(df, ax.get_lines()):
    color = line.get_color()
    print(color)
ax = df.plot(kind='bar')
# Bar plot: indexing ax.get_lines() here raised
# "IndexError: list index out of range".  Each column's bars are grouped in
# one entry of ax.containers; read the colour from that group's first bar.
for col, bars in zip(df, ax.containers):
    color = bars[0].get_facecolor()
    print(color)
You could use .patches:
>>> ax = df.plot(kind='bar')
>>> print(ax.patches)
[<matplotlib.patches.Rectangle object at 0x7f02e2ba3760>, <matplotlib.patches.Rectangle object at 0x7f02e2ba35e0>, <matplotlib.patches.Rectangle object at 0x7f02e2ba3dc0>, <matplotlib.patches.Rectangle object at 0x7f02e2ba35b0>]
>>> for i, col in enumerate(df):
... color = ax.patches[i].get_facecolor()
... print(color)
...
(0.12156862745098039, 0.4666666666666667, 0.7058823529411765, 1.0)
(0.12156862745098039, 0.4666666666666667, 0.7058823529411765, 1.0)
However as you can see enumerating them doesn’t tell you which patch corresponds to which data. Here we’ve grabbed 2 rectangles of the same color. Therefore I would recommend the .get_legend_handles_labels() function that is used to build legends:
>>> print(ax.get_legend_handles_labels())
([<BarContainer object of 2 artists>, <BarContainer object of 2 artists>], ['col1', 'col2'])
>>> for bars, column in zip(*ax.get_legend_handles_labels()):
... color = bars[0].get_facecolor()
... print(column, color)
...
col1 (0.12156862745098039, 0.4666666666666667, 0.7058823529411765, 1.0)
col2 (1.0, 0.4980392156862745, 0.054901960784313725, 1.0)
I would change it around and plot the data yourself explicitly, instead of trying to piece out what you need afterwards from the returned axes. But Cimbali already showed how you can do that, if needed.
It's a little bit more code, but calling ax.bar yourself allows you to capture the return value.
For example plot with:
fig, ax = plt.subplots()
idx = df.index.values
width = 0.25
# Shift the two series half a bar width to either side of each index position.
half_width = width / 2
col1_bars = ax.bar(idx - half_width, 'col1', width, label='Col1', data=df)
col2_bars = ax.bar(idx + half_width, 'col2', width, label='Col2', data=df)
ax.set_xticks(idx)
ax.legend(loc=2)
You can then extract the colors, or any other property of the bars, by examining the returned BarContainer objects:
# Face colour of every bar in each returned container.
col1_colors = [bar.get_facecolor() for bar in col1_bars]
col2_colors = [bar.get_facecolor() for bar in col2_bars]
Combining the answers by Rutger and Cimbali, I came up with my own solution, which I leave here for reference: it is possible to find the correct BarContainer and get the color that way. But Cimbali's use of get_legend_handles_labels() might be more elegant. :-)
"""Access bar plot colors."""
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})

# Line plot: each column's colour comes from its plotted line.
ax = df.plot()
for col, plotted_line in zip(df, ax.get_lines()):
    color = plotted_line.get_color()
    print(color)
    ax.axhline(y=df[col].mean(), label=col+' (mean)', linestyle='--', c=color)
plt.legend()
plt.show()

# Bar plot, variant 1: read the colour from the first bar of each container.
ax = df.plot(kind='bar')
for col, container in zip(df, ax.containers):
    color = container[0].get_facecolor()
    ax.axhline(y=df[col].mean(), label=col+' (mean)', linestyle='--', c=color)
plt.legend()
plt.show()

# Bar plot, variant 2: pair the legend handles with their labels.
ax = df.plot(kind='bar')
handles, labels = ax.get_legend_handles_labels()
for bars, col in zip(handles, labels):
    color = bars[0].get_facecolor()
    ax.axhline(y=df[col].mean(), label=col+' (mean)', linestyle='--', c=color)
plt.legend()
plt.show()
I am attempting to build a violin plot to illustrate depth on the y-axis and the distance from a known point on the x-axis. I am able to get the x-axis labels spaced appropriately along the x-axis based on the variable distances, but I am unable to get the violin plots to align with them. The plots appear to be shifted toward the y-axis. Any help would be appreciated. My code is below:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Raw string: a backslash in a Windows path must not be read as an escape.
path = r'O:\info1.csv'
df = pd.read_csv(path)
item = ['a', 'b', 'c', 'd', 'e', 'f']
dist = [450, 1400, 2620, 3100, 3830, 4940]
plt.rcParams.update({'font.size': 15})
fig, axes1 = plt.subplots(figsize=(20, 10))
# Draw on the axes created above rather than on the implicit current axes.
axes1 = sns.violinplot(x='item', y='surface', data=df, hue='item', order=item, ax=axes1)
axes1.invert_yaxis()
axes1.set_xlabel('Item')
axes1.set_ylabel('Depth')
# NOTE(review): seaborn draws the violins at ordinal positions 0..n-1, so
# placing ticks at the raw distances stretches the axis and leaves the violins
# bunched near the y-axis.  Positioning them at the distances themselves needs
# matplotlib's Axes.violinplot(..., positions=dist).
axes1.set_xticks(dist)
plt.xticks(rotation=20)
plt.show()
Example dataset:
You cannot use seaborn violin plot, because from the vignette:
This function always treats one of the variables as categorical and
draws data at ordinal positions (0, 1, … n) on the relevant axis, even
when the data has a numeric or date type.
So if you draw it directly with seaborn, it is categorical:
# One violin per 'dist' value, coloured by item and not shifted within its
# category (dodge=False).
sns.violinplot(
    data=df,
    x='dist',
    y='surface',
    hue='item',
    dodge=False,
    cut=0,
)
To place the violins according to distance, you need to use matplotlib. First we get the data out in the required format and define a color palette:
# One array of 'surface' samples per item, in groupby order.
surface_values = [np.array(samples) for _, samples in df.groupby('item')['surface']]
# Mean 'dist' per item, in the same groupby order as the sample arrays.
dist_values = df.groupby('item')['dist'].mean()
pal = ["crimson", "darkblue", "rebeccapurple"]
You need to set the width, provide the distance, and for the inner "box", we modify the code from here:
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
# Draw bare violin bodies at the groups' distances; the inner box is added
# by hand below.
parts = ax.violinplot(surface_values, widths=200, positions=dist_values,
                      showmeans=False, showmedians=False, showextrema=False)
for i, pc in enumerate(parts['bodies']):
    # Cycle through the palette so that having more groups than colours does
    # not raise IndexError.
    pc.set_facecolor(pal[i % len(pal)])
    pc.set_edgecolor('black')
    pc.set_alpha(1)
# Percentiles are computed per group because the groups may differ in length,
# in which case one np.percentile(..., axis=1) call over the list fails.
quartile1, medians, quartile3 = np.array(
    [np.percentile(group, [25, 50, 75]) for group in surface_values]).T
# NOTE(review): adjacent_values is not defined in this snippet; it is
# presumably the helper from the example this code was adapted from.  It is
# given a sorted copy of each group because its argument was named
# sorted_array.
whiskers = np.array([
    adjacent_values(np.sort(group), q1, q3)
    for group, q1, q3 in zip(surface_values, quartile1, quartile3)])
whiskersMin, whiskersMax = whiskers[:, 0], whiskers[:, 1]
inds = dist_values
# Inner "box": white median dot, thick quartile bar, thin whisker line.
ax.scatter(inds, medians, marker='o', color='white', s=30, zorder=3)
ax.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5)
ax.vlines(inds, whiskersMin, whiskersMax, color='k', linestyle='-', lw=1)
If you don't need the inner box, you can just call plt.violinplot ...
thanks for including a bit of data.
To change your plot, the item and dist variables in your code need to be adjusted, and remove the item = [a,b...] and dist = [] arrays in your code. The ticks on the x-axis using the axes1.set_xticks needs a bit of tweaking to get what you're looking for there.
Example 1:
removed the two arrays that were creating the plot you were seeing before; violinplot function unchanged.
# item = ['a', 'b', 'c', 'd', 'e', 'f'] * Removed
# dist = [450, 1400, 2620, 3100, 3830, 4940] * Removed
plt.rcParams.update({'font.size': 15})
fig, axes1 = plt.subplots(figsize=(20, 10))
# seaborn is imported as ``sns`` in this file; ``sb`` was never defined.
axes1 = sns.violinplot(x='item', y='surface', data=df, hue='item', inner='box')
axes1.invert_yaxis()
axes1.set_xlabel('Item')
axes1.set_ylabel('Depth')
#axes1.set_xticks(dist) * Removed
plt.xticks(rotation=20)
plt.show()
Inside each curve, there is a black shape with a white dot inside. This is the miniature box plot mentioned above. If you'd like to remove the box plot, you can set the inner = None parameter in the violinplot call to simplify the look of the final visualization.
Example 2:
put dist on your x axis in place of the xticks.
plt.rcParams.update({'font.size': 15})
plt.subplots(figsize=(20, 10))
# Put 'dist' as your x input, keep your categorical variable (hue) equal to 'item'
# seaborn is imported as ``sns`` in this file; ``sb`` was never defined.
axes1 = sns.violinplot(data=df, x='dist', y='surface', hue='item', inner='box')
axes1.invert_yaxis()
axes1.set_xlabel('Item')
axes1.set_ylabel('Depth')
I'm not confident the items and the distances you are working with have a relationship you want to show on the x-axis, or if you just want to use those integers as your tick marks for that axis. If there is an important relationship between the item and the dist, you could use a dictionary new_dict = {450: 'a', 1400: 'b', 2620: 'c' ...
Hope you find this helpful.
I have the following code, which creates a horizontal bar chart:
data = pandas.read_csv('C:/py/matplotlib/02-BarCharts/data.csv')
responders_id = data['Responder_id']
langs_worked_with = data['LanguagesWorkedWith']
# Tally how many responses mention each language.
languages = Counter()
# A missing answer is read as NaN (a float), which has no .split(); skip those.
for response in langs_worked_with.dropna():
    languages.update(response.split(";"))
# most_common() is ordered from most to least frequent; reverse it so the
# lists run from the 15th most common language up to the most common one.
top_languages = languages.most_common(15)[::-1]
langs = [name for name, _ in top_languages]
langs_users_num = [count for _, count in top_languages]
plt.barh(langs, langs_users_num)
plt.tight_layout()
plt.show()
and after running above code I get this:
the data which represents this chart is parsed from stackoverflow and the same chart in stackoverflow looks like below. How do I modify above code to add percentage as shown below.
Please help me with this. Thank you
Here is one way to do it. I chose some sample data. The idea is
Create new y-tick labels by adding the name of the language and the percentage
Assign these modified labels on the y-axis
Hide the frame and the x-axis of the figure together with the y-axis ticks
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
total = 90000
langs = ['C', 'C++', 'Java', 'Python']
langs_users_num = np.array([32000, 40000, 50000, 60000])
percent = langs_users_num / total * 100
# Tick labels of the form "<language> <share>%", share with two decimals.
new_labels = []
for lang, share in zip(langs, percent):
    new_labels.append(lang + ' {:.2f}%'.format(share))
plt.barh(langs, langs_users_num, color='lightskyblue', edgecolor='blue')
plt.yticks(range(len(langs)), new_labels)
plt.tight_layout()
# Strip the frame, the x-axis and the y tick marks; only bars and labels stay.
for side in ax.spines.values():
    side.set_visible(False)
ax.get_xaxis().set_visible(False)
ax.tick_params(axis="y", left=False)
plt.show()
I have a pandas data frame df like this
NAME VALUE ID
A 0.2 X
B 0.4 X
C 0.5 X
D 0.8 X
...
Z 0.3 X
I would like to color all the points by the 'NAME' column by specifying the hue='NAME' but specify the color for ONE point: B.
How do you specify the color for 1 point only, and have the "hue" command take care of the rest (where each point A-Z has a unique color)?
Right now this is my command to plot, where hue is the NAME.
# Strip plot of VALUE per ID, coloured by NAME.
plot = sns.stripplot(
    data=df,
    x="ID",
    y="VALUE",
    hue="NAME",
    jitter=True,
    c=df['NAME'],
    s=7,
    linewidth=1,
)
You can replace one color in the palette by converting it to a list of colors and then replace one of the colors by some other color of your liking.
import pandas as pd
import numpy as np;np.random.seed(42)
import matplotlib.pyplot as plt
import seaborn as sns
# Demo frame: one row per letter A..Z with a random VALUE, sorted descending.
letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
df = pd.DataFrame({
    "NAME": letters,
    "VALUE": np.sort(np.random.rand(len(letters)))[::-1],
    "ID": ["X"] * len(letters),
})
special_letter = "B"
special_color = "indigo"
# Build one palette colour per hue level, then overwrite the special one.
levels = df["NAME"].unique()
colors = sns.color_palette("hls", len(levels))
inx = levels.tolist().index(special_letter)
colors[inx] = special_color
ax = sns.stripplot(
    x="ID", y="VALUE", hue="NAME", data=df,
    jitter=True, s=7, palette=colors,
)
# Legend outside the axes, in three columns; shrink the axes to make room.
ax.legend(ncol=3, bbox_to_anchor=(1.05, 1), loc=2)
ax.figure.subplots_adjust(right=0.6)
plt.show()
Instead of providing a palette directly, one may also (thanks to #mwaskom for pointing that out) use a dictionary of (hue name, color) pairs:
# Map every hue level to a palette colour, then override the special one.
levels = df["NAME"].unique()
colors = dict(zip(levels, sns.color_palette("hls", len(levels))))
colors[special_letter] = special_color