Categorize and order bar chart by Hue - python

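I want to plot the two countries with the highest count for each category in `part`. With the code below, both categories are drawn on one shared country axis (see "What I got"). Instead, I would like each `part` to appear as its own group on the x-axis, containing only its top two countries (see "What I want").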
Is there an option?
import pandas as pd
import seaborn as sns
# Sample data: one row per observation, with its count, country and part.
d = {'count': [50, 20, 30, 100, 3, 40, 5],
'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
'part': ['b', 'b', 's', 's', 'b', 's', 's']
}
df = pd.DataFrame(data=d)
print(df)
# Debug aid (disabled): print the rows that end up in the plot, i.e.
# df.sort_values('count', ascending=False).groupby('part').head(2)
# Sort by count (descending) and keep the first two rows of every part, then
# draw them in a single bar chart with country on the x-axis and part as hue.
ax = sns.barplot(x="country", y="count", hue='part',
data=df.sort_values('count', ascending=False).groupby('part').head(2), palette='GnBu')
What I got
What I want

You can always not use seaborn and plot everything in matplotlib directly.
from matplotlib import pyplot as plt
import pandas as pd
# Newer matplotlib releases ship the seaborn style sheets under the
# 'seaborn-v0_8*' names only; use whichever name this installation provides.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

df = pd.DataFrame({
    'count': [50, 20, 30, 100, 3, 40, 5],
    'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
    'part': ['b', 'b', 's', 's', 'b', 'b', 's']
})

fig, ax = plt.subplots()
offset = .2  # horizontal distance of each bar from the centre of its group
xticks, xlabels = [], []
handles, labels = [], []
for i, (part, group) in enumerate(df.groupby('part')):
    # Only the two rows with the largest count are drawn for each part.
    plot_data = group.nlargest(2, 'count')
    # One position per plotted row, so that a part with a single row still
    # gets exactly one tick and one label.
    x = [i - offset, i + offset][:len(plot_data)]
    barcontainer = ax.bar(x=x, height=plot_data['count'], width=.35)
    xticks += x
    xlabels += plot_data['country'].tolist()
    # The first bar of each group stands in for the whole part in the legend.
    handles.append(barcontainer[0])
    labels.append(part)

# Label every bar with its country and add one legend entry per part.
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels)
ax.legend(handles=handles, labels=labels, title='Part')
plt.show()

The following approach creates a FacetGrid for your data, with one subplot per `part`. Seaborn 0.11.2 introduced the helpful `g.axes_dict`, which maps each facet value to its Axes. (In the example data I changed the `part` of the second 'BG' entry to 'b', assuming that each country/part combination occurs only once, as in the example plots.)
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
d = {'count': [50, 20, 30, 100, 3, 40, 5],
     'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
     'part': ['b', 'b', 's', 's', 'b', 'b', 's']
     }
df = pd.DataFrame(data=d)
sns.set()
# One facet per part; the x-axes are independent because every part shows
# different countries, while the count axis is shared.
g = sns.FacetGrid(data=df, col='part', col_wrap=2, sharey=True, sharex=False)
for part, df_part in df.groupby('part'):
    # `order` restricts the bars to the two countries with the largest count.
    order = df_part.nlargest(2, 'count')['country']
    ax = sns.barplot(data=df_part, x='country', y='count', order=order, palette='summer', ax=g.axes_dict[part])
    ax.set(xlabel=f'part = {part}')
g.set_ylabels('count')
plt.tight_layout()
plt.show()

Related

How to make a cell higher in matplotlib using the plt.table function?

The following code uses the `colWidths` argument to adjust the width of the table cells:
import matplotlib as mpl
import matplotlib.pyplot as plt
# Use the FangSong font for sans-serif text
# (presumably so the Chinese labels below render - TODO confirm).
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['axes.unicode_minus'] = False
# Pie chart inputs: one label, share, explode offset and colour per wedge.
labels = ['A难度水平', 'B难度水平', 'C难度水平', 'D难度水平']
students = [0.35, 0.15, 0.2, 0.3]
explode = [0.1, 0.1, 0.1, 0.1]
colors = ['r', 'y', 'b', 'gray']
plt.pie(students, autopct='%3.1f%%',
labels=labels, textprops={'fontsize': 12,
'family': 'FangSong',
'fontweight': 'bold'},
explode=explode, colors=colors)
# Table inputs: three rows of four cells, with a matching grid of cell colours
# and one label per row.
studentValues = [['A', 'B', 'C', 'D'], [350, 150, 200, 300], ['test', 'test', 'test', 'test']]
cellcolors = [['r', 'y', 'b', 'gray'], ['b', 'gray', 'y', 'r'], ['gray', 'y', 'b', 'r']]
rowLabels = ['aaaaa','bbbbb','ccccc']
# colWidths fixes the width of each of the four columns; nothing here sets
# the row height.
plt.table(cellText=studentValues,
cellColours=cellcolors,
cellLoc='center', colWidths=[0.1] * 4,
rowLabels=rowLabels)
plt.show()
How could I adjust the height of the cell inside the plt.table function?
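Keep a reference to the table object returned by `plt.table`, then change the cell height with its `scale` method (the second argument is the vertical scale factor).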
# Keep a handle on the Table returned by plt.table so it can be resized.
ytable = plt.table(cellText=studentValues,
                   cellColours=cellcolors,
                   cellLoc='center', colWidths=[0.1] * 4,
                   rowLabels=rowLabels)
# Table.scale(xscale, yscale) multiplies the width and height of every cell.
# A vertical factor above 1 makes the rows taller; 1.0 would leave the table
# unchanged, so use a larger value to actually increase the cell height.
ytable.scale(1, 1.5)

how to remove count from a plotly express bar chart hover data?

Given the following code:
import pandas as pd
import plotly.express as px
# Two categories in col1 ('a' twice, 'b' three times); col2 is not plotted.
d = {'col1': ['a', 'a', 'b', 'b', 'b'], 'col2': [5, 6, 7, 8, 9]}
df = pd.DataFrame(data=d)
# Only y is given, so the bar length is not taken from a column of df
# (the question reports that the hover text then shows a "count" entry).
fig = px.bar(df, y='col1', color='col1')
fig.show()
that generates the following bar plot:
how do I remove count from hover_data?
plotly==5.1.0
You can remove it from hovertemplate
import pandas as pd
import plotly.express as px
d = {'col1': ['a', 'a', 'b', 'b', 'b'], 'col2': [5, 6, 7, 8, 9]}
df = pd.DataFrame(data=d)
fig = px.bar(df, y='col1', color='col1')
# Replace the default hover text of every trace: show only the col1 value and
# use an empty <extra> tag to hide the secondary hover box.
fig.update_traces(hovertemplate='col1=%{y}<br><extra></extra>')
fig.show()

Create a color-coded key for a matplotlib scatter plot with specific colors

Here is the data:
import pandas as pd
# One row per letter A-X, each with its own fixed colour and two values.
data = {
    'letter': list('ABCDEFGHIJKLMNOPQRSTUVWX'),
    'color': [
        '#FF0000', '#FF7F00', '#FFD400', '#FFFF00', '#BFFF00', '#6AFF00',
        '#00EAFF', '#0095FF', '#0040FF', '#AA00FF', '#FF00AA', '#EDB9B9',
        '#E7E9B9', '#B9EDE0', '#B9D7ED', '#DCB9ED', '#8F2323', '#8F6A23',
        '#4F8F23', '#23628F', '#6B238F', '#000000', '#737373', '#CCCCCC',
    ],
    'percent': [
        0.59, 0.569, 0.343, 0.791, 0.099, 0.047, 0.387, 0.232,
        0.262, 0.177, 0.522, 0.317, 0.252, 0.617, 0.644, 0.571,
        0.382, 0.12, 0.281, 0.855, 0.283, 1.0, 0.844, 0.499,
    ],
    'score': [
        0.541, 0.399, 0.625, 0.584, 0.83, 0.859, 0.62, 0.618,
        0.545, 0.536, 0.513, 0.563, 0.592, 0.276, 0.037, 0.0,
        0.5, 0.653, 0.485, 0.213, 0.44, 0.0, 0.308, 0.35,
    ],
}
df = pd.DataFrame(data)
# display(df.head())
letter color percent score
0 A #FF0000 0.590 0.541
1 B #FF7F00 0.569 0.399
2 C #FFD400 0.343 0.625
3 D #FFFF00 0.791 0.584
4 E #BFFF00 0.099 0.830
Where the leftmost column is the index.
This code creates a scatter plot:
# Scatter of score against percent; each point takes its colour from the
# row's own 'color' value.
df.plot.scatter(x='percent', y='score', color=df['color'])
On the right, I want to have a key specifying which color represents which letter. Ideally it should be a list of solid colored rectangles and the letter. I have not been able to find a solution where one can use colors that they had selected, but I need that behavior as there will be multiple plots that need to be color coded the same way.
You can use the .legend method of the Axes object:
import matplotlib.lines as mlines
ax = df.plot.scatter(x='percent', y='score', color=df['color'])
# Build one empty proxy line per row: it draws nothing on the plot but gives
# the legend a marker in the row's colour, labelled with the row's letter.
legend_handles = []
for _, row in df.iterrows():
    legend_handles.append(
        mlines.Line2D([], [], color=row['color'], marker='.', linestyle='None',
                      markersize=9, label=row['letter'])
    )
# Anchor the three-column legend at the top-right corner of the axes.
ax.legend(handles=legend_handles, ncol=3, bbox_to_anchor=(1, 1))
NB. In my opinion, you should not go over ~10 colored items, this strongly impacts readability
As you have a single element per color, you could annotate the points directly:
ax = df.plot.scatter(x='percent', y='score', color=df['color'])
# Write each row's letter next to its own point instead of using a legend.
for _, r in df.iterrows():
    ax.annotate(r['letter'], (r['percent'], r['score']), ha='left', va='bottom')
You can use mpatches.Patch for a custom legend.
import matplotlib.patches as mpatches
ax = df.plot.scatter(x='percent', y='score', color=df['color'])
# One solid colour patch per row, labelled with the row's letter.
handles = []
for label, colour in zip(df['letter'], df['color']):
    handles.append(mpatches.Patch(color=colour, label=label))
labels = df['letter']
# Two-column legend anchored at the top-right corner of the axes.
ax.legend(handles, labels, ncol=2, bbox_to_anchor=(1, 1))
Alternatively, you could use seaborn
import seaborn as sns
# hue assigns one colour per letter; the palette lists the colours in row order.
letter_colours = df['color'].tolist()
ax = sns.scatterplot(data=df, x='percent', y='score', hue='letter', palette=letter_colours)
ax.legend(ncol=2, bbox_to_anchor=(1, 1))

How to plot a grouped bar plot from two or more dataframes

I have multiple dataframes, and I want to plot them on the same figure in the Grouped Bar Chart view.
These are two very small dataframes that I would like to plot together in the same figure.
The dataframes are:
I want to plot a figure like this example:
I try this, plot only one graph:
# First attempt: both DataFrames are plotted as bar charts after creating one
# figure and axes.
fig, ax = plt.subplots()
# NOTE(review): no ax= is passed for df1 here, unlike the df4 call below.
df1.plot.bar(x='Zona',y='Total_MSP')
df4.plot.bar(x='Zona',y='NumEstCasasFavelas2017',ax=ax)
plt.show()
I tried this too:
# Second attempt: both bar charts are drawn onto the same axes object.
fig, ax = plt.subplots()
df1.plot.bar(x='Zona',y='Total_MSP',ax=ax)
df4.plot.bar(x='Zona',y='NumEstCasasFavelas2017',ax=ax)
plt.show()
In both cases the figure shows the bars of only one DataFrame rather than the data from both. The legend lists the columns of both DataFrames, but the bars that are visible come from a single DataFrame.
In order to create a grouped bar plot, the DataFrames must be combined with pandas.merge or pandas.DataFrame.merge.
See pandas User Guide: Merge, join, concatenate and compare and SO: Pandas Merging 101.
Data:
import pandas as pd
import matplotlib.pyplot as plt
# Both frames describe the same five zones, one measurement column each.
zone_codes = ['C', 'L', 'N', 'O', 'S']
population = [464245, 3764942, 1877505, 1023160, 3179477]
favela_houses = [463, 4228, 851, 1802, 2060]
df1 = pd.DataFrame({'Zone': zone_codes, 'Total_MSP': population})
df2 = pd.DataFrame({'Zone': zone_codes, 'CasasFavelas_2017': favela_houses})
Merge the dataframes:
Using pandas.merge, combine the DataFrames.
# Join the two frames on their shared 'Zone' column (one row per zone).
df = df1.merge(df2, on='Zone')
Zone Total_MSP CasasFavelas_2017
0 C 464245 463
1 L 3764942 4228
2 N 1877505 851
3 O 1023160 1802
4 S 3179477 2060
Plot:
Plot the DataFrame with pandas.DataFrame.plot.
Use a logarithmic y-axis so that the much smaller CasasFavelas_2017 values remain visible next to Total_MSP.
# Grouped bars per zone on a log y-axis, with horizontal tick labels.
bar_ax = df.plot.bar(x='Zone', logy=True, rot=0)
# Place the legend just outside the right edge of the axes.
bar_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
Update:
The OP added additional data in an answer, after this answer was provided.
Use pandas.concat to combine more than 2 DataFrames.
# Four single-measure frames that all cover the same five zones.
zone_labels = ['C', 'L', 'N', 'O', 'S']
df12 = pd.DataFrame({'Zone': zone_labels,
                     'Total_MSP': [464245, 3764942, 1877505, 1023160, 3179477]})
df13 = pd.DataFrame({'Zone': zone_labels,
                     'ValorMedioDollar': [1852.27, 1291.53, 1603.44, 2095.90, 1990.10]})
df14 = pd.DataFrame({'Zone': zone_labels,
                     'IDH2010': [0.89, 0.70, 0.79, 0.90, 0.80]})
df15 = pd.DataFrame({'Zone': zone_labels,
                     'QtdNovasCasas': [96, 1387, 561, 281, 416]})
# Index every frame by Zone, then concatenate column-wise so the rows are
# aligned on the zone labels.
indexed_frames = [frame.set_index('Zone') for frame in (df12, df13, df14, df15)]
df = pd.concat(indexed_frames, axis=1)
Total_MSP ValorMedioDollar IDH2010 QtdNovasCasas
Zone
C 464245 1852.27 0.89 96
L 3764942 1291.53 0.70 1387
N 1877505 1603.44 0.79 561
O 1023160 2095.90 0.90 281
S 3179477 1990.10 0.80 416
# Draw all four measures as grouped bars per zone; the log y-axis keeps the
# small-valued columns visible, and rot=0 leaves the zone labels horizontal.
bar_ax = df.plot.bar(logy=True, figsize=(8, 6), rot=0)
# Place the legend just outside the right edge of the axes.
bar_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
Adding Annotations:
Not part of the original question.
How to plot and annotate a grouped bar chart with 3 bars in each group?
How to plot a dictionary
Graphic with four custom color dataframes and caption
import matplotlib.pyplot as plt
import pandas as pd

# Four single-measure frames covering the same five zones.
df12 = pd.DataFrame({'Zone': ['C', 'L', 'N', 'O', 'S'],
                     'Total_MSP': [464245, 3764942, 1877505, 1023160, 3179477]})
df13 = pd.DataFrame({'Zone': ['C', 'L', 'N', 'O', 'S'],
                     'ValorMedioDollar': [1852.27, 1291.53, 1603.44, 2095.90, 1990.10]})
df14 = pd.DataFrame({'Zone': ['C', 'L', 'N', 'O', 'S'],
                     'IDH2010': [0.89, 0.70, 0.79, 0.90, 0.80]})
df15 = pd.DataFrame({'Zone': ['C', 'L', 'N', 'O', 'S'],
                     'QtdNovasCasas': [96, 1387, 561, 281, 416]})

# Combine the four measures into one frame, one row per zone.
df16 = pd.merge(df12, df13, on='Zone')
df16 = pd.merge(df16, df14, on='Zone')
df16 = pd.merge(df16, df15, on='Zone')

fig, ax = plt.subplots(figsize=(50, 20))

# Colour codes taken from https://xkcd.com/color/rgb/
colors2 = ['#448ee4', '#a9f971', '#ceb301', '#ffb7ce']

# The four measures differ by orders of magnitude, so a log scale is used to
# keep every bar visible.
df16.plot.bar(x='Zone', logy=True, color=colors2, ax=ax, width=0.5, align='center')

# Legend with descriptive names, listed in the same order as the columns.
# https://stackoverflow.com/questions/19125722/adding-a-legend-to-pyplot-in-matplotlib-in-the-most-simple-manner-possible
ax.legend(('Total Resident Population-2017',
           'Median Value of square meter-Dollars US',
           'HDI- Human Development Index-2010',
           'Number of new housing properties-2018'),
          bbox_to_anchor=(0.87, 0.89), fontsize=28)

ax.set_title('Estimated Resident Population, Average value of square meter, HDI, New housing properties in São Paulo - Brazil', fontsize=40)
ax.set_xlabel('Names of the geographical subdivisions of São Paulo', fontsize=40)
ax.set_ylabel('Log Scale', fontsize=30)

# Replace the zone codes on the x-axis with readable names, kept horizontal.
names = ['Zone: Center', 'Zone: East', 'Zone: North', 'Zone: West', 'Zone: South']
ax.set_xticklabels(names, fontsize=40, rotation=0)

# Size the y tick labels on this axes directly: changing rcParams at this
# point would only affect axes created afterwards.
ax.tick_params(axis='y', labelsize=30)

# Hide the frame around the plot.
for spine in ax.spines.values():
    spine.set_visible(False)

# Remove the tick marks on all four sides but keep the tick labels. Booleans
# are required here: the strings 'off'/'on' are both truthy.
ax.tick_params(which='both', top=False, bottom=False, left=False, right=False,
               labelleft=True, labelbottom=True)

# Label each bar with its value, centred just above the top of the bar.
for p in ax.patches:
    ax.text(p.get_x() + p.get_width() / 2, p.get_height() + 0.01, str(float(p.get_height())),
            ha='center', va='baseline', rotation=0, color='black', fontsize=25)

# Save before showing so the file is written even if the window is closed.
fig.savefig('GraficoMultiplo.jpg')
plt.show()

seaborn plot from total

I have the following data frame:
# Three monthly observations for each of the two groups: the found value and
# the total it is measured against.
months = ['2017-01-01', '2017-02-01', '2017-03-01']
df = pd.DataFrame({
    'group': ['Red'] * 3 + ['Blue'] * 3,
    'valueA_found': [10, 40, 50, 20, 50, 70],
    'valueA_total': [100, 200, 210, 100, 200, 210],
    'date': months + months,
})
and can create a plot:
# Select the style before creating the figure: the style is applied when the
# axes are created, so setting it afterwards would not affect this plot.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(15,8))
# One bar per group and date, showing the found value.
g = sns.barplot(x="date", y="valueA_found", hue="group", data=df)
# Optional: g.set_yscale('log')
# Rotate the existing date labels. Passing all six df.date values as tick
# labels would not match the three ticks on the axis.
g.tick_params(axis='x', labelrotation=45)
g.set(xlabel='date', ylabel='value from total')
But, I would rather like to see below per each point in time:
as you can see per each model valueA_found is plotted as a bar and the total is plotted as a single bar.
I initially suggested plotting the total as a line, but as outlined in the comments it is probably better to draw it as a bar as well. valueA_total (i.e. the total) should be the same for every group within a given month.
An option might be to plot the total values in a desaturated/more transparent bar plot behind the first dataset.
import matplotlib.pyplot as plt
import pandas as pd
# `seaborn.apionly` no longer exists in current seaborn releases; the plain
# package import is the supported way to get the plotting functions.
import seaborn as sns

df = pd.DataFrame({'group': ['Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue'],
                   'valueA': [10, 40, 50, 20, 50, 70],
                   'valueB': [100, 200, 210, 100, 200, 210],
                   'date': ['2017-01-01', '2017-02-01', '2017-03-01',
                            '2017-01-01', '2017-02-01', '2017-03-01']})

fig, ax = plt.subplots(figsize=(6,4))

# Draw the totals first, in pale semi-transparent colours, so that they end
# up behind the found values of the same group.
sns.barplot(x="date", y="valueB", hue="group", data=df,
            ax=ax, palette={"Red":"#f3c4c4","Blue":"#c5d6f2" }, alpha=0.6)
# Draw the found values on top, in saturated colours.
sns.barplot(x="date", y="valueA", hue="group", data=df,
            ax=ax, palette={"Red":"#d40000","Blue":"#0044aa" })

# Rotate the existing date labels. Passing all six df.date values as tick
# labels would not match the three ticks on the axis.
ax.tick_params(axis='x', labelrotation=45)
ax.set(xlabel='date', ylabel='value from total')
plt.show()
Or just putting one bar plot in the background, assuming that the totals of each group are always the same:
import matplotlib.pyplot as plt
import pandas as pd
# `seaborn.apionly` no longer exists in current seaborn releases; the plain
# package import is the supported way to get the plotting functions.
import seaborn as sns

df = pd.DataFrame({'group': ['Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue'],
                   'valueA': [10, 40, 50, 20, 50, 70],
                   'valueB': [100, 200, 210, 100, 200, 210],
                   'date': ['2017-01-01', '2017-02-01', '2017-03-01',
                            '2017-01-01', '2017-02-01', '2017-03-01']})

fig, ax = plt.subplots(figsize=(6,4))

# Background: one wide bar per date holding the total. Only the rows of the
# "Red" group are used, which relies on the totals being identical for every
# group on a given date.
sns.barplot(x="date", y="valueB", data=df[df.group=="Red"],
            ax=ax, color="#e7e2e8", label="total")
# Foreground: the found values, one bar per group and date.
sns.barplot(x="date", y="valueA", hue="group", data=df,
            ax=ax, palette={"Red":"#d40000","Blue":"#0044aa" })

# Rotate the existing date labels. Passing all six df.date values as tick
# labels would not match the three ticks on the axis.
ax.tick_params(axis='x', labelrotation=45)
ax.set(xlabel='date', ylabel='value from total')
plt.show()

Categories