Python Matplotlib bars subplots by Category and Aggregation - python

I have a table like this:
data = {'Category':["Toys","Toys","Toys","Toys","Food","Food","Food","Food","Food","Food","Food","Food","Furniture","Furniture","Furniture"],
'Product':["AA","BB","CC","DD","SSS","DDD","FFF","RRR","EEE","WWW","LLLLL","PPPPPP","LPO","NHY","MKO"],
'QTY':[100,200,300,50,20,800,300,450,150,320,400,1000,150,900,1150]}
df = pd.DataFrame(data)
df
Out:
Category Product QTY
0 Toys AA 100
1 Toys BB 200
2 Toys CC 300
3 Toys DD 50
4 Food SSS 20
5 Food DDD 800
6 Food FFF 300
7 Food RRR 450
8 Food EEE 150
9 Food WWW 320
10 Food LLLLL 400
11 Food PPPPPP 1000
12 Furniture LPO 150
13 Furniture NHY 900
14 Furniture MKO 1150
So, I need to make bars subplots like this (Sum Products in each Category):
My problem is that I can't figure out how to combine categories, series, and aggregation.
I managed to split them into 3 subplots (one always stays blank), but I cannot combine them...
import matplotlib.pyplot as plt
# Asker's attempt: a 2x2 grid of axes with one bar chart per DataFrame column.
# value_counts() tallies how often each value occurs, so QTY is never summed.
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))
# Top-left: number of rows per category.
df['Category'].value_counts().plot.bar(
ax=axarr[0][0], fontsize=12, color='b'
)
axarr[0][0].set_title("Category", fontsize=18)
# Bottom-left: number of rows per product.
df['Product'].value_counts().plot.bar(
ax=axarr[1][0], fontsize=12, color='b'
)
axarr[1][0].set_title("Product", fontsize=18)
# Bottom-right: number of occurrences of each distinct QTY value.
df['QTY'].value_counts().plot.bar(
ax=axarr[1][1], fontsize=12, color='b'
)
axarr[1][1].set_title("QTY", fontsize=18)
# axarr[0][1] is never drawn on, which is why one subplot stays blank.
plt.subplots_adjust(hspace=.3)
plt.show()
Out
What do I need to add to combine them?

This would be a lot easier with seaborn and FacetGrid
import pandas as pd
import seaborn as sns
# Sample data: one row per product with its category and quantity.
data = {'Category':["Toys","Toys","Toys","Toys","Food","Food","Food","Food","Food","Food","Food","Food","Furniture","Furniture","Furniture"],
'Product':["AA","BB","CC","DD","SSS","DDD","FFF","RRR","EEE","WWW","LLLLL","PPPPPP","LPO","NHY","MKO"],
'QTY':[100,200,300,50,20,800,300,450,150,320,400,1000,150,900,1150]}
df = pd.DataFrame(data)
# One facet per Category, wrapping after 2 columns. Neither axis is shared, so
# each facet keeps its own set of product labels and its own QTY range.
g = sns.FacetGrid(df, col='Category', sharex=False, sharey=False, col_wrap=2, height=3, aspect=1.5)
# Inside every facet, draw bars with Product on the x axis and QTY on the y axis.
g.map_dataframe(sns.barplot, x='Product', y='QTY')

Related

Seaborn Violin Plot - Apply hue and split options to different features

I'm trying to combine two Seaborn violin plots into a single one and display relations between three different features. I'm working on the tips dataset:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
.. ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2
For this data set, I'd like to compare total_bill for different week days depending on sex and smoker columns using the split option. The graphs I'd like to combine are produced by the code below:
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset("tips")
ax = sns.violinplot(x="day", y="total_bill", hue="smoker", data=tips, palette="muted", split=False)
ax = sns.violinplot(x="day", y="total_bill", hue="sex", data=tips, palette="muted", split=True)
Is it possible to create a single graph where different violins represent the total_bill distribution for smokers and non-smokers (as in the first graph), but each of violin is also split to represent differences between men and women? I'd still like to have 8 non-overlapping violins (2 per day - smokers and non smokers), but each will be further split between male and female.
I've found this thread, but the answer creates a separate violin for each combination which is not my goal.
I believe this is what you are looking for
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
# Load the dataset
tips = sns.load_dataset("tips")

# Configure the coloring: one colour per (sex, smoker) combination
colors = {
    "Male": {"Yes": "orange", "No": "blue"},
    "Female": {"Yes": "red", "No": "green"},
}

# Create figure and axes
fig, ax = plt.subplots()

# Sort the labels so the drawing order and the legend order are identical on
# every run; iterating a bare set of strings is not order-stable.
sex_types = sorted(set(tips.sex))

# Draw one layer of split violins per sex onto the same axes. Within a layer
# the two halves of each violin are the smoker / non-smoker groups.
for sex in sex_types:
    sns.violinplot(
        x="day",
        y="total_bill",
        hue="smoker",
        data=tips[tips.sex == sex],
        palette=colors[sex],
        split=True,
        ax=ax,
        # NOTE(review): recent seaborn releases deprecate `scale` and
        # `scale_hue`; confirm the spelling against the installed version.
        scale="count",
        scale_hue=False,
        saturation=0.75,
        inner=None,
    )

# Set transparency for all violins so the overlaid layers remain visible
for violin in ax.collections:
    violin.set_alpha(0.25)

# Compose a custom legend with one entry per (sex, smoker) combination,
# replacing the per-layer legends that the violinplot calls produce.
legend_keys = [(sex, smoker) for smoker in ["Yes", "No"] for sex in sex_types]
custom_lines = [
    Line2D([0], [0], color=colors[sex][smoker], lw=4, alpha=0.25)
    for sex, smoker in legend_keys
]
ax.legend(
    custom_lines,
    [f"{sex} : {smoker}" for sex, smoker in legend_keys],
    title="Gender : Smoker",
)

Pandas Groupby plotting with unstack()

I have the following code
df = pd.DataFrame({
'type':['john','bill','john','bill','bill','bill','bill','john','john'],
'num':[1006,1004,1006,1004,1006,1006,1006,1004,1004],
'date':[2017,2016,2015,2017,2017,2013,2012,2013,2012],
'pos':[0,0,1,4,0,3,3,8,9],
'force':[5,2,7,10,6,12,4,7,8]})
fig, ax = plt.subplots()
# Group by a scalar key rather than a one-element list, so that `name` is the
# plain string seen in the printed output and used as the legend label.
grp = df.sort_values('date').groupby('type')
for name, group in grp:
    print(name)
    print(group)
    # No `ax` is passed here, so each group is drawn on a figure of its own.
    group.plot(x='date', y='force', label=name)
plt.show()
The result obtained is as follows:
bill
type num date pos force
6 bill 1006 2012 3 4
5 bill 1006 2013 3 12
1 bill 1004 2016 0 2
3 bill 1004 2017 4 10
4 bill 1006 2017 0 6
john
type num date pos force
8 john 1004 2012 9 8
7 john 1004 2013 8 7
2 john 1006 2015 1 7
0 john 1006 2017 0 5
[img1_force_Bill][1]
[img2_Force_john][2]
how can i get 4 Fig, in each one 2 lines:
Fig1 for bill: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig2 for bill: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)
Fig3 for john: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig4 for john: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)
Let's try this:
df = pd.DataFrame({
    'type': ['john', 'bill', 'john', 'bill', 'bill', 'bill', 'bill', 'john', 'john'],
    'num': [1006, 1004, 1006, 1004, 1006, 1006, 1006, 1004, 1004],
    'date': [2017, 2016, 2015, 2017, 2017, 2013, 2012, 2013, 2012],
    'pos': [0, 0, 1, 4, 0, 3, 3, 8, 9],
    'force': [5, 2, 7, 10, 6, 12, 4, 7, 8]})

# 2x2 grid: one row per person, one column per measure (force, pos).
fig, ax = plt.subplots(2, 2)
axi = iter(ax.flatten())

# Group by a scalar key, not ['type']: with a one-element list the group key
# is a 1-tuple on current pandas and `name + ' - force'` raises TypeError.
grp = df.sort_values('date').groupby('type')
for name, group in grp:
    # Index by (date, num) and unstack `num` so that every num becomes its own
    # column, i.e. its own line over date.
    by_date_num = group.set_index(['date', 'num'])
    by_date_num['force'].unstack().plot(title=name + ' - force', ax=next(axi), legend=False)
    by_date_num['pos'].unstack().plot(title=name + ' - pos', ax=next(axi), legend=False)

plt.tight_layout()
# Single legend, positioned relative to the current (last created) axes.
plt.legend(loc='upper center', bbox_to_anchor=(0, -.5), ncol=2)
plt.show()
Output:
Update per comment below:
# Single chart for one person: force over date with one line per num, plus a
# horizontal marker at that person's maximum force.
person = 'john'
dfj = df[df['type'] == person]
# Build the title from `person` instead of relying on a loop variable `name`
# left over from an earlier snippet.
ax = dfj.set_index(['date', 'num'])['force'].unstack().plot(title=person + ' - force', legend=False)
ax.axhline(y=dfj['force'].max(), color='red', alpha=.8)
Chart:
@Scott Boston
... thank you a lot for your help.
Unfortunately, after using the following code with big data to plot 2 lines:
for name, group in grp_new:
axn= group.set_index(['date', 'num'])['pos'].unstack().plot(title= name+' _pos', legend=False)
the plot looks like plot2Lines. The lines are not continuous. I tried plotting single lines and that was OK.

How to make plots with small whitespace separations in Matplotlib or Seaborn?

I'd like to make this type of plot with multiple columns separated by small whitespace, each having different category having 3-5 (5 in this example) different observations with varying values on y axis:
Actually, I can make this plot using ggplot2. For example:
head(mtcars)
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
# Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
# Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
library(dplyr)
library(ggplot2)
mtcars %>% reshape2::melt() %>%
ggplot(aes(x = variable, y = value)) +
geom_point() + facet_grid(~ variable) +
theme(axis.text.x = element_blank())
You set a categorical variable in your dataset, then use facet_grid(~). This function splits your plot into multiple panels by your categorical variable.
Here is an approach to draw a similar plot using Python's matplotlib. The plot has a grey background and white major and minor gridlines to delimit the zones. Getting the dots in the center of each little cell is somewhat tricky: divide into n+1 spaces and shift half a cell (1/2n). A secondary x-axis can be used to set the labels. A zorder has to be set to have the dots on top of the gridlines.
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import ticker
n = 5      # observations per category
cols = 7   # number of categories
values = [np.random.uniform(1, 10, n) for _ in range(cols)]

fig, ax = plt.subplots()
ax.set_facecolor('lightgrey')
# Major x ticks at every integer delimit the categories; minor x ticks every
# 1/n split each category into n cells.
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(1 / (n)))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
# White gridlines on the grey background; the major ones are drawn thicker.
ax.grid(True, which='both', axis='both', color='white')
ax.set_xticklabels([])
ax.tick_params(axis='x', which='both', length=0)
ax.grid(which='major', axis='both', lw=3)
ax.set_xlim(1, cols + 1)
for i in range(1, cols + 1):
    # n evenly spaced slots in [i, i + 1), shifted by half a slot (1 / 2n) so
    # each dot sits in the centre of its cell; zorder keeps dots above the grid.
    ax.scatter(np.linspace(i, i + 1, n, endpoint=False) + 1 / (2 * n), values[i-1], c='crimson', zorder=2)

# Secondary x-axis on top that carries the category labels.
ax2 = ax.twiny()
ax2.set_xlim(0.5, cols + 0.5)
ticks = range(1, cols + 1)
ax2.set_xticks(ticks)
ax2.set_xticklabels([f'Cat_{t:02d}' for t in ticks])
bbox = dict(boxstyle="round", ec="limegreen", fc="limegreen", alpha=0.5)
plt.setp(ax2.get_xticklabels(), bbox=bbox)
ax2.tick_params(axis='x', length=0)
plt.show()

How to make bar graph of 2 variables based on same DataFrame and I want to choose 2 or until 5 data

I have a DataFrame:
wilayah branch Income Januari 2018 Income Januari 2019 Income Febuari 2018 Income Febuari 2019 Income Jan-Feb 2018 Income Jan-Feb 2019
1 sunarto 1000 1500 2000 3000 3333 4431
1 pemabuk 500 700 3000 3000 4333 5431
1 pemalas 2000 2200 4000 3000 5333 6431
1 hasuntato 9000 1200 6000 3000 2222 2121
1 sibodoh 1000 1500 3434 3000 2233 2121
...
My expectation is to create a bar graph where the x axis is every name in branch (e.g. sunarto, pemabuk, pemalas, etc.), and the y axis is income.
Let's say I will compare sunarto's income januari 2018 and income januari 2019, pemabuk's income januari 2018 and income januari 2019, and so on (1 name in x axis, 2 values as comparison of two values). Then I will sort values high to low value from Income Jan-Feb 2019 in my bar graph.
I tried:
import matplotlib.pyplot as plt
import pandas as pd
fig, ax = plt.subplots()
ax = df1[["Sunarto","Income Januari 2018", "Income Januari 2019"]].plot(x='branch', kind='bar', color=["g","b"],rot=45)
plt.show()
Consider a groupby aggregation then run DataFrame.plot. Below will line all branches on x-axis with different income columns as color_coded keys in legend.
# Sum the income columns per branch. `wilayah` is dropped first so it is not
# summed and plotted as if it were an income series, and the branches are
# ordered from high to low by 'Income Jan-Feb 2019' as the question asks.
agg_df = (
    df.drop(columns='wilayah')
    .groupby('branch')
    .sum()
    .sort_values('Income Jan-Feb 2019', ascending=False)
)
fig, ax = plt.subplots(figsize=(15,5))
# One group of bars per branch, one coloured bar per income column.
agg_df.plot(kind='bar', edgecolor='w', ax=ax, rot=22, width=0.5, fontsize = 15)
# ADD TITLES AND LABELS
plt.title('Income by Branches, Jan/Feb 2018-2019', weight='bold', size=24)
plt.xlabel('Branch', weight='bold', size=24)
plt.ylabel('Income', weight='bold', size=20)
plt.tight_layout()
plt.show()
plt.clf()
Should you want each separate branch plots on specific columns, iterate off a groupby list:
dfs = df.groupby('branch')
# One figure per branch.
for branch, group in dfs:
    # Order the income columns by their value (ascending) for this branch:
    # melt to long form, sort by value, keep the column names in that order.
    ord_cols = (
        pd.melt(group.drop(columns="wilayah"), id_vars="branch")
        .sort_values("value")["variable"].values
    )
    fig, ax = plt.subplots(figsize=(8,4))
    (
        group.reindex(columns=ord_cols)
        .plot(kind='bar', edgecolor='w', ax=ax, rot=0, width=0.5, fontsize = 15)
    )
    # ADD TITLES AND LABELS
    plt.title('Income by {} Branch, Jan/Feb 2018-2019'.format(branch),
              weight='bold', size=16)
    plt.xlabel('Branch', weight='bold', size=16)
    plt.ylabel('Income', weight='bold', size=14)
    plt.tight_layout()
    plt.show()

python pandas bar plot another column text

max min mincount maxcount
0 12 10 1 6
1 21 14 1 6
2 34 19 1 6
3 6 20 1 4
4 8 22 1 4
5 41 23 1 4
This is a pandas DataFrame.
I want a plot like this image:
enter image description here
The text labels are very important.
Here is my code:
df = pd.DataFrame({'maxcount': max_count, 'mincount': min_count, 'max': max, 'min': min})
ax = df[['maxcount', 'mincount']].plot(kind='bar')
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Create your dataframe
d = {'max': [12, 21, 34, 6, 8, 41], 'min': [10, 14, 19, 20, 22, 23],
     'mincount': [1, 1, 1, 1, 1, 1], 'maxcount': [6, 6, 6, 4, 4, 4]}
df = pd.DataFrame(d)

# Create 2 dataframes, counts and max_min (1 for plotting and 1 for text)
counts = pd.DataFrame(df, columns=['maxcount', 'mincount'])
max_min = pd.DataFrame(df, columns=['max', 'min'])

# Plot the counts as grouped bars
ax = counts[counts.columns].plot(kind='bar', colormap='Paired', figsize=(12, 4))

# Annotate every bar with its value from max_min followed by the count in
# parentheses. Column 0 (max / maxcount) is the left bar of each group and
# column 1 (min / mincount) the right bar; the x offsets place the two labels
# side by side above the corresponding bar.
label_offsets = [(-0.2, -0.1), (0.1, 0.2)]
for col, (value_dx, count_dx) in enumerate(label_offsets):
    pairs = zip(max_min.iloc[:, col].values, counts.iloc[:, col].values)
    for row, (value, count) in enumerate(pairs):
        bar_top = counts.iloc[row, col]
        ax.annotate('%d' % value, (row + value_dx, bar_top),
                    va='bottom', ha='center', fontsize=10)
        ax.annotate('(%d)' % count, (row + count_dx, bar_top),
                    va='bottom', ha='center', fontsize=10)
This is the output:

Categories