Related
Below is the data used to create the histogram subplot charts with Plotly graph objects.
The code below creates the histogram subplot charts with Plotly graph objects.
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# One histogram panel per generation, laid out side by side in a single row.
generations = ['Millenials', 'Generation X', 'Boomers']
specs = [[{'type': 'histogram'} for _ in generations]]
fig = make_subplots(
    rows=1,
    cols=3,
    specs=specs,
    subplot_titles=['<b> {} </b>'.format(generation) for generation in generations],
)

# Add the NumCompaniesWorked distribution of each generation to its own column.
for column_number, generation in enumerate(generations, start=1):
    generation_rows = df[df['Generation'] == generation]
    histogram = go.Histogram(
        x=generation_rows['NumCompaniesWorked'],
        opacity=0.5,
        marker_color=['#455f66'] * 15,
    )
    fig.add_trace(histogram, 1, column_number)

# Figure-level styling: no legend, bold two-line title in Arial.
title_font = dict(family="Arial", size=20, color='#283747')
figure_title = dict(
    text="<b> Histogram - <br> <span style='color: #f55142'> How to add the box plot and mean vertical line on each diagram </span></b> ",
    font=title_font,
)
fig.update_layout(showlegend=False, title=figure_title)
fig.show()
And below is the output I get from the above code
How can I add a vertical line marking the mean (average) to each histogram? The mean values are:
Millenials = 2.2
Generation X = 3.4
Boomers = 4.1
I also want a box plot above each of the 3 histograms.
The result should look like the diagram shown below for all 3 histograms.
import pandas as pd
import numpy as np
# Original wide-format table: one row per NumCompaniesWorked value and one
# count column per generation.
df = pd.DataFrame({'NumCompaniesWorked': list(range(10)),
                   'Millenials': [139, 407, 54, 57, 55, 32, 35, 28, 17, 24],
                   'Generation X': [53, 108, 83, 90, 70, 27, 32, 40, 26, 24],
                   'Boomers': [5, 6, 9, 12, 14, 4, 3, 6, 6, 4]})

# Reshape to long format: for each generation keep NumCompaniesWorked plus that
# generation's counts (renamed to 'count') and tag the rows with the generation
# name. The per-generation frames are stacked in the order listed, each keeping
# its original 0-9 index.
dfs = [
    df[['NumCompaniesWorked', col]].rename(columns={col: 'count'}).assign(Generation=col)
    for col in ['Millenials', 'Generation X', 'Boomers']
]
df = pd.concat(dfs)
#output
NumCompaniesWorked count Generation
0 0 139 Millenials
1 1 407 Millenials
2 2 54 Millenials
3 3 57 Millenials
4 4 55 Millenials
5 5 32 Millenials
6 6 35 Millenials
7 7 28 Millenials
8 8 17 Millenials
9 9 24 Millenials
0 0 53 Generation X
1 1 108 Generation X
2 2 83 Generation X
3 3 90 Generation X
4 4 70 Generation X
5 5 27 Generation X
6 6 32 Generation X
7 7 40 Generation X
8 8 26 Generation X
9 9 24 Generation X
0 0 5 Boomers
1 1 6 Boomers
2 2 9 Boomers
3 3 12 Boomers
4 4 14 Boomers
5 5 4 Boomers
6 6 3 Boomers
7 7 6 Boomers
8 8 6 Boomers
9 9 4 Boomers
# Faceted histogram (one column per generation) with a box plot as the marginal.
histogram_options = dict(
    x='NumCompaniesWorked',
    y='count',
    marginal='box',
    facet_col='Generation',
)
fig = px.histogram(df, **histogram_options)

# Dashed gray vertical line at each generation's mean, listed in facet-column order.
mean_line_style = dict(line_width=1, line_dash='dash', line_color='gray')
for facet_column, mean_value in enumerate((2.2, 3.4, 4.1), start=1):
    fig.add_vline(x=mean_value, col=facet_column, **mean_line_style)
fig.show()
I have a table like this:
# One (category, product, quantity) record per table row, in display order.
records = [
    ("Toys", "AA", 100),
    ("Toys", "BB", 200),
    ("Toys", "CC", 300),
    ("Toys", "DD", 50),
    ("Food", "SSS", 20),
    ("Food", "DDD", 800),
    ("Food", "FFF", 300),
    ("Food", "RRR", 450),
    ("Food", "EEE", 150),
    ("Food", "WWW", 320),
    ("Food", "LLLLL", 400),
    ("Food", "PPPPPP", 1000),
    ("Furniture", "LPO", 150),
    ("Furniture", "NHY", 900),
    ("Furniture", "MKO", 1150),
]
# Transpose the records into the column-oriented dict the DataFrame is built from.
categories, products, quantities = (list(column) for column in zip(*records))
data = {'Category': categories, 'Product': products, 'QTY': quantities}
df = pd.DataFrame(data)
df
Out:
Category Product QTY
0 Toys AA 100
1 Toys BB 200
2 Toys CC 300
3 Toys DD 50
4 Food SSS 20
5 Food DDD 800
6 Food FFF 300
7 Food RRR 450
8 Food EEE 150
9 Food WWW 320
10 Food LLLLL 400
11 Food PPPPPP 1000
12 Furniture LPO 150
13 Furniture NHY 900
14 Furniture MKO 1150
So, I need to make bar subplots like this (the sum of products in each category):
My problem is that I can't figure out how to combine categories, series, and aggregation.
I managed to split them into 3 subplots (1 always stays blank), but I cannot combine them ...
import matplotlib.pyplot as plt
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))

# (column, grid position) pairs; grid cell (0, 1) is never drawn on.
panels = [
    ('Category', (0, 0)),
    ('Product', (1, 0)),
    ('QTY', (1, 1)),
]
for column, (row, col) in panels:
    panel_ax = axarr[row][col]
    # Bar chart of how often each distinct value of the column occurs.
    value_frequencies = df[column].value_counts()
    value_frequencies.plot.bar(ax=panel_ax, fontsize=12, color='b')
    panel_ax.set_title(column, fontsize=18)

plt.subplots_adjust(hspace=.3)
plt.show()
Out
What do I need to add to combine them?
This would be a lot easier with seaborn and FacetGrid
import pandas as pd
import seaborn as sns
# Same table as in the question: 4 toys, 8 food items, 3 furniture items.
data = dict(
    Category=["Toys"] * 4 + ["Food"] * 8 + ["Furniture"] * 3,
    Product=[
        "AA", "BB", "CC", "DD",
        "SSS", "DDD", "FFF", "RRR", "EEE", "WWW", "LLLLL", "PPPPPP",
        "LPO", "NHY", "MKO",
    ],
    QTY=[
        100, 200, 300, 50,
        20, 800, 300, 450, 150, 320, 400, 1000,
        150, 900, 1150,
    ],
)
df = pd.DataFrame(data)

# One facet per category, wrapped after two columns; axes are not shared
# between facets.
grid_options = dict(
    col='Category',
    sharex=False,
    sharey=False,
    col_wrap=2,
    height=3,
    aspect=1.5,
)
g = sns.FacetGrid(df, **grid_options)
# Draw a QTY-per-Product bar plot inside every facet.
g.map_dataframe(sns.barplot, x='Product', y='QTY')
I have the following code
# Sample data: one row per (type, num, date) with a 'pos' and a 'force' value.
df = pd.DataFrame({
    'type': ['john', 'bill', 'john', 'bill', 'bill', 'bill', 'bill', 'john', 'john'],
    'num': [1006, 1004, 1006, 1004, 1006, 1006, 1006, 1004, 1004],
    'date': [2017, 2016, 2015, 2017, 2017, 2013, 2012, 2013, 2012],
    'pos': [0, 0, 1, 4, 0, 3, 3, 8, 9],
    'force': [5, 2, 7, 10, 6, 12, 4, 7, 8]})
fig, ax = plt.subplots()
# Group by the column name itself rather than a one-element list, so that
# `name` is the plain string ('bill' / 'john') and not a 1-tuple on
# pandas >= 2.0.
grp = df.sort_values('date').groupby('type')
for name, group in grp:
    print(name)
    print(group)
    # No ax= is passed here, so this does not draw on the `ax` created above;
    # each group ends up in its own plot (one image per type).
    group.plot(x='date', y='force', label=name)
plt.show()
The result obtained is as follows:
bill
type num date pos force
6 bill 1006 2012 3 4
5 bill 1006 2013 3 12
1 bill 1004 2016 0 2
3 bill 1004 2017 4 10
4 bill 1006 2017 0 6
john
type num date pos force
8 john 1004 2012 9 8
7 john 1004 2013 8 7
2 john 1006 2015 1 7
0 john 1006 2017 0 5
[img1_force_Bill][1]
[img2_Force_john][2]
How can I get 4 figures, each containing 2 lines:
Fig1 for bill: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig2 for bill: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)
Fig3 for john: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig4 for john: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)
Let's try this:
# Sample data: one row per (type, num, date) with a 'pos' and a 'force' value.
df = pd.DataFrame({
    'type': ['john', 'bill', 'john', 'bill', 'bill', 'bill', 'bill', 'john', 'john'],
    'num': [1006, 1004, 1006, 1004, 1006, 1006, 1006, 1004, 1004],
    'date': [2017, 2016, 2015, 2017, 2017, 2013, 2012, 2013, 2012],
    'pos': [0, 0, 1, 4, 0, 3, 3, 8, 9],
    'force': [5, 2, 7, 10, 6, 12, 4, 7, 8]})

# 2x2 grid: one row of panels per type, 'force' on the left and 'pos' on the right.
fig, ax = plt.subplots(2, 2)
axi = iter(ax.flatten())

# Group by the column name (not a one-element list) so `name` is a plain
# string; with groupby(['type']) pandas >= 2.0 yields 1-tuples and building
# the title fails.
grp = df.sort_values('date').groupby('type')
for name, group in grp:
    by_date_and_num = group.set_index(['date', 'num'])
    for column in ('force', 'pos'):
        # unstack() turns each `num` into its own column, i.e. one line per num.
        by_date_and_num[column].unstack().plot(
            title='{} - {}'.format(name, column), ax=next(axi), legend=False)

plt.tight_layout()
# plt.legend acts on the current axes; the legend is anchored below it.
plt.legend(loc='upper center', bbox_to_anchor=(0, -.5), ncol=2)
plt.show()
Output:
Update per comment below:
# Plot only john's 'force' values (one line per num) and mark the maximum.
dfj = df[df['type'] == 'john']
# The title is spelled out instead of reusing a `name` variable left over from
# an earlier loop, so this snippet does not depend on which group was iterated
# last.
ax = dfj.set_index(['date', 'num'])['force'].unstack().plot(title='john - force', legend=False)
# Horizontal red line at john's maximum force value.
ax.axhline(y=dfj['force'].max(), color='red', alpha=.8)
Chart:
@Scott Boston
... thank you a lot for your help.
Unfortunately, after using the following code with a large dataset to plot 2 lines
for name, group in grp_new:
    # One 'pos' line per num for this group, indexed by date.
    # NOTE(review): unstack() leaves NaN wherever a date exists for one num but
    # not the other, and line plots leave gaps at NaN; that is the likely cause
    # of the broken lines. Confirm against the real data.
    axn = group.set_index(['date', 'num'])['pos'].unstack().plot(title=name + ' _pos', legend=False)
the plot looks like plot2Lines. The lines are not continuous. I tried plotting single lines and they were OK.
max min mincount maxcount
0 12 10 1 6
1 21 14 1 6
2 34 19 1 6
3 6 20 1 4
4 8 22 1 4
5 41 23 1 4
This is a pandas DataFrame.
I want a chart like this image.
enter image description here
The text labels are very important.
Here is my code:
# Assemble the frame from the four existing sequences, then draw the two
# count columns as grouped bars.
column_values = {
    'maxcount': max_count,
    'mincount': min_count,
    'max': max,
    'min': min,
}
df = pd.DataFrame(column_values)
count_columns = df[['maxcount', 'mincount']]
ax = count_columns.plot(kind='bar')
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Create the dataframe.
d = {'max': [12, 21, 34, 6, 8, 41], 'min': [10, 14, 19, 20, 22, 23],
     'mincount': [1, 1, 1, 1, 1, 1], 'maxcount': [6, 6, 6, 4, 4, 4]}
df = pd.DataFrame(d)

# Split into two frames: `counts` gives the bar heights, `max_min` gives the
# values that are written above the bars.
counts = pd.DataFrame(df, columns=['maxcount', 'mincount'])
max_min = pd.DataFrame(df, columns=['max', 'min'])

# Plot the counts as grouped bars.
ax = counts.plot(kind='bar', colormap='Paired', figsize=(12, 4))

# For each of the two series, write "<value>" and "(<count>)" next to each
# other at the height of the bar. The offsets shift the labels left of the
# group position for the first series and right of it for the second.
label_offsets = [(-0.2, -0.1), (0.1, 0.2)]
for series_idx, (value_dx, count_dx) in enumerate(label_offsets):
    values = max_min.iloc[:, series_idx].values
    heights = counts.iloc[:, series_idx].values
    for pos, (value, height) in enumerate(zip(values, heights)):
        ax.annotate('{:d}'.format(int(value)), (pos + value_dx, height),
                    va='bottom', ha='center', fontsize=10)
        ax.annotate('({:d})'.format(int(height)), (pos + count_dx, height),
                    va='bottom', ha='center', fontsize=10)
This is the output:
Here is what my dataframe looks like:
year item_id sales_quantity
2014 1 10
2014 1 4
... ... ...
2015 1 7
2015 1 10
... ... ...
2014 2 1
2014 2 8
... ... ...
2015 2 17
2015 2 30
... ... ...
2014 3 9
2014 3 18
... ... ...
For each item_id, I want to plot a boxplot showing the distribution for each year.
Here is what I tried:
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 and
# parse_dates=True is its documented equivalent.
data = pd.read_csv('electronics.csv', index_col=0, parse_dates=True)
grouped = data.groupby('year')

# Lay the groups out on a grid that is 4 panels wide.
ncols = 4
nrows = int(np.ceil(grouped.ngroups / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(35, 45),
                         sharey=False)
for key, ax in zip(grouped.groups.keys(), axes.flatten()):
    # NOTE: this is the call reported to fail with
    # "boxplot() got multiple values for argument 'x'". It is kept unchanged
    # because it is the subject of the question. DataFrame.boxplot selects data
    # with column=/by=, not x=/y=.
    grouped.get_group(key).boxplot(x='year', y='sales_quantity',
                                   ax=ax, label=key)
I get the error boxplot() got multiple values for argument 'x'. Can someone please tell me how to do this right?
If I have only a single item, then the following works
sns.boxplot(data.sales_quantity, groupby = data.year). How could I extend it for multiple items?
Link to csv
Please check the comments in the code.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)

item_ids = df['item_id_copy'].unique()
# squeeze=False keeps `axes` a 2-D array even when there is only one item, so
# the loop below works for any number of items.
fig, axes = plt.subplots(1, len(item_ids), sharey=True, squeeze=False)

# One box-plot panel per item, with one box per year.
for ax, item_id in zip(axes.ravel(), item_ids):
    # Compare against the id directly; no string round trip is needed.
    item_rows = df[df['item_id_copy'] == item_id]
    # Spread sales_quantity into one column per year.
    idf = item_rows[['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('ID {}'.format(item_id))
    # Each entry of idf.columns is a tuple whose second element is the year.
    ax.set_xticklabels([e[1] for e in idf.columns], rotation=45)
    # Remove this line to see outliers properly; it is kept only so that the
    # example produces a readable graph.
    ax.set_ylim(0, 1)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)

# Fixed 2x5 grid of panels that share the y axis.
fig, axes = plt.subplots(2, 5, sharey=True)

# Walk the grid row by row, pairing each panel with the next item id. zip()
# stops at the shorter of the two, so fewer than 10 items no longer raises
# StopIteration, and items beyond the 10th are still ignored as before.
for ax, item_id in zip(axes.flat, df['item_id_copy'].unique()):
    item_rows = df[df['item_id_copy'] == item_id]
    # Spread sales_quantity into one column per year.
    idf = item_rows[['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('ID {}'.format(item_id))
    # Each entry of idf.columns is a tuple whose second element is the year.
    ax.set_xticklabels([e[1] for e in idf.columns], rotation=0)
    ax.set_ylim(0, 1)
plt.show()
I will leave this simple version for others...
import pandas as pd
import matplotlib.pyplot as plt
# Whitespace-separated input; the pattern is a raw string so that \s is not
# treated as an (invalid) string escape.
df = pd.read_table('sample.txt', delimiter=r'\s+')

item_ids = df['item_id'].unique()
# One panel per item; squeeze=False keeps `axes` an array for any item count.
fig, axes = plt.subplots(1, len(item_ids), sharey=True, squeeze=False)

for ax, item_id in zip(axes.ravel(), item_ids):
    item_rows = df[df['item_id'] == item_id]
    # Spread sales_quantity into one column per year.
    idf = item_rows[['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('Item ID {}'.format(item_id))
    # Each entry of idf.columns is a tuple whose second element is the year.
    ax.set_xticklabels([e[1] for e in idf.columns])
plt.show()
sample.txt
year item_id sales_quantity
2014 1 10
2014 1 4
2015 1 7
2015 1 10
2014 2 1
2014 2 8
2015 2 17
2015 2 30
2014 3 9
2014 3 18