Pandas Groupby plotting with unstack() - python

I have the following code
df = pd.DataFrame({
'type':['john','bill','john','bill','bill','bill','bill','john','john'],
'num':[1006,1004,1006,1004,1006,1006,1006,1004,1004],
'date':[2017,2016,2015,2017,2017,2013,2012,2013,2012],
'pos':[0,0,1,4,0,3,3,8,9],
'force':[5,2,7,10,6,12,4,7,8]})
fig, ax = plt.subplots()
grp=df.sort_values('date').groupby(['type'])
for name, group in grp :
print(name)
print(group)
group.plot(x='date', y='force', label=name)
plt.show()
The result obtained is as follows:
bill
type num date pos force
6 bill 1006 2012 3 4
5 bill 1006 2013 3 12
1 bill 1004 2016 0 2
3 bill 1004 2017 4 10
4 bill 1006 2017 0 6
john
type num date pos force
8 john 1004 2012 9 8
7 john 1004 2013 8 7
2 john 1006 2015 1 7
0 john 1006 2017 0 5
[img1_force_Bill][1]
[img2_Force_john][2]
How can I get 4 figures, each containing 2 lines:
Fig1 for bill: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig2 for bill: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)
Fig3 for john: line1(x=date , y= force) for num(1004)/
line2(x=date , y= force) for num(1006)
Fig4 for john: line1(x=date , y= pos) for num(1004)/
line2(x=date , y= pos) for num(1006)

Let's try this:
df = pd.DataFrame({
'type':['john','bill','john','bill','bill','bill','bill','john','john'],
'num':[1006,1004,1006,1004,1006,1006,1006,1004,1004],
'date':[2017,2016,2015,2017,2017,2013,2012,2013,2012],
'pos':[0,0,1,4,0,3,3,8,9],
'force':[5,2,7,10,6,12,4,7,8]})
fig, ax = plt.subplots(2,2)
axi=iter(ax.flatten())
grp=df.sort_values('date').groupby(['type'])
for name, group in grp :
# print(name)
# print(group)
group.set_index(['date','num'])['force'].unstack().plot(title=name+' - force', ax=next(axi), legend=False)
group.set_index(['date','num'])['pos'].unstack().plot(title=name+ ' - pos', ax=next(axi), legend=False)
plt.tight_layout()
plt.legend(loc='upper center', bbox_to_anchor=(0, -.5), ncol=2)
plt.show()
Output:
Update per comment below:
dfj = df[df['type'] == 'john']
ax = dfj.set_index(['date','num'])['force'].unstack().plot(title=name+' - force', legend=False)
ax.axhline(y=dfj['force'].max(), color='red', alpha=.8)
Chart:

@Scott Boston
... thank you a lot for your help.
Unfortunately, after using the following code with a large dataset to plot 2 lines:
for name, group in grp_new:
axn= group.set_index(['date', 'num'])['pos'].unstack().plot(title= name+' _pos', legend=False)
the plot looks like plot2Lines. The lines are not continuous. I tried plotting each line on its own and that worked fine.

Related

Python Matplotlib bars subplots by Category and Aggregation

I have a table like this:
data = {'Category':["Toys","Toys","Toys","Toys","Food","Food","Food","Food","Food","Food","Food","Food","Furniture","Furniture","Furniture"],
'Product':["AA","BB","CC","DD","SSS","DDD","FFF","RRR","EEE","WWW","LLLLL","PPPPPP","LPO","NHY","MKO"],
'QTY':[100,200,300,50,20,800,300,450,150,320,400,1000,150,900,1150]}
df = pd.DataFrame(data)
df
Out:
Category Product QTY
0 Toys AA 100
1 Toys BB 200
2 Toys CC 300
3 Toys DD 50
4 Food SSS 20
5 Food DDD 800
6 Food FFF 300
7 Food RRR 450
8 Food EEE 150
9 Food WWW 320
10 Food LLLLL 400
11 Food PPPPPP 1000
12 Furniture LPO 150
13 Furniture NHY 900
14 Furniture MKO 1150
So, I need to make bar subplots like this (sum of products in each category):
My problem is that I can't figure out how to combine categories, series, and aggregation.
I managed to split them into 3 subplots (one always stays blank), but I cannot combine them ...
import matplotlib.pyplot as plt
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))
df['Category'].value_counts().plot.bar(
ax=axarr[0][0], fontsize=12, color='b'
)
axarr[0][0].set_title("Category", fontsize=18)
df['Product'].value_counts().plot.bar(
ax=axarr[1][0], fontsize=12, color='b'
)
axarr[1][0].set_title("Product", fontsize=18)
df['QTY'].value_counts().plot.bar(
ax=axarr[1][1], fontsize=12, color='b'
)
axarr[1][1].set_title("QTY", fontsize=18)
plt.subplots_adjust(hspace=.3)
plt.show()
Out
What do I need to add to combine them?
This would be a lot easier with seaborn and FacetGrid
import pandas as pd
import seaborn as sns
data = {'Category':["Toys","Toys","Toys","Toys","Food","Food","Food","Food","Food","Food","Food","Food","Furniture","Furniture","Furniture"],
'Product':["AA","BB","CC","DD","SSS","DDD","FFF","RRR","EEE","WWW","LLLLL","PPPPPP","LPO","NHY","MKO"],
'QTY':[100,200,300,50,20,800,300,450,150,320,400,1000,150,900,1150]}
df = pd.DataFrame(data)
g = sns.FacetGrid(df, col='Category', sharex=False, sharey=False, col_wrap=2, height=3, aspect=1.5)
g.map_dataframe(sns.barplot, x='Product', y='QTY')

[Python3]How to use Seaborn/Matplotlib to graph pandas dataframe

I'm still having trouble doing this.
Here is what my data looks like:
date positive negative neutral
0 2015-09 23 6 18
1 2016-04 709 288 704
2 2016-08 1478 692 1750
3 2016-09 1881 926 2234
4 2016-10 3196 1594 3956
In my CSV file I don't have those 0-4 index values, only the 4 columns from 'date' to 'neutral'.
I don't know how to fix my code to make the plot look like this:
Seaborn code
sns.set(style='darkgrid', context='talk', palette='Dark2')
fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(x=df['positive'], y=df['negative'], ax=ax)
ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
ax.set_ylabel("Percentage")
plt.show()
To do this in seaborn you'll need to transform your data into long format. You can easily do this via melt:
plotting_df = df.melt(id_vars="date", var_name="sign", value_name="percentage")
print(plotting_df.head())
date sign percentage
0 2015-09 positive 23
1 2016-04 positive 709
2 2016-08 positive 1478
3 2016-09 positive 1881
4 2016-10 positive 3196
Then you can plot this long-format dataframe with seaborn in a straightforward manner:
sns.set(style='darkgrid', context='talk', palette='Dark2')
fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(x="date", y="percentage", ax=ax, hue="sign", data=plotting_df)
Based on the data you posted
sns.set(style='darkgrid', context='talk', palette='Dark2')
# fig, ax = plt.subplots(figsize=(8, 8))
df.plot(x="date",y=["positive","neutral","negative"],kind="bar")
plt.xticks(rotation=-360)
# ax.set_xticklabels(['Negative', 'Neutral', 'Positive'])
# ax.set_ylabel("Percentage")
plt.show()

Drawing of Cluster Column Graph in Matplotlib

Can anyone explain how I can draw a clustered column chart exactly like this in Matplotlib? I found some similar graphs, but I want exactly the graph shown. I have fruit names such as apples and pears as keys, and their sales per year as the values of these keys.
The following code first creates some toy data and then uses matplotlib to draw a bar plot.
import matplotlib.pyplot as plt
from matplotlib.transforms import blended_transform_factory
from matplotlib.ticker import MultipleLocator
import numpy as np
import pandas as pd
import seaborn as sns
fruits = ['apples', 'pears', 'nectarines', 'plums', 'grapes', 'strawberries']
years = [2015, 2016, 2017]
num_fruit = len(fruits)
num_years = len(years)
df = pd.DataFrame({'fruit': np.tile(fruits, num_years),
'year': np.repeat(years, num_fruit),
'value': np.random.randint(1, 8, num_fruit * num_years)})
width = 0.8
for i, fruit in enumerate(fruits):
for j, year in enumerate(years):
plt.bar(i + width / num_years * (j - (num_years - 1) / 2),
df[(df['fruit'] == fruit) & (df['year'] == year)]['value'],
width / num_years, color='skyblue', ec='white')
plt.xticks([i + width / num_years * (j - (num_years - 1) / 2) for i in range(num_fruit) for j in range(num_years)],
np.tile(years, num_fruit), rotation=45)
ax = plt.gca()
ax.yaxis.set_major_locator(MultipleLocator(1))
ax.yaxis.set_minor_locator(MultipleLocator(0.2))
ax.grid(True, axis='y')
ax.autoscale(False, axis='y')
trans = blended_transform_factory(ax.transData, ax.transAxes)
for i, fruit in enumerate(fruits):
ax.text(i, -0.2, fruit, transform=trans, ha='center')
if i != 0:
ax.vlines(i - 0.5, 0, -0.3, color='lightgrey', clip_on=False, transform=trans)
plt.tight_layout()
print(df)
plt.show()
For this example the data looked like:
fruit year value
0 apples 2015 1
1 pears 2015 3
2 nectarines 2015 6
3 plums 2015 3
4 grapes 2015 3
5 strawberries 2015 1
6 apples 2016 4
7 pears 2016 6
8 nectarines 2016 1
9 plums 2016 6
10 grapes 2016 4
11 strawberries 2016 5
12 apples 2017 3
13 pears 2017 6
14 nectarines 2017 7
15 plums 2017 3
16 grapes 2017 5
17 strawberries 2017 1

How to make a bar graph of 2 variables from the same DataFrame, choosing between 2 and 5 data columns

I have a DataFrame:
wilayah branch Income Januari 2018 Income Januari 2019 Income Febuari 2018 Income Febuari 2019 Income Jan-Feb 2018 Income Jan-Feb 2019
1 sunarto 1000 1500 2000 3000 3333 4431
1 pemabuk 500 700 3000 3000 4333 5431
1 pemalas 2000 2200 4000 3000 5333 6431
1 hasuntato 9000 1200 6000 3000 2222 2121
1 sibodoh 1000 1500 3434 3000 2233 2121
...
My expectation is to create a bar graph where the x axis is every name in branch (e.g. sunarto, pemabuk, pemalas, etc.) and the y axis is income.
Let's say I will compare sunarto's income januari 2018 and income januari 2019, pemabuk's income januari 2018 and income januari 2019, and so on (1 name on the x axis, with 2 values shown side by side for comparison). Then I want to sort the bars from high to low by Income Jan-Feb 2019.
I tried:
import matplotlib.pyplot as plt
import pandas as pd
fig, ax = plt.subplots()
ax = df1[["Sunarto","Income Januari 2018", "Income Januari 2019"]].plot(x='branch', kind='bar', color=["g","b"],rot=45)
plt.show()
Consider a groupby aggregation and then run DataFrame.plot. The code below lines up all branches on the x-axis, with the different income columns as color-coded keys in the legend.
agg_df = df.groupby('branch').sum()
fig, ax = plt.subplots(figsize=(15,5))
agg_df.plot(kind='bar', edgecolor='w', ax=ax, rot=22, width=0.5, fontsize = 15)
# ADD TITLES AND LABELS
plt.title('Income by Branches, Jan/Feb 2018-2019', weight='bold', size=24)
plt.xlabel('Branch', weight='bold', size=24)
plt.ylabel('Income', weight='bold', size=20)
plt.tight_layout()
plt.show()
plt.clf()
Should you want a separate plot for each branch, with its columns ordered by value, iterate over the groupby object:
dfs = df.groupby('branch')
for i,g in dfs:
ord_cols = (pd.melt(g.drop(columns="wilayah"), id_vars = "branch")
.sort_values("value")["variable"].values
)
fig, ax = plt.subplots(figsize=(8,4))
(g.reindex(columns=ord_cols)
.plot(kind='bar', edgecolor='w', ax=ax, rot=0, width=0.5, fontsize = 15)
)
# ADD TITLES AND LABELS
plt.title('Income by {} Branch, Jan/Feb 2018-2019'.format(i),
weight='bold', size=16)
plt.xlabel('Branch', weight='bold', size=16)
plt.ylabel('Income', weight='bold', size=14)
plt.tight_layout()
plt.show()

Multiple boxplots based on pandas groups

Here is what my dataframe looks like:
year item_id sales_quantity
2014 1 10
2014 1 4
... ... ...
2015 1 7
2015 1 10
... ... ...
2014 2 1
2014 2 8
... ... ...
2015 2 17
2015 2 30
... ... ...
2014 3 9
2014 3 18
... ... ...
For each item_id, I want to plot a boxplot showing the distribution for each year.
Here is what I tried:
data = pd.DataFrame.from_csv('electronics.csv')
grouped = data.groupby(['year'])
ncols=4
nrows = int(np.ceil(grouped.ngroups/ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(35,45),
sharey=False)
for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
grouped.get_group(key).boxplot(x='year', y='sales_quantity',
ax=ax, label=key)
I get the error boxplot() got multiple values for argument 'x'. Can someone please tell me how to do this right?
If I have only a single item, then the following works
sns.boxplot(data.sales_quantity, groupby = data.year). How could I extend it for multiple items?
Link to csv
Please check the comments in the code.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)
fig, axes = plt.subplots(1, len(df['item_id_copy'].unique()), sharey=True)
for n, i in enumerate(df['item_id_copy'].unique()):
idf = df[df['item_id_copy'] == int('{}'.format(i))][['year', 'sales_quantity']].pivot(columns='year')
print(idf)
idf.plot.box(ax=axes[n])
axes[n].set_title('ID {}'.format(i))
axes[n].set_xticklabels([e[1] for e in idf.columns], rotation=45)
axes[n].set_ylim(0, 1) # You should disable this line to specify outlier properly. (but I didn't to show you a normal graph)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)
fig, axes = plt.subplots(2, 5, sharey=True)
gen_n = (n for n in range(1, 11))
gen_i = (i for i in df['item_id_copy'].unique())
for r in range(2):
for c in range(5):
n = gen_n.__next__()
i = gen_i.__next__()
idf = df[df['item_id_copy'] == int('{}'.format(i))][['year', 'sales_quantity']].pivot(columns='year')
print(idf)
idf.plot.box(ax=axes[r][c])
axes[r][c].set_title('ID {}'.format(i))
axes[r][c].set_xticklabels([e[1] for e in idf.columns], rotation=0)
axes[r][c].set_ylim(0, 1)
plt.show()
I will leave this simple version for others...
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_table('sample.txt', delimiter='\s+')
fig, axes = plt.subplots(1, 3, sharey=True)
for n, i in enumerate(df['item_id'].unique()):
idf = df[df['item_id'] == int('{}'.format(i))][['year', 'sales_quantity']].pivot(columns='year')
print(idf)
idf.plot.box(ax=axes[n])
axes[n].set_title('Item ID {}'.format(i))
axes[n].set_xticklabels([e[1] for e in idf.columns])
plt.show()
sample.txt
year item_id sales_quantity
2014 1 10
2014 1 4
2015 1 7
2015 1 10
2014 2 1
2014 2 8
2015 2 17
2015 2 30
2014 3 9
2014 3 18

Categories