I have the following data frame:
# Long-format sample data: one row per (group, month) holding the number of
# items found and the total they were drawn from.
df = pd.DataFrame(
    {
        "group": ["Red"] * 3 + ["Blue"] * 3,
        "valueA_found": [10, 40, 50, 20, 50, 70],
        "valueA_total": [100, 200, 210] * 2,
        "date": ["2017-01-01", "2017-02-01", "2017-03-01"] * 2,
    }
)
and can create a plot:
# Grouped bar chart of the found values: one bar per group for every date.
# The style must be selected before the axes are created; axes that already
# exist keep the look they were created with.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(15, 8))
# Draw explicitly on `ax` instead of relying on the "current axes".
g = sns.barplot(x="date", y="valueA_found", hue="group", data=df, ax=ax)
# g.set_yscale('log')  # optional: enable for a logarithmic y axis
# Rotate the tick labels seaborn already produced. Passing df.date here would
# hand over one label per row (6) although there is only one tick per
# distinct date (3).
plt.setp(g.get_xticklabels(), rotation=45)
g.set(xlabel='date', ylabel='value from total')
However, I would rather see the following for each point in time:
As you can see, valueA_found is plotted as one bar per group, and the total is plotted as a single bar.
As I initially suggested, it would also be possible to plot the total as a line, but as outlined in the comments it is probably better to draw it as a bar as well. valueA_total, i.e. the total, should be the same for every group within a month.
An option might be to plot the total values in a desaturated/more transparent bar plot behind the first dataset.
import matplotlib.pyplot as plt
import pandas as pd
# The seaborn.apionly module no longer exists in current seaborn releases;
# import the top-level package instead.
import seaborn as sns

# valueA is the part that was found, valueB is the total it belongs to.
df = pd.DataFrame({'group': ['Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue'],
                   'valueA': [10, 40, 50, 20, 50, 70],
                   'valueB': [100, 200, 210, 100, 200, 210],
                   'date': ['2017-01-01', '2017-02-01', '2017-03-01',
                            '2017-01-01', '2017-02-01', '2017-03-01']})

fig, ax = plt.subplots(figsize=(6, 4))

# Background layer: the totals, drawn first in pale, semi-transparent colours.
sns.barplot(x="date", y="valueB", hue="group", data=df,
            ax=ax, palette={"Red": "#f3c4c4", "Blue": "#c5d6f2"}, alpha=0.6)
# Foreground layer: the found values in saturated colours, drawn on top of
# the totals at the same positions.
sns.barplot(x="date", y="valueA", hue="group", data=df,
            ax=ax, palette={"Red": "#d40000", "Blue": "#0044aa"})

# Rotate the labels that are already on the axis. df.date holds one entry per
# row (6) while the axis has one tick per distinct date (3), so it must not be
# passed to set_xticklabels.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set(xlabel='date', ylabel='value from total')
plt.show()
Or just putting one bar plot in the background, assuming that the totals of each group are always the same:
import matplotlib.pyplot as plt
import pandas as pd
# The seaborn.apionly module no longer exists in current seaborn releases;
# import the top-level package instead.
import seaborn as sns

# valueA is the part that was found, valueB is the total it belongs to.
df = pd.DataFrame({'group': ['Red', 'Red', 'Red', 'Blue', 'Blue', 'Blue'],
                   'valueA': [10, 40, 50, 20, 50, 70],
                   'valueB': [100, 200, 210, 100, 200, 210],
                   'date': ['2017-01-01', '2017-02-01', '2017-03-01',
                            '2017-01-01', '2017-02-01', '2017-03-01']})

fig, ax = plt.subplots(figsize=(6, 4))

# Background layer: a single wide bar per date showing the total. Only the
# "Red" rows are used because the total is assumed to be identical for all
# groups on a given date.
sns.barplot(x="date", y="valueB", data=df[df.group == "Red"],
            ax=ax, color="#e7e2e8", label="total")
# Foreground layer: the found values per group.
sns.barplot(x="date", y="valueA", hue="group", data=df,
            ax=ax, palette={"Red": "#d40000", "Blue": "#0044aa"})

# Rotate the labels that are already on the axis. df.date holds one entry per
# row (6) while the axis has one tick per distinct date (3), so it must not be
# passed to set_xticklabels.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set(xlabel='date', ylabel='value from total')
plt.show()
Related
I have multiple data frames with different numbers of rows but the same number of columns, i.e.
DATA
# Four female and four male samples; every frame has an ID and a value column.
female_df1 = pd.DataFrame(dict(ID=[5, 21, 17], value=[85, 56.7, 77.9]))
female_df2 = pd.DataFrame(dict(ID=[75, 1, 7], value=[39, 66.7, 77.9]))
female_df3 = pd.DataFrame(dict(ID=[5, 21, 17], value=[85, 56.7, 77.9]))
female_df4 = pd.DataFrame(dict(ID=[5, 21, 17], value=[85, 56.7, 77.9]))

male_df1 = pd.DataFrame(dict(ID=[35, 1, 7], value=[15, 36.7, 87.9]))
male_df2 = pd.DataFrame(dict(ID=[5, 11, 17], value=[99, 96.7, 97.9]))
male_df3 = pd.DataFrame(dict(ID=[35, 41, 37], value=[15, 16.7, 17.9]))
male_df4 = pd.DataFrame(dict(ID=[51, 11, 27], value=[35, 36.7, 37.9]))
Now I would like to draw a single boxplot figure from the data frames above. I used the code below to do so:
# One box per female sample, all drawn side by side on a single axes.
labels = ['f1', 'f2', 'f3', 'f4']
vec = [
    frame['value'].values
    for frame in (female_df1, female_df2, female_df3, female_df4)
]

fig, ax2 = plt.subplots(figsize=(15, 10))
ax2.boxplot(vec, labels=labels)
plt.show()
The output is a boxplot of the female values. I have similar male data frames, and I want to plot the two side by side (i.e. fbeta1.0 next to mbeta1.0) to observe the difference in the data distributions. Any insights are much appreciated.
Desired Output plot:
Desired Output
This is a bit manual, but should do what you need...
### DATA ###
female_df1 = pd.DataFrame({'ID': [5,21,17], 'value': [85, 56.7, 77.9]})
female_df2 = pd.DataFrame({'ID': [75,1,7], 'value': [39, 66.7, 77.9]})
female_df3 = pd.DataFrame({'ID': [5,21,17], 'value': [85, 56.7, 77.9]})
female_df4 = pd.DataFrame({'ID': [5,21,17], 'value': [85, 56.7, 77.9]})
male_df1 = pd.DataFrame({'ID': [35,1,7], 'value': [15, 36.7, 87.9]})
male_df2 = pd.DataFrame({'ID': [5,11,17], 'value': [99, 96.7, 97.9]})
male_df3 = pd.DataFrame({'ID': [35,41,37], 'value': [15, 16.7, 17.9]})
male_df4 = pd.DataFrame({'ID': [51,11,27], 'value': [35, 36.7, 37.9]})

### PLOTTING ###
# Pair every female sample with the male sample of the same number; each
# pair gets its own subplot so the two distributions sit side by side.
pairs = [
    (female_df1, male_df1),
    (female_df2, male_df2),
    (female_df3, male_df3),
    (female_df4, male_df4),
]
fig, ax = plt.subplots(1, len(pairs), figsize=(15, 6))
for number, (axis, (female, male)) in enumerate(zip(ax, pairs), start=1):
    # The tick labels carry the sample number so that subplot k reads
    # "f<k>" / "m<k>" rather than repeating "f1" / "m1" everywhere.
    # NOTE: matplotlib >= 3.9 prefers the keyword `tick_labels` over `labels`;
    # `labels` is kept here so the snippet also runs on older releases.
    axis.boxplot([female['value'].values, male['value'].values],
                 labels=[f'f{number}', f'm{number}'])
    axis.set_title(f"M{number} & F{number}")
plt.show()
Plot
The following code uses `colWidths` to adjust the width of the table cells:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Use a font that contains the CJK glyphs in the labels, and keep the minus
# sign renderable with that font.
mpl.rcParams['font.sans-serif'] = ['FangSong']
mpl.rcParams['axes.unicode_minus'] = False

# --- pie chart -------------------------------------------------------------
labels = ['A难度水平', 'B难度水平', 'C难度水平', 'D难度水平']
students = [0.35, 0.15, 0.2, 0.3]
explode = [0.1] * 4
colors = ['r', 'y', 'b', 'gray']

plt.pie(
    students,
    autopct='%3.1f%%',
    labels=labels,
    textprops={'fontsize': 12, 'family': 'FangSong', 'fontweight': 'bold'},
    explode=explode,
    colors=colors,
)

# --- table below the pie: three rows of four cells ---------------------------
studentValues = [
    ['A', 'B', 'C', 'D'],
    [350, 150, 200, 300],
    ['test', 'test', 'test', 'test'],
]
cellcolors = [
    ['r', 'y', 'b', 'gray'],
    ['b', 'gray', 'y', 'r'],
    ['gray', 'y', 'b', 'r'],
]
rowLabels = ['aaaaa', 'bbbbb', 'ccccc']

plt.table(
    cellText=studentValues,
    cellColours=cellcolors,
    cellLoc='center',
    colWidths=[0.1] * 4,  # every column takes 10% of the axes width
    rowLabels=rowLabels,
)
plt.show()
How could I adjust the height of the cell inside the plt.table function?
Keep a reference to the table object returned by plt.table, and adjust the cell size via its `scale` method.
# Keep the Table object returned by plt.table so it can be resized afterwards.
ytable = plt.table(cellText=studentValues,
                   cellColours=cellcolors,
                   cellLoc='center', colWidths=[0.1] * 4,
                   rowLabels=rowLabels)
# scale(xscale, yscale) multiplies the column widths by xscale and the row
# heights by yscale. A factor of 1 leaves a dimension unchanged, so the
# y factor must differ from 1 to change the cell height.
ytable.scale(1, 1.5)
I have a problem. I want to show the two countries with the highest count in each category, but unfortunately I only get the output below. I would like each part to be shown as a separate category.
Is there an option?
import pandas as pd
import seaborn as sns

d = {
    'count': [50, 20, 30, 100, 3, 40, 5],
    'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
    'part': ['b', 'b', 's', 's', 'b', 's', 's'],
}
df = pd.DataFrame(data=d)
print(df)

# Sort by count (largest first) and keep the first two rows of every part,
# i.e. the two largest counts per part.
top_two_per_part = (
    df.sort_values('count', ascending=False)
    .groupby('part')
    .head(2)
)
ax = sns.barplot(x="country", y="count", hue='part',
                 data=top_two_per_part, palette='GnBu')
What I got
What I want
You can always skip seaborn and plot everything with matplotlib directly.
from matplotlib import pyplot as plt
import pandas as pd

# The bundled seaborn style sheets were renamed to "seaborn-v0_8" in
# matplotlib 3.6 and the old name was removed later; use whichever name this
# installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')

df = pd.DataFrame({
    'count': [50, 20, 30, 100, 3, 40, 5],
    'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
    'part': ['b', 'b', 's', 's', 'b', 'b', 's']
})

fig, ax = plt.subplots()
offset = .2  # horizontal distance of each bar from the centre of its group
xticks, xlabels = [], []
handles, labels = [], []

# One cluster of (up to) two bars per part, centred on the integer position i.
for i, (idx, group) in enumerate(df.groupby('part')):
    plot_data = group.nlargest(2, 'count')
    # Trim the positions to the number of rows actually available so a part
    # with a single country does not cause a length mismatch in ax.bar.
    x = [i - offset, i + offset][:len(plot_data)]
    barcontainer = ax.bar(x=x, height=plot_data['count'], width=.35)
    xticks += x
    xlabels += plot_data['country'].tolist()
    # One legend entry per part: the first bar of the cluster stands for it.
    handles.append(barcontainer[0])
    labels.append(idx)

# Label every bar with its country and explain the colours in the legend.
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels)
ax.legend(handles=handles, labels=labels, title='Part')
plt.show()
The following approach creates a FacetGrid for your data. Seaborn 0.11.2 introduced the helpful g.axes_dict. (In the example data I changed the 'part' of the second 'BG' entry to 'b', assuming that each country/part combination occurs only once, as in the example plots.)
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

d = {
    'count': [50, 20, 30, 100, 3, 40, 5],
    'country': ['DE', 'CN', 'CN', 'BG', 'PL', 'BG', 'RU'],
    'part': ['b', 'b', 's', 's', 'b', 'b', 's'],
}
df = pd.DataFrame(data=d)

sns.set()
# One facet per part; x axes are independent because every facet shows its
# own set of countries.
g = sns.FacetGrid(data=df, col='part', col_wrap=2, sharey=True, sharex=False)
for part, df_part in df.groupby('part'):
    # Restricting `order` to the two largest countries makes barplot draw
    # only those two bars, largest first.
    top_countries = df_part.nlargest(2, 'count')['country']
    facet_ax = g.axes_dict[part]
    ax = sns.barplot(data=df_part, x='country', y='count',
                     order=top_countries, palette='summer', ax=facet_ax)
    ax.set(xlabel=f'part = {part}')
g.set_ylabels('count')
plt.tight_layout()
plt.show()
import pandas as pd
import numpy as np

# Reproducible random sample: 100 rows of (Month, Code, ColA, ColB).
np.random.seed(365)
rows = 100
data = {
    'Month': np.random.choice(['2014-01', '2014-02', '2014-03', '2014-04'], size=rows),
    'Code': np.random.choice(['A', 'B', 'C'], size=rows),
    'ColA': np.random.randint(5, 125, size=rows),
    'ColB': np.random.randint(0, 51, size=rows),
}
df = pd.DataFrame(data)

# Remove two Code/Month combinations entirely so the grouped result has gaps.
is_a_in_april = (df.Code == 'A') & (df.Month == '2014-04')
is_c_in_march = (df.Code == 'C') & (df.Month == '2014-03')
df = df[~(is_a_in_april | is_c_in_march)]

# Sum ColA and ColB per (Code, Month); the result has a two-level index.
dfg = df.groupby(['Code', 'Month']).sum()
For the data above, I wish to draw a stacked bar plot:
# Moves index level 0 ('Code') into the columns, leaving 'Month' as the
# index, and draws the result as stacked bars.
dfg.unstack(level=0).plot.bar(stacked=True)
I want one bar per 'Code', but the plot above shows one bar per 'Month' instead. Why? And is there a better way to draw a stacked plot from this data?
The index of the input dataframe is used by default as x-value in plot.bar
IIUC, you need:
# Moves index level 1 ('Month') into the columns, so 'Code' stays as the
# index and therefore becomes the x axis of the stacked bars.
dfg.unstack(level=1).plot.bar(stacked=True)
legend position:
# Suppress the automatic axes legend and add a figure-level legend instead,
# anchored at the right edge of the figure and vertically centred.
ax = dfg.unstack(level=1).plot.bar(stacked=True, legend=False)
ax.figure.legend(bbox_to_anchor=(1, 0.5), loc='center left')
I'm new to Python, Pandas and Plotly so maybe the answer is easy but I couldn't find anything on the forum or anywhere else …
I don’t want to use Dash nor ipywidgets since I want to be able to export in HTML using plotly.offline.plot (I need an interactive HTML file to dynamically control the figure without any server running like Dash seems to do).
My problem is that I would like to filter a plotly figure using several cumulative dropdown buttons (2 in this example, but there could be more), i.e. filter the original data by the values selected in the dropdown lists.
num label color value
1 A red 0.4
2 A blue 0.2
3 A green 0.3
4 A red 0.6
5 A blue 0.7
6 A green 0.4
7 B blue 0.2
8 B green 0.4
9 B red 0.4
10 B green 0.2
11 C red 0.1
12 C blue 0.3
13 D red 0.8
14 D blue 0.4
15 D green 0.6
16 D yellow 0.5
In this example, if I choose label ‘A’ and color ‘red’, I would like to display ONLY the values of the rows with label ‘A’ AND color ‘red’, as follows:
num label color value
1 A red 0.4
4 A red 0.6
Then, the figure should display only 2 values
1) So here is the code I have for the moment (see below) but I don’t know how to continue. Do you have any idea ?
2) Extra question: is it possible to use checkboxes instead of dropdown lists, so that multiple values can be selected within one criterion? For example, the label filter could be A or B, not just a single value from the list …
Thanks in advance for your help !
import pandas as pd
import plotly.graph_objects as go

d = {
    'num': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    'label': ['A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C', 'D', 'D', 'D', 'D'],
    'color': ['red', 'blue', 'green', 'red', 'blue', 'green', 'blue', 'green', 'red', 'green', 'red', 'blue', 'red', 'blue', 'green', 'yellow'],
    'value': [0.4, 0.2, 0.3, 0.6, 0.7, 0.4, 0.2, 0.4, 0.4, 0.2, 0.1, 0.3, 0.8, 0.4, 0.6, 0.5],
}

# Build dataframe
df = pd.DataFrame(data=d)

# Dropdown 1: an "All labels" entry followed by one entry per distinct label.
# The per-label entries have no `args` yet -- what to put there is the open
# question:  args = [{'y' : ??? }]
labels = df["label"].unique()
buttonsLabels = [
    dict(label="All labels",
         method="restyle",
         args=[{'y': [df["value"] * 100]}]),  # or what else ?
]
buttonsLabels += [
    dict(label=label, method="restyle", visible=True)
    for label in labels
]

# Dropdown 2: an "All colors" entry followed by one entry per distinct color.
# Again the per-color entries still lack their `args`:  args = [{'y' : ??? }]
colors = df["color"].unique()
buttonsColors = [
    dict(label="All colors",
         method="restyle",
         args=[{'y': [df["value"] * 100]}]),  # or what else ?
]
buttonsColors += [
    dict(label=color, method="restyle", visible=True)
    for color in colors
]

# Display figure: a single trace with every value shown as a percentage,
# plus the two dropdown menus stacked vertically.
fig = go.Figure(data=[go.Scatter(x=df["num"], y=df["value"] * 100)])
fig.update_layout(updatemenus=[
    dict(buttons=buttonsLabels, showactive=True),
    dict(buttons=buttonsColors, showactive=True, y=0.8),
])
fig.show()
It's certainly possible to display and filter a dataframe with multiple dropdowns. The code snippet below will do exactly that for you. The snippet has quite a few elements in common with your provided code, but I had to build it from scratch to make sure everything harmonized. Run the snippet below, and select A and Red to see that you will in fact get:
num label color value
1 A red 0.4
4 A red 0.6
Plot:
There's still room for improvement. I'll polish the code and improve the layout when I find the time. First, please let me know if this is in fact what you were looking for.
Complete code:
# Imports
import plotly.graph_objs as go
import pandas as pd
import numpy as np

# source data: one record per observation. Building the frame from a list of
# records (instead of transposing a dict of dicts) gives every column a
# proper dtype rather than `object`.
df = pd.DataFrame([
    {'num': 1, 'label': 'A', 'color': 'red', 'value': 0.4},
    {'num': 2, 'label': 'A', 'color': 'blue', 'value': 0.2},
    {'num': 3, 'label': 'A', 'color': 'green', 'value': 0.3},
    {'num': 4, 'label': 'A', 'color': 'red', 'value': 0.6},
    {'num': 5, 'label': 'A', 'color': 'blue', 'value': 0.7},
    {'num': 6, 'label': 'A', 'color': 'green', 'value': 0.4},
    {'num': 7, 'label': 'B', 'color': 'blue', 'value': 0.2},
    {'num': 8, 'label': 'B', 'color': 'green', 'value': 0.4},
    {'num': 9, 'label': 'B', 'color': 'red', 'value': 0.4},
    {'num': 10, 'label': 'B', 'color': 'green', 'value': 0.2},
    {'num': 11, 'label': 'C', 'color': 'red', 'value': 0.1},
    {'num': 12, 'label': 'C', 'color': 'blue', 'value': 0.3},
    {'num': 13, 'label': 'D', 'color': 'red', 'value': 0.8},
    {'num': 14, 'label': 'D', 'color': 'blue', 'value': 0.4},
    {'num': 15, 'label': 'D', 'color': 'green', 'value': 0.6},
    {'num': 16, 'label': 'D', 'color': 'yellow', 'value': 0.5},
    {'num': 17, 'label': 'E', 'color': 'purple', 'value': 0.68},
])
df_input = df.copy()

# distinct labels (menu 1) and x values; the x values are sorted so that they
# line up with the sorted row index every per-label frame is reshaped to.
labels = df['label'].unique().tolist()
dates = sorted(df['num'].unique().tolist())

# dataframe collection grouped by labels: for every label a num x color table
# of values (NaN where the label has no observation for that num/color).
dfs = {
    label: pd.pivot_table(df[df['label'] == label],
                          values='value',
                          index=['num'],
                          columns=['color'],
                          aggfunc='sum')
    for label in labels
}

# find row and column unions across all per-label tables
common_cols = sorted(set().union(*(table.columns for table in dfs.values())))
common_rows = sorted(set().union(*(table.index for table in dfs.values())))

# empty template frame that carries the common dimensions
df_common = pd.DataFrame(np.nan, index=common_rows, columns=common_cols)

# reshape every per-label table to the common dimensions so that each label
# supplies exactly one (possibly all-NaN) series per trace
dfc = {label: table.reindex_like(df_common) for label, table in dfs.items()}

# plotly start
fig = go.Figure()

# one trace per color; the y data is supplied by the label buttons below
for col in common_cols:
    fig.add_trace(go.Scatter(x=dates,
                             visible=True,
                             marker=dict(size=12, line=dict(width=2)),
                             marker_symbol='diamond',
                             name=col))

# buttons for menu 1, labels: selecting a label swaps the y data of every
# trace for that label's series (one array per trace, in trace order)
buttons = [
    dict(method='update',
         label=label,
         visible=True,
         args=[{'y': [table[col].values for col in common_cols]}])
    for label, table in dfc.items()
]

# buttons for menu 2, colors
b2_labels = common_cols

# matrix to feed all visible arguments for all traces
# so that they can be shown or hidden by choice: row i is True only at
# position i, i.e. it shows trace i and hides all others
b2_show = np.eye(len(b2_labels), dtype=bool).tolist()

# "All" first, then one show-only-this-color button per trace, then "None"
buttons2 = [{'method': 'update',
             'label': 'All',
             'args': [{'visible': [True] * len(common_cols)}]}]
buttons2 += [
    dict(method='update', label=name, args=[{'visible': mask}])
    for name, mask in zip(b2_labels, b2_show)
]
buttons2.append(dict(method='update',
                     label='None',
                     args=[{'visible': [False] * len(common_cols)}]))

# the two dropdown menus: labels on top, colors below
updatemenu = [
    dict(buttons=buttons, direction='down', showactive=True),
    dict(buttons=buttons2, y=0.6),
]

fig.update_layout(showlegend=False, updatemenus=updatemenu)
# fixed y range with some headroom so the axis does not jump between labels
fig.update_layout(yaxis=dict(range=[0, df_input['value'].max() + 0.4]))

# title
fig.update_layout(
    title=dict(
        text="<i>Filtering with multiple dropdown buttons</i>",
        font={'size': 18},
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top'))

# button annotations: captions placed next to the two dropdown menus
fig.update_layout(
    annotations=[
        dict(text="<i>Label</i>", x=-0.2, xref="paper", y=1.1, yref="paper",
             align="left", showarrow=False,
             font=dict(size=16, color='steelblue')),
        dict(text="<i>Color</i>", x=-0.2, xref="paper", y=0.7, yref="paper",
             align="left", showarrow=False,
             font=dict(size=16, color='steelblue')),
    ])

fig.show()