I'm trying to show two boxes side by side for every x value in my boxplot, but my code draws the boxes on top of each other. I cannot figure out how to fix this because I'm using three separate data frames (org_data, holiday_false and holiday_true). Please help.
data.csv:
weekday | holiday | casual | registered
---------------------------------------
0 1 500 153
2 0 412 654
6 1 846 113
2 0 456 121
3 0 124 654
... ... ... ...
... ... ... ...
code:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
plt.style.use('seaborn-notebook')
%matplotlib inline
# Load the data and split it into two subsets: rows that are neither a
# holiday nor weekday 0/6, and rows that are a holiday or fall on weekday 0/6.
org_data = pd.read_csv("data.csv")
holiday_false = org_data[(org_data["holiday"] == 0) & (org_data["weekday"] != 0) & (org_data["weekday"] != 6)]
holiday_true = org_data[(org_data["holiday"] == 1) | (org_data["weekday"] == 0) | (org_data["weekday"] == 6)]

# Create both panels in one call and hand each axes to seaborn explicitly,
# instead of creating a full-size axes first and then overlapping it with
# plt.subplot(121) / plt.subplot(122).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 7))

# Left panel: casual users.
# NOTE: both calls draw at the same weekday positions on the same axes, so the
# second set of boxes is painted over the first (the overlap described in the
# question).
sns.boxplot(x=org_data["weekday"], y=holiday_false["casual"], color="orange", ax=ax1)
sns.boxplot(x=org_data["weekday"], y=holiday_true["casual"], color="skyblue", ax=ax1)
ax1.set_title("Number of Casual Users on Holidays and Non-Holidays")
ax1.set_xlabel("Days")
ax1.set_ylabel("Number of Casual Users")

# Right panel: registered users, drawn the same way with narrower boxes.
sns.boxplot(x=org_data["weekday"], y=holiday_false["registered"], width=0.50, color="orange", ax=ax2)
sns.boxplot(x=org_data["weekday"], y=holiday_true["registered"], width=0.50, color="skyblue", ax=ax2)
ax2.set_title("Number of Registered Users on Holidays and Non-Holidays")
ax2.set_xlabel("Days")
ax2.set_ylabel("Number of Registered Users")

plt.show()
the type of chart I get:
the chart I want:
Related
I have a problem with my plotting.
I want to plot multiple meshes in one graph, and each mesh is marked by label.
This is what the data looks like:
I only could plot 1 mesh. Please help.
this is my code (just one mesh) :
import numpy as np
import pandas as pd
import plotly.graph_objects as go
# Read the geobody table and pull each column used below into a plain list.
geob_data = pd.read_csv("Geobody.csv")
x, y, z, label = (list(geob_data[column]) for column in ("X", "Y", "Z", "LABEL"))

# A single Mesh3d trace built from every point, regardless of its label.
mesh = go.Mesh3d(
    x=x,
    y=y,
    z=z,
    color='green',
    opacity=1,
    alphahull=0,
)
fig = go.Figure(data=[mesh])
fig.show()
I wrote this code on the understanding that you want to draw two meshes on one 3D graph. The key is to extract the rows for each label and add a separate trace for each one.
import pandas as pd
import io
# Inline sample data: X/Y/Z coordinates plus a LABEL column that says which
# mesh each point belongs to (six points for label 1, six for label 2).
data = '''
X Y Z LABEL
500 500 -200 1
500 500 -180 1
505 505 -190 1
495 495 -190 1
495 505 -190 1
505 495 -190 1
400 400 -150 2
400 400 -130 2
405 405 -140 2
395 395 -140 2
395 405 -140 2
405 395 -140 2
'''
# Parse the whitespace-separated text. sep=r'\s+' is the replacement for
# delim_whitespace=True, which pandas has deprecated.
geob_data = pd.read_csv(io.StringIO(data), sep=r'\s+')
import plotly.graph_objects as go
# To plot your own file instead of the inline sample, load it here:
#geob_data = pd.read_csv("Geobody.csv")

fig = go.Figure()

# Add one Mesh3d trace per distinct label so each labelled body is drawn as
# its own mesh.
for lbl in geob_data['LABEL'].unique():
    # '@lbl' tells DataFrame.query to read the Python variable lbl.
    df = geob_data.query('LABEL == @lbl')
    # Label 1 is drawn green; any other label is drawn red.
    colors = 'green' if lbl == 1 else 'red'
    fig.add_trace(go.Mesh3d(x=df['X'].tolist(),
                            y=df['Y'].tolist(),
                            z=df['Z'].tolist(),
                            color=colors,
                            opacity=1,
                            alphahull=0
                            ))

# Fixed 600x600 canvas.
fig.update_layout(
    autosize=False,
    height=600,
    width=600,
)
fig.show()
I have a table like this:
# Sample table: one row per product, with its category and quantity.
categories = ["Toys"] * 4 + ["Food"] * 8 + ["Furniture"] * 3
products = [
    "AA", "BB", "CC", "DD",
    "SSS", "DDD", "FFF", "RRR", "EEE", "WWW", "LLLLL", "PPPPPP",
    "LPO", "NHY", "MKO",
]
quantities = [
    100, 200, 300, 50,
    20, 800, 300, 450, 150, 320, 400, 1000,
    150, 900, 1150,
]
data = {'Category': categories, 'Product': products, 'QTY': quantities}
df = pd.DataFrame(data)
df
Out:
Category Product QTY
0 Toys AA 100
1 Toys BB 200
2 Toys CC 300
3 Toys DD 50
4 Food SSS 20
5 Food DDD 800
6 Food FFF 300
7 Food RRR 450
8 Food EEE 150
9 Food WWW 320
10 Food LLLLL 400
11 Food PPPPP 1000
12 Furniture LPO 150
13 Furniture NHY 900
14 Furniture MKO 1150
So, I need to make bars subplots like this (Sum Products in each Category):
My problem is that I can't figure out how to combine categories, series, and aggregation.
I manage to split them into 3 subplots (1 always stays blank) but I can not unite them ...
import matplotlib.pyplot as plt
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))

# Column name -> (row, col) of the panel that shows its value counts.
# The top-right panel (0, 1) is never used and stays blank.
panel_of = {"Category": (0, 0), "Product": (1, 0), "QTY": (1, 1)}
for column, (row, col) in panel_of.items():
    panel = axarr[row][col]
    df[column].value_counts().plot.bar(ax=panel, fontsize=12, color='b')
    panel.set_title(column, fontsize=18)

plt.subplots_adjust(hspace=.3)
plt.show()
Out
What do I need to add to combine them?
This would be a lot easier with seaborn and FacetGrid
import pandas as pd
import seaborn as sns
# Sample table: one row per product, with its category and quantity.
data = {
    'Category': ["Toys"] * 4 + ["Food"] * 8 + ["Furniture"] * 3,
    'Product': [
        "AA", "BB", "CC", "DD",
        "SSS", "DDD", "FFF", "RRR", "EEE", "WWW", "LLLLL", "PPPPPP",
        "LPO", "NHY", "MKO",
    ],
    'QTY': [
        100, 200, 300, 50,
        20, 800, 300, 450, 150, 320, 400, 1000,
        150, 900, 1150,
    ],
}
df = pd.DataFrame(data)

# One facet per category, wrapping after two columns; the facets do not share
# their x or y axes.
g = sns.FacetGrid(
    df,
    col='Category',
    col_wrap=2,
    sharex=False,
    sharey=False,
    height=3,
    aspect=1.5,
)
# Draw a Product-vs-QTY bar chart inside every facet.
g.map_dataframe(sns.barplot, x='Product', y='QTY')
I would like to know how to plot a bar chart in which the lower and upper limits of the bins are given by the values in the age_classes column of the dataframe shown below, using pandas, seaborn or matplotlib. A sample of the dataframe looks like this:
age_classes total_cases male_cases female_cases
0 0-9 693 381 307
1 10-19 931 475 454
2 20-29 4530 1919 2531
3 30-39 7466 3505 3885
4 40-49 13701 6480 7130
5 50-59 20975 11149 9706
6 60-69 18089 11761 6254
7 70-79 19238 12281 6868
8 80-89 16252 8553 7644
9 >90 4356 1374 2973
10 Unknown 168 84 81
If you want a chart like this:
then you can make it with sns.barplot, setting age_classes as x and one of the columns (in my case total_cases) as y, as in this code:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the table; the plot below reads its age_classes and total_cases columns.
df = pd.read_csv('data.csv')

# Age classes along the x axis, total_cases on the y axis.
_, axis = plt.subplots()
sns.barplot(data=df, x='age_classes', y='total_cases', ax=axis)

plt.show()
max min mincount maxcount
0 12 10 1 6
1 21 14 1 6
2 34 19 1 6
3 6 20 1 4
4 8 22 1 4
5 41 23 1 4
This is a pandas DataFrame.
I want to produce a chart like this image:
enter image description here
The text labels are very important.
Here is my code:
# NOTE(review): max_count, min_count, max and min are not defined in this
# snippet; presumably they are sequences built earlier. As written, max and
# min would otherwise resolve to the builtins.
columns = {'maxcount': max_count, 'mincount': min_count, 'max': max, 'min': min}
df = pd.DataFrame(columns)

# Bar chart of the two count columns only.
ax = df[['maxcount', 'mincount']].plot.bar()
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Build the sample dataframe.
d = {
    'max': [12, 21, 34, 6, 8, 41],
    'min': [10, 14, 19, 20, 22, 23],
    'mincount': [1, 1, 1, 1, 1, 1],
    'maxcount': [6, 6, 6, 4, 4, 4],
}
df = pd.DataFrame(d)

# Split it in two: counts is plotted as bars, max_min supplies the text labels.
counts = pd.DataFrame(df, columns=['maxcount', 'mincount'])
max_min = pd.DataFrame(df, columns=['max', 'min'])

# Plot the counts as grouped bars.
ax = counts.plot(kind='bar', colormap='Paired', figsize=(12, 4))

# Annotate each bar with its max/min value followed by the count in
# parentheses. Each tuple is (column position, x offset of the value label,
# x offset of the count label); column 0 is labelled left of the tick and
# column 1 right of it.
text_style = dict(va='bottom', ha='center', fontsize=10)
for col, value_dx, count_dx in ((0, -0.2, -0.1), (1, 0.1, 0.2)):
    values = max_min.iloc[:, col].values
    heights = counts.iloc[:, col].values
    for row, (value, height) in enumerate(zip(values, heights)):
        # Both labels sit at the top of the bar (y = the bar's count).
        ax.annotate('%.d' % value, (row + value_dx, counts.iloc[row, col]), **text_style)
        ax.annotate('(%.d' % height + ')', (row + count_dx, counts.iloc[row, col]), **text_style)
This is the output:
Here is what my dataframe looks like:
year item_id sales_quantity
2014 1 10
2014 1 4
... ... ...
2015 1 7
2015 1 10
... ... ...
2014 2 1
2014 2 8
... ... ...
2015 2 17
2015 2 30
... ... ...
2014 3 9
2014 3 18
... ... ...
For each item_id, I want to plot a boxplot showing the distribution for each year.
Here is what I tried:
# Load the sales table. DataFrame.from_csv is no longer available in current
# pandas; read_csv with index_col=0 and parse_dates=True applies the same
# defaults from_csv used (first column as index, dates parsed).
data = pd.read_csv('electronics.csv', index_col=0, parse_dates=True)
grouped = data.groupby(['year'])

# Lay the groups out on a grid four panels wide, with as many rows as needed.
ncols = 4
nrows = int(np.ceil(grouped.ngroups / ncols))
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(35, 45),
                         sharey=False)

# Pair every group key with one panel of the flattened grid.
for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
    # NOTE: this is the call that raises the "boxplot() got multiple values
    # for argument 'x'" error reported in the question.
    grouped.get_group(key).boxplot(x='year', y='sales_quantity',
                                   ax=ax, label=key)
I get the error boxplot() got multiple values for argument 'x'. Can someone please tell me how to do this right?
If I have only a single item, then the following works
sns.boxplot(data.sales_quantity, groupby = data.year). How could I extend it for multiple items?
Link to csv
Please check the comments in the code.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)

# Compute the distinct item ids once; they set both the number of panels and
# the loop below.
item_ids = df['item_id_copy'].unique()

# squeeze=False keeps the axes in a 2-D array even when there is only one
# item, so taking the single row always yields something iterable.
fig, axes_grid = plt.subplots(1, len(item_ids), sharey=True, squeeze=False)
axes = axes_grid[0]

for ax, i in zip(axes, item_ids):
    # Rows of this item, pivoted so that each year becomes its own column.
    idf = df[df['item_id_copy'] == i][['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('ID {}'.format(i))
    # The tick labels use the second element of each column key.
    ax.set_xticklabels([e[1] for e in idf.columns], rotation=45)
    # Disable this line to show the outliers properly; it is kept here only
    # so that the example produces a normal-looking graph.
    ax.set_ylim(0, 1)

plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('electronics_157_3cols.csv')
print(df)

# Fixed 2x5 grid: room for the first ten item ids.
fig, axes = plt.subplots(2, 5, sharey=True)

# flatten() walks the grid row by row. zip() stops at the shorter input, so
# fewer than ten items just leaves the remaining panels empty instead of
# raising StopIteration.
for ax, i in zip(axes.flatten(), df['item_id_copy'].unique()):
    # Rows of this item, pivoted so that each year becomes its own column.
    idf = df[df['item_id_copy'] == i][['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('ID {}'.format(i))
    # The tick labels use the second element of each column key.
    ax.set_xticklabels([e[1] for e in idf.columns], rotation=0)
    ax.set_ylim(0, 1)

plt.show()
I will leave this simple version for others...
import pandas as pd
import matplotlib.pyplot as plt
# Raw string so the backslash in the whitespace regex is passed through
# unchanged rather than being treated as a string escape.
df = pd.read_table('sample.txt', delimiter=r'\s+')

# One panel per distinct item id; squeeze=False keeps the axes in a 2-D array
# even when there is only a single item.
item_ids = df['item_id'].unique()
fig, axes_grid = plt.subplots(1, len(item_ids), sharey=True, squeeze=False)
axes = axes_grid[0]

for ax, i in zip(axes, item_ids):
    # Rows of this item, pivoted so that each year becomes its own column.
    idf = df[df['item_id'] == i][['year', 'sales_quantity']].pivot(columns='year')
    print(idf)
    idf.plot.box(ax=ax)
    ax.set_title('Item ID {}'.format(i))
    # The tick labels use the second element of each column key.
    ax.set_xticklabels([e[1] for e in idf.columns])

plt.show()
sample.txt
year item_id sales_quantity
2014 1 10
2014 1 4
2015 1 7
2015 1 10
2014 2 1
2014 2 8
2015 2 17
2015 2 30
2014 3 9
2014 3 18