How to plot all the xaxis values in a df.plot? - python

I have this df:
CODE MONTH PP
24 000136 Enero 57.9
25 000136 Febrero 124.3
26 000136 Marzo 147.7
27 000136 Abril 71.5
28 000136 Mayo 13.2
... ... ...
I'm plotting the figure with this code:
fig = plt.figure('Graphic', figsize=(30,15), dpi=150)
ax1 = fig.add_axes([0.2, 0.25, 0.60, 0.60])
df.plot(x='MONTH',y='PP',style='--o',color='black',linewidth=8,marker='o',markersize=12, ax=ax1)
ax1.xaxis.set_major_locator(MaxNLocator(min_n_ticks=12))
I want to see all 12 month values on the x-axis of my figure, but I only get 6 of them.
I tried ax1.xaxis.set_major_locator(MaxNLocator(min_n_ticks=12)), but it only plots the ticks, not the labels on the x-axis.
Do you know how I can plot all the x-axis values?
I can only use a df.plot
Thanks in advance.

The x-axis ticks are chosen automatically, so pass the tick positions you want to display through the xticks argument of df.plot. With xticks=range(12), all 12 months are displayed. The missing data is added as appropriate.
import matplotlib.pyplot as plt
fig = plt.figure('Graphic', figsize=(20,10), dpi=150)
ax1 = fig.add_axes([0.2, 0.25, 0.60, 0.60])
df.plot(x='MONTH',y='PP',style='--o',color='black',linewidth=8,markersize=12, xticks=range(12), ax=ax1)
plt.show()

Related

How to plot distributions for several bivariate groups of variable using Python

I am analysing data which is organised as following:
There are 4 different pandas data frames for each group (A, B and C).
Each dataframe representing a group has 4 subgroups (columns), and the rows represent their corresponding observations.
For example, a single group of data looks like:
subgroup-1
subgroup-2
subgroup-3
subgroup-4
12
4
NaN
9
15
3
4
NaN
16
8
3
11
17
12
8
13
11
17
12
14
I want to visualise the distributions of each subgroup for the different groups. Can anyone let me know what the available options are in Python to do this (the chart types I can use)? Thanks.
I tried using histogram, density plots but all of them work only for 2 variables.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# pandas Dataframes
group_A = pd.DataFrame(np.random.rand(50, 4) , columns=['subgroup-1' , 'subgroup-2' , 'subgroup-3' , 'subgroup-4'])
group_B = pd.DataFrame(np.random.rand(50, 4) , columns=['subgroup-1' , 'subgroup-2' , 'subgroup-3' , 'subgroup-4'])
group_C = pd.DataFrame(np.random.rand(50, 4) , columns=['subgroup-1' , 'subgroup-2' , 'subgroup-3' , 'subgroup-4'])
def plot_hist(subgroup):
    """Compare one subgroup's distribution across groups A, B and C.

    Draws a 2x2 grid of histograms and shows the figure:
    side-by-side bars with a legend, stacked bars, stacked unfilled steps,
    and a demo panel built from synthetic samples of different sizes.

    Parameters
    ----------
    subgroup : str
        Column name to read from the module-level frames ``group_A``,
        ``group_B`` and ``group_C`` (e.g. ``'subgroup-1'``). The three
        columns must have the same length so they can be stacked.
    """
    # Only the synthetic ``x_multi`` sample below is drawn after this seed;
    # the group frames are created at module level before this call.
    np.random.seed(19680801)
    n_bins = 10

    # dstack of three 1-D columns gives shape (1, n_rows, 3); taking [0]
    # yields an (n_rows, 3) array, i.e. one histogram dataset per group.
    x = np.dstack([group_A[subgroup], group_B[subgroup], group_C[subgroup]])[0]

    fig, axes = plt.subplots(nrows=2, ncols=2)
    ax0, ax1, ax2, ax3 = axes.flatten()

    ax0.hist(x, n_bins, density=True, histtype='bar', label=['A', 'B', 'C'])
    ax0.legend(prop={'size': 10})
    ax0.set_title('bars with legend')

    ax1.hist(x, n_bins, density=True, histtype='bar', stacked=True)
    ax1.set_title('stacked bar')

    ax2.hist(x, n_bins, histtype='step', stacked=True, fill=False)
    ax2.set_title('stack step (unfilled)')

    # Make a multiple-histogram of data-sets with different length.
    # NOTE: this panel uses random demo data, not the group frames.
    x_multi = [np.random.randn(n) for n in [10000, 5000, 2000]]
    ax3.hist(x_multi, n_bins, histtype='bar')
    ax3.set_title('different sample sizes')

    fig.tight_layout()
    plt.show()
plot_hist('subgroup-1')
reference

plot from pandas dataframe with negative and positive values

I have a dataframe which looks like this:
MM Initial Energy MM Initial Angle QM Energy QM Angle
0 13.029277 120.0 18.048 120.0
1 11.173115 125.0 15.250 125.0
2 9.411475 130.0 12.668 130.0
3 7.762888 135.0 10.309 135.0
4 6.239025 140.0 8.180 140.0
5 4.853004 145.0 6.286 145.0
6 3.617394 150.0 4.633 150.0
7 2.544760 155.0 3.226 155.0
8 1.646335 160.0 2.070 160.0
9 0.934298 165.0 1.166 165.0
10 0.419003 170.0 0.519 170.0
11 0.105913 175.0 0.130 175.0
12 0.000000 -180.0 0.000 -180.0
13 0.105988 -175.0 0.130 -175.0
14 0.420029 -170.0 0.519 -170.0
15 0.937312 -165.0 1.166 -165.0
16 1.650080 -160.0 2.070 -160.0
17 2.548463 -155.0 3.227 -155.0
18 3.621227 -150.0 4.633 -150.0
19 4.856266 -145.0 6.286 -145.0
20 6.236939 -140.0 8.180 -140.0
21 7.760035 -135.0 10.309 -135.0
22 9.409117 -130.0 12.669 -130.0
23 11.170671 -125.0 15.251 -125.0
24 13.033293 -120.0 18.048 -120.0
I want to plot the data with the angles on the x-axis and the energy on the y-axis. This sounds fairly simple; however, pandas or matplotlib sorts the x-axis values in such a manner that my plot looks split. This is what it looks like:
However, this is how I want it:
My code is as follows:
df=pd.read_fwf('scan_c1c2c3h31_orig.txt', header=None, prefix='X')
df.rename(columns={'X0':'MM Initial Energy',
'X1':'MM Initial Angle',
'X2':'QM Energy', 'X3':'QM Angle'},
inplace=True)
df=df.sort_values(by=['MM Initial Angle'], axis=0, ascending=True)
df=df.reset_index(drop=False)
df2=pd.read_fwf('scan_c1c2c3h31.txt', header=None, prefix='X')
df2.rename(columns={'X0':'MM Energy',
'X1':'MM Angle',
'X2':'QM Energy', 'X3':'QM Angle'},
inplace=True)
df2=df2.sort_values(by=['MM Angle'], axis=0, ascending=True)
df2=df2.reset_index(drop=False)
df
df2
ax = plt.axes()
df.plot(y="MM Initial Energy", x="MM Initial Angle", color='red', linestyle='dashed',linewidth=2.0, ax=ax, fontsize=20, legend=True)
df2.plot(y="MM Energy", x="MM Angle", color='red', ax=ax, linewidth=2.0, fontsize=20, legend=True)
df2.plot(y="QM Energy", x="QM Angle", color='blue', ax=ax, linewidth=2.0, fontsize=20, legend=True)
plt.ylim(-0.05, 6)
ax.xaxis.set_major_locator(MultipleLocator(20))
ax.xaxis.set_minor_locator(MultipleLocator(10))
ax.yaxis.set_minor_locator(MultipleLocator(0.5))
plt.xlabel('Angles (Degrees)', fontsize=25)
plt.ylabel('Energy (kcal/mol)', fontsize=25)
What I am doing is sorting the dataframe by 'MM Angle'/'MM Initial Angle' to avoid the plot "scrambling" due to repeating values in the y-axis. The angles vary from -180 to 180, where I want the -180 and +180 next to each other.
I have tried sorting the negative values in ascending order and positive values in descending order as suggested in this post, but I still get the same plot where x axis ranges from -180 to +180.
I have also tried using matplotlib axis spines to recenter the plot, and I have also tried inverting the x-axis as suggested in this post, but I still get the same plot. Additionally, I have also tried the suggestion in another post.
Any help will be appreciated.
If you don't need to rescale the plot, I would plot against the positive angles 0-360 and manually re-label the ticks:
fig, ax = plt.subplots()
# Wrap the signed angles into [0, 360) so that -180 lands right after 175,
# and sort by the wrapped angle so the line is drawn monotonically even if
# the frame was previously sorted by the signed angle.
wrapped = df.assign(Angle=df['MM Initial Angle'] % 360).sort_values('Angle')
wrapped.plot(x='Angle', y=['QM Energy', 'MM Initial Energy'], ax=ax)
ax.xaxis.set_major_locator(MultipleLocator(20))
x_ticks = ax.get_xticks()
# Pin the tick positions before relabelling so each label stays attached to
# the tick it was computed for; restore the limits because set_xticks may
# widen the view to include ticks outside the data range.
x_limits = ax.get_xlim()
ax.set_xticks(x_ticks)
ax.set_xlim(x_limits)
# Show wrapped positions above 180 as their signed equivalents (185 -> -175).
ax.set_xticklabels([t - 360 if t > 180 else t for t in x_ticks])
plt.show()
Output:

Expand x axis when x is a string (make xlim wider)

I have the following pandas data frame:
print(so)
Time Minions Crime_rate
0 2018-01 1907 0.147352
1 2018-02 2094 0.165234
2 2018-03 2227 0.148181
3 2018-04 2101 0.135174
4 2018-05 2321 0.132271
5 2018-06 2208 0.128623
6 2018-07 2593 0.140378
7 2018-08 2660 0.145865
8 2018-09 2488 0.149920
9 2018-10 2640 0.152273
10 2018-11 2501 0.138345
11 2018-12 2379 0.134931
I want to plot Time on the x axis, Minions on the y axis and Crime_rate on a secondary y axis. The problem is that the x axis is cropped and I want to expand it. I tried the following code:
so.plot(x="Time", y="Minions", kind="bar", color="orange", legend=False)
plt.ylabel("Number of Minions")
so["Crime_rate"].plot(secondary_y=True, rot=90)
plt.ylabel("Minion crime rate")
plt.ylim(0, 1)
# plt.xlim(min, max)
plt.show()
The code returns the following plot:
I had done this before using plt.xlim(), but so["Time"] is a string, so I cannot subtract or add to the limits. How can I expand the x axis limits to show the first and last bars?
I couldn't find a solution that involves keeping the x axis as a string. To solve this, I had to avoid setting the x axis and then overwriting its values using set_xticklabels().
fig, ax1 = plt.subplots()
ax1 = so["Minions"].plot(ax=ax1, kind="bar", color="orange", legend=False)
ax2 = ax1.twinx()
so["Crime_rate"].plot(ax=ax2, legend=False)
ax1.set_ylabel("Minions")
ax1.set_xlabel("Time")
ax2.set_ylabel("Minion crime rate")
ax2.set_xlim(-0.5, len(so) - 0.5) # extend the x axis by 0.5 to the left and 0.5 to the right
ax2.set_ylim(0, 1)
ax2.set_xticklabels(so["Time"])
plt.show()
This works because I never set the x axis in ax1, so it defaulted to the positions [0, 1, 2, ..., 10, 11]. This way, I could set the x axis range from -0.5 to 11.5.

How to make bar graph of 2 variables based on same DataFrame and I want to choose 2 or until 5 data

I have a DataFrame:
wilayah branch Income Januari 2018 Income Januari 2019 Income Febuari 2018 Income Febuari 2019 Income Jan-Feb 2018 Income Jan-Feb 2019
1 sunarto 1000 1500 2000 3000 3333 4431
1 pemabuk 500 700 3000 3000 4333 5431
1 pemalas 2000 2200 4000 3000 5333 6431
1 hasuntato 9000 1200 6000 3000 2222 2121
1 sibodoh 1000 1500 3434 3000 2233 2121
...
My expectation is to create a bar graph where the x axis is every name in branch (e.g. sunarto, pemabuk, pemalas, etc.) and the y axis is income.
Let's say I will compare sunarto's income januari 2018 and income januari 2019, pemabuk's income januari 2018 and income januari 2019, and so on (1 name in x axis, 2 values as comparison of two values). Then I will sort values high to low value from Income Jan-Feb 2019 in my bar graph.
I tried:
import matplotlib.pyplot as plt
import pandas as pd
fig, ax = plt.subplots()
ax = df1[["Sunarto","Income Januari 2018", "Income Januari 2019"]].plot(x='branch', kind='bar', color=["g","b"],rot=45)
plt.show()
Consider a groupby aggregation then run DataFrame.plot. Below will line all branches on x-axis with different income columns as color_coded keys in legend.
agg_df = df.groupby('branch').sum()
fig, ax = plt.subplots(figsize=(15,5))
agg_df.plot(kind='bar', edgecolor='w', ax=ax, rot=22, width=0.5, fontsize = 15)
# ADD TITLES AND LABELS
plt.title('Income by Branches, Jan/Feb 2018-2019', weight='bold', size=24)
plt.xlabel('Branch', weight='bold', size=24)
plt.ylabel('Income', weight='bold', size=20)
plt.tight_layout()
plt.show()
plt.clf()
Should you want each separate branch plots on specific columns, iterate off a groupby list:
dfs = df.groupby('branch')
for i,g in dfs:
ord_cols = (pd.melt(g.drop(columns="wilayah"), id_vars = "branch")
.sort_values("value")["variable"].values
)
fig, ax = plt.subplots(figsize=(8,4))
(g.reindex(columns=ord_cols)
.plot(kind='bar', edgecolor='w', ax=ax, rot=0, width=0.5, fontsize = 15)
)
# ADD TITLES AND LABELS
plt.title('Income by {} Branch, Jan/Feb 2018-2019'.format(i),
weight='bold', size=16)
plt.xlabel('Branch', weight='bold', size=16)
plt.ylabel('Income', weight='bold', size=14)
plt.tight_layout()
plt.show()

Making Categorical or Grouped Bar Graph with secondary Axis Line Graph

I need to compare different sets of daily data between 4 shifts(categorical / groupby), using bar graphs and line graphs. I have looked everywhere and have not found a working solution for this that doesn't include generating new pivots and such.
I've used both, matplotlib and seaborn, and while I can do one or the other(different colored bars/lines for each shift), once I incorporate the other, either one disappears, or other anomalies happen like only one plot point shows. I have looked all over and there are solutions for representing a single series of data on both chart types, but none that goes into multi category or grouped for both.
Data Example:
report_date wh_id shift Head_Count UTL_R
3/17/19 55 A 72 25%
3/18/19 55 A 71 10%
3/19/19 55 A 76 20%
3/20/19 55 A 59 33%
3/21/19 55 A 65 10%
3/22/19 55 A 54 20%
3/23/19 55 A 66 14%
3/17/19 55 1 11 10%
3/17/19 55 2 27 13%
3/17/19 55 3 18 25%
3/18/19 55 1 23 100%
3/18/19 55 2 16 25%
3/18/19 55 3 12 50%
3/19/19 55 1 28 10%
3/19/19 55 2 23 50%
3/19/19 55 3 14 33%
3/20/19 55 1 29 25%
3/20/19 55 2 29 25%
3/20/19 55 3 10 50%
3/21/19 55 1 17 20%
3/21/19 55 2 29 14%
3/21/19 55 3 30 17%
3/22/19 55 1 12 14%
3/22/19 55 2 10 100%
3/22/19 55 3 17 14%
3/23/19 55 1 16 10%
3/23/19 55 2 11 100%
3/23/19 55 3 13 10%
tm_daily_df = pd.read_csv('fg_TM_Daily.csv')
tm_daily_df = tm_daily_df.set_index('report_date')
fig2, ax2 = plt.subplots(figsize=(12,8))
ax3 = ax2.twinx()
group_obj = tm_daily_df.groupby('shift')
g = group_obj['Head_Count'].plot(kind='bar', x='report_date', y='Head_Count',ax=ax2,stacked=False,alpha = .2)
g = group_obj['UTL_R'].plot(kind='line',x='report_date', y='UTL_R', ax=ax3,marker='d', markersize=12)
plt.legend(tm_daily_df['shift'].unique())
This code has gotten me the closest I've been able to get. Notice that even with stacked = False, they are still stacked. I changed the setting to True, and nothing changes.
All I need is for the bars to be next to each other, with the same color scheme representing each shift.
The graph:
Here are two solutions (stacked and unstacked). Based on your questions we will:
plot Head_Count in the left y axis and UTL_R in the right y axis.
report_date will be our x axis
shift will represent the hue of our graph.
The stacked version uses pandas default plotting feature, and the unstacked version uses seaborn.
EDIT
From your request, I added a 100% stacked graph. While it is not quite exactly what you asked in the comment, the graph type you asked may create some confusion when reading (are the values based on the upper line of the stack or the width of the stack). An alternative solution may be using a 100% stacked graph.
Stacked
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
fig, ax = plt.subplots(figsize=(12,6))
ax2 = ax.twinx()
dfg['Head_Count'].unstack().plot.bar(stacked=True, ax=ax, alpha=0.6)
dfg['UTL_R'].unstack().plot(kind='line', ax=ax2, marker='o', legend=None)
ax.set_title('My Graph')
plt.show()
Stacked 100%
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
# Create `Head_Count_Pct` column
for date in dfg.index.get_level_values('report_date').unique():
for shift in dfg.loc[date, :].index.get_level_values('shift').unique():
dfg.loc[(date, shift), 'Head_Count_Pct'] = dfg.loc[(date, shift), 'Head_Count'].sum() / dfg.loc[(date, 'A'), 'Head_Count'].sum()
fig, ax = plt.subplots(figsize=(12,6))
ax2 = ax.twinx()
pal = sns.color_palette("Set1")
dfg[dfg.index.get_level_values('shift').isin(['1','2','3'])]['Head_Count_Pct'].unstack().plot.bar(stacked=True, ax=ax, alpha=0.5, color=pal)
dfg['UTL_R'].unstack().plot(kind='line', ax=ax2, marker='o', legend=None, color=pal)
ax.set_title('My Graph')
plt.show()
Unstacked
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
dfg = df.set_index(['report_date', 'shift']).sort_index(level=[0,1])
fig, ax = plt.subplots(figsize=(15,6))
ax2 = ax.twinx()
sns.barplot(x=dfg.index.get_level_values('report_date'),
y=dfg.Head_Count,
hue=dfg.index.get_level_values('shift'), ax=ax, alpha=0.7)
sns.lineplot(x=dfg.index.get_level_values('report_date'),
y=dfg.UTL_R,
hue=dfg.index.get_level_values('shift'), ax=ax2, marker='o', legend=None)
ax.set_title('My Graph')
plt.show()
EDIT #2
Here is the graph as per your second request (stacked, but stack n+1 does not start where stack n ends).
It is slightly more involved, as we have to do multiple things:
- we need to manually assign a color to each shift in our df
- once the colors are assigned, we iterate through each date and 1) sort the Head_Count values in descending order (so that the largest stack is at the back when we plot the graph), and 2) plot the data and assign the color to each stack
- Then we can create our second y axis and plot our UTL_R values
- Then we need to assign the correct color to our legend labels
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
def assignColor(shift):
    """Return the matplotlib single-letter color code for a shift label.

    Parameters
    ----------
    shift : str
        Shift label: ``'A'``, ``'1'``, ``'2'`` or ``'3'``.

    Returns
    -------
    str or None
        ``'r'``, ``'b'``, ``'g'`` or ``'y'`` for the known shifts; ``None``
        for any other label.
    """
    # Lowercase codes: matplotlib's single-letter color shorthands are
    # case-sensitive in current releases, so 'R'/'B'/'G'/'Y' are rejected.
    shift_colors = {'A': 'r', '1': 'b', '2': 'g', '3': 'y'}
    return shift_colors.get(shift)
# map a color to a shift
df['color'] = df['shift'].apply(assignColor)
fig, ax = plt.subplots(figsize=(15,6))
# plot our Head_Count values
for date in df.report_date.unique():
d = df[df.report_date == date].sort_values(by='Head_Count', ascending=False)
y = d.Head_Count.values
x = date
color = d.color
b = plt.bar(x,y, color=color)
# Plot our UTL_R values
ax2 = ax.twinx()
sns.lineplot(x=df.report_date, y=df.UTL_R, hue=df['shift'], marker='o', legend=None)
# Assign the color label color to our legend
leg = ax.legend(labels=df['shift'].unique(), loc=1)
legend_maping = dict()
for shift in df['shift'].unique():
legend_maping[shift] = df[df['shift'] == shift].color.unique()[0]
i = 0
for leg_lab in leg.texts:
leg.legendHandles[i].set_color(legend_maping[leg_lab.get_text()])
i += 1
How about this?
tm_daily_df['UTL_R'] = tm_daily_df['UTL_R'].str.replace('%', '').astype('float') / 100
pivoted = tm_daily_df.pivot_table(values=['Head_Count', 'UTL_R'],
index='report_date',
columns='shift')
pivoted
# Head_Count UTL_R
# shift 1 2 3 A 1 2 3 A
# report_date
# 3/17/19 11 27 18 72 0.10 0.13 0.25 0.25
# 3/18/19 23 16 12 71 1.00 0.25 0.50 0.10
# 3/19/19 28 23 14 76 0.10 0.50 0.33 0.20
# 3/20/19 29 29 10 59 0.25 0.25 0.50 0.33
# 3/21/19 17 29 30 65 0.20 0.14 0.17 0.10
# 3/22/19 12 10 17 54 0.14 1.00 0.14 0.20
# 3/23/19 16 11 13 66 0.10 1.00 0.10 0.14
fig, ax = plt.subplots()
pivoted['Head_Count'].plot.bar(ax=ax)
pivoted['UTL_R'].plot.line(ax=ax, legend=False, secondary_y=True, marker='D')
ax.legend(loc='upper left', title='shift')

Categories