Currently I am using .plot(table=True) to display the tables and this is my current output:
How can I shift the table down?
Here is my code:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
# Apply seaborn's default theme to every figure created afterwards.
sns.set()

# Load the records and derive a calendar-year column from 'month'.
df = pd.read_csv('file.csv')
df['year'] = pd.DatetimeIndex(df['month']).year
df.head()  # notebook-style preview; the result is discarded in a script

# Count rows per (year, gender), then pivot the genders into columns so each
# gender becomes its own line; missing combinations are filled with 0.
yearly_counts = (
    df.groupby(['year', 'child_gender'])['birth_count']
    .count()
    .unstack('child_gender')
    .fillna(0)
)

# table=True makes pandas draw the plotted values as a table under the axes.
ax = yearly_counts.plot(kind='line', stacked=False, marker='o', table=True)

plt.xticks(np.arange(2016, 2019))
# Anchor the legend's upper-left corner at the right edge of the axes.
plt.legend(title='Child Gender', bbox_to_anchor=(1.0, 1), loc='upper left')
ax.set_ylabel('Birth Count')
ax.set_xlabel('Year')
ax.set_title("Total Birth Count By Child Gender")
plt.autoscale(enable=False)
plt.tight_layout()
plt.show()
this is the current dataframe I have:
Use bbox
Don't use `table=True`; instead, create the table under the plot yourself with `plt.table`:
# Taller-than-wide figure, leaving room for the table under the axes.
fig, ax = plt.subplots(figsize=(5, 8))
df.plot(ax=ax)

# bbox is [x0, y0, width, height] in axes coordinates; the negative y0 puts
# the table below the plot area.
table_bbox = [0.0, -0.2, 1, 0.1]
plt.table(
    cellText=df.values,
    rowLabels=['f', 'm'],
    colLabels=['2016', '2017', '2018'],
    loc='center left',
    bbox=table_bbox,
)
The second value in the `bbox` list is the table's vertical position in axes coordinates; make it more negative to move the table further down.
Related
I usually use matplotlib, but was playing with pandas plotting and experienced unexpected behaviour. I was assuming the following would return red and green edges rather than alternating. What am I missing here?
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"col1": [1, 2, 4, 5, 6], "col2": [4, 5, 1, 2, 3]})


def amounts(df):
    """Draw 'col1' and 'col2' of *df* as unfilled, stacked bars and show them.

    The two-colour ``edgecolor`` list is handed to a single stacked
    ``plot.bar`` call. As the surrounding question reports, this produces
    edge colours that alternate from bar to bar rather than one colour per
    column.
    """
    fig, ax = plt.subplots(1, 1, figsize=(3, 4))
    (
        df.filter(['col1', 'col2'])
        .plot.bar(ax=ax, stacked=True, edgecolor=["red", "green"],
                  fill=False, linewidth=2, rot=0)
    )
    ax.set_xlabel("")  # suppress the x-axis label
    plt.tight_layout()
    plt.show()


amounts(df)
I think plotting each column separately and setting the bottom argument to stack the bars provides the output you desire.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame({"col1": [1, 2, 4, 5, 6], "col2": [4, 5, 1, 2, 3]})


def amounts(df):
    """Plot 'col1' and 'col2' of *df* as manually stacked, unfilled bars.

    Each column is drawn by its own ``plot.bar`` call so that it gets a
    single edge colour; 'col2' is stacked on top of 'col1' via ``bottom``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(3, 4))
    df['col1'].plot.bar(ax=ax, linewidth=2, edgecolor='green', rot=0, fill=False)
    # bottom=df['col1'] starts every 'col2' bar where the 'col1' bar ends.
    df['col2'].plot.bar(ax=ax, bottom=df['col1'], linewidth=2, edgecolor='red', rot=0, fill=False)
    plt.legend()
    plt.tight_layout()
    plt.show()


amounts(df)
I drew a comparison bar chart for very small values with the following code:
import pandas as pd
import matplotlib.pyplot as plt
# Each row is an [A, B] pair; the A values are hundreds of times larger than
# the B values.
data = [[ 0.00790019035339353, 0.00002112],
[0.0107705593109131, 0.0000328540802001953],
[0.0507792949676514, 0.0000541210174560547]]
df = pd.DataFrame(data, columns=['A', 'B'])
# Grouped bar chart: one (A, B) pair of bars per row on a shared linear y-axis.
df.plot.bar()
# NOTE(review): this adds further bars at x = df['A'] with height df['B'];
# presumably unintended -- confirm before reusing this snippet.
plt.bar(df['A'], df['B'])
plt.show()
Because the values are so small, I can't see the bars for the smaller 'B' column (e.g. 0.00002112) in the chart.
How can I modify the code so that the bars for the smaller values (column B) are visible in the graph? Thanks.
A common way to display data with different orders of magnitude is
to use a logarithmic scaling for the y-axis. Below the logarithm
to base 10 is used but other bases could be chosen.
import pandas as pd
import matplotlib.pyplot as plt
# Each row is an [A, B] pair; B is hundreds of times smaller than A.
rows = [
    [0.00790019035339353, 0.00002112],
    [0.0107705593109131, 0.0000328540802001953],
    [0.0507792949676514, 0.0000541210174560547],
]
df = pd.DataFrame(rows, columns=['A', 'B'])

# Draw the grouped bars, then switch the y-axis to a logarithmic scale so the
# small B values stay visible next to A.
ax = df.plot.bar()
ax.set_yscale("log")
plt.show()
Update:
To change the formatting of the yaxis labels an instance of ScalarFormatter can be used:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
# Each row is an [A, B] pair; B is hundreds of times smaller than A.
rows = [
    [0.00790019035339353, 0.00002112],
    [0.0107705593109131, 0.0000328540802001953],
    [0.0507792949676514, 0.0000541210174560547],
]
df = pd.DataFrame(rows, columns=['A', 'B'])

# Grouped bars on a logarithmic y-axis.
ax = df.plot.bar()
ax.set_yscale("log")
# Replace the default log tick formatter with a ScalarFormatter to change how
# the y-axis tick labels are rendered.
ax.yaxis.set_major_formatter(ScalarFormatter())
plt.show()
You could create 2 y-axis like this:
# Two y-axes sharing one x-axis: column A on the left, column B on the right.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

bar_width = 0.2
# position=1 and position=0 align the bars on opposite sides of each tick so
# the two series sit side by side instead of overlapping.
df['A'].plot(kind='bar', ax=ax1, color='green', width=bar_width, position=1, label='A')
df['B'].plot(kind='bar', ax=ax2, color='blue', width=bar_width, position=0, label='B')

ax1.set_ylabel('A')
ax2.set_ylabel('B')

# Merge the legend entries of both axes into one legend drawn on ax1.
handles, labels = [], []
for axis in (ax1, ax2):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    handles += axis_handles
    labels += axis_labels
ax1.legend(handles, labels, loc=2)  # loc=2 is matplotlib's code for 'upper left'
plt.show()
I want to make a graph about how the maximum value of a cluster of points at any given x coordinate changes over time.
What I have achieved so far:
What I want to achieve:
I was thinking of making a subset of the data with only the day and value, and then getting the maximum value for each day, either by iterating through it or by using a function. But I don't know if that is possible, like here:
Here's my code
import pandas as pd
import matplotlib.pyplot as plt
# The CSV has no header row, so the column names are supplied explicitly and
# 'time' is parsed as a date while reading.
df = pd.read_csv(
    'strong.csv',
    names=['time', 'exercise', 'set_number', 'mass', 'reps'],
    parse_dates=['time'],
)
df.time = pd.to_datetime(df.time, format='%Y-%m-%d')

# Keep only the rows for the exercise of interest.
df_exercise = df[df.exercise == 'Bench Press (Barbell)']

fig, ax = plt.subplots()
ax.scatter(df_exercise.time, df_exercise.mass, c='Orange', s=30)
ax.set(xlabel='Day', ylabel='Weight [ kg ]', title='Time/Weight')
plt.xticks(fontsize=8, rotation=45)

# Save BEFORE show(): once the window opened by show() is closed, pyplot no
# longer holds the figure, and a later plt.savefig() typically writes a blank
# image. Saving through the figure object first avoids that.
fig.savefig('grafic.png')
plt.show()
You could group the dataframe by date and aggregate the maxima:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Synthetic stand-in for the question's data: six consecutive days with two
# random 'mass' readings each. The column is named 'exercise' to match the
# question's schema.
df = pd.DataFrame({'time': np.repeat(pd.date_range('2021-03-01', periods=6), 2),
                   'mass': np.random.randint(20, 56, 12),
                   'exercise': 'Bench Press (Barbell)'})
df.time = pd.to_datetime(df.time, format='%Y-%m-%d')
df_exercise = df  # just creating a dataframe similar to the question's

fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 5))

# Left panel: every individual reading.
ax1.scatter(df_exercise.time, df_exercise.mass, c='limegreen', s=30)

# One row per day holding that day's maximum mass.
df_plot = df_exercise.groupby('time')['mass'].agg('max')

# Right panel: all readings faded out, daily maxima highlighted and joined by
# a line drawn underneath the markers (zorder=0).
ax2.scatter(df_exercise.time, df_exercise.mass, c='limegreen', s=30, alpha=0.3)
ax2.scatter(df_plot.index, df_plot.values, c='orange', s=30)
ax2.plot(df_plot.index, df_plot.values, c='black', lw=2, zorder=0)

# Identical labelling and tick styling for both panels.
for ax in (ax1, ax2):
    ax.set(xlabel='Day', ylabel='Weight [ kg ]', title='Time/Weight')
    ax.tick_params(axis='x', labelsize=8, labelrotation=45)

plt.tight_layout()
plt.show()
I am trying to create a count plot and also add another plot on it which would actually be the mean of the other columns.
The sample data is in the below link:
Sample Data
I have used the below code to create the sns count plot:
# Load the data and sort the rows by 'Business' in place.
df = pd.read_csv("latestfile.csv")
df.sort_values(by=["Business"],inplace=True)
# One bar per (Business, location) combination, showing the number of rows.
sns.countplot(data=df,x=df["Business"],hue="location")
and I generate the below:
Now I use the groupby and use the below code to get the desired data:
# Mean 'Ageing' for every (Business, location) pair, indexed by 'Business'
# with 'location' kept as a regular column.
dfg = (
    df.groupby(["Business", "location"])['Ageing']
    .mean()
    .reset_index()
    .set_index("Business")
)
But how do I plot this on the same count plot, on a different y-axis?
I am unable to think of a way to do it.
Below is what I am finally looking for:
Of course, you can squeeze another bar plot into the countplot graph:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"], inplace=True)

# Count bars on the primary (left) y-axis.
ax1 = sns.countplot(data=df, x="Business", hue="location", palette="muted", edgecolor="black")

# Shift every count bar to the right by 30% of its own width so it is not
# completely hidden by the bars drawn on the twin axis.
for patch in ax1.patches:
    patch.set_x(patch.get_x() + 0.3 * patch.get_width())
ax1.legend(title="Count")

# Bars for 'Ageing' on a secondary (right) y-axis sharing the same x-axis.
# NOTE(review): newer seaborn releases replace `ci` with `errorbar`; confirm
# against the installed version.
ax2 = ax1.twinx()
sns.barplot(data=df, x="Business", y="Ageing", hue="location", palette="bright", ci=None, ax=ax2, edgecolor="white")
ax2.legend(title="Ageing")

# Recompute the view limits of ax1 after its patches were moved.
ax1.autoscale_view()
plt.show()
However, I would definitely prefer two subplots:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"], inplace=True)

# Two panels side by side: counts on the left, 'Ageing' bars on the right.
fig, panels = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
count_ax, ageing_ax = panels

sns.countplot(data=df, x="Business", hue="location", ax=count_ax)
sns.barplot(data=df, x="Business", y="Ageing", hue="location", ci=None, ax=ageing_ax)

# Title each legend with the quantity its panel shows.
count_ax.legend(title="Count")
ageing_ax.legend(title="Ageing")
plt.show()
Since you prefer now the distribution, you can combine the countplot with a stripplot:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
df = pd.read_csv("test.csv")
df.sort_values(by=["Business"], inplace=True)

# Count bars on the primary y-axis.
count_ax = sns.countplot(data=df, x="Business", hue="location")

# Individual 'Ageing' observations on a secondary y-axis; dodge=True separates
# the points by hue level, and jitter=True spreads them horizontally.
strip_ax = count_ax.twinx()
sns.stripplot(
    data=df,
    x="Business",
    y="Ageing",
    hue="location",
    jitter=True,
    dodge=True,
    ax=strip_ax,
    linewidth=1,
)

# Keep a single legend (from the count plot) and drop the duplicate one.
count_ax.legend(title="", loc="upper center")
strip_ax.get_legend().remove()
plt.show()
I plotted two Pandas Series from the same DataFrame with the same x axis and everything worked out fine. However, when I tried to manually create a legend, it appears, but only with the title and not with the actual content. I've tried other solutions without any luck. Here's my code:
# One figure with a single subplot and a twin axis sharing its x-axis.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()
width = .3
# 'tally' goes on the left axis and 'costs' on the right; position=1 and
# position=0 place the two bar series on opposite sides of each tick.
df.tally.plot(kind='bar', color='red', ax=ax1, width=width, position=1, grid=False)
df.costs.plot(kind='bar', color='blue', ax=ax2, width=width, position=0, grid=True)
ax1.set_ylabel('Tally')
ax2.set_ylabel('Total Cost')
# Collect the legend handles and labels from each axis separately.
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
# NOTE(review): this passes a list of two lists for both handles and labels.
# Concatenating them (handles1 + handles2, labels1 + labels2) would give flat
# sequences; this nesting is presumably why only the title shows -- confirm.
plt.legend([handles1, handles2], [labels1, labels2], loc='upper left', title='Legend')
plt.show()
plt.clf()
Maybe you have a good reason to do it your way, but if not, this is much easier:
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Optional, just better looking
import seaborn as sns
# Ten rows of standard-normal samples in three named columns.
df = pd.DataFrame(np.random.randn(10, 3), columns=['tally', 'costs', 'other'])

# Plotting both columns in a single call draws them as grouped bars.
df[['tally', 'costs']].plot.bar(width=0.3)
plt.show()
Out[1]:
Edit
After learning that this is because you have a much different scale for the other one, here's the pandas approach:
# Same data as in Jianxun Li's answer: seeded random integers in [50, 100),
# with 'costs' multiplied by 5 so the two series are on different scales.
np.random.seed(0)
df = pd.DataFrame(
    np.random.randint(50, 100, (20, 3)),
    columns=['tally', 'costs', 'other'],
)
df['costs'] = df['costs'] * 5

bar_width = 0.3
# position=1 and position=0 put the two series on opposite sides of each tick;
# secondary_y=True draws 'costs' against a second y-axis.
df['tally'].plot.bar(color='#55A868', position=1, width=bar_width, legend=True, figsize=(12, 6))
df['costs'].plot.bar(color='#4C72B0', position=0, width=bar_width, legend=True, secondary_y=True)
plt.show()
Something like this?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Sample data
# ===============================
# Seeded random integers in [50, 100) in three columns; 'col2' is multiplied
# by 5 so that it is on a different scale from 'col1'.
np.random.seed(0)
df = pd.DataFrame(np.random.randint(50,100,(20,3)), columns=['col1', 'col2', 'col3'])
df.col2 = df.col2 * 5
# Bar plot with twinx
# ===============================
fig, ax = plt.subplots()
width=0.3
# 'col1' bars on the primary axis, at the index positions.
ax.bar(df.index, df.col1, width=width, color='red', label='col1_data')
ax.legend(loc='best')
# 'col2' bars on a twin y-axis, offset by one bar width along x so they sit
# next to the 'col1' bars instead of on top of them.
ax2 = ax.twinx()
ax2.bar(df.index+width, df.col2, width=width, color='blue', label='col2_data')
ax2.legend(loc='best')