I have some data and need to generate a plot like the one in this image. How can I do this with a seaborn scatter plot in Python?
Thanks, heaps!
example:
Here is a minimal example using seaborn.scatterplot:
# Scatter two data sets that share an x-axis but live on very different
# y scales (y1 in [0, 1), y2 in [0, 100)), each against its own y-axis.
import matplotlib.pyplot as plt  # required: plt.subplot() is used below
import numpy as np
import pandas as pd
import seaborn as sns

np.random.seed(0)  # fixed seed so the example is reproducible
df1 = pd.DataFrame({
    'x': np.random.random(size=10),
    'y1': np.random.random(size=10),
})
df2 = pd.DataFrame({
    'x': np.random.random(size=10),
    'y2': np.random.random(size=10) * 100,
})

ax1 = plt.subplot()
ax2 = ax1.twinx()  # second y-axis that shares ax1's x-axis
sns.scatterplot(data=df1, x='x', y='y1', ax=ax1)
sns.scatterplot(data=df2, x='x', y='y2', color='r', ax=ax2)
ax2.tick_params(axis='y', colors='red')  # right-hand ticks match the red points
plt.show()
output:
Related
I am trying to create a count plot and also add another plot on it which would actually be the mean of the other columns.
The sample data is in the below link:
Sample Data
I have used the below code to create the sns count plot:
# Read the sample file, order the rows by business, then draw the count of
# rows per business with one bar per location.
df = pd.read_csv("latestfile.csv")
df = df.sort_values(by=["Business"])
sns.countplot(x=df["Business"], hue="location", data=df)
and I generate the below:
Now I use the groupby and use the below code to get the desired data:
# Mean of "Ageing" for every (Business, location) pair, indexed by Business.
dfg = (
    df.groupby(["Business", "location"])["Ageing"]
    .mean()
    .reset_index()
    .set_index("Business")
)
But how do I plot this on the same count plot, on a different y-axis?
Unable to think of a way to do it.
Below is what I am finally looking for:
Of course, you can squeeze another bar plot into the countplot graph:
# Overlay bars of "Ageing" (right-hand y-axis) on a count plot (left-hand
# y-axis) by drawing the second plot on a twin of the first axes.
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

df = pd.read_csv("test.csv")
df = df.sort_values(by=["Business"])

ax1 = sns.countplot(
    data=df,
    x="Business",
    hue="location",
    palette="muted",
    edgecolor="black",
)
# Shift every count bar right by 30% of its own width; presumably so the bars
# drawn on the twin axis below do not cover them completely.
for count_bar in ax1.patches:
    shifted_x = count_bar.get_x() + 0.3 * count_bar.get_width()
    count_bar.set_x(shifted_x)
ax1.legend(title="Count")

ax2 = ax1.twinx()
sns.barplot(
    data=df,
    x="Business",
    y="Ageing",
    hue="location",
    palette="bright",
    ci=None,
    ax=ax2,
    edgecolor="white",
)
ax2.legend(title="Ageing")
ax1.autoscale_view()  # re-run autoscaling on ax1 after its bars were moved
plt.show()
However, I would definitely prefer two subplots:
# Side-by-side alternative: the count plot on the left axes and the "Ageing"
# bar plot on the right axes of a single 1x2 figure.
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

df = pd.read_csv("test.csv")
df = df.sort_values(by=["Business"])

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

sns.countplot(data=df, x="Business", hue="location", ax=ax1)
ax1.legend(title="Count")

sns.barplot(
    data=df,
    x="Business",
    y="Ageing",
    hue="location",
    ci=None,
    ax=ax2,
)
ax2.legend(title="Ageing")

plt.show()
Since you now prefer to show the distribution, you can combine the countplot with a stripplot:
# Count plot with the individual "Ageing" observations overlaid as a strip
# plot on a twin y-axis.
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

df = pd.read_csv("test.csv")
df = df.sort_values(by=["Business"])

ax1 = sns.countplot(data=df, x="Business", hue="location")
ax2 = ax1.twinx()
sns.stripplot(
    data=df,
    x="Business",
    y="Ageing",
    hue="location",
    jitter=True,
    dodge=True,
    ax=ax2,
    linewidth=1,
)
ax1.legend(title="", loc="upper center")
ax2.get_legend().remove()  # keep only the legend attached to the count plot
plt.show()
I'm trying to change the labels in a violin plot on Seaborn. I wanna change the NU_NOTA_CN, NU_NOTA_CH, NU_NOTA_LC, NU_NOTA_MT and NU_NOTA_REDAÇÃO, and TP_ESCOLA, and the 2 and 3.
# Grouped box plot of NOTA per DISCIPLINA, split by TP_ESCOLA.
import matplotlib.pyplot as plt  # required: plt is used below
import numpy as np
import pandas as pd
import seaborn as sns

fig_dims = (10, 8)
fig, ax = plt.subplots(figsize=fig_dims)
# publica_privada_pivot is not defined in this snippet; it has to be built
# beforehand with the DISCIPLINA, NOTA and TP_ESCOLA columns used here.
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot, ax=ax)
plt.show()
# Reset pyplot state once the figure has been shown.
plt.clf()
plt.close()
violin plot here
You can use set_xticklabels:
# Draw the box plot on an explicitly created Axes so that its x tick labels
# can be replaced afterwards through that Axes object.
f, ax = plt.subplots()
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot, ax=ax)
# The [...] below is a placeholder: substitute the actual label strings.
ax.set_xticklabels([...]) # list of strings
In addition, you can use get_xticklabels, for example.
# Derive readable labels from the labels currently on the x-axis instead of
# typing them by hand, e.g. 'NU_NOTA_CN' -> 'Nota Cn'.
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
# Drop the 'NU_' prefix, turn underscores into spaces and title-case the rest.
xticklabels = [
    t.replace('NU_', '').replace('_', ' ').title()
    for t in xticklabels
]
ax.set_xticklabels(xticklabels)
# Box plot of total_bill per day from seaborn's "tips" example data set, with
# the ticks at positions 0-3 relabelled.
tips = sns.load_dataset("tips")
foo = sns.boxplot(data=tips, x="day", y="total_bill")
tick_positions = [0, 1, 2, 3]
tick_labels = ['x1', 'x2', 'x3', 'x4']
plt.xticks(tick_positions, tick_labels)
plt.show()
The number of unique values in 'day' column (i.e. cardinality of the feature) should be the length of the lists passed to plt.xticks() function.
matplotlib.pyplot.xticks
I have the following script for generating a figure with two subplots: one line plot, and one bar plot.
# Figure with two stacked subplots: a line plot of the running total (top)
# and a bar plot of the resampled sums (bottom).
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
plt.close('all')  # close every figure that is still open
np.random.seed(42)  # fixed seed so the random data is reproducible
n = 1000
# n timestamps ending on 2020-02-27, at date_range's default frequency.
idx = pd.date_range(end='2020-02-27', periods=n)
# One random integer step from randint(-5, 5) per timestamp.
df = pd.Series(np.random.randint(-5, 5, n),
index=idx)
curve = df.cumsum()  # running total of the steps
bars = df.resample('M').sum()  # steps summed per 'M' resampling bin
fig = plt.figure()
ax1 = fig.add_subplot(211)  # first cell of a 2-row, 1-column grid
ax2 = fig.add_subplot(212)  # second cell of the same grid
curve.plot(ax=ax1)
bars.plot(kind='bar', ax=ax2)
fig.set_tight_layout(True)
I would like to share the x axis between the two subplots, however the command ax2 = fig.add_subplot(212, sharex=ax1) will result in an empty graph for the line plot like the following figure.
Here is my version based on Matplotlib (without the pandas plotting API); maybe it will be helpful.
I explicitly set the width of bars.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
plt.close('all')
np.random.seed(42)
n = 1000
idx = pd.date_range(end='2020-02-27', periods=n)
df = pd.Series(np.random.randint(-5, 5, n), index=idx)
curve = df.cumsum()
bars = df.resample('M').sum()
#fig = plt.figure()
#ax1 = fig.add_subplot(211)
#ax2 = fig.add_subplot(212)
#curve.plot(ax=ax1)
#bars.plot(kind='bar', ax=ax2)
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, gridspec_kw={'hspace': 0})
ax1.plot(curve.index, curve.values)
ax2.bar(bars.index, bars.values, width = (bars.index[0] - bars.index[1])/2)
fig.set_tight_layout(True)
_ = plt.xticks(bars.index, bars.index, rotation=90)
Using the data frame
# Sample frame for the question: four date strings plus two numeric columns
# whose values sit on different scales (hundreds vs. tens).
df = pd.DataFrame(
    dict(
        date=["2018-01-01", "2018-01-02", "2018-01-03", "2018-01-04"],
        column1=[555, 525, 532, 585],
        column2=[50, 48, 49, 51],
    )
)
one can plot with seaborn say column1 with sns.tsplot(data=df.column1, color="g").
How can we plot both time series with two y-axis in seaborn ?
As seaborn is built on top of matplotlib, you can use its power:
# Plot both columns of the question's df as lines, each against its own y-axis.
import matplotlib.pyplot as plt
import seaborn as sns  # required: sns is used below

sns.lineplot(data=df.column1, color="g")
# Twin of the current axes, i.e. the axes the first lineplot just drew on.
ax2 = plt.twinx()
sns.lineplot(data=df.column2, color="b", ax=ax2)
I would recommend using a normal line plot. You can get a twin axes via ax.twinx().
# Plain pandas/matplotlib version: column1 on the primary y-axis, column2 (red)
# on a twin y-axis, and one figure-level legend for both lines.
import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame(
    dict(
        date=["2018-01-01", "2018-01-02", "2018-01-03", "2018-01-04"],
        column1=[555, 525, 532, 585],
        column2=[50, 48, 49, 51],
    )
)

ax = df.plot(x="date", y="column1", legend=False)
ax2 = ax.twinx()
df.plot(x="date", y="column2", color="r", legend=False, ax=ax2)
# Per-axes legends are switched off above; the legend is created on the figure.
ax.figure.legend()
plt.show()
You could try the following code, based on @Andrey Sobolev's solution, which will also generate a complete legend.
# Two seaborn line plots on twinned y-axes with one hand-built legend that
# lists both series.
from matplotlib.lines import Line2D
import seaborn as sns  # seaborn was referenced through an undefined alias before

g = sns.lineplot(data=df.column1, color="g")
sns.lineplot(data=df.column2, color="b", ax=g.axes.twinx())
# Legend entries are built by hand from empty Line2D objects that only carry
# the colour and label of each series.
legend_handles = [
    Line2D([], [], marker='_', color="g", label='column1'),
    Line2D([], [], marker='_', color="b", label='column2'),
]
g.legend(handles=legend_handles)
I plotted two Pandas Series from the same DataFrame with the same x axis and everything worked out fine. However, when I tried to manually create a Legend, it appears but only with the title and not with the actual content. I've tried other solutions without any luck. Here's my code:
# Two bar series from the same DataFrame on twinned y-axes, followed by an
# attempt to build one combined legend by hand.
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twinx()  # second y-axis sharing ax1's x-axis
width = .3
# Tally bars (red) go on ax1, cost bars (blue) on ax2.
df.tally.plot(kind='bar', color='red', ax=ax1, width=width, position=1, grid=False)
df.costs.plot(kind='bar', color='blue', ax=ax2, width=width, position=0, grid=True)
ax1.set_ylabel('Tally')
ax2.set_ylabel('Total Cost')
# Collect the legend handles and labels registered on each axes.
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
# NOTE(review): handles and labels are passed as lists of lists here rather
# than as two flat lists (e.g. handles1 + handles2, labels1 + labels2); this
# is likely why only the legend title shows up - confirm against the
# plt.legend documentation.
plt.legend([handles1, handles2], [labels1, labels2], loc='upper left', title='Legend')
plt.show()
plt.clf()
Maybe you have a good reason to do it your way, but if not, this is much easier:
In [1]:
# Simplest route: let pandas draw both columns as bars on a single axes.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns  # optional, only imported for better-looking plots

# Random demo data: 10 rows drawn with np.random.randn in three columns.
df = pd.DataFrame(np.random.randn(10, 3), columns=['tally', 'costs', 'other'])
df.loc[:, ['tally', 'costs']].plot.bar(width=.3)
plt.show()
Out[1]:
Edit
After learning that this is because you have a much different scale for the other one, here's the pandas approach:
# Same data as in Jianxun Li's answer: random integers from randint(50, 100),
# with the costs column multiplied by 5 so the two series differ in scale.
np.random.seed(0)
df = pd.DataFrame(
    np.random.randint(50, 100, (20, 3)),
    columns=['tally', 'costs', 'other'],
)
df['costs'] = df['costs'] * 5

width = .3
df['tally'].plot(
    kind='bar',
    color='#55A868',
    position=1,
    width=width,
    legend=True,
    figsize=(12, 6),
)
# secondary_y=True asks pandas to plot costs against a secondary y-axis.
df['costs'].plot(
    kind='bar',
    color='#4C72B0',
    position=0,
    width=width,
    legend=True,
    secondary_y=True,
)
plt.show()
Something like this?
# Pure-matplotlib variant: two bar series next to each other, each measured
# against its own y-axis.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Demo data: random integers from randint(50, 100); col2 is multiplied by 5
# so that the two plotted columns differ in scale.
np.random.seed(0)
df = pd.DataFrame(
    np.random.randint(50, 100, (20, 3)),
    columns=['col1', 'col2', 'col3'],
)
df['col2'] = df['col2'] * 5

fig, ax = plt.subplots()
width = 0.3

# First series on the primary axes, at the integer index positions.
ax.bar(df.index, df['col1'], width=width, color='red', label='col1_data')
ax.legend(loc='best')

# Second series on a twin axes, offset along x by one bar width.
ax2 = ax.twinx()
ax2.bar(df.index + width, df['col2'], width=width, color='blue', label='col2_data')
ax2.legend(loc='best')