scatter more than one dataframe in the same plot - python

I'm using a for cycle to scatter more than one dataframe on a same pd.plot.scatterplot, but everytime the cycle return it print a colorbar.
How can I have just one colorbar at the end of the cycle?
This is my code
if colormap is None: colormap='jet'
f,ax = plt.subplots()
for i, data in enumerate(wells):
data.plot.scatter(x,y, c=z, colormap=colormap, ax=ax)
ax.set_xlabel(x); ax.set_xlim(xlim)
ax.set_ylabel(y); ax.set_ylim(ylim)
ax.legend()
ax.grid()
ax.set_title(title)

This can be achieved by using figure and adding the axes to the same subplot:
import pandas as pd
import numpy as np
# created two dataframes with random values
df1 = pd.DataFrame(np.random.rand(25, 2), columns=['a', 'b'])
df2 = pd.DataFrame(np.random.rand(25, 2), columns=['a', 'b'])
And then:
fig = plt.figure()
for i, data in enumerate([df1, df2]):
ax = fig.add_subplot(111)
ax = data.plot.scatter(x='a', y='b', ax=ax,
c='#00FF00' if i == 0 else '#FF0000')
plt.show()
You can add the labels and other elements as required.

Related

Combine Binned barplot with lineplot

I'd like to represent two datasets on the same plot, one as a line as one as a binned barplot. I can do each individually:
tobar = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
tobar["bins"] = pd.qcut(tobar.index, 20)
bp = sns.barplot(data=tobar, x="bins", y="value")
toline = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
lp = sns.lineplot(data=toline, x=toline.index, y="value")
But when I try to combine them, of course the x axis gets messed up:
fig, ax = plt.subplots()
ax2 = ax.twinx()
bp = sns.barplot(data=tobar, x="bins", y="value", ax=ax)
lp = sns.lineplot(data=toline, x=toline.index, y="value", ax=ax2)
bp.set(xlabel=None)
I also can't seem to get rid of the bin labels.
How can I get these two informations on the one plot?
This answer explains why it's better to plot the bars with matplotlib.axes.Axes.bar instead of sns.barplot or pandas.DataFrame.bar.
In short, the xtick locations correspond to the actual numeric value of the label, whereas the xticks for seaborn and pandas are 0 indexed, and don't correspond to the numeric value.
This answer shows how to add bar labels.
ax2 = ax.twinx() can be used for the line plot if needed
Works the same if the line plot is different data.
Tested in python 3.11, pandas 1.5.2, matplotlib 3.6.2, seaborn 0.12.1
Imports and DataFrame
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# test data
np.random.seed(2022)
df = pd.melt(pd.DataFrame(np.random.randn(1000).cumsum()))
# create the bins
df["bins"] = pd.qcut(df.index, 20)
# add a column for the mid point of the interval
df['mid'] = df.bins.apply(lambda row: row.mid.round().astype(int))
# pivot the dataframe to calculate the mean of each interval
pt = df.pivot_table(index='mid', values='value', aggfunc='mean').reset_index()
Plot 1
# create the figure
fig, ax = plt.subplots(figsize=(30, 7))
# add a horizontal line at y=0
ax.axhline(0, color='black')
# add the bar plot
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
# set the labels on the xticks - if desired
ax.set_xticks(ticks=pt.mid, labels=pt.mid)
# add the intervals as labels on the bars - if desired
ax.bar_label(ax.containers[0], labels=df.bins.unique(), weight='bold')
# add the line plot
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 2
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=4, alpha=0.5)
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')
Plot 3
The bar width is the width of the interval
fig, ax = plt.subplots(figsize=(30, 7))
ax.axhline(0, color='black')
ax.bar(data=pt, x='mid', height='value', width=50, alpha=0.5, ec='k')
ax.set_xticks(ticks=pt.mid, labels=df.bins.unique(), rotation=45)
ax.bar_label(ax.containers[0], weight='bold')
_ = sns.lineplot(data=df, x=df.index, y="value", ax=ax, color='tab:orange')

matplotlib multiple Y-axis pandas plot

Could someone give me a tip on how to do multiple Y axis plots?
This is some made up data below, how could I put Temperature its own Y axis, Pressure on its own Y axis, and then have both Value1 and Value2 on the same Y axis. I am trying to go for the same look and feel of this SO post answer. Thanks for any tips, I don't understand ax3 = ax.twinx() process, like as far as do I need to define an ax.twinx() for each separate Y axis plot I need?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
# using subplots() function
fig, ax = plt.subplots(figsize=(25,8))
plt.title('Multy Y Plot')
ax2 = ax.twinx()
ax3 = ax.twinx()
ax4 = ax.twinx()
plot1, = ax.plot(df.index, df.Temperature)
plot2, = ax2.plot(df.index, df.Value1, color = 'r')
plot3, = ax3.plot(df.index, df.Pressure, color = 'g')
plot4, = ax4.plot(df.index, df.Value2, color = 'b')
ax.set_xlabel('Date')
ax.set_ylabel('Temperature')
ax2.set_ylabel('Value1')
ax3.set_ylabel('Pressure')
ax4.set_ylabel('Value2')
plt.legend([plot1,plot2,plot3,plot4],list(df.columns))
# defining display layout
plt.tight_layout()
# show plot
plt.show()
This will output everything jumbled up on the same side without separate Y axis for Pressure, Value1, and Value2.
You are adding 4 different plots in one, which is not helpful. I would recommend breaking it into 2 plots w/ shared x-axis "Date":
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature)
plot1b, = ax1b.plot(df.index, df.Pressure, color='r')
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2b = ax2.twinx()
plot2a, = ax2.plot(df.index, df.Value1, color='k')
plot2b, = ax2b.plot(df.index, df.Value2, color='g')
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1')
ax2b.set_ylabel('Value2')
plt.legend([plot1a, plot1b, plot2a, plot2b], df.columns)
# defining display layout
plt.tight_layout()
# show plot
plt.show()
Here I have added in the first plot (on the top) Temperature and Pressure and on the second plot (on the bottom) Value 1 and Value 2. Normally, we add in the same plot things that make sense to compare on the same x-axis. Pressure and Temperature is a valid combination that is why I combined those two together. But you can do as you wish.
This answer below uses mpatches is how to make the subplot of Value1 and Value2 on the same axis. The solution for this post has subplot for Value1 and Value2 on different axis. Thanks for the help #tzinie!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
rows,cols = 8760,4
data = np.random.rand(rows,cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature','Value1','Pressure','Value2'], index=tidx)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25,8))
plt.title('Multy Y Plot')
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature, color='r') # red
plot1b, = ax1b.plot(df.index, df.Pressure, color='b') # blue
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')
ax2.plot(df.index, df.Value1, color='k') # black
ax2.plot(df.index, df.Value2, color='g') # green
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1 & Value2')
red_patch = mpatches.Patch(color='red', label='Temperature')
blue_patch = mpatches.Patch(color='blue', label='Pressure')
green_patch = mpatches.Patch(color='green', label='Value2')
black_patch = mpatches.Patch(color='black', label='Value1')
plt.legend(handles=[red_patch,blue_patch,green_patch,black_patch])
# defining display layout
#plt.tight_layout()
# show plot
plt.show()

How to show the bar for small values in python chart? [duplicate]

I drawn the comparison bar chart for very small values with the following code,
import pandas as pd
import matplotlib.pyplot as plt
data = [[ 0.00790019035339353, 0.00002112],
[0.0107705593109131, 0.0000328540802001953],
[0.0507792949676514, 0.0000541210174560547]]
df = pd.DataFrame(data, columns=['A', 'B'])
df.plot.bar()
plt.bar(df['A'], df['B'])
plt.show()
Due to very small values I can't visualise the chart colour for the ('B' column) smaller value (e.g. 0.00002112) in the graph.
How can I modify the code to visualise smaller value(B column) colour in the graph? Thanks..
A common way to display data with different orders of magnitude is
to use a logarithmic scaling for the y-axis. Below the logarithm
to base 10 is used but other bases could be chosen.
import pandas as pd
import matplotlib.pyplot as plt
data = [[ 0.00790019035339353, 0.00002112],
[0.0107705593109131, 0.0000328540802001953],
[0.0507792949676514, 0.0000541210174560547]]
df = pd.DataFrame(data, columns=['A', 'B'])
df.plot.bar()
plt.yscale("log")
plt.show()
Update:
To change the formatting of the yaxis labels an instance of ScalarFormatter can be used:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
data = [[ 0.00790019035339353, 0.00002112],
[0.0107705593109131, 0.0000328540802001953],
[0.0507792949676514, 0.0000541210174560547]]
df = pd.DataFrame(data, columns=['A', 'B'])
df.plot.bar()
plt.yscale("log")
plt.gca().yaxis.set_major_formatter(ScalarFormatter())
plt.show()
You could create 2 y-axis like this:
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
width = 0.2
df['A'].plot(kind='bar', color='green', ax=ax1, width=width, position=1, label = 'A')
df['B'].plot(kind='bar', color='blue', ax=ax2, width=width, position=0, label = 'B')
ax1.set_ylabel('A')
ax2.set_ylabel('B')
# legend
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax1.legend(h1+h2, l1+l2, loc=2)
plt.show()

How to label a violin plot on Seaborn?

I'm trying to change the labels in a violin plot on Seaborn. I wanna change the NU_NOTA_CN, NU_NOTA_CH, NU_NOTA_LC, NU_NOTA_MT and NU_NOTA_REDAÇÃO, and TP_ESCOLA, and the 2 and 3.
import pandas as pd
import numpy as np
import seaborn as sns
fig_dims = (10, 8)
fig, ax = plt.subplots(figsize=fig_dims)
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot)
plt.show()
plt.clf()
plt.close()
violin plot here
You can use the set_xticklabels
f, ax = plt.subplots()
sns.boxplot(x="DISCIPLINA", y="NOTA", hue="TP_ESCOLA", data=publica_privada_pivot, ax=ax)
ax.set_xticklabels([...]) # list of strings
In addition, you can use get_xticklabels, for example.
xticklabels = [t.get_text() for t in ax.get_xticklabels()]
xticklabels = [t.replace('NU_', '').replace('_', ' ').title()
ax.set_xticklabels(xticklabels)
tips = sns.load_dataset("tips")
foo = sns.boxplot(x="day", y="total_bill", data=tips)
plt.xticks([0, 1, 2, 3], ['x1', 'x2', 'x3', 'x4'])
plt.show()
The number of unique values in 'day' column (i.e. cardinality of the feature) should be the length of the lists passed to plt.xticks() function.
matplotlib.pyplot.xticks

Matplotlib missing patches in graph

I need to create a heatmap from a csv file and highlight some cells, my idea was to create a mask from a Panda's dataframe and then iterate through the mask and add a patch each time.
Unfortunately even if the mask seems to work correctly only two patches are placed instead of the 4 I would like to have, does anyone know why?
df = pd.read_csv(argv[1])
df = df.transpose()
mask = df == 3
fig, ax = plt.subplots()
ax = sns.heatmap(df, ax=ax)
for row in range(df.shape[0]):
for col in range(df.shape[1]):
if mask[col][row]:
ax.add_patch(Rectangle((row, col), 1, 1))
plt.show()
The obtained graph:
After trying, you need to change the order of your indices when you create the rectangles:
import seaborn as sns
from matplotlib.patches import Rectangle
df = pd.DataFrame([[3,0,0],
[2,3,0],
[1,2,0],
[3,1,0],
[2,0,3],
[1,0,2],
[0,0,1],
[0,0,0]], columns = ["reg1","reg2","reg3"])
mask = df == 3
fig, ax = plt.subplots()
ax = sns.heatmap(df, ax=ax)
for row in range(df.shape[0]):
for col in range(df.shape[1]):
if mask.iloc[row,col]:
ax.add_patch(Rectangle((col, row), 1, 1, fill=False, edgecolor='blue', lw=3))
plt.show()
Output:

Categories