How to plot only max values using python - python

I want to make a graph about how the maximum value of a cluster of points at any given x coordinate changes over time.
What I have achieved so far:
What I want to achieve:
I was thinking of making a subset of the data with only the day and value, and then getting the maximum value for each day, either by iterating through it or by using a function. But I don't know if that's possible, like here:
Here's my code
import pandas as pd
import matplotlib.pyplot as plt
# Load the workout log. Column names are passed explicitly via `names`,
# and the 'time' column is parsed as dates while reading.
df = pd.read_csv(
    'strong.csv',
    names=['time', 'exercise', 'set_number', 'mass', 'reps'],
    parse_dates=['time'],
)
df.time = pd.to_datetime(df.time, format='%Y-%m-%d')

# Keep only the rows of the exercise being charted.
df_exercise = df[df.exercise == 'Bench Press (Barbell)']

# One orange dot per logged set: x = date, y = mass.
fig, ax = plt.subplots()
ax.scatter(df_exercise.time, df_exercise.mass, c='Orange', s=30)
ax.set(xlabel='Day', ylabel='Weight [ kg ]', title='Time/Weight')
plt.xticks(fontsize=8, rotation=45)

# Save BEFORE showing: with a blocking plt.show() the figure is normally
# destroyed when its window is closed, so saving afterwards writes an
# empty image.
fig.savefig('grafic.png')
plt.show()

You could group the dataframe by date and aggregate the maxima:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Synthetic stand-in for the question's data: six consecutive days,
# two random masses per day.
df = pd.DataFrame({
    'time': np.repeat(pd.date_range('2021-03-01', periods=6), 2),
    'mass': np.random.randint(20, 56, 12),
    'exercise': 'Bench Press (Barbell)',
})
df.time = pd.to_datetime(df.time, format='%Y-%m-%d')
df_exercise = df  # just creating a dataframe similar to the question's

fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 5))

# Left panel: every individual point.
ax1.scatter(df_exercise.time, df_exercise.mass, c='limegreen', s=30)

# Per-day maximum: a Series indexed by date holding the largest mass of that day.
df_plot = df_exercise.groupby('time')['mass'].agg('max')

# Right panel: faded raw points, with the daily maxima highlighted and
# connected by a line drawn underneath the markers (zorder=0).
ax2.scatter(df_exercise.time, df_exercise.mass, c='limegreen', s=30, alpha=0.3)
ax2.scatter(df_plot.index, df_plot.values, c='orange', s=30)
ax2.plot(df_plot.index, df_plot.values, c='black', lw=2, zorder=0)

# Same labels and tick styling on both panels.
for ax in (ax1, ax2):
    ax.set(xlabel='Day', ylabel='Weight [ kg ]', title='Time/Weight')
    ax.tick_params(axis='x', labelsize=8, labelrotation=45)

plt.tight_layout()
plt.show()

Related

Python Seaborn heatmap with custom order on both axes and values from a frequency table (data included)

I have this data in a frequency table. I just want to be able to create a heatmap with Fac1 on Y axis, Fac2 on X axis and the frequency values as heatmap. The order of the Factors in Fac1 and Fac2 must be maintained in the same sequence (after removing duplicates from both Fac1 and Fac2 columns). I haven't been able to get this working after so many tries but I've managed to get the data in order and the simplest representation. I'd greatly appreciate any help in this.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
# The question's attempt: the frame is handed to heatmap exactly as read
# from the CSV, without any reshaping.
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)

fig, ax = plt.subplots(figsize=(15, 30))
# Move the x tick marks and labels from the bottom edge to the top edge.
ax.tick_params(
    axis='both',
    which='major',
    labelsize=10,
    labelbottom=False,
    bottom=False,
    top=True,
    labeltop=True,
)
sns.heatmap(
    df,
    cmap="Blues",
    linewidth=1,
    xticklabels=True,
    yticklabels=True,
    ax=ax,
)
You have to convert your dataframe if you want to use heatmap:
# Reshape the long table into a Fac1 x Fac2 matrix of frequencies.
# sort=False is passed so pivot_table does not sort the factor labels.
deduplicated = df.drop_duplicates()
df2 = deduplicated.pivot_table(
    index='Fac1',
    columns='Fac2',
    values='Frequency Fac1-Fac2 pair',
    sort=False,
)

fig, ax = plt.subplots(figsize=(15, 30))
# Put the x tick marks and labels on the top edge instead of the bottom.
ax.tick_params(
    axis='both',
    which='major',
    labelsize=10,
    labelbottom=False,
    bottom=False,
    top=True,
    labeltop=True,
)
sns.heatmap(
    df2,
    cmap="Blues",
    linewidth=1,
    xticklabels=True,
    yticklabels=True,
    ax=ax,
)
This is the result (zoomed on the first rows and columns):
You first need to reorganize the dataframe such that Fac1 becomes the index, Fac2 the columns, and the values are aggregated from the third column. E.g. df_pivoted = df.pivot_table(index='Fac1', columns='Fac2', values='Frequency Fac1-Fac2 pair').
The heatmap will use the order provided by the columns and index as created by pivot_table. Keeping the original order is a bit tricky, but can be achieved by pd.Categorical (which forces an order) combined by pd.unique() (which keeps the original order, unlike np.unique).
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)

# Turn both factor columns into categoricals whose category order is the
# order of first appearance (pd.unique keeps that order).
for factor in ('Fac1', 'Fac2'):
    first_seen_order = pd.unique(df[factor])
    df[factor] = pd.Categorical(df[factor], categories=first_seen_order)

# Fac1 becomes the rows, Fac2 the columns, the frequency column the cells.
df_pivoted = df.pivot_table(
    index='Fac1',
    columns='Fac2',
    values='Frequency Fac1-Fac2 pair',
)

fig, ax = plt.subplots(figsize=(20, 30))
sns.heatmap(
    data=df_pivoted,
    cmap='Blues',
    xticklabels=True,
    yticklabels=True,
    ax=ax,
)
# x tick marks and labels go on the top edge, rotated to stay readable.
ax.tick_params(
    axis='both',
    which='major',
    labelsize=10,
    labeltop=True,
    top=True,
    labelbottom=False,
    bottom=False,
)
ax.tick_params(axis='x', labelrotation=90)
plt.tight_layout()
plt.show()
If you are aiming for a 2d histogram or kde plot where the last column is intended as weights:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
url = "https://raw.githubusercontent.com/rroyss/stack/main/dfso.csv"
df = pd.read_csv(url)

# Convert the factor labels to integers by dropping a fixed-length prefix.
# NOTE(review): assumes every Fac1 value is a 5-character prefix followed by
# digits, and every Fac2 value a 6-character prefix followed by digits —
# confirm against the CSV.
df['Fac1'] = [int(f[5:]) for f in df['Fac1']]
df['Fac2'] = [int(f[6:]) for f in df['Fac2']]

fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20, 10))

# Left: 2D histogram; right: 2D KDE. Both use the frequency column as weights.
sns.histplot(data=df, x='Fac1', y='Fac2', weights='Frequency Fac1-Fac2 pair',
             bins=20, color='blue', ax=ax1)
sns.kdeplot(data=df, x='Fac1', y='Fac2', weights='Frequency Fac1-Fac2 pair',
            color='blue', ax=ax2)

# Same tick label size on both panels.
for ax in (ax1, ax2):
    ax.tick_params(axis='both', which='major', labelsize=10)

plt.tight_layout()
plt.show()

Normalized Group Values in Seaborn

I want to plot normalized count grouped values with seaborn. At first, I tried doing the following:
# Raw counts per age bin, one bar per 'Showup' value, drawn side by side.
fig, ax = plt.subplots(figsize=(10, 6))
dodge_options = {
    'x': 'age_bins',
    'hue': 'Showup',
    'multiple': "dodge",
    'stat': 'count',
    'shrink': 0.4,
}
ax = sns.histplot(data=df, **dodge_options)
Original Count
Now I want to normalize each bar relative to the overall 'bin' count. The only way I succeeded in doing so was this:
# Same histogram, but with multiple="fill" the 'Showup' groups are stacked
# and scaled so that each age bin fills the full bar height.
fig, ax = plt.subplots(figsize=(10, 6))
fill_options = {
    'x': 'age_bins',
    'hue': 'Showup',
    'multiple': "fill",
    'stat': 'count',
    'shrink': 0.4,
}
ax = sns.histplot(data=df, **fill_options)
multiple = 'fill'
This gave me what I wanted in terms of values, but is there any way to plot the same results with the bars dodged beside each other instead of stacked on top of each other?
You can group by ages and "showup", count them, then change "showup" to individual columns. Then divide each row by the row total and create a bar plot via pandas:
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import pandas as pd
import numpy as np
ages = ['<10', '<20', '<30', '<40', '<50', '<60', '<70', '++70']

# Random test data: 10000 rows, each with an age bin and a boolean 'Showup'
# that is True with probability 0.88.
df = pd.DataFrame({
    'age_bins': np.random.choice(ages, 10000),
    'Showup': np.random.choice([True, False], 10000, p=[0.88, 0.12]),
})

# Count rows per (age bin, Showup) pair, spread 'Showup' into columns,
# and put the rows back into the order of `ages`.
pair_counts = df.groupby(['age_bins', 'Showup']).size()
df_counts = pair_counts.unstack().reindex(ages)

# Divide every row by its own total so each age bin sums to 100 %.
bin_totals = df_counts.sum(axis=1)
df_percentages = df_counts.div(bin_totals, axis=0) * 100

sns.set()  # set default seaborn style
fig, ax = plt.subplots(figsize=(10, 6))
df_percentages.plot.bar(rot=0, ax=ax)
ax.set(xlabel='', ylabel='Percentage per age group')
ax.yaxis.set_major_formatter(PercentFormatter(100))
plt.tight_layout()
plt.show()

matplotlib multiple Y-axis pandas plot

Could someone give me a tip on how to do multiple Y axis plots?
This is some made up data below, how could I put Temperature its own Y axis, Pressure on its own Y axis, and then have both Value1 and Value2 on the same Y axis. I am trying to go for the same look and feel of this SO post answer. Thanks for any tips, I don't understand ax3 = ax.twinx() process, like as far as do I need to define an ax.twinx() for each separate Y axis plot I need?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Made-up hourly data: 8760 rows, four uniformly random columns.
rows, cols = 8760, 4
data = np.random.rand(rows, cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(
    data,
    columns=['Temperature', 'Value1', 'Pressure', 'Value2'],
    index=tidx,
)

# One base Axes plus three twins created with twinx().
fig, ax = plt.subplots(figsize=(25, 8))
plt.title('Multy Y Plot')
ax2, ax3, ax4 = (ax.twinx() for _ in range(3))

# (axes, column, extra line options) for each series, in plotting order.
series_specs = [
    (ax, 'Temperature', {}),
    (ax2, 'Value1', {'color': 'r'}),
    (ax3, 'Pressure', {'color': 'g'}),
    (ax4, 'Value2', {'color': 'b'}),
]
lines = []
for target_axes, column, line_options in series_specs:
    line, = target_axes.plot(df.index, df[column], **line_options)
    target_axes.set_ylabel(column)
    lines.append(line)
plot1, plot2, plot3, plot4 = lines

ax.set_xlabel('Date')
plt.legend(lines, list(df.columns))

# defining display layout
plt.tight_layout()
# show plot
plt.show()
This will output everything jumbled up on the same side without separate Y axis for Pressure, Value1, and Value2.
You are adding 4 different plots in one, which is not helpful. I would recommend breaking it into 2 plots w/ shared x-axis "Date":
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Made-up hourly data: 8760 rows, four uniformly random columns.
rows, cols = 8760, 4
data = np.random.rand(rows, cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(data, columns=['Temperature', 'Value1', 'Pressure', 'Value2'], index=tidx)

# Two stacked subplots that share the "Date" x-axis.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(25, 8))
# A figure-level title; plt.title() here would title only the bottom subplot.
fig.suptitle('Multy Y Plot')

# Top subplot: Temperature on the left y-axis, Pressure on the right one.
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature, label='Temperature')
plot1b, = ax1b.plot(df.index, df.Pressure, color='r', label='Pressure')
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')

# Bottom subplot: Value1 on the left y-axis, Value2 on the right one.
ax2b = ax2.twinx()
plot2a, = ax2.plot(df.index, df.Value1, color='k', label='Value1')
plot2b, = ax2b.plot(df.index, df.Value2, color='g', label='Value2')
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1')
ax2b.set_ylabel('Value2')

# Take each legend label from its own line. Pairing the handles with
# df.columns would mislabel Pressure and Value1, because the column order
# differs from the plotting order.
handles = [plot1a, plot1b, plot2a, plot2b]
plt.legend(handles, [handle.get_label() for handle in handles])

# defining display layout
plt.tight_layout()
# show plot
plt.show()
Here I have added in the first plot (on the top) Temperature and Pressure and on the second plot (on the bottom) Value 1 and Value 2. Normally, we add in the same plot things that make sense to compare on the same x-axis. Pressure and Temperature is a valid combination that is why I combined those two together. But you can do as you wish.
The answer below uses mpatches for the legend and shows how to plot Value1 and Value2 on the same axis, whereas the solution above puts Value1 and Value2 on separate axes. Thanks for the help #tzinie!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Made-up hourly data: 8760 rows, four uniformly random columns.
rows, cols = 8760, 4
data = np.random.rand(rows, cols)
tidx = pd.date_range('2019-01-01', periods=rows, freq='H')
df = pd.DataFrame(
    data,
    columns=['Temperature', 'Value1', 'Pressure', 'Value2'],
    index=tidx,
)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(25, 8))
plt.title('Multy Y Plot')

# Top subplot: Temperature (red, left y-axis) and Pressure (blue, right y-axis).
ax1b = ax1.twinx()
plot1a, = ax1.plot(df.index, df.Temperature, color='r')
plot1b, = ax1b.plot(df.index, df.Pressure, color='b')
ax1.set_ylabel('Temperature')
ax1b.set_ylabel('Pressure')

# Bottom subplot: Value1 (black) and Value2 (green) on one shared y-axis.
for column, line_color in (('Value1', 'k'), ('Value2', 'g')):
    ax2.plot(df.index, df[column], color=line_color)
ax2.set_xlabel('Date')
ax2.set_ylabel('Value1 & Value2')

# The legend is built from coloured patches rather than from the plotted lines.
red_patch, blue_patch, green_patch, black_patch = (
    mpatches.Patch(color=patch_color, label=patch_label)
    for patch_color, patch_label in (
        ('red', 'Temperature'),
        ('blue', 'Pressure'),
        ('green', 'Value2'),
        ('black', 'Value1'),
    )
)
plt.legend(handles=[red_patch, blue_patch, green_patch, black_patch])

# plt.tight_layout() is left disabled here, as in the original snippet.
# show plot
plt.show()

barh with plot : cannot get different scale for data on secondary x axis

I cannot get two different scales for the plot:
I don't know how to activate the scale of the secondary x axis.
"STK" and "Material" are supposed to be displayed at different scales.
How to display "Material" on it's own scale (0,max) like it was done automatically for "STK"?
I need it to be displayed like on the image below :
Here's the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# The question's attempt: the bars and the red line are both drawn on `ax`,
# and a secondary x-axis is added on top.
records = [
    ['MPP1', 400, 30],
    ['MPP2', 3500, 700],
    ['MPP3', 1900, 3],
    ['MPP4', 15000, 56],
    ['MPP5', 8500, 306],
]
df = pd.DataFrame(records)
df.columns = ['MPP', 'STK', 'Material']

plt.rcdefaults()
fig, ax = plt.subplots(constrained_layout=True)

xdata, x2data, ydata = df.STK, df.Material, df.MPP
y_pos = np.arange(len(ydata))

# Horizontal bars for STK, first row at the top.
ax.barh(y_pos, df.STK, label='STK per MPP')
ax.invert_yaxis()

# The Material line is plotted on the SAME Axes as the bars.
ax.plot(x2data, ydata, label='Material per MPP', color='red')
ax.set_xlabel('STK')
ax.legend()

# Secondary x-axis on top, with ticks placed at the Material values.
ax2 = ax.secondary_xaxis('top')
ax2.set_xticks(df.Material)
ax2.set_xticklabels(df.Material)
ax2.set_xlabel(r"Material")

plt.show()
You should create the secondary axis with:
# twiny() creates a second Axes that shares the y-axis with `ax` but has its
# own, independent x-axis drawn at the top.
ax2 = ax.twiny()
and plot your data on it:
# Draw the Material line on the twin Axes (ax2), not on `ax`.
ax2.plot(x2data, ydata, label='Material per MPP', color='red')
Pay attention: ax2.plot, not ax.plot.
Complete Code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = [['MPP1', 400, 30], ['MPP2', 3500, 700], ['MPP3', 1900, 3],
      ['MPP4', 15000, 56], ['MPP5', 8500, 306]]
df = pd.DataFrame(df)
df.columns = ['MPP', 'STK', 'Material']

plt.rcdefaults()
fig, ax = plt.subplots(constrained_layout=True)
x2data = df.Material
ydata = df.MPP
y_pos = np.arange(len(ydata))

# Bottom x-axis: STK as horizontal bars, first row at the top.
ax.barh(y_pos, df.STK, label='STK per MPP')
ax.invert_yaxis()
ax.set_xlabel('STK')

# Top x-axis: a twin Axes sharing the y-axis, so Material gets its own scale.
ax2 = ax.twiny()
ax2.plot(x2data, ydata, label='Material per MPP', color='red')
ax2.set_xlabel('Material')

# One combined legend, built from the labelled artists of both Axes,
# instead of creating two temporary legends and removing one afterwards.
bar_handles, bar_labels = ax.get_legend_handles_labels()
line_handles, line_labels = ax2.get_legend_handles_labels()
ax2.legend(bar_handles + line_handles, bar_labels + line_labels)

plt.show()

scatter more than one dataframe in the same plot

I'm using a for loop to scatter more than one dataframe on the same pd.plot.scatter plot, but on every iteration it draws a colorbar.
How can I have just one colorbar at the end of the cycle?
This is my code
# Fall back to the 'jet' colormap when none was given.
if colormap is None:
    colormap = 'jet'

f, ax = plt.subplots()

# Draw every dataframe onto the same Axes. This is the behaviour the
# question is about: a colorbar appears for each scatter call.
# NOTE(review): the original indentation was lost; only the scatter call is
# assumed to belong to the loop body — confirm.
for i, data in enumerate(wells):
    data.plot.scatter(x, y, c=z, colormap=colormap, ax=ax)

# Axis labels, limits and decorations are applied once, after the loop.
ax.set_xlabel(x)
ax.set_xlim(xlim)
ax.set_ylabel(y)
ax.set_ylim(ylim)
ax.legend()
ax.grid()
ax.set_title(title)
This can be achieved by using figure and adding the axes to the same subplot:
import matplotlib.pyplot as plt  # needed by the plotting snippet that follows
import numpy as np
import pandas as pd

# created two dataframes with random values
df1 = pd.DataFrame(np.random.rand(25, 2), columns=['a', 'b'])
df2 = pd.DataFrame(np.random.rand(25, 2), columns=['a', 'b'])
And then:
fig = plt.figure()

# Create the shared Axes once, outside the loop. Calling
# fig.add_subplot(111) on every iteration can add a new Axes on top of the
# previous one (recent Matplotlib no longer reuses an existing Axes).
ax = fig.add_subplot(111)

# Each dataframe gets a single solid colour, so no colorbar is drawn.
for i, data in enumerate([df1, df2]):
    data.plot.scatter(x='a', y='b', ax=ax,
                      c='#00FF00' if i == 0 else '#FF0000')

plt.show()
You can add the labels and other elements as required.

Categories