How to find inflection point in python? - python

I have a histogram of an RGB image which gives three curves, one for each of the components R, G and B. I want to find the inflection points of each curve. I tried using the second derivative, but it doesn't work: the second derivative is never exactly zero, so the search returns an empty result. So how can I find the inflection points? Is there any other method to find them?
import os, cv2, random
import numpy as np
import matplotlib.pyplot as plt
import math
from sympy import *
# Plot the per-channel histogram of an image and look for inflection points
# of each (blurred) histogram curve via its discrete second difference.
image = cv2.imread('C:/Users/Xers/Desktop/img.jpg')
if image is None:
    # cv2.imread signals an unreadable/missing file by returning None
    # instead of raising, which would otherwise fail later in calcHist.
    raise FileNotFoundError('C:/Users/Xers/Desktop/img.jpg')
# OpenCV loads colour images in B, G, R order, so channel index 0 is blue.
# The colour names below are also used as matplotlib line colours.
CHANNELS = ['b', 'g', 'r']
for i, channel in enumerate(CHANNELS):
    histogram = cv2.calcHist([image], [i], None, [256], [0, 256])
    histogram = cv2.GaussianBlur(histogram, (5, 5), 0)
    plt.plot(histogram, color=channel)
    x = plt.xlim([0, 256])
    y = plt.ylim([0, 24000])
    # First and second discrete differences along the bin axis.
    derivative1 = np.diff(histogram, axis=0)
    derivative2 = np.diff(derivative1, axis=0)
    # NOTE: this exact `== 0` comparison is the behaviour the question is
    # about; a discrete second difference is almost never exactly zero,
    # so this usually prints an empty array.
    inf_point = np.where(derivative2 == 0)[0]
    print(inf_point)
plt.show()

There are two issues of numerical nature with your code:
the data does not seem to be continuous enough to rely on the second derivative computed from two subsequent np.diff() applications
even if it were, the chances of it being exactly 0 are very slim
To address the first point, you should smooth your histogram (e.g. using a uniform or Gaussian filter on the histogram itself).
To solve the second point, instead of looking for == 0, look for the points where the second derivative switches from positive to negative (and vice versa).
To give you some minimal example of a possible approach:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
# Minimal example: locate inflection points of a noisy curve as the sign
# changes of the second derivative of a smoothed copy of the data.
np.random.seed(0)
# generate noisy data
raw = np.cumsum(np.random.normal(5, 100, 1000))
raw /= np.max(raw)
# smooth
smooth = gaussian_filter1d(raw, 100)
# compute second derivative
smooth_d2 = np.gradient(np.gradient(smooth))
# find switching points: np.diff of the sign is non-zero exactly where two
# neighbouring samples have different signs; the index of the first sample
# of each such pair is kept.
infls = np.where(np.diff(np.sign(smooth_d2)))[0]
# plot results
plt.plot(raw, label='Noisy Data')
plt.plot(smooth, label='Smoothed Data')
plt.plot(smooth_d2 / np.max(smooth_d2), label='Second Derivative (scaled)')
for i, infl in enumerate(infls, 1):
    plt.axvline(x=infl, color='k', label=f'Inflection Point {i}')
plt.legend(bbox_to_anchor=(1.55, 1.0))

I used the code provided by nook2, but scaled the second derivative to a different range (this way it works for any input data, so you don't have to change the scaling every time).
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
np.random.seed(0)
# generate noisy data
# Same approach as the previous example, but the second derivative is
# rescaled relative to the smoothed data so it stays visible on the plot.
# generate noisy data
raw = np.cumsum(np.random.normal(5, 100, 1000))
raw /= np.max(raw)
# smooth
smooth = gaussian_filter1d(raw, 100)
# compute second derivative
smooth_d2 = np.gradient(np.gradient(smooth))
# find switching points (indices just before each sign change)
infls = np.where(np.diff(np.sign(smooth_d2)))[0]
# plot results
plt.plot(raw, label='Noisy Data')
plt.plot(smooth, label='Smoothed Data')
# Scale by the peak-to-peak range of the second derivative and by the
# maximum of the smoothed curve.
plt.plot(np.max(smooth)*(smooth_d2)/(np.max(smooth_d2)-np.min(smooth_d2)) , label='Second Derivative (scaled)')
for i, infl in enumerate(infls, 1):
    plt.axvline(x=infl, color='k', label=f'Inflection Point {i}')
plt.legend(bbox_to_anchor=(1.55, 1.0))

Related

How to find series of highest peaks of a repeating pattern using find_peaks() in Python?

I'm trying to determine the highest peaks of the pattern blocks in the following waveform:
Basically, I need to detect the following peaks only (highlighted):
If I use scipy.find_peaks(), it's unable to detect the appropriate peaks:
indices = find_peaks(my_waveform, prominence = 1)[0]
It ends up detecting all of the following points, which is not what I am looking for:
I can't provide the input arguments of distance or height thresholds to scipy.find_peaks() since there are many of the desired peaks on either extremes which are lower in height than the non-desired peaks in the middle.
Note: I had de-trended the waveform in order to aid this above problem too as you can see in the above snapshot, but it still doesn't give the right results.
So can anyone help with a correct way to tackle this?
Here's the code to fully reproduce the dataset I've shown ("autocorr" is the final waveform of interest)
import json
import sys, os
import numpy as np
import pandas as pd
import glob
import pickle
from statsmodels.tsa.stattools import adfuller, acf, pacf
from scipy.signal import find_peaks, square
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
#GENERATION OF A FUNCTION WITH DUAL SEASONALITY & NOISE
def white_noise(mu, sigma, num_pts):
    """Generate Gaussian (normal) noise.

    Args:
        mu: mean value
        sigma: std value
        num_pts: no of points

    Returns:
        generated Gaussian Normal Noise, as returned by
        ``np.random.normal(mu, sigma, num_pts)``
    """
    return np.random.normal(mu, sigma, num_pts)
def signal_line_plot(input_signal: pd.Series, title: str = "", y_label: str = "Signal"):
    """Plot a time series signal and display the figure.

    Args:
        input_signal: time series signal that you want to plot
        title: title on plot
        y_label: label of the signal being plotted

    Returns:
        None; the plot is shown via ``plt.show()``.
    """
    plt.plot(input_signal)
    plt.title(title)
    plt.ylabel(y_label)
    plt.show()
# --- Synthetic signal: square waves with daily + weekly pattern and noise ---
T = 96  # Time Period
t_week = np.linspace(1, 480, 480)
t_weekend = np.linspace(1, 192, 192)

# Weekday part: amplitude 10, offset 10; weekend part: amplitude 2, offset 2.
weekday_wave = 10 * square(2 * np.pi * t_week / T, duty=0.7) + 10
x_weekday = weekday_wave + white_noise(0, 1, 480)
weekend_wave = 2 * square(2 * np.pi * t_weekend / T, duty=0.7) + 2
x_weekend = weekend_wave + white_noise(0, 1, 192)

# One "week" is the weekday block followed by the weekend block;
# the long signal is that week repeated ten times back to back.
x_daily_weekly = np.concatenate((x_weekday, x_weekend))
x_daily_weekly_long = np.tile(x_daily_weekly, 10)
signal_line_plot(x_daily_weekly_long)
signal_line_plot(x_daily_weekly_long[0:1000])
# x_daily_weekly_long is the final waveform on which autocorrelation is run

# --- Autocorrelation ---
import scipy.signal as signal
n_samples = len(x_daily_weekly_long)
autocorr = signal.correlate(x_daily_weekly_long, x_daily_weekly_long, mode="same")
lags = signal.correlation_lags(n_samples, n_samples, mode="same")

# --- Visualization ---
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
As you have some kind of double (or even triple) signal, I would attempt a double smoothing.
One to remove the overall trend, and one to remove the sharp noise.
A picture is probably better than a long explanation:
from scipy.signal import find_peaks
import pandas as pd
import numpy as np
def smooth(s, win):
    """Return a centred rolling mean of *s* as a pandas Series.

    Args:
        s: sequence of values (wrapped in ``pd.Series``)
        win: rolling window size

    Returns:
        Series of the same length as *s*; the undefined values the centred
        window leaves at the edges are filled forward, then backward.
    """
    rolled = pd.Series(s).rolling(window=win, center=True).mean()
    return rolled.ffill().bfill()
plt.plot(lags, autocorr, label='data')
WINDOW = 100 # needs to be determined empirically
# and so are the multipliers below
# double smoothing difference + clipping
ddiff = np.clip(smooth(autocorr, 2*WINDOW)-smooth(autocorr, 10*WINDOW), 0, np.inf)
plt.plot(lags, ddiff, label='smooth+clip')
peaks = find_peaks(ddiff, width=WINDOW)[0]
plt.plot(lags[peaks], autocorr[peaks], marker='o', ls='')
plt.plot(lags[peaks], ddiff[peaks], marker='o', ls='')
plt.legend()
output:
smoothing the original signal
As is often the case in data analysis, the earlier you perform a transformation, the better. You could also clean your original signal before running the autocorrelation. Here is a quick example (using the smooth function defined above):
from scipy.signal import find_peaks
x2 = smooth(x_daily_weekly_long, 100)
autocorr2 = signal.correlate(x2, x2, mode = "same")
plt.plot(lags, autocorr2)
idx = find_peaks(autocorr2)[0]
plt.plot(lags[idx], autocorr2[idx], marker='o', ls='')
cleaned signal:
For testing purposes I used a rough reconstruction of your signal.
import numpy as np
from scipy.signal import find_peaks, square
import matplotlib.pyplot as plt
x = np.linspace(3,103,10000)
sin = np.clip(np.sin(0.6*x)-0.5,0,10)
tri = np.concatenate([np.linspace(0,0.3,5000),np.linspace(0.3,0,5000)],axis =0)
sig = np.sin(6*x-1.2)
full = sin+tri+sig
peak run #1
peaks = find_peaks(full)[0]
plt.plot(full)
plt.scatter(peaks,full[peaks], color='red', s=5)
plt.show()
peak run #2 + index reextraction (this needs the actual values from your signal)
peaks2 = find_peaks(full[peaks])[0]
index = peaks[peaks2]
plt.plot(full)
plt.scatter(index,full[index], color='red', s=5)
plt.show()
If you know the period you can do this:
# Smooth the autocorrelation with a moving average one period wide, then
# mark the peaks of the smoothed curve on the original autocorrelation.
w = T  # window length = known period of the signal
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
# Compute the smoothed curve and its peak indices once and reuse them for
# both the x and the y coordinates of the markers.
smoothed = signal.convolve(autocorr, np.ones(w) / w, mode="same")
peak_idx = signal.find_peaks(smoothed)[0]
plt.scatter(lags[peak_idx], autocorr[peak_idx], color="r")
Result:
I don't know if it works in other cases.
EDIT:
Another approach is to find the maximum in a sliding window, but in this case too you must define the window size empirically.
# Mark every sample that is the maximum of the sliding window centred on it.
from scipy.ndimage import maximum_filter

w = 900  # window size; must be chosen empirically
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
# A sample is a window maximum when the max-filtered value equals the sample
# itself; build the boolean mask once and use it for both coordinates.
is_window_max = maximum_filter(autocorr, size=w) == autocorr
plt.scatter(lags[is_window_max], autocorr[is_window_max], color="r")
Result:

Calculating the area under multiple Peaks using Python

My problem is calculating the area under the peaks in my FT-IR analysis. I usually work with Origin but I would like to see if I get a better result working with Python. The data I'm using is linked here and the code is below. The problem I'm facing is, I don't know how to find the start and the end of the peak to calculate the area and how to set a Baseline.
I found this answered question about how to calculate the area under multiple peaks but I don't know how to implement it in my code: How to get value of area under multiple peaks
import numpy as np
from numpy import trapz
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv(r'CuCO3.csv', skiprows=5)
print(df)
Wavenumber = df.iloc[:,0]
Absorbance = df.iloc[:,1]
Wavenumber_Peak = Wavenumber.iloc[700:916] #Where the peaks start/end that i want to calculate the area
Absorbance_Peak = Absorbance.iloc[700:916] #Where the peaks start/end that i want to calculate the area
plt.figure()
plt.plot(Wavenumber_Peak, Absorbance_Peak)
plt.show()
Plot of the peaks to calculate the area:
Okay, I have quickly added the code from the other post to the beginning of yours and checked that it works. Unfortunately, the file that you linked did not work with your code, so I had to change some things at the beginning to make it work (in a very inelegant way, because I do not really know how to work with dataframes). If your local file is different and processing the file in this way does not work, then just replace my beginning with yours.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import peakutils
df = pd.read_csv(r'CuCO3.csv', skiprows=5)
data = np.asarray([[float(y) for y in x[0].split(",")] for x in df.to_numpy()])
Wavenumber = np.arange(700, 916)
Absorbance = data[700:916,1]
indices = peakutils.indexes(Absorbance, thres=0.35, min_dist=0.1)
peak_values = [Absorbance[i] for i in indices]
peak_Wavenumbers = [Wavenumber[i] for i in indices]
plt.figure()
plt.scatter(peak_Wavenumbers, peak_values)
plt.plot(Wavenumber, Absorbance)
plt.show()
ixpeak = Wavenumber.searchsorted(peak_Wavenumbers)
ixmin = np.array([np.argmin(i) for i in np.split(Absorbance, ixpeak)])
ixmin[1:] += ixpeak
mins = Wavenumber[ixmin]
# split up the x and y values based on those minima
xsplit = np.split(Wavenumber, ixmin[1:-1])
ysplit = np.split(Absorbance, ixmin[1:-1])
# find the areas under each peak
areas = [np.trapz(ys, xs) for xs, ys in zip(xsplit, ysplit)]
# plotting stuff
plt.figure(figsize=(5, 7))
plt.subplots_adjust(hspace=.33)
plt.subplot(211)
plt.plot(Wavenumber, Absorbance, label='trace 0')
plt.plot(peak_Wavenumbers, Absorbance[ixpeak], '+', c='red', ms=10, label='peaks')
plt.plot(mins, Absorbance[ixmin], 'x', c='green', ms=10, label='mins')
plt.xlabel('dep')
plt.ylabel('indep')
plt.title('Example data')
plt.ylim(-.1, 1.6)
plt.legend()
plt.subplot(212)
plt.bar(np.arange(len(areas)), areas)
plt.xlabel('Peak number')
plt.ylabel('Area under peak')
plt.title('Area under the peaks of trace 0')
plt.show()

Discontinuous, non-monotonic x-axis on contourf

I am plotting a 3D shape in spherical coordinates. In order to rotate it, I am shifting the phi values by 30 deg, as phi_lin and phi_rot show in the following code. I would expect the result in panel 4 to have the same distribution as panel 2, but rigidly shifted to the right by 30 degrees.
I guess the problem is that the plotting function contourf cannot deal with the phi_rot input vector since it is non-monotonic. The discontinuity due to the shifting is visible in panel 3. How can I overcome this problem?
Here a working code:
import glob
import math
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import LightSource
%matplotlib inline
import itertools
def ellips(THETA, PHI, a=1, b=2, c=3):
    """Return the radius of an ellipsoid in spherical coordinates.

    Definition of the ellipsoid from https://arxiv.org/pdf/1104.5145.pdf

    Args:
        THETA: angle(s) fed to ``np.cos``/``np.sin`` (NumPy trigonometric
            functions interpret their argument in radians); scalar or
            array-like.
        PHI: second angle(s), same convention; must broadcast against
            THETA.
        a, b, c: the three constants of the formula (defaults 1, 2, 3,
            the values that were previously hard-coded).

    Returns:
        ``np.ndarray`` with the radius R for every (THETA, PHI) pair.
    """
    sin_theta_sq = np.sin(THETA)**2
    denominator = np.sqrt(
        b**2 * c**2 * np.cos(THETA)**2
        + c**2 * a**2 * sin_theta_sq * np.cos(PHI)**2
        + a**2 * b**2 * sin_theta_sq * np.sin(PHI)**2
    )
    return np.array((a * b * c) / denominator)
nth=13
theta = np.linspace(0, np.pi, nth)
#length = 13
phi_lin=[-180,-150,-120,-90,-60,-30,0,30,60,90,120,150,180]
phi_rot=[-150,-120,-90,-60,-30,0,30,60,90,120,150,180,-180]
THETA_lin, PHI_lin = np.meshgrid(theta, phi_lin)
THETA_rot, PHI_rot = np.meshgrid(theta, phi_rot)
THETA_deg_lin=[el*180/np.pi for el in THETA_lin]
THETA_deg_rot=[el*180/np.pi for el in THETA_rot]
PHI_deg_lin=[el for el in PHI_lin]
PHI_deg_rot=[el for el in PHI_rot]
fig1, ax = plt.subplots(2,2, figsize=(15,15), constrained_layout=True)
ax[0,0].plot(PHI_deg_lin, "o")
ax[0,0].set_xlabel("# element")
ax[0,0].set_ylabel('phi [DEG]')
ax[0,0].set_title("initial coordinates")
ax[0,1].contourf(PHI_deg_lin, THETA_deg_lin, ellips(THETA_deg_lin,PHI_deg_lin).reshape(len(phi_lin),nth))
ax[0,1].set_xlabel('phi [DEG]')
ax[0,1].set_ylabel('theta [DEG]')
ax[0,1].set_title("Original ellipsoind in spherical coordinates")
ax[1,0].plot(PHI_deg_rot, "o")
ax[1,0].set_xlabel("# element")
ax[1,0].set_ylabel('phi [DEG]')
ax[1,0].set_title("shifted coordinates")
ax[1,1].contourf(PHI_deg_rot, THETA_deg_rot, ellips(THETA_deg_rot,PHI_deg_rot).reshape(len(phi_rot),nth))
ax[1,1].set_xlabel('phi [DEG]')
ax[1,1].set_ylabel('theta [DEG]')
ax[1,1].set_title("Original ellipsoind in spherical coordinates")
and the output:
UPDATE: I tried to create an interpolation function z=f(x,y) with the rotated coordinates and to plot the new z:
from scipy import interpolate
i2d = interpolate.interp2d(theta, phi_rot, ellips(THETA_deg_rot,PHI_deg_rot))
znew = i2d(theta,phi_lin)
ax[1,1].contourf(PHI_deg_rot, THETA_deg_rot,znew.reshape(len(phi_rot),nth))
The shift occurs, as you can see in the following output, but the non-linearly-spaced x axis prevents a smooth contour:
any idea how to fix it?
The solution has been inspired by this post.
Since contourf doesn't accept a non-linearly-spaced axis, it is necessary to interpolate the rotated data
from scipy import interpolate
i2d = interpolate.interp2d(theta, phi_rot, ellips(THETA_deg_rot,PHI_deg_rot))
evaluate it on the same axis (lin or rot doesn't matter at this point)
znew = i2d(theta,phi_lin)
and plot it using tricontourf with a suitable number of levels
ax[1,1].tricontourf(np.array(PHI_deg_rot).reshape(-1), np.array(THETA_deg_rot).reshape(-1),znew.reshape(-1),10)
the output is the expected one:

is there a simple method to smooth a curve without taking into account future values and without a time shift?

I have a Unix time series (x) with an associated signal value (y) which is generated every minute, dropping the first value and appending a new one. I am trying to smooth the resulting curve without losing time accuracy, with a specific emphasis on the final value of the smoothed curve, which will be written to a database. I would like to be able to adjust the smoothing to a considerable degree.
I have studied (as a mathematical layman, more or less) all the options I could find and master. I came across Savitzky-Golay, which looked perfect until I realized it works well on past data but fails to produce a reliable final value if no future data is available for smoothing. I have tried many other methods which produced results but could not be adjusted like Savgol.
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
from math import pi
from scipy.signal import savgol_filter
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from scipy.interpolate import splrep, splev
from scipy.ndimage import gaussian_filter1d
from scipy.signal import lfilter
from scipy.interpolate import UnivariateSpline
import matplotlib.pyplot as plt
df_sim = pd.read_csv("/home/20190905_Signal_Smooth_Test.csv")
#sklearn Polynomial*****************************************
poly = PolynomialFeatures(degree=4)
X = df_sim.iloc[:, 0:1].values
print(X)
y = df_sim.iloc[:, 1].values
print(y)
X_poly = poly.fit_transform(X)
poly.fit(X_poly, y)
lin2 = LinearRegression()
lin2.fit(X_poly, y)
# Visualising the Polynomial Regression results
plt.scatter(X, y, color='blue')
plt.plot(X, lin2.predict(poly.fit_transform(X)), color='red')
plt.title('Polynomial Regression')
plt.xlabel('Time')
plt.ylabel('Signal')
plt.show()
#scipy interpolate********************************************
bspl = splrep(df_sim['timestamp'], df_sim['signal'], s=5)
bspl_y = splev(df_sim['timestamp'], bspl)
df_sim['signal_spline'] = bspl_y
#scipy gaussian filter****************************************
smooth = gaussian_filter1d(df_sim['signal'], 3)
df_sim['signal_gauss'] = smooth
#scipy lfilter************************************************
n = 5 # the larger n is, the smoother curve will be
b = [1.0 / n] * n
a = 1
histo_filter = lfilter(b, a, df_sim['signal'])
df_sim['signal_lfilter'] = histo_filter
print(df_sim)
#scipy UnivariateSpline**************************************
s = UnivariateSpline(df_sim['timestamp'], df_sim['signal'], s=5)
xs = df_sim['timestamp']
ys = s(xs)
df_sim['signal_univariante'] = ys
#scipy savgol filter****************************************
sg = savgol_filter(df_sim['signal'], 11, 3)
df_sim['signal_savgol'] = sg
df_sim['date'] = pd.to_datetime(df_sim['timestamp'], unit='s')
#plotting it all********************************************
print(df_sim)
w = 60000
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, plot_height=250,
title=f"Various Signals y vs Timestamp x")
p.xaxis.major_label_orientation = pi / 4
p.grid.grid_line_alpha = 0.9
p.line(x=df_sim['date'], y=df_sim['signal'], color='green')
p.line(x=df_sim['date'], y=df_sim['signal_spline'], color='blue')
p.line(x=df_sim['date'], y=df_sim['signal_gauss'], color='red')
p.line(x=df_sim['date'], y=df_sim['signal_lfilter'], color='magenta')
p.line(x=df_sim['date'], y=df_sim['signal_univariante'], color='yellow')
p1 = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, plot_height=250,
title=f"Savgol vs Signal")
p1.xaxis.major_label_orientation = pi / 4
p1.grid.grid_line_alpha = 0.9
p1.line(x=df_sim['date'], y=df_sim['signal'], color='green')
p1.line(x=df_sim['date'], y=df_sim['signal_savgol'], color='blue')
output_file("signal.html", title="Signal Test")
show(column(p, p1)) # open a browser
I expect a result that is similar to Savitzky-Golay but with valid final smoothed values for the data series. None of the other methods offers the same flexibility to adjust the degree of smoothing. Most other methods shift the curve to the right. I can provide the csv file for testing.
This really depends on why you are smoothing the data. Every smoothing method will have side effects, such as letting some 'noise' through more than other. Research 'phase response of filtering'.
A common technique to avoid the problem of missing data at the end of a symmetric filter is to just forecast your data a few points ahead and use that. For example, if you are using a 5-term moving average filter you will be missing 2 data points when you go to calculate your end value.
To forecast these two points, you could use the auto_arima() function from the pmdarima module, or look at the fbprophet module (which I find quite good for this kind of situation).

What is the source of discrepancy in 2D interpolated spectrogram with matplotlib?

I am trying to interpolate a spectrogram obtained from matplotlib using scipy's interp2d function, but somehow fail to get the same spectrogram. The data is available here
The actual spectrogram is:
And interpolated spectrogram is:
The code looks okay, but even then something is wrong. The code used is:
from __future__ import division
from matplotlib import ticker as mtick
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
from bisect import bisect
from scipy import interpolate
from matplotlib.ticker import MaxNLocator
data = np.genfromtxt('spectrogram.dat', skiprows = 2, delimiter = ',')
pressure = data[:, 1] * 0.065
time = data[:, 0]
cax = plt.specgram(pressure * 100000, NFFT = 256, Fs = 50000, noverlap=4, cmap=plt.cm.gist_heat, zorder = 1)
f = interpolate.interp2d(cax[2], cax[1], cax[0], kind='cubic')
xnew = np.linspace(cax[2][0], cax[2][-1], 100)
ynew = np.linspace(cax[1][0], cax[1][-1], 100)
znew = 10 * np.log10(f(xnew, ynew))
fig = plt.figure(figsize=(6, 3.2))
ax = fig.add_subplot(111)
ax.set_title('colorMap')
plt.pcolormesh(xnew, ynew, znew, cmap=plt.cm.gist_heat)
# plt.colorbar()
plt.title('Interpolated spectrogram')
plt.colorbar(orientation='vertical')
plt.savefig('interp_spectrogram.pdf')
How to interpolate a spectrogram correctly with Python?
The key to your solution is in this warning, which you may or may not have seen:
RuntimeWarning: invalid value encountered in log10
znew = 10 * np.log10(f(xnew, ynew))
If your data is actually a power whose log you'd like to view explicitly as decibel power, take the log first, before fitting to the spline:
spectrum, freqs, t, im = cax
dB = 10*np.log10(spectrum)
#f = interpolate.interp2d(t, freqs, dB, kind='cubic') # docs for this recommend next line
f = interpolate.RectBivariateSpline(t, freqs, dB.T) # but this uses xy not ij, hence the .T
xnew = np.linspace(t[0], t[-1], 10*len(t))
ynew = np.linspace(freqs[0], freqs[-1], 10*len(freqs)) # was it wider spaced than freqs on purpose?
znew = f(xnew, ynew).T
Then plotting as you have:
Previous answer:
If you just want to plot on logscale, use matplotlib.colors.LogNorm
# Plot the interpolated (linear-power) spectrogram on a logarithmic colour
# scale instead of taking the log of the data.
from matplotlib import colors  # provides LogNorm used below

znew = f(xnew, ynew) # Don't take the log here
plt.figure(figsize=(6, 3.2))
plt.pcolormesh(xnew, ynew, znew, cmap=plt.cm.gist_heat, norm=colors.LogNorm())
And that looks like this:
Of course that still has gaps where its value is negative when plotted on a log scale. What your data means to you when the value is negative should dictate how you fill this in. One simple solution is to just set those values to the smallest positive value and they'd fill in as black:

Categories