Cross correlating with time lag using python

Cross correlating with time lag using python - python

fd = pd.read_csv('data1.csv', sep=r'\s*,\s*',header=0, encoding='ascii', engine='python')
dv = pd.read_csv('data2.csv', sep=";",header=0, engine='python')
x = fd.iloc[:,1]
print (x)
y = dv.iloc[:,1]
fdmean = x.mean(axis=0)
dvmean = y.mean(axis=0)
multi1list = []
k3list = []
k4list = []
k1list = []
k2list = []
L=-1
i = 0
while i < 97:
x = (fd.iloc[i, 1])
j = i-L
print(j)
y = ( dv.iloc[i-L,1])
print (y)
k1 = x - fdmean
k1list.append(k1)
k2 = y - dvmean
k2list.append(k2)
multi1 = k1*k2
multi1list.append(multi1)
k3 = (x-fdmean)
k3 = k3*2
k3list.append(k3)
k4 = (y-dvmean)
k4 = k4*2
k4list.append(k4)
i = i+1
Summulti = sum(multi1list)
Sumk3 = sum(k3list)
Sumk4 = sum(k4list)
Sumk = Sumk3*Sumk4
Sumk = np.sqrt(Sumk)
r = Summulti/Sumk
print (r)
n = 97 - abs(L)
t = r*(np.sqrt((n-2)/(1-(r**2))))
print (t)
I'm trying to calculate the cross correlation manually after Dawis (1986). I correlated my data with the software Past before and I want to reproduce my results with Python. I have a lot more datasets now so manually correlating them in Past isn't an option any more and I need to use loops. Unfortunately I can't seem to get any plausible results with my code. Can someone spot the error? The t (pvalues) isn't even between 1 and 0.
Thanks a lot!

Related

Performing Excel formula in Python

import numpy
K = 1
Rmv = 26
SigS = 111.7
M = 2.050
N = 2
SigD = (-249.4)
def Mittelspannung():
result = []
Y = []
SigM = []
for i in range(1,31):
output = 1 - pow((((i-1)*1/14.5)-1),2)
#Z = Rmv - (output*Rmv)
result.append(output)
#print(output)
for value in range(0,15):
C4 = (Rmv) - (result[value]) * (Rmv)
Y.append(C4)
print(C4)
for value in range(15,30):
B11 = (SigD) - (result[value]) * (SigD)
Y.append(B11)
print(B11)
for x in range(0,30):
SigMean = pow(SigS,M) * pow(1-(pow(Y[x]+SigS,N)/(pow(Rmv+SigS,N))),1/M)
SigM.append(SigMean)
#print(Y[value])
return SigM
print(Mittelspannung())
From the above script I can execute the output same as excel. But, the last for loop I couldn't get the output same as Excel. I also mentioned the Excel formula in my code.
The output for SigM in Excel is 25.8 but in Python I'm getting 3650.9824739444566.

How to structure python programs? Tried making it more structured, now runs 13 times slower

Im very new to programming, I wrote a simple program for a school project and wanted to make the code "prettier" by not just having the program be one giant function but instead be made up of multiple smaller functions with a singe purpose. I seemed to have messed up royally since the program now runs 13 times slower. How should I structured the program to make it run faster and just in general make programs easier to write, read and edit?
Here are the two programs:
First program (for reference values runs in ≈0:20):
import numpy as np
import matplotlib.pyplot as plt
def graf(a,b,H,p):
GM = 39.5216489684
x_0 = a + np.sqrt(a**2 - b**2)
v_0 = np.sqrt(GM*(2/x_0 - 1/a))
konstant_period = np.sqrt(a**3)*H
h = 1/H
'''starting position given by an elliptic orbit '''
stor_x_lista = [x_0]
stor_y_lista = [0]
hastighet_x = [0]
hastighet_y = [v_0]
liten_x_lista = []
liten_y_lista = []
''' a loop that approximates the points of the orbit'''
t = 0
tid_lista = []
n = 0
while n < konstant_period:
hastighet_x.append(hastighet_x[n] - h*GM* stor_x_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_x_lista.append(stor_x_lista[n] + h*hastighet_x[n])
hastighet_y.append(hastighet_y[n] - h*GM*stor_y_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_y_lista.append(stor_y_lista[n] + h*hastighet_y[n])
'''smaller list of points to run faster'''
if n % p == 0:
liten_x_lista.append(stor_x_lista[n])
liten_y_lista.append(stor_y_lista[n])
tid_lista.append(t)
n += 1
t += h
''' function that finds the angle'''
vinkel = []
siffra = 0
while siffra < len(liten_x_lista):
if liten_y_lista[siffra ] >= 0:
vinkel.append( np.arccos( liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2)))
siffra += 1
elif liten_y_lista[siffra] < 0 :
vinkel.append( np.pi + np.arccos( -liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2) ))
siffra += 1
'''get rid of line to find periodic function'''
mod_lista = []
modn = 0
while modn < len(vinkel):
mod_lista.append(vinkel[modn] - (2*np.pi*tid_lista[modn])/np.sqrt(a**3))
modn += 1
'''make all inputs have period 1'''
squeeze_tid = []
squeezen = 0
while squeezen < len(tid_lista):
squeeze_tid.append(tid_lista[squeezen]/np.sqrt(a**3))
squeezen += 1
del mod_lista[-1:]
del tid_lista[-1:]
del squeeze_tid[-1:]
plt.plot(squeeze_tid,mod_lista)
plt.title('p(t) där a = ' + str(a) + ' och b = ' + str(b))
plt.show
Second more split up program (for reference values runs in ≈4:20):
import numpy as np
import matplotlib.pyplot as plt
'''function that generates the points of the orbit'''
def punkt(a,b,H,p):
GM = 39.5216489684
x_0 = a + np.sqrt(a**2 - b**2)
v_0 = np.sqrt(GM*(2/x_0 - 1/a))
konstant_period = np.sqrt(a**3)*H
h = 1/H
'''starting position given by an elliptic orbit '''
stor_x_lista = [x_0]
stor_y_lista = [0]
hastighet_x = [0]
hastighet_y = [v_0]
liten_x_lista = []
liten_y_lista = []
''' a loop that approximates the points of the orbit'''
t = 0
tid_lista = []
n = 0
while n < konstant_period:
hastighet_x.append(hastighet_x[n] - h*GM* stor_x_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_x_lista.append(stor_x_lista[n] + h*hastighet_x[n])
hastighet_y.append(hastighet_y[n] - h*GM*stor_y_lista[n]/(np.sqrt(stor_x_lista[n]**2 + stor_y_lista[n]**2))**3)
stor_y_lista.append(stor_y_lista[n] + h*hastighet_y[n])
'''smaller list of points to run faster'''
if n % p == 0:
liten_x_lista.append(stor_x_lista[n])
liten_y_lista.append(stor_y_lista[n])
tid_lista.append(t)
n += 1
t += h
return (liten_x_lista,liten_y_lista,tid_lista)
''' function that finds the angle'''
def vinkel(a,b,H,p):
'''import lists'''
liten_x_lista = punkt(a,b,H,p)[0]
liten_y_lista = punkt(a,b,H,p)[1]
tid_lista = punkt(a,b,H,p)[2]
'''find the angle'''
vinkel_lista = []
siffra = 0
while siffra < len(liten_x_lista):
if liten_y_lista[siffra ] >= 0:
vinkel_lista.append( np.arccos( liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2)))
siffra += 1
elif liten_y_lista[siffra] < 0 :
vinkel_lista.append( np.pi + np.arccos( -liten_x_lista[siffra]/np.sqrt( liten_x_lista[siffra]**2 + liten_y_lista[siffra]**2) ))
siffra += 1
return (vinkel_lista, tid_lista)
def periodisk(a,b,H,p):
'''import lists'''
tid_lista = vinkel(a,b,H,p)[1]
vinkel_lista = vinkel(a,b,H,p)[0]
'''get rid of linear line to find p(t)'''
mod_lista = []
modn = 0
while modn < len(vinkel_lista):
mod_lista.append((vinkel_lista[modn] - (2*np.pi*tid_lista[modn])/np.sqrt(a**3)))
modn += 1
'''make all inputs have period 1'''
squeeze_tid = []
squeezen = 0
while squeezen < len(tid_lista):
squeeze_tid.append(tid_lista[squeezen]/np.sqrt(a**3))
squeezen += 1
del mod_lista[-1:]
del tid_lista[-1:]
del squeeze_tid[-1:]
return (squeeze_tid,mod_lista)
'''fixa 3d-punkt av p(a,b) a är konstant b varierar??? '''
def hitta_amp(a):
x_b = []
y_b = []
n_b = 0.1
while n_b <= a:
x_b.append(n_b)
y_b.append(punkt(a,n_b,10**5,10**3))
return 0
def graf(a,b,H,p):
plt.plot(periodisk(a,b,H,p)[0],periodisk(a,b,H,p)[1])
plt.show
I would assume the thing that is going wrong is that the program is running the same, slow code multiple times instead of just running it once and then accessing the data. Is the problem that everything is done locally and nothing is stored globally or is it something else?
Just as a heads up, the only thing I know about programming is basic syntax, I have no clue how to actually write and run programs. I ran all the code in spyder if that affects anything.

plt.plot(periodisk(a,b,H,p)[0],periodisk(a,b,H,p)[1])
This code runs periodisk twice with the same arguments, thus at this point we know we run things at least 2 times slower.
You should do some_var = periodisk(a,b,H,p) and then some_var[0], some_var[1]. Or just use unpacking:
plt.plot(*periodisk(a,b,H,p))
tid_lista = vinkel(a,b,H,p)[1]
vinkel_lista = vinkel(a,b,H,p)[0]
Again doing the same thing twice (total: 4*time of (current) vinkel function). Again, smart assignment to fix this:
vinkel_lista, tid_lista = vinkel(a,b,H,p)
liten_x_lista = punkt(a,b,H,p)[0]
liten_y_lista = punkt(a,b,H,p)[1]
tid_lista = punkt(a,b,H,p)[2]
And now you repeat yourself thrice. (total: 12 * time of current punkt function)
liten_x_lista, liten_y_lista, tid_lista = punkt(a,b,H,p)
punkt function is like in original, so we arrived as total being 12 times slower - which quite matches your time estimations. :)

You are calling the functions once per returned list, you should only call them once.
When a method returns multiple variables, (e.g. punkt):
def punkt(a,b,H,p):
# Here is all your code
return (liten_x_lista,liten_y_lista,tid_lista)
You must be careful to only call the function once:
result = punkt(a,b,H,p)
liten_x_lista = result[0]
liten_y_lista = result[1]
tid_lista = result[2]
# As opposed to:
liten_x_lista = punkt(a,b,H,p)[0] # 1st call, ignoring results 2 and 3
liten_y_lista = punkt(a,b,H,p)[1] # 2nd call, ignoring results 1 and 3
tid_lista = punkt(a,b,H,p)[2] # 3rd call, ignoring results 1 and 2
Note: I would personally not return a list, but use python's unpacking:
def punkt(a,b,H,p):
# Here is all your code
return liten_x_lista, liten_y_lista, tid_lista
And you'd access it:
liten_x_lista, liten_y_lista, tid_lista = punkt(a,b,H,p)

How to parallelize a while loop?

I am attempting to perform a value function iteration (for an Aiyagari model). The loop I am hoping to optimize is here (EDITED TO INCLUDE MWE):
beta = 0.95
r =0.03
a_lb = -1.5
a_ub = 10
y_l = 0.9
y_h = 1.1
yGrid = [y_l, y_h]
aSz = 100
nstates = 2
V0 = np.zeros((nstates, aSz))
V1 = np.zeros((nstates, aSz))
aPol = np.zeros((nstates, aSz))
Tol = 0.0001
Iter_max = 300
err = 1.0
PI = np.matrix([[0.5, 0.5],[0.09, 0.91]])
aGrid = np.linspace(a_lb, a_ub, num = aSz)
#
#
#
Iter = 0
while (err> Tol) and (Iter < Iter_max):
V0l= intp.interp1d(aGrid, V0[0,:])
V0h= intp.interp1d(aGrid, V0[1,:])
for a_today in range(aSz):
for yix in range(nstates):
def objective(a_tomorrow):
c = yGrid[yix] + (1+r)*aGrid[a_today] - a_tomorrow
exp_cont_Val = PI[yix,0] * V0l(a_tomorrow) + PI[yix,1] * V0h(a_tomorrow)
return -(-1/c + beta* exp_cont_Val)
minima_val = opt.fminbound(objective , a_lb, min(a_ub, yGrid[yix] + (1+r) *aGrid[a_today] -0.00001))
aPol[yix, a_today] = minima_val
V1[yix, a_today] = -objective(aPol[yix, a_today])
err = (abs(V1-V0)).max()
Iter = Iter+1
V0=V1.copy()
print('Iteration ' + str( Iter) + ' with error ' + str( err))
This is a part of a much bigger loop that uses a bisection method to find a value for a variable r.
First I guess an arbitrary value for r and use it to fill the values in the c array. From my testing, this first part of the loop is very fast. For the second part (given above) I think the opt.fminbound has the most overhead. I tried to use jit, but I kept getting error messages, I would appreciate any insight.

Condensing repeat code with a "for" statement using strings - Python

I am very new with "for" statements in Python, and I can't get something that I think should be simple to work. My code that I have is:
import pandas as pd
df1 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
df2 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
df3 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
DF1 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
DF2 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
DF3 = pd.DataFrame({'Column1' : pd.Series([1,2,3,4,5,6])})
Then:
A1 = len(df1.loc[df1['Column1'] <= DF1['Column1'].iloc[2]])
Z1 = len(df1.loc[df1['Column1'] >= DF1['Column1'].iloc[3]])
A2 = len(df2.loc[df2['Column1'] <= DF2['Column1'].iloc[2]])
Z2 = len(df2.loc[df2['Column1'] >= DF2['Column1'].iloc[3]])
A3 = len(df3.loc[df3['Column1'] <= DF3['Column1'].iloc[2]])
Z3 = len(df3.loc[df3['Column1'] >= DF3['Column1'].iloc[3]])
As you can see, it is a lot of repeat code with just the identifying numbers being different. So my first attempt at a "for" statement was:
Numbers = [1,2,3]
for i in Numbers:
"A" + str(i) = len("df" + str(i).loc["df" + str(i)['Column1'] <= "DF" + str(i)['Column1'].iloc[2]])
"Z" + str(i) = len("df" + str(i).loc["df" + str(i)['Column1'] >= "DF" + str(i)['Column1'].iloc[3]])
This yielded the SyntaxError: "can't assign to operator". So I tried:
Numbers = [1,2,3]
for i in Numbers:
A = "A" + str(i)
Z = "Z" + str(i)
A = len("df" + str(i).loc["df" + str(i)['Column1'] <= "DF" + str(i)['Column1'].iloc[2]])
Z = len("df" + str(i).loc["df" + str(i)['Column1'] >= "DF" + str(i)['Column1'].iloc[3]])
This yielded the AttributeError: 'str' object has no attribute 'loc'. I tried a few other things like:
Numbers = [1,2,3]
for i in Numbers:
A = "A" + str(i)
Z = "Z" + str(i)
df = "df" + str(i)
DF = "DF" + str(i)
A = len(df.loc[df['Column1'] <= DF['Column1'].iloc[2]])
Z = len(df.loc[df['Column1'] <= DF['Column1'].iloc[3]])
But that just gives me the same errors. Ultimately what I would want is something like:
Numbers = [1,2,3]
for i in Numbers:
Ai = len(dfi.loc[dfi['Column1'] <= DFi['Column1'].iloc[2]])
Zi = len(dfi.loc[dfi['Column1'] <= DFi['Column1'].iloc[3]])
Where the output would be equivalent if I typed:
A1 = len(df1.loc[df1['Column1'] <= DF1['Column1'].iloc[2]])
Z1 = len(df1.loc[df1['Column1'] >= DF1['Column1'].iloc[3]])
A2 = len(df2.loc[df1['Column1'] <= DF2['Column1'].iloc[2]])
Z2 = len(df2.loc[df1['Column1'] >= DF2['Column1'].iloc[3]])
A3 = len(df3.loc[df3['Column1'] <= DF3['Column1'].iloc[2]])
Z3 = len(df3.loc[df3['Column1'] >= DF3['Column1'].iloc[3]])

It is "restricted" to generate variables in for loop (you can do that, but it's better to avoid. See other posts: post_1, post_2).
Instead use this code to achieve your goal without generating as many variables as your needs (actually generate only the values in the for loop):
# Lists of your dataframes
Hanimals = [H26, H45, H46, H47, H51, H58, H64, H65]
Ianimals = [I26, I45, I46, I47, I51, I58, I64, I65]
# Generate your series using for loops iterating through your lists above
BPM = pd.DataFrame({'BPM_Base':pd.Series([i_a for i_a in [len(i_h.loc[i_h['EKG-evt'] <=\
i_i[0].iloc[0]]) / 10 for i_h, i_i in zip(Hanimals, Ianimals)]]),
'BPM_Test':pd.Series([i_z for i_z in [len(i_h.loc[i_h['EKG-evt'] >=\
i_i[0].iloc[-1]]) / 30 for i_h, i_i in zip(Hanimals, Ianimals)]])})
UPDATE
A more efficient way (iterate over "animals" lists only once):
# Lists of your dataframes
Hanimals = [H26, H45, H46, H47, H51, H58, H64, H65]
Ianimals = [I26, I45, I46, I47, I51, I58, I64, I65]
# You don't need using pd.Series(),
# just create a list of tuples: [(A26, Z26), (A45, Z45)...] and iterate over it
BPM = pd.DataFrame({'BPM_Base':i[0], 'BPM_Test':i[1]} for i in \
[(len(i_h.loc[i_h['EKG-evt'] <= i_i[0].iloc[0]]) / 10,
len(i_h.loc[i_h['EKG-evt'] >= i_i[0].iloc[-1]]) / 30) \
for i_h, i_i in zip(Hanimals, Ianimals)])

Figured out a better way to do this that fits my needs. This is mainly so that I will be able to find my method.
# Change/Add animals and conditions here, make sure they match up directly
Animal = ['26','45','46','47','51','58','64','65', '69','72','84']
Cond = ['Stomach','Intestine','Stomach','Stomach','Intestine','Intestine','Intestine','Stomach','Cut','Cut','Cut']
d = []
def CuSO4():
for i in Animal:
# load in Spike data
A = pd.read_csv('TXT/INJ/' + i + '.txt',delimiter=r"\s+", skiprows = 15, header = None, usecols = range(1))
B = pd.read_csv('TXT/EKG/' + i + '.txt', skiprows = 3)
C = pd.read_csv('TXT/ESO/' + i + '.txt', skiprows = 3)
D = pd.read_csv('TXT/TRACH/' + i + '.txt', skiprows = 3)
E = pd.read_csv('TXT/BP/' + i + '.txt', delimiter=r"\s+").rename(columns={"4 BP": "BP"})
# Count number of beats before/after injection, divide by 10/30 minutes for average BPM.
F = len(B.loc[B['EKG-evt'] <= A[0].iloc[0]])/10
G = len(B.loc[B['EKG-evt'] >= A[0].iloc[-1]])/30
# Count number of esophogeal events before/after injection
H = len(C.loc[C['Eso-evt'] <= A[0].iloc[0]])
I = len(C.loc[C['Eso-evt'] >= A[0].iloc[-1]])
# Find Trach events after injection
J = D.loc[D['Trach-evt'] >= A[0].iloc[-1]]
# Count number of breaths before/after injection, divide by 10/30 min for average breaths/min
K = len(D.loc[D['Trach-evt'] <= A[0].iloc[0]])/10
L = len(J)/30
# Use Trach events from J to find the number of EE
M = pd.DataFrame(pybursts.kleinberg(J['Trach-evt'], s=4, gamma=0.1))
N = M.last_valid_index()
# Use N and M to determine the latency, set value to MaxTime (1800s)if EE = 0
O = 1800 if N == 0 else M.iloc[1][1] - A[0].iloc[-1]
# Find BP value before/after injection, then determine the mean value
P = E.loc[E['Time'] <= A[0].iloc[0]]
Q = E.loc[E['Time'] >= A[0].iloc[-1]]
R = P["BP"].mean()
S = Q["BP"].mean()
# Combine all factors into one DF
d.append({'EE' : N, 'EE-lat' : O,
'BPM_Base' : F, 'BPM_Test' : G,
'Eso_Base' : H, 'Eso_Test' : I,
'Trach_Base' : K, 'Trach_Test' : L,
'BP_Base' : R, 'BP_Test' : S})
CuSO4()
# Create shell DF with animal numbers and their conditions.
DF = pd.DataFrame({'Animal' : pd.Series(Animal), 'Cond' : pd.Series(Cond)})
# Pull appended DF from CuSO4 and make it a pd.DF
Df = pd.DataFrame(d)
# Combine the two DF's
df = pd.concat([DF, Df], axis=1)
df

Loop gives correct output if script run in steps but not when skript runs from scratch

My script contains a while loop:
import numpy as np
ptf = 200 #profiltiefe
dz = 5
DsD0 = 0.02
D0 = 0.16 #cm2/sec bei 20°C
Ds= D0 * DsD0
eps= 0.3
R= 8.314
Ptot=101300
Te = 20
dt = 120
modellzeit = 86400*3
J=modellzeit/dt
PiA = 0.04
CA = PiA*1000/Ptot
respannual = 10 #t C ha-1 a-1
respmol = respannual/12*10**6/10000/(365*24)
respvol_SI = respmol * R * (Te+273)/(Ptot*3600)
respvol = respvol_SI * 100
I= ptf/dz
S = np.zeros(40)
for i in range(40):
if i <= 4:
S[i] = respvol/(2*4*dz)
if i > 4 and i <= 8:
S[i] = respvol/(4*4*dz)
if i > 8 and i <= 16:
S[i] = respvol/(8*4*dz)
Calt = np.repeat(CA,len(range(int(I+1))))
Cakt = Calt.copy()
res_out = range(1,int(J),1)
Cresult = np.array(Cakt)
faktor = dt*Ds/(dz*dz*eps)
timestep=0
#%%
while timestep <= J:
timestep = timestep+1
for ii in range(int(I)):
if ii == 0:
s1 = Calt[ii+1]
s2 = -3 * Calt[ii]
s3 = 2 * CA
elif ii == int(I-1):
s1 = 0
s2 = -1 * Calt[ii]
s3 = Calt[ii-1]
else:
s1 = Calt[ii+1]
s2 = -2 * Calt[ii]
s3 = Calt[ii-1]
result = Calt[ii]+S[ii]*dt/eps+faktor*(s1+s2+s3)
print(result)
Cakt[ii] = result
Cresult = np.vstack([Cresult,Cakt])
Calt = Cakt.copy()
What is intersting: If I run the complete script print(result) gives me different (and incorrect) values. But if I add all my constants before and run the loop part of the code (shown above) the loop performs well and delivers the output I want.
Any idea why this might happen?
I am on Python 2.7.5 / Mac OS X 10.9.1/ Spyder 2.

You are using python 2.7.5, so division of integers gives integer results. I suspect that is not what you want. For example, the term respannual/12 will be 0, so respmol is 0. Either change your constants to floating point values (e.g. respannual = 10.0), or add from __future__ import division at the top of your script.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Cross correlating with time lag using python - python

Related

Performing Excel formula in Python

How to structure python programs? Tried making it more structured, now runs 13 times slower

How to parallelize a while loop?

Condensing repeat code with a "for" statement using strings - Python

Loop gives correct output if script run in steps but not when skript runs from scratch

Categories

Resources