Adding Numbers in a Dictionary - python

I am relatively new to python and have been asked to write a code where I enter a specific sequence of DNA and get various values for that sequence in return. I made a dictionary and am having trouble making the code recall the dictionary for values. The code is as follows:
import math
def dnaTM(seqobj):
if isinstance(seqobj,SeqRecord):
seq = seqobj.seq.tostring().upper()
elif isinstance(seqobj,Seq):
seq = seqobj.tostring().upper()
elif isinstance(seqobj,str):
seq = seqobj.upper()
C_primer = .000001
C_Monovalent = 1
percentage_annealed = 50
percentage_annealed = percentage_annealed/100.0
R = 1.987
deltaH = dict()
deltaS = dict()
deltaH = { 'AA/TT': -7.9, 'AT/TA': -7.2, 'TA/AT': -7.2, 'CA/GT': -8.5,
'GT/CA': -8.4, 'CT/GA': -7.8, 'GA/CT': -8.2, 'CG/GC': -10.6,
'GC/CG': -9.8, 'GG/CC': -8.0, 'GC_init': 0.1, 'AT_init': 2.3}
deltaS = { 'AA/TT': -22.2, 'AT/TA': -20.4, 'TA/AT': -21.3, 'CA/GT': -22.7,
'GT/CA': -22.4, 'CT/GA': -21.0, 'GA/CT': -22.2, 'CG/GC': -27.2,
'GC/CG': -24.4, 'GG/CC': -19.9, 'GC_init': -2.8, 'AT_init': 4.1,
'sym': -1.4}
seqLength = len(seq)
dH = 0.2 + deltaH[str(seq[0])] + deltaH[str(seq[len(seq)-1])]
dS = -5.7 + deltaS[seq[0]] + deltaS[seq[len(seq)-1]]
for i in range(0, seqLength - 1):
dH += deltaH[str(seq[i:i+2])]
dS += deltaS[seq[i:i+2]]
dS = dS + 0.368 * seqLength * math.log(1)
Tm =(dH * 1000) / (dS + R * (math.log(C_primer*(1- percentage_annealed)/percenta\
ge_annealed)-21.4164))
return Tm

Related

Mpi4py: printing and plotting during execution

I recently start working with mpi in order to use it to accelerate some code (a sph/gravity simulation).
So far it seem to be working. The position of my particle have change between the beginning and the end of the program, task manager show that several python thread are working...
But i have two problem:
1/ i can't print text during the execution of the program. The text is only print once it's finished
2/ I'm unable to create a graph using matplotlib.
In both case, neither python nor mpi return any error. My guess, for the text at least, is that its print when the execution end with mpiexec.
thanks for any insight !
here the code i run it with !mpiexec -n 8 python mpi4py_test.py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from mpi4py import MPI
class particule:
def __init__(self, h, pos, vel = [0, 0], m = 1, P = 0, density = 0, acc = [0, 0]):
self.x, self.y = pos
self.u, self.v = vel
self.acc_x, self.acc_y = acc
self.m = m
self.h = h # kernel lenght
self.P = P # Pressure
self.density = density
def kernel(part_a, part_b ):
" monaghan cubic spline "
cst = 1 / (np.pi * part_a.h**3)
dist = np.sqrt((part_b.x-part_a.x)**2 + (part_b.y-part_a.y)**2 )
r = dist / h
tmp = 0
if r < 1:
tmp = cst * (1 - 3/2* r**2 + 3/4*r**3)
elif r < 2:
tmp = cst * (1/4*(2-r)**3)
return tmp
def grad_kernel(part_a, part_b):
cst = 1 / (np.pi * part_a.h**3)
dist = np.sqrt((part_b.x-part_a.x)**2 + (part_b.y-part_a.y)**2 )
r = dist / part_a.h
tmp = 0
if r < 1:
tmp = cst * (9/4 * r**2 - 3*r)
elif r < 2:
tmp = cst * (-3/4*(2-r)**2)
return tmp
class hash_grid:
def __init__(self, cell_size):
self.cell_size = cell_size
self.cell = {}
self.part_list = []
def key(self, part):
return (part.x//self.cell_size,
part.y//self.cell_size)
def add(self, part):
tmp = self.key(part)
self.cell.setdefault(tmp, []).append(part)
self.part_list.append(part)
def neighbours(self, part):
idx = self.key(part)
cell_N = None
cell_S = None
cell_E = None
cell_W = None
cell_NE = None
cell_NW = None
cell_SE = None
cell_SW = None
if (idx[0], idx[1]+1) in self.cell: cell_N = (idx[0], idx[1]+1)
if (idx[0], idx[1]-1) in self.cell: cell_S = (idx[0], idx[1]-1)
if (idx[0]+1, idx[1]) in self.cell: cell_E = (idx[0]+1, idx[1])
if (idx[0]-1, idx[1]) in self.cell: cell_W = (idx[0]-1, idx[1])
if (idx[0]+1, idx[1]+1) in self.cell: cell_NE = (idx[0]+1, idx[1]+1)
if (idx[0]-1, idx[1]+1) in self.cell: cell_NW = (idx[0]-1, idx[1]+1)
if (idx[0]+1, idx[1]-1) in self.cell: cell_SE = (idx[0]+1, idx[1]-1)
if (idx[0]-1, idx[1]-1) in self.cell: cell_SW = (idx[0]-1, idx[1]-1)
return [value for cel in (idx, cell_N, cell_S, cell_E, cell_W, cell_NE, cell_NW, cell_SE, cell_SW) if cel!=None for value in self.cell.get(cel) ]
def split(to_split, nb_chunk):
"take a list and split it most evenly possible"
result = []
q, r = divmod(len(to_split), nb_chunk)
curr = 0
last = 0
for i in range(nb_chunk):
if r>0:
last = curr + q + 1
result.append(to_split[curr: last])
r = r-1
curr = last
else:
last = curr + q
result.append(to_split[curr: last])
curr = last
return result
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
if rank == 0:
n_part = 2000
h = 2
grid = hash_grid(h)
points = np.zeros((n_part, 2))
sim_range = 20
dt = 0.005
n_iter = 2500
k = 5
for _i in range(n_part):
pos = np.random.uniform(-sim_range, sim_range, 2)
vel = np.random.uniform(-.2, .2, 2)
p = particule(h, pos, vel)
grid.add(p)
points[_i, :] = pos
for part in grid.part_list:
part.density = 0
part.P = 0
for p in grid.neighbours(part):
part.density = part.density + p.m * kernel(part, p)
part.P = k * ( part.density )**2 # - density_0)
data = split(grid.part_list,8)
for t in range(10):
if rank == 0 :
"1 - verlet 1/2 ; serial"
for i, part in enumerate(grid.part_list):
part.u, part.v = part.u + part.acc_x * dt/2, part.v + part.acc_y * dt/2
part.x, part.y = part.x + part.u * dt, part.y + part.v * dt
"2 - update grid ; serial"
jnk = grid.part_list
del grid
grid=hash_grid(h)
for p in jnk:
grid.add(p)
grid_bcast = grid
data_b = grid
chunk_of_part_list = split(grid.part_list,8)
else:
grid_bcast=None
chunk_of_part_list = None
data_b=None
grid_bcast = comm.bcast(grid_bcast, root=0)
chunk_of_part_list = comm.scatter(chunk_of_part_list, root=0)
"3 - get acc ; parallel"
for part in chunk_of_part_list:
part.acc_x = 0
part.acc_y = 0
for p in grid_bcast.neighbours(part):
if p != part:
r = np.sqrt((p.x-part.x)**2 + (p.y-part.y)**2)
if r==0: pass
else:
part.acc_x = part.acc_x - p.m * (part.P/part.density**2 + p.P/p.density**2) * grad_kernel(part, p) * (p.x - part.x)/r
part.acc_y = part.acc_y - p.m * (part.P/part.density**2 + p.P/p.density**2) * grad_kernel(part, p) * (p.y - part.y)/r
dist = np.sqrt(part.x**2+part.y**2)
part.acc_x = part.acc_x - .5 * part.x -1*part.u
part.acc_y = part.acc_y - .5 * part.y -1*part.v
chunk_of_part_list = comm.gather(chunk_of_part_list,root=0)
"4 - verlet 2/2 ; serial"
if rank == 0:
grid.part_list = list(matplotlib.cbook.flatten(chunk_of_part_list))
for i, part in enumerate(grid.part_list):
part.u, part.v = part.u + part.acc_x * dt/2, part.v + part.acc_y * dt/2
points[i,0], points[i,1] = part.x, part.y # for the figure
if rank==0:
print(t, flush=True)
if rank == 0:
print('point 0', points[0,:])
fig = plt.figure()
ax = fig.add_subplot()
sc = ax.scatter(points[:, 0], points[:, 1],s=3)
ax.set_aspect('equal', 'box')
ax.set_xlim(-sim_range,sim_range)
ax.set_ylim(-sim_range,sim_range)

GEKKO - How to fix Python Gekko Max Equation error - number of elements

I have developed a script using Gekko optimisation functions. The script below runs for a number of elements. I tested the optimisation algorithm for 20 and 47 cells (shapefile dataset) and the script achieves a solution. However, when I test for a bigger dataset, with 160 elements, for example, the following error message is shown:
“APM model error: string > 15000 characters
Consider breaking up the line into multiple equations”
I read some suggestions to fix this problem. I tried using m.sum, but the problem persists.
Please, could you help me fix this problem?
Please, find below the we transfer link to download the datasets with 47 cells and with 160 cells.
https://wetransfer.com/downloads/64cc631237adacc926c67f56124b327a20210928212223/d8a2d7
Thank you
Alexandre.
import geopandas as gpd
import time
import csv
from gekko import GEKKO
import numpy as np
import math
import pandas as pd
m = GEKKO()
A = -0.00000536
B = -0.0000291
E = 0.4040771
r = 0.085
input_path = 'D:/Alexandre/shapes/Threats/Prototype/BHO50k/Velhas_BHO50k1summ4_47cells.shp'
output_folder = 'D:/Alexandre/shapes/Threats/Prototype/Small_area/resultados'
input_layer = gpd.read_file(input_path)
input_layer = input_layer[
['cocursodag', 'cobacia', 'nuareacont', 'nudistbact', 'D0c', 'Ki0', 'Kj0', 'nuareamont', 'deltai', 'It',
'cost_op_BR', 'Ii_ub', 'Itj', 'cj', 'deltaj2']]
input_layer = input_layer.astype({'cobacia': 'string', 'cocursodag': 'string'})
count_input_feat = input_layer.shape[0]
row=count_input_feat
col=10
input_cobacia = {}
ubi = {}
numareacont = {}
Ki0 = {}
Kj0 = {}
X = {}
deltai2 = {}
ai = {}
aj = {}
D0 = {}
Itj = {}
It = {}
deltaj = {}
for row1 in input_layer.iterrows():
i = row1[0]
input_cobacia[i] = row1[1]['cobacia']
Ki0[i] = row1[1]['Ki0']+0.001
Kj0[i] = row1[1]['Kj0']
X[i] = row1[1]['nuareamont']
deltai2[i] = row1[1]['deltai']
ai[i] = 5423304*(pow(X[i],-0.1406852))
aj[i] = row1[1]['cj']*100 + row1[1]['cost_op_BR']*100
ubi[i] = row1[1]['Ii_ub']
numareacont[i] = row1[1]['nuareacont']
D0[i] = row1[1]['D0c']
It[i] = row1[1]['It']
Itj[i] = row1[1]['Itj']
if Itj[i]<1:
deltaj[i] = row1[1]['deltaj2'] * 0.0001
elif Itj[i]<2:
deltaj[i] = row1[1]['deltaj2'] * 0.0001
else:
deltaj[i] = row1[1]['deltaj2'] * 0.0001
Ii = m.Array(m.Var, (row, col))
Ij = m.Array(m.Var, (row, col))
for i in range(row):
for j in range(col):
if It[i] == 0:
Ii[i, j].value = 0
Ii[i, j].lower = 0
Ii[i, j].upper = 5
Ij[i,j].value = 0
Ij[i,j].lower = 0
Ij[i,j].upper = numareacont[i]*0.05*Itj[i]/3.704545
elif It[i] <= 2:
Ii[i, j].value = 0
Ii[i, j].lower = 0
Ii[i, j].upper = 10
Ij[i, j].value = 0
Ij[i, j].lower = 0
Ij[i, j].upper = numareacont[i]*0.05*Itj[i]/3.704545
elif It[i] <= 2.5:
Ii[i, j].value = 0
Ii[i, j].lower = 0
Ii[i, j].upper = 15
Ij[i, j].value = 0
Ij[i, j].lower = 0
Ij[i, j].upper = numareacont[i]*0.05*Itj[i]/3.704545
elif It[i] <= 3:
Ii[i, j].value = 0
Ii[i, j].lower = 0
Ii[i, j].upper = 15
Ij[i, j].value = 0
Ij[i, j].lower = 0
Ij[i, j].upper = numareacont[i]*0.05*Itj[i]/3.704545
else:
Ii[i,j].value = 0
Ii[i,j].lower = 0
Ii[i,j].upper = 20
Ij[i,j].value = 0
Ij[i,j].lower = 0
Ij[i,j].upper = numareacont[i]*0.05*Itj[i]/3.704545
Ki = m.Array(m.Var, (row, col))
Kj = m.Array(m.Var, (row, col))
indicator = m.Array(m.Var, (row, col))
p = 2
numerator = m.Array(m.Var, (row, col))
denominator = m.Array(m.Var, (row, col))
for row2 in input_layer.iterrows():
input_cobacia2 = row2[1]['cobacia']
input_cocursodag = row2[1]['cocursodag']
input_distance = row2[1]['nudistbact']
numerator = 0
denominator = 0
exp = f"cobacia > '{input_cobacia2}' and cocursodag.str.startswith('{input_cocursodag}')"
for j in range(col):
for target_feat in input_layer.query(exp).iterrows():
i=target_feat[0]
target_green_area = Ij[i,j]
target_distance = target_feat[1]['nudistbact']
distance = float(target_distance) - float(input_distance)
idw = 1 / (distance + 1) ** p
numerator += target_green_area * idw
denominator += idw
i=row2[0]
sum = Ij[i,j]
if denominator:
indicator[i,j] = numerator / denominator + sum
else:
indicator[i,j] = sum
D0F = m.Array(m.Var, (row, col))
for i in range(row):
def constraintD0(x):
return x - 0.2
for j in range(col):
if j == 0:
m.fix(Ki[i,j],val = Ki0[i])
Ki[i,j].lower = 0
Ki[i,j].upper = 500000
m.fix(Kj[i,j], val = Kj0[i])
Kj[i,j].lower = 0
Kj[i,j].upper = 100000
m.Equation(D0F[i, j] == A * Ki[i, j] + B * Kj[i, j] + E)
D0[i] = D0F[i, j]
else:
D0F[i,j].lower = 0
D0F[i, j].upper = 1
Ki[i,j].lower = 0
Ki[i,j].upper = 500000
Kj[i, j].lower = 0
Kj[i, j].upper = 100000
m.Equation(Ki[i,j] - Ki[i,j-1] == Ii[i,j] - deltai2[i] * Ki[i,j-1])
m.Equation(Kj[i,j] - Kj[i,j-1] == Ij[i,j] + deltaj[i] * Kj[i,j-1]+indicator[i,j])
m.Equation(D0F[i,j] == A*Ki[i,j] + B*Kj[i,j] + E)
m.Equation(D0F[i,j]<=D0[i])
dep = 1 / (1+r)
z1 = m.sum([m.sum([pow(dep, j)*(ai[i]*Ii[i,j]+aj[i]*Ij[i,j]) for i in range(row)]) for j in range(col)])
# Objective
m.Obj(z1)
m.options.IMODE = 3
m.options.SOLVER = 3
m.options.DIAGLEVEL = 1
m.options.REDUCE=3
try:
m.solve() # solve
# Outputs
output_Ki = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_Kj = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_Ii = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_Ij = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_D0 = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_ai = pd.DataFrame(columns=['cobacia'] + list(range(col)))
output_aj = pd.DataFrame(columns=['cobacia'] + list(range(col)))
for i in range(row):
for j in range(col):
print(Ki)
output_Ii.loc[i, 'cobacia'] = input_cobacia[i]
output_Ii.loc[i, j] = Ii[i,j].value[0]
output_Ij.loc[i, 'cobacia'] = input_cobacia[i]
output_Ij.loc[i, j] = Ij[i,j].value[0]
output_Ki.loc[i, 'cobacia'] = input_cobacia[i]
output_Ki.loc[i, j] = Ki[i,j].value[0]
output_Kj.loc[i, 'cobacia'] = input_cobacia[i]
output_Kj.loc[i, j] = Kj[i,j].value[0]
output_D0.loc[i, 'cobacia'] = input_cobacia[i]
output_D0.loc[i, j] = D0F[i, j].value[0]
output_ai.loc[i, 'cobacia'] = input_cobacia[i]
output_ai.loc[i, j] = ai[i]
output_aj.loc[i, 'cobacia'] = input_cobacia[i]
output_aj.loc[i, j] = aj[i]
df_outputIi = pd.DataFrame(output_Ii)
df_outputIj = pd.DataFrame(output_Ij)
df_outputKi = pd.DataFrame(output_Ki)
df_outputKj = pd.DataFrame(output_Kj)
df_outputD0 = pd.DataFrame(output_D0)
df_outputai = pd.DataFrame(output_ai)
df_outputaj = pd.DataFrame(output_aj)
with pd.ExcelWriter('output.xlsx') as writer:
df_outputIi.to_excel(writer, sheet_name="resultado Ii")
df_outputIj.to_excel(writer, sheet_name="resultado Ij")
df_outputKi.to_excel(writer, sheet_name="resultado Ki")
df_outputKj.to_excel(writer, sheet_name="resultado Kj")
df_outputD0.to_excel(writer, sheet_name="resultado D0")
df_outputai.to_excel(writer, sheet_name="ai")
df_outputaj.to_excel(writer, sheet_name="aj")
except:
print('Not successful')
from gekko.apm import get_file
print(m._server)
print(m._model_name)
f = get_file(m._server,m._model_name,'infeasibilities.txt')
f = f.decode().replace('\r','')
with open('infeasibilities.txt', 'w') as fl:
fl.write(str(f))
for i in range(row):
for j in range(col):
print(Ki[i,j].value)
print(Kj[i,j].value)
print(D0F[i,j].value)```
Look at the APMonitor model file gk0_model.apm in m.path by opening the run folder with m.open_folder(). Start with a smaller problem size and observe the equation size as the problem scales up. This limitation (<15,000 characters per equation) is built-in to encourage more efficient model expression and solution. An intermediate with m.Intermediate() can be used to more effectively express the model. Additional information on Intermediates is available in the documentation.

How can I know the dimension of my variable?

I get this error :
ValueError: operands could not be broadcast together with shapes (365,) (2,)
But I'm surprised by this (2,).
How do I know which variable does this dimension (2,) please?
Because none of my variables should have it.
Thank you for your help !
Here, you can see the first script, where I define my function. It include a loop and also another function so I don't know if I can.
I have a lot of variable with (365, ) for the dimension because, it's function of the time, so for 365 days.
I have some fixed variable like the soil parameter, so the dimension for these is (1,)
But I don't know which variable get (2,) dimension ?
import pandas as pd
import numpy as np
def SA(MO = 0,
ETPr = 0,
SWSa = 0,
pb = 1.70 ):
DB = pd.read_excel("~/Documents/Spider/Data/data_base.xlsx", sheet_name = "DB")
DB1 = pd.read_excel("~/Documents/Spider/Bilan_Courgette.xlsx", sheet_name = "sol")
DB2 = pd.read_excel("~/Documents/Spider/Bilan_Courgette.xlsx", sheet_name = "culture")
#Calculs inter. pour déterminer ET0/day
#Array qui reprend "date" en une série 1 -> 365
JourDeLAnnee = pd.Series(range(1,366))
#Mauves
dist_TS = 1+(0.033*np.cos(0.0172 * JourDeLAnnee))
decli_So = 0.409*np.sin((0.0172 * JourDeLAnnee)-1.39)
lat = 0.87266463
ang_Hor_So =np.arccos(-np.tan(lat)*np.tan(decli_So))
gamma = 0.067
#Jaunes
delta = 2504*np.exp((17.27*DB.tsa_by_day)/(DB.tsa_by_day +237.3))/(DB.tsa_by_day +237.3)**2
rg = DB.ens_by_day / 1000000 * 86400
ra = 37.6 * dist_TS * ((ang_Hor_So * np.sin(lat) * np.sin(decli_So)) + \
(np.cos(lat) * np.cos(decli_So) * np.sin(ang_Hor_So)))
rso = (0.75 + (2*0.00001*120)) * ra
tw =(DB.tsa_by_day * np.arctan(0.151977 * ((DB.hra_by_day + 8.313659)**0.5))) + \
np.arctan(DB.tsa_by_day + DB.hra_by_day) - np.arctan(DB.hra_by_day - 1.676331) + \
(0.00391838 * ((DB.hra_by_day)**1.5) * np.arctan(0.023101 * DB.hra_by_day)) - 4.686035
ed = (0.611 * np.exp((17.27 * tw) / (tw + 237.3))) - (0.0008 *(DB.tsa_by_day-tw) * 101.325)
ea =((0.611 * np.exp((17.27*DB.tsa_max) / (DB.tsa_max + 237.3))) + \
(0.611 * np.exp((17.27 * DB.tsa_min) / (DB.tsa_min +237.3)))) / 2.0
rn = (0.77 * rg) - (((1.35 * (rg / rso)) - 0.35) \
* (0.34 - (0.14 * (ed**0.5))) * (4.9E-9) * ((((273+DB.tsa_max)**4)+((273+DB.tsa_min)**4))/2))
#Calcul de G
from typing import List
def get_g_constant(tsa_by_day: List[float], day: int):
assert day >= 1
return 0.38 * (tsa_by_day[day] - tsa_by_day[day-1])
def get_g_for_year(tsa_by_day: List[int]) -> List[float]:
g_list = []
for i in range(1, len(tsa_by_day)):
g_value = get_g_constant(tsa_by_day, i)
g_list.append(g_value)
return g_list
G = get_g_for_year(DB.tsa_by_day)
G = [DB.tsa_by_day[0]] + G
#Le fameux ET0
ET0 = ((0.408 * delta * (rn - G)) + (gamma * (900 /(DB.tsa_by_day + 273)) * DB.vtt_by_day * (ea - ed))) / \
(delta + (0.067*(1+(0.34 * DB.vtt_by_day))))
# Calcul des paramètres du sol
Profil = 500
pb = 100 / ((MO / 224000) + ((100-MO) / (1.64)))
Os = 0.6355+0.0013* DB1.A -0.1631* pb
Or = 0
lnα = (-4.3003) - (0.0097*DB1.A) + (0.0138* DB1.S ) - (0.0992*MO)
lnn = -1.0846-0.0236 * DB1.A -0.0085 * DB1.S +0.0001 * (DB1.S)**2
nn = np.exp(lnn) + 1
m = 1 - (1/nn)
lnK0 = 1.9582 + 0.0308*DB1.S - 0.6142* pb - 0.1566*MO
λ = -1.8642 - 0.1317*DB1.A + 0.0067*DB1.S
α = np.exp(lnα)
K0 = np.exp(lnK0)
θPf2 =(((1 + ((α*(10**2.5))**nn))**(-m))*( Os - Or)) + Or
θPf4 =(((1 + ((α*(10**4.2))**nn))**(-m))*( Os - Or)) + Or
SWS = θPf2 - θPf4
diff = SWS*SWSa
aj = diff / 2
θPf2New = θPf2 + aj
θPf4New = θPf4 - aj
#Calcul du volume de stock p à atteindre
p = 0.04 *(5 - ET0) + DB2.ptab[0]
θp =(1 - p) * ( θPf2New - θPf4New )+ θPf4New
Vp = θp * Profil
#Le fameux ETP
import datetime
DateS = datetime.datetime.strptime('30/03/2019','%d/%m/%Y').timetuple().tm_yday
DateR = datetime.datetime.strptime('15/09/2019','%d/%m/%Y').timetuple().tm_yday
ETP=ET0.copy()
for n in range(364):
if n >= (DateS - 1) and n <= (DateR - 1) :
ETP[n] = ET0[n] * DB2.Kc[0]
else:
ETP[n] = ET0[n] * DB2.SolNu[0]
ETP[0] = 0
ETPNew = ET0.copy()
ETPNew = ETP - ETP * ETPr
#Le Bilan Hydrique
Stock = ET0.copy()
θ = ET0.copy()
Drainage = ET0.copy()
Irrigation = ET0.copy()
Se = ET0.copy()
SeC = ET0.copy()
θ[0] = θPf2New
Stock[0] = θ[0]*Profil
for i in range(364) :
Se[i] = (θ[i] - Or)/( Os - Or)
if Se[i] > 1 :
SeC[i] = 1
else:
SeC[i] = Se[i]
Drainage[i] = K0 *(((SeC[i])**λ )*(1-(1- SeC[i]**(nn/(nn-1)))**m)**2)*10
if Vp[i] - Stock[i] > 0 : #Ici stock non défini
Irrigation[i] = Vp[i] - Stock[i]
else:
Irrigation[i] = 0
Stock[i+1] = Stock[i] + DB.plu_by_day[i] - ETPNew[i] - Drainage[i] + Irrigation[i]
θ[i+1] = Stock[i+1] / Profil
return (Irrigation.sum())
After, i use a second script to do a sensitivity analysis. And It's here, when I run this script, I get the error 'ValueError: operands could not be broadcast together with shapes (365,) (2,)'
import numpy as np
from SALib.analyze import sobol
from SALib.sample import saltelli
from test import*
import matplotlib.pyplot as plt
# Set up dictionary with system parameters
problem = {
'num_vars': 4,
'names': ['MO', 'ETPr', 'SWSa', 'K0'],
'bounds': [[0, 10],
[0, 0.04135],
[0, 0.2615],
[1.40, 1.70],
]}
# Array with n's to use
nsamples = np.arange(50, 400, 50)
# Arrays to store the index estimates
S1_estimates = np.zeros([problem['num_vars'],len(nsamples)])
ST_estimates = np.zeros([problem['num_vars'],len(nsamples)])
# Loop through all n values, create sample, evaluate model and estimate S1 & ST
for i in range(len(nsamples)):
print('n= '+ str(nsamples[i]))
# Generate samples
sampleset = saltelli.sample(problem, nsamples[i],calc_second_order=False)
# Run model for all samples
output = [SA(*sampleset[j,:]) for j in range(len(sampleset))]
# Perform analysis
results = sobol.analyze(problem, np.asarray(output), calc_second_order=False,print_to_console=False)
# Store estimates
ST_estimates[:,i]=results['ST']
S1_estimates[:,i]=results['S1']
np.save('ST_estimates.npy', ST_estimates)
np.save('S1_estimates.npy', S1_estimates)
S1_estimates = np.load('S1_estimates.npy')
ST_estimates = np.load('ST_estimates.npy')
# Generate figure showing evolution of indices
fig = plt.figure(figsize=(18,9))
ax1 = fig.add_subplot(1,2,1)
handles = []
for j in range(problem['num_vars']):
handles += ax1.plot(nsamples, S1_estimates[j,:], linewidth=5)
ax1.set_title('Evolution of S1 index estimates', fontsize=20)
ax1.set_ylabel('S1', fontsize=18)
ax1.set_xlabel('Number of samples (n)', fontsize=18)
ax1.tick_params(axis='both', which='major', labelsize=14)
ax2 = fig.add_subplot(1,2,2)
for j in range(problem['num_vars']):
ax2.plot(nsamples, ST_estimates[j,:], linewidth=5)
ax2.set_title('Evolution of ST index estimates', fontsize=20)
ax2.set_ylabel('ST', fontsize=18)
ax2.tick_params(axis='both', which='major', labelsize=14)
ax2.set_xlabel('Number of samples (n)', fontsize=18)
fig.legend(handles, problem['names'], loc = 'right', fontsize=11)
plt.savefig('indexevolution.png')
# Calculate parameter rankings
S1_ranks = np.zeros_like(S1_estimates)
ST_ranks = np.zeros_like(ST_estimates)
for i in range(len(nsamples)):
orderS1 = np.argsort(S1_estimates[:,i])
orderST = np.argsort(ST_estimates[:,i])
S1_ranks[:,i] = orderS1.argsort()
ST_ranks[:,i] = orderST.argsort()
Thank you for your help !

looking to integrate formula into one and present it in a single array (python)

i = [2,3,4]
interest = [1.08,1.0824,1.09,1.095]
yr2 = (interest[0] * interest[1])**(1./i[0]) - 1.
yr3 = (interest[0] * interest[1] * interest[2])**(1./i[1]) - 1.
yr4 = (interest[0] * interest[1] * interest[2] * interest[3])**(1./i[2]) - 1.
I declared 2 arrays (above) but i am looking to display my answer in this format:
yield = [yr2,yr3,yr4]
# data
i = [2,3,4]
interest = [1.08,1.0824,1.09,1.095]
def count_interests(bar, interest):
"""
Returns a list:
[(interest[0] * interest[1]), (interest[0] * interest[1] * interest[2]),
(interest[0] * interest[1] * interest[2] * interest[3])]
"""
res = []
compound = [interest[0]]
print(f'first compound {compound}')
for i in range(len(bar)): # [0,1,2]
inter = compound[i] * interest[i+1]
compound.append(inter)
print(compound)
return compound
compound = count_interests(i, interest)
compound.pop(0)
print(compound)
# [1.168992, 1.2742012800000002, 1.3952504016000002]
zipped = zip(i, compound)
def yr(zip):
res = []
for k, v in zip:
temp = v ** (1./k) - 1
res.append(temp)
return res
result = yr(zipped)
Result
[0.0811993340730468, 0.08412496573593131, 0.08683355662001646]

Spectral analysis on HRV data with LombScargle in Python

I'm working with RR peaks and want to derive the frequency domain measures for HRV to recreate the results from the native C package by Physionet (WFDB tools). Both signal processing and spectral analysis are new fields for me, but after a long week with trial an error I've hacked together some code based on the Astropy module after trying several other solutions.
from astropy.stats import LombScargle
import random
dy = 0.1 * random.randint(1,100)
t = drive01["time"].values
y = drive01["intervals"].values
frequency, power = LombScargle(t, y,dy).autopower(minimum_frequency=0.0,maximum_frequency=4)
plt.plot(frequency, power)
This creates a plot that looks quite similar to the plot from Physionets package.
Physionets HRV tools with the code get_hrv makes this plot
Then by calculating common frequency domain measures I get quite different results.
Pxx = np.nan_to_num(power)
Fxx = np.nan_to_num(frequency)
ulf = 0.003
vlf = 0.04
lf = 0.15
hf = 0.4
Fs = 15.5 # the sampling rate of the drive file
# find the indexes corresponding to the VLF, LF, and HF bands
vlf_freq_band = (Fxx >= ulf) & (Fxx <= vlf)
lf_freq_band = (Fxx >= vlf) & (Fxx <= lf)
hf_freq_band = (Fxx >= lf) & (Fxx <= hf)
tp_freq_band = (Fxx >= 0) & (Fxx <= hf)
# Calculate the area under the given frequency band
dy = 1.0 / Fs
VLF = np.trapz(y=abs(Pxx[vlf_freq_band]), x=None, dx=dy)
LF = np.trapz(y=abs(Pxx[lf_freq_band]), x=None, dx=dy)
HF = np.trapz(y=abs(Pxx[hf_freq_band]), x=None, dx=dy)
TP = np.trapz(y=abs(Pxx[tp_freq_band]), x=None, dx=dy)
LF_HF = float(LF) / HF
Python
'HF': 0.10918703853414605,
'LF': 0.050074418080717789,
'LF/HF': 0.45861137689028925,
'TP': 0.20150514290250854,
'VLF': 0.025953350304821571
From the Physionet package:
TOT PWR = 0.0185973
VLF PWR = 0.00372733
LF PWR = 0.00472635
HF PWR = 0.0101436
LF/HF = 0.465944
When comparing the results it looks like this:
Python Physionet
TP 0.201505143 0.0185973 Quite similar + decimal dif
HF 0.109187039 0.0101436 Quite similar + decimal dif
LF 0.050074418 0.00472635 Quite similar + decimal dif
VLF 0.02595335 0.00372733 Not similar
LF/HF 0.458611377 0.465944 Quite similar
The calculations in Python are based on the code from another Stackoverflow post but the fix he got from the respondent is based on a python module I'm not able to get working and he is not using the Lomb Periodgram. I'm very open for trying something else as well, as long as its working with uneven samples.
the data I'm working with is the drivedb from Physionet and I've used the Physionet packages to make a text file with RR peaks and time which is read into a Pandas DataFrame. The textfile can be found here
LombScargle based on the Astropy caculate power different with C package by Physionet (WFDB tools). I write lombscargle again in python and result the same with C package by Physionet (WFDB tools).
import numpy as np
import os
import math
import csv
from itertools import zip_longest
import time
DATA_PATH = '/home/quangpc/Desktop/Data/PhysionetData/mitdb/'
class FreqDomainClass:
#staticmethod
def power(freq, mag):
lo = [0, 0.0033, 0.04, 0.15]
hi = [0.0033, 0.04, 0.15, 0.4]
pr = np.zeros(4)
nbands = 4
for index in range(0, len(freq)):
pwr = np.power(mag[index], 2)
for n in range(0, nbands):
if (freq[index] >= lo[n]) and freq[index] <= hi[n]:
pr[n] += pwr
break
return pr
#staticmethod
def avevar(y):
var = 0.0
ep = 0.0
ave = np.mean(y)
for i in range(len(y)):
s = y[i] - ave
ep += s
var += s * s
var = (var - ep * ep / len(y)) / (len(y) - 1)
return var
def lomb(self, t, h, ofac, hifac):
period = max(t) - min(t)
z = h - np.mean(h)
f = np.arange(1 / (period * ofac), hifac * len(h) / (2 * period), 1 / (period * ofac))
f = f[:int(len(f) / 2) + 1]
f = np.reshape(f, (len(f), -1))
w = 2 * np.pi * f
lenght_t = len(t)
t = np.reshape(t, (lenght_t, -1))
t = np.transpose(t)
tau = np.arctan2(np.sum(np.sin(2 * w * t), axis=1), np.sum(np.cos(2 * w * t), axis=1)) / (2 * w)
tau = np.diag(tau)
tau = np.reshape(tau, (len(tau), -1))
tau = np.tile(tau, (1, lenght_t))
cos = np.cos(w * (t - tau))
sin = np.sin(w * (t - tau))
pc = np.power(np.sum(z * cos, axis=1), 2)
ps = np.power(np.sum(z * sin, axis=1), 2)
cs = pc / np.sum(np.power(cos, 2), axis=1)
ss = ps / np.sum(np.power(sin, 2), axis=1)
p = cs + ss
pwr = self.avevar(h)
nout = len(h)
p = p / (2 * pwr)
p = p / (nout / (2.0 * pwr))
return f, np.sqrt(p)
def lomb_for(self, t, h, ofac, hifac):
period = max(t) - min(t)
f = np.arange(1 / (period * ofac), hifac * len(h) / (2 * period), 1 / (period * ofac))
f = f[:int(len(f) / 2) + 1]
z = h - np.mean(h)
p = np.zeros(len(f))
for i in range(len(f)):
w = 2 * np.pi * f[i]
if w > 0:
twt = 2 * w * t
y = sum(np.sin(twt))
x = sum(np.cos(twt))
tau = math.atan2(y, x) / (2 * w)
wtmt = w * (t - tau)
cs = np.power(sum(np.multiply(z, np.cos(wtmt))), 2) / sum(np.power((np.cos(wtmt)), 2))
ss = np.power(sum(np.multiply(z, np.sin(wtmt))), 2) / sum(np.power((np.sin(wtmt)), 2))
p[i] = cs + ss
else:
p[i] = np.power(sum(np.multiply(z, t)), 1) / sum(np.power(t), 1)
pwr = self.avevar(h)
nout = len(h)
p = p / (2 * pwr)
p = p / (nout / (2.0 * pwr))
return f, np.sqrt(p)
def freq_domain(self, time, rr_intervals):
frequency, mag0 = self.lomb(time, rr_intervals, 4.0, 2.0)
frequency = np.round(frequency, 8)
mag0 = mag0 / 2.0
mag0 = np.round(mag0, 8)
result = self.power(frequency, mag0)
return result[0], result[1], result[2], result[3], result[0] + result[1] + result[2] + result[3], \
result[2] / result[3]
def time_domain(time, rr_intervals, ann):
sum_rr = 0.0
sum_rr2 = 0.0
rmssd = 0.0
totnn = 0
totnnn = 0
nrr = 1
totrr = 1
nnx = 0
nnn = 0
lastann = ann[0]
lastrr = int(rr_intervals[0])
lenght = 300
t = float(time[0])
end = t + lenght
i = 0
ratbuf = np.zeros(2400)
avbuf = np.zeros(2400)
sdbuf = np.zeros(2400)
for x in range(1, len(ann)):
t = float(time[x])
while t > (end+lenght):
i += 1
end += lenght
if t >= end:
if nnn > 1:
ratbuf[i] = nnn/nrr
sdbuf[i] = np.sqrt(((sdbuf[i] - avbuf[i]*avbuf[i]/nnn) / (nnn-1)))
avbuf[i] /= nnn
i += 1
nnn = nrr = 0
end += lenght
nrr += 1
totrr += 1
if ann[x] == 'N' and ann[x-1] == 'N':
rr_intervals[x] = int(rr_intervals[x])
nnn += 1
avbuf[i] += rr_intervals[x]
sdbuf[i] += (rr_intervals[x] * rr_intervals[x])
sum_rr += rr_intervals[x]
sum_rr2 += (rr_intervals[x] * rr_intervals[x])
totnn += 1
if lastann == 'N':
totnnn += 1
rmssd += (rr_intervals[x] - lastrr) * (rr_intervals[x] - lastrr)
# nndif[0] = NNDIF
if abs(rr_intervals[x] - lastrr) - 0.05 > (10 ** -10):
nnx += 1
lastann = ann[x-1]
lastrr = rr_intervals[x]
if totnn == 0:
return 0, 0, 0, 0
sdnn = np.sqrt((sum_rr2 - sum_rr * sum_rr / totnn) / (totnn - 1))
rmssd = np.sqrt(rmssd/totnnn)
pnn50 = nnx / totnnn
if nnn > 1:
ratbuf[i] = nnn / nrr
sdbuf[i] = np.sqrt((sdbuf[i] - avbuf[i] * avbuf[i] / nnn) / (nnn - 1))
avbuf[i] /= nnn
nb = i + 1
sum_rr = 0.0
sum_rr2 = 0.0
k = 0
h = 0
while k < nb:
if ratbuf[k] != 0:
h += 1
sum_rr += avbuf[k]
sum_rr2 += (avbuf[k] * avbuf[k])
k += 1
sdann = np.sqrt((sum_rr2 - sum_rr * sum_rr / h) / (h - 1))
return sdnn, sdann, rmssd, pnn50
def get_result_from_get_hrv(filename):
with open(filename, 'r') as f:
csv_reader = csv.reader(f, delimiter=',')
index = 0
for row in csv_reader:
if index > 0:
output = [s.strip() for s in row[0].split('=') if s]
# print('output = ', output)
if output[0] == 'SDNN':
sdnn = output[1]
if output[0] == 'SDANN':
sdann = output[1]
if output[0] == 'rMSSD':
rmssd = output[1]
if output[0] == 'pNN50':
pnn50 = output[1]
if output[0] == 'ULF PWR':
ulf = output[1]
if output[0] == 'VLF PWR':
vlf = output[1]
if output[0] == 'LF PWR':
lf = output[1]
if output[0] == 'HF PWR':
hf = output[1]
if output[0] == 'TOT PWR':
tp = output[1]
if output[0] == 'LF/HF':
ratio_lf_hf = output[1]
index += 1
return float(sdnn), float(sdann), float(rmssd), float(pnn50), float(ulf), float(vlf), \
float(lf), float(hf), float(tp), float(ratio_lf_hf)
def save_file():
extension = "atr"
result_all = []
file_process = ['File']
sdnn_l = ['sdnn']
sdann_l = ['sdann']
rmssd_l = ['rmssd']
pnn50_l = ['pnn50']
ulf_l = ['ulf']
vlf_l = ['vlf']
lf_l = ['lf']
hf_l = ['hf']
tp_l = ['tp']
ratio_lf_hf_l = ['ratio_lf_hf']
sdnn_l_p = ['sdnn']
sdann_l_p = ['sdann']
rmssd_l_p = ['rmssd']
pnn50_l_p = ['pnn50']
ulf_l_p = ['ulf']
vlf_l_p = ['vlf']
lf_l_p = ['lf']
hf_l_p = ['hf']
tp_l_p = ['tp']
ratio_lf_hf_l_p = ['ratio_lf_hf']
test_file = ['103', '113', '117', '121', '123', '200', '202', '210', '212', '213',
'219', '221', '213', '228', '231', '233', '234',
'101', '106', '108', '112', '114', '115', '116', '119', '122', '201', '203',
'205', '208', '209', '215', '220', '223', '230',
'105', '100']
file_dis = ['109', '111', '118', '124', '207', '214', '232']
for root, dirs, files in os.walk(DATA_PATH):
files = np.sort(files)
for name in files:
if extension in name:
if os.path.basename(name[:-4]) not in test_file:
continue
print('Processing file...', os.path.basename(name))
cur_dir = os.getcwd()
os.chdir(DATA_PATH)
os.system('rrlist {} {} -M -s >{}.rr'.format(extension, name.split('.')[0], name.split('.')[0]))
time_m = []
rr_intervals = []
ann = []
with open(name.split('.')[0] + '.rr', 'r') as rr_file:
for line in rr_file:
time_m.append(line.split(' ')[0])
rr_intervals.append(line.split(' ')[1])
ann.append(line.split(' ')[2].split('\n')[0])
time_m = np.asarray(time_m, dtype=float)
rr_intervals = np.asarray(rr_intervals, dtype=float)
sdnn, sdann, rmssd, pnn50 = time_domain(time_m, rr_intervals, ann)
if sdnn == 0 and sdann == 0 and rmssd == 0 and pnn50 == 0:
print('No result hrv')
file_dis.append(os.path.basename(name[:-4]))
continue
print('sdnn', sdnn)
print('rmssd', rmssd)
print('pnn50', pnn50)
print('sdann', sdann)
time_m = time_m - time_m[0]
time_m = np.round(time_m, 3)
time_nn = []
nn_intervals = []
for i in range(1, len(ann)):
if ann[i] == 'N' and ann[i - 1] == 'N':
nn_intervals.append(rr_intervals[i])
time_nn.append(time_m[i])
time_nn = np.asarray(time_nn, dtype=float)
nn_intervals = np.asarray(nn_intervals, dtype=float)
fc = FreqDomainClass()
ulf, vlf, lf, hf, tp, ratio_lf_hf = fc.freq_domain(time_nn, nn_intervals)
sdnn_l.append(sdnn)
sdann_l.append(sdann)
rmssd_l.append(rmssd)
pnn50_l.append(pnn50)
ulf_l.append(ulf)
vlf_l.append(vlf)
lf_l.append(lf)
hf_l.append(hf)
tp_l.append(tp)
ratio_lf_hf_l.append(ratio_lf_hf)
print('ULF PWR: ', ulf)
print('VLF PWR: ', vlf)
print('LF PWR: ', lf)
print('HF PWR: ', hf)
print('TOT PWR: ', tp)
print('LF/HF: ', ratio_lf_hf)
if os.path.exists('physionet_hrv.txt'):
os.remove('physionet_hrv.txt')
os.system('get_hrv -R ' + name.split('.')[0] + '.rr >> ' + 'physionet_hrv.txt')
sdnn, sdann, rmssd, pnn50, ulf, vlf, lf, hf, tp, ratio_lf_hf = \
get_result_from_get_hrv('physionet_hrv.txt')
file_process.append(os.path.basename(name[:-4]))
sdnn_l_p.append(sdnn)
sdann_l_p.append(sdann)
rmssd_l_p.append(rmssd)
pnn50_l_p.append(pnn50)
ulf_l_p.append(ulf)
vlf_l_p.append(vlf)
lf_l_p.append(lf)
hf_l_p.append(hf)
tp_l_p.append(tp)
ratio_lf_hf_l_p.append(ratio_lf_hf)
os.chdir(cur_dir)
result_all.append(file_process)
result_all.append(sdnn_l)
result_all.append(sdnn_l_p)
result_all.append(sdann_l)
result_all.append(sdann_l_p)
result_all.append(rmssd_l)
result_all.append(rmssd_l_p)
result_all.append(pnn50_l)
result_all.append(pnn50_l_p)
result_all.append(ulf_l)
result_all.append(ulf_l_p)
result_all.append(vlf_l)
result_all.append(vlf_l_p)
result_all.append(lf_l)
result_all.append(lf_l_p)
result_all.append(hf_l)
result_all.append(hf_l_p)
result_all.append(tp_l)
result_all.append(tp_l_p)
result_all.append(ratio_lf_hf_l)
result_all.append(ratio_lf_hf_l_p)
print(file_dis)
with open('hrv2.csv', 'w+') as f:
writer = csv.writer(f)
for values in zip_longest(*result_all):
writer.writerow(values)
def main():
extension = "atr"
for root, dirs, files in os.walk(DATA_PATH):
files = np.sort(files)
for name in files:
if extension in name:
if name not in ['101.atr']:
continue
cur_dir = os.getcwd()
os.chdir(DATA_PATH)
os.system('rrlist {} {} -M -s >{}.rr'.format(extension, name.split('.')[0], name.split('.')[0]))
time_m = []
rr_intervals = []
ann = []
with open(name.split('.')[0] + '.rr', 'r') as rr_file:
for line in rr_file:
time_m.append(line.split(' ')[0])
rr_intervals.append(line.split(' ')[1])
ann.append(line.split(' ')[2].split('\n')[0])
time_m = np.asarray(time_m, dtype=float)
rr_intervals = np.asarray(rr_intervals, dtype=float)
sdnn, sdann, rmssd, pnn50 = time_domain(time_m, rr_intervals, ann)
if sdnn == 0 and sdann == 0 and rmssd == 0 and pnn50 == 0:
print('No result hrv')
return 0
print('sdnn', sdnn)
print('rmssd', rmssd)
print('pnn50', pnn50)
print('sdann', sdann)
time_m = time_m - time_m[0]
time_m = np.round(time_m, 3)
time_nn = []
nn_intervals = []
for i in range(1, len(ann)):
if ann[i] == 'N' and ann[i - 1] == 'N':
nn_intervals.append(rr_intervals[i])
time_nn.append(time_m[i])
time_nn = np.asarray(time_nn, dtype=float)
nn_intervals = np.asarray(nn_intervals, dtype=float)
start = time.time()
fc = FreqDomainClass()
ulf, vlf, lf, hf, tp, ratio_lf_hf = fc.freq_domain(time_nn, nn_intervals)
end = time.time()
print('ULF PWR: ', ulf)
print('VLF PWR: ', vlf)
print('LF PWR: ', lf)
print('HF PWR: ', hf)
print('TOT PWR: ', tp)
print('LF/HF: ', ratio_lf_hf)
print('finish', end - start)
os.chdir(cur_dir)

Categories