PyCharm debugger skips breakpoints after specific line of code - why? - python

I'm new to PyCharm and so far my impression of the debugger is that it's marvelous! However, it behaves strangely in my code and I cannot figure out what is going wrong.
If I set a breakpoint on these lines of code and then press "Step over" or "Step into my code", it runs to the end, ignoring all other upcoming breakpoints. Any idea what I am doing wrong? Breakpoints before that line work perfectly fine.
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit
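For reference, a hedged diagnostic I added myself (it is not part of the original script): DEAP's default toolbox.map is Python's built-in, lazily evaluated map, so forcing its result into a list right before this loop makes any exception raised inside the evaluation surface on that line, which helps distinguish a debugger problem from the program simply ending early.

# Diagnostic sketch, assuming the names from the full listing below
fitnesses = list(toolbox.map(toolbox.evaluate, invalid_ind))  # force evaluation here
for ind, fit in zip(invalid_ind, fitnesses):
    ind.fitness.values = fit  # a breakpoint here should now fire on every iteration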
My code is below; you need to pip install deap, efel and brian2 for it to run.
# DEAP
# https://github.com/DEAP/deap/tree/54b83e2cc7a73be7657cb64b01033a31584b891d
# import array
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat
import random, numpy, os, efel, scipy, math, time, array, json
from deap import algorithms, base, creator, tools, benchmarks
from deap.benchmarks.tools import diversity, convergence # , hypervolume
from machine_hh_model_v02 import brian_hh_model
# parallel processing
# from scoop import futures
parallel_processing = "no"
# Starting values
channels = {"ENa": 65,
"EK": -90,
"El": -70,
"ECa": 120,
"gNa": 0.05,
"gK": 0.005,
"gL": 1e-4,
"gM": 8e-5,
"gCa": 1e-5}
# Boundaries
bounds = {"ENa": [50, 70],
"EK": [-100, -80],
"El": [-50, -100],
"ECa": [100, 120],
"gNa": [0, 1],
"gK": [0, 1],
"gL": [0, 1],
"gM": [0, 1],
"gCa": [0, 1]}
low, up = [x[0] for x in bounds.values()], [x[1] for x in bounds.values()]
# Set parameters
ext = 2.5 # external current stimulation [nA]
num_gen = 2 # number of generations
num_parents = 40 # number of parents
num_params = len(channels) # number of parameters to optimize
prob_crossover = 0.9 # probability that crossover takes place
# How to generate individuals
def initIndividual(container, sigma):
return container(random.gauss(x, sigma) for x in channels.values())
# CREATOR
# http://deap.readthedocs.io/en/master/tutorials/basic/part1.html
# The create() function takes at least two arguments, a name for the newly created class and a base class. Any
# subsequent argument becomes an attribute of the class. Neg. weights relate to minizing, pos. weight to maximizing
# problems.
# -- define fitness problem (which params are min./max. problems with which weight)
creator.create("FitnessMulti", base.Fitness, weights=tuple(numpy.ones(num_params) * -1))
# Next we will create the class Individual, which will inherit the class list and contain our previously defined
# FitnessMulti class in its fitness attribute. Note that upon creation all our defined classes will be part of the
# creator container and can be called directly.
# -- associate fitness problem to individuals, that are going to be created
creator.create("Individual", list, fitness=creator.FitnessMulti)
# TOOLBOX
# http://deap.readthedocs.io/en/master/examples/ga_onemax.html
# http://deap.readthedocs.io/en/master/api/tools.html#module-deap.tools
# All the objects we will use on our way, an individual, the population, as well as all functions, operators, and
# arguments will be stored in a DEAP container called Toolbox. It contains two methods for adding and removing content,
# register() and unregister().
toolbox = base.Toolbox()
# The newly introduced register() method takes at least two arguments: an alias and a function. Toolbox.attr_bool(),
# when called, will draw a random integer between -100 and 100. Toolbox.attr_float(), when called, will draw a random
# floating point number.
# -- how to generate values for each individual
# toolbox.register("attr_float", random.uniform, -100, 100)
# toolbox.register("attr_float", lambda: [random.gauss(x, 5) for x in channels.values()])
# Our individuals will be generated using the function initRepeat(). Its first argument is a container class, the
# Individual one we defined in the previous section. This container will be filled using the method attr_float(),
# provided as second argument, and will contain 10 integers, as specified using the third argument. When called, the
# individual() method will thus return an individual initialized with what would be returned by calling the attr_float()
# method 100 times.
# -- how and how many individuals to create
# toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, num_params)
toolbox.register("individual", initIndividual, creator.Individual, sigma=1)
# Finally, the population() method uses the same paradigm.
# -- how and how many parents to create
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# LOAD EXPERIMENTAL DATA
# set path
# mainpath = r'C:\OwnCloud\Masterarbeit' # mainpath
# pathfit = os.path.join(mainpath, r'fitness_params') # fitness files
# os.chdir(os.path.join(mainpath, pathfit)) # change directory
# load fitness file
# xl = pd.ExcelFile('fitness.xlsx') # load excel file
# xl_mean = xl.parse("median") # load sheet 'mean' containing mean/median values
# xl_var = xl.parse("quartile-to-median distance") # load sheet 'std containing std/quantiles values
xl_mean = pd.read_json("median")
xl_var = pd.read_json("distance")
########################
############ SOMETHING IS WRONG HERE
########################
# EFEL: median
def efel_stats(features):
# get latency of first spike
if features['peak_time'].any():
features['first_peak_time'] = features['peak_time'][0]
del features['peak_time']
# get median
for key, val in features.items():
if val is None or numpy.isnan(val) or not val:
features[key] = 9999
else:
features[key] = scipy.nanmedian(val)
# get median
# if features['Spikecount'] == 0:
# for key, val in features.items():
# features[key] = 9999
# else:
# for key, val in features.items():
# features[key] = scipy.nanmedian(val)
return features
# ERROR FUNCTION
# The returned value must be iterable and of a length equal to the number of objectives (weights).
def error_function(external_current, indi, xl_mean=xl_mean, xl_var=xl_var):
# output variable
allerrors = []
# BRIAN: run model
stim_start, stim_duration = 500, 1000
voltage, time = brian_hh_model(1, 0, stim_start, stim_duration,
ENa=indi[0], EK=indi[1], El=indi[2], ECa=indi[3], gNa=indi[4], gK=indi[5],
gL=indi[6], gM=indi[7], gCa=indi[8])
# EFEL: extract features and get median
feature_names = ['Spikecount', 'peak_voltage', 'min_AHP_values', 'AP_begin_voltage', 'spike_half_width',
'voltage_base', 'steady_state_voltage_stimend',
'AP_begin_time', 'peak_time']
trace = {'T': time, 'V': voltage, 'stim_start': [stim_start], 'stim_end': [stim_start + stim_duration]}
features = efel.getFeatureValues([trace], feature_names)[0]
features = efel_stats(features)
# # ERROR FUNCTION: get error value
for feature, value in features.items():
# median for one external current (experimental data)
experiment_vals = xl_mean.loc[xl_mean['stimulus'] == external_current, feature].values[0]
error = float(abs(value - experiment_vals))
# my model can produce the same, less or more #spikes, peakvoltage, ...
if value == experiment_vals:
error = 0.
elif value < experiment_vals:
error = error / float(xl_var.loc[xl_var['stimulus'] == external_current, feature].values[0][0])
elif value > experiment_vals:
error = error / float(xl_var.loc[xl_var['stimulus'] == external_current, feature].values[0][1])
# append error value of this feature
allerrors.append(error)
return allerrors
# GENETIC OPERATORS
# Within DEAP there are two ways of using operators. We can 1) simply call a function from the tools module or
# 2) register it with its arguments in a toolbox, as we have already seen for our initialization methods. The second
# option allows us to to easily switch between the operators if desired.
# see http://deap.readthedocs.io/en/master/api/tools.html#module-deap.tools
# Crossover
# Register the crossover function to the toolbox
# tools.cxOnePoint --> one point crossover
# tools.cxTwoPoint --> two-point crossover
toolbox.register("mate", tools.cxSimulatedBinary, eta=20.0)
# Mutation
# Register the mutation function to the toolbox
# tools.mutGaussian --> applies a gaussian mutation of mean mu and standard deviation sigma on the input individual. The indpb argument is the probability of each attribute to be mutated.
# tools.mutPolynomialBounded --> Polynomial mutation as implemented in original NSGA-II algorithm in C by Deb.
# toolbox.register("mutate", tools.mutPolynomialBounded, eta=20, low=low, up=up, indpb=1.0/num_params)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=2, indpb=0.9)
# Selection
# tools.sortNondominated(individuals, k, first_front_only=False) --> Sort the first k individuals into different nondomination levels using the “Fast Nondominated Sorting Approach” proposed by Deb et al., see [Deb2002].
# tools.sortLogNondominated(individuals, k, first_front_only=False) --> Sort individuals in pareto non-dominated fronts using the Generalized Reduced Run-Time Complexity Non-Dominated Sorting Algorithm presented by Fortin et al. (2013).
toolbox.register("select", tools.selNSGA2)
# Evaluation
# Register error function in the toolbox.
# The evaluation will be performed by calling the alias "evaluate".
# toolbox.register("evaluate", error_function, ext, model_vals)
# ALGORITHM
# Now that everything is ready, we can start to write our own algorithm. It is usually done in a main function.
if parallel_processing=="yes":
toolbox.register("map", futures.map)
def main():
# register statistics to the toolbox to maintain stats of the evolution
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("avg", numpy.mean, axis=0)
stats.register("std", numpy.std, axis=0)
stats.register("min", numpy.min, axis=0)
stats.register("max", numpy.max, axis=0)
logbook = tools.Logbook()
logbook.header = "gen", "evals", "min"
###
### NSGA-II algorithm as in "Deb 2002: A Fast and Elitist Multiobjective Genetic Algorithm: NSGA-II"
### https://github.com/DEAP/deap/blob/master/examples/ga/nsga2.py
###
# create random parent population pop
pop = toolbox.population(n=num_parents)
# register error function
toolbox.register("evaluate", error_function, ext)
# evaluate parent population
invalid_ind = [ind for ind in pop if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
# assign crowding distance to the individuals, no actual selection is done
pop = toolbox.select(pop, len(pop))
# print logbook
record = stats.compile(pop)
logbook.record(gen=0, evals=len(invalid_ind), **record)
print(logbook.stream)
# print(record)
# Begin the generational process
for gen in range(1, num_gen):
# increase the variance in my population
offspring = tools.selTournamentDCD(pop, len(pop))
# I have no idea why
offspring = [toolbox.clone(ind) for ind in offspring]
# crossover
for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
if random.random() <= prob_crossover:
toolbox.mate(ind1, ind2)
# mutation
toolbox.mutate(ind1)
toolbox.mutate(ind2)
del ind1.fitness.values, ind2.fitness.values
# Fitness
invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
for ind, fit in zip(invalid_ind, fitnesses):
ind.fitness.values = fit
# Select the next generation population
pop = toolbox.select(pop + offspring, num_parents)
record = stats.compile(pop)
logbook.record(gen=gen, evals=len(invalid_ind), **record)
print(logbook.stream)
# print(record)
# print("Final population hypervolume is %f" % hypervolume(pop, [11.0, 11.0]))
return pop, logbook
# create pareto front (all non-dominated individuals that ever lived)
# pareto = tools.ParetoFront()
if __name__ == "__main__":
pop, logbook = main()
print(logbook)
print("POPULATION", pop)
for indi in pop:
stim_start, stim_duration = 500, 1000
voltage, time = brian_hh_model(1, 1, stim_start, stim_duration,
ENa=indi[0], EK=indi[1], El=indi[2], ECa=indi[3], gNa=indi[4], gK=indi[5],
gL=indi[6], gM=indi[7], gCa=indi[8])
print("INDIVIDUAL ", indi)
machine_hh_model_v02.py:
# brian user guide
# http://brian2.readthedocs.io/en/2.0rc/user/index.html
from brian2 import *
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style()
def brian_hh_model(input_current, plotflag, stim_start, stim_duration, **parameter):
# C++ standalone mode
# At the beginning of the script, i.e. after the import statements, add:
# set_device('cpp_standalone', build_on_run=False)
# ********
# Handling units
# You can generate a physical quantity by multiplying a scalar or vector value with its physical unit:
# tau = 20*ms --> 20. ms
# rates = [10, 20, 30] * Hz --> [ 10. 20. 30.] Hz
# Most Brian functions will also complain about non-specified or incorrect units:
# G = NeuronGroup(10, 'dv/dt = -v/tau: volt', dt=0.5) --> "dt" has wrong dimensions, dimensions were (1) (s)
# Directly get the unitless value of a state variable by appending an underscore to the name
# print(rates_) --> [ 10. 20. 30.]
# ********
# Default parameters
# Set default parameters
default = {"ENa": 65, "EK": -90, "El": -70, "ECa": 120, "gNa": 0.05, "gK": 0.005, "gL": 1e-4, "gM": 8e-5,
"gCa": 1e-5}
# Use default parameter, if not defined as an input parameter
for key, val in default.items():
if key not in parameter:
parameter[key] = val
# Parameters
# Extract parameters that were given as an input (as a dictionary).
d = 96 * umetre # 79.8 umetre
area = d*d*3.141
Cm = 1*ufarad*cm**-2 * area # 1 ufarad
ENa = parameter["ENa"]*mV
EK = parameter["EK"]*mV
El = parameter["El"]*mV
ECa = parameter["ECa"]*mV
g_na = parameter["gNa"]*siemens*cm**-2 * area # 0.05 siemens
g_kd = parameter["gK"]*siemens*cm**-2 * area # 0.005 siemens
gl = parameter["gL"]*siemens*cm**-2 * area # 1e-4 siemens
gm = parameter["gM"]*siemens*cm**-2 * area # 8e-5 siemens
gCa = parameter["gCa"]*siemens*cm**-2 * area
tauMax = 4000 * ms
VT = -63*mV
# Equations
# Define both state variables and continuous-updates on these variables through differential equations. An Equation is
# a set of single lines in a string:
# 1. dx/dt = f : unit (differential equation)
# 2. x = f : unit (subexpression)
# 3. x : unit (parameter)
# There are three special units, "1" -> floating point number, "boolean" and "integer"
# Some special variables are defined: t, dt (time) and xi (white noise). Some other variable names (e.g. _pre) are
# forbidden. Flags -- dx/dt = f : unit (constant), parameter will not be changed during a run.
# The model
eqs = Equations('''
Im = gm * p * (v-EK) : amp
Ica = gCa * q*q * r * (v-ECa) : amp
dv/dt = (gl*(El-v) - g_na*(m*m*m)*h*(v-ENa) - g_kd*(n*n*n*n)*(v-EK) - Im - Ica + I)/Cm : volt
dm/dt = 0.32*(mV**-1)*(13.*mV-v+VT)/
(exp((13.*mV-v+VT)/(4.*mV))-1.)/ms*(1-m)-0.28*(mV**-1)*(v-VT-40.*mV)/
(exp((v-VT-40.*mV)/(5.*mV))-1.)/ms*m : 1
dn/dt = 0.032*(mV**-1)*(15.*mV-v+VT)/
(exp((15.*mV-v+VT)/(5.*mV))-1.)/ms*(1.-n)-.5*exp((10.*mV-v+VT)/(40.*mV))/ms*n : 1
dh/dt = 0.128*exp((17.*mV-v+VT)/(18.*mV))/ms*(1.-h)-4./(1+exp((40.*mV-v+VT)/(5.*mV)))/ms*h : 1
# K+ current
dp/dt = (1/(1+exp(-(v-VT+35.*mV)/(10.*mV))) - p) / (tauMax / (3.3 * exp((v - VT + 35.*mV)/(20.*mV) + exp(-(v - VT + 35.*mV)/(20.*mV))))) : 1
# Ca2+ current
dq/dt = 0.055*(mV**-1) * (-27.*mV - v) / (exp((-27.*mV - v) / (3.8*mV)) - 1.)/ms * (1.-q) - 0.94*exp((-75.*mV - v) / (17.*mV))/ms*q : 1
dr/dt = 0.000457 * exp((-13.*mV - v) / (50.*mV))/ms * (1.-r) - 0.0065 / (1. + exp((-15.*mV - v) / (28.*mV)))/ms*r : 1
I : amp
''')
# NeuronGroup
# The core of every simulation is a NeuronGroup, a group of neurons that share the same equations defining their
# properties. Minimum inputs are "number of neurons" and "model description in the form of equations". Threshold and
# refractoriness are only used for emiting spikes. To make a neuron non-excitable for a certain time period after a
# spike, the refractory keyword can be used.
# G = NeuronGroup(10, 'dv/dt = -v/tau : volt', threshold='v > -50*mV', reset='v = -70*mV', refractory=5*ms)
# Dictionary
# You can set multiple initial values at once using a dictionary and the Group.get_states() and Group.set_states()
# methods.
# initial_values = {'v': 1, 'tau': 10*ms}
# group.set_states(initial_values)
# group.v[:] --> 1)
# states = group.get_states()
# states['v'] --> 1)
group = NeuronGroup(1, eqs, method="exponential_euler")
group.v = El
group.I = 0*nA
# Recording
# Recording variables during a simulation is done with “monitor” objects. Specifically, spikes are recorded with
# SpikeMonitor, the time evolution of variables with StateMonitor and the firing rate of a population of neurons with
# PopulationRateMonitor. You can get all the stored values in a monitor with the Group.get_states().
# In this example, we record two variables v and u, and record from indices 0, 10 and 100 --> three neurons.
# G = NeuronGroup(...)
# M = StateMonitor(G, ('v', 'u'), record=[0, 10, 100])
# M.v[1] will return the values for the second recorded neuron which is the neuron with the index 10.
M = StateMonitor(group, 'v', record=0)
# Run the model
# The command run(100*ms) runs the simulation for 100 ms.
run(stim_start*ms)
group.I[0] = input_current*nA # current injection at one end
run(stim_duration*ms)
group.I = 0*nA
run(stim_start*ms)
# C++ standalone mode
# After the last run() call, call device.build() explicitly:
# device.build(directory='output', compile=True, run=True, debug=False)
# Timing
# profiling_summary(show=5) -- show the 5 objects that took the longest
# profiling_summary(show=2)
# Output
time = M.t/ms
voltage = M.v[0]/mV
# plot output
if plotflag:
plt.plot(time, voltage)
xlabel('Time [ms]')
ylabel('Membrane potential [mV]')
plt.show()
# For multiple calls
# device.reinit()
# device.activate()
return (voltage, time)
#brian_hh_model(1, 1, 500, 1000, ENa=65, EK=-90, El=-70, ECa=120, gNa=0.05, gK=0.005, gL=1e-4, gM=8e-5, gCa=1e-5)
Thanks a lot in advance!!

Related

How to account for delta change in a vector in CVXPY?

I have a problem with a vector of length 48 that represents a selected flow rate across three periods (each of length 16, representing 0.5-hour sections). My goal is to include a lag vector that accurately shows the flow rate and represents what will truly happen, rather than changing sharply as in the image below: blue is what my input_flow_vector currently does and orange is what truly happens, i.e. my desired effect.
I have managed to calculate the number of elements from the selections that need to be modified to give my desired result; it's just a matter of how I can implement this in CVXPY while allowing for the delta.
As you can see from the code below, I am assuming the current flow is 19 and we have a lag rate of 0.5. Essentially, my desired outcome is to have input_flow_vector represent the true flow: if we know the current flow is 19, the first flows should be something like 19, 18.5, 18, 17.95 (17.95 being the flow selected by the program below for input_flow_vector[0:16]), and we should also account for flow changes between input_flow_vector[15:16] and input_flow_vector[32:33], as sketched below. Is there a method I can invoke, or some formulation I can apply to my calculations, to account for the lag?
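To make the desired ramp concrete, here is a small plain-numpy illustration (my own sketch, not a CVXPY formulation; the helper name ramp_towards is made up): starting from a current flow of 19 and moving by at most 0.5 per half-hour step towards a selected flow of 17.95 reproduces the 19, 18.5, 18, 17.95 sequence described above.

import numpy as np

def ramp_towards(current, target, lag_rate, n_steps):
    # Move from `current` towards `target` by at most `lag_rate` per step.
    levels, level = [], current
    for _ in range(n_steps):
        level += float(np.clip(target - level, -lag_rate, lag_rate))
        levels.append(level)
    return np.array(levels)

print(ramp_towards(19.0, 17.95, 0.5, 5))
# -> [18.5  18.   17.95 17.95 17.95]

In the CVXPY model the selected flow is a decision expression rather than a number, so this loop only illustrates the target shape of input_flow_vector; it is not a DCP-compliant constraint.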
My working example of what I am trying:
import cvxpy as cp
import numpy as np
# Setup variables for use in problem
volume_ = np.array([
[457920, 478080, 498240, 516960, 535680, 555840, 576000, 590400, 604800, 622080, 639360, 652320, 665280],
[457920, 478080, 498240, 516960, 535680, 555840, 576000, 590400, 604800, 622080, 639360, 652320, 665280],
[457920, 478080, 498240, 516960, 535680, 555840, 576000, 590400, 604800, 622080, 639360, 652320, 665280]])
cost_ = np.array([
[5.2735998,5.5679998,5.862399, 6.1, 6.457600, 6.77119995, 7.084799, 7.417600, 7.7503997,8.0767999,8.4032000,8.75200012, 9.10080017],
[5.2735998,5.5679998,5.862399, 6.1, 6.457600, 6.77119995, 7.084799, 7.417600, 7.7503997,8.0767999,8.4032000,8.75200012, 9.10080017],
[5.2735998,5.5679998,5.862399, 6.1, 6.457600, 6.77119995, 7.084799, 7.417600, 7.7503997,8.0767999,8.4032000,8.75200012, 9.10080017]])
flow_ = np.array([[15.90, 16.60, 17.30, 17.95, 18.60, 19.30, 20.00, 20.500, 21.00, 21.60, 22.20, 22.65, 23.100],
[15.90, 16.60, 17.30, 17.95, 18.60, 19.30, 20.00, 20.500, 21.00, 21.60, 22.20, 22.65, 23.100],
[15.90, 16.60, 17.30, 17.95, 18.60, 19.30, 20.00, 20.500, 21.00, 21.60, 22.20, 22.65, 23.100]])
hist_df = np.array([[17.9940539, 18.1688886, 17.29390022],
[17.9247597, 22.9460481, 17.49191979],
[17.6406868, 13.7089169, 18.534422 ],
[17.4549785, 16.0060969, 17.36141559],
[18.1842739, 16.3079156, 15.21796288],
[17.1996601, 15.2406543, 18.53437301],
[17.9109670, 15.882214, 17.14792592],
[17.766329, 23.1746440, 16.902871],
[17.5548241, 16.6919832, 17.14028154],
[17.4038959, 20.0329258, 17.86862441],
[17.3635444, 27.6489774, 16.48076412],
[18.0562542, 20.7923857, 17.99141175],
[18.5127303, 16.9623712, 17.18607112],
[16.8388930, 16.3304315, 14.691192],
[18.1595540, 17.2483613, 16.40383005],
[18.1194714, 17.0064085, 17.49042838]])
hours = np.array([16, 16, 16])
# Solving my problem
def optimiser(cost_, volume_, v_min, flow_, min_level, max_level, initial_level, period_lengths, out_flow_):
surface_area = 533.9000244141
FACTOR = 1/surface_area
input_flow_matrix=np.zeros((max(period_lengths),len(period_lengths)))
for i,l in enumerate(period_lengths):
input_flow_matrix[:l,i]=1
selection = cp.Variable(shape=cost_.shape,boolean=True)
assignment_constraint = cp.sum(selection,axis=1) == 1
# This at the moment does not account for delta flow, in which if the flow changes in reality it takes X time to reach this at Y flow rate
# We need to account for this change and show the real flow when it changes flow form s1 to s2 and vice versa
# Example selection may be 18 19 19.5
# Each flows will have 16 periods (30 minute intervals) of the same value but we need to account for the change in flow rate and where we are at currently
# See the attachec excel sheet for an example of how this is done
input_flow_= cp.sum(cp.multiply(flow_,selection),axis=1)
input_flow_vector=cp.vec(cp.multiply(input_flow_matrix,np.ones((max(period_lengths), 1)) # cp.reshape(input_flow_,(1,len(period_lengths)))))
# Current flow
current_flow = 15
# Lag rate is the amount of time it takes to reach the current flow from the previous flow
lag_rate = 0.5
# Calculate the delta from the current flow to input_flow_[0], input_flow_[1], input_flow_[2]
delta_flow = cp.hstack((current_flow-input_flow_[0],input_flow_[1]-input_flow_[0],input_flow_[2]-input_flow_[1]))
# CVXY floor delta_flow to nearest whole number
delta_flow = cp.floor(delta_flow)
delta_flow_times = delta_flow / lag_rate # this is the number of times for each input_flow_vector from current_flow to reach the next flow
# Input_flow_vector is the vector that needs manipulation to reach the next flow
res_flow= (input_flow_vector-cp.vec(out_flow_))
net_volume = res_flow * 1.8
res_level=cp.cumsum(net_volume) * FACTOR + initial_level
volume_= cp.sum(cp.multiply(volume_,selection))
volume_constraint = volume_ >= v_min
min_level_constraint = res_level >= min_level
max_level_constraint = res_level <= max_level
constraints = [assignment_constraint, max_level_constraint, min_level_constraint, volume_constraint]
cost_ = cp.sum(cp.multiply(cost_,selection))
assign_prob = cp.Problem(cp.Minimize(cost_),constraints)
assign_prob.solve(solver=cp.CPLEX, verbose=False)
return selection
# Real world variables that can change from project to project
target = 1633548.75
min_level = 2.5
max_level = 3
surface_area = 533.9000244141
current_level = 2.75
# Call function
sol = optimiser(cost_, volume_, target, flow_, min_level, max_level, current_level, hours, hist_df)
# Selection as flow values
assignments = [np.where(r>=0.99)[0][0] for r in sol.value]
flow_selection = [ flow_[i][assignments[i]] for i in range(len(assignments)) ]
print(flow_selection)
Any guidance on different things to try, or a pointer in the right direction, would be appreciated. Thanks.

optimization of wind farm in optuna

In the following code, I want to optimize the objective function using Optuna.
"""
Additional modules
pip install optuna
pip install scikit-optimize
"""
import time
from py_wake.examples.data.hornsrev1 import V80
from py_wake.examples.data.hornsrev1 import Hornsrev1Site # We work with the Horns Rev 1 site, which comes already set up with PyWake.
from py_wake import BastankhahGaussian
import numpy as np
import optuna
from py_wake.turbulence_models import GCLTurbulence
from py_wake.deflection_models.jimenez import JimenezWakeDeflection
from py_wake.wind_turbines.power_ct_functions import PowerCtFunctionList, PowerCtTabular
def newSite(x,y):
xNew=np.array([x[0]+560*i for i in range(4)])
yNew=np.array([y[0]+560*i for i in range(4)])
x_newsite=np.array([xNew[0],xNew[0],xNew[0],xNew[1],xNew[1],xNew[1],xNew[2],xNew[2],xNew[2]])
y_newsite=np.array([yNew[0],yNew[1],yNew[2],yNew[0],yNew[1],yNew[2],yNew[0],yNew[1],yNew[2]])
return (x_newsite,y_newsite)
def objective(trial):
site = Hornsrev1Site()
x, y = site.initial_position.T
x_newsite,y_newsite=newSite(x,y)
windTurbines = V80()
# We ask values of c from optuna.
c = []
for i in range(9):
for l in range(360):
for k in range(23):
varname = f'c{i}'
minv, maxv, stepv = 0, 1, 1
c.append(trial.suggest_int(varname, minv, maxv, step=stepv))
C=np.array(c)
C=C.reshape((9,360,23))
for item in range(9):
for j in range(10,370,10):
for i in range(j-10,j):
C[item][i]=C[item][j-5]
windTurbines.powerCtFunction = PowerCtFunctionList(
key='operating',
powerCtFunction_lst=[PowerCtTabular(ws=[0, 100], power=[0, 0], power_unit='w', ct=[0, 0]), # 0=No power and ct
windTurbines.powerCtFunction], # 1=Normal operation
default_value=1)
print(C)
operating = np.ones((9,360,23)) # shape=(#wt,wd,ws)
operating[C <= 0.5]=0
wf_model = BastankhahGaussian(site, windTurbines,deflectionModel=JimenezWakeDeflection(),turbulenceModel=GCLTurbulence())
# run wind farm simulation
sim_res = wf_model(
x_newsite, y_newsite, # wind turbine positions
h=None, # wind turbine heights (defaults to the heights defined in windTurbines)
wd=None, # Wind direction (defaults to site.default_wd (0,1,...,360 if not overriden))
ws=None, # Wind speed (defaults to site.default_ws (3,4,...,25m/s if not overriden))
operating=operating
)
for i in range(9):
for l in range(360):
for k in range(23):
if sim_res.TI_eff[i][l][k]-0.14 > 0 :
sim_res.Power[i][l][k]=sim_res.Power[i][l][k]-10000*(sim_res.TI_eff[i][l][k]-0.14)**2
print(-float(np.sum(sim_res.Power))/1e+11)
return -float(np.sum(sim_res.Power)/1e+11)
def optuna_hpo():
t0 = time.perf_counter()
num_trials = 300
sampler = optuna.integration.SkoptSampler()
study = optuna.create_study(sampler=sampler, direction="maximize")
study.optimize(objective, n_trials=num_trials)
print(f"Best params: {study.best_params}")
print(f"Best value: {study.best_value}\n")
print(f'elapse: {round(time.perf_counter() - t0)}s')
# start
optuna_hpo()
The problem is that c is an array of shape (9, 360, 23), but when I run print(f"Best params: {study.best_params}") it only prints 9 values of c, while I need 9*360*23, so I think it only changes those 9 values and not all of the c values.
I have also tried another way of writing c, like this:
for i in range(9*360*23):
varname = f'c{i}'
minv, maxv, stepv = 0, 1, 1
c.append(trial.suggest_int(varname, minv, maxv, step=stepv))
but this way the code stops after 7 or 8 iterations because of a memory problem. I have also tried this on a university server, so I think there is a problem with this approach. So now I want to know: is there any way to apply all the c values in the optimization, i.e. to change all of them instead of just 9?
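For what it is worth, a hedged observation: in the first version varname = f'c{i}' only ever takes the nine names c0 to c8, and within a single trial Optuna returns the same sampled value every time an already-suggested name is suggested again, so study.best_params can only ever contain those nine entries. A sketch of the naming fix (my own, and it does not address the memory problem described above) could look like this:

import numpy as np

# Hypothetical helper: one independent 0/1 parameter per (turbine, wd, ws) bin,
# i.e. 9*360*23 distinct names instead of reusing c0..c8.
def suggest_operating_matrix(trial, n_wt=9, n_wd=360, n_ws=23):
    c = [trial.suggest_int(f"c_{i}_{l}_{k}", 0, 1)
         for i in range(n_wt) for l in range(n_wd) for k in range(n_ws)]
    return np.array(c).reshape((n_wt, n_wd, n_ws))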

Python: define individual binning

I am trying to define my own binning and calculate the mean value of some other columns of my dataframe over these bins. Unfortunately, it only works with integer inputs, as you can see below. In this particular case, "step_size" defines the width of one bin, and I would like to use float values like 0.109, which corresponds to 0.109 seconds. Do you have any idea how I can do this? I think the problem is in the definition of "create_bins", but I cannot fix it...
The goal is to get this: [(0, 0.109), (0.109, 0.218), (0.218, 0.327), ...]
Greets
# =============================================================================
# Define parameters
# =============================================================================
seconds_min = 0
seconds_max = 9000
step_size = 1
bin_number = int((seconds_max-seconds_min)/step_size)
# =============================================================================
# Define function to create your own individual binning
# lower_bound defines the lowest value of the binning interval
# width defines the width of the binning interval
# quantity defines the number of bins
# =============================================================================
def create_bins(lower_bound, width, quantity):
    bins = []
    for low in range(lower_bound,
                     lower_bound + quantity * width + 1, width):
        bins.append((low, low + width))
    return bins
# =============================================================================
# Create binning list
# =============================================================================
bin_list = create_bins(lower_bound=seconds_min,
                       width=step_size,
                       quantity=bin_number)
print(bin_list)
The problem lies in the fact that the range function does not allow for float ranges.
You can use the numeric_range function in more_itertools for this:
from more_itertools import numeric_range
seconds_min = 0
seconds_max = 9
step_size = 0.109
bin_number = int((seconds_max-seconds_min)/step_size)
def create_bins(lower_bound, width, quantity):
    bins = []
    for low in numeric_range(lower_bound,
                             lower_bound + quantity * width + 1, width):
        bins.append((low, low + width))
    return bins

bin_list = create_bins(lower_bound=seconds_min,
                       width=step_size,
                       quantity=bin_number)
print(bin_list)
# [(0.0, 0.109), (0.109, 0.218), (0.218, 0.327), ... ]
Here's a simple way to do it using zip and numpy's arange. I've put the upper limit at 5, but you can, of course, choose other numbers.
import numpy as np
list(zip(np.arange(0, 5, .109), np.arange(.109, 5, .109)))
The result is:
[(0.0, 0.109),
(0.109, 0.218),
(0.218, 0.327),
(0.327, 0.436),
(0.436, 0.545),
(0.545, 0.654),
(0.654, 0.763),
...
No numpy:
max_bin = 100
min_bin = 0
step_size = 0.109
number_of_bins = int(1 + ((max_bin - min_bin) / step_size))  # +1 to cover the whole interval
bins = []
for a in range(number_of_bins):
    bins.append((a * step_size, (a + 1) * step_size))
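Since the stated goal is the mean of other dataframe columns over these bins, a hedged pandas sketch may also help (the column names 'time' and 'value' and the random example data are my assumptions, not from the question): pd.cut accepts float bin edges directly, so the bins do not have to be built by hand at all.

import numpy as np
import pandas as pd

# Hypothetical example frame; replace 'time' and 'value' with your own columns.
df = pd.DataFrame({"time": np.random.uniform(0, 9000, 1000),
                   "value": np.random.randn(1000)})

step_size = 0.109
edges = np.arange(0, 9000 + step_size, step_size)           # float bin edges
df["bin"] = pd.cut(df["time"], bins=edges)                   # interval label per row
means = df.groupby("bin", observed=True)["value"].mean()     # mean of 'value' per bin
print(means.head())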

Odd Results on Entropy Calculation

I am trying to write a function that properly calculates the entropy of a given dataset. However, I am getting very weird entropy values.
My understanding is that all entropy values must fall between 0 and 1, yet I am consistently getting values above 2.
Note: I must use log base 2 for this.
Can someone explain why I am getting incorrect entropy results?
The dataset I am testing with is the ecoli dataset from the UCI Machine Learning Repository.
import numpy
import math
#################### DATA HANDLING LIBRARY ####################
def csv_to_array(file):
# Open the file, and load it in delimiting on the ',' for a comma separated value file
data = open(file, 'r')
data = numpy.loadtxt(data, delimiter=',')
# Loop through the data in the array
for index in range(len(data)):
# Utilize a try catch to try and convert to float, if it can't convert to float, converts to 0
try:
data[index] = [float(x) for x in data[index]]
except Exception:
data[index] = 0
except ValueError:
data[index] = 0
# Return the now type-formatted data
return data
# Function that utilizes the numpy library to randomize the dataset.
def randomize_data(csv):
csv = numpy.random.shuffle(csv)
return csv
# Function to split the data into test, training set, and validation sets
def split_data(csv):
# Call the randomize data function
randomize_data(csv)
# Grab the number of rows and calculate where to split
num_rows = csv.shape[0]
validation_split = int(num_rows * 0.10)
training_split = int(num_rows * 0.72)
testing_split = int(num_rows * 0.18)
# Validation set as the first 10% of the data
validation_set = csv[:validation_split]
# Training set as the next 72
training_set = csv[validation_split:training_split + validation_split]
# Testing set as the last 18
testing_set = csv[training_split + validation_split:]
# Split the data into classes vs actual data
training_cols = training_set.shape[1]
testing_cols = testing_set.shape[1]
validation_cols = validation_set.shape[1]
training_classes = training_set[:, training_cols - 1]
testing_classes = testing_set[:, testing_cols - 1]
validation_classes = validation_set[:, validation_cols - 1]
# Take the sets and remove the last (classification) column
training_set = training_set[:-1]
testing_set = testing_set[:-1]
validation_set = validation_set[:-1]
# Return the datasets
return testing_set, testing_classes, training_set, training_classes, validation_set, validation_classes
#################### DATA HANDLING LIBRARY ####################
# This function returns the list of classes, and their associated weights (i.e. distributions)
# for a given dataset
def class_distribution(dataset):
# Ensure the dataset is a numpy array
dataset = numpy.asarray(dataset)
# Collect # of total rows and columns, using numpy
num_total_rows = dataset.shape[0]
num_columns = dataset.shape[1]
# Create a numpy array of just the classes
classes = dataset[:, num_columns - 1]
# Use numpy.unique to remove duplicates
classes = numpy.unique(classes)
# Create an empty array for the class weights
class_weights = []
# Loop through the classes one by one
for aclass in classes:
# Create storage variables
total = 0
weight = 0
# Now loop through the dataset
for row in dataset:
# If the class of the dataset is equal to the current class you are evaluating, increase the total
if numpy.array_equal(aclass, row[-1]):
total = total + 1
# If not, continue
else:
continue
# Divide the # of occurences by total rows
weight = float((total / num_total_rows))
# Add that weight to the list of class weights
class_weights.append(weight)
# Turn the weights into a numpy array
class_weights = numpy.asarray(class_weights)
# Return the array
return classes, class_weights
# This function returns the entropy for a given dataset
# Can be used across an entire csv, or just for a column of data (feature)
def get_entropy(dataset):
# Set initial entropy
entropy = 0.0
# Determine the classes and their frequencies (weights) of the dataset
classes, class_freq = class_distribution(dataset)
# Utilize numpy's quicksort to test the most occurring class first
numpy.sort(class_freq)
# Determine the max entropy for the dataset
max_entropy = math.log(len(classes), 2)
print("MAX ENTROPY FOR THIS DATASET: ", max_entropy)
# Loop through the frequencies and use given formula to calculate entropy
# For...Each simulates the sequence operator
for freq in class_freq:
entropy += float(-freq * math.log(freq, 2))
# Return the entropy value
return entropy
def main():
ecol = csv_to_array('ecoli.csv')
testing_set, testing_classes, training_set, training_classes, validation_set, validation_classes = split_data(ecol)
entropy = get_entropy(ecol)
print(entropy)
main()
The following function was used to calculate Entropy:
# Function to return Shannon's Entropy
def entropy(attributes, dataset, targetAttr):
    freq = {}
    entropy = 0.0
    index = 0
    for item in attributes:
        if (targetAttr == item):
            break
        else:
            index = index + 1
    index = index - 1
    for item in dataset:
        if ((item[index]) in freq):
            # Increase the count for this value
            freq[item[index]] += 1.0
        else:
            # Initialize the count for this value to 1
            freq[item[index]] = 1.0
    for freq in freq.values():
        entropy = entropy + (-freq / len(dataset)) * math.log(freq / len(dataset), 2)
    return entropy
As @MattTimmermans indicated, the entropy value is actually contingent on the number of classes. For strictly 2 classes, it is contained in the 0 to 1 (inclusive) range. However, for more than 2 classes (which is what was being tested), entropy is calculated with a different formula (converted to Pythonic code above). This post explains the mathematics and calculations in a bit more detail.
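As a quick sanity check (my own sketch, not from the original post): the maximum entropy for k equally likely classes is log2(k), so values above 1 are expected as soon as there are more than two classes.

import math

# Entropy of a uniform distribution over k classes equals log2(k).
for k in (2, 4, 8):
    p = [1.0 / k] * k
    h = -sum(pi * math.log(pi, 2) for pi in p)
    print(k, h)  # 2 1.0, 4 2.0, 8 3.0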

Finding anomalous values from sinusoidal data

How can I find anomalous values in the following data? I am simulating a sinusoidal pattern. I can plot the data and spot any anomalies or noise by eye, but how can I do it without plotting the data? I am looking for simple approaches other than machine learning methods.
import random
import numpy as np
import matplotlib.pyplot as plt
N = 10 # Set signal sample length
t1 = -np.pi # Simulation begins at t1
t2 = np.pi; # Simulation ends at t2
in_array = np.linspace(t1, t2, N)
print("in_array : ", in_array)
out_array = np.sin(in_array)
plt.plot(in_array, out_array, color = 'red', marker = "o") ; plt.title("numpy.sin()")
Inject random noise
noise_input = random.uniform(-.5, .5); print("Noise : ",noise_input)
in_array[random.randint(0,len(in_array)-1)] = noise_input
print(in_array)
plt.plot(in_array, out_array, color = 'red', marker = "o") ; plt.title("numpy.sin()")
Data with noise
I've thought of the following approach to your problem. Since only some values in the time vector are anomalous, the rest of the values follow a regular progression. That means that if we group all the data points in the vector into clusters and calculate the average step for the biggest cluster (which is essentially the pool of values that represent the real deal), we can use that average to do a triad detection, within a given threshold, over the vector and detect which of the elements are anomalous.
For this we need two functions: calculate_average_step, which calculates that average for the biggest cluster of close values, and detect_anomalous_values, which yields the indexes of the anomalous values in our vector based on the average calculated earlier.
After we have detected the anomalous values, we can go ahead and replace them with an estimated value, which we can determine from our average step value and the adjacent points in the vector.
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def calculate_average_step(array, threshold=5):
"""
Determine the average step by doing a weighted average based on clustering of averages.
array: our array
threshold: the +/- offset for grouping clusters. Aplicable on all elements in the array.
"""
# determine all the steps
steps = []
for i in range(0, len(array) - 1):
steps.append(abs(array[i] - array[i+1]))
# determine the steps clusters
clusters = []
skip_indexes = []
cluster_index = 0
for i in range(len(steps)):
if i in skip_indexes:
continue
# determine the cluster band (based on threshold)
cluster_lower = steps[i] - (steps[i]/100) * threshold
cluster_upper = steps[i] + (steps[i]/100) * threshold
# create the new cluster
clusters.append([])
clusters[cluster_index].append(steps[i])
# try to match elements from the rest of the array
for j in range(i + 1, len(steps)):
if not (cluster_lower <= steps[j] <= cluster_upper):
continue
clusters[cluster_index].append(steps[j])
skip_indexes.append(j)
cluster_index += 1 # increment the cluster id
clusters = sorted(clusters, key=lambda x: len(x), reverse=True)
biggest_cluster = clusters[0] if len(clusters) > 0 else None
if biggest_cluster is None:
return None
return sum(biggest_cluster) / len(biggest_cluster) # return our most common average
def detect_anomalous_values(array, regular_step, threshold=5):
"""
Will scan every triad (3 points) in the array to detect anomalies.
array: the array to iterate over.
regular_step: the step around which we form the upper/lower band for filtering
treshold: +/- variation between the steps of the first and median element and median and third element.
"""
assert(len(array) >= 3) # must have at least 3 elements
anomalous_indexes = []
step_lower = regular_step - (regular_step / 100) * threshold
step_upper = regular_step + (regular_step / 100) * threshold
# detection will be forward from i (hence 3 elements must be available for the d)
for i in range(0, len(array) - 2):
a = array[i]
b = array[i+1]
c = array[i+2]
first_step = abs(a-b)
second_step = abs(b-c)
first_belonging = step_lower <= first_step <= step_upper
second_belonging = step_lower <= second_step <= step_upper
# detect that both steps are alright
if first_belonging and second_belonging:
continue # all is good here, nothing to do
# detect if the first point in the triad is bad
if not first_belonging and second_belonging:
anomalous_indexes.append(i)
# detect the last point in the triad is bad
if first_belonging and not second_belonging:
anomalous_indexes.append(i+2)
# detect the mid point in triad is bad (or everything is bad)
if not first_belonging and not second_belonging:
anomalous_indexes.append(i+1)
# we won't add here the others because they will be detected by
# the rest of the triad scans
return sorted(set(anomalous_indexes)) # return unique indexes
if __name__ == "__main__":
N = 10 # Set signal sample length
t1 = -np.pi # Simulation begins at t1
t2 = np.pi; # Simulation ends at t2
in_array = np.linspace(t1, t2, N)
# add some noise
noise_input = random.uniform(-.5, .5);
in_array[random.randint(0, len(in_array)-1)] = noise_input
noisy_out_array = np.sin(in_array)
# display noisy sin
plt.figure()
plt.plot(in_array, noisy_out_array, color = 'red', marker = "o");
plt.title("noisy numpy.sin()")
# detect anomalous values
average_step = calculate_average_step(in_array)
anomalous_indexes = detect_anomalous_values(in_array, average_step)
# replace anomalous points with an estimated value based on our calculated average
for anomalous in anomalous_indexes:
# try forward extrapolation
try:
in_array[anomalous] = in_array[anomalous-1] + average_step
# else try backwward extrapolation
except IndexError:
in_array[anomalous] = in_array[anomalous+1] - average_step
# generate sine wave
out_array = np.sin(in_array)
plt.figure()
plt.plot(in_array, out_array, color = 'green', marker = "o");
plt.title("cleaned numpy.sin()")
plt.show()
Noisy sine:
Cleaned sine:
Your problem relies in the time vector (which is of 1 dimension). You will need to apply some sort of filter on that vector.
First thing that came to mind was medfilt (median filter) from scipy and it looks something like this:
from scipy.signal import medfilt
l1 = [0, 10, 20, 30, 2, 50, 70, 15, 90, 100]
l2 = medfilt(l1)
print(l2)
the output of this will be:
[ 0. 10. 20. 20. 30. 50. 50. 70. 90. 90.]
The problem with this filter, though, is that if we add noise values at the edges of the vector, like [200, 0, 10, 20, 30, 2, 50, 70, 15, 90, 100, -50], then the output is something like [ 0. 10. 10. 20. 20. 30. 50. 50. 70. 90. 90. 0.], and obviously this is not OK for the sine plot, since it will produce the same artifacts in the sine values array.
A better approach to this problem is to treat the time vector as a y output and its index values as the x input, and do a linear regression on this "time linear function" (note the quotes: it just means we are faking a 2-dimensional model by applying a fake x vector). The code uses scipy's linregress (linear regression) function:
from scipy.stats import linregress
l1 = [5, 0, 10, 20, 30, -20, 50, 70, 15, 90, 100]
l1_x = range(0, len(l1))
slope, intercept, r_val, p_val, std_err = linregress(l1_x, l1)
l1 = intercept + slope * l1_x
print(l1)
whose output will be:
[-10.45454545 -1.63636364 7.18181818 16. 24.81818182
33.63636364 42.45454545 51.27272727 60.09090909 68.90909091
77.72727273]
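A short hedged check of my own on why this works for the time vector specifically: np.linspace produces values that are exactly affine in their index, so a least-squares line through (index, value) reproduces the clean vector, and any injected outlier is pulled back towards that line.

import numpy as np
from scipy.stats import linregress

t = np.linspace(-np.pi, np.pi, 10)                  # clean time vector
slope, intercept, *_ = linregress(range(len(t)), t)
print(np.allclose(intercept + slope * np.arange(len(t)), t))  # True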
Now let's apply this to your time vector.
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
N = 20
# N = 10 # Set signal sample length
t1 = -np.pi # Simulation begins at t1
t2 = np.pi; # Simulation ends at t2
in_array = np.linspace(t1, t2, N)
# add some noise
noise_input = random.uniform(-.5, .5);
in_array[random.randint(0, len(in_array)-1)] = noise_input
# apply filter on time array
in_array_x = range(0, len(in_array))
slope, intercept, r_val, p_val, std_err = linregress(in_array_x, in_array)
in_array = intercept + slope * in_array_x
# generate sine wave
out_array = np.sin(in_array)
print("OUT ARRAY")
print(out_array)
plt.plot(in_array, out_array, color = 'red', marker = "o") ; plt.title("numpy.sin()")
plt.show()
The output (plot omitted here) is an approximation of the original signal, as it is with any form of extrapolation/interpolation/regression filtering.
