Issue with Python scipy optimize minimize fmin_slsqp solver - python

I start with the optimization function from scipy.
I tried to build my code by adapting the solution from the question "Find optimal vector that minimizes function".
I have an array that contains series in columns. I need to multiply each of them by a weight so that the sum of last row of these columns multiplied by the weights gives a given number (constraint).
The sum of the series multiplied by the weights gives a new series where I extract the max-draw-down and I want to minimize this mdd.
I wrote my code as best as I can (2 months of Python and 3 hours of scipy) and can't solve the error message on the function used to solve the problem.
Here is my code and any help would be much appreciated:
import numpy as np
from scipy.optimize import fmin_slsqp
# based on:
# the number of columns (and so of weights) can vary; it should be generic, regardless the number of columns
def mdd(serie): # finding the max-draw-down of a series (put aside not to create add'l problems)
min = np.nanargmax(np.fmax.accumulate(serie) - serie)
max = np.nanargmax((serie)[:min])
return serie[np.nanargmax((serie)[:min])] - serie[min] # max-draw-down
# defining the input data
# mat is an array of 5 columns containing series of independent data
mat = np.array([[1, 0, 0, 1, 1],[2, 0, 5, 3, 4],[3, 2, 4, 3, 7],[4, 1, 3, 3.1, -6],[5, 0, 2, 5, -7],[6, -1, 4, 1, -8]]).astype('float32')
w = np.ndarray(shape=(5)).astype('float32') # 1D vector for the weights to be used for the columns multiplication
w0 = np.array([1/5, 1/5, 1/5, 1/5, 1/5]).astype('float32') # initial weights (all similar as a starting point)
fixed_value = 4.32 # as a result of constraint nb 1
# testing the operations that are going to be used in the minimization
series = np.sum(mat * w0, axis=1)
# objective:
# minimize the mdd of the series by modifying the weights (w)
def test(w, mat):
series = np.sum(mat * w, axis=1)
return mdd(series)
# constraints:
def cons1(last, w, fixed_value): # fixed_value = 4.32
# the sum of the weigths multiplied by the last value of each column must be equal to this fixed_value
return np.sum(mat[-1, :] * w) - fixed_value
def cons2(w): # the sum of the weights must be equal to 1
return np.sum(w) - 1
# solution:
# looking for the optimal set of weights (w) values that minimize the mdd with the two contraints and bounds being respected
# all w values must be between 0 and 1
result = fmin_slsqp(test, w0, f_eqcons=[cons1, cons2], bounds=[(0.0, 1.0)]*len(w), args=(mat, fixed_value, w0), full_output=True)
weights, fW, its, imode, smode = result

You weren't that far off the mark. The biggest problem lies in the mdd function: when there is no draw-down, your function produces an empty array as an intermediate result, and the argmax function cannot cope with an empty array.
def mdd(serie):
    """Return the max-draw-down of a 1-D series.

    The draw-down period ends at the index where the gap between the running
    maximum and the series is largest, and starts at the highest value that
    occurs before that index.

    :param serie: 1-D numpy array of values.
    :return: value at the start of the period minus value at its end, or 0.0
        when the series never drops below its running maximum.
    """
    i = np.argmax(np.maximum.accumulate(serie) - serie)  # end of the period
    start = serie[:i]
    # No draw-down at all: the largest gap sits at index 0, so nothing precedes
    # it. Test the length rather than the values -- a peak whose value is 0
    # (e.g. [0, -1]) is a real peak and must not be treated as "no data".
    if start.size == 0:
        return 0.0
    j = np.argmax(start)  # start of period
    return serie[j] - serie[i]  # max-draw-down
In addition, you must make sure that the parameter list is the same for all functions involved (cost function and constraints).
# objective:
# minimize the mdd of the series by modifying the weights (w)
def test(w, mat, fixed_value):
    """Objective: max-draw-down of the weighted combination of the columns.

    :param w: 1-D array of weights, one per column of ``mat``.
    :param mat: 2-D array whose columns are the individual series.
    :param fixed_value: not used here; accepted so the objective has the same
        parameter list as the constraint functions.
    :return: ``mdd`` of the combined series.
    """
    series = mat @ w  # matrix-vector product: one combined value per row
    return mdd(series)
# constraints:
def cons1(w, mat, fixed_value):  # fixed_value = 4.32
    """Equality constraint on the last row of ``mat``.

    :param w: 1-D array of weights, one per column of ``mat``.
    :param mat: 2-D array whose columns are the individual series.
    :param fixed_value: target for the weighted sum of the last row.
    :return: scalar residual; zero when the constraint is satisfied.
    """
    # the sum of the weights multiplied by the last value of each column must
    # be equal to this fixed_value
    return mat[-1, :] @ w - fixed_value
def cons2(w, mat, fixed_value):
    """Equality constraint: the sum of the weights must be equal to 1.

    :param w: 1-D array of weights.
    :param mat: not used; accepted so all constraint functions share one
        parameter list.
    :param fixed_value: not used; see ``mat``.
    :return: scalar residual; zero when the weights sum to 1.
    """
    return np.sum(w) - 1
# solution:
# looking for the optimal set of weights (w) values that minimize the mdd with the two contraints and bounds being respected
# all w values must be between 0 and 1
result = fmin_slsqp(test, w0, eqcons=[cons1, cons2], bounds=[(0.0, 1.0)]*len(w), args=(mat,fixed_value), full_output=True)
One more remark: You can make the matrix-vector multiplications much leaner with the @-operator.


Python weighted quantile as R wtd.quantile()

I want to convert the R package Hmisc::wtd.quantile() into python.
Here is the example in R:
I took this as reference and it seems that the logics are different than R:
# First function
def weighted_quantile(values, quantiles, sample_weight = None,
                      values_sorted = False, old_style = False):
    """ Very close to numpy.percentile, but supports weights.

    NOTE: quantiles should be in [0, 1]!

    :param values: numpy.array with data
    :param quantiles: array-like with many quantiles needed
    :param sample_weight: array-like of the same length as `values`;
        when None every value gets weight 1
    :param values_sorted: if True, `values` is taken to be sorted already
        and is not sorted again
    :param old_style: accepted for compatibility but not used by this
        version of the function
    :return: numpy.array with computed quantiles.
    """
    values = np.array(values)
    quantiles = np.array(quantiles)
    if sample_weight is None:
        sample_weight = np.ones(len(values))
    sample_weight = np.array(sample_weight)
    assert np.all(quantiles >= 0) and np.all(quantiles <= 1), 'quantiles should be in [0, 1]'
    if not values_sorted:
        # sort the values and carry the weights along in the same order
        sorter = np.argsort(values)
        values = values[sorter]
        sample_weight = sample_weight[sorter]
    # cumulative share of the total weight reached at each sorted value
    weighted_quantiles = np.cumsum(sample_weight) / np.sum(sample_weight)
    # linear interpolation; requests outside the covered range are clamped
    # to the smallest / largest value by np.interp
    return np.interp(quantiles, weighted_quantiles, values)
weighted_quantile(values = [0.4890342, 0.4079128, 0.5083345, 0.2136325, 0.6197319],
quantiles = np.arange(0, 1 + 1 / 5, 1 / 5),
sample_weight = [1,1,1,1,1])
>> array([0.2136325, 0.2136325, 0.4079128, 0.4890342, 0.5083345, 0.6197319])
# Second function
def weighted_percentile(data, weights, perc):
    """Weighted percentile(s) of `data`, linearly interpolated.

    perc : percentile in [0-1]!

    :param data: array-like with the values
    :param weights: array-like of the same length as `data`
    :param perc: scalar or array-like of requested percentiles
    :return: interpolated value(s) of `data` at `perc`
    """
    data = np.array(data)
    weights = np.array(weights)
    ix = np.argsort(data)
    data = data[ix]  # sort data
    weights = weights[ix]  # sort weights
    # 'like' a CDF function: each value sits at the midpoint of its own weight
    cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights)
    return np.interp(perc, cdf, data)
weighted_percentile([0.4890342, 0.4079128, 0.5083345, 0.2136325, 0.6197319], [1,1,1,1,1], np.arange(0, 1 + 1 / 5, 1 / 5))
>> array([0.2136325 , 0.31077265, 0.4484735 , 0.49868435, 0.5640332 ,
0.6197319 ])
Both give results that differ from R's. Any idea?
I am Python-illiterate, but from what I see and after some quick checks I can tell you the following.
Here you use uniform (sampling) weights, so you could also directly use the quantile() function. Not surprisingly, it gives the same results as wtd.quantile() with uniform weights:
x <- c(0.4890342, 0.4079128, 0.5083345, 0.2136325, 0.6197319)
n <- length(x)
x <- sort(x)
quantile(x, probs = seq(0,1,0.2))
# 0% 20% 40% 60% 80% 100%
# 0.2136325 0.3690567 0.4565856 0.4967543 0.5306140 0.6197319
The R quantile() function gets the quantiles in a 'textbook' way, i.e. by determining the index i of the observation to use with i = q(n+1).
In your case:
# 0.0 1.2 2.4 3.6 4.8 6.0
Of course since you have 5 values/obs and you want quintiles, the indices are not integers. But you know for example that the first quintile (i = 1.2) lies between obs 1 and obs 2. More precisely, it is a linear combination of the two observations (the 'weights' are derived from the value of the index):
0.2*x[1] + 0.8*x[2]
# 0.3690567
You can do the same for all the quintiles, on the basis of the indices:
q <-
  c(min(x), ## 0: actually, the first obs
    0.2*x[1] + 0.8*x[2], ## 1.2: quintile lies between obs 1 and 2
    0.4*x[2] + 0.6*x[3], ## 2.4: quintile lies between obs 2 and 3
    0.6*x[3] + 0.4*x[4], ## 3.6: quintile lies between obs 3 and 4
    0.8*x[4] + 0.2*x[5], ## 4.8: quintile lies between obs 4 and 5
    max(x) ## 6: actually, the last obs
  )
# 0.2136325 0.3690567 0.4565856 0.4967543 0.5306140 0.6197319
You can see that you get exactly the output of quantile() and wtd.quantile().
If instead of 0.2*x[1] + 0.8*x[2] we consider the following:
0.5*x[1] + 0.5*x[2]
# 0.3107726
We get the output of your second Python function. It appears that your second function considers uniform 'weights' (obviously I am not talking about the sampling weights here) when combining the two observations. The issue (at least for the second Python function) seems to come from this. I know these are just insights, but I hope they will help.
EDIT: note that the difference between the two is not necessarily an 'issue' with the Python code. There are different quantile estimators (and their weighted versions) and the Python functions could simply rely on a different estimator than Hmisc::wtd.quantile(). I think that the latter uses the weighted version of the Harrell-Davis quantile estimator. If you really want to implement this one, you should check the source code of Hmisc::wtd.quantile() and try to 'directly' translate this into Python.

np.random.rand somehow produces floats bigger than 1

I'm trying to create a 2-layer neural network; for that I first initialize the weights and biases to random floats between 0 and 1 using numpy.random.rand. However, for some reason this process produces floats bigger than 1 for W1 (weight 1), whereas it works correctly for all other weights and biases. I can't understand why this happens. I thought maybe something affects the function from outside the function where I initialized the parameters, but I couldn't detect any part of the function that could be affected from outside.
import numpy as np
n_x = 12288 # num_px * num_px * 3
n_h = 7
n_y = 1
layers_dims = (n_x, n_h, n_y)
def initialize_parameters_deep(layer_dims):
    """Randomly initialise the parameters of a 2-layer network.

    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer
                  in our network, in the order (n_x, n_h, n_y)

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1","W2", "b2":
    """
    # Read the layer sizes from the argument instead of module-level globals,
    # so the function works for whatever sizes the caller passes in.
    n_x, n_h, n_y = layer_dims
    parameters = {}
    parameters["W1"] = np.random.rand(n_h, n_x)  # shape (n_h, n_x)
    parameters["b1"] = np.random.rand(n_h, 1)  # shape (n_h, 1)
    parameters["W2"] = np.random.rand(n_y, n_h)  # shape (n_y, n_h)
    parameters["b2"] = np.random.rand(n_y, 1)  # shape (n_y, 1)
    return parameters
parameters = initialize_parameters_deep(layers_dims)
{'W1': array([[4.17022005e-01, 7.20324493e-01, 1.14374817e-04, ...,
3.37562919e-01, 1.12292153e-01, 5.37047221e-01],
[7.07934286e-01, 3.37726007e-01, 7.07954162e-01, ...,
4.22040811e-01, 7.78593215e-01, 3.49866021e-01],
[9.01338451e-01, 7.95132845e-03, 1.03777034e-01, ...,
2.78602449e-01, 5.05813021e-02, 8.26828833e-01],
[5.62717083e-03, 6.58208224e-01, 3.88407263e-01, ...,
5.56312618e-01, 8.69650932e-01, 1.00112287e-01],
[4.16278934e-01, 4.56060621e-01, 9.33378848e-01, ...,
9.52798385e-01, 9.41894584e-01, 4.44342962e-01],
[8.89254832e-01, 6.42558949e-01, 2.29427262e-01, ...,
8.05884494e-01, 1.80676088e-01, 6.12694420e-01]]), 'b1': array([[0.11933315],
[0.43137737]]), 'W2': array([[0.81360697, 0.44638382, 0.41794085, 0.08649817, 0.29957473,
0.33706742, 0.24721952]]), 'b2': array([[0.92363097]])}
It's not generating floats bigger than 1, it's just representing them differently.
4.17022005e-01 is the same as 0.417022005, and 1.14374817e-04 is the same as 0.000114374817.
See here or here.
The e-01, e-02, e-03, etc at the end of the W1 numbers just mean that the numbers are written in exponential format. So if you have for example 2.786e-01 that is the same as if it was written like (2.786/10) and that is the same as 0.2786. Same thing goes for: 2.786e-03 == (2.786/1000) == 0.002786. e+2 is 10^2 and e-2 is 1/(10^2).
Pay attention to the final few characters printed when you print your weights parameter tensor, which gives e.g. e-01. This represents base-10 exponentiation, i.e. meaning that the value of a given weight is the number printed times 10 to the given power.
All of the powers are negative, meaning the weights have small but positive values in the range [0, 1].
For example, 4.17022005e-01 equals 0.417022005.

How to use for loop with dictionary and a array at the same time

i want to make a linear equation with some dynamic inputs like it can be
y = θ0*x0 + θ1*x1
y = θ0*x0 + θ1*x1 + θ2*x2 + θ3*x3 + θ4*x4
for that i have
dictionary for x0,x1,x2......xn
and array for θ0,θ1,θ2......θn
im new to python so i tried this function but im stuck
so my question is how can i write a fucntion that gets x_values and theta_values as parameters and gives y_values as output
X = pd.DataFrame({'x0': np.ones(6), 'x1': np.linspace(0, 5, 6)})
θ = np.matrix('0 1')
def line_func(features, parameters):
result = []
for feat, param in zip(features.iteritems(), parameters):
for i in feat:
return result
If you want to multiply your thetas with a list of features, then you technically multiply a matrix (the features) with a vector (theta).
You can do this as follows:
import numpy as np
x_array= x.values
theta= np.array([theta_0, theta_1])
The product x_array.dot(theta) then gives y. Just order your theta vector the way the columns are ordered in x. Note that this gives the row-wise sum of the products theta_i*x_i over all i. If you don't want the products summed up row-wise, just write x_array * theta instead.
If you want to use pandas for the multiplication as well (which I wouldn't recommend) and want to get a dataframe with the products of each column value and the corresponding theta, you could do this as follows:
# define the theta-x mapping (theta-value per column name in x)
thetas={'x1': 1, 'x2': 3}
# create an empty result dataframe with the index of x
df_result= pd.DataFrame(index=x.index)
# assign the calculated columns in a loop
# (DataFrame.items() yields (column name, Series) pairs; the older
# iteritems() spelling was removed in pandas 2.0)
for col_name, col_series in x.items():
    df_result[col_name]= col_series*thetas[col_name]
This results in:
x1 x2
0 1 6
1 -1 3

Tensorflow runtime determine the shape of Tensor

Does TensorFlow support determining the shape of a tensor at runtime?
The problem is to build a Constant tensor in runtime based on the input vector length_q. The number of columns of the target tensor is the sum of length_q. The code snippet is shown as follows, the length of length_q is fixed to 64.
T = tf.reduce_sum(length_q, 0)[0]
N = np.shape(length_q)[0]
wm = np.zeros((N, T), dtype=np.float32)
# Something inreletive.
count = 0
for i in xrange(N):
ones = np.ones(length_q[i])
wm[i][count:count+length_q[i]] = ones
count += length_q[i]
return tf.Constant(wm)
I want to create a dynamic Tensor according to the input length_q. length_q is some input vector (64*1). The new tensor's shape I want to create depends on the sum of length_q because in each batch the data in length_q changes. The current code snippet is as follows:
def some_matrix(length_q):
T = tf.reduce_sum(length_q, 0)[0]
N = np.shape(length_q)[0]
wm = np.zeros((N, T), dtype=np.float32)
count = 0
return wm
def network_inference(length_q):
wm = tf.constant(some_matrix(length_q));
And the problem occurs probably because length_q is the placeholder and doesn't have summation operation. Are there some ways to solve this problem?
It sounds like the tf.fill() op is what you need. This op allows you to specify a shape as a tf.Tensor (i.e. a runtime value) along with a value:
def some_matrix(length_q):
    """Build an all-zero float tensor whose shape is only known at run time.

    :param length_q: tensor of lengths; its first dimension gives the number
        of rows and its sum gives the number of columns.
    :return: tensor of zeros with N rows and T columns.
    """
    T = tf.reduce_sum(length_q, 0)[0]  # total length -> number of columns
    N = tf.shape(length_q)[0]  # run-time size of the first dimension
    # tf.fill accepts the shape as tensor values, so N and T may be run-time
    # values. Rows first, columns second, like np.zeros((N, T)).
    wm = tf.fill([N, T], 0.0)
    return wm
It is not clear to me what you are calculating. If you need a tensor with N rows, you can generate one filled with ones like this:
# Example: build a tensor of ones with N rows and T columns, where T is a tensor.
T = tf.constant(20.0,tf.float32) # stand-in for the reduced sum; 20.0 is just an example float value
T = tf.cast(T,tf.int32) # the number of columns has to be an integer
N = 10 # plain integer -- assuming np.shape gives 10
# N = length_q.get_shape()[0] # if it is a tensor; replace 'length_q' with your tensor name
wm = tf.ones([N,T],dtype=tf.float32) # N rows and T columns
sess = tf.Session()

Theano - Sum by group

I'm working on a custom likelihood function for Theano (Attempting to fit a conditional logistic regression.)
The likelihood requires summing values by group. In R we have the "ave()" function, in Python Pandas we have "groupby()". How would I do something similar in Theano?
Edited for more detail
I want to create a cox proportional hazards model (same as conditional logistic regression.) The log likelihood requires the sum of values by group:
In Pandas, this would be:
temp = df.groupby('groupid')['eta'].aggregate(np.sum)
denominator = np.log(temp).sum()
In the data, we have a column with group ID, and the values to be summed
group eta
1 2.1
1 1.8
1 0.9
2 1.2
2 0.75
2 1.42
The output for the group sums would then be:
group sum
1 4.8
2 3.37
Then, the sum of the log of the sums:
log(4.8) + log(3.37) = 2.7835
This is quick and easy to do in Pandas. How can I do something similar in Theano? Sure, I could write a nested loop, but I try to avoid manually coded loops when possible as they are slow.
Let's say you have "X" (a list of all your etas) with dimension Nx1 (I guess) and a matrix H. H is an NxG matrix that holds a one-hot encoding of the groups.
Then you write something like:
import numpy as np
from numpy import newaxis as na
import theano.tensor as T
X = T.vector()
H = T.matrix()
tmp = T.sum(X[:, na] * H, axis=0)
O = T.sum(T.log(tmp))
x = np.array([5, 10, 10, 0.5, 5, 0.5])
# create a 1-hot encoding
g = np.array([1, 2, 2, 0, 1, 0])  # group index of each entry of x
# one column per group; size it from the data instead of hard-coding 3
h = np.zeros(shape=(len(x), g.max() + 1))
# set h[i, g[i]] = 1 for every row i in a single vectorised assignment
h[np.arange(len(g)), g] = 1.0
O.eval({X:x, H: h})
This should work as long as there is at least one eta per group (otherwise that group's sum is 0 and its log is -inf).
