Grid search function in Python

I am trying to write a parameter search function that loops over one of the parameters and repeatedly calls a function, keeping all the other parameters fixed. Here is some sample code:
def worker1(a, b, c):
    return a + b + c

def worker2(d, e, f):
    return d * e * f

def search(model, params):
    res = []
    # Loop over one of the parameters and repeatedly append to res
    if model == 1:
        res.append(worker1(**params))
    elif model == 2:
        res.append(worker2(**params))
    return res

params = dict(a=1, b=2, c=3)
print search(1, params)
I have two workers and they are called depending on the value of the model flag I pass to search(). The problem I am trying to solve here is to write a loop (commented in the code) over the if statements to repeatedly call, say, worker1 while varying only one of the parameters. I want my code to be flexible - sometimes I want to loop through a and keep b and c the same, but sometimes I want to loop through b and keep a and c the same.
I'm open to whatever solution is suggested, but I think I would specify the search parameters in the params dictionary. E.g. to loop a over 1, 2, 3, 4, I would say:
`params = dict(a=[1,2,3,4], b=2, c=3)`
Also it would be nice if I don't have to modify the code for worker1 and worker2.
Thank you!

You could perhaps use itertools.product to call your workers with all combinations of params:
http://docs.python.org/2/library/itertools.html#itertools.product
e.g.
from itertools import product

def worker1(a, b, c):
    return a + b + c

def worker2(d, e, f):
    return d * e * f

def search(model, *params):
    res = []
    # Loop over every combination of the parameters and append to res
    for current_params in product(*params):
        if model == 1:
            res.append(worker1(*current_params))
        elif model == 2:
            res.append(worker2(*current_params))
    return res

print search(1, [1,2,3,4], [2], [3])
# more complicated combinations are possible:
print search(1, [1,2,3,4], [2,7,9], [3,13,23,43])
I've avoided using keyword arguments as your worker functions take differently-named args so it wouldn't make much sense.
I'm assuming your worker functions don't actually look like the ones above, as if they did you could further simplify the code using the built-in sum and reduce functions.
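For instance, a minimal sketch of that simplification, assuming the workers really are just the toy sum/product versions above (on Python 3, reduce lives in functools):
from functools import reduce  # built-in in Python 2, in functools on Python 3
import operator

def worker1(*args):
    return sum(args)

def worker2(*args):
    return reduce(operator.mul, args, 1)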

I am not sure if I understood the problem. Check if this is what you want (omitted the model parameter):
>>> def worker1(a, b, c):
...     return a + b + c
>>> def search(params):
...     params = params.values()
...     var_param = filter(lambda p: type(p) == list, params)[0]
...     other_params = filter(lambda p: p != var_param, params)
...     return [worker1(x, *other_params) for x in var_param]
>>> search({'a':2, 'b':[3,4,5], 'c':3})
[8, 9, 10]
Assuming:
arguments of worker1() are commutative (order does not matter).
variable parameter is a list
other parameters are single values.
In the above sample, b is the variable parameter which you want to loop over.
Update:
In case the order of the arguments of worker1 is to be preserved:
def search(params):
    params = params.items()
    var_param = filter(lambda t: type(t[1]) == list, params)[0]
    other_params = filter(lambda t: t != var_param, params)
    var_param_key = var_param[0]
    var_param_values = var_param[1]
    return [worker1(**dict([(var_param_key, x)] + other_params)) for x in var_param_values]
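For example, with the same input as before, this keyword-preserving version should give the same result (a sketch assuming Python 2, where filter returns a list that can be indexed):
>>> search({'a': 2, 'b': [3, 4, 5], 'c': 3})
[8, 9, 10]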


Default values for iterable unpacking

I've often been frustrated by the lack of flexibility in Python's iterable unpacking.
Take the following example:
a, b = range(2)
Works fine. a contains 0 and b contains 1, just as expected. Now let's try this:
a, b = range(1)
Now, we get a ValueError:
ValueError: not enough values to unpack (expected 2, got 1)
Not ideal, when the desired result was 0 in a, and None in b.
There are a number of hacks to get around this. The most elegant I've seen is this:
a, *b = function_with_variable_number_of_return_values()
b = b[0] if b else None
Not pretty, and could be confusing to Python newcomers.
So what's the most Pythonic way to do this? Store the return value in a variable and use an if block? The *varname hack? Something else?
As mentioned in the comments, the best way to do this is to simply have your function return a constant number of values and if your use case is actually more complicated (like argument parsing), use a library for it.
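As a trivial sketch of that advice (get_point is a hypothetical name), just pad the return value yourself so it always has a fixed shape:
def get_point():
    # Always return exactly two values, padding with None when needed.
    return 0, None

a, b = get_point()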
However, your question explicitly asked for a Pythonic way of handling functions that return a variable number of arguments, and I believe it can be cleanly accomplished with decorators. They're not super common, and most people tend to use them more than create them, so here's a down-to-earth tutorial on creating decorators if you want to learn more about them.
Below is a decorated function that does what you're looking for. The function returns an iterator with a variable number of arguments and it is padded up to a certain length to better accommodate iterator unpacking.
def variable_return(max_values, default=None):
    # This decorator is somewhat more complicated because the decorator
    # itself needs to take arguments.
    def decorator(f):
        def wrapper(*args, **kwargs):
            actual_values = f(*args, **kwargs)
            try:
                # This will fail if `actual_values` is a single value,
                # such as a single integer or just `None`.
                actual_values = list(actual_values)
            except TypeError:
                actual_values = [actual_values]
            extra = [default] * (max_values - len(actual_values))
            actual_values.extend(extra)
            return actual_values
        return wrapper
    return decorator

# This would be a function that actually does something.
# It should not return more values than `max_values`.
@variable_return(max_values=3)
def ret_n(n):
    return list(range(n))

a, b, c = ret_n(1)
print(a, b, c)
a, b, c = ret_n(2)
print(a, b, c)
a, b, c = ret_n(3)
print(a, b, c)
Which outputs what you're looking for:
0 None None
0 1 None
0 1 2
The decorator basically takes the decorated function and returns its output along with enough extra values to fill in max_values. The caller can then assume that the function always returns exactly max_values number of arguments and can use fancy unpacking like normal.
Here's an alternative version of the decorator solution by @supersam654, using iterators rather than lists for efficiency:
def variable_return(max_values, default=None):
    def decorator(f):
        def wrapper(*args, **kwargs):
            actual_values = f(*args, **kwargs)
            try:
                for count, value in enumerate(actual_values, 1):
                    yield value
            except TypeError:
                count = 1
                yield actual_values
            yield from [default] * (max_values - count)
        return wrapper
    return decorator
It's used in the same way:
@variable_return(3)
def ret_n(n):
    return tuple(range(n))

a, b, c = ret_n(2)
This could also be used with non-user-defined functions like so:
a, b, c = variable_return(3)(range)(2)
The shortest version known to me (thanks to @KellyBundy in the comments below):
a, b, c, d, e, *_ = *my_list_or_iterable, *[None]*5
Obviously it's possible to use a default value other than None if necessary.
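For example, a quick sketch with a hypothetical two-element list; the remaining names simply fall through to the default:
my_list = [1, 2]
a, b, c, d, e, *_ = *my_list, *[None]*5
print(a, b, c, d, e)  # 1 2 None None None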
There is also a nice feature in Python 3.10 (structural pattern matching) which comes in handy here when we know the possible numbers of arguments up front, like when unpacking sys.argv.
Previous method:
import sys
_, x, y, z, *_ = *sys.argv, *[None]*3
New method:
import sys

match sys.argv[1:]:  # slice needed to drop the first value of sys.argv
    case [x]:
        print(f'x={x}')
    case [x, y]:
        print(f'x={x}, y={y}')
    case [x, y, z]:
        print(f'x={x}, y={y}, z={z}')
    case _:
        print('No arguments')

theano and lambda functions

I am having some strange behaviour when I have a list of lambda functions which evaluate theano expressions. The code is below:
import numpy as np
import theano.tensor as tt

# Equivalent functions (or at least I assume so)
def tilted_loss(q, y, f):
    e = (y - f)
    return (q*tt.sum(e) - tt.sum(e[(e < 0).nonzero()])) / e.shape[0]

def tilted_loss2(y, f):
    q = 0.05
    e = (y - f)
    return (q*tt.sum(e) - tt.sum(e[(e < 0).nonzero()])) / e.shape[0]

def tilted_loss_np(q, y, f):
    e = (y - f)
    return (q*sum(e) - sum(e[e < 0])) / e.shape[0]

# lambda functions which use the above functions
qs = np.arange(0.05, 1, 0.05)
q_loss_f = [lambda y, f: tilted_loss(q, y, f) for q in qs]
q_loss_f2 = lambda y, f: tilted_loss(0.05, y, f)
q_loss_f3 = lambda y, f: tilted_loss(qs[0], y, f)

# Test the functions
np.random.seed(1)
a = np.random.randn(1000, 1)
b = np.random.randn(1000, 1)
print(q_loss_f[0](a, b).eval())
print(q_loss_f2(a, b).eval())
print(q_loss_f3(a, b).eval())
print(tilted_loss2(a, b).eval())
print(tilted_loss_np(qs[0], a, b)[0])
This gives the output:
0.571973847658054
0.5616355181780912
0.5616355181695327
0.5616355181780912
0.56163551817
I must be doing something wrong with the way that the list of functions q_loss_f is defined.
Is the way that q is defined ok? I.e. it's a numpy variable that I'm sending in, but this seems to be fine in q_loss_f3.
Any thoughts?
This is a common error: because of late binding, the q value in the lambda expression will just take the last value from the comprehension loop. You'd better use functools.partial:
from functools import partial

q_loss_f = [partial(tilted_loss, q) for q in qs]  # bind q positionally so (y, f) can still be passed positionally
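A minimal alternative sketch (not part of the answer above) is to early-bind q with a default argument, which freezes its value when the lambda is defined rather than when it is called:
# Hypothetical alternative: capture the current q via a default argument
q_loss_f = [lambda y, f, q=q: tilted_loss(q, y, f) for q in qs]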

Multiprocessing Module Runs Slower with Worker Function?

I am trying to use Python's multiprocessing module to speed up processing. However, when I run the test_parallel_compute function at the very bottom of the code on a computing cluster with 32 nodes (EDIT: I've found out that I'm only running on one node), the program takes longer with multiprocessing than without: 1024 seconds (32 processes) vs. 231 seconds (no multiprocessing module used). 1022 of those seconds were spent in the pool.map call within the parallel_compute_new_2 function, so the time is not limited by partitioning the inputs nor by joining the return values.
I have an input list (b) and several other arguments (a and c) to the function (test_function). In order to prepare these for the multiple processors, I partition b. I then give the function and its partitioned arguments as arguments to worker_function_new, which calls the test_function on its partitioned arguments.
QUESTION EDITED:
Can you see any inefficiencies in mapping the multiple processes as below? Again, 1022 of the seconds were spent in the pool.map call within the parallel_compute_new_2 function, so the time is not limited by partitioning the inputs nor by joining the return functions.
I am running this with inputs of a = 100.0, b = range(10000000), and c = 15.0.
Thank you!
import multiprocessing as mup

# Partition the input list
def partition_inputs(input, number):
    num_inputs = len(input)
    return [input[num_inputs * i/number:num_inputs * (i+1)/number] for i in range(number)]

# This is the function that each process is supposed to run.
# It takes in several arguments. b is a long list, which is partitioned
# into multiple slices for each process. a and c are just other numbers.
# This function's return values, d, e, and f, are joined between each process.
def test_function_2(args):
    a = args[0]
    b = args[1]
    c = args[2]
    d = []
    e = 0
    f = {}
    for i in b:
        d.append(a*i*c)
        f[i] = set([a, i, c])
    return d, e, f

def parallel_compute_new_2(function, args, input, input_index, partition_input_function, join_functions_dict, pool,
                           number=32, procnumber=32):
    # Partition the b list. In my case, the partition_input_function is
    # partition_inputs, as above.
    new_inputs = partition_input_function(input, number)
    # Since test_function_2 requires arguments (a, c) beyond the partitioned
    # list b, create a list of the complete arguments.
    worker_function_args_list = []
    for i in range(number):
        new_args = args[:]
        new_args[input_index] = new_inputs[i]
        worker_function_args_list.append(new_args)
    returnlist = pool.map(function, worker_function_args_list)
    # Join the return values from each process.
    return_values = list(returnlist[0])
    for index in join_functions_dict:
        for proc in range(1, number):
            return_values[index] = join_functions_dict[index](return_values[index], returnlist[proc][index])
    return return_values

def test_parallel_compute(a, b, c, number=32, procnumber=32):
    join_functions_dict = {}
    join_functions_dict[0] = lambda a, b: a + b
    join_functions_dict[2] = combine_dictionaries
    # a = 100.
    # b = range(1000000000)
    # c = 15.
    d, e, f = test_function(a, b, c)
    pool = mup.Pool(processes=procnumber)
    d1, e1, f1 = parallel_compute_new_2(test_function_2, [a, b, c], b, 1, partition_inputs, join_functions_dict, pool, number=number, procnumber=procnumber)

List defining the variables of a function in Python

I want to have a function foo which outputs another function, whose list of variables depends on an input list.
Precisely:
Suppose func is a function with the free variable t and three parameters A,gamma,x
Example: func = lambda t,A,gamma,x: Somefunction
I want to define a function foo, which takes a list as input and outputs another function. The output function is a sum of func's, where each func summand has its parameters independent of the others.
Depending on the input list, the variables of the output change as follows:
If an entry of the list is None, then the output function 'gains' a variable, and if the entry is a float, it 'fixes' that parameter.
Example:
li=[None,None,0.1]
g=foo(li)
gives the same output as
g = lambda t,A,gamma: func(t,A,gamma,0.1)
or
li=[None,None,0.1,None,0.2,0.3]
g=foo(li)
gives the same output as
g = lambda t,A1,gamma1,A2: func(t,A1,gamma1,0.1) + func(t,A2,0.2,0.3)
Note: the order in the list is relevant and this behaviour is wanted.
I don't have any clue on how to do that...
I first tried to build a string which depends on the input list and then execute it, but this is surely not the way.
First, partition the parameters from li into chunks of num_params. Then use an iterator over the function's *args to either take the next free variable (if the value in the chunk is None) or the fixed value provided in the chunk.
def foo(li, func, num_params):
    # Use a list (not a generator) so the returned function can be called more than once.
    chunks = [li[i:i+num_params] for i in range(0, len(li), num_params)]
    def function(t, *args):
        result = 0
        iter_args = iter(args)
        for chunk in chunks:
            actual_params = [next(iter_args) if x is None else x for x in chunk]
            result += func(t, *actual_params)
        return result
    return function
Example:
def f(t, a, b, c):
    print t, a, b, c
    return a + b + c

func = foo([1,2,None,4,None,6], f, 3)
print func("foo", 3, 5)
Output:
foo 1 2 3 # from first call to f
foo 4 5 6 # from second call to f
21 # result of func

Pass multiple parameters to concurrent.futures.Executor.map?

The concurrent.futures.Executor.map takes a variable number of iterables from which the function given is called. How should I call it if I have a generator that produces tuples that are normally unpacked in place?
The following doesn't work because each of the generated tuples is given as a different argument to map:
args = ((a, b) for (a, b) in c)
for result in executor.map(f, *args):
    pass
Without the generator, the desired arguments to map might look like this:
executor.map(
    f,
    (i[0] for i in args),
    (i[1] for i in args),
    ...,
    (i[N] for i in args),
)
One argument that is repeated, one argument in c
from itertools import repeat

for result in executor.map(f, repeat(a), c):
    pass
Need to unpack items of c, and can unpack c
from itertools import izip

for result in executor.map(f, *izip(*c)):
    pass
Need to unpack items of c, can't unpack c
Change f to take a single argument and unpack the argument in the function.
If each item in c has a variable number of members, or you're calling f only a few times:
executor.map(lambda args, f=f: f(*args), c)
It defines a new function that unpacks each item from c and calls f. Using a default argument for f in the lambda makes f local inside the lambda and so reduces lookup time.
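Here is a minimal, self-contained sketch of that pattern; the function f and the list c are just placeholders for your own:
from concurrent.futures import ThreadPoolExecutor

def f(a, b):
    return a + b

c = [(1, 2), (3, 4)]
with ThreadPoolExecutor() as executor:
    # Each tuple from c is unpacked into f's two positional arguments.
    results = list(executor.map(lambda args, f=f: f(*args), c))
print(results)  # [3, 7]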
If you've got a fixed number of arguments, and you need to call f a lot of times:
from collections import deque

def itemtee(iterable, n=2):
    def gen(it=iter(iterable), items=deque(), next=next):
        popleft = items.popleft
        extend = items.extend
        while True:
            if not items:
                try:
                    extend(next(it))
                except StopIteration:  # PEP 479: don't let StopIteration leak out of a generator
                    return
            yield popleft()
    return [gen()] * n

executor.map(f, *itemtee(c, n))
Where n is the number of arguments to f. This is adapted from itertools.tee.
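As a sketch of how it is used, assuming the itemtee definition above, a function f of three arguments, and a list c of 3-tuples:
from concurrent.futures import ThreadPoolExecutor

def f(x, y, z):
    return x + y + z

c = [(1, 2, 3), (4, 5, 6)]
with ThreadPoolExecutor() as executor:
    # itemtee(c, 3) spreads the flattened items of c across the three argument positions.
    print(list(executor.map(f, *itemtee(c, 3))))  # [6, 15]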
You need to remove the * on the map call:
args = ((a, b) for b in c)
for result in executor.map(f, args):
    pass
This will call f once for each item in args, where f should accept one parameter (the tuple).
If you want f to accept two parameters you can use a lambda call like:
args = ((a, b) for b in c)
for result in executor.map(lambda p: f(*p), args):  # (*p) does the unpacking part
    pass
You can use currying to create a new function via functools.partial in Python:
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def some_func(param1, param2):
    # some code
    ...

# Currying some_func: the 'a' argument is repeated for every call
func = partial(some_func, a)

with ThreadPoolExecutor() as executor:
    executor.map(func, list_of_args)
    ...
If you need to pass more than one fixed parameter, you can pass them all to partial:
func = partial(some_func, a, b, c)
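Putting it together, a small runnable sketch of the same idea; some_func, a, and list_of_args are placeholder names, not from the question:
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def some_func(param1, param2):
    return param1 * param2

a = 10                    # the fixed, repeated argument
list_of_args = [1, 2, 3]  # the argument that varies per call

func = partial(some_func, a)
with ThreadPoolExecutor() as executor:
    print(list(executor.map(func, list_of_args)))  # [10, 20, 30]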
So suppose you have a function which takes 3 arguments and all the 3 arguments are dynamic and keep on changing with every call. For example:
def multiply(a, b, c):
    print(a * b * c)
To call this multiple times using threading, I would first create a list of tuples where each tuple is a version of a,b,c:
arguments = [(1,2,3), (4,5,6), (7,8,9), ....]
We know that concurrent.futures's map function accepts the target function as its first argument and the list of arguments for each call as its second argument. Therefore, you might make a call like this:
for _ in executor.map(multiply, arguments):  # Error
    pass
But this will give you an error saying the function expected 3 arguments but got only 1. To solve this problem, we create a helper function:
def helper(numbers):
    multiply(numbers[0], numbers[1], numbers[2])
Now, we can call this function using the executor as follows:
with ThreadPoolExecutor() as executor:
    for _ in executor.map(helper, arguments):
        pass
That should give you the desired results.
Here's a code snippet showing how to send multiple arguments to a function with ThreadPoolExecutor:
import concurrent.futures

def hello(first_name: str, last_name: str) -> None:
    """Prints a friendly hello with first name and last name"""
    print('Hello %s %s!' % (first_name, last_name))

def main() -> None:
    """Examples showing how to use ThreadPoolExecutor and executor.map
    sending multiple arguments to a function"""

    # Example 1: Sending multiple arguments using tuples
    # Define tuples with sequential arguments to be passed to hello()
    args_names = (
        ('Bruce', 'Wayne'),
        ('Clark', 'Kent'),
        ('Diana', 'Prince'),
        ('Barry', 'Allen'),
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Using lambda, unpacks the tuple (*f) into hello(*args)
        executor.map(lambda f: hello(*f), args_names)
    print()

    # Example 2: Sending multiple arguments using dict with named keys
    # Define dicts with arguments as key names to be passed to hello()
    kwargs_names = (
        {'first_name': 'Bruce', 'last_name': 'Wayne'},
        {'first_name': 'Clark', 'last_name': 'Kent'},
        {'first_name': 'Diana', 'last_name': 'Prince'},
        {'first_name': 'Barry', 'last_name': 'Allen'},
    )
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Using lambda, unpacks the dict (**f) into hello(**kwargs)
        executor.map(lambda f: hello(**f), kwargs_names)

if __name__ == '__main__':
    main()
Let's say you have data in a DataFrame with two image-path columns, image_1 and image_2, and you want to pass those two columns to a function which will read the images, extract their features, then calculate the difference and return the difference value.
Note: you can have any scenario as per your requirement and respectively you can define the function.
The code snippet below takes these two columns as arguments and passes them to the thread pool mechanism (also showing a progress bar):
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

''' function that will give the difference of two numpy feature matrices '''
def getDifference(image_1_loc, image_2_loc, esp=1e-7):
    arr1 = ''' read 1st image and extract feature '''
    arr2 = ''' read 2nd image and extract feature '''
    diff = arr1.ravel() - arr2.ravel() + esp
    return diff

'''Using ThreadPoolExecutor from concurrent.futures with multiple arguments'''
with ThreadPoolExecutor() as executor:
    result = np.array(
        list(tqdm(
            executor.map(lambda x: getDifference(*x),
                         [(i, j) for i, j in df[['image_1', 'image_2']].values]),
            total=len(df)
        ))
    )
For ProcessPoolExecutor.map():
Similar to map(func, *iterables) except:
the iterables are collected immediately rather than lazily;
func is executed asynchronously and several calls to func may be made
concurrently.
Therefore, the usage of ProcessPoolExecutor.map() is the same as that of Python's built-in map(). Here are the docs:
Return an iterator that applies function to every item of iterable,
yielding the results. If additional iterable arguments are passed,
function must take that many arguments and is applied to the items
from all iterables in parallel.
Conclusion: pass the several parameters to map().
Try running the following snippet under Python 3, and it should become quite clear:
from concurrent.futures import ProcessPoolExecutor

def f(a, b):
    print(a + b)

with ProcessPoolExecutor() as pool:
    pool.map(f, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), (0, 1, 2))
# 0, 2, 4

array = [(i, i) for i in range(3)]
with ProcessPoolExecutor() as pool:
    pool.map(f, *zip(*array))
# 0, 2, 4
I have seen so many answers here, but none of them is as straightforward as using lambda expressions:
def foo(x, y):
    pass
Want to call the above method 10 times with the same values, i.e. xVal and yVal?
with concurrent.futures.ThreadPoolExecutor() as executor:
    for _ in executor.map(lambda _: foo(xVal, yVal), range(0, 10)):
        pass
This works for me:
from concurrent.futures import ThreadPoolExecutor

def concurrent_function(function, list):
    with ThreadPoolExecutor() as executor:
        executor.map(function, list)

def concurrent_multiply(args={'a': 1, 'b': 2}):
    print(args['a'] * args['b'])

concurrent_function(concurrent_multiply, [{'a': 1, 'b': 1},
                                          {'a': 2, 'b': 2},
                                          {'a': 3, 'b': 3}])
A simple utility that I use all the time is below.
########### Start of Utility Code ###########
import os
import sys
import traceback
from concurrent import futures
from functools import partial

def catch(fn):
    def wrap(*args, **kwargs):
        result = None
        try:
            result = fn(*args, **kwargs)
        except Exception as err:
            type_, value_, traceback_ = sys.exc_info()
            return None, (
                args,
                "".join(traceback.format_exception(type_, value_, traceback_)),
            )
        else:
            return result, (args, None)
    return wrap

def top_level_wrap(fn, arg_tuple):
    args, kwargs = arg_tuple
    return fn(*args, **kwargs)

def create_processes(fn, values, handle_error, handle_success):
    cores = os.cpu_count()
    max_workers = 2 * cores + 1
    to_exec = partial(top_level_wrap, fn)
    with futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
        for result, error in executor.map(to_exec, values):
            args, tb = error
            if tb is not None:
                handle_error(args, tb)
            else:
                handle_success(result)
########### End of Utility Code ###########
Example usage -
######### Start of example usage ###########
import time

@catch
def fail_when_5(val):
    time.sleep(val)
    if val == 5:
        raise Exception("Error - val was 5")
    else:
        return f"No error val is {val}"

def handle_error(args, tb):
    print("args is", args)
    print("TB is", tb)

def top_level(val, val_2, test=None, test2="ok"):
    print(val_2, test, test2)
    return fail_when_5(val)

handle_success = print

if __name__ == "__main__":
    # SHAPE -> ( (args, kwargs), (args, kwargs), ... )
    values = tuple(
        ((x, x + 1), {"test": f"t_{x+2}", "test2": f"t_{x+3}"}) for x in range(10)
    )
    create_processes(top_level, values, handle_error, handle_success)
######### End of example usage ###########
