Loop a function using the previous output value as input - python

I'm trying to query an API, but it only returns 100 records at a time, along with an offset value that I need to pass in to query the next 100 records. I can write a function to query my results, but I am having trouble looping my function so that the output of the previous call is used as the input of the next call. Here's what I want my loop to essentially do:
def query(my_offset=None):
page = at.get('Accounts',offset=my_offset)
a = page['records']
return str(page['offset'])
query()
query(query())
query(query(query(query())))
query(query(query(query(query()))))
query(query(query(query(query(query())))))
...
...
...
...

I'm guessing res can have a special value indicating no more rows were returned, if so, a while loop can be deployed:
res = query()
while True:
res = query(res)
if not res: break
You just rebind the result of the query to res and reuse it on every iteration.

Try collecting the results externally, and then call the function again:
results = []
MAX_ITERATIONS = 20
offset = None


def query(offset=None):
    """Fetch one page of 'Accounts' starting at *offset*.

    Returns a ``(records, next_offset)`` tuple. ``next_offset`` is None when
    the page has no 'offset' entry (assumes ``at.get`` returns a dict --
    TODO confirm against the client library).
    """
    page = at.get('Accounts', offset=offset)
    return page['records'], page.get('offset')


# Collect at most MAX_ITERATIONS pages (the original `<=` fetched one extra).
while len(results) < MAX_ITERATIONS:
    result, offset = query(offset)
    results.append(result)
    if not offset:
        # No offset came back, so there is no further page to request.
        break

How are you returning the final results? Consider:
def query():
    """Return every 'Accounts' record by following the offsets page by page.

    Each page's 'records' are appended to one flat list; paging stops as soon
    as a page yields no (or a falsy) offset.
    """
    offset = None
    a = []
    while True:
        page = at.get('Accounts', offset=offset)
        a.extend(page['records'])
        # The last page may not carry an 'offset' key at all; .get() treats a
        # missing key like a falsy offset instead of raising KeyError.
        offset = page.get('offset')
        if not offset:
            break
    return a
which is really just Jim's answer while collecting and returning the page results

def query(result):
# perform query
return result
def end_query_condition(result):
# if want to continue query:
return True
# if you want to stop:
return False
continue_query = True
result = None
while continue_query:
result = query(result)
continue_query = end_query_condition(result)

You may simply make your function recursive without the need of any loop as:
def query(my_offset=None):
    """Recursively walk the pages of 'Accounts', following each returned offset.

    The first call (``my_offset=None``) fetches the first page. Recursion
    ends, returning None, once a page comes back without a usable offset.
    """
    page = at.get('Accounts', offset=my_offset)
    a = page['records']  # this page's records; process them here
    next_offset = page.get('offset')
    # Check the raw value: str(None) would be the truthy string 'None' and
    # the recursion would never terminate.
    if not next_offset:
        return None
    return query(str(next_offset))

Related

Is there a way to give a function access to the (external-scope) name of the variable being passed in? [duplicate]

This question already has answers here:
Getting the name of a variable as a string
(32 answers)
Closed 4 months ago.
Is it possible to get the original variable name of a variable passed to a function? E.g.
foobar = "foo"
def func(var):
print var.origname
So that:
func(foobar)
Returns:
>>foobar
EDIT:
All I was trying to do was make a function like:
def log(soup):
f = open(varname+'.html', 'w')
print >>f, soup.prettify()
f.close()
.. and have the function generate the filename from the name of the variable passed to it.
I suppose if it's not possible I'll just have to pass the variable and the variable's name as a string each time.
EDIT: To make it clear, I don't recommend using this AT ALL, it will break, it's a mess, it won't help you in any way, but it's doable for entertainment/education purposes.
You can hack around with the inspect module, I don't recommend that, but you can do it...
import inspect
def foo(a, f, b):
    """Print the source text of each argument used in the call to this function.

    The single source line of the calling statement is taken from the
    caller's frame, the text between the first '(' and the final character is
    split on commas, and for ``name = value`` pieces only the value side is
    kept. The resulting list of strings is printed, not returned.

    This is a text hack: it assumes the call sits alone on one line and that
    no argument itself contains a comma or an '=' sign.
    """
    caller = inspect.getouterframes(inspect.currentframe())[1]
    call_text = inspect.getframeinfo(caller[0]).code_context[0].strip()
    raw_args = call_text[call_text.find('(') + 1:-1].split(',')
    names = []
    for arg in raw_args:
        if '=' in arg:
            # Keyword argument: keep what is passed, not the parameter name.
            arg = arg.split('=')[1]
        # Strip every piece so 'e, 1000' yields '1000' rather than ' 1000'.
        names.append(arg.strip())
    print(names)
def main():
e = 1
c = 2
foo(e, 1000, b = c)
main()
Output:
['e', '1000', 'c']
To add to Michael Mrozek's answer, you can extract the exact parameters versus the full code by:
import re
import traceback
def func(var):
    """Print the text between the first pair of parentheses on the calling line.

    The calling line is the second-to-last entry of the extracted stack (the
    last entry is this function itself). Nothing is returned.
    """
    stack = traceback.extract_stack()
    filename, lineno, function_name, code = stack[-2]
    # Lazy match: capture up to the first ')' that follows the first '('.
    vars_name = re.search(r'\((.*?)\).*$', code).group(1)
    print(vars_name)
foobar = "foo"
func(foobar)
# PRINTS: foobar
Looks like Ivo beat me to inspect, but here's another implementation:
import inspect
def varName(var):
    """Return the first local name, two frames up the stack, bound to *var*.

    Matching is by object identity, so any other local that happens to
    reference the same object can be returned instead. Returns None when no
    local in that frame refers to *var*.
    """
    caller_locals = inspect.stack()[2][0].f_locals
    for candidate, value in caller_locals.items():
        if value is var:
            return candidate
    return None


def foo(x=None):
    """Ask varName() which name foo's own caller used for *x*."""
    lcl = 'not me'  # decoy: this local belongs to foo's frame, which is not inspected
    return varName(x)


def bar():
    """Call foo() with a local named 'lcl' so that name can be recovered."""
    lcl = 'hi'
    return foo(lcl)
bar()
# 'lcl'
Of course, it can be fooled:
def baz():
lcl = 'hi'
x='hi'
return foo(lcl)
baz()
# 'x'
Moral: don't do it.
Another way you can try if you know what the calling code will look like is to use traceback:
def func(var):
stack = traceback.extract_stack()
filename, lineno, function_name, code = stack[-2]
code will contain the line of code that was used to call func (in your example, it would be the string func(foobar)). You can parse that to pull out the argument
You can't. It's evaluated before being passed to the function. All you can do is pass it as a string.
@Ivo Wetzel's answer works when the function call is written on a single line, like:
e = 1 + 7
c = 3
foo(e, 100, b=c)
When the function call spans several lines, like:
e = 1 + 7
c = 3
foo(e,
1000,
b = c)
below code works:
import inspect, ast
def foo(a, f, b):
    """Print the argument source text of a ``foo(...)`` call that may span lines.

    The source lines returned by ``inspect.findsource`` for the caller's
    frame are parsed, the first top-level statement whose value is a call to
    the bare name ``foo`` is located, and the lines from that statement up to
    (not including) the next statement are stripped, joined and split on
    commas. The resulting list of strings is printed, not returned.

    When no such statement is found, ``i_expr`` stays -1 and the last
    top-level statement is used instead, so the output is then meaningless.
    """
    frame = inspect.currentframe()
    frame = inspect.getouterframes(frame)[1]
    string = inspect.findsource(frame[0])[0]
    nodes = ast.parse(''.join(string))
    i_expr = -1
    for (i, node) in enumerate(nodes.body):
        if (hasattr(node, 'value') and isinstance(node.value, ast.Call)
                and hasattr(node.value.func, 'id')
                and node.value.func.id == 'foo'):  # Here goes name of the function
            i_expr = i
            break
    i_expr_next = min(i_expr + 1, len(nodes.body) - 1)
    lineno_start = nodes.body[i_expr].lineno
    if i_expr_next != i_expr:
        # lineno is 1-based and the slice end is exclusive: stop just before
        # the first line of the next statement.
        lineno_end = nodes.body[i_expr_next].lineno - 1
    else:
        lineno_end = len(string)
    str_func_call = ''.join([i.strip() for i in string[lineno_start - 1: lineno_end]])
    params = str_func_call[str_func_call.find('(') + 1:-1].split(',')
    print(params)
You will get:
[u'e', u'1000', u'b = c']
But still, this might break.
You can use python-varname package
from varname import nameof
s = 'Hey!'
print (nameof(s))
Output:
s
Package below:
https://github.com/pwwang/python-varname
For posterity, here's some code I wrote for this task, in general I think there is a missing module in Python to give everyone nice and robust inspection of the caller environment. Similar to what rlang eval framework provides for R.
import re, inspect, ast
#Convoluted frame stack walk and source scrape to get what the calling statement to a function looked like.
#Specifically return the name of the variable passed as parameter found at position pos in the parameter list.
def _caller_param_name(pos):
#The parameter name to return
param = None
#Get the frame object for this function call
thisframe = inspect.currentframe()
try:
#Get the parent calling frames details
frames = inspect.getouterframes(thisframe)
#Function this function was just called from that we wish to find the calling parameter name for
function = frames[1][3]
#Get all the details of where the calling statement was
frame,filename,line_number,function_name,source,source_index = frames[2]
#Read in the source file in the parent calling frame upto where the call was made
with open(filename) as source_file:
head=[source_file.next() for x in xrange(line_number)]
source_file.close()
#Build all lines of the calling statement, this deals with when a function is called with parameters listed on each line
lines = []
#Compile a regex for matching the start of the function being called
regex = re.compile(r'\.?\s*%s\s*\(' % (function))
#Work backwards from the parent calling frame line number until we see the start of the calling statement (usually the same line!!!)
for line in reversed(head):
lines.append(line.strip())
if re.search(regex, line):
break
#Put the lines we have groked back into sourcefile order rather than reverse order
lines.reverse()
#Join all the lines that were part of the calling statement
call = "".join(lines)
#Grab the parameter list from the calling statement for the function we were called from
match = re.search('\.?\s*%s\s*\((.*)\)' % (function), call)
paramlist = match.group(1)
#If the function was called with no parameters raise an exception
if paramlist == "":
raise LookupError("Function called with no parameters.")
#Use the Python abstract syntax tree parser to create a parsed form of the function parameter list 'Name' nodes are variable names
parameter = ast.parse(paramlist).body[0].value
#If there were multiple parameters get the positional requested
if type(parameter).__name__ == 'Tuple':
#If we asked for a parameter outside of what was passed complain
if pos >= len(parameter.elts):
raise LookupError("The function call did not have a parameter at postion %s" % pos)
parameter = parameter.elts[pos]
#If there was only a single parameter and another was requested raise an exception
elif pos != 0:
raise LookupError("There was only a single calling parameter found. Parameter indices start at 0.")
#If the parameter was the name of a variable we can use it otherwise pass back None
if type(parameter).__name__ == 'Name':
param = parameter.id
finally:
#Remove the frame reference to prevent cyclic references screwing the garbage collector
del thisframe
#Return the parameter name we found
return param
If you want a Key Value Pair relationship, maybe using a Dictionary would be better?
...or if you're trying to create some auto-documentation from your code, perhaps something like Doxygen (http://www.doxygen.nl/) could do the job for you?
I wondered how IceCream solves this problem. So I looked into the source code and came up with the following (slightly simplified) solution. It might not be 100% bullet-proof (e.g. I dropped get_text_with_indentation and I assume exactly one function argument), but it works well for different test cases. It does not need to parse source code itself, so it should be more robust and simpler than previous solutions.
#!/usr/bin/env python3
import inspect
from executing import Source
def func(var):
callFrame = inspect.currentframe().f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
expression = source.asttokens().get_text(callNode.args[0])
print(expression, '=', var)
# Sample values for the demo calls below.
i = 1
f = 2.0
# `s` was missing from the snippet, although the output shown for this demo
# includes the line `s = string`.
s = 'string'
dct = {'key': 'value'}
obj = type('', (), {'value': 42})
func(i)
func(f)
func(s)
func(dct['key'])
func(obj.value)
Output:
i = 1
f = 2.0
s = string
dct['key'] = value
obj.value = 42
Update: If you want to move the "magic" into a separate function, you simply have to go one frame further back with an additional f_back.
def get_name_of_argument():
callFrame = inspect.currentframe().f_back.f_back
callNode = Source.executing(callFrame).node
source = Source.for_frame(callFrame)
return source.asttokens().get_text(callNode.args[0])
def func(var):
print(get_name_of_argument(), '=', var)
If you want to get the caller's parameters as in @Matt Oates's answer without reading the source file (i.e. from a Jupyter Notebook), this code (adapted from @Aeon's answer) will do the trick (at least in some simple cases):
def get_caller_params():
    """Return the argument source strings of the call to the calling function.

    Stack layout: entry 0 is this function, entry 1 is the function that
    called us (the one being inspected), entry 2 is the code that called that
    function. The source lines found for entry 2 are parsed, and the first
    top-level statement that calls the inspected function by its bare name is
    taken as the call.

    Returns a list of stripped strings, one per comma-separated piece of the
    call's argument text.
    """
    outer = inspect.getouterframes(inspect.currentframe())
    caller_function_name = outer[1][3]
    source_lines = inspect.findsource(outer[2][0])[0]
    statements = ast.parse(''.join(source_lines)).body

    # Index of the first matching top-level statement. It stays -1 (i.e. the
    # last statement) when nothing matches.
    call_index = next(
        (position for position, statement in enumerate(statements)
         if _node_is_our_function_call(statement, caller_function_name)),
        -1,
    )

    # The call runs from its own first line up to the line before the next
    # statement, or to the end of the source when it is the last statement.
    first_line = statements[call_index].lineno - 1
    if call_index < len(statements) - 1:
        end_line = statements[call_index + 1].lineno - 1
    else:
        end_line = len(source_lines)

    call_text = ''.join(
        line.strip() for line in source_lines[first_line:end_line])
    # Keep what lies between the first '(' and the final character.
    argument_text = call_text[call_text.find('(') + 1:-1]
    return [piece.strip() for piece in argument_text.split(',')]
def _node_is_our_function_call(node, our_function_name):
node_is_call = hasattr(node, 'value') and isinstance(node.value, ast.Call)
if not node_is_call:
return False
function_name_correct = hasattr(node.value.func, 'id') and node.value.func.id == our_function_name
return function_name_correct
You can then run it as this:
def test(*par_values):
par_names = get_caller_params()
for name, val in zip(par_names, par_values):
print(name, val)
a = 1
b = 2
string = 'text'
test(a, b,
string
)
to get the desired output:
a 1
b 2
string text
Since you can have multiple variables with the same content, instead of passing the variable (content), it might be safer (and will be simpler) to pass its name as a string and get the variable's content from the locals dictionary in the caller's stack frame:
def displayvar(name):
    """Return ``"<name> = <repr of value>"`` for the caller's local *name*.

    The value is looked up in the locals of the frame one level up, so a
    KeyError is raised when the caller has no local called *name*.
    """
    import sys

    caller_locals = sys._getframe(1).f_locals
    shown_value = repr(caller_locals[name])
    return name + " = " + shown_value
If it just so happens that the variable is a callable (function), it will have a __name__ property.
E.g. a wrapper to log the execution time of a function:
def time_it(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, print how long it took, return its result.

    The elapsed time is printed in milliseconds and labelled with
    ``func.__name__``. Callables without that attribute (for example
    ``functools.partial`` objects) are labelled with their ``repr`` instead
    of raising AttributeError.
    """
    # Imported here because the snippet never brought perf_counter into scope.
    from time import perf_counter

    start = perf_counter()
    result = func(*args, **kwargs)
    duration = perf_counter() - start
    name = getattr(func, '__name__', repr(func))
    print(f'{name} ran in {duration * 1000}ms')
    return result

ProcessPoolExecutor not working for function with multiple arguments - python

I have a dataframe df_full that I am trying to rewrite as a dict() while also doing some stuff over it.
agent locations modal_choices
0 agent_1 'loc1', 'loc2', 'loc3', 'loc2' 'mode_1', 'mode_1', 'mode_2', 'mode_3'
1 agent_2 'loc1', 'loc4', 'loc2', 'loc6' 'mode_2', 'mode_3', 'mode_2', 'mode_3'
I am currently facing a problem while trying to multiprocess the following function format_dict() knowing that I only want to iterate over the agent argument, the three others are supposed to be the same for each iterations. So I added the partial() parameter to "freeze" df, dict_ and list_ but the code returns me an empty dict and an empty list by the end and I don't understand why.
I suppose I haven't written the executor.map() properly. I tried following the methods shown here but it still doesn't return anything.
What could be wrong with my code?
I also printed the time taken by the following script to run with time.perf_counter() and compared it with what is given with tqdm() but the two values don't match. The iteration part is done in 7 seconds (tqdm) while the print of time.perf_counter() shows up after 2.3 minutes.
What would explain the delay for the ending of the with concurrent.futures.ProcessPoolExecutor() as executor:?
I am, unfortunately, still not an expert in python and this is the first time I'm trying to multiprocess something (as the agent list I am working with is massive and would take days to process...). Any help would be greatly appreciated! And please do tell me if informations are missing or if something is not explained properly, I'll edit the post right away.
def format_dict(agent, df, dict_, list_):
try:
dict_[agent] = dict()
toto_ = df.loc[df.agent_ID == agent]
toto_mod = toto_['modal_choices'].apply(lambda x: pd.Series(x.split(',')))
toto_loc = toto_['locations'].apply(lambda x: pd.Series(x.split(',')))
for i in toto_mod:
dict_[agent]['step_{}'.format(i)] = dict()
dict_[agent]['step_{}'.format(i)]['mode'] = toto_mod[i].iloc[0]
dict_[agent]['step_{}'.format(i)]['start'] = toto_loc[counter + 1].iloc[0]
dict_[agent]['step_{}'.format(i)]['name'] = dict_agent_edt[agent]['step_0']['name']
except ValueError:
list_.append(agent)
return dict_, list_
dict_name = dict()
list_name = list()
start = time.perf_counter()
agent = df_full['agent'][:1000]
with concurrent.futures.ProcessPoolExecutor() as executor:
executor.map(partial(format_dict, df=df_full, dict_=dict_name, list_=list_name),
tqdm(agent), chunksize=50)
end = time.perf_counter()
print(f'It took {(end-start)/60} minutes.')
Following @Louis Lac's answer, I modified my script to avoid any concurrency issue, but it still returns an empty dict.
def format_dict(agent, df, dict_):
try:
dict_[agent] = dict()
toto_ = df.loc[df.agent_ID == agent]
(same stuff here)
except ValueError:
pass
return dict_
start = time.perf_counter()
agents = df_full['agent'][:1000]
dict_name = {}
with concurrent.futures.ProcessPoolExecutor() as executor:
executor.map(partial(format_dict, df=df_full, dict_=dict_name),
tqdm(agents), chunksize=50)
end = time.perf_counter()
print(f'It took {(end-start)/60} minutes.')
When using concurrency such as multithreading and multiprocessing, functions that are executed concurrently such as format_dict should not mutate shared state to avoid data races or the mutations should be synchronized.
You could for instance compute all your stuff concurrently first, then sequentially reduce the result into outputs (dict_ and list_):
def format_dict(agent, df):
    """Build the per-step dict for one *agent* without touching shared state.

    Returns ``(dict_, list_)``: ``dict_`` maps ``'step_<i>'`` to a dict with
    'mode', 'start' and 'name' entries, and ``list_`` is None unless a
    ValueError was raised, in which case it is *agent* (``dict_`` then holds
    whatever was filled in before the error).
    """
    def _split_to_series(text):
        # One Series entry per comma-separated piece.
        return pd.Series(text.split(','))

    list_ = None
    try:
        dict_ = {}
        toto_ = df.loc[df.agent_ID == agent]
        toto_mod = toto_['modal_choices'].apply(_split_to_series)
        toto_loc = toto_['locations'].apply(_split_to_series)
        for i in toto_mod:
            step = dict_['step_{}'.format(i)] = {}
            step['mode'] = toto_mod[i].iloc[0]
            # NOTE(review): `counter` and `dict_agent_edt` are not defined in
            # this snippet -- confirm where they come from.
            step['start'] = toto_loc[counter + 1].iloc[0]
            step['name'] = dict_agent_edt[agent]['step_0']['name']
    except ValueError:
        list_ = agent
    return dict_, list_
start = time.perf_counter()
agents = df_full['agent'][:1000]
with concurrent.futures.ProcessPoolExecutor() as executor:
elements = executor.map(partial(format_dict, df=df_full),
tqdm(agents), chunksize=50)
dict_ = {}
list_ = []
for agent, (d, l) in zip(agents, elements):
if l is not None:
list_.append(l)
dict_[agent] = d
end = time.perf_counter()
print(f'It took {(end-start)/60} minutes.')

Running functions depend on return from previous function Python Selenium

I have a test that checks an element on a page: if element A is present, the test (function) returns A and runs another function A. If element B is present, the test (function) returns B and runs another function B. How can I do this in Python? I am thinking of using a dictionary and something like an "if" expression, but I can hardly imagine how to do it. Thanks in advance!
I don't have exact code, just something like this :/
from Pages import SearchHelper
def finding_pop(browser):
pop_click = SearchHelper(browser)
pop_click.go_to_site()
#element = finding_element_function.get_text()
return element
if element =='A':
#function test_A_click must be run
else:
#function test_B_click must be run
def test_A_click(browser):
    """Check the new-window case when the logs carry no bind/ignore parameter.

    The site is opened and the 'performance' logs are filtered. Only when
    neither a 'bind_to' nor an 'ignore_to' value is found does the test
    assert that more than one window handle exists and that the two URLs
    reported by ``opened_in_new_window`` differ; otherwise it asserts
    nothing.
    """
    pop_click = SearchHelper(browser)
    pop_click.go_to_site()
    logs = pop_click.setting_logs_raw('performance')
    result = pop_click.for_logs(pop_click.log_filter, logs)
    binded = pop_click.searching_params_in_logs_pops(result, 'bind_to')
    ignored = pop_click.searching_params_in_logs_pops(result, 'ignore_to')
    # Identity check: the intent is "no value was found", not "equals None".
    if binded is None and ignored is None:
        handles, url, url1 = pop_click.opened_in_new_window()
        assert handles > 1
        assert url != url1
def test_B_click(browser):
    """Check the current-window case when the logs carry no bind/ignore parameter.

    The site is opened and the 'performance' logs are filtered. Only when
    neither a 'bind_to' nor an 'ignore_to' value is found does the test print
    the two URLs reported by ``opened_in_current_window`` and assert that
    more than one handle exists and that the URLs differ; otherwise it
    asserts nothing.
    """
    pop_click = SearchHelper(browser)
    pop_click.go_to_site()
    logs = pop_click.setting_logs_raw('performance')
    result = pop_click.for_logs(pop_click.log_filter, logs)
    binded = pop_click.searching_params_in_logs_pops(result, 'bind_to')
    ignored = pop_click.searching_params_in_logs_pops(result, 'ignore_to')
    # Identity check: the intent is "no value was found", not "equals None".
    if binded is None and ignored is None:
        handles, url, url1 = pop_click.opened_in_current_window()
        print(url, url1)
        assert handles > 1
        assert url != url1
Remember that the return statement should always be at the end of a function, because the function ends at the return statement and nothing after it gets executed. So in the finding_pop function, when you write return element, that is where your function ends. The if/else statements below return element are never executed.
For this particular scenario I think you don't need a return statement inside your finding_pop function, since your finding_pop function returns element and you then perform the if/else on element inside the same function.
You can simply change your finding_pop function to:
def finding_pop(browser):
    """Open the site, read the element text, and run the matching click test.

    Runs ``test_A_click`` when the text is "A" and ``test_B_click`` when it
    is "B"; any other text runs neither. Nothing is returned.
    """
    pop_click = SearchHelper(browser)
    pop_click.go_to_site()
    element = finding_element_function.get_text()
    if element == "A":
        test_A_click(browser)
    elif element == "B":  # Python spells this `elif`; `elseif` is a syntax error
        test_B_click(browser)
But if you are really in need of a return statement then you need to introduce a separate runner function that will take in the return of finding_pop as an input and then perform if/else to choose the next function to perform.
So it'll be as:
def finding_pop(browser):
pop_click = SearchHelper(browser)
pop_click.go_to_site()
element = finding_element_function.get_text()
return element
def chose_func_A_or_b(browser):
element = finding_pop(browser)
if element == "A":
test_A_click(browser)
if element == "B":
test_B_click(browser)

Custom method for unlimited *args with set() function usage?

I am working on some project, and we have lots of some code usage like this;
# filtering fields are different from each other, please ignore the similarity below
def function1(self, param):
list_x = Model1.objects.filter(foo=bar, bla=bla).values_list('field', flat=True)
list_y = Model2.objects.filter(foo=bar, bla=bla).values_list('field', flat=True)
lists_to_delete = set(list_x) - set(list_y)
# here is the code line with set() that needed to be method
self._delete(lists_to_delete)
def function2(self, param):
    """Delete the 'field' values found in Model3 but in neither Model4 nor Model5."""
    list_z = Model3.objects.filter(foo=bar, bla=bla).values_list('field', flat=True)
    list_q = Model4.objects.filter(foo=bar, bla=bla).values_list('field', flat=True).distinct()
    list_w = Model5.objects.filter(foo=bar, bla=bla).values_list('field', flat=True)
    # Subtract the lists queried above; list_x / list_y are not defined in
    # this function.
    lists_to_delete = set(list_z) - set(list_q) - set(list_w)
    # here is the code line with set() that needed to be method
    self._delete(lists_to_delete)
... # other functions continues like above
...
...
So, as you can see we have same usage with set() function. And I need to change this usage with custom method. I tried to write a method like this;
def _get_deleted_lists(self, *args):
value = set()
for arg in args:
value |= set(arg)
return value
and usage will be change like;
lists_to_delete = self._get_deleted_lists(list_x, list_y, ...)
instead of this;
lists_to_delete = set(list_x) - set(list_y)
But my custom method not return same value as before. How can I achieve this?
| operation on sets returns their union. What you want is the difference (-)
def _get_deleted_lists(*lists):
if not lists:
return set()
result = set(lists[0])
for l in lists[1:]:
result -= set(l)
return result

How to assign and return multiple values from Django cache?

Not sure if this is a django issue or just a python issue. I'm trying to get cache working by accepting the values from a function that returns multiple values.
Currently the page is giving me this error on the heavy_view function: "'NoneType' object is not iterable"
Is there any way to get the "counter, college_user, name_college_list = cache.get(cache_key)" line working, and then check whether the result is empty or not? Thanks!
#Views.py
def filter_results():
#some code here
return counter, college_user, name_college_List
def heavy_view(request):
cache_key = 'facebookcache'
cache_time = 180 # time to live in seconds
counter, college_user, name_college_list = cache.get(cache_key)
if not cache.get(cache_key):
result = filter_results() # some calculations here
cache.set(cache_key, result, cache_time)
return result
You can do something like this:
# Fetch first and unpack last, so a cache miss (None) is never unpacked.
result = cache.get(cache_key)
if result is None:
    # Cache miss: compute the values and store them for next time.
    result = filter_results()
    cache.set(cache_key, result, cache_time)
counter, college_user, name_college_list = result
It is a Python thing
when you do
counter, college_user, name_college_list = cache.get(cache_key)
the first time no value will be there -> None is returned.
Does this work?
result = cache.get(cache_key)
if result is None:
result = filter_results()
cache.set(cache_key, result, cache_time)
else:
counter, college_user, name_college_list = result
return result

Categories