How to pass stacktrace between processes in Python?

I'm trying to create a Python decorator that takes a function with args and kwargs, executes it in a new process, shuts the process down, and returns whatever the function returned, re-raising the same exception, if any.
For now, my decorator handles functions fine if they raise no exceptions, but it fails to pass the traceback back. How do I pass it to the parent process?
from functools import wraps
from multiprocessing import Process, Queue
import sys

def process_wrapper(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # queue for communicating between parent and child processes
        q = Queue()

        def func_to_q(_q: Queue, *_args, **_kwargs):
            # do the same as func, but put the result into the queue;
            # also put an exception there, if any
            try:
                _res = func(*_args, **_kwargs)
                _q.put(_res)
            except:
                _q.put(sys.exc_info())

        # start another process and wait for it to join
        p = Process(target=func_to_q, args=(q,) + args, kwargs=kwargs)
        p.start()
        p.join()
        # get the result from the queue and return it, or raise if it's an exception
        res = q.get(False)
        if isinstance(res, tuple) and isinstance(res[0], Exception):
            raise res[1].with_traceback(res[2])
        else:
            return res
    return wrapper

if __name__ == '__main__':
    @process_wrapper
    def ok():
        return 'ok'

    @process_wrapper
    def trouble():
        def inside():
            raise UserWarning
        inside()

    print(ok())
    print(trouble())
I expect the result to be something like:
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 44, in trouble
    inside()
  File "/temp.py", line 43, in inside
    raise UserWarning
UserWarning

Process finished with exit code 1
But it seems the child process cannot put the traceback into the queue, and I get the following:
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 26, in wrapper
    res = q.get(False)
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 107, in get
    raise Empty
queue.Empty

Process finished with exit code 1
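Note the underlying reason: traceback objects are not picklable, so the child's _q.put(sys.exc_info()) fails inside the queue's feeder thread and nothing ever reaches the parent, which is why the parent sees queue.Empty rather than the payload. A quick demonstration (on CPython 3.x; the exact error message varies by version):

import pickle
import sys

try:
    raise UserWarning
except UserWarning:
    exc_type, exc_value, exc_tb = sys.exc_info()

pickle.dumps(exc_value)  # the exception instance pickles fine
pickle.dumps(exc_tb)     # TypeError: can't pickle traceback objects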
Also, if the child puts only the exception itself into the queue, _q.put(sys.exc_info()[1]), the parent gets it from there and raises it, but with a new stack trace (note the missing call to inside()):
ok
Traceback (most recent call last):
  File "/temp.py", line 47, in <module>
    print(trouble())
  File "/temp.py", line 28, in wrapper
    raise res
UserWarning

Process finished with exit code 1

Take a look at multiprocessing/pool.py and the stringification hack it uses for sending exceptions to the parent. You can use multiprocessing.pool.ExceptionWithTraceback from there.
Here is just enough code to demonstrate the basic principle:
from multiprocessing import Process, Queue
from multiprocessing.pool import ExceptionWithTraceback

def worker(outqueue):
    try:
        result = (True, 1 / 0)  # will raise ZeroDivisionError
    except Exception as e:
        e = ExceptionWithTraceback(e, e.__traceback__)
        result = (False, e)
    outqueue.put(result)

if __name__ == '__main__':
    q = Queue()
    p = Process(target=worker, args=(q,))
    p.start()
    success, value = q.get()
    p.join()
    if success:
        print(value)
    else:
        raise value  # raise again
Output:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "/home/...", line 7, in worker
    result = (True, 1 / 0)  # will raise ZeroDivisionError
ZeroDivisionError: division by zero
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/...", line 23, in <module>
    raise value
ZeroDivisionError: division by zero

Process finished with exit code 1
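Applied to the decorator from the question, a minimal sketch could look like this (assuming Python 3, where ExceptionWithTraceback lives in multiprocessing.pool as an undocumented helper, so it may move between versions):

from functools import wraps
from multiprocessing import Process, Queue
from multiprocessing.pool import ExceptionWithTraceback  # undocumented helper

def process_wrapper(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        q = Queue()

        def func_to_q(_q, *_args, **_kwargs):
            try:
                _q.put((True, func(*_args, **_kwargs)))
            except Exception as e:
                # wrap so the traceback survives pickling
                _q.put((False, ExceptionWithTraceback(e, e.__traceback__)))

        p = Process(target=func_to_q, args=(q,) + args, kwargs=kwargs)
        p.start()
        success, value = q.get()  # block until the child produces a result
        p.join()
        if success:
            return value
        raise value  # re-raised with the remote traceback attached as __cause__
    return wrapper

Getting from the queue before joining also avoids the deadlock that can occur when the child blocks on putting a large result into the queue.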

How to recover the return value of a function passed to multiprocessing.Process?

I have looked at this question to get started, and it works just fine: How can I recover the return value of a function passed to multiprocessing.Process?
But in my case I would like to write a small tool that connects to many computers and gathers some statistics, each stat gathered within a Process to make it snappy. But as soon as I wrap the multiprocessing command in a class for a machine, it fails.
Here is my code:
import multiprocessing
import pprint
import subprocess

def run_task(command):
    p = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True, shell=False)
    result = p.communicate()[0]
    return result

MACHINE_NAME = "cptr_name"
A_STAT = "some_stats_A"
B_STAT = "some_stats_B"

class MachineStatsGatherer():
    def __init__(self, machineName):
        self.machineName = machineName
        manager = multiprocessing.Manager()
        self.localStats = manager.dict()  # creating a shared resource for the sub processes to use
        self.localStats[MACHINE_NAME] = machineName

    def gatherStats(self):
        self.runInParallel(
            self.GatherSomeStatsA,
            self.GatherSomeStatsB,
        )
        self.printStats()

    def printStats(self):
        pprint.pprint(self.localStats)

    def runInParallel(self, *fns):
        processes = []
        for fn in fns:
            process = multiprocessing.Process(target=fn, args=(self.localStats))
            processes.append(process)
            process.start()
        for process in processes:
            process.join()

    def GatherSomeStatsA(self, returnStats):
        # do some remote command, simplified here for the sake of debugging
        result = "Windows"
        returnStats[A_STAT] = result.find("Windows") != -1

    def GatherSomeStatsB(self, returnStats):
        # do some remote command, simplified here for the sake of debugging
        result = "Windows"
        returnStats[B_STAT] = result.find("Windows") != -1

def main():
    machine = MachineStatsGatherer("SOMEMACHINENAME")
    machine.gatherStats()
    return

if __name__ == '__main__':
    main()
And here is the error message:
Traceback (most recent call last):
  File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "d:\workdir\trunks6\Tools\VTKAppTester\Utils\NXMachineMonitorShared.py", line 45, in GatherSomeStatsA
    returnStats[A_STAT] = result.find("Windows") != -1
TypeError: 'str' object does not support item assignment
Process Process-3:
Traceback (most recent call last):
  File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 297, in _bootstrap
    self.run()
  File "C:\Users\mesirard\AppData\Local\Programs\Python\Python37\lib\multiprocessing\process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "d:\workdir\trunks6\Tools\VTKAppTester\Utils\NXMachineMonitorShared.py", line 50, in GatherSomeStatsB
    returnStats[B_STAT] = result.find("Windows") != -1
TypeError: 'str' object does not support item assignment
The issue is coming from this line:
process = multiprocessing.Process(target=fn, args=(self.localStats))
It needs a trailing comma at the end of args to make it a one-element tuple, like so:
process = multiprocessing.Process(target=fn, args=(self.localStats,))
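Without the comma, (self.localStats) is just a parenthesized expression, not a tuple, so Process builds its argument tuple by iterating over the dict, and each target function receives a key string ("cptr_name") instead of the dict itself, which is exactly the 'str' object does not support item assignment error above. A small sketch with a plain dict (a manager dict proxy iterates the same way):

d = {"cptr_name": "SOMEMACHINENAME"}

args = (d)    # parentheses only: args is still the dict itself
print(type(args))   # <class 'dict'>
print(tuple(args))  # ('cptr_name',) -- iterating a dict yields its keys

args = (d,)   # trailing comma: a one-element tuple containing the dict
print(type(args))   # <class 'tuple'>
print(tuple(args))  # ({'cptr_name': 'SOMEMACHINENAME'},)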

RuntimeError: Never call result.get() within a task

My task (it checks the payment status in Sberbank; if there is no capture yet, it retries the check):
from ....celeryconf import app
from . import client as sberbank
from ...models import Payment, Transaction

@app.task(bind=True, default_retry_delay=60, time_limit=1200)
def check_status_sberbank_task(self, order_id, connection_params):
    sberbank_client = sberbank.Client(auth=(connection_params['login'], connection_params['password']),
                                      sandbox=connection_params['sandbox_mode'])
    response = sberbank_client.payment.get_status(order_id=order_id)
    txn = Transaction.objects.get(token=order_id)
    if response['actionCode'] == 0:
        txn.is_success = True
        txn.save()
        payment = Payment.objects.get(pk=txn.payment_id)
        payment.charge_status = 'fully-charged'
        payment.captured_amount = payment.total
        payment.save()
        return 'Success pay on Sberbank for ' + str(order_id)
    else:
        self.retry(countdown=60)
In the log file I have:
ERROR celery.app.trace Task saleor.payment.gateways.sberbank.tasks.check_status_sberbank_task[bb384815-4a5b-49d7-bc29-114707f072b1] raised unexpected: RuntimeError('Never call result.get() within a task!\nSee http://docs.celeryq.org/en/latest/userguide/tasks.html#task-synchronous-subtasks\n',) [PID:26869:Thread-825]
Traceback (most recent call last):
  File "/home/korolev/saleor/lib/python3.6/site-packages/celery/app/trace.py", line 385, in trace_task
    R = retval = fun(*args, **kwargs)
  File "/home/korolev/saleor/saleor/payment/gateways/sberbank/tasks.py", line 26, in check_status_sberbank_task
    self.retry(countdown=60)
  File "/home/korolev/saleor/lib/python3.6/site-packages/celery/app/task.py", line 715, in retry
    S.apply().get()
  File "/home/korolev/saleor/lib/python3.6/site-packages/celery/result.py", line 1015, in get
    assert_will_not_block()
  File "/home/korolev/saleor/lib/python3.6/site-packages/celery/result.py", line 41, in assert_will_not_block
    raise RuntimeError(E_WOULDBLOCK)
RuntimeError: Never call result.get() within a task! See http://docs.celeryq.org/en/latest/userguide/tasks.html#task-synchronous-subtasks
How do I fix this error?
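One clue worth noting (a reading of the traceback, not an answer from the original thread): the frame at celery/app/task.py line 715 shows retry() calling S.apply().get(), and that is the code path Task.retry() takes when the task is being executed eagerly. In other words, the error suggests the task is running with eager mode enabled; with a real broker-backed worker, retry() re-queues the task instead of calling .get(). The setting in question (names depend on the Celery settings style in use):

# When eager mode is on, self.retry() executes the retried task inline
# via S.apply().get(), which raises "Never call result.get() within a task!"
task_always_eager = False      # new-style lowercase setting
# CELERY_ALWAYS_EAGER = False  # old-style uppercase equivalent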

Class variable in multiprocessing

Here is my code:
import multiprocessing
import dill

class Some_class():
    class_var = 'Foo'

    def __init__(self, param):
        self.name = param

    def print_name(self):
        print("we are in object " + self.name)
        print(Some_class.class_var)

def run_dill_encoded(what):
    fun, args = dill.loads(what)
    return fun(*args)

def apply_async(pool, fun, args):
    return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))

if __name__ == '__main__':
    list_names = [Some_class('object_1'), Some_class('object_2')]
    pool = multiprocessing.Pool(processes=4)
    results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
    output = [p.get() for p in results]
    print(output)
It returns this error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
  File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
    return fun(*args)
  File "C:/...temp_obj_output_standard.py", line 14, in print_name
    print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:/...temp_obj_output_standard.py", line 31, in <module>
    output = [p.get() for p in results]
  File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
    output = [p.get() for p in results]
  File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
    raise self._value
NameError: name 'Some_class' is not defined

Process finished with exit code 1
The code works fine without the line print(Some_class.class_var). What is wrong with accessing class variables? Both objects should have it, and I don't think the processes should conflict over it. Am I missing something?
Any suggestions on how to troubleshoot it? Do not worry about run_dill_encoded and apply_async; I am using this solution until I can compile multiprocess on Python 3.x.

KeyError: 0 using multiprocessing in python

I have the following code, in which I try to call a function compute_cluster that does some computations and writes the results to a txt file (each process writes its results to a different txt file independently). However, when I run the following code:
from multiprocessing import Pool

def main():
    p = Pool(19)
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
    p.close()

if __name__ == "__main__":
    main()
it crashes with the following errors:
  File "RMSD_calc.py", line 124, in <module>
    main()
  File "RMSD_calc.py", line 120, in main
    p.map(compute_cluster, [(l, r) for l in range(6, 25) for r in range(1, 4)])
  File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 225, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/local/lib/python2.7/multiprocessing/pool.py", line 522, in get
    raise self._value
KeyError: 0
When I searched online for the meaning of "KeyError: 0", I didn't find anything helpful, so any suggestions as to why this error happens are highly appreciated.
The KeyError happens in compute_cluster() in a child process, and p.map() re-raises it for you in the parent:
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

if __name__ == "__main__":
    p = Pool()
    p.map(f, [None])
Output
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 9, in <module>
    p.map(f, [None])
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 227, in map
    return self.map_async(func, iterable, chunksize).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 528, in get
    raise self._value
KeyError: 0
To see the full traceback, catch the exception in the child process:
import logging
from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def f_mp(args):
    try:
        return f(args)
    except Exception:
        logging.exception("f(%r) failed" % (args,))

if __name__ == "__main__":
    p = Pool()
    p.map(f_mp, [None])
Output
ERROR:root:f(None) failed
Traceback (most recent call last):
  File "raise-exception-in-child.py", line 10, in f_mp
    return f(args)
  File "raise-exception-in-child.py", line 6, in f
    d[0]  # <-- raises KeyError
KeyError: 0
It shows that d[0] caused the exception.
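On Python 3 the bare re-raise is already more informative, because the pool attaches the child's traceback to the re-raised exception as a RemoteTraceback. If you still want to handle the failure in the parent without crashing the map, one option (a sketch using Python 3's standard Pool API, not part of the original answer) is the error_callback parameter of map_async:

from multiprocessing import Pool

def f(args):
    d = {}
    d[0]  # <-- raises KeyError

def on_error(exc):
    # called in the parent with the exception from the child;
    # on Python 3 its __cause__ carries the remote traceback text
    print("child failed:", repr(exc))
    print(exc.__cause__)

if __name__ == "__main__":
    with Pool() as p:
        result = p.map_async(f, [None], error_callback=on_error)
        result.wait()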

"Uncatching" an exception in python

How should I "rethrow" an exception, that is, suppose:
I try something in my code, and unfortunately it fails.
I try some "clever" workaround, which happens to also fail this time
If I throw the exception from the (failing) workaround, it's going to be pretty darn confusing for the user, so I think it may be best to rethrow the original exception (?), with the descriptive traceback it comes with (about the actual problem)...
Note: the motivating example for this is when calling np.log(np.array(['1'], dtype=object)), where it tries a witty workaround and gives an AttributeError (it's "really" a TypeError).
One way I can think of is just to re-call the offending function, but this seems dodgy (for one thing, the original function may theoretically behave differently the second time it's called):
Okay this is one awful example, but here goes...
def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()
Suppose I did this:
try:
    a()
except:
    # attempt witty workaround
    g()
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-4-c76b7509b315> in <module>()
      3 except:
      4     # attempt witty workaround
----> 5     g()
      6

<ipython-input-2-e641f2f9a7dc> in g()
      4
      5 def g():
----> 6     raise Exception("coconut")
      7
      8

Exception: coconut
Well, the problem doesn't really lie with the coconut at all, but the sparrow:
try:
    a()
except:
    # attempt witty workaround
    try:
        g()
    except:
        # workaround failed, I want to rethrow the exception from calling a()
        a()  # ideally don't want to call a() again
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-4-e641f2f9a7dc> in <module>()
     19 except:
     20     # workaround failed, I want to rethrow the exception from calling a()
---> 21     a()  # ideally don't want to call a() again

<ipython-input-3-e641f2f9a7dc> in a()
      8
      9 def a():
---> 10     f()
     11
     12

<ipython-input-1-e641f2f9a7dc> in f()
      1 def f():
----> 2     raise Exception("sparrow")
      3
      4
      5 def g():

Exception: sparrow
Is there a standard way to deal with this, or am I thinking about it completely wrong?
If you want to make it appear to the end user that you never called g(), then you need to store the traceback from the first error, call the second function, and then throw the original with the original traceback (otherwise, in Python 2, a bare raise re-raises the second exception rather than the first). The problem is that there is no 2/3-compatible way to raise with a traceback, so you have to wrap the Python 2 version in an exec statement (since it's a SyntaxError in Python 3).
Here's a function that lets you do that (I added this to the pandas codebase recently):
import sys

if sys.version_info[0] >= 3:
    def raise_with_traceback(exc, traceback=Ellipsis):
        if traceback == Ellipsis:
            _, _, traceback = sys.exc_info()
        raise exc.with_traceback(traceback)
else:
    # this version of raise is a syntax error in Python 3
    exec("""
def raise_with_traceback(exc, traceback=Ellipsis):
    if traceback == Ellipsis:
        _, _, traceback = sys.exc_info()
    raise exc, None, traceback
""")

raise_with_traceback.__doc__ = (
    """Raise exception with existing traceback.
If traceback is not passed, uses sys.exc_info() to get traceback."""
)
And then you can use it like this (I also changed the Exception types for clarity).
def f():
    raise TypeError("sparrow")

def g():
    raise ValueError("coconut")

def a():
    f()

try:
    a()
except TypeError as e:
    import sys
    # save the traceback from the original exception
    _, _, tb = sys.exc_info()
    try:
        # attempt witty workaround
        g()
    except:
        raise_with_traceback(e, tb)
And in Python 2, you only see a() and f():
Traceback (most recent call last):
  File "test.py", line 40, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 31, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
But in Python 3, it still notes that there was an additional exception, because you are raising within its except clause (which flips the order of the errors and makes it much more confusing for the user):
Traceback (most recent call last):
  File "test.py", line 38, in <module>
    g()
  File "test.py", line 25, in g
    raise ValueError("coconut")
ValueError: coconut

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "test.py", line 40, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 6, in raise_with_traceback
    raise exc.with_traceback(traceback)
  File "test.py", line 31, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
If you absolutely want it to look like the g() Exception never happened in both Python 2 and Python 3, you need to check that you are out of the except clause first:
try:
    a()
except TypeError as e:
    import sys
    # save the traceback from the original exception
    _, _, tb = sys.exc_info()
    handled = False
    try:
        # attempt witty workaround
        g()
        handled = True
    except:
        pass
    if not handled:
        raise_with_traceback(e, tb)
Which gets you the following traceback in Python 2:
Traceback (most recent call last):
  File "test.py", line 56, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 43, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
And this traceback in Python 3:
Traceback (most recent call last):
  File "test.py", line 56, in <module>
    raise_with_traceback(e, tb)
  File "test.py", line 6, in raise_with_traceback
    raise exc.with_traceback(traceback)
  File "test.py", line 43, in <module>
    a()
  File "test.py", line 28, in a
    f()
  File "test.py", line 22, in f
    raise TypeError("sparrow")
TypeError: sparrow
It does add an additional non-useful line of traceback that shows the raise exc.with_traceback(traceback) to the user, but it is relatively clean.
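On Python 3 alone (not covered in this answer, and no help if you need Python 2 compatibility), exception chaining can be suppressed directly with raise ... from None, which avoids both the "During handling of the above exception" block and the exec trick. A minimal sketch:

import sys

def f():
    raise TypeError("sparrow")

def g():
    raise ValueError("coconut")

try:
    f()
except TypeError as e:
    _, _, tb = sys.exc_info()
    try:
        g()  # witty workaround fails too
    except Exception:
        # 'from None' sets __suppress_context__, so only the sparrow
        # traceback (plus this raise line) is shown to the user
        raise e.with_traceback(tb) from None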
Here is something totally nutty that I wasn't sure would work, but it works in both Python 2 and 3. (It does, however, require the exception to be encapsulated in a function...)
def f():
    print("Fail!")
    raise Exception("sparrow")

def g():
    print("Workaround fail.")
    raise Exception("coconut")

def a():
    f()

def tryhard():
    ok = False
    try:
        a()
        ok = True
    finally:
        if not ok:
            try:
                g()
                return  # "cancels" sparrow Exception by returning from finally
            except:
                pass
>>> tryhard()
Fail!
Workaround fail.
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 4, in tryhard
  File "<stdin>", line 2, in a
  File "<stdin>", line 3, in f
Exception: sparrow
That's the correct exception and the right stack trace, with no hackery.
>>> def g(): print "Worked around."  # workaround is successful in this case
>>> tryhard()
Fail!
Worked around.

>>> def f(): print "Success!"  # normal method works
>>> tryhard()
Success!
Ian Bicking has a nice primer on re-raising.
As a corollary, my rule is to only catch exceptions that the code knows how to deal with. Very few methods actually meet this rule. For example, if I'm reading a file and an IOError is raised, there is very little that method could reasonably do.
As a corollary to that, catching exceptions in "main" is reasonable if you can return to a good state and you don't just want to dump the user out; this really only applies to interactive programs.
The relevant section from the primer being the update:
try:
    a()
except:
    exc_info = sys.exc_info()
    try:
        g()
    except:
        # If this happens, it clobbers exc_info,
        # which is why we had to save it above
        import traceback
        print >> sys.stderr, "Error in revert_stuff():"
        # py3: print("Error in revert_stuff():", file=sys.stderr)
        traceback.print_exc()
    raise exc_info[0], exc_info[1], exc_info[2]
In Python 3, the final raise could be written as:
ei = exc_info[1]
ei.__traceback__ = exc_info[2]
raise ei
In Python 3 (specifically tested on 3.3.2), this all works better and there's no need to save sys.exc_info. Don't re-raise the original exception within the second exception handler. Just note whether the second attempt failed, and raise the original in the scope of the original handler, like so:
#!python3
try:
    a()
except Exception:
    g_failed = False
    try:
        g()
    except Exception:
        g_failed = True
    if g_failed:
        raise
Python 3 output correctly raising "sparrow" and showing traceback through a() and f():
Traceback (most recent call last):
  File "x3.py", line 13, in <module>
    a()
  File "x3.py", line 10, in a
    f()
  File "x3.py", line 4, in f
    raise Exception("sparrow")
Exception: sparrow
However, the same script on Python 2 incorrectly raises "coconut" and only shows g():
Traceback (most recent call last):
  File "x3.py", line 17, in <module>
    g()
  File "x3.py", line 7, in g
    raise Exception("coconut")
Exception: coconut
Here are the modifications to make Python 2 work correctly:
#!python2
import sys

try:
    a()
except Exception:
    exc = sys.exc_info()
    try:
        g()
    except Exception:
        raise exc[0], exc[1], exc[2]  # Note: doesn't care that it is nested.
Now Python 2 correctly shows "sparrow" and the traceback through both a() and f():
Traceback (most recent call last):
  File "x2.py", line 14, in <module>
    a()
  File "x2.py", line 11, in a
    f()
  File "x2.py", line 5, in f
    raise Exception("sparrow")
Exception: sparrow
Capture the error in your except clause, then manually re-raise it later. Capture the traceback, and reprint it via the traceback module.
import sys
import traceback

def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()

try:
    print "trying a"
    a()
except Exception as e:
    print sys.exc_info()
    (_, _, tb) = sys.exc_info()
    print "trying g"
    try:
        g()
    except:
        print "\n".join(traceback.format_tb(tb))
        raise e
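Note that raise e starts a fresh traceback here, which is why the original one is reprinted manually first; on Python 3 the saved traceback could be attached instead (a one-line variant, not part of the original answer):
raise e.with_traceback(tb)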
In Python 3, within a function, this can be done in a very slick way, following up the answer from @Mark Tolonen, who uses a boolean. You can't do this outside a function because there's no way to break out of the outer try statement: the function is needed for the return.
#!python3
def f():
    raise Exception("sparrow")

def g():
    raise Exception("coconut")

def a():
    f()

def h():
    try:
        a()
    except:
        try:
            g()
            return  # Workaround succeeded!
        except:
            pass  # Oh well, that didn't work.
        raise  # Re-raises *first* exception.

h()
This results in:
Traceback (most recent call last):
  File "uc.py", line 23, in <module>
    h()
  File "uc.py", line 14, in h
    a()
  File "uc.py", line 10, in a
    f()
  File "uc.py", line 4, in f
    raise Exception("sparrow")
Exception: sparrow
...and if instead g succeeds:
def g(): pass
...then it doesn't raise an exception.
try:
    1/0  # will raise ZeroDivisionError
except Exception as first:
    try:
        x/1  # will raise NameError
    except Exception as second:
        raise first  # will re-raise ZeroDivisionError
