I have written a Fabric script with boto to install an R application on an AWS instance (Fedora 23).
All the commands using the run and sudo functions work as expected,
except this one:
#parallel
def install_DvD():
    # with settings(hide('warnings', 'running', 'stdout', 'stderr'), warn_only=True):
    cmd0 = 'R CMD BATCH %s/DvDdependencies.R' % (DvDpackage_location)
    run(cmd0)
As you may have noticed, I tried using warn_only=True, and that did not help. The installation completes successfully without errors; I checked that manually by logging into the instance and eyeballing the DvDdependencies.Rout file.
I think that, for reasons unknown to me, the R CMD BATCH command does not return control to Fabric.
The traceback output from Ctrl-C'ing the Fabric process on my local system is:
[ec2-54-172-154-181.compute-1.amazonaws.com] run: R CMD BATCH ~/DvDdependencies.R
[ec2-54-165-109-62.compute-1.amazonaws.com] run: R CMD BATCH ~/DvDdependencies.R
^C
Stopped.
!!! Parallel execution exception under host u'ec2-54-165-109-62.compute-1.amazonaws.com':
!!! Parallel execution exception under host u'ec2-54-172-154-181.compute-1.amazonaws.com':
Process ec2-54-172-154-181.compute-1.amazonaws.com:
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
Process ec2-54-165-109-62.compute-1.amazonaws.com:
self._target(*self._args, **self._kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/tasks.py", line 242, in inner
Traceback (most recent call last):
File "/usr/lib64/python2.7/multiprocessing/process.py", line 258, in _bootstrap
submit(task.run(*args, **kwargs))
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/tasks.py", line 174, in run
return self.wrapped(*args, **kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/decorators.py", line 181, in inner
self.run()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/tasks.py", line 242, in inner
return func(*args, **kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/fabfile.py", line 70, in install_DvD
run(cmd0)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/network.py", line 649, in host_prompting_wrapper
submit(task.run(*args, **kwargs))
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/tasks.py", line 174, in run
return self.wrapped(*args, **kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/decorators.py", line 181, in inner
return func(*args, **kwargs)
return func(*args, **kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 1056, in run
File "/home/eyebell/local_bin/healX/DvD-installation/fabfile.py", line 70, in install_DvD
run(cmd0)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/network.py", line 649, in host_prompting_wrapper
return func(*args, **kwargs)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 1056, in run
shell_escape=shell_escape)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 925, in _run_command
stderr=stderr, timeout=timeout)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 811, in _execute
time.sleep(ssh.io_sleep)
KeyboardInterrupt
shell_escape=shell_escape)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 925, in _run_command
stderr=stderr, timeout=timeout)
File "/home/eyebell/local_bin/healX/DvD-installation/py2fabvirt/lib/python2.7/site-packages/fabric/operations.py", line 811, in _execute
time.sleep(ssh.io_sleep)
KeyboardInterrupt
The complete script is here in my GitHub repo, along with the location of DvDdependencies.R (the script that I want to install).
Any comments, help, or pointers in the right direction are appreciated.
I am closing this, as the issue is with this script only.
The installation method for the DvD tool has also been updated, so this is no longer of interest.
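For anyone hitting the same symptom: a common workaround when Fabric blocks on a long-running remote command is to detach the job on the remote side so run() can return immediately. A minimal sketch, assuming the same DvDpackage_location variable and run import as the fabfile above (the nohup/redirect incantation and pty=False are a general recipe, not something from the original script):

#parallel
def install_DvD_detached():
    # Detach the R job with nohup; redirecting stdin/stdout/stderr is
    # needed, otherwise SSH may hold the channel open until R exits.
    cmd0 = ('nohup R CMD BATCH %s/DvDdependencies.R '
            '> /dev/null 2>&1 < /dev/null &' % DvDpackage_location)
    # pty=False keeps the backgrounded process alive after the remote
    # shell exits.
    run(cmd0, pty=False)

The trade-off is that Fabric no longer knows when the installation finishes; you would poll for DvDdependencies.Rout (or a sentinel file) in a follow-up task.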
I'm having trouble trying to run a few loops in parallel when using Pari via cypari2. I'll include a couple of small working examples along with the tracebacks in case anyone has some insight.
Example 1 -- using joblib:
from cypari2 import Pari
from joblib import Parallel, delayed

def AddOne(v):
    return v + pari.one()

pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)

# works
newVec = Parallel(n_jobs=1)(delayed(AddOne)(i) for i in vec)
print(newVec)

# doesn't work
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
print(newVec2)
The output:
[x_1, x_2]
[x_1 + 1, x_2 + 1]
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 247, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 240, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "/usr/lib/python3/dist-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 538, in dump
return Pickler.dump(self, obj)
File "stringsource", line 2, in cypari2.pari_instance.Pari.__reduce_cython__
TypeError: no default __reduce__ due to non-trivial __cinit__
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "min_jake_joblib.py", line 16, in <module>
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 1016, in __call__
self.retrieve()
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 908, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/usr/lib/python3/dist-packages/joblib/_parallel_backends.py", line 554, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
_pickle.PicklingError: Could not pickle the task to send it to the workers.
Seems to be a problem with pickling the Pari objects, but is there any way around it?
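One generic way around unpicklable Cython objects (a sketch of my own, not from the original post) is to ship plain strings to the workers and rebuild the Pari objects inside each worker, so nothing unpicklable ever crosses the process boundary:

from cypari2 import Pari
from joblib import Parallel, delayed

def add_one_from_str(s):
    # Each worker builds its own Pari instance; only str is serialized.
    pari = Pari()
    return str(pari(s) + pari.one())

if __name__ == '__main__':
    pari = Pari()
    vec = [str(pari('x_1')), str(pari('x_2'))]
    newVec2 = Parallel(n_jobs=2)(delayed(add_one_from_str)(s) for s in vec)
    print(newVec2)  # ['x_1 + 1', 'x_2 + 1']

The round-trip through str costs some conversion time, but it sidesteps the __reduce_cython__ error entirely.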
Example 2 -- using multiprocessing:
from cypari2 import Pari
import multiprocessing

def AddOne(v):
    return v + pari.one()

pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)

# doesn't work
if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=2)  # doesn't matter how many I use
    newVec = pool.map(AddOne, (i for i in vec))
    print(newVec)
It segfaults but doesn't exit completely on its own, so I have to use Ctrl-C to kill it. The output:
[x_1, x_2]
Exception in thread Thread-3:
Traceback (most recent call last):
File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/usr/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 576, in _handle_results
task = get()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "cypari2/gen.pyx", line 4705, in cypari2.gen.objtogen
File "cypari2/gen.pyx", line 4812, in cypari2.gen.objtogen
File "cypari2/convert.pyx", line 557, in cypari2.convert.PyObject_AsGEN
cysignals.signals.SignalError: Segmentation fault
^CProcess ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
File "min_jake_multiprocessing.py", line 14, in <module>
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/queues.py", line 356, in get
res = self._reader.recv_bytes()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock:
File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
KeyboardInterrupt
newVec = pool.map(AddOne, (i for i in vec))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.8/multiprocessing/pool.py", line 765, in get
self.wait(timeout)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 762, in wait
self._event.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 558, in wait
signaled = self._cond.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 302, in wait
waiter.acquire()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
finalizer()
File "/usr/lib/python3.8/multiprocessing/util.py", line 224, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 695, in _terminate_pool
raise AssertionError(
AssertionError: Cannot have cache with result_hander not alive
I suppose someone will tell me to use sympy or some other symbolic algebra package instead, but the symbolic algebra I need to do is quite complex and Pari can handle it extremely well. However, in the end I'd like to be able to process a queue of class objects that contain Pari objects in parallel. Any thoughts/suggestions are appreciated.
Well, this isn't a full answer, but it works for me so I wanted to share in case anyone else runs into this issue.
The first issue appears to be that the versions of libpari-dev and pari-gp in the apt repository were too old: apt carries version 2.11, whereas the version in Pari's git repository is 2.14. Uninstalling those and following the instructions from here to install from source fixed most of my problems.
Interestingly, I still needed to install libpari-gmp-tls6 from the apt repository to get things to work. After that, I was able to run the test examples above. The example using multiprocessing ran successfully without modification, but the example using joblib required the "threading" backend in order to run.
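For reference, the joblib change mentioned above is just the backend argument; a minimal sketch reusing AddOne and vec from the first example:

# The 'threading' backend avoids pickling entirely; how much real speedup
# it gives depends on how much the Pari calls release the GIL.
newVec2 = Parallel(n_jobs=2, backend='threading')(
    delayed(AddOne)(i) for i in vec)
print(newVec2)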
I have 3 machines with Celery workers and RabbitMQ as a broker; one worker runs with the beat flag, and all of this is managed by supervisor. Sometimes Celery dies with the error below.
The error appears only on the beat worker, but when it does, the workers on all machines die.
(celery==3.1.12, kombu==3.0.20)
[2014-07-05 08:37:04,297: INFO/MainProcess] Connected to amqp://user:**@192.168.15.106:5672//
[2014-07-05 08:37:04,311: ERROR/Beat] Process Beat
Traceback (most recent call last):
File "/var/projects/env/local/lib/python2.7/site-packages/billiard/process.py", line 292, in _bootstrap
self.run()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 527, in run
self.service.start(embedded_process=True)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 453, in start
humanize_seconds(self.scheduler.max_interval))
File "/var/projects/env/local/lib/python2.7/site-packages/kombu/utils/__init__.py", line 322, in __get__
value = obj.__dict__[self.__name__] = self.__get(obj)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 491, in scheduler
return self.get_scheduler()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 486, in get_scheduler
lazy=lazy)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/utils/imports.py", line 53, in instantiate
return symbol_by_name(name)(*args, **kwargs)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 357, in __init__
Scheduler.__init__(self, *args, **kwargs)
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 184, in __init__
self.setup_schedule()
File "/var/projects/env/local/lib/python2.7/site-packages/celery/beat.py", line 376, in setup_schedule
self._store['entries']
File "/usr/lib/python2.7/shelve.py", line 121, in __getitem__
f = StringIO(self.dict[key])
File "/usr/lib/python2.7/bsddb/__init__.py", line 270, in __getitem__
return _DeadlockWrap(lambda: self.db[key]) # self.db[key]
File "/usr/lib/python2.7/bsddb/dbutils.py", line 68, in DeadlockWrap
return function(*_args, **_kwargs)
File "/usr/lib/python2.7/bsddb/__init__.py", line 270, in <lambda>
return _DeadlockWrap(lambda: self.db[key]) # self.db[key]
DBPageNotFoundError: (-30985, 'DB_PAGE_NOTFOUND: Requested page not found')
I've run into this issue, and the cause was a corrupted db file (usually named "celerybeat-schedule").
The solution is to delete the existing db file and restart the process.
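A minimal sketch of that cleanup, assuming the default schedule filename (beat's -s/--schedule option controls the actual path, so adjust accordingly):

import os

schedule_file = 'celerybeat-schedule'  # default name; match your -s/--schedule path
if os.path.exists(schedule_file):
    os.remove(schedule_file)
# then restart the beat process, e.g. supervisorctl restart <beat-program>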
Relevant: bsddb.db.DBPageNotFoundError
https://mail.python.org/pipermail/python-list/2009-October/554552.html
I had to remove some temp files in the /tmp directory, named celeryd-<NAME_OF_WORKER>-state and celeryd-<NAME_OF_WORKER>-state-renamed. After removing those, I was able to restart my affected worker.
Building on top of How to assert output with nosetest/unittest in python?, I would like to achieve this inside PyCharm. However, PyCharm does not run:
$ python -m tests.test_mymodule --buffer
Rather it does:
$ /usr/bin/python2.7 /opt/helpers/pycharm/utrunner.py \
/home/oz123/PycharmProjects/account/tests/test_mymodule.py true
So, I ran it with:
$ /usr/bin/python2.7 /opt/helpers/pycharm/utrunner.py \
/home/oz123/PycharmProjects/account/tests/test_mymodule.py --buffer true
And it crashes with:
##teamcity[testFinished duration='72' name='test_1_list_files']
Traceback (most recent call last):
File "/opt/helpers/pycharm/utrunner.py", line 151, in <module>
TeamcityTestRunner().run(all, **options)
File "/opt/helpers/pycharm/tcunittest.py", line 249, in run
test(result)
File "/usr/lib/python2.7/unittest/suite.py", line 70, in __call__
return self.run(*args, **kwds)
File "/usr/lib/python2.7/unittest/suite.py", line 108, in run
test(result)
File "/usr/lib/python2.7/unittest/suite.py", line 70, in __call__
return self.run(*args, **kwds)
File "/usr/lib/python2.7/unittest/suite.py", line 108, in run
test(result)
File "/usr/lib/python2.7/unittest/case.py", line 396, in __call__
return self.run(*args, **kwds)
File "/usr/lib/python2.7/unittest/case.py", line 356, in run
result.addError(self, sys.exc_info())
File "/opt/helpers/pycharm/tcunittest.py", line 108, in addError
TestResult.addError(self, test, err)
File "/usr/lib/python2.7/unittest/result.py", line 19, in inner
return method(self, *args, **kw)
File "/usr/lib/python2.7/unittest/result.py", line 116, in addError
self.errors.append((test, self._exc_info_to_string(err, test)))
File "/usr/lib/python2.7/unittest/result.py", line 167, in _exc_info_to_string
output = sys.stdout.getvalue()
AttributeError: 'file' object has no attribute 'getvalue'
Does anyone know where PyCharm stores the output? How can I access it?
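For what it's worth, here is a small sketch (mine, not from the post) of why --buffer and PyCharm's runner collide: unittest's buffer mode swaps sys.stdout for a StringIO so that _exc_info_to_string can call sys.stdout.getvalue(), while TeamcityTestRunner leaves sys.stdout as a plain file object, which has no getvalue(); that is the AttributeError above:

import sys
from StringIO import StringIO  # Python 2, matching the tracebacks above

real_stdout = sys.stdout
sys.stdout = StringIO()           # what unittest's --buffer mode does
print('captured line')
buffered = sys.stdout.getvalue()  # fine: StringIO has getvalue()
sys.stdout = real_stdout
print('buffered output was: %r' % buffered)
# A real file object (the untouched sys.stdout) has no .getvalue(),
# which is exactly the failure in the traceback.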
I am using Skype4Py to create a Skype bot.
I wanted to install the bot in a Linux environment (Ubuntu 12.04, if I recall correctly),
and I installed Skype and the bot plus its dependencies.
Now, whenever I ask for message.Chat.Type, it gives me a command timeout.
Any solution?
The error:
Exception in thread Skype4Py MessageStatus event scheduler:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 551, in __bootstrap_inner
self.run()
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/utils.py", line 225, in run
handler(*self.args, **self.kwargs)
File "functions/messageProcessor.py", line 161, in processMessages
if allowed(message, "url_parse"):
File "functions/messageProcessor.py", line 62, in allowed
chatType = message.Chat.Type
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/chat.py", line 405, in _GetType
return str(self._Property('TYPE'))
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/chat.py", line 33, in _Property
return self._Owner._Property('CHAT', self.Name, PropName, Value, Cache)
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/skype.py", line 296, in _Property
value = self._DoCommand('GET %s' % jarg, jarg)
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/skype.py", line 276, in _DoCommand
self.SendCommand(command)
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/skype.py", line 778, in SendCommand
self._Api.send_command(Command)
File "/usr/local/lib/python2.7/dist-packages/Skype4Py/api/posix_x11.py", line 445, in send_command
raise SkypeAPIError('Skype command timeout')
SkypeAPIError: Skype command timeout
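One knob that may be worth trying (an assumption on my part, not a confirmed fix for the underlying X11 transport issue): Skype4Py's Skype object exposes a Timeout attribute for its commands, so raising it gives the GET behind Chat.Type more time before SkypeAPIError is raised:

import Skype4Py

skype = Skype4Py.Skype()
skype.Attach()
# Command timeout in milliseconds; the assumption here is that the
# default is too short for this environment. Property reads such as
# message.Chat.Type go through SendCommand and inherit this timeout.
skype.Timeout = 60000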