Related
This example comes from the official Ray documentation. It works well on Linux but fails with errors on Windows. Ray 2.2.0 is used in both cases.
import numpy as np
import ray
from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback
def train_function(config):
    """Report a noisy synthetic loss 30 times through ``session.report``.

    Args:
        config: Mapping with the keys ``"mean"`` and ``"sd"``. Each reported
            loss is ``config["mean"]`` plus a standard-normal sample scaled
            by ``config["sd"]``.
    """
    for _ in range(30):
        loss = config["mean"] + config["sd"] * np.random.randn()
        session.report({"loss": loss})
def tune_with_callback():
    """Example for using a WandbLoggerCallback with the function API.

    Builds a ``tune.Tuner`` around ``train_function`` that minimises the
    reported ``"loss"`` metric, attaches a ``WandbLoggerCallback`` for the
    ``"Wandb_example"`` project, and runs the search with ``tuner.fit()``.
    """
    tuner = tune.Tuner(
        train_function,
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min",
        ),
        run_config=air.RunConfig(
            callbacks=[
                WandbLoggerCallback(project="Wandb_example"),
            ],
        ),
        param_space={
            "mean": tune.grid_search([1, 2, 3, 4, 5]),
            "sd": tune.uniform(0.2, 0.8),
        },
    )
    tuner.fit()
if __name__ == '__main__':
    # Only start the tuning run when this file is executed as a script.
    tune_with_callback()
And this is the log printed when running the Python code above:
2022-12-30 11:50:41,732 INFO worker.py:1538 -- Started a local Ray instance.
2022-12-30 11:50:46,508 INFO wandb.py:250 -- Already logged into W&B.
Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 928, in _wait_and_handle_event
self._on_pg_ready(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1018, in _on_pg_ready
if not _start_trial(next_trial) and next_trial.status != Trial.ERROR:
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1010, in _start_trial
self._callbacks.on_trial_start(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\callback.py", line 317, in on_trial_start
callback.on_trial_start(**info)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\logger\logger.py", line 135, in on_trial_start
self.log_trial_start(trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 527, in log_trial_start
self._trial_processes[trial].start()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 367, in __reduce__
raise RuntimeError("_WandbLoggingProcess is not pickleable.")
RuntimeError: _WandbLoggingProcess is not pickleable.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tuner.py", line 272, in fit
return self._local_tuner.fit()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\impl\tuner_internal.py", line 420, in fit
analysis = self._fit_internal(trainable, param_space)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\impl\tuner_internal.py", line 532, in _fit_internal
analysis = run(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tune.py", line 726, in run
runner.step()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 981, in step
self._wait_and_handle_event(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 960, in _wait_and_handle_event
raise TuneError(traceback.format_exc())
ray.tune.error.TuneError: Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 928, in _wait_and_handle_event
self._on_pg_ready(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1018, in _on_pg_ready
if not _start_trial(next_trial) and next_trial.status != Trial.ERROR:
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1010, in _start_trial
self._callbacks.on_trial_start(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\callback.py", line 317, in on_trial_start
callback.on_trial_start(**info)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\logger\logger.py", line 135, in on_trial_start
self.log_trial_start(trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 527, in log_trial_start
self._trial_processes[trial].start()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 367, in __reduce__
raise RuntimeError("_WandbLoggingProcess is not pickleable.")
RuntimeError: _WandbLoggingProcess is not pickleable.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\shaohan.tian\Desktop\laptop_python_run\wandb_test.py", line 36, in <module>
tune_with_callback()
File "C:\Users\shaohan.tian\Desktop\laptop_python_run\wandb_test.py", line 33, in tune_with_callback
tuner.fit()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tuner.py", line 274, in fit
raise TuneError(
ray.tune.error.TuneError: The Ray Tune run failed. Please inspect the previous error messages for a cause. After
fixing the issue, you can restart the run from scratch or continue this run. To continue this run, you can use `tuner = Tuner.restore("C:\Users\shaohan.tian\ray_results\train_function_2022-12-30_11-50-36")`.
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I tried changing the versions of ray and wandb, but it did not help. Could you help me solve it?
I want to increase the accuracy of my speech-to-text model by using an N-Gram. I'm using this line of code to apply the function to the whole dataset, as shown below:
# Map `predict` over the dataset; `num_proc` is read from the `cpu_core` environment variable.
result = dataset.map(predict, batch_size=5, num_proc=int(os.environ.get('cpu_core')))
The CPU core I set for 'cpu_core' is 8.
Here is the predict function code:
def predict(batch):
    """Decode the logits in ``batch`` and store the first decoded text.

    The text is written to ``batch["predicted"]`` and printed; the same
    ``batch`` object is returned.
    """
    batch["predicted"] = processor.batch_decode(np.array(batch["logits"])).text[0]
    print(batch["predicted"])
    return batch
I run this line inside a try block within a `while True` loop, and when the program hits a multiprocessing error it gets stuck in the loop. Here is the complete code:
import traceback

# Repeat the processing pipeline until the user interrupts it with Ctrl+C.
while True:
    try:
        dataset = dataset.map(speech_file_to_array_fn)
        # If we're using n-gram
        # NOTE(review): nesting reconstructed from a paste that lost its
        # indentation; confirm which statements belong under this `if`.
        if os.environ.get('active_ngram') == '1':
            dataset = dataset.map(predict_model)
            print("\nN-Gram started\n")
            result = dataset.map(predict, batch_size=5, num_proc=int(os.environ.get('cpu_core')))  # This is the line that raises the error
    except KeyboardInterrupt:
        print('interrupted!')
        break
    except Exception:
        # Keep retrying as before, but show the failure instead of swallowing
        # it silently, so the cause of a multiprocessing error is visible.
        traceback.print_exc()
Now I want to know how I can handle this multiprocessing error (Python 3.8.10, Ubuntu 20.04.4).
Here is the error:
^CProcess
ForkPoolWorker-3335:█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████Process
ForkPoolWorker-3330:██████████████████████████████████████████████████████████████████████████████████████████████████████|
3/3 [02:08<00:00, 37.41s/ex] Process ForkPoolWorker-19: Process
ForkPoolWorker-3333: Process ForkPoolWorker-16: Process
ForkPoolWorker-21: Process ForkPoolWorker-13: Process
ForkPoolWorker-15: Process ForkPoolWorker-12: Process
ForkPoolWorker-14: Process ForkPoolWorker-3336: Process
ForkPoolWorker-3331: Process ForkPoolWorker-3334: Process
ForkPoolWorker-3332: Process ForkPoolWorker-18: Process
ForkPoolWorker-17: #0:
25%|██████████████████████████████████████████████████████████████████████████████▌
| 1/4 [14:09:32<42:28:38, 50972.67s/ex] Process ForkPoolWorker-20:
Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 356, in get
res = self._reader.recv_bytes() File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in
recv_bytes
buf = self._recv_bytes(maxlength) File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in
_recv_bytes
buf = self._recv(4) File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining) KeyboardInterrupt Traceback (most recent call last): Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt interrupted! ^C
I finally fixed this error. `BrokenPipeError: [Errno 32] Broken pipe` comes from the Linux operating system and occurs during I/O tasks. When one end of a pipe is closed while the other end is still trying to write or read data, this error is raised.
Now here is the fun part: I was using 6 workers (the number of CPU cores) in my map function, and the dataset contained 25 items. So the pipeline running the map function had 5 rows with 4 files each and 1 row with 5 files. I guess that was the cause of the error: the last row with 5 files caused the problem. So I reduced the number of files in the dataset from 25 to 24, set the number of workers to 6, and removed batch_size=5. After that I didn't get any error anymore. Here is the link for more info about BrokenPipeError.
I hope it would be helpful
I'm having trouble trying to run a few loops in parallel when using Pari via cypari2. I'll include a couple of small working examples along with the tracebacks in case anyone has some insight on this.
Example 1 -- using joblib:
from cypari2 import Pari
from joblib import Parallel, delayed
def AddOne(v):
    """Return ``v + pari.one()``, using the module-level ``pari`` instance."""
    return v + pari.one()
# Create the shared Pari instance and two symbolic inputs.
pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)
# Works: with a single job the results are returned as expected.
newVec = Parallel(n_jobs=1)(delayed(AddOne)(i) for i in vec)
print(newVec)
# Doesn't work: with two jobs the task cannot be pickled (PicklingError).
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
print(newVec2)
The output:
[x_1, x_2]
[x_1 + 1, x_2 + 1]
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 247, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 240, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "/usr/lib/python3/dist-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 538, in dump
return Pickler.dump(self, obj)
File "stringsource", line 2, in cypari2.pari_instance.Pari.__reduce_cython__
TypeError: no default __reduce__ due to non-trivial __cinit__
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "min_jake_joblib.py", line 16, in <module>
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 1016, in __call__
self.retrieve()
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 908, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/usr/lib/python3/dist-packages/joblib/_parallel_backends.py", line 554, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
_pickle.PicklingError: Could not pickle the task to send it to the workers.
Seems to be a problem with pickling the Pari objects, but is there any way around it?
Example 2 -- using multiprocessing:
from cypari2 import Pari
import multiprocessing
def AddOne(v):
    """Return ``v + pari.one()``, using the module-level ``pari`` instance."""
    return v + pari.one()
# Create the shared Pari instance and two symbolic inputs.
pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)
# doesn't work
if __name__ == '__main__':
    # The context manager ensures the pool is shut down instead of being left open.
    with multiprocessing.Pool(processes=2) as pool:  # doesn't matter how many I use
        newVec = pool.map(AddOne, (i for i in vec))
    print(newVec)
It segfaults but doesn't exit on its own, so I have to press Ctrl+C to kill it. The output:
[x_1, x_2]
Exception in thread Thread-3:
Traceback (most recent call last):
File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/usr/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 576, in _handle_results
task = get()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "cypari2/gen.pyx", line 4705, in cypari2.gen.objtogen
File "cypari2/gen.pyx", line 4812, in cypari2.gen.objtogen
File "cypari2/convert.pyx", line 557, in cypari2.convert.PyObject_AsGEN
cysignals.signals.SignalError: Segmentation fault
^CProcess ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
File "min_jake_multiprocessing.py", line 14, in <module>
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/queues.py", line 356, in get
res = self._reader.recv_bytes()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock:
File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
KeyboardInterrupt
newVec = pool.map(AddOne, (i for i in vec))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.8/multiprocessing/pool.py", line 765, in get
self.wait(timeout)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 762, in wait
self._event.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 558, in wait
signaled = self._cond.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 302, in wait
waiter.acquire()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
finalizer()
File "/usr/lib/python3.8/multiprocessing/util.py", line 224, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 695, in _terminate_pool
raise AssertionError(
AssertionError: Cannot have cache with result_hander not alive
I suppose someone will tell me to use sympy or some other symbolic algebra package instead, but the symbolic algebra I need to do is quite complex and Pari can handle it extremely well. However, in the end I'd like to be able to process a queue of class objects that contain Pari objects in parallel. Any thoughts/suggestions are appreciated.
Well, this isn't a full answer, but it works for me so I wanted to share in case anyone else runs into this issue.
The first issue appears to be that the versions of libpari-dev and pari-gp on the apt repository were too old. The apt repository contains version 2.11 whereas the version on Pari's git repository is version 2.14. Uninstalling and following the instructions from here to install from source fixed most of my problems.
Interestingly, I still needed to install libpari-gmp-tls6 from the apt repository to get things to work. But, after that I was able to get the test examples above to run. The example using multiprocessing ran successfully without modification, but the example using joblib required the use of the "threading" backend in order to run.
In Python, to share data between different processes with multiprocessing, we use multiprocessing.Manager(). I want the following code to output [1,2,3,4,5,6,7,8,9,10], but I am getting an EOFError. Why?
The Code is:
import multiprocessing
# Start a manager and create a list proxy that the worker processes append to.
manager=multiprocessing.Manager()
final_list=manager.list()
# The two input lists, one per worker process.
input_list_one=[1,2,3,4,5]
input_list_two=[6,7,8,9,10]
def worker(data):
    """Append every item of ``data`` to the module-level ``final_list``."""
    for item in data:
        final_list.append(item)
# Run one worker process per input list, wait for both to finish, then print the shared list.
process_1=multiprocessing.Process(target=worker,args=[input_list_one])
process_2=multiprocessing.Process(target=worker,args=[input_list_two])
process_1.start()
process_2.start()
process_1.join()
process_2.join()
print(final_list)
I am getting the following error:
Process SyncManager-1:
Traceback (most recent call last):
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 539, in _run_server
server = cls._Server(registry, address, authkey, serializer)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 139, in __init__
self.listener = Listener(address=address, backlog=16)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 438, in __init__
self._listener = SocketListener(address, family, backlog)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 576, in __init__
self._socket.bind(address)
PermissionError: [Errno 13] Permission denied
Traceback (most recent call last):
File "/storage/emulated/0/qpython/.last_tmp.py", line 2, in <module>
manager=multiprocessing.Manager()
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/context.py", line 56, in Manager
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 517, in start
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 250, in recv
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 407, in _recv_bytes
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 383, in _recv
EOFError
1|u0_a823#land:/ $
My main method (which is properly protected from being run when importing) looks like this:
def main():
    """Process every session in a worker pool, then call ``write_csv``."""
    json_file, csv_out = get_user_input()
    sessions = get_sessions(json_file)
    # NOTE(review): nesting reconstructed from a paste that lost its
    # indentation; confirm which statements belong under this `if`.
    if sessions:
        pool = multiprocessing.Pool()
        # Third positional argument is the chunksize. The returned AsyncResult
        # is discarded, so worker results and exceptions are not retrieved here.
        pool.map_async(process_one_item, sessions, 1)
        pool.close()
        pool.join()
        write_csv(csv_out, json_file, sessions)
It processes every single session (of over 1,000), which is just a list of timestamps that get processed before being written to disk, but when it finishes the final one, it just hangs. It never gets to "write_csv," which reads what has been written to disk.
What am I missing?
Update 1:
def main():
    """Process sessions with ``imap_unordered``, store each result, then call ``write_csv``."""
    json_file, csv_out = get_user_input()
    sessions = get_sessions(json_file)
    pool = multiprocessing.Pool()
    # Results are consumed in completion order, one task per chunk.
    for results in pool.imap_unordered(func=process_one_item,
                                       iterable=sessions, chunksize=1):
        store_results(results)
    pool.terminate()
    pool.join()
    write_csv(csv_out, json_file, sessions)
This also hung.
The program can run if given less than 200 items AND finish. But when I go over 1,000, it hangs...
Update 2:
I added some print statements so I could better tell where it hangs.
def main():
    """Same pipeline as before, with progress prints to locate where it hangs."""
    json_file, csv_out = get_user_input()
    sessions = get_sessions(json_file)
    pool = multiprocessing.Pool()
    for results in pool.imap_unordered(func=process_one_item,
                                       iterable=sessions):
        store_results(results)
        # Printed once per stored result.
        print('...now back to main')
    print('Terminate NOW.')
    pool.terminate()
    print('Join NOW.')
    pool.join()
    write_csv(csv_out, json_file, sessions)
"...now back to main" always prints.
"Terminate NOW." never prints.
Update 3:
Here's the stacktrace from when it's hanging and I kill the run.
Process ForkPoolWorker-1:
Process ForkPoolWorker-13:
Process ForkPoolWorker-23:
Process ForkPoolWorker-29:
Process ForkPoolWorker-27:
Process ForkPoolWorker-30:
Process ForkPoolWorker-31:
Process ForkPoolWorker-26:
Traceback (most recent call last):
File , line 759, in next
item = self._items.popleft()
IndexError: pop from an empty deque
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "main.py", line 227, in <module>
main()
File "main.py", line 214, in main
for results in pool.imap_unordered(func=process_one_item, iterable=sessions):
File , line 763, in next
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 297, in _bootstrap
self.run()
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 351, in get
with self._rlock:
File , line 110, in worker
task = get()
File , line 110, in worker
task = get()
File , line 351, in get
with self._rlock:
File ", line 95, in __enter__
return self._semlock.__enter__()
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 351, in get
with self._rlock:
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
File , line 351, in get
with self._rlock:
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 351, in get
with self._rlock:
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 352, in get
res = self._reader.recv_bytes()
File , line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File , line 407, in _recv_bytes
buf = self._recv(4)
File , line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 351, in get
with self._rlock:
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
self._cond.wait(timeout)
File , line 296, in wait
Traceback (most recent call last):
File , line 297, in _bootstrap
self.run()
File , line 99, in run
self._target(*self._args, **self._kwargs)
File , line 110, in worker
task = get()
File , line 351, in get
with self._rlock:
File , line 95, in __enter__
return self._semlock.__enter__()
KeyboardInterrupt
waiter.acquire()
KeyboardInterrupt
Process finished with exit code 1
I've removed the filenames to protect the innocent (user name in the path).
Update 4:
I added faulthandling and set a timeout.
if __name__ == '__main__':
    # Dump Python tracebacks on fatal errors, and dump them after 60 seconds
    # (then exit) so a hang leaves a stack trace behind.
    faulthandler.enable()
    faulthandler.dump_traceback_later(timeout=60, repeat=True, exit=True)
    start_time = time.perf_counter()
    main()
    print('\nDone!\n\nThis code ran in {:.1f} seconds'.format(time.perf_counter()-start_time))
When I did this I got a timeout and a stack trace of:
Thread 0x00001094 (most recent call first): File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 306 in _recv_bytes File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 250 in recv File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 489 in _handle_results File "C:\ProgramData\Anaconda3\lib\threading.py", line 864 in run File "C:\ProgramData\Anaconda3\lib\threading.py", line 916 in _bootstrap_inner File "C:\ProgramData\Anaconda3\lib\threading.py", line 884 in _bootstrapThread 0x0000476c (most recent call first): File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 284 in _send_bytes File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 206 in send File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 450 in _handle_tasks File "C:\ProgramData\Anaconda3\lib\threading.py", line 864 in run File "C:\ProgramData\Anaconda3\lib\threading.py", line 916 in _bootstrap_inner File "C:\ProgramData\Anaconda3\lib\threading.py", line 884 in _bootstrapThread 0x00002d80 (most recent call first): File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 432 in _handle_workers File "C:\ProgramData\Anaconda3\lib\threading.py", line 864 in run File "C:\ProgramData\Anaconda3\lib\threading.py", line 916 in _bootstrap_inner File "C:\ProgramData\Anaconda3\lib\threading.py", line 884 in _bootstrapThread 0x000038a8 (most recent call first): File "C:\ProgramData\Anaconda3\lib\threading.py", line 295 in wait File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 750 in next File "C:/Users/mtanner/IdeaProjects/cti_trending_analysis/main.py", line 215 in main File "C:/Users/mtanner/IdeaProjects/cti_trending_analysis/main.py", line 231 in <module>Process SpawnPoolWorker-36:Traceback (most recent call last): File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 125, in worker put((job, i, result))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 258, in _bootstrap self.run()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 93, in run self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 130, in worker put((job, i, (False, wrapped)))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

Process SpawnPoolWorker-28:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 125, in worker put((job, i, result))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 258, in _bootstrap self.run()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 93, in run self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 130, in worker put((job, i, (False, wrapped)))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

Process SpawnPoolWorker-26:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 125, in worker put((job, i, result))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 258, in _bootstrap self.run()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 93, in run self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\pool.py", line 130, in worker put((job, i, (False, wrapped)))
File "C:\ProgramData\Anaconda3\lib\multiprocessing\queues.py", line 344, in put self._writer.send_bytes(obj)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 200, in send_bytes self._send_bytes(m[offset:offset + size])
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 280, in _send_bytes ov, err = _winapi.WriteFile(self._handle, buf, overlapped=True)
BrokenPipeError: [WinError 232] The pipe is being closed
Does this make sense to anyone?
Update 5:
In my process_one_item function, I used a sys.exit(-1) call to try to end the run when something code-breaking happened (this program was originally not multiprocessed). I think multiprocessing was hiding this effect. I removed the call, and the program now runs to completion regardless of session count.
If someone can explain all this, I'm happy to count that as the solution!