I'm having trouble running a few loops in parallel when using Pari via cypari2. I'll include a couple of small working examples along with the tracebacks in case anyone has some insight on this.
Example 1 -- using joblib:
from cypari2 import Pari
from joblib import Parallel, delayed
def AddOne(v):
    """Return ``v + pari.one()``.

    Uses the module-level ``pari`` object, which is looked up when the
    function is called (it is assigned after this definition).
    """
    return v + pari.one()
# Module-level Pari instance; AddOne reads this name when it is called.
pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)
# works (n_jobs=1)
newVec = Parallel(n_jobs=1)(delayed(AddOne)(i) for i in vec)
print(newVec)
# doesn't work (n_jobs=2): raises the PicklingError shown in the output below
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
print(newVec2)
The output:
[x_1, x_2]
[x_1 + 1, x_2 + 1]
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 247, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "/usr/lib/python3/dist-packages/joblib/externals/loky/backend/reduction.py", line 240, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "/usr/lib/python3/dist-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 538, in dump
return Pickler.dump(self, obj)
File "stringsource", line 2, in cypari2.pari_instance.Pari.__reduce_cython__
TypeError: no default __reduce__ due to non-trivial __cinit__
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "min_jake_joblib.py", line 16, in <module>
newVec2 = Parallel(n_jobs=2)(delayed(AddOne)(i) for i in vec)
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 1016, in __call__
self.retrieve()
File "/usr/lib/python3/dist-packages/joblib/parallel.py", line 908, in retrieve
self._output.extend(job.get(timeout=self.timeout))
File "/usr/lib/python3/dist-packages/joblib/_parallel_backends.py", line 554, in wrap_future_result
return future.result(timeout=timeout)
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 444, in result
return self.__get_result()
File "/usr/lib/python3.8/concurrent/futures/_base.py", line 389, in __get_result
raise self._exception
_pickle.PicklingError: Could not pickle the task to send it to the workers.
Seems to be a problem with pickling the Pari objects, but is there any way around it?
Example 2 -- using multiprocessing:
from cypari2 import Pari
import multiprocessing
def AddOne(v):
    """Return ``v + pari.one()``.

    Uses the module-level ``pari`` object, which is looked up when the
    function is called (it is assigned after this definition).
    """
    return v + pari.one()
# Module-level Pari instance; AddOne reads this name when it is called.
pari = Pari()
vec = [pari('x_1'), pari('x_2')]
print(vec)

# doesn't work: see the output below
if __name__ == '__main__':
    pool = multiprocessing.Pool(processes=2)  # doesn't matter how many I use
    newVec = pool.map(AddOne, (i for i in vec))
    print(newVec)
It segfaults, but doesn't exit on its own, so I have to use Ctrl+C to kill it. The output:
[x_1, x_2]
Exception in thread Thread-3:
Traceback (most recent call last):
File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
self.run()
File "/usr/lib/python3.8/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 576, in _handle_results
task = get()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 251, in recv
return _ForkingPickler.loads(buf.getbuffer())
File "cypari2/gen.pyx", line 4705, in cypari2.gen.objtogen
File "cypari2/gen.pyx", line 4812, in cypari2.gen.objtogen
File "cypari2/convert.pyx", line 557, in cypari2.convert.PyObject_AsGEN
cysignals.signals.SignalError: Segmentation fault
^CProcess ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
File "min_jake_multiprocessing.py", line 14, in <module>
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/queues.py", line 356, in get
res = self._reader.recv_bytes()
File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
buf = self._recv(4)
File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock:
File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in __enter__
return self._semlock.__enter__()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
KeyboardInterrupt
newVec = pool.map(AddOne, (i for i in vec))
File "/usr/lib/python3.8/multiprocessing/pool.py", line 364, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/usr/lib/python3.8/multiprocessing/pool.py", line 765, in get
self.wait(timeout)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 762, in wait
self._event.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 558, in wait
signaled = self._cond.wait(timeout)
File "/usr/lib/python3.8/threading.py", line 302, in wait
waiter.acquire()
File "src/cysignals/signals.pyx", line 320, in cysignals.signals.python_check_interrupt
KeyboardInterrupt
Traceback (most recent call last):
File "/usr/lib/python3.8/multiprocessing/util.py", line 300, in _run_finalizers
finalizer()
File "/usr/lib/python3.8/multiprocessing/util.py", line 224, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/lib/python3.8/multiprocessing/pool.py", line 695, in _terminate_pool
raise AssertionError(
AssertionError: Cannot have cache with result_hander not alive
I suppose someone will tell me to use sympy or some other symbolic algebra package instead, but the symbolic algebra I need to do is quite complex and Pari can handle it extremely well. However, in the end I'd like to be able to process a queue of class objects that contain Pari objects in parallel. Any thoughts/suggestions are appreciated.
Well, this isn't a full answer, but it works for me so I wanted to share in case anyone else runs into this issue.
The first issue appears to be that the versions of libpari-dev and pari-gp on the apt repository were too old. The apt repository contains version 2.11 whereas the version on Pari's git repository is version 2.14. Uninstalling and following the instructions from here to install from source fixed most of my problems.
Interestingly, I still needed to install libpari-gmp-tls6 from the apt repository to get things to work. But, after that I was able to get the test examples above to run. The example using multiprocessing ran successfully without modification, but the example using joblib required the use of the "threading" backend in order to run.
Related
This is from the official example code of the ray package. It works well on Linux but raises errors on Windows. The ray version is 2.2.0 in every case.
import numpy as np
import ray
from ray import air, tune
from ray.air import session
from ray.air.integrations.wandb import setup_wandb
from ray.air.integrations.wandb import WandbLoggerCallback
def train_function(config):
    """Report 30 noisy loss values to the Ray AIR session.

    Each loss is ``config["mean"] + config["sd"] * np.random.randn()``.

    Args:
        config: Mapping that provides the ``"mean"`` and ``"sd"`` entries.
    """
    for _ in range(30):
        loss = config["mean"] + config["sd"] * np.random.randn()
        session.report({"loss": loss})
def tune_with_callback():
    """Example for using a WandbLoggerCallback with the function API.

    Builds a ``tune.Tuner`` around ``train_function`` that minimises the
    reported ``"loss"``, attaches a ``WandbLoggerCallback`` for the
    ``"Wandb_example"`` project, and runs it with ``tuner.fit()``.
    """
    tuner = tune.Tuner(
        train_function,
        tune_config=tune.TuneConfig(
            metric="loss",
            mode="min"
        ),
        run_config=air.RunConfig(
            callbacks=[
                WandbLoggerCallback(project="Wandb_example")
            ]
        ),
        param_space={
            "mean": tune.grid_search([1, 2, 3, 4, 5]),
            "sd": tune.uniform(0.2, 0.8),
        },
    )
    tuner.fit()
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    tune_with_callback()
And this is the log printed when running the above Python code:
2022-12-30 11:50:41,732 INFO worker.py:1538 -- Started a local Ray instance.
2022-12-30 11:50:46,508 INFO wandb.py:250 -- Already logged into W&B.
Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 928, in _wait_and_handle_event
self._on_pg_ready(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1018, in _on_pg_ready
if not _start_trial(next_trial) and next_trial.status != Trial.ERROR:
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1010, in _start_trial
self._callbacks.on_trial_start(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\callback.py", line 317, in on_trial_start
callback.on_trial_start(**info)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\logger\logger.py", line 135, in on_trial_start
self.log_trial_start(trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 527, in log_trial_start
self._trial_processes[trial].start()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 367, in __reduce__
raise RuntimeError("_WandbLoggingProcess is not pickleable.")
RuntimeError: _WandbLoggingProcess is not pickleable.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tuner.py", line 272, in fit
return self._local_tuner.fit()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\impl\tuner_internal.py", line 420, in fit
analysis = self._fit_internal(trainable, param_space)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\impl\tuner_internal.py", line 532, in _fit_internal
analysis = run(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tune.py", line
726, in run
runner.step()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 981, in step
self._wait_and_handle_event(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 960, in _wait_and_handle_event
raise TuneError(traceback.format_exc())
ray.tune.error.TuneError: Traceback (most recent call last):
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 928, in _wait_and_handle_event
self._on_pg_ready(next_trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1018, in _on_pg_ready
if not _start_trial(next_trial) and next_trial.status != Trial.ERROR:
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\execution\trial_runner.py", line 1010, in _start_trial
self._callbacks.on_trial_start(
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\callback.py", line 317, in on_trial_start
callback.on_trial_start(**info)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\logger\logger.py", line 135, in on_trial_start
self.log_trial_start(trial)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 527, in log_trial_start
self._trial_processes[trial].start()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\air\integrations\wandb.py", line 367, in __reduce__
raise RuntimeError("_WandbLoggingProcess is not pickleable.")
RuntimeError: _WandbLoggingProcess is not pickleable.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\shaohan.tian\Desktop\laptop_python_run\wandb_test.py", line 36, in <module>
tune_with_callback()
File "C:\Users\shaohan.tian\Desktop\laptop_python_run\wandb_test.py", line 33, in tune_with_callback
tuner.fit()
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\site-packages\ray\tune\tuner.py", line 274, in fit
raise TuneError(
ray.tune.error.TuneError: The Ray Tune run failed. Please inspect the previous error messages for a cause. After
fixing the issue, you can restart the run from scratch or continue this run. To continue this run, you can use `tuner = Tuner.restore("C:\Users\shaohan.tian\ray_results\train_function_2022-12-30_11-50-36")`.
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\shaohan.tian\scoop\apps\miniconda3\current\envs\steel\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I tried modifying versions of ray and wandb, but it was of no use. Could you help me to solve it?
I want to increase the accuracy of my speech2text model by using an N-gram. I'm using this line of code to apply the function to the whole dataset:
# Apply `predict` over the dataset; the number of worker processes is read
# from the 'cpu_core' environment variable.
result = dataset.map(predict, batch_size=5, num_proc=int(os.environ.get('cpu_core')))
The CPU core I set for 'cpu_core' is 8.
Here is the predict function code:
def predict(batch):
    """Decode the logits in ``batch`` and record the predicted text.

    Converts ``batch["logits"]`` to a NumPy array, decodes it with the
    module-level ``processor``, stores the first decoded text under
    ``batch["predicted"]``, prints it, and returns the updated batch.
    """
    batch["predicted"] = processor.batch_decode(np.array(batch["logits"])).text[0]
    print(batch["predicted"])
    return batch
I'm using this line in a try block inside a while True loop, and when the program hits a multiprocessing error it gets stuck in the while True loop. Here is the complete code:
# NOTE(review): the original indentation was lost; the extent of the `if`
# body below is assumed -- confirm against the real script.
while True:
    try:
        dataset = dataset.map(speech_file_to_array_fn)
        # If we're using n-gram
        if os.environ.get('active_ngram') == '1':
            dataset = dataset.map(predict_model)
            print("\nN-Gram started\n")
            result = dataset.map(predict, batch_size=5, num_proc=int(os.environ.get('cpu_core')))  # This is the line where the error occurs
    except KeyboardInterrupt:
        print('interrupted!')
        break
    except:
        # NOTE(review): every other exception is silently discarded and the
        # loop starts over, so a persistent failure repeats indefinitely.
        pass
Now I want to know how I can handle this multiprocessing error. (Python 3.8.10 & Ubuntu 20.04.4)
here is the error:
^CProcess
ForkPoolWorker-3335:█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████Process
ForkPoolWorker-3330:██████████████████████████████████████████████████████████████████████████████████████████████████████|
3/3 [02:08<00:00, 37.41s/ex] Process ForkPoolWorker-19: Process
ForkPoolWorker-3333: Process ForkPoolWorker-16: Process
ForkPoolWorker-21: Process ForkPoolWorker-13: Process
ForkPoolWorker-15: Process ForkPoolWorker-12: Process
ForkPoolWorker-14: Process ForkPoolWorker-3336: Process
ForkPoolWorker-3331: Process ForkPoolWorker-3334: Process
ForkPoolWorker-3332: Process ForkPoolWorker-18: Process
ForkPoolWorker-17: #0:
25%|██████████████████████████████████████████████████████████████████████████████▌
| 1/4 [14:09:32<42:28:38, 50972.67s/ex] Process ForkPoolWorker-20:
Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 356, in get
res = self._reader.recv_bytes() File "/usr/lib/python3.8/multiprocessing/connection.py", line 216, in
recv_bytes
buf = self._recv_bytes(maxlength) File "/usr/lib/python3.8/multiprocessing/connection.py", line 414, in
_recv_bytes
buf = self._recv(4) File "/usr/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining) KeyboardInterrupt Traceback (most recent call last): Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt Traceback (most recent call last): File
"/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt File "/usr/lib/python3.8/multiprocessing/process.py", line 315, in
_bootstrap
self.run() File "/usr/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs) File "/usr/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get() File "/usr/lib/python3.8/multiprocessing/queues.py", line 355, in get
with self._rlock: File "/usr/lib/python3.8/multiprocessing/synchronize.py", line 95, in
enter
return self._semlock.enter() KeyboardInterrupt interrupted! ^C
I finally fixed this error. The BrokenPipeError: [Errno 32] Broken pipe comes from the Linux operating system and occurs during I/O tasks: when one end of a pipe is closed while the other end is still trying to write or read data, this error is raised.
Now here is the fun part: I was using 6 workers as the number of CPU cores in my map function, and the dataset contained 25 files. So the pipeline running the map function had 5 rows with 4 files each and 1 row with 5 files. I guess that was the cause of the error: the last row with 5 files caused some disturbance. So I reduced the number of files in the dataset from 25 to 24, used 6 workers, and removed batch_size=5. After that I didn't get the error anymore. Here is the link for more info about the BrokenPipeError.
I hope it would be helpful
In Python, to share data between different processes using multiprocessing, we use multiprocessing.Manager(). I want to get the output [1,2,3,4,5,6,7,8,9,10] from the following code, but I am getting an EOFError. Why?
The Code is:
import multiprocessing
# Manager-backed list shared by both worker processes.
manager = multiprocessing.Manager()
final_list = manager.list()

input_list_one = [1, 2, 3, 4, 5]
input_list_two = [6, 7, 8, 9, 10]


def worker(data):
    """Append every item of ``data`` to the module-level ``final_list``."""
    for item in data:
        final_list.append(item)


# NOTE(review): processes are created at module level without an
# ``if __name__ == '__main__':`` guard; the multiprocessing docs require the
# guard for the spawn/forkserver start methods -- confirm the target platform.
process_1 = multiprocessing.Process(target=worker, args=[input_list_one])
process_2 = multiprocessing.Process(target=worker, args=[input_list_two])

process_1.start()
process_2.start()

# Wait for both workers before reading the shared list.
process_1.join()
process_2.join()

print(final_list)
I am getting the following error:
Process SyncManager-1:
Traceback (most recent call last):
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 539, in _run_server
server = cls._Server(registry, address, authkey, serializer)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 139, in __init__
self.listener = Listener(address=address, backlog=16)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 438, in __init__
self._listener = SocketListener(address, family, backlog)
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 576, in __init__
self._socket.bind(address)
PermissionError: [Errno 13] Permission denied
Traceback (most recent call last):
File "/storage/emulated/0/qpython/.last_tmp.py", line 2, in <module>
manager=multiprocessing.Manager()
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/context.py", line 56, in Manager
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/managers.py", line 517, in start
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 250, in recv
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 407, in _recv_bytes
File "/data/user/0/org.qpython.qpy3/files/lib/python36.zip/multiprocessing/connection.py", line 383, in _recv
EOFError
1|u0_a823#land:/ $
I have written a custom environment so I can play around with reinforcement learning (PPO) and tf-agents.
This works fine if I wrap my env (which inherits from py_environment.PyEnvironment) in a TFPyEnvironment, but fails if I try to wrap it in a ParallelPyEnvironment. I have tried playing around with all the keyword arguments of ParallelPyEnvironment, but the code just runs up to that line and then nothing happens: no exception is raised and the program does not terminate.
Here is my code initialising the environment and showing off the working variant for the eval_env:
# The list passed to ParallelPyEnvironment holds one already-constructed
# CardGameEnv instance, repeated hparams['parallel_environments'] times.
train_env = tf_py_environment.TFPyEnvironment(
    ParallelPyEnvironment(
        [CardGameEnv()] * hparams['parallel_environments']
    )
)

# this works perfectly:
eval_env = tf_py_environment.TFPyEnvironment(CardGameEnv(debug=True))
If I terminate the script via CTRL+C, this is what is being output:
Traceback (most recent call last):
Traceback (most recent call last):
File "E:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\poker_logic\train.py", line 229, in <module>
File "<string>", line 1, in <module>
train(model_num=3)
File "C:\Python37\lib\multiprocessing\spawn.py", line 105, in spawn_main
File "E:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\poker_logic\train.py", line 64, in train
[CardGameEnv()] * hparams['parallel_environments']
exitcode = _main(fd)
File "E:\Users\tmp\AppData\Roaming\Python\Python37\site-packages\gin\config.py", line 1009, in wrapper
File "C:\Python37\lib\multiprocessing\spawn.py", line 113, in _main
preparation_data = reduction.pickle.load(from_parent)
KeyboardInterrupt
return fn(*new_args, **new_kwargs)
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 70, in __init__
self.start()
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 83, in start
env.start(wait_to_start=self._start_serially)
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 223, in start
self._process.start()
File "C:\Python37\lib\multiprocessing\process.py", line 112, in start
self._popen = self._Popen(self)
File "C:\Python37\lib\multiprocessing\context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Python37\lib\multiprocessing\context.py", line 322, in _Popen
return Popen(process_obj)
File "C:\Python37\lib\multiprocessing\popen_spawn_win32.py", line 65, in __init__
reduction.dump(process_obj, to_child)
File "C:\Python37\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 264, in __getattr__
return self._receive()
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 333, in _receive
message, payload = self._conn.recv()
File "C:\Python37\lib\multiprocessing\connection.py", line 250, in recv
buf = self._recv_bytes()
File "C:\Python37\lib\multiprocessing\connection.py", line 306, in _recv_bytes
[ov.event], False, INFINITE)
KeyboardInterrupt
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "C:\Python37\lib\site-packages\tf_agents\environments\parallel_py_environment.py", line 289, in close
self._process.join(5)
File "C:\Python37\lib\multiprocessing\process.py", line 139, in join
assert self._popen is not None, 'can only join a started process'
AssertionError: can only join a started process
From that I conclude that the thread ParallelPyEnvironment is trying to start does not do that, but since I'm not very experienced with threading in Python, I have no idea where to go from here, especially how to fix this.
Current training takes a long time and does not use my PC's capabilities at all (3GB of 32GB RAM used, processor at 3%, GPU barely working at all but VRAM full), so this should speed up training time significantly.
The solution is to pass in callables, not environments, so the ParallelPyEnvironment can construct them itself:
# Pass the CardGameEnv class itself (a callable), not an instance, so that
# ParallelPyEnvironment constructs the environments.
train_env = tf_py_environment.TFPyEnvironment(
    ParallelPyEnvironment(
        [CardGameEnv] * hparams['parallel_environments']
    )
)
I was trying to read the elements of a shared dictionary and write them to a file, but I found that I cannot access the shared dictionary: it raises an error.
I wrote the following code, but it only works if the dictionary object is not shared (see the commented-out line).
Could anyone suggest how to access the shared memory dictionary?
import multiprocessing as mp
# Manager-backed dictionary; swap in the plain dict below to see the
# non-shared variant that works.
sharedi = mp.Manager().dict()
#sharedi = {}
l1 = ['a','b','c','d']
l2 = ['b','b','c','e']
# Populate keys "abc0" and "abc1", each mapped to [l1, l2].
for i in range(0,2):
    name = "abc"+str(i)
    sharedi.update({name:[l1,l2]})
def writ(dirname):
    """Write the first list stored under each key of ``sharedi`` to a file.

    Args:
        dirname: Path of the output file; it is opened for writing.
    """
    # The context manager closes (and flushes) the file even if iterating
    # the shared dictionary raises.
    with open(dirname, 'w') as outfile:
        for i in sharedi:
            for item in sharedi[i][0]:
                outfile.write(item)
print sharedi
# Run writ in two separate processes, each writing to its own file.
p1 = mp.Process(target = writ,args=('d1',))
p2 = mp.Process(target = writ,args=('d2',))
p1.start()
p2.start()
# adding join() as suggested in the comment
p1.join()
p2.join()
I'm expecting to write the shared dictionary to files and this is the error message I get:
Process Process-3:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 114, in run
self.run()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "dict.py", line 14, in writ
self._target(*self._args, **self._kwargs)
File "dict.py", line 14, in writ
for i in sharedi:
for i in sharedi:
File "<string>", line 2, in __getitem__
File "<string>", line 2, in __getitem__
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 755, in _callmethod
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 755, in _callmethod
self._connect()
self._connect()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 742, in _connect
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 742, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
conn = self._Client(self._token.address, authkey=self._authkey)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/connection.py", line 169, in Client
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/connection.py", line 169, in Client
c = SocketClient(address)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/connection.py", line 308, in SocketClient
c = SocketClient(address)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/connection.py", line 308, in SocketClient
s.connect(address)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 228, in meth
s.connect(address)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 228, in meth
return getattr(self._sock,name)(*args)
error: [Errno 2] No such file or directory
return getattr(self._sock,name)(*args)
error: [Errno 2] No such file or directory
EDIT:
adding as suggested in the comment, but still get error...
p1.join()
p2.join()
Below is the error message
Process Process-2:
Traceback (most recent call last):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Process Process-3:
Traceback (most recent call last):
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "dict.py", line 14, in writ
self.run()
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "dict.py", line 14, in writ
for i in sharedi:
File "<string>", line 2, in __getitem__
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 774, in _callmethod
for i in sharedi:
File "<string>", line 2, in __getitem__
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/multiprocessing/managers.py", line 774, in _callmethod
raise convert_to_error(kind, result)
KeyError: 0
raise convert_to_error(kind, result)
KeyError: 0
UPDATE: I just searched and realized that I (maybe) cannot iterate over a shared dictionary object. One way to iterate over it is to convert it back to a normal dict. But in the program I'm going to write, that would take too much memory (each dict is about 4 GB). Is there any other way to iterate over a shared dict object?