Python Threading and Multiprocessing in the same code causing pickling error - python

While using Python 3 on Windows 7 to process some large CSV files, I have run into an issue with the program not running fast enough. The original working version of the code is similar to the code below, except that both process calls were threads. After adding the multiprocessing library and changing tdg.Thread to mp.Process as shown below, I receive this pickling error:
line 70, in <module>
proc1.start()
File "C:\Python34\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Python34\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Python34\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Python34\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Python34\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '_thread.lock'>: attribute lookup lock on _thread failed
Code:
import multiprocessing as mp
import threading as tdg
import queue as q
def my_p1func1(data, Q):
    """Placeholder for the LDAP lookup on data set 1.

    Prints ``data`` and puts it on ``Q`` unchanged.
    """
    # performs LDAP for data set 1
    print("p1f1:", data)
    Q.put(data)
def my_p1func2(data, Q):
    """Placeholder for the LDAP lookup on data set 2.

    Prints ``data`` and puts it on ``Q`` unchanged.
    """
    # performs LDAP for data set 2
    print("p1f2:", data)
    Q.put(data)
def my_proc1(data, Q):
    """Run both LDAP helpers in parallel threads and report their results.

    ``data`` is indexed with the keys ``'1'`` and ``'2'``; each value is handed
    to its own thread. Both results are put on ``Q`` as one dict of the form
    ``{'f1': ..., 'f2': ...}``.
    """
    # Both workers are threads of this process, so the plain thread-safe
    # queue.Queue is sufficient for collecting their output.
    f1_Q = q.Queue()
    f2_Q = q.Queue()
    # The targets must use the names the helpers are defined under; the
    # earlier myP1Func1 / myP1Func2 spelling raised NameError.
    f1 = tdg.Thread(target=my_p1func1, args=(data['1'], f1_Q))
    f2 = tdg.Thread(target=my_p1func2, args=(data['2'], f2_Q))
    f1.start()
    f2.start()
    f1.join()
    f2.join()
    f1_out = f1_Q.get()
    f2_out = f2_Q.get()
    Q.put({'f1': f1_out, 'f2': f2_out})
def my_p2func1(data, Q):
    """Placeholder for the gethostbyaddr() lookup on data set 1.

    Prints ``data`` and puts it on ``Q`` unchanged.
    """
    # perform gethostbyaddr() for data set 1
    print("p2f1:", data)
    Q.put(data)
def my_p2func2(data, Q):
    """Placeholder for the gethostbyaddr() lookup on data set 2.

    Prints ``data`` and puts it on ``Q`` unchanged.
    """
    # perform gethostbyaddr() for data set 2
    print("p2f2:", data)
    Q.put(data)
def my_proc2(data, Q):
    """Run both gethostbyaddr() helpers in parallel threads and report results.

    ``data`` is indexed with the keys ``'1'`` and ``'2'``; each value is handed
    to its own thread. Both results are put on ``Q`` as one dict of the form
    ``{'f1': ..., 'f2': ...}``.
    """
    # Both workers are threads of this process, so the plain thread-safe
    # queue.Queue is sufficient for collecting their output.
    f1_Q = q.Queue()
    f2_Q = q.Queue()
    # The targets must use the names the helpers are defined under; the
    # earlier myP2Func1 / myP2Func2 spelling raised NameError.
    f1 = tdg.Thread(target=my_p2func1, args=(data['1'], f1_Q))
    f2 = tdg.Thread(target=my_p2func2, args=(data['2'], f2_Q))
    f1.start()
    f2.start()
    f1.join()
    f2.join()
    f1_out = f1_Q.get()
    f2_out = f2_Q.get()
    Q.put({'f1': f1_out, 'f2': f2_out})
# Driver script as posted: runs at module level, without an
# `if __name__ == "__main__":` guard.
dataIn = {'1': [1,2,3], '2': ['a','b','c']}
# NOTE: q.Queue is the thread-only queue; it is built on _thread.lock objects,
# which cannot be pickled when it is passed as a Process argument. This is the
# source of the PicklingError raised by proc1.start().
pq1 = q.Queue()
pq2 = q.Queue()
proc1 = mp.Process(target=my_proc1, args=(dataIn, pq1))
proc2 = mp.Process(target=my_proc2, args=(dataIn,pq2))
proc1.start()
proc2.start()
proc1.join()
proc2.join()
# Collect one result dict from each worker process.
p1 = pq1.get()
p2 = pq2.get()
print(p1)
print(p2)
I thought the issue was being caused by Locks I had around my print statements, but even after removing them it continues to throw the same pickling error.
I am in over my head with this and would appreciate any help understanding why it is attempting to pickle something not in use and how do I get this running so that it is more efficient?

You can't use a regular queue.Queue object (Queue.Queue in Python 2) with multiprocessing. You have to use a multiprocessing.Queue. The standard queue.Queue won't be shared between the processes, even if you were to make it picklable. It's an easy fix, though:
if __name__ == "__main__":
    dataIn = {'1': [1,2,3], '2': ['a','b','c']}
    # multiprocessing.Queue can be handed to a child process, unlike the
    # thread-only queue.Queue.
    pq1 = mp.Queue()
    pq2 = mp.Queue()
    proc1 = mp.Process(target=my_proc1, args=(dataIn, pq1))
    proc2 = mp.Process(target=my_proc2, args=(dataIn, pq2))
    proc1.start()
    proc2.start()
    # Read the results BEFORE joining: a child that has put items on a
    # multiprocessing.Queue may not exit until they are consumed, so
    # join()-then-get() can deadlock.
    p1 = pq1.get()
    p2 = pq2.get()
    proc1.join()
    proc2.join()
    print(p1)
    print(p2)

Related

Sharing dictionary over multiprocesses (TypeError: cannot pickle 'weakref' object)

I want to create a class Storage where each object has a dictionary orderbooks as a property.
I want to write on orderbooks from the main process by invoking the method write, but I want to defer this action to another process and ensuring that the dictionary orderbooks is accessible from the main process.
To do so, I create a Manager() that I pass during the definition of the object and that is used to notify the processes about changes to the dictionary. My code is the following:
from multiprocessing import Process, Manager
class Storage():
    """Holds a manager-backed ``orderbooks`` dict written to from a child process.

    NOTE: this is the version from the question. Keeping the manager on the
    instance is what makes ``write`` fail: starting the Process pickles
    ``self`` (the target is a bound method), and the manager cannot be
    pickled -> ``TypeError: cannot pickle 'weakref' object``.
    """

    def __init__(self, manager):
        self.manager = manager
        self.orderbooks = self.manager.dict()

    def store_value(self, el):
        """Store ``el[1]`` in ``orderbooks`` under the key ``el[0]``."""
        self.orderbooks[el[0]] = el[1]

    def write(self, el: list):
        """Start a child process that runs ``store_value(el)``."""
        p = Process(target=self.store_value, args=(el,))
        p.start()
if __name__ == '__main__':
    # The manager object itself is handed to Storage, which keeps it on the
    # instance.
    manager = Manager()
    book1 = Storage(manager)
    book1.write([0, 1])
However, when I run this code, I get the following error
Traceback (most recent call last):
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 21, in <module>
book1.write([0,1])
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 13, in write
p.start()
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
What is wrong with my code?
Per Aaron's posted comment:
from multiprocessing import Process, Manager
class Storage():
    """Writes to a shared ``orderbooks`` mapping from a child process.

    Only the managed dict is stored on the instance, not the manager that
    created it, so the instance stays picklable when ``write`` starts a
    Process whose target is a bound method.
    """

    def __init__(self, orderbooks):
        self.orderbooks = orderbooks

    def store_value(self, el):
        """Store ``el[1]`` in ``orderbooks`` under the key ``el[0]``."""
        self.orderbooks[el[0]] = el[1]

    def write(self, el: list):
        """Run ``store_value(el)`` in a child process and wait for it."""
        p = Process(target=self.store_value, args=(el,))
        p.start()
        # Ensure we do not return until store_value has
        # completed updating the dictionary:
        p.join()
if __name__ == '__main__':
    # The manager stays in the main script; Storage only receives the
    # managed dict created from it.
    manager = Manager()
    orderbooks = manager.dict()
    book1 = Storage(orderbooks)
    book1.write([0, 1])
    print(orderbooks)
Prints:
{0: 1}

Python 3.9: multiprocessing process start() got an error| TypeError: cannot pickle 'weakref' object

I'm trying to decrease running time by using multiprocessing.
I got a weird error TypeError: cannot pickle 'weakref' object
I'm not quite sure why this error occurs, because I use the same approach in another program and it runs normally. Can someone explain why this error occurs?
I already followed this solution, but it did not work for me.
import multiprocessing
from scipy import stats
import numpy as np
import pandas as pd
class T_TestFeature:
    """Runs one Welch t-test per column index in separate processes.

    NOTE: this is the version from the question. Storing the manager on the
    instance is what makes ``preform`` fail: each Process target is a bound
    method, so ``self`` is pickled on start and the manager cannot be pickled
    -> ``TypeError: cannot pickle 'weakref' object``.
    """

    def __init__(self, data, classes):
        self.data = data
        self.classes = classes
        self.manager = multiprocessing.Manager()
        # Managed list that collects one p-value per t_test call.
        self.pval = self.manager.list()

    def preform(self):
        """Start one process per column index 0..9 and wait for all of them."""
        process = []
        for i in range(10):
            process.append(multiprocessing.Process(target=self.t_test, args=(i,)))
        for p in process:
            p.start()
        for p in process:
            p.join()

    def t_test(self, k):
        """Append the p-value for column ``k`` of ``data`` to ``pval``.

        Samples whose class label equals "Virginia" are compared against all
        other samples with an unequal-variance t-test.
        """
        index_samples = np.array(self.data)[:, k]
        rs1 = [index_samples[i] for i in range(len(index_samples)) if self.classes[i] == "Virginia"]
        rs2 = [index_samples[i] for i in range(len(index_samples)) if self.classes[i] != "Virginia"]
        self.pval.append(stats.ttest_ind(rs1, rs2, equal_var=False).pvalue)
def main():
    """Load the spreadsheet, run the t-tests and print the p-values."""
    df = pd.read_excel("/Users/xxx/Documents/Project/src/flattened.xlsx")
    # The transposed values are the data; the column labels are used as the
    # class labels.
    flattened = df.values.T
    y = df.columns
    result = T_TestFeature(flattened, y)
    result.preform()
    print(result.pval)


if __name__ == "__main__":
    main()
Traceback (most recent call last):
File "/Users/xxx/Documents/Project/src/t_test.py", line 41, in <module>
main()
File "/Users/xxx/Documents/Project/src/t_test.py", line 37, in main
result.preform()
File "/Users/xxx/Documents/Project/src/t_test.py", line 21, in preform
p.start()
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Here is a simpler way to reproduce your issue:
from multiprocessing import Manager, Process
class A:
    """Minimal reproduction: an instance that keeps a Manager as an attribute."""

    def __init__(self):
        # Holding the manager on the instance is what makes the instance
        # unpicklable when a bound method is used as a Process target.
        self.manager = Manager()

    def start(self):
        print("started")
if __name__ == "__main__":
    a = A()
    # target is a bound method, so starting the process has to pickle `a`
    # (including a.manager); this is where the TypeError is raised.
    proc = Process(target=a.start)
    proc.start()
    proc.join()
You cannot pickle instances containing manager objects, because they contain a reference to the manager process they started (therefore, in general, you can't pickle instances containing objects of class Process).
A simple fix would be to not store the manager. It will automatically be garbage collected once no references to the managed list remain:
def __init__(self, data, classes):
    self.data = data
    self.classes = classes
    # Keep the manager in a local variable only: an instance that stores the
    # manager itself cannot be pickled when its bound methods are used as
    # Process targets. Only the managed list is kept on the instance.
    manager = multiprocessing.Manager()
    self.pval = manager.list()

Proper way to share a list between processes?

I wanted to set up two subprocesses in which subprocess1 keeps generating data (in type of list), and subprocess2 is in charge of processing the data sent from subprocess1.
I used multiprocessing.Manager().list() to create a shared list. But this is the error it reports:
FileNotFoundError: [WinError 2]
Code
I simplified the code as below:
ps: need to run it in terminal.
import multiprocessing as mp
import random
import time
def generator(a, b, tick):
    """Simulate data collection, handing every fifth batch over to ``b``.

    Once per second a list of four random floats in [1, 5] is appended to the
    shared list ``a``. After every fifth sample, a copy of the accumulated
    batch is appended to ``b`` as one element, ``tick.value`` is set to 1 to
    tell ``printer`` to print, and ``a`` is emptied. Runs forever.
    """
    counter = 0
    while True:
        time.sleep(1)
        a.append([random.uniform(1, 5), random.uniform(1, 5), random.uniform(1, 5), random.uniform(1, 5)])
        counter += 1
        print('generate says', a[:])
        if counter % 5 == 0:
            b.append(a[:])
            tick.value = 1  # Telling 'printer' func to print.
            # Empty the batch in one step. The earlier
            # `for _ in a: a.remove(_)` mutated the list while iterating over
            # it, which skips elements and leaves stale samples behind.
            del a[:]
def printer(b, tick):
    """Simulate data processing by printing what ``generator`` handed over.

    Polls ``tick.value`` once per second. When it is 1, waits one more second,
    prints the contents of the shared list ``b``, resets ``tick.value`` to 0
    and empties ``b``. Runs forever.
    """
    while True:
        time.sleep(1)
        if tick.value == 1:
            time.sleep(1)
            print('printer says', b[:])
            tick.value = 0
            # Empty the list in one step. The earlier
            # `for _ in b: b.remove(_)` mutated the list while iterating over
            # it, which skips elements and leaves stale batches behind.
            del b[:]
if __name__=='__main__':
    tick=mp.Value('i', 0)
    # Each shared list comes from its own, otherwise unreferenced, manager.
    a = mp.Manager().list()
    b = mp.Manager().list()
    p1 = mp.Process(target=generator, args=(a, b, tick))
    p2 = mp.Process(target=printer, args=(b, tick))
    p1.start()
    p2.start()
    # NOTE: as posted there is no join(), so the main process falls off the
    # end of the script while the workers are still using the managed lists;
    # the workers then fail to connect to the manager (FileNotFoundError).
Error
Traceback (most recent call last):
File "d:\miniconda\lib\multiprocessing\process.py", line 297, in _bootstrap
self.run()
File "d:\miniconda\lib\multiprocessing\process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "D:\Program Files (x86)\onedrive\nilm\pi\redd\niubi.py", line 9, in generater
a.append([random.uniform(1,5),random.uniform(1,5),random.uniform(1,5),random.uniform(1,5)])
File "<string>", line 2, in append
File "d:\miniconda\lib\multiprocessing\managers.py", line 792, in _callmethod
self._connect()
File "d:\miniconda\lib\multiprocessing\managers.py", line 779, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
File "d:\miniconda\lib\multiprocessing\connection.py", line 490, in Client
c = PipeClient(address)
File "d:\miniconda\lib\multiprocessing\connection.py", line 691, in PipeClient
_winapi.WaitNamedPipe(address, 1000)
FileNotFoundError: [WinError 2] The system cannot find the file specified.
There were a few things to fix, but the primary issue is that you should include Process.join, as seen below:
import multiprocessing as mp
import random
import time
... # generator and printer definitions are unchanged
if __name__=='__main__':
    manager = mp.Manager()  # Create an instance of the manager
    a = manager.list()
    b = manager.list()
    tick = mp.Value('i', 0)
    # Argument order must match the unchanged signatures
    # generator(a, b, tick) and printer(b, tick).
    p1 = mp.Process(target=generator, args=(a, b, tick))
    p2 = mp.Process(target=printer, args=(b, tick))
    p1.start()
    p2.start()
    # Block here so the main process (which owns the manager) stays alive
    # for as long as the workers are running.
    p1.join()
    p2.join()

program with python, but got an error _pickle.PicklingError

I wrote the code below:
import random, time, queue
from multiprocessing.managers import BaseManager

# Queues that the manager exposes: one for outgoing tasks, one for results.
task_queue = queue.Queue()
result_queue = queue.Queue()


class QueueManager(BaseManager):
    """BaseManager subclass on which the two queue accessors are registered."""
    pass


# NOTE: this is the version from the question. On Windows, manager.start()
# has to pickle the registered callables, and a lambda cannot be pickled
# ("attribute lookup <lambda> on __main__ failed").
QueueManager.register('get_task_queue', callable=lambda: task_queue)
QueueManager.register('get_result_queue', callable=lambda: result_queue)
manager = QueueManager(address=('', 5000), authkey=b'abd')
manager.start()
task = manager.get_task_queue()
result = manager.get_result_queue()
# Put ten random task numbers on the task queue.
for i in range(10):
    n = random.randint(0, 10000)
    print('Put task %d...' % n)
    task.put(n)
print('Try get result...')
# Read ten results, waiting up to 10 seconds for each one.
for i in range(10):
    r = result.get(timeout=10)
    print('Result: %s' % r)
manager.shutdown()
print('master exit.')
but when it runs, I receive this error:
Traceback (most recent call last):
File "D:/PycharmProjects/test/task_master.py", line 23, in <module>
manager.start()
File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\managers.py", line 479, in start
self._process.start()
File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\tang_ke\AppData\Local\Programs\Python\Python35-32\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function <lambda> at 0x03A67C48>: attribute lookup <lambda> on __main__ failed
Process finished with exit code 1
I found the answer on this site: 讨论 - 廖雪峰的官方网站 (Discussion - Liao Xuefeng's official website)
step 1:
Don't use a lambda in QueueManager.register; a lambda cannot be pickled, so replace it with a named module-level function, for example:
def return_task_queue():
    """Return the module-level task queue.

    A named module-level function is used instead of a lambda so that it can
    be pickled when the manager is started. No ``global`` statement is needed
    because the name is only read, never rebound.
    """
    return task_queue


QueueManager.register('get_task_queue', callable=return_task_queue)
step 2:
You have to give an explicit IP address when you create the QueueManager, for example:
QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
step 3:
You have to put all of the code that works with the QueueManager, the task queue and the result queue inside a function, for example:
def test():
    """Register the queue accessors, then create and start the manager."""
    QueueManager.register('get_task_queue', callable=return_task_queue)
    QueueManager.register('get_result_queue', callable=return_result_queue)
    manager = QueueManager(address=('127.0.0.1', 5000), authkey=b'abc')
    manager.start()
    ...  # the rest of the master script goes here
Then call the function "test" from under the main guard, for example:
if __name__ == '__main__':
    # Only run when executed as a script, not when the module is re-imported
    # by a spawned child process.
    test()

Python: How to call method in separate process

I want to start the ActorCore method in a separate process and then process messages that come to that ActorCore. For some reason this code is not working.
import queue
from multiprocessing import Process
class NotMessage(Exception):
    """Exception caught by the actor loop while handling a message."""

    def __str__(self):
        return 'NotMessage exception'
class Message(object):
    """Base message; ``Do`` is invoked with the actor that received it."""

    def Do(self, Actor):
        # Do some stuff to the actor
        pass

    def __str__(self):
        return 'Generic message'


class StopMessage(Message):
    """Message that tells the receiving actor to leave its processing loop."""

    def Do(self, Actor):
        # Actor's loop tests its private `__stopped` flag, which Python stores
        # as `_Actor__stopped`. Writing `Actor.__stopped` from inside this
        # class would be mangled to `_StopMessage__stopped` instead, so the
        # loop would never see the flag; set the real attribute explicitly.
        Actor._Actor__stopped = True

    def __str__(self):
        return 'Stop message'
class Actor(object):
    """Actor that processes queued messages in a separate process.

    NOTE: this is the version from the question. The message queue is a
    thread-only ``queue.Queue``; ``LaunchActor`` uses a bound method as the
    Process target, so ``self`` (including the queue and its locks) is pickled
    on start -> ``TypeError: can't pickle _thread.lock objects``.
    """

    # Class-level defaults; __init__ replaces the name and the queue.
    __DebugName = ''
    __MsgQ = None
    __stopped = False

    def __init__(self, Name):
        self.__DebugName = Name
        self.__MsgQ = queue.Queue()

    def LaunchActor(self):
        """Start ``ActorCore`` in a new process and return the message queue."""
        p = Process(target=self.ActorCore)
        p.start()
        return self.__MsgQ

    def ActorCore(self):
        """Take messages off the queue and apply them until stopped."""
        while not self.__stopped:
            Msg = self.__MsgQ.get(block=True)
            try:
                Msg.Do(self)
                print(Msg)
            except NotMessage as e:
                print(str(e), ' occurred in ', self.__DebugName)
def main():
    """Launch one actor and send it two generic messages and a stop message."""
    joe = Actor('Joe')
    msg = Message()
    stop = StopMessage()
    qToJoe = joe.LaunchActor()
    qToJoe.put(msg)
    qToJoe.put(msg)
    qToJoe.put(stop)


if __name__ == '__main__':
    main()
I am getting a weird error when running:
Traceback (most recent call last):
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 64, in <module>
main()
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 58, in main
qToJoe = joe.LaunchActor()
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 40, in LaunchActor
p.start()
File "C:\Program Files\Python35\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python35\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
Help please! I tried everything :(
Just use multiprocessing.Queue instead of queue.Queue:
Remove import queue and add Queue to from multiprocessing like:
from multiprocessing import Process,Queue
then change self.__MsgQ = queue.Queue() to self.__MsgQ = Queue()
That's all you need to do to get it to work, the rest is the same for your case.
Edit, explanation:
queue.Queue is only thread-safe, and multiprocessing does actually spawn another process. Because of that, the additional multiprocessing.Queue is implemented to be also process-safe. As another option, if multithreading is wanted, the threading library can be used together with queue.Queue: https://docs.python.org/dev/library/threading.html#module-threading
Additional information:
Another parallelization option, depending on your further requirements is joblib, where the spawning can be defined to be either a process or a thread: https://joblib.readthedocs.io/

Categories