Python multiprocessing threads with return codes [duplicate] - python

I want to run something like this:
from multiprocessing import Pool
import time
import random
class Controler(object):
def __init__(self):
nProcess = 10
pages = 10
self.__result = []
self.manageWork(nProcess,pages)
def BarcodeSearcher(x):
return x*x
def resultCollector(self,result):
self.__result.append(result)
def manageWork(self,nProcess,pages):
pool = Pool(processes=nProcess)
for pag in range(pages):
pool.apply_async(self.BarcodeSearcher, args = (pag, ), callback = self.resultCollector)
print self.__result
if __name__ == '__main__':
Controler()
but the code result the error :
Exception in thread Thread-1:
Traceback (most recent call last):
File "C:\Python26\lib\threading.py", line 522, in __bootstrap_inner
self.run()
File "C:\Python26\lib\threading.py", line 477, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\python26\lib\multiprocessing\pool.py", line 225, in _handle_tasks
put(task)
PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
I've seen the posts (post1 , post2) to solve my problem. I'm looking for something like Mike McKerns solution in the second post but without using pathos.

This works, using copy_reg, as suggested by Alex Martelli in the first link you provided:
import copy_reg
import types
import multiprocessing
def _pickle_method(m):
if m.im_self is None:
return getattr, (m.im_class, m.im_func.func_name)
else:
return getattr, (m.im_self, m.im_func.func_name)
copy_reg.pickle(types.MethodType, _pickle_method)
class Controler(object):
def __init__(self):
nProcess = 10
pages = 10
self.__result = []
self.manageWork(nProcess, pages)
def BarcodeSearcher(self, x):
return x*x
def resultCollector(self, result):
self.__result.append(result)
def manageWork(self, nProcess, pages):
pool = multiprocessing.Pool(processes=nProcess)
for pag in range(pages):
pool.apply_async(self.BarcodeSearcher, args=(pag,),
callback=self.resultCollector)
pool.close()
pool.join()
print(self.__result)
if __name__ == '__main__':
Controler()

Related

Python 3.9: multiprocessing process start() got an error| TypeError: cannot pickle 'weakref' object

I'm trying to decrease running time by using multiprocessing.
I got a weird error TypeError: cannot pickle 'weakref' object
I'm not quite sure why this error occurs because I also use this approach to run another program but it run normally. Can someone explain why this error occurs.
I already follow this Solution but it did not work for me.
import multiprocessing
from scipy import stats
import numpy as np
import pandas as pd
class T_TestFeature:
def __init__(self, data, classes):
self.data = data
self.classes = classes
self.manager = multiprocessing.Manager()
self.pval = self.manager.list()
def preform(self):
process = []
for i in range(10):
process.append(multiprocessing.Process(target=self.t_test, args=(i,)))
for p in process:
p.start()
for p in process:
p.join()
def t_test(self, k):
index_samples = np.array(self.data)[:,k]
rs1 = [index_samples[i] for i in range(len(index_samples)) if self.classes[i] == "Virginia"]
rs2 = [index_samples[i] for i in range(len(index_samples)) if self.classes[i] != "Virginia"]
self.pval.append(stats.ttest_ind(rs1, rs2, equal_var=False).pvalue)
def main():
df = pd.read_excel("/Users/xxx/Documents/Project/src/flattened.xlsx")
flattened = df.values.T
y = df.columns
result = T_TestFeature(flattened, y)
result.preform()
print(result.pval)
if __name__ == "__main__":
main()
Traceback (most recent call last):
File "/Users/xxx/Documents/Project/src/t_test.py", line 41, in <module>
main()
File "/Users/xxx/Documents/Project/src/t_test.py", line 37, in main
result.preform()
File "/Users/xxx/Documents/Project/src/t_test.py", line 21, in preform
p.start()
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/x/opt/anaconda3/lib/python3.9/multiprocessing/popen_spawn_posix.py", line 47, in _xxlaunch
reduction.dump(process_obj, fp)
File "/Users/xxx/opt/anaconda3/lib/python3.9/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
Here is a simpler way to reproduce your issue:
from multiprocessing import Manager, Process
class A:
def __init__(self):
self.manager = Manager()
def start(self):
print("started")
if __name__ == "__main__":
a = A()
proc = Process(target=a.start)
proc.start()
proc.join()
You cannot pickle instances containing manager objects, because they contain reference to the manager process they started (therefore, in general you can't pickle instances containing objects of class Process).
A simple fix would be to not store the manager. It will automatically be garbage collected once no references to the managed list remains:
def __init__(self, data, classes):
self.data = data
self.classes = classes
manager = multiprocessing.Manager()
self.pval = manager.list()

How do I stop an asyncio event loop from a child thread?

from cryptofeed import FeedHandler
from cryptofeed.feed import Feed
from cryptofeed.defines import L2_BOOK, BID, ASK
from cryptofeed.exchange.ftx import FTX
from threading import Thread
from time import sleep
class Executor:
def __init__(self, feed: Feed, coin_symbol: str, fut_symbol: str):
self.coin_symbol = coin_symbol
self.fut_symbol = fut_symbol
self.feed = feed
self.fh = FeedHandler()
self.loop = None
self._coin_top_book: dict = {}
self._fut_top_book: dict = {}
async def _book_update(self, feed, symbol, book, timestamp, receipt_timestamp):
if symbol == self.coin_symbol:
self._coin_top_book[BID] = book[BID].peekitem(-1)
self._coin_top_book[ASK] = book[ASK].peekitem(0)
elif symbol == self.fut_symbol:
self._fut_top_book[BID] = book[BID].peekitem(-1)
self._fut_top_book[ASK] = book[ASK].peekitem(0)
def start_feed(self):
self.fh.add_feed(self.feed(symbols=[self.fut_symbol, self.coin_symbol], channels=[L2_BOOK],
callbacks={L2_BOOK: self._book_update}))
self.fh.run()
def shoot(self):
# give the orderbooks time to be populated
while len(self._coin_top_book) == 0 or len(self._fut_top_book) == 0:
sleep(1)
for i in range(5):
print(self._coin_top_book)
sleep(1) # do some stuff
self.fh.stop()
def run(self):
th1 = Thread(target=self.shoot)
th1.start()
self.start_feed()
def main():
g = Executor(feed=FTX, coin_symbol='SOL-USD', fut_symbol='SOL-PERP')
g.run()
if __name__ == '__main__':
main()
So in my current attempt to stop this program, I call self.fh.stop() when things are finished inside shoot(). However, I get this error:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/threading.py", line 954, in _bootstrap_inner
self.run()
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/threading.py", line 892, in run
self._target(*self._args, **self._kwargs)
File "/Users/mc/Library/Application Support/JetBrains/PyCharmCE2021.2/scratches/scratch_1.py", line 43, in shoot
self.fh.stop()
File "/Users/mc/.virtualenvs/crypto/lib/python3.9/site-packages/cryptofeed/feedhandler.py", line 175, in stop
loop = asyncio.get_event_loop()
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/asyncio/events.py", line 642, in get_event_loop
raise RuntimeError('There is no current event loop in thread %r.'
RuntimeError: There is no current event loop in thread 'Thread-1'.
Presumably it's because I'm trying to access the event loop from the child thread whereas it only exists in the parent thread. However, I don't know how to handle this properly.

Prometheus counter not yielding in custom collector

I am writing this custome collector where I want to add a counter.
#!/usr/bin/env python3
import sys
import time
from prometheus_client import start_http_server
from prometheus_client.core import CollectorRegistry, Counter
class MyCollector():
def __init__(self):
self.mymetrics_counter = Counter('observability_total', 'Status of My Services', ['app', 'test'])
def describe(self):
print("Started: Metrics Collector!")
return list()
def collect(self):
self.mymetrics_counter.labels('observability', 'test').inc()
yield self.mymetrics_counter
if __name__ == '__main__':
try:
myregistry = CollectorRegistry()
myregistry.register(MyCollector())
start_http_server(port=9100, registry=myregistry)
while True:
time.sleep(10)
except KeyboardInterrupt:
print("Ended: Metrics Collector!")
sys.exit(0)
But I am getting below error upon yeild
(venv) test_collector % python mycollector.py
Started: Metrics Collector!
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/wsgiref/handlers.py", line 137, in run
self.result = application(self.environ, self.start_response)
File "/Users/myid/Documents/myproj/workspace/test_collector/venv/lib/python3.9/site-packages/prometheus_client/exposition.py", line 123, in prometheus_app
status, header, output = _bake_output(registry, accept_header, params)
File "/Users/myid/Documents/myproj/workspace/test_collector/venv/lib/python3.9/site-packages/prometheus_client/exposition.py", line 105, in _bake_output
output = encoder(registry)
File "/Users/myid/Documents/myproj/workspace/test_collector/venv/lib/python3.9/site-packages/prometheus_client/exposition.py", line 179, in generate_latest
mname = metric.name
AttributeError: ("'Counter' object has no attribute 'name'", prometheus_client.metrics.Counter(observability))
collect returns metric families, not metrics. If you yield each of the results of mymetrics_counter.collect() it'd work.
Also, when you create the Counter its getting registered to the default registry which you don't want in this soft of usage as it'll end up returned twice which is invalid.

Class variable in multiprocessing - python

Here is my code:
import multiprocessing
import dill
class Some_class():
class_var = 'Foo'
def __init__(self, param):
self.name = param
def print_name(self):
print("we are in object "+self.name)
print(Some_class.class_var)
def run_dill_encoded(what):
fun, args = dill.loads(what)
return fun(*args)
def apply_async(pool, fun, args):
return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))
if __name__ == '__main__':
list_names = [Some_class('object_1'), Some_class('object_2')]
pool = multiprocessing.Pool(processes=4)
results = [apply_async(pool, Some_class.print_name, args=(x,)) for x in list_names]
output = [p.get() for p in results]
print(output)
It returns error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp_obj_output_standard.py", line 18, in run_dill_encoded
return fun(*args)
File "C:/...temp_obj_output_standard.py", line 14, in print_name
print(Some_class.class_var)
NameError: name 'Some_class' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/...temp_obj_output_standard.py", line 31, in <module>
output = [p.get() for p in results]
File "C:/...temp_obj_output_standard.py", line 31, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'Some_class' is not defined
Process finished with exit code 1
The code works fine without line print(Some_class.class_var). What is wrong with accessing class variables, both objects should have it and I don't think processes should conflict about it. Am I missing something?
Any suggestions on how to troubleshoot it? Do not worry about run_dill_encoded and
apply_async, I am using this solution until I compile multiprocess on Python 3.x.
P.S. This is already enough, but stackoverflow wants me to put more details, not really sure what to put.

Python: How to call method in separate process

I want to start the ActorCore method in a seperte process and then process messages that come to that ActorCore. For some reason this code is not working.
import queue
from multiprocessing import Process
class NotMessage(Exception):
def __str__(self):
return 'NotMessage exception'
class Message(object):
def Do(self, Actor):
# Do some stuff to the actor
pass
def __str__(self):
return 'Generic message'
class StopMessage(Message):
def Do(self, Actor):
Actor.__stopped = True
def __str__(self):
return 'Stop message'
class Actor(object):
__DebugName = ''
__MsgQ = None
__stopped = False
def __init__(self, Name):
self.__DebugName = Name
self.__MsgQ = queue.Queue()
def LaunchActor(self):
p = Process(target=self.ActorCore)
p.start()
return self.__MsgQ
def ActorCore(self):
while not self.__stopped:
Msg = self.__MsgQ.get(block=True)
try:
Msg.Do(self)
print(Msg)
except NotMessage as e:
print(str(e), ' occurred in ', self.__DebugName)
def main():
joe = Actor('Joe')
msg = Message()
stop = StopMessage()
qToJoe = joe.LaunchActor()
qToJoe.put(msg)
qToJoe.put(msg)
qToJoe.put(stop)
if __name__ == '__main__':
main()
I am getting weird error when running:
Traceback (most recent call last):
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 64, in <module>
main()
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 58, in main
qToJoe = joe.LaunchActor()
File "C:/Users/plkruczp/PycharmProjects/ActorFramework/Actor/Actor.py", line 40, in LaunchActor
p.start()
File "C:\Program Files\Python35\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Program Files\Python35\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Program Files\Python35\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: can't pickle _thread.lock objects
Help please! I tried everything :(
Just use Queue instead of queue:
Remove import queue and add Queue to from multiprocessing like:
from multiprocessing import Process,Queue
then change self.__MsgQ = queue.Queue() to self.__MsgQ = Queue()
That's all you need to do to get it to work, the rest is the same for your case.
Edit, explanation:
queue.Queue is only thread-safe, and multiprocessing does actually spawn another process. Because of that, the additional multiprocessing.Queue is implemented to be also process-safe. As another option, if multithreading is wanted, the threading library can be used together with queue.Queue: https://docs.python.org/dev/library/threading.html#module-threading
Additional information:
Another parallelization option, depending on your further requirements is joblib, where the spawning can be defined to be either a process or a thread: https://joblib.readthedocs.io/

Categories