Handling KeyboardInterrupt gracefully when using multiple Processes and Queues - python

I have a producer/consumer system using Process and Queue from multiprocessing. The processes do catch the KeyboardInterrupt, and the final result dictionary is returned successfully. However, the processes keep spamming this error message:
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/nlp/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
send_bytes(obj)
File "/home/ubuntu/anaconda3/envs/nlp/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
self._send_bytes(m[offset:offset + size])
File "/home/ubuntu/anaconda3/envs/nlp/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header + buf)
File "/home/ubuntu/anaconda3/envs/nlp/lib/python3.7/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
This suggests to me that something, somewhere, is still trying to put data onto a Queue and failing.
import queue  # for queue.Empty, raised by get_nowait()
import signal
import sys
from multiprocessing import Process, Queue
from typing import Iterable


def do_work(task):
    pass


def pull_worker(
    worker_num, work_queue: Queue, result_queue: Queue
):
    run = True

    def signal_handle(_signal, frame):
        nonlocal run
        run = False
        result_queue.put(None)  # Signal to the consumer that the worker is finished
        result_queue.close()
        result_queue.join_thread()
        work_queue.close()
        work_queue.join_thread()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handle)
    signal.signal(signal.SIGTERM, signal_handle)
    signal.signal(signal.SIGHUP, signal_handle)

    while run:
        try:
            task = work_queue.get_nowait()
        except queue.Empty:  # multiprocessing queues raise queue.Empty, not work_queue.Empty
            break
        else:
            result = do_work(task)
            if run:
                result_queue.put((task, result))

    result_queue.put(None)  # Signal to the consumer that the worker is finished
def save_consumer(result_queue, final_result_queue, n_workers):
    """
    Puts all results into the final result queue when all workers have finished
    """
    n_finished_producers = 0
    results = {}

    def signal_handle(_signal, frame):
        print("Caught keyboard interrupt in consumer")

    signal.signal(signal.SIGINT, signal_handle)
    signal.signal(signal.SIGTERM, signal_handle)
    signal.signal(signal.SIGHUP, signal_handle)

    while n_finished_producers < n_workers:
        result = result_queue.get()
        if result is None:
            n_finished_producers += 1
            continue
        (task, task_result) = result
        results[task] = task_result

    final_result_queue.put(results)
    final_result_queue.close()
    final_result_queue.join_thread()
def main(
    tasks: Iterable[str],
    n_workers: int,
):
    n_workers = min(len(tasks), n_workers)

    # For tasks to be done
    work_queue = Queue()
    # For results as they are fetched from the workers
    result_queue = Queue()
    # For the final, single, result dictionary when workers are shut down
    final_result_queue = Queue()

    for task in tasks:
        work_queue.put(task)

    consumer = Process(
        target=save_consumer, args=(result_queue, final_result_queue, n_workers)
    )
    consumer.start()

    producers = []
    for worker_num in range(n_workers):
        proc = Process(
            target=pull_worker,
            args=(worker_num, work_queue, result_queue)
        )
        proc.start()
        producers.append(proc)

    try:
        for proc in producers:
            proc.join()
    except (KeyboardInterrupt, BaseException, SystemExit) as e:
        print(f"Caught interrupt in Main: {type(e)} {e}")
    finally:
        for proc in producers:
            proc.join()

    results = final_result_queue.get()
    consumer.join()
    return results
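One pattern often suggested for avoiding the feeder-thread BrokenPipeError in a layout like this is to have the worker processes ignore SIGINT entirely and let the parent coordinate shutdown through sentinels, so no child is interrupted or killed while a Queue still has buffered data. A minimal sketch of that idea (not the code above; STOP and the simplified worker are placeholders):

import signal
from multiprocessing import Process, Queue

STOP = None  # sentinel / poison pill


def worker(work_queue: Queue, result_queue: Queue):
    # Children ignore Ctrl-C; only the parent reacts to it and decides
    # when to push the STOP sentinels, so no put() races with a closed pipe.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    while True:
        task = work_queue.get()
        if task is STOP:
            break
        result_queue.put((task, task))  # (task, result) placeholder
    result_queue.put(STOP)  # tell the consumer this worker is finished


if __name__ == "__main__":
    work_queue, result_queue = Queue(), Queue()
    workers = [Process(target=worker, args=(work_queue, result_queue)) for _ in range(2)]
    for w in workers:
        w.start()
    try:
        for task in ["a", "b", "c"]:
            work_queue.put(task)
    except KeyboardInterrupt:
        pass
    finally:
        for _ in workers:
            work_queue.put(STOP)      # the parent, not the children, triggers shutdown
        done = 0
        while done < len(workers):    # drain results until every worker has sent STOP
            if result_queue.get() is STOP:
                done += 1
        for w in workers:
            w.join()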

Related

How do I stop an asyncio event loop from a child thread?

from cryptofeed import FeedHandler
from cryptofeed.feed import Feed
from cryptofeed.defines import L2_BOOK, BID, ASK
from cryptofeed.exchange.ftx import FTX

from threading import Thread
from time import sleep


class Executor:
    def __init__(self, feed: Feed, coin_symbol: str, fut_symbol: str):
        self.coin_symbol = coin_symbol
        self.fut_symbol = fut_symbol
        self.feed = feed
        self.fh = FeedHandler()
        self.loop = None

        self._coin_top_book: dict = {}
        self._fut_top_book: dict = {}

    async def _book_update(self, feed, symbol, book, timestamp, receipt_timestamp):
        if symbol == self.coin_symbol:
            self._coin_top_book[BID] = book[BID].peekitem(-1)
            self._coin_top_book[ASK] = book[ASK].peekitem(0)
        elif symbol == self.fut_symbol:
            self._fut_top_book[BID] = book[BID].peekitem(-1)
            self._fut_top_book[ASK] = book[ASK].peekitem(0)

    def start_feed(self):
        self.fh.add_feed(self.feed(symbols=[self.fut_symbol, self.coin_symbol], channels=[L2_BOOK],
                                   callbacks={L2_BOOK: self._book_update}))
        self.fh.run()

    def shoot(self):
        # give the orderbooks time to be populated
        while len(self._coin_top_book) == 0 or len(self._fut_top_book) == 0:
            sleep(1)
        for i in range(5):
            print(self._coin_top_book)
            sleep(1)  # do some stuff
        self.fh.stop()

    def run(self):
        th1 = Thread(target=self.shoot)
        th1.start()
        self.start_feed()


def main():
    g = Executor(feed=FTX, coin_symbol='SOL-USD', fut_symbol='SOL-PERP')
    g.run()


if __name__ == '__main__':
    main()
So in my current attempt to stop this program, I call self.fh.stop() when things are finished inside shoot(). However, I get this error:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/threading.py", line 954, in _bootstrap_inner
self.run()
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/threading.py", line 892, in run
self._target(*self._args, **self._kwargs)
File "/Users/mc/Library/Application Support/JetBrains/PyCharmCE2021.2/scratches/scratch_1.py", line 43, in shoot
self.fh.stop()
File "/Users/mc/.virtualenvs/crypto/lib/python3.9/site-packages/cryptofeed/feedhandler.py", line 175, in stop
loop = asyncio.get_event_loop()
File "/Users/mc/.pyenv/versions/3.9.1/lib/python3.9/asyncio/events.py", line 642, in get_event_loop
raise RuntimeError('There is no current event loop in thread %r.'
RuntimeError: There is no current event loop in thread 'Thread-1'.
Presumably it's because I'm trying to access the event loop from the child thread whereas it only exists in the parent thread. However, I don't know how to handle this properly.
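One general way to handle this, independent of cryptofeed, is to keep a reference to the loop that runs in the main thread and schedule any shutdown call onto that loop from the worker thread with call_soon_threadsafe; a minimal sketch of the pattern (the two-second sleep stands in for the real work done in shoot()):

import asyncio
import threading
import time


def main():
    loop = asyncio.new_event_loop()

    def shoot():
        time.sleep(2)  # stand-in for the real work
        # Calling loop.stop() directly from this thread would not be safe;
        # call_soon_threadsafe hands it over to the thread running the loop.
        loop.call_soon_threadsafe(loop.stop)

    threading.Thread(target=shoot).start()
    asyncio.set_event_loop(loop)
    loop.run_forever()   # returns once stop() has run on the loop
    loop.close()


if __name__ == "__main__":
    main()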

Python asyncio: closing a socket and releasing an await sock_read()

I'm writing a small multi-user game. Users are logged on via console or sockets. I want to be able to kick out other users.
I use asyncio and wait for user input by calling await loop.sock_recv(sock, 256). Now, if some other user (say, from the console) closes the socket, the event loop crashes since select.select seems to have problems.
How can I kill a connection and release the sock_recv()?
Attached is a small(ish) MWE. It creates listening sockets and will accept a connection on port 4000. After that you can kill a connection by entering "x" on the console. logoff() is my attempt to kill the connection.
import asyncio
import socket
import sys
import threading


# ------ console input -------------------------------------------------------

async def _ainput(loop):
    fut = loop.create_future()

    def _run():
        line = sys.stdin.readline().strip()
        loop.call_soon_threadsafe(fut.set_result, line)

    threading.Thread(target=_run, daemon=True).start()
    return await fut


async def console_input_loop(loop):
    while True:
        inp = (await _ainput(loop)).strip()
        print(f"[{inp.strip()}]")
        if inp == "x":
            logoff()


# ------ socket input --------------------------------------------------------

alive = True


async def socket_input_loop(loop, sock):
    print(f"New connection")
    global alive
    while alive:
        try:
            inp = await loop.sock_recv(sock, 256)
        except ConnectionResetError:
            break
        print(inp)
    print("shutting down")
    sock.shutdown(socket.SHUT_RDWR)
    sock.close()
    print(f"Connection closed")
    alive = True


listen_addr = ('', 4000)


async def _run_server(loop, server):
    server.bind(listen_addr)
    server.listen(8)
    server.setblocking(False)
    while loop.is_running():
        global sock
        sock = (await loop.sock_accept(server))[0]
        loop.create_task(socket_input_loop(loop, sock))


async def run_server4(loop):
    await _run_server(loop, socket.socket(socket.AF_INET, socket.SOCK_STREAM))


async def run_server6(loop):
    await _run_server(loop, socket.socket(socket.AF_INET6, socket.SOCK_STREAM))


def async_driver():
    loop = asyncio.get_event_loop()
    loop.create_task(console_input_loop(loop))
    loop.create_task(run_server4(loop))
    loop.create_task(run_server6(loop))
    loop.run_forever()
    print()


def logoff():
    global alive
    alive = False
    # loop = asyncio.get_event_loop()
    # try:
    #     key = loop._selector.get_key(sock.fileno())
    # except KeyError:
    #     pass
    # else:
    #     mask, (reader, writer) = key.events, key.data
    #     # loop._add_callback(reader)
    # the next line is needed, otherwise we get:
    #   r, w, x = select.select(r, w, w, timeout)
    #   OSError: [WinError 10038] An operation was attempted on something that is not a socket
    # loop.remove_reader(sock.fileno())
    sock.shutdown(socket.SHUT_RDWR)
    sock.close()
    # reader._run()


async_driver()
I get this crash:
Traceback (most recent call last):
File "C:\xx7.py", line 91, in <module>
async_driver()
File "C:\xx7.py", line 69, in async_driver
loop.run_forever()
File "C:\Users\chris\Anaconda3\lib\asyncio\base_events.py", line 528, in run_forever
self._run_once()
File "C:\Users\chris\Anaconda3\lib\asyncio\base_events.py", line 1728, in _run_once
event_list = self._selector.select(timeout)
File "C:\Users\chris\Anaconda3\lib\selectors.py", line 323, in select
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File "C:\Users\chris\Anaconda3\lib\selectors.py", line 314, in _select
r, w, x = select.select(r, w, w, timeout)
OSError: [WinError 10038] An operation was attempted on something that is not a socket
The solution was simple: just shutdown() without close().
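A minimal sketch of logoff() with that change applied (assuming the same module-level sock and alive from the MWE): after shutdown(), the pending sock_recv() returns b"" and socket_input_loop() can finish and close the socket itself.

def logoff():
    global alive
    alive = False
    sock.shutdown(socket.SHUT_RDWR)  # wakes up the pending sock_recv()
    # no sock.close() here; the fd stays a valid selector entry until
    # socket_input_loop() exits and closes the socket on its own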

OSError: [Errno 12] Cannot allocate memory

I have implemented the code below using multiprocessing so that it can handle multiple requests concurrently, but I'm getting the error below. I use a producer/consumer pattern in which producers put work items on a queue and consumers take them off and do the actual job.
Traceback (most recent call last):
p.start()
File "/usr/lib/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/usr/lib/python2.7/multiprocessing/forking.py", line 121, in __init__
self.pid = os.fork()
OSError: [Errno 12] Cannot allocate memory
queue = Queue()
lock = Lock()
producers = []
consumers = []

for frame in frames:
    producers.extend([Process(target=self.producer, args=(queue, lock, frame))
                      for i in xrange(cpu_count())])

for i in range(50):
    p = Process(target=self.consumer, args=(queue, lock))
    p.daemon = True
    consumers.append(p)

for p in producers:
    #time.sleep(random.randint(0, 5))
    p.start()

for c in consumers:
    #time.sleep(random.randint(0, 5))
    c.start()

# Like threading, we have a join() method that synchronizes our program
for p in producers:
    p.join()

u_end = time.time()
print u_start, u_end
print('Parent process exiting...')
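For comparison, a minimal sketch of the same producer/consumer idea with a small, fixed number of worker processes (the frame handling below is a placeholder, not the original self.producer/self.consumer code); forking cpu_count() producers for every frame plus 50 consumers can easily run the machine out of memory, so bounding the process count is the usual remedy:

from multiprocessing import Process, Queue, cpu_count

STOP = None  # sentinel telling consumers to exit


def producer(queue, frames):
    for frame in frames:
        queue.put(frame)


def consumer(queue):
    while True:
        frame = queue.get()
        if frame is STOP:
            break
        # ... do the actual job on `frame` here ...


if __name__ == '__main__':
    frames = range(1000)            # stand-in for the real work items
    queue = Queue()
    n_consumers = cpu_count()       # bounded, instead of 50 + N * cpu_count()

    consumers = [Process(target=consumer, args=(queue,)) for _ in range(n_consumers)]
    for c in consumers:
        c.start()

    producer(queue, frames)
    for _ in range(n_consumers):
        queue.put(STOP)             # one sentinel per consumer
    for c in consumers:
        c.join()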

Python multiprocessing Deadlock using Queue

I have a Python program like the one below.
from multiprocessing import Lock, Process, Queue, current_process
import time

lock = Lock()


def do_job(tasks_to_accomplish, tasks_that_are_done):
    while not tasks_to_accomplish.empty():
        task = tasks_to_accomplish.get()
        print(task)
        lock.acquire()
        tasks_that_are_done.put(task + ' is done by ' + current_process().name)
        lock.release()
        time.sleep(1)
    return True


def main():
    number_of_task = 10
    number_of_processes = 4
    tasks_to_accomplish = Queue()
    tasks_that_are_done = Queue()
    processes = []

    for i in range(number_of_task):
        tasks_to_accomplish.put("Task no " + str(i))

    # creating processes
    for w in range(number_of_processes):
        p = Process(target=do_job, args=(tasks_to_accomplish, tasks_that_are_done))
        processes.append(p)
        p.start()

    # completing process
    for p in processes:
        p.join()

    # print the output
    while not tasks_that_are_done.empty():
        print(tasks_that_are_done.get())

    return True


if __name__ == '__main__':
    main()
Sometimes the program runs perfectly, but sometimes it gets stuck and doesn't complete. When I quit it manually, it produces the following error.
$ python3 multiprocessing_example.py
Task no 0
Task no 1
Task no 2
Task no 3
Task no 4
Task no 5
Task no 6
Task no 7
Task no 8
Task no 9
^CProcess Process-1:
Traceback (most recent call last):
File "multiprocessing_example.py", line 47, in <module>
main()
File "multiprocessing_example.py", line 37, in main
p.join()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 121, in join
res = self._popen.wait(timeout)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/popen_fork.py", line 51, in wait
return self.poll(os.WNOHANG if timeout == 0.0 else 0)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/popen_fork.py", line 29, in poll
pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
self.run()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "multiprocessing_example.py", line 9, in do_job
task = tasks_to_accomplish.get()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/queues.py", line 94, in get
res = self._recv_bytes()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
buf = self._recv_bytes(maxlength)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
buf = self._recv(4)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
chunk = read(handle, remaining)
KeyboardInterrupt
Can someone tell me what the issue with this program is? I am using Python 3.6.
Note: a Lock is not needed around a Queue:

lock.acquire()
tasks_that_are_done.put(task + ' is done by ' + current_process().name)
lock.release()

From the Queue documentation: "The Queue class in this module implements all the required locking semantics."

Question: ... what is the issue with the program?

You are using Queue.empty() and Queue.get(), which can lead to a deadlock on calling join(), because there is no guarantee that the queue's empty() state doesn't change before get() is reached.
Deadlock prone:

while not tasks_to_accomplish.empty():
    task = tasks_to_accomplish.get()

Instead of the empty()/get() pair, use for instance:

import queue

while True:
    try:
        task = tasks_to_accomplish.get_nowait()
    except queue.Empty:
        break
    else:
        # Handle task here
        ...
        tasks_to_accomplish.task_done()
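For completeness, a minimal sketch of do_job rewritten along those lines; note that task_done() exists only on multiprocessing.JoinableQueue, not on a plain Queue, so it is left out here:

import queue  # for the queue.Empty exception
from multiprocessing import current_process


def do_job(tasks_to_accomplish, tasks_that_are_done):
    while True:
        try:
            # Non-blocking get: raises queue.Empty instead of blocking forever.
            task = tasks_to_accomplish.get_nowait()
        except queue.Empty:
            break
        else:
            print(task)
            # No Lock needed: Queue.put() already does its own locking.
            tasks_that_are_done.put(task + ' is done by ' + current_process().name)
    return True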

Sometimes pathos.multiprocessing.Pool can't be terminated correctly

I'm trying to use pathos.multiprocessing.Pool in my project.
However, I run into the following problem when I terminate the Pool.
I'm on CentOS 6.5, and I'm not sure whether this is caused by pathos.multiprocessing.Pool or by something else. Can anyone help me with it?
Traceback (most recent call last):
File "/usr/local/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/local/lib/python2.7/threading.py", line 1073, in run
self.function(*self.args, **self.kwargs)
File "receiver.py", line 132, in kill_clients
pool.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/pool.py", line 465, in terminate
self._terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/util.py", line 207, in __call__
res = self._callback(*self._args, **self._kwargs)
File "/usr/local/lib/python2.7/site-packages/multiprocess/pool.py", line 513, in _terminate_pool
p.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/process.py", line 137, in terminate
self._popen.terminate()
File "/usr/local/lib/python2.7/site-packages/multiprocess/forking.py", line 174, in terminate
os.kill(self.pid, signal.SIGTERM)
OSError: [Errno 3] No such process
The weird thing is that at the beginning it works well, but when the 4th job is received, this problem occurs.
class Receiver:
    def __init__(self):
        ....
        self.results = {}

    def kill_clients(self, client_list, pool):
        for client in client_list:
            client.kill()
        pool.terminate()

    def process_result(self, result):
        if result is None:
            self.results = {}
            return
        res = result.split(':')
        if len(res) != 4:
            raise Exception("result with wrong format: %s" % result)
        self.results['%s_%s' % (res[0], res[1])] = {"code": res[3], "msg": res[4]}

    ...

    def handler(self, job):
        self.lg.debug("Receive job in rtmp_start_handler.")
        self.lg.debug("<%s>" % str(job))

        # each client corresponding one process
        cli_counts = job['count']
        pool = Pool(processes=cli_counts)
        clients = []
        try:
            for i in xrange(cli_counts):
                rtmp_cli = RtmpClient(job['case'], i)
                clients.append(rtmp_cli)
            [pool.apply_async(client.run, callback=self.process_result)
             for client in clients]
            pool.close()
            sleep(1)
            self.lg.debug("All clients are started.")
            t = Timer(
                job['timeout'],
                self.kill_clients,
                args=(clients, pool)
            )
            t.start()
            self.lg.debug("Timer is started. timeout %s s" % job['timeout'])
            pool.join()
        except Exception, e:
            self.lg.warning("Exception occurred: %s" % e)
            self.lg.warning(format_exc())
            return "0"

        # here the self.results shall be ready
        return self.parse_results()
The OSError is not caused by the Pool but by an issue in my own program.
When I use Popen to create a subprocess and exec ffmpeg, it exits immediately (due to another problem), so when I try to kill the subprocess it no longer exists. That's why the OSError is raised.
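A minimal, hypothetical sketch of the kind of guard that avoids the OSError when the child may already have exited (safe_kill is my own helper name, wrapping a subprocess.Popen object):

import errno


def safe_kill(proc):
    """Terminate a subprocess.Popen child, tolerating the case where it
    has already exited (which would otherwise raise OSError: No such process)."""
    if proc.poll() is not None:
        return  # the child already exited; nothing to kill
    try:
        proc.terminate()  # sends SIGTERM
    except OSError as e:
        if e.errno != errno.ESRCH:  # ESRCH == "No such process"
            raise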
