Logging to a single file from multiple processes in multiple modules - python

In the Logging Cookbook I found "Logging to a single file from multiple processes". I would like to use it with multiple modules, where each module runs as its own process. Do you have any idea how?
I used the first code example from "Logging to a single file from multiple processes".
Let's say main() lives in one module and the worker process in another; how do I do that?
main.py:
def listener_configurer():
    root = logging.getLogger()
    h = logging.handlers.RotatingFileHandler('mptest.log', 'a', 300, 10)
    f = logging.Formatter('%(asctime)s %(processName)-10s %(name)s %(levelname)-8s %(message)s')
    h.setFormatter(f)
    root.addHandler(h)

def listener_process(queue, configurer):
    configurer()
    while True:
        try:
            record = queue.get()
            if record is None:  # We send this as a sentinel to tell the listener to quit.
                break
            logger = logging.getLogger(record.name)
            logger.handle(record)  # No level or filter logic applied - just do it!
        except Exception:
            import sys, traceback
            print('Whoops! Problem:', file=sys.stderr)
            traceback.print_exc(file=sys.stderr)

def main():
    queue = multiprocessing.Queue(-1)
    listener = multiprocessing.Process(target=listener_process,
                                       args=(queue, listener_configurer))
    listener.start()
    workers = []
    for i in range(10):
        worker = worker_process.TEST(queue)
        workers.append(worker)
        worker.start()
    for w in workers:
        w.join()
    queue.put_nowait(None)
    listener.join()

if __name__ == '__main__':
    main()
worker_process.py:
def worker_configurer(queue):
    h = logging.handlers.QueueHandler(queue)
    root = logging.getLogger()
    root.addHandler(h)
    root.setLevel(logging.DEBUG)

class TEST(multiprocessing.Process):
    def __init__(self, queue, func=worker_configurer):
        super(TEST, self).__init__()
        self.queue = queue
        self.func = func
        self.LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING,
                       logging.ERROR, logging.CRITICAL]
        self.LOGGERS = ['a.b.c', 'd.e.f']
        self.MESSAGES = [
            'Random message #1',
            'Random message #2',
            'Random message #3',
        ]

    def run(self):
        self.func(self.queue)
        name = multiprocessing.current_process().name
        print('Worker started: %s' % name)
        for i in range(10):
            time.sleep(random())
            logger = logging.getLogger(choice(self.LOGGERS))
            level = choice(self.LEVELS)
            message = choice(self.MESSAGES)
            logger.log(level, message)
        print('Worker finished: %s' % name)
This does not work correctly. I would like to use the first code example from the cookbook split across multiple modules, as described above.
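As far as I can tell, the split itself matches the cookbook example; the snippets above are mainly missing their imports. A sketch of the module headers, assuming Python 3 (the file names main.py and worker_process.py are the ones the question already uses):

# main.py
import logging
import logging.handlers
import multiprocessing

import worker_process   # the module defining TEST and worker_configurer

# worker_process.py
import logging
import logging.handlers
import multiprocessing
import time
from random import choice, random

With those in place, worker_process.TEST(queue).start() runs worker_configurer(queue) inside the child's run(), which is the same flow as the cookbook's worker_process(queue, configurer), so the listener in main.py should receive the records unchanged.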

Related

How to solve Producer/Consumer problem with ThreadPoolExecutor

I have set up a short consumer/producer class; maybe there are already some remarks on improvement potential here.
My question: I have read in some comments that this would be better done with a ThreadPoolExecutor, but I haven't figured out yet how to implement starting and stopping of the consumer/producer threads that way.
import logging
import random
from queue import Queue
from threading import Thread, Event
from time import sleep

class Updater:
    def __init__(self):
        self.update_queue = Queue(5)
        self._logger = logging.getLogger(__name__)
        self._producer = None
        self._consumer = None
        self.producer_running = Event()
        self.consumer_running = Event()

    def producer(self):
        while self.producer_running.is_set():
            item = random.randint(0, 10)
            self.update_queue.put(item)
            sleep(0.1 * float(random.randint(0, 10)))

    def consumer(self):
        while self.consumer_running.is_set() or not self.update_queue.empty():
            item = self.update_queue.get()
            sleep(0.2 * float(random.randint(0, 10)))

    def start(self):
        if not self.producer_running.is_set():
            self.producer_running.set()
            self._producer = Thread(target=self.producer)
            self._producer.start()
            self.consumer_running.set()
            self._consumer = Thread(target=self.consumer)
            self._consumer.start()

    def stop(self):
        self.producer_running.clear()
        self._producer.join()
        self.consumer_running.clear()
        self._consumer.join()

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    foo = Updater()
    foo.start()
    sleep(5)
    foo.stop()
Update
I've now created the stop and start methods with concurrent.futures. It works, but I am not sure whether that's how it is intended to be used:
    # inside Updater.__init__:
    self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)

    def start(self):
        if not self.producer_running.is_set():
            self.producer_running.set()
            self._producer = self.executor.submit(self.producer)
            self.consumer_running.set()
            self._consumer = self.executor.submit(self.consumer)

    def stop(self):
        self.producer_running.clear()
        concurrent.futures.wait([self._producer])
        self.consumer_running.clear()
        concurrent.futures.wait([self._consumer])
        self.executor.shutdown()
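One option is to let the executor do the waiting itself: Executor.shutdown(wait=True) blocks until all submitted callables have returned, so the explicit concurrent.futures.wait calls can be dropped. A minimal sketch of stop() under that assumption (start() stays as in the update above):

    def stop(self):
        # Ask both loops to finish; the consumer keeps draining until the queue is empty.
        self.producer_running.clear()
        self.consumer_running.clear()
        # Blocks until the producer and consumer futures have both completed.
        self.executor.shutdown(wait=True)

As with the original stop(), this relies on the consumer not being blocked in update_queue.get() on an empty queue when the events are cleared; a sentinel item or a get() timeout would make the shutdown fully robust.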

python thread not exiting with atexit

Here is my script. When I run it in a shell it just hangs indefinitely whereas I would expect it to terminate cleanly.
import logging
from logging import StreamHandler
import pymsteams
import queue
import threading
import atexit

class TeamsHandler(StreamHandler):
    def __init__(self, channel_url):
        super().__init__()
        self.channel_url = channel_url
        self.queue = queue.Queue()
        self.thread = threading.Thread(target=self._worker)
        self.thread.start()
        atexit.register(self.queue.put, None)

    def _worker(self):
        while True:
            record = self.queue.get()
            if record is None:
                break
            msg = self.format(record)
            print(msg)

    def emit(self, record):
        # enqueue the record to log and return control to the caller
        self.queue.put(record)

if __name__ == "__main__":
    my_logger = logging.getLogger('TestLogging')
    my_logger.setLevel(logging.DEBUG)
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.DEBUG)
    my_logger.addHandler(console_handler)
    CHANNEL_ID = "not_used_anyway"
    teamshandler = TeamsHandler(CHANNEL_ID)
    teamshandler.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
    teamshandler.setLevel(logging.DEBUG)
    my_logger.addHandler(teamshandler)
    for i in range(1, 2):
        my_logger.error(f"this is an error [{i}]")
        my_logger.info(f"this is an info [{i}]")
The None sentinel that should be sent by the atexit handler registered in __init__ never arrives, so the thread stays alive forever.
How can I make sure that the program exits cleanly by modifying the TeamsHandler only?
I got something working, have a look:
import queue
import threading

class Worker:
    def __init__(self):
        self.queue = queue.Queue()
        threading.Thread(target=self._worker).start()

    def _worker(self):
        print("starting thread")
        while True:
            record = self.queue.get()
            if record is None:
                print("exiting")
                break
            print(f"Got message: {record}")

    def emit(self, record):
        self.queue.put(record)

class Wrapper:
    def __init__(self):
        self._worker = Worker()

    def __del__(self):
        print("Wrapper is being deleted")
        self._worker.emit(None)

    def emit(self, record):
        self._worker.emit(record)

def main():
    worker = Wrapper()
    worker.emit("foo")
    worker.emit("bar")
    print("main exits")

if __name__ == "__main__":
    main()
The point here is that when main exits, worker (an instance of Wrapper) goes out of scope, its __del__ method is called, and that sends the stop message to the real worker object.
The results of running this code (the "Got message" lines can appear in different places, of course):
starting thread
main exits
Wrapper is being deleted
Got message: foo
Got message: bar
exiting
As pointed out by avysk, the problem is likely that atexit handlers fire too late, after the waiting for the non-daemon threads is already (supposed to be) done, which leads to deadlock.
If I were you, I'd just add a call like TeamsHandler.finish() at the end of the if __name__ == '__main__' block, and modify TeamsHandler along these lines (untested):
_queues = []

class TeamsHandler(StreamHandler):
    def __init__(self, channel_url):
        super().__init__()
        self.channel_url = channel_url
        self.queue = queue.Queue()
        self.thread = threading.Thread(target=self._worker)
        self.thread.start()
        _queues.append(self.queue)

    def _worker(self):
        while True:
            record = self.queue.get()
            if record is None:
                break
            msg = self.format(record)
            print(msg)

    def emit(self, record):
        # enqueue the record to log and return control to the caller
        self.queue.put(record)

    @staticmethod
    def finish():
        for q in _queues:
            q.put(None)
        del _queues[:]
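For reference, a minimal sketch of the call site this implies, following the question's __main__ block (untested, same names as above):

if __name__ == "__main__":
    my_logger = logging.getLogger('TestLogging')
    my_logger.setLevel(logging.DEBUG)
    teamshandler = TeamsHandler("not_used_anyway")
    teamshandler.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
    my_logger.addHandler(teamshandler)
    my_logger.error("this is an error [1]")
    my_logger.info("this is an info [1]")
    # Flush and stop the worker threads explicitly, before interpreter
    # shutdown starts waiting on non-daemon threads.
    TeamsHandler.finish()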

Python Multiprocessing Logging via QueueHandler

I have a Python multiprocessing application to which I would like to add some logging functionality.
The Python logging cookbook recommends using a Queue. Every process will put log records into it via the QueueHandler and a Listener Process will handle the records via a predefined Handler.
Here is the example provided by the Python logging cookbook:
# You'll need these imports in your own code
import logging
import logging.handlers
import multiprocessing

# Next two import lines for this demo only
from random import choice, random
import time

#
# Because you'll want to define the logging configurations for listener and workers, the
# listener and worker process functions take a configurer parameter which is a callable
# for configuring logging for that process. These functions are also passed the queue,
# which they use for communication.
#
# In practice, you can configure the listener however you want, but note that in this
# simple example, the listener does not apply level or filter logic to received records.
# In practice, you would probably want to do this logic in the worker processes, to avoid
# sending events which would be filtered out between processes.
#
# The size of the rotated files is made small so you can see the results easily.
def listener_configurer():
    root = logging.getLogger()
    h = logging.handlers.RotatingFileHandler('mptest.log', 'a', 300, 10)
    f = logging.Formatter('%(asctime)s %(processName)-10s %(name)s %(levelname)-8s %(message)s')
    h.setFormatter(f)
    root.addHandler(h)

# This is the listener process top-level loop: wait for logging events
# (LogRecords) on the queue and handle them, quit when you get a None for a
# LogRecord.
def listener_process(queue, configurer):
    configurer()
    while True:
        try:
            record = queue.get()
            if record is None:  # We send this as a sentinel to tell the listener to quit.
                break
            logger = logging.getLogger(record.name)
            logger.handle(record)  # No level or filter logic applied - just do it!
        except Exception:
            import sys, traceback
            print('Whoops! Problem:', file=sys.stderr)
            traceback.print_exc(file=sys.stderr)

# Arrays used for random selections in this demo
LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING,
          logging.ERROR, logging.CRITICAL]

LOGGERS = ['a.b.c', 'd.e.f']

MESSAGES = [
    'Random message #1',
    'Random message #2',
    'Random message #3',
]

# The worker configuration is done at the start of the worker process run.
# Note that on Windows you can't rely on fork semantics, so each process
# will run the logging configuration code when it starts.
def worker_configurer(queue):
    h = logging.handlers.QueueHandler(queue)  # Just the one handler needed
    root = logging.getLogger()
    root.addHandler(h)
    # send all messages, for demo; no other level or filter logic applied.
    root.setLevel(logging.DEBUG)

# This is the worker process top-level loop, which just logs ten events with
# random intervening delays before terminating.
# The print messages are just so you know it's doing something!
def worker_process(queue, configurer):
    configurer(queue)
    name = multiprocessing.current_process().name
    print('Worker started: %s' % name)
    for i in range(10):
        time.sleep(random())
        logger = logging.getLogger(choice(LOGGERS))
        level = choice(LEVELS)
        message = choice(MESSAGES)
        logger.log(level, message)
    print('Worker finished: %s' % name)

# Here's where the demo gets orchestrated. Create the queue, create and start
# the listener, create ten workers and start them, wait for them to finish,
# then send a None to the queue to tell the listener to finish.
def main():
    queue = multiprocessing.Queue(-1)
    listener = multiprocessing.Process(target=listener_process,
                                       args=(queue, listener_configurer))
    listener.start()
    workers = []
    for i in range(10):
        worker = multiprocessing.Process(target=worker_process,
                                         args=(queue, worker_configurer))
        workers.append(worker)
        worker.start()
    for w in workers:
        w.join()
    queue.put_nowait(None)
    listener.join()

if __name__ == '__main__':
    main()
My question: the example from the Python logging cookbook implies that the Queue has to be passed to every function that will be executed in multiprocessing mode. This works for a small application, but it gets ugly in a bigger program. Is there a way to use something like a singleton Queue that is created once, e.g. via logging.config.dictConfig, and then shared by all processes without having to pass it to every function?
In your case, a few simple classes will do the trick.
Have a look and let me know if you need further explanation or want something different.
import logging
import logging.handlers
import multiprocessing
import multiprocessing.pool
from random import choice, random
import time

class ProcessLogger(multiprocessing.Process):
    _global_process_logger = None

    def __init__(self):
        super().__init__()
        self.queue = multiprocessing.Queue(-1)

    @classmethod
    def get_global_logger(cls):
        if cls._global_process_logger is not None:
            return cls._global_process_logger
        raise Exception("No global process logger exists.")

    @classmethod
    def create_global_logger(cls):
        cls._global_process_logger = ProcessLogger()
        return cls._global_process_logger

    @staticmethod
    def configure():
        root = logging.getLogger()
        h = logging.handlers.RotatingFileHandler('mptest.log', 'a', 1024**2, 10)
        f = logging.Formatter('%(asctime)s %(processName)-10s %(name)s %(levelname)-8s %(message)s')
        h.setFormatter(f)
        root.addHandler(h)

    def stop(self):
        self.queue.put_nowait(None)

    def run(self):
        self.configure()
        while True:
            try:
                record = self.queue.get()
                if record is None:
                    break
                logger = logging.getLogger(record.name)
                logger.handle(record)
            except Exception:
                import sys, traceback
                print('Whoops! Problem:', file=sys.stderr)
                traceback.print_exc(file=sys.stderr)

    def new_process(self, target, args=[], kwargs={}):
        # pass ourselves as log_process so the child gets this logger's queue
        return ProcessWithLogging(target, args, kwargs, log_process=self)

def configure_new_process(log_process_queue):
    h = logging.handlers.QueueHandler(log_process_queue)
    root = logging.getLogger()
    root.addHandler(h)
    root.setLevel(logging.DEBUG)

class ProcessWithLogging(multiprocessing.Process):
    def __init__(self, target, args=[], kwargs={}, log_process=None):
        super().__init__()
        self.target = target
        self.args = args
        self.kwargs = kwargs
        if log_process is None:
            log_process = ProcessLogger.get_global_logger()
        self.log_process_queue = log_process.queue

    def run(self):
        configure_new_process(self.log_process_queue)
        self.target(*self.args, **self.kwargs)

class PoolWithLogging(multiprocessing.pool.Pool):
    def __init__(self, processes=None, context=None, log_process=None):
        if log_process is None:
            log_process = ProcessLogger.get_global_logger()
        super().__init__(processes=processes, initializer=configure_new_process,
                         initargs=(log_process.queue,), context=context)

LEVELS = [logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL]
LOGGERS = ['a.b.c', 'd.e.f']
MESSAGES = [
    'Random message #1',
    'Random message #2',
    'Random message #3',
]

def worker_process(param=None):
    name = multiprocessing.current_process().name
    print('Worker started: %s' % name)
    for i in range(10):
        time.sleep(random())
        logger = logging.getLogger(choice(LOGGERS))
        level = choice(LEVELS)
        message = choice(MESSAGES)
        logger.log(level, message)
    print('Worker finished: {}, param: {}'.format(name, param))
    return param

def main():
    process_logger = ProcessLogger.create_global_logger()
    process_logger.start()
    workers = []
    for i in range(10):
        worker = ProcessWithLogging(worker_process)
        workers.append(worker)
        worker.start()
    for w in workers:
        w.join()
    with PoolWithLogging(processes=4) as pool:
        print(pool.map(worker_process, range(8)))
    process_logger.stop()
    process_logger.join()

if __name__ == '__main__':
    main()
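To address the "without passing it to every function" part of the question: once create_global_logger() has been called in the parent, code in any other module can log normally from inside a ProcessWithLogging or PoolWithLogging worker, because the child's root logger already has the QueueHandler installed. A minimal sketch (the module name tasks.py is made up for illustration):

# tasks.py -- hypothetical module; it never sees the queue
import logging

log = logging.getLogger(__name__)

def do_work(item):
    log.info('processing %s', item)
    return item

# in the main module, after ProcessLogger.create_global_logger().start():
#     import tasks
#     ProcessWithLogging(tasks.do_work, args=(42,)).start()
#     with PoolWithLogging(processes=4) as pool:
#         pool.map(tasks.do_work, range(8))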

zmq PUBhandler could not be used in __init__() method

I have a class MyLogger for sending messages to a log server using PUBHandler.
An exception gets raised when MyLogger is instantiated in LogWorker.__init__() (version 2 below), but it works fine when MyLogger is instantiated in LogWorker.log_worker() (version 1).
Any suggestions would be appreciated.
import logging
from multiprocessing import Process
import os
import random
import sys
import time

import zmq
from zmq.log.handlers import PUBHandler

class MyLogger(object):
    ''''''
    def __init__(self, port, handler=None):
        self.port = port
        self.handler = handler or self._construct_sock_handler()
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)
        if not self.logger.handlers:
            self.logger.addHandler(self.handler)

    def _construct_sock_handler(self):
        context = zmq.Context()
        log_sock = context.socket(zmq.PUB)
        log_sock.connect("tcp://127.0.0.1:%i" % self.port)
        time.sleep(0.1)
        handler = PUBHandler(log_sock)
        return handler

    def get_logger(self):
        return self.logger

def sub_logger(port, level=logging.DEBUG):
    ctx = zmq.Context()
    sub = ctx.socket(zmq.SUB)
    sub.bind('tcp://127.0.0.1:%i' % port)
    sub.setsockopt(zmq.SUBSCRIBE, "")
    logging.basicConfig(level=level)
    while True:
        level, message = sub.recv_multipart()
        if message.endswith('\n'):
            # trim trailing newline, which will get appended again
            message = message[:-1]
        log = getattr(logging, level.lower())
        log(message)

class LogWorker(object):
    def __init__(self):
        - pass                                         # version 1
        + self.logger = MyLogger(port).get_logger()    # version 2

    def log_worker(self, port):
        - self.logger = MyLogger(port).get_logger()    # version 1
        print "starting logger at %i with level=%s" % (os.getpid(), logging.DEBUG)
        while True:
            level = logging.INFO
            self.logger.log(level, "Hello from %i!" % os.getpid())
            time.sleep(1)

if __name__ == '__main__':
    if len(sys.argv) > 1:
        n = int(sys.argv[1])
    else:
        n = 2
    port = 5555
    workers = [Process(target=LogWorker().log_worker, args=(port,))
               for _ in range(n)]
    [w.start() for w in workers]
    try:
        sub_logger(port)
    except KeyboardInterrupt:
        pass
    finally:
        [w.terminate() for w in workers]
answer from pyzmq owner minrk:
You cannot pass zmq contexts or sockets across the fork boundary that happens when you instantiate a subprocess with multiprocessing. You have to make sure that you create your Context after you are in the subprocess.
solution:
def work():
    worker = LogWorker()       # everything zmq-related is created inside the child
    worker.log_worker(port)

workers = [Process(target=work) for _ in range(n)]

Logging with Multiprocessing in python

I am currently struggling to get a simple multiprocessing log working:
I am using the MultiProcessingLog from this answer: https://stackoverflow.com/a/894284/293195
I have a simple ConvertImage class that should be able to write to the log, and that part works, but the exceptions and tracebacks never appear in the log.
Does somebody know what the problem is here?
import io
import os
import sys
import traceback
import logging
import multiprocessing
import threading
from multiprocessing import Pool, Manager
from logging import FileHandler

class MultiProcessingLog(logging.Handler):
    def __init__(self, name, mode):
        logging.Handler.__init__(self)
        self._handler = FileHandler(name, mode)
        self.queue = multiprocessing.Queue(-1)
        t = threading.Thread(target=self.receive)
        t.daemon = True
        t.start()

    def setFormatter(self, fmt):
        logging.Handler.setFormatter(self, fmt)
        self._handler.setFormatter(fmt)

    def receive(self):
        while True:
            try:
                record = self.queue.get()
                self._handler.emit(record)
            except (KeyboardInterrupt, SystemExit):
                raise
            except EOFError:
                break
            except:
                traceback.print_exc(file=sys.stderr)

    def send(self, s):
        self.queue.put_nowait(s)

    def _format_record(self, record):
        # ensure that exc_info and args
        # have been stringified. Removes any chance of
        # unpickleable things inside and possibly reduces
        # message size sent over the pipe
        if record.args:
            record.msg = record.msg % record.args
            record.args = None
        if record.exc_info:
            dummy = self.format(record)
            record.exc_info = None
        return record

    def emit(self, record):
        try:
            s = self._format_record(record)
            self.send(s)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        self._handler.close()
        logging.Handler.close(self)

class ConvertImage:
    def __init__(self, logger=None):
        self.logger = logger

    def __call__(self, f):
        self.process(f)

    def process(self, f):
        try:
            logging.info("Process %i" % os.getpid())
            raise NameError("Stupid error")
        except Exception as e:
            logging.info("Exception: " + e.message)
            exc_buffer = io.StringIO()
            traceback.print_exc(file=exc_buffer)
            logging.info(exc_buffer.getvalue())
            raise e
        except:
            logging.info("Exception!")
            exc_buffer = io.StringIO()
            traceback.print_exc(file=exc_buffer)
            logging.info(exc_buffer.getvalue())
            raise

mpl = MultiProcessingLog("ImageProcessing.log", mode='w+')
mpl.setFormatter(logging.Formatter('%(asctime)s - %(lineno)d - %(levelname)-8s - %(message)s'))
logger = logging.getLogger()
logger.addHandler(mpl)
logger.setLevel(logging.DEBUG)

pool = Pool()
converter = ConvertImage()
# map converter.process function over all files
result = pool.map_async(converter, ["A", "B", "C"])
pool.close()
pool.join()

logging.shutdown()
In ConvertImage, you use logging module functions instead of self.logger methods. Can you try the code below?
def process(self, f):
    try:
        self.logger.info("Process %i" % os.getpid())
        raise NameError("Stupid error")
    except Exception as e:
        self.logger.info("Exception: " + e.message)
        exc_buffer = io.StringIO()
        traceback.print_exc(file=exc_buffer)
        self.logger.info(exc_buffer.getvalue())
        raise e
    except:
        self.logger.info("Exception!")
        exc_buffer = io.StringIO()
        traceback.print_exc(file=exc_buffer)
        self.logger.info(exc_buffer.getvalue())
        raise
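Note that in the question's snippet the instance is created as ConvertImage() without a logger, so self.logger would be None; for the suggestion above to work, the call site presumably also needs to hand a logger in. A minimal sketch under that assumption, reusing the root logger that already has the MultiProcessingLog handler attached:

logger = logging.getLogger()              # root logger, already wired to MultiProcessingLog
converter = ConvertImage(logger=logger)   # give the instance a logger to use
pool = Pool()
result = pool.map_async(converter, ["A", "B", "C"])
pool.close()
pool.join()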
