How do I add log handler in celery? - python

So I have my tasks.py here is an exerpt:
import builtins
import logging
import os
import urllib
import inspect
from celery import Celery
from common.environment_helper import EnvironmentHelper
from config import log
# Logging functionality
logger = logging.getLogger(__name__)
EnvironmentHelper.set_environment(logger)
app = Celery('tasks', broker=BROKER__URL, broker_transport_options=BROKER_TRANSPORT_OPTIONS)
app.conf.CELERY_ACCEPT_CONTENT = ['json']
app.conf.CELERY_TASK_SERIALIZER = 'json'
app.conf.CELERYD_PREFETCH_MULTIPLIER = 1
app.log.already_setup=True
app.conf.CELERY_ENABLE_REMOTE_CONTROL = False
app.conf.CELERY_DEFAULT_QUEUE = queue_name
#app.task
def convert_file(file_conversion_json):
file_conversion_json_copy = file_conversion_json.copy()
So basically I want to take a value from the dictionary and add it to my log. I have done this successfully in my actual application by using the following code:
import uuid
import logging
import flask
# Generate a new request ID, optionally including an original request ID
def generate_request_id(original_id):
new_id = uuid.uuid4()
if not original_id:
return new_id
else:
new_id = original_id
return new_id
# Returns the current request ID or a new one if there is none
# In order of preference:
# * If we've already created a request ID and stored it in the flask.g context local, use that
# * If a client has passed in the X-Request-Id header, create a new ID with that prepended
# * Otherwise, generate a request ID and store it in flask.g.request_id
def request_id():
if getattr(flask.g, 'request_id', None):
return flask.g.request_id
headers = flask.request.headers
original_request_id = headers.get("X-Request-Id")
new_uuid = generate_request_id(original_request_id)
flask.g.request_id = new_uuid
return new_uuid
class RequestIdFilter(logging.Filter):
# This is a logging filter that makes the request ID available for use in
# the logging format. Note that we're checking if we're in a request
# context, as we may want to log things before Flask is fully loaded.
def filter(self, record):
record.request_id = request_id() if flask.has_request_context() else ''
return True
............
# log.py
import logging.config
import os
LOGGER_CONFIGURATION = {
'version': 1,
'filters': {
'request_id': {
'()': 'config.utils.RequestIdFilter',
},
},
'formatters': {
'standard': {
'format': '%(asctime)s - %(name)s.%(module)s.%(funcName)s:%(lineno)d - %(levelname)s - %(request_id)s - %(message)s',
},
},
'handlers': {
'console': {
'class': 'logging.StreamHandler',
'level': 'INFO',
'filters': ['request_id'],
'formatter': 'standard'
}
},
'loggers': {
'': {
'handlers': ['console'],
'level':'INFO',
},
'app': {
'handlers': ['console'],
'level':'INFO',
},
}
}
logging.config.dictConfig(LOGGER_CONFIGURATION)
But it doesn't work for celery and I have no idea how to add a variable on the fly like that. Any advice?

So I figured out that if I add this to my task:
logFormatter = logging.Formatter('%(asctime)s - %(name)s.%(module)s.%(funcName)s:%(lineno)d - %(levelname)s - Request ID:'+ file_conversion_json['x_request_id'] + ' - %(message)s')
rootLogger = logging.getLogger()
consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logFormatter)
rootLogger.addHandler(consoleHandler)
It will add what I want to the log output, but now it duplicated my log messages, I am getting each message twice in the log output....

Related

Can't add username to logging record using Middleware

I'm trying to log (by default) username and project (which can be decided from request object). I don't want to add context to every log manually.
The problem is that I can't make Django to add request or straight username and project to the LogRecord. I tried tens of ways.
This is my code:
middlewares.py
import threading
local = threading.local()
class LoggingRequestMiddleware:
def __init__(self, get_response):
self.get_response = get_response
# One-time configuration and initialization.
def __call__(self, request):
# Code to be executed for each request before
# the view (and later middleware) are called.
setattr(local, 'request', request)
response = self.get_response(request)
# Code to be executed for each request/response after
# the view is called.
return response
settings.py
def add_username_to_log(record):
local = threading.local()
record.username = '-'
request = getattr(local,'request',None)
print(request)
return True
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'verbose': {
'format': LOGGING_VERBOSE_FORMAT,
'style': '{',
},
},
'filters': {
'context_filter': {
'()': 'django.utils.log.CallbackFilter',
'callback': add_username_to_log,
},
},
'handlers': {
'console': {
'level': DEFAULT_LOG_LEVEL,
'class': 'logging.StreamHandler',
'formatter': 'verbose',
'filters': ['context_filter'],
},
'file_main': {
'level': DEFAULT_LOG_LEVEL,
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_PATH, 'main.log'),
'maxBytes': DEFAULT_LOG_SIZE,
'formatter': 'verbose',
'filters': ['context_filter'],
'backupCount': 0,
},
},
'loggers': {
'': {
'handlers': ['file_main'],
'level': DEFAULT_LOG_LEVEL,
'propagate': False,
},
},
}
But the request object is always None. Do you know why?
threading.local() returns a new object every time, you have to read and write to the same object.
locals_a = threading.local()
locals_a.foo = 1
hasattr(locals_a, 'foo') # True
locals_b = threading.local()
hasattr(locals_b, 'foo') # False
You need to define your locals object in 1 place that you can then import everywhere you need to access the request and read and write to that object every time. As a basic example this should work
def add_username_to_log(record):
from middleware import local
request = getattr(local,'request',None)

python: logging: can we added multiple filters to the logger and which one is considered

I am trying to understand how multiple Filters (one defined in config and other in the code) in Python logging work.
I am working on a Django project and below is my logger config in settings.py
My goal is to switch on and switch off the logger whenever i want. So using filters i am trying to switch off the logger by returning False (0)
1) Switch off the logger in the starting
class StartFilter(object):
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0 for no, nonzero for
yes. If deemed appropriate, the record may be modified in-place.
"""
return 0
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'verbose': {
'format': '%(levelname)s %(funcName)s() %(pathname)s[:%(lineno)s] %(name)s \n%(message)s'
}
},
'handlers': {
'console': {
'level': 'DEBUG',
'formatter': 'verbose',
'class': 'logging.StreamHandler',
},
},
'filters': {
'myfilter': {
'()': StartFilter,
}
},
'loggers': {
'log_testing': {
'handlers': ['console'],
'level': 'DEBUG',
'propagate': False,
'filters': ['myfilter']
},
}
}
I have added the filter to the logger. 'filters': ['myfilter']
2) Switch on and off the logger in the views.py file where i want the logging to be seen
# to switch on logger
class LoggerGateStart(object):
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0 for no, nonzero for
yes. If deemed appropriate, the record may be modified in-place.
"""
return 1
# to switch off logger
class LoggerGateStop(object):
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0 for no, nonzero for
yes. If deemed appropriate, the record may be modified in-place.
"""
return 0
import logging
logger = logging.getLogger("log_testing")
...
logging.debug("Some text Before) # i dont want this to be logged
...
gatestart = LoggerGateStart()
logger_database.addFilter(gatestart)
...
logging.debug("Some text) # i want this to be logged
...
gatestop = LoggerGateStop()
logger_database.addFilter(gatestop)
...
logging.debug("Some text after") # i dont want this to be logged even
if it exist
...
I found its not working this way. It considers the StartFilter only and not consider LoggerGateStart or LoggerGateStop and does not print any log to the console
How can i do this
ANSWER I USED BASED ON THE ANSWER OF Gabriel C
My goal was to log sql using django django.db.backends. But the problem with it is that it will log all the sqls. I want to log only sqls in a particular section of a code or whereever i want to see the sqls. So the following way i could do it.
logging config inside settings.py:
# Filter class to stop or start logging for "django.db.backends"
class LoggerGate:
def __init__(self, state='closed'):
# We found that the settings.py runs twice and the filters are created twice. So we have to keep only one. So we delete all the previous filters before we create the new one
import logging
logger_database = logging.getLogger("django.db.backends")
try:
for filter in logger_database.filters:
logger_database.removeFilter(filter)
except Exception as e:
pass
self.state = state
def open(self):
self.state = 'open'
def close(self):
self.state = 'closed'
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0/False for no, nonzero/True for
yes. If deemed appropriate, the record may be modified in-place.
"""
return self.state == 'open'
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'handlers': {
'sql': {
'class': 'logging.StreamHandler',
'level': 'DEBUG',
}
},
'filters': {
'myfilter': {
'()': LoggerGate,
}
},
'loggers': {
'django.db.backends': {
'handlers': ['sql'],
'level': 'DEBUG',
'propagate': False,
'filters': ['myfilter']
}
}
}
Then in the views.py
import logging
logger = logging.getLogger(__name__)
logger_database = logging.getLogger("django.db.backends")
def test1(request):
logger_database.filters[0].open()
#Will allow priting of sql satatements from here
from django import db
user_set = User.objects.all()
for user in user_set: # Here sql is executed and is printed to console
pass
#Will stop priting of sql satatements after this
logger_database.filters[0].close()
from django import db
user_set = User.objects.all()
for user in user_set: # Here sql is executed and is not printed to console
pass
now = datetime.datetime.now()
html = "<html><body>Internal purpose</body></html>"
return HttpResponse(html)
If one wants to print the sql in formatted and colorful way use this in the settings.py
# SQL formatter to be used for the handler used in logging "django.db.backends"
class SQLFormatter(logging.Formatter):
def format(self, record):
# Check if Pygments is available for coloring
try:
import pygments
from pygments.lexers import SqlLexer
from pygments.formatters import TerminalTrueColorFormatter
except ImportError:
pygments = None
# Check if sqlparse is available for indentation
try:
import sqlparse
except ImportError:
sqlparse = None
# Remove leading and trailing whitespaces
sql = record.sql.strip()
if sqlparse:
# Indent the SQL query
sql = sqlparse.format(sql, reindent=True)
if pygments:
# Highlight the SQL query
sql = pygments.highlight(
sql,
SqlLexer(),
#TerminalTrueColorFormatter(style='monokai')
TerminalTrueColorFormatter()
)
# Set the record's statement to the formatted query
record.statement = sql
return super(SQLFormatter, self).format(record)
# Filter class to stop or start logging for "django.db.backends"
class LoggerGate:
def __init__(self, state='closed'):
# We found that the settings.py runs twice and the filters are created twice. So we have to keep only one. So we delete all the previous filters before we create the new one
import logging
logger_database = logging.getLogger("django.db.backends")
try:
for filter in logger_database.filters:
logger_database.removeFilter(filter)
except Exception as e:
pass
self.state = state
def open(self):
self.state = 'open'
def close(self):
self.state = 'closed'
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0/False for no, nonzero/True for
yes. If deemed appropriate, the record may be modified in-place.
"""
return self.state == 'open'
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'sql': {
'()': SQLFormatter,
'format': '[%(duration).3f] %(statement)s',
}
},
'handlers': {
'sql': {
'class': 'logging.StreamHandler',
'formatter': 'sql',
'level': 'DEBUG',
}
},
'filters': {
'myfilter': {
'()': LoggerGate,
}
},
'loggers': {
'django.db.backends': {
'handlers': ['sql'],
'level': 'DEBUG',
'propagate': False,
'filters': ['myfilter']
}
}
}
Create only one filter and use it's instance to control if logs should be accepted or not.
from logging import getLogger
class LoggerGate(object):
def __init__(self):
self.started = False
def start(self):
self.started = True
def stop(self):
self.started = False
def filter(self, record):
"""
Determine if the specified record is to be logged.
Returns True is this LoggerGate is started, False otherwise.
"""
return self.started
logger_database = getLogger("log_testing")
logger_gate = LoggerGate()
logger_database.addFilter(logger_gate)
logger_database.critical('this is not logged')
logger_gate.start()
logger_database.critical('this is logged')
logger_gate.stop()
logger_database.critical('this is not logged')
Gabriel C gives a great solution. And to explain more, filter works in sequence, which means the record is passed to each filter one by one. And will stop at the one returns a zero. So as your StartFilter returns 0, it will directly drop all records.

python: How to start and stop a logger whenever i want

I am trying to log sql statements in a code in my Django Application
Currently i am using the following logger config in my settings.py
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'sql': {
'()': SQLFormatter,
'format': '[%(duration).3f] %(statement)s',
},
'verbose': {
'format': '%(levelname)s %(funcName)s() %(pathname)s[:%(lineno)s] %(name)s \n%(message)s'
}
},
'handlers': {
'console': {
'level': 'DEBUG',
'formatter': 'verbose',
'class': 'logging.StreamHandler',
},
'sql': {
'class': 'logging.StreamHandler',
'formatter': 'sql',
'level': 'DEBUG',
}
}
}
In genereal to log sql in django we can add the django.db.backends to the logger.config in the settings.py
'loggers': {
'django.db.backends': {
'handlers': ['sql'],
'level': 'DEBUG',
'propagate': False,
},
But the problem is it will log every sql statement. So how can we start and stop logging for django.db.backends in between code.
I have the following code in my views.py
def someview(request)
# start logging from here
user_set = User.objects.all()
for user in user_set:
print(user.last_name)
# stop logging from here
Also I want to use the sql handler which I defined in the logging config.
What code will go in start and stop logging place in the above view function.
Create a filter class and add an instance to the logger or handler.
class LoggerGate:
def __init__(self, state='open'):
self.state = state
def open(self):
self.state = 'open'
def close(self):
self.state = 'closed'
def filter(self, record):
return self.state == 'open'
Create a filter, initialized in the 'closed' state.
Get the 'django.db.backends' logger and add the filter.
gate = LoggerGate('closed')
sql_logger = logging.getLogger('django.db.backends')
sql_logger.addFilter(gate)
Then call the open or close method to limit logging to where you want it.
def someview(request)
gate.open() # start logging from here
user_set = User.objects.all()
for user in user_set:
print(user.last_name)
gate.close() # stop logging here
Just summarizing from the above answer and also from the answer of Gabriel C, which both are same and also from the answer of Sraw
My goal was to log sql using django django.db.backends. But the problem with it is that it will log all the sqls. I want to log only sqls in a particular section of a code or whereever i want to see the sqls. So the following way i could do it.
logging config inside settings.py:
# Filter class to stop or start logging for "django.db.backends"
class LoggerGate:
def __init__(self, state='closed'):
# We found that the settings.py runs twice and the filters are created twice. So we have to keep only one. So we delete all the previous filters before we create the new one
import logging
logger_database = logging.getLogger("django.db.backends")
try:
for filter in logger_database.filters:
logger_database.removeFilter(filter)
except Exception as e:
pass
self.state = state
def open(self):
self.state = 'open'
def close(self):
self.state = 'closed'
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0/False for no, nonzero/True for
yes. If deemed appropriate, the record may be modified in-place.
"""
return self.state == 'open'
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'handlers': {
'sql': {
'class': 'logging.StreamHandler',
'level': 'DEBUG',
}
},
'filters': {
'myfilter': {
'()': LoggerGate,
}
},
'loggers': {
'django.db.backends': {
'handlers': ['sql'],
'level': 'DEBUG',
'propagate': False,
'filters': ['myfilter']
}
}
}
Then in the views.py
import logging
logger = logging.getLogger(__name__)
logger_database = logging.getLogger("django.db.backends")
def test1(request):
logger_database.filters[0].open()
#Will allow priting of sql satatements from here
from django import db
user_set = User.objects.all()
for user in user_set: # Here sql is executed and is printed to console
pass
#Will stop priting of sql satatements after this
logger_database.filters[0].close()
from django import db
user_set = User.objects.all()
for user in user_set: # Here sql is executed and is not printed to console
pass
now = datetime.datetime.now()
html = "<html><body>Internal purpose</body></html>"
return HttpResponse(html)
If one wants to print the sql in formatted and colorful way use this in the settings.py
# SQL formatter to be used for the handler used in logging "django.db.backends"
class SQLFormatter(logging.Formatter):
def format(self, record):
# Check if Pygments is available for coloring
try:
import pygments
from pygments.lexers import SqlLexer
from pygments.formatters import TerminalTrueColorFormatter
except ImportError:
pygments = None
# Check if sqlparse is available for indentation
try:
import sqlparse
except ImportError:
sqlparse = None
# Remove leading and trailing whitespaces
sql = record.sql.strip()
if sqlparse:
# Indent the SQL query
sql = sqlparse.format(sql, reindent=True)
if pygments:
# Highlight the SQL query
sql = pygments.highlight(
sql,
SqlLexer(),
#TerminalTrueColorFormatter(style='monokai')
TerminalTrueColorFormatter()
)
# Set the record's statement to the formatted query
record.statement = sql
return super(SQLFormatter, self).format(record)
# Filter class to stop or start logging for "django.db.backends"
class LoggerGate:
def __init__(self, state='closed'):
# We found that the settings.py runs twice and the filters are created twice. So we have to keep only one. So we delete all the previous filters before we create the new one
import logging
logger_database = logging.getLogger("django.db.backends")
try:
for filter in logger_database.filters:
logger_database.removeFilter(filter)
except Exception as e:
pass
self.state = state
def open(self):
self.state = 'open'
def close(self):
self.state = 'closed'
def filter(self, record):
"""
Determine if the specified record is to be logged.
Is the specified record to be logged? Returns 0/False for no, nonzero/True for
yes. If deemed appropriate, the record may be modified in-place.
"""
return self.state == 'open'
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'sql': {
'()': SQLFormatter,
'format': '[%(duration).3f] %(statement)s',
}
},
'handlers': {
'sql': {
'class': 'logging.StreamHandler',
'formatter': 'sql',
'level': 'DEBUG',
}
},
'filters': {
'myfilter': {
'()': LoggerGate,
}
},
'loggers': {
'django.db.backends': {
'handlers': ['sql'],
'level': 'DEBUG',
'propagate': False,
'filters': ['myfilter']
}
}
}

django database query log linenumber

I am logging my database queries in Django along with the pathname and linenumber.
Right now i am getting these logs:
07/Dec/2018 14:25:00 DEBUG django.db.backends utils **/Users/XXXXX/.idea/lib/python2.7/site-packages/django/db/backends/utils.py:89**
(0.340) SELECT "metadata"."metaname", "metadata"."description", "metadata"."attributes" FROM "metadata" WHERE "metadata"."metaname" = 'date_type'; args=('date_type',)
For all queries, I am getting the same path and line number. Is there any way I can capture the line number from my main application instead of the one from utils.
Current logging Implementation:
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'handlers': {
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'color'
},
},
'loggers': {
'django.db.backends': {
'handlers': ['console'],
'level': 'DEBUG',
'propogate': True,
}
}
}
Using python 2.7 and django 1.9
The slightly optimized version from User #will-keeling
Logging configuration for Django for output line numbers for each db request.
Note: If you want to use it for tests you need to set DEBUG=True for tests How do you set DEBUG to True when running a Django test?
import logging
import traceback
from django.conf import settings
class StackInfoHandler(logging.StreamHandler):
trim = 5
def emit(self, record):
super(StackInfoHandler, self).emit(record)
trace = traceback.format_stack()
stack1 = [str(row) for row in trace]
stack2 = [s for s in stack1 if settings.BASE_DIR in s and 'format_stack' not in s]
stack3 = [s for s in stack2 if 'test' not in s]
if not stack3:
stack3 = stack2 # include test call
if stack3:
stack4 = ''.join(stack3[-self.trim:]) # take only last records
stack5 = f"Stack {self.terminator} {''.join(stack4)}"
self.stream.write(stack5)
self.stream.write(self.terminator)
self.flush()
Logging Config (partitial)
LOGGING = {
'handlers': {
'db-console': {
'level': 'DEBUG',
'class': 'settings.local.StackInfoHandler', # Reference the custom handler
'formatter': 'simple',
},
'loggers': {
'django.db.backends': {
'handlers': ['db-console'],
'level': 'DEBUG',
'propagate': False
},
}
}
}
This will show you only stack trace from your Django codebase like below
[2020-05-25 17:49:17,977]: (0.000) INSERT INTO `contacts_contactscount` (`user_id`, `date`, `amount`) VALUES (338, '2020-05-25 17:49:17', 7); args=[338, '2020-05-25 17:49:17', 7]
Stack
File "<project-root>/api/views/contacts.py", line 164, in create
Contact.objects.filter(pk__in=to_delete).delete()
File "<project-root>/<folder>/contacts/models.py", line 54, in delete
create_deletion_log.delay(obj, deleted_timestamp)
File "<project-root>/<folder>/contacts/tasks.py", line 31, in create_deletion_log
contact.save()
File "<project-root>/<folder>/contacts/models.py", line 118, in save
Contact.objects.contacts_added_hook(self.user)
File "<project-root>/<folder>/contacts/models.py", line 67, in contacts_added_hook
current_total = user.profile.contacts_total
File "<project-root>/<folder>/profile/models.py", line 631, in contacts_total
ContactsCount.objects.create(user=self.user, amount=count)
I'm guessing that you're trying to determine which lines in your application are responsible for running which queries.
One way to achieve this would be to create a custom handler that prints out the current stack at the point where Django logs the query. That would allow you to see which line in your application is executing.
You could create a custom handler such as:
import logging
import traceback
class StackInfoHandler(logging.StreamHandler):
trim = 5
def emit(self, record):
super(StackInfoHandler, self).emit(record)
stack = ''.join(
str(row) for row in traceback.format_stack()[:-self.trim]
)
self.stream.write(stack)
And then in your logging config, you could just switch the handler class to use the StackInfoHandler:
'handlers': {
'console': {
'level': 'DEBUG',
'class': 'my.package.StackInfoHandler', # Reference the custom handler
'formatter': 'color'
},
},
Note that the StackInfoHandler trims 5 lines off the stack so that it doesn't show you stack frames from the logging framework itself. You might need to tweak this number (5 works for me locally).

Python logging into file as a dictionary or JSON

I am trying to set up logging where I can log in both stdout and on to a file. This i have accomplished using the following code:
logging.basicConfig(
level=logging.DEBUG, format='%(asctime)-15s %(levelname)-8s %(message)s',
datefmt='%a, %d %b %Y %H:%M:%S', handlers=[logging.FileHandler(path), logging.StreamHandler()])
The output of this something like this:
2018-05-02 18:43:33,295 DEBUG Starting new HTTPS connection (1): google.com
2018-05-02 18:43:33,385 DEBUG https://google.com:443 "GET / HTTP/1.1" 301 220
2018-05-02 18:43:33,389 DEBUG Starting new HTTPS connection (1): www.google.com
2018-05-02 18:43:33,490 DEBUG https://www.google.com:443 "GET / HTTP/1.1" 200 None
What I am trying to accomplish is logging this output to a file not as it is printing to stdout, but as a dictionary or JSON object similar to something like this (while keeping the stdout as it is at the moment):
[{'time': '2018-05-02 18:43:33,295', 'level': 'DEBUG', 'message': 'Starting new HTTPS connection (1): google.com'}, {...}, {...}]
Is this doable? I understand that I can post process this log file after my process is finished, but I am looking for a more elegant solution because certain things i am logging are quite big objects themselves.
I too dealt with this and I personally believe that an external library might be an overkill for something like this.
I studied a bit the code behind logging.Formatter and came up with a subclass which in my case does the trick (my goal was to have a JSON file that Filebeat can read to further log into ElasticSearch).
Class:
import logging
import json
class JsonFormatter(logging.Formatter):
"""
Formatter that outputs JSON strings after parsing the LogRecord.
#param dict fmt_dict: Key: logging format attribute pairs. Defaults to {"message": "message"}.
#param str time_format: time.strftime() format string. Default: "%Y-%m-%dT%H:%M:%S"
#param str msec_format: Microsecond formatting. Appended at the end. Default: "%s.%03dZ"
"""
def __init__(self, fmt_dict: dict = None, time_format: str = "%Y-%m-%dT%H:%M:%S", msec_format: str = "%s.%03dZ"):
self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"}
self.default_time_format = time_format
self.default_msec_format = msec_format
self.datefmt = None
def usesTime(self) -> bool:
"""
Overwritten to look for the attribute in the format dict values instead of the fmt string.
"""
return "asctime" in self.fmt_dict.values()
def formatMessage(self, record) -> dict:
"""
Overwritten to return a dictionary of the relevant LogRecord attributes instead of a string.
KeyError is raised if an unknown attribute is provided in the fmt_dict.
"""
return {fmt_key: record.__dict__[fmt_val] for fmt_key, fmt_val in self.fmt_dict.items()}
def format(self, record) -> str:
"""
Mostly the same as the parent's class method, the difference being that a dict is manipulated and dumped as JSON
instead of a string.
"""
record.message = record.getMessage()
if self.usesTime():
record.asctime = self.formatTime(record, self.datefmt)
message_dict = self.formatMessage(record)
if record.exc_info:
# Cache the traceback text to avoid converting it multiple times
# (it's constant anyway)
if not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
if record.exc_text:
message_dict["exc_info"] = record.exc_text
if record.stack_info:
message_dict["stack_info"] = self.formatStack(record.stack_info)
return json.dumps(message_dict, default=str)
Usage:
The formatter must simply be passed to the logging handler.
json_handler = FileHandler("foo.json")
json_formatter = JsonFormatter({"level": "levelname",
"message": "message",
"loggerName": "name",
"processName": "processName",
"processID": "process",
"threadName": "threadName",
"threadID": "thread",
"timestamp": "asctime"})
json_handler.setFormatter(json_formatter)
Explanation :
While the logging.Formatter takes a string which it interpolates to output the formatted log record, the JsonFormatter takes a dictionary where the key will be the key of the logged value in the JSON string and the value is a string corresponding to an attribute of the LogRecord that can be logged.
(List available in the docs here)
Main "problem" would be parsing dates and timestamps, and the default formatter implementation has these class attributes, default_time_format and default_msec_format.
default_msec_format gets passed to time.strftime() and default_msec_format is interpolated to append miliseconds as time.strftime() doesn't provide formatting options for those.
The principle is that those are now instance attributes which can be provided in the form of time_format and msec_format to customize how the parent's class (unchanged, as it's not overwritten) formatTime() method behaves.
You could technically override it if you want to customize time formatting, but I personally found that using something else would either be redundant or limit the actual formatting options. But feel free to adjust as to your needs.
Output:
An example JSON record logged by the formatting options above, with the default time formatting options set in the class, would be:
{"level": "INFO", "message": "Starting service...", "loggerName": "root", "processName": "MainProcess", "processID": 25103, "threadName": "MainThread", "threadID": 4721200640, "timestamp": "2021-12-04T08:25:07.610Z"}
So based on #abarnert, i found this Link which provided a good path to making this concept work for the most part. The code as it stands is:
logger=logging.getLogger()
logger.setLevel(logging.DEBUG)
file_handler=logging.FileHandler('foo.log')
stream_handler=logging.StreamHandler()
stream_formatter=logging.Formatter(
'%(asctime)-15s %(levelname)-8s %(message)s')
file_formatter=logging.Formatter(
"{'time':'%(asctime)s', 'name': '%(name)s', \
'level': '%(levelname)s', 'message': '%(message)s'}"
)
file_handler.setFormatter(file_formatter)
stream_handler.setFormatter(stream_formatter)
logger.addHandler(file_handler)
logger.addHandler(stream_handler)
Although it does not fully meet the requirement, it doesnt require any pre processing, and allows me to create two log handlers.
Afterwards, i can use something like:
with open('foo.log') as f:
logs = f.read().splitlines()
for l in logs:
for key, value in eval(l):
do something ...
to pull dict objects instead of fighting with improperly formatted JSON to accomplish what i had set out to accomplish.
Still am hoping for a more elegant solution.
I wanted to have JSON output, so that I could handle it nicer in Promtail and Loki. I just updated the formatter in my logging.json, which I use as a dictConfig
"formatters": {
"normalFormatter": {
"format": "{\"time\": \"%(asctime)s\", \"name\": \"[%(name)s]\", \"levelname\": \"%(levelname)s\", \"message\": \"%(message)s\"}"
}
}
Load the config and get the root logger like:
import json
import logging
# setup logger
with open("logging.json") as f:
config_dict = json.load(f)
logging.config.dictConfig(config_dict)
# get root logger
logger = logging.getLogger(__name__)
If you want to use dictConfig, please make sure you gave all necessary fields.
https://docs.python.org/3/library/logging.config.html
You may want to define a formatter, a handler (which is using the formatter) and the logger (which is using the handler)
e.g. logging.json
{
"version": 1,
"disable_existing_loggers": false,
"formatters": {
"normalFormatter": {
"format": "{\"time\": \"%(asctime)s\", \"name\": \"[%(name)s]\", \"levelname\": \"%(levelname)s\", \"message\": \"%(message)s\"}"
}
},
"handlers": {
"demohandler": {
"level": "INFO",
"formatter": "normalFormatter",
"class": "logging.handlers.TimedRotatingFileHandler",
"filename": "./files/logs/YourLogFile.log",
"when": "d",
"interval": 30,
"backupCount": 4,
"utc": true
}
},
"loggers": {
"root": {
"handlers": ["demohandler"],
"level": "INFO"
},
"someModule": {
"handlers": ["demohandler"],
"level": "INFO",
"propagate": 0
}
}
}
With this code you can add the full traceback, timestamp and level to a json file of choice.
import json
import traceback
from datetime import datetime
def addLogging(logDict:dict):
loggingsFile = 'loggings.json'
with open(loggingsFile) as f:
data = json.load(f)
data.append(logDict)
with open(loggingsFile, 'w') as f:
json.dump(data, f)
def currentTimeUTC():
return datetime.now().strftime('%d/%m/%Y %H:%M:%S')
try:
print(5 / 0)
except ZeroDivisionError:
fullTraceback = str(traceback.format_exc())
addLogging({'timestamp': currentTimeUTC(), 'level': 'error', 'traceback': fullTraceback})
Output:
[
{
"timestamp": "09/06/2020 17:38:00",
"level": "error",
"traceback": "Traceback (most recent call last):\n File \"d:testFullTraceback.py\", line 19, in <module>\n print(5/0)\nZeroDivisionError: division by zero\n"
}
]
I could achieve this result using this custom formatter:
import json
import logging
class CustomJsonFormatter(logging.Formatter):
def format(self, record: logging.LogRecord) -> str:
super(CustomJsonFormatter, self).format(record)
output = {k: str(v) for k, v in record.__dict__.items()}
return json.dumps(output)
cf = CustomJsonFormatter()
sh = logging.StreamHandler()
sh.setFormatter(cf)
logger = logging.getLogger("my.module")
logger.addHandler(sh)
# simple json output
logger.warning("This is a great %s!", "log")
# enrich json output
logger.warning("This is an even greater %s!", "log", extra={'foo': 'bar'})
Output:
{"name": "my.module", "msg": "This is a great %s!", "args": "('log',)", "levelname": "WARNING", "levelno": "30", "pathname": "/Users/olivier/test.py", "filename": "test.py", "module": "test", "exc_info": "None", "exc_text": "None", "stack_info": "None", "lineno": "20", "funcName": "<module>", "created": "1661868378.5048351", "msecs": "504.8351287841797", "relativeCreated": "1.3060569763183594", "thread": "4640826880", "threadName": "MainThread", "processName": "MainProcess", "process": "81360", "message": "This is a great log!"}
{"name": "my.module", "msg": "This is an even greater %s!", "args": "('log',)", "levelname": "WARNING", "levelno": "30", "pathname": "/Users/olivier/test.py", "filename": "test.py", "module": "test", "exc_info": "None", "exc_text": "None", "stack_info": "None", "lineno": "22", "funcName": "<module>", "created": "1661868378.504962", "msecs": "504.9619674682617", "relativeCreated": "1.4328956604003906", "thread": "4640826880", "threadName": "MainThread", "processName": "MainProcess", "process": "81360", "foo": "bar", "message": "This is an even greater log!"}
If you don't mind pip installing a module to help, there is the json_log_formatter module. The json output does have more attributes than requested. The repo mentions customizing the attributes outputted, but I have not yet integrated that into a working example just yet.
json_log_formatter
import logging
import json_log_formatter
# Set Basic Logging
self.loggers = logging.getLogger(__name__)
self.loggers.setLevel(logging.DEBUG)
self.formatter = logging.Formatter(fmt='%(asctime)-15s %(levelname)-8s %(message)s', datefmt = '%a, %d %b %Y %H:%M:%S')
# Config for JSON File Handler
self.logFileHandler = logging.FileHandler(SOME-PATH, mode='a')
self.fileFormatter = json_log_formatter.VerboseJSONFormatter()
self.logFileHandler.setFormatter(self.fileFormatter)
self.logFileHandler.setLevel(logging.INFO)
self.loggers.addHandler(self.logFileHandler)
# Config for Stream Handler
self.logStreamHandler = logging.StreamHandler()
self.logStreamHandler.setFormatter(self.formatter)
self.logStreamHandler.setLevel(logging.INFO)
self.loggers.addHandler(self.logStreamHandler)
I wanted to save full LogRecord object, so I can later inspect my log with maximum of integration with module. So I inspected object like that:
class Handler_json(Handler):
def emit(self, record: LogRecord) -> None:
json_data = {}
for attr in filter(lambda attr: not attr.endswith("__"), dir(record)):
json_data[attr] = record.__getattribute__(attr)
del json_data["getMessage"]
print(json_data)
this is subclass of Handler, emit is rewritten method that is been called with every LogRecord. Dir returns all attributes and methods of the object. I am excluding special methods, and also deleting getMessage method, whitch is not needed for json object representation.
This can be nicely integrated in logging like that:
logger = getLogger(__name__)
logger.setLevel(DEBUG)
handle_json = Handler_json()
logger.addHandler(handle_json)
logger.info("my info")
result looks like this:
{
'args': (),
'created': 1639925351.0648422,
'exc_info': None,
'exc_text': None,
'filename': 'my_logging.py',
'funcName': 'restore_log_from_disk',
'levelname': 'INFO',
'levelno': 20,
'lineno': 142,
'module': 'my_logging',
'msecs': 64.84222412109375,
'msg': 'my info',
'name': '__main__',
'pathname':
'/home/jindrich/PycharmProjects/my_logging.py',
'process': 146331,
'processName': 'MainProcess',
'relativeCreated': 1.6417503356933594,
'stack_info': None,
'thread': 140347192436544,
'threadName': 'MainThread'
}
then you can load data from disk and recreat objects after some digging in doc.
I was able to create it using python-json-logger lib, it was simple and really easily to use.
Django
from pythonjsonlogger import jsonlogger
##This is to add custom keys
class CustomJsonFormatter(jsonlogger.JsonFormatter):
def add_fields(self, log_record, record, message_dict):
super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict)
log_record['level'] = record.levelname
# Logging
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'json': {
'()': CustomJsonFormatter, # if you want to use custom logs class defined above
# '()': jsonlogger.JsonFormatter, # without custom logs
'format': '%(level)s %(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s %(threadName)s %(special)s %(run)s\
%(name)s %(created)s %(processName)s %(relativeCreated)d %(funcName)s %(levelno)d %(msecs)d %(pathname)s %(lineno)d %(filename)s'
},
},
'handlers': {
'null': {
'class': 'logging.NullHandler',
},
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'json'
},
},
.....
}
Flask
from logging.config import dictConfig
from pythonjsonlogger import jsonlogger
import os
# This will set global root logging config across all the modules using in the app
##This is to add custom keys
class CustomJsonFormatter(jsonlogger.JsonFormatter):
def add_fields(self, log_record, record, message_dict):
super(CustomJsonFormatter, self).add_fields(log_record, record, message_dict)
log_record['level'] = record.levelname
def setup():
LOG_FILE = '/tmp/app/app.json'
if not os.path.exists(os.path.dirname(LOG_FILE)): # if LOG_FILE dir doesn't exist, creates it.
os.makedirs(os.path.dirname(LOG_FILE))
dictConfig({
'version': 1,
'formatters': {
'default': {
'format': '%(asctime)s - %(module)s - %(levelname)s - %(message)s',
},
'json': {
'()': CustomJsonFormatter, # if you want to use custom logs class defined above
# '()': jsonlogger.JsonFormatter, # without custom logs
'format': '%(level)s %(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s %(threadName)s %(special)s %(run)s\
%(name)s %(created)s %(processName)s %(relativeCreated)d %(funcName)s %(levelno)d %(msecs)d %(pathname)s %(lineno)d %(filename)s'
},
},
'handlers': {'file': {
'class': 'logging.handlers.RotatingFileHandler',
'filename': LOG_FILE,
'maxBytes': 10485760,
'backupCount': 5,
'formatter': 'json'
},
'console':{
'class':'logging.StreamHandler',
'formatter': 'json'
}},
'root': {
'level': 'INFO',
'handlers': ['file', 'console']
}
})
Hope this helps easy way setup.

Categories