I am trying to add logging to my Cloud Function by integrating Python's logging module with google-cloud-logging. Below is the code I deployed to Cloud Functions:
import logging
import platform
import sys
from datetime import datetime
from google.cloud import logging as gcp_logging
from google.cloud.logging.handlers import CloudLoggingHandler
from google.cloud.logging.resource import Resource
class Singleton(type):
    """Metaclass that hands out a single shared instance per class.

    The first call constructs the instance normally. Every later call
    re-runs ``__init__`` on the existing instance with the new arguments
    and returns that same instance.
    """

    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            existing = cls._instances[cls]
        except KeyError:
            created = super().__call__(*args, **kwargs)
            cls._instances[cls] = created
            return created
        # Remove this re-initialization if the instance should only be
        # configured once.
        existing.__init__(*args, **kwargs)
        return existing
class LogManager(object, metaclass=Singleton):
    """Build loggers that write to stdout and/or Google Cloud Logging.

    ``log_options`` maps a sink name to its settings:

    * ``'console'``: ``{'log_level': <level>}``
    * ``'gcp'``: ``{'log_level': <level>, 'log_type': <resource type>,
      'labels': <resource labels>}``

    The class is created through the ``Singleton`` metaclass.
    """

    gcp_client = None
    gcp_handler = None
    log_file = ''
    # Default sinks, used until ``options`` is passed to the constructor.
    log_options = {
        'console': {
            'log_level': logging.DEBUG
        }
    }
    default_fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(filename)s - %(lineno)s - %(message)s')

    def __new__(cls, options=None):
        print('new LOG manager')
        return object.__new__(cls)

    def __init__(self, options=None):
        """Store ``options`` (if given) and create the GCP client when needed.

        Args:
            options: Optional replacement for ``log_options``. ``None`` keeps
                whatever options are currently in effect.
        """
        print('init LOG manager')
        if options is not None:
            self.log_options = options
        # Build the client only once: __init__ may run again on the same
        # instance, and every new client would otherwise be discarded unused.
        if 'gcp' in self.log_options and self.gcp_client is None:
            if platform.node() and platform.node() != 'localhost':
                print('use auth_token when executed on local')
                self.gcp_client = gcp_logging.Client.from_service_account_json('MY_SECRET.json')
            else:
                print('get environment default, use it when executed on cloud')
                self.gcp_client = gcp_logging.Client()

    def get_logger(self, logger_name, fmt=None, log_tags=None):
        """Return the named logger, freshly configured from ``log_options``.

        Any handlers already attached to the logger are removed and closed
        first, so calling this repeatedly never stacks duplicate handlers.

        Args:
            logger_name: Name passed to ``logging.getLogger``.
            fmt: Optional ``logging.Formatter``; ``default_fmt`` when omitted.
            log_tags: Accepted for compatibility; currently unused.

        Returns:
            The configured ``logging.Logger``.
        """
        # The logging module keeps a registry of loggers, so this reuses an
        # existing logger of the same name.
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.DEBUG)

        for stale in list(logger.handlers):
            logger.removeHandler(stale)
            stale.close()

        self.formatter = fmt if fmt is not None else self.default_fmt

        if 'console' in self.log_options:
            ch = logging.StreamHandler(sys.stdout)
            ch.setLevel(self.log_options['console']['log_level'])
            ch.setFormatter(self.formatter)
            logger.addHandler(ch)

        if 'gcp' in self.log_options:
            gcp_settings = self.log_options['gcp']
            gcp_handler = CloudLoggingHandler(
                client=self.gcp_client,
                resource=Resource(
                    type=gcp_settings['log_type'],
                    labels=gcp_settings['labels']
                )
            )
            gcp_handler.setFormatter(self.formatter)
            gcp_handler.setLevel(gcp_settings['log_level'])
            logger.addHandler(gcp_handler)

        # Once this logger has its own handlers, stop records from also
        # reaching handlers on ancestor loggers (e.g. one installed on the
        # root logger by the hosting environment), which would emit every
        # message a second time.
        if logger.handlers:
            logger.propagate = False
        return logger

    def stop_logging(self):
        """Flush and close all logging handlers via ``logging.shutdown()``."""
        logging.shutdown()
def hello_world(request):
    """HTTP entry point that exercises the log manager.

    Args:
        request (flask.Request): Incoming request. The ``mode`` query
            argument selects the setup: ``console`` uses the default
            options, anything else (default ``gcp``) uses the GCP options.

    Returns:
        The string ``'ok'``.
    """
    if request.args.get('mode', default='gcp') == 'console':
        print('using default logger setup')
        log_manager = LogManager()  # default setup
    else:
        print('using gcp logger setup')
        gcp_options = {
            'gcp': {
                'log_level': logging.INFO,
                'log_type': 'cloud_function',
                'labels': {
                    'function_name': 'test_logging',
                },
            },
        }
        log_manager = LogManager(options=gcp_options)

    # First round: logger obtained from the manager created above.
    logger = log_manager.get_logger(__name__)
    logger.debug('debug msg1')
    logger.info('info msg1')
    logger.warning('warning msg1')
    logger.error('error msg1')
    logger.critical('critical msg1')

    # Second round: logger obtained through a fresh LogManager() call.
    logger = LogManager().get_logger(__name__)
    logger.debug('debug msg2')
    logger.info('info msg2')
    logger.warning('warning msg2')
    logger.error('error msg2')
    logger.critical('critical msg2')

    log_manager.stop_logging()
    return 'ok'
With dependency google-cloud-logging==1.11.0 and google-api-core==1.22.2.
Here is the result when I use StreamHandler:
As you can see, there are 2 types of logging messages:
formatted log output from logger
pure log message
And both of these messages are automatically tagged with a random ID by GCP.
Here is another result when I use CloudLoggingHandler:
As you can see, there are 2 types of logging messages:
formatted log output from logger without random ID tagged by GCP
pure log message tagged with a random ID by GCP
My questions are:
How to resolve the duplicates issue? (either CloudLoggingHandler or StreamHandler)
How does the auto-tagging mechanism work? I am trying to keep these tags since they could be useful for searching (e.g. test_logging & r2b4rhft6z1i in the screenshot)
The last one is a little beyond this topic: should I use a singleton in this case? Am I using it correctly? Are there any improvements to be made to this code snippet?
Sorry for asking so many questions at once. Any suggestion will be appreciated!
I have read Flask - How to store logs and add additional information and so on.
But I don't want to write code like extra={} everywhere.
I tried customizing the Flask app's logger with an AppFormatter, but it doesn't work. Here is the code sample:
import logging
from flask import session, Flask
from logging.handlers import RotatingFileHandler
class AppFormatter(logging.Formatter):
    """Formatter that appends the session's user id and name to each line."""

    def format(self, record):
        # fixme: AppFormatter.format is not called
        formatted = super().format(record)
        # NOTE(review): 'fullanme' looks like a typo for 'fullname' - confirm
        # against the key the application actually stores in the session.
        return '{} - {} - {}'.format(
            formatted,
            session.get('user_id', '?'),
            session.get('fullanme', '??'),
        )
# Line layout: timestamp, module, function name, then the message.
LOG_FORMAT = '[%(asctime)s]%(module)s - %(funcName)s - %(message)s'
# Formatter used by initLogger when no 'format' keyword is supplied.
defaultFormat = AppFormatter(LOG_FORMAT)
def initLogger(logger, **kwargs):
    """Add a rotating file handler to *logger* and format all its handlers.

    Keyword options (all optional): ``file`` (default ``'debug.log'``),
    ``format`` (default ``defaultFormat``), ``level`` (default ``DEBUG``),
    ``maxBytes`` (default 10 MiB) and ``backupCount`` (default 5).
    """
    log_path = kwargs.get('file', 'debug.log')
    formatter = kwargs.get('format', defaultFormat)
    threshold = kwargs.get('level', logging.DEBUG)

    rotating = RotatingFileHandler(
        log_path,
        maxBytes=kwargs.get('maxBytes', 10 * 1024 * 1024),
        backupCount=kwargs.get('backupCount', 5),
    )
    rotating.setLevel(threshold)
    logger.addHandler(rotating)

    # Apply the formatter to every handler, including ones that were
    # attached before this call.
    for attached in logger.handlers:
        attached.setFormatter(formatter)
# Create the application, install the file handler and formatter on its
# logger, then run the app.
app = Flask(__name__)
initLogger(app.logger)
app.run()
Why is AppFormatter.format not called, even though app.logger writes the messages to stdout?
Try this out
class AppFormatter(logging.Formatter):
    """Formatter that embeds the session's user id and name in the message."""

    def format(self, record):
        """Return the formatted line with ``- <user_id> - <username>`` appended
        to the message part.

        The record is modified only for the duration of the call, so other
        handlers and formatters still see the original ``msg`` and ``args``.
        """
        user_id = session.get('user_id', '?')
        # NOTE(review): 'fullanme' looks like a typo for 'fullname' - confirm
        # against the key the application actually stores in the session.
        username = session.get('fullanme', '??')

        saved_msg, saved_args = record.msg, record.args
        record.msg = '{} - {} - {}'.format(record.getMessage(), user_id, username)
        # The %-arguments are already merged into the text above; leaving
        # them in place would make the base class apply them a second time.
        record.args = None
        try:
            return super().format(record)
        finally:
            record.msg, record.args = saved_msg, saved_args
I would like to use the following log format:
'format': '{"message": "%(message)s", "user": "%(user)s"}'
However, I would like to call it in two different ways:
log.info("hi", extra={"user": "asmith"})
log.info("hi")
The first log statement works because it provides the user argument, but the second one fails with a KeyError.
Is there any way to make a format string argument optional?
Optional format args replaced with None
Given a fixed logging format string, a custom Formatter class can be used to replace missing arguments with None.
import logging
import re
class CustomFormatter(logging.Formatter):
    """Formatter that tolerates format fields missing from the record.

    Any ``%(name)`` field referenced by the format string but absent from
    the record is set to ``None`` on the record before formatting.
    """

    # Captures the field name of each %(name) placeholder.
    _FIELD_PATTERN = re.compile(r'%\((\w+)\)')

    def format(self, record: logging.LogRecord) -> str:
        attributes = record.__dict__
        for placeholder in self._FIELD_PATTERN.finditer(self._fmt):
            attributes.setdefault(placeholder.group(1), None)
        return super().format(record)
# Demo: one record without the optional "user" field, one with it.
formatter = CustomFormatter('{"message": "%(message)s", "user": "%(user)s"}')
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)

logger.info('hi')
logger.info('hi', extra={"user": "asmith"})
Output
{"message": "hi", "user": "None"}
{"message": "hi", "user": "asmith"}
Dynamically add extra args to logging output
A custom Formatter can dynamically update the format string based on the dictionary passed to extra.
import logging
class ExtraFormatter(logging.Formatter):
    """Formatter that renders the message plus every ``extra`` field.

    The output looks like ``{"message": "...", "<extra>": "..."}``, with the
    extra fields in the order they appear on the record.
    """

    # Attributes every LogRecord carries, plus the two that Formatter.format()
    # itself may add. Anything else on a record is treated as an extra field.
    _RESERVED_ATTRS = frozenset(
        logging.LogRecord(None, None, None, None, None, None, None).__dict__
    ) | {'message', 'asctime'}

    def format(self, record: logging.LogRecord) -> str:
        log_items = ['"message": "%(message)s"']
        log_items.extend(
            f'"{attr}": "%({attr})s"'
            for attr in record.__dict__
            if attr not in self._RESERVED_ATTRS
        )
        # The format string is rebuilt for each record and stored on the
        # style object that the base class formats with.
        # NOTE(review): this mutates shared formatter state - confirm the
        # formatter is not used concurrently from several handlers.
        self._style._fmt = '{' + ', '.join(log_items) + '}'
        return super().format(record)
# Demo: one record without extras, one with two extra fields.
formatter = ExtraFormatter()
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)

logger.info('hi')
logger.info('hi', extra={"user": "asmith", "number": "42"})
Output
{"message": "hi"}
{"message": "hi", "user": "asmith", "number": "42"}
Right now I am using Flask and a flask 3rd party library Flask-Session
Using the code below, I reload the page 4 times and get the following output:
set userid[0]
127.0.0.1 - - [27/Sep/2014 22:28:35] "GET / HTTP/1.1" 200 -
set userid[1]
127.0.0.1 - - [27/Sep/2014 22:28:37] "GET / HTTP/1.1" 200 -
set userid[2]
127.0.0.1 - - [27/Sep/2014 22:28:37] "GET / HTTP/1.1" 200 -
set userid[3]
127.0.0.1 - - [27/Sep/2014 22:28:38] "GET / HTTP/1.1" 200 -
Code:
from flask import Flask, session
from flask.ext.session import Session
app = Flask(__name__)
# Flask-Session extension object; bound to the app later via init_app().
sess = Session()
# Next user id to hand out; incremented by verifySessionId().
nextId = 0
def verifySessionId():
    """Return the session's user id, assigning the next free id if unset."""
    global nextId
    if 'userId' in session:
        print("using already set userid[" + str(session['userId']) + "]")
        return session.get('userId', None)

    # First time this session is seen: hand out the next id.
    session['userId'] = nextId
    nextId += 1
    sessionId = session['userId']
    print("set userid[" + str(session['userId']) + "]")
    return sessionId
@app.route("/")
def hello():
    """Serve ``/``: return the session's user id as text."""
    userId = verifySessionId()
    return str(userId)
if __name__ == "__main__":
    # Configure the session settings, bind the extension, then run in
    # debug mode.
    app.config.update(
        SECRET_KEY='super secret key',
        SESSION_TYPE='filesystem',
    )
    sess.init_app(app)
    app.debug = True
    app.run()
Shouldn't session['userId'] be 'saved out' each time I reload the page?
You need to have cookies enabled for sessions to work. Even Flask-Session cannot track a browser without those.
Flask-Session sets a cookie with a unique id, then later on finds your session data again by that cookie.
I'm looking for a way to make the Python logging module log to a database and fall back to the file system when the database is down.
So basically 2 things: How to let the logger log to database and how to make it fall to file logging when the db is down.
I recently managed to write my own database logger in Python. Since I couldn't find any example I thought I post mine here. Works with MS SQL.
Database table could look like this:
-- Table filled by the LogDBHandler INSERT statement: one row per log record.
CREATE TABLE [db_name].[log](
-- Auto-incrementing row id.
[id] [bigint] IDENTITY(1,1) NOT NULL,
-- Numeric level (record.levelno) and its name (record.levelname).
[log_level] [int] NULL,
[log_levelname] [char](32) NULL,
-- The log message text.
[log] [char](2048) NOT NULL,
-- Record creation time, formatted by the handler as local time.
[created_at] [datetime2](7) NOT NULL,
-- Name of the logger that produced the record (record.name).
[created_by] [char](32) NOT NULL,
) ON [PRIMARY]
The class itself:
class LogDBHandler(logging.Handler):
    '''
    Customized logging handler that puts logs to the database.
    pymssql required

    Each record becomes one row holding the level number, level name,
    message text, creation time and logger name.
    '''

    def __init__(self, sql_conn, sql_cursor, db_tbl_log):
        '''
        sql_conn   -- open database connection, used to commit each insert
        sql_cursor -- cursor on that connection, used to execute the insert
        db_tbl_log -- name of the log table (inserted into the SQL text
                      as-is, so it must come from trusted configuration)
        '''
        logging.Handler.__init__(self)
        self.sql_cursor = sql_cursor
        self.sql_conn = sql_conn
        self.db_tbl_log = db_tbl_log

    def emit(self, record):
        '''Insert one log record into the table and commit.'''
        # Record creation time, formatted as local time.
        tm = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(record.created))
        # getMessage() merges any %-style arguments into the message text.
        self.log_msg = record.getMessage().strip()
        # All values are passed as parameters so the driver quotes them;
        # the message can therefore contain quotes or SQL fragments safely.
        sql = (
            'INSERT INTO ' + self.db_tbl_log +
            ' (log_level, log_levelname, log, created_at, created_by) '
            'VALUES (%s, %s, %s, (convert(datetime2(7), %s)), %s)'
        )
        params = (
            record.levelno,
            record.levelname,
            self.log_msg,
            tm,
            record.name,
        )
        try:
            self.sql_cursor.execute(sql, params)
            self.sql_conn.commit()
        # If error - print it out on screen. Since DB is not working - there's
        # no point making a log about it to the database :)
        except pymssql.Error:
            print(sql)
            print('CRITICAL DB ERROR! Logging to database not possible!')
And usage example:
import pymssql
import time
import logging
# Connection settings passed to pymssql.connect() below.
db_server = 'servername'
db_user = 'db_user'
db_password = 'db_pass'
db_dbname = 'db_name'
# Name of the table the database handler inserts into.
db_tbl_log = 'log'
# File that logging.basicConfig() writes to.
log_file_path = 'C:\\Users\\Yourname\\Desktop\\test_log.txt'
log_error_level = 'DEBUG' # LOG error level (file)
log_to_db = True # LOG to database?
class LogDBHandler(logging.Handler):
[...]
# Open a dedicated connection for the logger when database logging is on.
if log_to_db:
    log_conn = pymssql.connect(db_server, db_user, db_password, db_dbname, 30)
    log_cursor = log_conn.cursor()
    logdb = LogDBHandler(log_conn, log_cursor, db_tbl_log)

# File logging on the root logger. This must run before a handler is added
# to the root logger, because basicConfig() does nothing once one exists.
logging.basicConfig(filename=log_file_path)

# Attach the database handler to the root logger as well.
if log_to_db:
    logging.getLogger().addHandler(logdb)

# Application logger; its records reach the root logger's handlers.
log = logging.getLogger('MY_LOGGER')
log.setLevel(log_error_level)

# Example: log a variable's contents as an error.
test_var = 'This is test message'
log.error('This error occurred: %s' % test_var)
Above will log both to the database and to the file. If file is not needed - skip the 'logging.basicConfig(filename=log_file_path)' line. Everything logged using 'log' - will be logged as MY_LOGGER. If some external error appears (i.e. in the module imported or something) - error will appear as 'root', since 'root' logger is also active, and is using the database handler.
Write yourself a handler that directs the logs to the database in question. When it fails, you can remove it from the handler list of the logger. There are many ways to deal with the failure-modes.
Python logging to a database with a backup logger
Problem
I had the same problem when I ran a Django project inside the server since sometimes you need to check the logs remotely.
Solution
First, the logger needs a handler that inserts log records into the database. Before writing it, and since my SQL is not good, an ORM is needed; I chose SQLAlchemy.
model:
# models.py
from sqlalchemy import Column, Integer, String, DateTime, Text
from sqlalchemy.ext.declarative import declarative_base
import datetime
# Declarative base shared by the ORM models in this module.
base = declarative_base()


class Log(base):
    """ORM model for one stored log record (table ``log``)."""

    __tablename__ = "log"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Insertion time; filled in by default with the current local time.
    time = Column(DateTime, nullable=False, default=datetime.datetime.now)

    # Where the record came from.
    level_name = Column(String(10), nullable=True)
    module = Column(String(200), nullable=True)
    thread_name = Column(String(200), nullable=True)
    file_name = Column(String(200), nullable=True)
    func_name = Column(String(200), nullable=True)
    line_no = Column(Integer, nullable=True)
    process_name = Column(String(200), nullable=True)

    # Message text, plus a separate column for its last line.
    message = Column(Text)
    last_line = Column(Text)
This is the crud for insertion into the database:
#crud.py
import sqlalchemy
from .models import base
from traceback import print_exc
class Crud:
    """Small wrapper around a SQLAlchemy engine and session for inserts.

    Call ``initiate()`` once before ``insert()``; it creates the engine, the
    session and the tables.
    """

    def __init__(self, connection_string='sqlite:///log_db.sqlite3',
                 encoding='utf-8',
                 pool_size=10,
                 max_overflow=20,
                 pool_recycle=3600):
        self.connection_string = connection_string
        # NOTE: the encoding and pool settings are only stored here;
        # create_engine() below is called with the connection string alone.
        self.encoding = encoding
        self.pool_size = pool_size
        self.max_overflow = max_overflow
        self.pool_recycle = pool_recycle
        self.engine = None
        self.session = None

    def initiate(self):
        """Create the engine, the session and the tables, in that order."""
        self.create_engine()
        self.create_session()
        self.create_tables()

    def create_engine(self):
        self.engine = sqlalchemy.create_engine(self.connection_string)

    def create_session(self):
        self.session = sqlalchemy.orm.Session(bind=self.engine)

    def create_tables(self):
        base.metadata.create_all(self.engine)

    def insert(self, instances):
        """Add ``instances`` to the session and commit.

        On any failure the session is rolled back and the error re-raised.
        """
        try:
            self.session.add(instances)
            # commit() flushes pending changes itself.
            self.session.commit()
        except BaseException:
            self.session.rollback()
            raise

    def __del__(self):
        self.close_session()
        self.close_all_connections()

    def close_session(self):
        """Close the session, if one was created."""
        if self.session is None:
            return
        try:
            self.session.close()
        except Exception:
            print_exc()
        else:
            self.session = None

    def close_all_connections(self):
        """Dispose of the engine, if one was created."""
        if self.engine is None:
            return
        try:
            self.engine.dispose()
        except Exception:
            print_exc()
        else:
            self.engine = None
The handler:
# handler.py
from logging import Handler, getLogger
from traceback import print_exc
from .crud import Crud
from .models import Log
# Module-level Crud instance used by DBHandler.emit(); initiate() runs at
# import time. Replace the <...> placeholder with a real connection string.
my_crud = Crud(
connection_string=<connection string to reach your db>,
encoding='utf-8',
pool_size=10,
max_overflow=20,
pool_recycle=3600)
my_crud.initiate()
class DBHandler(Handler):
    """Logging handler that stores records in the database.

    If storing a record fails and a backup logger was configured, the
    record's message is re-logged through that logger at the same level.
    Without a backup logger the traceback is printed instead.
    """

    backup_logger = None

    def __init__(self, level=0, backup_logger_name=None):
        """
        Args:
            level: Handler level, passed to ``logging.Handler``.
            backup_logger_name: Optional name of the logger to fall back to
                when the database insert fails.
        """
        super().__init__(level)
        if backup_logger_name:
            self.backup_logger = getLogger(backup_logger_name)

    def emit(self, record):
        """Store ``record`` in the database, falling back on failure."""
        try:
            message = self.format(record)
            last_line = message.rsplit('\n', 1)[-1]
            try:
                new_log = Log(module=record.module,
                              thread_name=record.threadName,
                              file_name=record.filename,
                              func_name=record.funcName,
                              level_name=record.levelname,
                              line_no=record.lineno,
                              process_name=record.processName,
                              message=message,
                              last_line=last_line)
                my_crud.insert(instances=new_log)
            except Exception:
                if self.backup_logger is None:
                    print_exc()
                else:
                    self._log_to_backup(record)
        except Exception:
            # A logging handler must never raise into the calling code.
            print_exc()

    def _log_to_backup(self, record):
        """Re-log the record's message through the backup logger."""
        try:
            # log() takes the numeric level, so custom levels work too.
            self.backup_logger.log(record.levelno, record.getMessage())
        except Exception:
            print_exc()
Test to check the logger:
# test.py
from logging import basicConfig, getLogger, DEBUG, FileHandler, Formatter
from .handlers import DBHandler
# Root logger: console output at DEBUG.
basicConfig(
    level=DEBUG,
    datefmt='%d-%b-%y %H:%M:%S',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
format = Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Backup logger: writes to a file, used when the database insert fails.
file_handler = FileHandler('file.log')
file_handler.setLevel(DEBUG)
file_handler.setFormatter(format)
backup_logger = getLogger('backup_logger')
backup_logger.addHandler(file_handler)

# Database logger: stores records through DBHandler.
db_handler = DBHandler(backup_logger_name='backup_logger')
db_handler.setLevel(DEBUG)
db_handler.setFormatter(format)
db_logger = getLogger('logger')
db_logger.addHandler(db_handler)

if __name__ == "__main__":
    db_logger.debug('debug: hello world!')
    db_logger.info('info: hello world!')
    db_logger.warning('warning: hello world!')
    db_logger.error('error: hello world!')
    db_logger.critical('critical: hello world!!!!')
You can see the handler accepts a backup logger that can use it when the database insertion fails.
A good improvement can be logging into the database by threading.
I am digging this out again.
There is a solution with SqlAlchemy (Pyramid is NOT required for this recipe):
https://docs.pylonsproject.org/projects/pyramid-cookbook/en/latest/logging/sqlalchemy_logger.html
And you could improve logging by adding extra fields, here is a guide: https://stackoverflow.com/a/17558764/1115187
Fallback to FS
Not sure that this is 100% correct, but you could have 2 handlers:
database handler (write to DB)
file handler (write to file or stream)
Just wrap the DB commit in a try-except. But be aware: the file will contain ALL log entries, not only the entries for which saving to the DB failed.
Old question, but dropping this for others. If you want to use python logging, you can add two handlers. One for writing to file, a rotating file handler. This is robust, and can be done regardless if the dB is up or not.
The other one can write to another service/module, like a pymongo integration.
Look up logging.config on how to setup your handlers from code or json.