I'm trying to use the HTTPHandler class of the standard Python logging library to send logs. I need to make an HTTPS POST request with basic credentials (username and password). This is how I'm setting up the HTTPHandler:
host = 'example.com'
url = '/path'
handler = logging.handlers.HTTPHandler(host, url, method='POST', secure=True, credentials=('username','password'), context=None)
logger.addHandler(handler)
But the problem is, I'm not getting any logs on my remote server. I'm not even seeing any exception from the handler. Am I setting up the handler arguments incorrectly? I can send similar logs using a simple Python HTTP request:
url = 'https://username:password@example.com/path'
headers = {'content-type': 'application/json'}
jsonLog = {'id': '4444', 'level': 'info', 'message': 'python log'}
r = requests.post(url, data = json.dumps(jsonLog), headers=headers)
Do I need to set up a header somehow because of the JSON content type? If yes, then how do I set that up in the HTTPHandler?
Update
I thought I should update what I ended up doing. After numerous searches I found I can create a custom handler by overriding emit() of logging.Handler.
import logging
import requests

class CustomHandler(logging.Handler):
    def emit(self, record):
        log_entry = self.format(record)
        # some code....
        url = 'url'
        # some code....
        # POST the formatted record as the request body
        return requests.post(url, data=log_entry, headers={"Content-type": "application/json"}).content
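For completeness, a minimal usage sketch (the logger name and message are arbitrary):

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(CustomHandler())
logger.info('python log')  # emit() will POST the formatted record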
Feel free to post if anyone has any better suggestions.
Expanding on the solution saz gave, here's how to add a custom HTTP handler that will forward the emitted logs to the specified URL using a bearer token. It uses a requests session instead of establishing a new connection for every log event.
Furthermore, if the request fails it attempts to resend the logs for a given number of retries.
Note: make sure your logging handler is as simple as possible to prevent the application from halting because of a log event.
I tested it with a simple localhost echo server and it works.
Feel free to suggest any changes.
import json
import logging
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

class CustomHttpHandler(logging.Handler):
    def __init__(self, url: str, token: str, silent: bool = True):
        '''
        Initializes the custom http handler
        Parameters:
            url (str): The URL that the logs will be sent to
            token (str): The Authorization token being used
            silent (bool): If False the http response and logs will be sent
                           to STDOUT for debug
        '''
        self.url = url
        self.token = token
        self.silent = silent

        # sets up a session with the server
        self.MAX_POOLSIZE = 100
        self.session = session = requests.Session()
        session.headers.update({
            'Content-Type': 'application/json',
            'Authorization': 'Bearer %s' % (self.token)
        })
        # note: by default urllib3's Retry does not retry POST requests;
        # pass allowed_methods (method_whitelist on older versions) if needed
        self.session.mount('https://', HTTPAdapter(
            max_retries=Retry(
                total=5,
                backoff_factor=0.5,
                status_forcelist=[403, 500]
            ),
            pool_connections=self.MAX_POOLSIZE,
            pool_maxsize=self.MAX_POOLSIZE
        ))

        super().__init__()

    def emit(self, record):
        '''
        This function gets called when a log event gets emitted. It receives a
        record, formats it and sends it to the url
        Parameters:
            record: a log record
        '''
        logEntry = self.format(record)
        response = self.session.post(self.url, data=logEntry)

        if not self.silent:
            print(logEntry)
            print(response.content)

# create logger
log = logging.getLogger('')
log.setLevel(logging.INFO)

# create formatter - this formats the log messages accordingly
formatter = logging.Formatter(json.dumps({
    'time': '%(asctime)s',
    'pathname': '%(pathname)s',
    'line': '%(lineno)d',
    'logLevel': '%(levelname)s',
    'message': '%(message)s'
}))

# create a custom http logger handler
httpHandler = CustomHttpHandler(
    url='<YOUR_URL>',
    token='<YOUR_TOKEN>',
    silent=False
)
httpHandler.setLevel(logging.INFO)

# add formatter to custom http handler
httpHandler.setFormatter(formatter)

# add handler to logger
log.addHandler(httpHandler)

log.info('Hello world!')
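To honour the note above about keeping the handler from halting the application, one option is to guard emit() and defer to logging's own error handling. A minimal sketch (the SafeHttpHandler name is mine; handleError() is the stdlib hook):

class SafeHttpHandler(CustomHttpHandler):
    def emit(self, record):
        try:
            logEntry = self.format(record)
            self.session.post(self.url, data=logEntry)
        except Exception:
            # handleError() respects logging.raiseExceptions, so a failed
            # POST is reported (or swallowed) instead of crashing the app
            self.handleError(record)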
You will need to subclass HTTPHandler and override the emit() method to do what you need. You can use the current implementation of HTTPHandler.emit() as a guide.
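For example, here is a rough sketch of such a subclass that sends the record as JSON with basic credentials (the JSONHTTPHandler name is mine; the connection handling mirrors the stdlib emit()):

import base64
import http.client
import json
import logging.handlers

class JSONHTTPHandler(logging.handlers.HTTPHandler):
    def emit(self, record):
        try:
            if self.secure:
                conn = http.client.HTTPSConnection(self.host, context=self.context)
            else:
                conn = http.client.HTTPConnection(self.host)
            # mapLogRecord() is the stdlib hook that turns a record into a dict
            data = json.dumps(self.mapLogRecord(record), default=str)
            conn.putrequest(self.method, self.url)
            conn.putheader('Content-Type', 'application/json')
            conn.putheader('Content-Length', str(len(data)))
            if self.credentials:
                creds = ('%s:%s' % self.credentials).encode('utf-8')
                conn.putheader('Authorization',
                               'Basic ' + base64.b64encode(creds).decode('ascii'))
            conn.endheaders()
            conn.send(data.encode('utf-8'))
            conn.getresponse()  # drain the response so the request completes
        except Exception:
            self.handleError(record)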
Following up on istvan's answer, you can use threads to prevent the program from slowing down:
import concurrent.futures
import json
import logging
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# a shared pool of worker threads for sending logs in the background
executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

class CustomHttpHandler(logging.Handler):
    def __init__(self, url: str, token: str, silent: bool = True):
        '''
        Initializes the custom http handler
        Parameters:
            url (str): The URL that the logs will be sent to
            token (str): The Authorization token being used
            silent (bool): If False the http response and logs will be sent
                           to STDOUT for debug
        '''
        self.url = url
        self.token = token
        self.silent = silent

        # sets up a session with the server
        self.MAX_POOLSIZE = 100
        self.session = session = requests.Session()
        session.headers.update({
            'Content-Type': 'application/json',
            'Authorization': 'Bearer %s' % (self.token)
        })
        self.session.mount('https://', HTTPAdapter(
            max_retries=Retry(
                total=5,
                backoff_factor=0.5,
                status_forcelist=[403, 500]
            ),
            pool_connections=self.MAX_POOLSIZE,
            pool_maxsize=self.MAX_POOLSIZE
        ))

        super().__init__()

    def emit(self, record):
        '''
        This function gets called when a log event gets emitted. It receives a
        record and hands it off to the thread pool for formatting and sending
        Parameters:
            record: a log record
        '''
        executor.submit(self.actual_emit, record)

    def actual_emit(self, record):
        logEntry = self.format(record)
        response = self.session.post(self.url, data=logEntry)
        print(response)

        if not self.silent:
            print(logEntry)
            print(response.content)

# create logger
log = logging.getLogger('test')
log.setLevel(logging.INFO)

# create formatter - this formats the log messages accordingly
formatter = logging.Formatter(json.dumps({
    'time': '%(asctime)s',
    'pathname': '%(pathname)s',
    'line': '%(lineno)d',
    'logLevel': '%(levelname)s',
    'message': '%(message)s'
}))

# create a custom http logger handler
httpHandler = CustomHttpHandler(
    url='<URL>',
    token='<YOUR_TOKEN>',
    silent=False
)
httpHandler.setLevel(logging.INFO)

# add formatter to custom http handler
httpHandler.setFormatter(formatter)
log.addHandler(httpHandler)

def main():
    print("start")
    log.error("\nstop")
    print("now")

if __name__ == "__main__":
    main()
What this program does is send the logs to the ThreadPoolExecutor, with at most 10 threads. If there are more logs than the threads can handle, they queue up; this prevents slowdowns of the program.
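As an alternative to the executor, the standard library's QueueHandler/QueueListener pair gives the same decoupling. A minimal sketch, reusing the log and httpHandler objects from above (instead of adding httpHandler to the logger directly):

import queue
from logging.handlers import QueueHandler, QueueListener

log_queue = queue.Queue(-1)  # unbounded buffer between the app and the sender
log.addHandler(QueueHandler(log_queue))

# the listener drains the queue on its own thread and drives the HTTP handler
listener = QueueListener(log_queue, httpHandler)
listener.start()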
What you can also do, at least what I am doing in my project of building a localhost central logging database and viewer: I spawn a separate thread on the server side and instantly return an HTTP response, so that all the database work happens after the HTTP response has been sent back. This removes the need for threads on the client, since it is on localhost and latency is almost zero.
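A rough sketch of that server-side pattern, assuming a Flask endpoint (the /log route and store_log function are illustrative, not from the original project):

import threading
from flask import Flask, request

app = Flask(__name__)

def store_log(entry):
    pass  # write the entry to the database (placeholder)

@app.route('/log', methods=['POST'])
def receive_log():
    entry = request.get_json()
    # do the slow database work in the background and respond immediately
    threading.Thread(target=store_log, args=(entry,)).start()
    return '', 204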
Related
I have a Python client application that calls (over HTTP) a Python Flask API.
Both of these applications log to Azure Application Insights using the opencensus libraries.
I want to do the logging in a fashion that lets me correlate the logs in Application Insights end to end.
Python client app
For example, when the client app initiates an HTTP GET call to the Flask API, it generates an http request dependency log entry in ApplicationInsights.
The app also logs individual entries about the http request and http response into the trace table.
Flask API
I am logging the incoming HTTP request in the Flask API using a before_request decorator, and also logging the HTTP response using an after_request decorator.
Also the actual method ( that the Flask routing invokes ) has its own logging.
Note: these logs go into the trace table.
Expectation
I am trying to get the logs generated from the Flask API have a correlation with the log generated from the client application.
Current behaviour
Logs of Python client app
The logs in the dependency table have an operation_Id - All good!
The logs in the trace table have the same operation_Id and operation_ParentId as above - All good!
Logs of Flask API
The logs in the request table have the same operation_Id as above - All good!
The logs in the trace table generated by the before_request, after_request decorators - The operation_Id and operation_ParentId are blank. - Problematic!
The logs in the trace table generated by the logging statements inside the route/methods - The operation_Id and operation_ParentId are blank. - Problematic!
Help please
I can see that the Traceparent HTTP header is coming in as part of the HTTP request to the Flask API, but it looks like logging is ignoring it.
How do I get the logging statements to use the Traceparent data so that operation_Id and operation_ParentId show up correctly in the traces table for the Flask API?
Flask API Code
import datetime
import decimal
import flask
from flask import request, jsonify
import logging
import json
import requests
from opencensus.ext.azure.log_exporter import AzureLogHandler, AzureEventHandler
from opencensus.ext.flask.flask_middleware import FlaskMiddleware
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace.samplers import ProbabilitySampler, AlwaysOnSampler
from opencensus.trace.tracer import Tracer
from opencensus.trace import config_integration
import os

logger = logging.getLogger()

class MyJSONEncoder(flask.json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, decimal.Decimal):
            # Convert decimal instances to strings.
            return str(obj)
        if isinstance(obj, datetime.datetime):
            # strftime_iso_regular_format_str is defined elsewhere in the project
            return obj.strftime(strftime_iso_regular_format_str)
        return super(MyJSONEncoder, self).default(obj)

# Initialize logging with Azure Application Insights
class CustomDimensionsFilter(logging.Filter):
    """Add custom-dimensions like run_id in each log by using filters."""

    def __init__(self, custom_dimensions=None):
        """Initialize CustomDimensionsFilter."""
        self.custom_dimensions = custom_dimensions or {}

    def filter(self, record):
        """Add the default custom_dimensions into the current log record."""
        dim = {**self.custom_dimensions,
               **getattr(record, "custom_dimensions", {})}
        record.custom_dimensions = dim
        return True

APPLICATION_INSIGHTS_CONNECTIONSTRING = os.getenv('APPLICATION_INSIGHTS_CONNECTIONSTRING')
modulename = 'FlaskAPI'
APPLICATION_NAME = 'FlaskAPI'
ENVIRONMENT = 'Development'

def callback_function(envelope):
    envelope.tags['ai.cloud.role'] = APPLICATION_NAME
    return True

logger = logging.getLogger(__name__)
log_handler = AzureLogHandler(
    connection_string=APPLICATION_INSIGHTS_CONNECTIONSTRING)
log_handler.addFilter(CustomDimensionsFilter(
    {
        'ApplicationName': APPLICATION_NAME,
        'Environment': ENVIRONMENT
    }))
log_handler.add_telemetry_processor(callback_function)
logger.addHandler(log_handler)

azureExporter = AzureExporter(
    connection_string=APPLICATION_INSIGHTS_CONNECTIONSTRING)
azureExporter.add_telemetry_processor(callback_function)
tracer = Tracer(exporter=azureExporter, sampler=AlwaysOnSampler())

app = flask.Flask("app")
app.json_encoder = MyJSONEncoder
app.config["DEBUG"] = True

middleware = FlaskMiddleware(
    app,
    exporter=azureExporter,
    sampler=ProbabilitySampler(rate=1.0),
)

config_integration.trace_integrations(['logging', 'requests'])

def getJsonFromRequestBody(request):
    isContentTypeJson = request.headers.get('Content-Type') == 'application/json'
    doesHaveBodyJson = False
    if isContentTypeJson:
        try:
            doesHaveBodyJson = request.get_json() is not None
        except:
            doesHaveBodyJson = False
    if doesHaveBodyJson:
        return json.dumps(request.get_json())
    else:
        return None

def get_properties_for_customDimensions_from_request(request):
    values = ''
    if len(request.values) == 0:
        values += '(None)'
    for key in request.values:
        values += key + ': ' + request.values[key] + ', '
    properties = {'custom_dimensions':
                  {
                      'request_method': request.method,
                      'request_url': request.url,
                      'values': values,
                      'body': getJsonFromRequestBody(request)
                  }}
    return properties

def get_properties_for_customDimensions_from_response(request, response):
    request_properties = get_properties_for_customDimensions_from_request(request)
    request_customDimensions = request_properties.get('custom_dimensions')
    response_properties = {'custom_dimensions':
                           {
                               **request_customDimensions,
                               'response_status': response.status,
                               'response_body': response.data.decode('utf-8')
                           }}
    return response_properties

# Useful debugging interceptor to log all values posted to the endpoint
@app.before_request
def before():
    properties = get_properties_for_customDimensions_from_request(request)
    logger.warning("request {} {}".format(
        request.method, request.url), extra=properties)

# Useful debugging interceptor to log all endpoint responses
@app.after_request
def after(response):
    response_properties = get_properties_for_customDimensions_from_response(request, response)
    logger.warning("response: {}".format(
        response.status
    ), extra=response_properties)
    return response

@app.route('/api/{}/status'.format("v1"), methods=['GET'])
def health_check():
    message = "Health ok!"
    logger.info(message)
    return message

if __name__ == '__main__':
    app.run()
References used
Microsoft's guidance on Application Insights Log Correlation
My code repository where I have tested and reproduced the problem
So I'm aware of logging.getLogger(__name__), which returns a logger object.
However, now I have created a semi-complex logging scheme with QueueHandler, QueueListener and HTTPHandler (this is custom_logging.py):
import json
import logging
import queue
import uuid
import requests
from logging.handlers import QueueHandler, QueueListener
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

class CustomHttpHandler(logging.Handler):
    def __init__(self, url: str, token: str, silent: bool = True):
        """
        Initializes the custom http handler
        Parameters:
            url (str): The URL that the logs will be sent to
            token (str): The Authorization token being used
            silent (bool): If False the http response and logs will be sent
                           to STDOUT for debug
        """
        self.url = url
        self.token = token
        self.silent = silent

        # sets up a session with the server
        self.MAX_POOLSIZE = 100
        self.session = session = requests.Session()
        """session.headers.update({
            "Content-Type": "application/json",
            "Authorization": "Bearer %s" % (self.token)
        })"""
        self.session.mount("https://", HTTPAdapter(
            max_retries=Retry(
                total=5,
                backoff_factor=0.5,
                status_forcelist=[403, 500]
            ),
            pool_connections=self.MAX_POOLSIZE,
            pool_maxsize=self.MAX_POOLSIZE
        ))

        super().__init__()

    def emit(self, record):
        """
        This function gets called when a log event gets emitted. It receives a
        record, formats it and sends it to the url
        Parameters:
            record: a log record
        """
        logEntry = self.format(record)
        logEntry = json.loads(logEntry)
        response = self.session.post(self.url, json=logEntry)

        if not self.silent:
            print(logEntry)
            print(response.content)

def init():
    logger = logging.getLogger(__name__)
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter("%(levelname)s: %(message)s")
    console_handler.setFormatter(formatter)

    # create a custom http logger handler
    httpHandler = CustomHttpHandler(
        url="https://someAddress/log",
        token="1234",
        silent=True
    )

    MACAddr = uuid.getnode()

    # create formatter - this formats the log messages accordingly
    formatter = logging.Formatter(json.dumps({
        "time": "%(asctime)s",
        "MAC Node": MACAddr,
        "line": "%(lineno)d",
        "module": "%(name)s",
        "logLevel": "%(levelname)s",
        "message": "%(message)s"
    }))

    # add formatter to custom http handler
    httpHandler.setFormatter(formatter)

    log_queue = queue.Queue(-1)
    queue_handler = QueueHandler(log_queue)
    logger.addHandler(queue_handler)
    listener = QueueListener(log_queue, console_handler, httpHandler)
    listener.start()

    logger.setLevel(logging.DEBUG)
    logger.info("Hello world!")
This works perfectly: I can see all the logging info on the HTTP server.
However, what if I want to use that same logger across different classes or files?
From my main.py I call the init() method:
custom_logging.init()
Then how could I obtain all the structure created in that method? The first idea that comes to mind is to return the logger object from the init() method, which indeed works, but I don't know if it is the most elegant way to get around this problem.
You don't need to return a logger or pass it around, as long as you know its name (the module name - via __name__ - in the above case). You can get that logger from anywhere just using e.g. logging.getLogger('custom_logging').
Or, if your use case allows, attach those handlers to the root logger and let other loggers use those handlers automatically. (By default, handlers of ancestor loggers are offered the chance to handle events logged to descendant loggers.)
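A minimal sketch of the root-logger variant, reusing the queue_handler built in init() above:

import logging

# in custom_logging.init(), attach the queue handler to the root logger:
root = logging.getLogger()
root.addHandler(queue_handler)
root.setLevel(logging.DEBUG)

# then any module gets the same pipeline for free:
log = logging.getLogger(__name__)
log.info("Hello world!")  # propagates up to the root logger's handlers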
I have created a custom class which pushes my logs to Splunk, but somehow it is not working. Here is the class:
class Splunk(logging.StreamHandler):
    def __init__(self, url, token):
        super().__init__()
        self.url = url
        self.headers = {'Authorization': f'Splunk {token}'}
        self.propagate = False  # note: propagate is a logger attribute; it has no effect on a handler

    def emit(self, record):
        mydata = dict()
        mydata['sourcetype'] = 'mysourcetype'
        mydata['event'] = record.__dict__
        response = requests.post(self.url, data=json.dumps(mydata), headers=self.headers)
        return response
I call the class from my logger class somehow like this (adding an additional handler), so that it logs to the console as well as sending to Splunk:
if splunk_config is not None:
    splunk_handler = Splunk(splunk_config["url"], splunk_config["token"])
    self.default_logger.addHandler(splunk_handler)
But somehow I am not able to see any logs in Splunk, though I can see the logs in the console. When I run the stripped-down version of the above logic from a python3 terminal, it is successful:
import requests
import json
url = 'myurl'
token = 'mytoken'
headers = {'Authorization': 'Splunk mytoken'}
propagate = False
mydata = dict()
mydata['sourcetype'] = 'mysourcetype'
mydata['event'] = {'name': 'root', 'msg': 'this is a sample message'}
response = requests.post(url, data=json.dumps(mydata), headers=headers)
print(response.text)
Things I have already tried: making my dictionary data JSON serializable using the link below, but it didn't help.
https://pynative.com/make-python-class-json-serializable/
Any other things to try?
I've successfully used this Python Class for Sending Events to Splunk HTTP Event Collector instead of writing a dedicated class:
https://github.com/georgestarcher/Splunk-Class-httpevent
The advantage is that it implements batchEvent() and flushBatch() methods to submit multiple events at once across multiple threads.
The example here should get you started:
https://github.com/georgestarcher/Splunk-Class-httpevent/blob/master/example.py
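A rough usage sketch based on that repository's example.py (the host and token values are placeholders):

from splunk_http_event_collector import http_event_collector

# one collector instance can be shared across the application
hec = http_event_collector("mytoken", "splunk-host.example.com")

payload = {
    "sourcetype": "mysourcetype",
    "event": {"name": "root", "msg": "this is a sample message"}
}

hec.batchEvent(payload)  # queue the event
hec.flushBatch()         # send everything queued so far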
I'm just beginning to learn Python/Django. I know PHP, but I wanted to get to know this framework. I'm trying to work with Yelp's API, and I'm trying to figure out what to do when someone brings a new file into the project.
In their business.py they have this:
# Note: this is Python 2 code (urllib2 and print statements).
import json
import oauth2
import optparse
import urllib
import urllib2

parser = optparse.OptionParser()
parser.add_option('-c', '--consumer_key', dest='consumer_key', help='OAuth consumer key (REQUIRED)')
parser.add_option('-s', '--consumer_secret', dest='consumer_secret', help='OAuth consumer secret (REQUIRED)')
parser.add_option('-t', '--token', dest='token', help='OAuth token (REQUIRED)')
parser.add_option('-e', '--token_secret', dest='token_secret', help='OAuth token secret (REQUIRED)')
parser.add_option('-a', '--host', dest='host', help='Host', default='api.yelp.com')
parser.add_option('-i', '--id', dest='id', help='Business')
parser.add_option('-u', '--cc', dest='cc', help='Country code')
parser.add_option('-n', '--lang', dest='lang', help='Language code')

options, args = parser.parse_args()

# Required options
if not options.consumer_key:
    parser.error('--consumer_key required')
if not options.consumer_secret:
    parser.error('--consumer_secret required')
if not options.token:
    parser.error('--token required')
if not options.token_secret:
    parser.error('--token_secret required')
if not options.id:
    parser.error('--id required')

url_params = {}
if options.cc:
    url_params['cc'] = options.cc
if options.lang:
    url_params['lang'] = options.lang

path = '/v2/business/%s' % (options.id,)

def request(host, path, url_params, consumer_key, consumer_secret, token, token_secret):
    """Returns response for API request."""
    # Unsigned URL
    encoded_params = ''
    if url_params:
        encoded_params = urllib.urlencode(url_params)
    url = 'http://%s%s?%s' % (host, path, encoded_params)
    print 'URL: %s' % (url,)

    # Sign the URL
    consumer = oauth2.Consumer(consumer_key, consumer_secret)
    oauth_request = oauth2.Request('GET', url, {})
    oauth_request.update({'oauth_nonce': oauth2.generate_nonce(),
                          'oauth_timestamp': oauth2.generate_timestamp(),
                          'oauth_token': token,
                          'oauth_consumer_key': consumer_key})
    token = oauth2.Token(token, token_secret)
    oauth_request.sign_request(oauth2.SignatureMethod_HMAC_SHA1(), consumer, token)
    signed_url = oauth_request.to_url()
    print 'Signed URL: %s\n' % (signed_url,)

    # Connect
    try:
        conn = urllib2.urlopen(signed_url, None)
        try:
            response = json.loads(conn.read())
        finally:
            conn.close()
    except urllib2.HTTPError, error:
        response = json.loads(error.read())

    return response

response = request(options.host, path, url_params, options.consumer_key, options.consumer_secret, options.token, options.token_secret)
print json.dumps(response, sort_keys=True, indent=2)
It's very lengthy, I apologize for that. But my concern is: what do I do with this? They've set up a def request() in here, and I'm assuming that I have to import this into my views?
I've been following the Django documentation on creating a new app. In the documentation they set up a bunch of defs inside the views.py file. I'm just confused as to how I'm supposed to make this work with my project. If I wanted to search for a business in the URL, how would it send the data out?
Thanks for your help.
This is a command-line script that makes HTTP requests to the Yelp API. You probably don't want to make such an external request within the context of a main request handler. Well, you could call a request handler that makes this call to Yelp. Let's see ...
You could import the request function and instead of invoking it with command line options, call it yourself.
from yelp.business import request as yelp_req

def my_request_handler(request):
    json_from_yelp = yelp_req(...)  # pass host, path, url_params and the OAuth credentials
    # do stuff and return a response
Making this kind of external call inside a request handler is pretty meh, though (that is, making an HTTP request to an external service within a request handler). If the call is made via Ajax, it may be OK for the UX.
This business.py is just an example showing you how to create a signed request with oauth2. You may be able to just import the request function and use it. On the other hand, you may prefer to write your own (perhaps using the requests library). You probably want to use Celery or some other async means to make the calls outside of your request handlers, and/or cache the responses to avoid costly external HTTP I/O with every request.
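For instance, a rough sketch of the same signed GET using requests with requests_oauthlib (Python 3; the credential strings and business ID are placeholders):

import requests
from requests_oauthlib import OAuth1

# OAuth 1.0a signing, equivalent to what the oauth2-based script does
auth = OAuth1('consumer_key', 'consumer_secret', 'token', 'token_secret')

url = 'http://api.yelp.com/v2/business/some-business-id'
response = requests.get(url, params={'cc': 'US', 'lang': 'en'}, auth=auth)
print(response.json())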
Given that webtest doesn't seem to have a 3.x version (or any plans to develop one), are there any solutions for automated system testing of a WSGI application? I know unittest for unit testing; at the moment I'm more interested in whole-system tests.
I'm not looking for tools to help develop an application - just test it.
In case anyone else comes upon this, I ended up writing a solution myself. Here's a very simple class I use - I just inherit from WSGIBaseTest instead of TestCase, and get a method self.request() that I can pass requests into. It stores cookies, and will automatically send them into the application on later requests (until self.new_session() is called).
import unittest
from wsgiref import util
import io

class WSGIBaseTest(unittest.TestCase):
    '''Base class for unit-tests. Provides a simple interface to make requests
    as though they came through a wsgi interface from a user.'''

    def setUp(self):
        '''Set up a fresh testing environment before each test.'''
        self.cookies = []

    def request(self, application, url, post_data=None):
        '''Hand a request to the application as if sent by a client.
        @param application: The callable wsgi application to test.
        @param url: The URL to make the request against.
        @param post_data: A string.'''
        self.response_started = False
        post_data = post_data or ''
        temp = io.StringIO(post_data)
        environ = {
            'PATH_INFO': url,
            'REQUEST_METHOD': 'POST' if post_data else 'GET',
            'CONTENT_LENGTH': str(len(post_data)),
            'wsgi.input': temp,
        }
        util.setup_testing_defaults(environ)
        if self.cookies:
            environ['HTTP_COOKIE'] = ';'.join(self.cookies)

        self.response = ''
        for ret in application(environ, self._start_response):
            assert self.response_started
            self.response += str(ret)
        temp.close()
        return self.response

    def _start_response(self, status, headers):
        '''A callback passed into the application, to simulate a wsgi
        environment.
        @param status: The response status of the application ("200", "404", etc)
        @param headers: Any headers to begin the response with.
        '''
        assert not self.response_started
        self.response_started = True
        self.status = status
        self.headers = headers

        for header in headers:
            # Parse out any cookies and save them to send with later requests.
            if header[0] == 'Set-Cookie':
                var = header[1].split(';', 1)
                if len(var) > 1 and var[1][0:9] == ' Max-Age=':
                    if int(var[1][9:]) > 0:
                        # An approximation, since our cookies never expire unless
                        # explicitly deleted (by setting Max-Age=0).
                        self.cookies.append(var[0])
                    else:
                        index = self.cookies.index(var[0])
                        self.cookies.pop(index)

    def new_session(self):
        '''Start a new session (or pretend to be a different user) by deleting
        all current cookies.'''
        self.cookies = []
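As a usage sketch, a test case for a trivial WSGI application might look like this (hello_app is illustrative; note the class above works with str bodies rather than the bytes a real WSGI server would use):

def hello_app(environ, start_response):
    '''A minimal wsgi application for demonstration.'''
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return ['Hello, %s!' % environ['REQUEST_METHOD']]

class HelloAppTest(WSGIBaseTest):
    def test_get(self):
        response = self.request(hello_app, '/')
        self.assertEqual(response, 'Hello, GET!')

    def test_post(self):
        response = self.request(hello_app, '/', post_data='x=1')
        self.assertEqual(response, 'Hello, POST!')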