How to add metrics to external services using aioprometheus and FastAPI?

I'm trying to add metrics to external services with aioprometheus in an app built with FastAPI. Here is a simplified example of what I'm trying to achieve.
Say I have a wrapper App class as such:
import time

from aioprometheus import Registry, Counter, Histogram
from fastapi import FastAPI


class App:
    def __init__(self, ...):
        self.registry = Registry()
        self.counter = Counter(
            name="counts", doc="request counts"
        )
        self.latency = Histogram(
            name="latency",
            doc="request latency",
            buckets=[0.1, 0.5, 1, 1.5, 2]
        )
        self.app = FastAPI()
        self._metrics()

    def _metrics(self):
        # Counter metrics
        @self.app.middleware("http")
        async def counter_metrics(request, call_next):
            response = await call_next(request)
            self.counter.inc(
                {"path": str(request.url.path), "status": response.status_code}
            )
            return response

        # Latency metrics
        @self.app.middleware("http")
        async def latency_metrics(request, call_next):
            start = time.time()
            response = await call_next(request)
            total_duration = time.time() - start
            self.latency.observe(
                {"path": str(request.url.path)}, total_duration
            )
            return response

        @self.app.on_event("startup")
        async def startup():
            self.app.include_router(some_router(...))
            self.registry.register(self.counter)
            self.registry.register(self.latency)
Basically, I have Registry, Counter, and Histogram initialized. In _metrics, I have the Counter- and Histogram-specific logic, and the metrics are registered with the Registry on startup. This does its magic and captures the metrics when an endpoint in some_router is called (this is good! I would want to keep this, as well as have the external service metrics).
However, say I call an external service from some_router as such:
from fastapi import APIRouter


def some_router():
    router = APIRouter()

    @router.get("/some_router")
    async def some_router():
        response = await external_service()
        return response

    return router
In this case, how would I add metrics specifically to external_service, i.e., Latency of this specific external service?

As per the documentation, you would need to attach your metrics to the app instance using the generic app.state attribute (see the implementation of Starlette's State class as well), so they can easily be accessed in the route handler—as metrics are often created in a different module than where they are used (as in your case). Thus, you could use the following in your App class, after instantiating the metrics:
self.app.state.registry = self.registry
self.app.state.counter = self.counter
self.app.state.latency = self.latency
In your routers module, you could get the app instance using the Request object, as described here and here, and then use it to get the metrics instances (as shown below), which will let you add metrics to your external_service:
from fastapi import Request
...

@router.get("/some_router")
async def some_router(request: Request):
    registry = request.app.state.registry
    counter = request.app.state.counter
    latency = request.app.state.latency
    response = await external_service()
    return response
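For example, here is a minimal sketch of timing the external_service call with the Histogram retrieved from app.state; the "external_service" label value is just an illustrative choice, not anything mandated by aioprometheus:

import time

from fastapi import Request

@router.get("/some_router")
async def some_router(request: Request):
    latency = request.app.state.latency

    start = time.time()
    response = await external_service()
    duration = time.time() - start

    # Record how long this particular external call took,
    # separately from the per-endpoint middleware metrics.
    latency.observe({"path": "external_service"}, duration)

    return response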

Related

how to overwrite a route in sanic when using blueprint.copy?

from sanic import Blueprint
from sanic.response import json
from sanic import Sanic

app = Sanic('test')

bpv1 = Blueprint('bpv1', version=1)

@bpv1.route('/hello')
async def root(request):
    return json('hello v1')

app.blueprint(bpv1)

bpv2 = bpv1.copy('bpv2', version=2)

@bpv2.route('/hello')
async def root(request):
    return json('hello v2')

app.blueprint(bpv2)
I want to partially overwrite a route's implementation when the copies belong to different blueprints, but it raises sanic_routing.exceptions.RouteExists.
How can I achieve this?
I got the answer from the forum: remove the copied future route before registering the new handler.
bpv2 = bpv1.copy("bpv2", version=2)
bpv2._future_routes = {
    route for route in bpv2._future_routes if route.uri != "/hello"
}

@bpv2.route("/hello")
async def root2(request):
    return json("hello v2")
Link: https://community.sanicframework.org/t/how-to-overwrite-a-route-when-using-blueprint-copy/1067

How to access request_id defined in fastapi middleware in function

Hi, I have my middleware written like this:
@app.middleware("http")
async def request_middleware(request, call_next):
    end_point = request.url.path
    global request_id
    request_id = get_request_id()
    with logger.contextualize(request_id=request_id, end_point=end_point):
        logger.info("----------Request started----------")
        try:
            response = await call_next(request)
        except Exception as ex:
            logger.error(f"Request failed: {ex}")
            response = JSONResponse()
        finally:
            response.headers["X-Request-Id"] = request_id
            logger.info("----------Request ended----------")
            return response
I want the request_id defined in the middleware to be accessible in other functions; how can we do that?
Instead of a global request_id, you can use a context variable, which is not shared between async tasks:
from contextvars import ContextVar

req_id: ContextVar[str] = ContextVar('req_id', default='')

# Inside your middleware:
req_id.set(get_request_id())

# Inside other functions, even in different files, you import that variable:
req_id.get()
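Here is a minimal end-to-end sketch of that approach; the uuid-based id (standing in for get_request_id) and the some_helper function are illustrative assumptions:

import uuid
from contextvars import ContextVar

from fastapi import FastAPI, Request

# In practice this would live in its own module (e.g. context.py) so any file can import it.
req_id: ContextVar[str] = ContextVar('req_id', default='')

app = FastAPI()

@app.middleware("http")
async def request_middleware(request: Request, call_next):
    # Each request is handled in its own task, so each request gets its own value.
    req_id.set(str(uuid.uuid4()))
    response = await call_next(request)
    response.headers["X-Request-Id"] = req_id.get()
    return response

def some_helper():
    # Any function called while handling the request sees the same id.
    return req_id.get()

@app.get("/")
async def root():
    return {"request_id": some_helper()}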
Two other solutions:
1. Store data in request.state in your middleware, and then access request.state in the view functions (see the short sketch after the starlette-context example below). More info at https://fastapi.tiangolo.com/tutorial/sql-databases/?h=request.#about-requeststate
2. Use starlette-context, which works much like g in Flask and is much easier:
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

app.add_middleware(RawContextMiddleware)

@app.middleware("http")
async def request_middleware(request, call_next):
    request_id = get_request_id()
    context['request_id'] = request_id
    return await call_next(request)

@router.post('/foobar')
async def foobar():
    return context['request_id']
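And here is the short sketch of the request.state variant mentioned above; get_request_id stands in for however you generate the id in your own code:

from fastapi import FastAPI, Request

app = FastAPI()

@app.middleware("http")
async def request_middleware(request: Request, call_next):
    # Attach the id to this request's state; it lives only for this request.
    request.state.request_id = get_request_id()
    return await call_next(request)

@app.get("/foobar")
async def foobar(request: Request):
    # Any handler (or dependency) that receives the Request can read it back.
    return {"request_id": request.state.request_id}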

Sharing an aiohttp.ClientSession between multiple asynchronous callers

The goal
I want to make regular HTTP requests to a REST API. The requests happen at an interval ranging from a few seconds up to multiple hours, depending on the user input. I want to keep a single connection alive and close it in a smart way.
The questions
Is there an existing library that provides features similar to what I wrote with the class SessionOneToN?
Would it be possible to use a single session of type aiohttp.ClientSession that runs forever and is never closed?
What I have tried
What I have tried does work, but I wonder if there is an established library that can achieve these goals.
My code
My application uses the event loop module asyncio, and the HTTP module aiohttp.
My application opens a single session of type aiohttp.ClientSession and shares it between multiple asynchronous callers.
The callers can make their requests concurrently and asynchronously.
Once all callers have received their responses and none is still waiting, the aiohttp.ClientSession is closed. A new aiohttp.ClientSession is opened as necessary when a caller makes a new request.
import aiohttp
import asyncio
from contextlib import asynccontextmanager
import sys

url = 'http://stackoverflow.com/questions/64305548/sharing-an-aiohttp-clientsession-between-multiple-asynchronous-callers'

# The callers look like so:
async def request():
    async with session() as s:
        async with s.get(url) as response:
            return await response.text()

# The session manager looks like so:
class SessionOneToN:
    """Manage one session that is reused by multiple clients, instantiating a new
    session object from the given session_class as required. Provide a context
    manager for accessing the session. The session_class returns a context
    manager when instantiated; this context manager is referred to as the
    internal context manager, and is entered when the first client enters the
    context manager returned by get_context, and is exited when the last
    client exits it."""

    def __init__(self, session_class):
        self.n = 0
        self.session_class = session_class
        self.context = None
        self.aenter_result = None

    async def plus(self):
        self.n += 1
        if self.n == 1:
            # self.context: the internal context manager
            self.context = context = self.session_class()
            # self.aenter_result: the result from entering the internal
            # context manager
            self.aenter_result = await context.__aenter__()
        assert self.aenter_result is not None
        return self.aenter_result

    def minus(self):
        self.n -= 1
        if self.n == 0:
            return True

    def cleanup(self):
        self.context = None
        self.aenter_result = None

    @asynccontextmanager
    async def get_context(self):
        try:
            aenter_result = await self.plus()
            yield aenter_result
        except:
            if self.minus():
                if not await self.context.__aexit__(*sys.exc_info()):
                    self.cleanup()
                    raise
                self.cleanup()
        else:
            if self.minus():
                await self.context.__aexit__(None, None, None)
                self.cleanup()


class SessionOneToNaiohttp:
    def __init__(self):
        self.sotn = SessionOneToN(aiohttp.ClientSession)

    def get_context(self):
        return self.sotn.get_context()


sotnaiohttp = SessionOneToNaiohttp()


def session():
    return sotnaiohttp.get_context()


response_text = asyncio.run(request())
print(response_text[0:10])
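As a quick illustration of the one-to-N sharing (purely illustrative, not part of the question), the three concurrent callers below reuse a single ClientSession, which is opened on the first entry and closed after the last caller exits:

async def main():
    # All three requests run concurrently and share one ClientSession.
    results = await asyncio.gather(request(), request(), request())
    for text in results:
        print(text[0:10])

asyncio.run(main())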

Threads can only be started once in Django Channels

I created a simple Django Channels consumer that should connect to an external source, retrieve data and send it to the client. So, the user opens the page > the consumer connects to the external service and gets the data > the data is sent to the websocket.
Here is my code:
import json
import time
import asyncio

from channels.generic.websocket import WebsocketConsumer, AsyncConsumer, AsyncJsonWebsocketConsumer
from binance.client import Client
from binance.websockets import BinanceSocketManager

client = Client('', '')
trades = client.get_recent_trades(symbol='BNBBTC')
bm = BinanceSocketManager(client)


class EchoConsumer(AsyncJsonWebsocketConsumer):
    async def connect(self):
        await self.accept()
        await self.send_json('test')
        bm.start_trade_socket('BNBBTC', self.process_message)
        bm.start()

    def process_message(self, message):
        JSON1 = json.dumps(message)
        JSON2 = json.loads(JSON1)
        # define variables
        Rate = JSON2['p']
        Quantity = JSON2['q']
        Symbol = JSON2['s']
        Order = JSON2['m']
        asyncio.create_task(self.send_json(Rate))
        print(Rate)
This code works when I open one page; if I try to open a new window with a new account, though, it will throw the following error:
File "C:\Users\User\Desktop\Heroku\github\master\myapp\consumers.py", line 54, in connect
bm.start()
File "C:\Users\User\lib\threading.py", line 843, in start
raise RuntimeError("threads can only be started once")
threads can only be started once
I'm new to Channels, so this is a noob question, but how can I fix this problem? What I wanted to do was: one user opens the page and gets the data, another user opens the page and also gets the data; is there no way to do that? Or am I simply misunderstanding how Django Channels and websockets work?
Do you really need a secondary thread?
class EchoConsumer(AsyncJsonWebsocketConsumer):

    symbol = ''

    async def connect(self):
        self.symbol = 'BNBBTC'
        # or, more probably, retrieve the value for "symbol" from the query string,
        # so the client can specify which symbol they are interested in:
        # socket = new WebSocket("ws://.../?symbol=BNBBTC");
        await self.accept()

    async def process_message(self, message):
        # PSEUDO-CODE BELOW!
        if self.symbol == message['symbol']:
            await self.send({
                'type': 'websocket.send',
                'text': json.dumps(message),
            })
For extra flexibility, you might instead accept a list of symbols from the client:
//HTML
socket = new WebSocket("ws://.../?symbols=XXX,YYY,ZZZ");
then (in the consumer):
class EchoConsumer(AsyncJsonWebsocketConsumer):

    symbols = []

    async def connect(self):
        # Here we need to parse "?symbols=XXX,YYY,ZZZ" ...
        # The code below has been stolen from another project of mine and should be suitably adapted.
        params = urllib.parse.parse_qs(self.scope.get('query_string', b'').decode('utf-8'))
        try:
            self.symbols = json.loads(params.get('symbols', ['[]'])[0])
        except:
            self.symbols = []

    def process_message(self, message):
        if message['symbol'] in self.symbols:
            ...
I'm no Django developer, but if I understand correctly, the function connect is being called more than once, and bm.start references the same thread, most likely created in bm.start_trade_socket (or somewhere else in connect). In short, the first call to bm.start starts a thread, and when it is called again you get that error.
Per the docs, start() starts the thread's activity and should be called at most once per thread object; it will always raise a RuntimeError if called more than once on the same thread object. You have made bm a global BinanceSocketManager object, so every new connection calls start() on the same thread.
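To see this behaviour in isolation, here is a tiny demonstration with a plain threading.Thread (nothing Binance-specific; the sleep target is arbitrary):

import threading
import time

t = threading.Thread(target=time.sleep, args=(0.1,))
t.start()   # first start: fine
t.join()
t.start()   # raises RuntimeError: threads can only be started once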
Please refer to the code below; it may help you:
import json
import time
import asyncio

from channels.generic.websocket import WebsocketConsumer, AsyncConsumer, AsyncJsonWebsocketConsumer
from binance.client import Client
from binance.websockets import BinanceSocketManager


class EchoConsumer(AsyncJsonWebsocketConsumer):
    client = Client('', '')
    trades = client.get_recent_trades(symbol='BNBBTC')
    bm = BinanceSocketManager(client)

    async def connect(self):
        await self.accept()
        await self.send_json('test')
        self.bm.start_trade_socket('BNBBTC', self.process_message)
        self.bm.start()

    def process_message(self, message):
        JSON1 = json.dumps(message)
        JSON2 = json.loads(JSON1)
        # define variables
        Rate = JSON2['p']
        Quantity = JSON2['q']
        Symbol = JSON2['s']
        Order = JSON2['m']
        asyncio.create_task(self.send_json(Rate))
        print(Rate)

Tornado: How to get and return large data with less memory usage?

I have a web crawler and an HTTP interface for it.
The crawler receives grouped URLs as a dictionary. I need to return a result in the same format, as JSON. But I am running into large memory usage that is not returned to the operating system. How can I implement this without the large memory usage?
Code:
#!/usr/bin/env python
# coding=utf-8

import collections

import tornado.web
import tornado.ioloop
import tornado.queues
import tornado.httpclient


class ResponseError(Exception):
    pass


class Crawler(object):

    client = tornado.httpclient.AsyncHTTPClient()

    def __init__(self, groups, concurrency=10, retries=3, validators=None):
        self.groups = groups
        self.concurrency = concurrency
        self.retries = retries
        self.validators = validators or []
        self.requests = tornado.queues.Queue()
        self.responses = collections.defaultdict(list)

    async def worker(self):
        while True:
            await self.consume()

    async def validate(self, response):
        for validator in self.validators:
            validator(response)

    async def save(self, response):
        self.responses[response.request.group].append(response.body.decode('utf-8'))

    async def consume(self):
        async for request in self.requests:
            try:
                response = await self.client.fetch(request, raise_error=False)
                await self.validate(response)
                await self.save(response)
            except ResponseError:
                if request.retries < self.retries:
                    request.retries += 1
                    await self.requests.put(request)
            finally:
                self.requests.task_done()

    async def produce(self):
        for group, urls in self.groups.items():
            for url in urls:
                request = tornado.httpclient.HTTPRequest(url)
                request.group = group
                request.retries = 0
                await self.requests.put(request)

    async def fetch(self):
        await self.produce()
        for __ in range(self.concurrency):
            tornado.ioloop.IOLoop.current().spawn_callback(self.worker)
        await self.requests.join()


class MainHandler(tornado.web.RequestHandler):

    async def get(self):
        urls = []
        with open('urls') as f:  # mock
            for line in f:
                urls.append(line.strip())
        crawler = Crawler({'default': urls})
        await crawler.fetch()
        self.write(crawler.responses)


if __name__ == '__main__':
    app = tornado.web.Application(
        (tornado.web.url(r'/', MainHandler),), debug=True
    )
    app.listen(8000)
    tornado.ioloop.IOLoop.current().start()
It looks to me like most of the memory usage is devoted to self.responses. Since you seem to be ordering responses by "group" before writing them to a file, I can understand why you do it this way. One idea is to store them in a database (MySQL or MongoDB or whatever) with the "group" as a column or field value in the database record.
The database might be the final destination of your data, or else it might be a temporary place to store the data until crawler.fetch completes. Then, query all the data from the database, ordered by "group", and write it to the file.
This doesn't solve the problem, it just means that the database process is responsible for most of your memory usage, instead of the Python process. This may be preferable for you, however.
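For example, here is a minimal sketch of the database idea, assuming MongoDB via the asynchronous motor driver; the crawler database and responses collection names are purely illustrative:

import motor.motor_tornado
import tornado.web

db = motor.motor_tornado.MotorClient()['crawler']

class Crawler(object):
    ...

    async def save(self, response):
        # Store each response in the database instead of accumulating it in memory.
        await db.responses.insert_one({
            'group': response.request.group,
            'body': response.body.decode('utf-8'),
        })

class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        ...
        await crawler.fetch()
        # Stream the stored documents back, ordered by group, instead of
        # building one large in-memory structure.
        async for doc in db.responses.find().sort('group'):
            self.write(doc['body'])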
