Here is the aiohttp server initialising:
# Create the aiohttp application and register one catch-all route per HTTP
# method; '{endpoint:.*}' captures the entire request path into match_info.
self.app = web.Application()
self.app.add_routes([
    web.get('/{endpoint:.*}', self.handle),
    web.post('/{endpoint:.*}', self.handle),
    web.options('/{endpoint:.*}', self.handle),
    web.put('/{endpoint:.*}', self.handle),
])
# Standard programmatic aiohttp startup: the runner wraps the app and the
# TCP site binds it (host/port presumably come from self.config — confirm).
runner = web.AppRunner(self.app)
await runner.setup()
site = web.TCPSite(runner, **self.config)
await site.start()
and handle function looks something like this:
async def handle(self, request):
    """Dispatch the catch-all route by matching the path against a URL mask.

    The original compared the raw path to the literal string
    'api/user/{user_id}/', which can never equal a real URL such as
    'api/user/42/'.  Translate the aiohttp-style mask into a regular
    expression instead, so placeholder segments match actual values.
    """
    import re  # local import: this snippet has no import section of its own

    endpoint = request.match_info['endpoint']
    mask = 'api/user/{user_id}/'
    # '{name}' placeholders match exactly one non-empty path segment.
    pattern = re.sub(r'\{[^/}]+\}', '[^/]+', mask)
    if re.fullmatch(pattern, endpoint):
        some_stuff()
How can I check if the request URL matches the endpoint mask? It's easy to add this mask to add_routes, but I'm looking for an alternative solution without any changes in initialising.
Related
from sanic import Blueprint
from sanic.response import json
from sanic import Sanic

app = Sanic('test')

# Version 1 blueprint.
bpv1 = Blueprint('bpv1', version=1)

@bpv1.route('/hello')  # the '#' in the original was a markdown-mangled '@'
async def root(request):
    return json('hello v1')

app.blueprint(bpv1)

# Version 2: copy v1 and try to re-register /hello — this is what raises
# RouteExists, because the copy already carries the v1 /hello route.
bpv2 = bpv1.copy('bpv2', version=2)

@bpv2.route('/hello')  # mangled '@' restored
async def root(request):  # intentional redefinition in the example
    return json('hello v2')

app.blueprint(bpv2)
I want to partially override the implementation of a route when the handlers belong to different blueprints, but it raises sanic_routing.exceptions.RouteExists.
How can I achieve this?
I got the answer from the Sanic forum.
bpv2 = bpv1.copy("bpv2", version=2)

# Drop the copied /hello route so the new blueprint can register its own
# handler; _future_routes is a set of route objects, hence the set
# comprehension.
bpv2._future_routes = {
    route for route in bpv2._future_routes if route.uri != "/hello"
}

@bpv2.route("/hello")  # the '#' in the original was a markdown-mangled '@'
async def root2(request):
    return json("hello v2")
link
https://community.sanicframework.org/t/how-to-overwrite-a-route-when-using-blueprint-copy/1067
I'm trying to add metrics to external services with aioprometheus in an app built with FastAPI. Here is a simplified example of what I'm trying to achieve.
Say I have a wrapper App class as such:
from aioprometheus import Registry, Counter, Histogram
from fastapi import FastAPI
class App:
    """FastAPI wrapper that wires aioprometheus request metrics into the app."""

    def __init__(self):  # original showed (self, ...); extra config params elided
        self.registry = Registry()
        # Per-path / per-status request counter.
        self.counter = Counter(
            name="counts", doc="request counts"
        )
        # Per-path request latency histogram (bucket bounds in seconds).
        self.latency = Histogram(
            name="latency",
            doc="request latency",
            buckets=[0.1, 0.5, 1, 1.5, 2]
        )
        self.app = FastAPI()
        self._metrics()

    def _metrics(self):
        """Attach the metric middlewares and the startup hook.

        The '#' prefixes on the decorators in the original snippet were
        markdown-mangled '@' signs — without them none of these handlers
        is ever registered.
        """
        import time  # used by the latency middleware; missing from the original imports

        # Counter metrics
        @self.app.middleware("http")
        async def counter_metrics(request, call_next):
            response = await call_next(request)
            self.counter.inc(
                {"path": str(request.url.path), "status": response.status_code}
            )
            return response

        # Latency metrics
        @self.app.middleware("http")
        async def latency_metrics(request, call_next):
            start = time.time()
            response = await call_next(request)
            total_duration = time.time() - start
            self.latency.observe(
                {"path": str(request.url.path)}, total_duration
            )
            return response

        @self.app.on_event("startup")
        async def startup():
            self.app.include_router(some_router(...))
            self.registry.register(self.counter)
            self.registry.register(self.latency)
Basically, I have Registry, Counter, and Histogram initiated. In _metrics, I have Counter and Histogram specific logics that are later added to Registry. This will do its magic and catch the metrics when an endpoint in some_router is called (this is good! I would want to keep this, as well as having the external service metrics).
However, say I call an external service from some_router as such:
from fastapi import APIRouter
def some_router():
    """Build and return the router.

    The inner endpoint was renamed: in the original both the factory and the
    handler were called ``some_router``, so the handler shadowed the factory.
    The '#' on the decorator was a markdown-mangled '@'.
    """
    router = APIRouter()

    @router.get("/some_router")
    async def some_router_endpoint():
        response = await external_service()
        return response

    return router
In this case, how would I add metrics specifically to external_service, i.e., Latency of this specific external service?
As per the documentation, you would need to attach your metrics to the app instance using the generic app.state attribute (see the implementation of Starlette's State class as well), so they can easily be accessed in the route handler—as metrics are often created in a different module than where they are used (as in your case). Thus, you could use the following in your App class, after instantiating the metrics:
# Expose the metric objects on the shared application state so route
# handlers can reach them via request.app.state.
# NOTE(review): in the App class above these live on self (self.registry,
# self.counter, self.latency) — confirm the bare names are in scope here.
self.app.state.registry = registry
self.app.state.counter = counter
self.app.state.latency = latency
In your routers module, you could get the app instance using the Request object, as described here and here, and then use it to get the metrics instances (as shown below), which will let you add metrics to your external_service:
from fastapi import Request
...

@router.get("/some_router")  # the '#' in the original was a markdown-mangled '@'
async def some_router(request: Request):
    # Pull the shared metric objects off the application state; they can now
    # be used to instrument external_service.
    registry = request.app.state.registry
    counter = request.app.state.counter
    latency = request.app.state.latency
    response = await external_service()
    return response
Hi, I have my middleware written like this:
@app.middleware("http")  # the '#' in the original was a markdown-mangled '@'
async def request_middleware(request, call_next):
    """Tag every request with an id, log its lifecycle, and stamp the id
    onto the response headers."""
    end_point = request.url.path
    # NOTE(review): a module-level global is shared across concurrent
    # requests and can be clobbered — a ContextVar or request.state is the
    # safe alternative (see the answers below).
    global request_id
    request_id = get_request_id()
    with logger.contextualize(request_id=request_id, end_point=end_point):
        logger.info("----------Request started----------")
        try:
            response = await call_next(request)
        except Exception as ex:
            # Swallow handler failures and answer with an empty JSON body.
            logger.error(f"Request failed: {ex}")
            response = JSONResponse()
        finally:
            response.headers["X-Request-Id"] = request_id
            logger.info("----------Request ended----------")
        return response
I want the request_id defined in the middleware to be accessible in other functions. How can I do that?
Instead of a global request_id, you can use a context variable, which is not shared between async tasks
from contextvars import ContextVar

# One context variable per piece of request-scoped data; each async task
# sees its own value, so concurrent requests cannot clobber each other.
req_id: ContextVar[str] = ContextVar('req_id', default='')

# Inside your middleware: bind the id for the current request's context.
req_id.set(get_request_id())

# Inside other functions — even in different files — import the variable
# and read the value bound for the current request.
req_id.get()
Another 2 solutions:
Store data in request.state in your middleware, and then access request.state in the view functions. More info in https://fastapi.tiangolo.com/tutorial/sql-databases/?h=request.#about-requeststate
Or use starlette-context, just like g in flask, which is much easier
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

app.add_middleware(RawContextMiddleware)

@app.middleware("http")  # the '#' in the original was a markdown-mangled '@'
async def request_middleware(request, call_next):
    request_id = get_request_id()
    context['request_id'] = request_id
    # The original never invoked the rest of the chain: an HTTP middleware
    # must call call_next and return its response.
    return await call_next(request)

@router.post('/foobar')  # route paths must start with '/'; original had 'foobar'
async def foobar():      # original had the typo 'foorbar'
    context['request_id']
I have web-crawler and http interface for it.
The crawler receives grouped URLs as a dictionary, and I need to return the result in the same format as JSON. But I ran into high memory usage that is never returned to the operating system. How can I implement this without such large memory usage?
Code:
#!/usr/bin/env python
# coding=utf-8
import collections
import tornado.web
import tornado.ioloop
import tornado.queues
import tornado.httpclient
class ResponseError(Exception):
    """Raised by validators to reject an unacceptable fetched response."""
class Crawler(object):
    """Fetch grouped URLs concurrently; collect response bodies per group."""

    # Shared AsyncHTTPClient, created once at class-definition time and
    # reused by every Crawler instance.
    client = tornado.httpclient.AsyncHTTPClient()

    def __init__(self, groups, concurrency=10, retries=3, validators=None):
        # groups: mapping of group name -> iterable of URLs (see produce()).
        self.groups = groups
        self.concurrency = concurrency          # number of worker coroutines
        self.retries = retries                  # max re-queues per failing request
        self.validators = validators or []      # callables run on each response
        self.requests = tornado.queues.Queue()  # pending HTTPRequest objects
        # Every decoded body is accumulated here, keyed by group — this is
        # the structure responsible for the large memory footprint.
        self.responses = collections.defaultdict(list)

    async def worker(self):
        # Run forever; consume() blocks on the queue between items.
        while True:
            await self.consume()

    async def validate(self, response):
        # A validator may raise (e.g. ResponseError) to reject a response.
        for validator in self.validators:
            validator(response)

    async def save(self, response):
        # Decode and keep the body, grouped by the tag attached in produce().
        self.responses[response.request.group].append(response.body.decode('utf-8'))

    async def consume(self):
        async for request in self.requests:
            try:
                # raise_error=False: HTTP error statuses are left to validators.
                response = await self.client.fetch(request, raise_error=False)
                await self.validate(response)
                await self.save(response)
            except ResponseError:
                # Re-queue rejected requests until the retry budget runs out.
                if request.retries < self.retries:
                    request.retries += 1
                    await self.requests.put(request)
            finally:
                self.requests.task_done()

    async def produce(self):
        # Enqueue one HTTPRequest per URL, tagging each with its group and a
        # retry counter (extra attributes set dynamically on the request).
        for group, urls in self.groups.items():
            for url in urls:
                request = tornado.httpclient.HTTPRequest(url)
                request.group = group
                request.retries = 0
                await self.requests.put(request)

    async def fetch(self):
        # Fill the queue, spawn the workers, then wait for the queue to drain.
        await self.produce()
        for __ in range(self.concurrency):
            tornado.ioloop.IOLoop.current().spawn_callback(self.worker)
        await self.requests.join()
class MainHandler(tornado.web.RequestHandler):
    async def get(self):
        """Crawl every URL from the 'urls' file and return the grouped bodies."""
        urls = []
        with open('urls') as f:  # mock input source: one URL per line
            for line in f:
                urls.append(line.strip())
        crawler = Crawler({'default': urls})
        await crawler.fetch()
        # The whole result dict is held in memory and serialised in one go.
        self.write(crawler.responses)
if __name__ == '__main__':
    # debug=True enables autoreload and verbose tracebacks — development only.
    app = tornado.web.Application(
        (tornado.web.url(r'/', MainHandler),), debug=True
    )
    app.listen(8000)
    tornado.ioloop.IOLoop.current().start()
It looks to me like most of the memory usage is devoted to self.responses. Since you seem to be ordering responses by "group" before writing them to a file, I can understand why you do it this way. One idea is to store them in a database (MySQL or MongoDB or whatever) with the "group" as column or field value in the database record.
The database might be the final destination of your data, or else it might be a temporary place to store the data until crawler.fetch completes. Then, query all the data from the database, ordered by "group", and write it to the file.
This doesn't solve the problem, it just means that the database process is responsible for most of your memory usage, instead of the Python process. This may be preferable for you, however.
I have a simple coroutine register that accepts login and password
as post arguments, then it goes into the database and so on.
The problem I have is that I do not know how to test the coroutine.
I followed examples from
https://aiohttp.readthedocs.io/en/latest/testing.html.
And everything seemed easy until I started writing tests myself.
Code for test_register.py
from main import make_app
import pytest  # needed by the fixture decorator; missing from the original snippet

pytest_plugins = 'aiohttp.pytest_plugin'

@pytest.fixture  # the '#' in the original was a markdown-mangled '@'
def cli(loop, test_client):
    # Build the app on the test loop and hand back a ready-made test client.
    return loop.run_until_complete(test_client(make_app))
async def test_register(cli):
    # POST valid-looking credentials; the handler should answer 200.
    resp = await cli.post('/register', data={'login': 'emil', 'password': 'qwerty'})
    assert resp.status == 200
    # NOTE(review): `text` is fetched but never asserted on.
    text = await resp.text()
And register.py
from settings import db

async def register(request):
    """Handle POST /register: read credentials and look the login up in Mongo."""
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    # Uses the module-level `db` from settings — this global binds a client
    # outside the test's event loop, which is presumably why the test hangs
    # here (see the answer below).
    matches = await db.users.find({'login': login}).count()
    ...
main.py
from aiohttp import web
from routes import routes
def make_app(loop=None):
    """Application factory: build the aiohttp app and register every route."""
    application = web.Application(loop=loop)
    for entry in routes:
        application.router.add_route(entry.method, entry.url, entry.handler)
    return application
def main():
    # Run the blocking production server (aiohttp picks the default host/port).
    web.run_app(make_app())

if __name__ == "__main__":
    main()
settings.py
from motor.motor_asyncio import AsyncIOMotorClient

DBNAME = 'testdb'
# NOTE(review): module-level client — it binds to whatever event loop exists
# at import time, which is presumably what makes the test hang; the answer
# below moves the client into the application factory instead.
db = AsyncIOMotorClient()[DBNAME]
And then I ran py.test test_register.py and it got stuck on database operation
matches = await db.users.find({'login': login}).count()
The root of your problem is global variable usage.
I suggest the following changes:
from aiohttp import web
from motor.motor_asyncio import AsyncIOMotorClient
from routes import routes

def make_app(loop=None):
    """Application factory: build the app and own the Mongo client lifecycle."""
    app = web.Application(loop=loop)
    DBNAME = 'testdb'
    # Tie the client to the loop the app runs on — the key change that lets
    # tests drive the database from their own loop.
    mongo = AsyncIOMotorClient(io_loop=loop)
    db = mongo[DBNAME]
    # Store the handle in the application so handlers reach it via request.app.
    app['db'] = db

    # Close the client when the application shuts down.
    async def cleanup(app):
        mongo.close()

    app.on_cleanup.append(cleanup)
    for route in routes:
        app.router.add_route(route.method, route.url, route.handler)
    return app
register.py
async def register(request):
    """Handle POST /register using the app-scoped database connection."""
    post_data = await request.post()
    print('Gotta: ', post_data)
    login, password = post_data['login'], post_data['password']
    # The db now comes from the application storage instead of a module-level
    # global, so tests and production can supply their own client.
    matches = await request.app['db'].users.find(
        {'login': login}).count()
    ...
Storing commonly used objects in the application's storage is the recommended way to handle database connections and similar shared resources.