I'm trying to port some code to asyncio.
My problem is the @property methods which, if not yet cached, need a network request to complete.
While I can await them without a problem in most contexts, using such a property in the __str__ method fails, which is somewhat expected.
What is the most Pythonic way to solve this, other than caching the value on instance creation or just not using asyncio?
Here is an example illustrating the issue:
import requests
from typing import Union

class Example(object):
    _username: Union[str, None] = None

    @property
    def username(self):
        if not self._username:
            r = requests.get('https://api.example.com/getMe')
            self._username = r.json()['username']
        return self._username

    def __str__(self):
        return f"Example(username={self.username!r})"

e = Example()
str(e)
So far I have come up with the following.
I could substitute @property with @async_property (PyPI, GitHub, Docs),
and requests with httpx (PyPI, GitHub, Docs).
However, it is still not working.
My attempt at porting it to asyncio:
import httpx
from typing import Union
from async_property import async_property

class Example(object):
    _username: Union[str, None] = None

    @async_property
    async def username(self):
        if not self._username:
            async with httpx.AsyncClient() as client:
                r = await client.get('https://www.example.org/')
                self._username = r.json()['username']
        return self._username

    async def __str__(self):
        return f"Example(username={await self.username!r})"

e = Example()
str(e)
Fails with TypeError: __str__ returned non-string (type coroutine).
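One possible workaround (a sketch of my own, not from the original post): since __str__ must stay synchronous, let it read whatever value is already cached and expose a separate awaitable method that guarantees the value has been fetched.

import httpx

class Example(object):
    _username = None  # cached after the first fetch

    async def fetch_username(self):
        # Populate the cache on first use; later calls are cheap.
        if self._username is None:
            async with httpx.AsyncClient() as client:
                r = await client.get('https://www.example.org/')
                self._username = r.json()['username']
        return self._username

    def __str__(self):
        # Synchronous: only ever reads the cache, never awaits.
        return f"Example(username={self._username!r})"

    async def to_string(self):
        # Awaitable variant that guarantees the value is present.
        await self.fetch_username()
        return str(self)

This keeps str(e) valid in every context, at the cost of possibly showing None before the first fetch.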
Related
To start, I'm working with aiosmtpd and am trying to write a class that wraps around it to start the SMTP server programmatically with StartTLS. Up until very recently, this code worked as expected with any handler you might pass into it, such as a basic Message handler I wrote that adjusts parameters of the message and passes them in as part of the message headers.
import asyncio
import aiosmtpd
import aiosmtpd.controller
import aiosmtpd.handlers
import aiosmtpd.smtp
import email
import regex
import logging
import ssl
EMPTYBYTES = b''
COMMASPACE = ', '
CRLF = b'\r\n'
NLCRE = regex.compile(br'\r\n|\r|\n')
class StartTLSServer(aiosmtpd.controller.Controller):
def __init__(self, handler, ssl_cert_file, ssl_key_file, loop=None, hostname=None,
port=8025, *, ready_timeout=1.0, enable_SMTPUTF8=True, decode_data=False,
require_starttls=True, smtp_ident=None, data_size_limit=10485760,
smtp_timeout=300):
context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
context.load_cert_chain(ssl_cert_file, ssl_key_file)
self.tls_context = context
self.require_starttls = require_starttls
self.enable_SMTPUTF8 = enable_SMTPUTF8
self.decode_data = decode_data
self.smtp_ident = smtp_ident
self.data_size_limit = data_size_limit
self.smtp_timeout = smtp_timeout
super().__init__(handler, loop=loop, hostname=hostname, port=port,
ready_timeout=ready_timeout, enable_SMTPUTF8=enable_SMTPUTF8)
def factory(self):
return aiosmtpd.smtp.SMTP(self.handler, data_size_limit=self.data_size_limit,
enable_SMTPUTF8=self.enable_SMTPUTF8,
decode_data=self.decode_data,
require_starttls=self.require_starttls,
hostname=self.smtp_ident,
ident=self.smtp_ident,
tls_context=self.tls_context,
timeout=self.smtp_timeout)
class MessageHandler(aiosmtpd.handlers.Message):
def __init__(self, message_class=None, *, loop=None):
super().__init__(message_class)
self.loop = loop or asyncio.get_event_loop()
async def handle_DATA(self, server, session, envelope):
message = self.prepare_message(session, envelope)
await self.handle_message(message)
return '250 OK'
def prepare_message(self, session, envelope):
# If the server was created with decode_data True, then data will be a
# str, otherwise it will be bytes.
data = envelope.content
if isinstance(data, bytes):
message = email.message_from_bytes(data, self.message_class)
else:
assert isinstance(data, str), (
'Expected str or bytes, got {}'.format(type(data)))
message = email.message_from_string(data, self.message_class)
message['X-Peer'] = str(session.peer)
message['X-Envelope-MailFrom'] = envelope.mail_from
message['X-Envelope-RcptTo'] = COMMASPACE.join(envelope.rcpt_tos)
return message # This is handed off to handle_message directly.
async def handle_message(self, message):
print(message.as_string())
return
This resides in custom_handlers.py which is then subsequently called in testing via the Python console as follows:
>>> from custom_handlers import StartTLSServer, MessageHandler
>>> server = StartTLSServer(MessageHandler, ssl_cert_file="valid cert path", ssl_key_file="valid key path", hostname="0.0.0.0", port=25, require_starttls=True, smtp_ident="StartTLSSMTPServer01")
>>> server.start()
When I want to stop the test server, I simply do a server.stop(); however, during processing of any message, we get hard-stopped by this error:
Traceback (most recent call last):
File "/home/sysadmin/.local/lib/python3.8/site-packages/aiosmtpd/smtp.py", line 728, in _handle_client
await method(arg)
File "/home/sysadmin/.local/lib/python3.8/site-packages/aiosmtpd/smtp.py", line 1438, in smtp_DATA
status = await self._call_handler_hook('DATA')
File "/home/sysadmin/.local/lib/python3.8/site-packages/aiosmtpd/smtp.py", line 465, in _call_handler_hook
status = await hook(self, self.session, self.envelope, *args)
TypeError: handle_DATA() missing 1 required positional argument: 'envelope'
Now, I can replicate this with ANY handler passed into the SMTP factory.
However, I can't replicate this with plain aiosmtpd and a Debugging handler, as defined in the docs:
aiosmtpd -c aiosmtpd.handlers.Debugging stdout -l 0.0.0.0:8025
... which works fine. Passing the Debugging handler into StartTLSServer, however, causes the same error as the custom MessageHandler class.
Am I missing something obvious about my class that makes it blow up here, in a way that differs from the programmatic usage aiosmtpd expects?
You are missing () in order to instantiate an object of your MessageHandler class:
>>> server = StartTLSServer(MessageHandler(), ...)
When you simply pass MessageHandler without the (), aiosmtpd will try to invoke the plain function MessageHandler.handle_DATA(...) (as opposed to the bound method MessageHandler().handle_DATA(...)).
This regular function takes four arguments: an instance of MessageHandler as its first argument followed by the usual server, session and envelope arguments. This explains why the error message complains about a missing positional argument.
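A minimal, self-contained illustration of that argument mismatch (my own sketch, with placeholder strings standing in for the real server, session, and envelope objects):

import asyncio

class Handler:
    async def handle_DATA(self, server, session, envelope):
        return '250 OK'

async def main():
    h = Handler()
    # Bound method: 'self' is supplied automatically, three arguments expected.
    print(await h.handle_DATA('server', 'session', 'envelope'))
    # Plain function: four arguments expected, so the same three-argument
    # call is short by one and raises the TypeError from the traceback.
    try:
        await Handler.handle_DATA('server', 'session', 'envelope')
    except TypeError as e:
        print(e)  # missing 1 required positional argument: 'envelope'

asyncio.run(main())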
PS: do note that your handle_DATA implementation is superfluous, since it is identical to the implementation in the base class aiosmtpd.handlers.Message; you can just delete it and it should still work fine.
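Trimmed accordingly, the handler reduces to roughly this sketch; only handle_message must be provided, since the base class leaves it unimplemented (keep your prepare_message override if you want the custom X-Envelope-* headers, as the base class writes X-MailFrom / X-RcptTo instead):

import asyncio
import aiosmtpd.handlers

class MessageHandler(aiosmtpd.handlers.Message):
    def __init__(self, message_class=None, *, loop=None):
        super().__init__(message_class)
        self.loop = loop or asyncio.get_event_loop()

    # handle_DATA is inherited unchanged from aiosmtpd.handlers.Message.

    async def handle_message(self, message):
        print(message.as_string())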
The goal
I want to make regular HTTP requests to a REST API. The requests happen at an interval ranging from a few seconds up to multiple hours, depending on the user input. I want to keep a single connection alive and close it in a smart way.
The questions
Is there an existing library that provides features similar to what I wrote with the class SessionOneToN?
Would it be possible to use a single session of type aiohttp.ClientSession that runs forever and is never closed?
What I have tried
What I have tried does work, but I wonder if there is an established library that can achieve these goals.
My code
My application uses the event loop module asyncio, and the HTTP module aiohttp.
My application opens a single session of type aiohttp.ClientSession and shares it between multiple asynchronous callers.
The callers can make their requests concurrently and asynchronously.
Once all callers have received their responses, the aiohttp.ClientSession is closed. A new aiohttp.ClientSession is opened as necessary when a caller makes a new request.
import aiohttp
import asyncio
from contextlib import asynccontextmanager
import sys
url = 'http://stackoverflow.com/questions/64305548/sharing-an-aiohttp-clientsession-between-multiple-asynchronous-callers'
# The callers look like so
async def request():
async with session() as s:
async with s.get(url) as response:
return await response.text()
# The session manager looks like so:
class SessionOneToN:
""" Manage one session that is reused by multiple clients, instantiate a new
session object from the given session_class as required. Provide a context
manager for accessing the session. The session_class returns a context
manager when instantiated, this context manager is referred to as the
internal context manager, and is entered when the first
client enters the context manager returned by context_manager, and is exited
when the last client exits context_manager."""
def __init__(self, session_class):
self.n = 0
self.session_class = session_class
self.context = None
self.aenter_result = None
async def plus(self):
self.n += 1
if self.n == 1:
# self.context: The internal context manager
self.context = context = self.session_class()
# self.aenter_result: The result from entering the internal context
# manager
self.aenter_result = await context.__aenter__()
assert self.aenter_result is not None
return self.aenter_result
def minus(self):
self.n -= 1
if self.n == 0:
return True
def cleanup(self):
self.context = None
self.aenter_result = None
    @asynccontextmanager
async def get_context(self):
try:
aenter_result = await self.plus()
yield aenter_result
except:
if self.minus():
if not await self.context.__aexit__(*sys.exc_info()):
self.cleanup()
raise
self.cleanup()
else:
if self.minus():
await self.context.__aexit__(None, None, None)
self.cleanup()
class SessionOneToNaiohttp:
def __init__(self):
self.sotn = SessionOneToN(aiohttp.ClientSession)
def get_context(self):
return self.sotn.get_context()
sotnaiohttp = SessionOneToNaiohttp()
def session():
return sotnaiohttp.get_context()
response_text = asyncio.run(request())
print(response_text[0:10])
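For the second question above, here is a minimal sketch (my own, not from the original post) of the simpler alternative: one long-lived aiohttp.ClientSession shared by all callers and closed once at shutdown, instead of reference-counted open/close cycles.

import aiohttp
import asyncio

async def main():
    # One session for the whole application lifetime; aiohttp keeps
    # connections alive in its pool between requests.
    async with aiohttp.ClientSession() as session:
        async def request(url):
            async with session.get(url) as response:
                return await response.text()

        texts = await asyncio.gather(
            request('http://example.com/'),
            request('http://example.org/'),
        )
        for text in texts:
            print(text[:10])

asyncio.run(main())

Note that aiohttp's TCPConnector closes idle pooled connections after its keepalive_timeout, so a never-closed session does not necessarily pin sockets open for hours.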
I have a web crawler and an HTTP interface for it.
The crawler gets grouped URLs as a dictionary. I need to return the result in the same format, as JSON. But I ran into large memory usage that is not returned to the operating system. How can I implement this without the large memory usage?
Code:
#!/usr/bin/env python
# coding=utf-8
import collections
import tornado.web
import tornado.ioloop
import tornado.queues
import tornado.httpclient
class ResponseError(Exception):
pass
class Crawler(object):
client = tornado.httpclient.AsyncHTTPClient()
def __init__(self, groups, concurrency=10, retries=3, validators=None):
self.groups = groups
self.concurrency = concurrency
self.retries = retries
self.validators = validators or []
self.requests = tornado.queues.Queue()
self.responses = collections.defaultdict(list)
async def worker(self):
while True:
await self.consume()
async def validate(self, response):
for validator in self.validators:
validator(response)
async def save(self, response):
self.responses[response.request.group].append(response.body.decode('utf-8'))
async def consume(self):
async for request in self.requests:
try:
response = await self.client.fetch(request, raise_error=False)
await self.validate(response)
await self.save(response)
except ResponseError:
if request.retries < self.retries:
request.retries += 1
await self.requests.put(request)
finally:
self.requests.task_done()
async def produce(self):
for group, urls in self.groups.items():
for url in urls:
request = tornado.httpclient.HTTPRequest(url)
request.group = group
request.retries = 0
await self.requests.put(request)
async def fetch(self):
await self.produce()
for __ in range(self.concurrency):
tornado.ioloop.IOLoop.current().spawn_callback(self.worker)
await self.requests.join()
class MainHandler(tornado.web.RequestHandler):
async def get(self):
urls = []
with open('urls') as f: # mock
for line in f:
urls.append(line.strip())
crawler = Crawler({'default': urls})
await crawler.fetch()
self.write(crawler.responses)
if __name__ == '__main__':
app = tornado.web.Application(
(tornado.web.url(r'/', MainHandler),), debug=True
)
app.listen(8000)
tornado.ioloop.IOLoop.current().start()
It looks to me like most of the memory usage is devoted to self.responses. Since you seem to be ordering responses by "group" before writing them to a file, I can understand why you do it this way. One idea is to store them in a database (MySQL or MongoDB or whatever) with the "group" as a column or field value in each database record.
The database might be the final destination of your data, or else it might be a temporary place to store the data until crawler.fetch completes. Then, query all the data from the database, ordered by "group", and write it to the file.
This doesn't solve the problem, it just means that the database process is responsible for most of your memory usage, instead of the Python process. This may be preferable for you, however.
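To make the idea concrete, here is a rough sketch using sqlite3 as a stand-in for whichever database you choose; the table and helper names are made up for illustration:

import sqlite3

conn = sqlite3.connect('responses.db')
conn.execute('CREATE TABLE IF NOT EXISTS responses ("group" TEXT, body TEXT)')

def save(group, body):
    # Called instead of appending to self.responses; each response goes
    # straight to disk, so the Python process holds very little in memory.
    conn.execute('INSERT INTO responses VALUES (?, ?)', (group, body))
    conn.commit()

def dump(path):
    # After crawler.fetch() completes, stream the rows out ordered by group.
    with open(path, 'w') as f:
        for group, body in conn.execute(
                'SELECT "group", body FROM responses ORDER BY "group"'):
            f.write(body)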
I need to call a celery task for each GRPC request, and return the result.
In default GRPC implementation, each request is processed in a separate thread from a threadpool.
In my case, the server is supposed to process ~400 requests in batch mode per second. So one request may have to wait 1 second for the result due to the batch processing, which means the size of the threadpool must be larger than 400 to avoid blocking.
Can this be done asynchronously?
Thanks a lot.
class EventReporting(ss_pb2.BetaEventReportingServicer, ss_pb2.BetaDeviceMgtServicer):
    def ReportEvent(self, request, context):
        res = tasks.add.delay(1, 2)
        result = res.get()  # <-- here I have to block
        return ss_pb2.GeneralReply(message='Hello, %s!' % result.message)
As noted by @Michael in a comment, as of version 1.32, gRPC now supports asyncio in its Python API. If you're using an earlier version, you can still use the asyncio API via the experimental package: from grpc.experimental import aio. An asyncio hello world example has also been added to the gRPC repo. The following code is a copy of the example server:
import logging
import asyncio
from grpc import aio
import helloworld_pb2
import helloworld_pb2_grpc
class Greeter(helloworld_pb2_grpc.GreeterServicer):
async def SayHello(self, request, context):
return helloworld_pb2.HelloReply(message='Hello, %s!' % request.name)
async def serve():
server = aio.server()
helloworld_pb2_grpc.add_GreeterServicer_to_server(Greeter(), server)
listen_addr = '[::]:50051'
server.add_insecure_port(listen_addr)
logging.info("Starting server on %s", listen_addr)
await server.start()
await server.wait_for_termination()
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
asyncio.run(serve())
See my other answer for how to implement the client.
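For reference, a matching asyncio client could look like the following; this is my own minimal sketch built from the same hello-world stubs, not the code from the linked answer:

import asyncio
from grpc import aio

import helloworld_pb2
import helloworld_pb2_grpc

async def run():
    # Connect to the asyncio server started above.
    async with aio.insecure_channel('localhost:50051') as channel:
        stub = helloworld_pb2_grpc.GreeterStub(channel)
        response = await stub.SayHello(helloworld_pb2.HelloRequest(name='you'))
        print('Greeter client received: ' + response.message)

asyncio.run(run())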
It can be done asynchronously if your call to res.get can be done asynchronously (if it is defined with the async keyword).
While grpc.server says it requires a futures.ThreadPoolExecutor, it will actually work with any futures.Executor that calls the behaviors submitted to it on some thread other than the one on which they were passed. Were you to pass to grpc.server a futures.Executor implemented by you that only used one thread to carry out four hundred (or more) concurrent calls to EventReporting.ReportEvent, your server should avoid the kind of blocking that you describe.
In my opinion, this is a good and simple implementation of an async gRPC server, analogous to what aiohttp does for HTTP.
import asyncio
from concurrent import futures
import functools
import inspect
import threading

import grpc  # needed for grpc.server() in __main__ below
from grpc import _server
def _loop_mgr(loop: asyncio.AbstractEventLoop):
asyncio.set_event_loop(loop)
loop.run_forever()
# If we reach here, the loop was stopped.
# We should gather any remaining tasks and finish them.
pending = asyncio.Task.all_tasks(loop=loop)
if pending:
loop.run_until_complete(asyncio.gather(*pending))
class AsyncioExecutor(futures.Executor):
def __init__(self, *, loop=None):
super().__init__()
self._shutdown = False
self._loop = loop or asyncio.get_event_loop()
self._thread = threading.Thread(target=_loop_mgr, args=(self._loop,),
daemon=True)
self._thread.start()
def submit(self, fn, *args, **kwargs):
if self._shutdown:
raise RuntimeError('Cannot schedule new futures after shutdown')
if not self._loop.is_running():
raise RuntimeError("Loop must be started before any function can "
"be submitted")
if inspect.iscoroutinefunction(fn):
coro = fn(*args, **kwargs)
return asyncio.run_coroutine_threadsafe(coro, self._loop)
else:
func = functools.partial(fn, *args, **kwargs)
return self._loop.run_in_executor(None, func)
def shutdown(self, wait=True):
self._loop.stop()
self._shutdown = True
if wait:
self._thread.join()
# --------------------------------------------------------------------------- #
async def _call_behavior(rpc_event, state, behavior, argument, request_deserializer):
context = _server._Context(rpc_event, state, request_deserializer)
try:
return await behavior(argument, context), True
except Exception as e: # pylint: disable=broad-except
with state.condition:
if e not in state.rpc_errors:
details = 'Exception calling application: {}'.format(e)
_server.logging.exception(details)
_server._abort(state, rpc_event.operation_call,
_server.cygrpc.StatusCode.unknown, _server._common.encode(details))
return None, False
async def _take_response_from_response_iterator(rpc_event, state, response_iterator):
try:
return await response_iterator.__anext__(), True
except StopAsyncIteration:
return None, True
except Exception as e: # pylint: disable=broad-except
with state.condition:
if e not in state.rpc_errors:
details = 'Exception iterating responses: {}'.format(e)
_server.logging.exception(details)
_server._abort(state, rpc_event.operation_call,
_server.cygrpc.StatusCode.unknown, _server._common.encode(details))
return None, False
async def _unary_response_in_pool(rpc_event, state, behavior, argument_thunk,
request_deserializer, response_serializer):
argument = argument_thunk()
if argument is not None:
response, proceed = await _call_behavior(rpc_event, state, behavior,
argument, request_deserializer)
if proceed:
serialized_response = _server._serialize_response(
rpc_event, state, response, response_serializer)
if serialized_response is not None:
_server._status(rpc_event, state, serialized_response)
async def _stream_response_in_pool(rpc_event, state, behavior, argument_thunk,
request_deserializer, response_serializer):
argument = argument_thunk()
if argument is not None:
# Notice this calls the normal `_call_behavior` not the awaitable version.
response_iterator, proceed = _server._call_behavior(
rpc_event, state, behavior, argument, request_deserializer)
if proceed:
while True:
response, proceed = await _take_response_from_response_iterator(
rpc_event, state, response_iterator)
if proceed:
if response is None:
_server._status(rpc_event, state, None)
break
else:
serialized_response = _server._serialize_response(
rpc_event, state, response, response_serializer)
print(response)
if serialized_response is not None:
print("Serialized Correctly")
proceed = _server._send_response(rpc_event, state,
serialized_response)
if not proceed:
break
else:
break
else:
break
_server._unary_response_in_pool = _unary_response_in_pool
_server._stream_response_in_pool = _stream_response_in_pool
if __name__ == '__main__':
server = grpc.server(AsyncioExecutor())
# Add Servicer and Start Server Here
link to original:
https://gist.github.com/seglberg/0b4487b57b4fd425c56ad72aba9971be
Hi,
I know this is a common issue for users who are not familiar with async methods...
I want to query the DB with the user id using asyncmongo to check whether the user is logged on, but obviously this doesn't work, and I don't want to call self.render in the callback.
Thanks for your help.
class MainPage(BaseHandler):
def get(self):
if not self.current_user:
#### get no result here
.............
The get_current_user function, using the asyncmongo method:
def get_current_user(self):
user_id = self.get_secure_cookie("user")
if not user_id: return None
self.db.users.find({'user_id': bson.ObjectId(str(user_id))}, limit=1, callback=self._on_response)
def _on_response(self, response, error):
if error:
raise tornado.web.HTTPError(500)
How do I return the value of response instead of calling self.render('template', response)?
See Tornado Asynchronous Handler:
def _on_response(self, response, error):
if error:
raise tornado.web.HTTPError(500)
self.render(str(response))
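An alternative that avoids callbacks entirely (a sketch of my own, assuming an awaitable MongoDB driver such as motor rather than the callback-based asyncmongo, and reusing self.db from the question): get_current_user cannot wait for a result, so resolve the user in prepare, which Tornado allows to be a coroutine, and assign self.current_user directly.

import bson
import tornado.web

class BaseHandler(tornado.web.RequestHandler):
    async def prepare(self):
        # get_current_user() cannot be asynchronous, so resolve the
        # user here and assign self.current_user directly.
        user_id = self.get_secure_cookie("user")  # bytes or None
        if user_id:
            # self.db is assumed to be a motor client; find_one is awaitable.
            self.current_user = await self.db.users.find_one(
                {'user_id': bson.ObjectId(user_id.decode())})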