A blocked Python async function invocation also block another async function - python

I use FastAPI to develope data layer APIs accessing SQL Server.
No mater using pytds or pyodbc,
if there is a database transaction caused any request hangs,
all the other requests would be blocked. (even without database operation)
Reproduce:
Intentaionally do a serializable SQL Server session, begin a transaction and do not rollback or commit
INSERT INTO [dbo].[KVStore] VALUES ('1', '1', 0)
begin tran
SET TRANSACTION ISOLATION LEVEL Serializable
SELECT * FROM [dbo].[KVStore]
Send a request to the API with async handler function like this:
def kv_delete_by_key_2_sql():
conn = pytds.connect(dsn='192.168.0.1', database=cfg.kvStore_db, user=cfg.kvStore_uid,
password=cfg.kvStore_upwd, port=1435, autocommit=True)
engine = conn.cursor()
try:
sql = "delete KVStore; commit"
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(engine.execute, sql)
rs = future.result()
j = {
'success': True,
'rowcount': rs.rowcount
}
return jsonable_encoder(j)
except Exception as exn:
j = {
'success': False,
'reason': exn_handle(exn)
}
return jsonable_encoder(j)
#app.post("/kvStore/delete")
async def kv_delete(request: Request, type_: Optional[str] = Query(None, max_length=50)):
request_data = await request.json()
return kv_delete_by_key_2_sql()
And send a request to the API of the same app with async handler function like this:
async def hangit0(request: Request, t: int = Query(0)):
print(t, datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3])
await asyncio.sleep(t)
print(t, datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3])
j = {
'success': True
}
return jsonable_encoder(j)
#app.get("/kvStore/hangit/")
async def hangit(request: Request, t: int = Query(0)):
return await hangit0(request, t)
I expected step.2 would hang and step.3 should directly return after 2 seconds.
However step.3 never return if the transaction doesn't commit or rollback...
How do I make these handler functions work concurrently?

The reason is that rs = future.result() is actually a blocking call - see python docs. Unfortunately, executor.submit() doesn't return an awaitable object (concurrent.futures.Future is different from asyncio.Future.
You can use asyncio.wrap_future which takes concurrent.futures.Future and returns asyncio.Future (see python docs). The new Future object is awaitable thus you can convert your blocking function into an async function.
An Example:
import asyncio
import concurrent.futures
async def my_async():
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(lambda x: x + 1, 1)
return await asyncio.wrap_future(future)
print(asyncio.run(my_async()))
In your code, simply change the rs = future.result() to rs = await asyncio.wrap_future(future) and make the whole function async. That should do the magic, good luck! :)

Related

How to map shuffled response to input using python asyncio aiohttp

I make asynchronous post requests to API using asyncio and aiohttp. I send parameter (X, Y) (float, float) to get list of data in response - let's call it scores. The data points coming in response is not in order it got sent, so I can not zip it on index to the input, which I can do using synchronous requests. I tried mapping input to response on parameter (X, Y), which is included in the response, but it gets rounded and decimal points get cut off on the API side. I have no way of finding out what is the exact API rounding mechanism. Also I can't round it before sending request.
Is there a way to somehow tag requests and send it as kind of passive attribute to be able to map responses back?
Or maybe there is other way to map input to response?
I am not sure if my code is needed, but here is a sample.
The scores response has to be matched to corresponding xy input.
btw. Yes I know that one response consists of 1000 xy. You will notice that if you read into _get_scores_async method. It is just the way API is built, that you can send 1000 xy.
import asyncio
import logging
from typing import Awaitable, Dict, List, Union
import aiohttp
import requests
import random
logger = logging.getLogger(__name__)
class APIWrapper:
base_urls = {
"prod": "https://apiprodlink.com/",
"stage": "https://apistagelink.com/",
}
_max_concurrent_connections = 20
def __init__(self, user: str, secret: str, env: str) -> None:
try:
self.base_url = self.base_urls[env]
except KeyError:
raise EnvironmentNotSupported(f"Environment {env} not supported.")
self._user = user
self._secret = secret
#property
def _headers(self) -> Dict:
"""Returns headers for requests"""
return {"Accept": "application/json"}
#property
def _client_session(self) -> aiohttp.ClientSession:
"""Returns aiohttp ClientSession"""
session = aiohttp.ClientSession(
auth=aiohttp.BasicAuth(self._user, self._secret), headers=self._headers
)
return session
async def _post_url_async(
self,
url: str,
session: aiohttp.ClientSession,
semaphore: asyncio.Semaphore,
**params,
) -> Awaitable:
"""Creates awaitable post request. To be awaited with async function.
Parameters
----------
url : str
post request will be done to this url
session : aiohttp.ClientSession
instance of ClientSession with auth and headers
semaphore : asyncio.Semaphore
Semaphore with defined max concurrent connections
Returns
-------
Awaitable
Coroutine object from response
"""
async with semaphore, session.post(url=url, json=params) as res:
res.raise_for_status()
response = await res.json()
return response
async def _get_scores_async(
self, xy: List[Tuple]
) -> Awaitable:
"""Creates coroutine of awaitable requests to scores endpoint
Parameters
----------
locations : List[Tuples]
Returns
-------
Awaitable
Coroutine of tasks to be run
"""
PER_REQUEST_LIMIT = 1000
semaphore = asyncio.Semaphore(self._max_concurrent_connections)
tasks = []
async with self._client_session as session:
for batch in range(0, len(xy), PER_REQUEST_LIMIT):
subset = xy[batch : batch + PER_REQUEST_LIMIT]
task = asyncio.create_task(
self._post_url_async(
f"{self.base_url}scores/endpoint",
session,
semaphore,
xy_param=subset,
)
)
tasks.append(task)
responses = await asyncio.gather(*tasks)
return responses
def get_scores(self, xy: List[Tuple]) -> List[Dict]:
"""Get scores for given xy
Parameters
----------
locations : List[Tuple]
Returns
-------
List[Dict]
"""
response = asyncio.run(self._get_scores_async(xy))
return [x for batch in response for x in batch]
if __name__ == "__main__":
api_client = APIWrapper("user", "secret", "prod")
xy = [(random.uniform(1,100),random.uniform(1,100)) for i in range(0,500000)]
scores = api_client.get_scores(xy)

python asyncio asynchronously fetch data by key from a dict when the key becomes available

Like title told, my use case is like this:
I have one aiohttp server, which accept request from client, when i have the request i generate one unique request id for it, and then i send the {req_id: req_pyaload} dict to some workers (the worker is not in python thus running in another process), when the workers complete the work, i get back the response and put them in a result dict like this: {req_id_1: res_1, req_id_2: res_2}.
Then I want my aiohttp server handler to await on above result dict, so when the specific response become available (by req_id) it can send it back.
I build below example code to try to simulate the process, but got stuck in implementing the coroutine async def fetch_correct_res(req_id) which should asynchronously/unblockly fetch the correct response by req_id.
import random
import asyncio
import shortuuid
n_tests = 1000
idxs = list(range(n_tests))
req_ids = []
for _ in range(n_tests):
req_ids.append(shortuuid.uuid())
res_dict = {}
async def fetch_correct_res(req_id):
pass
async def handler(req):
res = await fetch_correct_res(req)
assert req == res, "the correct res for the req should exactly be the req itself."
print("got correct res for req: {}".format(req))
async def randomly_put_res_to_res_dict():
for _ in range(n_tests):
random_idx = random.choice(idxs)
await asyncio.sleep(random_idx / 1000)
res_dict[req_ids[random_idx]] = req_ids[random_idx]
print("req: {} is back".format(req_ids[random_idx]))
So:
Is it possible to make this solution work? how?
If above solution is not possible, what should be the correct solution for this use case with asyncio?
Many thanks.
The only approach i can think of for now to make this work is: pre-created some asyncio.Queue with pre-assigned id, then for each incoming request assign one queue to it, so the handler just await on this queue, when the response come back i put it into this pre-assigned queue only, after the request fulfilled, i collect back the queue to use it for next incoming request. Not very elegant, but will solve the problem.
See if the below sample implementation fulfils your need
basically you want to respond back to the request(id) with your response(unable to predict the order) in an asynchronous way
So at the time of request handling, populate the dict with {request_id: {'event':<async.Event>, 'result': <result>}} and await on asyncio.Event.wait(), once the response is received, signal the event with asyncio.Event.set() which will release the await and then fetch the response from the dict based on the request id
I modified your code slightly to pre-populate the dict with request id and put the await on asyncio.Event.wait() until the signal comes from the response
import random
import asyncio
import shortuuid
n_tests = 10
idxs = list(range(n_tests))
req_ids = []
for _ in range(n_tests):
req_ids.append(shortuuid.uuid())
res_dict = {}
async def fetch_correct_res(req_id, event):
await event.wait()
res = res_dict[req_id]['result']
return res
async def handler(req, loop):
print("incoming request id: {}".format(req))
event = asyncio.Event()
data = {req :{}}
res_dict.update(data)
res_dict[req]['event']=event
res_dict[req]['result']='pending'
res = await fetch_correct_res(req, event)
assert req == res, "the correct res for the req should exactly be the req itself."
print("got correct res for req: {}".format(req))
async def randomly_put_res_to_res_dict():
random.shuffle(req_ids)
for i in req_ids:
await asyncio.sleep(random.randrange(2,4))
print("req: {} is back".format(i))
if res_dict.get(i) is not None:
event = res_dict[i]['event']
res_dict[i]['result'] = i
event.set()
loop = asyncio.get_event_loop()
tasks = asyncio.gather(handler(req_ids[0], loop),
handler(req_ids[1], loop),
handler(req_ids[2], loop),
handler(req_ids[3], loop),
randomly_put_res_to_res_dict())
loop.run_until_complete(tasks)
loop.close()
sample response from the above code
incoming request id: NDhvBPqMiRbteFD5WqiLFE
incoming request id: fpmk8yC3iQcgHAJBKqe2zh
incoming request id: M7eX7qeVQfWCCBnP4FbRtK
incoming request id: v2hAfcCEhRPUDUjCabk45N
req: VeyvAEX7YGgRZDHqa2UGYc is back
req: M7eX7qeVQfWCCBnP4FbRtK is back
got correct res for req: M7eX7qeVQfWCCBnP4FbRtK
req: pVvYoyAzvK8VYaHfrFA9SB is back
req: soP8NDxeQKYjgeT7pa3wtG is back
req: j3rcg5Lp59pQXuvdjCAyZe is back
req: NDhvBPqMiRbteFD5WqiLFE is back
got correct res for req: NDhvBPqMiRbteFD5WqiLFE
req: v2hAfcCEhRPUDUjCabk45N is back
got correct res for req: v2hAfcCEhRPUDUjCabk45N
req: porzHqMqV8SAuttteHRwNL is back
req: trVVqZrUpsW3tfjQajJfb7 is back
req: fpmk8yC3iQcgHAJBKqe2zh is back
got correct res for req: fpmk8yC3iQcgHAJBKqe2zh
This may work (note: I removed UUID in order to know req id in advance)
import random
import asyncio
n_tests = 1000
idxs = list(range(n_tests))
req_ids = []
for i in range(n_tests):
req_ids.append(i)
res_dict = {}
async def fetch_correct_res(req_id):
while not res_dict.get(req_id):
await asyncio.sleep(0.1)
return req_ids[req_id]
async def handler(req):
print("fetching req: ", req)
res = await fetch_correct_res(req)
assert req == res, "the correct res for the req should exactly be the req itself."
print("got correct res for req: {}".format(req))
async def randomly_put_res_to_res_dict(future):
for i in range(n_tests):
res_dict[req_ids[i]] = req_ids[i]
await asyncio.sleep(0.5)
print("req: {} is back".format(req_ids[i]))
future.set_result("done")
loop = asyncio.get_event_loop()
future = asyncio.Future()
asyncio.ensure_future(randomly_put_res_to_res_dict(future))
loop.run_until_complete(handler(10))
loop.close()
Is it the best solution? according to me No, basically its kind of requesting long running job status, and you should have (REST) api for doing the job submission and knowing job status like:
http POST server:port/job
{some job json paylod}
Response: 200 OK {"req_id": 1}
http GET server:port/job/1
Response: 200 OK {"req_id": 1, "status": "in process"}
http GET server:port/job/1
Response: 200 OK {"req_id": 1, "status": "done", "result":{}}

Asyncio and aiohttp returning task instead of results

I have a script to run parallel requests against an API within a class. However, the results I'm getting is basically a task instead of the actual results. Any reason why?
I mimicked the modified Client code on https://pawelmhm.github.io/asyncio/python/aiohttp/2016/04/22/asyncio-aiohttp.html.
import asyncio
from aiohttp import ClientSession
class Requestor:
async def _async_request(self, url, session, sema_sz=10):
sema = asyncio.Semaphore(sema_sz)
async with sema:
async with session.get(url) as response:
req = await response.json()
return req
async def _async_chunk_request(self, url, chunks, headers=None, sema_sz=10):
async with ClientSession(headers=headers) as session:
futures = [asyncio.ensure_future(self._async_request(url.format(chunk), session, sema_sz)) for chunk in chunks]
responses = asyncio.gather(*futures)
await responses
def get_request(self, url, chunks):
loop = asyncio.get_event_loop()
bulk_req = asyncio.ensure_future(self._async_chunk_request(url, chunks))
loop.run_until_complete(bulk_req)
return bulk_req
bulk_req is actually a Task variable and not the results and shows this in PyCharm, Task finished coro=<Requestor._async_chunk_request() done, defined at ...
When I debug, I see that req has a full and proper response value, so there's no issue with that. I feel like it's something to do with the actual gathering of the futures?
Your _chunk_request does not return anything.
async def _chunk_request(...):
...
...
await responses
I made a toy example trying to mimic your process. If I ended _chunk_request like you did, i got the same result - a finished Task with No results. Changing _chunk_request to return something fixed it:
async def _chunk_request(...):
...
...
return await responses
If you only need the return values from the tasks, get_request should return the result of the loop.run_until_complete() call.
My toy example
import asyncio
import random
from pprint import pprint
async def response(n):
asyncio.sleep(random.choice([1,3,5]))
return f'i am {n}'
async def _request(n):
req = await response(n)
#print(req)
return req
async def _chunk_request(chunks):
futures = [asyncio.ensure_future(_request(chunk)) for chunk in chunks]
#pprint(futures)
responses = asyncio.gather(*futures, return_exceptions=True)
#pprint(responses)
return await responses
def get_request(chunks):
loop = asyncio.get_event_loop()
bulk_req = asyncio.ensure_future(_chunk_request(chunks))
return loop.run_until_complete(bulk_req)
In [7]: result = get_request(range(1,6))
In [8]: print(result)
['i am 1', 'i am 2', 'i am 3', 'i am 4', 'i am 5']

Requests/aiohttp: closing response objects

I'm a bit confused about the need to .close() a response object in both requests and aiohttp. (Note that this is a separate instance method than session.close()--I'm talking about the response object itself.)
Does Response (requests) or ClientResponse (aiohttp) ever need explicitly call .close()?
If not, what is the purpose of using the response itself as a context manager? (async with session.request('GET', 'https://www.pastebin.com') below.) Why define the two dunder methods for this if it gets closed implicitly as shown below?
Some simple tests (below) seem to imply that responses are closed automatically when they are defined inside of a Session context manager. (Which itself calls self.close() in __exit__ or __aexit__. But this is the closing of the Session, not the Response object.)
Example - requests
>>> import requests
>>>
>>> with requests.Session() as s:
... resp = s.request('GET', 'https://www.pastebin.com')
... resp.raise_for_status()
... print(resp.raw.closed) # `raw` is urllib3.response.HTTPResponse object
... print(resp.raw._pool)
... print(resp.raw._connection)
... c = resp.text
...
True
HTTPSConnectionPool(host='pastebin.com', port=443)
None
>>>
>>> while 1:
... print(resp.raw.closed)
... print(resp.raw._pool)
... print(resp.raw._connection)
... break
...
True
HTTPSConnectionPool(host='pastebin.com', port=443)
None
Example - aiohttp
>>> import asyncio
>>> import aiohttp
>>>
>>> async def get():
... async with aiohttp.ClientSession() as s:
... # The response is already closed after this `with` block.
... # Why would it need to be used as a context manager?
... resp = await s.request('GET', 'https://www.pastebin.com')
... print(resp._closed)
... print(resp._connection)
... print(resp._released)
... c = await resp.text()
... print()
... print(resp._closed)
... print(resp._connection)
... print(resp._released)
... return c
...
>>> c = asyncio.run(get()) # Python 3.7 +
False
Connection<ConnectionKey(host='pastebin.com', port=443, is_ssl=True, ssl=None, proxy=None, proxy_auth=None, proxy_headers_hash=None)>
False
True
None
False
Here's the source for requests.models.Response. What does "Should not normally need to be called explicitly" mean? What are the exceptions?
def close(self):
"""Releases the connection back to the pool. Once this method has been
called the underlying ``raw`` object must not be accessed again.
*Note: Should not normally need to be called explicitly.*
"""
if not self._content_consumed:
self.raw.close()
release_conn = getattr(self.raw, 'release_conn', None)
if release_conn is not None:
release_conn()
Requests: You need not explicitly call close(). request will automatically close after finished because it bases on urlopen (this is why resp.raw.closed is True), This is the simplified code after i watched session.py and adapters.py:
from urllib3 import PoolManager
import time
manager = PoolManager(10)
conn = manager.connection_from_host('host1.example.com')
conn2 = manager.connection_from_host('host2.example.com')
res = conn.urlopen(url="http://host1.example.com/",method="get")
print(len(manager.pools))
manager.clear()
print(len(manager.pools))
print(res.closed)
#2
#0
#True
Then what did the __exit__ do? It uses to clear PoolManager(self.poolmanager=PoolManager(...)) and proxy.
# session.py
def __exit__(self, *args): #line 423
self.close()
def close(self): #line 733
for v in self.adapters.values():
v.close()
# adapters.py
# v.close()
def close(self): #line 307
self.poolmanager.clear()
for proxy in self.proxy_manager.values():
proxy.clear()
So when should you need to use close() , as the note said Releases the connection back to the pool, because DEFAULT_POOLSIZE = 10(http/https are independent). That means if you want to access more than 10 website with one session , you can chose to close some you do not need otherwise manager will close connection from the first to the newest when you have one more. But actually you need not to care about this , you can specify pool size and it would not waste much time to rebuild connection
aiohttp aiohttp.ClientSession() is using one TCPConnector for all requests. When it triggered __aexit__ , self._connector will be closed.
Edit: s.request() is set up a connection from host but it did not get response. await resp.text() can only be done after got response, if you did not do such step(wait for response), you will exit without having response.
if connector is None: #line 132
connector = TCPConnector(loop=loop)
...
self._connector = connector #line 151
# connection timeout
try:
with CeilTimeout(real_timeout.connect,loop=self._loop):
assert self._connector is not None
conn = await self._connector.connect(
req,
traces=traces,
timeout=real_timeout
)
...
async def close(self) -> None:
if not self.closed:
if self._connector is not None and self._connector_owner:
self._connector.close()
self._connector = None
...
async def __aexit__(self,
...) -> None:
await self.close()
This is code to show what i said
import asyncio
import aiohttp
import time
async def get():
async with aiohttp.ClientSession() as s:
# The response is already closed after this `with` block.
# Why would it need to be used as a context manager?
resp = await s.request('GET', 'https://www.stackoverflow.com')
resp2 = await s.request('GET', 'https://www.github.com')
print("resp:",resp._closed)
print("resp:",resp._connection)
print("resp2:",resp2._closed)
print("resp2:",resp2._connection)
s.close()
print(s.closed)
c = await resp.text()
d = await resp2.text()
print()
print(s._connector)
print("resp:",resp._closed)
print("resp:",resp._connection)
print("resp2:",resp2._closed)
print("resp2:",resp2._connection)
loop = asyncio.get_event_loop()
loop.run_until_complete(get()) # Python 3.5 +
#dead loop

Graphene run all resolvers in a context manager

Aiohttp provides a context manager to create client session. It's recommended to use one session per many http queries ( in most cases per application ) https://aiohttp.readthedocs.io/en/stable/client_quickstart.html#make-a-request
But graphene uses resolvers needs to be declared as a class method:
http://docs.graphene-python.org/en/latest/execution/execute/
For graphene also exists asyncio executor https://github.com/graphql-python/aiohttp-graphql
Is there any way to execute all resolvers in async with context?
Example:
async def get_task(session, api_url, id):
""" Function to resolve task from rest API"""
async with session.get(api_url+id) as response:
return await response.json()
class Query(graphene.ObjectType):
task = graphene.Field(Task)
async def resolve_task(self, info, session, id=1):
"""This method needs to be run
in async with aiohttp.ClientSession() as session:
context"""
return await get_task(session, url, id)
I think about decorator or middleware with global variable, but it looks ugly. Is there more estate and pythonic way to do it?
I would use context for that. See https://docs.graphene-python.org/en/latest/execution/execute/
Example:
import aiohttp
import asyncio
import graphene
from graphql.execution.executors.asyncio import AsyncioExecutor
from pprint import pprint
async def get_task(session, api_url, id):
async with session.get(api_url + str(id)) as response:
print(f'> Retrieving {id} using session {session}')
return await response.json()
class Query(graphene.ObjectType):
task = graphene.Field(
type=graphene.types.json.JSONString,
id=graphene.Int())
async def resolve_task(self, info, id=1):
return await get_task(
session=info.context['session'],
api_url=info.context['api_url'],
id=id)
schema = graphene.Schema(query=Query)
async def main():
query = '''
query q1 {
t1: task(id: 1)
t2: task(id: 2)
}
'''
async with aiohttp.ClientSession() as session:
res = await schema.execute(
query,
context={
'session': session,
'api_url': 'https://jsonplaceholder.typicode.com/todos/',
},
executor=AsyncioExecutor(loop=asyncio.get_running_loop()),
return_promise=True)
assert not res.errors, repr(res.errors)
pprint(res.data, width=150)
if __name__ == '__main__':
asyncio.run(main())
Output:
$ python3 example.py
> Retrieving 2 using session <aiohttp.client.ClientSession object at 0x10917bfd0>
> Retrieving 1 using session <aiohttp.client.ClientSession object at 0x10917bfd0>
OrderedDict([('t1', '{"userId": 1, "id": 1, "title": "delectus aut autem", "completed": false}'),
('t2', '{"userId": 1, "id": 2, "title": "quis ut nam facilis et officia qui", "completed": false}')])

Categories