How to map shuffled responses back to input using Python asyncio and aiohttp

I make asynchronous POST requests to an API using asyncio and aiohttp. I send a parameter (X, Y) (float, float) and get a list of data in response, let's call it scores. The data points in the response do not come back in the order they were sent, so I cannot zip them to the input by index, which I can do with synchronous requests. I tried mapping input to response on the (X, Y) parameter, which is included in the response, but it gets rounded and decimal places get cut off on the API side. I have no way of finding out the exact rounding mechanism the API uses, and I cannot round the values before sending the request.
Is there a way to tag each request with some kind of passive attribute so I can map the responses back?
Or is there another way to map input to response?
I am not sure if my code is needed, but here is a sample.
The scores response has to be matched to the corresponding xy input.
By the way, yes, I know that one request carries 1000 xy pairs; you will notice that if you read the _get_scores_async method. That is just how the API is built: you can send up to 1000 xy per request.
import asyncio
import logging
from typing import Awaitable, Dict, List, Tuple, Union

import aiohttp
import requests
import random

logger = logging.getLogger(__name__)


class EnvironmentNotSupported(Exception):
    """Custom exception, assumed to exist elsewhere in the original project."""


class APIWrapper:
    base_urls = {
        "prod": "https://apiprodlink.com/",
        "stage": "https://apistagelink.com/",
    }
    _max_concurrent_connections = 20

    def __init__(self, user: str, secret: str, env: str) -> None:
        try:
            self.base_url = self.base_urls[env]
        except KeyError:
            raise EnvironmentNotSupported(f"Environment {env} not supported.")
        self._user = user
        self._secret = secret

    @property
    def _headers(self) -> Dict:
        """Returns headers for requests"""
        return {"Accept": "application/json"}

    @property
    def _client_session(self) -> aiohttp.ClientSession:
        """Returns aiohttp ClientSession"""
        session = aiohttp.ClientSession(
            auth=aiohttp.BasicAuth(self._user, self._secret), headers=self._headers
        )
        return session

    async def _post_url_async(
        self,
        url: str,
        session: aiohttp.ClientSession,
        semaphore: asyncio.Semaphore,
        **params,
    ) -> Awaitable:
        """Creates awaitable post request. To be awaited with async function.

        Parameters
        ----------
        url : str
            post request will be done to this url
        session : aiohttp.ClientSession
            instance of ClientSession with auth and headers
        semaphore : asyncio.Semaphore
            Semaphore with defined max concurrent connections

        Returns
        -------
        Awaitable
            Coroutine object from response
        """
        async with semaphore, session.post(url=url, json=params) as res:
            res.raise_for_status()
            response = await res.json()
        return response

    async def _get_scores_async(self, xy: List[Tuple]) -> Awaitable:
        """Creates coroutine of awaitable requests to scores endpoint

        Parameters
        ----------
        xy : List[Tuple]

        Returns
        -------
        Awaitable
            Coroutine of tasks to be run
        """
        PER_REQUEST_LIMIT = 1000
        semaphore = asyncio.Semaphore(self._max_concurrent_connections)
        tasks = []
        async with self._client_session as session:
            for batch in range(0, len(xy), PER_REQUEST_LIMIT):
                subset = xy[batch : batch + PER_REQUEST_LIMIT]
                task = asyncio.create_task(
                    self._post_url_async(
                        f"{self.base_url}scores/endpoint",
                        session,
                        semaphore,
                        xy_param=subset,
                    )
                )
                tasks.append(task)
            responses = await asyncio.gather(*tasks)
        return responses

    def get_scores(self, xy: List[Tuple]) -> List[Dict]:
        """Get scores for given xy

        Parameters
        ----------
        xy : List[Tuple]

        Returns
        -------
        List[Dict]
        """
        response = asyncio.run(self._get_scores_async(xy))
        return [x for batch in response for x in batch]


if __name__ == "__main__":
    api_client = APIWrapper("user", "secret", "prod")
    xy = [(random.uniform(1, 100), random.uniform(1, 100)) for i in range(0, 500000)]
    scores = api_client.get_scores(xy)
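One idea worth noting (my own sketch, not something the API advertises): asyncio.gather returns results in task order, so each returned batch can be paired with the exact subset that produced it; inside a batch, the shuffled points can then be matched back to the sent coordinates by nearest neighbour with a tolerance instead of exact equality. The response field names ("x", "y", "score") below are hypothetical placeholders.

import math
from typing import Dict, List, Optional, Tuple


def match_batch(
    sent: List[Tuple[float, float]],
    scores_batch: List[Dict],
    tol: float = 1e-3,
) -> List[Tuple[Tuple[float, float], Optional[Dict]]]:
    """Pair each sent (x, y) with the closest response point within `tol`."""
    matched = []
    remaining = list(scores_batch)
    for x, y in sent:
        best_idx, best_dist = None, float("inf")
        for i, item in enumerate(remaining):
            dist = math.hypot(item["x"] - x, item["y"] - y)
            if dist < best_dist:
                best_idx, best_dist = i, dist
        if best_idx is not None and best_dist <= tol:
            matched.append(((x, y), remaining.pop(best_idx)))
        else:
            matched.append(((x, y), None))  # no confident match found
    return matched

This is O(n^2) per 1000-point batch, which is usually cheap enough; the tolerance has to be chosen larger than whatever rounding the API applies.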

Related

Problem with getting data from redis using aioredis

I am using Redis to store tokens and their vector representations in a database. First I convert the vector (a list of floats) to a str, and when getting it back I convert the str back to a list of floats; however, "AttributeError: 'int' object has no attribute 'strip'" occurs.
import aioredis  # aioredis==1.3.1
from fastapi import FastAPI, Query  # imports implied by the snippet

app = FastAPI()
redis = None


@app.on_event('startup')
async def startup_event():
    global redis
    redis = await aioredis.create_redis(address=('redis', 6379))


@app.on_event('shutdown')
async def shutdown_event():
    redis.close()
    await redis.wait_closed()


@app.get("/vectorize_token")
async def vectorize_token(
    token: str = Query("python", max_length=250),
    model_name: ModelSelection = ModelSelection.BERT,
):
    # get cache from memory
    cache = await redis.get(token)
    # check value from cache, if exists return it
    if cache is not None:
        vector = [float(char.strip('[,]')) for char in cache]
        return {'query': token, 'vector': vector}
    vector = model_mapping[model_name.value].vectorize_token(token)
    response = QueryResponse(query=token, vector=vector)
    # save cache in memory
    await redis.set(token, str(vector))
    return response
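A likely cause and a possible fix (my assumption, not confirmed by the poster): in aioredis 1.3.1, redis.get returns bytes by default, and iterating over bytes yields ints, which have no .strip method. Storing the vector as JSON and decoding the cached bytes avoids the per-character stripping entirely. A minimal sketch with hypothetical helper names:

import json


async def get_cached_vector(redis, token: str):
    cache = await redis.get(token)                # bytes or None
    if cache is None:
        return None
    return json.loads(cache.decode("utf-8"))      # back to a list of floats


async def set_cached_vector(redis, token: str, vector) -> None:
    await redis.set(token, json.dumps(vector))    # store as JSON text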

How to split array and use several requests.get in parallel with python?

My original request is:
def get_foo_by_bars(authorisation_token: str, bar_ids: list):
    r = requests.get(BASE_URL + "/api/v1/foo/bar",
                     params={"bar_ids": bar_ids, "data_type": "Float"},
                     headers={"Authorization": authorisation_token})
    if r.status_code == 200:
        return r.json()["data"]["data"]
My problem is that bar_ids contains more than 80 elements, so the URL length exceeds 2048 characters. I want to launch several requests in parallel with, for example, 10 bar_ids each, then merge the responses at the end before returning.
That should be possible with asyncio + aiohttp. Unfortunately I have no API to test this against right now, so the following code might have some issues, but it should at least give you an idea:
import asyncio
import aiohttp


# async function to get json result for subset of bar_ids
async def get(session, **kwargs):
    try:
        async with session.get(**kwargs) as response:
            await response.read()
            if response.status == 200:
                return await response.json()
            return {}
    except Exception as exc:
        print(f"ERROR:\n{kwargs}\n{exc}")
        return {}


# async function to split bar_ids into subsets, get their result and join them to the final result
async def main(bar_ids, package_size):
    async with aiohttp.ClientSession() as session:
        packaged_kwargs = [{
            "url": BASE_URL + "/api/v1/foo/bar",
            "params": {"bar_ids": bar_ids[i:i + package_size], "data_type": "Float"},
            "headers": {"Authorization": AUTHORIZATION_TOKEN},
        } for i in range(0, len(bar_ids), package_size)]
        json_list = await asyncio.gather(*[get(session, **kwargs) for kwargs in packaged_kwargs])
        result = {key: value for json_dict in json_list for key, value in json_dict.items()}
        print(result)


# parameters
BASE_URL = "https://www.google.com"
AUTHORIZATION_TOKEN = "823ljf9823puj8ö3"
bar_ids = list(range(100))
package_size = 10

# run
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # only required for windows
asyncio.run(main(bar_ids, package_size))

Python asyncio: link request body with its response in aiohttp

I have async requests: I send 100 requests and get responses from them. I send a body to get a response; the problem is that I cannot join the request body with its response, and sometimes there is no response at all.
I collect the requests into a list:
def __gaterTasks(self, session, total_schets):
    _tasks = []
    _updates = []
    for schet in total_schets:
        _payload_objct = PayloadParams.objects.using('analytic_db').get(payload=schet['payload'])
        _payload_objct.status = True
        _updates.append(_payload_objct)
        _tasks.append(
            session.get(url=self.url, body=schet['payload'])
        )
    PayloadParams.objects.using('analytic_db').bulk_update(_updates, ['status'])
    return _tasks
I need to link body=schet['payload'] with the response I get back from it.
The function that runs them:
async def get_data(self, total_schets):
    async with aiohttp.ClientSession(trust_env=True, auth=aiohttp.BasicAuth()) as session:
        tasks = self.__gaterTasks(session=session, total_schets=total_schets)
        responses = await asyncio.gather(*tasks)
        for each in responses:
            _each = await each.json()
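One way to keep that link (a sketch of my own, not the poster's code): have each task return its payload together with the parsed response, so asyncio.gather yields pairs that are already joined. The sketch assumes a POST with a JSON body, since aiohttp's ClientSession takes data=/json= rather than body=; names here are illustrative.

import asyncio
import aiohttp


async def fetch_one(session: aiohttp.ClientSession, url: str, payload: dict):
    async with session.post(url, json=payload) as resp:
        try:
            data = await resp.json()
        except aiohttp.ContentTypeError:
            data = None  # endpoint returned no usable JSON body
        return payload, data


async def fetch_all(url: str, payloads: list):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_one(session, url, p) for p in payloads]
        return await asyncio.gather(*tasks)  # [(payload, data), ...]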

python asyncio asynchronously fetch data by key from a dict when the key becomes available

As the title says, my use case is this:
I have an aiohttp server that accepts requests from clients. When a request comes in, I generate a unique request id for it and send the {req_id: req_payload} dict to some workers (the workers are not written in Python and run in another process). When the workers complete the work, I get back the responses and put them in a result dict like this: {req_id_1: res_1, req_id_2: res_2}.
Then I want my aiohttp server handler to await on the result dict above, so that when the specific response becomes available (by req_id) it can be sent back.
I built the example code below to try to simulate the process, but I got stuck implementing the coroutine async def fetch_correct_res(req_id), which should fetch the correct response by req_id asynchronously, without blocking.
import random
import asyncio
import shortuuid

n_tests = 1000
idxs = list(range(n_tests))

req_ids = []
for _ in range(n_tests):
    req_ids.append(shortuuid.uuid())

res_dict = {}


async def fetch_correct_res(req_id):
    pass


async def handler(req):
    res = await fetch_correct_res(req)
    assert req == res, "the correct res for the req should exactly be the req itself."
    print("got correct res for req: {}".format(req))


async def randomly_put_res_to_res_dict():
    for _ in range(n_tests):
        random_idx = random.choice(idxs)
        await asyncio.sleep(random_idx / 1000)
        res_dict[req_ids[random_idx]] = req_ids[random_idx]
        print("req: {} is back".format(req_ids[random_idx]))
So:
Is it possible to make this solution work? How?
If the above is not possible, what would be the correct solution for this use case with asyncio?
Many thanks.
The only approach I can think of so far is to pre-create some asyncio.Queue objects with pre-assigned ids, then assign one queue to each incoming request so the handler just awaits on that queue. When the response comes back I put it into that pre-assigned queue only, and after the request is fulfilled I collect the queue back to reuse for the next incoming request. Not very elegant, but it would solve the problem.
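A minimal sketch of that per-request idea (my own, using an asyncio.Future per request id instead of a pre-allocated Queue): the handler awaits its own future, and whatever code receives the worker result resolves that future by request id.

import asyncio
from typing import Dict

pending: Dict[str, asyncio.Future] = {}


async def handler(req_id: str):
    fut = asyncio.get_running_loop().create_future()
    pending[req_id] = fut
    try:
        return await fut              # released when the worker result arrives
    finally:
        del pending[req_id]           # free the slot for the next request


def on_worker_result(req_id: str, result) -> None:
    # called from wherever the worker responses are received
    fut = pending.get(req_id)
    if fut is not None and not fut.done():
        fut.set_result(result)        # wakes up the awaiting handler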
See if the sample implementation below fulfils your need.
Basically, you want to respond to the request (id) with its response (whose order you cannot predict) in an asynchronous way.
So at request-handling time, populate the dict with {request_id: {'event': <asyncio.Event>, 'result': <result>}} and await on asyncio.Event.wait(). Once the response is received, signal the event with asyncio.Event.set(), which releases the await; then fetch the response from the dict based on the request id.
I modified your code slightly to pre-populate the dict with the request id and to await on asyncio.Event.wait() until the signal comes from the response.
import random
import asyncio
import shortuuid

n_tests = 10
idxs = list(range(n_tests))

req_ids = []
for _ in range(n_tests):
    req_ids.append(shortuuid.uuid())

res_dict = {}


async def fetch_correct_res(req_id, event):
    await event.wait()
    res = res_dict[req_id]['result']
    return res


async def handler(req, loop):
    print("incoming request id: {}".format(req))
    event = asyncio.Event()
    data = {req: {}}
    res_dict.update(data)
    res_dict[req]['event'] = event
    res_dict[req]['result'] = 'pending'
    res = await fetch_correct_res(req, event)
    assert req == res, "the correct res for the req should exactly be the req itself."
    print("got correct res for req: {}".format(req))


async def randomly_put_res_to_res_dict():
    random.shuffle(req_ids)
    for i in req_ids:
        await asyncio.sleep(random.randrange(2, 4))
        print("req: {} is back".format(i))
        if res_dict.get(i) is not None:
            event = res_dict[i]['event']
            res_dict[i]['result'] = i
            event.set()


loop = asyncio.get_event_loop()
tasks = asyncio.gather(handler(req_ids[0], loop),
                       handler(req_ids[1], loop),
                       handler(req_ids[2], loop),
                       handler(req_ids[3], loop),
                       randomly_put_res_to_res_dict())
loop.run_until_complete(tasks)
loop.close()
sample response from the above code
incoming request id: NDhvBPqMiRbteFD5WqiLFE
incoming request id: fpmk8yC3iQcgHAJBKqe2zh
incoming request id: M7eX7qeVQfWCCBnP4FbRtK
incoming request id: v2hAfcCEhRPUDUjCabk45N
req: VeyvAEX7YGgRZDHqa2UGYc is back
req: M7eX7qeVQfWCCBnP4FbRtK is back
got correct res for req: M7eX7qeVQfWCCBnP4FbRtK
req: pVvYoyAzvK8VYaHfrFA9SB is back
req: soP8NDxeQKYjgeT7pa3wtG is back
req: j3rcg5Lp59pQXuvdjCAyZe is back
req: NDhvBPqMiRbteFD5WqiLFE is back
got correct res for req: NDhvBPqMiRbteFD5WqiLFE
req: v2hAfcCEhRPUDUjCabk45N is back
got correct res for req: v2hAfcCEhRPUDUjCabk45N
req: porzHqMqV8SAuttteHRwNL is back
req: trVVqZrUpsW3tfjQajJfb7 is back
req: fpmk8yC3iQcgHAJBKqe2zh is back
got correct res for req: fpmk8yC3iQcgHAJBKqe2zh
This may work (note: I removed UUID in order to know req id in advance)
import random
import asyncio

n_tests = 1000
idxs = list(range(n_tests))

req_ids = []
for i in range(n_tests):
    req_ids.append(i)

res_dict = {}


async def fetch_correct_res(req_id):
    while not res_dict.get(req_id):
        await asyncio.sleep(0.1)
    return req_ids[req_id]


async def handler(req):
    print("fetching req: ", req)
    res = await fetch_correct_res(req)
    assert req == res, "the correct res for the req should exactly be the req itself."
    print("got correct res for req: {}".format(req))


async def randomly_put_res_to_res_dict(future):
    for i in range(n_tests):
        res_dict[req_ids[i]] = req_ids[i]
        await asyncio.sleep(0.5)
        print("req: {} is back".format(req_ids[i]))
    future.set_result("done")


loop = asyncio.get_event_loop()
future = asyncio.Future()
asyncio.ensure_future(randomly_put_res_to_res_dict(future))
loop.run_until_complete(handler(10))
loop.close()
Is it the best solution? In my opinion, no. Basically this is a form of polling the status of a long-running job, and you should have a (REST) API for submitting the job and checking its status, like:
http POST server:port/job
{some job json payload}
Response: 200 OK {"req_id": 1}
http GET server:port/job/1
Response: 200 OK {"req_id": 1, "status": "in process"}
http GET server:port/job/1
Response: 200 OK {"req_id": 1, "status": "done", "result":{}}

how to get response_time and response_size while using aiohttp

Is it possible to get response time and response size for each request made using aiohttp?
The documentation seems not to have those properties anywhere.
Thanks
len(await response.text()) will return the size of the decompressed response.
If you want the size of the raw compressed response, you need to set auto_decompress=False when creating the aiohttp.ClientSession. After that you can get it with len(await response.read()).
But that makes response.text() unavailable, since it needs the uncompressed response. To make it available again you will have to decompress the body manually:
import time
import zlib
import brotli

async with aiohttp.ClientSession(auto_decompress=False) as session:
    start = time.monotonic()
    response = await session.get(url='www.test.com')
    response_time = time.monotonic() - start
    response_size = len(await response.read())

    encoding = response.headers['Content-Encoding']
    if encoding == 'gzip':
        response._body = zlib.decompress(response._body, 16 + zlib.MAX_WBITS)
    elif encoding == 'deflate':
        response._body = zlib.decompress(response._body, -zlib.MAX_WBITS)
    elif encoding == 'br':
        response._body = brotli.decompress(response._body)

    response_text = await response.text()
About time.time() from pymotw.com:
Because time.time() looks at the system clock, and the system clock can be changed by the user or system services for synchronizing clocks across multiple computers, calling time.time() repeatedly may produce values that go forwards and backwards. This can result in unexpected behavior when trying to measure durations or otherwise use those times for computation. Avoid those situations by using time.monotonic(), which always returns values that go forward.
The aiohttp docs suggest using loop.time() (which is also monotonic):
async def on_request_start(session, trace_config_ctx, params):
    trace_config_ctx.start = asyncio.get_event_loop().time()


async def on_request_end(session, trace_config_ctx, params):
    elapsed = asyncio.get_event_loop().time() - trace_config_ctx.start
    print("Request took {}".format(elapsed))


trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)
trace_config.on_request_end.append(on_request_end)
async with aiohttp.ClientSession(trace_configs=[trace_config]) as client:
    await client.get('http://example.com/some/redirect/')
One possibility might be:
- measure a point in time before the request
- measure a point in time after the request
- the difference is the response time
With response.text() you get the response body and can determine its length with len().
A small self-contained example could look like this:
import time
import asyncio
from aiohttp import ClientSession


async def fetch(session, url):
    start = time.time()
    async with session.get(url) as response:
        result = await response.text()
        end = time.time()
        print(url, ": ", end - start, "response length:", len(result))
        return result


async def crawl(urls: set):
    async with ClientSession() as session:
        tasks = []
        for url in urls:
            tasks.append(
                fetch(session, url)
            )
        await asyncio.gather(*tasks)


if __name__ == "__main__":
    urlSet = {"https://www.software7.biz/tst/number.php",
              "https://www.software7.biz/tst/number1.php",
              "https://www.software7.biz"}
    asyncio.run(crawl(urlSet))
Test
The two endpoints number.php and number1.php have server-side delays of 3 and 1 seconds respectively, and each returns a two-digit number.
The output in the debug console then looks like this:
https://www.software7.biz : 0.16438698768615723 response length: 4431
https://www.software7.biz/tst/number1.php : 1.249755859375 response length: 2
https://www.software7.biz/tst/number.php : 3.214473009109497 response length: 2
You can get the size of the response content from the headers:
response.headers['content-length']
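A small combined sketch (my own, under the assumptions above): time a single request with a TraceConfig and take the size both from the Content-Length header, which may be absent for chunked responses, and from the downloaded body.

import asyncio
import aiohttp


async def timed_get(url: str):
    timings = {}

    async def on_start(session, trace_config_ctx, params):
        timings["start"] = asyncio.get_event_loop().time()

    async def on_end(session, trace_config_ctx, params):
        timings["elapsed"] = asyncio.get_event_loop().time() - timings["start"]

    trace = aiohttp.TraceConfig()
    trace.on_request_start.append(on_start)
    trace.on_request_end.append(on_end)

    async with aiohttp.ClientSession(trace_configs=[trace]) as session:
        async with session.get(url) as resp:
            header_size = resp.headers.get("Content-Length")  # may be None
            body = await resp.read()
            return timings["elapsed"], header_size, len(body)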
