import asyncio
import httpx
from datetime import datetime

async def request_test(url):
    async with httpx.AsyncClient() as client:
        r = await client.get(url, timeout=None, headers=None)
        return len(r.text)

async def main(rest_api_url_list):
    futures = [asyncio.ensure_future(request_test(url)) for url in rest_api_url_list]
    results = await asyncio.gather(*futures)
    print(results)
    print(len(results))

start = datetime.now()
rest_api_url_list = [~~~~~~~~~~~~~]  # 2000 URLs
loop = asyncio.get_event_loop()
loop.run_until_complete(main(rest_api_url_list))
loop.close()
end = datetime.now()
Hi,
I have 2,000 API addresses, and I need to call all 2,000 concurrently from one VM.
So I used the asyncio library and modified the code as above.
But this solution is not satisfactory.
How can I increase the degree of parallel processing?
I think I have to use multiprocessing and asyncio at the same time.
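A minimal sketch of that combination, not from the original post: split the URL list into chunks and let each worker process run its own asyncio event loop over its share. The helper names and the n_procs value below are assumptions for illustration.
import asyncio
import concurrent.futures
import httpx

async def fetch_all(urls):
    # One client per process, many concurrent requests inside it
    async with httpx.AsyncClient() as client:
        async def one(url):
            r = await client.get(url)
            return len(r.text)
        return await asyncio.gather(*(one(u) for u in urls))

def worker(urls):
    # Each process drives its own event loop over its chunk of URLs
    return asyncio.run(fetch_all(urls))

def run_in_processes(urls, n_procs=4):  # n_procs is an arbitrary choice
    chunks = [urls[i::n_procs] for i in range(n_procs)]
    with concurrent.futures.ProcessPoolExecutor(n_procs) as pool:
        results = []
        for part in pool.map(worker, chunks):
            results.extend(part)
    return results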
I found the package aiomultiprocess, which seems like it can do both multiprocessing and asyncio.
from aiohttp import request
from aiomultiprocess import Pool

async def get(url):
    async with request("GET", url) as response:
        return await response.text("utf-8")

async def main():
    urls = ["https://jreese.sh", "https://www.google.com"]
    async with Pool() as pool:
        async for result in pool.map(get, urls):
            print(result)
Running the sample code as-is, though, does absolutely nothing.
Calling main() directly gives me the error RuntimeWarning: coroutine 'main' was never awaited. I can't find an actual example of how to trigger the code.
The only other question about this isn't answered.
The aiomultiprocess documentation example does not cover how to start the event loop. The coroutine needs to be run via asyncio:
import asyncio
from aiohttp import request
from aiomultiprocess import Pool

async def get(url):
    async with request("GET", url) as response:
        return await response.read()

async def main():
    urls = ["https://jreese.sh", "https://www.google.com"]
    async with Pool() as pool:
        async for result in pool.map(get, urls):
            print(result)

if __name__ == '__main__':
    # for Python 3.7+
    asyncio.run(main())

    # for Python 3.6
    # loop = asyncio.get_event_loop()
    # loop.run_until_complete(main())
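Note that the if __name__ == '__main__' guard is not optional here: aiomultiprocess starts its workers via multiprocessing, and with start methods like spawn the module is re-imported in every child process, so unguarded top-level code would run again in each worker.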
Based on the solution I got in Running multiple sockets using asyncio in python, I tried to also add the computation part using asyncio.
Setup: Python 3.7.4
import msgpack
import threading
import os
import asyncio
import concurrent.futures
import functools
import numpy as np  # needed below for np.frombuffer
import nest_asyncio
nest_asyncio.apply()

class ThreadSafeElem(bytes):
    def __init__(self, *p_arg, **n_arg):
        self._lock = threading.Lock()
    def __enter__(self):
        self._lock.acquire()
        return self
    def __exit__(self, type, value, traceback):
        self._lock.release()

elem = ThreadSafeElem()

async def serialize(data):
    return msgpack.packb(data, use_bin_type=True)

async def serialize1(data1):
    return msgpack.packb(data1, use_bin_type=True)

async def process_data(data, data1):
    loop = asyncio.get_event_loop()
    future = await loop.run_in_executor(None, functools.partial(serialize, data))
    future1 = await loop.run_in_executor(None, functools.partial(serialize1, data1))
    return await asyncio.gather(future, future1)

################ Calculation #############################
def calculate_data():
    global elem
    while True:
        try:
            # ... data is calculated (some dictionary) ...
            elem, elem1 = asyncio.run(process_data(data, data1))
        except:
            pass
#####################################################################
def get_data():
    return elem

def get_data1():
    return elem1

########### START SERVER AND get data continuously ################
async def client_thread(reader, writer):
    while True:
        try:
            bytes_received = await reader.read(100)
            package_type = np.frombuffer(bytes_received, dtype=np.int8)
            if package_type == 1:
                nn_output = get_data1()
            if package_type == 2:
                nn_output = get_data()
            writer.write(nn_output)
            await writer.drain()
        except:
            pass

async def start_servers(host, port):
    server = await asyncio.start_server(client_thread, host, port)
    await server.serve_forever()

async def start_calculate():
    await asyncio.run(calculate_data())

def enable_sockets():
    try:
        host = '127.0.0.1'
        port = 60000
        sockets_number = 6
        loop = asyncio.get_event_loop()
        for i in range(sockets_number):
            loop.create_task(start_servers(host, port + i))
        loop.create_task(start_calculate())
        loop.run_forever()
    except:
        print("weird exceptions")
##############################################################################
enable_sockets()
The issue is that when I make a call from a client, the server does not give me anything.
I tested the program with dummy data and without asyncio on the calculation part, i.e. without loop.create_task(start_calculate()), and the server responded correctly.
I also ran the calculation without adding it to enable_sockets and it worked. It also runs with this implementation, but the problem is that the server is not returning anything.
I did it like this because I need the calculation part to run continuously, and when one of the clients calls, to return the data at that point.
An asyncio event loop cannot be nested inside another, and there is no point in doing so: asyncio.run (and similar) blocks the current thread until done. This does not increase parallelism, and merely disables any outer event loop.
If you want to nest another asyncio task, directly run it in the current event loop. If you want to run a non-cooperative, blocking task, run it in the event loop executor.
async def start_calculate():
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, calculate_data)
The default executor uses threads – this allows running blocking tasks, but does not increase parallelism. Use a custom ProcessPoolExecutor to use additional cores:
import concurrent.futures

async def start_calculate():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        await loop.run_in_executor(pool, calculate_data)
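One consequence of this design: the callable and its arguments are pickled and shipped to the worker process, so calculate_data must be a plain module-level function and anything it returns must be picklable as well.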
Why do you call asyncio.run() multiple times?
This function always creates a new event loop and closes it at the end. It should be used as the main entry point for asyncio programs, and should ideally only be called once.
I would advise you to read the docs.
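As a minimal sketch of that single-entry-point pattern, with a stand-in handler and an async stand-in for the calculation loop (the host and port numbers are carried over from the question; the rest is illustrative):
import asyncio

async def handle_client(reader, writer):
    # stand-in for the question's client_thread handler
    data = await reader.read(100)
    writer.write(data)
    await writer.drain()

async def calculate_forever():
    while True:
        # ... compute and publish the latest result here ...
        await asyncio.sleep(0)  # yield so the servers stay responsive

async def main():
    host, base_port, sockets_number = '127.0.0.1', 60000, 6
    servers = [await asyncio.start_server(handle_client, host, base_port + i)
               for i in range(sockets_number)]
    await asyncio.gather(calculate_forever(),
                         *(s.serve_forever() for s in servers))

asyncio.run(main())  # the single entry point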
I am trying to use aiohttp to send requests one after another, like this:
import aiohttp
import asyncio
from datetime import datetime

async def main():
    request_url = "https://..."
    async with aiohttp.ClientSession() as session:
        while True:
            print(datetime.now())
            async with session.get(request_url) as response:
                json_data = await response.json()
                print(json_data)
            await asyncio.sleep(0.2)

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
So I would expect each datetime print to be 0.2 s apart. However, they seem to be about 0.35 s apart, as I think it takes 0.15 s to get the data from the response. Why is this happening? I want it to be asynchronous, so it should just go on to the next request.
How can I fix this?
When you use await, the code that follows waits until the awaited call has finished.
If you want to run asyncio code concurrently, you should use functions like asyncio.gather:
import asyncio
import aiohttp
import datetime

async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            print('#', response.status)

async def worker(queue):
    print('START WORKER')
    while True:
        url = await queue.get()
        await fetch(url)
        queue.task_done()

async def control(queue):
    while True:
        print(datetime.datetime.now())
        queue.put_nowait('https://docs.python.org/')
        await asyncio.sleep(0.2)

async def main():
    queue = asyncio.Queue()
    await asyncio.gather(
        control(queue),
        asyncio.gather(*[worker(queue) for _ in range(10)])
    )

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
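Note the design choice here: control() puts a URL on the queue every 0.2 s no matter how long the requests take, and the ten workers drain the queue concurrently, so a slow response delays only its own worker rather than the schedule.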
Sending the HTTP request and fetching the response back takes some time; you need to exclude this time from the asyncio.sleep() call:
import aiohttp
import asyncio
import time
from datetime import datetime

async def main():
    request_url = "https://..."
    async with aiohttp.ClientSession() as session:
        while True:
            print(datetime.now())
            t0 = time.monotonic()
            async with session.get(request_url) as response:
                json_data = await response.json()
                print(json_data)
            t1 = time.monotonic()
            # If the request took longer than 0.2 s the delay is negative,
            # and asyncio.sleep() then returns immediately.
            await asyncio.sleep(0.2 - (t1 - t0))

if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Disclaimer: I am a total beginner with aiohttp.
I was experimenting with aiohttp to handle GET requests asynchronously, but it turned out to be horribly slower than the pool version of gevent.
GEVENT VERSION
import gevent
from gevent import monkey
monkey.patch_all()
from gevent.pool import Pool
import requests
import time

def pooling_task(url):
    requests.get(url)

def pooling_main():
    start = time.time()
    pool = Pool(10)
    urls = [
        "http://google.com",
        "http://yahoo.com",
        "http://linkedin.com",
        "http://shutterfly.com",
        "http://mypublisher.com",
        "http://facebook.com"
    ]
    for url in urls:
        pool.apply_async(pooling_task, args=(url,))
    pool.join()
    end = time.time()
    print("POOL TIME {}".format(end - start))

if __name__ == '__main__':
    print("POOLING VERSION")
    pooling_main()
OUTPUT - POOL TIME 6.299163818359375
The following is the aiohttp version:
import aiohttp
import asyncio
import time
import uvloop

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()

async def main():
    urls = [
        "http://google.com",
        "http://yahoo.com",
        "http://linkedin.com",
        "http://shutterfly.com",
        "http://mypublisher.com",
        "http://facebook.com"
    ]
    async with aiohttp.ClientSession() as session:
        for url in urls:
            await fetch(session, url)

if __name__ == "__main__":
    start = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    end = time.time()
    print("Time taken {}".format(end - start))
OUTPUT - Time taken 15.399710178375244
I really don't understand why aiohttp is so much slower.
In the gevent version requests.get is still a blocking call, but that is not the case for aiohttp.
I expected the aiohttp version to be faster.
for url in urls:
    await fetch(session, url)
await here means that you don't start downloading the next url before the previous one is done. To make all the downloads concurrent, you should use something like asyncio.gather.
Modify your code like this:
async with aiohttp.ClientSession() as session:
    await asyncio.gather(*[
        fetch(session, url)
        for url in urls
    ])
You'll see a huge speedup.
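If you also want to mirror the gevent version's Pool(10) and cap how many requests are in flight at once, one common pattern is an asyncio.Semaphore; the limit of 10 below is simply carried over from the gevent example:
import asyncio
import aiohttp

async def fetch(session, url, sem):
    async with sem:  # at most 10 requests in flight, like gevent's Pool(10)
        async with session.get(url) as response:
            return await response.text()

async def main(urls):
    sem = asyncio.Semaphore(10)
    async with aiohttp.ClientSession() as session:
        return await asyncio.gather(*(fetch(session, url, sem) for url in urls))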
I have the following problem: my code for API requests is really non-deterministic. I use asyncio to make asynchronous requests, because I want to send multiple requests with a high frequency of changes (that's why I am sending 30 identical requests). Sometimes my code executes really quickly, in about 0.5 s, but sometimes it gets stuck after sending, for example, half of the requests. Could anyone spot code bugs which could produce this behavior? Or is it caused by delays in the server's responses?
import asyncio
import time
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        data = await response.json()
        print(data)
        return await response.read()

async def run(r):
    url = "https://www.bitstamp.net/api/ticker/"
    tasks = []
    async with ClientSession() as session:
        for i in range(r):
            task = asyncio.ensure_future(fetch(url.format(i), session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses

t1 = time.time()
number = 30
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(number))
loop.run_until_complete(future)
t2 = time.time()
print(t2 - t1)
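One thing worth checking when tasks appear to get stuck, offered as a guess rather than a diagnosis: the session above sets no explicit timeout, so a slow or dropped server response can stall a task indefinitely. A sketch of adding one (the 10-second total is an arbitrary assumption):
import asyncio
import aiohttp

async def fetch(url, session):
    async with session.get(url) as response:
        return await response.json()

async def run(r):
    url = "https://www.bitstamp.net/api/ticker/"
    timeout = aiohttp.ClientTimeout(total=10)  # assumed value
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [asyncio.ensure_future(fetch(url, session)) for _ in range(r)]
        # return_exceptions=True keeps one timed-out request from
        # cancelling the whole batch
        return await asyncio.gather(*tasks, return_exceptions=True)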