Disclaimer: I am a total beginner in aiohttp
I was experimenting with aiohttp to handle get requests asynchronously but It turned out to be horribly slower than the pool version of gevent.
GEVENT VERSION
import gevent
from gevent import monkey
monkey.patch_all()
from gevent.pool import Pool
import requests
import time
def pooling_task(url):
requests.get(url)
def pooling_main():
start = time.time()
pool = Pool(10)
urls = [
"http://google.com",
"http://yahoo.com",
"http://linkedin.com",
"http://shutterfly.com",
"http://mypublisher.com",
"http://facebook.com"
]
for url in urls:
pool.apply_async(pooling_task, args=(url,))
pool.join()
end = time.time()
print("POOL TIME {}".format(end-start))
if __name__ == '__main__':
print("POOLING VERSION")
pooling_main()
OUTPUT - POOL TIME 6.299163818359375
Following is the aiohttp version
import aiohttp
import asyncio
import time
import uvloop
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
async def fetch(session, url):
async with session.get(url) as response:
return await response.text()
async def main():
urls = [
"http://google.com",
"http://yahoo.com",
"http://linkedin.com",
"http://shutterfly.com",
"http://mypublisher.com",
"http://facebook.com"]
async with aiohttp.ClientSession() as session:
for url in urls:
await fetch(session, url)
if __name__ == "__main__":
start = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
end = time.time()
print("Time taken {}".format(end - start))
OUTPUT - Time taken 15.399710178375244
I really don't understand why aiohttp is so much slower.
As for gevent version requests.get is still a blocking call , but not for aiohttp.
I expected aiohttp version to be faster.
for url in urls:
await fetch(session, url)
await here means that you don't start downloading next url before previous done. To make all downloadings concurrent you should use something like asyncio.gather.
Modify your code like this:
async with aiohttp.ClientSession() as session:
await asyncio.gather(*[
fetch(session, url)
for url
in urls
])
You'll see huge speedup.
Related
import asyncio
import httpx
from datetime import datetime
async def request_test(url):
async with httpx.AsyncClient() as client:
r = await client.get(url, timeout=None, headers=None)
return len(r.text)
async def main(rest_api_url_list ):
futures = [asyncio.ensure_future(request_test(url)) for url in rest_api_url_list ]
results = await asyncio.gather(*futures)
print(results)
print(len(results))
start = datetime.now()
rest_api_url_list = [~~~~~~~~~~~~~] # 2000EA
loop = asyncio.get_event_loop()
loop.run_until_complete(main(rest_api_url_list ))
loop.close()
end = datetime.now()
Hi,
I have 2000 api adress.
And I need to call 2000 concurrently in one VM.
So, I used the asyncio library to modify the code as above.
But, This solution is not satisfactory.
How can I increase the effect of parallel processing?
I think I have to use multiprocessing and asyncio at the same time.
I found this package aiomultiprocess that seems like it can do both multiprocessing and asyncio.
from aiohttp import request
from aiomultiprocess import Pool
async def get(url):
async with request("GET", url) as response:
return await response.text("utf-8")
async def main():
urls = ["https://jreese.sh", "https://www.google.com", ]
async with Pool() as pool:
async for result in pool.map(get, urls):
print(result)
Trying to run the sample code, though, does absolutely nothing.
Trying to call the main() gives me an error RuntimeWarning: coroutine 'main' was never awaited. I can't find an actual example of how to trigger the code.
The only other question about this isn't answered.
The aiomultiprocess documentation example does not cover how to call the loop. The function needs to be called via asyncio.
import asyncio
from aiohttp import request
from aiomultiprocess import Pool
async def get(url):
async with request("GET", url) as response:
return await response.read()
async def main():
urls = ["https://jreese.sh", "https://www.google.com", ]
async with Pool() as pool:
async for result in pool.map(get, urls):
print(result)
if __name__ == '__main__':
# for Python 3.7
asyncio.run(main())
# for Python 3.6
# loop = asyncio.get_event_loop()
# loop.run_until_complete(main())
I need to know how much Mbytes per second there are in the request with is sent with asyncio in python. I tried with resp.read() or with resp.content.read() but it isn't working.
import aiohttp
import asyncio
async def get_requests(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
result = resp
return result
big_urls = ['http://google.com', 'http://yahoo.com']
loop = asyncio.get_event_loop()
coroutines = [get_requests(url) for url in big_urls]
results = loop.run_until_complete(asyncio.gather(*coroutines))
print(results)
I am trying to use aiohttp to send requests one after another like this
import aiohttp
import asyncio
from datetime import datetime
async def main():
request_url = "https://..."
async with aiohttp.ClientSession() as session:
while True:
print(datetime.now())
async with session.get(request_url) as response:
json_data = await response.json()
print(json_data)
await asyncio.sleep(0.2)
if __name__ == "__main__":
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
So I would expect each datetime print to be 0.2s apart. However, they seem to be about 0.35s apart as I think it takes 0.15s to get the data from the response. Why is this happening? I want it to be asynchronous so it should just go onto the next one?
How can I fix this?
When you use await all next code will wait for end of this code.
If you want to run asyncio code asynchronously, you should use functions like asyncio.gather
import asyncio
import aiohttp
import datetime
async def fetch(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
print('#', response.status)
async def worker(queue):
print('START WORKER')
while True:
url = await queue.get()
await fetch(url)
queue.task_done()
async def control(queue):
while True:
print(datetime.datetime.now())
queue.put_nowait('https://docs.python.org/')
await asyncio.sleep(0.2)
async def main():
queue = asyncio.Queue()
await asyncio.gather(
control(queue),
asyncio.gather(*[worker(queue) for _ in range(10)])
)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
Sending http request and fetching response back takes some time. You need excluding this time from asyncio.sleep() call:
import aiohttp
import asyncio
import time
from datetime import datetime
async def main():
request_url = "https://..."
async with aiohttp.ClientSession() as session:
while True:
print(datetime.now())
t0 = time.monotonic()
async with session.get(request_url) as response:
json_data = await response.json()
print(json_data)
t1 = time.monotonic()
await asyncio.sleep(0.2 - (t1 - t0))
if __name__ == "__main__":
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
This question already has answers here:
Python asyncio.semaphore in async-await function
(3 answers)
Closed 3 years ago.
What is the best approach to deliver say 100k API calls using asyncio async/await with Python 3.7+ The idea is to use 100 tasks in parallel all the time?
What should be avoided is:
1. To start working on all 100k tasks
2. To wait for all 100 parallel tasks to finish so new batch of 100 is scheduled.
This example illustrates the first approach, that is not what is needed.
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text()
async def main():
urls = [
'http://python.org',
'https://google.com',
'http://yifei.me'
]
tasks = []
async with aiohttp.ClientSession() as session:
for url in urls:
tasks.append(fetch(session, url))
htmls = await asyncio.gather(*tasks)
for html in htmls:
print(html[:100])
if __name__ == '__main__':
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
Use semaphore. Semaphores are used to limit concurrent actions. Python's asyncio comes with its own async version of semaphore.
import aiohttp
import asyncio
async def fetch(session, url, sema):
async with sema, session.get(url) as response:
return await response.text()
async def main():
urls = [
'http://python.org',
'https://google.com',
'http://yifei.me',
'other urls...'
]
tasks = []
sema = asyncio.BoundedSemaphore(value=100)
async with aiohttp.ClientSession() as session:
for url in urls:
tasks.append(fetch(session, url, sema))
htmls = await asyncio.gather(*tasks)
for html in htmls:
print(html[:100])
if __name__ == '__main__':
loop = asyncio.get_event_loop()
loop.run_until_complete(main())