I'm trying to access an API with aiohttp but something is causing this code to block each iteration.
def main():
async with aiohttp.ClientSession() as session:
for i, (image, target) in enumerate(dataset_val):
image_bytes = pil_to_bytes(image)
async with session.post('http://localhost:8080/predictions/resnet50', data=image_bytes) as resp:
print(await resp.text())
print(i, flush=True, end='\r')
asyncio.run(main())
As explained by #deceze, await will wait for your result inside your loop. If you want to call everything at the same time, you need to call everything from an external loop and gather the results.
Here's a way of doing it
import asyncio
import aiohttp
async def call(session: aiohttp.ClientSession, url: str, image):
image_bytes = pil_to_bytes(image)
async with session.post(url, data=image_bytes) as response:
return await response.text()
async def call_all(url:str, tasks: list):
async with aiohttp.ClientSession() as session:
results = await asyncio.gather(
*[call(session, url, img) for img, target in tasks],
return_exceptions=True
)
return results
loop = asyncio.get_event_loop()
res = loop.run_until_complete(
call_all('http://localhost:8080/predictions/resnet50', dataset_val)
)
I am executing the below code on a windows pc. I read that, by default, Windows can use only 64 sockets in asyncio loop. I don't know if this is the reason for the error.
import aiohttp
import asyncio
import time
async def download_file(url):
print(f'started downloading{url}')
connector = aiohttp.TCPConnector(limit=60)
async with aiohttp.clientSession(connector) as session:
async with session.get(url) as resp:
content = await resp.read()
print (f'Finished download{url}')
return content
async def write_file(n, content):
filename = f'async_{n}.html'
with open(filename,'wb') as f:
print(f'started writing{filename}')
f.write(content)
print(f'Finished writing{filename}')
async def scrape_task(n,url):
content = await download_file(url)
await write_file(n,content)
async def main():
tasks = []
for n,url in enumerate(open('urls.txt').readlines()):
tasks.append((scrape_task(n, url)))
await asyncio.wait(tasks)
if __name__ == '__main__':
t=time.perf_counter()
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
t2 = time.perf_counter() - t
print(f'Total time taken: {t2:0.2f} seconds')
I made the below changes to limit the connections to 60
connector = aiohttp.TCPConnector(limit=60)
async with aiohttp.clientSession(connector) as session:
I can't figure out where I am going wrong.
I have this code i am using to get the status of a list of websites.
import aiohttp
import asyncio
import json
import sys
import time
async def get_statuses(websites):
statuses = {}
tasks = [get_website_status(website) for website in websites]
for status in await asyncio.gather(*tasks):
if not statuses.get(status):
statuses[status] = 0
statuses[status] += 1
print(json.dumps(statuses))
async def get_website_status(url):
response = await aiohttp.get(url)
status = response.status
response.close()
return status
if __name__ == '__main__':
with open(sys.argv[1], 'r') as f:
websites = f.read().splitlines()
t0 = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(get_statuses(websites))
t1 = time.time()
print("getting website statuses took {0:.1f} seconds".format(t1-t0))
and since get is depreciated await aiohttp.get(url) i edited the code as such
import aiohttp
import asyncio
import json
import sys
import time
async def fetch(session, url):
async with session.get(url) as response:
return await response.text()
async def get_statuses(websites):
statuses = {}
tasks = [get_website_status(website) for website in websites]
for status in await asyncio.gather(*tasks):
if not statuses.get(status):
statuses[status] = 0
statuses[status] += 1
print(json.dumps(statuses))
async def get_website_status(url):
async with aiohttp.ClientSession() as session:
response = await fetch(session, url)
#response = await aiohttp.get(url)
status = response.status
response.close()
return status
if __name__ == '__main__':
with open(sys.argv[1], 'r') as f:
websites = f.read().splitlines()
t0 = time.time()
loop = asyncio.get_event_loop()
loop.run_until_complete(get_statuses(websites))
t1 = time.time()
print("getting website statuses took {0:.1f} seconds".format(t1-t0))
I copied the session code from the docs https://aiohttp.readthedocs.io/en/stable/
However when i run my code i get this error:
c:\asyncio>a.py list.txt
Traceback (most recent call last):
File "C:\asyncio\a.py", line 35, in <module>
loop.run_until_complete(get_statuses(websites))
File "C:\Users\user\AppData\Local\Programs\Python\Python37\lib\asyncio\base_ev
ents.py", line 579, in run_until_complete
return future.result()
File "C:\asyncio\a.py", line 14, in get_statuses
for status in await asyncio.gather(*tasks):
File "C:\asyncio\a.py", line 25, in get_website_status
status = response.status
AttributeError: 'str' object has no attribute 'status'
c:\asyncio>
here is a sample list.txt
https://facebook.com/
https://twitter.com/
https://google.com/
https://youtube.com/
https://linkedin.com/
https://instagram.com/
https://pinterest.com/
get_website_status routine delegates call to fetch function which returns text content response.text(), not the response itself.
That's why , in further, response.status throws an obvious error.
In case if response content is not needed, to fix the error, change fetch function to return the response object:
async def fetch(session, url):
response = await session.get(url)
return response
I have a class that create a url and some json to execute in a post method that looks like that and I was following this guide
import vk_api
from vk_api.execute import VkFunction
import time
from datetime import datetime
import numpy as np
import asyncio
from ratelimit import limits
import requests
import aiohttp
class Execute:
def __init__(self, access_token):
self.access_token = access_token
def posts_to_push(self, posts, limit):
arr = []
data = list(self.posts_chunks_limit(posts, limit))
for i in range(len(data)):
code = f"data.push(API.wall.getById( {{'posts': {data[i]} }} )); "
arr.append(code)
return arr
def posts_execute_command(self, posts): # TODO make async
limit = 100
code = self.posts_to_push(posts, limit)
execute_limit = 25
for i in range(len(code)):
data = ''.join(code[i * execute_limit: (i * execute_limit) + execute_limit])
var = f'var data = []; {data} return data ;'
yield var
async def fetch(url, json_data, session):
async with session.post(url, json=json_data) as response:
return await response.read()
async def result_posts(self, posts):
result = []
command = self.posts_execute_command(posts)
async with aiohttp.ClientSession() as session:
for i in command:
execute = asyncio.ensure_future(self.fetch(url="https://api.vk.com/method/execute",
json_data={
"code": i,
"access_token": self.access_token,
"v": 5.101,
}), session)
result.append(execute)
responses = await asyncio.gather(*result)
print(responses)
async def posts_chunks_limit(self, data, limit):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(data), limit):
await asyncio.sleep(0.1)
yield data[i:i + limit]
def run_async(self, posts):
loop = asyncio.get_event_loop()
loop.run_until_complete(self.result_posts(posts))
and then i run it like this
df = pd.read_csv('/some_path')
arr = []
for i in df['ids']:
arr.append(i)
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(vk.result_posts(arr))
loop.run_until_complete(future)
error message looks like this
Traceback (most recent call last):
File "../test_python.py", line 83, in <module>
loop.run_until_complete(future)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 484, in run_until_complete
return future.result()
File "../test_python.py", line 45, in result_posts
for i in command:
File "../test_python.py", line 29, in posts_execute_command
code = self.posts_to_push(posts, limit)
File "../test_python.py", line 21, in posts_to_push
data = list(self.posts_chunks_limit(posts, limit))
TypeError: 'async_generator' object is not iterable
This is my frist time using aiohttp/asyncio, I find it quite complicated and easy to get lost, may be I can get some directions or solutions in my case ?
In this line:
data = list(self.posts_chunks_limit(posts, limit))
As post_chunks_limit is an async iterator, list doesn't know what to do with it. You need to iterate over it with async for or with an async list comprehension:
data = [x async for x in self.posts_chunks_limit(posts, limit)]
This requires, posts_to_push and posts_execute_command to be defined with async def. Also posts_execute_command must await the call to posts_to_push and result_posts needs to await the call to posts_execute_command.
With the help of #user4815162342 and bunch of SO posts, I was able to fix my issue and my code looks like this.
Issue was I was calling/awaiting a generator which would not be iterable in my result_postsmethod.
import vk_api
from vk_api.execute import VkFunction
import time
from datetime import datetime
import numpy as np
import asyncio
from ratelimit import limits
import requests
import aiohttp
import socket
from concurrent.futures import ThreadPoolExecutor
class Execute: # TODO auth, parsers, limits, timeouts
def __init__(self, access_token):
self.access_token = access_token
async def posts_to_push(self, posts, limit):
arr = []
data = [x async for x in self.posts_chunks_limit(posts, limit)]
for i in range(len(data)):
code = f"data.push(API.wall.getById( {{'posts': {data[i]} }} )); "
arr.append(code)
return arr # < len() = 1000, 1k lists with 100 post IDs inside for 100k total ids
async def posts_execute_command(self, posts): # TODO make async
limit = 100
code = await self.posts_to_push(posts, limit)
execute_limit = 25
for i in range(len(code)):
data = ''.join(code[i * execute_limit: (i * execute_limit) + execute_limit])
var = f'var data = []; {data} return data ;'
print(var, '---var---')
yield var
async def fetch(self, url, json_data, session):
async with session.post(url, data=json_data) as response:
return await response.read()
#limits(calls=1, period=1)
async def result_posts(self, posts):
result = []
command = [i async for i in self.posts_execute_command(posts) ] #<note this iteration
conn = aiohttp.TCPConnector(
family=socket.AF_INET,
verify_ssl=False,)
async with aiohttp.ClientSession(connector=conn) as session:
for i in command:
print('---code---', len(command)) #TODO fix command range that's the bug
execute = asyncio.ensure_future(self.fetch(url="https://api.vk.com/method/execute",
json_data={
"code": i,
"access_token": self.access_token,
"v": 5.101,
}, session = session))
await asyncio.sleep(1)
result.append(execute)
responses = await asyncio.gather(*result)
print(responses, 'responses')
return 'Done'
async def posts_chunks_limit(self, data, limit):
"""Yield successive n-sized chunks from l."""
for i in range(0, len(data), limit):
yield data[i:i + limit]
First of all heres the code:
import random
import asyncio
from aiohttp import ClientSession
import csv
headers =[]
def extractsites(file):
sites = []
readfile = open(file, "r")
reader = csv.reader(readfile, delimiter=",")
raw = list(reader)
for a in raw:
sites.append((a[1]))
return sites
async def bound_fetch(sem, url):
async with sem:
print("doing request for "+ url)
async with ClientSession() as session:
async with session.get(url) as response:
responseheader = await response.headers
print(headers)
async def run():
urls = extractsites("cisco-umbrella.csv")
tasks = []
sem = asyncio.Semaphore(100)
for i in urls:
task = asyncio.ensure_future(bound_fetch(sem, "http://"+i))
tasks.append(task)
headers = await asyncio.wait(*tasks)
print(headers)
def main():
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run())
loop.run_until_complete(future)
if __name__ == '__main__':
main()
As per my last question I'm following this blog post:
https://pawelmhm.github.io/asyncio/python/aiohttp/2016/04/22/asyncio-aiohttp.html
I tried to adapt my code as close as possible to the example implementation but this code is still not making any requests and printing the headers in bound_headers as I wish.
Can somebody spot whats wrong with this code ?
response.headers is a regular property, no need to put await before the call
asyncio.wait on other hand accepts a list of futures and returns (done, pending) pair.
Looks like you should replace await wait() call with await asyncio.gather(*tasks) (gather doc)