Python package aiohttp shows a warning message "Unclosed client session"

My code is as follows:
import asyncio
import aiohttp

urls = [
    'http://www.163.com/',
    'http://www.sina.com.cn/',
    'https://www.hupu.com/',
    'http://www.csdn.net/'
]

async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    resp = await aiohttp.ClientSession().get(url=u)
    headers = resp.headers
    print(u, headers)
    return headers

async def request_url(u):
    """
    main func
    :param u:
    :return:
    """
    res = await get_url_data(u)
    return res

loop = asyncio.get_event_loop()
task_lists = asyncio.wait([request_url(u) for u in urls])
loop.run_until_complete(task_lists)
loop.close()
When I run my code, it displays a warning message:
Unclosed client session
Can anybody give me a solution for this?
Thanks a lot

You should close the session when you are done with it.
You have two options:
You can close it manually:
import aiohttp
session = aiohttp.ClientSession()
# use the session here
session.close()
Or you can use it with a context manager:
import aiohttp
import asyncio

async def fetch(client):
    async with client.get('http://python.org') as resp:
        assert resp.status == 200
        return await resp.text()

async def main(loop):
    async with aiohttp.ClientSession(loop=loop) as client:
        html = await fetch(client)
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
The client session supports the context manager protocol for self-closing.

If you are not using a context manager, the proper way to close the session also needs an await. Many answers on the internet miss that part, and few people notice it, presumably because most people use the more convenient context manager. But the manual await session.close() is essential when you are closing a class-wide session inside tearDownClass() in unit tests.
import aiohttp
session = aiohttp.ClientSession()
# use the session here
await session.close()
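For example, a minimal sketch of that unit-test pattern (class name, test, and URL here are hypothetical), where a synchronous tearDownClass() drives the close() coroutine to completion on a class-level loop:
import asyncio
import unittest

import aiohttp

class MyServiceTest(unittest.TestCase):  # hypothetical test class
    @classmethod
    def setUpClass(cls):
        cls.loop = asyncio.new_event_loop()
        # create the class-wide session inside the loop it will run on
        cls.session = cls.loop.run_until_complete(cls._make_session())

    @staticmethod
    async def _make_session():
        return aiohttp.ClientSession()

    @classmethod
    def tearDownClass(cls):
        # session.close() is a coroutine, so it still has to be awaited;
        # the class-level loop drives it to completion here
        cls.loop.run_until_complete(cls.session.close())
        cls.loop.close()

    def test_homepage_head(self):  # hypothetical test using the shared session
        async def go():
            async with self.session.head("https://python.org") as resp:
                return resp.status
        status = self.loop.run_until_complete(go())
        self.assertLess(status, 500)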

You should use ClientSession as an async context manager so that its resources are properly acquired and freed:
async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    async with aiohttp.ClientSession() as session:
        resp = await session.get(url=u)
        headers = resp.headers
        print(u, headers)
        return headers
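To run that rewritten get_url_data over the url list from the question, one straightforward wiring is the following minimal sketch (asyncio.run assumes Python 3.7+):
import asyncio

urls = [
    'http://www.163.com/',
    'http://www.sina.com.cn/',
    'https://www.hupu.com/',
    'http://www.csdn.net/',
]

async def main():
    # each coroutine opens and closes its own session via the context manager
    return await asyncio.gather(*(get_url_data(u) for u in urls))

asyncio.run(main())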

Related

aiohttp session closed without exiting the context manager

I have a pretty complicated API with custom parameters and headers so I created a class to wrap around it. Here's a contrived example:
import asyncio
import aiohttp

# The wrapper class around my API
class MyAPI:
    def __init__(self, base_url: str):
        self.base_url = base_url

    async def send(self, session, method, url) -> aiohttp.ClientResponse:
        request_method = getattr(session, method.lower())
        full_url = f"{self.base_url}/{url}"
        async with request_method(full_url) as response:
            return response

async def main():
    api = MyAPI("https://httpbin.org")
    async with aiohttp.ClientSession() as session:
        response = await api.send(session, "GET", "/uuid")
        print(response.status)        # 200 OK
        print(await response.text())  # Exception: Connection closed

asyncio.run(main())
Why is my session closed? I didn't exit the context manager of session.
If I ignore the wrapper class, everything works as expected:
async def main():
    async with aiohttp.ClientSession() as session:
        async with session.get("https://httpbin.org/uuid") as response:
            print(await response.text())
You can't call response.text() once you have left the request_method(full_url) context.
If you write:
async with request_method(full_url) as response:
    text = await response.text()
    return response.status, text
then the send() method returns without error.
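Put together, a corrected send() might look like this sketch; note the return type changes from aiohttp.ClientResponse to a (status, text) tuple, because the body has to be read while the response context is still open:
class MyAPI:
    def __init__(self, base_url: str):
        self.base_url = base_url

    async def send(self, session, method, url):
        request_method = getattr(session, method.lower())
        full_url = f"{self.base_url}/{url}"
        async with request_method(full_url) as response:
            # read the body before the response (and its connection) is released
            text = await response.text()
            return response.status, text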

What is the fastest way to post a million requests with url, headers, and body?

I have a static url, headers, and data.
Is it possible to make a million POST requests simultaneously with Python?
This is the file.py:
import json
import requests
url = "https://abcd.com"
headers = "headers"
body = "body"
resp = requests.post(url, headers=headers, data=body)
json_resp = json.loads(resp.content)["data"]
print(json_resp)
You might want to use a Python load-testing tool for that, such as:
https://locust.io/
Your file would look like:
from locust import HttpUser, task, between

class QuickstartUser(HttpUser):
    @task
    def task_name(self):
        self.client.post(url, headers=headers, data=body)
You would then feed it to locust like this:
locust --headless --users <number_of_users> -f <your_file.py>
You can do this in several ways; the best method, and the whole idea of async work, is to use asyncio with aiohttp. The second method is a ThreadPoolExecutor, which I do not particularly recommend (a sketch of it follows the asyncio example below).
Here is an example of the asyncio approach:
# modified fetch function with semaphore
import random
import asyncio
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        print("{}:{} with delay {}".format(date, response.url, delay))
        return await response.read()

async def bound_fetch(sem, url, session):
    # Getter function with semaphore.
    async with sem:
        await fetch(url, session)

async def run(r):
    url = "http://localhost:8080/{}"
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(1000)
    # Create client session that will ensure we dont open new connection
    # per each request.
    async with ClientSession() as session:
        for i in range(r):
            # pass Semaphore and session to every GET request
            task = asyncio.ensure_future(bound_fetch(sem, url.format(i), session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses

number = 10000
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(number))
loop.run_until_complete(future)
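For comparison, here is a minimal sketch of the ThreadPoolExecutor approach mentioned above, built on the requests library; url, headers, and body are placeholders as in the question (headers is shown as a dict purely for illustration), and thread pools do not scale to millions of requests the way the asyncio version can:
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

url = "https://abcd.com"                        # placeholder from the question
headers = {"Content-Type": "application/json"}  # placeholder headers
body = "body"                                   # placeholder body

def post_once(i):
    # each call runs in its own worker thread and blocks on the response
    resp = requests.post(url, headers=headers, data=body)
    return i, resp.status_code

with ThreadPoolExecutor(max_workers=100) as executor:
    futures = [executor.submit(post_once, i) for i in range(10000)]
    for future in as_completed(futures):
        print(future.result())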

How do I use aiohttp with PyPAC

At my work the system won't let us make HTTP requests without using a proxy. However, the proxy is found by creating a PyPAC session.
How can I use this PyPAC session inside an aiohttp session to make async requests?
So, I've found the answer is to use the PyPAC session to get a proxy, which you can then pass into the aiohttp request:
import asyncio
import aiohttp
import re
from pypac import PACSession, get_pac

pac = get_pac(url="https://somewebaddress.pac")
pac_session = PACSession(pac)

async def _fetch_async(session, url, proxy: bool = False):
    # Get the proxy for this url
    if proxy:
        proxies = pac_session \
            ._get_proxy_resolver(pac) \
            .get_proxy_for_requests(url)
        match = re.search(r"^(\w*)", str(url))
        proxy = proxies[match.group()]
    else:
        proxy = None
    # Fetch with aiohttp session
    async with session.request("get", url, proxy=proxy) as resp:
        return resp

async def _fetch_all_async(urls):
    tasks = []
    async with aiohttp.ClientSession() as session:
        for url in urls:
            tasks.append(
                _fetch_async(
                    session,
                    url
                )
            )
        return await asyncio.gather(*tasks)

def request_all(urls: list = []):
    # Start the loop
    loop = asyncio.get_event_loop()
    # Gather tasks and run
    coro = _fetch_all_async(urls)
    resps = loop.run_until_complete(coro)
    return resps
Hope this helps any fellow sufferers.
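A quick usage sketch for those helpers (the URLs here are just examples; note that _fetch_all_async as written calls _fetch_async with its default proxy=False):
urls = [
    "https://example.com",
    "https://httpbin.org/get",
]

responses = request_all(urls)
for resp in responses:
    # the response objects can still be inspected for url and status
    print(resp.url, resp.status)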

Fetch HEAD request's status asynchronously in aiohttp

This question is regarding aiohttp library usage.
My goal here is to check a list of urls by sending a bunch of HEAD requests, potentially asynchronously, and eventually create a dict of
url: status pairs.
I am new to asyncio and I found a lot of examples where people use GET requests to fetch html, for example, and they use await resp.read() or await resp.text(), and that works fine, but with a HEAD request I don't have a body, I just have headers, that's it. If I try to await resp.status or resp itself as an object, it does not work, as they are not awaitable.
The code below only works synchronously, step by step, and I can't figure out how to make it run asynchronously. It seems like whatever I do with the status turns the code into sync mode somehow...
I would be glad to see your ideas.
Thanks.
import asyncio
import aiohttp

urls_list = [url1, url2, url3, etc, etc, etc, ]
status_dict = {}

async def main():
    async with aiohttp.ClientSession() as session:
        for individual_url in urls_list:
            async with session.head(individual_url) as resp:
                status_dict.update({individual_url: resp.status})

asyncio.run(main())
You can use asyncio.gather:
import asyncio
import aiohttp

urls_list = ["https://google.com", "https://yahoo.com", "http://hello123456789.com"]
status_dict = {}

async def head_status(session, url) -> dict:
    async with session.head(url) as resp:
        return {url: resp.status}

async def main():
    async with aiohttp.ClientSession() as session:
        statuses = await asyncio.gather(*[head_status(session, url) for url in urls_list], return_exceptions=True)
        for a in statuses:
            if not isinstance(a, Exception):
                status_dict.update(a)

asyncio.run(main())
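Since return_exceptions=True turns failed requests (such as the unreachable hello123456789.com) into exception objects in the result list, you could also record those instead of silently skipping them. A small sketch building on the snippet above, mapping failures to None:
async def main():
    async with aiohttp.ClientSession() as session:
        statuses = await asyncio.gather(
            *[head_status(session, url) for url in urls_list],
            return_exceptions=True,
        )
    # gather preserves input order, so results line up with urls_list
    for url, result in zip(urls_list, statuses):
        if isinstance(result, Exception):
            status_dict[url] = None   # DNS failure, timeout, connection error, ...
        else:
            status_dict.update(result)

asyncio.run(main())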

Python Aiohttp: Regarding utility of the Session object

Below is a working piece of code that scrapes links from the website of Interactive Brokers.
In the aiohttp documentation they say to always use the aiohttp.ClientSession() object so that "sessions" are reused from one request to another. But what I can see from the multiple-requests example (here, for instance) is that one session is created per request...? So what is the point of that Session object?
import asyncio

import lxml.html
import requests
from aiohttp import ClientSession

exchanges_by_locs = []
inst_type_dicts = []

async def inst_types(url):
    async with ClientSession() as session:
        async with session.get(url) as response:
            response = await response.text()
            html = lxml.html.fromstring(response)
            p = html.xpath('//*[@id="toptabs"]/ul/li')
            for e in p:
                inst = dict(inst_type=e.find('a/span').text,
                            url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                inst_type_dicts.append(inst)

async def inst_by_loc(inst):
    url = inst['url']
    print("start: ", inst['inst_type'])
    async with ClientSession() as session:
        async with session.get(url) as response:
            doc = requests.get(url).content
            html = lxml.html.fromstring(doc)
            p = html.xpath('//*[@class="subtabsmenu"]/li')
            for e in p:
                exchanges_by_loc = dict(loc=e.find('a/span').text,
                                        loc_url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                exchanges_by_locs.append(exchanges_by_loc)
    print("complete: ", inst['inst_type'])

loop = asyncio.get_event_loop()
loop.run_until_complete(inst_types(url))
loop.run_until_complete(
    asyncio.gather(
        *(inst_by_loc(inst) for inst in inst_type_dicts)
    )
)
aiohttp's maintainers recommend re-using the session object when possible. It's a small performance trick: the session keeps a pool of connections, so connections to the same host can be reused instead of being re-established for every request.
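As a sketch, the code above could create one session up front and pass it into both coroutines instead of opening a new ClientSession per request (the function signatures are adjusted here for illustration):
async def main(start_url):
    # one session shared by all requests, so its connection pool is reused
    async with ClientSession() as session:
        await inst_types(session, start_url)
        await asyncio.gather(
            *(inst_by_loc(session, inst) for inst in inst_type_dicts)
        )

# inst_types and inst_by_loc would then take `session` as a parameter
# and call session.get(...) on it instead of creating their own sessions.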
