What is the fastest way to post a million requests with url, headers, and body? - python

I have a static url, headers, and data.
Is it possible to make a million POST requests simultaneously with Python?
This is my file.py:
import json
import requests
url = "https://abcd.com"
headers = "headers"
body = "body"
resp = requests.post(url, headers=headers, data=body)
json_resp = json.loads(resp.content)["data"]
print(json_resp)

You might want to use a Python load-testing tool for that, such as Locust:
https://locust.io/
Your file would look like this:
from locust import HttpUser, task, between

class QuickstartUser(HttpUser):
    @task
    def task_name(self):
        # url, headers and body are the static values from your question
        self.client.post(url, headers=headers, data=body)
You would then run it with locust like this:
locust --headless --users <number_of_user> -f <your_file.py>

You can do this in several ways; the best method, in my view, is async work with asyncio and aiohttp. The second method is ThreadPoolExecutor, which I do not recommend as highly.
Here is an example of the async approach:
# modified fetch function with semaphore
import random
import asyncio
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        print("{}:{} with delay {}".format(date, response.url, delay))
        return await response.read()

async def bound_fetch(sem, url, session):
    # Getter function with semaphore.
    async with sem:
        await fetch(url, session)

async def run(r):
    url = "http://localhost:8080/{}"
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(1000)
    # Create client session that will ensure we don't open a new connection
    # per each request.
    async with ClientSession() as session:
        for i in range(r):
            # pass Semaphore and session to every GET request
            task = asyncio.ensure_future(bound_fetch(sem, url.format(i), session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses

number = 10000
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(number))
loop.run_until_complete(future)
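To adapt this pattern to the original question (a large number of identical POST requests), a minimal sketch is below; it swaps session.get for session.post and uses asyncio.run. The URL, HEADERS and BODY values are placeholders standing in for your static ones, and the semaphore limit of 1000 is an arbitrary assumption you should tune:

import asyncio
from aiohttp import ClientSession

URL = "https://abcd.com"                        # placeholder for your static url
HEADERS = {"Content-Type": "application/json"}  # placeholder headers
BODY = '{"key": "value"}'                       # placeholder body

async def post_one(sem, session):
    # the semaphore caps how many requests are in flight at once
    async with sem:
        async with session.post(URL, headers=HEADERS, data=BODY) as resp:
            return await resp.read()

async def run(total):
    sem = asyncio.Semaphore(1000)  # assumed concurrency limit
    async with ClientSession() as session:
        return await asyncio.gather(*(post_one(sem, session) for _ in range(total)))

if __name__ == "__main__":
    asyncio.run(run(1_000_000))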

Related

When trying to get the response data of a request, I get the response data of another request in aiohttp

I am using aiohttp and asyncio to run multiple requests asynchronously. The problem is that when I try to print the data I receive, I end up getting the data of another request in the task queue. I have tried to debug this and looked at the docs for answers, but I am unable to solve this problem.
Here's my code:
from time import sleep
import aiohttp
import asyncio

async def search(query, session):
    search_params = {
        "query": query
    }
    async with session.get(
        url,
        params=search_params,
    ) as response:
        json_response = await response.json()
        data = json_response["data"]
        print(data)
        """the above line always prints the data from the response of the first task to get executed
        and not the current data from this request with a different query"""

async def main():
    async with aiohttp.ClientSession() as session:
        await init_session(session)
        await enable_search(session)
        while True:
            tasks = [asyncio.create_task(search(query=query, session=session)) for query in inputs]
            await asyncio.gather(*tasks)
            sleep(5)

if __name__ == "__main__":
    asyncio.run(main())

How do I use aiohttp with PyPac

At my work the system won't let us make HTTP requests without using a proxy. However, the proxy is found by creating a PyPac session.
How can I use this PyPac session inside an aiohttp session to make async requests?
So, I've found the answer: use the PyPac session to get a proxy, which you can then pass into the aiohttp request:
import asyncio
import aiohttp
import re
from pypac import PACSession, get_pac

pac = get_pac(url="https://somewebaddress.pac")
pac_session = PACSession(pac)

async def _fetch_async(session, url, proxy: bool = False):
    # Get the proxy for this url
    if proxy:
        proxies = pac_session \
            ._get_proxy_resolver(pac) \
            .get_proxy_for_requests(url)
        match = re.search(r"^(\w*)", str(url))
        proxy = proxies[match.group()]
    else:
        proxy = None
    # Fetch with aiohttp session
    async with session.request("get", url, proxy=proxy) as resp:
        return resp

async def _fetch_all_async(urls):
    tasks = []
    async with aiohttp.ClientSession() as session:
        for url in urls:
            tasks.append(
                _fetch_async(
                    session,
                    url
                )
            )
        return await asyncio.gather(*tasks)

def request_all(urls: list = []):
    # Start the loop
    loop = asyncio.get_event_loop()
    # Gather tasks and run
    coro = _fetch_all_async(urls)
    resps = loop.run_until_complete(coro)
    return resps
Hope this helps any fellow sufferers.
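For what it's worth, a hypothetical call to the helper above (with placeholder URLs and no proxy) would look like:

responses = request_all([
    "https://example.com",
    "https://httpbin.org/get",
])
for resp in responses:
    print(resp.status)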

Fetch HEAD request's status asynchronously in aiohttp

This question is regarding aiohttp library usage.
My goal here is to check a list of urls by sending a bunch of HEAD requests, potentially asynchronously, and eventually create a dict of
url: status pairs.
I am new to asyncio and I found a lot of examples where people use GET requests to fetch html, for example, and they use await resp.read() or await resp.text(), and that works fine, but with a HEAD request I don't have a body, I just have headers, that's it. If I try to await resp.status, or resp itself as an object, it does not work, as they are not awaitable.
The code below only works synchronously, step by step, and I can't figure out how to make it run asynchronously. It seems like whatever I do with status turns the code into sync mode somehow...
I would be glad to see your ideas.
Thanks.
import asyncio
import aiohttp

urls_list = [url1, url2, url3, etc, etc, etc, ]

status_dict = {}

async def main():
    async with aiohttp.ClientSession() as session:
        for individual_url in urls_list:
            async with session.head(individual_url) as resp:
                status_dict.update({individual_url: resp.status})

asyncio.run(main())
You can use asyncio.gather:
import asyncio
import aiohttp

urls_list = ["https://google.com", "https://yahoo.com", "http://hello123456789.com"]

status_dict = {}

async def head_status(session, url) -> dict:
    async with session.head(url) as resp:
        return {url: resp.status}

async def main():
    async with aiohttp.ClientSession() as session:
        statuses = await asyncio.gather(*[head_status(session, url) for url in urls_list], return_exceptions=True)
        for a in statuses:
            if not isinstance(a, Exception):
                status_dict.update(a)

asyncio.run(main())
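If urls_list grows very large, the semaphore pattern from the earlier answer can be combined with this one to cap the number of concurrent HEAD requests. A minimal sketch, where the limit of 100 is an arbitrary assumption to tune for your target servers:

import asyncio
import aiohttp

async def head_status(sem, session, url) -> dict:
    # the semaphore limits how many HEAD requests are in flight at once
    async with sem:
        async with session.head(url) as resp:
            return {url: resp.status}

async def main(urls):
    sem = asyncio.Semaphore(100)  # assumed concurrency limit
    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *[head_status(sem, session, url) for url in urls],
            return_exceptions=True,
        )
    return {k: v for r in results if not isinstance(r, Exception) for k, v in r.items()}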

Python package - aiohttp has a warning message "Unclosed client session"

My code is as follows:
import asyncio
import aiohttp

urls = [
    'http://www.163.com/',
    'http://www.sina.com.cn/',
    'https://www.hupu.com/',
    'http://www.csdn.net/'
]

async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    resp = await aiohttp.ClientSession().get(url=u)
    headers = resp.headers
    print(u, headers)
    return headers

async def request_url(u):
    """
    main func
    :param u:
    :return:
    """
    res = await get_url_data(u)
    return res

loop = asyncio.get_event_loop()
task_lists = asyncio.wait([request_url(u) for u in urls])
loop.run_until_complete(task_lists)
loop.close()
When I run my code, it displays a warning message:
Unclosed client session
Can anybody give me some solutions for that?
Thanks a lot
You should close the connection at the end.
You have 2 options:
You can close the connection manually:
import aiohttp
session = aiohttp.ClientSession()
# use the session here
session.close()
Or you can use it with a context manager:
import aiohttp
import asyncio

async def fetch(client):
    async with client.get('http://python.org') as resp:
        assert resp.status == 200
        return await resp.text()

async def main(loop):
    async with aiohttp.ClientSession(loop=loop) as client:
        html = await fetch(client)
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
The client session supports the context manager protocol for self-closing.
If you are not using a context manager, the proper way to close it also needs an await. Many answers on the internet miss that part, and few people actually notice it, presumably because most people use the more convenient context manager. But the manual await session.close() is essential when/if you are closing a class-wide session inside tearDownClass() when doing unit testing.
import aiohttp
session = aiohttp.ClientSession()
# use the session here
await session.close()
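To illustrate the unittest case mentioned above, here is a minimal sketch assuming Python 3.8+'s unittest.IsolatedAsyncioTestCase (a per-test session rather than a truly class-wide one), where the close is explicitly awaited:

import unittest
import aiohttp

class ApiTests(unittest.IsolatedAsyncioTestCase):
    async def asyncSetUp(self):
        # hypothetical per-test session; a class-wide session would be closed the same way
        self.session = aiohttp.ClientSession()

    async def asyncTearDown(self):
        # the close must be awaited, or the "Unclosed client session" warning appears
        await self.session.close()

    async def test_python_org(self):
        async with self.session.get("https://python.org") as resp:
            self.assertEqual(resp.status, 200)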
You should use ClientSession with an async context manager so that resources are properly acquired and freed:
async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    async with aiohttp.ClientSession() as session:
        resp = await session.get(url=u)
        headers = resp.headers
        print(u, headers)
        return headers

Python Aiohttp: Regarding utility of the Session object

Below is a working piece of code that scrapes links from the Interactive Brokers website.
In the aiohttp documentation they say to always use the aiohttp.ClientSession() object so that "sessions" are reused from one request to another. But what I can see from the multiple-request examples (here, for instance) is that one session is created per request...? So what is the point of that Session object?
import asyncio
import lxml.html
import requests
from aiohttp import ClientSession

exchanges_by_locs = []
inst_type_dicts = []

async def inst_types(url):
    async with ClientSession() as session:
        async with session.get(url) as response:
            response = await response.text()
            html = lxml.html.fromstring(response)
            p = html.xpath('//*[@id="toptabs"]/ul/li')
            for e in p:
                inst = dict(inst_type=e.find('a/span').text,
                            url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                inst_type_dicts.append(inst)

async def inst_by_loc(inst):
    url = inst['url']
    print("start: ", inst['inst_type'])
    async with ClientSession() as session:
        async with session.get(url) as response:
            doc = requests.get(url).content
            html = lxml.html.fromstring(doc)
            p = html.xpath('//*[@class="subtabsmenu"]/li')
            for e in p:
                exchanges_by_loc = dict(loc=e.find('a/span').text,
                                        loc_url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                exchanges_by_locs.append(exchanges_by_loc)
    print("complete: ", inst['inst_type'])

loop = asyncio.get_event_loop()
loop.run_until_complete(inst_types(url))
loop.run_until_complete(
    asyncio.gather(
        *(inst_by_loc(inst) for inst in inst_type_dicts)
    )
)
aiohttp's maintainers recommend re-using the session object when possible. It's a small performance trick.
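To make that concrete, here is a minimal sketch (not your exact scraper) of the second coroutine reworked so that one ClientSession, and therefore one connection pool, is created in the caller and shared by every request:

import asyncio
from aiohttp import ClientSession

async def inst_by_loc(session, inst):
    # re-use the session passed in instead of opening a new one per request
    async with session.get(inst['url']) as response:
        return await response.text()

async def main(insts):
    # one session shared across all concurrent requests
    async with ClientSession() as session:
        return await asyncio.gather(*(inst_by_loc(session, inst) for inst in insts))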
