At my work, the system won't let us make HTTP requests without going through a proxy, and the proxy is discovered by creating a PyPAC session.
How can I use this PyPAC session inside an aiohttp session to make async requests?
So, I've found the answer is to use the pypac session to get a proxy which you can then put into the aiohttp request:
import asyncio
import aiohttp
import re
from pypac import PACSession, get_pac
pac = get_pac(url="https://somewebaddress.pac")
pac_session = PACSession(pac)
async def _fetch_async(session, url, proxy: bool = False):
    # Get the proxy for this url from the PAC file
    if proxy:
        proxies = pac_session \
            ._get_proxy_resolver(pac) \
            .get_proxy_for_requests(url)
        # The proxy dict is keyed by the URL scheme (http/https)
        match = re.search(r"^(\w*)", str(url))
        proxy = proxies[match.group()]
    else:
        proxy = None
    # Fetch with the shared aiohttp session
    async with session.request("get", url, proxy=proxy) as resp:
        return resp
async def _fetch_all_async(urls):
    tasks = []
    async with aiohttp.ClientSession() as session:
        for url in urls:
            tasks.append(
                _fetch_async(
                    session,
                    url
                )
            )
        return await asyncio.gather(*tasks)
def request_all(urls: list):
    # Start the event loop
    loop = asyncio.get_event_loop()
    # Gather tasks and run
    coro = _fetch_all_async(urls)
    resps = loop.run_until_complete(coro)
    return resps
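For example (a minimal usage sketch; the URLs below are just placeholders), you could call it like this:

urls = [
    "https://example.com/page1",
    "https://example.com/page2",
]
responses = request_all(urls)
for resp in responses:
    print(resp.status)

Note that each response's connection has already been released by the time it is returned, so only metadata such as resp.status and resp.headers is still usable.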
Hope this helps any fellow sufferers.
I have a static URL, headers, and data.
Is it possible to make a million POST requests simultaneously with Python?
Here is my file.py:
import json
import requests
url = "https://abcd.com"
headers = "headers"
body = "body"
resp = requests.post(url, headers=headers, data=body)
json_resp = json.loads(resp.content)["data"]
print(json_resp)
You might want to use a Python load-testing tool for that, such as:
https://locust.io/
Your file would look like:
from locust import HttpUser, task, between

class QuickstartUser(HttpUser):
    @task
    def task_name(self):
        self.client.post(url, headers=headers, data=body)
You can then run it with Locust like this:
locust --headless --users <number_of_user> -f <your_file.py>
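A slightly more complete, self-contained version of that Locust file might look like this (the host, headers, and body are placeholders standing in for your real values):

from locust import HttpUser, task, constant

class QuickstartUser(HttpUser):
    host = "https://abcd.com"                       # placeholder base URL
    wait_time = constant(0)                         # no pause between requests
    headers = {"Content-Type": "application/json"}  # placeholder headers
    body = '{"key": "value"}'                       # placeholder body

    @task
    def post_request(self):
        # self.client is a requests-like session managed by Locust
        self.client.post("/", headers=self.headers, data=self.body)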
You can do this in several ways; asyncio is the best method and matches the idea of async work. A second option is ThreadPoolExecutor, which I don't particularly recommend (a sketch of it follows the asyncio example below).
Here is an example of the asyncio approach:
# modified fetch function with semaphore
import random
import asyncio
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        delay = response.headers.get("DELAY")
        date = response.headers.get("DATE")
        print("{}:{} with delay {}".format(date, response.url, delay))
        return await response.read()

async def bound_fetch(sem, url, session):
    # Getter function with semaphore.
    async with sem:
        await fetch(url, session)

async def run(r):
    url = "http://localhost:8080/{}"
    tasks = []
    # create instance of Semaphore
    sem = asyncio.Semaphore(1000)
    # Create a client session so that we don't open a new connection
    # per request.
    async with ClientSession() as session:
        for i in range(r):
            # pass Semaphore and session to every GET request
            task = asyncio.ensure_future(bound_fetch(sem, url.format(i), session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses

number = 10000
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(number))
loop.run_until_complete(future)
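For completeness, here is a minimal sketch of the ThreadPoolExecutor approach mentioned above, using the blocking requests library against the same placeholder URLs; it is simpler but scales far worse than the asyncio version:

import concurrent.futures
import requests

def fetch(url):
    # plain blocking request, executed in a worker thread
    resp = requests.get(url)
    return resp.status_code

urls = ["http://localhost:8080/{}".format(i) for i in range(10000)]

with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
    for status in executor.map(fetch, urls):
        print(status)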
I'm new to web development and I'm testing my site by sending HTTP GET requests to check how well it handles load. With my code I can send multiple GET requests, but how can I make it send them over and over again, in a loop that never stops? I'm sorry for my bad English; I hope you get my question.
import time
import datetime
import asyncio
import aiohttp
domain = 'http://myserver.com'
a = '{}/page1?run={}'.format(domain, time.time())
b = '{}/page2?run={}'.format(domain, time.time())
async def get(url):
    print('GET: ', url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            t = '{0:%H:%M:%S}'.format(datetime.datetime.now())
            print('Done: {}, {} ({})'.format(t, response.url, response.status))

loop = asyncio.get_event_loop()
tasks = [
    asyncio.ensure_future(get(a)),
    asyncio.ensure_future(get(b))
]
loop.run_until_complete(asyncio.wait(tasks))
If you want something to happen over and over, add a for or while loop - see https://docs.python.org/3/tutorial/index.html
async def get(url):
    async with aiohttp.ClientSession() as session:
        while True:
            print('GET: ', url)
            async with session.get(url) as response:
                t = '{0:%H:%M:%S}'.format(datetime.datetime.now())
                print('Done: {}, {} ({})'.format(t, response.url, response.status))
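The driver code from the question stays the same; as a sketch, running both endless getters concurrently (with the a and b URLs defined above) looks like this, and run_until_complete now blocks forever because the getters never return:

loop = asyncio.get_event_loop()
tasks = [
    asyncio.ensure_future(get(a)),
    asyncio.ensure_future(get(b)),
]
loop.run_until_complete(asyncio.wait(tasks))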
I am trying to reuse an HTTP session, as the aiohttp docs advise:
Don’t create a session per request. Most likely you need a session per
application which performs all requests altogether.
But the usual pattern I use with the requests lib doesn't work:
def __init__(self):
    self.session = aiohttp.ClientSession()

async def get_u(self, id):
    async with self.session.get('url') as resp:
        json_resp = await resp.json()
        return json_resp.get('data', {})
Then when I try
await client.get_u(1)
I get the error
RuntimeError: Timeout context manager should be used inside a task
No workaround with async_timeout helped.
This other way works:
async def get_u(self, id):
    async with aiohttp.ClientSession() as session:
        with async_timeout.timeout(3):
            async with session.get('url') as resp:
                json_resp = await resp.json()
                return json_resp.get('data', {})
But that amounts to creating a session per request.
So my question is: how do I properly reuse an aiohttp session?
UPD: minimal working example. A Sanic application with the following view:
import aiohttp
from sanic.views import HTTPMethodView

class Client:
    def __init__(self):
        self.session = aiohttp.ClientSession()
        self.url = 'https://jsonplaceholder.typicode.com/todos/1'

    async def get(self):
        async with self.session.get(self.url) as resp:
            json_resp = await resp.json()
            return json_resp

client = Client()

class ExView(HTTPMethodView):
    async def get(self, request):
        todo = await client.get()
        print(todo)
I had the same error. The solution for me was initializing the client within an async function, e.g.:
import logging

import aiohttp
from async_timeout import timeout

logger = logging.getLogger(__name__)

class SearchClient(object):
    def __init__(self, search_url: str, api_key: str):
        self.search_url = search_url
        self.api_key = api_key
        self.session = None

    async def _get(self, url, attempt=1):
        # Create the session lazily, the first time we are inside a running loop
        if self.session is None:
            self.session = aiohttp.ClientSession(raise_for_status=True)
        headers = {
            'Content-Type': 'application/json',
            'api-key': self.api_key
        }
        logger.info("Running Search: {}".format(url))
        try:
            with timeout(60):
                async with self.session.get(url, headers=headers) as response:
                    results = await response.json()
                    return results
        except aiohttp.ClientError:
            # retry/error handling omitted in the original snippet
            raise
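Usage then looks something like the following sketch (the search URL and API key are made-up placeholders); the important part is that the first call to _get, and therefore the ClientSession creation, happens inside a running event loop:

import asyncio

async def main():
    client = SearchClient("https://example-search.net", "my-api-key")
    results = await client._get("https://example-search.net/indexes/docs?search=foo")
    print(results)

asyncio.run(main())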
For example, you can create the ClientSession on app start (using the on_startup signal: https://docs.aiohttp.org/en/stable/web_advanced.html#signals).
Store it on your app (an aiohttp application has a dict interface for exactly this: https://aiohttp.readthedocs.io/en/stable/faq.html#id4) and access the session through request.app['YOU_CLIENT_SESSION'] inside a request handler.
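A minimal sketch of that pattern with an aiohttp web application (the handler, route, and target URL are just examples):

import aiohttp
from aiohttp import web

async def init_session(app):
    # runs on application start-up: create one shared client session
    app['YOU_CLIENT_SESSION'] = aiohttp.ClientSession()

async def close_session(app):
    # runs on application cleanup: close the shared client session
    await app['YOU_CLIENT_SESSION'].close()

async def handler(request):
    session = request.app['YOU_CLIENT_SESSION']
    async with session.get('https://jsonplaceholder.typicode.com/todos/1') as resp:
        return web.json_response(await resp.json())

app = web.Application()
app.on_startup.append(init_session)
app.on_cleanup.append(close_session)
app.router.add_get('/', handler)

web.run_app(app)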
My code is as follows:
import asyncio
import aiohttp
urls = [
    'http://www.163.com/',
    'http://www.sina.com.cn/',
    'https://www.hupu.com/',
    'http://www.csdn.net/'
]

async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    resp = await aiohttp.ClientSession().get(url=u)
    headers = resp.headers
    print(u, headers)
    return headers

async def request_url(u):
    """
    main func
    :param u:
    :return:
    """
    res = await get_url_data(u)
    return res
loop = asyncio.get_event_loop()
task_lists = asyncio.wait([request_url(u) for u in urls])
loop.run_until_complete(task_lists)
loop.close()
When I run my code, it displays a warning message:
Unclosed client session
Can anybody give me a solution for this?
Thanks a lot
You should close the session when you are done.
You have two options:
You can close it manually:
import aiohttp
session = aiohttp.ClientSession()
# use the session here
session.close()
Or you can use it with a context manager:
import aiohttp
import asyncio
async def fetch(client):
    async with client.get('http://python.org') as resp:
        assert resp.status == 200
        return await resp.text()

async def main(loop):
    async with aiohttp.ClientSession(loop=loop) as client:
        html = await fetch(client)
        print(html)

loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))
The client session supports the context manager protocol for self closing.
If you are not using a context manager, the proper way to close the session also needs an await. Many answers on the internet miss that part, and few people actually notice it, presumably because most people use the more convenient context manager. But the manual await session.close() is essential when you are closing a class-wide session inside tearDownClass() when doing unit testing.
import aiohttp
session = aiohttp.ClientSession()
# use the session here
await session.close()
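For example, here is a minimal sketch of that unit-testing scenario (the class and loop handling are made up for illustration):

import asyncio
import unittest

import aiohttp

class ApiTests(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(cls.loop)
        # create the class-wide session on the class's event loop
        cls.session = cls.loop.run_until_complete(cls._make_session())

    @staticmethod
    async def _make_session():
        return aiohttp.ClientSession()

    @classmethod
    def tearDownClass(cls):
        # close() is a coroutine, so it has to be awaited (here via the loop)
        cls.loop.run_until_complete(cls.session.close())
        cls.loop.close()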
You should use ClientSession with an async context manager so that resources are properly acquired and freed:
async def get_url_data(u):
    """
    read url data
    :param u:
    :return:
    """
    print('running ', u)
    async with aiohttp.ClientSession() as session:
        resp = await session.get(url=u)
        headers = resp.headers
        print(u, headers)
        return headers
Below is a working piece of code that scrapes links from the website of Interactive Brokers.
In the documentation of aiohttp they say to always use the aiohttp.ClientSession() object so that "sessions" are reused from one request to another. But what I can see from the multiple-requests examples (here for instance) is that one session is created per request...? So what is the point of that Session object?
import asyncio

import lxml.html
import requests
from aiohttp import ClientSession

exchanges_by_locs = []
inst_type_dicts = []

async def inst_types(url):
    async with ClientSession() as session:
        async with session.get(url) as response:
            response = await response.text()
            html = lxml.html.fromstring(response)
            p = html.xpath('//*[@id="toptabs"]/ul/li')
            for e in p:
                inst = dict(inst_type=e.find('a/span').text,
                            url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                inst_type_dicts.append(inst)

async def inst_by_loc(inst):
    url = inst['url']
    print("start: ", inst['inst_type'])
    async with ClientSession() as session:
        async with session.get(url) as response:
            doc = requests.get(url).content
            html = lxml.html.fromstring(doc)
            p = html.xpath('//*[@class="subtabsmenu"]/li')
            for e in p:
                exchanges_by_loc = dict(loc=e.find('a/span').text,
                                        loc_url='https://www.interactivebrokers.com' + e.find('a').attrib['href'])
                exchanges_by_locs.append(exchanges_by_loc)
    print("complete: ", inst['inst_type'])

# 'url' here is the starting page (not shown in the original post)
loop = asyncio.get_event_loop()
loop.run_until_complete(inst_types(url))
loop.run_until_complete(
    asyncio.gather(
        *(inst_by_loc(inst) for inst in inst_type_dicts)
    )
)
aiohttp's maintainers recommend re-using the session object when possible; it's a small performance trick.
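To illustrate that recommendation, here is a minimal sketch that shares one ClientSession (and therefore one connection pool) across all requests; the URL list is just an example:

import asyncio
from aiohttp import ClientSession

async def fetch(session, url):
    # every call re-uses the same session and its pooled connections
    async with session.get(url) as response:
        return await response.text()

async def main(urls):
    # one session for the whole program, as the maintainers recommend
    async with ClientSession() as session:
        pages = await asyncio.gather(*(fetch(session, u) for u in urls))
        for url, page in zip(urls, pages):
            print(url, len(page))

urls = [
    'https://www.interactivebrokers.com',
    'https://www.python.org',
]
asyncio.run(main(urls))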