I need to send over 1 million HTTP requests, and so far every option I've tried is just way too slow. I thought I could speed it up with aiohttp, but it doesn't seem any faster than requests.
I was trying to do it with Python, but I'm open to other options as well.
Here is the code using both requests and aiohttp. Any tips for speeding up the process?
requests code:

import requests

url = 'https://mysite.mysite:443/login'
users = [line.strip() for line in open("ids.txt", "r")]

try:
    for user in users:
        r = requests.post(url, data={'username': user})
        if 'login.error.invalid.username' not in r.text:
            print(user, " is valid")
        else:
            print(user, " not found")
except Exception as e:
    print(e)
aiohttp code:

import aiohttp
import asyncio

url = 'https://mysite.mysite:443/login'
users = [line.strip() for line in open("ids.txt", "r")]

async def main():
    async with aiohttp.ClientSession() as session:
        try:
            for user in users:
                payload = {"timeZoneOffSet": "240", "useragent": '', "username": user}
                async with session.post(url, data=payload) as resp:
                    if 'login.error.invalid.username' not in await resp.text():
                        print(user, " is valid")
                    else:
                        print(user, " not found")
        except Exception as e:
            print(e)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
You could use asyncio.gather to collect the results from a bunch of requests running in parallel. Your aiohttp version is no faster than requests because you await each response inside the loop, so the requests still go out one at a time.
Warning: the code is just an example and is not tested.
import asyncio
from aiohttp import ClientSession

async def fetch(url, session, payload):
    # one POST per username; check the body for the "invalid username" marker
    async with session.post(url, data=payload) as resp:
        text = await resp.text()
        if 'login.error.invalid.username' not in text:
            print(payload["username"], " is valid")
        else:
            print(payload["username"], " not found")
        return text

async def run(users):
    url = "https://mysite.mysite:443/login"
    tasks = []
    async with ClientSession() as session:
        for user in users:
            task = asyncio.ensure_future(fetch(url, session, {"username": user}))
            tasks.append(task)
        responses = await asyncio.gather(*tasks)
        # you now have all response bodies in this variable
        return responses

users = [line.strip() for line in open("ids.txt", "r")]
loop = asyncio.get_event_loop()
loop.run_until_complete(run(users))
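With over a million requests you will also want to cap how many are in flight at once, otherwise you can exhaust sockets and file descriptors or get throttled by the server. Below is a minimal, untested sketch of the same gather pattern with an asyncio.Semaphore; the concurrency value of 100 is an arbitrary assumption to tune, and aiohttp's TCPConnector(limit=...) can cap connections in a similar way.

import asyncio
import aiohttp

CONCURRENCY = 100  # arbitrary cap; tune it for your network and the target server

async def check_user(user, url, session, sem):
    # the semaphore keeps at most CONCURRENCY requests in flight at any moment
    async with sem:
        async with session.post(url, data={"username": user}) as resp:
            text = await resp.text()
            return user, 'login.error.invalid.username' not in text

async def main(users, url):
    sem = asyncio.Semaphore(CONCURRENCY)
    async with aiohttp.ClientSession() as session:
        tasks = [check_user(user, url, session, sem) for user in users]
        for user, valid in await asyncio.gather(*tasks):
            print(user, " is valid" if valid else " not found")

users = [line.strip() for line in open("ids.txt", "r")]
asyncio.run(main(users, 'https://mysite.mysite:443/login'))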
I am using aiohttp and asyncio to run multiple requests asynchronously. The problem is that when I try to print the data I receive, I end up getting the data of another request in the task queue. I have tried to debug this and looked through the docs, but I am unable to solve the problem.
Here's my code:
from time import sleep
import aiohttp
import asyncio

async def search(query, session):
    search_params = {
        "query": query
    }
    async with session.get(
        url,
        params=search_params,
    ) as response:
        json_response = await response.json()
        data = json_response["data"]
        print(data)
        """the above line always prints the data from the response of the first task to get executed
        and not the current data from this request with a different query"""

async def main():
    async with aiohttp.ClientSession() as session:
        await init_session(session)
        await enable_search(session)
        while True:
            tasks = [asyncio.create_task(search(query, session=session)) for query in inputs]
            await asyncio.gather(*tasks)
            sleep(5)

if __name__ == "__main__":
    asyncio.run(main())
Let's say I have a list of URLs to get some data from. I try to do it asynchronously, but one of the URLs is incorrect. How can I catch this error, and is it possible to change the URL after catching the error and send the request again?
I use the following code to get the data using asyncio and aiohttp:
import asyncio
import aiohttp

urls = ["a", "b", "c"]  # some list of urls
results = []

def get_tasks(session):
    tasks = []
    for url in urls:
        tasks.append(asyncio.create_task(session.get(url, ssl=False)))
    return tasks

async def get_symbols():
    async with aiohttp.ClientSession() as session:
        tasks = get_tasks(session)
        responses = await asyncio.gather(*tasks)
        for response in responses:
            results.append(await response.json())

asyncio.run(get_symbols())
Then I get the following error:
ContentTypeError: 0, message='Attempt to decode JSON with unexpected mimetype: ', url=URL('b')
How can I catch this error so the whole process continues, and, if it is possible, fix "b" to another correct URL (let's say "bb") and send the request again?
The easiest way is to put a try...except block around await response.json() and, if it throws an exception, change the URL and schedule it again. For more complex flows use, for example, an asyncio.Queue (a sketch follows the example output below).
import asyncio
import aiohttp

urls = [
    "https://reqbin.com/echo/get/json?1",
    "https://reqbin.com/echo/get/json?2",
    "https://reqbin.com/echo/get/json-BAD",
]

results = []

def get_tasks(session, urls):
    tasks = []
    for url in urls:
        tasks.append(asyncio.create_task(session.get(url, ssl=False)))
    return tasks

async def get_symbols():
    async with aiohttp.ClientSession() as session:
        while urls:
            for task in asyncio.as_completed(get_tasks(session, urls)):
                response = await task
                urls.remove(str(response.url))
                try:
                    data = await response.json()
                    print(response.url, data)
                    results.append(data)
                except Exception as e:
                    new_url = str(response.url).split("-")[0]
                    print(f"Error with URL {response.url} Attempting new URL {new_url}")
                    urls.append(new_url)

asyncio.run(get_symbols())
Prints:
https://reqbin.com/echo/get/json?2 {'success': 'true'}
https://reqbin.com/echo/get/json?1 {'success': 'true'}
Error with URL https://reqbin.com/echo/get/json-BAD Attempting new URL https://reqbin.com/echo/get/json
https://reqbin.com/echo/get/json {'success': 'true'}
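For the asyncio.Queue approach mentioned above, a rough, untested sketch could look like the following. The worker count and the "strip everything after the dash" repair rule are assumptions carried over from the example, and a URL whose repaired form still fails would be retried forever here.

import asyncio
import aiohttp

async def worker(queue, session, results):
    while True:
        url = await queue.get()
        try:
            async with session.get(url, ssl=False) as response:
                results.append(await response.json())
        except Exception:
            # repair the URL and put it back on the queue to be retried
            await queue.put(url.split("-")[0])
        finally:
            queue.task_done()

async def get_symbols(urls, num_workers=3):  # num_workers is an arbitrary choice
    results = []
    queue = asyncio.Queue()
    for url in urls:
        queue.put_nowait(url)
    async with aiohttp.ClientSession() as session:
        workers = [
            asyncio.create_task(worker(queue, session, results))
            for _ in range(num_workers)
        ]
        await queue.join()  # returns once every queued URL has been processed
        for w in workers:
            w.cancel()
    return results

print(asyncio.run(get_symbols([
    "https://reqbin.com/echo/get/json?1",
    "https://reqbin.com/echo/get/json-BAD",
])))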
I am trying to do GET requests with proxy support; however, the code simply never reaches that part.
proxy = f"http://{line}"
try:
async with aiohttp.ClientSession() as session:
async with session.get('https://www.google.com', proxy=proxy, timeout=1) as resp:
text = await resp.read()
print(f'Status: {text}')
if(resp.status == 200):
if(proxy not in working_proxies):
working_proxies.append(proxy)
print(f'{Fore.GREEN}[PROXY] {proxy} did work.')
sum += 1
except(Exception) as e:
print(f'Error: {e}')
It never reaches the await resp.read() line and never prints anything at all.
I need to know how many megabytes per second a request sent with asyncio in Python transfers. I tried resp.read() and resp.content.read(), but it isn't working.
import aiohttp
import asyncio

async def get_requests(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            result = resp
            return result

big_urls = ['http://google.com', 'http://yahoo.com']

loop = asyncio.get_event_loop()
coroutines = [get_requests(url) for url in big_urls]
results = loop.run_until_complete(asyncio.gather(*coroutines))
print(results)
I'm new to web development and I'm testing my site by sending HTTP GET requests to see how well it handles them. With my code I can send multiple GET requests, but how can I make it keep sending them forever, i.e. send the GET requests over and over again in a loop that never stops? I am very sorry for my bad English; I hope you get my question.
import time
import datetime
import asyncio
import aiohttp

domain = 'http://myserver.com'
a = '{}/page1?run={}'.format(domain, time.time())
b = '{}/page2?run={}'.format(domain, time.time())

async def get(url):
    print('GET: ', url)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            t = '{0:%H:%M:%S}'.format(datetime.datetime.now())
            print('Done: {}, {} ({})'.format(t, response.url, response.status))

loop = asyncio.get_event_loop()
tasks = [
    asyncio.ensure_future(get(a)),
    asyncio.ensure_future(get(b))
]
loop.run_until_complete(asyncio.wait(tasks))
If you want something to happen over and over, add a for or while loop - see https://docs.python.org/3/tutorial/index.html
async def get(url):
    async with aiohttp.ClientSession() as session:
        while True:
            print('GET: ', url)
            async with session.get(url) as response:
                t = '{0:%H:%M:%S}'.format(datetime.datetime.now())
                print('Done: {}, {} ({})'.format(t, response.url, response.status))
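Because the coroutines now loop forever, loop.run_until_complete (or asyncio.run) never returns, so stop the script with Ctrl+C. On Python 3.7+ you could also drive the same two endless loops with asyncio.run and asyncio.gather instead of the older get_event_loop / ensure_future pattern; this sketch assumes a and b are the URLs defined in the question:

async def main():
    # run both endless GET loops concurrently; interrupt with Ctrl+C to stop
    await asyncio.gather(get(a), get(b))

asyncio.run(main())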