Let's say I have some list of URLs to get some data from them. I try to do it in async way, but one of the URLs is incorrect. How can I catch this error and is it possible to change the URL address after catching the error to send the request again?
I use the following code to get data using asyncio and aiohttp:
import asyncio
import aiohttp
urls = ["a", "b", "c"] # some list of urls
results = []
def get_tasks(session):
    """Schedule one GET request per entry of the module-level ``urls`` list.

    Every ``session.get(url, ssl=False)`` call is wrapped with
    ``asyncio.create_task``; the resulting tasks are returned in the same
    order as ``urls``.
    """
    return [
        asyncio.create_task(session.get(link, ssl=False))
        for link in urls
    ]
async def get_symbols():
    """Request every URL concurrently and collect the decoded JSON bodies.

    One ``aiohttp.ClientSession`` is opened, all tasks produced by
    ``get_tasks`` are awaited together with ``asyncio.gather``, and each
    response's ``json()`` result is appended to the module-level ``results``
    list. Nothing is caught here, so an error raised while decoding a
    response propagates to the caller.
    """
    async with aiohttp.ClientSession() as http:
        pending = get_tasks(http)
        replies = await asyncio.gather(*pending)
        for reply in replies:
            decoded = await reply.json()
            results.append(decoded)
asyncio.run(get_symbols())
And then I get the next error:
ContentTypeError: 0, message='Attempt to decode JSON with unexpected mimetype: ', url=URL('b')
How can I catch this error so that the rest of the process continues, and is it possible to replace "b" with another, correct URL (let's say "bb") and send the request again?
The easiest way is to put try...except block around await response.json() and if it throws exception change the URL and schedule it again. For more complex task use for example asyncio.Queue.
import asyncio
import aiohttp
urls = [
"https://reqbin.com/echo/get/json?1",
"https://reqbin.com/echo/get/json?2",
"https://reqbin.com/echo/get/json-BAD",
]
results = []
def get_tasks(session, urls):
    """Return one scheduled GET task per URL, in the order given.

    Args:
        session: Object whose ``get(url, ssl=False)`` returns an awaitable
            (an ``aiohttp.ClientSession`` in the surrounding example).
        urls: Iterable of URL strings to request.

    Returns:
        A list of tasks created with ``asyncio.create_task``.
    """
    return [
        asyncio.create_task(session.get(address, ssl=False))
        for address in urls
    ]
async def get_symbols():
    """Fetch every URL in the module-level ``urls`` list, retrying bad ones.

    Requests are issued concurrently in rounds. Each response that decodes
    as JSON is printed and appended to the module-level ``results`` list.
    When decoding fails, the response URL is rewritten by dropping
    everything from the first ``-`` onwards and the rewritten URL is queued
    for the next round.

    ``urls`` is consumed: it is emptied at the start of each round and only
    refilled with rewritten retry URLs.

    Raises:
        Whatever ``session.get`` raises for a request that cannot be made at
        all (those errors are not retried here).
    """
    async with aiohttp.ClientSession() as session:
        while urls:
            # Take the whole round out of ``urls`` up front. Removing entries
            # by ``str(response.url)`` fails with ValueError whenever the
            # final URL differs from the requested one (e.g. a redirect).
            batch = list(urls)
            urls.clear()
            for task in asyncio.as_completed(get_tasks(session, batch)):
                response = await task
                # Leaving the ``async with`` releases the connection even
                # when the body could not be decoded.
                async with response:
                    try:
                        data = await response.json()
                    except (aiohttp.ClientError, ValueError):
                        # ContentTypeError is a ClientError; malformed JSON
                        # raises a ValueError subclass.
                        failed_url = str(response.url)
                        new_url = failed_url.split("-")[0]
                        if new_url == failed_url:
                            # No "-" to strip: re-queueing the same URL
                            # would loop forever.
                            print(
                                f"Error with URL {response.url} "
                                "No alternative URL to try"
                            )
                            continue
                        print(
                            f"Error with URL {response.url} Attempting new URL {new_url}"
                        )
                        urls.append(new_url)
                    else:
                        print(response.url, data)
                        results.append(data)
asyncio.run(get_symbols())
Prints:
https://reqbin.com/echo/get/json?2 {'success': 'true'}
https://reqbin.com/echo/get/json?1 {'success': 'true'}
Error with URL https://reqbin.com/echo/get/json-BAD Attempting new URL https://reqbin.com/echo/get/json
https://reqbin.com/echo/get/json {'success': 'true'}
Related
I am using aiohttp and asyncio to run multiple requests asynchronously. The problem is that when I try to print the data I receive, I end up getting the data of another request in the task queue. I have tried to debug this and looked at the docs for answers, but I am unable to solve the problem.
here's my code:
from time import sleep
import aiohttp
import asyncio
async def search(query, session):
    """Run one search request for ``query`` and print its ``"data"`` field.

    A GET is sent through ``session`` to the module-level ``url`` with
    ``query`` passed as the ``query`` request parameter; the JSON body is
    decoded and its ``"data"`` entry is printed.
    """
    async with session.get(url, params={"query": query}) as response:
        body = await response.json()
        print(body["data"])
        """the above line always prints the data from the response of the first task to get executed
and not the current data from this request with a different query"""
async def main():
    """Prepare the session, then repeatedly run a search for every input.

    After ``init_session`` and ``enable_search`` have been awaited, the loop
    starts one ``search`` task per entry of the module-level ``inputs``,
    waits for all of them, pauses five seconds and repeats forever.
    """
    async with aiohttp.ClientSession() as session:
        await init_session(session)
        await enable_search(session)
        while True:
            # Both arguments are passed by keyword: a positional argument
            # after a keyword argument is a SyntaxError.
            tasks = [
                asyncio.create_task(search(query=query, session=session))
                for query in inputs
            ]
            await asyncio.gather(*tasks)
            # time.sleep() would block the event loop for the whole pause;
            # asyncio.sleep() yields control while waiting.
            await asyncio.sleep(5)
if __name__ == "__main__":
asyncio.run(main())
this is my code:
# Form fields sent alongside the audio file. Note that several values are
# ints (1), not strings; the error quoted after this snippet reports a value
# of type int.
payload = {'text': input_text,
           'question_info': '',
           'include_intonation': 1,
           'stress_version': stress_version,
           'include_fluency': 1,
           'include_ielts_subscore': 1}
# (field name, open file) pair; the aiohttp code below does not reference
# this list and opens the file a second time instead.
files = [
    ('user_audio_file', open(saved_file_path, 'rb'))
]
headers = {}
# Build the form: one field per payload entry, then the audio file itself.
form = aiohttp.FormData()
for key, value in payload.items():
    form.add_field(key, value)
form.add_field('user_audio_file', open(saved_file_path, 'rb'))
# Post the form and decode the reply as JSON.
async with aiohttp.ClientSession() as session:
    async with session.post(url,data=form) as response:
        response_json = await response.json()
and I want to send file with aiohttp to URL but I got this exception
'Can not serialize value type: <class \'int\'> headers: {} value: 1'
I do that with requests library like this
response = request(
"POST", url, headers=headers, data=payload, files=files)
response_json = response.json()
but I decided to use aiohttp because it should be async
please help me for this decision
thanks
you need to serialize payload data using data= b'form'
e.g.
async with aiohttp.ClientSession() as session:
async with session.post(url,data=b'form') as response:
response_json = await response.json()
By default the session uses Python's standard json module for serialization, but it is possible to use a different serializer: ClientSession accepts a json_serialize parameter. Then you don't need to serialize your payload explicitly.
import ujson
async with aiohttp.ClientSession(
json_serialize=ujson.dumps) as session:
await session.post(url,data=form) as response:
response_json = await response.json()
....
Warning: above code is not tested.
Update
I tried setting up a local HTTP server and uploading JSON. I get past your error and am able to upload data. Are you serializing the form data using b'form'?
As per this GitHub issue discussion, we need asyncio to control async event loop and execute async/await through a function.
Here's relevant code.
async def uploadForm():
    """Post to the module-level ``url`` and print the decoded JSON reply."""
    async with aiohttp.ClientSession() as session:
        # NOTE(review): b'form' is a bytes literal holding the four
        # characters "form"; it is not derived from any FormData object, so
        # no form fields are sent by this call.
        async with session.post(url,data=b'form') as response:
            # content_type='text/html' is handed to response.json();
            # presumably so a reply labelled text/html is still decoded
            # (confirm against the server's actual Content-Type).
            response_json = await response.json(content_type='text/html')
            print(response_json)
def main():
    """Run ``uploadForm`` to completion on a fresh event loop."""
    # asyncio.run() creates the loop, runs the coroutine and always closes
    # the loop afterwards, including when the coroutine raises. The manual
    # get_event_loop()/run_until_complete()/close() sequence skipped the
    # close on error and relies on a deprecated implicit-loop lookup.
    asyncio.run(uploadForm())
if __name__ == '__main__':
main()
Hope this helps you.
I need to send over 1 million HTTP requests and so far every option I've tried is just way too slow. I thought I could speed it up with aiohttp but that doesn't seem any faster than requests.
I was trying to do it with python but I'm open to other options as well.
Here is the code using both requests and aiohttp, any tips for speeding up the process?
requests code:
import requests
url = 'https://mysite.mysite:443/login'
# Read the ids inside ``with`` so the file handle is closed as soon as the
# list has been built.
with open("ids.txt", "r") as ids_file:
    users = [line.strip() for line in ids_file]

try:
    # Requests are sent strictly one after another: each post() call returns
    # only once its reply has arrived.
    for user in users:
        r = requests.post(url, data={'username': user})
        if 'login.error.invalid.username' not in r.text:
            print(user, " is valid")
        else:
            print(user, " not found")
except requests.RequestException as e:
    # Network/HTTP failures end the run with a message; other exceptions are
    # programming errors and are left to surface with a traceback.
    print(e)
aiohttp code:
import aiohttp
import asyncio
url = 'https://mysite.mysite:443/login'
users = [line.strip() for line in open("ids.txt", "r")]
async def main():
    """Post a login form for every entry of ``users`` and report validity.

    All requests share one ``aiohttp.ClientSession``. Each request is
    awaited inside the ``for`` loop, so the next one starts only after the
    previous reply has been read. Any exception ends the loop and is
    printed.
    """
    async with aiohttp.ClientSession() as client:
        try:
            for name in users:
                form = {"timeZoneOffSet": "240", "useragent": '', "username": name}
                async with client.post(url, data=form) as reply:
                    body = await reply.text()
                    rejected = 'login.error.invalid.username' in body
                    print(name, " not found" if rejected else " is valid")
        except Exception as err:
            print(err)
# asyncio.run() creates a new event loop, runs main() and closes the loop
# afterwards; asyncio.get_event_loop() with no running loop is deprecated.
asyncio.run(main())
You could use an asyncio.gather to collect results from a bunch of requests working in parallel.
Warning: code is just an example and is not tested.
import asyncio
from aiohttp import ClientSession
async def fetch(url, session, payload=None):
    """POST ``payload`` to ``url`` and report whether the username was accepted.

    Args:
        url: Address to post to.
        session: Object whose ``post(url, data=...)`` returns an async
            context manager yielding a response with an awaitable
            ``text()`` (an ``aiohttp.ClientSession`` in this example).
        payload: Form data to send. When it is a dict with a ``"username"``
            key, that value labels the printed line; otherwise ``url`` is
            used as the label.

    Returns:
        ``True`` when the response body does not contain the
        invalid-username marker, ``False`` otherwise.
    """
    # The original printed an undefined name ``user``; derive the label from
    # the data actually passed in.
    user = payload.get("username", url) if isinstance(payload, dict) else url
    async with session.post(url, data=payload) as resp:
        is_valid = 'login.error.invalid.username' not in await resp.text()
    print(user, " is valid" if is_valid else " not found")
    return is_valid
async def run(r):
    """Start ``r`` concurrent ``fetch`` calls on one session and await them all.

    Args:
        r: Number of requests; request ``i`` targets
            ``"http://your_url:8000/{i}"``.

    Returns:
        The list produced by ``asyncio.gather``: whatever each ``fetch``
        call returned, in request order.
    """
    url = "http://your_url:8000/{}"
    async with ClientSession() as session:
        tasks = [
            # fetch() takes (url, session, payload); this skeleton has no
            # form data, so pass None explicitly. Supply the real
            # per-request payload here.
            asyncio.ensure_future(fetch(url.format(i), session, None))
            for i in range(r)
        ]
        # Hand the gathered results back instead of discarding them.
        return await asyncio.gather(*tasks)
def print_responses(result):
    """Print ``result`` to standard output."""
    print(result)
# asyncio.run() creates the event loop, runs run(4) to completion and closes
# the loop. Calling asyncio.ensure_future() before any loop is running relies
# on a deprecated implicit-loop lookup.
asyncio.run(run(4))
I am using the below code to make 599 asynchronous requests to Strava API.
For some reason the response I get for each of them is
{"message":"Authorization
Error","errors":[{"resource":"Application","field":"","code":"invalid"}]}
This is the type of error you typically get when your access_token query string parameter
is invalid.
But in this case the token is 100% correct: the URL returns correct response when just
copy-pasted manually in the browser.
What might be the reason of the error and how to fix it? Might it be that the aiohttp session is somehow
messing the authentication procedure up?
Note: for privacy reasons the token in the code below is fake.
import aiohttp
import asyncio
async def fetch(session, url):
    """GET ``url`` through ``session`` and print the response body text."""
    async with session.get(url) as reply:
        body = await reply.text()
        print(body)
async def main():
    """Issue 599 concurrent GET requests for the same URL string.

    One ``aiohttp.ClientSession`` is shared by all requests; each request
    runs ``fetch`` as its own task and all tasks are awaited together.
    """
    # NOTE: the triple-quoted literal spans three source lines, so the line
    # breaks between them are part of the URL string that gets requested.
    address = '''https://www.strava.com/api/v3/activities/
280816027?include_all_efforts=true&
access_token=11111111'''
    async with aiohttp.ClientSession() as session:
        pending = [
            asyncio.ensure_future(fetch(session, address))
            for _ in range(599)
        ]
        await asyncio.gather(*pending)
# asyncio.run() creates a new event loop, runs main() and closes the loop
# afterwards; asyncio.get_event_loop() with no running loop is deprecated.
asyncio.run(main())
You shouldn't use a multiline string as the URL, because it will keep all whitespaces and as a result you will get the wrong URL.
I'm new to web development and I'm testing my site by sending HTTP GET requests to check how well it handles load. With my code I can send multiple GET requests. How can I make the code keep sending them in a loop that never stops, i.e. send the GET requests over and over again? I am very sorry for my bad English; I hope you understand my question.
import time
import datetime
import asyncio
import aiohttp
domain = 'http://myserver.com'
a = '{}/page1?run={}'.format(domain, time.time())
b = '{}/page2?run={}'.format(domain, time.time())
async def get(url):
    """Request ``url`` once and print when it finished and with what status.

    A line is printed before the request starts; after the response arrives
    the current wall-clock time, the response URL and the status code are
    printed. A new ``aiohttp.ClientSession`` is opened for the call.
    """
    print('GET: ', url)
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        finished_at = datetime.datetime.now()
        stamp = '{0:%H:%M:%S}'.format(finished_at)
        print('Done: {}, {} ({})'.format(stamp, response.url, response.status))
async def main():
    """Start both GET requests as tasks and wait until both have finished."""
    # The tasks are created inside the running loop; calling
    # asyncio.ensure_future() at module level, before any loop is running,
    # relies on a deprecated implicit-loop lookup.
    tasks = [
        asyncio.ensure_future(get(a)),
        asyncio.ensure_future(get(b))
    ]
    await asyncio.wait(tasks)


# asyncio.run() creates the event loop, runs main() and closes the loop.
asyncio.run(main())
If you want something to happen over and over, add a for or while loop - see https://docs.python.org/3/tutorial/index.html
async def get(url):
    """Request ``url`` over and over on one session, logging every round.

    The loop never ends on its own: each iteration prints the URL, performs
    the GET, then prints the completion time, the response URL and the
    status code.
    """
    async with aiohttp.ClientSession() as client:
        while True:
            print('GET: ', url)
            async with client.get(url) as reply:
                finished_at = datetime.datetime.now()
                stamp = '{0:%H:%M:%S}'.format(finished_at)
                print('Done: {}, {} ({})'.format(stamp, reply.url, reply.status))