I'm trying to run this code in parallel:
availablePrefix = {"http://URL-to-somewehere.com": "true", "http://URL-to-somewehere-else.com": "true"}
def main():
while True:
prefixUrl = getFreePrefix() # Waits until new url is free
sendRequest("https://stackoverflow.com/", prefixUrl)
def getFreePrefix():
while True:
for prefix in self.availablePrefix.keys():
if availablePrefix.get(prefix) == "true":
availablePrefix[prefix] = "false" # Can't be used for another request
return prefix
async def sendRequest(self, prefix, suffix):
url = prefix + "/" + suffix
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
response = await resp.text()
availablePrefix[prefix] = "true" # Can be used again
return json.loads(response)
Basically, I'm trying to run the main() function in parallel.
The main() function is stuck until getFreePrefix() returns a new prefix (URL to my server). With the help of this prefix we can access my server and start a request.
If a prefix is in use, it is set to "false" to indicate that it can't be used for another request right now (once its request completes, it is set back to "true").
What I want to achieve is that every time a prefix becomes free, a new request runs in parallel.
Thanks for helping!
With your inconsistent use of self, I can't tell whether parts of your code are supposed to be part of a class or not. It also appears that your intention is for function main to run in an infinite loop and, as soon as a key of availablePrefix has been processed, for it to become available for processing again. In your current, non-concurrent code, I believe this could have been accomplished more simply as:
# simple list:
availablePrefix = ["http://URL-to-somewehere.com", "http://URL-to-somewehere-else.com"]

def main():
    while True:
        for prefixUrl in availablePrefix:
            sendRequest("https://stackoverflow.com/", prefixUrl)
You can then get rid of the getFreePrefix method and remove the code from sendRequest that updates the former availablePrefix dictionary, which is now a list. The other improvement I would make is to create the aiohttp.ClientSession() instance only once, in main, and pass it as an argument to whatever needs it.
Moving on. To repeatedly process the prefixes concurrently, the simplest way I know is:
import asyncio
import json

import aiohttp

availablePrefix = ["http://URL-to-somewehere.com", "http://URL-to-somewehere-else.com"]

async def main():
    # create the session instance once and pass it as an argument:
    async with aiohttp.ClientSession() as session:
        while True:
            tasks = {asyncio.create_task(sendRequest(session, "https://stackoverflow.com/", prefixUrl)) for prefixUrl in availablePrefix}
            for task in asyncio.as_completed(tasks):
                result = await task

async def sendRequest(session, prefix, suffix):
    url = prefix + "/" + suffix
    async with session.get(url) as resp:
        response = await resp.text()
        return json.loads(response)

asyncio.run(main())
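If the original constraint really matters (each prefix should serve at most one request at a time and should become available again as soon as its request finishes), one possible sketch, which is not part of the original answer, is to keep the free prefixes in an asyncio.Queue instead of a dict of "true"/"false" strings. It reuses the sendRequest coroutine defined just above:

import asyncio
import json

import aiohttp

availablePrefix = ["http://URL-to-somewehere.com", "http://URL-to-somewehere-else.com"]

async def worker(session, queue):
    while True:
        prefix = await queue.get()  # waits until some prefix is free
        try:
            await sendRequest(session, "https://stackoverflow.com/", prefix)
        finally:
            queue.put_nowait(prefix)  # the prefix is free again

async def main():
    queue = asyncio.Queue()
    for prefix in availablePrefix:
        queue.put_nowait(prefix)
    async with aiohttp.ClientSession() as session:
        # one worker per prefix means each prefix has at most one request in flight
        await asyncio.gather(*(worker(session, queue) for _ in availablePrefix))

asyncio.run(main())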
On Python asyncio I am trying to return a value from one function to another. So when the if statement at the end of the check function is True, the returned value should go to the print function.
async def check(i):
    async with aiohttp.ClientSession() as session:
        url = f'https://pokeapi.co/api/v2/pokemon/{i}'
        async with session.get(url) as resp:
            data = await resp.text()
            if data['state'] == 'yes':
                return data['state']

## how do I keep the structure of the asyncio code and pass this result to the "print" function?
async def print(here should be the return of the "check" function, if there is one):
    print()
    await asyncio.sleep(0)

async def main():
    for i in range(0, 5):
        await asyncio.gather(check(i),
                             print())
Thank You (-:
Your code is going to run everything synchronously. You need to restructure things a bit to see any value from asyncio.
async def check(i):
    async with aiohttp.ClientSession() as session:
        url = f'https://pokeapi.co/api/v2/pokemon/{i}'
        async with session.get(url) as resp:
            data = await resp.text()
            if data['state'] == 'yes':
                return data['state']

async def main():
    aws = [check(i) for i in range(5)]
    results = await asyncio.gather(*aws)
    for result in results:
        print(result)
This will allow your aiohttp requests to run asynchronously. Assuming print is really just a wrapper around the builtin, you don't need it and can just use the builtin.
If, however, print actually does something else, you should use asyncio.as_completed instead of asyncio.gather.
async def my_print(result):
    print(result)
    await asyncio.sleep(0)

async def main():
    aws = [check(i) for i in range(5)]
    for coro in asyncio.as_completed(aws):
        result = await coro
        await my_print(result)
Simple solution: do not run the two functions concurrently. One of them clearly needs the other to finish.
async def print_it(i):
    value = await check(i)
    if value is not None:
        print(value)
There is an implicit return None when a function finishes its last statement, i.e. when return data['state'] is NOT executed in check(). In that case nothing is printed; adjust the code if that is not what you want.
Of course, you should then start only print_it coroutines, without starting the check coroutines directly.
If you really need to run the two functions concurrently for some reason, use a Queue. The producer puts data into the queue, and the consumer gets a value as soon as one is available.
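A minimal, self-contained sketch of that producer/consumer pattern with asyncio.Queue could look like this (the names and the sentinel value are illustrative, not taken from the question):

import asyncio

async def produce(queue):
    # stand-in for work that yields values over time (e.g. the check() calls)
    for i in range(5):
        await asyncio.sleep(0.1)       # placeholder for an HTTP request
        await queue.put(f'value {i}')
    await queue.put(None)              # sentinel: tell the consumer to stop

async def consume(queue):
    while True:
        value = await queue.get()
        if value is None:
            break
        print(value)

async def main():
    queue = asyncio.Queue()
    await asyncio.gather(produce(queue), consume(queue))

asyncio.run(main())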
I'm trying to figure out a way to handle bad requests from an API within an asynchronous function using aiohttp. This is what I've got for testing:
async def fetch(session):
    url = 'http://httpbin.org/status/404'
    async with session.request('GET', url) as response:
        if response.status == 200:
            try:
                r = await response.json()
                return r
            except ValueError:
                return
        else:
            return None
async def fetch_all(project_list):
    output = []
    async with ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch(session, project)) for project in project_list]
        for future in await asyncio.gather(*tasks):
            output += future
    return output

def get_data(project_list):
    loop = asyncio.get_event_loop()
    futures = asyncio.ensure_future(fetch_all(project_list))
    output = loop.run_until_complete(futures)
    return output
In this example, project_list is just a list of integers.
In this instance, fetch() should return None, since the response will undoubtedly be a 404. The problem arises in fetch_all(), where I tell it to += future: I get TypeError: 'coroutine' object is not iterable. Basically, I'd like fetch() to return nothing in this case and add nothing to that list. In a perfect world I'd receive a proper JSON response every time, but I'd like to account for the occasional bad response from the server.
From what I've read, @asyncio.coroutine would return None, but async values have to be awaited, if I'm understanding it correctly.
First, you don't need to wrap the coroutines in ensure_future if you want to use gather. Second, you are creating fetch tasks with two arguments, session and project, but your fetch function definition only takes session. And one more thing you can change is removing the loop that iterates over the gather result, because gather already gives you the output you want to return.
The code would look like this:
async def fetch(session, project):
    url = 'http://httpbin.org/status/404'
    async with session.request('GET', url) as response:
        if response.status == 200:
            try:
                return await response.json()
            except ValueError:
                pass
    return None

async def fetch_all(project_list):
    async with ClientSession() as session:
        tasks = [fetch(session, project) for project in project_list]
        return await asyncio.gather(*tasks)

def get_data(project_list):
    loop = asyncio.get_event_loop()
    output = loop.run_until_complete(fetch_all(project_list))
    return output
As for advice: use ensure_future only when you want to start a coroutine running immediately but will need its result only later.
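A small sketch of that "start now, await later" pattern (slow_fetch is a made-up stand-in, not part of the question's code):

import asyncio

async def slow_fetch():
    await asyncio.sleep(1)
    return "payload"

async def main():
    task = asyncio.ensure_future(slow_fetch())  # schedule slow_fetch() to run concurrently
    # ... do other work here while slow_fetch() runs in the background ...
    result = await task  # collect the result when it is actually needed
    print(result)

asyncio.run(main())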
I'm not completely sure this is the best way to do it, but I put this together and it worked. If anyone can correct me, please do.
async def fetch_all(project_list):
    output = []
    async with ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch(session, project)) for project in project_list]
        for future in await asyncio.gather(*tasks):
            if future is not None:  # Check if the future is None before adding it
                output += future
    return output
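Since the asker invited corrections, one further tweak (my suggestion, not from the original thread) is to filter out the None results with a list comprehension rather than using += on each item, since += extends output with the elements of each response rather than appending the response itself:

async def fetch_all(project_list):
    async with ClientSession() as session:
        tasks = [fetch(session, project) for project in project_list]
        results = await asyncio.gather(*tasks)
    # keep only the successful (non-None) responses
    return [r for r in results if r is not None]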
I'm making a small application that attempts to find company website URLs by searching for their names via Bing. It takes in a big list of company names, uses the Bing Search API to obtain the first URL, and saves those URLs back in the list.
I'm having a problem with aiohttp's ClientSession.get() method: specifically, it fails silently and I can't figure out why.
Here's how I'm initializing the script. Keep an eye out for worker.perform_mission():
async def _execute(workers, *, loop=None):
    if not loop:
        loop = asyncio.get_event_loop()
    [asyncio.ensure_future(i.perform_mission(verbose=True), loop=loop) for i in workers]

def main():
    filepth = 'c:\\SOME\\FILE\\PATH.xlsx'
    cache = pd.read_excel(filepth)

    # CHANGE THE NUMBER IN range(<here>) TO ADD MORE WORKERS.
    workers = (Worker(cache) for i in range(1))

    loop = asyncio.get_event_loop()
    loop.run_until_complete(_execute(workers, loop=loop))

    ...<MORE STUFF>...
The worker.perform_mission() method does the following (scroll to the bottom and look at _split_up_request_like_they_do_in_the_docs()):
class Worker(object):
    def __init__(self, shared_cache):
        ...<MORE STUFF>...

    async def perform_mission(self, verbose=False):
        while not self.mission_complete:
            if not self.company_name:
                await self.find_company_name()
                if verbose:
                    print('Obtained Company Name')

            if self.company_name and not self.website:
                print('Company Name populated but no website found yet.')
                data = await self.call_bing()  # <<<<< THIS IS SILENTLY FAILING.
                if self.website and ok_to_set_website(self.shared_cache, self):
                    await self.try_set_results(data)
                    self.mission_complete = True
                else:
                    print('{} worker failed at setting website.'.format(self.company_name))
                    pass
            else:
                print('{} worker failed at obtaining data from Bing.'.format(self.company_name))
                pass

    async def call_bing(self):
        async with aiohttp.ClientSession() as sesh:
            sesh.headers = self.headers
            sesh.params = self.params
            return await self._split_up_request_like_they_do_in_the_docs(sesh)

    async def _split_up_request_like_they_do_in_the_docs(self, session):
        print('_bing_request() successfully called.')  # <<< THIS CATCHES
        async with session.get(self.search_url) as resp:
            print('Session.get() successfully called.')  # <<< THIS DOES NOT.
            return await resp.json()
And finally my output is:
Obtained Company Name
Company Name populated but no website found yet.
_bing_request() successfully called.
Process finished with exit code 0
Can anyone help me figure out why print('Session.get() successfully called.') isn't triggering? Or maybe help me ask this question better?
Take a look at this part:
async def _execute(workers, *, loop=None):
    # ...
    [asyncio.ensure_future(i.perform_mission(verbose=True), loop=loop) for i in workers]
You create a bunch of tasks, but you never await them. That means _execute itself finishes right after the tasks are created, long before those tasks are done. And since you run the event loop only until _execute is done, it stops shortly after starting.
To fix this, use asyncio.gather to wait until multiple awaitables are finished:
async def _execute(workers, *, loop=None):
    # ...
    tasks = [asyncio.ensure_future(i.perform_mission(verbose=True), loop=loop) for i in workers]
    await asyncio.gather(*tasks)
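As a side note, and as mentioned in another answer on this page, gather also accepts plain coroutines and wraps them in tasks itself, so a slightly shorter sketch of the same fix would be:

async def _execute(workers, *, loop=None):
    # asyncio.gather schedules plain coroutines as tasks on its own
    await asyncio.gather(*(i.perform_mission(verbose=True) for i in workers))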
I have the below code that does GET requests against an HTTP endpoint. Doing them one at a time is super slow, so the code below does them 50 at a time, but I need to add the results to a set (I figured a set would be fastest, because this script will return duplicate objects). Right now this just returns the objects in a string, 50 at a time, when I need them separated so I can sort them after they are all in a set. I'm new to Python, so I'm not sure what else to try.
import asyncio
from aiohttp import ClientSession

async def fetch(url, session):
    async with session.get(url) as response:
        return await response.read()

async def run(r):
    url = "http://httpbin.org/get"
    tasks = []

    # Fetch all responses within one Client session,
    # keep connection alive for all requests.
    async with ClientSession() as session:
        for i in range(r):
            task = asyncio.ensure_future(fetch(url.format(i), session))
            tasks.append(task)

        responses = await asyncio.gather(*tasks)
        # you now have all response bodies in this variable
        print(responses)

def print_responses(result):
    print(result)

loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(20))
loop.run_until_complete(future)
Right now it just dumps all of the request responses into result; I need it to add each response to a set so I can work with the data later.
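For illustration only (this is my reading of the intent, not an answer from the original thread), the gathered bodies could be collected into a set right after the gather call; the bodies returned by response.read() are bytes objects, which are hashable:

async def run(r):
    url = "http://httpbin.org/get"
    async with ClientSession() as session:
        tasks = [asyncio.ensure_future(fetch(url, session)) for _ in range(r)]
        responses = await asyncio.gather(*tasks)
    unique = set(responses)  # a set drops duplicate response bodies
    return sorted(unique)    # sort once everything has been collected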
I'm attempting to get some data from a paginated API (specifically github's, but the API doesn't matter for this question). I'm using a python asynchronous generator to yield each individual row from each page. The code looks something like this:
async def get_data():
    cursor = None
    async with aiohttp.ClientSession() as session:
        while True:
            async with session.get(build_url(cursor)) as response:
                data = await response.json()
            for row in get_rows(data):
                yield row
            if not has_next_page(data):
                return
            cursor = get_next_cursor(data)
So, this basically works. However, one of the minor flaws is that it doesn't initiate the next request until after all the rows have been yielded from the current page. Is there a good way to initiate that processing inside of this loop, before starting to yield? In particular, I want to make sure that the async with is still evaluated correctly when doing asyncio.ensure_future, which is the API for initiating background work.
You'll need at least one extra coroutine to achieve that, and bridge the two with an asyncio.Queue:
async def get_data():
    queue = asyncio.Queue()

    async def fetch_all_pages():
        cursor = None
        async with aiohttp.ClientSession() as session:
            while True:
                async with session.get(build_url(cursor)) as response:
                    data = await response.json()
                await queue.put(data)
                if not has_next_page(data):
                    # signal the peer to exit
                    await queue.put(None)
                    break
                cursor = get_next_cursor(data)

    asyncio.ensure_future(fetch_all_pages())
    while True:
        data = await queue.get()
        if not data:
            break
        for row in get_rows(data):
            yield row
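For completeness, a hypothetical caller would consume this async generator with async for (process() is a placeholder, and the helpers are assumed to exist as in the question):

import asyncio

async def main():
    async for row in get_data():
        process(row)  # placeholder for whatever is done with each row

asyncio.run(main())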