async def main():
    uuids = await get_uuids_from_text_file()
    tasks = []
    # create a task for each uuid
    # and add it to the list of tasks
    for uuid in uuids:
        task = asyncio.create_task(make_hypixel_request(uuid))
        tasks.append(task)
    # wait for all the tasks to finish
    responses = await asyncio.gather(*tasks)
    # run the functions to process the data
    for response in responses:
        print(response['success'])
        data2 = await anti_sniper_request(response)
        await store_data_in_json_file(response, data2)
        await compare_stats(response)

# loop the main function
async def main_loop():
    for _ in itertools.repeat([]):
        await main()

# run the loop
loop = asyncio.get_event_loop()
loop.run_until_complete(main_loop())
loop.close()
Basically this is my code; the functions have clear, self-explanatory names. I have no issues with the make_hypixel_request part: the requests are executed immediately and in parallel. The problem comes after that, in the "for response in responses" loop, which goes really slowly. How do I get the responses instantly and loop through them very fast?
The reason is that after waiting for all the responses to return, you process them in a loop instead of asynchronously. Since none of the requests seem to depend on each other, waiting for them all to finish before processing them doesn't make sense. The best way to handle this is to couple the request and the processing, e.g.:
async def request_and_process(uuid):
    response = await make_hypixel_request(uuid)
    print(response['success'])
    compare_stats_task = asyncio.create_task(compare_stats(response))
    data2 = await anti_sniper_request(response)
    await asyncio.gather(store_data_in_json_file(response, data2), compare_stats_task)

async def main():
    while True:
        uuids = await get_uuids_from_text_file()
        await asyncio.gather(*map(request_and_process, uuids))

asyncio.run(main())
You can use asyncio.wait and return when at least one task is completed, then continue awaiting the pending tasks. asyncio.wait returns a tuple with two sets: the first with the completed tasks, the second with the still-pending tasks. You can call the result method of the done tasks to get their return values.
async def main():
    uuids = await get_uuids_from_text_file()
    tasks = []
    # create a task for each uuid
    # and add it to the list of tasks
    for uuid in uuids:
        task = asyncio.create_task(make_hypixel_request(uuid))
        tasks.append(task)
    # process each task as soon as it completes
    while tasks:
        done_tasks, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        for done in done_tasks:
            response = done.result()
            print(response['success'])
            data2 = await anti_sniper_request(response)
            await store_data_in_json_file(response, data2)
            await compare_stats(response)
I encountered a strange situation. StreamReader in asyncio seems to be blocking.
received = asyncio.Queue()

async def read_loop():
    while True:
        received_data = await reader.read(4096)
        await received.put(received_data)
Then when I do
task = asyncio.create_task(read_loop())
# some time later....
task.cancel()
The task won't get cancelled. However, when I add one single line to this function:
async def read_loop():
    while True:
        await asyncio.sleep(0)  # THIS LINE
        received_data = await reader.read(4096)
        await received.put(received_data)
Everything is working and the task gets cancelled.
It's as if the await on reader.read doesn't return control to the event loop. I thought both await expressions and asyncio.sleep return control to the event loop.
I want to understand why adding the sleep call helped. I'd be grateful for any help.
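For what it's worth, here is a minimal, self-contained sketch (my own example, not the reader code above) of the general rule in play: cancellation is only delivered at an await that actually suspends the coroutine. Awaiting something that is already ready never hands control back to the event loop, while await asyncio.sleep(0) always does.
import asyncio

async def spin(yield_each_iteration: bool):
    # a future that is already done: awaiting it returns immediately
    # without ever suspending the coroutine
    done_fut = asyncio.get_running_loop().create_future()
    done_fut.set_result(None)
    for _ in range(100_000):
        if yield_each_iteration:
            await asyncio.sleep(0)  # always suspends, so cancel() can be delivered here
        await done_fut              # already done: never yields to the event loop

async def demo(yield_each_iteration: bool):
    task = asyncio.create_task(spin(yield_each_iteration))
    await asyncio.sleep(0)  # give the task a chance to start running
    task.cancel()
    try:
        await task
        print("finished normally (cancellation was never delivered)")
    except asyncio.CancelledError:
        print("cancelled")

async def main():
    await demo(yield_each_iteration=True)   # -> cancelled
    await demo(yield_each_iteration=False)  # -> finished normally

asyncio.run(main())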
I'm scraping some websites, parallelizing the requests library using asyncio:
def run():
    asyncio.run(scrape())

def check_link(link):
    #.... code code code ...
    response = requests.get(link)
    #.... code code code ...
    write_some_stats_into_db()

async def scrape():
    #.... code code code ...
    task = asyncio.get_event_loop().run_in_executor(None, check_link, link)
    #.... code code code ...
    if done:
        for task in all_tasks:
            task.cancel()
I only need to find one 'correct' link, and after that I can stop the program. However, because check_link is run in an executor, its threads are automatically daemonized, so even after calling task.cancel(), I have to wait for all of the other still-running check_link calls to complete.
Do you have any ideas how to 'force-kill' the other checks running in the thread executor?
You can do it the following way. Actually, from my point of view, if you do not have to use asyncio for the task, use plain threads without any async loop, since asyncio makes your code more complicated:
import asyncio
from random import randint
import time
from functools import partial

# imagine that this is the links array
LINKS = list(range(1000))
# how many thread-workers you want to have simultaneously
WORKERS_NUM = 10
# stops the app
STOP_EVENT = asyncio.Event()
STOP_EVENT.clear()


def check_link(link: int) -> int:
    """Checks a link in another thread and returns the result."""
    time.sleep(3)
    r = randint(1, 11)
    print(f"{link}____{r}\n")
    return r


async def check_link_wrapper(q: asyncio.Queue):
    """Async wrapper around the sync function."""
    loop = asyncio.get_event_loop()
    while not STOP_EVENT.is_set():
        link = await q.get()
        if link is None:
            break
        value = await loop.run_in_executor(None, func=partial(check_link, link))
        if value == 10:
            STOP_EVENT.set()
            print("Hurray! We got TEN !")


async def feeder(q: asyncio.Queue):
    """Send tasks and a "poison pill" to all workers."""
    # send tasks to workers
    for link in LINKS:
        await q.put(link)
    # ask workers to stop
    for _ in range(WORKERS_NUM):
        await q.put(None)


async def amain():
    """Main async function of the app."""
    # maxsize is one since we want the app
    # to stop as fast as possible if the stop condition is met
    q = asyncio.Queue(maxsize=1)
    # we create a separate task, since we do not want to await the feeder;
    # we are interested only in the workers
    asyncio.create_task(feeder(q))
    await asyncio.gather(
        *[check_link_wrapper(q) for _ in range(WORKERS_NUM)],
    )


if __name__ == '__main__':
    asyncio.run(amain())
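For comparison, here is a minimal threads-only sketch of the "just use threads, no async loop" alternative mentioned above (my own illustration, not code from the answer). It uses a shared threading.Event so the remaining workers skip their work once one of them has found a "correct" link:
import time
from random import randint
from threading import Event
from concurrent.futures import ThreadPoolExecutor

LINKS = list(range(1000))
WORKERS_NUM = 10
STOP_EVENT = Event()

def check_link(link):
    if STOP_EVENT.is_set():
        return                # a "correct" link was already found, do nothing
    time.sleep(3)             # stands in for requests.get(link)
    if randint(1, 11) == 10:
        print(f"{link} is the one!")
        STOP_EVENT.set()

with ThreadPoolExecutor(max_workers=WORKERS_NUM) as pool:
    pool.map(check_link, LINKS)
Note that the threads already sleeping still have to finish their current call; the event only prevents new work from starting, which is the same limitation as in the asyncio version above.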
I first make a simple request to get a JSON containing all the names, then I iterate over the names and make asynchronous awaitable calls corresponding to each name, store them in a list called "tasks", and then gather all of them.
The problem is that the server has a limit on API responses per minute, and no matter how low I keep the semaphore value, this code takes the same amount of time to make the API calls (too little to stay within the server's limit), as if the semaphore didn't exist at all. How do I control the API call rate?
<some code>
url = "http://example.com/"
response = requests.request("GET", url, headers=headers)

async def get_api(session, url_dev):
    async with session.get(url_dev, headers=headers) as resp:
        result = await resp.json()
        return result

async def main():
    async with aiohttp.ClientSession() as session:
        sem = asyncio.Semaphore(1)
        tasks = []
        for i in response.json()["Names"]:
            url_dev = "https://example.com/example/" + str(i["Id"])
            await sem.acquire()
            async with sem:
                tasks.append(asyncio.create_task(get_api(session, url_dev)))
        full_list = list()
        async with sem:
            full_list = await asyncio.gather(*tasks)

asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(main())
A semaphore here really isn't the right tool to manage rate limiting unless you are going to increment the semaphore in a separate loop, or add a sleep inside the critical section. You could also schedule a follow-up task to sleep and then release the semaphore.
Further, you've queued all of the tasks inside the critical section, but the execution happens asynchronously to the critical section because you queued it as a task. You need to have the semaphore inside the get_api method.
Also, you're acquiring the semaphore twice; either use the acquire method with try/finally, or use async with, but not both. See the docs.
Here is a simple script illustrating a task loop that starts no more than 5 tasks per 5-second interval:
import asyncio


async def dequeue(sem, sleep):
    """Wait for a duration and then increment the semaphore."""
    try:
        await asyncio.sleep(sleep)
    finally:
        sem.release()


async def task(sem, sleep, data):
    """Decrement the semaphore, schedule an increment, and then work."""
    await sem.acquire()
    asyncio.create_task(dequeue(sem, sleep))
    # logic here
    print(data)


async def main():
    max_concurrent = 5
    sleep = 5
    sem = asyncio.Semaphore(max_concurrent)
    tasks = [asyncio.create_task(task(sem, sleep, i)) for i in range(15)]
    await asyncio.gather(*tasks)


if __name__ == "__main__":
    asyncio.run(main())
You could also wrap this logic in a decorator if you want to get really fancy:
import asyncio
import functools


def rate_limited(max_concurrent, duration):
    def decorator(func):
        semaphore = asyncio.Semaphore(max_concurrent)

        async def dequeue():
            try:
                await asyncio.sleep(duration)
            finally:
                semaphore.release()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            await semaphore.acquire()
            asyncio.create_task(dequeue())
            return await func(*args, **kwargs)

        return wrapper
    return decorator
Then the code becomes the following (note that the semaphore is created outside of asyncio.run, so you need to query the default loop for it to work properly):
@rate_limited(max_concurrent=5, duration=5)
async def task(i):
    print(i)


async def main():
    tasks = [asyncio.create_task(task(i)) for i in range(7)]
    await asyncio.gather(*tasks)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
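For completeness, here is a minimal sketch of the other alternative mentioned at the top of this answer, sleeping inside the critical section (my own illustration, not part of the original answer): each task holds its semaphore slot for the whole interval, so at most 5 tasks run per 5-second window.
import asyncio

async def task(sem, duration, data):
    async with sem:                    # at most max_concurrent tasks inside at once
        print(data)                    # the request / work would go here
        await asyncio.sleep(duration)  # hold the slot for the rest of the interval

async def main():
    sem = asyncio.Semaphore(5)
    await asyncio.gather(*(task(sem, 5, i) for i in range(15)))

asyncio.run(main())
With this variant the time spent doing the work itself counts toward the interval, unlike the dequeue approach above, which measures the interval from the moment the task starts.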
You should acquire and release the semaphore object when you run the request to the API endpoint in get_api, instead of when you create the tasks and gather the results. Also, based on your sample use case, there should be no need to manually call sem.acquire and sem.release when you use its context manager instead:
async def get_api(session, sem: asyncio.Semaphore, url_dev):
    # below, using both the semaphore and session.get in a context manager;
    # now the semaphore will properly block requests when the limit has been
    # reached, until others have finished
    async with sem, session.get(url_dev, headers=headers) as resp:
        result = await resp.json()
        return result

async def main():
    sem = asyncio.Semaphore(1)
    async with aiohttp.ClientSession() as session:
        tasks = []
        for i in response.json()["Names"]:
            url_dev = "https://example.com/example/" + str(i["Id"])
            # passing the semaphore instance to get_api
            tasks.append(asyncio.create_task(get_api(session, sem, url_dev)))
        full_list = await asyncio.gather(*tasks)
I am new to asyncio and I don't know how to describe my question, but here is a minimal example:
import asyncio


async def work():
    await asyncio.sleep(3)


async def check_it():
    task = asyncio.create_task(work())
    await task
    while True:
        if task.done():
            print("Done")
            break
        print("Trying...")


asyncio.run(check_it())
My idea is very simple:
Create an async task in check_it() and await it.
Use a while loop to check whether the task is finished.
If task.done() returns True, break the while loop, then exit the script.
If my question is a duplicate, please flag it. Thanks!
Try asyncio.wait with a timeout, or use asyncio.sleep; otherwise your program will produce a lot of output without any pause.
import asyncio


async def work():
    await asyncio.sleep(3)


async def check_it():
    task = asyncio.create_task(work())
    # "await task" here would block until the task finishes, so do not await yet.
    timeout = 0  # Probably the first timeout is 0
    while True:
        done, pending = await asyncio.wait({task}, timeout=timeout)
        if task in done:
            print('Done')
            # Awaiting here is favourable in case any exception was raised.
            await task
            break
        print('Trying...')
        timeout = 1


asyncio.run(check_it())
I'm confused about how to use asyncio.Queue for a particular producer-consumer pattern in which both the producer and consumer operate concurrently and independently.
First, consider this example, which closely follows that from the docs for asyncio.Queue:
import asyncio
import random
import time


async def worker(name, queue):
    while True:
        sleep_for = await queue.get()
        await asyncio.sleep(sleep_for)
        queue.task_done()
        print(f'{name} has slept for {sleep_for:0.2f} seconds')


async def main(n):
    queue = asyncio.Queue()
    total_sleep_time = 0
    for _ in range(20):
        sleep_for = random.uniform(0.05, 1.0)
        total_sleep_time += sleep_for
        queue.put_nowait(sleep_for)

    tasks = []
    for i in range(n):
        task = asyncio.create_task(worker(f'worker-{i}', queue))
        tasks.append(task)

    started_at = time.monotonic()
    await queue.join()
    total_slept_for = time.monotonic() - started_at

    for task in tasks:
        task.cancel()
    # Wait until all worker tasks are cancelled.
    await asyncio.gather(*tasks, return_exceptions=True)

    print('====')
    print(f'3 workers slept in parallel for {total_slept_for:.2f} seconds')
    print(f'total expected sleep time: {total_sleep_time:.2f} seconds')


if __name__ == '__main__':
    import sys
    n = 3 if len(sys.argv) == 1 else int(sys.argv[1])
    asyncio.run(main(n))
There is one finer detail about this script: the items are put into the queue synchronously, with queue.put_nowait(sleep_for) in a conventional for-loop.
My goal is to create a script that uses async def worker() (or consumer()) and async def producer(). Both should be scheduled to run concurrently. No one consumer coroutine is explicitly tied to or chained from a producer.
How can I modify the program above so that the producer(s) is its own coroutine that can be scheduled concurrently with the consumers/workers?
There is a second example from PYMOTW. It requires the producer to know the number of consumers ahead of time, and uses None as a signal to the consumer that production is done.
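For reference, here is a minimal sketch of that sentinel approach (my own illustration, not the PYMOTW code): the producer puts one None per consumer, and each consumer exits when it receives one.
import asyncio
import random

NUM_CONSUMERS = 3

async def producer(queue):
    for _ in range(10):
        await queue.put(random.random())
    for _ in range(NUM_CONSUMERS):
        await queue.put(None)      # one "poison pill" per consumer

async def consumer(name, queue):
    while True:
        item = await queue.get()
        if item is None:           # sentinel: no more items will arrive
            break
        print(f'{name} got {item:.3f}')

async def main():
    queue = asyncio.Queue()
    consumers = [asyncio.create_task(consumer(f'c{i}', queue))
                 for i in range(NUM_CONSUMERS)]
    await producer(queue)
    await asyncio.gather(*consumers)

asyncio.run(main())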
How can I modify the program above so that the producer(s) is its own coroutine that can be scheduled concurrently with the consumers/workers?
The example can be generalized without changing its essential logic:
Move the insertion loop to a separate producer coroutine.
Start the consumers in the background, letting them process the items as they are produced.
With the consumers running, start the producers and wait for them to finish producing items, as with await producer() or await gather(*producers), etc.
Once all producers are done, wait for consumers to process the remaining items with await queue.join().
Cancel the consumers, all of which are now idly waiting for the queue to deliver the next item, which will never arrive as we know the producers are done.
Here is an example implementing the above:
import asyncio, random
async def rnd_sleep(t):
# sleep for T seconds on average
await asyncio.sleep(t * random.random() * 2)
async def producer(queue):
while True:
# produce a token and send it to a consumer
token = random.random()
print(f'produced {token}')
if token < .05:
break
await queue.put(token)
await rnd_sleep(.1)
async def consumer(queue):
while True:
token = await queue.get()
# process the token received from a producer
await rnd_sleep(.3)
queue.task_done()
print(f'consumed {token}')
async def main():
queue = asyncio.Queue()
# fire up the both producers and consumers
producers = [asyncio.create_task(producer(queue))
for _ in range(3)]
consumers = [asyncio.create_task(consumer(queue))
for _ in range(10)]
# with both producers and consumers running, wait for
# the producers to finish
await asyncio.gather(*producers)
print('---- done producing')
# wait for the remaining tasks to be processed
await queue.join()
# cancel the consumers, which are now idle
for c in consumers:
c.cancel()
asyncio.run(main())
Note that in real-life producers and consumers, especially those that involve network access, you probably want to catch IO-related exceptions that occur during processing. If the exception is recoverable, as most network-related exceptions are, you can simply catch it and log the error. You should still invoke task_done() because otherwise queue.join() will hang due to an unprocessed item. If it makes sense to retry processing the item, you can put it back into the queue prior to calling task_done(). For example:
# like the above, but handling exceptions during processing:
async def consumer(queue):
    while True:
        token = await queue.get()
        try:
            # this uses aiohttp or whatever
            await process(token)
        except aiohttp.ClientError as e:
            print(f"Error processing token {token}: {e}")
            # If it makes sense, return the token to the queue to be
            # processed again. (You can use a counter to avoid
            # processing a faulty token infinitely.)
            #await queue.put(token)
        queue.task_done()
        print(f'consumed {token}')