Cancelling asyncio task run in executor - python

I'm scraping some websites, parallelizing the requests library using asyncio:
def run():
    asyncio.run(scrape())

def check_link(link):
    #.... code code code ...
    response = requests.get(link)
    #.... code code code ...
    write_some_stats_into_db()

async def scrape():
    #.... code code code ...
    task = asyncio.get_event_loop().run_in_executor(None, check_link, link)
    #.... code code code ...
    if done:
        for task in all_tasks:
            task.cancel()
I only need to find one 'correct' link; after that, I can stop the program. However, because check_link is run in an executor, its threads are automatically daemonized, so even after calling task.cancel() I have to wait for all of the other still-running check_link calls to complete.
Do you have any ideas how to 'force-kill' the other checks running in the thread executor?

You can do it the following way. Actually, from my point of view, if you do not have to use asyncio for this task, use plain threads without any async loop, since asyncio only makes your code more complicated.
import asyncio
from random import randint
import time
from functools import partial

# imagine that this is the links array
LINKS = list(range(1000))
# how many thread-workers you want to have running simultaneously
WORKERS_NUM = 10
# stops the app
STOP_EVENT = asyncio.Event()
STOP_EVENT.clear()

def check_link(link: int) -> int:
    """Checks a link in another thread and returns the result."""
    time.sleep(3)
    r = randint(1, 11)
    print(f"{link}____{r}\n")
    return r

async def check_link_wrapper(q: asyncio.Queue):
    """Async wrapper around the sync function."""
    loop = asyncio.get_event_loop()
    while not STOP_EVENT.is_set():
        link = await q.get()
        if link is None:  # "poison pill" received, no more work
            break
        value = await loop.run_in_executor(None, func=partial(check_link, link))
        if value == 10:
            STOP_EVENT.set()
            print("Hurray! We got TEN!")

async def feeder(q: asyncio.Queue):
    """Send tasks and a "poison pill" to all workers."""
    # send tasks to workers
    for link in LINKS:
        await q.put(link)
    # ask workers to stop
    for _ in range(WORKERS_NUM):
        await q.put(None)

async def amain():
    """Main async function of the app."""
    # maxsize is one since we want the app
    # to stop as fast as possible if the stop condition is met
    q = asyncio.Queue(maxsize=1)
    # we create a separate task, since we do not want to await the feeder;
    # we are interested only in the workers
    asyncio.create_task(feeder(q))
    await asyncio.gather(
        *[check_link_wrapper(q) for _ in range(WORKERS_NUM)],
    )

if __name__ == '__main__':
    asyncio.run(amain())
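For reference, here is a minimal sketch of the threads-only variant suggested in the first paragraph. It reuses check_link, LINKS and WORKERS_NUM from the snippet above and assumes Python 3.9+ for cancel_futures; as with the asyncio version, threads that have already started still finish their current call, since Python threads cannot be force-killed:

import concurrent.futures
import threading

stop = threading.Event()

def guarded_check(link):
    # skip the real work once a hit has already been found
    if stop.is_set():
        return None
    value = check_link(link)
    if value == 10:
        stop.set()
    return value

def run_threaded():
    with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS_NUM) as pool:
        futures = [pool.submit(guarded_check, link) for link in LINKS]
        for fut in concurrent.futures.as_completed(futures):
            if fut.result() == 10:
                # drop queued work items that have not started yet (Python 3.9+)
                pool.shutdown(wait=False, cancel_futures=True)
                break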

Related

python coroutine, perform tasks periodically and cancel

Every 10 minutes, do the following tasks:
- generate a list of image URLs to download
- (if the previous download is not finished, we have to cancel it)
- download the images concurrently
I'm relatively new to coroutines. Can I structure the above with coroutines? I think a coroutine is essentially a sequential flow, so I'm having trouble reasoning about it. Actually, come to think of it, would the following work?
async def generate_urls():
    await asyncio.sleep(10)
    result = _generate_urls()
    return result

async def download_image(url):
    # download images
    image = await _download_image()
    return image

async def main():
    while True:
        urls = await generate_urls()
        for url in urls:
            download_task = asyncio.create_task(download_image(url))
            await download_task

asyncio.run(main())
Your current code is quite close. Below are some modifications to make it more closely align with your original spec:
import asyncio

def generate_urls():
    return _generate_urls()  # no need to sleep in the URL generation function

async def download_image(url):
    image = await _download_image()
    return image

async def main():
    while True:
        # create the download tasks; create_task starts them running concurrently
        tasks = [asyncio.create_task(download_image(url)) for url in generate_urls()]
        await asyncio.sleep(10)  # sleep after creating the tasks
        for t in tasks:  # after 10 seconds, check if any downloads are still running
            if not t.done():
                t.cancel()  # cancel the task if it is not complete
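One detail worth noting: task.cancel() only requests cancellation; the coroutine receives an asyncio.CancelledError at its next await point. If a partially finished download needs cleanup, a hedged sketch (reusing the hypothetical _download_image from above):

async def download_image(url):
    try:
        image = await _download_image()
        return image
    except asyncio.CancelledError:
        # clean up a partially written file, close connections, etc.
        raise  # re-raise so the task is actually marked as cancelled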

Alternative to asyncio.gather which I can keep adding coroutines to at runtime?

I need to be able to keep adding coroutines to the asyncio loop at runtime. I tried using create_task() thinking that this would do what I want, but it still needs to be awaited.
This is the code I had; I'm not sure if there is a simple edit to make it work:
import asyncio
import time
import httpx

async def get_value_from_api():
    global ASYNC_CLIENT
    return ASYNC_CLIENT.get(api_address)

async def print_subs():
    count = await get_value_from_api()
    print(count)

async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        time.sleep(0.1)

async def start():
    global ASYNC_CLIENT
    async with httpx.AsyncClient() as ASYNC_CLIENT:
        await save_subs_loop()

asyncio.run(start())
I once created a similar pattern when I was mixing trio and kivy, as a demonstration of running multiple coroutines asynchronously.
It used a trio.MemoryChannel, which is roughly equivalent to asyncio.Queue; I'll just refer to it as a queue here.
The main idea is:
Wrap each task with a class that has a run function.
Give the class object its own async method that puts the object itself back into the queue when execution is done.
Create a global task-spawning loop that waits for objects in the queue and schedules execution / creates a task for each object.
import asyncio
import traceback

import httpx

async def task_1(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/")
    print(resp.read())
    await asyncio.sleep(0.1)  # without this there would be an IP ban

async def task_2(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/meow/")
    print(resp.read())
    await asyncio.sleep(0.5)

class CoroutineWrapper:
    def __init__(self, queue: asyncio.Queue, coro_func, *param):
        self.func = coro_func
        self.param = param
        self.queue = queue

    async def run(self):
        try:
            await self.func(*self.param)
        except Exception:
            traceback.print_exc()
            return
        # put itself back into the queue
        await self.queue.put(self)

class KeepRunning:
    def __init__(self):
        # queue for gathering CoroutineWrapper objects
        self.queue = asyncio.Queue()

    def add_task(self, coro, *param):
        wrapped = CoroutineWrapper(self.queue, coro, *param)
        # add the task to be executed to the queue
        self.queue.put_nowait(wrapped)

    async def task_processor(self):
        task: CoroutineWrapper
        while task := await self.queue.get():
            # wait for a new CoroutineWrapper object, then schedule its async method
            asyncio.create_task(task.run())

async def main():
    keep_running = KeepRunning()
    async with httpx.AsyncClient() as client:
        keep_running.add_task(task_1, client)
        keep_running.add_task(task_2, client)
        await keep_running.task_processor()

asyncio.run(main())
Server:
import time

from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello():
    return str(time.time())

@app.route("/meow/")
def meow():
    return "meow"

app.run()
Output:
b'meow'
b'1639920445.965701'
b'1639920446.0767004'
b'1639920446.1887035'
b'1639920446.2986999'
b'1639920446.4067013'
b'meow'
b'1639920446.516704'
b'1639920446.6267014'
...
You can see the tasks running repeatedly, each at its own pace.
Old answer
It seems like you only want to cycle through a fixed set of tasks.
In that case, just iterate over a list of coroutines with itertools.cycle.
But this is no different from synchronous code, so let me know if you need it to be asynchronous.
import asyncio
import itertools

import httpx

async def main_task(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/")
    print(resp.read())
    await asyncio.sleep(0.1)  # without this there would be an IP ban

async def main():
    async with httpx.AsyncClient() as client:
        for coroutine in itertools.cycle([main_task]):
            await coroutine(client)

asyncio.run(main())
Server:
import time

from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello():
    return str(time.time())

app.run()
Output:
b'1639918937.7694323'
b'1639918937.8804302'
b'1639918937.9914327'
b'1639918938.1014295'
b'1639918938.2124324'
b'1639918938.3204308'
...
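If you do need the asynchronous variant mentioned above, one possible sketch (not part of the original answer) gives each coroutine function its own endless loop and runs them concurrently with asyncio.gather, reusing task_1 and task_2 from the newer answer:

async def repeat(coro_func, client):
    # run one coroutine function over and over, independently of the others
    while True:
        await coro_func(client)

async def main():
    async with httpx.AsyncClient() as client:
        await asyncio.gather(repeat(task_1, client), repeat(task_2, client))

asyncio.run(main())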
asyncio.create_task() works as you describe. The problem is that you create an infinite loop here:
async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        time.sleep(0.1)  # do not use time.sleep() in async code EVER
save_subs_loop() keeps creating tasks, but control is never yielded back to the event loop because there is no await in there. Try:
async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        await asyncio.sleep(0.1)  # yield control back to the loop to give tasks a chance to actually run
This problem is so common I'm thinking python should raise a RuntimeError if it detects time.sleep() within a coroutine :-)
You might want to try the TaskThread framework:
It allows you to add tasks at runtime.
Tasks are re-scheduled periodically (like in your while loop up there).
There is a consumer/producer framework built in (parent/child relationships), which you seem to need.
Disclaimer: I wrote TaskThread out of necessity and it's been a life saver.

Is it possible to run multiple asyncio loops at the same time in Python?

Based on the solution that I got in Running multiple sockets using asyncio in python, I tried to also add the computation part using asyncio.
Setup: Python 3.7.4
import msgpack
import threading
import os
import asyncio
import concurrent.futures
import functools

import numpy as np
import nest_asyncio
nest_asyncio.apply()

class ThreadSafeElem(bytes):
    def __init__(self, *p_arg, **n_arg):
        self._lock = threading.Lock()
    def __enter__(self):
        self._lock.acquire()
        return self
    def __exit__(self, type, value, traceback):
        self._lock.release()

elem = ThreadSafeElem()

async def serialize(data):
    return msgpack.packb(data, use_bin_type=True)

async def serialize1(data1):
    return msgpack.packb(data1, use_bin_type=True)

async def process_data(data, data1):
    loop = asyncio.get_event_loop()
    future = await loop.run_in_executor(None, functools.partial(serialize, data))
    future1 = await loop.run_in_executor(None, functools.partial(serialize1, data1))
    return await asyncio.gather(future, future1)

################ Calculation #############################
def calculate_data():
    global elem
    while True:
        try:
            # ... data is calculated (some dictionary) ...
            elem, elem1 = asyncio.run(process_data(data, data1))
        except:
            pass
##########################################################

def get_data():
    return elem

def get_data1():
    return elem1

########### START SERVER AND get data continuously ################
async def client_thread(reader, writer):
    while True:
        try:
            bytes_received = await reader.read(100)
            package_type = np.frombuffer(bytes_received, dtype=np.int8)
            if package_type == 1:
                nn_output = get_data1()
            if package_type == 2:
                nn_output = get_data()
            writer.write(nn_output)
            await writer.drain()
        except:
            pass

async def start_servers(host, port):
    server = await asyncio.start_server(client_thread, host, port)
    await server.serve_forever()

async def start_calculate():
    await asyncio.run(calculate_data())

def enable_sockets():
    try:
        host = '127.0.0.1'
        port = 60000
        sockets_number = 6
        loop = asyncio.get_event_loop()
        for i in range(sockets_number):
            loop.create_task(start_servers(host, port + i))
        loop.create_task(start_calculate())
        loop.run_forever()
    except:
        print("weird exceptions")
####################################################################

enable_sockets()
The issue is that when I make a call from a client, the server does not give me anything.
I tested the program with dummy data and no asyncio on the calculation part, i.e. without loop.create_task(start_calculate()), and the server responded correctly.
I also ran the calculation without adding it in enable_sockets and it worked. It also works with this implementation, but the problem is that the server does not return anything.
I did it like this because I need the calculation part to run continuously, and when one of the clients calls, to return the data computed at that point.
An asyncio event loop cannot be nested inside another, and there is no point in doing so: asyncio.run (and similar) blocks the current thread until done. This does not increase parallelism, and merely disables any outer event loop.
If you want to nest another asyncio task, directly run it in the current event loop. If you want to run a non-cooperative, blocking task, run it in the event loop executor.
async def start_calculate():
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, calculate_data)
The default executor uses threads – this allows running blocking tasks, but does not increase parallelism. Use a custom ProcessPoolExecutor to use additional cores:
import concurrent.futures

async def start_calculate():
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        await loop.run_in_executor(pool, calculate_data)
Why do you call asyncio.run() multiple times?
This function always creates a new event loop and closes it at the end. It should be used as the main entry point for asyncio programs, and should ideally only be called once.
I would advise you to read the docs.
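To make that concrete, here is a sketch (mine, not the answerer's code) of how enable_sockets could be restructured so that asyncio.run() is called exactly once, with the blocking calculation delegated to the executor-based start_calculate shown above:

async def amain():
    host, port, sockets_number = '127.0.0.1', 60000, 6
    # all servers plus the calculation task share a single event loop
    tasks = [asyncio.create_task(start_servers(host, port + i))
             for i in range(sockets_number)]
    tasks.append(asyncio.create_task(start_calculate()))
    await asyncio.gather(*tasks)

if __name__ == '__main__':
    asyncio.run(amain())  # the single entry point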

Fire a coroutine from inside a for loop

I'm trying to fire a coroutine from within a loop. Here's a simple example of what I'm trying to achieve:
import time
import random
import asyncio

def listen():
    while True:
        yield random.random()
        time.sleep(3)

async def dosomething(data: float):
    print("Working on data", data)
    asyncio.sleep(2)
    print("Processed data!")

async def main():
    for pos in listen():
        asyncio.create_task(dosomething(pos))

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Unfortunately, this doesn't work and my dosomething coroutine never executes... what am I doing wrong?
The asyncio.create_task function is meant to schedule Task execution; the resulting task should be awaited if you want to wait until it is complete.
Moreover, asyncio.sleep(2) in your code should also be awaited, otherwise it'll throw an error/warning.
The right way:
import time
import random
import asyncio

def listen():
    while True:
        yield random.random()
        time.sleep(3)

async def dosomething(data: float):
    print("Working on data", data)
    await asyncio.sleep(2)
    print("Processed data!")

async def main():
    for pos in listen():
        await asyncio.create_task(dosomething(pos))

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Sample output:
Working on data 0.9645515392725723
Processed data!
Working on data 0.9249656672476657
Processed data!
Working on data 0.13635467058997397
Processed data!
Working on data 0.03941252405458562
Processed data!
Working on data 0.6299882183389822
Processed data!
Working on data 0.9143748948769984
Processed data!
...
I wanted to point out that, after playing around, I ended up using a producer/consumer architecture to achieve what I wanted. I appreciate that I didn't make my exact use case clear in the original question, but here's a simplified snippet of what I ended up implementing:
import asyncio
import random
from datetime import datetime

from pydantic import BaseModel

class Measurement(BaseModel):
    data: float
    time: datetime

async def measure(queue: asyncio.Queue):
    while True:
        # Replicate a blocking call to receive data
        await asyncio.sleep(1)
        print("Measurement complete!")
        for i in range(3):
            data = Measurement(
                data=random.random(),
                time=datetime.utcnow()
            )
            await queue.put(data)
        await queue.put(None)

async def process(queue: asyncio.Queue):
    while True:
        data = await queue.get()
        print(f"Got measurement! {data}")
        # Replicate a pause for an http request
        await asyncio.sleep(0.3)
        print("Sent data to server")

loop = asyncio.get_event_loop()
queue = asyncio.Queue()  # the loop= argument was removed in Python 3.10
measurement = measure(queue)
processor = process(queue)
loop.run_until_complete(asyncio.gather(processor, measurement))
loop.close()
I should point out here (something I didn't quite understand at first) that it's imperative that any blocking calls you make are able to be awaited. Otherwise, you might find that the consumer never executes.
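If the real producer is a truly blocking call (a socket recv, a serial read), one option is to push it into the default executor so it can be awaited; blocking_receive below is a hypothetical stand-in for whatever blocking API is in use:

async def measure(queue: asyncio.Queue):
    loop = asyncio.get_running_loop()
    while True:
        # run the hypothetical blocking call in a worker thread so it can be awaited
        data = await loop.run_in_executor(None, blocking_receive)
        await queue.put(data)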

Python event handler with Async (non-blocking while loop)

import asyncio
import queue

qq = queue.Queue()
qq.put('hi')

class MyApp():
    def __init__(self, q):
        self._queue = q

    def _process_item(self, item):
        print(f'Processing this item: {item}')

    def get_item(self):
        try:
            item = self._queue.get_nowait()
            self._process_item(item)
        except queue.Empty:
            pass

    async def listen_for_orders(self):
        '''
        Asynchronously check the orders queue for new incoming orders
        '''
        while True:
            self.get_item()
            await asyncio.sleep(0)

a = MyApp(qq)
loop = asyncio.get_event_loop()
loop.run_until_complete(a.listen_for_orders())
Using Python 3.6.
I'm trying to write an event handler that constantly listens for messages in the queue and processes them (prints them, in this case). But it must be asynchronous: I need to be able to run it in a terminal (IPython) and manually feed things into the queue (at least initially, for testing).
This code does not work: it blocks forever.
How do I make this run forever but return control after each iteration of the while loop?
Thanks.
Side note:
To make the event loop work with IPython (version 7.2), I'm using code from the ib_insync library; I'm using that library for the real-world problem behind the example above.
You need to make your queue an asyncio.Queue, and add things to the queue in a thread-safe manner. For example:
qq = asyncio.Queue()

class MyApp():
    def __init__(self, q):
        self._queue = q

    def _process_item(self, item):
        print(f'Processing this item: {item}')

    async def get_item(self):
        item = await self._queue.get()
        self._process_item(item)

    async def listen_for_orders(self):
        '''
        Asynchronously check the orders queue for new incoming orders
        '''
        while True:
            await self.get_item()

a = MyApp(qq)
loop = asyncio.get_event_loop()
loop.run_until_complete(a.listen_for_orders())
Your other thread must put stuff in the queue like this:
loop.call_soon_threadsafe(qq.put_nowait, <item>)
call_soon_threadsafe will ensure correct locking, and also that the event loop is woken up when a new queue item is ready.
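For completeness, a minimal sketch of what the producing thread might look like under this scheme; poll_external_resource is a hypothetical stand-in for the blocking network call:

import threading

def network_poller(loop, q):
    while True:
        item = poll_external_resource()  # hypothetical blocking I/O call
        # hand the item over to the event loop thread safely
        loop.call_soon_threadsafe(q.put_nowait, item)

threading.Thread(target=network_poller, args=(loop, qq), daemon=True).start()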
This is not an async queue; you need to use asyncio.Queue:
qq = queue.Queue()
Asyncio is built around an event loop. You call the loop, transferring control to it, and it runs until your function is complete, which never happens:
loop.run_until_complete(a.listen_for_orders())
You commented:
I have another Thread that polls an external network resource for data (I/O intensive) and dumps the incoming messages into this thread.
Write that code async, so you'd have:
async def run():
    while True:
        item = await get_item_from_network()
        process_item(item)

loop = asyncio.get_event_loop()
loop.run_until_complete(run())
If you don't want to do that, what you can do is step through the loop manually, though you generally don't want to do this:
import asyncio

def run_once(loop):
    loop.call_soon(loop.stop)
    loop.run_forever()

loop = asyncio.get_event_loop()
for x in range(100):
    print(x)
    run_once(loop)
Then you simply schedule your async function, and each time you call run_once the loop will check your asyncio queue and pass control to your listen_for_orders function if the queue has an item in it.
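As a rough sketch of how that could look with the asyncio.Queue version from the first answer (scheduling the listener as a task instead of blocking on run_until_complete):

task = loop.create_task(a.listen_for_orders())  # schedule, don't block
qq.put_nowait('hello')  # feed the queue by hand from the same thread
run_once(loop)          # one pass of the loop lets listen_for_orders process the item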
