How to use aiomultiprocess?

I found this package aiomultiprocess that seems like it can do both multiprocessing and asyncio.
from aiohttp import request
from aiomultiprocess import Pool

async def get(url):
    async with request("GET", url) as response:
        return await response.text("utf-8")

async def main():
    urls = ["https://jreese.sh", "https://www.google.com", ]
    async with Pool() as pool:
        async for result in pool.map(get, urls):
            print(result)
Trying to run the sample code, though, does absolutely nothing.
Trying to call main() gives me RuntimeWarning: coroutine 'main' was never awaited. I can't find an actual example of how to trigger the code.
The only other question about this isn't answered.

The aiomultiprocess documentation example does not show how to start the event loop. The main() coroutine needs to be run via asyncio.
import asyncio
from aiohttp import request
from aiomultiprocess import Pool

async def get(url):
    async with request("GET", url) as response:
        return await response.read()

async def main():
    urls = ["https://jreese.sh", "https://www.google.com", ]
    async with Pool() as pool:
        async for result in pool.map(get, urls):
            print(result)

if __name__ == '__main__':
    # for Python 3.7+
    asyncio.run(main())

    # for Python 3.6
    # loop = asyncio.get_event_loop()
    # loop.run_until_complete(main())

Related

Alternative to asyncio.gather which I can keep adding coroutines to at runtime?

I need to be able to keep adding coroutines to the asyncio loop at runtime. I tried using create_task() thinking that this would do what I want, but it still needs to be awaited.
This is the code I had; I'm not sure if there is a simple edit to make it work:
async def get_value_from_api():
    global ASYNC_CLIENT
    return ASYNC_CLIENT.get(api_address)

async def print_subs():
    count = await get_value_from_api()
    print(count)

async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        time.sleep(0.1)

async def start():
    global ASYNC_CLIENT
    async with httpx.AsyncClient() as ASYNC_CLIENT:
        await save_subs_loop()

asyncio.run(start())
I once created a similar pattern when I was mixing trio and kivy, as a demonstration of running multiple coroutines asynchronously.
It used a trio.MemoryChannel, which is roughly equivalent to asyncio.Queue; I'll just refer to it as a queue here.
The main idea is:
Wrap each task in a class that has a run method.
Have the object's async run method put the object itself back into the queue when its execution is done.
Create a global task-spawning loop that waits for objects in the queue and schedules their execution (creates a task for each).
import asyncio
import traceback

import httpx


async def task_1(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/")
    print(resp.read())
    await asyncio.sleep(0.1)  # without this would be IP ban

async def task_2(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/meow/")
    print(resp.read())
    await asyncio.sleep(0.5)


class CoroutineWrapper:
    def __init__(self, queue: asyncio.Queue, coro_func, *param):
        self.func = coro_func
        self.param = param
        self.queue = queue

    async def run(self):
        try:
            await self.func(*self.param)
        except Exception:
            traceback.print_exc()
            return

        # put itself back into queue
        await self.queue.put(self)


class KeepRunning:
    def __init__(self):
        # queue for gathering CoroutineWrapper
        self.queue = asyncio.Queue()

    def add_task(self, coro, *param):
        wrapped = CoroutineWrapper(self.queue, coro, *param)

        # add tasks to be executed in queue
        self.queue.put_nowait(wrapped)

    async def task_processor(self):
        task: CoroutineWrapper
        while task := await self.queue.get():
            # wait for a new CoroutineWrapper object, then schedule its async method execution
            asyncio.create_task(task.run())


async def main():
    keep_running = KeepRunning()

    async with httpx.AsyncClient() as client:
        keep_running.add_task(task_1, client)
        keep_running.add_task(task_2, client)

        await keep_running.task_processor()


asyncio.run(main())
Server
import time

from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello():
    return str(time.time())

@app.route("/meow/")
def meow():
    return "meow"

app.run()
Output:
b'meow'
b'1639920445.965701'
b'1639920446.0767004'
b'1639920446.1887035'
b'1639920446.2986999'
b'1639920446.4067013'
b'meow'
b'1639920446.516704'
b'1639920446.6267014'
...
You can see the tasks running repeatedly at their own pace.
Old answer
It seems like you only want to cycle through a fixed set of tasks.
In that case, just iterate over a list of coroutines with itertools.cycle.
But this is no different from synchronous code, so let me know if you need it to be asynchronous.
import asyncio
import itertools

import httpx


async def main_task(client: httpx.AsyncClient):
    resp = await client.get("http://127.0.0.1:5000/")
    print(resp.read())
    await asyncio.sleep(0.1)  # without this would be IP ban


async def main():
    async with httpx.AsyncClient() as client:
        for coroutine in itertools.cycle([main_task]):
            await coroutine(client)


asyncio.run(main())
Server:
import time

from flask import Flask

app = Flask(__name__)

@app.route("/")
def hello():
    return str(time.time())

app.run()
Output:
b'1639918937.7694323'
b'1639918937.8804302'
b'1639918937.9914327'
b'1639918938.1014295'
b'1639918938.2124324'
b'1639918938.3204308'
...
asyncio.create_task() works as you describe it. The problem is that you create an infinite loop that never yields:
async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        time.sleep(0.1)  # do not use time.sleep() in async code EVER
save_subs_loop() keeps creating tasks but control is never yielded back to the event loop, because there is no await in there. Try
async def save_subs_loop():
    while True:
        asyncio.create_task(print_subs())
        await asyncio.sleep(0.1)  # yield control back to the loop to give tasks a chance to actually run
This problem is so common I'm thinking python should raise a RuntimeError if it detects time.sleep() within a coroutine :-)
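Not part of the original answer, but a minimal self-contained sketch of the difference (the background_task / blocking_loop / yielding_loop names are made up for the illustration): time.sleep() blocks the whole event loop so no other task gets to run, while await asyncio.sleep() suspends only the current coroutine.

import asyncio
import time


async def background_task():
    # should tick every 50 ms whenever the event loop is free
    while True:
        print("background task ran")
        await asyncio.sleep(0.05)


async def blocking_loop():
    # time.sleep() blocks the whole event loop: the background task cannot run here
    for _ in range(3):
        time.sleep(0.1)
        print("blocking iteration")


async def yielding_loop():
    # asyncio.sleep() suspends only this coroutine: the background task runs in between
    for _ in range(3):
        await asyncio.sleep(0.1)
        print("yielding iteration")


async def main():
    asyncio.create_task(background_task())
    await blocking_loop()   # no "background task ran" lines appear during this phase
    await yielding_loop()   # "background task ran" lines interleave here


asyncio.run(main())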
You might want to try the TaskThread framework.
It allows you to add tasks at runtime.
Tasks are re-scheduled periodically (like in your while loop up there).
There is a consumer/producer framework built in (parent/child relationships), which you seem to need.
Disclaimer: I wrote TaskThread out of necessity and it's been a life saver.

Send asynchronous HTTP requests one after another

I am trying to use aiohttp to send requests one after another like this
import aiohttp
import asyncio
from datetime import datetime


async def main():
    request_url = "https://..."
    async with aiohttp.ClientSession() as session:
        while True:
            print(datetime.now())
            async with session.get(request_url) as response:
                json_data = await response.json()
                print(json_data)
            await asyncio.sleep(0.2)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
So I would expect each datetime print to be 0.2s apart. However, they seem to be about 0.35s apart as I think it takes 0.15s to get the data from the response. Why is this happening? I want it to be asynchronous so it should just go onto the next one?
How can I fix this?
When you use await, all of the following code waits for the awaited call to finish.
If you want the requests to actually run concurrently, you should use functions like asyncio.gather:
import asyncio
import datetime

import aiohttp


async def fetch(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            print('#', response.status)


async def worker(queue):
    print('START WORKER')
    while True:
        url = await queue.get()
        await fetch(url)
        queue.task_done()


async def control(queue):
    while True:
        print(datetime.datetime.now())
        queue.put_nowait('https://docs.python.org/')
        await asyncio.sleep(0.2)


async def main():
    queue = asyncio.Queue()
    await asyncio.gather(
        control(queue),
        asyncio.gather(*[worker(queue) for _ in range(10)])
    )


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
Sending the HTTP request and fetching the response back takes some time. You need to exclude this time from the asyncio.sleep() call:
import aiohttp
import asyncio
import time
from datetime import datetime


async def main():
    request_url = "https://..."
    async with aiohttp.ClientSession() as session:
        while True:
            print(datetime.now())
            t0 = time.monotonic()
            async with session.get(request_url) as response:
                json_data = await response.json()
                print(json_data)
            t1 = time.monotonic()
            # don't pass a negative value if the request took longer than 0.2 s
            await asyncio.sleep(max(0.0, 0.2 - (t1 - t0)))


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())

Using Python 3.7+ to make 100k API calls, making 100 in parallel using asyncio [duplicate]

This question already has answers here:
Python asyncio.semaphore in async-await function
What is the best approach to deliver, say, 100k API calls using asyncio async/await with Python 3.7+? The idea is to keep 100 tasks in parallel at all times.
What should be avoided is:
1. Starting work on all 100k tasks at once
2. Waiting for all 100 parallel tasks to finish before a new batch of 100 is scheduled
This example illustrates the first approach, which is not what is needed.
import aiohttp
import asyncio


async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()


async def main():
    urls = [
        'http://python.org',
        'https://google.com',
        'http://yifei.me'
    ]
    tasks = []
    async with aiohttp.ClientSession() as session:
        for url in urls:
            tasks.append(fetch(session, url))
        htmls = await asyncio.gather(*tasks)
        for html in htmls:
            print(html[:100])


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
Use a semaphore. Semaphores are used to limit concurrent actions, and Python's asyncio comes with its own async version of a semaphore:
import aiohttp
import asyncio


async def fetch(session, url, sema):
    async with sema, session.get(url) as response:
        return await response.text()


async def main():
    urls = [
        'http://python.org',
        'https://google.com',
        'http://yifei.me',
        'other urls...'
    ]
    tasks = []
    sema = asyncio.BoundedSemaphore(value=100)
    async with aiohttp.ClientSession() as session:
        for url in urls:
            tasks.append(fetch(session, url, sema))
        htmls = await asyncio.gather(*tasks)
        for html in htmls:
            print(html[:100])


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())

Why is aiohttp horribly slower than gevent?

Disclaimer: I am a total beginner in aiohttp
I was experimenting with aiohttp to handle GET requests asynchronously, but it turned out to be horribly slower than the pool version of gevent.
GEVENT VERSION
import gevent
from gevent import monkey
monkey.patch_all()

from gevent.pool import Pool
import requests
import time


def pooling_task(url):
    requests.get(url)


def pooling_main():
    start = time.time()
    pool = Pool(10)
    urls = [
        "http://google.com",
        "http://yahoo.com",
        "http://linkedin.com",
        "http://shutterfly.com",
        "http://mypublisher.com",
        "http://facebook.com"
    ]
    for url in urls:
        pool.apply_async(pooling_task, args=(url,))

    pool.join()
    end = time.time()
    print("POOL TIME {}".format(end - start))


if __name__ == '__main__':
    print("POOLING VERSION")
    pooling_main()
OUTPUT - POOL TIME 6.299163818359375
Following is the aiohttp version
import aiohttp
import asyncio
import time
import uvloop

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())


async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()


async def main():
    urls = [
        "http://google.com",
        "http://yahoo.com",
        "http://linkedin.com",
        "http://shutterfly.com",
        "http://mypublisher.com",
        "http://facebook.com"]

    async with aiohttp.ClientSession() as session:
        for url in urls:
            await fetch(session, url)


if __name__ == "__main__":
    start = time.time()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    end = time.time()
    print("Time taken {}".format(end - start))
OUTPUT - Time taken 15.399710178375244
I really don't understand why aiohttp is so much slower.
In the gevent version requests.get is still a blocking call, but that is not the case for aiohttp.
I expected the aiohttp version to be faster.
for url in urls:
    await fetch(session, url)
await here means that you don't start downloading the next URL before the previous one is done. To make all of the downloads concurrent, you should use something like asyncio.gather.
Modify your code like this:
async with aiohttp.ClientSession() as session:
    await asyncio.gather(*[
        fetch(session, url)
        for url
        in urls
    ])
You'll see a huge speedup.
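For completeness, here is a hedged, self-contained sketch of the corrected script (same URL list as the question; uvloop is left out since it is optional and unrelated to the fix):

import aiohttp
import asyncio
import time


async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()


async def main():
    urls = [
        "http://google.com",
        "http://yahoo.com",
        "http://linkedin.com",
        "http://shutterfly.com",
        "http://mypublisher.com",
        "http://facebook.com",
    ]
    async with aiohttp.ClientSession() as session:
        # start all downloads concurrently instead of awaiting them one by one
        await asyncio.gather(*[fetch(session, url) for url in urls])


if __name__ == "__main__":
    start = time.time()
    asyncio.run(main())
    print("Time taken {}".format(time.time() - start))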

python asyncio REST api call gives error

I am trying to call a REST API asynchronously using asyncio, but I keep getting the warning "coroutine was never awaited", which I understand. But I want this behavior: I want my function to return right after posting, without waiting for the result. Here is my code:
async def callCoroutine(data):
    ...  # call a REST API


def lambda_handler(event, context):
    loop = asyncio.get_event_loop()
    task = loop.create_task(callCoroutine(data))
    return
Can someone help?
If you just need to call the API and disregard the result, you can use an Executor in another thread, which will not block the main thread.
To run in an Executor, use AbstractEventLoop.run_in_executor() with an Executor from concurrent.futures.
For the current example, you need to have a running loop somewhere (e.g. if you have some web server or worker, loop.run_forever()).
Fixed code example with a running loop
import asyncio


async def callCoroutine(data):
    print('This is data: %s' % data)


def lambda_handler(event, context):
    loop = asyncio.get_event_loop()
    task = loop.create_task(callCoroutine(context))
    return


lambda_handler(None, {'a': 1})

loop = asyncio.get_event_loop()
loop.run_forever()
Example with run_in_executor()
import asyncio

import requests


def call_rest_api(data):
    print('Triggered REST API in background')
    response = requests.get(data['url'])
    print('Response: %s' % response)


async def main(loop):
    print('Some operations here...')
    data = {'url': 'http://example.com#some_rest_api'}
    loop.run_in_executor(None, call_rest_api, data)
    print('Continue work of main thread...')


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop))
Example with a simple await (if you need to call the API synchronously)
But it is not necessary to use asyncio if you want to write synchronous code.
import asyncio

import requests


async def call_rest_api(data):
    print('Triggered REST API in background')
    # requests.get() is a blocking call, so this coroutine (and the loop) waits for it
    response = requests.get(data['url'])
    print('Response: %s' % response)


async def main(loop):
    print('Some operations here...')
    data = {'url': 'http://example.com#some_rest_api'}
    await call_rest_api(data)
    print('Continue work of main thread...')


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main(loop))
Can you provide a more verbose example of what you have and what you want to achieve?
In your case, the main thread is running synchronously.
So you need to run the asyncio loop in another thread, using asyncio.run_coroutine_threadsafe.
import asyncio
import threading

# Run an asyncio loop in another thread
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever).start()


async def call_coroutine(data):
    # call a REST API
    return


def lambda_handler(event, context):
    # Run the coroutine on the loop of the other thread
    future = asyncio.run_coroutine_threadsafe(call_coroutine(event), loop)

    # If you need to get the result:
    result = future.result()  # This will block the main thread
    do_something_else()

    return


# If you need to stop the asyncio loop
loop.call_soon_threadsafe(loop.stop)
