Python Faust, how to use take to run on multiple values - python

I'm trying to implement a faust agent using take to process multiple messages at the same time.
app = faust.App('vectors-stream')
vector_topic = app.topic('vector', value_type=VectorRecord)
#app.agent(vector_topic)
async def process_entities(stream: faust.streams.Stream):
async for records in stream.take(max_=500, within=timedelta(seconds=5)):
# yield await update_company_partition(records=records)
yield print(len(records))
now, I'm trying to write a test, just to see that the behaviour is as I except.
import asyncio
import random
from unittest import IsolatedAsyncioTestCase
import pytest
from app.data.kafka.consumer import process_entities, VectorRecord, app
class TestKafkaStream(IsolatedAsyncioTestCase):
async def asyncSetUp(self) -> None:
app.conf.store = 'memory://'
def generate_vector(self, dim: int):
return [random.uniform(0.001, 1) for i in range(dim)]
#pytest.mark.asyncio()
async def test_vectors_kafka_stream(self):
async with process_entities.test_context() as agent:
companies = ['se', 'spotify']
for company in companies:
for i in range(10):
_type = random.choice(['JobCascaded', 'UserHistory'])
msg = VectorRecord(company_slug=company, operation='upsert',
vector=self.generate_vector(16), vector_type=_type, id=i)
await agent.put(msg)
But when I put a break point on the yield print(len(records)) row, it prints that the len of records is just 1.

Related

return a value from while loop when using asyncio function

I am trying to connect and recieve messages from multiple websockets concurrently.
For this purpose I made it with asyncio, and it prints messages correctly. But the problem is that I just can print it, not return it.
The simplified example of pseudo code which I am struggle with is as below:
import websockets
import json
symbols_id = [1,2]
## LOOP RUNNING EXAMPLE OF ASYNCIO
async def get_connect(symbols_id):
tasks = []
for _id in symbols_id:
print('conncetion to', _id)
if _id == 1:
a = 0
elif _id == 2:
a = 200
tasks.append(asyncio.create_task(_loop(a)))
return tasks
async def _loop(a):
while True:
print(a)
a+=1
await asyncio.sleep(2.5)
async def ping_func():
while True:
print('------ ping')
await asyncio.sleep(5)
async def main():
tasks = await get_connect(symbols_id)
asyncio.create_task(ping_func())
await asyncio.gather(*tasks)
asyncio.run(main())
As you can see from the code above I used print(a) to print a in each loop.
I test return a instead of print(a) but it was not helpful.
thanks
yield a? return a will exit the function and the loop, yield is usually what you want in asyncio for looped tasks
Finally I found the way of using yield and async for to read data in each loop.
It will work correctly, by changing the code to the following one.
import websockets
import json
symbols_id = [1,2]
global a
a=0
## LOOP RUNNING EXAMPLE OF ASYNCIO
async def get_connect(symbols_id):
tasks = []
for _id in symbols_id:
print('conncetion to', _id)
if _id == 1:
a = 0
elif _id == 2:
a = 200
tasks.append(asyncio.create_task(_loop(a)))
return tasks
async def _loop(param):
global a
a = param
while True:
print(a)
a+=1
await asyncio.sleep(2.5)
async def ping_func():
while True:
print('------ ping')
await asyncio.sleep(5)
async def get_result():
global a
while True:
yield a
await asyncio.sleep(1)
async def main():
tasks = await get_connect(symbols_id)
asyncio.create_task(ping_func())
async for x in get_result():
print(x)
await asyncio.gather(*tasks)
asyncio.run(main())
I was confused with how to use generated data from this code snippet inside the other code snippet. what I found is:
1- Generated data can be accessible with global variables.
2- By defining a class and a property, it can be accessible from every part of the code.

asyncio Add Callback after Task is completed, instead of asyncio.as_completed?

In asyncio, it there a way to attach a callback function to a Task such that this callback function will run after the Task has been completed?
So far, the only way I can figure out is to use asyncio.completed in a loop, as shown below. But this requires 2 lists (tasks and cb_tasks) to hold all the tasks/futures.
Is there a better way to do this?
import asyncio
import random
class Foo:
async def start(self):
tasks = []
cb_tasks = []
# Start ten `do_work` tasks simultaneously
for i in range(10):
task = asyncio.create_task(self.do_work(i))
tasks.append(task)
# How to run `self.handle_work_done` as soon as this `task` is completed?
for f in asyncio.as_completed(tasks):
res = await f
t = asyncio.create_task(self.work_done_cb(res))
cb_tasks.append(t)
await asyncio.wait(tasks + cb_tasks)
async def do_work(self, i):
""" Simulate doing some work """
x = random.randint(1, 10)
await asyncio.sleep(x)
print(f"Finished work #{i}")
return x
async def work_done_cb(self, x):
""" Callback after `do_work` has been completed """
await asyncio.sleep(random.randint(1, 3))
print(f"Finished additional work {x}")
if __name__ == "__main__":
foo = Foo()
asyncio.run(foo.start())

While loop and asyncio

Good day!
I am trying to code a WebSocket connector and using asyncio. I am not that much familiar with asynchronous approaches therefore an incorrect behaviour occurs. Below is the simplified version of the code.
import pandas as pd
import json
import websockets
import asyncio
import time
class BinanceQuotesWS:
def __init__(self,client,pair):
self.quotes = pd.DataFrame(columns=['Timestamp','Price'])
self.pair = pair
self.socket='wss://fstream.binance.com/ws'
self.websocket = None
self.loop = None
self.result = None
def get_quotes(self):
return self.quotes
def start(self):
self.loop = asyncio.get_event_loop()
self.result = self.loop.create_task(self.connect())
async def connect(self):
self.websocket = await websockets.connect(self.socket)
await self.subscribe_quotes()
async def subscribe_quotes(self):
subscribe_message = {
"method": "SUBSCRIBE",
"params":
[
self.pair.lower()+"#trade"
],
"id": 1
}
subscribe_message = json.dumps(subscribe_message)
await self.websocket.send(subscribe_message)
async for msg in self.websocket:
msg = json.loads(msg)
if('p' in msg):
self.quotes.loc[0] = [msg['E'],float(msg['p'])]
temp_ws = BinanceQuotesWS(client,'BTCUSDT')
temp_ws.start()
When I am testing it in Jupyter and execute a cell with temp_ws.get_quotes() manually then every single time the correct dataframe with fresh quotes is returned.
Though in my program I need to have some infinite loop and there comes up an error.
while(True):
quotes = temp_ws.get_quotes()
print(quotes)
time.sleep(3)
The quotes DF is always empty but I can't sort out why (probably because the while cycle is blocking). I will be glad if someone could help to sort out the issue (and give some hints if anything else could be improved in the code in terms of async requests). Thank you.
You could use asyncio.sleep to create async function
async def display(self):
while True:
await asyncio.sleep(3)
quotes = self.get_quotes()
print('time:', quotes['Timestamp'][0], 'price:', quotes['Price'][0])
and add it to loop
self.result2 = self.loop.create_task(self.display())
and then you can run all in the same loop
temp_ws.loop.run_forever()
If you not use run_forever() then it not run connect() - and you don't get values in your standard loop. But this loop has to runs all time and it can't runs at the same time with normal loop (which also has to run all the time). One of the loop would have to run in separated thread.
But await (whit asyncio.sleep) resolves problem. When it sleeps in while True then it goes to other functions and it can run other code - and later some other code uses await and then it can go back to while True.
Maybe in Jupyter it could work with run_forever() because they add many extra functions to make life easier (and elements used in Jupyter may need this loop to work correctly) but in normal program you have to use run_forever() manually.
Minimal working code:
import pandas as pd
import json
import websockets
import asyncio
import time
class BinanceQuotesWS:
def __init__(self,client,pair):
self.quotes = pd.DataFrame(columns=['Timestamp','Price'])
self.pair = pair
self.socket='wss://fstream.binance.com/ws'
self.websocket = None
self.loop = None
self.result = None
def get_quotes(self):
return self.quotes
def start(self):
self.loop = asyncio.get_event_loop()
self.result = self.loop.create_task(self.connect())
self.result2 = self.loop.create_task(self.display())
async def connect(self):
self.websocket = await websockets.connect(self.socket)
await self.subscribe_quotes()
async def subscribe_quotes(self):
subscribe_message = {
"method": "SUBSCRIBE",
"params": [
self.pair.lower()+"#trade"
],
"id": 1
}
subscribe_message = json.dumps(subscribe_message)
await self.websocket.send(subscribe_message)
async for msg in self.websocket:
msg = json.loads(msg)
if('p' in msg):
self.quotes.loc[0] = [msg['E'],float(msg['p'])]
#print(self.quotes)
async def display(self):
while True:
await asyncio.sleep(3)
quotes = self.get_quotes()
print('time:', quotes['Timestamp'][0], 'price:', quotes['Price'][0])
client = ''
temp_ws = BinanceQuotesWS(client,'BTCUSDT')
temp_ws.start()
temp_ws.loop.run_forever()

Asyncio.sleep() seems to sleep forever

I need to write function which adds object to array and deletes it after x seconds. I use asyncio.sleep for delay. Here is the code:
import asyncio
class Stranger:
def __init__(self, address):
self.address = address
class Fortress:
def __init__(self, time_: int, attempts: int):
self.time = time_
self.attempts = attempts
self.strangers_list: list = []
async def _handle_task(self, stranger):
self.strangers_list.append(stranger)
index = len(self.strangers_list) - 1
await asyncio.sleep(self.time)
print('Woke up')
self.strangers_list.pop(index)
async def _create_handle_task(self, stranger):
task = asyncio.create_task(self._handle_task(stranger))
print('Ran _handle_task')
def handle(self, stranger):
asyncio.run(self._create_handle_task(stranger))
async def main(tim):
await asyncio.sleep(tim)
if __name__ == "__main__":
f = Fortress(2, 4)
s = Stranger('Foo street, 32')
f.handle(s)
asyncio.run(main(3))
Theoretically, the output might be:
Ran _handle_task
Woke up
But it is just Ran _handle_task
What's the problem that interferes program to come out of the sleep?
You've created a task, which is an example of an awaitable in asyncio.
You need to await the task in your _create_handle_task method.
async def _create_handle_task(self, stranger):
task = asyncio.create_task(self._handle_task(stranger))
await task
# ^blocks until the task is complete.
print('Ran _handle_task')
Source: asyncio docs

How to run async coroutines from init, wait until it is complete

I am connecting to aioredis from __init__ (I do not want to move it out since this means I have to some extra major changes). How can I wait for aioredis connection task in below __init__ example code and have it print self.sub and self.pub object? Currently it gives an error saying
abc.py:42> exception=AttributeError("'S' object has no attribute
'pub'")
I do see redis connections created and coro create_connetion done.
Is there a way to call blocking asyncio calls from __init__. If I replace asyncio.wait with asyncio.run_until_complete I get an error that roughly says
loop is already running.
asyncio.gather is
import sys, json
from addict import Dict
import asyncio
import aioredis
class S():
def __init__(self, opts):
print(asyncio.Task.all_tasks())
task = asyncio.wait(asyncio.create_task(self.create_connection()), return_when="ALL_COMPLETED")
print(asyncio.Task.all_tasks())
print(task)
print(self.pub, self.sub)
async def receive_message(self, channel):
while await channel.wait_message():
message = await channel.get_json()
await asyncio.create_task(self.callback_loop(Dict(json.loads(message))))
async def run_s(self):
asyncio.create_task(self.listen())
async def callback_loop(msg):
print(msg)
self.callback_loop = callback_loop
async def create_connection(self):
self.pub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
self.sub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
self.db = await aioredis.create_redis("redis://c8:7070/0", password="abc")
self.listener = await self.sub.subscribe(f"abc")
async def listen(self):
self.tsk = asyncio.ensure_future(self.receive_message(self.listener[0]))
await self.tsk
async def periodic(): #test function to show current tasks
number = 5
while True:
await asyncio.sleep(number)
print(asyncio.Task.all_tasks())
async def main(opts):
loop.create_task(periodic())
s = S(opts)
print(s.pub, s.sub)
loop.create_task(s.run_s())
if __name__ == "__main__":
loop = asyncio.get_event_loop()
main_task = loop.create_task(main(sys.argv[1:]))
loop.run_forever() #I DONT WANT TO MOVE THIS UNLESS IT IS NECESSARY
I think what you want to do is to make sure the function create_connections runs to completion BEFORE the S constructor. A way to do that is to rearrange your code a little bit. Move the create_connections function outside the class:
async def create_connection():
pub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
sub = await aioredis.create_redis("redis://c8:7070/0", password="abc")
db = await aioredis.create_redis("redis://c8:7070/0", password="abc")
listener = await self.sub.subscribe(f"abc")
return pub, sub, db, listener
Now await that function before constructing S. So your main function becomes:
async def main(opts):
loop.create_task(periodic())
x = await create_connections()
s = S(opts, x) # pass the result of create_connections to S
print(s.pub, s.sub)
loop.create_task(s.run_s())
Now modify the S constructor to receive the objects created:
def __init__(self, opts, x):
self.pub, self.sub, self.db, self.listener = x
I'm not sure what you're trying to do with the return_when argument and the call to asyncio.wait. The create_connections function doesn't launch a set of parallel tasks, but rather awaits each of the calls before moving on to the next one. Perhaps you could improve performance by running the four calls in parallel but that's a different question.

Categories