Can anyone help me out? I'm trying to get the program to pause if the condition is met, but as of now it's not sleeping at all, and I can't wrap my head around why. I'm completely new to asyncio.
time.sleep() doesn't really work either, so I would prefer to use asyncio. Thanks a lot!
from python_graphql_client import GraphqlClient
import asyncio
import os
import requests
loop = asyncio.get_event_loop()
def print_handle(data):
    """Print one live measurement and react when power reaches 1000.

    Used as the ``handle`` callback of ``client.subscribe``. ``data`` is the
    payload dict; the timestamp and power are read from
    ``data["data"]["liveMeasurement"]``.
    """
    measurement = data["data"]["liveMeasurement"]
    print(measurement["timestamp"] + " " + str(measurement["power"]))
    power = measurement["power"]
    if not power >= 1000:
        return
    print("OK")
    # schedule async task from sync code; create_task() returns immediately
    asyncio.create_task(send_push_notification(data))
    print("msg sent")
    # The sleep runs inside its own task, so this callback does not wait for it.
    asyncio.create_task(sleep())
client = GraphqlClient(endpoint="wss://api.tibber.com/v1-beta/gql/subscriptions")
query = """
subscription{
liveMeasurement(homeId:"fd73a8a6ca"){
timestamp
power
}
}
"""
query2 = """
mutation{
sendPushNotification(input: {
title: "Advarsel! Høyt forbruk",
message: "Du bruker 8kw eller mer",
screenToOpen: CONSUMPTION
}){
successful
pushedToNumberOfDevices
}
}
"""
async def sleep():
    """Suspend the task that awaits this coroutine for 10 seconds."""
    await asyncio.sleep(10)
async def send_push_notification(data):
    """Execute the ``query2`` mutation through the module-level ``client``.

    ``data`` is accepted but not used by the current body.
    """
    #maybe update your query with the received data here
    # NOTE(review): the Authorization value is hard-coded in the source.
    await client.execute_async(query=query2,headers={'Authorization': "2bTCaFx74"})
async def main():
    """Subscribe to ``query`` with ``print_handle`` registered as the handle callback."""
    # NOTE(review): the Authorization value is hard-coded in the source.
    await client.subscribe(query=query, headers={'Authorization': "2bTCaFxDiYdHlxBSt074"}, handle=print_handle)
asyncio.run(main())
If I understand correctly, you want to observe broadcasts of some data, and react to those broadcasts, keeping the right to pause those reactions. Something like:
async def monitor(read_broadcast):
    """Consume broadcasts forever, notifying when power reaches 1000.

    ``read_broadcast`` is called without arguments and its result is awaited
    to obtain the next payload dict. After a notification has been sent the
    loop pauses for 10 seconds before reading again.
    """
    while True:
        payload = await read_broadcast()
        reading = payload["data"]["liveMeasurement"]
        print(reading["timestamp"] + " " + str(reading["power"]))
        watts = reading["power"]
        if not watts >= 1000:
            continue
        print("OK")
        await send_push_notification(payload)
        print("msg sent")
        # sleep for a while before sending another one
        await asyncio.sleep(10)
To implement read_broadcast, we can use a "future":
# client, query, query2, send_push_notification defined as before
async def main():
    """Subscribe to live measurements and hand each one to ``monitor``.

    A single pending future represents "the next broadcast". The subscription
    handler resolves it with the received payload, and a done-callback
    immediately installs a fresh future for the following broadcast.
    """
    # Inside a coroutine the running loop is the one to create futures on.
    loop = asyncio.get_running_loop()
    broadcast_fut = None

    def update_broadcast_fut(_fut=None):
        # Called once up front and then every time the current future completes.
        nonlocal broadcast_fut
        broadcast_fut = loop.create_future()
        broadcast_fut.add_done_callback(update_broadcast_fut)

    update_broadcast_fut()

    def read_broadcast():
        return broadcast_fut

    # Keep a reference to the task: the event loop only holds weak references,
    # so an unreferenced task may be garbage-collected before it finishes.
    monitor_task = asyncio.create_task(monitor(read_broadcast))
    try:
        await client.subscribe(
            query=query, headers={'Authorization': "2bTCaFxDiYdHlxBSt074"},
            handle=lambda data: broadcast_fut.set_result(data),
        )
    finally:
        # The subscription ended (or failed); stop the consumer as well.
        monitor_task.cancel()
asyncio.run(main())
Note that I haven't tested the above code, so there could be typos.
I think the easiest way to reduce the number of messages being sent, is to define a minimum interval in which no notification is sent while the value is still over the threshold.
import time
last_notification_timestamp = 0
NOTIFICATION_INTERVAL = 5 * 60 # 5 min
def print_handle(data):
    """Print a measurement and send at most one notification per interval.

    A push notification is scheduled only when power is at least 1000 and
    more than ``NOTIFICATION_INTERVAL`` seconds have passed since
    ``last_notification_timestamp``, which is then updated.
    """
    global last_notification_timestamp

    reading = data["data"]["liveMeasurement"]
    print(reading["timestamp"] + " " + str(reading["power"]))
    tall = reading["power"]
    now = time.time()

    if not tall >= 1000:
        return
    if not now - NOTIFICATION_INTERVAL > last_notification_timestamp:
        # Still inside the quiet period that follows the previous notification.
        return

    print("OK")
    # schedule async task from sync code
    asyncio.create_task(send_push_notification(data))
    last_notification_timestamp = now
    print("msg sent")
The timestamp of the last message sent needs to be stored somewhere, so we define a variable in the global scope to hold it and use the global keyword within print_handle() to be able to write to it from within the function. In the function we then check whether the value is above the threshold and whether enough time has passed since the last message. This way you keep your subscription alive and also limit the number of notifications you receive. This is simple enough, but you will probably soon want to extend what you do with the received data. Just keep in mind that print_handle() is a sync callback and should be as short as possible.
Related
I'm trying to learn asyncio.
I have a list of sensors which should be pulled. Each sensor takes about 1 second to pull. So asyncio is the right task to do this. The sensors may change dynamically and so asyncio should change.
I already have the following code, but now I don't know how to store the fetched values into a resulting dict. Since the consumer only stores the value into a dict, this very fast task could also be done in the producer, so there would be no need for a consumer at all, but I guess the asyncio paradigm requires a consumer.
Perhaps I'm also thinking too complicated and a much easier programming paradigm with less code could be used here?
Please see also comments in the code for further detailed questions and suggestions.
#!/usr/bin/python3
import asyncio
import random
sensors={ #This list changes often
"sensor1" : "http://abc.example.org",
"sensor2" : "http://outside.example.org",
"temperature" : "http://xe.example.com",
"outdoor" : "http://anywhere.example.org"
}
results = dict() #Result from sensor query should go here
async def queryAll(sensors):
    """Run one producer and one consumer task per sensor around a shared queue.

    Waits for all producers, then for the queue to be fully processed, and
    finally cancels the consumers.
    """
    q = asyncio.Queue()

    queries = []
    for sensorname in sensors:
        queries.append(asyncio.create_task(querySensor(sensorname, q)))

    # Since value is only stored to dict, one consumer should be sufficient,
    # or no consumer at all, since producer could also store variable into dict
    process = []
    for sensorname in sensors:
        process.append(asyncio.create_task(storeValues(sensorname, q)))

    await asyncio.gather(*queries)
    await q.join()
    for consumer_task in process:
        consumer_task.cancel()
async def querySensor(sensorname: str, q: asyncio.Queue):
    """Put a "Result for <sensor> is <n>" string on *q*, n random in 0..100."""
    reading = random.randint(0, 100)
    await q.put(f"Result for {sensorname} is {reading}")
async def storeValues(sensorname: str, q: asyncio.Queue):
    """Print every item taken from *q*, forever, marking each one done.

    ``sensorname`` is not used, and nothing is written to ``results`` here.
    """
    while True:
        print("Value: ", await q.get())
        q.task_done()
if __name__ == "__main__":
asyncio.run(queryAll(sensors))
for result in results: #Now results should be in results
print(result, "measured:", results[result])
Solution
Thanks for both answers. Resulting code is now:
#!/usr/bin/python3
import asyncio
import random
sensors={ #This list changes often
"sensor1" : "http://abc.example.org",
"sensor2" : "http://outside.example.org",
"temperature" : "http://xe.example.com",
"outdoor" : "http://anywhere.example.org"
}
results = dict() #Result from sensor query should go here
async def queryAll(sensors):
    """Start one ``querySensor`` task per sensor name and wait for all of them."""
    pending_queries = []
    for sensorname in sensors:
        pending_queries.append(asyncio.create_task(querySensor(sensorname)))
    await asyncio.gather(*pending_queries)
async def querySensor(sensorname: str):
    """Store a "Result for <sensor> is <n>" string in the global ``results``."""
    reading = random.randint(0, 100)
    results[sensorname] = f"Result for {sensorname} is {reading}"
if __name__ == "__main__":
asyncio.run(queryAll(sensors))
for result in results: #Now results should be in results
print(result, "measured:", results[result])
There is some more complex code behind fetching the actual value, so this was only an example. I wanted to hide the additional layer of code.
Both answers are very valuable to me. To support a new user's reputation, I accept Sreejith's answer to mark this question as solved.
It's not clear about what you're exactly looking to achieve. If you only want to update the dictionary, the code could be much simpler. Let me know if you were expecting anything else.
sensors={ #This list changes often
"sensor1" : "http://abc.example.org",
"sensor2" : "http://outside.example.org",
"temperature" : "http://xe.example.com",
"outdoor" : "http://anywhere.example.org"
}
results = dict() #Result from sensor query should go here
async def queryAll(sensors):
    """Query every sensor concurrently, each task writing into ``results``."""
    await asyncio.gather(
        *[asyncio.create_task(querySensor(name, results)) for name in sensors]
    )
async def querySensor(sensorname: str, q: dict):
    """Store a "Result for <sensor> is <n>" string in *q* under the sensor name.

    ``n`` is a random integer between 0 and 100 inclusive.
    """
    reading = random.randint(0, 100)
    q[sensorname] = f"Result for {sensorname} is {reading}"
if __name__ == "__main__":
asyncio.run(queryAll(sensors))
print(results)
The main problem with this code is that you never actually store
the sensor values in your result dict. If the code in storeValues included the line results.setdefault(sensorname, []).append(res), you'd already see your results. (The dictionary .setdefault method is a utility that creates a value in the dictionary if it does not exist, and returns that value or the existing one; therefore we create an empty list on the first call for each sensor, and keep appending to it.)
But, as you noted, there is no need to have a producer/consumer separated pattern in this code (whatever code will consume the "results" dict is actually the consumer)
...
from aiohttp_requests import requests
results = dict() #Result from sensor query should go here
async def queryAll(sensors):
    """Create a ``querySensor`` task for each sensor name and await them all."""
    sensor_tasks = [asyncio.create_task(querySensor(name)) for name in sensors]
    await asyncio.gather(*sensor_tasks)
async def querySensor(sensorname: str):
    """Fetch one sensor over HTTP and append the formatted reading to ``results``.

    The URL is looked up in the module-level ``sensors`` mapping. Readings
    accumulate in a list per sensor name.
    """
    # Important: when writing the actual call that reads the sensor, use an
    # async expression and await it, so the other queries can run meanwhile.
    # Add request keyword arguments here as needed.
    response = await requests.get(sensors[sensorname])
    text = await response.text()
    resString = f"Result for {sensorname} is {text}"
    results.setdefault(sensorname, []).append(resString)
if __name__ == "__main__":
asyncio.run(queryAll(sensors))
for result in results: #Now results should be in results
print(result, "measured:", results[result])
This example is using https://pypi.org/project/aiohttp-requests/
I have been attempting to generate a ping scan that uses a limited number of processes. I tried as_completed without success and switched to asyncio.wait with asyncio.FIRST_COMPLETED.
The following complete script works if the offending line is commented out. I'd like to collect the tasks to a set in order to get rid of pending = list(pending) however pending_set.union(task) throws await wasn't used with future.
"""Test simultaneous pings, limiting processes."""
import asyncio
from time import asctime
pinglist = [
'127.0.0.1', '192.168.1.10', '192.168.1.20', '192.168.1.254',
'192.168.177.20', '192.168.177.100', '172.17.1.1'
]
async def ping(ip):
    """Run the external ``ping`` once against *ip* and return its exit status.

    Both output streams of the child process are discarded.
    """
    devnull = asyncio.subprocess.DEVNULL
    proc = await asyncio.create_subprocess_exec(
        'ping', '-n', '-c', '1', ip, stdout=devnull, stderr=devnull
    )
    exit_status = await proc.wait()
    return exit_status
async def run():
    """Ping every address in ``pinglist``, keeping at most three pings running.

    Addresses are popped from a copy of the list. Each finished task is
    reported as 'good' when its result is falsy and 'BAD' otherwise.
    """
    iplist = pinglist[:]
    pending = []
    pending_set = set()
    tasks = {}  # task -> ip, used to label results
    while len(pending) or len(iplist):
        # Top up to three running tasks while addresses remain.
        while len(pending) < 3 and len(iplist):
            ip = iplist.pop()
            print(f"{asctime()} adding {ip}")
            task = asyncio.create_task(ping(ip))
            tasks[task] = ip
            pending.append(task)
            # This is the line the question is about: union() expects an
            # iterable and returns a new set, so it tries to iterate the task.
            pending_set.union(task) # comment this line and no error
        done, pending = await asyncio.wait(
            pending, return_when=asyncio.FIRST_COMPLETED
        )
        # asyncio.wait() returns sets; the loop above uses list.append().
        pending = list(pending)
        for taskdone in done:
            print(' '.join([
                asctime(),
                ('BAD' if taskdone.result() else 'good'),
                tasks[taskdone]
            ]))
if __name__ == '__main__':
asyncio.run(run())
There are two problems with pending_set.union(task):
union doesn't update the set in-place, it returns a new set consisting of the original one and the one it receives as argument.
It accepts an iterable collection (such as another set), not a single element. Thus union attempts to iterate over task, which doesn't make sense. To make things more confusing, task objects are technically iterable in order to be usable in yield from expressions, but they detect iteration attempts in non-async contexts, and report the error you've observed.
To fix both issues, you should use the add method instead, which operates by side effect and accepts a single element to add to the set:
pending_set.add(task)
Note that a more idiomatic way to limit concurrency in asyncio is using a Semaphore. For example (untested):
async def run():
    """Ping every address in ``pinglist`` with at most three pings in flight."""
    limit = asyncio.Semaphore(3)

    async def wait_and_ping(ip):
        # Wait for a free slot, then ping and report the outcome.
        async with limit:
            print(f"{asctime()} adding {ip}")
            result = await ping(ip)
        verdict = 'BAD' if result else 'good'
        print(asctime(), ip, verdict)

    jobs = [wait_and_ping(ip) for ip in pinglist]
    await asyncio.gather(*jobs)
Use await asyncio.gather(*pending_set)
asyncio.gather() accepts any number of awaitables and also returns one
* unpacks the set
>>> "{} {} {}".format(*set((1,2,3)))
'1 2 3'
Example from the docs
await asyncio.gather(
factorial("A", 2),
factorial("B", 3),
factorial("C", 4),
)
I solved this without queuing the ping targets in my original application, which simplified things. This answer includes a gradually received list of targets and the useful pointers from @user4815162342. This completes the answer to the original question.
import asyncio
import time
pinglist = ['127.0.0.1', '192.168.1.10', '192.168.1.20', '192.168.1.254',
'192.168.177.20', '192.168.177.100', '172.17.1.1']
async def worker(queue):
    """Ping every address received on *queue* until a ``None`` sentinel arrives.

    At most four pings run at once. Each finished ping is printed as 'good'
    when the exit status is zero and 'BAD' for any other status.
    """
    limit = asyncio.Semaphore(4)  # restrict the rate of work

    async def ping(ip):
        """Run external ping."""
        async with limit:
            print(f"{time.time():.2f} starting {ip}")
            p = await asyncio.create_subprocess_exec(
                'ping', '-n', '1', ip,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL
            )
            return (ip, await p.wait())

    async def get_assign():
        return await queue.get()

    assign = {asyncio.create_task(get_assign())}
    pending = set()

    # Maintaining two distinct pending sets proved key. One set is a single
    # task that receives assigned addresses. This completes and needs to be
    # restarted each time. The other set is for the ping messages which run
    # once and are then complete.
    while len(assign) + len(pending) > 0:  # stop condition
        done, pending = await asyncio.wait(
            set().union(assign, pending),
            return_when=asyncio.FIRST_COMPLETED
        )
        for job in done:
            if job in assign:
                if job.result() is None:
                    assign = set()  # for stop condition
                else:
                    pending.add(asyncio.create_task(ping(job.result())))
                    assign = {asyncio.create_task(get_assign())}
            else:
                ip, returncode = job.result()
                # Any non-zero exit status is a failure. Indexing a two-item
                # list with the status would break for statuses other than 0/1.
                verdict = 'BAD' if returncode else 'good'
                print(f"{time.time():.2f} result {ip} {verdict}")
The remainder is pretty straight forward.
async def assign(queue):
    """Feed ``pinglist`` into *queue* one address every 0.1 s, then a ``None``.

    The trailing ``None`` is the sentinel put on the queue to stop nicely.
    """
    print(f"{time.time():.2f} start assigning")
    for address in pinglist:
        await queue.put(address)
        await asyncio.sleep(0.1)
    sentinel = None  # to stop nicely
    await queue.put(sentinel)
async def main():
    """Run ``worker`` and ``assign`` concurrently over one shared queue."""
    shared = asyncio.Queue()
    jobs = (worker(shared), assign(shared))
    await asyncio.gather(*jobs)
if __name__ == '__main__':
asyncio.run(main())
The output of this is (on my network with 172 failing to respond):
1631611141.70 start assigning
1631611141.70 starting 127.0.0.1
1631611141.71 result 127.0.0.1 good
1631611141.80 starting 192.168.1.10
1631611141.81 result 192.168.1.10 good
1631611141.91 starting 192.168.1.20
1631611142.02 starting 192.168.1.254
1631611142.03 result 192.168.1.254 good
1631611142.13 starting 192.168.177.20
1631611142.23 starting 192.168.177.100
1631611142.24 result 192.168.177.100 good
1631611142.34 starting 172.17.1.1
1631611144.47 result 192.168.1.20 good
1631611145.11 result 192.168.177.20 good
1631611145.97 result 172.17.1.1 BAD
I'm trying to create a class which allows me to put data into it while a websocket connection is open, but i can't figure out how to make the main call to the class non-blocking. Can someone point me in the right direction?
Here is what I have so far (some extraneous code removed):
class Audio_Sender:
    """Buffer audio chunks in a bounded queue and stream them over a websocket.

    ``status`` controls the send loop in ``Connect``: while it equals
    ``"Run"`` queued chunks keep being forwarded. Any other value makes
    ``Connect`` flush what is left, send the stop message and read one reply.
    """

    # Pause between polls of the queue, so the send loop yields to the event
    # loop instead of spinning without ever awaiting.
    IDLE_POLL_SECONDS = 0.01

    def __init__(self,IP_Address):
        self.Remote_IP_Address = IP_Address
        self.audio_queue = queue.Queue(10) #Max of 10 items
        # Initialised here so Connect() can run before a caller sets it.
        self.status = "Run"

    async def _send_queued(self, websocket):
        """Send every chunk currently in the queue, oldest first."""
        while True:
            try:
                # Non-blocking get: a blocking one would stall the event loop.
                audio_data = self.audio_queue.get_nowait()
            except queue.Empty:
                return
            await websocket.send(audio_data)

    async def Connect(self):
        """Open the websocket, stream queued audio, then send stop and print the reply."""
        uri = "ws://127.0.0.1:8765"
        async with websockets.connect(uri) as websocket:
            await websocket.send(json_voice_start)
            while self.status == "Run":
                await self._send_queued(websocket)
                await asyncio.sleep(self.IDLE_POLL_SECONDS)
            #pull any remaining data out:
            await self._send_queued(websocket)
            await websocket.send(json_voice_stop)
            voice_response = await websocket.recv()
            message = json.loads(voice_response)
            print("\t- " + message["result"])

    async def run_connect(self):
        """Run ``Connect`` as a task and report once per second until it is done."""
        task = asyncio.create_task(self.Connect())
        while not task.done():
            print("Task is not Done")
            await asyncio.sleep(1)

    def go(self):
        """Blocking entry point: run ``run_connect`` in a new event loop."""
        asyncio.run(self.run_connect())
#Create the Audio Sender
A = Audio_Sender("127.0.0.1","r")
#Put some data into it's queue
A.audio_queue.put(b"abc")
A.audio_queue.put(b"abc")
A.audio_queue.put(b"abc")
#Finished putting data in
A.status = "Done"
#Now send the data
#Ideally I would like to have the go part way through the queuing of
# data (above), but A.go() is blocking.. how to make it not blocking?
A.go()
So playing with this a bit more I realized i could use a thread for the go(), which works:
Updated code is as follows:
#Create the Audio Sender
A = Audio_Sender("127.0.0.1","r")
s = threading.Thread(target=A.go,args=())
s.start()
i=0
while i<10:
#Put some data into it's queue
A.audio_queue.put(b"abc")
i = i +1
print(i)
#Finished putting data in
A.status = "Done"
def initial_number_adder(first_number_ever):
    """Return *first_number_ever* plus one (the integer comes from the user)."""
    return first_number_ever + 1
async def repeated_number_adder(old_result):
    """Poll the UTC clock every 0.5 s and add 1 on second 0 or 30.

    Question code, kept as posted: the final ``await`` is applied to an
    integer, which is not awaitable.
    """
    while 1:
        await asyncio.sleep(0.5)
        if datetime.utcnow().second == 0 or datetime.utcnow().second == 30:
            # takes the integer from the initial fxn, initially.
            # takes the return value from itself on the 0th and 30th second of
            # every minute, from then on.
            # adds 1 as well.
            new_result = old_result + 1
            print(new_result)
            return await new_result
async def other_cool_fxn():
print('extra function to make things more complex')
await asyncio.sleep(1)
async def main():
first_time = 0
# first result comes from user input. for example '1'
if first_time == 0:
first_result = initial_number_adder(1)
first_time = 1
second_result = await asyncio."""wait or gather?"""([repeated_number_adder(first_result)])
else:
# for all results after the first (and the second in this case. I'm hoping to clean this up and only run repeated_number_adder in the loop below.
all_subsequent_results = await asyncio."""wait/gather"""([repeated_number_adder("""result of return of repeated_number_adder"""),other_cool_fxn()])
# want something like a future here
loop = asyncio.get_event_loop()
loop.run_forever(main())
I'm hoping to run an initial (synchronous) function (initial_number_adder), pass its return value to a second function that's async (repeated_number_adder), and then run that function in a loop, passing its previous return value back into itself, forever.
If I can make both functions async, and manage the single initial run of the first, from the task list, that would be great also. This was just the best way I could imagine doing it to begin with (initial synchronous pass into async loop)
I'll be using a setup like this with aiohttp to make GET requests. One function returns a large DataFrame initially, the second function will async append one new row and make calculations every 15 minutes, then pass the DataFrame and calcs back into itself. (just wanted to give a gist for anyone curious)
I appreciate your time. Let me know if there's anything that needs clarification.
There are several issues with the pseudo-code in the question.
run_forever() doesn't accept a coroutine; it accepts no arguments and runs the event loop forever (or until loop.stop() is invoked). To run a coroutine forever, simply use a while True loop in its body, and add it to the loop before calling run_forever().
await new_result won't work because new_result is an integer. Valid arguments to await are awaitable objects, such as coroutines and futures. If the result is ready, it is perfectly fine to return it from the coroutine using the regular return statement.
You don't need gather or wait to await a single coroutine, as in the first invocation of repeated_number_adder, you can await it directly. You need gather to await the results of multiple coroutines running in parallel.
With those in mind, you could write the program as follows:
import asyncio
from datetime import datetime
def initial_number_adder(first_number_ever):
    """Take an integer from the user and return it incremented by one."""
    return first_number_ever + 1
async def repeated_number_adder(old_result):
    """Wait until the UTC second is 0 or 30, then return ``old_result + 1``.

    The clock is polled after every 0.5 s sleep.
    """
    on_mark = False
    while not on_mark:
        await asyncio.sleep(0.5)
        on_mark = datetime.utcnow().second == 0 or datetime.utcnow().second == 30
    return old_result + 1
async def other_cool_fxn():
    """Print a fixed message, then sleep for one second."""
    message = 'extra function to make things more complex'
    print(message)
    await asyncio.sleep(1)
async def main():
    """Feed each result of ``repeated_number_adder`` back into itself, forever.

    ``other_cool_fxn`` runs alongside every round; its result is discarded.
    """
    next_number = initial_number_adder(1)
    while True:
        round_results = await asyncio.gather(
            repeated_number_adder(next_number),
            other_cool_fxn(),
        )
        next_number, _cool_result = round_results
        print('got', next_number)
# main() loops forever, so this call never returns on its own. asyncio.run()
# creates and closes the event loop itself and, unlike scheduling a task and
# calling run_forever(), lets an exception raised in main() propagate instead
# of leaving an idle loop running.
asyncio.run(main())
APIs often have rate limits that users have to follow. As an example let's take 50 requests/second. Sequential requests take 0.5-1 second and thus are too slow to come close to that limit. Parallel requests with aiohttp, however, exceed the rate limit.
To poll the API as fast as allowed, one needs to rate limit parallel calls.
Examples that I found so far decorate session.get, approximately like so:
session.get = rate_limited(max_calls_per_second)(session.get)
This works well for sequential calls. Trying to implement this in parallel calls does not work as intended.
Here's some code as example:
async with aiohttp.ClientSession() as session:
session.get = rate_limited(max_calls_per_second)(session.get)
tasks = (asyncio.ensure_future(download_coroutine(
timeout, session, url)) for url in urls)
process_responses_function(await asyncio.gather(*tasks))
The problem with this is that it will rate-limit the queueing of the tasks. The execution with gather will still happen more or less at the same time. Worst of both worlds ;-).
Yes, I found a similar question right here aiohttp: set maximum number of requests per second, but neither replies answer the actual question of limiting the rate of requests. Also the blog post from Quentin Pradet works only on rate-limiting the queueing.
To wrap it up: How can one limit the number of requests per second for parallel aiohttp requests?
If I understand you well, you want to limit the number of simultaneous requests?
There is an object in asyncio named Semaphore; it works like an asynchronous lock that a fixed number of coroutines can hold at the same time.
semaphore = asyncio.Semaphore(50)
#...
async def limit_wrap(url):
async with semaphore:
# do what you want
#...
results = asyncio.gather([limit_wrap(url) for url in urls])
updated
Suppose I make 50 concurrent requests, and they all finish in 2 seconds. That doesn't reach the limit (it is only 25 requests per second).
That means I should make 100 concurrent requests that also all finish in 2 seconds (50 requests per second). But before you actually make those requests, how can you determine how long they will take to finish?
Or, if you don't care about finished requests per second but about requests made per second, you can do this:
async def loop_wrap(urls):
    """Schedule one ``download`` per URL, pausing 1/50 s after each one."""
    interval = 1/50
    for target in urls:
        asyncio.ensure_future(download(target))
        await asyncio.sleep(interval)
asyncio.ensure_future(loop_wrap(urls))
loop.run_forever()
The code above will create a Future instance every 1/50 second.
I approached the problem by creating a subclass of aiohttp.ClientSession() with a ratelimiter based on the leaky-bucket algorithm. I use asyncio.Queue() for ratelimiting instead of Semaphores. I’ve only overridden the _request() method. I find this approach cleaner since you only replace session = aiohttp.ClientSession() with session = ThrottledClientSession(rate_limit=15).
class ThrottledClientSession(aiohttp.ClientSession):
    """
    Rate-throttled client session class inherited from aiohttp.ClientSession

    USAGE:
        replace `session = aiohttp.ClientSession()`
        with `session = ThrottledClientSession(rate_limit=15)`

    see https://stackoverflow.com/a/60357775/107049

    Every request first takes one token from an internal queue. A background
    "filler" task puts tokens back at ``rate_limit`` per second. With
    ``rate_limit=None`` no queue or filler task is created and requests are
    not throttled.
    """
    # Lower bound, in seconds, for the filler task's sleep between refills.
    MIN_SLEEP = 0.1

    def __init__(self, rate_limit: Optional[float] = None, *args, **kwargs) -> None:
        """Create the session and, when *rate_limit* is given, start the filler.

        Raises:
            ValueError: if *rate_limit* is zero or negative.
        """
        super().__init__(*args, **kwargs)
        self.rate_limit = rate_limit
        self._fillerTask = None
        self._queue = None
        self._start_time = time.time()
        if rate_limit is not None:
            if rate_limit <= 0:
                raise ValueError('rate_limit must be positive')
            # Token bucket; min(2, ...) caps its capacity at two tokens.
            self._queue = asyncio.Queue(min(2, int(rate_limit) + 1))
            self._fillerTask = asyncio.create_task(self._filler(rate_limit))

    def _get_sleep(self) -> Optional[float]:
        """Return the filler's sleep interval, or None when not rate limited."""
        if self.rate_limit is not None:
            return max(1 / self.rate_limit, self.MIN_SLEEP)
        return None

    async def close(self) -> None:
        """Close rate-limiter's "bucket filler" task"""
        if self._fillerTask is not None:
            self._fillerTask.cancel()
            # Only wait when a filler task exists; without a rate limit there
            # is nothing to await.
            try:
                await asyncio.wait_for(self._fillerTask, timeout=0.5)
            except asyncio.TimeoutError as err:
                print(str(err))
        await super().close()

    async def _filler(self, rate_limit: float = 1):
        """Filler task to fill the leaky bucket algo"""
        try:
            if self._queue is None:
                return
            self.rate_limit = rate_limit
            sleep = self._get_sleep()
            updated_at = time.monotonic()
            fraction = 0
            extra_increment = 0
            # Start with a full bucket.
            for i in range(0, self._queue.maxsize):
                self._queue.put_nowait(i)
            while True:
                if not self._queue.full():
                    now = time.monotonic()
                    # Tokens earned since the last refill; the fractional
                    # part is carried over in `fraction` between rounds.
                    increment = rate_limit * (now - updated_at)
                    fraction += increment % 1
                    extra_increment = fraction // 1
                    items_2_add = int(min(self._queue.maxsize - self._queue.qsize(), int(increment) + extra_increment))
                    fraction = fraction % 1
                    for i in range(0, items_2_add):
                        self._queue.put_nowait(i)
                    updated_at = now
                await asyncio.sleep(sleep)
        except asyncio.CancelledError:
            # Cancellation is how close() stops this task.
            print('Cancelled')
        except Exception as err:
            print(str(err))

    async def _allow(self) -> None:
        """Wait for one token from the bucket; returns at once when unthrottled."""
        if self._queue is not None:
            await self._queue.get()
            self._queue.task_done()
        return None

    async def _request(self, *args, **kwargs) -> aiohttp.ClientResponse:
        """Throttled _request()"""
        await self._allow()
        return await super()._request(*args, **kwargs)
I liked @sraw's approach to this with asyncio, but their answer didn't quite cut it for me. Since I don't know whether each of my calls to download will be faster or slower than the rate limit, I want the option to run many in parallel when requests are slow and one at a time when requests are very fast, so that I'm always right at the rate limit.
I do this by using a queue with a producer that produces new tasks at the rate limit, then many consumers that will either all wait on the next job if they're fast, or there will be work backed up in the queue if they are slow, and will run as fast as the processor/network allow:
import asyncio
from datetime import datetime
async def download(url):
    """Stand-in for a real download: wait 0.1 s, return ``(finish time, url)``."""
    # download or whatever
    await asyncio.sleep(1/10)
    return datetime.now(), url
async def producer_fn(queue, urls, max_per_second):
    """Put each item of *urls* on *queue*, then sleep ``1/max_per_second`` s."""
    for item in urls:
        await queue.put(item)
        # The pause after every put is what limits the production rate.
        await asyncio.sleep(1/max_per_second)
async def consumer(work_queue, result_queue):
    """Forever: take a URL from *work_queue*, download it, publish the result.

    The work item is marked done before the result is put on *result_queue*.
    """
    while True:
        outcome = await download(await work_queue.get())
        work_queue.task_done()
        await result_queue.put(outcome)
urls = range(20)
async def main():
    """Download ``urls`` with 10 consumers fed at 5 items per second, then print.

    The producer is awaited first, then the work queue is joined, the idle
    consumers are cancelled and every collected result is printed.
    """
    num_consumer_tasks = 10
    max_per_second = 5
    work_queue = asyncio.Queue()
    result_queue = asyncio.Queue()

    consumers = []
    for _ in range(num_consumer_tasks):
        consumers.append(asyncio.create_task(consumer(work_queue, result_queue)))

    producer = asyncio.create_task(producer_fn(work_queue, urls, max_per_second))
    await producer

    # wait for the remaining tasks to be processed
    await work_queue.join()

    # cancel the consumers, which are now idle
    for idle_consumer in consumers:
        idle_consumer.cancel()

    while not result_queue.empty():
        finished_at, url = await result_queue.get()
        print(f'{url} finished at {finished_at}')
asyncio.run(main())
I developed a library named octopus-api (https://pypi.org/project/octopus-api/), that enables you to rate limit and set the number of connections (parallel) calls to the endpoint using aiohttp under the hood. The goal of it is to simplify all the aiohttp setup needed.
Here is an example of how to use it, where the get_ethereum is the user-defined request function:
from octopus_api import TentacleSession, OctopusApi
from typing import Dict, List
if __name__ == '__main__':
    # User-defined request function: issues the GET described by one request
    # dict ("url" and "params" keys) and returns the decoded JSON body.
    async def get_ethereum(session: TentacleSession, request: Dict):
        async with session.get(url=request["url"], params=request["params"]) as response:
            body = await response.json()
            return body

    client = OctopusApi(rate=50, resolution="sec", connections=6)
    # The same request dict is repeated 1000 times and passed to execute()
    # together with the request function above.
    result: List = client.execute(requests_list=[{
        "url": "https://api.pro.coinbase.com/products/ETH-EUR/candles?granularity=900&start=2021-12-04T00:00:00Z&end=2021-12-04T00:00:00Z",
        "params": {}}] * 1000, func=get_ethereum)
    print(result)
The TentacleSession works the same as how you write POST, GET, PUT and PATCH for aiohttp.ClientSession.
Let me know if it helps your issue related to rate limits and parallel calls.
As far as the question here regarding n requests being sent at the same time when gather() is called, the key is using create_task() with an await asyncio.sleep(1.1) before every call. Any task created with create_task is immediately run:
for i in range(THREADS):
await asyncio.sleep(1.1)
tasks.append(
asyncio.create_task(getData(session, q, ''.join(random.choice(string.ascii_lowercase) for i in range(10))))
)
await asyncio.gather(*tasks)
The other issue of limiting # of simultaneous connections is also solved in the below example by using ClientSession() context in async_payload_wrapper and setting the connector with a limit.
With this setup I can run 25 coroutines (THREADS=25) that each loop over a queue of URLS and not violate a 25 concurrent connection rule:
async def send_request(session, url, routine):
    """GET the API for *url* and return the raw response body.

    *routine* only labels the progress messages. On a timeout, a client
    response error or any other exception, *url* is appended to the
    module-level ``errors`` list and ``None`` is returned implicitly.
    """
    started = time.time()
    print(f"{routine}, sending request: {datetime.now()}")
    params = dict(
        api_key='nunya',
        url='%s' % url,
        render_js='false',
        premium_proxy='false',
        country_code='us',
    )
    try:
        async with session.get(url='http://yourAPI.com', params=params) as response:
            payload = await response.content.read()
            print(f"{routine}, done request: {time.time() - started} seconds")
            return payload
    except asyncio.TimeoutError:
        print('timeout---------------------')
        errors.append(url)
    except aiohttp.ClientResponseError:
        print('request failed - Server Error')
        errors.append(url)
    except Exception:
        errors.append(url)
async def getData(session, q, test):
    """Drain *q*: request every queued URL and process each non-None response.

    *test* is the label passed through to ``send_request`` and
    ``processData``. Returns once the queue is found empty.
    """
    while not q.empty():
        url = q.get_nowait()
        resp = await send_request(session, url, test)
        if resp is not None:
            processData(resp, test, url)
    print(f'{test} queue empty')
async def async_payload_wrapper():
    """Queue every URL, then start ``THREADS`` ``getData`` workers 1.1 s apart.

    All workers share one ``ClientSession`` whose connector limit is
    ``THREADS``; the session stays open until every worker has finished.
    """
    q = asyncio.Queue()
    for url in urls:
        q.put_nowait(url)

    connector = aiohttp.TCPConnector(limit=THREADS)
    tasks = []
    async with ClientSession(connector=connector, timeout=ClientTimeout(total=61), raise_for_status=True) as session:
        for _ in range(THREADS):
            await asyncio.sleep(1.1)
            # Random 10-letter label identifying this worker in the output.
            label = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
            tasks.append(asyncio.create_task(getData(session, q, label)))
        await asyncio.gather(*tasks)
if __name__ == '__main__':
start_time = time.time()
asyncio.run(async_payload_wrapper())