I did follow the other SO threads, but I am not able to figure this out.
max_thread = 40

def main():
    variable_name = 1
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_thread) as executor:
        futures = []
        for i in range(5):
            if count < max_count:
                futures.append(executor.submit(thread_function, xx, yy, zz))
                for future in concurrent.futures.as_completed(futures):
                    future.result()
    print("Main done with it")
Made a small mistake, lol. I reduced the indent for this section:
for future in concurrent.futures.as_completed(futures):
    future.result()
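For completeness, a minimal sketch of how the whole main() looks with that indentation fix; the names thread_function, count, max_count, xx, yy and zz are placeholders from the original snippet and are assumed to be defined elsewhere:

import concurrent.futures

max_thread = 40

def main():
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_thread) as executor:
        futures = []
        for i in range(5):
            if count < max_count:
                futures.append(executor.submit(thread_function, xx, yy, zz))
        # Wait for results only after all work has been submitted,
        # so the submitted calls actually run concurrently.
        for future in concurrent.futures.as_completed(futures):
            future.result()
    print("Main done with it")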
I have an async method, as shown below.
I pass in a list of 1000 numbers, and the method passes each number to a helper function that returns something from a website.
I have a global variable called count, which I surround with locks to make sure it doesn't get changed by anything else.
I use add_done_callback on the task to make this method async.
The goal is to keep sending numbers from the list of 1000 to the server and, only when the server returns data (which can take anywhere from 0.1 to 2 seconds), pause, write the data to a SQL database, and then continue.
The code works as expected without locks, or without making the callback function (named 'function' below) asynchronous. But adding locks gives me an error: RuntimeWarning: coroutine 'function' was never awaited self._context.run(self._callback, *self._args) RuntimeWarning: Enable tracemalloc to get the object allocation traceback
I am super new to async in Python, so any help/advice is greatly appreciated.
My code is shown below. It is just a simple draft:
import time
import random
import asyncio
# from helper import get_message_from_server


async def get(number):
    # get_message_from_server(number), which takes somewhere between 0.1 to 2 seconds
    await asyncio.sleep(random.uniform(0.1, 2))
    s = 'Done with number ' + number
    return s


async def function(future, lock):
    global count
    print(future.result())
    # write future.result() to db
    acquired = await lock.acquire()
    count -= 1 if (count > 1) else 0
    lock.release()


async def main(numbers, lock):
    global count
    count = 0
    for i, number in enumerate(numbers):
        print('number:', number, 'count:', count)

        acquired = await lock.acquire()
        count += 1
        lock.release()

        task = asyncio.create_task(get(number))
        task.add_done_callback(
            lambda x: function(x, lock)
        )

        if (count == 50):
            print('Reached 50')
            await task
            acquired = await lock.acquire()
            count = 0
            lock.release()

        if (i == len(numbers) - 1):
            await task


def make_numbers():
    count = []
    for i in range(1001):
        count.append(str(i))
    return count


if __name__ == '__main__':
    numbers = make_numbers()

    loop = asyncio.get_event_loop()
    lock = asyncio.Lock()

    try:
        loop.run_until_complete(main(numbers, lock))
    except Exception as e:
        pass
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.stop()
The above comment helped a lot
This is what the final working code looks like:
import time
import random
import asyncio
from functools import partial
# from helper import get_message_from_server


async def get(number):
    # get_message_from_server(number), which takes somewhere between 0.1 to 2 seconds
    await asyncio.sleep(random.uniform(0.1, 2))
    s = 'Done with number ' + number
    return s


def function(result, lock):
    print(result.result())

    async def count_decrement(lock):
        global count
        print('in count decrement')
        acquired = await lock.acquire()
        count -= 1 if (count > 1) else 0
        lock.release()

    asyncio.create_task(count_decrement(lock))


async def main(numbers, lock):
    global count
    count = 0
    for i, number in enumerate(numbers):
        print('number:', number, 'count:', count)

        acquired = await lock.acquire()
        count += 1
        lock.release()

        task = asyncio.create_task(get(number))
        task.add_done_callback(partial(function, lock=lock))

        if (count == 50):
            print('Reached 50')
            await task
            acquired = await lock.acquire()
            count = 0
            lock.release()

        if (i == len(numbers) - 1):
            await task


def make_numbers():
    count = []
    for i in range(1001):
        count.append(str(i))
    return count


if __name__ == '__main__':
    numbers = make_numbers()

    loop = asyncio.get_event_loop()
    lock = asyncio.Lock()

    try:
        loop.run_until_complete(main(numbers, lock))
    except Exception as e:
        pass
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.stop()
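As a side note (not part of the answer above): the same throttling can also be expressed without a global counter or done callbacks by capping the number of in-flight requests with an asyncio.Semaphore and gathering the tasks. This is only a minimal sketch under that assumption, reusing the get() coroutine as a stand-in for the server call:

import asyncio
import random

MAX_IN_FLIGHT = 50  # illustrative limit, mirroring the count == 50 check above

async def get(number):
    # stand-in for get_message_from_server(number)
    await asyncio.sleep(random.uniform(0.1, 2))
    return 'Done with number ' + number

async def fetch_and_store(number, semaphore):
    # the semaphore caps how many requests are in flight at once
    async with semaphore:
        result = await get(number)
        print(result)
        # write result to the database here

async def main(numbers):
    semaphore = asyncio.Semaphore(MAX_IN_FLIGHT)
    await asyncio.gather(*(fetch_and_store(n, semaphore) for n in numbers))

if __name__ == '__main__':
    asyncio.run(main([str(i) for i in range(1001)]))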
I am writing minimalist code based on asyncio, composed of tasks, workers and a queue:
Workers look in the queue for a task and execute it
Tasks are represented as a random asyncio.sleep()
When a task is done it adds two more tasks to the queue
Two restrictions:
10 workers max at a time
100 tasks should be executed in total, and the script should end then
To monitor the total number of tasks executed, I use a global variable task_queued, updated each time a task is added to the queue.
I'm sure there's a better, more Pythonic way to do that without a global variable, but all the solutions I came up with are much more complicated.
I'm missing something here; any clue?
Here's my code:
import asyncio
from random import random
import sys

MAX_WORKERS = 10
MAX_TASKS = 100

task_queued = 0


async def task(queue, id="1"):
    global task_queued

    sleep_time = 0.5 + random()
    print(' Begin task #{}'.format(id))
    await asyncio.sleep(sleep_time)
    if task_queued < MAX_TASKS:
        await queue.put(id + ".1")
        task_queued += 1
    if task_queued < MAX_TASKS:
        await queue.put(id + ".2")
        task_queued += 1
    print(' End task #{} ({} item(s) in the queue)'.format(id, queue.qsize()))


async def worker(worker_id, queue):
    while True:
        task_id = await queue.get()
        print('Worker #{} takes charge of task {}'.format(worker_id, task_id))
        await task(queue, task_id)
        queue.task_done()


async def main():
    global task_queued

    print('Begin main \n')

    queue = asyncio.Queue()
    await queue.put("1")  # We add one task to the queue
    task_queued += 1

    workers = [asyncio.create_task(worker(worker_id + 1, queue)) for worker_id in range(MAX_WORKERS)]

    await queue.join()
    print('Queue is empty, {} tasks completed'.format(task_queued))

    for w in workers:
        w.cancel()

    print('\n End main')


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(main())
    except KeyboardInterrupt:
        print('\nBye bye')
        sys.exit(0)
Thanks to user4815162342 for the answer; here's the code if anyone is interested.
import asyncio
from random import random
import sys


class Factory:
    """
    Factory
    """

    def __init__(self, max_workers, max_tasks):
        self.task_queued = 0
        self.max_workers = max_workers
        self.max_tasks = max_tasks
        self.queue = asyncio.Queue()

    async def task(self, task_id):
        sleep_time = 0.5 + random()
        print(' Begin task #{}'.format(task_id))
        await asyncio.sleep(sleep_time)
        if self.task_queued < self.max_tasks:
            await self.queue.put(task_id + ".1")
            self.task_queued += 1
        if self.task_queued < self.max_tasks:
            await self.queue.put(task_id + ".2")
            self.task_queued += 1
        print(' End task #{} ({} item(s) in the queue)'.format(task_id, self.queue.qsize()))

    async def worker(self, worker_id):
        while True:
            task_id = await self.queue.get()
            print('Worker #{} takes charge of task {}'.format(worker_id, task_id))
            await self.task(task_id)
            self.queue.task_done()

    async def organize_work(self):
        print('Begin work \n')

        await self.queue.put("1")  # We add one task to the queue to start
        self.task_queued += 1

        workers = [asyncio.create_task(self.worker(worker_id + 1)) for worker_id in range(self.max_workers)]

        await self.queue.join()
        print('Queue is empty, {} tasks completed'.format(self.task_queued))

        for w in workers:
            w.cancel()

        print('\nEnd work')


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    factory = Factory(max_workers=3, max_tasks=50)
    try:
        loop.run_until_complete(factory.organize_work())
    except KeyboardInterrupt:
        print('\nBye bye')
        sys.exit(0)
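Side note, not from the original answer: on Python 3.7+ the manual loop handling in the __main__ block can be replaced with asyncio.run, which creates and closes the event loop itself. A minimal sketch of that entry point, reusing the Factory class above:

import asyncio

if __name__ == '__main__':
    factory = Factory(max_workers=3, max_tasks=50)
    try:
        # asyncio.run sets up the loop, runs the coroutine, and tears the loop down
        asyncio.run(factory.organize_work())
    except KeyboardInterrupt:
        print('\nBye bye')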
I am learning the asyncio library to do some tasks I want to achieve. I wrote the following code to teach myself how to switch to another task while the original one is being executed. As you can see below, summation() should run until a condition is met, at which point it should jump to secondaryTask(). After secondaryTask() is finished, it should return to summation(), which hopefully then runs to completion. The expected results are sum=1225 and mul=24.
import asyncio, time


async def summation():
    print('Running summation from 0 to 50:')
    sum = 0
    for i in range(25):
        sum = sum + i
        if i != 25:
            time.sleep(0.1)
        else:
            await asyncio.sleep(0)  # pretend to be non-blocking work (Jump to the next task)
    print('This message is shown because summation() is completed! sum= %d' % sum)


async def secondaryTask():
    print('Do some secondaryTask here while summation() is on progress')
    mul = 1
    for i in range(1, 5):
        mul = mul * i
        time.sleep(0.1)
    await asyncio.sleep(0)
    print('This message is shown because secondaryTask() is completed! Mul= %d' % mul)


t0 = time.time()
ioloop = asyncio.get_event_loop()
tasks = [ioloop.create_task(summation()), ioloop.create_task(secondaryTask())]
wait_tasks = asyncio.wait(tasks)
ioloop.run_until_complete(wait_tasks)
ioloop.close()
t1 = time.time()
print('Total time= %.3f' % (t1 - t0))
This code does not perform as expected, because sum=300 as opposed to sum=1225. Clearly summation() does not continue while secondaryTask() is being processed. How can I modify summation() so that it can do the summation of the remaining 25 values in the background?
Thank you
It's just carelessness. You want to run the summation from 0 to 50, so your summation function should use for i in range(50).
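A minimal sketch of summation() with that fix applied (the rest of the script unchanged): with range(50) the loop sums 0 through 49, which is 1225, and the i == 25 branch now actually fires once, yielding control to secondaryTask():

async def summation():
    print('Running summation from 0 to 50:')
    sum = 0
    for i in range(50):      # was range(25); 0 + 1 + ... + 49 == 1225
        sum = sum + i
        if i != 25:
            time.sleep(0.1)  # blocking "work"
        else:
            await asyncio.sleep(0)  # yield once so secondaryTask() gets a chance to run
    print('This message is shown because summation() is completed! sum= %d' % sum)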
Background
A small server which waits for different types of jobs, which are represented as Python functions (async_func and async_func2 in the sample code below).
Each job gets submitted to a Pool with apply_async and takes a different amount of time, i.e. I cannot be sure that a job which was submitted first also finishes first.
I can check whether the job was finished with .get(timeout=0.1)
Question
How can I check whether a job is still waiting in the queue or is already running?
Is using a Queue the correct way, or is there a simpler way?
Code
import multiprocessing
import random
import time


def async_func(x):
    iterations = 0
    x = (x + 0.1) % 1
    while (x / 10.0) - random.random() < 0:
        iterations += 1
        time.sleep(0.01)
    return iterations


def async_func2(x):
    return async_func(x + 0.5)


if __name__ == "__main__":
    results = dict()
    status = dict()
    finished_processes = 0

    worker_pool = multiprocessing.Pool(4)
    jobs = 10
    for i in range(jobs):
        if i % 2 == 0:
            results[i] = worker_pool.apply_async(async_func, (i,))
        else:
            results[i] = worker_pool.apply_async(async_func2, (i,))
        status[i] = 'submitted'

    while finished_processes < jobs:
        for i in range(jobs):
            if status[i] != 'finished':
                try:
                    print('{0}: iterations needed = {1}'.format(i, results[i].get(timeout=0.1)))
                    status[i] = 'finished'
                    finished_processes += 1
                except:
                    # how to distinguish between "running but no result yet" and "waiting to run"
                    status[i] = 'unknown'
Just send the status dict to the function; since dicts are mutable, all you need to do is change your functions a bit:

def async_func2(status, x):
    status[x] = 'Started'
    return async_func(x + 0.5)

Of course, you can also set the status to 'pending' just before calling apply_async.
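One caveat, added here as a note rather than as part of the answer: with multiprocessing (unlike threading), a plain dict mutated inside the worker process is not visible to the parent, so for the status written by async_func2 to show up it would need to be a shared dict, e.g. one from multiprocessing.Manager(). A minimal sketch of that assumption, reusing async_func from the question's code:

import multiprocessing
import time

def async_func2(status, x):
    status[x] = 'Started'       # visible to the parent because status is a Manager dict
    return async_func(x + 0.5)  # async_func as defined in the question above

if __name__ == "__main__":
    manager = multiprocessing.Manager()
    status = manager.dict()     # proxy dict shared between parent and workers

    worker_pool = multiprocessing.Pool(4)
    result = worker_pool.apply_async(async_func2, (status, 3))

    time.sleep(0.5)                       # give the worker a moment to pick the job up
    print(status.get(3, 'submitted'))     # 'Started' once the worker is running
    print(result.get())                   # block until the job is finished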
I am having a problem when multithreading and using queues in Python 2.7. I want the code with threads to take about half as long as the code without, but I think I'm doing something wrong. I am using a simple looping technique for the Fibonacci sequence to best show the problem.
Here is the code without threads and queues. It printed 19.9190001488 seconds as its execution time.
import time

start_time = time.time()


def fibonacci(priority, num):
    if num == 1 or num == 2:
        return 1
    a = 1
    b = 1
    for i in range(num - 2):
        c = a + b
        b = a
        a = c
    return c


print fibonacci(0, 200000)
print fibonacci(1, 100)
print fibonacci(2, 200000)
print fibonacci(3, 2)

print("%s seconds" % (time.time() - start_time))
Here is the code with threads and queues. It printed 21.7269999981 seconds as its execution time.
import time

start_time = time.time()

from Queue import *
from threading import *

numbers = [200000, 100, 200000, 2]

q = PriorityQueue()
threads = []


def fibonacci(priority, num):
    if num == 1 or num == 2:
        q.put((priority, 1))
        return
    a = 1
    b = 1
    for i in range(num - 2):
        c = a + b
        b = a
        a = c
    q.put((priority, c))
    return


for i in range(4):
    priority = i
    num = numbers[i]
    t = Thread(target=fibonacci, args=(priority, num))
    threads.append(t)

#print threads

for t in threads:
    t.start()

for t in threads:
    t.join()

while not q.empty():
    ans = q.get()
    q.task_done()
    print ans[1]

print("%s seconds" % (time.time() - start_time))
What I thought would happen is that the multithreaded code would take about half as long as the code without threads. Essentially, I thought all the threads work at the same time, so the two threads calculating the Fibonacci number at 200,000 would finish at the same time, making execution roughly twice as fast as the code without threads. Apparently that's not what happened. Am I doing something wrong? I just want to execute all threads at the same time, print in the order that they started, and have the longest-running thread determine the overall execution time.
EDIT:
I updated my code to use processes, but now the results aren't being printed. Only an execution time of 0.163000106812 seconds is showing. Here is the new code:
import time

start_time = time.time()

from Queue import *
from multiprocessing import *

numbers = [200000, 100, 200000, 2]

q = PriorityQueue()
processes = []


def fibonacci(priority, num):
    if num == 1 or num == 2:
        q.put((priority, 1))
        return
    a = 1
    b = 1
    for i in range(num - 2):
        c = a + b
        b = a
        a = c
    q.put((priority, c))
    return


for i in range(4):
    priority = i
    num = numbers[i]
    p = Process(target=fibonacci, args=(priority, num))
    processes.append(p)

#print processes

for p in processes:
    p.start()

for p in processes:
    p.join()

while not q.empty():
    ans = q.get()
    q.task_done()
    print ans[1]

print("%s seconds" % (time.time() - start_time))
You've run into one of the basic limiting factors of the CPython implementation, the Global Interpreter Lock or GIL. Effectively this serializes your program: your threads take turns executing. One thread owns the GIL while the other threads wait for it to come free.
One solution would be to use separate processes. Each process would have its own GIL, so they would execute in parallel. Probably the easiest way to do this is to use Python's multiprocessing module as a replacement for the threading module.
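To illustrate that suggestion, here is a minimal sketch (mine, not the answerer's) of the same Fibonacci workload using a multiprocessing.Pool. Results come back through the pool rather than through a Queue.PriorityQueue, which is not shared across processes and is one reason the edited version above prints no results:

import time
from multiprocessing import Pool


def fibonacci(num):
    # same iterative Fibonacci as above, minus the queue handling
    if num == 1 or num == 2:
        return 1
    a = 1
    b = 1
    for i in range(num - 2):
        c = a + b
        b = a
        a = c
    return c


if __name__ == '__main__':
    start_time = time.time()
    numbers = [200000, 100, 200000, 2]

    pool = Pool(4)                          # one worker process per job, each with its own GIL
    results = pool.map(fibonacci, numbers)  # blocks until all jobs finish, preserves input order
    pool.close()
    pool.join()

    for r in results:
        print(r)
    print("%s seconds" % (time.time() - start_time))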