I have been working on a small PoC for an I/O-bound application in which I want to execute functions without being blocked. So far I have created something like this:
import time
import concurrent.futures
found_products = []
site_catalog = [
"https://www.graffitishop.net/Sneakers",
"https://www.graffitishop.net/T-shirts",
"https://www.graffitishop.net/Sweatshirts",
"https://www.graffitishop.net/Shirts"
]
def threading_feeds():
    """Start one monitor_feed worker per catalog URL on a thread pool."""
    # Every URL gets its own worker so the feeds are polled concurrently.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        for catalog_url in site_catalog:
            pool.submit(monitor_feed, catalog_url)
def monitor_feed(link: str) -> None:
    """Poll one catalog URL in an endless loop and pass new links to filtering().

    NOTE(review): as posted this cannot run -- `found_link` is never assigned
    (the loop variable is `links`) and filtering() has no parameter named
    `link`.
    NOTE(review): indentation was lost in this listing; the `else` is assumed
    to pair with the payload comparison -- confirm.
    """
    # Baseline payload that later polls are compared against.
    old_payload = product_data(...)
    while True:
        new_payload = product_data(...)
        if old_payload != new_payload:
            for links in new_payload:
                if links not in found_products:
                    logger.info(f'Detected new link -> {found_link} | From -> {link}')
                    # Execute filtering function without blocking, how?
                    # This is a plain synchronous call: the loop waits here until it returns.
                    filtering(link=found_link)
        else:
            logger.info("Nothing new")
            time.sleep(60)
            continue
def filtering(found_link):
    """Placeholder for per-link processing; for now it only sleeps for 60 seconds."""
    # More code will be added in the future to handle logical code parts
    ...
    # Test
    time.sleep(60)
Problem: Currently the issue is that whenever we enter the row filtering(link=found_link) there will be a call to filtering(...) which sleeps for 60 seconds (This is only a mock data, in the future I will have a logical code part instead), what it does then is that the monitor_feed stops the execution and waits until the filtering() is finished.
My Question: I wonder how can I be able to execute the filtering(...) and still continue to loop through the monitor_feed without being blocked when we call filtering(...)?
This is your code with small modifications - the main problem was wrong variable names (because they are very similar).
To make this clear I use the names executor1 and executor2; executor2 has to be created before while True because it has to exist for as long as its threads are in use.
If you have def filtering(filtered_link) then you have to use the same name filtered_link in submit(..., filtered_link=...)
import concurrent.futures
import time
found_products = []
site_catalog = [
"https://www.graffitishop.net/Sneakers",
"https://www.graffitishop.net/T-shirts",
"https://www.graffitishop.net/Sweatshirts",
"https://www.graffitishop.net/Shirts"
]
def threading_feeds():
    """Launch monitor_feed for every catalog URL on a shared thread pool."""
    print('[threading_feeds] running')
    # One worker per URL so that all feeds are monitored concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor1:
        for catalog_url in site_catalog:
            executor1.submit(monitor_feed, catalog_url)
def monitor_feed(link: str) -> None:
    """Poll one feed forever and hand each newly seen product link to filtering().

    filtering() is submitted to a second thread pool, so the polling loop
    keeps running while earlier links are still being processed.

    Args:
        link: URL of the feed this worker is responsible for.
    """
    print('[monitor_feed] start')

    old_payload = ['old']  # product_data(...)

    # executor has to exist all time
    with concurrent.futures.ThreadPoolExecutor() as executor2:
        while True:
            print('[monitor_feed] run loop')

            new_payload = ['new1', 'new2', 'new3']  # product_data(...)

            if old_payload != new_payload:
                for product_link in new_payload:
                    if product_link not in found_products:
                        # Remember the link before submitting it; otherwise
                        # every poll would re-submit the same links and the
                        # executor's backlog would grow without bound.
                        found_products.append(product_link)
                        print(f'Detected new link -> {product_link} | From -> {link}')
                        # Keyword name must match filtering()'s parameter;
                        # executor2.submit(filtering, product_link) works too.
                        executor2.submit(filtering, filtered_link=product_link)
                # Compare the next poll against what was just seen.
                old_payload = new_payload

            print("Continue")
            time.sleep(2)
def filtering(filtered_link):
    """Mock of the slow per-link work: report start, sleep 60 seconds, report end."""
    # More code will be added in the future to handle logical code parts
    # Test
    started = f'[filtering]: start: {filtered_link}'
    print(started)
    time.sleep(60)
    finished = f'[filtering]: end: {filtered_link}'
    print(finished)
# --- start --
threading_feeds()
Related
This seems like it should be simple, but I can't figure out how to include both state and data dependencies in a single flow. Here is what I attempted (simplified):
def main():
    """Build the "load_data" flow, attach an email task, and render the graph."""
    with Flow("load_data") as flow:
        test_results = prepare_file1()
        load_file1(test_results)
        participants = prepare_file2()
        load_file2(participants)
        email = flow.add_task(EmailTask(name='email', subject='Flow succeeded!', msg='flow succeeded', email_to='xxx', email_from='xxx', smtp_server='xxx',smtp_port=25, smtp_type='INSECURE',))
        # NOTE(review): upstream_tasks lists the task objects themselves, not
        # the values returned by the load_file1(...)/load_file2(...) calls
        # above -- presumably why the load tasks show up twice; confirm.
        flow.set_dependencies(task=email, upstream_tasks=[load_file1,load_file2])
    flow.visualize()
I get the following graph:
Which means that load_file1 and load_file2 run twice. Can I just set up an additional dependency so that email runs when the two load tasks finish?
The issue is how you add the task to your Flow. When using tasks from the Prefect task library, it's best to first initialize those and then call those in your Flow as follows:
send_email = EmailTask(name='email', subject='Flow succeeded!', msg='flow succeeded', email_to='xxx', email_from='xxx', smtp_server='xxx', smtp_port=25, smtp_type='INSECURE')
with Flow("load_data") as flow:
send_email()
Or alternatively, do it in one step with double round brackets EmailTask(init_kwargs)(run_kwargs). The first pair of brackets will initialize the task and the second one will call the task by invoking the task's .run() method.
with Flow("load_data") as flow:
EmailTask(name='email', subject='Flow succeeded!', msg='flow succeeded', email_to='xxx', email_from='xxx', smtp_server='xxx', smtp_port=25, smtp_type='INSECURE')()
The full flow example could look as follows:
from prefect import task, Flow
from prefect.tasks.notifications import EmailTask
from prefect.triggers import always_run
# The decorator had been mangled into a comment ("#task"); without it this is
# a plain function rather than a Prefect task.
@task(log_stdout=True)
def prepare_file1():
    """Print a confirmation and return the identifier "file1"."""
    print("File1 prepared!")
    return "file1"
# The decorator had been mangled into a comment ("#task"); without it this is
# a plain function rather than a Prefect task.
@task(log_stdout=True)
def prepare_file2():
    """Print a confirmation and return the identifier "file2"."""
    print("File2 prepared!")
    return "file2"
# The decorator had been mangled into a comment ("#task"); without it this is
# a plain function rather than a Prefect task.
@task(log_stdout=True)
def load_file1(file: str):
    """Print a confirmation that *file* has been loaded."""
    print(f"{file} loaded!")
# The decorator had been mangled into a comment ("#task"); without it this is
# a plain function rather than a Prefect task.
@task(log_stdout=True)
def load_file2(file: str):
    """Print a confirmation that *file* has been loaded."""
    print(f"{file} loaded!")
# Initialise the library task once, outside the flow; it is called inside it.
send_email = EmailTask(
    name="email",
    subject="Flow succeeded!",
    msg="flow succeeded",
    email_to="xxx",
    email_from="xxx",
    smtp_server="xxx",
    smtp_port=25,
    smtp_type="INSECURE",
    # presumably makes the email run whatever state the upstream tasks end in -- confirm
    trigger=always_run,
)

with Flow("load_data") as flow:
    test_results = prepare_file1()
    # Keep what the load calls return so it can be passed as upstream_tasks.
    load1_task = load_file1(test_results)
    participants = prepare_file2()
    load2_task = load_file2(participants)
    # State-only dependency: no data is passed, the email just runs after both loads.
    send_email(upstream_tasks=[load1_task, load2_task])

if __name__ == "__main__":
    flow.visualize()
async def simultaneous_chunked_download(urls_paths, label):
    """Download every URL in *urls_paths* concurrently and show a progress bar.

    Args:
        urls_paths: Mapping of source URL to destination file path.
        label: Text displayed next to the progress bar.

    The bar counts bytes when every response reports a content length and
    counts finished files otherwise. At most five files are written at once.
    """
    timeout = ClientTimeout(total=60000)
    sem = asyncio.Semaphore(5)
    # NOTE(review): verify_ssl=False turns certificate verification off; newer
    # aiohttp releases spell this ssl=False -- confirm against the pinned version.
    async with aiohttp.ClientSession(
        timeout=timeout, connector=aiohttp.TCPConnector(verify_ssl=False)
    ) as cs:

        async def _fetch(r, path):
            # `indeterminate` and `bar` are assigned further down in the
            # enclosing function, before any _fetch coroutine is awaited.
            async with sem:
                # Release the response even if writing fails part-way, so the
                # connection is not left dangling until the session closes.
                async with r:
                    async with aiofiles.open(path, "wb") as f:
                        async for chunk in r.content.iter_any():
                            if not chunk:
                                break
                            size = await f.write(chunk)
                            if not indeterminate:
                                bar._done += size
                                bar.show(bar._done)
                    if indeterminate:
                        # Unknown total size: advance by one per finished file.
                        bar._done += 1
                        bar.show(bar._done)

        indeterminate = False
        total_length = 0
        tasks = []
        for url, path in urls_paths.items():
            r = await cs.get(url)
            if not indeterminate:
                try:
                    total_length += r.content_length
                except Exception:
                    # A response without a usable length makes the total unknown.
                    indeterminate = True
            tasks.append(_fetch(r, path))
            verbose_print(f"url: {url},\npath: {path}\n\n")

        if not indeterminate:
            bar = progress.Bar(
                expected_size=total_length, label=label, width=28, hide=False
            )
        else:
            bar = progress.Bar(
                expected_size=len(tasks), label=label, width=28, hide=False
            )

        logger._pause_file_output = True
        try:
            bar.show(0)
            bar._done = 0
            await asyncio.gather(*tasks)
        finally:
            # Always restore file logging, even when a download raises.
            logger._pause_file_output = False
        bar.done()
The function I have above is for downloading a dictionary of urls asynchronously and then printing out a progress bar. An example of its usage:
The code itself runs perfectly fine, however i keep getting these errors:
Whilst benign, they are an eyesore and could point towards my lack of knowledge of both HTTP and asynchronous code, so I would rather try to get them fixed. However, I'm at a loss as to where or what is causing them, especially as, like I said, the code runs perfectly fine regardless.
If you would like a more practical hands on attempt at recreating this the full code is on my github repo on the dev branch: https://github.com/ohitstom/spicetify-easyinstall/tree/dev
Most of the program can be disregarded if you are testing this out; just press the install button and the problematic code will show itself towards the end.
Bear in mind this is a Spotify themer, so if you have Spotify/spicetify installed you will want to use a VM.
FIXED!:
# Create App
globals.app = QtWidgets.QApplication(sys.argv)
globals.app.setStyleSheet(gui.QSS)
# Configure asyncio loop to work with PyQt5
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
loop = QEventLoop(globals.app)
asyncio.set_event_loop(loop)
# Setup GUI
globals.gui = gui.MainWindow()
globals.gui.show()
# Set off loop
with loop:
sys.exit(loop.run_until_complete(globals.gui.exit_request.wait()))
class MainWindow(QuickWidget):
def __init__(self):
super().__init__(
name="main_window",
...etc
)
self.exit_request = asyncio.Event()
......etc
def closeEvent(self, *args):
self.exit_request.set()
Asyncio and aiohttp have some problems when running a lot of tasks concurrently on Windows, I've been having a lot of problems with it lately.
There are some workarounds available, the ones I use most are:
# set this before your event loop initialization or main function
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
Or:
loop = asyncio.ProactorEventLoop()
asyncio.set_event_loop(loop)
loop.run_until_complete(your_main())
So, currently, I am using multiprocessing to run these 3 functions together.
As only tokens changes, is it recommended to switch to multi-threading? (if yes, will it really help in a performance like speed-up and I think memory will be for sure used less)
This is my code:
from database_function import *
from kiteconnect import KiteTicker
import pandas as pd
from datetime import datetime, timedelta
import schedule
import time
from multiprocessing import Process
def tick_A():
    """Stream last-traded prices for the first 50 scripts into the database.

    Waits until 08:59 local time (or two seconds if that has already passed),
    connects the ticker in threaded mode, and returns once 15:32 has passed.

    NOTE(review): `kws` is not defined in this listing; presumably the omitted
    credentials code creates the KiteTicker instance -- confirm.
    """
    #credentials code here

    # Tokens of the first 50 scripts.
    tokens = [x[0] for x in db_fetchquery("SELECT zerodha FROM script ORDER BY id ASC LIMIT 50")]

    # Make sure the task starts after 08:59 only.
    t = datetime.today()
    future = datetime(t.year, t.month, t.day, 8, 59)
    if future < t:
        # Already past 08:59: start in two seconds. timedelta arithmetic rolls
        # over correctly, whereas datetime(..., t.second + 2) raised
        # ValueError whenever the current second was 58 or 59.
        future = t + timedelta(seconds=2)
    time.sleep((future - t).total_seconds())

    def on_ticks(ws, ticks):
        global ltp
        if not ticks:
            # Nothing to store; also avoids IndexError on ticks[0].
            return
        ltp = ticks[0]["last_price"]
        for tick in ticks:
            print(f"{tick['instrument_token']}A")
            # NOTE(review): SQL is built by string interpolation; switch to a
            # parameterised query if db_runquery supports one.
            db_runquery(f'UPDATE SCRIPT SET ltp = {tick["last_price"]} WHERE zerodha = {tick["instrument_token"]}')

    def on_connect(ws, response):
        # Subscribe to the tokens fetched above and switch them to LTP mode.
        ws.subscribe(tokens)
        ws.set_mode(ws.MODE_LTP, tokens)

    kws.on_ticks = on_ticks
    kws.on_connect = on_connect
    kws.connect(threaded=True)

    # Stop the task after 15:32.
    end_time = datetime(t.year, t.month, t.day, 15, 32)
    while datetime.now() <= end_time:
        schedule.run_pending()
        # Sleep between checks instead of spinning a CPU core at 100%.
        time.sleep(1)
def tick_B():
everything remains the same only tokens value changes
tokens = [x[0] for x in db_fetchquery("SELECT zerodha FROM script ORDER BY id ASC OFFSET (50) ROWS FETCH NEXT (50) ROWS ONLY")]
def tick_C():
everything remains the same only tokens value changes
tokens = [x[0] for x in db_fetchquery("SELECT zerodha FROM script ORDER BY id ASC OFFSET (100) ROWS FETCH NEXT (50) ROWS ONLY")]
if __name__ == '__main__':
    def runInParallel(*fns):
        """Run each callable in its own process and wait for all of them to exit."""
        workers = [Process(target=fn) for fn in fns]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    runInParallel(tick_A , tick_B , tick_C)
So, currently, I am using multiprocessing to run these 3 functions together.
As only tokens changes, is it recommended to switch to multi-threading? (if yes, will it really help in a performance like speed-up and I think memory will be for sure used less)
Most Python implementations do not have true multi-threading, because they use a global interpreter lock (GIL), so only one thread runs at a time.
For I/O-heavy applications it should not make a difference. But if you need CPU-heavy operations done in parallel (and I see that you use pandas, so the answer must be yes), you will be better off staying with a multi-process app.
def get_price_history_data(ticker):
    """Fetch adjusted closing prices for *ticker* over the last 252 calendar days.

    Args:
        ticker: Symbol passed to pdr.get_data_yahoo.

    Returns:
        A tuple (pricelist, pricedata). On success pricelist is the
        'Adj Close' column and pricedata is True; on any failure pricelist
        is an empty list and pricedata is False.
    """
    pricelist = []
    pricedata = False
    tradingdays = 252
    try:
        Historical_Prices = pdr.get_data_yahoo(symbols=ticker, start=(datetime.today()-timedelta(tradingdays)), end=(datetime.today()))
        price_df = pd.DataFrame(Historical_Prices)
        pricelist = price_df['Adj Close']
        pricedata = True
    except Exception:
        # Not a bare `except:` -- that would also swallow KeyboardInterrupt
        # and SystemExit, making a long run impossible to stop with Ctrl-C.
        print(ticker,' failed to get price data')
    return(pricelist, pricedata)
tickers = ['FB','V']
for ticker in tickers:
    # The helper is named get_price_history_data; the original call to
    # get_price_data raised NameError.
    pricelist, pricedata = get_price_history_data(ticker)
I have a list of a few thousand tickers that i run through this for loop. It outputs a single column df and a boolean. Overall it works just fine and does what I need it to. However, it inconsistently freezes indefinitely with no error message and stops running forcing me to close the program and re-run from the beginning.
I am looking for a way to skip the current iteration of the for loop if a certain amount of time has passed. I have looked into time.sleep() and the continue statement but can't figure out how to apply them to this specific application. If it freezes, it freezes in the pdr.get_data_yahoo() call. Help would be appreciated.
I'm guessing that get_data_yahoo() probably freezes because it's making some kind of request to a server that never gets answered. It doesn't have a timeout option so the most obvious option is to start it in another thread/process and terminate it if it takes too long. You can use concurrent.futures for that. Once you're happy about how the code below works, you can replace sleeps_for_a_while with get_price_history_data and (3, 1, 4, 0) with tickers.
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from time import sleep
TIMEOUT = 2 # seconds
def sleeps_for_a_while(sleep_for):
    """Sleep for *sleep_for* seconds, printing before and after, and return sleep_for * 100."""
    print(f'starting {sleep_for}s sleep')
    sleep(sleep_for)
    print(f'finished {sleep_for}s sleep')
    # return a value to break out of the while loop
    outcome = sleep_for * 100
    return outcome
if __name__ == '__main__':
    # this only works with functions that return values
    results = []
    for sleep_secs in (3, 1, 4, 0):
        executor = ThreadPoolExecutor(max_workers=1)
        # a future represents something that will be done
        future = executor.submit(sleeps_for_a_while, sleep_secs)
        try:
            # result() raises an error if it times out
            results.append(future.result(TIMEOUT))
        except TimeoutError:
            print('Function timed out')
            results.append(None)
        finally:
            # Do not wait for the worker: leaving a `with ThreadPoolExecutor`
            # block joins the thread, so the loop would still block until the
            # slow call finished and the timeout would skip nothing.
            # The abandoned thread keeps running in the background and is
            # still joined when the interpreter exits.
            executor.shutdown(wait=False)
    print('Got results:', results)
It is fairly easy to do parallel work with Python 3's concurrent.futures module as shown below.
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
future_to = {executor.submit(do_work, input, 60): input for input in dictionary}
for future in concurrent.futures.as_completed(future_to):
data = future.result()
It is also very handy to insert and retrieve items into a Queue.
q = queue.Queue()
for task in tasks:
q.put(task)
while not q.empty():
q.get()
I have a script running in background listening for updates. Now, in theory assume that, as those updates arrive, I would queue them and do work on them concurrently using the ThreadPoolExecutor.
Now, individually, all of these components work in isolation, and make sense, but how do I go about using them together? I am not aware if it is possible to feed the ThreadPoolExecutor work from the queue in real time unless the data to work from is predetermined?
In a nutshell, all I want to do is, receive updates of say 4 messages a second, shove them in a queue, and get my concurrent.futures to work on them. If I don't, then I am stuck with a sequential approach which is slow.
Let's take the canonical example in the Python documentation below:
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
for future in concurrent.futures.as_completed(future_to_url):
url = future_to_url[future]
try:
data = future.result()
except Exception as exc:
print('%r generated an exception: %s' % (url, exc))
else:
print('%r page is %d bytes' % (url, len(data)))
The list of URLS is fixed. Is it possible to feed this list in real-time and get the worker to process it as they come by, perhaps from a queue for management purposes? I am a bit confused on whether my approach is actually possible?
The example from the Python docs, expanded to take its work from a queue. A change to note, is that this code uses concurrent.futures.wait instead of concurrent.futures.as_completed to allow new work to be started while waiting for other work to complete.
import concurrent.futures
import urllib.request
import time
import queue
q = queue.Queue()
URLS = ['http://www.foxnews.com/',
'http://www.cnn.com/',
'http://europe.wsj.com/',
'http://www.bbc.co.uk/',
'http://some-made-up-domain.com/']
def feed_the_workers(spacing):
    """Simulate outside actors sending in work: queue every URL twice.

    Sleeps *spacing* seconds before each put and returns "DONE FEEDING".
    """
    pending = URLS * 2
    for address in pending:
        time.sleep(spacing)
        q.put(address)
    return "DONE FEEDING"
def load_url(url, timeout):
    """Fetch *url* with the given timeout and return the response body."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    return body
# The with statement makes sure the worker threads are cleaned up promptly.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:

    # The feeder itself runs on the pool and pushes work in through the queue.
    future_to_url = {
        executor.submit(feed_the_workers, 0.25): 'FEEDER DONE'}

    while future_to_url:
        # Wait briefly for any in-flight future to finish.
        done, not_done = concurrent.futures.wait(
            future_to_url, timeout=0.25,
            return_when=concurrent.futures.FIRST_COMPLETED)

        # Turn every URL that has arrived on the queue into a new future,
        # tagged with the URL it is loading.
        while not q.empty():
            url = q.get()
            future_to_url[executor.submit(load_url, url, 60)] = url

        # Report each completed future and drop it from the tracking dict.
        for future in done:
            url = future_to_url.pop(future)
            try:
                data = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (url, exc))
            else:
                if url == 'FEEDER DONE':
                    message = data
                else:
                    message = '%r page is %d bytes' % (url, len(data))
                print(message)
Output from fetching each url twice:
'http://www.foxnews.com/' page is 67574 bytes
'http://www.cnn.com/' page is 136975 bytes
'http://www.bbc.co.uk/' page is 193780 bytes
'http://some-made-up-domain.com/' page is 896 bytes
'http://www.foxnews.com/' page is 67574 bytes
'http://www.cnn.com/' page is 136975 bytes
DONE FEEDING
'http://www.bbc.co.uk/' page is 193605 bytes
'http://some-made-up-domain.com/' page is 896 bytes
'http://europe.wsj.com/' page is 874649 bytes
'http://europe.wsj.com/' page is 874649 bytes
At work I found a situation where I wanted to do parallel work on an unbounded stream of data. I created a small library inspired by the excellent answer already provided by Stephen Rauch.
I originally approached this problem by thinking about two separate threads, one that submits work to a queue and one that monitors the queue for any completed tasks and makes more room for new work to come in. This is similar to what Stephen Rauch proposed, where he consumes the stream using a feed_the_workers function that runs in a separate thread.
Talking to one of my colleagues, he helped me realize that you can get away with doing everything in a single thread if you define a buffered iterator that allows you to control how many elements are let out of the input stream every time you are ready to submit more work to the thread pool.
So we introduce the BufferedIter class
class BufferedIter(object):
    """Wrap an iterator so that items can be pulled from it in batches."""

    def __init__(self, iterator):
        # Must support next(); a plain iterable has to be wrapped in iter() first.
        self.iter = iterator

    def nextN(self, n):
        """Return up to *n* items from the wrapped iterator as a list.

        If the iterator ends part-way through a batch, the items already
        pulled are returned instead of being discarded.

        Raises:
            StopIteration: when n > 0 and the iterator has nothing left.
        """
        vals = []
        for _ in range(n):
            try:
                vals.append(next(self.iter))
            except StopIteration:
                if not vals:
                    # Nothing at all was available: signal exhaustion as before.
                    raise
                break
        return vals
which allows us to define the stream processor in the following way
import logging
import queue
import signal
import sys
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
level = logging.DEBUG
log = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
handler.setLevel(level)
log.addHandler(handler)
log.setLevel(level)
WAIT_SLEEP = 1 # second, adjust this based on the timescale of your tasks
def stream_processor(input_stream, task, num_workers):
    """Run *task* on every item of *input_stream* using a pool of threads.

    Args:
        input_stream: Iterator yielding the items to process.
        task: Callable invoked with one item per submission.
        num_workers: Size of the thread pool and cap on in-flight items.

    Processing ends when SIGINT/SIGTERM is received or when the input stream
    is exhausted; in both cases the futures already submitted are allowed to
    finish. Success and failure counts are logged at the end.
    """
    # Use a queue to signal shutdown.
    shutting_down = queue.Queue()

    def shutdown(signum, frame):
        log.warning('Caught signal %d, shutting down gracefully ...' % signum)
        # Put an item in the shutting down queue to signal shutdown.
        shutting_down.put(None)

    # Register the signal handler
    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    def is_shutting_down():
        return not shutting_down.empty()

    futures = dict()
    buffer = BufferedIter(input_stream)
    # Set once the input has nothing left, so a finite stream ends cleanly
    # instead of crashing the loop with an unhandled StopIteration.
    input_exhausted = False

    with ThreadPoolExecutor(num_workers) as executor:
        num_success = 0
        num_failure = 0
        while True:
            idle_workers = num_workers - len(futures)

            if not input_exhausted and not is_shutting_down():
                # Pull one item at a time so nothing already taken from the
                # stream is lost when it ends part-way through a batch.
                for _ in range(idle_workers):
                    try:
                        items = buffer.nextN(1)
                    except StopIteration:
                        input_exhausted = True
                        break
                    if not items:
                        input_exhausted = True
                        break
                    for data in items:
                        futures[executor.submit(task, data)] = data

            done, _ = wait(futures, timeout=WAIT_SLEEP, return_when=ALL_COMPLETED)
            for f in done:
                data = futures[f]
                try:
                    f.result(timeout=0)
                except Exception as exc:
                    log.error('future encountered an exception: %r, %s' % (data, exc))
                    num_failure += 1
                else:
                    log.info('future finished successfully: %r' % data)
                    num_success += 1

                del futures[f]

            if (input_exhausted or is_shutting_down()) and len(futures) == 0:
                break

        log.info("num_success=%d, num_failure=%d" % (num_success, num_failure))
Below we show an example for how to use the stream processor
import itertools
def integers():
    """Simulate an infinite stream of work."""
    current = 0
    while True:
        yield current
        current += 1
def task(x, delay=3):
    """The task we would like to perform in parallel.

    With some delay to simulate a time consuming job.
    With a baked in exception to simulate errors.

    Args:
        x: Number to square.
        delay: Seconds to sleep before computing; defaults to the original 3.

    Returns:
        x * x

    Raises:
        ValueError: when x == 4, to simulate a failing job.
    """
    time.sleep(delay)
    if x == 4:
        raise ValueError('bad luck')
    return x * x
stream_processor(integers(), task, num_workers=3)
The output for this example is shown below
2019-01-15 22:34:40,193 future finished successfully: 1
2019-01-15 22:34:40,193 future finished successfully: 0
2019-01-15 22:34:40,193 future finished successfully: 2
2019-01-15 22:34:43,201 future finished successfully: 5
2019-01-15 22:34:43,201 future encountered an exception: 4, bad luck
2019-01-15 22:34:43,202 future finished successfully: 3
2019-01-15 22:34:46,208 future finished successfully: 6
2019-01-15 22:34:46,209 future finished successfully: 7
2019-01-15 22:34:46,209 future finished successfully: 8
2019-01-15 22:34:49,215 future finished successfully: 11
2019-01-15 22:34:49,215 future finished successfully: 10
2019-01-15 22:34:49,215 future finished successfully: 9
^C <=== THIS IS WHEN I HIT Ctrl-C
2019-01-15 22:34:50,648 Caught signal 2, shutting down gracefully ...
2019-01-15 22:34:52,221 future finished successfully: 13
2019-01-15 22:34:52,222 future finished successfully: 14
2019-01-15 22:34:52,222 future finished successfully: 12
2019-01-15 22:34:52,222 num_success=14, num_failure=1
I really liked the interesting approach by @pedro above. However, when processing thousands of files, I noticed that at the end a StopIteration would be thrown and some files would always be skipped. I had to make a small modification, as follows. Very useful answer again.
class BufferedIter(object):
    """Wrap an iterator so items can be pulled in batches, reporting exhaustion."""

    def __init__(self, iterator):
        self.iter = iterator

    def nextN(self, n):
        """Return (items, finished) with up to *n* items from the iterator.

        finished is True when the iterator ran out before *n* items were
        collected; the items gathered so far are still returned.
        """
        batch = []
        for _ in range(n):
            try:
                item = next(self.iter)
            except StopIteration:
                return batch, True
            batch.append(item)
        return batch, False
-- Call as follows
...
if not is_shutting_down():
items, is_finished = buffer.nextN(idle_workers)
if is_finished:
stop()
...
-- Where stop is a function that simply tells to shutdown
def stop():
    # Request shutdown by putting a marker item on the shutting_down queue.
    shutting_down.put(None)
It is possible to gain the benefits of the executor without strictly having to use a Queue. New tasks are submitted from the main thread. The undone futures are tracked and waited on until all futures are done.
import concurrent.futures
import sys
import time
sys.setrecursionlimit(64) # This is only for demonstration purposes to trigger a RecursionError. Do not set in practice.
def slow_factorial(n: int) -> int:
    """Compute n! recursively, sleeping 10 ms in every call to simulate slow work."""
    time.sleep(0.01)
    return 1 if n == 0 else n * slow_factorial(n - 1)
initial_inputs = [0, 1, 5, 20, 200, 100, 50, 51, 55, 40, 44, 21, 222, 333, 202, 1000, 10, 9000, 9009, 99, 9999]

# ProcessPoolExecutor workers may re-import this module (spawn start method).
# Without this guard each worker would re-run the benchmark on import.
if __name__ == "__main__":
    for executor_class in (concurrent.futures.ThreadPoolExecutor, concurrent.futures.ProcessPoolExecutor):
        for max_workers in (4, 8, 16, 32):
            start_time = time.monotonic()
            with executor_class(max_workers=max_workers) as executor:
                futures_to_n = {executor.submit(slow_factorial, n): n for n in initial_inputs}
                # Keep waiting until no future is outstanding; failures below
                # add new futures to the same dict.
                while futures_to_n:
                    futures_done, futures_not_done = concurrent.futures.wait(futures_to_n, return_when=concurrent.futures.FIRST_COMPLETED)
                    # Note: Length of futures_done is often > 1.
                    for future in futures_done:
                        n = futures_to_n.pop(future)
                        try:
                            factorial_n = future.result()
                        except RecursionError:
                            # Retry with a smaller input submitted from the main thread.
                            n_smaller = int(n ** 0.9)
                            future = executor.submit(slow_factorial, n_smaller)
                            futures_to_n[future] = n_smaller
                            # print(f'Failed to compute factorial of {n}. Trying to compute factorial of a smaller number {n_smaller} instead.')
                        else:
                            # print(f'Factorial of {n} is {factorial_n}.')
                            pass
            used_time = time.monotonic() - start_time
            executor_type = executor_class.__name__.removesuffix('PoolExecutor').lower()
            print(f'Workflow took {used_time:.1f}s with {max_workers} {executor_type} workers.')
        print()
Output:
Workflow took 9.4s with 4 thread workers.
Workflow took 6.3s with 8 thread workers.
Workflow took 5.4s with 16 thread workers.
Workflow took 5.2s with 32 thread workers.
Workflow took 9.0s with 4 process workers.
Workflow took 5.9s with 8 process workers.
Workflow took 5.1s with 16 process workers.
Workflow took 4.9s with 32 process workers.
For more clarity, uncomment the two print statements. As per the output above, there is an asymptotic speed benefit with more workers.