how to add multiprocessing to loops? - python
I have a large customer data set (10 million+ rows) on which I am running a loop calculation. I tried to add multiprocessing by splitting data1 into chunks and running them in SageMaker Studio, but the calculation takes longer with multiprocessing than without it. I am not sure what I am doing wrong, please help.
Input data example:

import pandas as pd
import numpy as np

state_list = ['A', 'B', 'C', 'D', 'E']  # possible states

data1 = pd.DataFrame({"cust_id": ['x111', 'x112'],  # customer data
                      "state": ['B', 'E'],
                      "amount": [1000, 500],
                      "year": [3, 2],
                      "group": [10, 10],
                      "loan_rate": [0.12, 0.13]})
data1['state'] = pd.Categorical(data1['state'],
                                categories=state_list,
                                ordered=True).codes

lookup1 = pd.DataFrame({'year': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                        'lim %': [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]}).set_index(['year'])

matrix_data = np.arange(250).reshape(10, 5, 5)  # 3d matrix by state (A-E) and year (1-10)

end = pd.Timestamp(year=2021, month=9, day=1)  # creating a list of dates
df = pd.DataFrame({"End": pd.date_range(end, periods=10, freq="M")})
df['End'] = df['End'].dt.day
End = df.values
end_dates = End.reshape(-1)  # array([30, 31, 30, 31, 31, 28, 31, 30, 31, 30]); just to simplify access to the end date values
Calculation:

import multiprocessing as mp

num_processes = 4

# Split the customer data into chunks
chunks = np.array_split(data1, num_processes)
queue = mp.Queue()

def calc(chunk):
    results1 = {}
    for cust_id, state, amount, start, group, loan_rate in chunk.itertuples(name=None, index=False):
        res1 = [amount * matrix_data[start-1, state, :]]
        for year in range(start+1, len(matrix_data)+1):
            res1.append(lookup1.loc[year].iat[0] * np.array(res1[-1]))
            res1.append(res1[-1] * loan_rate * end_dates[year-1]/365)  # year - 1 here
            res1.append(res1[-1] + 100)
            res1.append(np.linalg.multi_dot([res1[-1], matrix_data[year-1]]))
        results1[cust_id] = res1
    queue.put(results1)

processes = [mp.Process(target=calc, args=(chunk,)) for chunk in chunks]
for p in processes:
    p.start()
for p in processes:
    p.join()

results1 = {}
while not queue.empty():
    results1.update(queue.get())
I think it would be easier to use a multiprocessing pool with the map method, which submits tasks in chunks anyway; your worker function calc then just needs to deal with individual tuples, since the chunking is done for you transparently. The pool computes what it thinks is an optimal number of rows to be chunked together based on the total number of rows and the number of processes in the pool, but you can override this. So a solution would look something like the following. Since you have not tagged your question with the OS you are running under, the code below should run under Windows, Linux or macOS in the most efficient way for that platform. But as I mentioned in a comment, multiprocessing may actually slow down getting your results if calc is not sufficiently CPU-intensive.
from multiprocessing import Pool
import pandas as pd
import numpy as np

def init_pool_processes(*args):
    global lookup1, matrix_data, end_dates
    lookup1, matrix_data, end_dates = args  # unpack

def calc(t):
    cust_id, state, amount, start, group, loan_rate = t  # unpack
    res1 = [amount * matrix_data[start-1, state, :]]
    for year in range(start+1, len(matrix_data)+1):
        res1.append(lookup1.loc[year].iat[0] * np.array(res1[-1]))
        res1.append(res1[-1] * loan_rate * end_dates[year-1]/365)  # year - 1 here
        res1.append(res1[-1] + 100)
    return (cust_id, res1)  # return tuple

def main():
    state_list = ['A', 'B', 'C', 'D', 'E']  # possible states

    data1 = pd.DataFrame({"cust_id": ['x111', 'x112'],  # customer data
                          "state": ['B', 'E'],
                          "amount": [1000, 500],
                          "year": [3, 2],
                          "group": [10, 10],
                          "loan_rate": [0.12, 0.13]})
    data1['state'] = pd.Categorical(data1['state'],
                                    categories=state_list,
                                    ordered=True).codes

    lookup1 = pd.DataFrame({'year': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                            'lim %': [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]}).set_index(['year'])

    matrix_data = np.arange(250).reshape(10, 5, 5)  # 3d matrix by state (A-E) and year (1-10)

    end = pd.Timestamp(year=2021, month=9, day=1)  # creating a list of dates
    df = pd.DataFrame({"End": pd.date_range(end, periods=10, freq="M")})
    df['End'] = df['End'].dt.day
    End = df.values
    end_dates = End.reshape(-1)  # array([30, 31, 30, 31, 31, 28, 31, 30, 31, 30]); just to simplify access to the end date values

    with Pool(initializer=init_pool_processes, initargs=(lookup1, matrix_data, end_dates)) as pool:
        results = {cust_id: arr for cust_id, arr in pool.map(calc, data1.itertuples(name=None, index=False))}

    for cust_id, arr in results.items():
        print(cust_id, arr)

if __name__ == '__main__':
    main()
Prints:
x111 [array([55000, 56000, 57000, 58000, 59000]), array([5500., 5600., 5700., 5800., 5900.]), array([56.05479452, 57.0739726 , 58.09315068, 59.11232877, 60.13150685]), array([156.05479452, 157.0739726 , 158.09315068, 159.11232877,
160.13150685]), array([15.60547945, 15.70739726, 15.80931507, 15.91123288, 16.01315068]), array([0.15904763, 0.16008635, 0.16112507, 0.1621638 , 0.16320252]), array([100.15904763, 100.16008635, 100.16112507, 100.1621638 ,
100.16320252]), array([10.01590476, 10.01600864, 10.01611251, 10.01621638, 10.01632025]), array([0.09220121, 0.09220216, 0.09220312, 0.09220407, 0.09220503]), array([100.09220121, 100.09220216, 100.09220312, 100.09220407,
100.09220503]), array([10.00922012, 10.00922022, 10.00922031, 10.00922041, 10.0092205 ]), array([0.10201178, 0.10201178, 0.10201178, 0.10201178, 0.10201178]), array([100.10201178, 100.10201178, 100.10201178, 100.10201178,
100.10201178]), array([10.01020118, 10.01020118, 10.01020118, 10.01020118, 10.01020118]), array([0.09873075, 0.09873075, 0.09873075, 0.09873075, 0.09873075]), array([100.09873075, 100.09873075, 100.09873075, 100.09873075,
100.09873075]), array([10.00987308, 10.00987308, 10.00987308, 10.00987308, 10.00987308]), array([0.10201843, 0.10201843, 0.10201843, 0.10201843, 0.10201843]), array([100.10201843, 100.10201843, 100.10201843, 100.10201843,
100.10201843]), array([10.01020184, 10.01020184, 10.01020184, 10.01020184, 10.01020184]), array([0.09873076, 0.09873076, 0.09873076, 0.09873076, 0.09873076]), array([100.09873076, 100.09873076, 100.09873076, 100.09873076,
100.09873076])]
x112 [array([22500, 23000, 23500, 24000, 24500]), array([2250., 2300., 2350., 2400., 2450.]), array([24.04109589, 24.57534247, 25.10958904, 25.64383562, 26.17808219]), array([124.04109589, 124.57534247, 125.10958904, 125.64383562,
126.17808219]), array([12.40410959, 12.45753425, 12.5109589 , 12.56438356, 12.61780822]), array([0.13695496, 0.13754483, 0.1381347 , 0.13872456, 0.13931443]), array([100.13695496, 100.13754483, 100.1381347 , 100.13872456,
100.13931443]), array([10.0136955 , 10.01375448, 10.01381347, 10.01387246, 10.01393144]), array([0.11056217, 0.11056282, 0.11056347, 0.11056413, 0.11056478]), array([100.11056217, 100.11056282, 100.11056347, 100.11056413,
100.11056478]), array([10.01105622, 10.01105628, 10.01105635, 10.01105641, 10.01105648]), array([0.09983629, 0.09983629, 0.09983629, 0.09983629, 0.09983629]), array([100.09983629, 100.09983629, 100.09983629, 100.09983629,
100.09983629]), array([10.00998363, 10.00998363, 10.00998363, 10.00998363, 10.00998363]), array([0.11052119, 0.11052119, 0.11052119, 0.11052119, 0.11052119]), array([100.11052119, 100.11052119, 100.11052119, 100.11052119,
100.11052119]), array([10.01105212, 10.01105212, 10.01105212, 10.01105212, 10.01105212]), array([0.10696741, 0.10696741, 0.10696741, 0.10696741, 0.10696741]), array([100.10696741, 100.10696741, 100.10696741, 100.10696741,
100.10696741]), array([10.01069674, 10.01069674, 10.01069674, 10.01069674, 10.01069674]), array([0.11052906, 0.11052906, 0.11052906, 0.11052906, 0.11052906]), array([100.11052906, 100.11052906, 100.11052906, 100.11052906,
100.11052906]), array([10.01105291, 10.01105291, 10.01105291, 10.01105291, 10.01105291]), array([0.10696741, 0.10696741, 0.10696741, 0.10696741, 0.10696741]), array([100.10696741, 100.10696741, 100.10696741, 100.10696741,
100.10696741])]
If you wish to save memory, you could use the imap_unordered method:
def main():
    ...  # code omitted

    def compute_chunksize(iterable_size, pool_size):
        chunksize, remainder = divmod(iterable_size, 4 * pool_size)
        if remainder:
            chunksize += 1
        return chunksize

    from multiprocessing import cpu_count

    pool_size = cpu_count()
    iterable_size = 100_000  # Your best estimate
    chunksize = compute_chunksize(iterable_size, pool_size)
    with Pool(pool_size, initializer=init_pool_processes, initargs=(lookup1, matrix_data, end_dates)) as pool:
        it = pool.imap_unordered(calc, data1.itertuples(name=None, index=False), chunksize=chunksize)
        """
        # Create dictionary in memory:
        results = {cust_id: arr for cust_id, arr in it}
        """
        # Or to save memory, iterate the results:
        for cust_id, arr in it:
            print(cust_id, arr)

if __name__ == '__main__':
    main()
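For what it's worth, the divmod(iterable_size, 4 * pool_size) heuristic above is, as far as I recall, essentially what Pool.map does internally when you do not pass a chunksize (roughly four chunks per worker); imap_unordered cannot compute it for you because it does not know the length of an arbitrary iterable up front. Purely as an illustration of the numbers involved, on a hypothetical 8-core machine with the full 10-million-row data set:

chunksize = compute_chunksize(10_000_000, 8)  # -> 312500, i.e. 32 roughly equal chunks of work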
Related
duplicated values multiprocessing python
I'm trying to do multiprocessing with Python, but I have duplicated values in the results file. Could you please help me to solve that? Here is my code:

import itertools
from multiprocessing import Pool
from multiprocessing import Manager
import pandas as pd

PARAMS = {}
LPT_LIMIT = [4, 6, 8, 10]
HPT_LIMIT = [1.6, 1.8, 2.0]
NB_FLIGHT = [10, 30]
LPT_EXCEEDENCE = [1, 4]
HPT_EXCEEDENCE = [3, 4]

tmp = [LPT_LIMIT, HPT_LIMIT, NB_FLIGHT, LPT_EXCEEDENCE, HPT_EXCEEDENCE]
parameters = list(itertools.product(*tmp))

def toto(param):
    PARAMS['LPT_LMIT'] = param[0]
    PARAMS['HPT_LMIT'] = param[1]
    PARAMS['NB_FLIGHT'] = param[2]
    PARAMS['LPT_EXCEEDENCE'] = param[3]
    PARAMS['HPT_EXCEEDENCE'] = param[4]
    return PARAMS

if __name__ == '__main__':
    pool = Pool()
    manager = Manager()
    my_list = manager.list()
    my_list.append(pool.map(toto, parameters))
    flat_list = [item for sublist in my_list for item in sublist]
    pd.DataFrame(flat_list).to_excel('results.xlsx', index=False)

The result is that I only get the value 4 in HPT_EXCEEDENCE, but HPT_EXCEEDENCE should be 3 or 4, so I don't know what's wrong with my code.
Unless you are doing something more complex, you don't need Manager(). The problem specifically was the location of PARAMS = {}: as a module-level global it was shared by every call, so each call to toto mutated and returned the same dictionary. Moving it inside toto gives every call its own dict. See the updated code below; this seems to get the result you want.

import itertools
from multiprocessing import Pool
import pandas as pd

LPT_LIMIT = [4, 6, 8, 10]
HPT_LIMIT = [1.6, 1.8, 2.0]
NB_FLIGHT = [10, 30]
LPT_EXCEEDENCE = [1, 4]
HPT_EXCEEDENCE = [3, 4]

tmp = [LPT_LIMIT, HPT_LIMIT, NB_FLIGHT, LPT_EXCEEDENCE, HPT_EXCEEDENCE]
parameters = list(itertools.product(*tmp))

def toto(param):
    PARAMS = {}  # local now, so each call builds a fresh dict
    PARAMS['LPT_LMIT'] = param[0]
    PARAMS['HPT_LMIT'] = param[1]
    PARAMS['NB_FLIGHT'] = param[2]
    PARAMS['LPT_EXCEEDENCE'] = param[3]
    PARAMS['HPT_EXCEEDENCE'] = param[4]
    return PARAMS

if __name__ == '__main__':
    pool = Pool()
    my_list = pool.map(toto, parameters)
    pd.DataFrame(my_list).to_excel('results1.xlsx', index=False)
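To see why the location of PARAMS matters, the same aliasing can be reproduced without any multiprocessing at all, because map simply collects whatever object toto returns. A tiny standalone sketch (not your actual parameters, just an illustration):

shared = {}

def fill(value):
    shared['x'] = value  # mutates the one module-level dict
    return shared        # every call hands back the same object

results = [fill(v) for v in (3, 4)]
print(results)           # [{'x': 4}, {'x': 4}] -- both entries reflect only the last write

With Pool, each worker collects the results of a whole chunk of tasks the same way before pickling them back to the parent, so every row in a chunk ends up as a copy of that chunk's last PARAMS, which is presumably why only HPT_EXCEEDENCE = 4 (the fastest-varying, last value in itertools.product) showed up in your file.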
concurrent.futures.ProcessPoolExecutor(): json file is not created
I'm new to this package. I have been studying the code from https://github.com/diningphil/graph-mixture-density-networks (with some minor modification). In the notebook file SIR Simulation with DGL_ERDOS-RENYI.ipynb, during the simulation process, I encountered a weird thing: if I set debug = True, which means I'm not using pool = concurrent.futures.ProcessPoolExecutor(max_workers=processes) but just running the simulations one by one, both the .json files and the .bin files are created according to the json_filepath variable. However, when I delete the output and run it with debug = False, so that all the code runs simultaneously if my understanding is correct, the json file is not created and the program seems to stop at the step graph.to(torch.device(device)), as none of my print commands after that are executed. I only get the .bin files. Could anyone tell me the possible reason, or what I should do about it? Thanks a lot!

'''
run simulation and store
1) state of all nodes at each time step into a single pandas dataframe for all beta, gamma and repetitions
2) R_0
3) number of total people infected (total - susceptible at the end of the iteration)
'''
seed = 38
torch.manual_seed(seed)

device = 'cuda'
beta_range = [0, 1]
gamma_range = [0.1, 1]
iterations = 5
no_graph_samples = 20
no_realizations = 100

family_name = 'erdos_renyi'
folder = Path(f'{family_name}')
if not os.path.exists(folder):
    os.makedirs(folder)

def simulate(p, graph_size, graph_sample, graphs_folder):
    json_filepath = str(Path(graphs_folder, f'data_{graph_sample}.json'))
    graph_filename = graphs_folder / Path(f'sample{graph_sample}.bin')
    json_data = {'family': family_name,
                 'p': p,
                 'graph_size': graph_size,
                 'no_graph_samples': no_graph_samples,
                 'graph_samples': []
                 }
    sample = {'graph_filename': str(graph_filename),
              'simulations': []}

    if not os.path.exists(graph_filename):
        graph = create_erdos_renyi_graph(graph_size, p)
        save_graphs(str(graph_filename), graph)
    else:
        graph = load_graphs(str(graph_filename))[0][0]

    #print('test')
    graph.to(torch.device(device))  ## every code above this line will run, at least print() will work

    if not os.path.exists(json_filepath):
        print('test: json_does not exit')
        for realizations in range(no_realizations):
            beta = float(torch.FloatTensor(1).uniform_(beta_range[0], beta_range[1]))
            gamma = float(torch.FloatTensor(1).uniform_(gamma_range[0], gamma_range[1]))
            R0 = beta/gamma
            graph.register_message_func(lambda x: SIR_message_func(beta, x))
            graph.register_reduce_func(lambda x: SIR_reduce_func(gamma, x))
            for initial_probability_of_infection in [0.01, 0.05, 0.1]:
                simulation = {'beta': beta, 'gamma': gamma, 'R0': R0, 'init_infection_prob': initial_probability_of_infection}
                S, I, R, first_infected = simulate_SIR(graph, initial_probability_of_infection, iterations)
                simulation['S'] = S
                simulation['I'] = I
                simulation['R'] = R
                simulation['first_infected'] = first_infected
                simulation['total_infected'] = graph_size - S[-1]
                sample['simulations'].append(deepcopy(simulation))
                #print("Realization ", realizations, "produced ", graph_size - S[-1], "infected")
        json_data['graph_samples'].append(sample)
        with open(json_filepath, 'w') as f:
            line = json.dumps(json_data)
            f.write(line + '\n')
            #json.dump(json_data, f)
            print('dumped')
    else:
        print('test: there is json')
        print(sample)
        # with open(json_filepath, 'r') as f:
        #     json.load(f)
        #     print('loaded but why')

debug = False
processes = 100

import concurrent.futures
pool = concurrent.futures.ProcessPoolExecutor(max_workers=processes)

#for graph_size in [10, 50, 100, 200, 500, 1000]:
for graph_size in [10]:
    for p in [0.01, 0.05]:
    #for p in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5]:
        # store each graph in a different folder (create path based on graph size, prob of edge and graph sample)
        graphs_folder = folder / Path(f'graphs_size{graph_size}_p{float(p)}')
        if not os.path.exists(graphs_folder):
            os.makedirs(graphs_folder)
        for graph_sample in range(no_graph_samples):
            if not debug:
                pool.submit(simulate, p, graph_size, graph_sample, graphs_folder)
            else:  # DEBUG
                simulate(p, graph_size, graph_sample, graphs_folder)

pool.shutdown()  # wait for the batch of configs to terminate
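One general point worth checking with concurrent.futures: pool.submit returns a Future, and if the submitted call raises an exception inside the child process (for example while moving the graph to a CUDA device), the exception is stored on the Future and stays silent unless you ask for it. A minimal sketch of how the results could be collected so that errors surface, keeping only the innermost loop from the snippet above for brevity (the names simulate, p, graph_size, graphs_folder, no_graph_samples and processes are taken from that snippet):

import concurrent.futures

futures = []
with concurrent.futures.ProcessPoolExecutor(max_workers=processes) as pool:
    for graph_sample in range(no_graph_samples):
        futures.append(pool.submit(simulate, p, graph_size, graph_sample, graphs_folder))

    for fut in concurrent.futures.as_completed(futures):
        try:
            fut.result()  # re-raises any exception that happened inside the child process
        except Exception as exc:
            print(f'simulate failed: {exc!r}')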
How can I parallelize the following snippet of code in python?
I have a bunch of matrix multiplication operations that are performed only row-wise. I was wondering how to speed up the computation by parallelization:

data = np.random.randint(1, 100, (100000, 800))
indices_1 = np.equal(data, 1)
A = np.zeros((100000, 100))
B = np.random.randn(800, 100)

for i in range(100000):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

I tried multiprocessing, but for some reason it did not perform better than the sequential version. Here is my multiprocessing implementation:

from multiprocessing.pool import ThreadPool, Pool

pool = ThreadPool()  # can also use Pool

def f(i):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

pool.map(f, range(100000))

Both yielded the same running time (around 32 seconds). Other parallelization methods like concurrent.futures did not improve the runtime either (used like below):

with concurrent.futures.ThreadPoolExecutor() as executor:
    result = executor.map(f, range(100000))

I also tried to apply dask but could not make their framework work in my case. Any help will be much appreciated! Thanks!
import numpy as np
import multiprocessing as mp

data = list(np.random.randint(1, 100, (100000, 800)))
indices_1 = np.equal(data, 1)
A = list(np.zeros((100000, 100)))
B = np.random.randn(800, 100)

def f(data, A, i):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

with mp.Manager() as manager:
    data_global = manager.list(data)
    A_global = manager.list(A)
    with mp.Pool() as p:
        results = [p.apply_async(f, (data_global, A_global, i,)) for i in range(100000)]
        for i in results:
            i.wait()
    data_global = list(data_global)
    A_global = list(A_global)
python time sliding window variation
I'm stuck with a variation of the sliding window problem! Usually we set the number of elements to slide over, but in my case I want to slide over time! The goal I would like to reach is a function (a thread in this case) that is able to create a "time" window of n seconds (given by the user). Starting from the first element of the queue, in this case:

[datetime.time(7, 6, 14, 537370), 584]

add 5 seconds -> 7:6:19.537370 (ending point) and sum all elements in this interval:

[datetime.time(7, 6, 14, 537370), 584]
[datetime.time(7, 6, 18, 542798), 761]

Total: 584 + 761 = 1345

Then create another window with the second element, and so on. IMPORTANT: one item can be part of several windows. The items are generated on the fly, so a naive solution with a function that sleeps for n seconds and then flushes the queue is not good for my problem. I think it's a variation of this post: Flexible sliding window (in Python). But I still can't solve the problem! Any help or suggestions will be appreciated. Thanks!

Example list of elements:

[datetime.time(7, 6, 14, 537370), 584]
[datetime.time(7, 6, 18, 542798), 761]
[datetime.time(7, 6, 20, 546007), 848]
[datetime.time(7, 6, 24, 550969), 20]
[datetime.time(7, 6, 27, 554370), 478]
[datetime.time(7, 6, 27, 554628), 12]
[datetime.time(7, 6, 31, 558919), 29]
[datetime.time(7, 6, 31, 559562), 227]
[datetime.time(7, 6, 32, 560863), 379]
[datetime.time(7, 6, 35, 564863), 132]
[datetime.time(7, 6, 37, 567276), 651]
[datetime.time(7, 6, 38, 568652), 68]
[datetime.time(7, 6, 40, 569861), 100]
[datetime.time(7, 6, 41, 571459), 722]
[datetime.time(7, 6, 44, 574802), 560]
...

Code:

import random
import time
import threading
import datetime
from multiprocessing import Queue

q = Queue()

# this is a producer that puts elements in the queue
def t1():
    element = [0, 0]
    while True:
        time.sleep(random.randint(0, 5))
        element[0] = datetime.datetime.now().time()
        element[1] = random.randint(0, 1000)
        q.put(element)

# this is a consumer that sums elements inside a window of n seconds
# I need a sliding time window of n seconds that sums all elements falling inside it
def t2():
    windowsize = 5  # size of the window, 5 seconds
    while not q.empty():
        e = q.get()
        start = e[0]  # the first element is the beginning point
        end = start + datetime.timedelta(seconds=windowsize)  # ending point
        sum += e[1]
        # some code that solves the problem :)

a = threading.Thread(target=t1)
a.start()

b = threading.Thread(target=t2)
b.start()

while True:
    time.sleep(1)
Would this do? This is how I understood your problem. What this does is create a class that keeps track of things. You either add to it with tw.insert() or sum over a window with tw.sum_window(start, end). When you initialise TimeWindow, you can give it a max size parameter; the default is 10 seconds. Before every insert or sum operation it does a clean-up, so that the first element time e[0][0] and the last element time e[n][0] are within 10 seconds of each other; older entries are expunged. A "poller" thread is there to track your requests. I have added two queues, as I do not know what you intend to do with the results. Now if you want to request data from now until 5 seconds in the future, you create a request and put it in the queue. The request has a random id so that you can match it to results. Your main thread needs to monitor the result queue, and after five seconds every request sent to the queue returns with the same id and the sum. If this is not what you want to do, then I just don't understand what it is you are trying to achieve here. Even this is already rather complicated, and there may be a much simpler way to achieve what you intend to do.

import random
import time
import threading
import datetime
import Queue
import uuid
from collections import deque

q_lock = threading.RLock()

class TimeWindow(object):
    def __init__(self, max_size=10):
        self.max_size = max_size
        self.q = deque()

    def expire(self):
        time_now = datetime.datetime.now()
        while True:
            try:
                oldest_element = self.q.popleft()
                oe_time = oldest_element[0]
                if oe_time + datetime.timedelta(seconds=self.max_size) > time_now:
                    self.q.appendleft(oldest_element)
                    break
            except IndexError:
                break

    def insert(self, elm):
        self.expire()
        self.q.append(elm)

    def sum_window(self, start, end):
        self.expire()
        try:
            _ = self.q[0]
        except IndexError:
            return 0
        result = 0
        for f in self.q:
            if start < f[0] < end:
                result += f[1]
            else:
                pass
        return result

tw = TimeWindow()

def t1():
    while True:
        time.sleep(random.randint(0, 3))
        element = [datetime.datetime.now(), random.randint(0, 1000)]
        with q_lock:
            tw.insert(element)

def poller(in_q, out_q):
    pending = []
    while True:
        try:
            new_request = in_q.get(0.1)
            new_request["end"] = new_request["start"] + datetime.timedelta(seconds=new_request["frame"])
            pending.append(new_request)
        except Queue.Empty:
            pass
        new_pending = []
        for a in pending:
            if a["end"] < datetime.datetime.now():
                with q_lock:
                    r_sum = tw.sum_window(a["start"], a["end"])
                r_structure = {"id": a["id"], "result": r_sum}
                out_q.put(r_structure)
            else:
                new_pending.append(a)
        pending = new_pending

a = threading.Thread(target=t1)
a.daemon = True
a.start()

in_queue = Queue.Queue()
result_queue = Queue.Queue()
po = threading.Thread(target=poller, args=(in_queue, result_queue,))
po.daemon = True
po.start()

while True:
    time.sleep(1)
    newr = {"id": uuid.uuid4(), "frame": 5, "start": datetime.datetime.now()}
    in_queue.put(newr)
    try:
        ready = result_queue.get(0)
        print ready
    except Queue.Empty:
        pass
garim#wof:~$ python solution.py
1 t1 produce element: 16:09:30.472497 1
2 t1 produce element: 16:09:33.475714 9
3 t1 produce element: 16:09:34.476922 10
4 t1 produce element: 16:09:37.480100 7
solution: 16:09:37.481171 {'id': UUID('adff334f-a97a-459d-8dcc-f28309e25574'), 'result': 19}
5 t1 produce element: 16:09:38.481352 10
solution: 16:09:38.482687 {'id': UUID('0a7481e5-e993-439a-9f7e-2c5aeef86155'), 'result': 19}

It still doesn't work :( I added a counter for each element that t1 inserts. The goal is to do the sum (result_queue.get) at this time: 16:09:35.472497 -> 16:09:30.472497 + 5 seconds, not before. Only then does the element go out. The next time, the sum should be done at 16:09:38.475714 -> 16:09:33.475714 + 5 seconds. I understand that it's hard to explain. With both of your solutions the time window slides, so I can consider the problem solved :) I will try to improve when the sum function is executed; that time trigger is important. I acquired a lot of useful knowledge. Thanks for helping.
How to Update a Chaco Plot
I have a Chaco plot that I am creating, and when I change one of the variables, instead of updating, it draws the whole plot on top of the previous one, so the plots start to stack up and overlay each other while remaining in the same window. How can I overwrite the plot instead? Note: I am changing the variable "duration" in the Foo class at the bottom. Here is my code:

import threading
import time
from enthought.traits.api \
    import HasTraits, Int, Range, Array, Enum, on_trait_change
from enthought.traits.ui.api import View, Item
from enthought.chaco.chaco_plot_editor import ChacoPlotItem

class Hyetograph(HasTraits):
    """ Creates a simple hyetograph demo. """
    timeline = Array
    intensity = Array
    nrcs = Array
    duration = Int(12, desc='In Hours')
    year_storm = Enum(2, 10, 25, 100)
    county = Enum('Brazos', 'Dallas', 'El Paso', 'Harris')
    curve_number = Range(70, 100)
    plot_type = Enum('line', 'scatter')

    view1 = View(Item('plot_type'),
                 ChacoPlotItem('timeline', 'intensity',
                               type_trait='plot_type',
                               resizable=True,
                               x_label='Time (hr)',
                               y_label='Intensity (in/hr)',
                               color='blue',
                               bgcolor='white',
                               border_visible=True,
                               border_width=1,
                               padding_bg_color='lightgray'),
                 Item(name='duration'),
                 Item(name='year_storm'),
                 Item(name='county'))

    def calculate_intensity(self):
        """ The Hyetograph calculations. """
        # Assigning A, B, and C values based on year, storm, and county
        counties = {'Brazos': 0, 'Dallas': 3, 'El Paso': 6, 'Harris': 9}
        years = {
            2: [65, 8, .806, 54, 8.3, .791, 24, 9.5, .797, 68, 7.9, .800],
            10: [80, 8.5, .763, 78, 8.7, .777, 42, 12., .795, 81, 7.7, .753],
            25: [89, 8.5, .754, 90, 8.7, .774, 60, 12., .843, 81, 7.7, .724],
            100: [96, 8., .730, 106, 8.3, .762, 65, 9.5, .825, 91, 7.9, .706]
        }
        year = years[self.year_storm]
        value = counties[self.county]
        a, b, c = year[value], year[value+1], year[value+2]
        self.timeline = range(2, self.duration + 1, 2)
        intensity = a / (self.timeline * 60 + b)**c
        cumdepth = intensity * self.timeline
        temp = cumdepth[0]
        result = []
        for i in cumdepth[1:]:
            result.append(i - temp)
            temp = i
        result.insert(0, cumdepth[0])
        # Alternating block method implementation.
        result.reverse()
        switch = True
        o, e = [], []
        for i in result:
            if switch:
                o.append(i)
            else:
                e.append(i)
            switch = not switch
        e.reverse()
        result = o + e
        self.intensity = result

    def calculate_runoff(self):
        """ NRCS method to get run-off based on permeability of ground. """
        s = (1000 / self.curve_number) - 10
        a = self.intensity - (.2 * s)
        vr = a**2 / (self.intensity + (.8 * s))
        # There's no such thing as negative run-off.
        for i in range(0, len(a)):
            if a[i] <= 0:
                vr[i] = 0
        self.nrcs = vr

    @on_trait_change('duration, year_storm, county, curve_number')
    def _perform_calculations(self):
        self.calculate_intensity()
        self.calculate_runoff()

    def start(self):
        self._perform_calculations()
        self.configure_traits()

f = Hyetograph()
f.start()
# d = range(10, 13)
# for n in d:
#     f.duration = n
#     time.sleep(2)

class Foo(threading.Thread):
    def __init__(self, x):
        self.__x = x
        threading.Thread.__init__(self)

    def run(self):
        print str(self.__x)
        d = range(10, 13)
        for n in d:
            f.duration = n
            time.sleep(2)

Foo(1).start()