how to add multiprocessing to loops? - python
I have a large customer data set (10 million+ rows) on which I am running a loop calculation. I tried to add multiprocessing by splitting data1 into chunks and running them in SageMaker Studio, but the calculation takes longer with multiprocessing than without it. I am not sure what I am doing wrong, please help.
Input data example:

import pandas as pd
import numpy as np

state_list = ['A', 'B', 'C', 'D', 'E']  # possible states

data1 = pd.DataFrame({"cust_id": ['x111', 'x112'],  # customer data
                      "state": ['B', 'E'],
                      "amount": [1000, 500],
                      "year": [3, 2],
                      "group": [10, 10],
                      "loan_rate": [0.12, 0.13]})
data1['state'] = pd.Categorical(data1['state'],
                                categories=state_list,
                                ordered=True).codes

lookup1 = pd.DataFrame({'year': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                        'lim %': [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]}).set_index(['year'])

matrix_data = np.arange(250).reshape(10, 5, 5)  # 3d matrix by state (A-E) and year (1-10)

end = pd.Timestamp(year=2021, month=9, day=1)  # creating a list of dates
df = pd.DataFrame({"End": pd.date_range(end, periods=10, freq="M")})
df['End'] = df['End'].dt.day
End = df.values
end_dates = End.reshape(-1)  # array([30, 31, 30, 31, 31, 28, 31, 30, 31, 30]); just to simplify access to the end date values
Calculation:

import multiprocessing as mp

num_processes = 4

# Split the customer data into chunks
chunks = np.array_split(data1, num_processes)
queue = mp.Queue()

def calc(chunk):
    results1 = {}
    for cust_id, state, amount, start, group, loan_rate in chunk.itertuples(name=None, index=False):
        res1 = [amount * matrix_data[start-1, state, :]]
        for year in range(start+1, len(matrix_data)+1):
            res1.append(lookup1.loc[year].iat[0] * np.array(res1[-1]))
            res1.append(res1[-1] * loan_rate * end_dates[year-1]/365)  # year - 1 here
            res1.append(res1[-1] + 100)
            res1.append(np.linalg.multi_dot([res1[-1], matrix_data[year-1]]))
        results1[cust_id] = res1
    queue.put(results1)

processes = [mp.Process(target=calc, args=(chunk,)) for chunk in chunks]
for p in processes:
    p.start()
for p in processes:
    p.join()

results1 = {}
while not queue.empty():
    results1.update(queue.get())
I think it would be easier to use a multiprocessing pool with the map method, which submits tasks in chunks anyway; your worker function calc then just needs to deal with individual tuples, since the chunking is done for you transparently. The pool computes what it thinks is an optimal number of rows to be chunked together based on the total number of rows and the number of processes in the pool, but you can override this. So a solution would look something like the following. Since you have not tagged your question with the OS you are running under, the code below should run under Windows, Linux or macOS in the most efficient way for that platform. But as I mentioned in a comment, multiprocessing may actually slow down getting your results if calc is not sufficiently CPU-intensive.
from multiprocessing import Pool
import pandas as pd
import numpy as np

def init_pool_processes(*args):
    global lookup1, matrix_data, end_dates
    lookup1, matrix_data, end_dates = args  # unpack

def calc(t):
    cust_id, state, amount, start, group, loan_rate = t  # unpack
    res1 = [amount * matrix_data[start-1, state, :]]
    for year in range(start+1, len(matrix_data)+1):
        res1.append(lookup1.loc[year].iat[0] * np.array(res1[-1]))
        res1.append(res1[-1] * loan_rate * end_dates[year-1]/365)  # year - 1 here
        res1.append(res1[-1] + 100)
    return (cust_id, res1)  # return tuple

def main():
    state_list = ['A', 'B', 'C', 'D', 'E']  # possible states

    data1 = pd.DataFrame({"cust_id": ['x111', 'x112'],  # customer data
                          "state": ['B', 'E'],
                          "amount": [1000, 500],
                          "year": [3, 2],
                          "group": [10, 10],
                          "loan_rate": [0.12, 0.13]})
    data1['state'] = pd.Categorical(data1['state'],
                                    categories=state_list,
                                    ordered=True).codes

    lookup1 = pd.DataFrame({'year': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                            'lim %': [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]}).set_index(['year'])

    matrix_data = np.arange(250).reshape(10, 5, 5)  # 3d matrix by state (A-E) and year (1-10)

    end = pd.Timestamp(year=2021, month=9, day=1)  # creating a list of dates
    df = pd.DataFrame({"End": pd.date_range(end, periods=10, freq="M")})
    df['End'] = df['End'].dt.day
    End = df.values
    end_dates = End.reshape(-1)  # array([30, 31, 30, 31, 31, 28, 31, 30, 31, 30]); just to simplify access to the end date values

    with Pool(initializer=init_pool_processes, initargs=(lookup1, matrix_data, end_dates)) as pool:
        results = {cust_id: arr for cust_id, arr in pool.map(calc, data1.itertuples(name=None, index=False))}

    for cust_id, arr in results.items():
        print(cust_id, arr)

if __name__ == '__main__':
    main()
Prints:
x111 [array([55000, 56000, 57000, 58000, 59000]), array([5500., 5600., 5700., 5800., 5900.]), array([56.05479452, 57.0739726 , 58.09315068, 59.11232877, 60.13150685]), array([156.05479452, 157.0739726 , 158.09315068, 159.11232877,
160.13150685]), array([15.60547945, 15.70739726, 15.80931507, 15.91123288, 16.01315068]), array([0.15904763, 0.16008635, 0.16112507, 0.1621638 , 0.16320252]), array([100.15904763, 100.16008635, 100.16112507, 100.1621638 ,
100.16320252]), array([10.01590476, 10.01600864, 10.01611251, 10.01621638, 10.01632025]), array([0.09220121, 0.09220216, 0.09220312, 0.09220407, 0.09220503]), array([100.09220121, 100.09220216, 100.09220312, 100.09220407,
100.09220503]), array([10.00922012, 10.00922022, 10.00922031, 10.00922041, 10.0092205 ]), array([0.10201178, 0.10201178, 0.10201178, 0.10201178, 0.10201178]), array([100.10201178, 100.10201178, 100.10201178, 100.10201178,
100.10201178]), array([10.01020118, 10.01020118, 10.01020118, 10.01020118, 10.01020118]), array([0.09873075, 0.09873075, 0.09873075, 0.09873075, 0.09873075]), array([100.09873075, 100.09873075, 100.09873075, 100.09873075,
100.09873075]), array([10.00987308, 10.00987308, 10.00987308, 10.00987308, 10.00987308]), array([0.10201843, 0.10201843, 0.10201843, 0.10201843, 0.10201843]), array([100.10201843, 100.10201843, 100.10201843, 100.10201843,
100.10201843]), array([10.01020184, 10.01020184, 10.01020184, 10.01020184, 10.01020184]), array([0.09873076, 0.09873076, 0.09873076, 0.09873076, 0.09873076]), array([100.09873076, 100.09873076, 100.09873076, 100.09873076,
100.09873076])]
x112 [array([22500, 23000, 23500, 24000, 24500]), array([2250., 2300., 2350., 2400., 2450.]), array([24.04109589, 24.57534247, 25.10958904, 25.64383562, 26.17808219]), array([124.04109589, 124.57534247, 125.10958904, 125.64383562,
126.17808219]), array([12.40410959, 12.45753425, 12.5109589 , 12.56438356, 12.61780822]), array([0.13695496, 0.13754483, 0.1381347 , 0.13872456, 0.13931443]), array([100.13695496, 100.13754483, 100.1381347 , 100.13872456,
100.13931443]), array([10.0136955 , 10.01375448, 10.01381347, 10.01387246, 10.01393144]), array([0.11056217, 0.11056282, 0.11056347, 0.11056413, 0.11056478]), array([100.11056217, 100.11056282, 100.11056347, 100.11056413,
100.11056478]), array([10.01105622, 10.01105628, 10.01105635, 10.01105641, 10.01105648]), array([0.09983629, 0.09983629, 0.09983629, 0.09983629, 0.09983629]), array([100.09983629, 100.09983629, 100.09983629, 100.09983629,
100.09983629]), array([10.00998363, 10.00998363, 10.00998363, 10.00998363, 10.00998363]), array([0.11052119, 0.11052119, 0.11052119, 0.11052119, 0.11052119]), array([100.11052119, 100.11052119, 100.11052119, 100.11052119,
100.11052119]), array([10.01105212, 10.01105212, 10.01105212, 10.01105212, 10.01105212]), array([0.10696741, 0.10696741, 0.10696741, 0.10696741, 0.10696741]), array([100.10696741, 100.10696741, 100.10696741, 100.10696741,
100.10696741]), array([10.01069674, 10.01069674, 10.01069674, 10.01069674, 10.01069674]), array([0.11052906, 0.11052906, 0.11052906, 0.11052906, 0.11052906]), array([100.11052906, 100.11052906, 100.11052906, 100.11052906,
100.11052906]), array([10.01105291, 10.01105291, 10.01105291, 10.01105291, 10.01105291]), array([0.10696741, 0.10696741, 0.10696741, 0.10696741, 0.10696741]), array([100.10696741, 100.10696741, 100.10696741, 100.10696741,
100.10696741])]
If you wish to save memory, you could use the imap_unordered method:
def main():
    ...  # code omitted

    def compute_chunksize(iterable_size, pool_size):
        chunksize, remainder = divmod(iterable_size, 4 * pool_size)
        if remainder:
            chunksize += 1
        return chunksize

    from multiprocessing import cpu_count

    pool_size = cpu_count()
    iterable_size = 100_000  # Your best estimate
    chunksize = compute_chunksize(iterable_size, pool_size)
    with Pool(pool_size, initializer=init_pool_processes, initargs=(lookup1, matrix_data, end_dates)) as pool:
        it = pool.imap_unordered(calc, data1.itertuples(name=None, index=False), chunksize=chunksize)
        """
        # Create dictionary in memory:
        results = {cust_id: arr for cust_id, arr in it}
        """
        # Or to save memory, iterate the results:
        for cust_id, arr in it:
            print(cust_id, arr)

if __name__ == '__main__':
    main()
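For what it's worth, the divmod(iterable_size, 4 * pool_size) heuristic above is, as far as I recall, essentially what Pool.map does internally when you do not pass a chunksize (roughly four chunks per worker); imap_unordered cannot compute it for you because it does not know the length of an arbitrary iterable up front. Purely as an illustration of the numbers involved, on a hypothetical 8-core machine with the full 10-million-row data set:

chunksize = compute_chunksize(10_000_000, 8)  # -> 312500, i.e. 32 roughly equal chunks of work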
Related
duplicated values multiprocessing python
I'm trying to do multiprocessing with Python, but I have duplicated values in the results file. Could you please help me to solve that? Here is my code:

import itertools
from multiprocessing import Pool
from multiprocessing import Manager
import pandas as pd

PARAMS = {}
LPT_LIMIT = [4, 6, 8, 10]
HPT_LIMIT = [1.6, 1.8, 2.0]
NB_FLIGHT = [10, 30]
LPT_EXCEEDENCE = [1, 4]
HPT_EXCEEDENCE = [3, 4]

tmp = [LPT_LIMIT, HPT_LIMIT, NB_FLIGHT, LPT_EXCEEDENCE, HPT_EXCEEDENCE]
parameters = list(itertools.product(*tmp))

def toto(param):
    PARAMS['LPT_LMIT'] = param[0]
    PARAMS['HPT_LMIT'] = param[1]
    PARAMS['NB_FLIGHT'] = param[2]
    PARAMS['LPT_EXCEEDENCE'] = param[3]
    PARAMS['HPT_EXCEEDENCE'] = param[4]
    return PARAMS

if __name__ == '__main__':
    pool = Pool()
    manager = Manager()
    my_list = manager.list()
    my_list.append(pool.map(toto, parameters))
    flat_list = [item for sublist in my_list for item in sublist]
    pd.DataFrame(flat_list).to_excel('results.xlsx', index=False)

The result is that I only get the value 4 in HPT_EXCEEDENCE, but HPT_EXCEEDENCE should be 3 or 4, so I don't know what's wrong with my code.
Unless you are doing something more complex, you don't need Manager(). The problem specifically was the location of PARAMS = {}: as a module-level global it was shared by every call, so each call to toto mutated and returned the same dictionary. Moving it inside toto gives every call its own dict. See the updated code below; this seems to get the result you want.

import itertools
from multiprocessing import Pool
import pandas as pd

LPT_LIMIT = [4, 6, 8, 10]
HPT_LIMIT = [1.6, 1.8, 2.0]
NB_FLIGHT = [10, 30]
LPT_EXCEEDENCE = [1, 4]
HPT_EXCEEDENCE = [3, 4]

tmp = [LPT_LIMIT, HPT_LIMIT, NB_FLIGHT, LPT_EXCEEDENCE, HPT_EXCEEDENCE]
parameters = list(itertools.product(*tmp))

def toto(param):
    PARAMS = {}  # local now, so each call builds a fresh dict
    PARAMS['LPT_LMIT'] = param[0]
    PARAMS['HPT_LMIT'] = param[1]
    PARAMS['NB_FLIGHT'] = param[2]
    PARAMS['LPT_EXCEEDENCE'] = param[3]
    PARAMS['HPT_EXCEEDENCE'] = param[4]
    return PARAMS

if __name__ == '__main__':
    pool = Pool()
    my_list = pool.map(toto, parameters)
    pd.DataFrame(my_list).to_excel('results1.xlsx', index=False)
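To see why the location of PARAMS matters, the same aliasing can be reproduced without any multiprocessing at all, because map simply collects whatever object toto returns. A tiny standalone sketch (not your actual parameters, just an illustration):

shared = {}

def fill(value):
    shared['x'] = value  # mutates the one module-level dict
    return shared        # every call hands back the same object

results = [fill(v) for v in (3, 4)]
print(results)           # [{'x': 4}, {'x': 4}] -- both entries reflect only the last write

With Pool, each worker collects the results of a whole chunk of tasks the same way before pickling them back to the parent, so every row in a chunk ends up as a copy of that chunk's last PARAMS, which is presumably why only HPT_EXCEEDENCE = 4 (the fastest-varying, last value in itertools.product) showed up in your file.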
concurrent.futures.ProcessPoolExecutor(): json file is not created
I'm new to this package. I have been studying the code from https://github.com/diningphil/graph-mixture-density-networks (with some minor modification). In the notebook file SIR Simulation with DGL_ERDOS-RENYI.ipynb, during the simulation process, I encountered a weird thing: if I set debug = True, which means I'm not using pool = concurrent.futures.ProcessPoolExecutor(max_workers=processes) but just running the simulations one by one, both the .json files and the .bin files are created according to the json_filepath variable. However, when I delete the output and run it with debug = False, so that all the code runs simultaneously if my understanding is correct, the json file is not created and the program seems to stop at the step graph.to(torch.device(device)), as none of my print commands after that are executed. I only get the .bin files. Could anyone tell me the possible reason, or what I should do about it? Thanks a lot!

'''
run simulation and store
1) state of all nodes at each time step into a single pandas dataframe for all beta, gamma and repetitions
2) R_0
3) number of total people infected (total - susceptible at the end of the iteration)
'''
seed = 38
torch.manual_seed(seed)

device = 'cuda'
beta_range = [0, 1]
gamma_range = [0.1, 1]
iterations = 5
no_graph_samples = 20
no_realizations = 100

family_name = 'erdos_renyi'
folder = Path(f'{family_name}')
if not os.path.exists(folder):
    os.makedirs(folder)

def simulate(p, graph_size, graph_sample, graphs_folder):
    json_filepath = str(Path(graphs_folder, f'data_{graph_sample}.json'))
    graph_filename = graphs_folder / Path(f'sample{graph_sample}.bin')
    json_data = {'family': family_name,
                 'p': p,
                 'graph_size': graph_size,
                 'no_graph_samples': no_graph_samples,
                 'graph_samples': []
                 }
    sample = {'graph_filename': str(graph_filename),
              'simulations': []}

    if not os.path.exists(graph_filename):
        graph = create_erdos_renyi_graph(graph_size, p)
        save_graphs(str(graph_filename), graph)
    else:
        graph = load_graphs(str(graph_filename))[0][0]

    #print('test')
    graph.to(torch.device(device))  ## every code above this line will run, at least print() will work

    if not os.path.exists(json_filepath):
        print('test: json_does not exit')
        for realizations in range(no_realizations):
            beta = float(torch.FloatTensor(1).uniform_(beta_range[0], beta_range[1]))
            gamma = float(torch.FloatTensor(1).uniform_(gamma_range[0], gamma_range[1]))
            R0 = beta/gamma
            graph.register_message_func(lambda x: SIR_message_func(beta, x))
            graph.register_reduce_func(lambda x: SIR_reduce_func(gamma, x))
            for initial_probability_of_infection in [0.01, 0.05, 0.1]:
                simulation = {'beta': beta, 'gamma': gamma, 'R0': R0, 'init_infection_prob': initial_probability_of_infection}
                S, I, R, first_infected = simulate_SIR(graph, initial_probability_of_infection, iterations)
                simulation['S'] = S
                simulation['I'] = I
                simulation['R'] = R
                simulation['first_infected'] = first_infected
                simulation['total_infected'] = graph_size - S[-1]
                sample['simulations'].append(deepcopy(simulation))
                #print("Realization ", realizations, "produced ", graph_size - S[-1], "infected")
        json_data['graph_samples'].append(sample)
        with open(json_filepath, 'w') as f:
            line = json.dumps(json_data)
            f.write(line + '\n')
            #json.dump(json_data, f)
            print('dumped')
    else:
        print('test: there is json')
        print(sample)
        # with open(json_filepath, 'r') as f:
        #     json.load(f)
        #     print('loaded but why')

debug = False
processes = 100

import concurrent.futures
pool = concurrent.futures.ProcessPoolExecutor(max_workers=processes)

#for graph_size in [10, 50, 100, 200, 500, 1000]:
for graph_size in [10]:
    for p in [0.01, 0.05]:
    #for p in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5]:
        # store each graph in a different folder (create path based on graph size, prob of edge and graph sample)
        graphs_folder = folder / Path(f'graphs_size{graph_size}_p{float(p)}')
        if not os.path.exists(graphs_folder):
            os.makedirs(graphs_folder)
        for graph_sample in range(no_graph_samples):
            if not debug:
                pool.submit(simulate, p, graph_size, graph_sample, graphs_folder)
            else:  # DEBUG
                simulate(p, graph_size, graph_sample, graphs_folder)

pool.shutdown()  # wait for the batch of configs to terminate
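One general point worth checking with concurrent.futures: pool.submit returns a Future, and if the submitted call raises an exception inside the child process (for example while moving the graph to a CUDA device), the exception is stored on the Future and stays silent unless you ask for it. A minimal sketch of how the results could be collected so that errors surface, keeping only the innermost loop from the snippet above for brevity (the names simulate, p, graph_size, graphs_folder, no_graph_samples and processes are taken from that snippet):

import concurrent.futures

futures = []
with concurrent.futures.ProcessPoolExecutor(max_workers=processes) as pool:
    for graph_sample in range(no_graph_samples):
        futures.append(pool.submit(simulate, p, graph_size, graph_sample, graphs_folder))

    for fut in concurrent.futures.as_completed(futures):
        try:
            fut.result()  # re-raises any exception that happened inside the child process
        except Exception as exc:
            print(f'simulate failed: {exc!r}')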
How can I parallelize the following snippet of code in python?
I have a bunch of matrix multiplication operations that are performed only row-wise. I was wondering how to speed up the computation by parallelization:

data = np.random.randint(1, 100, (100000, 800))
indices_1 = np.equal(data, 1)
A = np.zeros((100000, 100))
B = np.random.randn(800, 100)

for i in range(100000):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

I tried multiprocessing, but for some reason it did not perform better than the sequential version. Here is my multiprocessing implementation:

from multiprocessing.pool import ThreadPool, Pool

pool = ThreadPool()  # can also use Pool

def f(i):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

pool.map(f, range(100000))

Both yielded the same running time (around 32 seconds). Other parallelization methods like concurrent.futures did not improve the runtime either (used like below):

with concurrent.futures.ThreadPoolExecutor() as executor:
    result = executor.map(f, range(100000))

I also tried to apply dask but could not make their framework work in my case. Any help will be much appreciated! Thanks!
import numpy as np
import multiprocessing as mp

data = list(np.random.randint(1, 100, (100000, 800)))
indices_1 = np.equal(data, 1)
A = list(np.zeros((100000, 100)))
B = np.random.randn(800, 100)

def f(data, A, i):
    ones = indices_1[i]
    not_ones = ~indices_1[i]
    B_ones = B[ones]
    B_not_ones = B[not_ones]
    A[i] = (data[i][not_ones] @ B_not_ones) @ np.linalg.inv(B_not_ones.T @ B_not_ones)
    data[i][ones] = A[i] @ B_ones.T

with mp.Manager() as manager:
    data_global = manager.list(data)
    A_global = manager.list(A)
    with mp.Pool() as p:
        results = [p.apply_async(f, (data_global, A_global, i,)) for i in range(100000)]
        for i in results:
            i.wait()
    data_global = list(data_global)
    A_global = list(A_global)
python time sliding window variation
I'm stuck with a variation of the sliding window problem! Usually we set the number of elements to slide over, but in my case I want to slide over time! The goal I would like to reach is a function (a thread in this case) that is able to create a "time" window of n seconds (given by the user). Starting from the first element of the queue, in this case:

[datetime.time(7, 6, 14, 537370), 584]

add 5 seconds -> 7:6:19.537370 (ending point) and sum all elements in this interval:

[datetime.time(7, 6, 14, 537370), 584]
[datetime.time(7, 6, 18, 542798), 761]

Total: 584 + 761 = 1345

Then create another window with the second element, and so on. IMPORTANT: one item can be part of several windows. The items are generated on the fly, so a naive solution with a function that sleeps for n seconds and then flushes the queue is not good for my problem. I think it's a variation of this post: Flexible sliding window (in Python). But I still can't solve the problem! Any help or suggestions will be appreciated. Thanks!

Example list of elements:

[datetime.time(7, 6, 14, 537370), 584]
[datetime.time(7, 6, 18, 542798), 761]
[datetime.time(7, 6, 20, 546007), 848]
[datetime.time(7, 6, 24, 550969), 20]
[datetime.time(7, 6, 27, 554370), 478]
[datetime.time(7, 6, 27, 554628), 12]
[datetime.time(7, 6, 31, 558919), 29]
[datetime.time(7, 6, 31, 559562), 227]
[datetime.time(7, 6, 32, 560863), 379]
[datetime.time(7, 6, 35, 564863), 132]
[datetime.time(7, 6, 37, 567276), 651]
[datetime.time(7, 6, 38, 568652), 68]
[datetime.time(7, 6, 40, 569861), 100]
[datetime.time(7, 6, 41, 571459), 722]
[datetime.time(7, 6, 44, 574802), 560]
...

Code:

import random
import time
import threading
import datetime
from multiprocessing import Queue

q = Queue()

# this is a producer that puts elements in the queue
def t1():
    element = [0, 0]
    while True:
        time.sleep(random.randint(0, 5))
        element[0] = datetime.datetime.now().time()
        element[1] = random.randint(0, 1000)
        q.put(element)

# this is a consumer that sums elements inside a window of n seconds
# I need a sliding time window of n seconds that sums all elements falling inside it
def t2():
    windowsize = 5  # size of the window, 5 seconds
    while not q.empty():
        e = q.get()
        start = e[0]  # the first element is the beginning point
        end = start + datetime.timedelta(seconds=windowsize)  # ending point
        sum += e[1]
        # some code that solves the problem :)

a = threading.Thread(target=t1)
a.start()

b = threading.Thread(target=t2)
b.start()

while True:
    time.sleep(1)
Would this do? This is how I understood your problem. What this does is create a class that keeps track of things. You either add to it with tw.insert() or sum over a window with tw.sum_window(start, end). When you initialise TimeWindow, you can give it a max size parameter; the default is 10 seconds. Before every insert or sum operation it does a clean-up, so that the first element time e[0][0] and the last element time e[n][0] are within 10 seconds of each other; older entries are expunged. A "poller" thread is there to track your requests. I have added two queues, as I do not know what you intend to do with the results. Now if you want to request data from now until 5 seconds in the future, you create a request and put it in the queue. The request has a random id so that you can match it to results. Your main thread needs to monitor the result queue, and after five seconds every request sent to the queue returns with the same id and the sum. If this is not what you want to do, then I just don't understand what it is you are trying to achieve here. Even this is already rather complicated, and there may be a much simpler way to achieve what you intend to do.

import random
import time
import threading
import datetime
import Queue
import uuid
from collections import deque

q_lock = threading.RLock()

class TimeWindow(object):
    def __init__(self, max_size=10):
        self.max_size = max_size
        self.q = deque()

    def expire(self):
        time_now = datetime.datetime.now()
        while True:
            try:
                oldest_element = self.q.popleft()
                oe_time = oldest_element[0]
                if oe_time + datetime.timedelta(seconds=self.max_size) > time_now:
                    self.q.appendleft(oldest_element)
                    break
            except IndexError:
                break

    def insert(self, elm):
        self.expire()
        self.q.append(elm)

    def sum_window(self, start, end):
        self.expire()
        try:
            _ = self.q[0]
        except IndexError:
            return 0
        result = 0
        for f in self.q:
            if start < f[0] < end:
                result += f[1]
            else:
                pass
        return result

tw = TimeWindow()

def t1():
    while True:
        time.sleep(random.randint(0, 3))
        element = [datetime.datetime.now(), random.randint(0, 1000)]
        with q_lock:
            tw.insert(element)

def poller(in_q, out_q):
    pending = []
    while True:
        try:
            new_request = in_q.get(0.1)
            new_request["end"] = new_request["start"] + datetime.timedelta(seconds=new_request["frame"])
            pending.append(new_request)
        except Queue.Empty:
            pass
        new_pending = []
        for a in pending:
            if a["end"] < datetime.datetime.now():
                with q_lock:
                    r_sum = tw.sum_window(a["start"], a["end"])
                r_structure = {"id": a["id"], "result": r_sum}
                out_q.put(r_structure)
            else:
                new_pending.append(a)
        pending = new_pending

a = threading.Thread(target=t1)
a.daemon = True
a.start()

in_queue = Queue.Queue()
result_queue = Queue.Queue()
po = threading.Thread(target=poller, args=(in_queue, result_queue,))
po.daemon = True
po.start()

while True:
    time.sleep(1)
    newr = {"id": uuid.uuid4(), "frame": 5, "start": datetime.datetime.now()}
    in_queue.put(newr)
    try:
        ready = result_queue.get(0)
        print ready
    except Queue.Empty:
        pass
garim#wof:~$ python solution.py
1 t1 produce element: 16:09:30.472497 1
2 t1 produce element: 16:09:33.475714 9
3 t1 produce element: 16:09:34.476922 10
4 t1 produce element: 16:09:37.480100 7
solution: 16:09:37.481171 {'id': UUID('adff334f-a97a-459d-8dcc-f28309e25574'), 'result': 19}
5 t1 produce element: 16:09:38.481352 10
solution: 16:09:38.482687 {'id': UUID('0a7481e5-e993-439a-9f7e-2c5aeef86155'), 'result': 19}

It still doesn't work :( I added a counter for each element that t1 inserts. The goal is to do the sum (result_queue.get) at this time: 16:09:35.472497 -> 16:09:30.472497 + 5 seconds, not before. Only then does the element go out. The next time, the sum should be done at 16:09:38.475714 -> 16:09:33.475714 + 5 seconds. I understand that it's hard to explain. With both of your solutions the time window slides, so I can consider the problem solved :) I will try to improve when the sum function is executed; that time trigger is important. I acquired a lot of useful knowledge. Thanks for helping.
How to Update a Chaco Plot
I have a Chaco plot that I am creating, and when I change one of the variables, instead of updating, it draws the whole plot on top of the previous one, so the plots start to stack up and overlay each other while remaining in the same window. How can I overwrite the plot instead? Note: I am changing the variable "duration" in the Foo class at the bottom. Here is my code:

import threading
import time
from enthought.traits.api \
    import HasTraits, Int, Range, Array, Enum, on_trait_change
from enthought.traits.ui.api import View, Item
from enthought.chaco.chaco_plot_editor import ChacoPlotItem

class Hyetograph(HasTraits):
    """ Creates a simple hyetograph demo. """
    timeline = Array
    intensity = Array
    nrcs = Array
    duration = Int(12, desc='In Hours')
    year_storm = Enum(2, 10, 25, 100)
    county = Enum('Brazos', 'Dallas', 'El Paso', 'Harris')
    curve_number = Range(70, 100)
    plot_type = Enum('line', 'scatter')

    view1 = View(Item('plot_type'),
                 ChacoPlotItem('timeline', 'intensity',
                               type_trait='plot_type',
                               resizable=True,
                               x_label='Time (hr)',
                               y_label='Intensity (in/hr)',
                               color='blue',
                               bgcolor='white',
                               border_visible=True,
                               border_width=1,
                               padding_bg_color='lightgray'),
                 Item(name='duration'),
                 Item(name='year_storm'),
                 Item(name='county'))

    def calculate_intensity(self):
        """ The Hyetograph calculations. """
        # Assigning A, B, and C values based on year, storm, and county
        counties = {'Brazos': 0, 'Dallas': 3, 'El Paso': 6, 'Harris': 9}
        years = {
            2: [65, 8, .806, 54, 8.3, .791, 24, 9.5, .797, 68, 7.9, .800],
            10: [80, 8.5, .763, 78, 8.7, .777, 42, 12., .795, 81, 7.7, .753],
            25: [89, 8.5, .754, 90, 8.7, .774, 60, 12., .843, 81, 7.7, .724],
            100: [96, 8., .730, 106, 8.3, .762, 65, 9.5, .825, 91, 7.9, .706]
        }
        year = years[self.year_storm]
        value = counties[self.county]
        a, b, c = year[value], year[value+1], year[value+2]
        self.timeline = range(2, self.duration + 1, 2)
        intensity = a / (self.timeline * 60 + b)**c
        cumdepth = intensity * self.timeline
        temp = cumdepth[0]
        result = []
        for i in cumdepth[1:]:
            result.append(i - temp)
            temp = i
        result.insert(0, cumdepth[0])
        # Alternating block method implementation.
        result.reverse()
        switch = True
        o, e = [], []
        for i in result:
            if switch:
                o.append(i)
            else:
                e.append(i)
            switch = not switch
        e.reverse()
        result = o + e
        self.intensity = result

    def calculate_runoff(self):
        """ NRCS method to get run-off based on permeability of ground. """
        s = (1000 / self.curve_number) - 10
        a = self.intensity - (.2 * s)
        vr = a**2 / (self.intensity + (.8 * s))
        # There's no such thing as negative run-off.
        for i in range(0, len(a)):
            if a[i] <= 0:
                vr[i] = 0
        self.nrcs = vr

    @on_trait_change('duration, year_storm, county, curve_number')
    def _perform_calculations(self):
        self.calculate_intensity()
        self.calculate_runoff()

    def start(self):
        self._perform_calculations()
        self.configure_traits()

f = Hyetograph()
f.start()
# d = range(10, 13)
# for n in d:
#     f.duration = n
#     time.sleep(2)

class Foo(threading.Thread):
    def __init__(self, x):
        self.__x = x
        threading.Thread.__init__(self)

    def run(self):
        print str(self.__x)
        d = range(10, 13)
        for n in d:
            f.duration = n
            time.sleep(2)

Foo(1).start()