if __name__=='__main__':
    print("================================================= \n")
    print 'The test will be running for: 18 hours ...'

    get_current_time = datetime.now()
    test_ended_time = get_current_time + timedelta(hours=18)
    print 'Current time is:', get_current_time.time(), 'Your test will be ended at:', test_ended_time.time()

    autodb = autodb_connect()
    db = bw_dj_connect()
    started_date, full_path, ips = main()

    pid = os.getpid()
    print('Main Process is started and PID is: ' + str(pid))

    start_time = time.time()
    process_list = []

    for ip in ips:
        p = Process(target=worker, args=(ip, started_date, full_path))
        p.start()
        p.join()
        child_pid = str(p.pid)
        print('PID is:' + child_pid)
        process_list.append(child_pid)

    child = multiprocessing.active_children()
    print process_list

    while child != []:
        time.sleep(1)
        child = multiprocessing.active_children()

    print ' All processes are completed successfully ...'
    print '_____________________________________'
    print(' All processes took {} second!'.format(time.time()-start_time))
I have a Python test script that should run for 18 hours and then kill itself. The script uses multiprocessing for multiple devices. The data I get from the main() function changes over time.
I am passing these three args to the worker method in multiprocessing.
How can I achieve that?
If you don't need to worry too much about cleanup on the child processes, you can kill them using .terminate():
...
time.sleep(18 * 60 * 60)  # go to sleep for 18 hours

children = multiprocessing.active_children()
for child in children:
    child.terminate()

for child in multiprocessing.active_children():
    child.join()  # wait for the children to terminate
If you do need to do some cleanup in all the child processes, then you need to modify their run loop (I'm assuming while True) to monitor the time passing, and keep only the second while loop above in the main program, waiting for the children to go away on their own.
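For example, a minimal sketch of such a run loop, assuming your worker currently loops forever and you pass the deadline in as an extra argument (the deadline parameter is not in your original code):

from datetime import datetime
import time

def worker(ip, started_date, full_path, deadline):
    # Loop until the 18-hour deadline passes, then fall through to cleanup.
    while datetime.now() < deadline:
        # ... do one unit of work against ip here ...
        time.sleep(1)
    # Deadline reached: close connections, flush logs, etc., then return
    # so the process exits on its own and active_children() drops it.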
You are never comparing datetime.now() to test_ended_time.

# check if my current time is greater than the 18 hour check point.
while datetime.now() < test_ended_time and multiprocessing.active_children():
    print('still running my process.')

sys.exit(0)
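Putting the two ideas together, a minimal parent-side sketch (reusing the names from your snippet; the worker start-up is elided) could look like:

import multiprocessing
import time
from datetime import datetime, timedelta

test_ended_time = datetime.now() + timedelta(hours=18)

# ... start the worker processes here, without join()ing each one immediately ...

# Poll until the deadline passes or every child has already finished on its own.
while datetime.now() < test_ended_time and multiprocessing.active_children():
    time.sleep(1)

# Deadline reached: terminate whatever is still alive, then reap it.
for child in multiprocessing.active_children():
    child.terminate()
for child in multiprocessing.active_children():
    child.join()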
Sorry, this is a uni assignment and I'm lost. I have been breaking my head over it for quite some time.
Here is the text: On the screen, print the following data: PID, real user ID, and priority for the current subprocess that is running, for its parent, and for the parent's parent. Also check whether that many processes are running; if not, inform the user.
The code so far:
The output is partly good as far as I understand. I get the PID and PPID of the current running process. I'm just confused about the parent-of-parent part. I fixed up the code and got this output, which should not be separated.
def f1():
    while True:
        print ('\nInput a number in range from 1 to 10: ')
        n = input()
        if int(n) not in range(1,11):
            print('Number is not in given range')
            continue
        else:
            pid_fork = os.fork()
            change_dir() #
            if pid_fork == 0:
                user = os.getlogin()
                pid = os.getpid()
                print ("{:<15}\t{:<15}".format('PID','User','Priorty'))
                print("{:<15}\t{:<15}".format(pid,user))
            if pid_fork != 0:
                user = os.getlogin()
                pid = os.getpid()
                ppid = os.getppid()
                print ("{:<15}\t{:<15}".format('PID','User','Priorty'))
                print("{:<15}\t{:<15}".format(pid,user))
                print("{:<15}\t{:<15}".format(ppid,user))
                break
PID Korisnik
3481 user
3473 user
PID Korisnik
3500 user
Sorry again for posting uni assignments, but I'm not getting much help anywhere else.
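For what it's worth, here is a minimal sketch (assuming a POSIX system; the helper name show_ids is made up) of how the parent's parent can be captured before the fork, since inside the child os.getppid() only reaches one level up:

import os

def show_ids(label, pid):
    # PID, real user ID of this process, and scheduling priority of the given PID.
    print("{:<12} pid={:<8} uid={:<8} prio={}".format(
        label, pid, os.getuid(), os.getpriority(os.PRIO_PROCESS, pid)))

grandparent_pid = os.getppid()   # in the parent, this is the parent's parent of the future child
pid_fork = os.fork()
if pid_fork == 0:                # child
    show_ids("child", os.getpid())
    show_ids("parent", os.getppid())
    show_ids("grandparent", grandparent_pid)
    os._exit(0)
else:                            # parent waits so the output is not interleaved with the prompt
    os.waitpid(pid_fork, 0)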
In the main function, I am calling a process to run the imp_workload() method in parallel for each DP_WORKLOAD:
#!/usr/bin/env python
import multiprocessing
import subprocess

if __name__ == "__main__":
    for DP_WORKLOAD in DP_WORKLOAD_NAME:
        p1 = multiprocessing.Process(target=imp_workload, args=(DP_WORKLOAD, DP_DURATION_SECONDS, DP_CONCURRENCY, ))
        p1.start()
However, inside this imp_workload() method, I need the import_command_run() method to run a number of processes (the number is given by the variable DP_CONCURRENCY), but with a sleep of 60 seconds before each new execution.
This is the sample code I have written:
def imp_workload(DP_WORKLOAD, DP_DURATION_SECONDS, DP_CONCURRENCY):
    while DP_DURATION_SECONDS > 0:
        pool = multiprocessing.Pool(processes = DP_CONCURRENCY)
        for j in range(DP_CONCURRENCY):
            pool.apply_async(import_command_run, args=(DP_WORKLOAD, dp_workload_cmd, j,)

            # Sleep for 1 minute
            time.sleep(60)

        pool.close()

        # Clean the schemas after import is completed
        clean_schema(DP_WORKLOAD)

        # Sleep for 1 minute
        time.sleep(60)

def import_command_run(DP_WORKLOAD):
    abccmd = 'impdp admin/DP_PDB_ADMIN_PASSWORD#DP_PDB_FULL_NAME SCHEMAS=ABC'
    defcmd = 'impdp admin/DP_PDB_ADMIN_PASSWORD#DP_PDB_FULL_NAME SCHEMAS=DEF'

    # any of the above commands
    run_imp_cmd(eval(dp_workload_cmd))

def run_imp_cmd(cmd):
    output = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    stdout,stderr = output.communicate()
    return stdout
When I tried running it in this format, I got the following error:
time.sleep(60)
^
SyntaxError: invalid syntax
So, how can I kick off the 'abccmd' job DP_CONCURRENCY times in parallel, with a sleep of 1 minute between each job, and with each of these pools also running inside its own multiprocessing.Process?
I am working on Python 2.7.5 (due to restrictions I can't use Python 3.x, so I would appreciate answers specific to Python 2.x).
P.S. This is a very large and complex script, so I have posted only the relevant excerpts. Please ask for more details if necessary (or if it is not clear from this much).
Let me offer two possibilities:
Possibility 1
Here is an example of how you would kick off a worker function in parallel with DP_CURRENCY == 4 possible arguments (0, 1, 2 and 3), cycling over and over for up to DP_DURATION_SECONDS seconds with a pool size of DP_CURRENCY. As soon as a job completes it is restarted, but with a guarantee that at least TIME_BETWEEN_SUBMITS == 60 seconds has elapsed between successive restarts.
from __future__ import print_function
from multiprocessing import Pool
import time
from queue import SimpleQueue

TIME_BETWEEN_SUBMITS = 60

def worker(i):
    print(i, 'started at', time.time())
    time.sleep(40)
    print(i, 'ended at', time.time())
    return i # the argument

def main():
    q = SimpleQueue()

    def callback(result):
        # every time a job finishes, put result (the argument) on the queue
        q.put(result)

    DP_CURRENCY = 4
    DP_DURATION_SECONDS = TIME_BETWEEN_SUBMITS * 10
    pool = Pool(DP_CURRENCY)
    t = time.time()
    expiration = t + DP_DURATION_SECONDS

    # kick off initial tasks:
    start_times = [None] * DP_CURRENCY
    for i in range(DP_CURRENCY):
        pool.apply_async(worker, args=(i,), callback=callback)
        start_times[i] = time.time()

    while True:
        i = q.get() # wait for a job to complete
        t = time.time()
        if t >= expiration:
            break
        time_to_wait = TIME_BETWEEN_SUBMITS - (t - start_times[i])
        if time_to_wait > 0:
            time.sleep(time_to_wait)
        pool.apply_async(worker, args=(i,), callback=callback)
        start_times[i] = time.time()

    # wait for all jobs to complete:
    pool.close()
    pool.join()

# required by Windows:
if __name__ == '__main__':
    main()
Possibility 2
This is closer to what you had, in that TIME_BETWEEN_SUBMITS == 60 seconds of sleeping is done between the successive submission of any two jobs. But to me this doesn't make as much sense. If, for example, the worker function only took 50 seconds to complete, you would not be doing any parallel processing at all. In fact, each job would need to take at least 180 (i.e. (DP_CURRENCY - 1) * TIME_BETWEEN_SUBMITS) seconds to complete in order to have all 4 processes in the pool busy running jobs at the same time.
from __future__ import print_function
from multiprocessing import Pool
import time
from queue import SimpleQueue

TIME_BETWEEN_SUBMITS = 60

def worker(i):
    print(i, 'started at', time.time())
    # A task must take at least 180 seconds to run to have 4 tasks running in parallel if
    # you wait 60 seconds between starting each successive task:
    # take 182 seconds to run
    time.sleep(3 * TIME_BETWEEN_SUBMITS + 2)
    print(i, 'ended at', time.time())
    return i # the argument

def main():
    q = SimpleQueue()

    def callback(result):
        # every time a job finishes, put result (the argument) on the queue
        q.put(result)

    # at most 4 tasks at a time but only if worker takes at least 3 * TIME_BETWEEN_SUBMITS
    DP_CURRENCY = 4
    DP_DURATION_SECONDS = TIME_BETWEEN_SUBMITS * 10
    pool = Pool(DP_CURRENCY)
    t = time.time()
    expiration = t + DP_DURATION_SECONDS

    # kick off initial tasks:
    for i in range(DP_CURRENCY):
        if i != 0:
            time.sleep(TIME_BETWEEN_SUBMITS)
        pool.apply_async(worker, args=(i,), callback=callback)
    time_last_job_submitted = time.time()

    while True:
        i = q.get() # wait for a job to complete
        t = time.time()
        if t >= expiration:
            break
        time_to_wait = TIME_BETWEEN_SUBMITS - (t - time_last_job_submitted)
        if time_to_wait > 0:
            time.sleep(time_to_wait)
        pool.apply_async(worker, args=(i,), callback=callback)
        time_last_job_submitted = time.time()

    # wait for all jobs to complete:
    pool.close()
    pool.join()

# required by Windows:
if __name__ == '__main__':
    main()
I have a script that finds all prime numbers with multiprocessing; however, half of the spawned workers die very quickly.
I noticed that the workers that die early have no I/O operations at all, while the others run normally.
I spawned 8 workers and half of them die. This is the task manager view:
This is the function given to the workers:
import time
import multiprocessing

def prime(i, processes, maxnum, primes):
    while maxnum >= i:
        f = False
        if i <= 1:
            i += processes
            continue
        else:
            for j in range(2, int(i**0.5)+1, 1):
                if i % j == 0:
                    i += processes
                    f = True
                    break
        if f:
            continue
        primes.append(i) # append if prime.
        i += processes
        # increment by number of processes, example: p1 (i =1) p2 (i=2)
        # up to i = processes, then all jumps by num of processes, check for bugs
And here is the main, in which the workers are spawned:
def main():
    start = time.monotonic()
    manager = multiprocessing.Manager()
    primes = manager.list()
    maxnum = 10000000
    processes = 8
    plist = []

    for i in range(1, processes + 1):  # adds each new process to plist
        plist.append(multiprocessing.Process(target=prime, args=(i, processes, maxnum, primes,)))

    for p in plist:  # starts the processes in plist and prints out process.pid
        p.start()
        print(p.pid)

    [p.join() for p in plist]

    print("time taken: " + str((time.monotonic() - start) / 60) + ' mins')
    print(plist)
    print(sorted(primes))  # unsure how long the sorting takes

if __name__ == "__main__":  # multiprocessing needs guarding, so all code goes into main
    main()
Here is the state of the processes 5 seconds after starting:
[<Process(Process-2, started)>, <Process(Process-3, stopped)>, <Process(Process-4, started)>, <Process(Process-5, stopped)>,
<Process(Process-6, started)>, <Process(Process-7, stopped)>, <Process(Process-8, started)>, <Process(Process-9, stopped)>]
What I find unusual here is that there is a pattern: every other spawned worker dies.
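For what it's worth, a quick way to see which numbers each worker is assigned under this start-at-i, step-by-processes scheme (this helper is just for illustration, not part of the script) suggests the stopped workers are simply finishing early rather than crashing:

def assigned_numbers(start, processes, maxnum):
    # The numbers a worker starting at `start` will examine: start, start+processes, ...
    return list(range(start, maxnum + 1, processes))

# With 8 processes, the workers starting at 2, 4, 6 and 8 only ever see even numbers.
# Every even number (apart from 2) fails on the very first trial division, so those
# four workers race through their share and exit long before the odd-start workers,
# which do the full sqrt(i) amount of work for every candidate.
print(assigned_numbers(2, 8, 30))   # [2, 10, 18, 26]
print(assigned_numbers(1, 8, 30))   # [1, 9, 17, 25]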
So I've been tooling around with threads and processes in Python, and along the way I cooked up a pattern that allows the same class to be pitched back and forth between threads and/or processes, without losing state, by using by-name RPC calls and Pipes.
Everything works fine, but it takes an absurd amount of time to start a Process compared to loading the state from a pickled file, whereas Thread.start() returns immediately, so there's only the minor cost of the constructor. So: what's the best way to start a Process with a large initial state without an absurd startup time? Snips and debug output below; the size of "counter" is just over 34,000K pickled to file with protocol 2.
...
elif command == "load":
    # RPC call - Loads state from file "pickle_name":
    timestart = time.time()
    print do_remote("take_pickled_state", pickle_name)
    print "Load cost: " + str(time.time() - timestart)

elif command == "asproc":
    if type(_async) is multiprocessing.Process:
        print "Already running as a Process you fool!."
    else:
        do_remote("stop")
        _async.join()
        p_pipe.close()

        p_pipe, c_pipe = multiprocessing.Pipe()
        timestart = time.time()
        _async = multiprocessing.Process(target = counter, args = (c_pipe,))
        # Why is this so expensive!!?!?!?! AAARRG!!?!
        _async.start()
        print "Start cost: " + str(time.time() - timestart)

elif command == "asthread":
    if type(_async) is threading.Thread:
        print "Already running as a Thread you fool!."
    else:
        # Returns the state of counter on stop:
        timestart = time.time()
        counter = do_remote("stop")
        print "Proc stop time: " + str(time.time() - timestart)
        _async.join()
        p_pipe.close()

        p_pipe, c_pipe = multiprocessing.Pipe()
        timestart = time.time()
        _async = threading.Thread(target = counter, args = (c_pipe,))
        _async.start()
        print "Start cost: " + str(time.time() - timestart)
...
Corresponding debug statements:
Waiting for command...
>>> load
Load complete.
Load cost: 2.18700003624
Waiting for command...
>>> asproc
Start cost: 23.3910000324
Waiting for command...
>>> asthread
Proc stop time: 0.921999931335
Start cost: 0.0629999637604
Edit 1:
OS: Win XP 64.
Python version: 2.7.x
Processor: Xeon quad core.
Edit 2:
The thing I really don't get is that it takes ~1 second for the process stop to return the entire state, but 20x longer to receive the state and start. (Debug outputs added.)
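One thing that may be relevant here: on Windows, multiprocessing cannot fork, so Process.start() launches a brand-new interpreter, re-imports your main module, and pickles the Process object, including the target and its bound state, over to the child. A rough, hypothetical comparison of the two costs (the stand-in object and child function are made up) might look like:

import multiprocessing
import pickle
import time

def child(obj):
    pass   # hypothetical stand-in for the counter target

if __name__ == "__main__":
    # Stand-in for a large state object, roughly tens of MB when pickled.
    big = dict((i, "x" * 100) for i in range(100000))

    timestart = time.time()
    pickle.dumps(big, 2)
    print("Pickle cost: " + str(time.time() - timestart))

    timestart = time.time()
    p = multiprocessing.Process(target=child, args=(big,))
    p.start()   # spawns a fresh interpreter and re-pickles big for the child
    print("Start cost: " + str(time.time() - timestart))
    p.join()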
import multiprocessing as mp
import time

def build(q):
    print 'I build things'
    time.sleep(10)
    #return 42
    q.put(42)

def run(q):
    num = q.get()
    print num
    if num == 42:
        print 'I run after build is done'
        return
    else:
        raise Exception("I don't know build..I guess")

def get_number(q):
    q.put(3)

if __name__ == '__main__':
    queue = mp.Queue()

    run_p = mp.Process(name='run process', target=run, args=(queue,))
    build_p = mp.Process(name='build process', target=build, args=(queue,))
    s3 = mp.Process(name='s3', target=get_number, args=(queue,))

    build_p.start()
    run_p.start()
    s3.start()

    print 'waiting on build'
    build_p.join(1) # timeout set to 1 second
    s3.join()

    print 'waiting on run'
    run_p.join()

    queue.close()

    print 'waiting on queue'
    queue.join_thread()

    print 'done'
My goal is to send build and run to different workers, and run has to get the result back from build in order to proceed.
The above revised code, based on your help, actually raises the exception, because s3 returns before build has the chance.
The value at the front of the queue is now 3. How can we make sure we get the answer back from the build process?
Thanks.
Your question is a little murky... the problem you are describing sounds synchronous, so 3 processes are a little overkill.
Assuming you are just trying to pass values to run, you could use the Queue object:
import multiprocessing as mp
import time

def build(q):
    print 'I build things'
    time.sleep(5)
    q.put(42)
    return

def run(q):
    while True:
        num = q.get()
        if num == 42:
            print 'I run after build is done'
            return
        else:
            print 'not the right number...'

def get_number():
    return 41

if __name__ == '__main__':
    queue = mp.Queue()

    run_p = mp.Process(name='run process', target=run, args=(queue,))
    build_p = mp.Process(name='build process', target=build, args=(queue,))

    run_p.start()
    build_p.start()

    print 'waiting on build'
    build_p.join()

    print 'waiting on run'
    run_p.join()

    queue.close()

    print 'waiting on queue'
    queue.join_thread()

    print 'done'