python multiprocessing: call function from main process?

I need to call a function from a subprocess, but I don't know how to do it right.
My code:
import os
import multiprocessing as mp
from functools import partial
from subprocess import Popen, PIPE

# logger, bucket_name, workers_num, work_path and SubDirPath are defined elsewhere in the script

def GzipSend(pth):
    path, filename = os.path.split(pth)
    day0 = os.path.join(pth, 'daily.0')
    if os.path.exists(day0):
        try:
            tar = Popen("tar cPf - %s" % day0, shell=True, stderr=PIPE, stdout=PIPE)
            pigz = Popen("pigz -1 -kc", stdin=tar.stdout, shell=True, stdout=PIPE, stderr=PIPE)
            send = Popen("./s3cmd -c ./.s3cfg sync - s3://%s/%s/daily.0.tar.gz" % (bucket_name, filename),
                         stdin=pigz.stdout, shell=True, stderr=PIPE, stdout=PIPE)
            tar.stdout.close()
            pigz.stdout.close()
            out, err = send.communicate()
            tar.wait()
            pigz.wait()
            return True
        except Exception, e:
            return False
    else:
        return False

def log(result, pth=None):
    if result:
        logger.info('path %s SUCCESS', pth)
    else:
        logger.info('path %s ERROR', pth)

if __name__ == '__main__':
    all = []
    for i in SubDirPath(work_path):
        all.append(i)
    p = mp.Pool(processes=workers_num)
    for pth in all:
        p.apply_async(GzipSend, args=(pth, ), callback=partial(log, pth=pth))
    p.close()
    p.join()
I want to call a fallback function when "if os.path.exists(day0)" is false.
My question: where do I need to create that function - in the main process or in the subprocess?
I want something like this:
def GzipSend(pth):
    if os.path.exists(day0):
        try:
            func(param1)
        except:
            return False
    else:
        func(param2)

def log(result, pth=None):
    ....

if __name__ == '__main__':
    ...
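As for where to create it: with multiprocessing.Pool, GzipSend itself already runs in a worker process, so any function it calls just has to be defined at module level in the main script before the Pool is created; the workers inherit (or re-import) the module, and the call then happens inside the worker. A minimal sketch, where send_archive and send_empty are hypothetical placeholder names, not from the original code:

    import os
    import multiprocessing as mp

    def send_archive(pth):  # hypothetical: the normal upload path
        pass

    def send_empty(pth):    # hypothetical: the fallback when daily.0 is missing
        pass

    def GzipSend(pth):
        day0 = os.path.join(pth, 'daily.0')
        if os.path.exists(day0):
            return send_archive(pth)  # executes inside the worker process
        else:
            return send_empty(pth)    # also executes inside the worker process

    if __name__ == '__main__':
        pool = mp.Pool(processes=4)
        # submit jobs with pool.apply_async(GzipSend, ...) as in the original code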

Related

Print tqdm progress bar from external python script called by subprocess

My main goal is to run an external python script (client script) by subprocess in another python script (caller script). The console of the caller script displays all output from the client script except the tqdm output - so it is not a general problem of displaying output by subprocess, but a specific problem related to subprocess interacting with tqdm.
My secondary goal is that I'd like to understand it :). So thoughtful explanations are much appreciated.
The client script (train.py) contains several tqdm calls. So far, I haven't seen much difference in outputs between various tqdm argument configurations, so let's use the simplest one.
In train.py:
...
import sys
from time import sleep
from tqdm import tqdm

with tqdm(total=10, ncols=80,
          file=sys.stdout, position=0, leave=True,
          desc='f5b: pbar.set_postfix') as pbar:
    for i in range(10):
        pbar.update(1)
        postfix = {'loss': '{0:.4f}'.format(1 + i)}
        pbar.set_postfix(**postfix)
        sleep(0.1)
The caller script experiment.py executes the function execute_experiment which calls train.py by the argument command_list:
import subprocess
import sys
import time

def execute_experiment(command_list):
    tic = time.time()
    try:
        process = subprocess.Popen(
            command_list, shell=False,
            encoding='utf-8',
            bufsize=0,
            stdin=subprocess.DEVNULL,
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        # Poll process for new output until finished
        # Source: https://stackoverflow.com/q/37401654/7769076
        while process.poll() is None:
            nextline = process.stdout.readline()
            sys.stdout.write(nextline)
            sys.stdout.flush()
    except subprocess.CalledProcessError as err:
        print("CalledProcessError: {0}".format(err))
        sys.exit(1)
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise
    if process.returncode == 0:
        toc = time.time()
        time1 = str(round(toc - tic))
        return time1
    else:
        return 1
Calling the above snippet of train.py this way does return output, but the tqdm output stops after 0 seconds and looks like this:
f5b: pbar.set_postfix: 0%| | 0/10 [00:00<?, ?it/s]
f5b: pbar.set_postfix: 10%|█▊ | 1/10 [00:00<00:00, 22310.13it/s]
The script call to the original code of train.py returns all output except tqdm output:
Training default configuration
train.py data --use-cuda ...
device: cuda
...
Comments:
shell=False: a Python script is calling another Python script. With shell=True, the client script is not called at all.
bufsize=0: to prevent buffering.
The train.py call is preceded by sys.executable to ensure that the Python interpreter of the corresponding conda environment is used when running on the local machine.
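For reference, the command_list presumably looks something like this (the exact flags are guesses based on the output shown above, not from the original post):

    import sys
    command_list = [sys.executable, 'train.py', 'data', '--use-cuda']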
Questions:
Does tqdm.set_postfix prevent passing the progress bar output upstream? I know this happens when tqdm.set_description is invoked, e.g. by:
pbar.set_description('processed: %d' %(1 + i))
This code contains it:
def train(self, dataloader, max_batches=500, verbose=True, **kwargs):
    with tqdm(total=max_batches, disable=not verbose, **kwargs) as pbar:
        for results in self.train_iter(dataloader, max_batches=max_batches):
            pbar.update(1)
            postfix = {'loss': '{0:.4f}'.format(results['mean_outer_loss'])}
            if 'accuracies_after' in results:
                postfix['accuracy'] = '{0:.4f}'.format(
                    np.mean(results['accuracies_after']))
            pbar.set_postfix(**postfix)
    # for logging
    return results
Is the nested function call the reason why the progress bar is not shown?
The order of calls is experiment.py > train.py > nested.py.
train.py calls the train function in nested.py by:
for epoch in range(args.num_epochs):
    results_metatraining = metalearner.train(meta_train_dataloader,
                                             max_batches=args.num_batches,
                                             verbose=args.verbose,
                                             desc='Training',
                                             # leave=False
                                             leave=True)
Alternatives tried out with no success:
### try2
process = subprocess.Popen(command_list, shell=False, encoding='utf-8',
                           stdin=subprocess.DEVNULL, stdout=subprocess.PIPE)
while True:
    output = process.stdout.readline().strip()
    print('output: ' + output)
    if output == '' and process.poll() is not None:  # end of output
        break
    if output:  # print output in realtime
        print(output)
    else:
        output = process.communicate()
process.wait()

### try6
# (inside a generator function)
process = subprocess.Popen(command_list, shell=False,
                           stdout=subprocess.PIPE, universal_newlines=True)
for stdout_line in iter(process.stdout.readline, ""):
    yield stdout_line
process.stdout.close()
return_code = process.wait()
print('return_code' + str(return_code))
if return_code:
    raise subprocess.CalledProcessError(return_code, command_list)

### try7
with subprocess.Popen(command_list, stdout=subprocess.PIPE,
                      bufsize=1, universal_newlines=True) as p:
    while True:
        line = p.stdout.readline()
        if not line:
            break
        print(line)
exit_code = p.poll()
I think readline is waiting for '\n', and tqdm does not emit newlines; maybe this could help (I did not try it):
import io

def execute_experiment(command_list):
    tic = time.time()
    try:
        process = subprocess.Popen(
            command_list, shell=False,
            bufsize=0,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT
        )
        # Note: stdout must stay binary (no encoding= or universal_newlines=)
        # for io.TextIOWrapper to be able to wrap it.
        # Poll process for new output until finished
        # Source: https://stackoverflow.com/q/37401654/7769076
        reader = io.TextIOWrapper(process.stdout, encoding='utf8')
        while process.poll() is None:
            char = reader.read(1)
            sys.stdout.write(char)
            sys.stdout.flush()
    except subprocess.CalledProcessError as err:
        print("CalledProcessError: {0}".format(err))
        sys.exit(1)
    except OSError as err:
        print("OS error: {0}".format(err))
        sys.exit(1)
    except:
        print("Unexpected error:", sys.exc_info()[0])
        raise
    if process.returncode == 0:
        toc = time.time()
        time1 = str(round(toc - tic))
        return time1
    else:
        return 1
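For what it's worth, the likely culprit (my reading, not confirmed in the original post): tqdm redraws its bar by emitting a bare carriage return '\r' instead of '\n', and it writes to stderr by default, so readline() on stdout blocks until the bar is finally closed. A minimal sketch of a byte-level reader that lets both '\r' redraws and ordinary lines through immediately (untested against the original scripts; command_list as above):

    import subprocess
    import sys

    process = subprocess.Popen(
        command_list, shell=False,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr so the tqdm bar is captured too
        bufsize=0)                 # unbuffered binary stream

    # read one byte at a time: '\r' redraws and '\n' lines both pass through at once
    for chunk in iter(lambda: process.stdout.read(1), b''):
        sys.stdout.write(chunk.decode('utf-8', errors='replace'))
        sys.stdout.flush()
    process.wait()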

Python subprocess stderr/stdout field is None and returns returncode 1 if run a program in remote server

When I try to run a Unix script on a remote server using Python subprocess, it fails with return code 1, and stderr and stdout are None.
def runCCPAjob(self):
    out = []
    err = []
    session = FTP('#####.com', '####', '#####')
    session.cwd('/app/IIS/bin')
    print(session.pwd())
    proc = subprocess.Popen(["sh", "-x", "/app/IIS/bin/E271_CCPA_ACCESS.ksh"],
                            stdout=subprocess.PIPE, encoding='utf-8',
                            stderr=subprocess.PIPE, shell=True)
    rc = proc.wait()
    (stdoutdata, stderrdata) = proc.communicate()
    out.append(stdoutdata)
    err.append(stdoutdata)
    if proc.returncode == 0:
        return out
    else:
        raise Exception(proc.returncode, str(out), str(err), out, rc)

p = sendFiletoServer()
p.runCCPAjob()
Error :
raise Exception(proc.returncode,str(out),str(err),out,rc)
Exception: (1, "['']", "['']", [''], 1)
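A note on what is likely going wrong (my reading, not part of the original question): subprocess always runs commands on the local machine; the FTP session only changes the working directory on the FTP server, it does not make Popen execute remotely. In addition, passing a list together with shell=True means only "sh" is handed to the shell as the command, and calling proc.wait() with PIPEs before proc.communicate() can deadlock. A minimal sketch of running the script remotely over ssh instead, assuming key-based ssh access is configured ('user@remote-host' is a placeholder):

    import subprocess

    # ssh executes the command on the remote host; output comes back over the ssh channel
    proc = subprocess.Popen(
        ["ssh", "user@remote-host", "sh -x /app/IIS/bin/E271_CCPA_ACCESS.ksh"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
    stdoutdata, stderrdata = proc.communicate()  # reads both pipes, then waits
    if proc.returncode != 0:
        raise Exception(proc.returncode, stdoutdata, stderrdata)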

how to use python popen to print and also store communicate and return code?

This is the function I am creating. I have one argument that controls whether to print in real time, since some of the processes take about an hour, and, since I am subprocessing several at the same time, another argument that decides whether to raise an error and stop everything or just let the main script keep running.
But if I set print_real_time to True, I lose the p.communicate() output.
I could store all the prints from the iter in a variable and return that, but how do I keep the stdout and the stderr in order, and get the return code to see whether it failed or not?
def launch_subprocess_cmd(command_to_lunch, cwd=None, print_real_time=False, raise_errors=True):
    """
    For a given command line, launch it as a subprocess.
    :param command_to_lunch: string
    :param print_real_time: boolean
    :param cwd: the folder path from where the command should be run.
    :param raise_errors: boolean; if the return code of the subprocess is different than 0,
        raise an error and stop all scripts. Otherwise the main script will keep running and
        can access the third return value of this function and decide what to do with it.
    :return: the stdout, the stderr and the return code of the Popen subprocess.
    """
    if cwd is None:
        p = subprocess.Popen(command_to_lunch, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    else:
        p = subprocess.Popen(command_to_lunch, cwd=cwd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    if print_real_time is True:
        for i in iter(p.stdout.readline, b''):
            print i
    com = p.communicate()
    if raise_errors is True:
        if p.returncode != 0:
            raise ValueError("\n\nSubprocess fail: \n" + "Error captures: \n" +
                             "stdout:\n" + com[0] + "\nstderr:\n" + com[1] + "\n")
    # com[0] is stdout, com[1] is stderr, and p.returncode tells
    # whether the subprocess succeeded (0) or failed (non-zero)
    return com[0], com[1], p.returncode
thanks guys =)
A possible solution to your problem is to store the stdout stream in a list when print_real_time is True and then use the content of the list to generate the stdout data string. If print_real_time is not True, then use the content from com[0] instead.
def launch_subprocess_cmd(cmd, cwd=None, print_real_time=False, raise_errors=True):
    """
    For a given command line, launch it as a subprocess.
    :param cmd: string
    :param print_real_time: boolean
    :param cwd: the folder path from where the command should be run.
    :param raise_errors: boolean; if the return code of the subprocess is different
        than 0, raise an error and stop all scripts, else
        the main script will keep running and can access the third
        return value of this function and decide what to do with it.
    :return: the stdout, the stderr and the return code of the Popen subprocess.
    """
    if cwd is None:
        p = subprocess.Popen(cmd, stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE, shell=True)
    else:
        p = subprocess.Popen(cmd, cwd=cwd, stderr=subprocess.PIPE,
                             stdout=subprocess.PIPE, shell=True)
    stdout_list = []
    if print_real_time is True:
        for i in iter(p.stdout.readline, b''):
            stdout_list.append(i)
            print i
    com = p.communicate()
    stdout_data = "".join(stdout_list) if print_real_time is True else com[0]
    if raise_errors is True:
        if p.returncode != 0:
            raise ValueError("\n\nSubprocess fail: \n" + "Error captures: \n" +
                             "stdout:\n" + stdout_data + "\nstderr:\n" +
                             com[1] + "\n")
    # stdout_data is stdout, com[1] is stderr, and
    # p.returncode tells whether the subprocess succeeded (0) or failed (non-zero)
    return stdout_data, com[1], p.returncode
As a side note, I would also urge you to try to rewrite the program to not use shell=True in your Popen calls. It may require that you preprocess the input cmd into a list of base command and arguments, but it is generally considered a bad idea to use shell=True.
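A sketch of that preprocessing, under the assumption that cmd is a plain command with no shell features such as pipes or redirection (shlex.split handles quoted arguments correctly):

    import shlex
    import subprocess

    cmd = "ls -l '/tmp/some dir'"  # hypothetical example command
    args = shlex.split(cmd)        # -> ['ls', '-l', '/tmp/some dir']
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()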
def launch_subprocess_cmd(command_to_lunch, cwd=None, print_real_time=False, raise_errors=True):
    if cwd is None:
        p = subprocess.Popen(command_to_lunch, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    else:
        p = subprocess.Popen(command_to_lunch, cwd=cwd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
    stdout_list = []
    errout_list = []
    if print_real_time is True:
        for i in iter(p.stdout.readline, b''):
            stdout_list.append(i)
            print i
        for j in iter(p.stderr.readline, b''):
            errout_list.append(j)
            print j
    com = p.communicate()
    if print_real_time is True:
        stdout_data = "".join(stdout_list)
        std_err_data = "".join(errout_list)
    else:
        stdout_data = com[0]
        std_err_data = com[1]
    if raise_errors is True:
        if p.returncode != 0:
            raise ValueError("\n\npopen fail:\n" + command_to_lunch + "\nError:\n" +
                             "Error captures:\n" + "stdout:\n" + stdout_data +
                             "\nstderr:\n" + std_err_data + "\n")
    # com[0] is stdout, com[1] is stderr, and p.returncode tells whether it failed or not
    return stdout_data, std_err_data, p.returncode
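One caveat with the variant above (my observation, not from the original answer): draining stdout to EOF before touching stderr can deadlock if the child fills the stderr pipe buffer first, and the two streams cannot be interleaved back into their original order afterwards. If ordering matters more than keeping the streams separate, merging stderr into stdout sidesteps both problems:

    p = subprocess.Popen(command_to_lunch, shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)  # one merged stream, original order
    for line in iter(p.stdout.readline, b''):
        print line,  # trailing comma: the line already ends with a newline
    p.wait()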

python subprocess handle exception

I am trying to handle exceptions and errors from Popen, but I can only handle them from send. How can I handle them from tar and pigz?
Here is my code:
try:
    tar = Popen("tar cPf - %s" % pth, shell=True, stderr=PIPE, stdout=PIPE)
    pigz = Popen("pigz -1 -kc", stdin=tar.stdout, shell=True, stdout=PIPE, stderr=PIPE)
    send = Popen("./s3cmd -c ./.s3cfg sync - s3://%s/%s.tar.gz" % (bucket_name, filename),
                 stdin=pigz.stdout, shell=True, stderr=PIPE, stdout=PIPE)
    send_err = send.communicate()[1]
    tar.stdout.close()
    pigz.stdout.close()
    tar.wait()
    pigz.wait()
    if send_err:
        print send_err
        return False
    else:
        return True
except:
    return False
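With shell=True, Popen itself almost never raises for a failing command; each stage of the pipeline reports failure through its own exit status and stderr. A minimal sketch of checking all three stages, using the same variables as above (untested):

    tar.stdout.close()   # allow SIGPIPE to propagate if a downstream stage exits early
    pigz.stdout.close()
    send_out, send_err = send.communicate()  # drives the whole pipeline to completion
    tar_err = tar.stderr.read()    # the pipeline has finished, so these reads won't block long
    pigz_err = pigz.stderr.read()
    tar.wait()
    pigz.wait()
    if tar.returncode != 0:
        print "tar failed:", tar_err
    if pigz.returncode != 0:
        print "pigz failed:", pigz_err
    if send.returncode != 0 or send_err:
        print "send failed:", send_err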

How to get the last N lines of a subprocess' stderr stream output?

I am a Python newbie writing a Python (2.7) script that needs to exec a number of external applications, one of which writes a lot of output to its stderr stream. What I am trying to figure out is a concise way (in Python) to get the last N lines from that subprocess' stderr output stream.
Currently, I am running that external application from my Python script like so:
p = subprocess.Popen('/path/to/external-app.sh', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
    print "ERROR: External app did not complete successfully (error code is " + str(p.returncode) + ")"
    print "Error/failure details: ", stderr
    status = False
else:
    status = True
I'd like to capture the last N lines of output from its stderr stream so that they can be written to a log file or emailed, etc.
N = 3  # for 3 lines of output
p = subprocess.Popen(['/path/to/external-app.sh'],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
    print ("ERROR: External app did not complete successfully "
           "(error code is %s)" % p.returncode)
    print "Error/failure details: ", '\n'.join(stderr.splitlines()[-N:])
    status = False
else:
    status = True
If the whole output can't be stored in RAM then:
import sys
from collections import deque
from subprocess import Popen, PIPE
from threading import Thread

ON_POSIX = 'posix' in sys.builtin_module_names

def start_thread(func, *args):
    t = Thread(target=func, args=args)
    t.daemon = True
    t.start()
    return t

def consume(infile, output):
    for line in iter(infile.readline, ''):
        output(line)
    infile.close()

p = Popen(['cat', sys.argv[1]], stdout=PIPE, stderr=PIPE,
          bufsize=1, close_fds=ON_POSIX)

# preserve last N lines of stdout, print stderr immediately
N = 100
queue = deque(maxlen=N)
threads = [start_thread(consume, *args)
           for args in (p.stdout, queue.append), (p.stderr, sys.stdout.write)]
for t in threads:
    t.join()  # wait for IO completion
print ''.join(queue),  # print last N lines
retcode = p.wait()
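A note on the design above (my reading, not from the original answer): deque(maxlen=N) silently discards the oldest lines once N is reached, so memory stays bounded no matter how much the app writes, and draining stdout and stderr on separate daemon threads avoids the deadlock that occurs when one pipe's OS buffer fills up while the program is blocked reading the other.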
