Calling multiple Linux processes in Python and collecting output

From a Python script, I need to call a PL->EN translation service. The translation requires 3 steps: tokenization, translation, detokenization.
From Linux, I can achieve this with 3 processes, using the following commands executed in the order shown:
/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl < path_to_input.txt > path_to_output.tok.txt
/home/nlp/opt/moses/bin/moses -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1 -drop-unknown -input-file path_to_output.tok.txt -th 8 > path_to_output.trans.txt
/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en < path_to_output.trans.txt > path_to_output.final.txt
This translates the file path_to_input.txt and writes the result to path_to_output.final.txt.
I have made the following script for combining the 3 processes:
import shlex
import subprocess
from subprocess import STDOUT, PIPE
import os
import socket

class Translator:
    @staticmethod
    def pl_to_en(input_file, output_file):
        # Tokenize
        print("Tokenization started")
        with open("tokenized.txt", "w+") as tokenizer_output:
            with open(input_file) as tokenizer_input:
                cmd = "/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl"
                args = shlex.split(cmd)
                p = subprocess.Popen(args, stdin=tokenizer_input, stdout=tokenizer_output)
                p.wait()
        print("Tokenization finished")

        # Translate
        print("Translation started")
        with open("translated.txt", "w+") as translator_output:
            cmd = "/home/nlp/opt/moses/bin/moses -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1 -drop-unknown -input-file tokenized.txt -th 8"
            args = shlex.split(cmd)
            p = subprocess.Popen(args, stdout=translator_output)
            p.wait()
        print("Translation finished")

        # Detokenize
        print("Detokenization started")
        with open("translated.txt") as detokenizer_input:
            with open("detokenized.txt", "w+") as detokenizer_output:
                cmd = "/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en"
                args = shlex.split(cmd)
                p = subprocess.Popen(args, stdin=detokenizer_input, stdout=detokenizer_output)
                p.wait()
        print("Detokenization finished")

translator = Translator()
translator.pl_to_en("some_input_file.txt", "some_output_file.txt")
But only the tokenization part works.
The translator just outputs an empty translated.txt. Judging from the output in the terminal, the translator loads tokenized.txt correctly and performs a translation; the problem is just how I collect the output from that process.

I would try something like the following: send the output of the translator process to a pipe, and make the pipe the detokenizer's input, instead of going through files.
import shlex
import subprocess
from subprocess import STDOUT, PIPE
import os
import socket

class Translator:
    @staticmethod
    def pl_to_en(input_file, output_file):
        # Tokenize
        print("Tokenization started")
        with open("tokenized.txt", "w+") as tokenizer_output:
            with open(input_file) as tokenizer_input:
                cmd = "/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl"
                args = shlex.split(cmd)
                p = subprocess.Popen(args, stdin=tokenizer_input, stdout=tokenizer_output)
                p.wait()
        print("Tokenization finished")

        # Translate
        print("Translation started")
        cmd = "/home/nlp/opt/moses/bin/moses -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1 -drop-unknown -input-file tokenized.txt -th 8"
        args = shlex.split(cmd)
        translate_p = subprocess.Popen(args, stdout=subprocess.PIPE)
        # Do NOT wait() on the translator here: with nothing reading the
        # pipe yet, the translator would block once the pipe buffer fills.

        # Detokenize, reading straight from the translator's pipe
        print("Detokenization started")
        with open("detokenized.txt", "w+") as detokenizer_output:
            cmd = "/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en"
            args = shlex.split(cmd)
            detokenizer_p = subprocess.Popen(args, stdin=translate_p.stdout, stdout=detokenizer_output)
            translate_p.stdout.close()  # the detokenizer holds the read end now
            translate_p.wait()
            print("Translation finished")
            detokenizer_p.wait()
        print("Detokenization finished")

translator = Translator()
translator.pl_to_en("some_input_file.txt", "some_output_file.txt")
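For reference, the wiring above follows the standard subprocess pattern for chaining two processes. A minimal generic sketch (producer and consumer are hypothetical placeholder commands, not from the question):

import subprocess

p1 = subprocess.Popen(["producer"], stdout=subprocess.PIPE)                    # hypothetical command
p2 = subprocess.Popen(["consumer"], stdin=p1.stdout, stdout=subprocess.PIPE)   # hypothetical command
p1.stdout.close()   # parent drops its copy, so p1 gets SIGPIPE if p2 exits early
output, _ = p2.communicate()  # read the end of the pipeline and reap p2
p1.wait()

Calling wait() on an upstream process before anything consumes its stdout is exactly what causes the pipe-buffer deadlock.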

Related

python subprocess.call and pipes [duplicate]

This question is a duplicate of: How do I use subprocess.Popen to connect multiple processes by pipes?
I have a script in which I am trying to use subprocess.call to execute a series of shell commands, but some of the commands appear to be omitted when it runs.
Specifically:
#!/usr/bin/python
import tempfile
import subprocess
import os
import re

grepfd, grepfpath = tempfile.mkstemp(suffix=".xx")
sedfd, sedfpath = tempfile.mkstemp(suffix=".xx")
# grepoutfile = open( grepfpath, 'w')
sedoutfile = open( sedfpath, 'w' )
subprocess.call(['cp', '/Users/bobby/Downloads/sample.txt', grepfpath])
sedcmd = [ 'sort',
           grepfpath,
           '|',
           'uniq',
           '|',
           'sed',
           '-e',
           '"s/bigstring of word/ smaller /"',
           '|',
           'column',
           '-t',
           '-s',
           '"=>"' ]
print "sedcmd = ", sedcmd
subprocess.call( ['ls', grepfpath ] )
subprocess.call( ['sort', '|', 'uniq' ], stdin = grepfd )
subprocess.call( sedcmd, stdout = sedoutfile )
And it generates this as output:
python d3.py
sedcmd = ['sort', '/var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx', '|', 'uniq', '|', 'sed', '-e', '"s/bigstring of word/ smaller /"', '|', 'column', '-t', '-s', '"=>"']
/var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx
sort: open failed: |: No such file or directory
sort: invalid option -- e
Try `sort --help' for more information.
The first error, 'sort: open failed: |: No such file or directory', is from the first subprocess call: ['sort', '|', 'uniq'], stdin=grepfd.
The 'sort: invalid option -- e' error is from the second subprocess call (sedcmd).
I have seen a lot of examples that use pipes in this context, so what am I doing wrong?
Thanks!
This is a class that will run a command with an arbitrary number of pipes:
pipeline.py
import shlex
import subprocess

class Pipeline(object):
    def __init__(self, command):
        self.command = command
        self.command_list = self.command.split('|')
        self.output = None
        self.errors = None
        self.status = None
        self.result = None

    def run(self):
        process_list = list()
        previous_process = None
        for command in self.command_list:
            args = shlex.split(command)
            if previous_process is None:
                process = subprocess.Popen(args, stdout=subprocess.PIPE)
            else:
                process = subprocess.Popen(args,
                                           stdin=previous_process.stdout,
                                           stdout=subprocess.PIPE)
            process_list.append(process)
            previous_process = process
        last_process = process_list[-1]
        self.output, self.errors = last_process.communicate()
        self.status = last_process.returncode
        self.result = (0 == self.status)
        return self.result
This example shows how to use the class:
harness.py
from pipeline import Pipeline

if __name__ == '__main__':
    command = '|'.join([
        "sort %s",
        "uniq",
        "sed -e 's/bigstring of word/ smaller /'",
        "column -t -s '=>'"
    ])
    command = command % 'sample.txt'
    pipeline = Pipeline(command)
    if not pipeline.run():
        print "ERROR: Pipeline failed"
    else:
        print pipeline.output
I created this sample file for testing:
sample.txt
word1>word2=word3
list1>list2=list3
a>bigstring of word=b
blah1>blah2=blah3
Output
a smaller b
blah1 blah2 blah3
list1 list2 list3
word1 word2 word3
So if you want to use shell pipes in a command, you can pass shell=True to subprocess, like this:
sedcmd = 'sort /var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx | uniq | sed -e "s/bigstring of word/ smaller /" | column -t -s "=>" '
subprocess.call(sedcmd, shell=True)
But be careful with shell=True; using it is strongly discouraged: see the official subprocess documentation.
If you want to use pipes without shell=True, you can pass subprocess.PIPE as stdout and chain the processes yourself, as in this Stack Overflow answer and the sketch below.
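For instance, a minimal sketch of the sort | uniq stage of the pipeline above, chained by hand instead of via a shell (assuming the sample.txt from the earlier answer):

import subprocess

# sort sample.txt | uniq, without shell=True
sort_p = subprocess.Popen(['sort', 'sample.txt'], stdout=subprocess.PIPE)
uniq_p = subprocess.Popen(['uniq'], stdin=sort_p.stdout, stdout=subprocess.PIPE)
sort_p.stdout.close()  # so sort can get SIGPIPE if uniq exits early
output, _ = uniq_p.communicate()
print(output)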

Print output of an external command in real time and have it in a string at the same time in Python

For example:
#!/usr/bin/env python3
# cmd.py
import time

for i in range(10):
    print("Count %d" % i)
    time.sleep(1)

#!/usr/bin/env python3
# useCmd.py
import subprocess

p = subprocess.Popen(['./cmd.py'], stdout=subprocess.PIPE)
out, err = p.communicate()
out = out.decode()
print(out)
In useCmd.py I can print the output of cmd.py, but only after it has finished. How can I print it in real time and still have it stored in a string (sort of like tee in bash)?
If you don't have to deal with stdin, you can avoid communicate(), which blocks, and read directly from the process's stdout until the stream ends:
p = subprocess.Popen(['python', 'cmd.py'], stdout=subprocess.PIPE)
# out, err = p.communicate()
while True:
    line = p.stdout.readline()
    if line != '':
        print line,
    else:
        break
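Note that the loop above is Python 2. Since the question targets Python 3 and also wants the output kept in a string, here is a minimal Python 3 sketch of the same idea (my adaptation, not part of the original answer):

import subprocess

p = subprocess.Popen(['./cmd.py'], stdout=subprocess.PIPE)
chunks = []
for line in iter(p.stdout.readline, b''):  # a bytes pipe yields b'' at EOF
    text = line.decode()
    print(text, end='')   # echo each line in real time, like tee
    chunks.append(text)   # and keep it
p.stdout.close()
p.wait()
out = ''.join(chunks)     # the complete output as one string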
Related

Python Do While sh functions

I need to run a Python script at the time an sh file is called and for all the time that process is running.
Basically, it is a Python spinner shown during an installation.
import sys
import time

do
    def spinn():
        print "processing...\\",
        syms = ['\\', '|', '/', '-']
        bs = '\b'
        for _ in range(10):
            for sym in syms:
                sys.stdout.write("\b%s" % sym)
                sys.stdout.flush()
                time.sleep(.1)
    spinn()
while
    def installing():
        import subprocess
        subprocess.call(["sudo sh", "installer.sh"], shell=True)
    installing()
Is there a way to do this in Python?
subprocess.call() waits for the subprocess to exit, so use subprocess.Popen instead; then call .poll() periodically to check whether the process has exited. In the code below, the installer's stdout is redirected to os.devnull so its output doesn't fight with the spinner's backspace characters.
import itertools
import os
import subprocess
import sys
import time

def installing():
    null = open(os.devnull, 'wb')
    p = subprocess.Popen('echo blah && sleep 5', shell=True, stdout=null)
    #p = subprocess.Popen('sudo sh installer.sh', shell=True, stdout=null)
    return p, null

def spin(p_stdout):
    p, stdout = p_stdout
    syms = itertools.cycle(['\\', '|', '/', '-'])
    sys.stdout.write('processing....')
    sys.stdout.flush()
    while p.poll() is None:
        sys.stdout.write('\b' + next(syms))
        sys.stdout.flush()
        time.sleep(0.1)
    p.wait()
    stdout.close()

spin(installing())

Execute two processes in parallel in Python

I am trying to execute two commands in parallel for 10 seconds using the following piece of code, but the whole run takes more than 10 seconds, as you can see in the output. Could you please help me understand why, and what the best solution to this problem is?
stime = datetime.datetime.now()
print stime

commands = ("sudo /usr/local/bin/snort -v -u snort -g snort -c /usr/local/snort/etc/snort.conf -i eth0 &", "sudo gedit test")
for p in commands:
    p = subprocess.Popen(shlex.split(p), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT)

class Alarm(Exception):
    pass

def alarm_handler(signum, frame):
    raise Alarm

signal.signal(signal.SIGALRM, alarm_handler)
signal.alarm(10)  # in seconds
try:
    stdoutdata, stderrdata = p.communicate()
    signal.alarm(0)  # reset the alarm
except Alarm:
    print 'Ooops, taking too long!!!!'

etime = datetime.datetime.now()
print etime
And the output:
2013-01-08 03:30:00.836412
Ooops, taking too long!!!!
2013-01-08 03:30:16.548519
I feel like a threading.Timer might be more appropriate: all processes are started first and only then joined, so the 10-second timeout windows overlap instead of running one after another:
from threading import Timer
from subprocess import Popen, PIPE
import shlex
import datetime
import sys

jobs = ['sleep 100', 'sleep 200']
timers = []
processes = []

print datetime.datetime.now()
for job in jobs:
    p = Popen(shlex.split(job), stdout=PIPE)
    t = Timer(10, lambda p=p: p.terminate())
    t.start()
    timers.append(t)
    processes.append(p)

for t in timers:
    t.join()

stdout, stderr = processes[0].communicate()
stdout, stderr = processes[1].communicate()
print datetime.datetime.now()
import multiprocessing
import subprocess
import shlex
import time

commands = ("echo -n HI-FIRST ", "echo -n HI-SECOND ")

def parallel():
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
    stdoutdata, stderrdata = p.communicate()
    print stdoutdata + "\t" + time.ctime()

for cmd in commands:
    p = multiprocessing.Process(target=parallel)
    p.start()
Output:
$ python stack.py
HI-FIRST Fri Jan 11 08:47:18 2013
HI-SECOND Fri Jan 11 08:47:18 2013
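One caveat with the snippet above: parallel() reads the module-level cmd, which happens to hold the right value when each child process is started. Passing the command explicitly via args is more robust; a minimal sketch of that variant (my adaptation, not part of the original answer):

import multiprocessing
import shlex
import subprocess
import time

def parallel(cmd):
    # each worker receives its own command instead of reading a global
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE,
                         stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
    stdoutdata, stderrdata = p.communicate()
    print(stdoutdata.decode() + "\t" + time.ctime())

if __name__ == '__main__':
    commands = ("echo -n HI-FIRST ", "echo -n HI-SECOND ")
    workers = [multiprocessing.Process(target=parallel, args=(c,))
               for c in commands]
    for w in workers:
        w.start()
    for w in workers:
        w.join()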

How to get the last N lines of a subprocess' stderr stream output?

I am a Python newbie writing a Python (2.7) script that needs to exec a number of external applications, one of which writes a lot of output to its stderr stream. What I am trying to figure out is a concise way (in Python) to get the last N lines of that subprocess' stderr output stream.
Currently, I am running that external application from my Python script like so:
p = subprocess.Popen('/path/to/external-app.sh', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
    print "ERROR: External app did not complete successfully (error code is " + str(p.returncode) + ")"
    print "Error/failure details: ", stderr
    status = False
else:
    status = True
I'd like to capture the last N lines of output from its stderr stream so that they can be written to a log file or emailed, etc.
N = 3  # for 3 lines of output
p = subprocess.Popen(['/path/to/external-app.sh'],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode != 0:
    print ("ERROR: External app did not complete successfully "
           "(error code is %s)" % p.returncode)
    print "Error/failure details: ", '\n'.join(stderr.splitlines()[-N:])
    status = False
else:
    status = True
If the whole output can't be stored in RAM, consume the streams in threads, keeping only the last N lines of stdout in a bounded deque and printing stderr immediately:
import sys
from collections import deque
from subprocess import Popen, PIPE
from threading import Thread

ON_POSIX = 'posix' in sys.builtin_module_names

def start_thread(func, *args):
    t = Thread(target=func, args=args)
    t.daemon = True
    t.start()
    return t

def consume(infile, output):
    for line in iter(infile.readline, ''):
        output(line)
    infile.close()

p = Popen(['cat', sys.argv[1]], stdout=PIPE, stderr=PIPE,
          bufsize=1, close_fds=ON_POSIX)

# preserve last N lines of stdout, print stderr immediately
N = 100
queue = deque(maxlen=N)
threads = [start_thread(consume, *args)
           for args in (p.stdout, queue.append), (p.stderr, sys.stdout.write)]
for t in threads:
    t.join()  # wait for IO completion

print ''.join(queue),  # print last N lines
retcode = p.wait()
