I am trying to capture the output of a tcpdump/grep pipeline from Python. I am using Python 2.6 on Mac OS 10.6.7.
When I try it with dmesg/grep, the caller receives output from the subprocesses, as expected.
When I try it with tcpdump/grep, select never returns anything.
What am I doing wrong?
#! /usr/bin/python
def tcpdump():
    """Start ``sudo tcpdump | grep`` and return the grep process.

    The returned ``Popen`` object's ``stdout`` is switched to non-blocking
    mode so that a caller can poll it with ``select``.

    Both stages are told not to block-buffer: when stdout is a pipe rather
    than a terminal, tcpdump and grep only flush once their buffer fills,
    so a poller sees nothing for a long time (a short-lived producer such
    as ``dmesg`` hides this because it flushes when it exits).

    Returns:
        The ``subprocess.Popen`` object for the trailing ``grep``.
    """
    import fcntl
    import os
    import subprocess

    # Shell equivalent:
    #   sudo tcpdump -i en0 -n -s 0 -U -w - |
    #       grep --line-buffered -a -o -E "Host\: .*|GET \/.*"
    # -U makes tcpdump write each packet as soon as it is captured.
    cmd1 = ['sudo', 'tcpdump', '-i', 'en0', '-n', '-s', '0', '-U', '-w', '-']
    # --line-buffered makes grep flush after every matching line.
    cmd2 = ['grep', '--line-buffered', '-a', '-o', '-E',
            r'Host\: .*|GET \/.*']

    p1 = subprocess.Popen(cmd1, stdout=subprocess.PIPE)
    p2 = subprocess.Popen(cmd2, stdout=subprocess.PIPE, stdin=p1.stdout)
    # Drop the parent's copy of the pipe so tcpdump gets SIGPIPE if grep
    # exits, instead of blocking on a pipe nobody reads.
    p1.stdout.close()

    # Set grep's stdout file descriptor to non-blocking.
    fd = p2.stdout.fileno()
    flags = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NDELAY | os.O_NONBLOCK)
    return p2
def poll_tcpdump(proc):
    """Return whatever ``proc`` has written to stdout, or ``None``.

    Waits up to 1/10 of a second for output to appear, then drains
    everything that is immediately available.

    The data is read with ``os.read`` on the raw descriptor rather than
    ``proc.stdout.readline``: buffered file methods do not mix with
    ``select`` on a non-blocking descriptor (they raise EAGAIN, or hide
    data in their own buffer where ``select`` cannot see it).

    Args:
        proc: object whose ``stdout`` attribute exposes ``fileno()``.

    Returns:
        The bytes read (a ``str`` on Python 2), or ``None`` if nothing
        arrived in time or the stream reached end-of-file.
    """
    import errno
    import os
    import select

    fd = proc.stdout.fileno()
    chunks = []
    timeout = 0.1
    while True:
        ready, _, _ = select.select([fd], [], [], timeout)
        if not ready:
            break
        try:
            data = os.read(fd, 65536)
        except OSError as exc:
            # Spurious wake-up on a non-blocking descriptor.
            if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                break
            raise
        if not data:
            break  # end-of-file: the writer closed its end of the pipe
        chunks.append(data)
        # After the first chunk, only collect what is already buffered.
        timeout = 0
    if not chunks:
        return None
    return b''.join(chunks)
# Poll the pipeline and echo whatever it produced. The loop ends once the
# process has exited and a poll returns nothing more, instead of spinning
# forever on a dead pipeline.
proc = tcpdump()
while True:
    text = poll_tcpdump(proc)
    if text:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('>>>> ' + text)
    elif proc.poll() is not None:
        break
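Try passing --line-buffered to grep. When its output is a pipe rather than a terminal, grep block-buffers by default, so nothing reaches select() until the buffer fills: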
# Raw string: "\:" and "\/" are not valid Python escape sequences, so a plain
# literal only works by accident (and warns on recent Python versions).
cmd2 = ['grep', '--line-buffered', '-a', '-o', '-E', r'Host\: .*|GET \/.*']
Related
The command:
tar -tf ~/dataset.tar | pv -l | wc -l
works without issue for monitoring progress. But, when I use it in the following code snippet:
def run_command_in_realtime(command):
    """Run a shell command, echoing its stdout and stderr as they arrive.

    Both streams are relayed concurrently and flushed after every chunk.
    Progress tools such as ``pv`` report on stderr and use carriage
    returns instead of newlines, so reading only stdout, or waiting for a
    newline-triggered flush, shows nothing until the command finishes.
    Draining stderr continuously also stops the child from blocking on a
    full stderr pipe.

    Args:
        command: command line, executed through the shell (``shell=True``).
            NOTE(review): do not build this from untrusted input.

    Raises:
        RuntimeError: if the command exits with a non-zero status; the
            message includes the captured stderr text.
    """
    import codecs
    import subprocess
    import sys
    import threading

    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
    )
    captured_err = []

    def _relay(stream, sink, keep):
        # An incremental decoder copes with multi-byte UTF-8 sequences that
        # are split across two reads.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        while True:
            chunk = stream.read1(4096)
            text = decoder.decode(chunk, final=not chunk)
            if text:
                sink.write(text)
                sink.flush()
                if keep is not None:
                    keep.append(text)
            if not chunk:
                break

    workers = [
        threading.Thread(
            target=_relay, args=(process.stdout, sys.stdout, None)
        ),
        threading.Thread(
            target=_relay, args=(process.stderr, sys.stderr, captured_err)
        ),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    process.wait()

    if process.returncode != 0:
        err_text = "".join(captured_err)
        raise RuntimeError(
            f"Error running command: {command}. Return code: {process.returncode} Error: {err_text}"
        )
    # Finish the echoed output with a newline, as the previous
    # implementation's final print() did.
    print()
as follows: run_command_in_realtime(f"tar -tf {cfg.tar_path} | pv -l | wc -l"), no output is ever printed to the console. My guess is that this has to do with the fact that the pv command never prints out a newline? Any thoughts on how to fix this?
This question already has answers here:
How do I use subprocess.Popen to connect multiple processes by pipes?
(9 answers)
Closed 1 year ago.
I have a script in which I am trying to use subprocess.call to execute a series of shell commands, but which appears to have some commands omitted when executed.
Specifically:
#!/usr/bin/python
import tempfile
import subprocess
import os
import re
# Demonstrates a failing attempt to run a shell pipeline through
# subprocess.call() with an argument list. Without shell=True no shell is
# involved, so '|' is not interpreted as a pipe.

# mkstemp() returns an already-open OS-level file descriptor plus the path.
# NOTE(review): neither descriptor is closed anywhere in this snippet.
grepfd, grepfpath = tempfile.mkstemp(suffix=".xx")
sedfd, sedfpath = tempfile.mkstemp(suffix=".xx")
# grepoutfile = open( grepfpath, 'w')
# NOTE(review): sedoutfile is never closed in this snippet.
sedoutfile = open( sedfpath, 'w' )
# Fill the first temp file with the sample data.
subprocess.call(['cp','/Users/bobby/Downloads/sample.txt', grepfpath])
# Every list element is handed to `sort` as a literal argument: the '|'
# entries become file names and '-e' becomes an option of sort. The inner
# double quotes are also passed through literally, since no shell strips them.
sedcmd = [ 'sort',
grepfpath,
'|',
'uniq',
'|',
'sed',
'-e',
'"s/bigstring of word/ smaller /"',
'|',
'column',
'-t',
'-s',
'"=>"' ]
print "sedcmd = ", sedcmd
subprocess.call( ['ls', grepfpath ] )
# Runs `sort` with the arguments '|' and 'uniq', reading stdin from grepfd.
subprocess.call( ['sort', '|', 'uniq' ], stdin = grepfd )
# Runs `sort` with all of sedcmd[1:] as arguments; stdout goes to sedoutfile.
subprocess.call( sedcmd, stdout = sedoutfile )
And it generates this as output:
python d3.py
sedcmd =  ['sort', '/var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx', '|', 'uniq', '|', 'sed', '-e', '"s/bigstring of word/ smaller /"', '|', 'column', '-t', '-s', '"=>"']
/var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx
sort: open failed: |: No such file or directory
sort: invalid option -- e
Try `sort --help' for more information.
The first error, 'sort: open failed: |: No such file or directory', comes from the first subprocess call: subprocess.call(['sort', '|', 'uniq'], stdin=grepfd).
The second error, 'sort: invalid option -- e', comes from the second subprocess call (the one that runs sedcmd).
I have seen a lot of examples that use pipes in this context -- so what am I doing wrong?
Thanks!
This is a class that will run a command with an arbitrary number of pipes:
pipeline.py
import shlex
import subprocess
class Pipeline(object):
    """Run a shell-style ``a | b | c`` command line without using a shell.

    Each stage is started with ``subprocess.Popen`` and its stdout is
    connected to the stdin of the next stage.

    Attributes set by ``run()``:
        output: stdout of the last stage (bytes on Python 3).
        errors: always ``None``; stderr is not redirected, so every stage
            writes its errors to the caller's stderr.
        status: exit code of the last stage.
        result: ``True`` when ``status`` is 0.
    """

    def __init__(self, command):
        self.command = command
        self.command_list = self._split_stages(command)
        self.output = None
        self.errors = None
        self.status = None
        self.result = None

    @staticmethod
    def _split_stages(command):
        """Split ``command`` on ``|`` characters that are not quoted or escaped."""
        stages = []
        current = []
        quote = None
        escaped = False
        for char in command:
            if escaped:
                current.append(char)
                escaped = False
            elif char == '\\' and quote != "'":
                # Backslash escapes the next character (not inside '...').
                current.append(char)
                escaped = True
            elif quote:
                current.append(char)
                if char == quote:
                    quote = None
            elif char in ('"', "'"):
                current.append(char)
                quote = char
            elif char == '|':
                stages.append(''.join(current))
                current = []
            else:
                current.append(char)
        stages.append(''.join(current))
        return stages

    def run(self):
        """Run all stages and return ``True`` if the last one exited with 0.

        Raises:
            ValueError: if a stage is empty (e.g. ``"a || b"`` or ``"a |"``).
            OSError: if a stage's executable cannot be started.
        """
        # Validate every stage before starting anything, so a malformed
        # command does not leave half a pipeline running.
        stage_args = [shlex.split(command) for command in self.command_list]
        if not all(stage_args):
            raise ValueError("empty command in pipeline: %r" % (self.command,))

        process_list = []
        previous_process = None
        for args in stage_args:
            stdin = None if previous_process is None else previous_process.stdout
            process = subprocess.Popen(args, stdin=stdin,
                                       stdout=subprocess.PIPE)
            if previous_process is not None:
                # Only the new child should hold the read end; closing our
                # copy lets the upstream stage receive SIGPIPE if the
                # downstream stage exits early.
                previous_process.stdout.close()
            process_list.append(process)
            previous_process = process

        last_process = process_list[-1]
        self.output, self.errors = last_process.communicate()
        # Reap the earlier stages so they do not linger as zombies.
        for process in process_list[:-1]:
            process.wait()
        self.status = last_process.returncode
        self.result = (0 == self.status)
        return self.result
This example shows how to use the class:
harness.py
from pipeline import Pipeline
if __name__ == '__main__':
    # Build "sort FILE|uniq|sed ...|column ..." and run it via Pipeline.
    command = '|'.join([
        "sort %s",
        "uniq",
        "sed -e 's/bigstring of word/ smaller /'",
        "column -t -s '=>'"
    ])
    command = command % 'sample.txt'
    pipeline = Pipeline(command)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    if not pipeline.run():
        print("ERROR: Pipeline failed")
    else:
        output = pipeline.output
        # On Python 3 the pipe yields bytes; decode so the text is printed
        # rather than its b'...' repr.
        if not isinstance(output, str):
            output = output.decode('utf-8')
        print(output)
I created this sample file for testing:
sample.txt
word1>word2=word3
list1>list2=list3
a>bigstring of word=b
blah1>blah2=blah3
Output
a smaller b
blah1 blah2 blah3
list1 list2 list3
word1 word2 word3
So if in a command you want to use shell pipes you can add shell=True in subprocess:
so it will be like this:
# The whole pipeline is one string; the shell interprets the '|' characters
# and strips the double quotes around the sed and column arguments.
sedcmd = (
    'sort /var/folders/3h/_0xwt5bx0hx8tgx06cmq9h_4f183ql/T/tmp5Gp0ff.xx'
    ' | uniq'
    ' | sed -e "s/bigstring of word/ smaller /"'
    ' | column -t -s "=>" '
)
subprocess.call(sedcmd, shell=True)
But be careful with shell=True: the official subprocess documentation strongly discourages using it.
If you want to use pipes without shell=True, you can pass subprocess.PIPE as stdout and feed it to the next process's stdin; there is an example of how to do this in a Stack Overflow answer.
From a Python script, I need to call a PL->EN translation service. The translation requires 3 steps: tokenization, translation, detokenization.
From Linux, I can achieve this using 3 processes by the following commands executed in mentioned order:
/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl < path_to_input.txt > path_to_output.tok.txt
/home/nlp/opt/moses/bin/moses -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1 -drop-unknown -input-file path_to_output.tok.txt -th 8 > path_to_output.trans.txt
/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en < path_to_output.trans.txt > path_to_output.final.txt
which translates the file path_to_input.txt and outputs to path_to_output.final.txt
I have made the following script for combining the 3 processes:
import shlex
import subprocess
from subprocess import STDOUT,PIPE
import os
import socket
class Translator:
    """Polish-to-English translation via the Moses command-line tools."""

    @staticmethod
    def pl_to_en(input_file, output_file):
        """Translate ``input_file`` (Polish) and write English to ``output_file``.

        Runs three stages in sequence: tokenizer, Moses decoder and
        detokenizer. The intermediate results are left in ``tokenized.txt``
        and ``translated.txt`` in the current directory.

        Args:
            input_file: path of the Polish text to translate.
            output_file: path that receives the detokenized English text.

        Raises:
            subprocess.CalledProcessError: if any stage exits non-zero.
        """
        # Tokenize
        print("Tokenization started")
        cmd = "/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl"
        with open(input_file) as tokenizer_input, \
                open("tokenized.txt", "w") as tokenizer_output:
            subprocess.check_call(shlex.split(cmd),
                                  stdin=tokenizer_input,
                                  stdout=tokenizer_output)
        print("Tokenization finished")

        # Translate
        print("Translation started")
        cmd = ("/home/nlp/opt/moses/bin/moses"
               " -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1"
               " -drop-unknown -input-file tokenized.txt -th 8")
        with open("translated.txt", "w") as translator_output:
            subprocess.check_call(shlex.split(cmd), stdout=translator_output)
        print("Translation finished")

        # Detokenize
        print("Detokenization started")
        cmd = "/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en"
        with open("translated.txt") as detokenizer_input, \
                open(output_file, "w") as detokenizer_output:
            subprocess.check_call(shlex.split(cmd),
                                  stdin=detokenizer_input,
                                  stdout=detokenizer_output)
        print("Detokenization finished")
translator = Translator()
translator.pl_to_en("some_input_file.txt", "some_output_file.txt")
But only the tokenization part works.
The translator just outputs an empty file translated.txt. When looking at the output in the terminal, it looks like the translator loads the file tokenized.txt correctly, and does a translation. The problem is just how I collect the output from that process.
I would try something like the following - sending the output of the translator process to the pipe, and making the input of the detokenizer the pipe instead of using the files.
import shlex
import subprocess
from subprocess import STDOUT,PIPE
import os
import socket
class Translator:
    """Polish-to-English translation via the Moses command-line tools."""

    @staticmethod
    def pl_to_en(input_file, output_file):
        """Translate ``input_file`` (Polish) and write English to ``output_file``.

        The tokenizer writes ``tokenized.txt``; the Moses decoder then
        streams its output through a pipe straight into the detokenizer.

        Args:
            input_file: path of the Polish text to translate.
            output_file: path that receives the detokenized English text.

        Raises:
            subprocess.CalledProcessError: if any stage exits non-zero.
        """
        # Tokenize
        print("Tokenization started")
        cmd = "/home/nlp/opt/moses/scripts/tokenizer/tokenizer.perl -l pl"
        with open(input_file) as tokenizer_input, \
                open("tokenized.txt", "w") as tokenizer_output:
            subprocess.check_call(shlex.split(cmd),
                                  stdin=tokenizer_input,
                                  stdout=tokenizer_output)
        print("Tokenization finished")

        # Translate and detokenize concurrently. The decoder must NOT be
        # waited on before the detokenizer starts reading: once the pipe
        # buffer is full the decoder blocks, and wait() would never return.
        print("Translation started")
        translate_cmd = ("/home/nlp/opt/moses/bin/moses"
                         " -f /home/nlp/Downloads/TED/tuning/moses.tuned.ini.1"
                         " -drop-unknown -input-file tokenized.txt -th 8")
        translate_args = shlex.split(translate_cmd)
        translate_p = subprocess.Popen(translate_args, stdout=subprocess.PIPE)

        print("Detokenization started")
        detok_cmd = "/home/nlp/opt/moses/scripts/tokenizer/detokenizer.perl -l en"
        detok_args = shlex.split(detok_cmd)
        with open(output_file, "w") as detokenizer_output:
            detokenizer_p = subprocess.Popen(detok_args,
                                             stdin=translate_p.stdout,
                                             stdout=detokenizer_output)
            # Only the detokenizer should hold the read end of the pipe.
            translate_p.stdout.close()
            detokenizer_p.wait()
        translate_p.wait()
        print("Translation finished")
        print("Detokenization finished")

        if translate_p.returncode != 0:
            raise subprocess.CalledProcessError(translate_p.returncode,
                                                translate_args)
        if detokenizer_p.returncode != 0:
            raise subprocess.CalledProcessError(detokenizer_p.returncode,
                                                detok_args)
translator = Translator()
translator.pl_to_en("some_input_file.txt", "some_output_file.txt")
This question already has answers here:
How do I use subprocess.Popen to connect multiple processes by pipes?
(9 answers)
Closed 8 years ago.
I'd like to use subprocess on the following line:
convert ../loxie-orig.png bmp:- | mkbitmap -f 2 -s 2 -t 0.48 | potrace -t 5 --progress -s -o ../DSC00232.svg
Thanks to other posts I found the subprocess documentation, but its example only chains two commands with a single pipe.
So, I try for two of the three commands and it works
p1 = subprocess.Popen(['convert', fileIn, 'bmp:-'], stdout=subprocess.PIPE)
# p2 = subprocess.Popen(['mkbitmap', '-f', '2', '-s', '2', '-t', '0.48'], stdout=subprocess.PIPE)
p3 = subprocess.Popen(['potrace', '-t' , '5', '-s' , '-o', fileOut], stdin=p1.stdout,stdout=subprocess.PIPE)
p1.stdout.close() # Allow p1 to receive a SIGPIPE if p3 exits.
output = p3.communicate()[0]
Can you help me for the third command?
Thank you very much.
Just add a third command following the same example:
# Stage 1: convert the input image to a BMP stream on stdout.
p1 = subprocess.Popen(
    ['convert', fileIn, 'bmp:-'],
    stdout=subprocess.PIPE,
)
# Stage 2: mkbitmap reads stage 1's output.
p2 = subprocess.Popen(
    ['mkbitmap', '-f', '2', '-s', '2', '-t', '0.48'],
    stdin=p1.stdout,
    stdout=subprocess.PIPE,
)
# Drop the parent's copy so p1 can receive SIGPIPE if p2 exits.
p1.stdout.close()
# Stage 3: potrace reads stage 2's output and is given fileOut via -o.
p3 = subprocess.Popen(
    ['potrace', '-t', '5', '-s', '-o', fileOut],
    stdin=p2.stdout,
    stdout=subprocess.PIPE,
)
# Same as above, for p2.
p2.stdout.close()
output = p3.communicate()[0]
def runPipe(cmds):
    """Run ``cmds`` as a pipeline, each stage feeding the next.

    Args:
        cmds: sequence of command strings, e.g. ``['ls -1', 'head -n 2']``.
            Each string is split with ``shlex.split``, so quoted arguments
            containing spaces are kept together.

    Returns:
        ``(True, lines)`` with the last stage's stdout split into lines if
        it exited with status 0; otherwise ``(False, message)`` where
        ``message`` is the last stage's stderr, or the text of the
        exception that prevented the pipeline from running.
    """
    import shlex
    import subprocess

    try:
        if not cmds:
            raise ValueError('no commands given')
        procs = []
        prev = None
        last_index = len(cmds) - 1
        for index, cmd in enumerate(cmds):
            proc = subprocess.Popen(
                shlex.split(cmd),
                stdin=None if prev is None else prev.stdout,
                stdout=subprocess.PIPE,
                # Only the last stage's stderr is captured. A pipe nobody
                # reads could fill up and block an intermediate stage.
                stderr=subprocess.PIPE if index == last_index else None,
                # Text mode, so str methods work on Python 2 and 3 alike.
                universal_newlines=True)
            if prev is not None:
                # Let the upstream stage get SIGPIPE if this one exits.
                prev.stdout.close()
            procs.append(proc)
            prev = proc
        stdout, stderr = prev.communicate()
        for proc in procs[:-1]:
            proc.wait()
        returncode = prev.returncode
    except Exception as e:
        stderr = str(e)
        returncode = -1
    if returncode == 0:
        return (True, stdout.strip().split('\n'))
    else:
        return (False, stderr)
Then execute it like:
runPipe(['ls -1','head -n 2', 'head -n 1'])
Use subprocess.Popen() with the option shell=True, and you can pass it your entire command as a single string.
This is the simplest solution and makes it possible to embed a complicated pipeline in Python without head-scratching; but in some cases it might not work, e.g. (as @torek commented) if there are spaces in the filenames passed for input or output. In that case, take the trouble to build up the robust solution in the accepted answer.
I know how to run a command using cmd = subprocess.Popen and then subprocess.communicate.
Most of the time I use a string tokenized with shlex.split as 'argv' argument for Popen.
Example with "ls -l":
import subprocess
import shlex
print subprocess.Popen(shlex.split(r'ls -l'), stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
However, pipes do not seem to work. For instance, the following example returns nothing:
import subprocess
import shlex
print subprocess.Popen(shlex.split(r'ls -l | sed "s/a/b/g"'), stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.PIPE).communicate()[0]
Can you tell me what I am doing wrong please?
Thx
I think you want to instantiate two separate Popen objects here, one for 'ls' and the other for 'sed'. You'll want to pass the first Popen object's stdout attribute as the stdin argument to the 2nd Popen object.
Example:
# Pass each command as an argument list. Without shell=True a plain string
# is taken as the name of the program to execute, arguments and all.
p1 = subprocess.Popen(['ls', '-l'], stdout=subprocess.PIPE)
p2 = subprocess.Popen(['sed', 's/a/b/g'], stdin=p1.stdout, stdout=subprocess.PIPE)
# Close the parent's copy so p1 can receive SIGPIPE if p2 exits first.
p1.stdout.close()
# communicate() returns a (stdout, stderr) tuple; stderr is None here
# because it was not redirected.
print(p2.communicate())
You can keep chaining this way if you have more commands:
p3 = subprocess.Popen('prog', stdin=p2.stdout, ...)
See the subprocess documentation for more info on how to work with subprocesses.
I've made a little function to help with the piping, hope it helps. It will chain Popens as needed.
from subprocess import Popen, PIPE
import shlex
def _split_pipeline(cmd):
    """Split ``cmd`` on ``|`` characters that are not quoted or escaped."""
    parts = []
    current = []
    quote = None
    escaped = False
    for char in cmd:
        if escaped:
            current.append(char)
            escaped = False
        elif char == '\\' and quote != "'":
            current.append(char)
            escaped = True
        elif quote:
            current.append(char)
            if char == quote:
                quote = None
        elif char in ('"', "'"):
            current.append(char)
            quote = char
        elif char == '|':
            parts.append(''.join(current))
            current = []
        else:
            current.append(char)
    parts.append(''.join(current))
    return parts


def _to_text(data):
    """Return ``data`` as ``str``, decoding bytes as UTF-8 when needed."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def run(cmd):
    """Runs the given command locally and returns the output, err and exit_code.

    ``cmd`` may contain ``|`` to chain several commands; no shell is
    involved, each stage is started with ``Popen`` and connected by pipes.

    Returns:
        ``(output, err, exit_code)``: stdout of the last stage, the
        combined stderr of all stages, and the exit code of the last stage
        (the same status a shell reports for a pipeline).

    Raises:
        ValueError: if a stage of the pipeline is empty.
        OSError: if a stage's executable cannot be started.
    """
    import tempfile

    stages = [shlex.split(part) for part in _split_pipeline(cmd)]
    if not all(stages):
        raise ValueError("empty command in pipeline: %r" % (cmd,))

    procs = []
    # stderr of every stage goes to one temporary file. Unlike a pipe it
    # can never fill up and block a stage whose errors nobody is reading.
    with tempfile.TemporaryFile() as err_file:
        for args in stages:
            upstream = procs[-1].stdout if procs else None
            procs.append(Popen(args, stdin=upstream, stdout=PIPE,
                               stderr=err_file))
            if upstream is not None:
                # Only the child keeps the read end, so the upstream stage
                # gets SIGPIPE instead of hanging when this stage exits.
                upstream.close()
        output = procs[-1].communicate()[0]
        for proc in procs[:-1]:
            proc.wait()
        err_file.seek(0)
        err = err_file.read()
    return _to_text(output), _to_text(err), procs[-1].returncode
output, err, exit_code = run("ls -lha /var/log | grep syslog | grep gz")
if exit_code != 0:
print "Output:"
print output
print "Error:"
print err
# Handle error here
else:
# Be happy :D
print output
shlex only splits up spaces according to the shell rules, but does not deal with pipes.
It should, however, work this way:
import subprocess
import shlex
# ls gets a stdin pipe that is closed right away (see below); its stderr is
# left attached to the caller's stderr so an unread pipe can never block it.
sp_ls = subprocess.Popen(shlex.split(r'ls -l'), stdin = subprocess.PIPE, stdout = subprocess.PIPE)
sp_sed = subprocess.Popen(shlex.split(r'sed "s/a/b/g"'), stdin = sp_ls.stdout, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
sp_ls.stdin.close() # makes it similar to /dev/null
sp_ls.stdout.close() # only sed should hold the read end of the pipe
# Read from the LAST process of the chain. Taking only element [0] of the
# result drops sed's stderr, i.e. any errors are ignored.
output = sp_sed.communicate()[0]
sp_ls.wait()
print(output)
according to help(subprocess)'s
Replacing shell pipe line
-------------------------
output=`dmesg | grep hda`
==>
p1 = Popen(["dmesg"], stdout=PIPE)
p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE)
output = p2.communicate()[0]
HTH
"""
Why don't you use shell
"""
def output_shell(line):
    """Run ``line`` through the shell and return its stdout as text.

    Args:
        line: complete shell command line; pipes and redirections are
            interpreted by the shell.
            NOTE(review): because of ``shell=True`` this must never be
            built from untrusted input.

    Returns:
        The command's stdout as ``str``, or ``None`` if the command could
        not be started or exited with a non-zero status.
    """
    try:
        shell_command = Popen(line, stdout=PIPE, stderr=PIPE, shell=True)
    except (OSError, ValueError):
        return None
    # communicate() waits for the process to finish, so no separate wait().
    output, _err = shell_command.communicate()
    if shell_command.returncode != 0:
        print("Shell command failed to execute")
        return None
    # On Python 3 the pipe yields bytes; str(bytes) would return the
    # "b'...'" repr instead of the text.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return output
Thanks to @hernvnc, @glglgl, and @Jacques Gaudin for the answers. I fixed the code from @hernvnc; his version hangs in some scenarios.
import shlex
from subprocess import PIPE
from subprocess import Popen
def _split_pipeline(cmd):
    """Split ``cmd`` on ``|`` characters that are not quoted or escaped."""
    parts = []
    current = []
    quote = None
    escaped = False
    for char in cmd:
        if escaped:
            current.append(char)
            escaped = False
        elif char == '\\' and quote != "'":
            current.append(char)
            escaped = True
        elif quote:
            current.append(char)
            if char == quote:
                quote = None
        elif char in ('"', "'"):
            current.append(char)
            quote = char
        elif char == '|':
            parts.append(''.join(current))
            current = []
        else:
            current.append(char)
    parts.append(''.join(current))
    return parts


def _to_text(data):
    """Return ``data`` as ``str``, decoding bytes as UTF-8 when needed."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def run(cmd, input=None):
    """Runs the given command locally and returns the output, err and exit_code.

    ``cmd`` may contain ``|`` to chain several commands; no shell is
    involved, each stage is started with ``Popen`` and connected by pipes.

    Args:
        cmd: command line, optionally a ``|``-separated pipeline.
        input: optional bytes (or text, encoded as UTF-8) fed to the first
            stage's stdin. With ``None`` the first stage inherits the
            caller's stdin.

    Returns:
        ``(output, err, exit_code)``: stdout of the last stage, the
        combined stderr of all stages, and the exit code of the last stage.

    Raises:
        ValueError: if a stage of the pipeline is empty.
        OSError: if a stage's executable cannot be started.
    """
    import tempfile

    stages = [shlex.split(part) for part in _split_pipeline(cmd)]
    if not all(stages):
        raise ValueError("empty command in pipeline: %r" % (cmd,))

    in_file = None
    if input is not None:
        if isinstance(input, type(u'')):
            input = input.encode('utf-8')
        # Feed the input from a temporary file, not by writing into a pipe:
        # a blocking write() can deadlock once the pipe buffers are full
        # and nobody is reading the last stage's output yet.
        in_file = tempfile.TemporaryFile()
        in_file.write(input)
        in_file.seek(0)

    procs = []
    err_file = tempfile.TemporaryFile()
    try:
        for args in stages:
            upstream = procs[-1].stdout if procs else in_file
            # stderr of every stage goes to one file; unlike an unread
            # pipe it can never fill up and block a stage.
            procs.append(Popen(args, stdin=upstream, stdout=PIPE,
                               stderr=err_file))
            if procs[:-1]:
                # Only the child keeps the read end, so the upstream stage
                # gets SIGPIPE instead of hanging when this stage exits.
                upstream.close()
        output = procs[-1].communicate()[0]
        for proc in procs[:-1]:
            proc.wait()
        err_file.seek(0)
        err = err_file.read()
    finally:
        err_file.close()
        if in_file is not None:
            in_file.close()
    return _to_text(output), _to_text(err), procs[-1].returncode
# test case below
inp = b'[ CMServer State ]\n\nnode node_ip instance state\n--------------------------------------------\n1 linux172 10.90.56.172 1 Primary\n2 linux173 10.90.56.173 2 Standby\n3 linux174 10.90.56.174 3 Standby\n\n[ ETCD State ]\n\nnode node_ip instance state\n--------------------------------------------------\n1 linux172 10.90.56.172 7001 StateFollower\n2 linux173 10.90.56.173 7002 StateLeader\n3 linux174 10.90.56.174 7003 StateFollower\n\n[ Cluster State ]\n\ncluster_state : Normal\nredistributing : No\nbalanced : No\ncurrent_az : AZ_ALL\n\n[ Datanode State ]\n\nnode node_ip instance state | node node_ip instance state | node node_ip instance state\n------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n1 linux172 10.90.56.172 6001 P Standby Normal | 2 linux173 10.90.56.173 6002 S Primary Normal | 3 linux174 10.90.56.174 6003 S Standby Normal'
cmd = "grep -E 'Primary' | tail -1 | awk '{print $3}'"
run(cmd, input=inp)