Python subprocess.Popen deadlock in a multi-threaded environment

I have the following piece of code running inside a thread, where the 'expand' C executable produces a unique string output for each input 'url':
p = Popen(["expand", url], bufsize=65536, stdout=PIPE, stderr=PIPE, close_fds=True)
output,error = p.communicate()
print output
I have implemented a Queue-based multithreading solution which processes 5000 URLs in batches of 100 each.
When I run the script, it hangs, and ps -aef shows that 2 processes are still running:
1. 10177 5721 6662 6 09:25 pts/15 00:04:36 python expandPlaylist.py -s -t
2. 10177 11004 5721 0 09:26 pts/15 00:00:00 expand http://www.sample.com
Stack trace for main python script:
# ThreadID: 140332211570432
File: "expandPlaylist.py", line 902, in <module>
Main()
File: "expandPlaylist.py", line 894, in Main
startmain(db, c, conf)
File: "expandPlaylist.py", line 834, in startmain
stream_queue.join()
File: "/usr/lib64/python2.7/Queue.py", line 82, in join
self.all_tasks_done.wait()
File: "/usr/lib64/python2.7/threading.py", line 238, in wait
waiter.acquire()
Stack trace for Thread which got deadlocked
# ThreadID: 140332016596736
File: "/usr/lib64/python2.7/threading.py", line 503, in __bootstrap
self.__bootstrap_inner()
File: "/usr/lib64/python2.7/threading.py", line 530, in __bootstrap_inner
self.run()
File: "expandPlaylist.py", line 120, in run
self.process.wait()
File: "/usr/lib64/python2.7/subprocess.py", line 1242, in wait
pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0)
File: "/usr/lib64/python2.7/subprocess.py", line 471, in _eintr_retry_call
return func(*args)
GDB details for process_id: 11004
(gdb) bt
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fc36bd33294 in _L_lock_999 () from /lib64/libpthread.so.0
#2 0x00007fc36bd330aa in __pthread_mutex_lock (mutex=0x6a8c20) at pthread_mutex_lock.c:61
#3 0x00007fc36c204dcd in g_mutex_lock (mutex=0x6a8c50) at gthread-posix.c:213
#4 0x00007fc36c1b11df in g_source_unref_internal (source=0x844f90, context=0x6a8c50, have_lock=0) at gmain.c:1975
#5 0x00007fc36c1b13e3 in g_source_unref (source=0x844f90) at gmain.c:2044
#6 0x00007fc36cb475a9 in soup_session_dispose (object=0x61e100) at soup-session.c:305
#7 0x00007fc36c4d270e in g_object_unref (_object=0x61e100) at gobject.c:3160
#8 0x000000000040584b in dispose_session (parser=0x618020) at al_playlist_parser.c:859
#9 0x0000000000403b0b in al_playlist_parser_dispose (obj=0x618020) at al_playlist_parser.c:129
#10 0x00007fc36c4d270e in g_object_unref (_object=0x618020) at gobject.c:3160
#11 0x0000000000403315 in main (argc=1, argv=0x7fff462cdca8) at al_expand.c:143
How can I avoid the deadlock?
Alternatively, is there a way to attach a timeout to self.process.wait() and terminate that thread if the subprocess takes too long?

If you only have to call a subprocess on a list of arguments, I tend to do something like this:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith <rsmith#xs4all.nl>
# $Date: 2013-11-24 11:06:39 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mp4.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/

"""Convert all video files given on the command line to H.264/AAC streams in
an MP4 container."""

from __future__ import print_function, division  # for compatibility with Python 2.

__version__ = '$Revision: cac4808 $'[11:-2]

import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep


def warn(s):
    """Print a warning message.

    :param s: Message string
    """
    s = ' '.join(['Warning:', s])
    print(s, file=sys.stderr)


def checkfor(args, rv=0):
    """Make sure that a program necessary for using this script is
    available.

    :param args: String or list of strings of commands. A single string may
    not contain spaces.
    :param rv: Expected return value from invoking the command.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb)
        if rc != rv:
            raise OSError
    except OSError as oops:
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)


def startencoder(fname):
    """Use ffmpeg to convert a video file to H.264/AAC
    streams in an MP4 container.

    :param fname: Name of the file to convert.
    :returns: a 3-tuple of a Process, input path and output path
    """
    basename, ext = os.path.splitext(fname)
    known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
    if ext.lower() not in known:
        warn("File {} has unknown extension, ignoring it.".format(fname))
        return (None, fname, None)
    ofn = basename + '.mp4'
    args = ['ffmpeg', '-i', fname, '-c:v', 'libx264', '-crf', '29', '-flags',
            '+aic+mv4', '-c:a', 'libfaac', '-sn', ofn]
    with open(os.devnull, 'w') as bitbucket:
        try:
            p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
            print("Conversion of {} to {} started.".format(fname, ofn))
        except:
            p = None  # so the return below doesn't raise NameError
            warn("Starting conversion of {} failed.".format(fname))
    return (p, fname, ofn)


def manageprocs(proclist):
    """Check a list of subprocess tuples for processes that have ended and
    remove them from the list.

    :param proclist: a list of (process, input filename, output filename)
    tuples.
    """
    print('# of conversions running: {}\r'.format(len(proclist)), end='')
    sys.stdout.flush()
    for p in proclist:
        pr, ifn, ofn = p
        if pr is None:
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
    sleep(0.5)


def main(argv):
    """Main program.

    :param argv: command line arguments
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        print("{} version {}".format(binary, __version__), file=sys.stderr)
        print("Usage: {} [file ...]".format(binary), file=sys.stderr)
        sys.exit(0)
    checkfor(['ffmpeg', '-version'])
    avis = argv[1:]
    procs = []
    maxprocs = cpu_count()
    for ifile in avis:
        while len(procs) == maxprocs:
            manageprocs(procs)
        procs.append(startencoder(ifile))
    while len(procs) > 0:
        manageprocs(procs)


if __name__ == '__main__':
    main(sys.argv)
If hanging processes are an issue, you could adapt manageprocs to kill a subprocess after a certain amount of time.
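For example, here is a minimal sketch of such an adaptation (the TIMEOUT value and the extra start-time tuple field are my assumptions, not part of the original script; startencoder would have to append time.time() as a fourth element of the tuple it returns):

import time

TIMEOUT = 300  # assumed limit in seconds; tune to your workload

def manageprocs(proclist):
    """Variant of manageprocs that also kills long runners.

    :param proclist: a list of (process, input filename, output filename,
    start time) tuples.
    """
    for p in proclist:
        pr, ifn, ofn, started = p
        if pr is None:
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
        elif time.time() - started > TIMEOUT:
            pr.kill()  # send SIGKILL; Popen.kill() is available from Python 2.6 on
            pr.wait()  # reap the killed process so it doesn't linger as a zombie
            print('Conversion of {} timed out.'.format(ifn))
            proclist.remove(p)
    time.sleep(0.5)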

Related

multiprocessing - execute external command and wait before proceeding

I am using Linux. I have an external executable called "combine" and a loop of 20 iterations.
Per each iteration, "combine" needs to be called with an argument that depends on the i-th iteration. Example:
arguments = " "
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "

# begin of pseudo-code
execute: "./combine" + arguments  # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images"  # use all cores - possible for one single command?
How do I achieve this using the multiprocessing module in Python?
You can use subprocess.Popen to launch the external commands asynchronously, and store each Popen object returned in a list. Once you've launched all the processes, just iterate over them and wait for each to finish using popen_object.wait().
import shlex
import subprocess

arguments = " "
processes = []
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    processes.append(subprocess.Popen(shlex.split("./combine" + arguments)))
for p in processes:
    p.wait()
subprocess.call("./merge_resized_images")
However, this will launch twenty concurrent processes, which is probably going to hurt performance.
To avoid that, you can use a ThreadPool to limit yourself to some lower number of concurrent processes (multiprocessing.cpu_count() is a good default), and then use pool.join to wait for them all to finish.
import multiprocessing
import subprocess
import shlex
from multiprocessing.pool import ThreadPool

def call_proc(cmd):
    """ This runs in a separate thread. """
    #subprocess.call(shlex.split(cmd))  # This will block until cmd finishes
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return (out, err)

pool = ThreadPool(multiprocessing.cpu_count())
results = []
arguments = " "
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))

# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
    out, err = result.get()
    print("out: {} err: {}".format(out, err))
subprocess.call("./merge_resized_images")
Each thread will release the GIL while waiting for the subprocess to complete, so they'll all run in parallel.
My solution to this problem is to create and manage a list of subprocesses. Pay special attention to startencoder and manageprocs. That is where the actual work is being started and managed.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/

"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""

from __future__ import print_function, division

__version__ = '$Revision: a42ef58 $'[11:-2]

import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep


def warn(s):
    """Print a warning message.

    :param s: Message string
    """
    s = ' '.join(['Warning:', s])
    print(s, file=sys.stderr)


def checkfor(args, rv=0):
    """Make sure that a program necessary for using this script is
    available.

    :param args: String or list of strings of commands. A single string may
    not contain spaces.
    :param rv: Expected return value from invoking the command.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb)
        if rc != rv:
            raise OSError
    except OSError as oops:
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)


def startencoder(fname):
    """Use ffmpeg to convert a video file to Theora/Vorbis
    streams in a Matroska container.

    :param fname: Name of the file to convert.
    :returns: a 3-tuple of a Process, input path and output path
    """
    basename, ext = os.path.splitext(fname)
    known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
    if ext.lower() not in known:
        warn("File {} has unknown extension, ignoring it.".format(fname))
        return (None, fname, None)
    ofn = basename + '.mkv'
    args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
            'libvorbis', '-q:a', '3', '-sn', ofn]
    with open(os.devnull, 'w') as bitbucket:
        try:
            p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
            print("Conversion of {} to {} started.".format(fname, ofn))
        except:
            p = None  # so the return below doesn't raise NameError
            warn("Starting conversion of {} failed.".format(fname))
    return (p, fname, ofn)


def manageprocs(proclist):
    """Check a list of subprocess tuples for processes that have ended and
    remove them from the list.

    :param proclist: a list of (process, input filename, output filename)
    tuples.
    """
    print('# of conversions running: {}\r'.format(len(proclist)), end='')
    sys.stdout.flush()
    for p in proclist:
        pr, ifn, ofn = p
        if pr is None:
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
    sleep(0.5)


def main(argv):
    """Main program.

    :param argv: command line arguments
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        print("{} version {}".format(binary, __version__), file=sys.stderr)
        print("Usage: {} [file ...]".format(binary), file=sys.stderr)
        sys.exit(0)
    checkfor(['ffmpeg', '-version'])
    avis = argv[1:]
    procs = []
    maxprocs = cpu_count()
    for ifile in avis:
        while len(procs) == maxprocs:
            manageprocs(procs)
        procs.append(startencoder(ifile))
    while len(procs) > 0:
        manageprocs(procs)


if __name__ == '__main__':
    main(sys.argv)

Read a file for ten seconds

I'm working with logfiles right now. I want to read a file line by line, but only for a specified period of time, say 10s. Is there a way to accomplish this in Python?
Run tail or tac using Popen and iterate over its output until you reach a line at which you want to stop. Here is an example snippet.
import sys
import csv
import datetime as dt
from subprocess import Popen, PIPE

filename = '/var/log/nginx/access.log'

# Command to read the file from the end
cmd = sys.platform == 'darwin' and ['tail', '-r', filename] or ['tac', filename]
# But if you want to read it from the beginning, use the following:
#cmd = ['cat', filename]

proc = Popen(cmd, close_fds=True, stdout=PIPE, stderr=PIPE)
output = proc.stdout

FORMAT = [
    # 'foo',
    # 'bar',
]

def extract_log_data(line):
    '''Extract data in your log format, normalize it.'''
    return dict(zip(FORMAT, line))

csv.register_dialect('nginx', delimiter=' ', quoting=csv.QUOTE_MINIMAL)
lines = csv.reader(output, dialect='nginx')
started_at = dt.datetime.utcnow()

for line in lines:
    data = extract_log_data(line)
    print data
    if (dt.datetime.utcnow() - started_at) >= dt.timedelta(seconds=10):
        break

output.close()
proc.terminate()
Code
from multiprocessing import Process
import time

def read_file(path):
    try:
        # open the file for reading
        f = open(path, "r")
        try:
            for line in f:
                # do something
                pass
        # always close the file when leaving the try block
        finally:
            f.close()
    except IOError:
        print "Failed to open/read from file '%s'" % (path)

def read_file_limited_time(path, max_seconds):
    # init Process
    p = Process(target=read_file, args=(path,))
    # start process
    p.start()
    # for max seconds
    for i in range(max_seconds):
        # sleep for 1 second (you may change the sleep time to suit your needs)
        time.sleep(1)
        # if the process is not alive, we can break the loop
        if not p.is_alive():
            break
    # if the process is still alive after max_seconds, kill it!
    if p.is_alive():
        p.terminate()

def main():
    path = "f1.txt"
    read_file_limited_time(path, 10)

if __name__ == "__main__":
    main()
Notes
The reason we "wake up" every second and check whether the process we started is still alive is simply to avoid sleeping longer than necessary: it would be a waste to keep sleeping for the remaining 9 seconds if the process finished after 1 second.
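A simpler variant, as a sketch (my suggestion, not part of the original answer): multiprocessing.Process.join() accepts a timeout argument, so the one-second polling loop can be collapsed into a single call. Using read_file from the listing above:

from multiprocessing import Process

def read_file_limited_time(path, max_seconds):
    p = Process(target=read_file, args=(path,))
    p.start()
    # join() returns as soon as the process exits, or after max_seconds
    # at the latest, whichever comes first.
    p.join(max_seconds)
    if p.is_alive():
        p.terminate()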

chaining line by line writing/reading of pipes in Python with subprocess

I have the following code, which appears to work, for chaining pipes together in Python with subprocess while reading/writing to them line by line (without using communicate() upfront). The code just calls a Unix command (mycmd), reads its output, then writes that to the stdin of another Unix command (next_cmd), and redirects the output of that last command to a file.
import sys
import subprocess

# some unix command that uses a pipe: command "a"
# writes to stdout and "b" reads it and writes to stdout
mycmd = "a | b"
mycmd_proc = subprocess.Popen(mycmd, shell=True,
                              stdin=sys.stdin,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)

# nextCmd reads from stdin, and I'm passing it mycmd's output
next_cmd = "nextCmd -stdin"
output_file = open(output_filename, "w")
next_proc = subprocess.Popen(next_cmd, shell=True,
                             stdin=subprocess.PIPE,
                             stdout=output_file)

for line in iter(mycmd_proc.stdout.readline, ''):
    # do something with line
    # ...
    # write it to next command
    next_proc.stdin.write(line)

### If I wanted to call another command here that passes next_proc's output
### line by line to another command, would I need
### to call next_proc.communicate() first?
next_proc.communicate()
output_file.close()
This appears to work, and it only calls communicate() at the end of the command.
I'm trying to extend this code to add another command so you can do:
mycmd1 | mycmd2 | mycmd3 > some_file
meaning: line by line, read the output of mycmd1 from Python, process each line, feed it to mycmd2, read mycmd2's output and process it line by line, and feed it to mycmd3, which in turn puts its output in some_file. Is this possible, or is this bound to end in deadlock/blocking/unflushed buffers? Note that I'm not just calling three Unix commands as a pipe, since I want to intervene with Python in between and post-process each command's output line by line before feeding it to the next command.
I want to avoid calling communicate() and loading all the output into memory - instead I want to parse it line by line. Thanks.
This should handle an arbitrary number of commands:
import sys
import subprocess

def processFirst(out):
    return out

def processSecond(out):
    return out

def processThird(out):
    return out

commands = [("a|b", processFirst), ("nextCmd -stdin", processSecond),
            ("thirdCmd", processThird)]

previous_output = None
for cmd, process_func in commands:
    if previous_output is None:
        stdin = sys.stdin
    else:
        stdin = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True,
                            stdin=stdin,
                            stdout=subprocess.PIPE)
    if previous_output is not None:
        proc.stdin.write(previous_output)
    out, err = proc.communicate()
    out = process_func(out)
    previous_output = out
Just add any command you want to run to the list of commands along with the function that should process its output. The output from the last command will end up being in previous_output at the end of the loop.
To avoid any deadlocking/buffering issues, you simply run each command to completion using proc.communicate(), which returns the output (instead of reading it directly as in your example). You then feed that into the next command before letting it run to completion, and so on.
Edit: Just noticed that you don't want to use communicate() upfront and that you want to react line by line. I will edit my answer in a bit to address that.
This answer provides an example on how to read line-by-line from a pipe without blocking using select.select().
Below is an example that uses it for your particular case:
import sys
import subprocess
import select
import os

class LineReader(object):
    def __init__(self, fd, process_func):
        self._fd = fd
        self._buf = ''
        self._process_func = process_func
        self.next_proc = None

    def fileno(self):
        return self._fd

    def readlines(self):
        data = os.read(self._fd, 4096)
        if not data:
            # EOF
            if self.next_proc is not None:
                self.next_proc.stdin.close()
            return None
        self._buf += data
        if '\n' not in data:
            return []
        tmp = self._buf.split('\n')
        tmp_lines, self._buf = tmp[:-1], tmp[-1]
        lines = []
        for line in tmp_lines:
            lines.append(self._process_func(line))
            if self.next_proc is not None:
                self.next_proc.stdin.write("%s\n" % lines[-1])
        return lines

def processFirst(line):
    return line

def processSecond(line):
    return line

def processThird(line):
    return line

commands = [("a|b", processFirst), ("nextCmd -stdin", processSecond),
            ("thirdCmd", processThird)]

readers = []
previous_reader = None
for cmd, process_func in commands:
    if previous_reader is None:
        stdin = sys.stdin
    else:
        stdin = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True,
                            stdin=stdin,
                            stdout=subprocess.PIPE)
    if previous_reader is not None:
        previous_reader.next_proc = proc
    previous_reader = LineReader(proc.stdout.fileno(), process_func)
    readers.append(previous_reader)

while readers:
    ready, _, _ = select.select(readers, [], [], 10.0)
    for stream in ready:
        lines = stream.readlines()
        if lines is None:
            readers.remove(stream)

convert Ghostscript from os.popen to subprocess.Popen in Python

I need to create a monkey patch for Ghostscript: I have to migrate from os.popen to subprocess.Popen because I can't use the shell on my system.
I tried it this way:
import subprocess
from subprocess import PIPE, STDOUT
from PIL import Image

def mioGhostscript(tile, size, fp):
    """Render an image using Ghostscript (Unix only)"""
    # Unpack decoder tile
    decoder, tile, offset, data = tile[0]
    length, bbox = data
    import tempfile, os
    file = tempfile.mktemp()
    # Build Ghostscript command
    command = ["gs",
               "-q",                     # quiet mode
               "-g%dx%d" % size,         # set output geometry (pixels)
               "-dNOPAUSE -dSAFER",      # don't pause between pages, safe mode
               "-sOutputFile=%s" % file, # output file
               "-sDEVICE=ppmraw",        # ppm driver
               "- >/dev/null 2>/dev/null"
               ]
    #command = shlex.split(string.join(command))
    # push data through Ghostscript
    try:
        #gs = os.popen(command, "w")
        args = command #['gs','-dSAFER','-dNOPAUSE','-dBATCH','-sDEVICE=jpeg','-sOutputFile=/home/user/output2.jpg /home/user/downloads/test.pdf']
        gs = subprocess.Popen(args, stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        # adjust for image origin
        if bbox[0] != 0 or bbox[1] != 0:
            #gs.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
            gs.stdin.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
        fp.seek(offset)
        while length > 0:
            s = fp.read(8192)
            if not s:
                break
            length = length - len(s)
            #raise Exception(s)  # debugging leftover; not present in the final code below
            gs.stdin.write(s)
        gs.communicate()[0]
        status = gs.stdin.close()
        #status = gs.close()
        #if status:
        #    raise IOError("gs failed (status %d)" % status)
        im = Image.core.open_ppm(file)
    finally:
        try: os.unlink(file)
        except: pass
    return im

import PIL
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
but I get this traceback:
Traceback (most recent call last):
  File "/home/web/lib/driver_mod_python.py", line 252, in handler
    buf = m.__dict__[pard['program']](pard)
  File "/home/dtwebsite/bin/cms_gest_ordini.py", line 44, in wrapped
    return func(pard)
  File "/home/dtwebsite/bin/cms_gest_ordini.py", line 95, in wrapped
    return func(pard)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 341, in picking_list
    tr_modelllo = render_row_picking_list(pard, item, picked=0, plist_allowed=plist_allowed)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 432, in render_row_picking_list
    aa = a.tostring()
  File "/rnd/apps/interpreters/python-2.5.6/lib/python2.5/site-packages/PIL/Image.py", line 532, in tostring
    self.load()
  File "/rnd/apps/interpreters/python-2.5.6/lib/python2.5/site-packages/PIL/EpsImagePlugin.py", line 283, in load
    self.im = Ghostscript(self.tile, self.size, self.fp)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 64, in mioGhostscript
    gs.stdin.write(s)
IOError: [Errno 32] Broken pipe
Can someone help me, please?
I found the solution to the problem.
It was with the PIL package: something didn't compile right during the installation.
After that I had a dependency problem.
I fixed it in the following way:
import PIL.EpsImagePlugin
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
Then I saw this in the command:
"- >/dev/null 2>/dev/null"
That is shell syntax, and it didn't work on my system because the program tried to read a file literally named - >/dev/null 2>/dev/null, which doesn't exist.
I replaced
"- >/dev/null 2>/dev/null"
with
"-"
and the program now reads from stdin.
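As a side note, here is a minimal sketch (my illustration, not part of the original fix) of how the discarded >/dev/null redirection can be reproduced with subprocess: pass file handles through Popen's stdout/stderr parameters instead of putting shell syntax in the argument list.

import os
import subprocess

# Sketch: send Ghostscript's stdout/stderr to /dev/null via Popen arguments
# rather than via shell redirection in argv. "out.ppm" is illustrative.
with open(os.devnull, 'w') as devnull:
    gs = subprocess.Popen(["gs", "-q", "-sDEVICE=ppmraw",
                           "-sOutputFile=out.ppm", "-"],
                          stdin=subprocess.PIPE,
                          stdout=devnull, stderr=devnull)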
The final code is:
import subprocess
from subprocess import PIPE, STDOUT
from PIL import Image

def mioGhostscript(tile, size, fp):
    """Render an image using Ghostscript (Unix only)"""
    # Unpack decoder tile
    decoder, tile, offset, data = tile[0]
    length, bbox = data
    import tempfile, os
    file = tempfile.mktemp()
    # Build Ghostscript command
    command = ["gs",
               "-q",                     # quiet mode
               "-g%dx%d" % size,         # set output geometry (pixels)
               "-dNOPAUSE -dSAFER",      # don't pause between pages, safe mode
               "-sDEVICE=ppmraw",        # ppm driver
               "-sOutputFile=%s" % file, # output file
               "-"
               ]
    #command = shlex.split(string.join(command))
    # push data through Ghostscript
    try:
        #gs = os.popen(command, "w")
        args = command
        gs = subprocess.Popen(args, stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        # adjust for image origin
        if bbox[0] != 0 or bbox[1] != 0:
            #gs.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
            gs.stdin.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
        fp.seek(offset)
        while length > 0:
            s = fp.read(8192)
            if not s:
                break
            length = length - len(s)
            gs.stdin.write(s)
        gs.communicate()[0]
        status = gs.stdin.close()
        #status = gs.close()
        #if status:
        #    raise IOError("gs failed (status %d)" % status)
        im = Image.core.open_ppm(file)
    finally:
        try: os.unlink(file)
        except: pass
    return im

import PIL.EpsImagePlugin
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
I hope this post can help someone.

How do I avoid processing an empty stdin with Python?

sys.stdin.readline() waits for EOF (or a newline) before returning, so with console input readline() waits for user input. Instead, I want to print help and exit with an error if there is nothing to process, rather than wait for user input.
Reason:
I'm looking to write a python program with command line behaviour similar to grep.
Test cases:
No input and nothing piped, print help
$ argparse.py
argparse.py - prints arguments
echo $? # UNIX
echo %ERRORLEVEL% # WINDOWS
2
Command line args parsed
$ argparse.py a b c
0 a
1 b
2 c
Accept piped commands
$ ls | argparse.py
0 argparse.py
1 aFile.txt
parseargs.py listing:
# $Id: parseargs.py
import sys
import argparse

# Tried these too:
# import fileinput - blocks on no input
# import subprocess - requires calling program to be known

def usage():
    sys.stderr.write("{} - prints arguments".format(sys.argv[0]))
    sys.stderr.flush()
    sys.exit(2)

def print_me(count, msg):
    print '{}: {:>18} {}'.format(count, msg.strip(), map(ord, msg))

if __name__ == '__main__':
    USE_BUFFERED_INPUT = False
    # Case 1: Command line arguments
    if len(sys.argv) > 1:
        for i, arg in enumerate(sys.argv[1:]):
            print_me(i, arg)
    elif USE_BUFFERED_INPUT:  # Note: do not use; buffered input blocks on empty stdin too
        for i, arg in enumerate(sys.stdin):
            print_me(i, arg)
    else:
        i = 0
        ##### Need to determine if sys.stdin is empty.
        ##### if READLINE_EMPTY:
        #####     usage()
        while True:
            arg = sys.stdin.readline()  # Blocks if no input
            if not arg:
                break
            print_me(i, arg)
            i += 1
    sys.exit(0)
grep can work the way it does because it has one non-optional argument: the pattern. For example
$ grep < /dev/zero
Usage: grep [OPTION]... PATTERN [FILE]...
Try `grep --help' for more information.
even though there was infinite input available on stdin, grep didn't get the required argument and therefore complained.
If you want to use only optional arguments and error out if stdin is a terminal, look at file.isatty().
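For instance, a minimal sketch along those lines (my illustration, reusing the question's argparse.py naming):

import sys

if len(sys.argv) == 1 and sys.stdin.isatty():
    # No arguments and stdin is a terminal: nothing to process.
    sys.stderr.write("argparse.py - prints arguments\n")
    sys.exit(2)
for i, arg in enumerate(sys.argv[1:] or sys.stdin):
    print '{} {}'.format(i, arg.strip())

sys.stdin.isatty() returns False when stdin is piped or redirected, so the piped test cases above still work.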
import sys, os

print os.fstat(sys.stdin.fileno()).st_size > 0
Calling script
c:\py_exp>peek_stdin.py < peek_stdin.py
True
c:\py_exp>peek_stdin.py
False
You may want to check the getopt module. Basic example:
import getopt
import sys

def main(argv):
    try:
        opts, args = getopt.getopt(argv, "has:f:")  # "has:f:" are the arguments
    except getopt.GetoptError:
        print "print usage()"
        sys.exit(1)
    if not opts and not args:
        print "print usage()"
        sys.exit(1)
    print "args passed", opts, args

if __name__ == "__main__":
    main(sys.argv[1:])
~> python blabla.py
print usage()
~> python blabla.py -a arg
args passed [('-a', '')] ['arg']
~> python blabla.py -b as ----> this fails because -b is not defined in getopt's second parameter
print usage()
What about this one:
#!/usr/bin/env python
import getopt
import sys
import select

def main(argv):
    try:
        opts, args = getopt.getopt(argv, "has:f:")  # "has:f:" are the arguments
    except getopt.GetoptError:
        print "print usage()"
        sys.exit(1)
    if not opts and not args:
        a, b, c = select.select([sys.stdin], [], [], 0.2)
        if a:
            itera = iter(a[0].readline, "")
            for line in itera:
                data = line.strip()
                print data
        else:
            print "print usage()"
    print "args passed", opts, args

if __name__ == "__main__":
    main(sys.argv[1:])
select.select helps to check whether there is data coming in.
:~> ./hebele.py
print usage()
args passed [] []
:~> ping www.google.com | ./hebele.py
PING www.google.com (173.194.67.105) 56(84) bytes of data.
64 bytes from blabla (173.194.67.105): icmp_seq=1 ttl=48 time=16.7 ms
64 bytes from blabla (173.194.67.105): icmp_seq=2 ttl=48 time=17.1 ms
64 bytes from blabla (173.194.67.105): icmp_seq=3 ttl=48 time=17.1 ms
^CTraceback (most recent call last):
  File "./hebele.py", line 25, in <module>
    main(sys.argv[1:])
  File "./hebele.py", line 17, in main
    for line in itera:
KeyboardInterrupt
:~> ls | ./hebele.py
Aptana_Studio_3
Desktop
...
workspace
args passed [] []
:~> ./hebele.py -a bla
args passed [('-a', '')] ['bla']
:~> ./hebele.py sdfsdf sadf sdf
args passed [] ['sdfsdf', 'sadf', 'sdf']
