sys.stdin.readline() waits for EOF (or a newline) before returning, so when input comes from an interactive console, readline() waits for user input. Instead, I want to print help and exit with an error if there is nothing to process, rather than wait for user input.
Reason:
I'm looking to write a Python program with command-line behaviour similar to grep.
Test cases:
No input and nothing piped, print help
$ argparse.py
argparse.py - prints arguments
echo $? # UNIX
echo %ERRORLEVEL% # WINDOWS
2
Command line args parsed
$ argparse.py a b c
0 a
1 b
2 c
Accept piped input
$ ls | argparse.py
0 argparse.py
1 aFile.txt
parseargs.py listing:
# $Id: parseargs.py
import sys
import argparse
# Tried these too:
# import fileinput - blocks on no input
# import subprocess - requires calling program to be known
def usage():
    sys.stderr.write("{} - prints arguments\n".format(sys.argv[0]))
    sys.stderr.flush()
    sys.exit(2)

def print_me(count, msg):
    print '{}: {:>18} {}'.format(count, msg.strip(), map(ord, msg))

if __name__ == '__main__':
    USE_BUFFERED_INPUT = False
    # Case 1: Command line arguments
    if len(sys.argv) > 1:
        for i, arg in enumerate(sys.argv[1:]):
            print_me(i, arg)
    elif USE_BUFFERED_INPUT:  # Note: do not use; iterating sys.stdin also blocks on an empty console
        for i, arg in enumerate(sys.stdin):
            print_me(i, arg)
    else:
        i = 0
        ##### Need to determine if sys.stdin is empty.
        ##### if READLINE_EMPTY:
        #####     usage()
        while True:
            arg = sys.stdin.readline()  # Blocks if no input
            if not arg:
                break
            print_me(i, arg)
            i += 1
    sys.exit(0)
grep can work the way it does because it has one non-optional argument: the pattern. For example:
$ grep < /dev/zero
Usage: grep [OPTION]... PATTERN [FILE]...
Try `grep --help' for more information.
Even though there was infinite input available on stdin, grep didn't get its required argument and therefore complained.
If you want to use only optional arguments and error out if stdin is a terminal, look at file.isatty().
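For instance, a minimal sketch along those lines (my own illustration; the usage text and exit code 2 come from the question's test cases, and it works on Python 2 and 3):

import sys

def main():
    if len(sys.argv) > 1:
        args = sys.argv[1:]       # arguments were given on the command line
    elif not sys.stdin.isatty():
        args = sys.stdin          # stdin is a pipe or a redirected file
    else:                         # interactive terminal and nothing to process
        sys.stderr.write("{} - prints arguments\n".format(sys.argv[0]))
        sys.exit(2)
    for i, arg in enumerate(args):
        print('{} {}'.format(i, arg.strip()))

if __name__ == '__main__':
    main()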
import sys,os
print os.fstat(sys.stdin.fileno()).st_size > 0
Calling the script (note: st_size is meaningful when stdin is redirected from a regular file, as below; for a pipe it may report 0 even though data is on the way):
c:\py_exp>peek_stdin.py < peek_stdin.py
True
c:\py_exp>peek_stdin.py
False
You may want to check the getopt module. Basic example:
import getopt
import sys

def main(argv):
    try:
        opts, args = getopt.getopt(argv, "has:f:")  # "has:f:" are the accepted options
    except getopt.GetoptError:
        print "print usage()"
        sys.exit(1)
    if not opts and not args:
        print "print usage()"
        sys.exit(1)
    print "args passed", opts, args

if __name__ == "__main__":
    main(sys.argv[1:])
~> python blabla.py
print usage()
~> python blabla.py -a arg
args passed [('-a', '')] ['arg']
~> python blabla.py -b as ----> this fails because -b is not defined in the getopt option string
print usage()
What about this one:
#!/usr/bin/env python
import getopt
import sys
import select

def main(argv):
    try:
        opts, args = getopt.getopt(argv, "has:f:")  # "has:f:" are the accepted options
    except getopt.GetoptError:
        print "print usage()"
        sys.exit(1)
    if not opts and not args:
        a, b, c = select.select([sys.stdin], [], [], 0.2)
        if a:
            itera = iter(a[0].readline, "")
            for line in itera:
                data = line.strip()
                print data
        else:
            print "print usage()"
    print "args passed", opts, args

if __name__ == "__main__":
    main(sys.argv[1:])
select.select checks whether data is waiting on stdin; the 0.2-second timeout keeps it from blocking forever. (Note that on Windows, select only works on sockets, so this check is Unix-only.)
:~> ./hebele.py
print usage()
args passed [] []
:~> ping www.google.com | ./hebele.py
PING www.google.com (173.194.67.105) 56(84) bytes of data.
64 bytes from blabla (173.194.67.105): icmp_seq=1 ttl=48 time=16.7 ms
64 bytes from blabla (173.194.67.105): icmp_seq=2 ttl=48 time=17.1 ms
64 bytes from blabla (173.194.67.105): icmp_seq=3 ttl=48 time=17.1 ms
^CTraceback (most recent call last):
File "./hebele.py", line 25, in <module>
main(sys.argv[1:])
File "./hebele.py", line 17, in main
for line in itera:
KeyboardInterrupt
:~> ls | ./hebele.py
Aptana_Studio_3
Desktop
...
workspace
args passed [] []
:~> ./hebele.py -a bla
args passed [('-a', '')] ['bla']
:~> ./hebele.py sdfsdf sadf sdf
args passed [] ['sdfsdf', 'sadf', 'sdf']
Is there any way I can get the PID by process name in Python?
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
3110 meysam 20 0 971m 286m 63m S 14.0 7.9 14:24.50 chrome
For example, I need to get 3110 for chrome.
You can get the pid of processes by name using pidof through subprocess.check_output:
from subprocess import check_output
def get_pid(name):
    return check_output(["pidof", name])
In [5]: get_pid("java")
Out[5]: '23366\n'
check_output(["pidof", name]) runs the command as "pidof process_name". If the command's return code is non-zero, it raises a CalledProcessError.
To handle multiple entries and cast to ints:
from subprocess import check_output
def get_pid(name):
    return map(int, check_output(["pidof", name]).split())
In [21]: get_pid("chrome")
Out[21]:
[27698, 27678, 27665, 27649, 27540, 27530, 27517, 14884, 14719, 13849, 13708, 7713, 7310, 7291, 7217, 7208, 7204, 7189, 7180, 7175, 7166, 7151, 7138, 7127, 7117, 7114, 7107, 7095, 7091, 7087, 7083, 7073, 7065, 7056, 7048, 7028, 7011, 6997]
Or pass the -s flag to get a single PID:
def get_pid(name):
    return int(check_output(["pidof", "-s", name]))
In [25]: get_pid("chrome")
Out[25]: 27698
You can use the psutil package:
Install
pip install psutil
Usage:
import psutil

process_name = "chrome"
pid = None

for proc in psutil.process_iter():
    if process_name in proc.name():
        pid = proc.pid
        break

print("Pid:", pid)
You can also use pgrep; pgrep also accepts a pattern to match:
import subprocess

# shell must be False when the command is passed as a list
child = subprocess.Popen(['pgrep', 'program_name'], stdout=subprocess.PIPE, shell=False)
result = child.communicate()[0]
You can also use awk with ps, like this:
ps aux | awk '/name/{print $2}'
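If you want to drive that pipeline from Python, here is a small sketch (my own illustration for a POSIX system; pids_by_name is a hypothetical helper):

import subprocess

def pids_by_name(name):
    # Run the ps/awk pipeline through a shell and parse the PIDs.
    # Note: like the one-liner, this can match the awk process itself.
    cmd = "ps aux | awk '/%s/ {print $2}'" % name
    out = subprocess.check_output(cmd, shell=True).decode()
    return [int(pid) for pid in out.split()]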
For POSIX systems (Linux, BSD, etc.; only the /proc directory needs to be mounted) it's easier to work with the files in /proc.
It's pure Python, with no need to call external shell programs.
Works on Python 2 and 3 (the only 2-to-3 difference is the exception tree, hence the broad "except Exception", which I dislike but kept for compatibility; a custom exception could also have been created).
#!/usr/bin/env python
import os
import sys
for dirname in os.listdir('/proc'):
    if dirname == 'curproc':
        continue
    try:
        with open('/proc/{}/cmdline'.format(dirname), mode='rb') as fd:
            content = fd.read().decode().split('\x00')
    except Exception:
        continue
    for i in sys.argv[1:]:
        if i in content[0]:
            print('{0:<12} : {1}'.format(dirname, ' '.join(content)))
Sample Output (it works like pgrep):
phoemur ~/python $ ./pgrep.py bash
1487 : -bash
1779 : /bin/bash
Complete example based on @Hackaholic's excellent answer:
import subprocess

def get_process_id(name):
    """Return process ids found by (partial) name or regex.

    >>> get_process_id('kthreadd')
    [2]
    >>> get_process_id('watchdog')
    [10, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61]  # ymmv
    >>> get_process_id('non-existent process')
    []
    """
    child = subprocess.Popen(['pgrep', '-f', name], stdout=subprocess.PIPE, shell=False)
    response = child.communicate()[0]
    return [int(pid) for pid in response.split()]
To improve on Padraic's answer: when the command run by check_output exits with a non-zero code, it raises a CalledProcessError. This happens when the process does not exist or is not running.
What I would do to catch this exception is:
#!/usr/bin/python
from subprocess import check_output, CalledProcessError
def getPIDs(process):
    try:
        pidlist = map(int, check_output(["pidof", process]).split())
    except CalledProcessError:
        pidlist = []
    print 'list of PIDs = ' + ', '.join(str(e) for e in pidlist)

if __name__ == '__main__':
    getPIDs("chrome")
The output:
$ python pidproc.py
list of PIDs = 31840, 31841, 41942
If you're using Windows, you can get the PID of a process/app by its image name with this code:
from subprocess import Popen, PIPE

def get_pid_of_app(app_image_name):
    final_list = []
    command = Popen(['tasklist', '/FI', f'IMAGENAME eq {app_image_name}', '/fo', 'CSV'],
                    stdout=PIPE, shell=False)
    msg = command.communicate()
    output = str(msg[0])
    if 'INFO' not in output:
        output_list = output.split(app_image_name)
        for i in range(1, len(output_list)):
            j = int(output_list[i].replace("\"", '')[1:].split(',')[0])
            if j not in final_list:
                final_list.append(j)
    return final_list
It will return all PIDs of an app like firefox or chrome, e.g.:
>>> get_pid_of_app("firefox.exe")
[10908, 4324, 1272, 6936, 1412, 2824, 6388, 1884]
let me know if it helped
If your OS is Unix-based, use this code:
import os

def check_process(name):
    output = []
    cmd = "ps -aef | grep -i '%s' | grep -v 'grep' | awk '{ print $2 }' > /tmp/out"
    os.system(cmd % name)
    with open('/tmp/out', 'r') as f:
        for line in f:            # collect the PIDs written by the shell pipeline
            if line.strip():
                output.append(line.strip())
    return output
Then call it and pass it a process name to get all PIDs.
>>> check_process('firefox')
['499', '621', '623', '630', '11733']
Since Python 3.5, subprocess.run() is recommended over subprocess.check_output():
>>> int(subprocess.run(["pidof", "-s", "your_process"], stdout=subprocess.PIPE).stdout)
Also, since Python 3.7, you can use the capture_output=True parameter to capture stdout and stderr:
>>> int(subprocess.run(["pidof", "-s", "your_process"], capture_output=True).stdout)
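As noted above, pidof exits with a non-zero status when no process matches, so a hedged sketch that degrades gracefully instead of raising might look like this:

import subprocess

def get_pid(name):
    # Return the first matching PID, or None when the process is not
    # running (pidof exits non-zero in that case).
    result = subprocess.run(["pidof", "-s", name], capture_output=True)
    if result.returncode != 0:
        return None
    return int(result.stdout)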
On Unix, you can use the pyproc2 package.
Installation
pip install pyproc2
Usage
import pyproc2
chrome_pid=pyproc2.find("chrome").pid #Returns PID of first process with name "chrome"
I have a Python program which parses a stream of data, as shown below:
tail -F /path1/restapi.log -F /path2/restapi.log | parse.py
parse.py parses the data from sys.stdin.readline:
import re
import sys
import json
from functools import reduce  # needed on Python 3, where reduce is not a builtin

def deep_get(dictionary, keys, default=None):
    return reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default,
                  keys.split("."), dictionary)

regexp_date_status = re.compile(r'(\d+-\d+-\d+ \d+:\d+:\d+.\d+\+\d+) (\w+)')

while True:
    line = sys.stdin.readline()
    if not line:
        break
    if re.search(r'Request #\d+: {', line):
        date_status = regexp_date_status.match(line)
        json_str = '{\n'
        while True:
            json_str += sys.stdin.readline()
            try:
                d = json.loads(json_str)  # we have our dictionary, perhaps
            except Exception:
                pass
            else:
                username = deep_get(d, "context.authorization.authUserName", default="Username not found")
                hostname = deep_get(d, "context.headers.X-Forwarded-For")
                uri = deep_get(d, "context.uri")
                verb = deep_get(d, "context.verb")
                print("State->{} : Date->{} : User->{} : Host->{} : URI->{} : Verb->{}".format(
                    date_status.group(2), date_status.group(1), username, hostname, uri, verb))
                break
I would like to add multithreading, as the number of files can increase up to 30:
tail -F /path1/restapi.log -F /path2/restapi.log /path3/restapi.log -F /path4/restapi.log .... | parse.py
How do I divide the work among threads in this case, given that data is streamed and parsed until the try block yields a valid dictionary? Also, do I need to leverage Queues here?
Let bash deal with the multiple instances of parse.py.
Something similar to:
echo -e 'file1.log\nfile2.log\nfile3.log' | xargs -n 1 --max-procs 10 -I % sh -c 'tail -f % | parse.py'
xargs will handle running the multiple instances.
Note the '\n' in the files list.
Example to play around with:
echo -e "hello\nto\nyou" |xargs -n 1 --max-procs 2 -I % sh -c 'sleep 3; echo %'
This runs at most two processes at a time to do the sleep and echo. The result is that 'hello' and 'to' appear together, with a delay before 'you' is seen.
I'm trying to use the getopt library to catch a list of paths given on the command line, something like this:
python script.py -l ["a","b","c","d","e"] -p 80 ....
What I wrote is this:
def getValue(self):
    '''
    get value from command line and initialize
    variable !
    '''
    try:
        opts, args = getopt.getopt(
            self.args,
            "hl:us:r:p:",
            ['help', 'local_path', 'update', "remote_host", "remote_path", "parameter"])
    except getopt.GetoptError as err:
        print(str(err))
        self.usage()
    ## ----------------- SETTING VARIABLE PASSED BY COMMAND LINE ------------------ ##
    for opt, arg in opts:
        #----------------------------------------------------------
        if opt in ("-l", "--local_path"):
            self.local_path = arg
            if DEBUG:
                print('local path: ', self.local_path)
        #-----------------------------------------------------------
        elif opt in ("-h", "--help"):
            self.usage()
        #-----------------------------------------------------------
        elif opt in ("-s", "--remote_host"):
            self.remote_host = arg
            if DEBUG:
                print('Simulation host: ', self.remote_host)
        #-----------------------------------------------------------
        elif opt in ("-r", "--remote_path"):
            self.remote_path = arg
            if DEBUG:
                print('Simulation path: ', self.remote_path)
        #-----------------------------------------------------------
        elif opt in ("-p", "--parameter"):
            self.parameter = arg
            if DEBUG:
                print('Simulation parameter: ', self.parameter)
        #-----------------------------------------------------------
        elif opt in ("-u", "--update"):
            #if self.remote_host and self.remote_path:
            self.update()
        #-----------------------------------------------------------
        else:
            assert False, "Unhandled Option"
        #-----------------------------------------------------------
but unfortunately this takes just a single value for each option (-l, -p, ...).
How can I reach my aim?
Thanks in advance!
I've simplified your script a bit to specifically address your question about passing a list of arguments from the command line.
One option you have is to specify the same flag multiple times to send several arguments into your program. For example:
import getopt
import sys
opts, args = getopt.getopt(
    sys.argv[1:],
    'hl:p:',
    ['help', 'local_path=', 'parameter='],  # trailing '=' means the long option takes an argument
)

local_paths = []
for opt, arg in opts:
    if opt in ('-l', '--local_path'):
        local_paths.append(arg)
        print(opt + ': ' + arg)
    if opt in ('-p', '--parameter'):
        print(opt + ': ' + arg)

print('local_paths: ' + str(local_paths))
Used as follows:
$ python script.py -la -lb -p 80
-l: a
-l: b
-p: 80
local_paths: ['a', 'b']
Another option (if you must pass the list itself via the command line to a single instance of the flag) is to use some sort of serialization. JSON is up to the task but you could also use csv or others. Example:
import getopt
import json
import sys
opts, args = getopt.getopt(
    sys.argv[1:],
    'hl:p:',
    ['help', 'local_path=', 'parameter='],
)

for opt, arg in opts:
    if opt in ('-l', '--local_path'):
        list_arg = json.loads(arg)
        print(opt + ': ' + str(list_arg))
    if opt in ('-p', '--parameter'):
        print(opt + ': ' + arg)
$ python script.py -l '["a","b"]' -p 80
-l: ['a', 'b']
-p: 80
Note the quotes (') around the JSON after the -l flag ('["a","b"]'). This notation "protects" the argument from being evaluated by bash.
If you pass the argument as you have done in your example, python still receives a single argument but it does not quite work as I think you intend:
import getopt
import sys
opts, args = getopt.getopt(
    sys.argv[1:],
    'hl:',
    ['help', 'local_path='],
)

for opt, arg in opts:
    if opt in ('-l', '--local_path'):
        print(opt + ': ' + arg)
        print('type: ' + str(type(arg)))
$ python script.py -l ["a","b"]
-l: [a,b]
type: <class 'str'>
The argument for the -l flag is literally the string "[a,b]" in python. This happens because bash performed quote removal on ["a","b"] before running the script, so deserialization in python is now a bit trickier. It is probably worth avoiding this way and sticking with a standard serialization pattern.
To clarify the notation above from the docs:
shortopts is the string of option letters that the script wants to recognize, with options that require an argument followed by a colon (':')
So hl: means we accept -h and -l, but -l must have an argument, otherwise we will get something like:
getopt.GetoptError: option -l requires argument
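A tiny self-contained illustration of that rule (the argument list is hard-coded here just to show the parse):

import getopt

# 'l:' means -l requires an argument; 'h' takes none.
opts, args = getopt.getopt(['-l', '/tmp', '-h'], 'hl:')
print(opts)  # [('-l', '/tmp'), ('-h', '')]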
I am using Linux. I have an external executable called "combine" and a loop of 20 iterations.
For each iteration, "combine" needs to be called with an argument that depends on the i-th iteration. Example:
arguments = " "
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
# begin of pseudo-code
execute: "./combine" + arguments  # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images"  # use all cores - possible for one single command?
How do I achieve this using the multiprocessing module in Python?
You can use subprocess.Popen to launch the external commands asynchronously, and store each Popen object returned in a list. Once you've launched all the processes, just iterate over them and wait for each to finish using popen_object.wait.
import shlex
import subprocess

arguments = " "
processes = []
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    processes.append(subprocess.Popen(shlex.split("./combine" + arguments)))

for p in processes:
    p.wait()

subprocess.call("./merge_resized_images")
However, this will launch all the processes at once, which is probably going to hurt performance.
To avoid that, you can use a ThreadPool to limit yourself to some lower number of concurrent processes (multiprocessing.cpu_count is a good number), and then use pool.join to wait for them all to finish.
import multiprocessing
import subprocess
import shlex

from multiprocessing.pool import ThreadPool

def call_proc(cmd):
    """ This runs in a separate thread. """
    #subprocess.call(shlex.split(cmd))  # This will block until cmd finishes
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return (out, err)

pool = ThreadPool(multiprocessing.cpu_count())
arguments = " "
results = []
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))

# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
    out, err = result.get()
    print("out: {} err: {}".format(out, err))

subprocess.call("./merge_resized_images")
Each thread will release the GIL while waiting for the subprocess to complete, so they'll all run in parallel.
My solution to this problem is to create and manage a list of subprocesses. Pay special attention to startencoder and manageprocs. That is where the actual work is being started and managed.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/

"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""

from __future__ import print_function, division

__version__ = '$Revision: a42ef58 $'[11:-2]

import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep


def warn(s):
    """Print a warning message.

    :param s: Message string
    """
    s = ' '.join(['Warning:', s])
    print(s, file=sys.stderr)


def checkfor(args, rv=0):
    """Make sure that a program necessary for using this script is
    available.

    :param args: String or list of strings of commands. A single string may
    not contain spaces.
    :param rv: Expected return value from evoking the command.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb)
        if rc != rv:
            raise OSError
    except OSError as oops:
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)


def startencoder(fname):
    """Use ffmpeg to convert a video file to Theora/Vorbis
    streams in a Matroska container.

    :param fname: Name of the file to convert.
    :returns: a 3-tuple of a Process, input path and output path
    """
    basename, ext = os.path.splitext(fname)
    known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
    if ext.lower() not in known:
        warn("File {} has unknown extension, ignoring it.".format(fname))
        return (None, fname, None)
    ofn = basename + '.mkv'
    args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
            'libvorbis', '-q:a', '3', '-sn', ofn]
    with open(os.devnull, 'w') as bitbucket:
        try:
            p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
            print("Conversion of {} to {} started.".format(fname, ofn))
        except Exception:
            warn("Starting conversion of {} failed.".format(fname))
            return (None, fname, None)  # do not return an unbound 'p'
    return (p, fname, ofn)


def manageprocs(proclist):
    """Check a list of subprocess tuples for processes that have ended and
    remove them from the list.

    :param proclist: a list of (process, input filename, output filename)
    tuples.
    """
    print('# of conversions running: {}\r'.format(len(proclist)), end='')
    sys.stdout.flush()
    for p in proclist:
        pr, ifn, ofn = p
        if pr is None:
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
    sleep(0.5)


def main(argv):
    """Main program.

    :param argv: command line arguments
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        print("{} version {}".format(binary, __version__), file=sys.stderr)
        print("Usage: {} [file ...]".format(binary), file=sys.stderr)
        sys.exit(0)
    checkfor(['ffmpeg', '-version'])
    avis = argv[1:]
    procs = []
    maxprocs = cpu_count()
    for ifile in avis:
        while len(procs) == maxprocs:
            manageprocs(procs)
        procs.append(startencoder(ifile))
    while len(procs) > 0:
        manageprocs(procs)


if __name__ == '__main__':
    main(sys.argv)
I have the following piece of code running inside a thread, where the 'expand' C executable produces a unique string output for each input 'url':
p = Popen(["expand", url], bufsize=65536, stdout=PIPE, stderr=PIPE, close_fds=True)
output,error = p.communicate()
print output
I have implemented a Queue-based multithreading solution which processes 5000 URLs in batches of 100 each.
When I run the script, it hangs, and ps -aef shows that 2 processes are still running:
1. 10177 5721 6662 6 09:25 pts/15 00:04:36 python expandPlaylist.py -s -t
2. 10177 11004 5721 0 09:26 pts/15 00:00:00 expand http://www.sample.com
Stack trace for main python script:
# ThreadID: 140332211570432
File: "expandPlaylist.py", line 902, in <module>
Main()
File: "expandPlaylist.py", line 894, in Main
startmain(db, c, conf)
File: "expandPlaylist.py", line 834, in startmain
stream_queue.join()
File: "/usr/lib64/python2.7/Queue.py", line 82, in join
self.all_tasks_done.wait()
File: "/usr/lib64/python2.7/threading.py", line 238, in wait
waiter.acquire()
Stack trace for the thread which got deadlocked:
# ThreadID: 140332016596736
File: "/usr/lib64/python2.7/threading.py", line 503, in __bootstrap
self.__bootstrap_inner()
File: "/usr/lib64/python2.7/threading.py", line 530, in __bootstrap_inner
self.run()
File: "expandPlaylist.py", line 120, in run
self.process.wait()
File: "/usr/lib64/python2.7/subprocess.py", line 1242, in wait
pid, sts = _eintr_retry_call(os.waitpid, self.pid, 0)
File: "/usr/lib64/python2.7/subprocess.py", line 471, in _eintr_retry_call
return func(*args)
GDB details for process_id: 11004
(gdb) bt
#0 __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1 0x00007fc36bd33294 in _L_lock_999 () from /lib64/libpthread.so.0
#2 0x00007fc36bd330aa in __pthread_mutex_lock (mutex=0x6a8c20) at pthread_mutex_lock.c:61
#3 0x00007fc36c204dcd in g_mutex_lock (mutex=0x6a8c50) at gthread-posix.c:213
#4 0x00007fc36c1b11df in g_source_unref_internal (source=0x844f90, context=0x6a8c50, have_lock=0) at gmain.c:1975
#5 0x00007fc36c1b13e3 in g_source_unref (source=0x844f90) at gmain.c:2044
#6 0x00007fc36cb475a9 in soup_session_dispose (object=0x61e100) at soup-session.c:305
#7 0x00007fc36c4d270e in g_object_unref (_object=0x61e100) at gobject.c:3160
#8 0x000000000040584b in dispose_session (parser=0x618020) at al_playlist_parser.c:859
#9 0x0000000000403b0b in al_playlist_parser_dispose (obj=0x618020) at al_playlist_parser.c:129
#10 0x00007fc36c4d270e in g_object_unref (_object=0x618020) at gobject.c:3160
#11 0x0000000000403315 in main (argc=1, argv=0x7fff462cdca8) at al_expand.c:143
How can I avoid the deadlock?
Otherwise, is there any way to bind a timeout to self.process.wait() and terminate that thread if the subprocess is taking too long?
If you only have to call a subprocess on a list of arguments, I tend to do something like this:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith <rsmith@xs4all.nl>
# $Date: 2013-11-24 11:06:39 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mp4.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/

"""Convert all video files given on the command line to H.264/AAC streams in
an MP4 container."""

from __future__ import print_function, division  # for compatibility with Python 2.

__version__ = '$Revision: cac4808 $'[11:-2]

import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep


def warn(s):
    """Print a warning message.

    :param s: Message string
    """
    s = ' '.join(['Warning:', s])
    print(s, file=sys.stderr)


def checkfor(args, rv=0):
    """Make sure that a program necessary for using this script is
    available.

    :param args: String or list of strings of commands. A single string may
    not contain spaces.
    :param rv: Expected return value from evoking the command.
    """
    if isinstance(args, str):
        if ' ' in args:
            raise ValueError('no spaces in single command allowed')
        args = [args]
    try:
        with open(os.devnull, 'w') as bb:
            rc = subprocess.call(args, stdout=bb, stderr=bb)
        if rc != rv:
            raise OSError
    except OSError as oops:
        outs = "Required program '{}' not found: {}."
        print(outs.format(args[0], oops.strerror))
        sys.exit(1)


def startencoder(fname):
    """Use ffmpeg to convert a video file to H.264/AAC
    streams in an MP4 container.

    :param fname: Name of the file to convert.
    :returns: a 3-tuple of a Process, input path and output path
    """
    basename, ext = os.path.splitext(fname)
    known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
    if ext.lower() not in known:
        warn("File {} has unknown extension, ignoring it.".format(fname))
        return (None, fname, None)
    ofn = basename + '.mp4'
    args = ['ffmpeg', '-i', fname, '-c:v', 'libx264', '-crf', '29', '-flags',
            '+aic+mv4', '-c:a', 'libfaac', '-sn', ofn]
    with open(os.devnull, 'w') as bitbucket:
        try:
            p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
            print("Conversion of {} to {} started.".format(fname, ofn))
        except Exception:
            warn("Starting conversion of {} failed.".format(fname))
            return (None, fname, None)  # do not return an unbound 'p'
    return (p, fname, ofn)


def manageprocs(proclist):
    """Check a list of subprocess tuples for processes that have ended and
    remove them from the list.

    :param proclist: a list of (process, input filename, output filename)
    tuples.
    """
    print('# of conversions running: {}\r'.format(len(proclist)), end='')
    sys.stdout.flush()
    for p in proclist:
        pr, ifn, ofn = p
        if pr is None:
            proclist.remove(p)
        elif pr.poll() is not None:
            print('Conversion of {} to {} finished.'.format(ifn, ofn))
            proclist.remove(p)
    sleep(0.5)


def main(argv):
    """Main program.

    :param argv: command line arguments
    """
    if len(argv) == 1:
        binary = os.path.basename(argv[0])
        print("{} version {}".format(binary, __version__), file=sys.stderr)
        print("Usage: {} [file ...]".format(binary), file=sys.stderr)
        sys.exit(0)
    checkfor(['ffmpeg', '-version'])
    avis = argv[1:]
    procs = []
    maxprocs = cpu_count()
    for ifile in avis:
        while len(procs) == maxprocs:
            manageprocs(procs)
        procs.append(startencoder(ifile))
    while len(procs) > 0:
        manageprocs(procs)


if __name__ == '__main__':
    main(sys.argv)
If hanging processes are an issue, you could adapt manageprocs to kill a subprocess after a certain amount of time.
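For example, a minimal sketch of such a timeout check (illustrative only; it assumes each tuple is extended with a start time recorded via time.time() when the process is launched):

import time

def kill_stale(proclist, timeout=300):
    # Hypothetical helper: assumes (process, input, output, starttime) tuples.
    now = time.time()
    for p in list(proclist):
        pr, ifn, ofn, started = p
        if pr is not None and pr.poll() is None and now - started > timeout:
            pr.kill()  # or pr.terminate() for a gentler SIGTERM first
            print('Conversion of {} timed out and was killed.'.format(ifn))
            proclist.remove(p)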