Extracting frame fails with: Invalid data found when processing input - python

I have the following method to create a dummy video file:
def create_dummy_mp4_video() -> None:
cmd = (
f"ffmpeg -y " # rewrite if exists
f"-f lavfi -i color=size=100x100:rate=10:color=black " # blank video
f"-f lavfi -i anullsrc=channel_layout=stereo:sample_rate=44100 " # silent audio
f"-t 1 " # video duration, seconds
"output.mp4" # file name
)
proc = subprocess.run(
shlex.split(cmd),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=False,
)
if proc.returncode != 0:
raise Exception()
@dataclass(frozen=True)
class FakeVideo:
body: bytes
width: int
height: int
fps: int
size: int
frames: int
length_s: int
def video() -> FakeVideo:
w, h, fps, sec, filename = 100, 100, 10, 1, "output.mp4"
create_dummy_mp4_video()
video_path = os.path.join(os.getcwd(), filename)
with open(video_path, "rb") as file:
body = file.read()
size = len(body)
frames = fps // sec
return FakeVideo(
body=body, width=w, height=h, fps=fps,
size=size, frames=frames, length_s=sec,
)
Then I want to extract a frame at a specific time. I did it like this:
async def run_shell_command(frame_millisecond, data: bytes) -> bytes:
async with aiofiles.tempfile.NamedTemporaryFile("wb") as file:
await file.write(data)
proc = await asyncio.create_subprocess_exec(
"ffmpeg",
"-i",
file.name,
"-ss",
f"{frame_millisecond}ms", # seek the position to the specific millisecond
"-vframes", "1", # only handle one video frame
"-c:v", "png", # select the output encoder
"-f", "image2pipe", "-", # force output file to stdout,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
level = logging.DEBUG if proc.returncode == 0 else logging.WARN
LOGGER.log(level, f"[cmd exited with {proc.returncode}]")
if stderr:
print(level, f"[stderr]{stderr.decode()}")
LOGGER.log(level, f"[stderr]{stderr.decode()}")
return stdout
async def runner():
v = video()
time = int(v.length_s / 2 * 1000)
res = await run_shell_command(time, v.body)
assert isinstance(res, bytes)
assert imghdr.what(h=res, file=None) == "png"
loop = asyncio.get_event_loop()
loop.run_until_complete(runner())
This code fails with the following error:
/tmp/tmpzo786lfg: Invalid data found when processing input
Please help to find the problem with my code.
During investigation I found that it works if I change the size of the video like this:
f"-f lavfi -i color=size=1280x720:rate=25:color=black " # blank video
but I want to be able to process any video.
I use ffmpeg 4.3.3-0+deb11u1

It looks like you have to make sure the data is actually written to the temporary file before executing FFmpeg.
I don't have any experience with asyncio and aiofiles, and I am running Windows 10, so I am not sure about the Linux behavior...
I tried to add await file.flush() after file.write(data), but the FFmpeg execution result was "Permission denied" (on Windows a NamedTemporaryFile cannot be opened a second time while it is still open).
I solved it using the solution from the following post:
Add delete=False argument to tempfile.NamedTemporaryFile:
async with aiofiles.tempfile.NamedTemporaryFile("wb", delete=False) as file:
Add await file.close() after await file.write(data).
Closing the file makes sure that all the data is written to the file before executing FFmpeg.
Add os.unlink(file.name) before return stdout.
Complete code:
import subprocess
import asyncio
from dataclasses import dataclass
import shlex
import aiofiles
import os
import logging
import imghdr
def create_dummy_mp4_video() -> None:
cmd = (
f"ffmpeg -y " # rewrite if exists
f"-f lavfi -i color=size=100x100:rate=10:color=black " # blank video
f"-f lavfi -i anullsrc=channel_layout=stereo:sample_rate=44100 " # silent audio
f"-t 1 " # video duration, seconds
"output.mp4" # file name
)
proc = subprocess.run(
shlex.split(cmd),
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL, #stderr=subprocess.PIPE,
shell=False,
)
if proc.returncode != 0:
raise Exception()
@dataclass(frozen=True)
class FakeVideo:
body: bytes
width: int
height: int
fps: int
size: int
frames: int
length_s: int
def video() -> FakeVideo:
w, h, fps, sec, filename = 100, 100, 10, 1, "output.mp4"
create_dummy_mp4_video()
video_path = os.path.join(os.getcwd(), filename)
with open(video_path, "rb") as file:
body = file.read()
size = len(body)
frames = fps // sec
return FakeVideo(
body=body, width=w, height=h, fps=fps,
size=size, frames=frames, length_s=sec,
)
async def run_shell_command(frame_millisecond, data: bytes) -> bytes:
# https://stackoverflow.com/questions/23212435/permission-denied-to-write-to-my-temporary-file/23212515
async with aiofiles.tempfile.NamedTemporaryFile("wb", delete=False) as file:
await file.write(data)
#await file.flush() # Flush data to file before executing FFmpeg ?
await file.close() # Close the file before executing FFmpeg.
proc = await asyncio.create_subprocess_exec(
"ffmpeg",
"-i",
file.name,
"-ss",
f"{frame_millisecond}ms", # seek the position to the specific millisecond
"-vframes", "1", # only handle one video frame
"-c:v", "png", # select the output encoder
"-f", "image2pipe", "-", # force output file to stdout,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await proc.communicate()
level = logging.DEBUG if proc.returncode == 0 else logging.WARN
#LOGGER.log(level, f"[cmd exited with {proc.returncode}]")
if stderr:
print(level, f"[stderr]{stderr.decode()}")
#LOGGER.log(level, f"[stderr]{stderr.decode()}")
os.unlink(file.name) # Unlink is required because delete=False was used
return stdout
async def runner():
v = video()
time = int(v.length_s / 2 * 1000)
res = await run_shell_command(time, v.body)
assert isinstance(res, bytes)
assert imghdr.what(h=res, file=None) == "png"
loop = asyncio.get_event_loop()
loop.run_until_complete(runner())
Notes:
I removed the LOGGER because I couldn't find the LOGGER module.
Next time, please add all the imports to your posted code (it's not so trivial finding them).
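As a footnote, the same write-close-run-unlink pattern can also be sketched with the standard library's tempfile instead of aiofiles. This is only an illustration of the idea above; the ffmpeg arguments are copied from the question, and the helper name extract_frame is mine:
import asyncio
import os
import tempfile

async def extract_frame(frame_millisecond: int, data: bytes) -> bytes:
    # Write the video bytes to a named temp file and close it *before*
    # launching FFmpeg, so the data is fully flushed and readable.
    tmp = tempfile.NamedTemporaryFile("wb", suffix=".mp4", delete=False)
    try:
        tmp.write(data)
        tmp.close()
        proc = await asyncio.create_subprocess_exec(
            "ffmpeg",
            "-i", tmp.name,
            "-ss", f"{frame_millisecond}ms",
            "-vframes", "1",
            "-c:v", "png",
            "-f", "image2pipe", "-",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        return stdout
    finally:
        os.unlink(tmp.name)  # delete=False means we clean up ourselves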

Related

How to wildly and/or alternatingly read from stdout and/or stderr and write to stdin of a process in python?

I have searched through a lot of different questions and answers here, but I did not find a general approach for:
reading whatever is currently available from stdout and stderr, up to the last byte available (even if it is not a \n)
writing something to stdin, depending on the information read
the command-line tool will react to this stdin input and (much later) write something or nothing
starting again from the beginning - or leaving the loop if the process has finished, capturing its return code
Most examples findable here write only ONCE to stdin and read only ONCE (before/afterwards) from stdout and/or stderr.
My intention is to "weave" reading from stdout and/or stderr and writing to stdin!
Here is an example:
starting a command-line tool (eventually with parameters) - e.g. python3.exe
always reading from stdout and stderr
e.g. read everything, and after reading >>> from stdout
write print('Hello World.')\n
e.g. read everything (Hello World.\n), and after reading >>> from stdout
write x = [6, 0]\n
e.g. read everything, and after reading >>> from stdout
write y = x[0] / x[1]\n
e.g. read everything (... ZeroDivisionError: division by zero on stdout/stderr)
...
I tried to solve it with this example found on the internet (after other failed attempts):
# Example #27
# of https://www.programcreek.com/python/example/85342/asyncio.create_subprocess_shell
# Source Project: Python-Journey-from-Novice-to-Expert Author: PacktPublishing File: 07_processes.py License: MIT License
import asyncio
import sys
async def read_from_pipe(pipe, buf, timeout_sec):
while True:
try:
pipe_byte = await asyncio.wait_for(pipe.read(1), timeout_sec)
except asyncio.TimeoutError:
break
else:
if len(pipe_byte) == 1:
buf.append(pipe_byte[0])
else:
pipe_byte == b'\n' # in case of end of file: fake end of line
if pipe_byte == b'\n':
return len(buf)
async def run_script(version):
process = await asyncio.create_subprocess_shell(
r'C:\Programs\Python\Python38-32\python.exe',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=asyncio.subprocess.PIPE,
)
if version == 0:
# Write a simple Python script to the interpreter
process.stdin.write(b'\n'.join((
b'import math',
b'x = 2 ** 8',
b'y = math.sqrt(x)',
b'z = math.sqrt(y)',
b'print("x: %d" % x)',
b'print("y: %d" % y)',
b'print("z: %d" % z)',
b'for i in range(int(z)):',
b' print("i: %d" % i)',
)))
# Make sure the stdin is flushed asynchronously
await process.stdin.drain()
# And send the end of file so the Python interpreter will
# start processing the input. Without this the process will
# stall forever.
process.stdin.write_eof()
# Fetch the lines from the stdout asynchronously
async for out in process.stdout:
# Decode the output from bytes and strip the whitespace
# (newline) at the right
print(out.decode('utf-8').rstrip())
# Wait for the process to exit
await process.wait()
elif version == 1:
cmds = [b'import math',
b'x = 2 ** 8',
b'y = math.sqrt(x)',
b'z = math.sqrt(y)',
# b'q = z / 0',
b'print("x: %d" % x)',
b'print("y: %d" % y)',
b'print("z: %d" % z)',
b'for i in range(int(z)):',
b' print("i: %d" % i)',
b'exit(0)',
]
idx = 0
while True:
stdout_buf = bytearray(b'')
out_read = await read_from_pipe(process.stdout, stdout_buf, 0.5)
print(f'stdout[{out_read}]: {stdout_buf.decode("ascii")}\n') if out_read else None
stderr_buf = bytearray(b'')
err_read = await read_from_pipe(process.stderr, stderr_buf, 0.5)
print(f'stderr[{err_read}]: {stdout_buf.decode("ascii")}\n') if err_read else None
if idx < len(cmds):
current_cmd = cmds[idx].decode('ascii')
print(f'writing command at index {idx}: "{current_cmd}"')
process.stdin.write(cmds[idx])
process.stdin.write(b'\n')
await process.stdin.drain()
process.stdin.write_eof() # tried with/without this line, afterwards program hangs
idx += 1
else:
break
await process.wait()
if sys.platform == "win32":
codepage = 'cp437'
loop = asyncio.ProactorEventLoop() # For subprocess' pipes on Windows
asyncio.set_event_loop(loop)
else:
codepage = 'utf-8'
loop = asyncio.get_event_loop()
version = 1 # version = 0 runs but is not alternatingly reading stdout/stderr and writing to stdin!
returncode = loop.run_until_complete(run_script(1))
print(f'done with return code = {returncode}.')
Currently it doesn't read anything from stdout and stderr.
And after the entries in cmds are written, the program hangs too.
Eventually it should run under Linux.
How do I write the program correctly?
Is python3.exe a "too special" command-line tool, and is it the root cause of these problems?
Hint:
This example and the solution do not have to be performant at all. The intended command-line tool to control is quite slow (overall execution 20 s to 20 min). Multithreading and multiprocessing are not really required, unless needed for a (simplified) working solution.
I found out that python3.exe is a bit too special to control. It is better to use e.g. cmd /S on Windows (/bin/bash on Linux, I read) - this works now:
# Example #27
# of https://www.programcreek.com/python/example/85342/asyncio.create_subprocess_shell
# Source Project: Python-Journey-from-Novice-to-Expert Author: PacktPublishing File: 07_processes.py License: MIT License
import asyncio
import sys
async def read_from_pipe(pipe, buf, timeout_sec):
while True:
try:
pipe_byte = await asyncio.wait_for(pipe.read(1), timeout_sec)
except asyncio.TimeoutError:
return len(buf) # no more bytes available currently on that pipe
else:
if len(pipe_byte) == 1:
buf.append(pipe_byte[0])
else:
return len(buf) # end of pipe reached
async def run_script():
process = await asyncio.create_subprocess_shell(
'cmd /S',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=asyncio.subprocess.PIPE,
)
cmds = [b'dir P*C*S*.*',
b'echo %temp%',
b'exit']
idx = 0
while True:
stdout_buf = bytearray(b'')
out_read = await read_from_pipe(process.stdout, stdout_buf, 0.5)
print(f'stdout[{out_read}]: {stdout_buf.decode("ascii")}\n') if out_read else None
stderr_buf = bytearray(b'')
err_read = await read_from_pipe(process.stderr, stderr_buf, 0.5)
print(f'stderr[{err_read}]: {stdout_buf.decode("ascii")}\n') if err_read else None
if idx < len(cmds):
current_cmd = cmds[idx].decode('ascii')
print(f'writing command at index {idx}: "{current_cmd}"')
process.stdin.write(cmds[idx])
process.stdin.write(b'\n')
await process.stdin.drain()
idx += 1
else:
pass
if process.returncode is not None:
print(f'return code = {process.returncode}')
return process.returncode
if sys.platform == "win32":
codepage = 'cp437'
loop = asyncio.ProactorEventLoop() # For subprocess' pipes on Windows
asyncio.set_event_loop(loop)
else:
codepage = 'utf-8'
loop = asyncio.get_event_loop()
returncode = loop.run_until_complete(run_script())
print(f'done with return code = {returncode}.')
The output is on my computer:
PS C:\Git\ownPythonRepository\Python\CliTap> c:; cd 'c:\Git\ownPythonRepository\Python\CliTap'; & 'C:\Programs\Python\Python38-32\python.exe' 'c:\Users\BitLauncher\.vscode\extensions\ms-python.python-2022.14.0\pythonFiles\lib\python\debugpy\adapter/../..\debugpy\launcher' '63136' '--' 'c:\Git\ownPythonRepository\Python\CliTap\PythonConsoleSandbox.py'
stdout[137]: Microsoft Windows [Version 10.0.11111.2222]
(c) Microsoft Corporation. All rights reserved.
C:\Git\ownPythonRepository\Python\CliTap>
stdout[340]: dir P*C*S*.*
Volume in drive C is What
Volume Serial Number is 9999-9999
Directory of C:\Git\ownPythonRepository\Python\CliTap
2022-09-26 23:52 2,365 PythonConsoleSandbox.py
1 File(s) 2,365 bytes
0 Dir(s) 99,999,999,999 bytes free
C:\Git\ownPythonRepository\Python\CliTap>
writing command at index 1: "echo %temp%"
stdout[93]: echo %temp%
C:\Users\BitLau~1\AppData\Local\Temp
C:\Git\ownPythonRepository\Python\CliTap>
writing command at index 2: "exit"
stdout[5]: exit
return code = 1
done with return code = 1.
PS C:\Git\ownPythonRepository\Python\CliTap>
That's it - now I will be able to write specific commands to stdin depending upon stdout and/or stderr... great. Later I can improve it with multithreading :-) if needed.
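For the Linux case mentioned above (/bin/bash instead of cmd /S), here is a minimal sketch of the same alternating write/read idea; the helper names read_available and run_bash are mine, not from the code above:
import asyncio

async def read_available(pipe, timeout_sec=0.5) -> bytes:
    # Read whatever is currently available from the pipe, byte by byte,
    # until no more data arrives within timeout_sec or EOF is reached.
    buf = bytearray()
    while True:
        try:
            chunk = await asyncio.wait_for(pipe.read(1), timeout_sec)
        except asyncio.TimeoutError:
            return bytes(buf)  # nothing more available right now
        if not chunk:
            return bytes(buf)  # end of pipe reached
        buf.extend(chunk)

async def run_bash(commands) -> int:
    process = await asyncio.create_subprocess_exec(
        '/bin/bash',
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    for cmd in commands:
        process.stdin.write(cmd + b'\n')
        await process.stdin.drain()
        out = await read_available(process.stdout)
        err = await read_available(process.stderr)
        if out:
            print('stdout:', out.decode())
        if err:
            print('stderr:', err.decode())
    process.stdin.write(b'exit\n')
    await process.stdin.drain()
    await process.wait()
    return process.returncode

print(asyncio.run(run_bash([b'ls', b'echo $HOME'])))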

How to send data from a Python asyncio socket server to a subprocess?

Python 3.6
This program:
launches ffmpeg as a subprocess
waits for a socket connection
receives PNG images on the socket
sends the PNG images to ffmpeg stdin
The problem is step 4. I can't work out how to send the received PNG image from the coroutine to the stdin of the ffmpeg subprocess. Can anyone please point me in the right direction to send the PNG image to the stdin of the ffmpeg subprocess?
EDIT: to clarify - there's nothing wrong with this code, it receives the PNGs fine over the socket. I just don't have any idea how to send the PNGs on to the stdin of ffmpeg. I've done quite a lot of Python, but asyncio is new to me and how things tie together is a mystery.
thanks!
import asyncio
import argparse, sys
import sys
import base64
from struct import unpack
parser = argparse.ArgumentParser()
parser.add_argument('--port', help='ffmpeg listen port')
parser.add_argument('--outputfilename', help='ffmpeg output filename')
args = parser.parse_args()
if not args.port:
print("port is required")
sys.exit(1)
if not args.outputfilename:
print("outputfilename is required")
sys.exit(1)
async def _read_stream(stream, cb):
while True:
line = await stream.readline()
if line:
cb(line)
else:
break
async def _stream_subprocess(cmd, stdout_cb, stderr_cb):
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=asyncio.subprocess.PIPE,
)
await asyncio.wait([
_read_stream(process.stdout, stdout_cb),
_read_stream(process.stderr, stderr_cb)
])
return await process.wait()
def process_stderr(line):
# ffmpeg finishes processing and writes the output file when its input is closed
# thus the completion message will come out of stderr only when the socket or stdin or whatever is closed
line = line.decode()
print(line)
if "Output" in line:
if args.outputfilename in line:
print('finished!!!!')
sys.exit(0)
def process_stdout(line):
print("STDOUT: %s" % line)
def spawn_ffmpeg(listenport, outputfilename, framerate=30, format='webm'):
outputdirectory = "sftp://username:password#10.0.0.196/var/www/static/"
input_type = "pipe:0" #stdin
params = \
f"ffmpeg " \
f"-loglevel 56 " \
f"-y -framerate {framerate} " \
f"-f image2pipe " \
f"-i {input_type} " \
f"-c:v libvpx-vp9 " \
f"-b:v 1024k " \
f"-q:v 0 " \
f"-pix_fmt yuva420p " \
f"{outputdirectory}{outputfilename} "
return params
async def socket_png_receiver(reader, writer):
while True:
# first the client sends the length of the data to us
lengthbuf = await reader.read(4)
length, = unpack('!I', lengthbuf)
if length == 0:
print("length was 0, finish") # a zero length PNG says that there are no more frames
break
# then we read the PNG
data = await reader.read(length)
data = data.decode() # from bytes to string
png_bytes = base64.b64decode(data) # from base64 to bytes
# next line was just a guess, so I have commented it out.
#await proc.communicate(png_bytes)
print("Got PNG, length", length)
return
loop = asyncio.get_event_loop()
command = spawn_ffmpeg("24897", args.outputfilename)
ffmpeg_process = _stream_subprocess(
command.split(),
process_stdout,
process_stderr,
)
#coro = asyncio.start_server(socket_png_receiver, '0.0.0.0', args.port, ffmpeg_process, loop=loop)
coro = asyncio.start_server(socket_png_receiver, '0.0.0.0', args.port, loop=loop)
several_futures = asyncio.gather(ffmpeg_process, coro)
server = loop.run_until_complete(several_futures)
server.close()
loop.close()
Here are the changes suggested by @user4815162342
import asyncio
import argparse, sys
import sys
import base64
from struct import unpack
parser = argparse.ArgumentParser()
parser.add_argument('--port', help='ffmpeg listen port')
parser.add_argument('--outputfilename', help='ffmpeg output filename')
args = parser.parse_args()
if not args.port:
print("port is required")
sys.exit(1)
if not args.outputfilename:
print("outputfilename is required")
sys.exit(1)
if not args.outputfilename.endswith('.webm'):
print("outputfilename must end with '.webm'")
sys.exit(1)
async def _read_stream(stream, cb):
while True:
line = await stream.readline()
if line:
cb(line)
else:
break
async def _stream_subprocess(cmd, stdout_cb, stderr_cb):
global process
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
stdin=asyncio.subprocess.PIPE,
)
await asyncio.wait([
_read_stream(process.stdout, stdout_cb),
_read_stream(process.stderr, stderr_cb)
])
return await process.wait()
def process_stderr(line):
# ffmpeg finishes processing and writes the output file when its input is closed
# thus the completion message will come out of stderr only when the socket or stdin or whatever is closed
line = line.decode()
print(line)
if "Output" in line:
if args.outputfilename in line:
print('finished!!!!')
sys.exit(0)
def process_stdout(line):
print("STDOUT: %s" % line)
def spawn_ffmpeg(listenport, outputfilename, framerate=30, format='webm'):
outputdirectory = "sftp://username:password@10.0.0.196/var/www/static/"
input_type = "pipe:0" # stdin
params = \
f"ffmpeg " \
f"-loglevel 56 " \
f"-y " \
f"-framerate {framerate} " \
f"-i {input_type} " \
f"{outputdirectory}{outputfilename} "
print(params)
return params
async def socket_png_receiver(reader, writer):
while True:
# first the client sends the length of the data to us
lengthbuf = await reader.readexactly(4)
length, = unpack('!I', lengthbuf)
if length == 0:
print("length was 0, finish") # a zero length PNG says that there are no more frames
break
# then we read the PNG
print("Got PNG, length", length)
data = await reader.readexactly(length)
print(data)
png_bytes = base64.b64decode(data) # from base64 to bytes
process.stdin.write(png_bytes)
return
loop = asyncio.get_event_loop()
command = spawn_ffmpeg("24897", args.outputfilename)
ffmpeg_process = _stream_subprocess(
command.split(),
process_stdout,
process_stderr,
)
coro = asyncio.start_server(socket_png_receiver, '0.0.0.0', args.port, loop=loop)
several_futures = asyncio.gather(ffmpeg_process, coro)
server = loop.run_until_complete(several_futures)
server.close()
loop.close()
There are several issues with the code:
await reader.read(length) should be await reader.readexactly(length) because the argument to StreamReader.read is the maximum number of bytes to read, and it can return fewer.
proc.communicate(png_bytes) should be changed to proc.stdin.write(png_bytes). The call to communicate() is incorrect here because you want to continue talking to the program, while communicate() waits for all the streams to close.
The instance of process returned by asyncio.create_subprocess_exec(...) must be made available to socket_png_receiver, e.g. by making the process variable global using global process. (It would be better to use a class and assign to self.process, but that is beyond the scope of this answer; a rough sketch of that approach follows at the end of this answer.)
Some potential issues:
There is no need to decode data from bytes to string, base64.b64decode can accept bytes just fine.
spawn_ffmpeg() doesn't appear to use its listenport parameter.
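A rough sketch of that class-based variant (illustrative only, not the asker's code; it assumes the same command list produced by spawn_ffmpeg() above):
import asyncio

class FfmpegWriter:
    """Owns the ffmpeg subprocess so the socket handler needs no global."""

    def __init__(self):
        self.process = None

    async def start(self, cmd):
        self.process = await asyncio.create_subprocess_exec(
            *cmd,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

    def write_frame(self, png_bytes: bytes):
        # Feed one decoded PNG into ffmpeg's stdin.
        self.process.stdin.write(png_bytes)

    async def finish(self) -> int:
        # Closing stdin signals ffmpeg that there are no more frames.
        self.process.stdin.close()
        return await self.process.wait()
The socket handler can then capture an FfmpegWriter instance through a closure or functools.partial and call write_frame(png_bytes) instead of relying on a global process variable.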

ValueError: Separator is not found, and chunk exceed the limit

I'm running an external downloader script through asyncio.subprocess and whenever I try to download large data asyncio gives the following error:
asyncio.streams.LimitOverrunError: Separator is not found, and chunk
exceed the limit
What causes this, and how do I solve it?
import asyncio, subprocess, websockets, json
from os.path import expanduser, sep
async def handler(websocket, path):
print("New client connected.")
await websocket.send('CONNECTED')
path = expanduser("~") + sep
try:
while True:
inbound = await websocket.recv()
if inbound is None:
break
while inbound != None:
cmd = ('downloader_script', '-v', '-p', '-o', '/home/blah/blah', inbound)
process = await asyncio.create_subprocess_exec(*cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
async for output in process.stdout:
for line in output.decode().split('\r'):
line = line.strip()
if line == '':
continue
data = {}
await asyncio.sleep(1)
if line.startswith('INFO:'):
data['INFO'] = line.split('INFO: ')[1]
elif line.startswith('['):
data['progress'] = line.split(']')[0][1:]
elif line.startswith('ERROR:'):
data['ERROR'] = line.split('ERROR: ')[1]
else:
data['message'] = line
print (data)
await websocket.send(json.dumps(data))
await websocket.send(json.dumps({'progress': 'DONE'}))
await websocket.send('bye!')
break
except websockets.exceptions.ConnectionClosed:
print("Client disconnected.")
if __name__ == "__main__":
server = websockets.serve(handler, '0.0.0.0', 8080)
loop = asyncio.get_event_loop()
loop.run_until_complete(server)
loop.run_forever()
Since asyncio.subprocess currently works best with newlines, maybe try running your script through something that replaces \r with \n.
Here is a small bash script that can help (cr2lf.sh):
#!/bin/bash
"$@" | tr '\r' '\n'
Example:
# slow_ugly_writer.py
import sys
import time
n = int(sys.argv[1])
for i in range(1, n + 1):
    time.sleep(0.5)
    print("*" * i, end="\r")
Usage:
./cr2lf.sh python -u slow_ugly_writer.py 10
And from within your program:
cmd = ('cr2lf.sh', 'downloader_script', '-v', '-p', '-o', '/home/blah/blah', inbound)
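A pure-Python alternative is to drop the line-based iteration altogether and read fixed-size chunks, splitting on \r yourself (create_subprocess_exec also accepts a limit= argument to enlarge the StreamReader buffer). A rough sketch, assuming the same downloader_script command; the helper name stream_progress is mine:
import asyncio, subprocess

async def stream_progress(cmd) -> int:
    process = await asyncio.create_subprocess_exec(
        *cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    pending = ''
    while True:
        chunk = await process.stdout.read(4096)  # no separator, so no limit overrun
        if not chunk:
            break
        pending += chunk.decode(errors='replace')
        *lines, pending = pending.replace('\r', '\n').split('\n')
        for line in lines:
            line = line.strip()
            if line:
                print(line)  # or build the JSON progress dict as above
    if pending.strip():
        print(pending.strip())
    return await process.wait()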

multiprocessing - execute external command and wait before proceeding

I am using Linux. I have an external executable called "combine" and a loop of 20 iterations.
For each iteration, "combine" needs to be called with an argument that depends on the i-th iteration. Example:
arguments = " "
for i in range(1,20):
arguments += str(i) + "_image.jpg "
# begin of pseudo-code
execute: "./combine" + arguments # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images" # use all cores - possible for one single command?
How do I achieve this using the multiprocessing module in Python?
You can use subprocess.Popen to launch the external commands asynchronously, and store each Popen object returned in a list. Once you've launched all the processes, just iterate over them and wait for each to finish using popen_object.wait.
import shlex
import subprocess

arguments = " "
processes = []
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    processes.append(subprocess.Popen(shlex.split("./combine" + arguments)))
for p in processes:
    p.wait()
subprocess.call("./merge_resized_images")
However, this will launch twenty concurrent processes, which is probably going to hurt performance.
To avoid that, you can use a ThreadPool to limit yourself to some lower number of concurrent processes (multiprocessing.cpu_count is a good number), and then use pool.join to wait for them all to finish.
import multiprocessing
import subprocess
import shlex
from multiprocessing.pool import ThreadPool

def call_proc(cmd):
    """ This runs in a separate thread. """
    #subprocess.call(shlex.split(cmd))  # This will block until cmd finishes
    p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    return (out, err)

pool = ThreadPool(multiprocessing.cpu_count())
results = []
arguments = " "
for i in range(1, 20):
    arguments += str(i) + "_image.jpg "
    results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))
# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
    out, err = result.get()
    print("out: {} err: {}".format(out, err))
subprocess.call("./merge_resized_images")
Each thread will release the GIL while waiting for the subprocess to complete, so they'll all run in parallel.
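On current Python versions the same idea is often written with concurrent.futures instead of multiprocessing.pool.ThreadPool; a minimal sketch (the combine/merge commands come from the question, the exact argument layout is only illustrative):
import shlex
import subprocess
from concurrent.futures import ThreadPoolExecutor

def run(cmd):
    # Run one external command in a worker thread and capture its output.
    return subprocess.run(shlex.split(cmd), capture_output=True)

commands = ["./combine {}_image.jpg".format(i) for i in range(1, 20)]
with ThreadPoolExecutor() as pool:           # default worker count scales with CPUs
    results = list(pool.map(run, commands))  # returns once every command has finished
subprocess.run(["./merge_resized_images"])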
My solution to this problem is to create and manage a list of subprocesses. Pay special attention to startencoder and manageprocs. That is where the actual work is being started and managed.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/
"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""
from __future__ import print_function, division
__version__ = '$Revision: a42ef58 $'[11:-2]
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep
def warn(s):
"""Print a warning message.
:param s: Message string
"""
s = ' '.join(['Warning:', s])
print(s, file=sys.stderr)
def checkfor(args, rv=0):
"""Make sure that a program necessary for using this script is
available.
:param args: String or list of strings of commands. A single string may
not contain spaces.
:param rv: Expected return value from evoking the command.
"""
if isinstance(args, str):
if ' ' in args:
raise ValueError('no spaces in single command allowed')
args = [args]
try:
with open(os.devnull, 'w') as bb:
rc = subprocess.call(args, stdout=bb, stderr=bb)
if rc != rv:
raise OSError
except OSError as oops:
outs = "Required program '{}' not found: {}."
print(outs.format(args[0], oops.strerror))
sys.exit(1)
def startencoder(fname):
"""Use ffmpeg to convert a video file to Theora/Vorbis
streams in a Matroska container.
:param fname: Name of the file to convert.
:returns: a 3-tuple of a Process, input path and output path
"""
basename, ext = os.path.splitext(fname)
known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
if ext.lower() not in known:
warn("File {} has unknown extension, ignoring it.".format(fname))
return (None, fname, None)
ofn = basename + '.mkv'
args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
'libvorbis', '-q:a', '3', '-sn', ofn]
with open(os.devnull, 'w') as bitbucket:
try:
p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
print("Conversion of {} to {} started.".format(fname, ofn))
except:
warn("Starting conversion of {} failed.".format(fname))
return (p, fname, ofn)
def manageprocs(proclist):
"""Check a list of subprocesses tuples for processes that have ended and
remove them from the list.
:param proclist: a list of (process, input filename, output filename)
tuples.
"""
print('# of conversions running: {}\r'.format(len(proclist)), end='')
sys.stdout.flush()
for p in proclist:
pr, ifn, ofn = p
if pr is None:
proclist.remove(p)
elif pr.poll() is not None:
print('Conversion of {} to {} finished.'.format(ifn, ofn))
proclist.remove(p)
sleep(0.5)
def main(argv):
"""Main program.
:param argv: command line arguments
"""
if len(argv) == 1:
binary = os.path.basename(argv[0])
print("{} version {}".format(binary, __version__), file=sys.stderr)
print("Usage: {} [file ...]".format(binary), file=sys.stderr)
sys.exit(0)
checkfor(['ffmpeg', '-version'])
avis = argv[1:]
procs = []
maxprocs = cpu_count()
for ifile in avis:
while len(procs) == maxprocs:
manageprocs(procs)
procs.append(startencoder(ifile))
while len(procs) > 0:
manageprocs(procs)
if __name__ == '__main__':
main(sys.argv)

convert Ghostscript from os.popen to subprocess.Popen - python

I need to create a monkey patch for Ghostscript: I have to migrate from os.popen to subprocess.Popen because I can't use the shell on my system.
I tried it in this way:
def mioGhostscript(tile, size, fp):
"""Render an image using Ghostscript (Unix only)"""
# Unpack decoder tile
decoder, tile, offset, data = tile[0]
length, bbox = data
import tempfile, os
file = tempfile.mktemp()
# Build ghostscript command
command = ["gs",
"-q", # quite mode
"-g%dx%d" % size, # set output geometry (pixels)
"-dNOPAUSE -dSAFER", # don't pause between pages, safe mode
"-sDEVICE=ppmraw", # ppm driver
"-sOutputFile=%s" % file,# output file
"- >/dev/null 2>/dev/null"
]
#command = shlex.split(string.join(command))
# push data through ghostscript
try:
#gs = os.popen(command, "w")
args = command#['gs','-dSAFER','-dNOPAUSE','-dBATCH','-sDEVICE=jpeg','-sOutputFile=/home/user/output2.jpg /home/user/downloads/test.pdf']
gs = subprocess.Popen( args, stdout = PIPE, stderr = STDOUT, stdin=PIPE )
# adjust for image origin
if bbox[0] != 0 or bbox[1] != 0:
#gs.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
gs.stdin.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
fp.seek(offset)
while length > 0:
s = fp.read(8192)
if not s:
break
length = length - len(s)
raise Exception(s)
gs.stdin.write(s)
gs.communicate()[0]
status = gs.stdin.close()
#status = gs.close()
#if status:
# raise IOError("gs failed (status %d)" % status)
im = Image.core.open_ppm(file)
finally:
try: os.unlink(file)
except: pass
return im
import PIL
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
but i have this traceback:
Traceback (most recent call last):
  File "/home/web/lib/driver_mod_python.py", line 252, in handler
    buf = m.__dict__[pard['program']](pard)
  File "/home/dtwebsite/bin/cms_gest_ordini.py", line 44, in wrapped
    return func(pard)
  File "/home/dtwebsite/bin/cms_gest_ordini.py", line 95, in wrapped
    return func(pard)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 341, in picking_list
    tr_modelllo = render_row_picking_list(pard, item, picked=0, plist_allowed=plist_allowed)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 432, in render_row_picking_list
    aa = a.tostring()
  File "/rnd/apps/interpreters/python-2.5.6/lib/python2.5/site-packages/PIL/Image.py", line 532, in tostring
    self.load()
  File "/rnd/apps/interpreters/python-2.5.6/lib/python2.5/site-packages/PIL/EpsImagePlugin.py", line 283, in load
    self.im = Ghostscript(self.tile, self.size, self.fp)
  File "/home/dtwebsite/bin/cms_gest_picking_list.py", line 64, in mioGhostscript
    gs.stdin.write(s)
IOError: [Errno 32] Broken pipe
Can someone help me, please?
I found the solution to the problem.
It was with the PIL package: something didn't compile right during the installation.
After that I had a dependency problem.
I fixed it in the following way:
import PIL.EpsImagePlugin
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
Then I saw this in the command:
"- >/dev/null 2>/dev/null"
That is shell syntax; since subprocess runs without a shell, the whole string was passed through as a literal argument, so Ghostscript tried to read a file literally named - >/dev/null 2>/dev/null, which doesn't exist.
I replaced
"- >/dev/null 2>/dev/null"
with
"-"
and the program now reads from stdin.
The final code is:
def mioGhostscript(tile, size, fp):
"""Render an image using Ghostscript (Unix only)"""
# Unpack decoder tile
decoder, tile, offset, data = tile[0]
length, bbox = data
import tempfile, os
file = tempfile.mktemp()
# Build ghostscript command
command = ["gs",
"-q", # quite mode
"-g%dx%d" % size, # set output geometry (pixels)
"-dNOPAUSE -dSAFER", # don't pause between pages, safe mode
"-sDEVICE=ppmraw", # ppm driver
"-sOutputFile=%s" % file,# output file
"-"
]
#command = shlex.split(string.join(command))
# push data through ghostscript
try:
#gs = os.popen(command, "w")
args = command#['gs','-dSAFER','-dNOPAUSE','-dBATCH','-sDEVICE=jpeg','-sOutputFile=/home/user/output2.jpg /home/user/downloads/test.pdf']
gs = subprocess.Popen( args, stdout = PIPE, stderr = STDOUT, stdin=PIPE )
# adjust for image origin
if bbox[0] != 0 or bbox[1] != 0:
#gs.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
gs.stdin.write("%d %d translate\n" % (-bbox[0], -bbox[1]))
fp.seek(offset)
while length > 0:
s = fp.read(8192)
if not s:
break
length = length - len(s)
gs.stdin.write(s)
gs.communicate()[0]
status = gs.stdin.close()
#status = gs.close()
#if status:
# raise IOError("gs failed (status %d)" % status)
im = Image.core.open_ppm(file)
finally:
try: os.unlink(file)
except: pass
return im
import PIL.EpsImagePlugin
PIL.EpsImagePlugin.Ghostscript = mioGhostscript
I hope this post can help someone.
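As a side note on the removed - >/dev/null 2>/dev/null: with subprocess, the output silencing is done from Python instead of by the shell. A minimal Python 3 sketch (the gs options mirror the ones above; the PostScript snippet is only a placeholder):
import subprocess

command = ["gs", "-q", "-g100x100", "-dNOPAUSE", "-dSAFER",
           "-sDEVICE=ppmraw", "-sOutputFile=out.ppm", "-"]
gs = subprocess.Popen(command, stdin=subprocess.PIPE,
                      stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
gs.stdin.write(b"%!PS\nshowpage\n")  # feed PostScript through stdin
gs.stdin.close()                     # EOF tells gs the input is complete
gs.wait()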
