Trimming videos with 'ffmpeg and ffprobe' - python

I am working on an ETL process, and I'm now in the final stage of preprocessing my videos. I used the script below (reference: #FarisHijazi) to first auto-detect black-screen frames using ffprobe and then trim them out using ffmpeg.
The script worked for me but the problems are:
It cut off all other good frames together with the first bad frames. e.g. if gBgBgBgB represents a sequence of good and BAD frames for 5sec each, the script only returned the first g(5sec) and cut off the other BgBgBgB after it. I want to have only g g g g where all B B B B has been removed
I also want to detect other colors aside black-screen e.g. green-screen or red-screen or blurry part of video
Script doesn't work if video has no audio in it.
import argparse
import os
import shlex
import subprocess
# Command-line interface: one positional input path plus an --invert switch.
# The module docstring is passed as the description; given positionally it
# would be taken as the program name (`prog`) instead.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument("input", type=str, help="input video file")
parser.add_argument(
    "--invert",
    action="store_true",
    help="remove nonblack instead of removing black",
)
args = parser.parse_args()

##FIXME: sadly you must chdir so that the ffprobe command will work
# Only change directory when the path has a directory part: os.chdir("")
# raises FileNotFoundError when a bare file name is given.
input_dir, input_name = os.path.split(args.input)
if input_dir:
    os.chdir(input_dir)
args.input = input_name

# Output name is "<stem>.out<ext>", or "<stem>.invertout<ext>" with --invert.
# splitext keeps this well-formed for names that have no extension.
stem, ext = os.path.splitext(args.input)
outpath = stem + "." + ("invert" if args.invert else "") + "out" + ext
def delete_back2back(l):
    """Collapse each run of consecutive equal items in ``l`` to one item.

    Only adjacent duplicates are merged; a value that reappears later in the
    sequence is kept again. Returns a new list.
    """
    from itertools import groupby

    collapsed = []
    for key, _run in groupby(l):
        collapsed.append(key)
    return collapsed
def construct_ffmpeg_trim_cmd(timepairs, inpath, outpath, has_audio=True):
    """Build the ffmpeg shell command that keeps only the given time ranges.

    Each ``(start, end)`` pair becomes a ``trim`` (and, with audio, ``atrim``)
    filter branch; all branches are then joined with the ``concat`` filter.

    Args:
        timepairs: Sequence of ``(start, end)`` pairs, in seconds, to keep.
        inpath: Input video path; embedded in double quotes.
        outpath: Output video path; embedded in double quotes.
        has_audio: Set to ``False`` for inputs without an audio stream. The
            audio branches and the ``[outa]`` mapping are then omitted, since
            referencing ``[0:a]`` on such a file makes ffmpeg fail.

    Returns:
        The complete command line as a single string.

    Raises:
        ValueError: If ``timepairs`` is empty (``concat=n=0`` is not a valid
            filter graph).
    """
    if not timepairs:
        raise ValueError("timepairs must contain at least one (start, end) pair")

    filters = []
    concat_inputs = []
    for i, (start, end) in enumerate(timepairs):
        filters.append(
            f"[0:v]trim=start={start}:end={end},setpts=PTS-STARTPTS,format=yuv420p[{i}v]; "
        )
        concat_inputs.append(f"[{i}v]")
        if has_audio:
            filters.append(
                f"[0:a]atrim=start={start}:end={end},asetpts=PTS-STARTPTS[{i}a]; "
            )
            concat_inputs.append(f"[{i}a]")

    if has_audio:
        concat = f"concat=n={len(timepairs)}:v=1:a=1[outv][outa]"
        maps = " -map [outv] -map [outa]"
    else:
        concat = f"concat=n={len(timepairs)}:v=1:a=0[outv]"
        maps = " -map [outv]"

    filter_graph = "".join(filters) + "".join(concat_inputs) + concat
    return (
        f'ffmpeg -i "{inpath}" -y -r 20 -filter_complex '
        f'"{filter_graph}"{maps} "{outpath}"'
    )
def get_blackdetect(inpath, invert=False):
    """Run ffprobe's ``blackdetect`` filter and return the time ranges to keep.

    Args:
        inpath: Video file name, resolved relative to the current directory.
        invert: When ``False`` (default) the returned ranges are the stretches
            between black sections, starting from 0. When ``True`` the black
            sections themselves are returned.

    Returns:
        A list of ``(start, end)`` tuples in seconds.

    Raises:
        AssertionError: If ffprobe reports no black section at all.
        subprocess.CalledProcessError: If ffprobe exits with an error.

    Note:
        In the non-inverted case the last ``black_end`` is dropped, so content
        after the final black section is not included; the video duration is
        not queried here.
    """
    ffprobe_cmd = f'ffprobe -f lavfi -i "movie={inpath},blackdetect[out0]" -show_entries tags=lavfi.black_start,lavfi.black_end -of default=nw=1 -v quiet'
    print("ffprobe_cmd:", ffprobe_cmd)
    lines = (
        subprocess.check_output(shlex.split(ffprobe_cmd))
        .decode("utf-8")
        .split("\n")
    )
    # Output lines have the form "<tag>=<seconds>"; consecutive repeats of the
    # same line are collapsed before parsing.
    times = [
        float(x.split("=")[1].strip()) for x in delete_back2back(lines) if x
    ]
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if not times:
        raise AssertionError("no black scene detected")
    if not invert:
        # Shift by one so that pairs become (previous black_end, next black_start).
        times = [0] + times[:-1]
    # Step through the whole list two entries at a time. Stopping at
    # len(times) // 2 only paired the first half of the timestamps, which
    # discarded every later segment.
    timepairs = [
        (times[i], times[i + 1])
        for i in range(0, len(times) - 1, 2)
        # Skip empty ranges, e.g. (0, 0) when the video opens on a black frame.
        if times[i + 1] > times[i]
    ]
    return timepairs
if __name__ == "__main__":
    # Work out which ranges to keep, then let the shell run the ffmpeg command.
    segments = get_blackdetect(args.input, invert=args.invert)
    ffmpeg_cmd = construct_ffmpeg_trim_cmd(segments, args.input, outpath)
    print(ffmpeg_cmd)
    os.system(ffmpeg_cmd)

Related

How to convert images to SVG using Python potrace and Imagemagick by batch or per folder?

Credits to #eprev for this conversion (per image)
$ convert -alpha remove party-never.png pgm: \ | mkbitmap -f 32 -t 0.4 - -o - \ | potrace --svg -o party-never.svg
I would like to have it in Python. Integrate in this code:
from PIL import Image
import os
# NOTE(review): presumably lifts Pillow's maximum image size check so very
# large images can be opened — confirm against the Pillow documentation.
Image.MAX_IMAGE_PIXELS = None
# Source folder scanned by resize_aspect_fit().
path = "./*your-source-folder*"
resize_ratio = 2 # where 0.5 is half size, 2 is double size
def resize_aspect_fit():
    """Resize every image file in ``path`` by ``resize_ratio`` and save a copy.

    Width and height are scaled by the same factor, so the aspect ratio is
    preserved. Known non-image entries are skipped. Each result is written
    under the same file name into the hard-coded output folder.
    """
    # Entries that are known not to be images.
    skipped_names = ('.DS_Store', 'Icon\r')
    skipped_suffixes = (".mp4", ".txt", ".db")
    for item in os.listdir(path):
        print(item)
        if item in skipped_names or item.endswith(skipped_suffixes):
            continue
        # os.path.join inserts the separator that plain `path + item` omits
        # when `path` has no trailing slash.
        source = os.path.join(path, item)
        if not os.path.isfile(source):
            continue
        # The context manager closes the source file once it has been resized.
        with Image.open(source) as image:
            # image.size is (width, height). Multiplying directly avoids the
            # float round-off of dividing by (1 / resize_ratio).
            new_width = int(image.size[0] * resize_ratio)
            new_height = int(image.size[1] * resize_ratio)
            # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
            # Pillow 10+.
            resized = image.resize((new_width, new_height), Image.LANCZOS)
        resized.save("./*your-output-folder*/" + item)
resize_aspect_fit()
I don't know how I can integrate the two pieces of code, or if it's even possible.

Accessing MALLET's diagnostics file via Gensim

Is there a way to access MALLET's diagnostics file or its content by using the provided API via Gensim in Python?
Seems like there is no possibility.
I solved this issue by running MALLET in the command line via Python's subprocess module:
import subprocess
from pathlib import Path
MALLET_PATH = r"C:\mallet" # set to where your "bin/mallet" path is
# The values below are interpolated into directory names and into the MALLET
# command lines.
seglen = 500
topic_count = 20
# modeling() runs once for every index in range(start, iterations).
start = 0
iterations = 20
num_threads = 10 # determines threads used for parallel training
# remember to change backslashes if needed
wdir = Path("../..")
# Corpus folder for this segment length; created here if it does not exist.
corpusdir = wdir.joinpath("5_corpus", f"seglen-{seglen}")
corpusdir.mkdir(exist_ok=True, parents=True)
# Folder for the .mallet input files, and below it one folder per topic count.
# NOTE(review): mallet_dir itself is not created in the visible code — confirm
# it exists before create_input_files() writes into it.
mallet_dir = wdir.joinpath("6_evaluation/models/mallet", f"seglen-{seglen}")
topic_dir = mallet_dir.joinpath(f"topics-{topic_count}")
def create_input_files():
    """Convert each corpus ``.txt`` file into a MALLET input file.

    Every ``*.txt`` file in ``corpusdir`` is imported with MALLET's
    ``import-file`` command into ``mallet_dir`` as ``<stem>.mallet``. Files
    whose output already exists are skipped. The command runs through the
    shell with ``MALLET_PATH`` as working directory.
    """
    # create MALLETs input files
    for text_file in corpusdir.glob("*.txt"):
        target = mallet_dir.joinpath(f"{text_file.stem}.mallet")
        # doesn't need to happen more than once -- usually.
        if not target.is_file():
            print(f"--{text_file.stem}")
            import_cmd = " ".join(
                [
                    "bin\\mallet import-file",
                    f"--input {text_file.absolute()}",
                    f"--output {target.absolute()}",
                    "--keep-sequence",
                ]
            )
            subprocess.call(import_cmd, cwd=MALLET_PATH, shell=True)
    print("import finished")
def modeling():
    """Train one MALLET topic model per input file and iteration index.

    For every ``*.mallet`` file in ``mallet_dir`` and every index in
    ``range(start, iterations)``, an ``iteration-<i>`` folder is created below
    ``topic_dir/<part of the file stem before the first '-'>``. MALLET's
    ``train-topics`` command is then run from ``MALLET_PATH``, writing the
    topic state, topic keys, document composition and diagnostics XML files
    into that folder.
    """
    # start modeling
    for mallet_file in mallet_dir.glob("*.mallet"):
        for run in range(start, iterations):
            print("iteration ", str(run))
            print(f"--{mallet_file.stem}")
            # output directory
            run_dir = topic_dir.joinpath(
                mallet_file.stem.split("-")[0], f"iteration-{run}"
            )
            run_dir.mkdir(parents=True, exist_ok=True)
            out_prefix = str(run_dir.absolute())
            # output files
            state_path = f"{out_prefix}\\topic-state.gz"
            keys_path = f"{out_prefix}\\keys.txt"
            composition_path = f"{out_prefix}\\composition.txt"
            diagnostics_path = f"{out_prefix}\\diagnostics.xml"
            # building cmd string
            train_cmd = " ".join(
                [
                    "bin\\mallet train-topics",
                    f"--input {mallet_file.absolute()}",
                    f"--num-topics {topic_count}",
                    f"--output-state {state_path}",
                    f"--output-topic-keys {keys_path}",
                    f"--output-doc-topics {composition_path}",
                    f"--diagnostics-file {diagnostics_path}",
                    f"--num-threads {num_threads}",
                ]
            )
            # call mallet
            subprocess.call(train_cmd, cwd=MALLET_PATH, shell=True)
    print("models trained")
#create_input_files()
modeling()

Truncated string in subprocess.call()

import os,subprocess,io
path = "C:\\Users\\Awesome\\Music\\unconverted"
des = "C:\\Users\\Awesome\\Music\\converted"
# Run ffmpeg on the file at `path`, writing the result to `des`.
def convert( path, des):
# NOTE: both paths are concatenated into the command string unquoted, and
# `des + "-y "` leaves no space between the destination path and the -y flag.
command = "ffmpeg -i " +path+" -ab 192k "+des + "-y "
subprocess.call(command)
# Convert every entry of the source folder.
for song in os.listdir(path):
filepath = os.path.join(path,song)
# NOTE: song[len(song)-3] is a single character (the third from the end),
# not the file name with its extension removed.
despath = os.path.join(des, song[len(song)-3]+"mp3")
convert(filepath,despath)
print("complete")
this code return this error
C:\Users\Awesome\Music\unconverted\KYLE: No such file or directory
the full file name is C:\Users\Awesome\Music\unconverted\KYLE - Playinwitme (feat Kehlani).m4a I have no idea why it is truncating after the first word.
The problem is that the command will have a path with a space in it like this ffmpeg -i C:\\Users\\Awesome\\Music\\unconverted\\KYLE - Playinwitme (feat Kehlani).m4a .....,
You should remove the spaces from the name of the file or insert the whole name inside double-quotes. Also change song[len(song)-3]+"mp3" to song[0 : len(song)-3]+"mp3"
import os,subprocess,io
path = "C:\\Users\\Awesome\\Music\\unconverted"
des = "C:\\Users\\Awesome\\Music\\converted"
def convert( path, des):
    """Run ffmpeg to re-encode ``path`` into ``des`` at a 192k audio bitrate.

    Both paths are wrapped in double quotes inside the command string so that
    names containing spaces stay a single argument; ``-y`` overwrites an
    existing destination.
    """
    command = f'ffmpeg -i "{path}" -ab 192k "{des}" -y'
    subprocess.call(command)
# Convert every entry of the source folder into the destination folder.
for song in os.listdir(path):
filepath = os.path.join(path,song)
# Drops the last three characters of the name and appends "mp3", so the dot
# before the extension is kept.
# NOTE(review): this assumes a three-character extension.
despath = os.path.join(des, song[0 : len(song)-3]+"mp3")
convert(filepath,despath)
print("complete")
Instead of forming a command string and passing it to subprocess.call, passing it as a list of arguments to the method will do the trick.
import os,subprocess,io
path = "C:\\Users\\Awesome\\Music\\unconverted"
des = "C:\\Users\\Awesome\\Music\\converted"
def convert( path, des):
    """Run ffmpeg to re-encode ``path`` into ``des`` at a 192k audio bitrate.

    The command is passed as an argument list, so each path reaches ffmpeg as
    one argument regardless of spaces; ``-y`` overwrites an existing
    destination.
    """
    subprocess.call(["ffmpeg", "-i", path, "-ab", "192k", des, "-y"])
# Convert every entry of the source folder to an .mp3 in the destination folder.
for song in os.listdir(path):
    filepath = os.path.join(path, song)
    # splitext copes with extensions of any length (".flac", ".ogg", none);
    # slicing off a fixed three characters only works for three-letter ones.
    stem, _ext = os.path.splitext(song)
    despath = os.path.join(des, stem + ".mp3")
    convert(filepath, despath)
print("complete")

How to integrate a bash command into a python code [duplicate]

This question already has answers here:
Variable interpolation in Python [duplicate]
(5 answers)
Closed 3 years ago.
everyone,
I'm looking to integrate a bash command into my python code to calculate indices. My problem is that I want to have an output image with a band for each of the calculated indices, but I can't integrate these indices by the bash command into my 'im_index' matrix created with my python code. I don't see how to link both of them... Do you have any idea?
import numpy as np
import sys
import os
import spectral as sp
from scipy import ndimage
import pylab as pl
from math import *
import spectral.io.envi as envi
#------------------------------------
def reject_outliers(data, m=1):
    """Return the values of ``data`` lying strictly within ``m`` standard deviations of its mean.

    ``data`` is indexed with a boolean mask, so it is expected to be a NumPy
    array. The comparison is strict, so data with zero spread yields an
    empty result.
    """
    deviation = abs(data - np.mean(data))
    threshold = m * np.std(data)
    keep = deviation < threshold
    return data[keep]
#------------------------------------
def find_nearest(array, value):
    """Return the index of the element of ``array`` closest to ``value``.

    ``array`` may be any array-like; on ties the first closest index wins.
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()
#------------------------------------
#Open existing dataset
# Every entry of src_directory is treated as a sub-folder of rasters; each
# '.hdr' file found in it is opened, an index image is computed, and the
# result is written to a matching 'indices_<entry>' output folder.
src_directory = "/d/afavro/Bureau/4_reflectance/"
dossier = os.listdir (src_directory)
print(dossier)
for fichier in dossier:
print (fichier)
ssrc_directory = "/d/afavro/Bureau/4_reflectance/" + fichier + "/"
rasters = os.listdir (ssrc_directory)
print(rasters)
OUTPUT_FOLDER = "/d/afavro/Bureau/5_indices2/" + 'indices_' + fichier + '/'
print(OUTPUT_FOLDER)
if not os.path.exists(OUTPUT_FOLDER):
os.makedirs(OUTPUT_FOLDER)
for image in rasters:
print(image)
name, ext = os.path.splitext(image)
# Only ENVI header files are processed; other entries are ignored.
if ext == '.hdr':
img = sp.open_image(ssrc_directory + image)
print(image)
# Load the full cube into memory.
im_HS = img[:,:,:]
# NOTE(review): shape[0] is labelled columns and shape[1] lines here — confirm
# the axis order of the loaded cube.
cols = im_HS.shape[0] # Number of column
rows = im_HS.shape[1] # Number of lines
bands = im_HS.shape[2] # Number of bands
NbPix = cols * rows # Number of pixels
#Get wavelengths from hdr file
wv = np.asarray(img.bands.centers)
if len(wv) == 0 :
print("Wavelengths not defined in the hdr file")
sys.exit("Try again!")
# A first wavelength above 100 is taken to mean the values need scaling by 0.001.
if wv[0] > 100:
wv=wv*0.001 # Convert to micrometers if necessary
# Flatten to one row per pixel, one column per band.
im_HS=im_HS.reshape(NbPix, bands)
#Compute HC index------------------------------------------------------
Nind=4 # Number of indices to be computed
im_index=np.zeros((cols*rows, Nind))
names = []
##NDVI computation
names.append('NDVI')
# Reference wavelengths for the two bands used by the index.
bande_ref=[0.67, 0.8]
bRef0 = find_nearest(wv,bande_ref[0])
bRef1 = find_nearest(wv,bande_ref[1])
#Check if the required spectral bands are available
if (np.abs(wv[bRef0]-bande_ref[0])<=0.1 and np.abs(wv[bRef1]-bande_ref[1])<=0.1):
b0 = im_HS[:, bRef0]
b1 = im_HS[:, bRef1]
# NOTE(review): b0 is the band nearest 0.67 and b1 the band nearest 0.8, so
# this is (b0 - b1) / (b0 + b1) — confirm the intended sign of the index.
index = (b0 - b1) / (b0 + b1)
else:
index = np.zeros_like(im_HS[:,0])
print("Wavelengths selection problem, NDVI not computed")
# Only column 0 of im_index is filled; the other Nind - 1 columns stay zero.
im_index[:,0]= index
# bash command :
inRaster = ssrc_directory + image
print(inRaster)
outRaster = OUTPUT_FOLDER + 'indices_' + image
print (outRaster)
# NOTE: 'inRaster' and 'outRaster' appear inside the string literal, so the
# command receives those literal words, not the values of the variables.
cmd = 'otbcli_RadiometricIndices -in inRaster -list Soil:BI Vegetation:MSAVI Vegetation:SAVI -out outRaster'
os.system(cmd)
#saving
# Back to an image-shaped cube with one band per index.
im_index=im_index.reshape(cols, rows, Nind)
file_image = OUTPUT_FOLDER + "indices2_" + fichier
header = envi.read_envi_header(ssrc_directory + image)
header ['description'] = "fichier d'origine " + image
# NOTE(review): five band names are listed while Nind is 4 — confirm which is intended.
header ['band names'] = ['NDVI', 'Sober filter', 'NDWI', 'IB(1)', 'IB(2)']
# NOTE(review): presumably header is a dict, in which case these deletions
# raise KeyError when the key is missing — confirm.
del header['wavelength units']
del header['wavelength']
sp.envi.save_image(file_image + '.hdr', im_index, metadata=header, force = True, interleave = 'bsq')
Assuming this is the code you are actually asking about:
inRaster = ssrc_directory + image
print(inRaster)
outRaster = OUTPUT_FOLDER + 'indices_' + image
print (outRaster)
cmd = 'otbcli_RadiometricIndices -in inRaster -list Soil:BI Vegetation:MSAVI Vegetation:SAVI -out outRaster'
os.system(cmd)
Of course, inRaster inside of single quotes is just a literal string; to interpolate the variable's value you can say
cmd = 'otbcli_RadiometricIndices -in ' + inRaster + \
' -list Soil:BI Vegetation:MSAVI Vegetation:SAVI -out ' + \
outRaster
or
cmd = 'otbcli_RadiometricIndices -in {0} -list Soil:BI Vegetation:MSAVI Vegetation:SAVI -out {1}'.format(
inRaster, outRaster)
or a number of other string interpolation techniques in Python (legacy % formatting, f-string, etc). But a better solution is to replace os.system with the more flexible and versatile subprocess, as suggested even in the os.system documentation.
subprocess.run([
'otbcli_RadiometricIndices',
'-in', inRaster,
'-list', 'Soil:BI', 'Vegetation:MSAVI', 'Vegetation:SAVI',
'-out', outRaster], check=True)
subprocess.run was introduced in Python 3.5; if you need compatibility with older versions, try subprocess.check_call or even the crude subprocess.call.
I think you might be looking for the subprocess package. An example:
>>> import subprocess as sp
>>> output = sp.check_output('echo hello world', shell=True)
>>> print(output)
b'hello world\n'
The check_output() method can be used to collect the stdout from a command. You'd need to parse the output to get integer indices afterwards.

Imagemagick's convert errors in python script with subprocess.Popen

I am trying to generate transparent background images with a python script run from the command line but I have a hard time passing all the arguments to subprocess.Popen so that Imagemagick's convert doesn't throw errors.
Here is my code:
# Import modules
import os
import subprocess as sp
# Define useful variables
# Every entry of the current directory except the script itself; its length
# determines how many background images are generated.
# NOTE(review): currentScriptName is not defined in the visible code — confirm
# where it is set.
fileList = os.listdir('.')
fileList.remove(currentScriptName)
# Interpret return code
def interpretReturnCode(returnCode) :
    """Translate a process exit status into a short human-readable verdict.

    Returns 'OK' when ``returnCode`` equals 0, otherwise
    'ERROR, check the script'.
    """
    # Compare by value: `is 0` tests object identity, which only happens to
    # work for small ints as an implementation detail.
    return 'OK' if returnCode == 0 else 'ERROR, check the script'
# Create background images
# Interactively create a 'numbers-height' or 'numbers-width' directory and
# fill it with one transparent PNG per number, each rendered by running
# ImageMagick's `convert` through subprocess.Popen.
# Python 2 code: it uses print statements and raw_input.
def createDirectoryAndBackgroundImage() :
# Ask if numbers-height or numbers-width before creating the directory
numbersDirectoryType = raw_input('Numbers directory: type "h" for "numbers-height" or "w" for "numbers-width": ')
if numbersDirectoryType == 'h' :
# Create 'numbers-height' directory
numbersDirectoryName = 'numbers-height'
numbersDirectory = interpretReturnCode(sp.call(['mkdir', numbersDirectoryName]))
print '%s%s' % ('Create "numbers-height" directory...', numbersDirectory)
# Create background images
startNumber = int(raw_input('First number for the background images: '))
# The range below yields len(fileList) + 1 consecutive numbers.
endNumber = (startNumber + len(fileList) + 1)
for x in range(startNumber, endNumber) :
createNum = []
print 'createNum just after reset and before adding things to it: ', createNum, '\n'
print 'start' , x, '\n'
# NOTE: .split() only splits on whitespace, and Popen is given the list
# without a shell, so the quote characters stay part of the arguments and
# "text 105,258 is split into two separate items.
createNum = 'convert -size 143x263 xc:transparent -font "FreeSans-Bold" -pointsize 22 -fill \'#242325\' "text 105,258'.split()
createNum.append('\'' + str(x) + '\'"')
# NOTE: '-draw' is appended after the text fragments, so the only argument
# that follows it is the output path.
createNum.append('-draw')
createNum.append('./' + numbersDirectoryName + '/' + str(x) + '.png')
print 'createNum set up, createNum submittet to subprocess.Popen: ', createNum
createNumImage = sp.Popen(createNum, stdout=sp.PIPE)
createNumImage.wait()
creationNumReturnCode = interpretReturnCode(createNumImage.returncode)
print '%s%s%s' % ('\tCreate numbers image...', creationNumReturnCode, '\n')
elif numbersDirectoryType == 'w' :
# Same procedure as the 'h' branch, with a different directory name and canvas size.
numbersDirectoryName = 'numbers-width'
numbersDirectory = interpretReturnCode(sp.call(['mkdir', numbersDirectoryName]))
print '%s%s' % ('Create "numbers-width" directory...', numbersDirectory)
# Create background images
startNumber = int(raw_input('First number for the background images: '))
endNumber = (startNumber + len(fileList) + 1)
for x in range(startNumber, endNumber) :
createNum = []
print 'createNum just after reset and before adding things to it: ', createNum, '\n'
print 'start' , x, '\n'
# NOTE: besides the quoting issue noted in the 'h' branch, '22-fill' has no
# space between the point size and the -fill option.
createNum = 'convert -size 224x122 xc:transparent -font "FreeSans-Bold" -pointsize 22-fill \'#242325\' "text 105,258'.split()
createNum.append('\'' + str(x) + '\'"')
createNum.append('-draw')
createNum.append('./' + numbersDirectoryName + '/' + str(x) + '.png')
print 'createNum set up, createNum submittet to subprocess.Popen: ', createNum
createNumImage = sp.Popen(createNum, stdout=sp.PIPE)
createNumImage.wait()
creationNumReturnCode = interpretReturnCode(createNumImage.returncode)
print '%s%s%s' % ('\tCreate numbers image...', creationNumReturnCode, '\n')
else :
print 'No such directory type, please start again'
# NOTE: the new answer is stored but nothing after this line uses it.
numbersDirectoryType = raw_input('Numbers directory: type "h" for "numbers-height" or "w" for "numbers-width": ')
For this I get the following errors, for each picture:
convert.im6: unable to open image `'#242325'': No such file or directory # error/blob.c/OpenBlob/2638.
convert.im6: no decode delegate for this image format `'#242325'' # error/constitute.c/ReadImage/544.
convert.im6: unable to open image `"text': No such file or directory # error/blob.c/OpenBlob/2638.
convert.im6: no decode delegate for this image format `"text' # error/constitute.c/ReadImage/544.
convert.im6: unable to open image `105,258': No such file or directory # error/blob.c/OpenBlob/2638.
convert.im6: no decode delegate for this image format `105,258' # error/constitute.c/ReadImage/544.
convert.im6: unable to open image `'152'"': No such file or directory # error/blob.c/OpenBlob/2638.
convert.im6: no decode delegate for this image format `'152'"' # error/constitute.c/ReadImage/544.
convert.im6: option requires an argument `-draw' # error/convert.c/ConvertImageCommand/1294.
I tried to change the order of the arguments without success, and to use shell=True in Popen (but then the function interpretReturnCode returns OK while no image is created: the numbers-height folder is empty).
I would strongly recommend following this process:
Pick a single file and directory
change the above so that sp.Popen is replaced by a print statement
Run the modified script from the command line
Try using the printed command output from the command line
Modify the command line until it works
Modify the script until it produces the command line that is exactly the same
Change the print back to sp.Popen - Then, (if you still have a problem:
Try modifying your command string to start echo convert so that
you can see what, if anything, is happening to the parameters during
the processing by sp.Popen.
There is also this handy hint from the python documents:
>>> import shlex, subprocess
>>> command_line = raw_input()
/bin/vikings -input eggs.txt -output "spam spam.txt" -cmd "echo '$MONEY'"
>>> args = shlex.split(command_line)
>>> print args
['/bin/vikings', '-input', 'eggs.txt', '-output', 'spam spam.txt', '-cmd', "echo '$MONEY'"]
>>> p = subprocess.Popen(args) # Success!

Categories