Traverse folder to convert all contained files

Traverse folder to convert all contained files - python

I've recently started using ffmpeg with the intention of converting my video library to h265 due to its compression benefits. I would like to run one command and have ffmpeg traverse the folder converting each file, one-by-one into h265. I've checked the documentation Here but I can't get my head around it. Does anybody have a template loop script for me to use?
I have ffmpeg installed on a Linux box and have successfully converted single files but I have around 400 files to convert, hence the looping question.
Thanks in advance.
EDIT:
The files I'm waiting to convert are videos with varying containers. I have bee using the python script below, which I have tweaked to suit my needs but isn't working. I will include the error I'm getting and a link to the original below my code.
import os
import sys
import re
import shutil
import subprocess
__author__ = 'Nikhil'
# Edit options here ##################################################
outmode = 'mp4' #Extension of file
remover = True # Delete original file after conversion complete
accept_ext = 'mp4 mkv avi divx m4v mpeg mpg wmv' #Extensions of video files to convert
ffmpeg_exe = "ffmpeg" #Path to ffmpeg executable
ffprobe_exe = "ffprobe" #Path to ffprobe executable
mkvextract_exe = "mkvextract" #Path to mkvextract executable
video_codec = 'libx265' #Video codec to use
video_type = 'h265' #Name of video codec to check for remux
audio_codec = 'aac' #Audio codec to use
audio_type = 'aac' #Name of audio codec to check for remux
crf = "28" #Video quality for libx264
vbr = '' #Audio quality
extract_subtitle = True #Extract subtitles?
subtitle_languages = "en eng english" #Codes for languages to extract
threads = 0 #Number of threads to use in ffmpeg, 0 defaults to all
additional_ffmpeg = '-preset slow -movflags +faststart' #Additional flags for ffmpeg, preset sets speed and compression, movflags to make file web optimized
## END OPTIONS - DO NOT EDIT BELOW THIS LINE UNLESS YOU KNOW WHAT YOU ARE DOING ##
outformat = 'mp4'
if outmode == 'mp4':
outformat = 'mp4'
elif outmode == 'mkv':
outformat = 'matroska'
def ffmpeg(*args, **kwargs):
largs = [ffmpeg_exe, ]
largs.extend(args)
try:
return subprocess.check_output(largs, **kwargs).decode('utf-8')
except:
return None
def getoutput(cmd):
if sys.version < '3':
try:
return subprocess.check_output(cmd.split(' '))
except:
return None
else:
return subprocess.getoutput(cmd)
formats = ""
if getoutput(ffmpeg_exe + ' -formats'):
formats = getoutput(ffmpeg_exe + ' -formats 2')
else:
exit(1)
if ('E mp4' in formats) and ('E matroska' in formats):
print("You have the suitable formats")
else:
print("You do not have both the mkv and mp4 formats...Exiting!")
exit(1)
codecs = getoutput(ffmpeg_exe + ' -codecs 2')
if video_codec in codecs:
print("Check " + video_codec + " Audio Encoder ... OK")
else:
print("Check " + video_codec + " Audio Encoder ... NOK")
exit(1)
if audio_codec in codecs:
print("Check " + audio_codec + " Audio Encoder ... OK")
else:
print("Check " + audio_codec + " Audio Encoder ... NOK")
exit(1)
print("Your FFMpeg is OK\nEntering File Processing\n")
subtitle_languages = subtitle_languages.lower()
def process_file(path, file):
extension = os.path.splitext(file)[1].replace(".", "")
filename = os.path.splitext(file)[0]
if extension in accept_ext:
print(file + " is an acceptable extension. Checking file...")
else:
print(file + " is not an acceptable extension. Skipping...")
return
if ffprobe_exe:
file_info = getoutput('"' + ffprobe_exe + '"' + " " + '"' + os.path.join(path, file) + '"')
else:
file_info = ffmpeg("-i", os.path.join(path, file))
if 'Invalid data found' in file_info:
print("File " + file + " is NOT A VIDEO FILE cannot be converted!")
return
encode_crf = []
if file_info.find("Video: " + video_type) != -1:
vcodec = 'copy'
print("Video is " + video_type + ", remuxing....")
else:
vcodec = video_codec
if crf:
encode_crf = ["-crf", "" + crf]
print("Video is not " + video_type + ", converting...")
encode_vbr = []
if "Audio: " + audio_type in file_info:
acodec = 'copy'
print("Audio is " + audio_type + ", remuxing....")
else:
acodec = audio_codec
if vbr:
encode_vbr = ["-vbr", "" + vbr]
print("Audio is not " + audio_type + ", converting...")
if extension == outmode and vcodec == 'copy' and acodec == 'copy':
print(file + " is already " + outmode + " and no conversion needed. Skipping...")
return
print(
"Using video codec: " + vcodec + " audio codec: " + acodec + " and Container format " + outformat + " for\nFile: " + file + "\nStarting Conversion...\n")
filename = filename.replace("XVID", video_type)
filename = filename.replace("xvid", video_type)
try:
args = ['-i', os.path.join(path, file), '-y', '-f', outformat, '-acodec', acodec]
if encode_vbr:
args.extend(encode_vbr)
args.extend(['-vcodec', vcodec])
if encode_crf:
args.extend(encode_crf)
if additional_ffmpeg:
args.extend(additional_ffmpeg.split(" "))
if threads:
args.extend(['-threads', str(threads)])
args.append(os.path.join(path, filename + '.temp'))
ffmpeg(*args)
print("")
except Exception as e:
print("Error: %s" % e)
print("Removing temp file and skipping file")
if os.path.isfile(os.path.join(path, filename + '.temp')):
os.remove(os.path.join(path, filename + '.temp'))
return
if extract_subtitle and (file_info.find("Subtitle:") != -1):
print("Extracting Subtitles")
matches = re.finditer("Stream #(\d+):(\d+)\((\w+)\): Subtitle: (.*)", file_info)
for m in matches:
if m.group(3).lower() not in subtitle_languages.split(" "):
continue
try:
if 'subrip' in m.group(4):
sub_format = 'copy'
sub_ext = '.srt'
elif mkvextract_exe and 'hdmv_pgs' in m.group(4):
subprocess.check_output([mkvextract_exe, 'tracks', os.path.join(path, file),
m.group(2) + ':' + os.path.join(path, filename + '.' + m.group(
3) + '.' + m.group(2) + '.sup')])
continue
else:
sub_format = 'srt'
sub_ext = '.srt'
ffmpeg("-i", os.path.join(path, file), '-y', '-map', m.group(1) + ':' + m.group(2), '-c:s:0',
sub_format,
os.path.join(path, filename + '.' + m.group(3) + '.' + m.group(2) + sub_ext))
print("")
except Exception as e:
print("Error: %s" % e)
print("Deleting subtitle.")
if os.path.isfile(os.path.join(path, filename + '.' + m.group(3) + '.' + m.group(2) + sub_ext)):
os.remove(os.path.join(path, filename + '.' + m.group(3) + '.' + m.group(2) + sub_ext))
if remover:
print("Deleting original file: " + file)
os.remove(os.path.join(path, file))
if outmode == extension:
shutil.move(os.path.join(path, filename + ".temp"), os.path.join(path, filename + ".enc." + outmode))
filename += ".enc"
else:
shutil.move(os.path.join(path, filename + ".temp"), os.path.join(path, filename + "." + outmode))
def process_directory(path):
if os.path.isfile(os.path.join(path, ".noconvert")):
return
for file in os.listdir(path):
filepath = os.path.join(path, file)
if os.path.isdir(filepath):
process_directory(filepath)
elif os.path.isfile(filepath):
process_file(path, file)
for arg in sys.argv[1:]:
if os.path.isdir(arg):
process_directory(arg)
elif os.path.isfile(arg):
process_file(os.path.dirname(arg), os.path.basename(arg))
The error I am getting is this:
Traceback (most recent call last):
File "/media/569f/ethan1878/bin/convert.py", line 209, in <module>
process_file(os.path.dirname(arg), os.path.basename(arg))
File "/media/569f/ethan1878/bin/convert.py", line 100, in process_file
if 'Invalid data found' in file_info:
TypeError: argument of type 'NoneType' is not iterable
and the original file is hosted Here (as a .txt file)

Related

nmap script not saving to the file i want it to.. its saving to desktop and not as a note or .txt

when using nm-scan it doesn save in the propper place and keeps saving without a name and not as a .txt
code here:
nm = nmap.PortScanner()
folder_path = os.path.expanduser("~/Desktop/AIO-1/scanresults")
full_path = os.path.join(folder_path, file_name + '.txt')
while True:
if command == "exit":
break
elif command == "nm-scan":
target = input("Enter the target IP or hostname: ")
file_name = input("Enter the file name to save the results: ")
folder_path = os.path.expanduser("~/Desktop/AIO-1/scanresults")
full_path = os.path.join(folder_path, file_name + ".txt")
os.system("nmap -sS -sV -oN " + full_path + " " + target)
print("Scan results saved to " + full_path)
elif command == "nm-list":
file_name = input("Enter the file name to list the results: ")
folder_path = os.path.expanduser("~/Desktop/AIO-1/scanresults")
full_path = os.path.join(folder_path, file_name + ".txt")
os.system("cat " + full_path)
elif command == "nm-delete":
file_name = input("Enter the file name to delete the results: ")
folder_path = os.path.expanduser("~/Desktop/AIO-1/scanresults")
full_path = os.path.join(folder_path, file_name + ".txt")
os.system("rm " + full_path)
print(full_path + " scan results deleted")
elif command == "nm-help":
nm_help()
elif command == "exit":
break
tried everything and cant find it
it has taken me ages and idk i kinda need help

os.walk() filename scope inside inner loop

Writing a script to help with data migration in renaming images. It seems as though when I try to access the variable filename from within the inner-for-loop, it's just printing .DS_Store
See commented lines for example:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
# Walk the tree.
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
# Join the two strings in order to form the full filepath.
filePath = os.path.join(root, filename)
#print(filePath) - this results in the actual file path
for row in reader:
#print(filePath) - this results in .DS_Store
oldFilePath = row[1].strip()
displayName = row[0].strip()
colour = row[2].strip()
if " " in colour:
colour = colour.replace(" ", "-")
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number)
os.rename(newFileName)
COUNT = COUNT+1
print(COUNT)
Why would this be?
After changing my code as per the comments, to store the results in a list, now the for root, directories, files in os.walk(FILE_PATH): is not being executed.
I verified that the FILE_PATH exists and printed it to console, also that it has contents.
My new code is as follows:
#!/usr/bin/env python
import os
import csv
FILE_PATH = '/Users/admin/Desktop/data-migration/images/product'
COUNT = 0
productInfo = []
with open('paths_formatted.csv') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
productInfo.append(row)
for root, directories, files in os.walk(FILE_PATH):
for filename in files:
for info in productInfo:
displayName = info[0]
oldFilePath = info[1]
colour = info[2]
slashIndex = oldFilePath.rfind("/")
oldFileName = oldFilePath[slashIndex+1:]
if " " in colour:
colour = colour.replace(" ", "-")
if oldFileName == filename:
number = 1;
newFileName = displayName + "_" + colour + "-" + str(number) + ".jpg"
while os.path.exists(FILE_PATH + leadingPath + newFileName):
number = number + 1
newFileName = filePath, displayName + "_" + colour + "-" + str(number) + ".jpg"
os.rename(newFileName)
COUNT = COUNT + 1
print(COUNT)

Python tarfile gzipped file bigger than sum of source files

I have a Python routine which archives file recordings into a GZipped tarball. The output file appears to be far larger than the source files, and I cannot work out why. As an example of the scale of the issue, 6GB of call recordings are generating an archive of 10GB.
There appear to be no errors in the script and the output .gz file is readable and appears OK apart from the huge size.
Excerpt from my script as follows:
# construct tar filename and open file
client_fileid = client_id + "_" + dt.datetime.now().strftime("%Y%m%d_%H%M%S")
tarname = tar_path + "/" + client_fileid + ".tar.gz"
print "Opening tar file %s " % (tarname), "\n"
try:
tar = tarfile.open (tarname, "w:gz")
except:
print "Error opening tar file: %s" % sys.exc_info()[0]
sql="""SELECT number, er.id, e.id, flow, filename, filesize, unread, er.cr_date, callerid,
length, callid, info, party FROM extension_recording er, extension e, client c
WHERE er.extension_id = e.id AND e.client_id = c.id AND c.parent_client_id = %s
AND DATE(er.cr_date) BETWEEN '%s' AND '%s'""" % (client_id, start_date, end_date)
rows = cur.execute(sql)
recordings = cur.fetchall()
if rows == 0: sys.exit("No recordings for selected date range - exiting")
for recording in recordings: # loop through recordings cursor
try:
ext_len = len(str(recording[0]))
# add preceding zeroes if the ext no starts with 0 or 00
if ext_len == 2: extension_no = "0" + str(recording[0])
elif ext_len == 1: extension_no = "00" + str(recording[0])
else: extension_no = str(recording[0])
filename = recording[4]
extended_no = client_id + "*%s" % (extension_no)
sourcedir = recording_path + "/" + extended_no
tardir = extended_no + "/" + filename
complete_name = sourcedir + "/" + filename
tar.add(complete_name, arcname=tardir) # add to tar archive
except:
print "Error '%s' writing to tar file %s" % (sys.exc_info()[1], csvfullfilename)

Python Nested Loop Fails

I am writing a program to perform file integrity checks of files in a directory. There are 3 nested loops in the code. When I run the code, the first two loops work great but the third loop does not run more than once.
import hashlib
import logging as log
import optparse
import os
import re
import sys
import glob
import shutil
def md5(fileName):
"""Compute md5 hash of the specified file"""
try:
fileHandle = open(fileName, "rb")
except IOError:
return
m5Hash = hashlib.md5()
while True:
data = fileHandle.read(8192)
if not data:
break
m5Hash.update(data)
fileHandle.close()
return m5Hash.hexdigest()
req = open("requested.txt")
for reqline in req:
reqName = reqline[reqline.rfind('/') + 1:len(reqline) - 1]
reqDir = reqline[0:reqline.rfind('/') + 1]
ezfimlog = open("ezfimlog.txt", 'a')
actFile = open("activefile.txt")
tempFile = open("activetemp.txt", 'w')
for name in glob.glob(reqDir + reqName):
fileHash = md5(name)
actInt = 0
if fileHash != None:
print fileHash
for actLine in actFile:
actNameDir = actLine[0:actLine.rfind(' : ')]
actHash = actLine[actLine.rfind(' : ') + 3:len(actLine) -1]
print (name + " " + actHash + " " + fileHash)
if actNameDir == name and actHash == fileHash:
tempFile.write(name + " : " + fileHash + "\n")
actInt = 1
if actNameDir == name and actHash != fileHash:
tempFile.write(name + " : " + actHash + "\n")
actInt = 1
ezfimlog.write("EzFIM Log: The file " + name + " was modified: " + actHash + "\n")
if actInt == 0:
ezfimlog.write("EzFIM Log: The file " + name + " was created: " + fileHash + "\n")
tempFile.write(name + " : " + fileHash + "\n")
shutil.copyfile("activetemp.txt", "activefile.txt")

You open actFile once and then try to read it many times. You'll need to open it each time you want to read it.
Move this line:
actFile = open("activefile.txt")
to just before this line:
for actLine in actFile:

I'm trying to understand how to call a function from another function in the same class

I'm attempting to login to an Ubuntu server and search logs at several different paths with a function that already works locally (Python 2.7 - win7 machine). Below is the function of how I login and select the logs (also, the basis of my program is Python's cmd module):
def do_siteserver(self, line):
import paramiko
paramiko.util.log_to_file('c:\Python27\paramiko-wininst.log')
host = '10.5.48.65'
portNum = raw_input("\nEnter a port number: ")
while True:
try:
port = int(portNum)
break
except:
print "\nPort is not valid!!!"
break
transport = paramiko.Transport((host,port))
while True:
try:
passW = raw_input("\nEnter the SiteServer weekly password: ")
password = passW
username = 'gilbert'
nport = str(port)
print '\nEstablishing SFTP connection to: {}:{} ...'.format(host,port)
transport.connect(username = username, password = password)
sftp = paramiko.SFTPClient.from_transport(transport)
print 'Authorization Successful!!!'
log_names = ("/var/log/apache2/access.log",
"/var/log/apache2/error.log",
"/var/opt/smartmerch/log/merch_error.log",
"/var/opt/smartmerch/log/merch_event.log",
"/var/opt/smartmerch/log/server_sync.log")
#call search function here?
#for log_file, local_name in log_names.iteritems():
# sftp.get(log_file, local_name)
#sftp.close()
#transport.close()
break
except:
print "\nAuthorization Failed!!!"
Here is the function (in the same class) that I want to call:
def do_search(self, line):
print '\nCurrent dir: '+ os.getcwd()
userpath = raw_input("\nPlease enter a path to search (only enter folder name, eg. SQA\log): ")
directory = os.path.join("c:\\",userpath)
os.chdir(directory)
print "\n SEARCHES ARE CASE SENSITIVE"
print " "
line = "[1]Single File [2]Multiple Files [3]STATIC HEX"
col1 = line[0:14]
col2 = line[15:32]
col3 = line[33:46]
print " " + col1 + " " + col2 + " " + col3
print "\nCurrent Dir: " + os.getcwd()
searchType = raw_input("\nSelect type of search: ")
if searchType == '1':
logfile = raw_input("\nEnter filename to search (eg. name.log): ")
fiLe = open(logfile, "r")
userString = raw_input("\nEnter a string name to search: ")
for i,line in enumerate(fiLe.readlines()):
if userString in line:
print "String: " + userString
print "File: " + os.join(directory,logfile)
print "Line: " + str(i)
break
else:
print "%s NOT in %s" % (userString, logfile)
fiLe.close()
elif searchType =='2':
print "\nDirectory to be searched: " + directory
#directory = os.path.join("c:\\","SQA_log")
userstring = raw_input("Enter a string name to search: ")
userStrHEX = userstring.encode('hex')
userStrASCII = ''.join(str(ord(char)) for char in userstring)
regex = re.compile(r"(%s|%s|%s)" % ( re.escape( userstring ), re.escape( userStrHEX ), re.escape( userStrASCII )))
choice = raw_input("1: search with respect to whitespace. 2: search ignoring whitespace: ")
if choice == '1':
for root,dirname, files in os.walk(directory):
for file in files:
if file.endswith(".log") or file.endswith(".txt"):
f=open(os.path.join(root, file))
for i,line in enumerate(f.readlines()):
result = regex.search(line)
if result:
print "\nLine: " + str(i)
print "File: " + os.path.join(root,file)
print "String Type: " + result.group() + '\n'
f.close()
re.purge()
if choice == '2':
regex2 = re.compile(r'\s+')
for root,dirname, files in os.walk(directory):
for file in files:
if file.endswith(".log") or file.endswith(".txt"):
f=open(os.path.join(root, file))
for i,line in enumerate(f.readlines()):
result2 = regex.search(re.sub(regex2, '',line))
if result2:
print "\nLine: " + str(i)
print "File: " + os.path.join(root,file)
print "String Type: " + result2.group() + '\n'
f.close()
re.purge()
elif searchType =='3':
print "\nDirectory to be searched: " + directory
print " "
#directory = os.path.join("c:\\","SQA_log")
regex = re.compile(r'(?:3\d){6}')
for root,dirname, files in os.walk(directory):
for file in files:
if file.endswith(".log") or file.endswith(".txt"):
f=open(os.path.join(root,file))
for i, line in enumerate(f.readlines()):
searchedstr = regex.findall(line)
ln = str(i)
for word in searchedstr:
print "\nString found: " + word
print "Line: " + ln
print "File: " + os.path.join(root,file)
print " "
logfile = open('result3.log', 'w')
f.close()
re.purge()

self.do_search(linenumber)
That is all.

Methods are invoked via the object that has them.
self.do_search(...)

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Traverse folder to convert all contained files - python

Related

nmap script not saving to the file i want it to.. its saving to desktop and not as a note or .txt

os.walk() filename scope inside inner loop

Python tarfile gzipped file bigger than sum of source files

Python Nested Loop Fails

I'm trying to understand how to call a function from another function in the same class

Categories

Resources