white space in filename python 3.4.2 - python

I have created this small prog to search all PDF's in a directory, determine if they are searchable or not and then move them to the appropriate directory.
I am new to Python and it is probably not the best way but it does work until the file name has White Space in it and I get the following returned.
Any help would be appreciated.
>>> os.system("pdffonts.exe " + pdfFile + "> output.txt")
99
import os
import glob
import shutil
directory = os.chdir("C:\MyDir") # Change working directory
fileDir = glob.glob('*.pdf') # Create a list of all PDF's in declared directory
numFiles = len(fileDir) # Lenght of list
startFile = 0 # Counter variable
seekWord = "TrueType"
while startFile < numFiles:
pdfFile=fileDir[startFile]
os.system("pdffonts.exe " + pdfFile + "> output.txt")
file1output = open("output.txt","r")
fileContent = file1output.read()
if seekWord in fileContent:
shutil.move(pdfFile , "NO_OCR")
else: shutil.move(pdfFile, "OCR")
startFile = startFile + 1

os.system() uses the shell to execute your command. You'd have to quote your filename for the shell to recognise spaces as part of the file, you could do so with the shlex.quote() function:
os.system("pdffonts.exe " + shlex.quote(pdfFile) + "> output.txt")
However, there is no reason at all to use os.system() and the shell. You should use the subprocess.run() function and configure that to pass back the output without using redirection or a shell:
import subprocess
seekWord = b"TrueType"
for pdfFile in fileDir:
result = subprocess.run(["pdffonts.exe", pdfFile], stdout=subprocess.PIPE)
fileContent = result.stdout
if seekWord in fileContent:
# ...
Because pdfFile is passed to pdffonts.exe directly there is no need to worry about a shell parsing and whitespace no longer matters.
Note that I changed seekWord to be a bytes literal instead as result.stdout is a bytes value (no need to try to decode the result to Unicode here).

It seems the problem doesn't come from python, but the Windows shell. You need to enclose in quotation mark. As I don't have your program pdffonts.exe, I cannot debug. I also made your code more pythonic
import os
import glob
import shutil
directory = os.chdir("C:\MyDir") # Change working directory
fileDir = glob.glob('*.pdf') # Create a list of all PDF's in declared directory
seekWord = "TrueType"
for pdfFile in fileDir:
os.system('pdffonts.exe "{0}"> output.txt'.format(pdfFile))
file1output = open("output.txt","r")
fileContent = file1output.read()
if seekWord in fileContent:
shutil.move(pdfFile , "NO_OCR")
else:
shutil.move(pdfFile, "OCR")

Related

Python script violates the command-line character limit

I am using a Python script to batch convert many images in different folders into single pdfs (with https://pypi.org/project/img2pdf/):
import os
import subprocess
import img2pdf
from shutil import copyfile
def main():
folders = [name for name in os.listdir(".") if os.path.isdir(name)]
for f in folders:
files = [f for f in os.listdir(f)]
p = ""
for ffile in files:
p += f+'\\' + ffile + " "
os.system("py -m img2pdf *.pn* " + p + " --output " + f + "\combined.pdf")
if __name__ == '__main__':
main()
However, despite running the command via Powershell on Windows 10, and despite using very short filenames, when the number of images is very high (eg over 600 or so), Powershell gives me the error "The command line is too long" and it does not create the pdf. I know there is a command-line string limitation (https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/command-line-string-limitation), but I also know that for powershell this limit is higher (Powershell to avoid cmd 8191 character limit), and I can't figure out how to fix the script. I would like to ask you if you could help me fix the script to avoid violating the character limit. Thank you
PS: I use the script after inserting it in the parent folder that contains the folders with the images; then in each subfolder the output pdf file is created.
Using img2pdf library you can use this script:
import img2pdf
import os
for r, _, f in os.walk("."):
imgs = []
for fname in f:
if fname.endswith(".jpg") or fname.endswith(".png"):
imgs.append(os.path.join(r, fname))
if len(imgs) > 0:
with open(r+"\output.pdf","wb") as f:
f.write(img2pdf.convert(imgs))

Using terminal command to search through files for a specific string within a python script

I have a parent directory, and I'd like to go through that directory and grab each file with a specific string for editing in python. I have been using grep -r 'string' filepath in terminal, but I want to be able to do everything using python. I'm hoping to get all the files into an array and go through each of them to edit them.
Is there a way to do this by only running a python script?
changing current folder to parent
import os
os.chdir("..")
changing folder
import os
os.chdir(dir_of_your_choice)
finding files with a rule in the current folder
import glob
import os
current_dir = os.getcwd()
for f in glob.glob('*string*'):
do_things(f)
import os
#sourceFolder is the folder you're going to be looking inside for backslashes are a special character in python so they're escaped as double backslashes
sourceFolder = "C:\\FolderBeingSearched\\"
myFiles = []
# Find all files in the directory
for file in os.listdir(sourceFolder):
myFiles.append(file)
#open them for editing
for file in myFiles:
try:
open(sourceFolder + file,'r')
except:
continue
#run whatever code you need to do on each open file here
print("opened %s" % file)
EDIT: If you want to separate all files that contain a string (this just prints the list at the end currently):
import os
#sourceFolder is the folder you're going to be looking inside for backslashes are a special character in python so they're escaped as double backslashes
sourceFolder = "C:\\FolderBeingSearched\\"
myFiles = []
filesContainString = []
stringsImLookingFor = ['first','second']
# Find all files in the directory
for file in os.listdir(sourceFolder):
myFiles.append(file)
#open them for editing
for file in myFiles:
looking = sourceFolder + file
try:
open(looking,'r')
except:
continue
print("opened %s" % file)
found = 0
with open(looking,encoding="latin1") as in_file:
for line in in_file:
for x in stringsImLookingFor:
if line.find(x) != -1:
#do whatever you need to do to the file or add it to a list like I am
filesContainString.append(file)
found = 1
break
if found:
break
print(filesContainString)

Python: Trying to put the contents of a folder into a text file:

I'm in the process of writing a python script that takes two arguments that will allow me to output the contents of a folder to a text file for me to use for another process. The snippet of I have is below:
#!/usr/bin/python
import cv2
import numpy as np
import random
import sys
import os
import fileinput
#Variables:
img_path= str(sys.argv[1])
file_path = str(sys.argv[2])
print img_path
print file_path
cmd = 'find ' + img_path + '/*.png | sed -e "s/^/\"/g;s/$/\"/g" >' + file_path + '/desc.txt'
print "command: ", cmd
#Generate desc.txt file:
os.system(cmd)
When I try and run that from my command line, I get the following output, and I have no idea how to fix it.
sh: 1: s/$//g: not found
I tested the command I am using by running the following command in a fresh terminal instance, and it works out fine:
images/*.png | sed -e "s/^/\"/g;s/$/\"/g" > desc.txt
Can anyone see why my snippet isn't working? When I run it, I get an empty file...
Thanks in advance!
its not sending the full text for your regular expression through to bash because of how python processes and escapes string content, so the best quickest solution would be to just manually escape the back slashes in the string, because python thinks they currently are escape codes. so change this line:
cmd = 'find ' + img_path + '/*.png | sed -e "s/^/\"/g;s/$/\"/g" >' + file_path + '/desc.txt'
to this:
cmd = 'find ' + img_path + '/*.png | sed -e "s/^/\\"/g;s/$/\\"/g" >' + file_path + '/desc.txt'
and that should work for you.
although, the comment on your question has a great point, you could totally just do it from python, something like:
import os
import sys
def main():
# variables
img_path= str(sys.argv[1])
file_path = str(sys.argv[2])
with open(file_path,'w') as f:
f.writelines(['{}\n'.format(line) for line in os.listdir(img_path) if line.endswith('*.png')])
if __name__ == "__main__":
main()
I fully agree with Kyle. My recommendation is to do using only python code better than call bash commands from your code. Here it is my recommended code, it is longer and not as optimal than the aforementioned one, but IMHO it is a more easy to understand solution.
#!/usr/bin/python
import glob
import sys
import os
# Taking arguments
img_path = str(sys.argv[1])
file_path = str(sys.argv[2])
# lets put the target filename in a variable (it is better than hardcoding it)
file_name = 'desc.txt'
# folder_separator is used to define how your operating system separates folders (unix / and windows \)
folder_separator = '\\' # Windows folders
# folder_separator = '/' # Unix folders
# better if you make sure that the target folder exists
if not os.path.exists(file_path):
# if it does not exist, you create it
os.makedirs(file_path)
# Create the target file (write mode).
outfile = open(file_path + '/' + file_name, 'w')
# loop over folder contents
for fname in glob.iglob("%s/*" % img_path):
# for every file found you take only the name (assuming that structure is folder/file.ext)
file_name_in_imgPath = fname.split('\\')[1]
# we want to avoid to write 'folders' in the target file
if os.path.isfile(file_name_in_imgPath):
# write filename in the target file
outfile.write(str(file_name_in_imgPath) + '\n')
outfile.close()

Trying to access a file located on a flashdrive

I have made a simple test code in python that reads from a text file, and then preforms an action if the text file contains a line "on".
My code works fine if i run the script on my hardive with the text file in the same folder. Example, (C:\Python27\my_file.txt, and C:\Python27\my_scipt.py).
However, if I try this code while my text file is located on my flashdrive and my script is still on my hardrive it won't work even though I have the correct path specified. Example, (G:\flashdrive_folder\flashdrive_file.txt, and C:\Python27\my_scipt.py).
Here is the code I have written out.
def locatedrive():
file = open("G:\flashdrive_folder\flashdrive_file.txt", "r")
flashdrive_file = file.read()
file.close()
if flashdrive_file == "on":
print "working"
else:
print"fail"
while True:
print "trying"
try:
locatedrive()
break
except:
pass
break
The backslash character does double duty. Windows uses it as a path separator, and Python uses it to introduce escape sequences.
You need to escape the backslash (using a backslash!), or use one of the other techniques below:
file = open("G:\\flashdrive_folder\\flashdrive_file.txt", "r")
or
file = open(r"G:\flashdrive_folder\flashdrive_file.txt", "r")
or
file = open("G:/flashdrive_folder/flashdrive_file.txt", "r")
cd /media/usb0
import os
path = "/media/usb0"
#!/usr/bin/python
import os
path = "/usr/tmp"
# Check current working directory.
retval = os.getcwd()
print "Current working directory %s" % retval
# Now change the directory
os.chdir( path )
# Check current working directory.
retval = os.getcwd()
print "Directory changed successfully %s" % retval
Use:
import os
os.chdir(path_to_flashdrive)

Rename multiple files in a directory in Python

I'm trying to rename some files in a directory using Python.
Say I have a file called CHEESE_CHEESE_TYPE.*** and want to remove CHEESE_ so my resulting filename would be CHEESE_TYPE
I'm trying to use the os.path.split but it's not working properly. I have also considered using string manipulations, but have not been successful with that either.
Use os.rename(src, dst) to rename or move a file or a directory.
$ ls
cheese_cheese_type.bar cheese_cheese_type.foo
$ python
>>> import os
>>> for filename in os.listdir("."):
... if filename.startswith("cheese_"):
... os.rename(filename, filename[7:])
...
>>>
$ ls
cheese_type.bar cheese_type.foo
Here's a script based on your newest comment.
#!/usr/bin/env python
from os import rename, listdir
badprefix = "cheese_"
fnames = listdir('.')
for fname in fnames:
if fname.startswith(badprefix*2):
rename(fname, fname.replace(badprefix, '', 1))
The following code should work. It takes every filename in the current directory, if the filename contains the pattern CHEESE_CHEESE_ then it is renamed. If not nothing is done to the filename.
import os
for fileName in os.listdir("."):
os.rename(fileName, fileName.replace("CHEESE_CHEESE_", "CHEESE_"))
Assuming you are already in the directory, and that the "first 8 characters" from your comment hold true always. (Although "CHEESE_" is 7 characters... ? If so, change the 8 below to 7)
from glob import glob
from os import rename
for fname in glob('*.prj'):
rename(fname, fname[8:])
I have the same issue, where I want to replace the white space in any pdf file to a dash -.
But the files were in multiple sub-directories. So, I had to use os.walk().
In your case for multiple sub-directories, it could be something like this:
import os
for dpath, dnames, fnames in os.walk('/path/to/directory'):
for f in fnames:
os.chdir(dpath)
if f.startswith('cheese_'):
os.rename(f, f.replace('cheese_', ''))
Try this:
import os
import shutil
for file in os.listdir(dirpath):
newfile = os.path.join(dirpath, file.split("_",1)[1])
shutil.move(os.path.join(dirpath,file),newfile)
I'm assuming you don't want to remove the file extension, but you can just do the same split with periods.
This sort of stuff is perfectly fitted for IPython, which has shell integration.
In [1] files = !ls
In [2] for f in files:
newname = process_filename(f)
mv $f $newname
Note: to store this in a script, use the .ipy extension, and prefix all shell commands with !.
See also: http://ipython.org/ipython-doc/stable/interactive/shell.html
Here is a more general solution:
This code can be used to remove any particular character or set of characters recursively from all filenames within a directory and replace them with any other character, set of characters or no character.
import os
paths = (os.path.join(root, filename)
for root, _, filenames in os.walk('C:\FolderName')
for filename in filenames)
for path in paths:
# the '#' in the example below will be replaced by the '-' in the filenames in the directory
newname = path.replace('#', '-')
if newname != path:
os.rename(path, newname)
It seems that your problem is more in determining the new file name rather than the rename itself (for which you could use the os.rename method).
It is not clear from your question what the pattern is that you want to be renaming. There is nothing wrong with string manipulation. A regular expression may be what you need here.
import os
import string
def rename_files():
#List all files in the directory
file_list = os.listdir("/Users/tedfuller/Desktop/prank/")
print(file_list)
#Change current working directory and print out it's location
working_location = os.chdir("/Users/tedfuller/Desktop/prank/")
working_location = os.getcwd()
print(working_location)
#Rename all the files in that directory
for file_name in file_list:
os.rename(file_name, file_name.translate(str.maketrans("","",string.digits)))
rename_files()
This command will remove the initial "CHEESE_" string from all the files in the current directory, using renamer:
$ renamer --find "/^CHEESE_/" *
I was originally looking for some GUI which would allow renaming using regular expressions and which had a preview of the result before applying changes.
On Linux I have successfully used krename, on Windows Total Commander does renaming with regexes, but I found no decent free equivalent for OSX, so I ended up writing a python script which works recursively and by default only prints the new file names without making any changes. Add the '-w' switch to actually modify the file names.
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import fnmatch
import sys
import shutil
import re
def usage():
print """
Usage:
%s <work_dir> <search_regex> <replace_regex> [-w|--write]
By default no changes are made, add '-w' or '--write' as last arg to actually rename files
after you have previewed the result.
""" % (os.path.basename(sys.argv[0]))
def rename_files(directory, search_pattern, replace_pattern, write_changes=False):
pattern_old = re.compile(search_pattern)
for path, dirs, files in os.walk(os.path.abspath(directory)):
for filename in fnmatch.filter(files, "*.*"):
if pattern_old.findall(filename):
new_name = pattern_old.sub(replace_pattern, filename)
filepath_old = os.path.join(path, filename)
filepath_new = os.path.join(path, new_name)
if not filepath_new:
print 'Replacement regex {} returns empty value! Skipping'.format(replace_pattern)
continue
print new_name
if write_changes:
shutil.move(filepath_old, filepath_new)
else:
print 'Name [{}] does not match search regex [{}]'.format(filename, search_pattern)
if __name__ == '__main__':
if len(sys.argv) < 4:
usage()
sys.exit(-1)
work_dir = sys.argv[1]
search_regex = sys.argv[2]
replace_regex = sys.argv[3]
write_changes = (len(sys.argv) > 4) and sys.argv[4].lower() in ['--write', '-w']
rename_files(work_dir, search_regex, replace_regex, write_changes)
Example use case
I want to flip parts of a file name in the following manner, i.e. move the bit m7-08 to the beginning of the file name:
# Before:
Summary-building-mobile-apps-ionic-framework-angularjs-m7-08.mp4
# After:
m7-08_Summary-building-mobile-apps-ionic-framework-angularjs.mp4
This will perform a dry run, and print the new file names without actually renaming any files:
rename_files_regex.py . "([^\.]+?)-(m\\d+-\\d+)" "\\2_\\1"
This will do the actual renaming (you can use either -w or --write):
rename_files_regex.py . "([^\.]+?)-(m\\d+-\\d+)" "\\2_\\1" --write
You can use os.system function for simplicity and to invoke bash to accomplish the task:
import os
os.system('mv old_filename new_filename')
This works for me.
import os
for afile in os.listdir('.'):
filename, file_extension = os.path.splitext(afile)
if not file_extension == '.xyz':
os.rename(afile, filename + '.abc')
What about this :
import re
p = re.compile(r'_')
p.split(filename, 1) #where filename is CHEESE_CHEESE_TYPE.***

Categories