I am writing a piece of code to recursively process *.py files. The code block is as follows:
class FileProcessor(object):
    # Question code: a hand-written recursive traversal. convert() lists a
    # directory and calls itself on every entry returned by os.listdir().
    def convert(self,file_path):
        # file_path: the path to inspect. Directories are listed and each
        # entry is fed back into convert(); anything else reaches the stub
        # else branch below.
        if os.path.isdir(file_path):
            """ If the path is a directory,then process it recursively
untill a file is met"""
            dir_list=os.listdir(file_path)
            print("Now Processing Directory:",file_path)
            i=1  # 1-based counter, used only for the printed listing
            for temp_dir in dir_list:
                print(i,":",temp_dir)
                i=i+1
                # NOTE(review): os.listdir() returns bare entry names, and
                # temp_dir is passed on without file_path in front of it.
                # os.path.isdir() in the recursive call therefore checks the
                # name relative to the current working directory, so nested
                # entries normally land in the else branch instead of being
                # descended into.
                self.convert(temp_dir)
        else:
            """ if the path is not a directory"""
            """ TODO something meaningful """
if __name__ == '__main__':
    # Entry point: the path to process is the first command-line argument.
    tempObj=FileProcessor()
    tempObj.convert(sys.argv[1])
When I run the script with a directory path as argument, it only runs the first layer of the directory, the line:
self.convert(temp_dir)
never seems to get called. I'm using Python 3.5.
The recursion is happening fine, but temp_dir is not a directory so it passes control to your stub else block. You can see this if you put print(file_path) outside your if block.
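temp_dir is only the name of the next entry, not its full path: "C:/users/adsmith/tmp/folder" becomes just "folder". You can use os.path.abspath to turn it into an absolute path, but note that abspath resolves the name against the current working directory, so this only works when the script is run from inside file_path; joining the name onto file_path with os.path.join(file_path, temp_dir) works regardless of where the script is run from.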
self.convert(os.path.abspath(temp_dir))
Although the canonical way to do this (as mentioned in my comment on the question) is to use os.walk.
class FileProcessor(object):
    """Process a directory tree with os.walk() instead of manual recursion."""

    def convert(self, file_path):
        """Print the numbered sub-directories of every directory under file_path.

        Args:
            file_path: Root directory of the walk. os.walk() silently yields
                nothing for a path that is not a directory, so nothing is
                printed in that case.
        """
        for root, dirs, files in os.walk(file_path):
            # if file_path is C:/users/adsmith, then on the first iteration:
            #   root == C:/users/adsmith
            #   dirs is a list of the directory names directly inside root
            #   files is a list of the file names directly inside root
            # os.walk descends on its own, so the following iterations visit
            # the directories named in `dirs`, one level deeper each time
            for i, d in enumerate(dirs):
                # enumerate is preferred to setting a counter var and
                # incrementing it by hand
                print(i, ":", d)
                # no need to recurse here since os.walk does that itself
            for fname in files:
                # do something with the files here; the full path of each one
                # is os.path.join(root, fname)
                pass
Since temp_dir holds only the entry name, without its parent path, you should change
self.convert(temp_dir)
to
self.convert(os.path.join(file_path, temp_dir))
Related
I'm using os.walk() to traverse a folder and get the fully-qualified path for certain documents for processing.
def folderLoop():
    # Walks the tree rooted at `inFolder` (defined outside this function) and
    # prints a path for every file whose name matches '*.mxd'.
    for path, dirs, files in os.walk(inFolder):
        for filename in files:
            if fnmatch.fnmatch(filename, '*.mxd'):
                #mxdFilePath = os.path.abspath(os.path.join(path, filename))
                mxdFilePath1 = os.path.normpath(os.path.join(path, filename))
                # The result is anchored to whatever os.getcwd() returns on
                # this iteration, so it changes if the working directory
                # changes between files.
                # NOTE(review): presumably the MapDocument call below (when
                # uncommented) changes the working directory -- confirm.
                mxdFilePath = os.path.join(os.getcwd(), mxdFilePath1)
                print("\tRe-source: " + mxdFilePath)
                #mxd = arcpy.mapping.MapDocument(r'{}'.format(mxdFilePath))
                ### Process File ###
I read this post, and you can see I've tried more than one method to get the correct path, but for some reason I haven't yet discovered, the resulting path seems to change depending on whether or not the last line is commented out.
If I run the code with the mxd assignment commented out as shown above, all of the file paths printed look correct:
Re-source: C:\Users\CoryDavis\desktop\test_data\IL\Bond_IL\bond_il.mxd
Re-source: C:\Users\CoryDavis\desktop\test_data\IL\Bond_IL\bond_il_cmp.mxd
Re-source: C:\Users\CoryDavis\desktop\test_data\IL\Bond_IL\bond_il_detailed_outline.mxd
...
However, this is what I see if I uncomment it:
Re-source: C:\Users\CoryDavis\desktop\test_data\IL\Bond_IL\bond_il.mxd
Re-source: C:\Users\CoryDavis\desktop\test_data\IL\Bond_IL\test_data\IL\Bond_IL\bond_il_cmp.mxd
Then the assignment fails because the path to the second file is incorrect. Why does the second path repeat the relative path? Why does the printed string change if I uncomment the assignment statement? Why does the first file path remain unaffected?
I have some directories which I want to delete, but one of the directories has a sub-directory that I want to keep.
Example:
Files
pictures
cat.png
icon.png
Music
song.mp3
Movies
First.mp4
I want to delete everything (directories and sub-directories) except the sub-directory pictures for example.
Right now I have this:
def Destroy_Path(path):
    # Removes the whole directory tree at `path` with shutil.rmtree().
    # ignore_errors=True is passed, so failures during removal are ignored
    # rather than raised. Nothing below `path` is spared.
    shutil.rmtree(path, ignore_errors=True)
Here is a solution. UNTESTED!
Note the use of os.walk()'s ability to change dirnames in-place to tell it not to recurse into a sub-directory, and the avoidance of using topdown=False which would break this feature:
When topdown is True, the caller can modify the dirnames list in-place
(perhaps using del or slice assignment), and walk() will only recurse
into the subdirectories whose names remain in dirnames; this can be
used to prune the search, [...]
Modifying dirnames when topdown is False has no effect on the behavior of the walk, because in bottom-up mode the directories in dirnames are generated before dirpath itself is generated.
See the docs for more information.
Also notable is the use of os.path.samefile() for path comparisons. This is available on Windows since version 3.2.
THIS IS UNTESTED!
USE AT YOUR OWN RISK!!
SERIOUSLY, BE CAREFUL!!!
import os
def gen_dir_paths_except(base_path, exceptions):
    """Yield base_path and every directory below it, skipping excepted ones.

    Directories are produced top-down. A directory that is the same file as
    one of *exceptions* is neither yielded nor descended into.

    Args:
        base_path: Directory at which the walk starts. Like os.walk(), the
            generator produces nothing when this is not a directory.
        exceptions: Paths to leave out. Entries that do not exist are
            ignored, because os.path.samefile() only works on existing paths.

    Yields:
        Directory paths, each built by joining onto base_path.
    """
    if not os.path.isdir(base_path):
        return

    # only existing paths can be compared with os.path.samefile()
    protected = [entry for entry in exceptions if os.path.exists(entry)]

    def is_protected(candidate):
        # True when candidate refers to the same file as a protected path
        for keep in protected:
            if os.path.samefile(candidate, keep):
                return True
        return False

    # the base directory itself may be excepted
    if is_protected(base_path):
        return
    yield base_path

    for parent, subdirs, _files in os.walk(base_path):
        survivors = []
        for name in subdirs:
            full = os.path.join(parent, name)
            if is_protected(full):
                continue
            survivors.append(name)
            yield full
        # in-place update: os.walk() only descends into the names left here
        subdirs[:] = survivors
def rmtree_except(path, exceptions):
    """Delete the tree rooted at *path* while sparing everything in *exceptions*.

    Files listed in *exceptions* are kept, excepted directories are not
    entered at all, and any directory that still has content afterwards is
    left in place.

    Args:
        path: Directory whose contents should be removed.
        exceptions: Paths (files or directories) that must survive. Entries
            that do not exist are ignored.

    Raises:
        OSError: If *path* does not exist or is not a directory.
    """
    # os.walk() silently returns nothing for a non-directory, so a bad
    # argument has to be rejected explicitly here.
    if not os.path.exists(path):
        raise OSError("No such directory: " + path)
    if not os.path.isdir(path):
        raise OSError("Not a directory: " + path)

    # os.path.samefile() only works on paths that exist
    protected = [entry for entry in exceptions if os.path.exists(entry)]
    candidates = list(gen_dir_paths_except(path, protected))

    # the listing is top-down; walking it backwards handles children first
    for directory in candidates[::-1]:
        plain_files = []
        for entry in os.listdir(directory):
            if os.path.isdir(os.path.join(directory, entry)):
                continue
            plain_files.append(entry)

        for entry in plain_files:
            target = os.path.join(directory, entry)
            is_protected = any(
                os.path.samefile(target, keep) for keep in protected
            )
            if is_protected:
                continue
            os.remove(target)

        try:
            os.rmdir(directory)
        except OSError:
            # directory not empty: just leave it where it is
            pass
I am trying to list all the files in the current folder and also files in the folders of the current folder.
This is what I have been up to:
import os
def sendFnF(dirList):
    # Question code, Python 2 (print statements, raw_input). Takes a list of
    # bare entry names and recurses into those os.path.isdir() accepts.
    for file in dirList:
        # NOTE(review): `file` is a bare name from os.listdir(), so this
        # check and the './'+file below are both resolved against the
        # current working directory, not against the directory the name was
        # listed from.
        if os.path.isdir(file):
            print 'Going in dir:',file
            dirList1= os.listdir('./'+file)
            # print 'files in list', dirList1
            sendFnF(dirList1)
            print 'backToPrevDirectory:'
        else:
            print 'file name is',file
# Script part: read a directory name from standard input, list it, and hand
# the resulting names to sendFnF().
filename= raw_input()
dirList= os.listdir('./'+filename)
sendFnF(dirList)
This code does get me into the folders of the current directory, but when it comes to sub-folders, it treats them as files.
Any idea what I am doing wrong?
Thanks in advance,
Sarge.
Prepending ./ to a path does essentially nothing. Also, just because you call a function recursively with a directory path doesn't change the current directory, and thus the meaning of . in a file path.
Your basic approach is right, to go down a directory use os.path.join(). It'd be best to restructure your code so you listdir() at the start of sendFnF():
def sendFnF(directory):
    """Recursively report every file and folder below *directory*.

    Each entry name is joined onto *directory* before it is tested, so the
    check works at any depth and does not depend on the current working
    directory.

    Args:
        directory: Path of the directory to list.

    Raises:
        OSError: Propagated from os.listdir() when *directory* cannot be
            listed.
    """
    for fname in os.listdir(directory):
        # Add the current directory to the filename
        fpath = os.path.join(directory, fname)
        # You need to check the full path, not just the filename
        if os.path.isdir(fpath):
            # single-argument print(...) behaves the same on Python 2 and 3
            print('Going in dir: ' + fname)
            sendFnF(fpath)
            print('backToPrevDirectory:')
        else:
            print('file name is ' + fname)
# ...
# Start the traversal at the directory name supplied by the user.
sendFnF(filename)
That said, unless this is an exercise, you can just use os.walk()
I have a directory structure that resembles the following:
Dir1
Dir2
Dir3
Dir4
L SubDir4.1
L SubDir4.2
L SubDir4.3
I want to generate a list of files (with full paths) that include all the contents of Dirs1-3, but only SubDir4.2 inside Dir4. The code I have so far is
import fnmatch
import os
# Question snippet (Python 2 print statement).
# NOTE(review): both `for` lines lack their trailing ':' and will not parse
# as written.
for root, dirs, files in os.walk( '.' )
    if 'Dir4' in dirs:
        # NOTE(review): this tests whether the literal text 'SubDir4.2' is a
        # substring of the literal text 'Dir4', which is always False. With
        # the `not`, the branch always runs, so Dir4 is removed from the walk
        # together with everything inside it.
        if not 'SubDir4.2' in 'Dir4':
            dirs.remove( 'Dir4' )
    for file in files
        print os.path.join( root, file )
My problem is that the part where I attempt to exclude any file that does not have SubDir4.2 in its path is excluding everything in Dir4, including the things I would like to keep. How should I amend the code above to do what I want?
Update 1: I should add that there are a lot of directories below Dir4, so manually listing them in an excludes list isn't a practical option. I'd like to be able to specify SubDir4.2 as the only subdirectory within Dir4 to be read.
Update 2: For reasons outside of my control, I only have access to Python version 2.4.3.
There are a few typos in your snippet. I propose this:
import os
def any_p(iterable):
    """Return True if at least one element of *iterable* is truthy.

    Stand-in for the built-in any(), which does not exist before Python 2.5.
    Iteration stops at the first truthy element; an empty iterable gives
    False.
    """
    found = False
    for candidate in iterable:
        if candidate:
            found = True
            break
    return found
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2', 'Dir3', 'Dir2'] # list every folder you want included here
for root, dirs, files in os.walk( '.' ):
    # Slice assignment changes `dirs` in place, so os.walk() only descends
    # into the names that are kept.
    # NOTE(review): `d in os.path.join(root, q_inc)` is a substring test of
    # the directory name against the joined path text, not a comparison of
    # path components -- check that it keeps and drops what you expect.
    dirs[:] = [d for d in dirs if any_p(d in os.path.join(root, q_inc) for q_inc in include_dirs)]
    for file in files:
        # prints the bare file name only; `root` is not included
        print file
EDIT: According to comments, I have changed that so this is include list, instead of an exclude one.
EDIT2: Added an any_p function (an equivalent of any() for Python versions < 2.5)
EDIT3bis: if you have other subfolders with the same name 'SubDir4.2' in other folders, you can use the following to specify the location:
include_dirs = ['Dir4/SubDir4.2', 'Dir1/SubDir4.2']
Assuming you have a Dir1/SubDir4.2.
If there are a lot of those, then you may want to refine this approach with fnmatch, or possibly a regex query.
I altered mstud's solution to give you what you are looking for:
import os;
for root, dirs, files in os.walk('.'):
    # Split the root into its path parts
    tmp = root.split(os.path.sep)
    # If the path has enough parts to have a parent directory AND
    # the second to last part of the path is Dir4 AND
    # the last part of the path is NOT SubDir4.2 THEN
    # stop processing this pass (nothing is printed for this directory).
    # NOTE(review): `dirs` is not pruned, so os.walk() still descends into
    # the skipped directory; anything nested deeper has a different tmp[-2]
    # and is printed by the loop below.
    if (len(tmp) > 2) and (tmp[-2] == 'Dir4') and (tmp[-1] != 'SubDir4.2'):
        continue
    # If we aren't in Dir4 itself, print the file paths.
    if tmp[-1] != 'Dir4':
        for file in files:
            print os.path.join(root, file)
In short, the first "if" skips the printing of any directory contents under Dir4 that aren't SubDir4.2. The second "if" skips the printing of the contents of the Dir4 directory.
# Print the path of every file below '.', except that inside any directory
# named Dir4 only the SubDir4.2 sub-directory is visited.
for root, dirs, files in os.walk('.'):
    tmp = root.split(os.path.sep)
    if tmp[-1] == "Dir4":
        # Prune in place (the walk is top-down): os.walk() only descends into
        # the names left in `dirs`, so every other sub-directory of Dir4 is
        # skipped together with everything nested inside it.
        dirs[:] = [d for d in dirs if d == "SubDir4.2"]
        # Files sitting directly in Dir4 are not part of SubDir4.2.
        continue
    for file in files:
        # single-argument print(...) behaves the same on Python 2 and 3
        print(os.path.join(root, file))
I'm just getting started with Python but already have found it much more productive than Bash shell scripting.
I'm trying to write a Python script that will traverse every directory that branches from the directory I launch the script in, and for each file it encounters, load an instance of this class:
class FileInfo:
    """Plain record holding a file name together with a path string."""

    def __init__(self, filename, filepath):
        # Both constructor arguments are stored unchanged as public attributes.
        self.filename, self.filepath = filename, filepath
The filepath attribute would be the full absolute path from root (/). Here's the pseudocode mockup for what I'd like the main program to do:
from (current directory):
for each file in this directory,
create an instance of FileInfo and load the file name and path
switch to a nested directory, or if there is none, back out of this directory
I've been reading about os.walk() and os.path.walk(), but I'd like some advice about what the most straightforward way to implement this in Python would be. Thanks in advance.
I'd use os.walk doing the following:
def getInfos(currentDir):
    """Return one FileInfo per file found anywhere under *currentDir*.

    Each FileInfo receives the file's name and the directory path that
    os.walk() reported for it, exactly as os.walk() produced it.
    """
    return [
        FileInfo(name, folder)
        for folder, _subdirs, names in os.walk(currentDir)  # walk directory tree
        for name in names
    ]
Try
# Collect one FileInfo per file found anywhere below the current directory.
info = []
for path, dirs, files in os.walk("."):
    # `path` is the directory os.walk() is currently visiting; it is passed
    # to FileInfo as-is (relative to ".", not made absolute).
    info.extend(FileInfo(filename, path) for filename in files)
or
# Build `info` with a single nested comprehension: the outer loop walks the
# tree below ".", the inner loop visits the file names of each directory.
info = [FileInfo(filename, path)
        for path, dirs, files in os.walk(".")
        for filename in files]
to get a list of one FileInfo instance per file.
Try it
import os
# os.walk() has no filename-pattern argument (its second parameter is the
# boolean `topdown`), so only the start directory is passed.
for item in os.walk("."):
    # each item is a (dirpath, dirnames, filenames) tuple
    print(item)