Python recursive directory reading without os.walk - python

I am trying to walk through tree of directories and search for data in output text files by defining a recursive function (and not using os.walk) in Python.
import os
# Recursively look for a file named 'output' and print its 'Final value:' lines.
def walkfn(dirname):
# NOTE: the relative name 'output' is resolved against the current working
# directory, not against dirname.
if os.path.exists('output'):
file1 = open('output')
for line in file1:
if line.startswith('Final value:'):
print line
else:
# No 'output' file found: visit every sub-directory of dirname.
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
# The return value of os.chdir() is stored in newdir and handed to the
# recursive call as its dirname argument.
newdir = os.chdir(path)
walkfn(newdir)
# Start the search from the current working directory.
cwd = os.getcwd()
walkfn(cwd)
I am getting the following error:
Traceback (most recent call last):
File "/home/Python Work/Test2.py", line 24, in <module>
walkfn(cwd)
File "/home/Python Work/Test2.py", line 19, in walkfn
walkfn(newdir)
File "/home/Python Work/Test2.py", line 12, in walkfn
for name in os.listdir(dirname):
TypeError: coercing to Unicode: need string or buffer, NoneType found

os.chdir() returns None, not the new directory name. You pass that result to the recursive walkfn() function, and then to os.listdir().
There is no need to assign, just pass path to walkfn():
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
os.chdir(path)
walkfn(path)
You usually want to avoid changing directories; there is no need to if your code uses absolute paths:
def walkfn(dirname):
    """Print the 'Final value:' lines of the 'output' files found under dirname.

    If dirname holds a regular file called 'output', its matching lines are
    printed and the search stops there.  Otherwise every sub-directory is
    announced (as "' name '") and searched recursively.  Only absolute or
    dirname-relative paths are used, so the current working directory is
    never changed.

    dirname -- path of the directory to search.
    Raises OSError if dirname cannot be listed or 'output' cannot be read.
    """
    output = os.path.join(dirname, 'output')
    # isfile() rather than exists(): a *directory* named 'output' must be
    # descended into, not passed to open().
    if os.path.isfile(output):
        with open(output) as file1:
            for line in file1:
                if line.startswith('Final value:'):
                    # Single-argument print() behaves the same on Python 2 and 3.
                    print(line)
    else:
        for name in os.listdir(dirname):
            path = os.path.join(dirname, name)
            if os.path.isdir(path):
                print("' %s '" % name)
                walkfn(path)

Related

I have to run my code twice to get desired outcome, why?

I'm finding that I have to run my code twice for the desired output and I'm not sure why. It's also printing a long string of letters in the shell that aren't needed. I'd just like it to be a bit cleaner.
The code creates folders with subfolders, based on files names, then moves the files into specific subfolders.
Filename example is "A123456-20190101-A01.mp3"
import os
import shutil
path = "/Volumes/ADATA UFD/For script/Files"
# Only regular files directly inside path are considered (no sub-folders).
file_names = [file for file in os.listdir(path) if
os.path.isfile(os.path.join(path, file))]
# Maps a single upper-case letter to the name of the sub-folder for that kind of file.
file_map = {'O':'1-Original','P':'2-PreservationMaster','M':'3-Mezzanine','T':'4-Presentation','A':'5-Access','R':'6-Reference'}
# Parent folder name = file name with everything after the last '-' removed.
parent_folders = set(file_name.rsplit('-', 1)[0] for file_name in file_names)
sub_folders = ['1-Original','2-PreservationMaster','3-Mezzanine','4-Presentation','5-Access','6-Reference']
# Create each parent folder and the six sub-folders; any failure is reported
# as "already exists".
for folder in parent_folders:
folder_path = os.path.join(path, folder)
try:
os.mkdir(folder_path)
except:
print('folder already exist:', folder_path)
for folders in sub_folders:
try:
folders_path = os.path.join(folder_path, folders)
os.mkdir(folders_path)
except:
print('folder already exists:', folders_path)
# Move every file into <parent folder>/<sub-folder chosen through file_map>.
for file_name in file_names:
parent_folder = file_name.rsplit('-', 1)[0]
# The character at index 19 of the file name is used as the file_map key.
ext = file_name[19]
print(ext)
dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
src = os.path.join(path, file_name)
try:
shutil.move(src, dest)
except Exception as e:
print(e)
I'm getting this error message:
Traceback (most recent call last):
File "/Volumes/ADATA UFD/For script/MoveFilesToPreservationBundleTest3.py", line 30, in <module>
dest = os.path.join(path, parent_folder, file_map[ext.upper()], file_name)
builtins.KeyError: '0'

How do I rename multiple files in Python, using part of the existing name?

I have a few hundred .mp4 files in a directory. When originally created their names were set as "ExampleEventName - Day 1", "ExampleEventName - Day 2" etc. thus they are not in chronological order.
I need a script to modify each of their names by taking the last 5 characters in the corresponding string and add it to the front of the name so that File Explorer will arrange them properly.
I tried using the os module .listdir() and .rename() functions, inside a for loop. Depending on my input I get either a FileNotFoundError or a TypeError:List object is not callable.
import os
# Work inside the target folder so the bare names from listdir() resolve.
os.chdir("E:\\New folder(3)\\New folder\\New folder")
for i in os.listdir("E:\\New folder(3)\\New folder\\New folder"):
# i[:5] is the FIRST five characters of the name; they are prepended to the full name.
os.rename(i, i[:5] +i)
Traceback (most recent call last):
File "C:/Python Projects/Alex_I/venv/Alex_OS.py", line 15, in <module>
os.rename(path + i, path + i[:6] +i)
FileNotFoundError: [WinError 2] The system cannot find the file specified:
import os, shutil
# file_list is the list returned by os.listdir().
file_list = os.listdir("E:\\New folder(3)\\New folder\\New folder")
# Here file_list is called like a function.
for file_name in file_list("E:\\New folder(3)\\New folder\\New folder"):
# dst is assigned twice below; the second assignment replaces this one.
dst = "!#" + " " + str(file_name) #!# meant as an experiment
# src and dst are built by plain string concatenation; no path separator is
# inserted between the folder and file_name.
src = "E:\\New folder(3)\\New folder\\New folder" + file_name
dst = "E:\\New folder(3)\\New folder\\New folder" + file_name
os.rename(src, dst)
file_name +=1
Traceback (most recent call last):
File "C:/Python Projects/Alex_I/venv/Alex_OS.py", line 14, in <module>
for file_name in file_list("E:\\New folder(3)\\New folder\\New folder"):
TypeError: 'list' object is not callable
Some other approach:
This one does not rely on a fixed length (5 characters) for the part of the name that gets moved:
import glob
import os

# To create test data (99 empty files), run once:
# for i in range(1, 99):
#     with open("mymusic/ExampleEventName - Day {}.mp4".format(i), "w+") as f:
#         f.flush()
# A fixed "last 5 characters" slice breaks as soon as the day number gains a
# digit, so the name is split at the final "- Day X" part instead.


def chronological_name(path):
    """Return path with the trailing day number moved to the front of the name.

    "dir/ExampleEventName - Day 7.mp4" becomes "dir/7ExampleEventName.mp4".

    path -- path of a file named "<event> - Day <number><ext>".
    Returns the new path, or None when the name holds no stand-alone number.
    """
    # split path from file: head (directory), tail (file name)
    head, tail = os.path.split(path)
    basename, ext = os.path.splitext(tail)
    numbers = [int(s) for s in basename.split() if s.isdigit()]
    if not numbers:
        return None
    # The day number is the last token; an event name may contain numbers too.
    day = numbers[-1]
    # Split only at the LAST hyphen so hyphenated event names stay intact.
    event = basename.rsplit("-", 1)[0].rstrip()
    # os.path.join keeps the result valid on every platform.
    return os.path.join(head, "{}{}{}".format(day, event, ext))


for mp4 in sorted(glob.glob("mymusic/*")):
    newfile = chronological_name(mp4)
    if newfile is None:
        # Nothing to reorder for names without a day number.
        continue
    print(newfile)
    os.rename(mp4, newfile)
You're having multiple problems:
You're trying to increment a value that should not be incremented. Also you've created the list file_list, and thus it should not take any arguments anymore.
When using the syntax:
for x in y:
you do not have to increment the value. It will simply iterate through the list until there is no more left.
Therefore you simply have to leave out the incrementation and iterate through the list file_list.
import os
import shutil

folder = "E:\\New folder(3)\\New folder\\New folder"
file_list = os.listdir(folder)
# file_list is a list and therefore not callable: iterate over it directly.
for file_name in file_list:
    # os.path.join inserts the path separator that plain "+" concatenation
    # leaves out (the cause of the FileNotFoundError).
    src = os.path.join(folder, file_name)
    # "!# " is only an experimental prefix; dst must differ from src,
    # otherwise the rename does nothing.
    dst = os.path.join(folder, "!# " + file_name)
    os.rename(src, dst)
    # No "file_name += 1": the for loop advances through the list by itself.
Now your solution should work.

Renaming file with os.rename causing NameError

I'm trying to rename 2 raster files: old_name.jpg and old_name.tiff to new_name.jpg and new_name.tiff:
new_name = 'new_name' # defining new name here
# Walk TargetDir and rename every matching file in place (same directory).
for root_dir, dirname, filenames in os.walk(TargetDir):
for file in filenames:
# Case-insensitive match on names ending in "jpg" preceded by at least one
# character (the dots in the pattern are unescaped, so they match anything).
if re.match(r'.*.jpg$', file, re.IGNORECASE) is not None: # converting jpg
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".jpg"))
# Same test for names ending in "tiff".
if re.match(r'.*.tiff$', file, re.IGNORECASE) is not None: # converting tiff
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".tiff"))
It works on jpg like charm, but then throws
Traceback (most recent call last):
File "C:/!Scripts/py2/meta_to_BKA.py", line 66, in <module>
os.rename(os.path.join(root_dir, file), os.path.join(root_dir, new_name + ".tiff"))
NameError: name 'new_name' is not defined
Note that it uses new_name to rename jpg, but then variable vanishes in the very next block. I tried using shutil.move(), but got the same error. What is the problem?
The stack trace suggests that your snippet isn't the whole story.
I can't reproduce:
from __future__ import division, print_function, unicode_literals
import os
TargetDir = '/tmp/test'
new_name = 'new_name'
# Extensions (lower-case, without the dot) of the files to rename.
# Built once instead of on every loop iteration.
RASTER_EXTENSIONS = frozenset(['jpg', 'tiff'])


def main(target_dir=None, base_name=None):
    """Rename every .jpg/.tiff file below target_dir to base_name.<ext>.

    target_dir -- directory to walk; defaults to the module-level TargetDir.
    base_name  -- new file name without extension; defaults to new_name.

    The extension test is case-insensitive and the new name always gets the
    lower-cased extension.  Files without a dot in their name are ignored.
    Raises OSError if a rename fails.
    """
    if target_dir is None:
        target_dir = TargetDir
    if base_name is None:
        base_name = new_name
    for root_dir, _, filenames in os.walk(target_dir):
        for filename in filenames:
            if '.' not in filename:
                continue
            extension = filename.rsplit('.', 1)[-1].lower()
            if extension not in RASTER_EXTENSIONS:
                continue
            from_fn = os.path.join(root_dir, filename)
            to_fn = os.path.join(root_dir, '{}.{}'.format(base_name, extension))
            if from_fn == to_fn:
                # Already carries the target name; nothing to move.
                continue
            print('Moving', from_fn, 'to', to_fn)
            os.rename(from_fn, to_fn)


if __name__ == '__main__':
    main()
but I took the liberty of rewriting a bit.
> python hest.py
Moving /tmp/test/narf.jpg to /tmp/test/new_name.jpg
Moving /tmp/test/bla.tiff to /tmp/test/new_name.tiff

Having Trouble outputting to a file

I have this simple code here
import os
path = (raw_input("Enter dir: "))
# f is bound to the log file, opened for writing.
f = open('script_list.log', 'w')
for dirpath, dirname, filenames in os.walk(path):
# The list comprehension below also uses the name f, as its loop variable.
for filename in [f for f in filenames]:
f.write(str(filename) + "\n")
print os.path.join(dirpath, filename)
When I run it I am getting the following
Enter dir: scripts
Traceback (most recent call last):
File "C:\Documents and Settings\CRichards\My Documents\My Dropbox\this_code.py", line 8, in <module>
f.write(str(filename) + "\n")
AttributeError: 'str' object has no attribute 'write'
I know it must be something simple, I just can't see it.
You're rebinding f in the loop when you do [f for f in filenames]. When you get to the point where f.write is called, f is the last member of filenames, so it's a string. Rename the outer f to something like log or output, or better, get rid of the useless list comprehension:
for filename in filenames:
suffices.
(List comprehensions don't introduce a new scope.)
for filename in [f for f in filenames]:
should instead be
for filename in filenames:

Python: Current directory in an os.walk

I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
# Substrings looked for in shapefile names.
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
# Collects the shapefile paths found below; note that this name shadows the built-in list.
list =[]
# Append a time-stamped header to the log file.
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
# os.path.abspath() resolves the bare file name against the current working
# directory; root is not used here.
currentPath=os.path.abspath(file)
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
# Shapefiles in the current working directory whose name contains the first search string.
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
# Same search for the second search string.
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
# 'directory' is only assigned by the commented-out loop above.
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
# Record the collected list in the log file.
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
So each fc should be appended to the list together with its full path, but the list only gets the root path plus the final part of the path, not the directories in between.
Thanks for your advice,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, arcpy, sys, datetime
top = os.getcwd()
RootOutput = top
FileTypes = ['shp']
SearchStrings = ['Temp_Pol', 'Spatial_Ex']


def find_matching_files(top_dir, file_types, search_strings):
    """Return the paths of all files under top_dir that match type and name.

    top_dir        -- directory to walk (bottom-up, like the original loop).
    file_types     -- extensions to accept, with or without the leading dot;
                      compared case-insensitively.
    search_strings -- a file is kept when its NAME contains at least one of
                      these substrings.

    Each file is listed once, even when several search strings match it.
    """
    # Anchor on the dot so e.g. "fooshp" is not mistaken for a .shp file.
    suffixes = tuple('.' + t.lstrip('.').lower() for t in file_types)
    matches = []
    for root, dirs, files in os.walk(top_dir, topdown=False):
        for name in files:
            if not name.lower().endswith(suffixes):
                continue
            # Test the file name only: a search string that happens to occur
            # in a parent directory name must not select every file below it.
            if any(s in name for s in search_strings):
                matches.append(os.path.join(root, name))
    return matches


def main():
    """Log the run, collect the matching files and record the result."""
    print("Working in: " + os.getcwd())
    log_path = os.path.join(RootOutput, 'Success_LOG.txt')
    merged_path = os.path.join(RootOutput, 'Full_Extent.shp')
    with open(log_path, 'a') as log:
        log.write("Log of files Succesfully processed. RESULT of process run #:" + str(datetime.datetime.now()) + "\n")
    matches = find_matching_files(top, FileTypes, SearchStrings)
    print('Merging: ' + str(matches))
    # Replace with any function you want to carry out on the generated list of files.
    # arcpy.Merge_management(matches, merged_path)
    print('Created: ' + merged_path)
    with open(log_path, 'a') as log:
        log.write(str(matches) + "\n\n Merged to: " + merged_path)


if __name__ == '__main__':
    main()
You should use
os.path.join(root, file)
instead of simply using file, as shown in the examples in the os.walk documentation.
Btw, be careful not to shadow built-in names: file and list are both built-ins (not reserved keywords), so rebinding them hides the originals:
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable
Some applications really do need to change the current working directory while inside an os.walk loop. In that case I would suggest changing the current working directory twice, as shown below: once into the directory being processed and once back to the top. This is for situations where absolute file paths will not help.
from os import listdir
from os.path import isfile, join
import os
import re
# store the location of the top most directory
top = os.getcwd()
try:
    # top is absolute, so os.walk keeps working after each chdir below.
    for (dirname, dirs, files) in os.walk(top):
        # Enter each visited directory once, not once per file.
        os.chdir(dirname)
        for filename in files:
            # add all your operations for the current job in the directory
            pass
finally:
    # Now go back to the top of the chain, even if an operation failed.
    os.chdir(top)
It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
    '''A recursive/regex enhanced glob

    Iterating over an instance yields the path of every directory entry
    below C{directory} whose name matches C{pattern}.  The instance is
    consumed by iteration, like a generator.

    adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        ''' @type    directory: C{str}
            @param   directory: Path to search
            @type    pattern: C{str}
            @param   pattern: Regular expression/wildcard pattern to match files against
            @type    regex: C{boolean}
            @param   regex: Use regular expression matching (if False, use fnmatch)
                            See U{http://docs.python.org/library/re.html}
            @type    regex_flags: C{int}
            @param   regex_flags: Flags to pass to the regular expression compiler.
                                  See U{http://docs.python.org/library/re.html}
            @type    recurse: C{boolean}
            @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]   # directories still to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []            # entries of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def _matches(self, name):
        '''Return True when the entry name satisfies the configured pattern.'''
        if self.regex:
            import re
            return re.search(self.pattern, name, self.regex_flags) is not None
        import fnmatch
        return fnmatch.fnmatch(name, self.pattern)

    def _next_match(self):
        '''Return the next matching path; raise IndexError when exhausted.'''
        while True:
            if self.index >= len(self.files):
                # Current directory is done: pop the next one from the stack.
                # pop() on an empty stack raises IndexError, which is the
                # end-of-iteration signal.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it.
                    self.files = []
                self.index = 0
                continue
            # got a filename
            name = self.files[self.index]
            self.index += 1
            fullname = os.path.join(self.directory, name)
            # Symbolic links are not followed.
            if self.recurse and os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            if self._matches(name):
                return fullname

    def __getitem__(self, index):
        '''Legacy sequence-style iteration: index is ignored and the next match is returned.'''
        return self._next_match()

    def __iter__(self):
        '''Yield the remaining matches one by one.'''
        while True:
            try:
                match = self._next_match()
            except IndexError:
                return
            yield match
# Collect every shapefile below top.  A comprehension is used on purpose:
# it still works if the name list has been rebound elsewhere in the script.
shplist = [shp for shp in rglob(top, '*.shp')]
merged_path = os.path.join(RootOutput, 'Full_Extent.shp')
print('Merging: ' + str(shplist))
#arcpy.Merge_management(shplist, merged_path)
print('Created: ' + merged_path)
# The with-block closes the log even if the write fails.
with open(os.path.join(RootOutput, 'Success_LOG.txt'), 'a') as f:
    f.write(str(shplist) + "\n\n Merged to: " + merged_path)

Categories