Finding duplicate files with python

Finding duplicate files with python - python

I'm trying to write a Python script that will crawl through a directory and find all files that are duplicates and report back the duplicates. What's the best was to solve this?
import os, sys
def crawlDirectories(directoryToCrawl):
crawledDirectory = [os.path.join(path, subname) for path, dirnames, filenames in os.walk(directoryToCrawl) for subname in dirnames + filenames]
return crawledDirectory
#print 'Files crawled',crawlDirectories(sys.argv[1])
directoriesWithSize = {}
def getByteSize(crawledDirectory):
for eachFile in crawledDirectory:
size = os.path.getsize(eachFile)
directoriesWithSize[eachFile] = size
return directoriesWithSize
getByteSize(crawlDirectories(sys.argv[1]))
#print directoriesWithSize.values()
duplicateItems = {}
def compareSizes(dictionaryDirectoryWithSizes):
for key,value in dictionaryDirectoryWithSizes.items():
if directoriesWithSize.values().count(value) > 1:
duplicateItems[key] = value
compareSizes(directoriesWithSize)
#print directoriesWithSize.values().count(27085)
compareSizes(directoriesWithSize)
print duplicateItems
Why does this throw back this error?
Traceback (most recent call last):
File "main.py", line 16, in <module>
getByteSize(crawlDirectories(sys.argv[1]))
File "main.py", line 12, in getByteSize
size = os.path.getsize(eachFile)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/genericpath.py", line 49, in getsize
OSError: [Errno 2] No such file or directory: '../Library/Containers/com.apple.ImageKit.RecentPictureService/Data/Documents/iChats'

It seems to me that your crawledDirectory function is too complicated:
def crawlDirectories(directoryToCrawl):
output = []
for path, dirnames, filenames in os.walk(directoryToCrawl):
for fname in filenames:
output.append(os.path.join(path,fname))
return output

I'd suggest to try:
def crawlDirectories(directoryToCrawl):
crawledDirectory = [os.path.realpath(os.path.join(p, f))
for (p, d, f) in os.walk(directoryToCrawl)]
return crawledDirectory
That is, use a canonical path instead of relative paths in your crawl.

Related

I want to rename all the files in a folder with python

from glob import glob
from os import rename
import time
arrr = []
def getnames():
with open("name.txt", "r+") as nameFile:
for name in nameFile:
nameFile.readline()
newlinestrip = name.strip("\n")
arrr.append(newlinestrip)
renames()
def renames():
for fname in glob('*.png'):
print(fname)
for name in arrr:
time.sleep(1)
rename(fname, name)
print("bruh")
getnames()
It renames the 1st file and the it crashes with the error
Traceback (most recent call last):
File ".\rename.py", line 24, in <module>
getnames()
File ".\rename.py", line 13, in getnames
renames()
File ".\rename.py", line 20, in renames
rename(fname, name)
FileNotFoundError: [WinError 2] Den angivne fil blev ikke fundet: 'sans (100).png' -> 'acacia_door_top.png'
And i don't know how to fix this, i have a txt file with the new names that looks something like this
name.png
name1.png
and so on.

On way is to use zip() function if the len of both the files and arr is equal, However you could just the following if the intention is to rename all the files with names like name1.png, name2.png .. name608.png:
import os
for count, filename in enumerate(os.listdir("someDir")):
dst = "name" + str(count) + ".png"
src = 'someDir' + filename
dst = 'someDir' + dst
# rename() function will rename all the files
os.rename(src, dst)

os.walk find a directory which is not even present

I need to extract some values out of a file, i wrote the following code.
import os
import sys
rootdir='/home/nsingh/ansible-environments/aws'
for root, subdirs, files in os.walk(rootdir):
for j in subdirs:
print j
mypath=rootdir+'/'+j+'/inventory/group_vars/all'
#print mypath
fo=open(mypath,'r')
f=fo.readlines()
for line in f:
if ('isv_alias' in line or 'LMID' in line or 'products' in line):
path='/home/nsingh/krikCSV_fun.csv'
if('isv_alias' in line):
line=line.strip('isv_alias=')
line= line.strip('"')
elif('LMID' in line):
line=line.strip('LMID=')
else:
line=line.strip('products=')
fi= open(path,'a+')
fi.write(line)
fi.close()
fo.close()
the os.walk method somehow finds a hidden directory as well which is not actually present
loadgen
crapcity
rmstest2
suricatatest
.git
Traceback (most recent call last):
File "testme.py", line 9, in <module>
fo=open(mypath,'r')
IOError: [Errno 2] No such file or directory: '/home/nsingh/ansible-environments/aws/.git/inventory/group_vars/all'
OUTPUT:
: "suricatatest"^M
: suricatatest
: rms_ems_hosted
: 26
: rmstest2
: rms_scl
: 80
: suricatatest
: rms_ems_hosted
: 26
: "suricatatest"^M
: suricatatest
: rms_ems_hosted
: 26
I need the output as & also remove the semicolon:
suricatatest rms_ems_hosted 26

What makes you think that /.git doesn't exist?
Try this:
import os
rootdir = '/home/nsingh/ansible-environments/aws'
for root, subdirs, files in os.walk(rootdir):
for j in subdirs:
print(j)
my_path = rootdir + '/' + j + '/inventory/group_vars/all'
if os.path.isfile(my_path):
with open(my_path, 'r') as fo:
for line in fo.readlines():
if 'isv_alias' in line or 'LMID' in line or 'products' in line:
path = '/home/nsingh/krikCSV_fun.csv'
if 'isv_alias' in line:
line = line.strip('isv_alias=')
line = line.strip('"')
elif 'LMID' in line:
line = line.strip('LMID=')
else:
line = line.strip('products=')
with open(path, 'a+') as fi:
fi.write(line.lstrip(": "))

You should use os.path to make the file paths. os.walk will visit all the directories in the tree under the top directory - you are only interested in directories that end with 'inventory/group_vars', so check for that and take action. If you want to write the values as a group, you need to collect them in something.
import os, os.path, collections
rootdir = '/home/nsingh/ansible-environments/aws'
sub_folder = 'inventory/group_vars'
out_path = '/home/nsingh/krikCSV_fun.csv'
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
data = collections.defaultdict(list)
with open(os.join(dirpath, 'all')) as f, open(out_path, 'a+') as out:
for line in f:
if 'isv_alias' in line:
line = line.strip('isv_alias=')
line = line.strip('"')
data['isv_alias'].append(line)
elif 'LMID' in line:
line = line.strip('LMID=')
data['LMID'].append(line)
elif 'products' in line:
line = line.strip('products=')
data['products'].append(line)
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
I used a defaultdict to store multiple items of interest from a single file. If there is only one 'isv_alias', 'LMID', 'products' group in each file then you could just as easily store the information in a list or namedtuple.
You didn't provide an example of the file(s) so it's not clear what the line structure is. If it looks like this:
isv_alias="foo"
LMID=bar
products=26
It can be simplified to
keys = {'isv_alias', 'LMID', 'products'}
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
data = collections.defaultdict(list)
with open(os.join(dirpath, 'all')) as f, open(out_path, 'a+') as out:
for line in f:
line = line.strip()
key, value = line.split('=')
if key in keys:
value = value.strip('"')
data[key].append(value)
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
As long as you are accumulating the information in data, you can just open the output file once
data = collections.defaultdict(list)
keys = {'isv_alias', 'LMID', 'products'}
for dirpath, dirnames, filenames in os.walk(rootdir):
if dirpath.endswith(sub_folder):
with open(os.join(dirpath, 'all')) as f:
for line in f:
line = line.strip()
key, value = line.split('=')
if key in keys:
value = value.strip('"')
data[key].append(value)
with open(out_path, 'a+') as out:
for a, b, c in zip(*data.values()):
out.write('{},{},{}\n'format(a, b, c))
If using Python 3.6 or an ordered defaultdict then the solution above assumes the order of appearance of each key in the file is the order you want them written out.
If the file structure isn't ordered or the dictionary used isn't ordered, write to the file like this:
for a, b, c in zip(data['isv_alias'], data['LMID'], data['products']):
out.write('{},{},{}\n'format(a, b, c))

Python: Print file names and their directory based on their file size

I want to print filenames and their directory if their filesize is more than a certain amount. I wrote one and set the bar 1KB, but it doesn't work even if there are plenty of files larger than 1KB.
import os, shutil
def deleteFiles(folder):
folder = os.path.abspath(folder)
for foldername, subfolders, filenames in os.walk(folder):
for filename in filenames:
if os.path.getsize(filename) > 1000:
print(filename + ' is inside: ' + foldername)
deleteFiles('C:\\Cyber\\Downloads')
And I got 'Nothing'!
and then I wrote codes in interactive shell, I got following error:
Traceback (most recent call last):
File "<pyshell#14>", line 3, in <module>
if os.path.getsize(filename) > 100:
File "C:\Users\Cyber\Downloads\lib\genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError:
I am wondering How I can fix my code.

os can't find the file without a given path, following your code, you have to re-specify the absolute path. Replace
if os.path.getsize(filename) > 1000:
with
if os.path.getsize(os.path.abspath(foldername + "/" + filename)) > 1000:
And it should work.

Replace:
deleteFiles('C:\\Cyber\\Downloads')
with
import os
a = 'c:' # removed slash
b = 'Cyber' # removed slash
c = 'Downloads'
path = os.path.join(a + os.sep, b, c)
deleteFiles(path)

Python recursive directory reading without os.walk

I am trying to walk through tree of directories and search for data in output text files by defining a recursive function (and not using os.walk) in Python.
import os
def walkfn(dirname):
if os.path.exists('output'):
file1 = open('output')
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
newdir = os.chdir(path)
walkfn(newdir)
cwd = os.getcwd()
walkfn(cwd)
I am getting the following error:
Traceback (most recent call last):
File "/home/Python Work/Test2.py", line 24, in <module>
walkfn(cwd)
File "/home/Python Work/Test2.py", line 19, in walkfn
walkfn(newdir)
File "/home/Python Work/Test2.py", line 12, in walkfn
for name in os.listdir(dirname):
TypeError: coercing to Unicode: need string or buffer, NoneType found

os.chdir() returns None, not the new directory name. You pass that result to the recursive walkfn() function, and then to os.listdir().
There is no need to assign, just pass path to walkfn():
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
os.chdir(path)
walkfn(path)
You usually want to avoid changing directories; there is no need to if your code uses absolute paths:
def walkfn(dirname):
output = os.path.join(dirname, 'output')
if os.path.exists(output):
with open(output) as file1:
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
walkfn(path)

Python: Current directory in an os.walk

I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
currentPath=os.path.abspath(file)
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
so the list should be appended with the fc and the full path to it but just gets the root path and the final part of the path -not the directories in between.
Thanks for your advise,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'

Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, arcpy, sys, datetime
top = os.getcwd()
RootOutput = top
FileTypes=['shp']
SearchStrings=['Temp_Pol', 'Spatial_Ex']
filecount=0
#successcount=0
#errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
for root, dirs, files in os.walk(top, topdown=False):
for fl in files:
currentFile=os.path.join(root, fl)
for FileType in FileTypes:
status= str.endswith(currentFile,FileType)
if str(status) == 'True':
for SearchString in SearchStrings:
if str(SearchString in currentFile) == 'True':
#print str(currentFile)+str(status)
filecount=filecount+1
list.append(currentFile)
print 'Merging: ' + str(list)
#Replace with any function you want to carry out on the generated list of files.
#arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()

You should use
os.path.join(root, file)
instead of simply using file like suggested in the os.walk doc examples os.walk
Btw, be careful with the reserved keywords. file is a built-in function and list too
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable

For some applications we always need to change the current working directory while we are in os.walk recursive call, in that case I would suggest changing the current working directory twice as shown below. I am writing about situations where having absolute file paths will not help .
from os import listdir
from os.path import isfile, join
import os
import re
# store the location of the top most directory
top = os.getcwd()
for (dirname, dirs, files) in os.walk(os.getcwd()):
for filename in files:
os.chdir(dirname)
# add all your operations for the current job in the directory
# Now go back to the top of the chain
os.chdir(top)

It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
'''A recursive/regex enhanced glob
adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm
'''
def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
''' #type directory: C{str}
#param directory: Path to search
#type pattern: C{type}
#param pattern: Regular expression/wildcard pattern to match files against
#type regex: C{boolean}
#param regex: Use regular expression matching (if False, use fnmatch)
See U{http://docs.python.org/library/re.html}
#type regex_flags: C{int}
#param regex_flags: Flags to pass to the regular expression compiler.
See U{http://docs.python.org/library/re.html}
#type recurse: C{boolean}
#param recurse: Recurse into the directory?
'''
self.stack = [directory]
self.pattern = pattern
self.regex = regex
self.recurse = recurse
self.regex_flags = regex_flags
self.files = []
self.index = 0
def __getitem__(self, index):
while 1:
try:
file = self.files[self.index]
self.index = self.index + 1
except IndexError:
# pop next directory from stack
self.directory = self.stack.pop()
try:
self.files = os.listdir(self.directory)
self.index = 0
except:pass
else:
# got a filename
fullname = os.path.join(self.directory, file)
if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
self.stack.append(fullname)
if self.regex:
import re
if re.search(self.pattern,file,self.regex_flags):
return fullname
else:
import fnmatch
if fnmatch.fnmatch(file, self.pattern):
return fullname
shplist=[shp for shp in rglob(top,'*.shp')]
print 'Merging: ' + str(shplist)
#arcpy.Merge_management(shplist, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(shplist)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Finding duplicate files with python - python

It seems to me that your crawledDirectory function is too complicated: def crawlDirectories(directoryToCrawl): output = [] for path, dirnames, filenames in os.walk(directoryToCrawl): for fname in filenames: output.append(os.path.join(path,fname)) return output

I'd suggest to try: def crawlDirectories(directoryToCrawl): crawledDirectory = [os.path.realpath(os.path.join(p, f)) for (p, d, f) in os.walk(directoryToCrawl)] return crawledDirectory That is, use a canonical path instead of relative paths in your crawl.

Related

I want to rename all the files in a folder with python

os.walk find a directory which is not even present

Python: Print file names and their directory based on their file size

Python recursive directory reading without os.walk

Python: Current directory in an os.walk

Categories

Resources