PYTHON ghostscript loop only executes once - python

I have a folder called pdfs. I first obtain a list of the files and print them:
import ghostscript, os
from os import listdir
from os.path import isfile, join
def get_files(path):
input_files = [f for f in listdir(path) if isfile(join(path, f))]
return input_files
def pdf2jpeg(pdf_input_path, jpeg_output_path):
args = ["pdf2jpeg", # actual value doesn't matter
"-dNOPAUSE",
"-sDEVICE=jpeg",
"-dJPEGQ=95",
"-r600x600",
"-sOutputFile=" + jpeg_output_path,
pdf_input_path]
ghostscript.Ghostscript(*args)
if __name__ == '__main__':
input_files = get_files("pdfs")
# pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg")
for input_file in input_files:
input_file_name = str("pdfs/"+str(input_file))
output_file_name = str('jpgs/'+str(input_file).replace(" ", "_").replace("pdf", "jpg"))#split(".")[0]
print input_file_name
print output_file_name
# pdf2jpeg(input_file_name, output_file_name)
OUTPUT:
pdfs/test1 (5th copy).pdf
jpgs/test1_(5th_copy).jpg
pdfs/test1 (copy).pdf
jpgs/test1_(copy).jpg
pdfs/test1 (4th copy).pdf
jpgs/test1_(4th_copy).jpg
pdfs/test1 (3rd copy).pdf
jpgs/test1_(3rd_copy).jpg
pdfs/test1 (another copy).pdf
jpgs/test1_(another_copy).jpg
Also when i execute pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg") the code works and I get the converted jpg.
Now when I want to loop through the list and uncoment the last line:pdf2jpeg(input_file_name, output_file_name)
if __name__ == '__main__':
input_files = get_files("pdfs")
# pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg")
for input_file in input_files:
input_file_name = str("pdfs/"+str(input_file))
output_file_name = str('jpgs/'+str(input_file).replace(" ", "_").replace("pdf", "jpg"))#split(".")[0]
print input_file_name
print output_file_name
pdf2jpeg(input_file_name, output_file_name)
I GET THIS ERROR:
Traceback (most recent call last):
File "gsPdf2Jpg.py", line 28, in <module>
pdf2jpeg(input_file_name, output_file_name)
File "gsPdf2Jpg.py", line 17, in pdf2jpeg
ghostscript.Ghostscript(*args)
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/__init__.py", line 157, in Ghostscript
stderr=kw.get('stderr', None))
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/__init__.py", line 72, in __init__
rc = gs.init_with_args(instance, args)
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/_gsprint.py", line 177, in init_with_args
raise GhostscriptError(rc)
ghostscript._gsprint.GhostscriptError: limitcheck
I went through and changed the loop to only go through individual input_files[0], input_files[1] and they work, the moment I loop them all they stop working. The only thing I can think of is that I have to clear something from the memory, or disconnect from the file. I am just taking wild guesses here.

Related

IOError: [Errno 2] No such file or directory?

I'm trying to move 220 files in Wheat to train_reuters file package,and the another files in wheat move to train_reuters test_reuters file package,but when I run the code,it give me the error,I actually have the file in the right place!how can I solve the problem?
#!/usr/bin/python
#coding:utf-8
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import os
import os.path
import shutil
import random
path = '/home/user1/zhouchun/lda/reuters-21578/Wheat'
targetpath1 = '/home/user1/zhouchun/lda/reuters-21578/train_reuters'
targetpath2 = '/home/user1/zhouchun/lda/reuters-21578/test_reuters'
list=random.sample(range(1, 306),220)
for i in list:
file_dir = os.path.join(path, str(i))
# print file_dir
shutil.move(file_dir, targetpath1)
files = os.listdir(path)
for file in files:
# print file
dir = os.path.join(path, file)
if dir != file_dir:
shutil.move(dir, targetpath2)
I checked your code, it is right.
Then the problem maybe:
1. you could only run your code one time, two or more times will cause this error.
2. Before you run your code, make sure all the 306 files are in Wheat directory.
I suggest using copy, but not move,then clear the train and test file before each run.
Please check the ome/user1/zhouchun/lda/reuters-21578/Wheat files number is 305.
I created a function write the random file, the code you can reference.
import random
import os
path = r'E:\temp\temp'
list= random.sample(range(1, 306), 220)
for i in list:
file_dir = os.path.join(path, str(i))
with open(file_dir, 'w') as f:
f.write('file_dir: %s' % file_dir)
f.close()
please notices the 220 in line list= random.sample(range(1, 306), 220).
After do this paste you code and change the path,
#!/usr/bin/python
#coding:utf-8
import sys
import os.path
import shutil
import random
import time
path = r'E:\temp\temp'
targetpath1 = r'E:\temp\old'
targetpath2 = r'E:\temp\new'
# move the file
list = random.sample(range(1, 306), 220)
for i in list:
file_dir = os.path.join(path, str(i))
# print file_dir
# targetpath1_dir = os.path.join(targetpath1, str(i))
shutil.move(file_dir, targetpath1)
files = os.listdir(path)
for file in files:
# print(file)
# print file
dir = os.path.join(path, file)
if dir != file_dir:
shutil.move(dir, targetpath2)
Than run the code, the error information will income.
Traceback (most recent call last):
File "D:\Python_3.5\lib\shutil.py", line 544, in move
os.rename(src, real_dst)
FileNotFoundError: [WinError 2] System can't found the file.: 'E:\\temp\\temp\\182' -> 'E:\\temp\\old\\182'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/Python_Code/FaceDetect/123123.py", line 31, in <module>
shutil.move(file_dir, targetpath1)
File "D:\Python_3.5\lib\shutil.py", line 558, in move
copy_function(src, real_dst)
File "D:\Python_3.5\lib\shutil.py", line 257, in copy2
copyfile(src, dst, follow_symlinks=follow_symlinks)
File "D:\Python_3.5\lib\shutil.py", line 120, in copyfile
with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: 'E:\\temp\\temp\\182'
After change the number from 220 to 305 in line list= random.sample(range(1, 306), 220), the error will disappear.
The complete code.
#!/usr/bin/python
#coding:utf-8
import sys
import os.path
import shutil
import random
import time
path = r'E:\temp\temp'
targetpath1 = r'E:\temp\old'
targetpath2 = r'E:\temp\new'
# create the random file.
list= random.sample(range(1, 306), 220)
for i in list:
file_dir = os.path.join(path, str(i))
with open(file_dir, 'w') as f:
f.write('file_dir: %s' % file_dir)
f.close()
time.sleep(1)
# move the file
list = random.sample(range(1, 306), 220)
for i in list:
file_dir = os.path.join(path, str(i))
# print file_dir
# targetpath1_dir = os.path.join(targetpath1, str(i))
shutil.move(file_dir, targetpath1)
files = os.listdir(path)
for file in files:
# print(file)
# print file
dir = os.path.join(path, file)
if dir != file_dir:
shutil.move(dir, targetpath2)
Please reference.

Issue with moving a file using shutil.move()

I am trying to move a file using the shutil module in Python. I keep getting this error:
Traceback (most recent call last):
File "<input>", line 31, in <module>
File "C:\Python27\lib\shutil.py", line 302, in move
copy2(src, real_dst)
File "C:\Python27\lib\shutil.py", line 130, in copy2
copyfile(src, dst)
File "C:\Python27\lib\shutil.py", line 82, in copyfile
with open(src, 'rb') as fsrc:
IOError: [Errno 2] No such file or directory: 'MLR_Resi_Customer.txt'
I do not understand why I am getting no such file or directory. If instead of using a shutil.move(filename, new_dest) it will print the file name I am looking for.
import shutil
import os
import datetime
# set location of folders and files needed
source = '//nspinfwcipp01/BSA/marketing'
dest_cust = '//nspinfwcipp01/BSA/marketing/res_cust'
dest_pros = '//nspinfwcipp01/BSA/marketing/res_pros'
dest_win = '//nspinfwcipp01/BSA/marketing/res_win'
# create date time stamp of now
dt = str(datetime.datetime.now())
files = os.listdir(source)
print files
# create new path to storage files for old data
cust_files = os.listdir(dest_cust)
pros_files = os.listdir(dest_pros)
win_files = os.listdir(dest_win)
# new file names
new_cust = 'MLR_Resi_Customers'+dt+'.txt'
new_pros = 'MLR_Resi_Prospects'+dt+'.txt'
new_win = 'MLR_Resi_Winbacks'+dt+'.txt'
#move files from marketing folder into their own folder when after done processing
for f in files:
if (f.endswith("Customer.txt")):
print f
shutil.move(f,dest_cust)
elif (f.endswith("Prospects")):
#print f
shutil.move(f,dest_pros)
elif (f.endswith("Winbacks")):
#print f
shutil.move(f,dest_win)
##rename files in storage with data for Kalyan and Jake's Project
## rename customer file for storage
for x in cust_files:
if (x.endswith("Customers")):
#print x
new_cust = 'MLR_Resi_Customer'+dt+'.txt'
os.rename('MLR_Resi_Customers.txt', new_cust)
else:
print "no_customer_file"
## rename prospect file for storage
for x in cust_files:
if (x.endswith("Prospects")):
#print x
os.rename('MLR_Resi_Prospects.txt', new_pros)
else:
print "no_prospect_file"
## rename winback file for storage
for x in cust_files:
if (x.endswith("Winbacks")):
#print x
os.rename('MLR_Resi_Winbacks.txt', new_win)
else:
print "no_winback_file"
So I am not sure what I am doing wrong. The path to the files is correct and It seems to be able to print the file name just fine. Any help with those issues above is greatly appreciated.
Use shutil.move(glob.glob(f)[0],dest_whatever) and that should solve your problem by giving it an actual path to the file, although, if the file doesn't exist glob.glob will return an empty array.

Reading from multiple files and storing data in a list

I am trying to read print search for all files in a directory and store contents in each file in a list to be used.
My problem is when i use print to debug if the file exists, it prints out the current file or first file in the list. However, It complains that file is not found when i try to read from this file
import re
import os
# Program to extract emails from text files
def path_file():
#path = raw_input("Please enter path to file:\n> ")
path = '/home/holy/thinker/leads/'
return os.listdir('/home/holy/thinker/leads') # returns a list like ["file1.txt", 'image.gif'] # need to remove trailing slashes
# read a file as 1 big string
def in_file():
print path_file()
content = []
for a_file in path_file(): # ['add.txt', 'email.txt']
print a_file
fin = open(a_file, 'r')
content.append(fin.read()) # store content of each file
print content
fin.close()
return content
print in_file()
# this is the error i get
""" ['add.txt', 'email.txt']
add.txt
Traceback (most recent call last):
File "Extractor.py", line 24, in <module>
print in_file()
File "Extractor.py", line 17, in in_file
fin = open(a_file, 'r')
IOError: [Errno 2] No such file or directory: 'add.txt'
"""
The error I get is aboive
os.listdir will return you only file name. You have to directory name on before that file name.
Its trying to open add.txt in same directory where you ran your program. Please add directory name before file name.
def path_file():
#path = raw_input("Please enter path to file:\n> ")
path = '/home/holy/thinker/leads/'
return [os.path.join(path, x) for x in os.listdir(path)]
you should use the full path of the file you want to read.
so please do fin = open(os.path.join(r'/home/holy/thinker/leads/', a_file), 'r')
Here's a rewrite using glob to limit which files are considered;
import glob
import os
import re
import sys
if sys.hexversion < 0x3000000:
# Python 2.x
inp = raw_input
else:
# Python 3.xrange
inp = input
def get_dir(prompt):
while True:
dir_name = inp(prompt)
dir_name = os.path.join(os.getcwd(), dir_name)
if os.path.isdir(dir_name):
return dir_name
else:
print("{} does not exist or is not a directory".format(dir_name))
def files_in_dir(dir_name, file_spec="*.txt"):
return glob.glob(os.path.join(dir_name, file_spec))
def file_iter(files):
for fname in files:
with open(fname) as inf:
yield fname, inf.read()
def main():
email_dir = get_dir("Please enter email directory: ")
email_files = files_in_dir(email_dir, "*.eml")
print(email_files)
content = [txt for fname,txt in file_iter(email_files)]
print(content)
if __name__=="__main__":
main()
and a trial run looks like
Please enter email directory: c:\temp
['c:\\temp\\file1.eml', 'c:\\temp\\file2.eml']
['file1 line one\nfile1 line two\nfile1 line three',
'file2 line one\nfile2 line two']

Finding duplicate files with python

I'm trying to write a Python script that will crawl through a directory and find all files that are duplicates and report back the duplicates. What's the best was to solve this?
import os, sys
def crawlDirectories(directoryToCrawl):
crawledDirectory = [os.path.join(path, subname) for path, dirnames, filenames in os.walk(directoryToCrawl) for subname in dirnames + filenames]
return crawledDirectory
#print 'Files crawled',crawlDirectories(sys.argv[1])
directoriesWithSize = {}
def getByteSize(crawledDirectory):
for eachFile in crawledDirectory:
size = os.path.getsize(eachFile)
directoriesWithSize[eachFile] = size
return directoriesWithSize
getByteSize(crawlDirectories(sys.argv[1]))
#print directoriesWithSize.values()
duplicateItems = {}
def compareSizes(dictionaryDirectoryWithSizes):
for key,value in dictionaryDirectoryWithSizes.items():
if directoriesWithSize.values().count(value) > 1:
duplicateItems[key] = value
compareSizes(directoriesWithSize)
#print directoriesWithSize.values().count(27085)
compareSizes(directoriesWithSize)
print duplicateItems
Why does this throw back this error?
Traceback (most recent call last):
File "main.py", line 16, in <module>
getByteSize(crawlDirectories(sys.argv[1]))
File "main.py", line 12, in getByteSize
size = os.path.getsize(eachFile)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/genericpath.py", line 49, in getsize
OSError: [Errno 2] No such file or directory: '../Library/Containers/com.apple.ImageKit.RecentPictureService/Data/Documents/iChats'
It seems to me that your crawledDirectory function is too complicated:
def crawlDirectories(directoryToCrawl):
output = []
for path, dirnames, filenames in os.walk(directoryToCrawl):
for fname in filenames:
output.append(os.path.join(path,fname))
return output
I'd suggest to try:
def crawlDirectories(directoryToCrawl):
crawledDirectory = [os.path.realpath(os.path.join(p, f))
for (p, d, f) in os.walk(directoryToCrawl)]
return crawledDirectory
That is, use a canonical path instead of relative paths in your crawl.

Python: Current directory in an os.walk

I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
currentPath=os.path.abspath(file)
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
so the list should be appended with the fc and the full path to it but just gets the root path and the final part of the path -not the directories in between.
Thanks for your advise,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, arcpy, sys, datetime
top = os.getcwd()
RootOutput = top
FileTypes=['shp']
SearchStrings=['Temp_Pol', 'Spatial_Ex']
filecount=0
#successcount=0
#errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
for root, dirs, files in os.walk(top, topdown=False):
for fl in files:
currentFile=os.path.join(root, fl)
for FileType in FileTypes:
status= str.endswith(currentFile,FileType)
if str(status) == 'True':
for SearchString in SearchStrings:
if str(SearchString in currentFile) == 'True':
#print str(currentFile)+str(status)
filecount=filecount+1
list.append(currentFile)
print 'Merging: ' + str(list)
#Replace with any function you want to carry out on the generated list of files.
#arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
You should use
os.path.join(root, file)
instead of simply using file like suggested in the os.walk doc examples os.walk
Btw, be careful with the reserved keywords. file is a built-in function and list too
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable
For some applications we always need to change the current working directory while we are in os.walk recursive call, in that case I would suggest changing the current working directory twice as shown below. I am writing about situations where having absolute file paths will not help .
from os import listdir
from os.path import isfile, join
import os
import re
# store the location of the top most directory
top = os.getcwd()
for (dirname, dirs, files) in os.walk(os.getcwd()):
for filename in files:
os.chdir(dirname)
# add all your operations for the current job in the directory
# Now go back to the top of the chain
os.chdir(top)
It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
'''A recursive/regex enhanced glob
adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm
'''
def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
''' #type directory: C{str}
#param directory: Path to search
#type pattern: C{type}
#param pattern: Regular expression/wildcard pattern to match files against
#type regex: C{boolean}
#param regex: Use regular expression matching (if False, use fnmatch)
See U{http://docs.python.org/library/re.html}
#type regex_flags: C{int}
#param regex_flags: Flags to pass to the regular expression compiler.
See U{http://docs.python.org/library/re.html}
#type recurse: C{boolean}
#param recurse: Recurse into the directory?
'''
self.stack = [directory]
self.pattern = pattern
self.regex = regex
self.recurse = recurse
self.regex_flags = regex_flags
self.files = []
self.index = 0
def __getitem__(self, index):
while 1:
try:
file = self.files[self.index]
self.index = self.index + 1
except IndexError:
# pop next directory from stack
self.directory = self.stack.pop()
try:
self.files = os.listdir(self.directory)
self.index = 0
except:pass
else:
# got a filename
fullname = os.path.join(self.directory, file)
if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
self.stack.append(fullname)
if self.regex:
import re
if re.search(self.pattern,file,self.regex_flags):
return fullname
else:
import fnmatch
if fnmatch.fnmatch(file, self.pattern):
return fullname
shplist=[shp for shp in rglob(top,'*.shp')]
print 'Merging: ' + str(shplist)
#arcpy.Merge_management(shplist, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(shplist)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()

Categories