Having Trouble outputting to a file - python

I have this simple code here
import os
path = (raw_input("Enter dir: "))
f = open('script_list.log', 'w')
for dirpath, dirname, filenames in os.walk(path):
for filename in [f for f in filenames]:
f.write(str(filename) + "\n")
print os.path.join(dirpath, filename)
When I run it I am getting the following
Enter dir: scripts
Traceback (most recent call last):
File "C:\Documents and Settings\CRichards\My Documents\My Dropbox\this_code.py", line 8, in <module>
f.write(str(filename) + "\n")
AttributeError: 'str' object has no attribute 'write'
I know it must be something simple, I just can't see it.

You're rebinding f in the loop when you do [f for f in filenames]. When you get to the point where f.write is called, f is the last member of filenames, so it's a string. Rename the outer f to something like log or output, or better, get rid of the useless list comprehension:
for filename in filenames:
suffices.
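(In Python 2, list comprehensions don't introduce a new scope, so the comprehension variable f leaks out and overwrites the file object. Python 3 gives comprehensions their own scope.)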

for filename in [f for f in filenames]:
should instead be
for filename in filenames:

Related

Python: Print file names and their directory based on their file size

I want to print filenames and their directory if their filesize is more than a certain amount. I wrote one and set the bar 1KB, but it doesn't work even if there are plenty of files larger than 1KB.
import os, shutil
def deleteFiles(folder):
folder = os.path.abspath(folder)
for foldername, subfolders, filenames in os.walk(folder):
for filename in filenames:
if os.path.getsize(filename) > 1000:
print(filename + ' is inside: ' + foldername)
deleteFiles('C:\\Cyber\\Downloads')
And I got 'Nothing'!
Then I ran the code in the interactive shell and got the following error:
Traceback (most recent call last):
File "<pyshell#14>", line 3, in <module>
if os.path.getsize(filename) > 100:
File "C:\Users\Cyber\Downloads\lib\genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError:
I am wondering how I can fix my code.
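os.walk yields bare file names, so os.path.getsize(filename) looks for the file relative to the current working directory rather than inside foldername, and cannot find it. You have to build the full path yourself. Replace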
if os.path.getsize(filename) > 1000:
with
if os.path.getsize(os.path.abspath(foldername + "/" + filename)) > 1000:
And it should work.
Replace:
deleteFiles('C:\\Cyber\\Downloads')
with
import os
a = 'c:' # removed slash
b = 'Cyber' # removed slash
c = 'Downloads'
path = os.path.join(a + os.sep, b, c)
deleteFiles(path)

Saving image header information to a text file

I need to extract image header information from multiple JPG files to a text or log file, however when I run the code below I receive an error:
for root, dirs, filenames in os.walk(topdir):
for f in filenames:
print(topdir)
print(f)
log = open(topdir + f, 'r')
data = p.get_json(log)
formatted_data =(( json.dumps(data, sort_keys=True,indent=4, separators=(',', ':')) ))
print(data)
print ("There are " + str(len(header_dict)) + " items on the menu.")
I get the following error when I run:
C:/Users/richie/Desktop/work/imagej/test images and files/XX1
image_D2016-02-03T15-27-56-763207Z_4.jpg
Traceback (most recent call last):
File "C:\Users\richie\Desktop\work\header_dir.py", line 25, in <module>
log = open(topdir + f, 'r')
FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/richie/Desktop/work/imagej/test images and files/XX1image_D2016-02- 03T15-27-56-763207Z_4.jpg'
How do I open image files to allow the function in the for loop to run against it?
Your problem lies in this code:
topdir + f
First, you should use join on paths, not +. The latter doesn't insert the separator between the path and file.
Second, you should join a filename with root, not with topdir.
for root, dirs, files in os.walk(topdir):
paths = [os.path.join(root, f) for f in files]
for p in paths:
log = open(p)
# et cetera
Working code:
import pyexifinfo as x
import json
import os
from tkinter import *
from tkinter.filedialog import askopenfilename
def askdirectory():
    """Open a Tk directory-chooser dialog and return what it reports.

    Returns:
        The value returned by ``tkinter.filedialog.askdirectory()``.
    """
    # The file-level imports only pull in ``askopenfilename`` from
    # tkinter.filedialog; the ``filedialog`` module name itself is never
    # bound, so import it here before using it.
    from tkinter import filedialog

    dirname = filedialog.askdirectory()
    return dirname
topdir = askdirectory()
for root, dirs, files in os.walk(topdir):
paths = [os.path.join(root, f) for f in files]
for p in paths:
data = x.get_csv(p)
print(p)
print(data)
formatted_data =((json.dumps(data, sort_keys=True,indent=4, separators=(',', ':')) ))
f = open('Xheader_info_XML.txt','a')
f.write(p)
f.write(formatted_data)
f.close()

Python recursive directory reading without os.walk

I am trying to walk through tree of directories and search for data in output text files by defining a recursive function (and not using os.walk) in Python.
import os
def walkfn(dirname):
if os.path.exists('output'):
file1 = open('output')
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
newdir = os.chdir(path)
walkfn(newdir)
cwd = os.getcwd()
walkfn(cwd)
I am getting the following error:
Traceback (most recent call last):
File "/home/Python Work/Test2.py", line 24, in <module>
walkfn(cwd)
File "/home/Python Work/Test2.py", line 19, in walkfn
walkfn(newdir)
File "/home/Python Work/Test2.py", line 12, in walkfn
for name in os.listdir(dirname):
TypeError: coercing to Unicode: need string or buffer, NoneType found
os.chdir() returns None, not the new directory name. You pass that result to the recursive walkfn() function, and then to os.listdir().
There is no need to assign, just pass path to walkfn():
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
os.chdir(path)
walkfn(path)
You usually want to avoid changing directories; there is no need to if your code uses absolute paths:
def walkfn(dirname):
    """Recursively search ``dirname`` for ``output`` files and print results.

    If ``dirname`` contains an entry named ``output``, it is opened and every
    line that starts with ``'Final value:'`` is printed; the subdirectories
    of ``dirname`` are not searched in that case.  Otherwise each
    subdirectory is announced and then searched recursively.

    Every path is built by joining onto ``dirname``, so the current working
    directory is never changed.

    Args:
        dirname: Directory to search.
    """
    output = os.path.join(dirname, 'output')
    if os.path.exists(output):
        with open(output) as file1:
            for line in file1:
                if line.startswith('Final value:'):
                    # ``line`` still carries its trailing newline, so a
                    # blank line follows each match (as in the original).
                    print(line)
    else:
        for name in os.listdir(dirname):
            path = os.path.join(dirname, name)
            if os.path.isdir(path):
                # A single pre-formatted argument prints the same text under
                # both the Python 2 print statement and the Python 3 function.
                print("' %s '" % name)
                walkfn(path)

Finding duplicate files with python

I'm trying to write a Python script that will crawl through a directory and find all files that are duplicates and report back the duplicates. What's the best way to solve this?
import os, sys
def crawlDirectories(directoryToCrawl):
crawledDirectory = [os.path.join(path, subname) for path, dirnames, filenames in os.walk(directoryToCrawl) for subname in dirnames + filenames]
return crawledDirectory
#print 'Files crawled',crawlDirectories(sys.argv[1])
directoriesWithSize = {}
def getByteSize(crawledDirectory):
for eachFile in crawledDirectory:
size = os.path.getsize(eachFile)
directoriesWithSize[eachFile] = size
return directoriesWithSize
getByteSize(crawlDirectories(sys.argv[1]))
#print directoriesWithSize.values()
duplicateItems = {}
def compareSizes(dictionaryDirectoryWithSizes):
for key,value in dictionaryDirectoryWithSizes.items():
if directoriesWithSize.values().count(value) > 1:
duplicateItems[key] = value
compareSizes(directoriesWithSize)
#print directoriesWithSize.values().count(27085)
compareSizes(directoriesWithSize)
print duplicateItems
Why does this throw back this error?
Traceback (most recent call last):
File "main.py", line 16, in <module>
getByteSize(crawlDirectories(sys.argv[1]))
File "main.py", line 12, in getByteSize
size = os.path.getsize(eachFile)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/genericpath.py", line 49, in getsize
OSError: [Errno 2] No such file or directory: '../Library/Containers/com.apple.ImageKit.RecentPictureService/Data/Documents/iChats'
It seems to me that your crawlDirectories function is too complicated:
def crawlDirectories(directoryToCrawl):
    """Return the path of every file found beneath ``directoryToCrawl``.

    Only the entries that ``os.walk`` reports as file names are collected
    (directory entries are skipped); each name is joined onto the directory
    it was reported in.

    Args:
        directoryToCrawl: Root directory of the crawl.

    Returns:
        A list of file paths, in the order ``os.walk`` yields them.
    """
    output = []
    for path, dirnames, filenames in os.walk(directoryToCrawl):
        for fname in filenames:
            output.append(os.path.join(path, fname))
    return output
I'd suggest to try:
def crawlDirectories(directoryToCrawl):
    """Return the canonical path of every file beneath ``directoryToCrawl``.

    Each file name reported by ``os.walk`` is joined onto the directory it
    was found in and passed through ``os.path.realpath``.

    Args:
        directoryToCrawl: Root directory of the crawl.

    Returns:
        A list of canonical file paths.
    """
    # os.walk yields a *list* of file names per directory, so a second
    # ``for`` clause is needed to join each individual name onto the path.
    crawledDirectory = [os.path.realpath(os.path.join(p, f))
                        for (p, d, files) in os.walk(directoryToCrawl)
                        for f in files]
    return crawledDirectory
That is, use a canonical path instead of relative paths in your crawl.

Python: Current directory in an os.walk

I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
currentPath=os.path.abspath(file)
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
So each fc should be appended to the list together with its full path, but the list only gets the root path plus the final part of the path - not the directories in between.
Thanks for your advice,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, arcpy, sys, datetime
top = os.getcwd()
RootOutput = top
FileTypes=['shp']
SearchStrings=['Temp_Pol', 'Spatial_Ex']
filecount=0
#successcount=0
#errorcount=0
print "Working in: "+os.getcwd()
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
for root, dirs, files in os.walk(top, topdown=False):
for fl in files:
currentFile=os.path.join(root, fl)
for FileType in FileTypes:
status= str.endswith(currentFile,FileType)
if str(status) == 'True':
for SearchString in SearchStrings:
if str(SearchString in currentFile) == 'True':
#print str(currentFile)+str(status)
filecount=filecount+1
list.append(currentFile)
print 'Merging: ' + str(list)
#Replace with any function you want to carry out on the generated list of files.
#arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
You should use
os.path.join(root, file)
instead of simply using file, as suggested in the examples in the os.walk documentation.
Btw, be careful not to shadow built-in names: file is a built-in (in Python 2) and so is list. Once you rebind list, you can no longer call it:
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable
For some applications we do need to change the current working directory while inside the os.walk loop. In that case I would suggest changing the working directory twice, as shown below: once into the directory being visited, and once back to the top when done. I am writing about situations where having absolute file paths will not help.
from os import listdir
from os.path import isfile, join
import os
import re
# store the location of the top most directory
top = os.getcwd()
for (dirname, dirs, files) in os.walk(os.getcwd()):
for filename in files:
os.chdir(dirname)
# add all your operations for the current job in the directory
# Now go back to the top of the chain
os.chdir(top)
It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
    '''A recursive/regex enhanced glob

    adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

    Instances are consumed with a plain ``for`` loop (or a comprehension):
    iteration goes through ``__getitem__``, which returns the next matching
    path on every call and raises IndexError once nothing is left.
    '''

    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        '''
        @type directory: C{str}
        @param directory: Path to search
        @type pattern: C{str}
        @param pattern: Regular expression/wildcard pattern to match files against
        @type regex: C{boolean}
        @param regex: Use regular expression matching (if False, use fnmatch)
                      See U{http://docs.python.org/library/re.html}
        @type regex_flags: C{int}
        @param regex_flags: Flags to pass to the regular expression compiler.
                            See U{http://docs.python.org/library/re.html}
        @type recurse: C{boolean}
        @param recurse: Recurse into the directory?
        '''
        self.stack = [directory]   # directories still to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []            # entries of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the full path of the next entry that matches the pattern.

        ``index`` is ignored; the scan position is kept on the instance.
        Raises IndexError when the directory stack is exhausted, which is
        what ends a ``for`` loop over the instance.
        '''
        # Imported once per call instead of once per directory entry.
        import fnmatch
        import re

        while 1:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current listing is exhausted: pop next directory from the
                # stack.  An empty stack raises IndexError out of this
                # method, which terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Directory cannot be listed (missing, no permission):
                    # skip it and carry on with the rest of the stack.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
                    self.stack.append(fullname)
                if self.regex:
                    if re.search(self.pattern, name, self.regex_flags):
                        return fullname
                else:
                    if fnmatch.fnmatch(name, self.pattern):
                        return fullname
shplist=[shp for shp in rglob(top,'*.shp')]
print 'Merging: ' + str(shplist)
#arcpy.Merge_management(shplist, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(shplist)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()

Categories