Python: Current directory in an os.walk - python

I need to get the current directory in an os.walk process. It works when there is just one subdirectory level but fails when there's more. Please advise...
[CODE]
# Question script: walks `top`, globs for shapefiles matching SearchString in
# each visited location, collects their paths in `list` and logs the result.
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, glob, arcpy, csv, sys, shutil, datetime
top = r'L:\Raster_Data\Topographic_Maps'
RootOutput = r'L:\Raster_Data\Topographic_Maps'
#FileList = csv.reader(open('FileList.csv'))
SearchString=['Temp_Pol', 'Spatial_Ex']
filecount=0
successcount=0
errorcount=0
print "Working in: "+os.getcwd()
# NOTE(review): this name shadows the builtin list() for the rest of the script.
list =[]
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write("Log of files Succesfully processed. RESULT of process run #:"+str(datetime.datetime.now())+"\n")
f.close()
#for File in FileList:
for root, dirs, files in os.walk(top, topdown=False):
#for directory in dirs:
for file in files:
#currentPath=os.path.join(root,directory)
# NOTE(review): `file` is a bare name yielded by os.walk; os.path.abspath()
# resolves it against the current working directory, not against `root`, so
# the result is wrong below the first level. os.path.join(root, file) gives
# the real location.
currentPath=os.path.abspath(file)
# NOTE(review): currentPath names a file, not a directory; the tracebacks
# quoted after this script show os.chdir() failing on exactly this call.
os.chdir(currentPath)
#arcpy.env.workspace = currentPath
#print os.getcwd()
# Shapefiles in the current working directory matching the first search string.
lstFCs = glob.glob('*'+SearchString[0]+'*.shp')
#print lstFCs
OutPutDir=os.path.abspath(currentPath)
for fc in lstFCs:
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
# Same again for the second search string.
lstFCs = glob.glob('*'+SearchString[1]+'*.shp')
#print lstFCs
for fc in lstFCs:
# NOTE(review): `directory` is only bound by the commented-out
# "for directory in dirs" loop above - confirm it is defined when this runs.
OutPutDir=RootOutput+"\\"+directory
filecount=filecount+1
list.append(OutPutDir+"\\"+fc)
print 'Merging: ' + str(list)
#arcpy.Merge_management(list, RootOutput+"\\Full_Extent.shp")
print 'Created: '+RootOutput+"\\Full_Extent.shp"
# Append the collected list to the same log that was opened at the start.
f = open(RootOutput+'\\Success_LOG.txt', 'a')
f.write(str(list)+"\n\n Merged to: "+RootOutput+"\\Full_Extent.shp")
f.close()
So each fc should be appended to the list together with its full path, but the list only gets the root path plus the final part of the path - not the directories in between.
Thanks for your advice,
[Error Messages]
Working in: L:\Raster_Data\Topographic_Maps Merging:
['L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp',
'L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp',
'L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp',
'L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp',
'L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp'] Traceback
(most recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 64, in
arcpy.Merge_management(list, RootOutput+"\Full_Extent.shp")
File "C:\Program Files\ArcGIS\Desktop10.0\arcpy\arcpy\management.py",
line 3124, in Merge
raise e ExecuteError: Failed to execute. Parameters are not valid.
ERROR 000732: Input Datasets: Dataset
L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\ecw\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\ecw\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SC55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SD55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SE54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\100K\2010_100K\Map_Sheets_BestResolution\qld_north\SE55\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\SE55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF54\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SF56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG55\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SG56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\SH56\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\Tablelands_100K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\200DPI\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\1M\prj_Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\1M\Temp_Polygon_Extent_0.shp;L:\Raster_Data\Topographic_Maps\250K\prj_Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Temp_Polygon_Extent_1.shp;L:\Raster_Data\Topographic_Maps\250K\Spatial_Extent.shp;L:\Raster_Data\Topographic_Maps\5M\prj_Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Temp_Polygon_Extent_2.shp;L:\Raster_Data\Topographic_Maps\5M\Spatial_Extent.shp
does not exist or is not supported Failed to execute (Merge).
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
directory 'SC54'
dirs ['SC54', 'SC55', 'SD54', 'SD55', 'SE54', 'SE55']
os.path.abspath(dirs[0])
'L:\Raster_Data\Topographic_Maps\ecw\SC54'
os.getcwd() 'L:\Raster_Data\Topographic_Maps\ecw'
Working in: L:\Raster_Data\Topographic_Maps Traceback (most
recent call last): File
"L:\Raster_Data\Topographic_Maps\CreateFileList.py", line 28, in
os.chdir(currentPath) WindowsError: [Error 2] The system cannot
find the file specified:
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'
file '7178cp_dd.ers'
os.path.abspath
os.path.abspath(file)
'L:\Raster_Data\Topographic_Maps\7178cp_dd.ers'

Thanks all, I used the input from the forum to complete the script. It's below for anyone who wants it. best,
# AFFECTS everything reachable from the directory named in "top",
# assuming there are no symbolic links.
# CAUTION: This is dangerous! For example, if top == '/', it
# could affect all your disk files.
import os, arcpy, sys, datetime
# Collect every file below `top` whose name ends with one of FileTypes and
# whose full path contains one of SearchStrings, then log the resulting list.
# print(...) with a single string argument behaves the same on Python 2 and 3.
top = os.getcwd()
RootOutput = top
FileTypes = ['shp']
SearchStrings = ['Temp_Pol', 'Spatial_Ex']
log_path = os.path.join(RootOutput, 'Success_LOG.txt')
merged_path = os.path.join(RootOutput, 'Full_Extent.shp')
print("Working in: " + os.getcwd())
# Named matched_files rather than `list` so the builtin list() stays usable.
matched_files = []
# `with` closes the log even if the write raises.
with open(log_path, 'a') as f:
    f.write("Log of files Succesfully processed. RESULT of process run #:" + str(datetime.datetime.now()) + "\n")
# str.endswith accepts a tuple, so every extension is tested in one call.
extensions = tuple(FileTypes)
for root, dirs, files in os.walk(top, topdown=False):
    for fl in files:
        currentFile = os.path.join(root, fl)
        if not currentFile.endswith(extensions):
            continue
        # any() records the file once even when several search strings match;
        # the nested loops it replaces appended one copy per matching string.
        if any(SearchString in currentFile for SearchString in SearchStrings):
            matched_files.append(currentFile)
filecount = len(matched_files)
print('Merging: ' + str(matched_files))
#Replace with any function you want to carry out on the generated list of files.
#arcpy.Merge_management(matched_files, merged_path)
print('Created: ' + merged_path)
with open(log_path, 'a') as f:
    f.write(str(matched_files) + "\n\n Merged to: " + merged_path)

You should use
os.path.join(root, file)
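instead of simply using file, as shown in the examples in the os.walk documentation.
Btw, be careful not to shadow built-in names. file (in Python 2) and list are built-ins rather than reserved keywords, so assigning to them is allowed but silently hides the originals: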
>>> a = list()
>>> a
[]
>>> list = []
>>> b = list()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'list' object is not callable

For some applications we need to change the current working directory while iterating over os.walk. In that case I would suggest changing the current working directory twice, as shown below: once into the directory being processed and once back to the top. This is meant for situations where having absolute file paths will not help.
from os import listdir
from os.path import isfile, join
import os
import re
# Store the location of the topmost directory so it can always be restored.
top = os.getcwd()
for (dirname, dirs, files) in os.walk(top):
    for filename in files:
        os.chdir(dirname)
        try:
            # add all your operations for the current job in the directory
            pass
        finally:
            # Go back to the top of the chain even if the operations raise,
            # so an error never leaves the process inside a subdirectory.
            os.chdir(top)

It looks like you're after a recursive glob. Something like the code below might be of use:
class rglob:
    '''A recursive/regex enhanced glob

    adapted from os-path-walk-example-3.py - http://effbot.org/librarybook/os-path.htm

    Instances are consumed with a plain ``for`` loop or ``list()``. The class
    defines no ``__iter__``, so Python iterates by calling ``__getitem__``
    until it raises IndexError.
    '''
    def __init__(self, directory, pattern="*", regex=False, regex_flags=0, recurse=True):
        '''
        @type    directory: C{str}
        @param   directory: Path to search
        @type    pattern: C{str}
        @param   pattern: Regular expression/wildcard pattern to match files against
        @type    regex: C{boolean}
        @param   regex: Use regular expression matching (if False, use fnmatch)
                        See U{http://docs.python.org/library/re.html}
        @type    regex_flags: C{int}
        @param   regex_flags: Flags to pass to the regular expression compiler.
                              See U{http://docs.python.org/library/re.html}
        @type    recurse: C{boolean}
        @param   recurse: Recurse into the directory?
        '''
        self.stack = [directory]   # directories still waiting to be listed
        self.pattern = pattern
        self.regex = regex
        self.recurse = recurse
        self.regex_flags = regex_flags
        self.files = []            # listing of the directory being scanned
        self.index = 0             # position of the next entry in self.files

    def __getitem__(self, index):
        '''Return the next matching path; ``index`` itself is ignored.

        Raises IndexError once every directory on the stack has been scanned,
        which is what terminates iteration.
        '''
        # Imported once per call instead of once per examined entry.
        import fnmatch
        import re
        while True:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current listing exhausted: pop next directory from stack.
                # pop() on an empty stack raises the IndexError that ends
                # the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                    self.index = 0
                except OSError:
                    # Unreadable or vanished directory: skip it. Only OSError
                    # is swallowed so other errors are not hidden.
                    pass
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if os.path.isdir(fullname) and not os.path.islink(fullname) and self.recurse:
                    self.stack.append(fullname)
                if self.regex:
                    matched = re.search(self.pattern, name, self.regex_flags)
                else:
                    matched = fnmatch.fnmatch(name, self.pattern)
                if matched:
                    return fullname
# Gather every shapefile below `top` and record the list in the success log.
# print(...) with a single string argument behaves the same on Python 2 and 3.
shplist = list(rglob(top, '*.shp'))
merged_path = os.path.join(RootOutput, 'Full_Extent.shp')
print('Merging: ' + str(shplist))
#arcpy.Merge_management(shplist, merged_path)
print('Created: ' + merged_path)
# `with` closes the log even if the write raises.
with open(os.path.join(RootOutput, 'Success_LOG.txt'), 'a') as f:
    f.write(str(shplist) + "\n\n Merged to: " + merged_path)

Related

How to zip files that ends with certain extension

I want to get all files in a directory (I reached it after doing several for loops - hence fourth.path) that ends with .npy or with csv and then zip those files.
My code is running putting one file only in the zip file. What am I doing wrong?
I tried to change my indents, but no zip file is being created
import json
import os
import zipfile
import zlib
# Question script: descends four directory levels below `directory`, looks for
# the folder named by `radarfolder`, and tries to zip its .npy/.csv files.
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
# NOTE(review): this listing is not used below; `file` is rebound by the
# "for file in os.listdir(...)" loop further down.
file = os.listdir(directory)
# Join `p` and `name`, then force backslash separators.
def r(p, name):
p = os.path.join(p, name)
return p.replace("/", "\\")
# Walk four nested directory levels looking for the radar folder.
for first in os.scandir(directory):
if first.is_dir():
for second in os.scandir(first.path):
if second.is_dir():
for third in os.scandir(second.path):
if third.is_dir():
radar_folder_name = ''
list_files = ()
for fourth in os.scandir(third.path):
if fourth.is_dir():
if radarfolder in fourth.path:
radar_folder_name = fourth.path
print(radar_folder_name)
list_files = ()
for file in os.listdir(fourth.path):
# `|` is the bitwise operator; it works on two bools, but `or` is the
# logical operator normally used here.
if file.endswith(".npy") | file.endswith(".csv"):
# NOTE(review): (file) is just a parenthesised string, not a tuple, so this
# replaces the previous value instead of accumulating file names.
list_files = (file)
print(list_files)
# NOTE(review): mode 'w' truncates radar.zip every time it is opened; per
# the answer that follows, this is why only one file ends up in the archive.
with zipfile.ZipFile(radar_folder_name +'\\' +'radar.zip', 'w', compression=zipfile.ZIP_DEFLATED ) as zipMe:
zipMe.write(radar_folder_name +'\\' +list_files)
# Redundant: the `with` statement already closes the archive.
zipMe.close()
I tried changing my indentation, which resulted either in the error TypeError: can only concatenate str (not "tuple") to str, or in no zip file being created.
As I said in my second comment, your problem comes from the 'w' argument in your zipping statement. It causes the zip to be overwritten every time it's opened, which you do for each file you zip in. You can fix this 2 ways (at least):
Replace 'w' with 'a'; this way the files will be appended to your zip (with the side effect that, if you do this several times, files will be added more than once).
Keep the 'w', but only open the zip once, having listed all the files you want to zip before. See my code below.
I've taken the liberty of rewriting the part of your code where you look for the 'RadarIfxAvian' folder, since deeply nested for loops are clumsy (and if your folder structure changes, they might not work), replacing it with a multi-purpose recursive function.
Note that the folder structure will be included in the .zip; if you want to zip only the files themselves, consider doing os.chdir(radar_folder_name) before zipping the files.
# This function recursively looks for the 'filename' file or folder
# under 'start_path' and returns the full path, or an empty string if not found.
def find_file(start_path, filename):
    """Search *start_path* recursively for an entry named *filename*.

    The current directory is checked before any subdirectory is entered.

    Args:
        start_path: Directory in which to start the search.
        filename: Exact name of the file or folder to look for.

    Returns:
        The path of the first match, or ``''`` when nothing matches.
    """
    subdirs = []
    # One scandir pass both checks this level and collects the subdirectories;
    # the `with` block releases the directory handle promptly.
    with os.scandir(start_path) as entries:
        for entry in entries:
            if entry.name == filename:
                return os.path.join(start_path, filename)
            if entry.is_dir():
                subdirs.append(entry.name)
    for name in subdirs:
        deep_path = find_file(os.path.join(start_path, name), filename)
        if deep_path:
            return deep_path
    return ''
# Locate the radar folder below ./recs and zip its .npy/.csv files into
# radar.zip inside that same folder.
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
radar_folder_name = find_file(directory, radarfolder)
print(radar_folder_name)
# find_file returns '' when nothing matched; os.listdir('') would fail with an
# unhelpful error, so stop with a clear message instead.
if not radar_folder_name:
    raise SystemExit("Folder '" + radarfolder + "' not found under " + directory)
# str.endswith accepts a tuple of suffixes. The list is built before the
# archive is created, so radar.zip can never end up inside itself.
list_files = [
    file for file in os.listdir(radar_folder_name)
    if file.endswith((".npy", ".csv"))
]
# The archive is opened once, so mode 'w' does not discard earlier entries.
with zipfile.ZipFile(os.path.join(radar_folder_name, 'radar.zip'), 'w', compression=zipfile.ZIP_DEFLATED) as zipMe:
    for file in list_files:
        zipMe.write(os.path.join(radar_folder_name, file))
If I understand your code correctly, you are looking for a folder "RadarIfxAvian" and want to place a .ZIP in that folder containing any .CSV or .NPY files in that directory. This should do the equivalent, using os.walk for the recursive search:
import os
import zipfile
# Find the first directory named 'RadarIfxAvian' below ./recs and zip its
# .npy/.csv files into radar.zip inside that directory.
for path, dirs, files in os.walk('recs'):
    if os.path.basename(path) == 'RadarIfxAvian':
        print(path)
        # Named `archive` so the builtin zip() is not shadowed.
        with zipfile.ZipFile(os.path.join(path, 'radar.zip'), 'w', zipfile.ZIP_DEFLATED) as archive:
            for file in files:
                if file.endswith((".npy", ".csv")):
                    print(file)
                    # os.walk yields bare names relative to `path`, not to the
                    # current working directory, so the source path must be
                    # joined; arcname keeps the entry flat inside the archive.
                    archive.write(os.path.join(path, file), arcname=file)
        break  # stop search once the directory is found and processed
I adjusted my code with the following steps:
Putting the if in a function
Writing the zip by looping over each item in the list I appended to
import json
import os
import glob
import zipfile
import zlib
directory = os.path.join(os.getcwd(), 'recs')
radarfolder = 'RadarIfxAvian'
# Shared accumulator filled by tozip(); reset for every radar folder below.
list_files = []


def r(p, name):
    """Join *p* and *name* and return the result with backslash separators."""
    p = os.path.join(p, name)
    return p.replace("/", "\\")


def tozip(path, file):
    """Queue ``path\\file`` for zipping when *file* ends in .npy or .csv.

    Appends to the module-level ``list_files`` and returns that same list,
    whether or not anything was appended.
    """
    if file.endswith((".npy", ".csv")):
        list_files.append(path + '\\' + file)
    return list_files


def _subdirs(path):
    """Yield the DirEntry of every immediate subdirectory of *path*."""
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_dir():
                yield entry


# Zip the .npy/.csv files of every radar folder found four levels below
# `directory`, writing radar.zip into the radar folder itself.
for first in _subdirs(directory):
    for second in _subdirs(first.path):
        for third in _subdirs(second.path):
            for fourth in _subdirs(third.path):
                if radarfolder not in fourth.path:
                    continue
                radar_folder_name = fourth.path
                # Start from an empty list so one archive never receives the
                # files of a previously processed radar folder.
                list_files.clear()
                filestozip = list_files
                for file in os.listdir(fourth.path):
                    filestozip = tozip(radar_folder_name, file)
                print(filestozip)
                # `with` closes the archive; an unclosed ZipFile may be left
                # without its central directory and be unreadable.
                with zipfile.ZipFile(r(radar_folder_name, "radar.zip"), "w") as archive:
                    for a in filestozip:
                        archive.write(a, compress_type=zipfile.ZIP_DEFLATED)
                print(radar_folder_name + "added to zip")

MSI log file reader is not writing the results to the text file

I have created an MSI log file reader that searches for specific log file names that some installers leave behind during installation. It prints the results separately in the shell but is not writing them to the created "MSI_Return.txt" file.
#Import Python Modules
import os
import fnmatch
#Create a find function to search the directories for the file name
def find(pattern, path):
    """Return the paths of all files below *path* whose name matches *pattern*.

    Args:
        pattern: fnmatch-style wildcard pattern applied to the bare file name.
        path: Directory at which the recursive search starts.

    Returns:
        A list of paths (directory joined with file name), in os.walk order.
    """
    result = []
    for root, dirs, files in os.walk(path):
        # fnmatch.filter applies the pattern to a whole listing at once.
        result.extend(os.path.join(root, name) for name in fnmatch.filter(files, pattern))
    return result
# Script body: collect the MSI*.LOG files from the user's Temp folder, pick out
# the lines containing a key phrase, and write both lists to MSI_Return.txt.
#Get the full list of LOG files from specified folder
infile = os.path.expanduser('~\\AppData\\Local\\Temp')
listoffiles = find("MSI*.LOG", infile)
#Process each file name in the file name array
key_phrases = ["Error",
"Failed",
"Exception", "Return Value 3"]
important = []
for eachfile in listoffiles:
#Open the file
with open(eachfile) as i:
# `i` is rebound here from the file object to the list of its lines.
i = i.readlines()
#Look through the lines for phrases
#If a phrase is found, append it to an array
for line in i:
for phrase in key_phrases:
if phrase in line:
important.append(line)
# Leaves the phrase loop after the first hit - presumably so a line that
# contains several phrases is only recorded once.
break
#Return output
MSI_Return = open('MSI_Return.txt','w')
MSI_Return.write('List of MSI files = ' + str(listoffiles))
MSI_Return.write('\n')
MSI_Return.write('Return of key lines from MSI files = ' + str(important))
# NOTE(review): `MSI_Return.close` only references the method; without the
# call parentheses close() never runs, so buffered output is not explicitly
# flushed - likely why the file looks empty. `MSI_Return.close()` or a
# `with open(...)` block would fix it.
MSI_Return.close
Any thoughts or input would be appreciated.

Python: Print file names and their directory based on their file size

I want to print filenames and their directory if their filesize is more than a certain amount. I wrote one and set the bar 1KB, but it doesn't work even if there are plenty of files larger than 1KB.
import os, shutil
# Print every file below `folder` that is larger than 1000 bytes, together
# with its containing folder. Despite the name, nothing here deletes files.
def deleteFiles(folder):
folder = os.path.abspath(folder)
for foldername, subfolders, filenames in os.walk(folder):
for filename in filenames:
# NOTE(review): `filename` is a bare name, so os.path.getsize() resolves it
# against the current working directory instead of `foldername`; the traceback
# quoted below shows the resulting FileNotFoundError. The size has to be taken
# from os.path.join(foldername, filename).
if os.path.getsize(filename) > 1000:
print(filename + ' is inside: ' + foldername)
deleteFiles('C:\\Cyber\\Downloads')
And I got 'Nothing'!
and then I wrote codes in interactive shell, I got following error:
Traceback (most recent call last):
File "<pyshell#14>", line 3, in <module>
if os.path.getsize(filename) > 100:
File "C:\Users\Cyber\Downloads\lib\genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError:
I am wondering How I can fix my code.
os.path.getsize cannot find the file from its bare name alone: os.walk yields file names without their directory, so you have to rebuild the full path from foldername. Replace
if os.path.getsize(filename) > 1000:
with
if os.path.getsize(os.path.abspath(foldername + "/" + filename)) > 1000:
And it should work.
Replace:
deleteFiles('C:\\Cyber\\Downloads')
with
import os
# Build the target path from its components instead of hard-coding separators.
a, b, c = 'c:', 'Cyber', 'Downloads'  # none of the parts carries a slash
# os.sep is appended to the drive part before the remaining parts are joined.
drive_root = a + os.sep
path = os.path.join(drive_root, b, c)
deleteFiles(path)

Python recursive directory reading without os.walk

I am trying to walk through tree of directories and search for data in output text files by defining a recursive function (and not using os.walk) in Python.
import os
# Recursively look for a file called 'output' and print its lines that start
# with 'Final value:'; otherwise descend into the subdirectories of `dirname`.
def walkfn(dirname):
# NOTE(review): 'output' is a relative name, so this test (and the open()
# below) looks in the current working directory rather than in `dirname`.
if os.path.exists('output'):
file1 = open('output')
for line in file1:
if line.startswith('Final value:'):
print line
else:
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
# NOTE(review): os.chdir() returns None, so `newdir` is None and the
# recursive call passes None on to os.listdir() - the TypeError in the
# traceback quoted below.
newdir = os.chdir(path)
walkfn(newdir)
cwd = os.getcwd()
walkfn(cwd)
I am getting the following error:
Traceback (most recent call last):
File "/home/Python Work/Test2.py", line 24, in <module>
walkfn(cwd)
File "/home/Python Work/Test2.py", line 19, in walkfn
walkfn(newdir)
File "/home/Python Work/Test2.py", line 12, in walkfn
for name in os.listdir(dirname):
TypeError: coercing to Unicode: need string or buffer, NoneType found
os.chdir() returns None, not the new directory name. You pass that result to the recursive walkfn() function, and then to os.listdir().
There is no need to assign, just pass path to walkfn():
for name in os.listdir(dirname):
path = os.path.join(dirname, name)
if os.path.isdir(path):
print "'", name, "'"
os.chdir(path)
walkfn(path)
You usually want to avoid changing directories; there is no need to if your code uses absolute paths:
def walkfn(dirname):
    """Print the 'Final value:' lines of the 'output' file found under *dirname*.

    If *dirname* holds a regular file named ``output``, its lines starting
    with ``Final value:`` are printed and the search stops for this branch.
    Otherwise every real subdirectory is announced and searched in turn.
    Only absolute joins are used, so the working directory never changes.

    Args:
        dirname: Directory at which to start the search.
    """
    output = os.path.join(dirname, 'output')
    # isfile rather than exists: a directory called 'output' cannot be opened
    # and should be searched like any other subdirectory.
    if os.path.isfile(output):
        with open(output) as file1:
            for line in file1:
                if line.startswith('Final value:'):
                    # Single-argument print(...) behaves the same on Python 2
                    # and 3; the line keeps its own trailing newline.
                    print(line)
        return
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        # Symlinked directories are skipped so a link cycle cannot cause
        # unbounded recursion.
        if os.path.isdir(path) and not os.path.islink(path):
            print("' " + name + " '")
            walkfn(path)

Reading from multiple files and storing data in a list

I am trying to read print search for all files in a directory and store contents in each file in a list to be used.
My problem is when i use print to debug if the file exists, it prints out the current file or first file in the list. However, It complains that file is not found when i try to read from this file
import re
import os
# Program to extract emails from text files
# Return the names of the entries in the leads directory.
def path_file():
#path = raw_input("Please enter path to file:\n> ")
# NOTE(review): `path` is assigned but not used; the listdir call below
# repeats the directory as a literal.
path = '/home/holy/thinker/leads/'
return os.listdir('/home/holy/thinker/leads') # returns a list like ["file1.txt", 'image.gif'] # need to remove trailing slashes
# read a file as 1 big string
def in_file():
print path_file()
content = []
for a_file in path_file(): # ['add.txt', 'email.txt']
print a_file
# NOTE(review): `a_file` is a bare name from os.listdir, so open() looks for it
# in the current working directory - the IOError quoted below. It has to be
# joined with the directory that was listed.
fin = open(a_file, 'r')
content.append(fin.read()) # store content of each file
print content
fin.close()
return content
print in_file()
# this is the error i get
""" ['add.txt', 'email.txt']
add.txt
Traceback (most recent call last):
File "Extractor.py", line 24, in <module>
print in_file()
File "Extractor.py", line 17, in in_file
fin = open(a_file, 'r')
IOError: [Errno 2] No such file or directory: 'add.txt'
"""
The error I get is shown above.
os.listdir returns only the file names. You have to put the directory name in front of each file name.
It's trying to open add.txt in the directory where you ran your program. Please add the directory name before the file name.
def path_file(path='/home/holy/thinker/leads/'):
    """Return the full path of every entry in the directory *path*.

    os.listdir yields bare names; joining them with *path* lets callers open
    the files regardless of the current working directory.

    Args:
        path: Directory to list. Defaults to the original leads directory.

    Returns:
        A list of paths, in os.listdir order.
    """
    return [os.path.join(path, x) for x in os.listdir(path)]
You should use the full path of the file you want to read,
so please do fin = open(os.path.join(r'/home/holy/thinker/leads/', a_file), 'r')
Here's a rewrite using glob to limit which files are considered;
import glob
import os
import re
import sys
# raw_input() was renamed to input() in Python 3; bind whichever one exists to
# `inp` so the rest of the script is version-agnostic.
if sys.hexversion < 0x3000000:
    # Python 2.x
    inp = raw_input
else:
    # Python 3.x
    inp = input
def get_dir(prompt):
    """Ask with *prompt* until the answer names an existing directory.

    The answer is joined onto the current working directory, so relative
    answers resolve against it while absolute answers are used as given.

    Returns:
        The resulting directory path.
    """
    while True:
        dir_name = os.path.join(os.getcwd(), inp(prompt))
        if os.path.isdir(dir_name):
            return dir_name
        print("{} does not exist or is not a directory".format(dir_name))
def files_in_dir(dir_name, file_spec="*.txt"):
    """Return the paths in *dir_name* that match the wildcard *file_spec*.

    glob returns matches in arbitrary order, so the result is sorted to make
    the order of the file list reproducible.
    """
    return sorted(glob.glob(os.path.join(dir_name, file_spec)))
def file_iter(files):
    """Yield ``(file name, whole file content)`` for each path in *files*.

    Each file is opened lazily, when its pair is requested, and is closed
    again before the next one is opened.
    """
    for fname in files:
        with open(fname) as inf:
            yield fname, inf.read()
def main():
    """Ask for a directory, then print its .eml file names and their contents."""
    email_dir = get_dir("Please enter email directory: ")
    email_files = files_in_dir(email_dir, "*.eml")
    print(email_files)
    # Only the text is kept; the file name of each pair is not needed here.
    content = [txt for _, txt in file_iter(email_files)]
    print(content)


if __name__ == "__main__":
    main()
and a trial run looks like
Please enter email directory: c:\temp
['c:\\temp\\file1.eml', 'c:\\temp\\file2.eml']
['file1 line one\nfile1 line two\nfile1 line three',
'file2 line one\nfile2 line two']

Categories